Skip to content
This repository was archived by the owner on Jan 1, 2021. It is now read-only.

Commit 33d9678

Browse files
committed
code for lecture 04
1 parent 7395c45 commit 33d9678

File tree

2 files changed

+21
-23
lines changed

2 files changed

+21
-23
lines changed

examples/04_word2vec.py

Lines changed: 6 additions & 8 deletions
Original file line number · Diff line number · Diff line change
@@ -39,19 +39,17 @@ def word2vec(dataset):
3939
iterator = dataset.make_initializable_iterator()
4040
center_words, target_words = iterator.get_next()
4141

42-
# Assemble this part of the graph on the CPU. You can change it to GPU if you have GPU
43-
# Step 2: define weights. In word2vec, it's actually the weights that we care about
44-
with tf.name_scope("embed"):
42+
""" Step 2 + 3: define weights and embedding lookup.
43+
In word2vec, it's actually the weights that we care about
44+
"""
45+
with tf.name_scope('embed'):
4546
embed_matrix = tf.get_variable('embed_matrix',
4647
shape=[VOCAB_SIZE, EMBED_SIZE],
4748
initializer=tf.random_uniform_initializer())
49+
embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embedding')
4850

51+
# Step 4: construct variables for NCE loss and define loss function
4952
with tf.name_scope('loss'):
50-
# Step 3: define the inference
51-
embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embed')
52-
53-
# Step 4: define loss function
54-
# construct variables for NCE loss
5553
nce_weight = tf.get_variable('nce_weight', shape=[VOCAB_SIZE, EMBED_SIZE],
5654
initializer=tf.truncated_normal_initializer(stddev=1.0 / (EMBED_SIZE ** 0.5)))
5755
nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))

examples/04_word2vec_visualize.py

Lines changed: 15 additions & 15 deletions
Original file line number · Diff line number · Diff line change
@@ -42,28 +42,28 @@ def __init__(self, dataset, vocab_size, embed_size, batch_size, num_sampled, lea
4242
self.lr = learning_rate
4343
self.global_step = tf.get_variable('global_step', initializer=tf.constant(0), trainable=False)
4444
self.skip_step = SKIP_STEP
45+
self.dataset = dataset
4546

4647
def _import_data(self):
4748
""" Step 1: import data
4849
"""
49-
self.iterator = dataset.make_initializable_iterator()
50-
self.center_words, self.target_words = self.iterator.get_next()
50+
with tf.name_scope('data'):
51+
self.iterator = self.dataset.make_initializable_iterator()
52+
self.center_words, self.target_words = self.iterator.get_next()
5153

5254
def _create_embedding(self):
53-
""" Step 2: define weights. In word2vec, it's actually the weights that we care about """
54-
# Assemble this part of the graph on the CPU. You can change it to GPU if you have GPU
55-
with tf.name_scope("embed"):
55+
""" Step 2 + 3: define weights and embedding lookup.
56+
In word2vec, it's actually the weights that we care about
57+
"""
58+
with tf.name_scope('embed'):
5659
self.embed_matrix = tf.get_variable('embed_matrix',
5760
shape=[self.vocab_size, self.embed_size],
5861
initializer=tf.random_uniform_initializer())
62+
self.embed = tf.nn.embedding_lookup(self.embed_matrix, self.center_words, name='embedding')
5963

6064
def _create_loss(self):
61-
""" Step 3 + 4: define the model + the loss function """
62-
with tf.name_scope("loss"):
63-
# Step 3: define the inference
64-
embed = tf.nn.embedding_lookup(self.embed_matrix, self.center_words, name='embed')
65-
66-
# Step 4: define loss function
65+
""" Step 4: define the loss function """
66+
with tf.name_scope('loss'):
6767
# construct variables for NCE loss
6868
nce_weight = tf.get_variable('nce_weight',
6969
shape=[self.vocab_size, self.embed_size],
@@ -74,7 +74,7 @@ def _create_loss(self):
7474
self.loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
7575
biases=nce_bias,
7676
labels=self.target_words,
77-
inputs=embed,
77+
inputs=self.embed,
7878
num_sampled=self.num_sampled,
7979
num_classes=self.vocab_size), name='loss')
8080
def _create_optimizer(self):
@@ -92,6 +92,7 @@ def _create_summaries(self):
9292

9393
def build_graph(self):
9494
""" Build the graph for our model """
95+
self._import_data()
9596
self._create_embedding()
9697
self._create_loss()
9798
self._create_optimizer()
@@ -157,8 +158,7 @@ def visualize(self, visual_fld, num_visualize):
157158
embedding.tensor_name = embedding_var.name
158159

159160
# link this tensor to its metadata file, in this case the first NUM_VISUALIZE words of vocab
160-
embedding.metadata_path = os.path.join(visual_fld,
161-
'vocab_' + str(num_visualize) + '.tsv')
161+
embedding.metadata_path = os.path.join(visual_fld, 'vocab_' + str(num_visualize) + '.tsv')
162162

163163
# saves a configuration file that TensorBoard will read during startup.
164164
projector.visualize_embeddings(summary_writer, config)
@@ -179,4 +179,4 @@ def main():
179179
model.visualize(VISUAL_FLD, NUM_VISUALIZE)
180180

181181
if __name__ == '__main__':
182-
main()
182+
main()

0 commit comments

Comments (0)