Skip to content
This repository was archived by the owner on Jan 1, 2021. It is now read-only.

Commit 33d9678

Browse files
committed
code for lecture 04
1 parent 7395c45 commit 33d9678

File tree

2 files changed

+21
-23
lines changed

2 files changed

+21
-23
lines changed

examples/04_word2vec.py

Lines changed: 6 additions & 8 deletions
Original file line number · Diff line number · Diff line change
@@ -39,19 +39,17 @@ def word2vec(dataset):
3939
iterator = dataset.make_initializable_iterator()
4040
center_words, target_words = iterator.get_next()
4141

42-
# Assemble this part of the graph on the CPU. You can change it to GPU if you have GPU
43-
# Step 2: define weights. In word2vec, it's actually the weights that we care about
44-
with tf.name_scope("embed"):
42+
""" Step 2 + 3: define weights and embedding lookup.
43+
In word2vec, it's actually the weights that we care about
44+
"""
45+
with tf.name_scope('embed'):
4546
embed_matrix = tf.get_variable('embed_matrix',
4647
shape=[VOCAB_SIZE, EMBED_SIZE],
4748
initializer=tf.random_uniform_initializer())
49+
embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embedding')
4850

51+
# Step 4: construct variables for NCE loss and define loss function
4952
with tf.name_scope('loss'):
50-
# Step 3: define the inference
51-
embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embed')
52-
53-
# Step 4: define loss function
54-
# construct variables for NCE loss
5553
nce_weight = tf.get_variable('nce_weight', shape=[VOCAB_SIZE, EMBED_SIZE],
5654
initializer=tf.truncated_normal_initializer(stddev=1.0 / (EMBED_SIZE ** 0.5)))
5755
nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))

examples/04_word2vec_visualize.py

Lines changed: 15 additions & 15 deletions
Original file line number · Diff line number · Diff line change
@@ -42,28 +42,28 @@ def __init__(self, dataset, vocab_size, embed_size, batch_size, num_sampled, lea
4242
self.lr = learning_rate
4343
self.global_step = tf.get_variable('global_step', initializer=tf.constant(0), trainable=False)
4444
self.skip_step = SKIP_STEP
45+
self.dataset = dataset
4546

4647
def _import_data(self):
4748
""" Step 1: import data
4849
"""
49-
self.iterator = dataset.make_initializable_iterator()
50-
self.center_words, self.target_words = self.iterator.get_next()
50+
with tf.name_scope('data'):
51+
self.iterator = self.dataset.make_initializable_iterator()
52+
self.center_words, self.target_words = self.iterator.get_next()
5153

5254
def _create_embedding(self):
53-
""" Step 2: define weights. In word2vec, it's actually the weights that we care about """
54-
# Assemble this part of the graph on the CPU. You can change it to GPU if you have GPU
55-
with tf.name_scope("embed"):
55+
""" Step 2 + 3: define weights and embedding lookup.
56+
In word2vec, it's actually the weights that we care about
57+
"""
58+
with tf.name_scope('embed'):
5659
self.embed_matrix = tf.get_variable('embed_matrix',
5760
shape=[self.vocab_size, self.embed_size],
5861
initializer=tf.random_uniform_initializer())
62+
self.embed = tf.nn.embedding_lookup(self.embed_matrix, self.center_words, name='embedding')
5963

6064
def _create_loss(self):
61-
""" Step 3 + 4: define the model + the loss function """
62-
with tf.name_scope("loss"):
63-
# Step 3: define the inference
64-
embed = tf.nn.embedding_lookup(self.embed_matrix, self.center_words, name='embed')
65-
66-
# Step 4: define loss function
65+
""" Step 4: define the loss function """
66+
with tf.name_scope('loss'):
6767
# construct variables for NCE loss
6868
nce_weight = tf.get_variable('nce_weight',
6969
shape=[self.vocab_size, self.embed_size],
@@ -74,7 +74,7 @@ def _create_loss(self):
7474
self.loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
7575
biases=nce_bias,
7676
labels=self.target_words,
77-
inputs=embed,
77+
inputs=self.embed,
7878
num_sampled=self.num_sampled,
7979
num_classes=self.vocab_size), name='loss')
8080
def _create_optimizer(self):
@@ -92,6 +92,7 @@ def _create_summaries(self):
9292

9393
def build_graph(self):
9494
""" Build the graph for our model """
95+
self._import_data()
9596
self._create_embedding()
9697
self._create_loss()
9798
self._create_optimizer()
@@ -157,8 +158,7 @@ def visualize(self, visual_fld, num_visualize):
157158
embedding.tensor_name = embedding_var.name
158159

159160
# link this tensor to its metadata file, in this case the first NUM_VISUALIZE words of vocab
160-
embedding.metadata_path = os.path.join(visual_fld,
161-
'vocab_' + str(num_visualize) + '.tsv')
161+
embedding.metadata_path = os.path.join(visual_fld, 'vocab_' + str(num_visualize) + '.tsv')
162162

163163
# saves a configuration file that TensorBoard will read during startup.
164164
projector.visualize_embeddings(summary_writer, config)
@@ -179,4 +179,4 @@ def main():
179179
model.visualize(VISUAL_FLD, NUM_VISUALIZE)
180180

181181
if __name__ == '__main__':
182-
main()
182+
main()

0 commit comments

Comments (0)