@@ -42,28 +42,28 @@ def __init__(self, dataset, vocab_size, embed_size, batch_size, num_sampled, lea
4242 self .lr = learning_rate
4343 self .global_step = tf .get_variable ('global_step' , initializer = tf .constant (0 ), trainable = False )
4444 self .skip_step = SKIP_STEP
45+ self .dataset = dataset
4546
def _import_data(self):
    """Step 1: wire the input pipeline into the graph.

    Builds an initializable iterator over ``self.dataset`` and exposes the
    (center, target) word tensors it yields as instance attributes.
    """
    with tf.name_scope('data'):
        iterator = self.dataset.make_initializable_iterator()
        center, target = iterator.get_next()
        self.iterator = iterator
        self.center_words = center
        self.target_words = target
def _create_embedding(self):
    """Steps 2 + 3: create the embedding weights and the lookup op.

    In word2vec the embedding matrix itself is the artifact we ultimately
    care about; the lookup selects the rows for the current center words.
    """
    with tf.name_scope('embed'):
        matrix = tf.get_variable(
            'embed_matrix',
            shape=[self.vocab_size, self.embed_size],
            initializer=tf.random_uniform_initializer())
        self.embed_matrix = matrix
        # Row-gather the vectors for this batch's center words.
        self.embed = tf.nn.embedding_lookup(matrix, self.center_words, name='embedding')
6064 def _create_loss (self ):
61- """ Step 3 + 4: define the model + the loss function """
62- with tf .name_scope ("loss" ):
63- # Step 3: define the inference
64- embed = tf .nn .embedding_lookup (self .embed_matrix , self .center_words , name = 'embed' )
65-
66- # Step 4: define loss function
65+ """ Step 4: define the loss function """
66+ with tf .name_scope ('loss' ):
6767 # construct variables for NCE loss
6868 nce_weight = tf .get_variable ('nce_weight' ,
6969 shape = [self .vocab_size , self .embed_size ],
@@ -74,7 +74,7 @@ def _create_loss(self):
7474 self .loss = tf .reduce_mean (tf .nn .nce_loss (weights = nce_weight ,
7575 biases = nce_bias ,
7676 labels = self .target_words ,
77- inputs = embed ,
77+ inputs = self . embed ,
7878 num_sampled = self .num_sampled ,
7979 num_classes = self .vocab_size ), name = 'loss' )
8080 def _create_optimizer (self ):
@@ -92,6 +92,7 @@ def _create_summaries(self):
9292
9393 def build_graph (self ):
9494 """ Build the graph for our model """
95+ self ._import_data ()
9596 self ._create_embedding ()
9697 self ._create_loss ()
9798 self ._create_optimizer ()
@@ -157,8 +158,7 @@ def visualize(self, visual_fld, num_visualize):
157158 embedding .tensor_name = embedding_var .name
158159
159160 # link this tensor to its metadata file, in this case the first NUM_VISUALIZE words of vocab
160- embedding .metadata_path = os .path .join (visual_fld ,
161- 'vocab_' + str (num_visualize ) + '.tsv' )
161+ embedding .metadata_path = os .path .join (visual_fld , 'vocab_' + str (num_visualize ) + '.tsv' )
162162
163163 # saves a configuration file that TensorBoard will read during startup.
164164 projector .visualize_embeddings (summary_writer , config )
@@ -179,4 +179,4 @@ def main():
179179 model .visualize (VISUAL_FLD , NUM_VISUALIZE )
180180
181181if __name__ == '__main__' :
182- main ()
182+ main ()
0 commit comments