Skip to content
This repository was archived by the owner on Jan 1, 2021. It is now read-only.

Commit e8dbcd6

Browse files
committed
update to tf 1.2 and python3
1 parent 51c01e5 commit e8dbcd6

58 files changed

Lines changed: 8750 additions & 245 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ This repository contains code examples for the course CS 20SI: TensorFlow for De
33
It will be updated as the class progresses. <br>
44
Detailed syllabus and lecture notes can be found here http://cs20si.stanford.edu
55

6+
# Note (as of July 11, 2017)
7+
I've updated the code to TensorFlow 1.2 and Python 3, except the code for the chatbot. I will update the code for the chatbot soon.
8+
9+
610
## Models include: <br>
711
### In the folder "examples": <br>
812
Linear Regression with Chicago's Fire-Theft dataset<br>

assignments/chatbot/chatbot.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
1919

2020
import argparse
2121
import os
22+
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
2223
import random
2324
import sys
2425
import time
@@ -33,7 +34,7 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
3334
def _get_random_bucket(train_buckets_scale):
3435
""" Get a random bucket from which to choose a training sample """
3536
rand = random.random()
36-
return min([i for i in xrange(len(train_buckets_scale))
37+
return min([i for i in range(len(train_buckets_scale))
3738
if train_buckets_scale[i] > rand])
3839

3940
def _assert_lengths(encoder_size, decoder_size, encoder_inputs, decoder_inputs, decoder_masks):
@@ -59,9 +60,9 @@ def run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_
5960

6061
# input feed: encoder inputs, decoder inputs, target_weights, as provided.
6162
input_feed = {}
62-
for step in xrange(encoder_size):
63+
for step in range(encoder_size):
6364
input_feed[model.encoder_inputs[step].name] = encoder_inputs[step]
64-
for step in xrange(decoder_size):
65+
for step in range(decoder_size):
6566
input_feed[model.decoder_inputs[step].name] = decoder_inputs[step]
6667
input_feed[model.decoder_masks[step].name] = decoder_masks[step]
6768

@@ -75,7 +76,7 @@ def run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_
7576
model.losses[bucket_id]] # loss for this batch.
7677
else:
7778
output_feed = [model.losses[bucket_id]] # loss for this batch.
78-
for step in xrange(decoder_size): # output logits.
79+
for step in range(decoder_size): # output logits.
7980
output_feed.append(model.outputs[bucket_id][step])
8081

8182
outputs = sess.run(output_feed, input_feed)
@@ -91,12 +92,12 @@ def _get_buckets():
9192
"""
9293
test_buckets = data.load_data('test_ids.enc', 'test_ids.dec')
9394
data_buckets = data.load_data('train_ids.enc', 'train_ids.dec')
94-
train_bucket_sizes = [len(data_buckets[b]) for b in xrange(len(config.BUCKETS))]
95+
train_bucket_sizes = [len(data_buckets[b]) for b in range(len(config.BUCKETS))]
9596
print("Number of samples in each bucket:\n", train_bucket_sizes)
9697
train_total_size = sum(train_bucket_sizes)
9798
# list of increasing numbers from 0 to 1 that we'll use to select a bucket.
9899
train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
99-
for i in xrange(len(train_bucket_sizes))]
100+
for i in range(len(train_bucket_sizes))]
100101
print("Bucket scale:\n", train_buckets_scale)
101102
return test_buckets, data_buckets, train_buckets_scale
102103

@@ -117,7 +118,7 @@ def _check_restore_parameters(sess, saver):
117118

118119
def _eval_test_set(sess, model, test_buckets):
119120
""" Evaluate on the test set. """
120-
for bucket_id in xrange(len(config.BUCKETS)):
121+
for bucket_id in range(len(config.BUCKETS)):
121122
if len(test_buckets[bucket_id]) == 0:
122123
print(" Test: empty bucket %d" % (bucket_id))
123124
continue
@@ -175,7 +176,7 @@ def _get_user_input():
175176

176177
def _find_right_bucket(length):
177178
""" Find the proper bucket for an encoder input based on its length """
178-
return min([b for b in xrange(len(config.BUCKETS))
179+
return min([b for b in range(len(config.BUCKETS))
179180
if config.BUCKETS[b][0] >= length])
180181

181182
def _construct_response(output_logits, inv_dec_vocab):

assignments/chatbot/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
4747
# [37049, 33519, 30223, 33513, 37371]
4848
# BUCKETS = [(8, 10), (12, 14), (16, 19), (23, 26), (39, 43)]
4949

50-
BUCKETS = [(8, 10), (12, 14), (16, 19)]
50+
# BUCKETS = [(8, 10), (12, 14), (16, 19)]
51+
BUCKETS = [(16, 19)]
5152

5253
NUM_LAYERS = 3
5354
HIDDEN_SIZE = 256

assignments/chatbot/data.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
1717
"""
1818
from __future__ import print_function
1919

20+
import os
2021
import random
2122
import re
22-
import os
2323

2424
import numpy as np
2525

@@ -215,9 +215,9 @@ def _reshape_batch(inputs, size, batch_size):
215215
""" Create batch-major inputs. Batch inputs are just re-indexed inputs
216216
"""
217217
batch_inputs = []
218-
for length_id in xrange(size):
218+
for length_id in range(size):
219219
batch_inputs.append(np.array([inputs[batch_id][length_id]
220-
for batch_id in xrange(batch_size)], dtype=np.int32))
220+
for batch_id in range(batch_size)], dtype=np.int32))
221221
return batch_inputs
222222

223223

@@ -227,7 +227,7 @@ def get_batch(data_bucket, bucket_id, batch_size=1):
227227
encoder_size, decoder_size = config.BUCKETS[bucket_id]
228228
encoder_inputs, decoder_inputs = [], []
229229

230-
for _ in xrange(batch_size):
230+
for _ in range(batch_size):
231231
encoder_input, decoder_input = random.choice(data_bucket)
232232
# pad both encoder and decoder, reverse the encoder
233233
encoder_inputs.append(list(reversed(_pad_input(encoder_input, encoder_size))))
@@ -239,9 +239,9 @@ def get_batch(data_bucket, bucket_id, batch_size=1):
239239

240240
# create decoder_masks to be 0 for decoders that are padding.
241241
batch_masks = []
242-
for length_id in xrange(decoder_size):
242+
for length_id in range(decoder_size):
243243
batch_mask = np.ones(batch_size, dtype=np.float32)
244-
for batch_id in xrange(batch_size):
244+
for batch_id in range(batch_size):
245245
# we set mask to 0 if the corresponding target is a PAD symbol.
246246
# the corresponding decoder is decoder_input shifted by 1 forward.
247247
if length_id < decoder_size - 1:

assignments/chatbot/model.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@ def _create_placeholders(self):
3535
# Feeds for inputs. It's a list of placeholders
3636
print('Create placeholders')
3737
self.encoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='encoder{}'.format(i))
38-
for i in xrange(config.BUCKETS[-1][0])]
38+
for i in range(config.BUCKETS[-1][0])]
3939
self.decoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='decoder{}'.format(i))
40-
for i in xrange(config.BUCKETS[-1][1] + 1)]
40+
for i in range(config.BUCKETS[-1][1] + 1)]
4141
self.decoder_masks = [tf.placeholder(tf.float32, shape=[None], name='mask{}'.format(i))
42-
for i in xrange(config.BUCKETS[-1][1] + 1)]
42+
for i in range(config.BUCKETS[-1][1] + 1)]
4343

4444
# Our targets are decoder inputs shifted by one (to ignore <s> symbol)
4545
self.targets = self.decoder_inputs[1:]
@@ -85,7 +85,7 @@ def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
8585
softmax_loss_function=self.softmax_loss_function)
8686
# If we use output projection, we need to project outputs for decoding.
8787
if self.output_projection:
88-
for bucket in xrange(len(config.BUCKETS)):
88+
for bucket in range(len(config.BUCKETS)):
8989
self.outputs[bucket] = [tf.matmul(output,
9090
self.output_projection[0]) + self.output_projection[1]
9191
for output in self.outputs[bucket]]
@@ -111,7 +111,7 @@ def _creat_optimizer(self):
111111
self.gradient_norms = []
112112
self.train_ops = []
113113
start = time.time()
114-
for bucket in xrange(len(config.BUCKETS)):
114+
for bucket in range(len(config.BUCKETS)):
115115

116116
clipped_grads, norm = tf.clip_by_global_norm(tf.gradients(self.losses[bucket],
117117
trainables),
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
x = tf.random_uniform([]) # Empty array as shape creates a scalar.
1616
y = tf.random_uniform([])
17-
out = tf.cond(tf.less(x, y), lambda: tf.add(x, y), lambda: tf.sub(x, y))
17+
out = tf.cond(tf.greater(x, y), lambda: tf.add(x, y), lambda: tf.subtract(x, y))
1818

1919
###############################################################################
2020
# 1b: Create two 0-d tensors x and y randomly selected from -1 and 1.
@@ -75,7 +75,7 @@
7575
###############################################################################
7676
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
7777
# as long as they are from the same distribution.
78-
# Use tf.less() and tf.select() to return:
78+
# Use tf.cond() to return:
7979
# - The mean squared error of (x - y) if the average of all elements in (x - y)
8080
# is negative, or
8181
# - The sum of absolute value of all elements in the tensor (x - y) otherwise.
Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
Solution to simple TensorFlow exercises
33
For the problems
44
"""
5-
65
import tensorflow as tf
76

87
###############################################################################
@@ -14,7 +13,7 @@
1413

1514
x = tf.random_uniform([]) # Empty array as shape creates a scalar.
1615
y = tf.random_uniform([])
17-
out = tf.cond(tf.greater(x, y), lambda: tf.add(x, y), lambda: tf.sub(x, y))
16+
out = tf.cond(tf.greater(x, y), lambda: tf.add(x, y), lambda: tf.subtract(x, y))
1817

1918
###############################################################################
2019
# 1b: Create two 0-d tensors x and y randomly selected from -1 and 1.
@@ -25,8 +24,11 @@
2524
x = tf.random_uniform([], -1, 1, dtype=tf.float32)
2625
y = tf.random_uniform([], -1, 1, dtype=tf.float32)
2726
out = tf.case({tf.less(x, y): lambda: tf.add(x, y),
28-
tf.greater(x, y): lambda: tf.sub(x, y)},
27+
tf.greater(x, y): lambda: tf.subtract(x, y)},
2928
default=lambda: tf.constant(0.0), exclusive=True)
29+
print(x)
30+
sess = tf.InteractiveSession()
31+
print(sess.run(x))
3032

3133
###############################################################################
3234
# 1c: Create the tensor x of the value [[0, -2, -1], [0, 1, 2]]
@@ -90,7 +92,7 @@
9092
###############################################################################
9193
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
9294
# as long as they are from the same distribution.
93-
# Use tf.less() and tf.select() to return:
95+
# Use tf.cond() to return:
9496
# - The mean squared error of (x - y) if the average of all elements in (x - y)
9597
# is negative, or
9698
# - The sum of absolute value of all elements in the tensor (x - y) otherwise.
@@ -100,7 +102,6 @@
100102
x = tf.random_normal([300], mean=5, stddev=1)
101103
y = tf.random_normal([300], mean=5, stddev=1)
102104
average = tf.reduce_mean(x - y)
103-
condition = tf.less(average, 0)
104-
left_op = tf.reduce_mean(tf.square(x - y))
105-
right_op = tf.reduce_sum(tf.abs(x - y))
106-
out = tf.select(condition, left_op, right_op)
105+
def f1(): return tf.reduce_mean(tf.square(x - y))
106+
def f2(): return tf.reduce_sum(tf.abs(x - y))
107+
out = tf.cond(average < 0, f1, f2)
23.6 KB
Loading

assignments/style_transfer/style_transfer.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from __future__ import print_function
1010

1111
import os
12+
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
1213
import time
1314

1415
import numpy as np
@@ -26,6 +27,9 @@
2627
IMAGE_WIDTH = 333
2728
NOISE_RATIO = 0.6 # percentage of weight of the noise for intermixing with the content image
2829

30+
CONTENT_WEIGHT = 0.01
31+
STYLE_WEIGHT = 1
32+
2933
# Layers used for style features. You can change this.
3034
STYLE_LAYERS = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
3135
W = [0.5, 1.0, 1.5, 3.0, 4.0] # give more weights to deeper layers.
@@ -62,13 +66,14 @@ def _create_content_loss(p, f):
6266
the content loss
6367
6468
"""
65-
pass
69+
return tf.reduce_sum((f - p) ** 2) / (4.0 * p.size)
6670

6771
def _gram_matrix(F, N, M):
6872
""" Create and return the gram matrix for tensor F
6973
Hint: you'll first have to reshape F
7074
"""
71-
pass
75+
F = tf.reshape(F, (M, N))
76+
return tf.matmul(tf.transpose(F), F)
7277

7378
def _single_style_loss(a, g):
7479
""" Calculate the style loss at a certain layer
@@ -82,7 +87,11 @@ def _single_style_loss(a, g):
8287
2. we'll use the same coefficient for style loss as in the paper
8388
3. a and g are feature representation, not gram matrices
8489
"""
85-
pass
90+
N = a.shape[3] # number of filters
91+
M = a.shape[1] * a.shape[2] # height times width of the feature map
92+
A = _gram_matrix(a, N, M)
93+
G = _gram_matrix(g, N, M)
94+
return tf.reduce_sum((G - A) ** 2 / ((2 * N * M) ** 2))
8695

8796
def _create_style_loss(A, model):
8897
""" Return the total style loss
@@ -92,7 +101,7 @@ def _create_style_loss(A, model):
92101

93102
###############################
94103
## TO DO: return total style loss
95-
pass
104+
return sum([W[i] * E[i] for i in range(n_layers)])
96105
###############################
97106

98107
def _create_losses(model, input_image, content_image, style_image):
@@ -110,7 +119,7 @@ def _create_losses(model, input_image, content_image, style_image):
110119
##########################################
111120
## TO DO: create total loss.
112121
## Hint: don't forget the content loss and style loss weights
113-
122+
total_loss = CONTENT_WEIGHT * content_loss + STYLE_WEIGHT * style_loss
114123
##########################################
115124

116125
return content_loss, style_loss, total_loss
@@ -119,7 +128,14 @@ def _create_summary(model):
119128
""" Create summary ops necessary
120129
Hint: don't forget to merge them
121130
"""
122-
pass
131+
with tf.name_scope('summaries'):
132+
tf.summary.scalar('content loss', model['content_loss'])
133+
tf.summary.scalar('style loss', model['style_loss'])
134+
tf.summary.scalar('total loss', model['total_loss'])
135+
tf.summary.histogram('histogram content loss', model['content_loss'])
136+
tf.summary.histogram('histogram style loss', model['style_loss'])
137+
tf.summary.histogram('histogram total loss', model['total_loss'])
138+
return tf.summary.merge_all()
123139

124140
def train(model, generated_image, initial_image):
125141
""" Train your model.
@@ -132,6 +148,9 @@ def train(model, generated_image, initial_image):
132148
## TO DO:
133149
## 1. initialize your variables
134150
## 2. create writer to write your graph
151+
saver = tf.train.Saver()
152+
sess.run(tf.global_variables_initializer())
153+
writer = tf.summary.FileWriter('graphs', sess.graph)
135154
###############################
136155
sess.run(generated_image.assign(initial_image))
137156
ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
@@ -150,6 +169,8 @@ def train(model, generated_image, initial_image):
150169
if (index + 1) % skip_step == 0:
151170
###############################
152171
## TO DO: obtain generated image and loss
172+
gen_image, total_loss, summary = sess.run([generated_image, model['total_loss'],
173+
model['summary_op']])
153174

154175
###############################
155176
gen_image = gen_image + MEAN_PIXELS
@@ -172,9 +193,11 @@ def main():
172193
input_image = tf.Variable(np.zeros([1, IMAGE_HEIGHT, IMAGE_WIDTH, 3]), dtype=tf.float32)
173194

174195
utils.download(VGG_DOWNLOAD_LINK, VGG_MODEL, EXPECTED_BYTES)
196+
utils.make_dir('checkpoints')
197+
utils.make_dir('outputs')
175198
model = vgg_model.load_vgg(VGG_MODEL, input_image)
176199
model['global_step'] = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
177-
200+
178201
content_image = utils.get_resized_image(CONTENT_IMAGE, IMAGE_HEIGHT, IMAGE_WIDTH)
179202
content_image = content_image - MEAN_PIXELS
180203
style_image = utils.get_resized_image(STYLE_IMAGE, IMAGE_HEIGHT, IMAGE_WIDTH)
@@ -184,7 +207,8 @@ def main():
184207
input_image, content_image, style_image)
185208
###############################
186209
## TO DO: create optimizer
187-
## model['optimizer'] = ...
210+
model['optimizer'] = tf.train.AdamOptimizer(LR).minimize(model['total_loss'],
211+
global_step=model['global_step'])
188212
###############################
189213
model['summary_op'] = _create_summary(model)
190214

22.9 KB
Loading

0 commit comments

Comments
 (0)