Skip to content
This repository was archived by the owner on Jan 1, 2021. It is now read-only.

Commit 9c8c4be

Browse files
committed
assignment 2: style transfer
1 parent c032c45 commit 9c8c4be

File tree

3 files changed

+380
-0
lines changed

3 files changed

+380
-0
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
""" Load VGGNet weights needed for the implementation in TensorFlow
2+
of the paper A Neural Algorithm of Artistic Style (Gatys et al., 2016)
3+
4+
Created by Chip Huyen (chiphuyen@cs.stanford.edu)
5+
CS20: "TensorFlow for Deep Learning Research"
6+
cs20.stanford.edu
7+
8+
For more details, please read the assignment handout:
9+
10+
"""
11+
import numpy as np
12+
import scipy.io
13+
import tensorflow as tf
14+
15+
import utils
16+
17+
# VGG-19 parameters file (MatConvNet .mat format, ~535 MB)
VGG_DOWNLOAD_LINK = 'http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat'
# Local file name the weights are cached under after download.
VGG_FILENAME = 'imagenet-vgg-verydeep-19.mat'
# Expected size in bytes of the download; used as a cheap integrity check.
EXPECTED_BYTES = 534904783
22+
class VGG(object):
    """Convolutional part of pretrained VGG-19, built on top of `input_img`.

    Loads MatConvNet-format weights and wires up the conv/pool stack used by
    A Neural Algorithm of Artistic Style (Gatys et al., 2016). Max pooling is
    replaced by average pooling, as the paper suggests it works better for
    style transfer. Each layer's output tensor is attached to the instance as
    an attribute (e.g. self.conv1_1, self.avgpool1).
    """

    def __init__(self, input_img):
        utils.download(VGG_DOWNLOAD_LINK, VGG_FILENAME, EXPECTED_BYTES)
        self.vgg_layers = scipy.io.loadmat(VGG_FILENAME)['layers']
        self.input_img = input_img
        # Mean RGB pixel values VGG-19 was trained with; inputs must be
        # mean-centered before being fed through the network.
        self.mean_pixels = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))

    def _weights(self, layer_idx, expected_layer_name):
        """ Return the weights and biases at layer_idx already trained by VGG.

        The nested indexing follows the MatConvNet cell-array layout of the
        .mat file. The bias is flattened to 1-D so it can be broadcast onto
        the convolution output.
        """
        W = self.vgg_layers[0][layer_idx][0][0][2][0][0]
        b = self.vgg_layers[0][layer_idx][0][0][2][0][1]
        layer_name = self.vgg_layers[0][layer_idx][0][0][0][0]
        # Guard against an off-by-one in the hard-coded indices in load().
        assert layer_name == expected_layer_name
        return W, b.reshape(b.size)

    def conv2d_relu(self, prev_layer, layer_idx, layer_name):
        """ Create a convolution layer with RELU using the weights and
        biases extracted from the VGG model at 'layer_idx'.

        Inputs:
            prev_layer: the output tensor from the previous layer
            layer_idx: the index to current layer in vgg_layers
            layer_name: the string that is the name of the current layer.
                        It's used to specify variable_scope.

        The result tensor is stored as self.<layer_name>.
        """
        with tf.variable_scope(layer_name) as scope:
            W, b = self._weights(layer_idx, layer_name)
            # The pretrained weights are fixed: use constants, not variables.
            W = tf.constant(W, name='weights')
            b = tf.constant(b, name='bias')
            # Stride 1 everywhere: for small images we don't want to skip pixels.
            conv2d = tf.nn.conv2d(prev_layer,
                                  filter=W,
                                  strides=[1, 1, 1, 1],
                                  padding='SAME')
            out = tf.nn.relu(conv2d + b)
        setattr(self, layer_name, out)

    def avgpool(self, prev_layer, layer_name):
        """ Create an average pooling layer. The paper suggests that
        average pooling works better than max pooling.

        Inputs:
            prev_layer: the output tensor from the previous layer
            layer_name: the string that you want to name the layer.
                        It's used to specify variable_scope.

        The result tensor is stored as self.<layer_name>.
        """
        with tf.variable_scope(layer_name):
            # Standard VGG pooling geometry: 2x2 window, stride 2.
            out = tf.nn.avg_pool(prev_layer,
                                 ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1],
                                 padding='SAME')
        setattr(self, layer_name, out)

    def load(self):
        """Build the full conv stack; layer indices match the .mat layout."""
        self.conv2d_relu(self.input_img, 0, 'conv1_1')
        self.conv2d_relu(self.conv1_1, 2, 'conv1_2')
        self.avgpool(self.conv1_2, 'avgpool1')
        self.conv2d_relu(self.avgpool1, 5, 'conv2_1')
        self.conv2d_relu(self.conv2_1, 7, 'conv2_2')
        self.avgpool(self.conv2_2, 'avgpool2')
        self.conv2d_relu(self.avgpool2, 10, 'conv3_1')
        self.conv2d_relu(self.conv3_1, 12, 'conv3_2')
        self.conv2d_relu(self.conv3_2, 14, 'conv3_3')
        self.conv2d_relu(self.conv3_3, 16, 'conv3_4')
        self.avgpool(self.conv3_4, 'avgpool3')
        self.conv2d_relu(self.avgpool3, 19, 'conv4_1')
        self.conv2d_relu(self.conv4_1, 21, 'conv4_2')
        self.conv2d_relu(self.conv4_2, 23, 'conv4_3')
        self.conv2d_relu(self.conv4_3, 25, 'conv4_4')
        self.avgpool(self.conv4_4, 'avgpool4')
        self.conv2d_relu(self.avgpool4, 28, 'conv5_1')
        self.conv2d_relu(self.conv5_1, 30, 'conv5_2')
        self.conv2d_relu(self.conv5_2, 32, 'conv5_3')
        self.conv2d_relu(self.conv5_3, 34, 'conv5_4')
        self.avgpool(self.conv5_4, 'avgpool5')
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
import os
2+
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
3+
import time
4+
5+
import numpy as np
6+
import tensorflow as tf
7+
8+
import load_vgg
9+
import utils
10+
11+
def setup():
    """Create the directories used for checkpoints and generated images."""
    for directory in ('checkpoints', 'outputs'):
        utils.safe_mkdir(directory)
14+
15+
class StyleTransfer(object):
    """Neural style transfer (Gatys et al., 2016) with a pretrained VGG-19.

    Optimizes the pixels of a generated image so its VGG content features
    match the content image and its Gram matrices match the style image.
    """

    def __init__(self, content_img, style_img, img_width, img_height):
        '''
        img_width and img_height are the dimensions we expect from the generated image.
        We will resize input content image and input style image to match this dimension.
        Feel free to alter any hyperparameter here and see how it affects your training.
        '''
        self.img_width = img_width
        self.img_height = img_height
        self.content_img = utils.get_resized_image(content_img, img_width, img_height)
        self.style_img = utils.get_resized_image(style_img, img_width, img_height)
        self.initial_img = utils.generate_noise_image(self.content_img, img_width, img_height)

        # Layers whose activations define the two losses.
        self.content_layer = 'conv4_2'
        self.style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
        # content_w, style_w: corresponding weights for content loss and style loss.
        # Style dominates; content only needs a small pull toward the original.
        self.content_w = 0.01
        self.style_w = 1.0
        # style_layer_w: weights for different style layers. deep layers have more weights
        self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]
        # Global step tracks iterations across checkpoint restarts.
        self.gstep = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
        self.lr = 2.0  # large lr is fine: we optimize pixels, not weights

    def create_input(self):
        '''
        We will use one input_img as a placeholder for the content image,
        style image, and generated image, because:
            1. they have the same dimension
            2. we have to extract the same set of features from them
        We use a variable instead of a placeholder because we're, at the same time,
        training the generated image to get the desirable result.

        Note: image height corresponds to number of rows, not columns.
        '''
        with tf.variable_scope('input') as scope:
            self.input_img = tf.get_variable('in_img',
                                             shape=([1, self.img_height, self.img_width, 3]),
                                             dtype=tf.float32,
                                             initializer=tf.zeros_initializer())

    def load_vgg(self):
        '''
        Load the saved model parameters of VGG-19, using the input_img
        as the input to compute the output at each layer of vgg.

        During training, VGG-19 mean-centered all images and found the mean pixels
        to be [123.68, 116.779, 103.939] along RGB dimensions. We have to subtract
        this mean from our images.
        '''
        self.vgg = load_vgg.VGG(self.input_img)
        self.vgg.load()
        self.content_img -= self.vgg.mean_pixels
        self.style_img -= self.vgg.mean_pixels

    def _content_loss(self, P, F):
        ''' Calculate the loss between the feature representation of the
        content image and the generated image.

        Inputs:
            P: content representation of the content image (numpy array)
            F: content representation of the generated image (tensor)
        Stores the result in self.content_loss.
        '''
        # 1/(4*s) normalization (assignment handout), where s is the number
        # of elements in the feature map — not the paper's plain 0.5.
        self.content_loss = tf.reduce_sum((F - P) ** 2) / (4.0 * P.size)

    def _gram_matrix(self, F, N, M):
        """ Create and return the gram matrix for tensor F.

        F is reshaped to (M, N): M = height*width positions, N = channels.
        The Gram matrix G = F^T F captures channel co-activation statistics.
        """
        F = tf.reshape(F, (M, N))
        return tf.matmul(F, F, transpose_a=True)

    def _single_style_loss(self, a, g):
        """ Calculate the style loss at a certain layer.

        Inputs:
            a: feature representation of the style image at that layer (numpy)
            g: feature representation of the generated image at that layer (tensor)
        Output:
            the style loss at a certain layer (which is E_l in the paper)
        """
        N = a.shape[3]                # number of feature maps (channels)
        M = a.shape[1] * a.shape[2]   # height * width of the feature map
        A = self._gram_matrix(a, N, M)
        G = self._gram_matrix(g, N, M)
        # Coefficient 1/(2NM)^2 as in the paper.
        return tf.reduce_sum((G - A) ** 2 / ((2 * N * M) ** 2))

    def _style_loss(self, A):
        """ Calculate the total style loss as a weighted sum
        of style losses at all style layers.

        A: list of numpy feature maps of the style image, one per style layer.
        Stores the result in self.style_loss.
        """
        n_layers = len(A)
        E = [self._single_style_loss(A[i], getattr(self.vgg, self.style_layers[i]))
             for i in range(n_layers)]
        self.style_loss = sum(self.style_layer_w[i] * E[i] for i in range(n_layers))

    def losses(self):
        """Build content, style, and total loss ops."""
        with tf.variable_scope('losses') as scope:
            with tf.Session() as sess:
                # assign content image to the input variable
                sess.run(self.input_img.assign(self.content_img))
                gen_img_content = getattr(self.vgg, self.content_layer)
                content_img_content = sess.run(gen_img_content)
            self._content_loss(content_img_content, gen_img_content)

            with tf.Session() as sess:
                sess.run(self.input_img.assign(self.style_img))
                style_layers = sess.run([getattr(self.vgg, layer) for layer in self.style_layers])
            self._style_loss(style_layers)

            # Total loss: weighted combination of the two objectives.
            self.total_loss = self.content_w * self.content_loss + self.style_w * self.style_loss

    def optimize(self):
        """Create the optimizer; gstep advances once per training step."""
        self.opt = tf.train.AdamOptimizer(self.lr).minimize(self.total_loss,
                                                            global_step=self.gstep)

    def create_summary(self):
        """Create (and merge) scalar summaries for all the losses."""
        with tf.name_scope('summaries'):
            tf.summary.scalar('content loss', self.content_loss)
            tf.summary.scalar('style loss', self.style_loss)
            tf.summary.scalar('total loss', self.total_loss)
            self.summary_op = tf.summary.merge_all()

    def build(self):
        """Assemble the full graph in dependency order."""
        self.create_input()
        self.load_vgg()
        self.losses()
        self.optimize()
        self.create_summary()

    def train(self, n_iters):
        """Run n_iters optimization steps, periodically saving images,
        summaries, and checkpoints."""
        skip_step = 1
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter('graphs/style_transfer', sess.graph)

            sess.run(self.input_img.assign(self.initial_img))

            # Restore from the latest checkpoint, if any, so training resumes
            # at the saved global step instead of starting over.
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state('checkpoints')
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            initial_step = self.gstep.eval()

            start_time = time.time()
            for index in range(initial_step, n_iters):
                # Report less frequently as training progresses.
                if index >= 5 and index < 20:
                    skip_step = 10
                elif index >= 20:
                    skip_step = 20

                sess.run(self.opt)
                if (index + 1) % skip_step == 0:
                    gen_image, total_loss, summary = sess.run([self.input_img,
                                                               self.total_loss,
                                                               self.summary_op])

                    # add back the mean pixels we subtracted before
                    gen_image = gen_image + self.vgg.mean_pixels
                    writer.add_summary(summary, global_step=index)
                    print('Step {}\n Sum: {:5.1f}'.format(index + 1, np.sum(gen_image)))
                    print(' Loss: {:5.1f}'.format(total_loss))
                    print(' Took: {} seconds'.format(time.time() - start_time))
                    start_time = time.time()

                    filename = 'outputs/%d.png' % (index)
                    utils.save_image(filename, gen_image)

                    if (index + 1) % 20 == 0:
                        saver.save(sess, 'checkpoints/style_transfer', index)
217+
218+
if __name__ == '__main__':
    # Ensure the checkpoint/output directories exist before training.
    setup()
    # 333 x 250 are the (width, height) of the generated image.
    machine = StyleTransfer('content/deadpool.jpg', 'styles/guernica.jpg', 333, 250)
    machine.build()
    machine.train(300)
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
""" Utils needed for the implementation in TensorFlow
2+
of the paper A Neural Algorithm of Artistic Style (Gatys et al., 2016)
3+
4+
Created by Chip Huyen (chiphuyen@cs.stanford.edu)
5+
CS20: "TensorFlow for Deep Learning Research"
6+
cs20.stanford.edu
7+
8+
For more details, please read the assignment handout:
9+
10+
"""
11+
12+
import os
13+
14+
from PIL import Image, ImageOps
15+
import numpy as np
16+
import scipy.misc
17+
from six.moves import urllib
18+
19+
def download(download_link, file_name, expected_bytes):
    """ Download the pretrained VGG-19 model if it's not already downloaded.

    Raises an Exception when the fetched file's size differs from
    expected_bytes (a likely sign of a corrupted or partial download).
    """
    if os.path.exists(file_name):
        print("VGG-19 pre-trained model is ready")
        return
    print("Downloading the VGG pre-trained model. This might take a while ...")
    file_name, _ = urllib.request.urlretrieve(download_link, file_name)
    if os.stat(file_name).st_size != expected_bytes:
        raise Exception('File ' + file_name +
                        ' might be corrupted. You should try downloading it with a browser.')
    print('Successfully downloaded VGG-19 pre-trained model', file_name)
32+
33+
def get_resized_image(img_path, width, height, save=True):
    """Open an image, fit it to (width, height), and return it as a
    float32 array of shape (1, height, width, 3).

    When save is True, a copy prefixed with 'resized_' is written next to
    the original (only if that file doesn't already exist).
    """
    # PIL sizes are (width, height) — column major — hence this ordering.
    image = ImageOps.fit(Image.open(img_path), (width, height), Image.ANTIALIAS)
    if save:
        path_parts = img_path.split('/')
        path_parts[-1] = 'resized_' + path_parts[-1]
        out_path = '/'.join(path_parts)
        if not os.path.exists(out_path):
            image.save(out_path)
    pixels = np.asarray(image, np.float32)
    return np.expand_dims(pixels, 0)
45+
46+
def generate_noise_image(content_image, width, height, noise_ratio=0.6):
    """Blend uniform noise in [-20, 20] with the content image.

    noise_ratio controls how much noise vs. content appears in the result;
    the returned array has shape (1, height, width, 3), dtype float32 noise.
    """
    noise = np.random.uniform(-20, 20, (1, height, width, 3)).astype(np.float32)
    return noise_ratio * noise + (1 - noise_ratio) * content_image
49+
50+
def save_image(path, image):
    """Write a (1, H, W, 3) batch image to `path` as an 8-bit image.

    Pixel values are clipped to [0, 255] before conversion. Uses PIL
    because scipy.misc.imsave was deprecated and removed in SciPy >= 1.2.
    """
    image = image[0]  # drop the batch dimension
    image = np.clip(image, 0, 255).astype('uint8')
    Image.fromarray(image).save(path)
54+
55+
def safe_mkdir(path):
    """ Create a directory if there isn't one already.

    Only an already-existing directory is tolerated; other OS errors
    (e.g. permission denied) propagate instead of being silently swallowed.
    """
    try:
        os.mkdir(path)
    except FileExistsError:
        pass

0 commit comments

Comments
 (0)