[机器学习笔记#5] Neural Style 分析及实验

实现的实验代码是这两天下班后顺着思路往下写的，比较丑陋，回头整理封装一下。有几个值得一提的细节： 1 )虽然标准的VGG16图像识别应用中输入图片尺寸被限制在了224x224，但在这里你可以想用多大用多大（因为forward pass不会传到FC layer，btw无法想象生成224x224的艺术图有卵用？）；2）原文使用的L-BFGS在tensorflow中并没有加入，所以使用AdamOptimizer取代；3）由于要对输入图片求导，所以不要用XXXXXnstant作为输入。。4）这玩意儿调参很麻烦

计算content feature的代码：

<code class="language-python">## compute content feature
with tf.Graph().as_default():
    image=tf.constant(content,dtype=tf.float32)
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points=vgg.vgg_16(image,spatial_squeeze=False)
    ## load vgg16 model    
    init_fn = slim.assign_from_checkpoint_fn(
    os.path.join('./model/', 'vgg_16.ckpt'),
    slim.get_model_variables())
    ## compute content Feature
    content_feature=tf.reshape(end_points['vgg_16/conv4/conv4_2'],shape=[-1])
    with tf.Session() as sess:
        init_fn(sess)
        content_ft_val=sess.run(content_feature)    
</code>

计算style feature的代码（用了许多hard-code）：

<code class="language-python">style_list=['vgg_16/conv1/conv1_1',
            'vgg_16/conv2/conv2_1',
            'vgg_16/conv3/conv3_1',
            'vgg_16/conv4/conv4_1',
            'vgg_16/conv5/conv5_1']
## compute style feature
def style_gram_compute(idx):
    dim=end_points[style_list[idx]].get_shape().as_list()
    style_feature=tf.reshape(end_points[style_list[idx]],shape=[-1,dim[-1]])
    style_gram=tf.reshape(
tf.matmul(style_feature,style_feature,transpose_a=True),
shape=[-1])/(dim[1]*dim[2])
    return style_gram,dim

## compute style feature
with tf.Graph().as_default():
    image=tf.constant(style,dtype=tf.float32)
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points=vgg.vgg_16(image,spatial_squeeze=False)
    ## load vgg16 model    
    init_fn = slim.assign_from_checkpoint_fn(
    os.path.join('./model/', 'vgg_16.ckpt'),
    slim.get_model_variables())
    ## compute content Feature
    style_gram_1,dim1=style_gram_compute(0)
    style_gram_2,dim2=style_gram_compute(1)
    style_gram_3,dim3=style_gram_compute(2)
    style_gram_4,dim4=style_gram_compute(3)
    style_gram_5,dim5=style_gram_compute(4)
    with tf.Session() as sess:
        init_fn(sess)
        style_gram_val_1=sess.run(style_gram_1)    
        style_gram_val_2=sess.run(style_gram_2)
        style_gram_val_3=sess.run(style_gram_3)
        style_gram_val_4=sess.run(style_gram_4)
        style_gram_val_5=sess.run(style_gram_5)
</code>

主优化程序：

<code class="language-python">def style_loss_compute(style_gram_val,target_style_gram):
    style_gram_const=tf.constant(style_gram_val)
    style_loss=tf.reduce_mean(tf.squared_difference(target_style_gram,style_gram_const))
    return style_loss

with tf.Graph().as_default():
    ### generate target image ####
    image=tf.Variable(tf.random_normal(shape=(1,h,w,3),stddev=np.std(content)*0.1,dtype=tf.float32))
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points=vgg.vgg_16(image,spatial_squeeze=False)
    ## load vgg16 model    
    init_fn = slim.assign_from_checkpoint_fn(
    os.path.join('./model/', 'vgg_16.ckpt'),
    slim.get_model_variables())
    ## compute target content Feature
    target_content_feature=tf.reshape(end_points['vgg_16/conv4/conv4_2'],shape=[-1])
    ## compute target style Feature
    target_style_gram_1,_=style_gram_compute(0)
    target_style_gram_2,_=style_gram_compute(1)
    target_style_gram_3,_=style_gram_compute(2)
    target_style_gram_4,_=style_gram_compute(3)
    target_style_gram_5,_=style_gram_compute(4)
    ## define content loss
    content_loss=tf.reduce_sum(tf.squared_difference(target_content_feature,content_ft_val))
    ## define style loss
    style_loss_1=style_loss_compute(style_gram_val_1,target_style_gram_1)
    style_loss_2=style_loss_compute(style_gram_val_2,target_style_gram_2)
    style_loss_3=style_loss_compute(style_gram_val_3,target_style_gram_3)
    style_loss_4=style_loss_compute(style_gram_val_4,target_style_gram_4)
    style_loss_5=style_loss_compute(style_gram_val_5,target_style_gram_5)
    w0=1.0/5
    style_loss=w0*style_loss_1+w0*style_loss_2+w0*style_loss_3+w0*style_loss_4+w0*style_loss_5
    ## define total loss
    total_loss=1e-3*content_loss+style_loss
    ## define train step
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 1.
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           100, 0.95, staircase=True)
    train_op=tf.train.AdamOptimizer(learning_rate).minimize(loss=total_loss,
                                                            var_list=[image],
                                                            global_step=global_step)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        init_fn(sess)
        max_iter=1000
        for iter in range(max_iter):
            _,loss_content_val,loss_style_val=sess.run([train_op,content_loss,style_loss]) 
            sys.stdout.write('\r'+'iteration:%4d-----Loss_content: %0f6-----Loss_style: %0f6-----learning_rate: %0f5'
                             %(iter,loss_content_val,loss_style_val,learning_rate.eval()))
            output=image.eval()
</code>