딥러닝의 정석 예제 코드 chapter 6 임베딩과 표상학습 autoencoder mnist

본 글은 딥러닝의 정석(Fundamentals of Deep Learning) 6장 임베딩과 표상학습의 autoencoder mnist 예제와 PCA와의 비교 예제 소스코드를 싣고 있다.

[Autoencoder mnist 소스 코드]

딥러닝의 정석의 예제 코드는 github에서 받을 수 있으나 몇몇 구문에서 오류가 발생하여 수정했다. 학습데이터는 MNIST가 사용되었으며, n_code는 책의 예제와 다르게 입력을 받지 않고, 2로 고정하였다.
 
<구현된 autoencoder 모델 그래프>

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python import control_flow_ops
import os,shutil
import matplotlib.pyplot as plt
import numpy as np
from io import BytesIO
from IPython.display import clear_output, Image, display, HTML

# Module-level setup: clear the default graph, seed it, and load MNIST.
tf.reset_default_graph()
# NOTE(review): this seeds the default graph that is current at import time.
# The training script below builds its model inside a separate tf.Graph(),
# so confirm that this seed has the intended effect there.
tf.set_random_seed(777)
# Labels are loaded one-hot, but the autoencoder only consumes the images.
mnist = input_data.read_data_sets("../MNIST_data/",one_hot=True)

# Architecture: unit counts of the three hidden layers on each side of the
# code layer (the decoder mirrors the encoder).
n_encoder_hidden_1 = 1000
n_encoder_hidden_2 = 500
n_encoder_hidden_3 = 250
n_decoder_hidden_1 = 250
n_decoder_hidden_2 = 500
n_decoder_hidden_3 = 1000

# Parameters
learning_rate= 0.001  # passed to the Adam optimizer in training()
training_epoches = 200  # number of passes over the training set
batch_size = 100  # examples per training minibatch
display_step = 1  # validate, log and checkpoint every `display_step` epochs


def layer_batch_norm(x, n_out, phase_train):
    """Batch-normalize the pre-activations of a fully connected layer.

    Creates the variables "beta" (initialized to 0.0) and "gamma"
    (initialized to 1.0), each of shape [n_out], in the current variable
    scope, so every call must happen inside its own scope.

    Args:
        x: tensor of pre-activations; it is reshaped to [-1, 1, 1, n_out]
            for the normalization op.
        n_out: number of units in the layer.
        phase_train: boolean tensor. When true, the batch moments are used
            and their exponential moving averages are updated; otherwise
            the moving averages are used.

    Returns:
        The normalized tensor, reshaped to [-1, n_out].
    """
    offset = tf.get_variable(
        "beta", [n_out],
        initializer=tf.constant_initializer(value=0.0, dtype=tf.float32))
    scale = tf.get_variable(
        "gamma", [n_out],
        initializer=tf.constant_initializer(value=1.0, dtype=tf.float32))

    # Moments over axis 0, plus moving averages of them (decay 0.9).
    batch_mean, batch_var = tf.nn.moments(x, [0], name='moments')
    averager = tf.train.ExponentialMovingAverage(decay=0.9)
    update_averages = averager.apply([batch_mean, batch_var])
    running_mean = averager.average(batch_mean)
    running_var = averager.average(batch_var)

    def batch_statistics():
        # The control dependency forces the moving-average update to run
        # whenever the batch moments are consumed.
        with tf.control_dependencies([update_averages]):
            return tf.identity(batch_mean), tf.identity(batch_var)

    def running_statistics():
        return running_mean, running_var

    mean, var = control_flow_ops.cond(
        phase_train, batch_statistics, running_statistics)

    variance_epsilon = 1e-3
    scale_after_normalization = True
    x_4d = tf.reshape(x, [-1, 1, 1, n_out])
    normed_4d = tf.nn.batch_norm_with_global_normalization(
        x_4d, mean, var, offset, scale,
        variance_epsilon, scale_after_normalization)
    return tf.reshape(normed_4d, [-1, n_out])

def layer(input, weight_shape, bias_shape, pahse_train):
    """Build one fully connected layer: affine map, batch norm, sigmoid.

    Creates the variables 'W' and 'b' in the current variable scope, so
    every call must happen inside its own scope.

    Args:
        input: tensor that is matrix-multiplied with W.
        weight_shape: [n_in, n_out]; n_in also sets the initializer's
            standard deviation, sqrt(1 / n_in).
        bias_shape: shape of the bias variable.
        pahse_train: boolean tensor forwarded to layer_batch_norm to select
            training or inference statistics. The misspelled name is kept
            so existing keyword callers keep working.

    Returns:
        The sigmoid of the batch-normalized pre-activations.
    """
    weight_init = tf.random_normal_initializer(
        stddev=(1.0 / weight_shape[0]) ** 0.5)
    bias_init = tf.constant_initializer(value=0)
    W = tf.get_variable('W', weight_shape, initializer=weight_init)
    b = tf.get_variable('b', bias_shape, initializer=bias_init)
    logits = tf.matmul(input, W) + b
    # Use the argument itself. The body previously read `phase_train`, which
    # ignored the parameter and only resolved through a module global.
    normed = layer_batch_norm(logits, weight_shape[1], pahse_train)
    return tf.nn.sigmoid(normed)

def encoder(x, n_x, n_code, phase_train):
    """Stack the encoder: three hidden layers followed by the code layer.

    Args:
        x: input tensor fed to the first layer.
        n_x: number of input features.
        n_code: number of units in the code (embedding) layer.
        phase_train: boolean tensor forwarded to every layer.

    Returns:
        The output tensor of the 'code' layer.
    """
    stages = (
        ('hidden_1', n_encoder_hidden_1),
        ('hidden_2', n_encoder_hidden_2),
        ('hidden_3', n_encoder_hidden_3),
        ('code', n_code),
    )

    activation, n_in = x, n_x
    with tf.variable_scope('encoder'):
        for scope_name, n_units in stages:
            # Each layer gets its own scope so its variables stay distinct.
            with tf.variable_scope(scope_name):
                activation = layer(
                    activation, [n_in, n_units], [n_units], phase_train)
            n_in = n_units

    return activation

def decoder(code, n_code,n_out, phase_train):
    """Stack the decoder: three hidden layers followed by the output layer.

    Args:
        code: tensor produced by the encoder's code layer.
        n_code: number of units in the code layer.
        n_out: number of units in the reconstruction.
        phase_train: boolean tensor forwarded to every layer.

    Returns:
        The output tensor of the 'out' layer.
    """
    stages = (
        ('hidden_1', n_decoder_hidden_1),
        ('hidden_2', n_decoder_hidden_2),
        ('hidden_3', n_decoder_hidden_3),
        ('out', n_out),
    )

    activation, n_in = code, n_code
    with tf.variable_scope('decoder'):
        for scope_name, n_units in stages:
            # Each layer gets its own scope so its variables stay distinct.
            with tf.variable_scope(scope_name):
                activation = layer(
                    activation, [n_in, n_units], [n_units], phase_train)
            n_in = n_units

    return activation

def loss(output, x):
    """Return the mean L2 reconstruction error.

    The squared difference between `output` and `x` is summed over axis 1,
    square-rooted, and the resulting norms are averaged.
    """
    squared_error = tf.square(tf.subtract(output, x))
    per_example_norm = tf.sqrt(tf.reduce_sum(squared_error, 1))
    return tf.reduce_mean(per_example_norm)

def training(cost, global_step):
    """Create the Adam training op for `cost`.

    Also registers a 'cost' scalar summary. The step size comes from the
    module-level `learning_rate`.

    Args:
        cost: scalar tensor to minimize.
        global_step: variable passed to `minimize` as its step counter.

    Returns:
        The op returned by the optimizer's `minimize`.
    """
    tf.summary.scalar('cost', cost)
    adam = tf.train.AdamOptimizer(
        learning_rate=learning_rate,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-08,
        use_locking=False,
        name='Adam',
    )
    return adam.minimize(cost, global_step=global_step)

def image_summary(label, tensor):
    """Register `tensor` as an image summary named `label`.

    The tensor is reshaped to [-1, 28, 28, 1] first.
    """
    as_images = tf.reshape(tensor, [-1, 28, 28, 1])
    return tf.summary.image(label, as_images)

def evaluate(output, x):
    """Build the validation loss and its summaries.

    Registers image summaries for the inputs and the reconstructions and a
    'val_cost' scalar summary.

    Returns:
        The mean L2 norm of `output - x`, with the norm taken over axis 1.
    """
    for tag, images in (('input_image', x), ('out_image', output)):
        image_summary(tag, images)

    diff = tf.subtract(output, x, name='val_diff')
    per_example_norm = tf.sqrt(tf.reduce_sum(tf.square(diff), 1))
    val_loss = tf.reduce_mean(per_example_norm)
    tf.summary.scalar('val_cost', val_loss)
    return val_loss


if __name__ == '__main__':
    # Train the autoencoder on MNIST with a fixed 2-unit code layer, writing
    # summaries and checkpoints to `log_dir`, then report the test loss.
    n_code = 2
    log_dir = 'mnist_autoencoder_logs/'

    # Start every run from an empty log directory.
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir, ignore_errors=True)

    with tf.Graph().as_default():
        # The graph-level seed belongs to one graph. This is a fresh graph,
        # so it has to be seeded here for variable initialization to be
        # seeded at all.
        tf.set_random_seed(777)

        with tf.variable_scope('autoencoder_model'):

            x = tf.placeholder(tf.float32, [None, 784], name='input')
            phase_train = tf.placeholder(tf.bool, name='phase_train')

            code = encoder(x, 784, int(n_code), phase_train)
            output = decoder(code, int(n_code), 784, phase_train)

            cost = loss(output, x)

            global_step = tf.Variable(0, name='global_step', trainable=False)

            train_op = training(cost, global_step)
            eval_op = evaluate(output, x)

            summary_op = tf.summary.merge_all()
            saver = tf.train.Saver(max_to_keep=200)

            # The context manager closes the session even if training fails.
            with tf.Session() as sess:
                # `graph=` replaces the deprecated `graph_def=` argument.
                summary_writer = tf.summary.FileWriter(log_dir, graph=sess.graph)
                try:
                    sess.run(tf.global_variables_initializer())

                    # Constant across epochs, so compute it once.
                    total_batch = mnist.train.num_examples // batch_size

                    # Training Cycle
                    for epoch in range(training_epoches):
                        avg_cost = 0

                        for i in range(total_batch):
                            # Labels are not used by the autoencoder.
                            minibatch_x, minibatch_y = mnist.train.next_batch(batch_size)
                            _, train_cost = sess.run(
                                [train_op, cost],
                                feed_dict={x: minibatch_x, phase_train: True})
                            avg_cost += train_cost

                        avg_cost /= total_batch

                        if epoch % display_step == 0:
                            summary_str, val_loss = sess.run(
                                [summary_op, eval_op],
                                feed_dict={x: mnist.validation.images, phase_train: False})
                            print('epoch:', epoch+1,' cost:',avg_cost,' validation loss:',val_loss)
                            summary_writer.add_summary(summary_str, sess.run(global_step))
                            saver.save(sess, os.path.join(log_dir, 'model-checkpoint'),
                                       global_step=global_step)

                    print('Training Finished')

                    test_loss = sess.run(eval_op, {x: mnist.test.images, phase_train: False})
                    print('Test loss:', test_loss)
                finally:
                    # Flush pending summaries to disk and release the file.
                    summary_writer.close()


학습 결과는 아래와 같다.

epoch: 1  cost: 11.421402502926913  validation loss: 9.763931
epoch: 2  cost: 9.326198120117187  validation loss: 8.790714
…(중략)…
epoch: 198  cost: 4.9356590782512315  validation loss: 4.8779087
epoch: 199  cost: 4.927698595740578  validation loss: 4.9566693
epoch: 200  cost: 4.932985106381503  validation loss: 4.8705215
Training Finished
Test loss: 4.883771

TensorBoard로 본 학습 cost와 val_cost의 그래프다.
 


[PCA(Principal Component Analysis)와 autoencoder 비교 소스]

PCA를 사용할 때와 autoencoder를 사용할 때 어느 쪽이 embedding이 잘 이루어지는지 비교하기 위한 코드다. 

from sklearn import decomposition
from matplotlib import pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Fit a 2-component PCA on the training images and encode the validation set.
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=False)
pca = decomposition.PCA(n_components=2)
pca.fit(mnist.train.images)
pca_codes = pca.transform(mnist.validation.images)

# For the first three validation digits, show the original MNIST image and
# then its PCA reconstruction.
for sample_idx in range(3):
    original = mnist.validation.images[sample_idx]
    plt.imshow(original.reshape((28, 28)), cmap=plt.cm.gray)
    plt.show()

    # Slice rather than index so inverse_transform receives a 2-D array.
    pca_recon = pca.inverse_transform(pca_codes[sample_idx:sample_idx + 1])
    plt.imshow(pca_recon[0].reshape((28, 28)), cmap=plt.cm.gray)
    plt.show()

autoencoder를 사용한 결과와 위 코드로 얻은 PCA 재구성 이미지를 비교하면 아래와 같다. PCA보다 autoencoder를 사용한 embedding이 더 잘 이루어지는 것 같다.
 


관련 글:

댓글

이 블로그의 인기 게시물

간단한 cfar 알고리즘에 대해

쉽게 설명한 파티클 필터(particle filter) 동작 원리와 예제

아두이노(arduino) 심박센서 (heart rate sensor) 심박수 측정 example code

리눅스 디바이스 드라이버 기초와 예제

windows에서 간단하게 크롬캐스트(Chromecast)를 통해 윈도우 화면 미러링 방법