딥러닝의 정석 예제 코드 chapter 6 임베딩과 표상학습 autoencoder mnist

본 글은 딥러닝의 정석(Fundamentals of Deep Learning) 6장 임베딩과 표상학습의 autoencoder mnist 예제와 PCA와의 비교 예제 소스코드를 싣고 있다.

[Autoencoder mnist 소스 코드]

딥러닝의 정석의 예제 코드는 github에서 받을 수 있으나 몇몇 구문에서 오류가 발생하여 수정했다. 학습데이터는 MNIST가 사용되었으며, n_code는 책의 예제와 다르게 입력을 받지 않고, 2로 고정하였다.
 
<구현된 autoencoder 모델 그래프>

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python import control_flow_ops
import os,shutil
import matplotlib.pyplot as plt
import numpy as np
from io import BytesIO
from IPython.display import clear_output, Image, display, HTML

# Start from a clean default graph (useful when the script is re-run in a
# notebook kernel) and set a graph-level random seed on that default graph.
tf.reset_default_graph()
tf.set_random_seed(777)
# Load MNIST from ../MNIST_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("../MNIST_data/",one_hot=True)

# Architecture: unit counts of the three hidden layers on each side of the
# code layer; the decoder mirrors the encoder (1000-500-250 | 250-500-1000).
n_encoder_hidden_1 = 1000
n_encoder_hidden_2 = 500
n_encoder_hidden_3 = 250
n_decoder_hidden_1 = 250
n_decoder_hidden_2 = 500
n_decoder_hidden_3 = 1000

# Training hyperparameters.
learning_rate= 0.001      # step size passed to the Adam optimizer
training_epoches = 200    # number of passes over the training set
batch_size = 100          # examples per minibatch
display_step = 1          # evaluate / log / checkpoint every N epochs


def layer_batch_norm(x, n_out, phase_train):
    """Apply batch normalisation to the pre-activations of a dense layer.

    Creates the variables ``beta`` (offset, initialised to 0) and ``gamma``
    (scale, initialised to 1) in the current variable scope. The mean and
    variance are computed over axis 0 of ``x``; an exponential moving
    average (decay 0.9) of both is maintained.

    Args:
        x: Tensor whose last dimension is ``n_out`` (reshaped internally to
            ``[-1, 1, 1, n_out]``).
        n_out: Number of features being normalised.
        phase_train: Boolean tensor. When true, the current batch statistics
            are used and the moving averages are updated; otherwise the
            moving averages are used.

    Returns:
        The normalised tensor reshaped to ``[-1, n_out]``.
    """
    offset_initializer = tf.constant_initializer(value=0.0, dtype=tf.float32)
    scale_initializer = tf.constant_initializer(value=1.0, dtype=tf.float32)
    beta = tf.get_variable("beta", [n_out], initializer=offset_initializer)
    gamma = tf.get_variable("gamma", [n_out], initializer=scale_initializer)

    batch_mean, batch_var = tf.nn.moments(x, [0], name='moments')

    ema = tf.train.ExponentialMovingAverage(decay=0.9)
    ema_apply_op = ema.apply([batch_mean, batch_var])
    ema_mean = ema.average(batch_mean)
    ema_var = ema.average(batch_var)

    def batch_stats_with_ema_update():
        # Tie the moving-average update to reading the batch statistics so
        # it runs whenever the training branch is taken.
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(batch_mean), tf.identity(batch_var)

    def moving_average_stats():
        return ema_mean, ema_var

    mean, var = control_flow_ops.cond(
        phase_train, batch_stats_with_ema_update, moving_average_stats)

    # batch_norm_with_global_normalization expects a 4-D input, so the
    # activations are viewed as 1x1 "images" with n_out channels.
    x_4d = tf.reshape(x, [-1, 1, 1, n_out])
    normed = tf.nn.batch_norm_with_global_normalization(
        x_4d, mean, var, beta, gamma, 1e-3, True)
    return tf.reshape(normed, [-1, n_out])

def layer(input, weight_shape, bias_shape, pahse_train):
    """Build one fully connected layer: affine -> batch norm -> sigmoid.

    Creates the variables ``W`` and ``b`` in the current variable scope, so
    each call must happen inside its own ``tf.variable_scope``.

    Args:
        input: Input tensor that is matrix-multiplied with ``W``.
        weight_shape: ``[n_in, n_out]`` shape of the weight matrix. ``W`` is
            drawn from a normal distribution with stddev ``sqrt(1 / n_in)``.
        bias_shape: Shape of the bias vector, initialised to 0.
        pahse_train: Boolean tensor forwarded to ``layer_batch_norm`` to
            select training or inference statistics. (The misspelt name is
            kept so the signature stays unchanged.)

    Returns:
        The sigmoid activation of the batch-normalised pre-activations.
    """
    weight_init = tf.random_normal_initializer(stddev=(1.0 / weight_shape[0]) ** 0.5)
    bias_init = tf.constant_initializer(value=0)
    W = tf.get_variable('W', weight_shape, initializer=weight_init)
    b = tf.get_variable('b', bias_shape, initializer=bias_init)
    logits = tf.matmul(input, W) + b
    # Use the argument itself: the previous body referenced `phase_train`,
    # which only resolved through a module-level global of that name and
    # ignored whatever the caller passed in.
    return tf.nn.sigmoid(layer_batch_norm(logits, weight_shape[1], pahse_train))

def encoder(x, n_x, n_code, phase_train):
    """Stack the encoder layers that map the input down to the code.

    Layers are created under the ``encoder`` variable scope in the
    sub-scopes ``hidden_1``, ``hidden_2``, ``hidden_3`` and ``code``.

    Args:
        x: Input tensor with ``n_x`` features.
        n_x: Number of input features.
        n_code: Number of units in the code (embedding) layer.
        phase_train: Boolean tensor passed through to every layer.

    Returns:
        Output tensor of the ``code`` layer.
    """
    # (scope name, fan-in, fan-out) for each layer, in order.
    layer_plan = (
        ('hidden_1', n_x, n_encoder_hidden_1),
        ('hidden_2', n_encoder_hidden_1, n_encoder_hidden_2),
        ('hidden_3', n_encoder_hidden_2, n_encoder_hidden_3),
        ('code', n_encoder_hidden_3, n_code),
    )

    activation = x
    with tf.variable_scope('encoder'):
        for scope_name, n_in, n_units in layer_plan:
            with tf.variable_scope(scope_name):
                activation = layer(activation, [n_in, n_units], [n_units], phase_train)

    return activation

def decoder(code, n_code,n_out, phase_train):
    """Stack the decoder layers that map the code back to the output.

    Layers are created under the ``decoder`` variable scope in the
    sub-scopes ``hidden_1``, ``hidden_2``, ``hidden_3`` and ``out``.

    Args:
        code: Code tensor with ``n_code`` features.
        n_code: Number of units in the code (embedding) layer.
        n_out: Number of features of the reconstructed output.
        phase_train: Boolean tensor passed through to every layer.

    Returns:
        Output tensor of the ``out`` layer.
    """
    # (scope name, fan-in, fan-out) for each layer, in order.
    layer_plan = (
        ('hidden_1', n_code, n_decoder_hidden_1),
        ('hidden_2', n_decoder_hidden_1, n_decoder_hidden_2),
        ('hidden_3', n_decoder_hidden_2, n_decoder_hidden_3),
        ('out', n_decoder_hidden_3, n_out),
    )

    activation = code
    with tf.variable_scope('decoder'):
        for scope_name, n_in, n_units in layer_plan:
            with tf.variable_scope(scope_name):
                activation = layer(activation, [n_in, n_units], [n_units], phase_train)

    return activation

def loss(output, x):
    """Return the mean per-example L2 distance between ``output`` and ``x``.

    The squared differences are summed over axis 1, square-rooted, and the
    resulting per-example norms are averaged.
    """
    residual = tf.subtract(output, x)
    per_example_l2 = tf.sqrt(tf.reduce_sum(tf.square(residual), 1))
    return tf.reduce_mean(per_example_l2)

def training(cost, global_step):
    """Create the Adam training op for ``cost``.

    Also registers a scalar summary named ``cost``. The step size is read
    from the module-level ``learning_rate``.

    Args:
        cost: Scalar tensor to minimise.
        global_step: Variable incremented on every optimisation step.

    Returns:
        The op that performs one optimisation step.
    """
    tf.summary.scalar('cost', cost)
    adam = tf.train.AdamOptimizer(
        learning_rate=learning_rate,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-08,
        use_locking=False,
        name='Adam',
    )
    return adam.minimize(cost, global_step=global_step)

def image_summary(label, tensor):
    """Register ``tensor`` as an image summary under ``label``.

    The tensor is reshaped to ``[-1, 28, 28, 1]`` (single-channel 28x28
    images) before being handed to ``tf.summary.image``.
    """
    as_images = tf.reshape(tensor, [-1, 28, 28, 1])
    return tf.summary.image(label, as_images)

def evaluate(output, x):
    """Build the evaluation loss and its summaries.

    Registers image summaries ``input_image`` and ``out_image`` plus a scalar
    summary ``val_cost``.

    Args:
        output: Reconstructed tensor produced by the model.
        x: Original input tensor.

    Returns:
        Scalar tensor holding the mean per-example L2 distance between
        ``output`` and ``x``.
    """
    image_summary('input_image', x)
    image_summary('out_image', output)

    residual = tf.subtract(output, x, name='val_diff')
    per_example_l2 = tf.sqrt(tf.reduce_sum(tf.square(residual), 1))
    val_loss = tf.reduce_mean(per_example_l2)

    tf.summary.scalar('val_cost', val_loss)
    return val_loss


if __name__ == '__main__':
    # Train a 784 -> 2 -> 784 autoencoder on MNIST, logging summaries and
    # checkpoints to `log_dir`, then report the loss on the test set.
    n_code = 2
    log_dir = 'mnist_autoencoder_logs/'

    # Start from an empty log directory; ignore_errors=True already covers
    # the case where the directory does not exist yet.
    shutil.rmtree(log_dir, ignore_errors=True)

    with tf.Graph().as_default():
        # The graph-level seed is stored on the graph that is current when
        # it is set, so it has to be applied to this freshly created graph
        # for the weight initialisers below to be seeded.
        tf.set_random_seed(777)

        with tf.variable_scope('autoencoder_model'):
            x = tf.placeholder(tf.float32, [None, 784], name='input')
            phase_train = tf.placeholder(tf.bool, name='phase_train')

            code = encoder(x, 784, int(n_code), phase_train)
            output = decoder(code, int(n_code), 784, phase_train)

            cost = loss(output, x)

            global_step = tf.Variable(0, name='global_step', trainable=False)

            train_op = training(cost, global_step)
            eval_op = evaluate(output, x)

            summary_op = tf.summary.merge_all()
            saver = tf.train.Saver(max_to_keep=200)

        # Number of minibatches per epoch; constant across epochs.
        total_batch = mnist.train.num_examples // batch_size

        # The context manager closes the session even if training fails.
        with tf.Session() as sess:
            # `graph=` replaces the deprecated `graph_def=` argument.
            summary_writer = tf.summary.FileWriter(log_dir, graph=sess.graph)
            try:
                sess.run(tf.global_variables_initializer())

                # Training cycle
                for epoch in range(training_epoches):
                    avg_cost = 0

                    for _ in range(total_batch):
                        # Labels are not needed for reconstruction.
                        minibatch_x, _labels = mnist.train.next_batch(batch_size)
                        _, train_cost = sess.run(
                            [train_op, cost],
                            feed_dict={x: minibatch_x, phase_train: True})
                        avg_cost += train_cost

                    avg_cost /= total_batch

                    if epoch % display_step == 0:
                        summary_str, val_loss = sess.run(
                            [summary_op, eval_op],
                            feed_dict={x: mnist.validation.images, phase_train: False})
                        print('epoch:', epoch+1,' cost:',avg_cost,' validation loss:',val_loss)
                        summary_writer.add_summary(summary_str, sess.run(global_step))
                        saver.save(sess, os.path.join(log_dir, 'model-checkpoint'),
                                   global_step=global_step)

                print('Training Finished')

                test_loss = sess.run(eval_op, {x: mnist.test.images, phase_train: False})
                print('Test loss:', test_loss)
            finally:
                # Flush pending summaries and release the event file.
                summary_writer.close()


학습 결과는 아래와 같다.

epoch: 1  cost: 11.421402502926913  validation loss: 9.763931
epoch: 2  cost: 9.326198120117187  validation loss: 8.790714
…(중략)…
epoch: 198  cost: 4.9356590782512315  validation loss: 4.8779087
epoch: 199  cost: 4.927698595740578  validation loss: 4.9566693
epoch: 200  cost: 4.932985106381503  validation loss: 4.8705215
Training Finished
Test loss: 4.883771

TensorBoard로 본 학습 cost와 val_cost의 그래프다.
 


[PCA(Principal Component Analysis)와 autoencoder 비교 소스]

PCA를 사용할 때와 autoencoder를 사용할 때 어느 쪽이 embedding이 잘 이루어지는지 비교하기 위한 코드다. 

from sklearn import decomposition
from matplotlib import pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Fit a 2-component PCA on the training images and project the validation
# images onto it.
mnist = input_data.read_data_sets("../MNIST_data/",one_hot=False)
pca = decomposition.PCA(n_components=2)
pca.fit(mnist.train.images)
pca_codes = pca.transform(mnist.validation.images)

# For the first three validation images, show the original MNIST digit
# followed by its PCA reconstruction.
for sample_idx in range(3):
    original_image = mnist.validation.images[sample_idx].reshape((28,28))
    plt.imshow(original_image, cmap=plt.cm.gray)
    plt.show()

    # Slice (rather than index) so inverse_transform receives a 2-D array.
    pca_recon = pca.inverse_transform(pca_codes[sample_idx:sample_idx + 1])
    plt.imshow(pca_recon[0].reshape((28,28)), cmap=plt.cm.gray)
    plt.show()

autoencoder를 사용한 결과와 위 코드를 사용하여 나온 재구성 이미지를 비교하면 아래와 같다. PCA보다는 autoencoder를 사용한 embedding이 잘 이루어지는 것 같다.
 


관련 글:

댓글

이 블로그의 인기 게시물

간단한 cfar 알고리즘에 대해

windows에서 간단하게 크롬캐스트(Chromecast)를 통해 윈도우 화면 미러링 방법

쉽게 설명한 파티클 필터(particle filter) 동작 원리와 예제

바로 프로젝트 적용 가능한 FIR Filter (low/high/band pass filter )를 c나 python으로 만들기

python ctypes LoadLibrary로 windows dll 로드 및 함수 호출 예제