自编码网络介绍

深度学习领域主要有两种训练模式:一种是监督学习,另一种是非监督学习。前者有样本有标签,后者只有样本。此外还有半监督学习,半监督学习属于非监督学习领域

相对于监督学习来说,非监督学习就显得简单得多。非监督学习能让网络直接使用样本进行训练,不需要准备标签。接下来我们就来学习一个非监督模型的网络——自编码网络

介绍

人们看一张图时,一般是扫一眼物体,大致会得到图片的诸多特征。而计算机是逐个元素去读,怎么让计算机也具有人类的能力呢?用自编码网络

自编码网络是非监督学习领域中的一种,可以自动从无标注的数据中学习特征,是一种以重构输入信号为目标的神经网络,它可以给出比原始数据更好的特征描述,具有较强的特征学习能力,在深度学习中常用自编码网络生成的特征来取代原始数据,以得到更好的结果

自编码(AE)网络是输入等于输出的网络,最基本的模型可以视为三层神经网络,也就是输入层、隐藏层、输出层。其中,输入层的样本也会充当输出层的标签。即:这个神经网络就是一种尽可能复现输入信号的神经网络。

在上图中,自编码器要求输出尽可能等于输入,并且其隐藏层必须满足一定的稀疏性,这是通过让隐藏层中后一层的神经元个数少于前一层的神经元个数来实现的。相当于隐藏层对输入进行了压缩,并在输出层中解压缩。整个过程是一定会丢失信息的,但训练能够使丢失的信息尽量减少,最大化地保留其主要特征。这样的自动编码器可以捕捉代表输入数据的最重要的因素,类似PCA算法。

代码实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from "/data/" with one_hot=True.
mnist = input_data.read_data_sets("/data/", one_hot=True)

learning_rate = 0.01
n_input = 784      # 28 * 28 values per flattened image
n_hidden_1 = 256   # number of units in the first hidden layer
n_hidden_2 = 128   # number of units in the second hidden layer

# The input placeholder doubles as the reconstruction target: y is the very
# same tensor as x.
x = tf.placeholder("float", [None, n_input])
y = x

# Weight matrices: the encoder maps n_input -> n_hidden_1 -> n_hidden_2 and
# the decoder mirrors it back to n_input.
weights = dict(
    encoder_h1=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    encoder_h2=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    decoder_h1=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
    decoder_h2=tf.Variable(tf.random_normal([n_hidden_1, n_input])),
)

# One zero-initialised bias vector per layer, sized to that layer's output.
biases = dict(
    encoder_b1=tf.Variable(tf.zeros([n_hidden_1])),
    encoder_b2=tf.Variable(tf.zeros([n_hidden_2])),
    decoder_b1=tf.Variable(tf.zeros([n_hidden_1])),
    decoder_b2=tf.Variable(tf.zeros([n_input])),
)
# Encoder
def encoder(x):
    """Compress ``x`` through the two sigmoid encoder layers.

    Args:
        x: tensor that can be matrix-multiplied with ``weights["encoder_h1"]``.

    Returns:
        The sigmoid activation of the second encoder layer.
    """
    hidden = x
    for w_key, b_key in (("encoder_h1", "encoder_b1"),
                         ("encoder_h2", "encoder_b2")):
        pre_activation = tf.add(tf.matmul(hidden, weights[w_key]), biases[b_key])
        hidden = tf.nn.sigmoid(pre_activation)
    return hidden
# Decoder
def decoder(x):
    """Expand an encoded tensor back through the two sigmoid decoder layers.

    Args:
        x: tensor that can be matrix-multiplied with ``weights["decoder_h1"]``.

    Returns:
        The sigmoid activation of the second decoder layer.
    """
    first_linear = tf.add(tf.matmul(x, weights["decoder_h1"]), biases["decoder_b1"])
    first_hidden = tf.nn.sigmoid(first_linear)

    second_linear = tf.add(
        tf.matmul(first_hidden, weights["decoder_h2"]), biases["decoder_b2"]
    )
    return tf.nn.sigmoid(second_linear)
# Chain the two halves: x -> encoder -> decoder -> reconstruction.
encoder_out = encoder(x)
pred = decoder(encoder_out)

# Mean squared difference between the target y and the reconstruction,
# minimised with RMSProp.
cost = tf.reduce_mean(tf.pow(tf.subtract(y, pred), 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Training parameters
training_epochs, batch_size, display_step = 100, 256, 10
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Whole mini-batches per epoch (the fractional remainder is truncated).
    total_batch = int(mnist.train.num_examples / batch_size)
    for epoch in range(training_epochs):
        for _ in range(total_batch):
            # Fetch data; the labels are not used by the autoencoder.
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # One optimisation step; c is the cost of this mini-batch.
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs})
        # Report the cost of the last mini-batch every display_step epochs.
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1),"cost=", "{:.9f}".format(c))
    print("完成!")

    # Test
    # NOTE(review): this compares the argmax position of each reconstruction
    # with the argmax position of its input, and the printed value is
    # 1 - accuracy although it is labelled "Accuracy:" — confirm the intent.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Compute the error rate
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # y is the same tensor as x, so both keys feed the same placeholder.
    test_feed = {x: mnist.test.images, y: mnist.test.images}
    print ("Accuracy:", 1-accuracy.eval(test_feed))

    # Plot the first show_num test images (top row) above their
    # reconstructions (bottom row).
    show_num = 10
    reconstruction = sess.run(pred, feed_dict={x: mnist.test.images[:show_num]})
    fig, axes = plt.subplots(2, 10, figsize=(10, 2))
    top_row, bottom_row = axes[0], axes[1]
    for col in range(show_num):
        top_row[col].imshow(np.reshape(mnist.test.images[col], (28, 28)))
        bottom_row[col].imshow(np.reshape(reconstruction[col], (28, 28)))
    plt.show()

线性解码器

在上面的代码中,使用的激活函数是sigmoid,这是一个S型激活函数,输出范围是[0,1]。当我们对最终提取的特征节点采用该激活函数时,就相当于对输入限制或缩放,使其位于[0,1]范围中。有一些数据集,比如MNIST,能方便地将输出缩放到[0,1]中,但是很难满足对输入值的要求。利用一个恒等式来作为激活函数,就可以解决这个问题,f(z)=z,即:没有激活函数。

注意:这个只是对最后的输出层而言,隐藏层要使用激活函数

由多个带有S型激活函数的隐藏层及一个线性输出层构成的自编码器,称为线性解码器

多层全连接自编码网络(将特征压缩到二维)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from "/data/" with one_hot=True.
mnist = input_data.read_data_sets("/data/", one_hot=True)

learning_rate = 0.01
n_input = 784
# Layer widths of the encoder, narrowing down to a 2-unit code.
n_hidden_1, n_hidden_2, n_hidden_3, n_hidden_4 = 256, 64, 16, 2

# The input placeholder is also the reconstruction target (y is x).
x = tf.placeholder("float", [None, n_input])
y = x

# Encoder weights narrow n_input -> 256 -> 64 -> 16 -> 2; decoder weights
# widen back in the reverse order.
weights = dict(
    encoder_h1=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    encoder_h2=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    encoder_h3=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
    encoder_h4=tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4])),
    decoder_h1=tf.Variable(tf.random_normal([n_hidden_4, n_hidden_3])),
    decoder_h2=tf.Variable(tf.random_normal([n_hidden_3, n_hidden_2])),
    decoder_h3=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
    decoder_h4=tf.Variable(tf.random_normal([n_hidden_1, n_input])),
)

# Zero-initialised bias vectors, one per layer, sized to the layer output.
biases = dict(
    encoder_b1=tf.Variable(tf.zeros([n_hidden_1])),
    encoder_b2=tf.Variable(tf.zeros([n_hidden_2])),
    encoder_b3=tf.Variable(tf.zeros([n_hidden_3])),
    encoder_b4=tf.Variable(tf.zeros([n_hidden_4])),
    decoder_b1=tf.Variable(tf.zeros([n_hidden_3])),
    decoder_b2=tf.Variable(tf.zeros([n_hidden_2])),
    decoder_b3=tf.Variable(tf.zeros([n_hidden_1])),
    decoder_b4=tf.Variable(tf.zeros([n_input])),
)
def encoder(x):
    """Encode ``x`` with three sigmoid layers followed by a linear layer.

    Args:
        x: tensor that can be matrix-multiplied with ``weights['encoder_h1']``.

    Returns:
        The output of the fourth encoder layer, which has no activation
        function applied.
    """
    hidden = x
    for idx in ("1", "2", "3"):
        linear = tf.add(
            tf.matmul(hidden, weights['encoder_h' + idx]),
            biases['encoder_b' + idx],
        )
        hidden = tf.nn.sigmoid(linear)
    # The last encoder layer is deliberately left linear (no sigmoid).
    return tf.add(tf.matmul(hidden, weights['encoder_h4']), biases['encoder_b4'])
def decoder(x):
    """Decode an encoded tensor through four sigmoid layers.

    Args:
        x: tensor that can be matrix-multiplied with ``weights['decoder_h1']``.

    Returns:
        The sigmoid activation of the fourth decoder layer.
    """
    hidden = x
    for idx in ("1", "2", "3", "4"):
        linear = tf.add(
            tf.matmul(hidden, weights['decoder_h' + idx]),
            biases['decoder_b' + idx],
        )
        hidden = tf.nn.sigmoid(linear)
    return hidden

# Chain the two halves: x -> encoder -> decoder -> reconstruction.
encoder_op = encoder(x)
y_pred = decoder(encoder_op)

# Mean squared difference between the target y and the reconstruction,
# minimised with Adam.
cost = tf.reduce_mean(tf.pow(tf.subtract(y, y_pred), 2))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Training parameters.
training_epochs, batch_size, display_step = 100, 256, 10

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Whole mini-batches per epoch (the fractional remainder is truncated).
    total_batch = int(mnist.train.num_examples/batch_size)
    for epoch in range(training_epochs):
        for i in range(total_batch):
            # The labels returned with each batch are not needed here.
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run one optimisation step and fetch the cost of this batch;
            # the progress report below prints this value.
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs})
        # Report the cost of the last mini-batch every display_step epochs.
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1),"cost=", "{:.9f}".format(c))
    print("完成!")

    # Plot the first show_num test images (top row) above their
    # reconstructions (bottom row).
    show_num = 10
    encode_decode = sess.run(
        y_pred, feed_dict={x: mnist.test.images[:show_num]})
    f, a = plt.subplots(2, 10, figsize=(10, 2))
    for i in range(show_num):
        a[0][i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
        a[1][i].imshow(np.reshape(encode_decode[i], (28, 28)))
    plt.show()

如果读者想得到更好的特征提取效果,可以将压缩的层数变得更多,但是由于sigmoid函数的缺陷,无法使用更深的层,所以只能做成4层压缩。对此有一个解决办法——使用栈式自编码器

使用自编码的卷积网络

自编码结构不仅只用在全连接网络上,还可用在卷积网络

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data set from "/data/" with one_hot=True.
mnist = input_data.read_data_sets("/data/", one_hot=True)
# Max pooling
def max_pool_with_argmax(net, stride):
    """Max-pool ``net`` and also return the argmax mask of the pooling.

    The window size and the stride are both ``stride`` along the two middle
    axes, with 'SAME' padding.

    Args:
        net: input tensor passed to the TensorFlow pooling ops.
        stride: window size and step used for both middle axes.

    Returns:
        A ``(pooled, mask)`` pair: the max-pooled tensor and the argmax
        indices reported by ``tf.nn.max_pool_with_argmax``, with gradients
        stopped on the mask.
    """
    window = [1, stride, stride, 1]
    # Only the argmax output of this op is kept; the pooled values come from
    # the separate tf.nn.max_pool call below.
    _, argmax = tf.nn.max_pool_with_argmax(
        net, ksize=window, strides=window, padding='SAME')
    argmax = tf.stop_gradient(argmax)
    pooled = tf.nn.max_pool(net, ksize=window, strides=window, padding='SAME')
    return pooled, argmax
# NOTE(review): the original note here read "4*4----2*2--=2*2 [6,8,12,16]";
# presumably an example of a 4x4 map pooled down to 2x2 — TODO confirm.
# Unpooling
def unpool(net, mask, stride):
    """Scatter pooled values back to the positions recorded in ``mask``.

    Builds an output ``stride`` times larger than ``net`` along axes 1 and 2
    and writes every value of ``net`` to the location decoded from ``mask``.

    Args:
        net: pooled tensor; the first four entries of its static shape are
            used as Python numbers, so they must be known.
        mask: int64 index tensor as returned by ``max_pool_with_argmax``.
            The arithmetic below decodes it as
            ``(row * out_width + col) * channels + channel`` — assumes the
            batch offset is not included in the index; TODO confirm for the
            TensorFlow version in use.
        stride: factor by which axes 1 and 2 are enlarged.

    Returns:
        The tensor produced by ``tf.scatter_nd`` with the enlarged shape.
    """
    dims = net.get_shape().as_list()
    batch, in_rows, in_cols, channels = dims[0], dims[1], dims[2], dims[3]
    output_shape = (batch, in_rows * stride, in_cols * stride, channels)
    # Number of flattened elements in one output row (width * channels).
    row_span = output_shape[2] * channels

    ones = tf.ones_like(mask)
    # Batch index of every element, broadcast over the remaining three axes.
    batch_ids = tf.reshape(tf.range(batch, dtype=tf.int64), shape=[batch, 1, 1, 1])
    b = ones * batch_ids
    # Split the flattened index into row and column.
    y = mask // row_span
    x = (mask % row_span) // channels
    # Channel index of every element, broadcast along the last axis.
    f = ones * tf.range(channels, dtype=tf.int64)

    n_updates = tf.size(net)
    # One (b, y, x, f) coordinate row per element of net.
    indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, n_updates]))
    return tf.scatter_nd(indices, tf.reshape(net, [n_updates]), output_shape)



def conv2d(x, W):
    """Return ``tf.nn.conv2d`` of ``x`` with filter ``W``, stride 1, 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Return ``x`` max-pooled with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

learning_rate = 0.01
# Output channel counts of the first and second convolution layers.
n_conv_1, n_conv_2 = 16, 32
n_input = 784
batchsize = 50

# The batch dimension is fixed to batchsize rather than left as None.
x = tf.placeholder("float", [batchsize, n_input])

# View each flat 784-value row as a 28x28 image with a single channel.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

def encoder(x):
    """Apply the two ReLU convolution layers of the encoder.

    Args:
        x: input tensor handed to ``conv2d`` with ``weights['encoder_conv1']``.

    Returns:
        A ``(h_conv2, h_conv1)`` pair: the second layer's activation first,
        then the first layer's activation.
    """
    first_linear = conv2d(x, weights['encoder_conv1']) + biases['encoder_conv1']
    h_conv1 = tf.nn.relu(first_linear)

    second_linear = conv2d(h_conv1, weights['encoder_conv2']) + biases['encoder_conv2']
    h_conv2 = tf.nn.relu(second_linear)
    return h_conv2, h_conv1

def decoder(x,conv1):
    """Map encoder features back to image space with two transposed convolutions.

    Each step subtracts the decoder bias and then applies
    ``tf.nn.conv2d_transpose`` with stride 1.

    Args:
        x: feature tensor to decode.
        conv1: tensor whose ``shape`` is used as the output shape of the
            first transposed convolution.

    Returns:
        The result of the second transposed convolution, whose output shape
        is taken from the module-level ``x_image``.
    """
    unit_strides = [1,1,1,1]
    debiased = x-biases['decoder_conv2']
    t_conv1 = tf.nn.conv2d_transpose(
        debiased, weights['decoder_conv2'],
        output_shape=conv1.shape, strides=unit_strides)

    debiased = t_conv1-biases['decoder_conv1']
    return tf.nn.conv2d_transpose(
        debiased, weights['decoder_conv1'],
        output_shape=x_image.shape, strides=unit_strides)

# Convolution filters. The decoder filters have the same shapes as the
# encoder filters. Only encoder_conv1 uses a truncated normal initialiser.
weights = dict(
    encoder_conv1=tf.Variable(tf.truncated_normal([5, 5, 1, n_conv_1], stddev=0.1)),
    encoder_conv2=tf.Variable(tf.random_normal([3, 3, n_conv_1, n_conv_2], stddev=0.1)),
    decoder_conv1=tf.Variable(tf.random_normal([5, 5, 1, n_conv_1], stddev=0.1)),
    decoder_conv2=tf.Variable(tf.random_normal([3, 3, n_conv_1, n_conv_2], stddev=0.1)),
)

# Zero-initialised biases, one value per output channel of each layer.
biases = dict(
    encoder_conv1=tf.Variable(tf.zeros([n_conv_1])),
    encoder_conv2=tf.Variable(tf.zeros([n_conv_2])),
    decoder_conv1=tf.Variable(tf.zeros([n_conv_1])),
    decoder_conv2=tf.Variable(tf.zeros([n_conv_2])),
)

# Forward pass: convolutions -> max pooling -> unpooling -> transposed
# convolutions.
encoder_out, conv1 = encoder(x_image)
h_pool2, mask = max_pool_with_argmax(encoder_out, 2)

h_upool = unpool(h_pool2, mask, 2)
pred = decoder(h_upool, conv1)

# Mean squared difference between the input image and its reconstruction,
# minimised with RMSProp.
cost = tf.reduce_mean(tf.pow(tf.subtract(x_image, pred), 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Training parameters.
training_epochs, display_step = 20, 5

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Whole mini-batches per epoch (the fractional remainder is truncated).
    total_batch = int(mnist.train.num_examples / batchsize)
    for epoch in range(training_epochs):
        for _ in range(total_batch):
            # The labels returned with each batch are not used.
            batch_xs, batch_ys = mnist.train.next_batch(batchsize)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs})
        # Report the cost of the last mini-batch every display_step epochs.
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1),"cost=", "{:.9f}".format(c))

    print("完成!")

    # Evaluate on one more batch drawn from the training set; the placeholder
    # has a fixed batch dimension, so exactly batchsize rows are fed.
    batch_xs, batch_ys = mnist.train.next_batch(batchsize)
    eval_feed = {x: batch_xs}
    print ("Error:", cost.eval(eval_feed))

    # Plot the first show_num inputs (top row) above their reconstructions
    # (bottom row).
    show_num = 10
    reconstruction = sess.run(pred, feed_dict=eval_feed)
    fig, axes = plt.subplots(2, 10, figsize=(10, 2))
    top_row, bottom_row = axes[0], axes[1]
    for col in range(show_num):
        top_row[col].imshow(np.reshape(batch_xs[col], (28, 28)))
        bottom_row[col].imshow(np.reshape(reconstruction[col], (28, 28)))
    plt.show()
----本文结束,感谢您的阅读。如有错,请指正。----
大哥大嫂过年好!支持我一下呗
0%