Comprehensive Autoencoder Implementation

Common Approaches

Common techniques with autoencoders: substitution and cascading

A stacked autoencoder trains each intermediate layer as the input of the next network, so we obtain the raw values of every intermediate layer. To get better results, the trained layers can then be cascaded together and the parameters of the whole network optimized further. Continuing to optimize the parameters of an already-trained model is customarily called "fine-tuning". It is normally applied when a large amount of labeled training data is available; in that case, fine-tuning can significantly improve classifier performance. If most of the data is unlabeled, however, fine-tuning is of limited use.
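
Before the full implementation, here is a minimal, runnable sketch of the schedule itself; the function bodies are empty stand-ins for illustration only, and none of these names appear in the TensorFlow code below.

# Stand-in sketch of greedy layer-wise pretraining followed by fine-tuning.
def pretrain_autoencoder(name, inputs):
    # in the real code below this trains one autoencoder and returns its codes
    print("pretraining", name, "on", len(inputs), "samples")
    return inputs

def fine_tune(labeled_batch):
    print("fine-tuning the stacked network on", len(labeled_batch), "labeled samples")

codes = [[0.0] * 784] * 4                        # stand-in for a tiny batch of images
for layer_name in ("layer_1", "layer_2"):        # pretrain one layer at a time
    codes = pretrain_autoencoder(layer_name, codes)
fine_tune(list(zip(codes, range(4))))            # then optimize the whole stack end to end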

Denoising + stacked autoencoder

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/data/", one_hot=True)
train_X = mnist.train.images
train_Y = mnist.train.labels
test_X = mnist.test.images      # test set, not the training set
test_Y = mnist.test.labels

n_input    = 784    # 28x28 input images
n_hidden_1 = 256    # size of the first code layer
n_hidden_2 = 128    # size of the second code layer
n_classes  = 10

# placeholders for the full network
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_input])
dropout_keep_prob = tf.placeholder("float")
# placeholders for the second autoencoder (fed with layer-1 codes)
l2x = tf.placeholder("float", [None, n_hidden_1])
l2y = tf.placeholder("float", [None, n_hidden_1])
# placeholders for the classifier (fed with layer-2 codes)
l3x = tf.placeholder("float", [None, n_hidden_2])
l3y = tf.placeholder("float", [None, n_classes])

weights = {
    # first (denoising) autoencoder: 784 -> 256 -> 256 -> 784
    "h1": tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    "l1_h2": tf.Variable(tf.random_normal([n_hidden_1, n_hidden_1])),
    "l1_out": tf.Variable(tf.random_normal([n_hidden_1, n_input])),

    # second autoencoder: 256 -> 128 -> 128 -> 256
    "l2_h1": tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    "l2_h2": tf.Variable(tf.random_normal([n_hidden_2, n_hidden_2])),
    "l2_out": tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),

    # classifier: 128 -> 10
    "out": tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    "b1": tf.Variable(tf.zeros([n_hidden_1])),
    "l1_b2": tf.Variable(tf.zeros([n_hidden_1])),
    "l1_out": tf.Variable(tf.zeros([n_input])),

    "l2_b1": tf.Variable(tf.zeros([n_hidden_2])),
    "l2_b2": tf.Variable(tf.zeros([n_hidden_2])),
    "l2_out": tf.Variable(tf.zeros([n_hidden_1])),

    "out": tf.Variable(tf.zeros([n_classes]))
}

In this example we build four networks in total: each layer is trained by its own network, so three networks are trained first, and the trained layers are then combined into a fourth network. That is why the code above defines a set of placeholders for each sub-network, followed by the learnable parameters.
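
As an optional sanity check (not part of the original code), printing the shape of every weight matrix defined above makes the four structures explicit: the first denoising autoencoder is 784-256-256-784, the second autoencoder is 256-128-128-256, the classifier maps 128 to 10, and the cascaded network is 784-256-128-10.

# Optional: print each weight matrix and its shape to verify the layer sizes.
for name in sorted(weights):
    print(name, weights[name].get_shape().as_list())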

First-layer network structure

# encoder of the first autoencoder: 784 -> 256
l1_out = tf.nn.sigmoid(tf.add(tf.matmul(x, weights["h1"]), biases["b1"]))

# decoder of the first (denoising) autoencoder: 256 -> 256 -> 784, with dropout
def noise_l1_autodecoder(layer_1, _weights, _biases, _keep_prob):
    layer_1out = tf.nn.dropout(layer_1, _keep_prob)
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1out, _weights["l1_h2"]), _biases["l1_b2"]))
    layer_2out = tf.nn.dropout(layer_2, _keep_prob)
    return tf.nn.sigmoid(tf.add(tf.matmul(layer_2out, _weights["l1_out"]), _biases["l1_out"]))

l1_reconstruction = noise_l1_autodecoder(l1_out, weights, biases, dropout_keep_prob)
l1_cost = tf.reduce_mean(tf.pow(l1_reconstruction - y, 2))   # reconstruction MSE
l1_optm = tf.train.AdamOptimizer(0.01).minimize(l1_cost)

Second-layer network structure

# second autoencoder: encoder maps 256 -> 128, decoder maps 128 -> 128 -> 256
def l2_autodecoder(layer1_2, _weight, _biases):
    layer1_2out = tf.nn.sigmoid(tf.add(tf.matmul(layer1_2, _weight["l2_h2"]), _biases["l2_b2"]))
    return tf.nn.sigmoid(tf.matmul(layer1_2out, _weight["l2_out"]) + _biases["l2_out"])

l2_out = tf.nn.sigmoid(tf.add(tf.matmul(l2x, weights["l2_h1"]), biases["l2_b1"]))
l2_reconstruction = l2_autodecoder(l2_out, weights, biases)
l2_cost = tf.reduce_mean(tf.pow(l2_reconstruction - l2y, 2))
optm2 = tf.train.AdamOptimizer(0.01).minimize(l2_cost)

Third-layer network structure

# third network: a softmax classifier on the 128-dimensional codes
l3_out = tf.matmul(l3x, weights['out']) + biases['out']
l3_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=l3_out, labels=l3y))
l3_optm = tf.train.AdamOptimizer(0.01).minimize(l3_cost)

Defining the cascaded network structure

# cascade the pretrained stages into one network: 784 -> 256 -> 128 -> 10
l1_l2out = tf.nn.sigmoid(tf.add(tf.matmul(l1_out, weights['l2_h1']), biases['l2_b1']))
pred = tf.matmul(l1_l2out, weights['out']) + biases['out']
cost3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=l3y))
optm3 = tf.train.AdamOptimizer(0.001).minimize(cost3)   # smaller learning rate for fine-tuning
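
The cascaded network introduces no new learnable parameters: it reuses the Variables h1, l2_h1 and out that the three sub-networks train, and only the fine-tuning learning rate (0.001) differs. An optional check, not part of the original post, is to list the trainable variables once the graph is built:

# Optional: list every trainable Variable; the cascade adds none of its own.
for v in tf.trainable_variables():
    print(v.name, v.get_shape().as_list())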

Training the first layer

epochs = 50
batch_size = 100
disp_step = 10
load_epoch = 49

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print("Start training")
    for epoch in range(epochs):
        num_batch = int(mnist.train.num_examples / batch_size)
        total_cost = 0.
        for i in range(num_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # corrupt the input with Gaussian noise; the clean image is the target
            batch_xs_noisy = batch_xs + 0.3 * np.random.randn(batch_size, 784)
            feeds = {x: batch_xs_noisy, y: batch_xs, dropout_keep_prob: 0.5}
            sess.run(l1_optm, feed_dict=feeds)
            total_cost += sess.run(l1_cost, feed_dict=feeds)
        if epoch % disp_step == 0:
            print("Epoch %02d/%02d average cost: %.6f"
                  % (epoch, epochs, total_cost / num_batch))
    print("Done")

    # visualize: noisy input / original image / reconstruction
    show_num = 10
    test_noisy = mnist.test.images[:show_num] + 0.3 * np.random.randn(show_num, 784)
    encode_decode = sess.run(
        l1_reconstruction, feed_dict={x: test_noisy, dropout_keep_prob: 1.})
    f, a = plt.subplots(3, 10, figsize=(10, 3))
    for i in range(show_num):
        a[0][i].imshow(np.reshape(test_noisy[i], (28, 28)))
        a[1][i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
        a[2][i].matshow(np.reshape(encode_decode[i], (28, 28)), cmap=plt.get_cmap('gray'))
    plt.show()

Training the second layer

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("Start training")
    for epoch in range(epochs):
        num_batch = int(mnist.train.num_examples / batch_size)
        total_cost = 0.
        for i in range(num_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # run the images through the first encoder to get 256-d codes,
            # then train the second autoencoder to reconstruct those codes
            l1_h = sess.run(l1_out, feed_dict={x: batch_xs, y: batch_xs, dropout_keep_prob: 1.})
            _, l2cost = sess.run([optm2, l2_cost], feed_dict={l2x: l1_h, l2y: l1_h})
            total_cost += l2cost
        if epoch % disp_step == 0:
            print("Epoch %02d/%02d average cost: %.6f"
                  % (epoch, epochs, total_cost / num_batch))
            # print h1 to show that training the second autoencoder does not touch it
            print(sess.run(weights['h1']))
            print(weights['h1'].name)
    print("Finished training layer_2")

    # visualize: original image / layer-1 code / layer-2 reconstruction of that code
    show_num = 10
    testvec = mnist.test.images[:show_num]
    out1vec = sess.run(l1_out, feed_dict={x: testvec, y: testvec, dropout_keep_prob: 1.})
    out2vec = sess.run(l2_reconstruction, feed_dict={l2x: out1vec})
    f, a = plt.subplots(3, 10, figsize=(10, 3))
    for i in range(show_num):
        a[0][i].imshow(np.reshape(testvec[i], (28, 28)))
        a[1][i].matshow(np.reshape(out1vec[i], (16, 16)), cmap=plt.get_cmap('gray'))
        a[2][i].matshow(np.reshape(out2vec[i], (16, 16)), cmap=plt.get_cmap('gray'))
    plt.show()

Training the third layer

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("Start training")
    for epoch in range(epochs):
        num_batch = int(mnist.train.num_examples / batch_size)
        total_cost = 0.
        for i in range(num_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # encode the images with the first two encoders,
            # then train the classifier on the resulting 128-d codes
            l1_h = sess.run(l1_out, feed_dict={x: batch_xs, y: batch_xs, dropout_keep_prob: 1.})
            l2_h = sess.run(l2_out, feed_dict={l2x: l1_h, l2y: l1_h})
            _, l3cost = sess.run([l3_optm, l3_cost], feed_dict={l3x: l2_h, l3y: batch_ys})
            total_cost += l3cost
        if epoch % disp_step == 0:
            print("Epoch %02d/%02d average cost: %.6f"
                  % (epoch, epochs, total_cost / num_batch))
    print("Finished training layer_3")

Testing

    # Test the model (this continues inside the Session from the previous block,
    # so the weights trained above are used and accuracy.eval finds a default session)
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(l3y, 1))
    # compute the accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, l3y: mnist.test.labels}))

As we can see, when the parameters trained layer by layer are stacked together, the network already performs reasonably well.
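
One practical note (an assumption about how the stages are run, not something stated in the original post): each training block above opens its own Session and calls tf.global_variables_initializer(), which resets all variables, so to truly start fine-tuning from the pretrained values you either keep all stages inside a single Session or save a checkpoint after pretraining and restore it before fine-tuning. A minimal sketch with tf.train.Saver, using an arbitrary checkpoint path ./stacked_ae.ckpt:

# Sketch only: persist the pretrained variables so fine-tuning can start from them.
saver = tf.train.Saver()

# at the end of the layer-3 training session:
#     saver.save(sess, "./stacked_ae.ckpt")

# at the beginning of the fine-tuning session, instead of re-initializing:
#     saver.restore(sess, "./stacked_ae.ckpt")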

Cascaded fine-tuning

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("Start training")
    for epoch in range(epochs):
        num_batch = int(mnist.train.num_examples / batch_size)
        total_cost = 0.
        for i in range(num_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # train the cascaded network end to end on the labeled data
            feeds = {x: batch_xs, l3y: batch_ys}
            sess.run(optm3, feed_dict=feeds)
            total_cost += sess.run(cost3, feed_dict=feeds)
        if epoch % disp_step == 0:
            print("Epoch %02d/%02d average cost: %.6f"
                  % (epoch, epochs, total_cost / num_batch))

    print("Finished cascaded training")
    # Test the model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(l3y, 1))
    # compute the accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, l3y: mnist.test.labels}))

From the results, because every layer of the model starts from values that have already been trained, the error rate is low right from the start, and it drops substantially after each iteration.
