Fleeting Day(VOL.10)

Daily summary, 2019-05-01 and 05-02

Fine-tuning AlexNet

Finetuning AlexNet with TensorFlow

Download the Dogs vs. Cats Redux competition dataset from Kaggle.
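(The competition slug on Kaggle is dogs-vs-cats-redux-kernels-edition. Assuming the downloaded archives are named train.zip and test.zip, a minimal extraction sketch using only the standard library could look like this:)

import zipfile

# Hypothetical archive names from the Kaggle download; adjust to the files
# actually downloaded. This produces ./train and ./test full of jpg files.
for archive in ['train.zip', 'test.zip']:
    with zipfile.ZipFile(archive) as zf:
        zf.extractall('.')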

After extracting the archives, write a script that generates txt files listing the dataset:

import os
import random

train_sets_dir = os.path.join(os.getcwd(), 'train')
train_images_file = os.listdir(train_sets_dir)
train_sets_list = []

for fn in train_images_file:
    file_label = fn.split('.')[0]   # filenames look like cat.0.jpg / dog.0.jpg

    if file_label == 'cat':
        label = '0'
    else:
        label = '1'

    path_and_label = os.path.join(train_sets_dir, fn) + ' ' + label + '\n'
    train_sets_list.append(path_and_label)

random.shuffle(train_sets_list)   # shuffle before splitting so both classes appear in the validation set

validate_sets_list = train_sets_list[int(len(train_sets_list)*0.85):]   # last 15% as the validation set
train_sets_list = train_sets_list[:int(len(train_sets_list)*0.85)]

train_text = open('train.txt', 'w')   # write the training list
for img in train_sets_list:
    train_text.writelines(img)
train_text.close()

validate_text = open('validate.txt', 'w')   # write the validation list
for img in validate_sets_list:
    validate_text.writelines(img)
validate_text.close()
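Each line of train.txt and validate.txt is then an absolute image path followed by the label, separated by a space, for example (hypothetical paths):

/home/user/cats_vs_dogs/train/dog.4321.jpg 1
/home/user/cats_vs_dogs/train/cat.87.jpg 0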

Loading the data:

# Data preprocessing

import numpy as np
import tensorflow as tf

IMAGENET_MEAN = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32)   # per-channel ImageNet mean, used to center the inputs

def parse_image(filename, label):
    img_string = tf.read_file(filename)
    img_decoded = tf.image.decode_png(img_string, channels=3)   # also decodes JPEGs
    img_resized = tf.image.resize_images(img_decoded, [227, 227])
    img_converted = tf.cast(img_resized, tf.float32)
    img_centered = tf.subtract(img_converted, IMAGENET_MEAN)

    return img_centered, label

def data_generate(txt_file, batch_size, num_classes, shuffle=True):

    paths_and_labels = np.loadtxt(txt_file, dtype=str).tolist()   # read the file into a list of [path, label] pairs

    if shuffle:
        np.random.shuffle(paths_and_labels)   # shuffle the examples

    paths, labels = zip(*[(l[0], int(l[1])) for l in paths_and_labels])   # split into paths and labels
    steps_per_epoch = np.ceil(len(labels)/batch_size).astype(np.int32)

    paths = tf.convert_to_tensor(paths, dtype=tf.string)   # convert to tensors
    labels = tf.one_hot(labels, num_classes)
    labels = tf.convert_to_tensor(labels, dtype=tf.float32)

    dataset = tf.data.Dataset.from_tensor_slices((paths, labels))   # build the dataset
    dataset = dataset.map(parse_image)   # preprocess each example

    if shuffle:
        dataset = dataset.shuffle(buffer_size=batch_size)

    dataset = dataset.batch(batch_size)   # mini-batches

    return dataset, steps_per_epoch

# Load the data

train_file = 'train.txt'
validate_file = 'validate.txt'

learning_rate = 0.01   # hyperparameters
num_epochs = 10
batch_size = 256

num_classes = 2
train_layers = ['fc8', 'fc7', 'fc6']   # layers to retrain

train_data, train_steps = data_generate(train_file, batch_size=batch_size, num_classes=num_classes)
validate_data, validate_steps = data_generate(validate_file, batch_size=batch_size, num_classes=num_classes)

iterator = tf.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)   # reinitializable iterator shared by both datasets
train_init = iterator.make_initializer(train_data)
validate_init = iterator.make_initializer(validate_data)

imgs, labels = iterator.get_next()

Building the model:

# Build the model

model = AlexNetModel(num_classes=num_classes, skip_layer=train_layers)

with tf.name_scope('cross_entropy'):
    loss = model.loss(imgs, labels)

optimizer = model.optimize(learning_rate=learning_rate)

with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(model.score, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_sum(tf.cast(correct_pred, tf.float32))   # count of correct predictions in the batch; divided by the set size after each epoch

tf.summary.scalar('cross_entropy', loss)
tf.summary.scalar('accuracy', accuracy)
merged_summary = tf.summary.merge_all()
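The AlexNetModel class itself is not shown in this post; it follows the usual finetune_alexnet_with_tensorflow layout, exposing inference/score, loss, optimize (which only updates the variables of the layers in train_layers) and load_original_weights. As a rough sketch of what load_original_weights is assumed to do, the pretrained bvlc_alexnet.npy weights are assigned layer by layer, skipping the layers being retrained (the function name, file layout and variable scopes here are assumptions, not the exact code used in this post):

import numpy as np
import tensorflow as tf

def load_original_weights(sess, skip_layer, weights_path='bvlc_alexnet.npy'):
    # bvlc_alexnet.npy stores a dict mapping layer names ('conv1', ..., 'fc8')
    # to [weights, biases]; layers listed in skip_layer keep their fresh
    # initialization so they can be retrained on the new task.
    weights_dict = np.load(weights_path, encoding='bytes', allow_pickle=True).item()
    for op_name in weights_dict:
        if op_name in skip_layer:
            continue
        with tf.variable_scope(op_name, reuse=True):
            for data in weights_dict[op_name]:
                if len(data.shape) == 1:          # 1-D arrays are biases
                    var = tf.get_variable('biases', trainable=False)
                else:                             # 2-D/4-D arrays are weights
                    var = tf.get_variable('weights', trainable=False)
                sess.run(var.assign(data))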

Training:

from datetime import datetime

display_step = 20
writer = tf.summary.FileWriter('./graph')
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer.add_graph(sess.graph)

    model.load_original_weights(sess)   # load the pretrained AlexNet weights, except the layers being retrained

    print("{} Start training...".format(datetime.now()))

    for epoch in range(num_epochs):

        sess.run(train_init)
        total_loss = 0
        n_batches = 0
        total_acc = 0
        try:
            while True:
                _, l, ac = sess.run([optimizer, loss, accuracy])
                total_loss += l
                total_acc += ac
                n_batches += 1
        except tf.errors.OutOfRangeError:
            pass

        print('Average loss epoch {0}: {1}'.format(epoch, total_loss/n_batches))

        print("{} Training Accuracy = {:.4f}".format(datetime.now(), total_acc/len(train_sets_list)))

        print("{} Start validation".format(datetime.now()))
        sess.run(validate_init)
        total_correct_preds = 0

        try:
            while True:
                accuracy_batch = sess.run(accuracy)
                total_correct_preds += accuracy_batch
        except tf.errors.OutOfRangeError:
            pass

        print("{} Validation Accuracy = {:.4f}".format(datetime.now(), total_correct_preds/len(validate_sets_list)))

        print("{} Saving checkpoint of model...".format(datetime.now()))

        os.makedirs(os.path.join(os.getcwd(), 'model'), exist_ok=True)   # make sure the checkpoint directory exists
        model_name = os.path.join(os.getcwd(), 'model', 'model_epoch'+str(epoch+1)+'.ckpt')
        save_path = saver.save(sess, model_name)

        print("{} Model checkpoint saved at {}".format(datetime.now(), model_name))

Testing:

import os
import numpy as np
import pandas as pd
import tensorflow as tf

test_sets_dir = os.path.join(os.getcwd(), 'test')
test_images_file = os.listdir(test_sets_dir)
test_images_file.sort(key=lambda x: int(x[:-4]))   # sort numerically: 1.jpg, 2.jpg, ...

test_sets_list = []

for fn in test_images_file:
    path = os.path.join(test_sets_dir, fn) + '\n'
    test_sets_list.append(path)

test_text = open('test.txt', 'w')   # write the test list
for img in test_sets_list:
    test_text.writelines(img)
test_text.close()

IMAGENET_MEAN = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32)   # per-channel ImageNet mean, used to center the inputs

def parse_test_image(filename):
    img_string = tf.read_file(filename)
    img_decoded = tf.image.decode_png(img_string, channels=3)   # also decodes JPEGs
    img_resized = tf.image.resize_images(img_decoded, [227, 227])
    img_converted = tf.cast(img_resized, tf.float32)
    img_centered = tf.subtract(img_converted, IMAGENET_MEAN)

    return img_centered

images_path = np.loadtxt('./test.txt', dtype=str).tolist()
images_path = tf.convert_to_tensor(images_path, dtype=tf.string)
test_dataset = tf.data.Dataset.from_tensor_slices((images_path))
test_dataset = test_dataset.map(parse_test_image)
test_dataset = test_dataset.batch(1000)
test_iterator = test_dataset.make_one_shot_iterator()
test_image = test_iterator.get_next()

model = AlexNetModel(num_classes=2)
score = model.inference(test_image)

predicts = []
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, '/content/model/model_epoch10.ckpt')

    try:
        while True:
            scores = sess.run(score)
            predicts.extend(np.argmax(scores, 1))   # argmax in numpy, so no new ops are added to the graph each iteration
    except tf.errors.OutOfRangeError:
        pass

results = pd.Series(predicts, name="label")
submission = pd.concat([pd.Series(range(1, 12501), name="id"), results], axis=1)
submission.to_csv("sample_submission.csv", index=False)
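One caveat about the submission file: the Redux competition is scored on log loss, so submitting the softmax probability of the dog class usually scores much better than a hard 0/1 argmax label. A sketch of the change, assuming the same score tensor, saver and checkpoint as above:

prob_dog = tf.nn.softmax(score)[:, 1]   # probability of class 1 ('dog')

predicts = []
with tf.Session() as sess:
    saver.restore(sess, '/content/model/model_epoch10.ckpt')
    try:
        while True:
            predicts.extend(sess.run(prob_dog))
    except tf.errors.OutOfRangeError:
        pass

submission = pd.concat([pd.Series(range(1, 12501), name="id"),
                        pd.Series(predicts, name="label")], axis=1)
submission.to_csv("sample_submission.csv", index=False)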

Summary

Two days of fiddling with this.
My mind is blank again, and once more I can't quite feel that I'm alive.

Author: Hugsy
Link: http://binweber.top/2019/05/02/daily_190501/
Copyright: Unless otherwise noted, all posts on this blog are licensed under CC BY-NC-SA 4.0. Please credit Sky Inside the Eyewall when reposting.