Tensorflow是目前最流行的深度學習框架,我們可以用它來搭建自己的卷積神經網絡并訓練自己的分類器,本文介紹怎樣使用Tensorflow構建自己的CNN,怎樣訓練用于簡單的驗證碼識別的分類器。本文假設你已經安裝好了Tensorflow,了解過CNN的一些知識。
下面將分步介紹怎樣獲得訓練數據,怎樣使用tensorflow構建卷積神經網絡,怎樣訓練,以及怎樣測試訓練出來的分類器
1. 準備訓練樣本
使用Python的庫captcha來生成我們需要的訓練樣本,代碼如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
import sys import os import shutil import random import time #captcha是用于生成驗證碼圖片的庫,可以 pip install captcha 來安裝它 from captcha.image import ImageCaptcha #用于生成驗證碼的字符集 CHAR_SET = [ '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' ] #字符集的長度 CHAR_SET_LEN = 10 #驗證碼的長度,每個驗證碼由4個數字組成 CAPTCHA_LEN = 4 #驗證碼圖片的存放路徑 CAPTCHA_IMAGE_PATH = 'E:/Tensorflow/captcha/images/' #用于模型測試的驗證碼圖片的存放路徑,它里面的驗證碼圖片作為測試集 TEST_IMAGE_PATH = 'E:/Tensorflow/captcha/test/' #用于模型測試的驗證碼圖片的個數,從生成的驗證碼圖片中取出來放入測試集中 TEST_IMAGE_NUMBER = 50 #生成驗證碼圖片,4位的十進制數字可以有10000種驗證碼 def generate_captcha_image(charSet = CHAR_SET, charSetLen = CHAR_SET_LEN, captchaImgPath = CAPTCHA_IMAGE_PATH): k = 0 total = 1 for i in range (CAPTCHA_LEN): total * = charSetLen for i in range (charSetLen): for j in range (charSetLen): for m in range (charSetLen): for n in range (charSetLen): captcha_text = charSet[i] + charSet[j] + charSet[m] + charSet[n] image = ImageCaptcha() image.write(captcha_text, captchaImgPath + captcha_text + '.jpg' ) k + = 1 sys.stdout.write( "\rCreating %d/%d" % (k, total)) sys.stdout.flush() #從驗證碼的圖片集中取出一部分作為測試集,這些圖片不參加訓練,只用于模型的測試 def prepare_test_set(): fileNameList = [] for filePath in os.listdir(CAPTCHA_IMAGE_PATH): captcha_name = filePath.split( '/' )[ - 1 ] fileNameList.append(captcha_name) random.seed(time.time()) random.shuffle(fileNameList) for i in range (TEST_IMAGE_NUMBER): name = fileNameList[i] shutil.move(CAPTCHA_IMAGE_PATH + name, TEST_IMAGE_PATH + name) if __name__ = = '__main__' : generate_captcha_image(CHAR_SET, CHAR_SET_LEN, CAPTCHA_IMAGE_PATH) prepare_test_set() sys.stdout.write( "\nFinished" ) sys.stdout.flush() |
運行上面的代碼,可以生成驗證碼圖片,
生成的驗證碼圖片如下圖所示:
2. 構建CNN,訓練分類器
代碼如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
import tensorflow as tf import numpy as np from PIL import Image import os import random import time #驗證碼圖片的存放路徑 CAPTCHA_IMAGE_PATH = 'E:/Tensorflow/captcha/images/' #驗證碼圖片的寬度 CAPTCHA_IMAGE_WIDHT = 160 #驗證碼圖片的高度 CAPTCHA_IMAGE_HEIGHT = 60 CHAR_SET_LEN = 10 CAPTCHA_LEN = 4 #60%的驗證碼圖片放入訓練集中 TRAIN_IMAGE_PERCENT = 0.6 #訓練集,用于訓練的驗證碼圖片的文件名 TRAINING_IMAGE_NAME = [] #驗證集,用于模型驗證的驗證碼圖片的文件名 VALIDATION_IMAGE_NAME = [] #存放訓練好的模型的路徑 MODEL_SAVE_PATH = 'E:/Tensorflow/captcha/models/' def get_image_file_name(imgPath = CAPTCHA_IMAGE_PATH): fileName = [] total = 0 for filePath in os.listdir(imgPath): captcha_name = filePath.split( '/' )[ - 1 ] fileName.append(captcha_name) total + = 1 return fileName, total #將驗證碼轉換為訓練時用的標簽向量,維數是 40 #例如,如果驗證碼是 ‘0296' ,則對應的標簽是 # [1 0 0 0 0 0 0 0 0 0 # 0 0 1 0 0 0 0 0 0 0 # 0 0 0 0 0 0 0 0 0 1 # 0 0 0 0 0 0 1 0 0 0] def name2label(name): label = np.zeros(CAPTCHA_LEN * CHAR_SET_LEN) for i, c in enumerate (name): idx = i * CHAR_SET_LEN + ord (c) - ord ( '0' ) label[idx] = 1 return label #取得驗證碼圖片的數據以及它的標簽 def get_data_and_label(fileName, filePath = CAPTCHA_IMAGE_PATH): pathName = os.path.join(filePath, fileName) img = Image. open (pathName) #轉為灰度圖 img = img.convert( "L" ) image_array = np.array(img) image_data = image_array.flatten() / 255 image_label = name2label(fileName[ 0 :CAPTCHA_LEN]) return image_data, image_label #生成一個訓練batch def get_next_batch(batchSize = 32 , trainOrTest = 'train' , step = 0 ): batch_data = np.zeros([batchSize, CAPTCHA_IMAGE_WIDHT * CAPTCHA_IMAGE_HEIGHT]) batch_label = np.zeros([batchSize, CAPTCHA_LEN * CHAR_SET_LEN]) fileNameList = TRAINING_IMAGE_NAME if trainOrTest = = 'validate' : fileNameList = VALIDATION_IMAGE_NAME totalNumber = len (fileNameList) indexStart = step * batchSize for i in range (batchSize): index = (i + indexStart) % totalNumber name = fileNameList[index] img_data, img_label = get_data_and_label(name) batch_data[i, : ] = img_data batch_label[i, : ] = img_label return batch_data, batch_label #構建卷積神經網絡并訓練 def train_data_with_CNN(): #初始化權值 def weight_variable(shape, name = 'weight' ): init = tf.truncated_normal(shape, stddev = 0.1 ) var = tf.Variable(initial_value = init, name = name) return var #初始化偏置 def bias_variable(shape, name = 'bias' ): init = tf.constant( 0.1 , shape = shape) var = tf.Variable(init, name = name) return var #卷積 def conv2d(x, W, name = 'conv2d' ): return tf.nn.conv2d(x, W, strides = [ 1 , 1 , 1 , 1 ], padding = 'SAME' , name = name) #池化 def max_pool_2X2(x, name = 'maxpool' ): return tf.nn.max_pool(x, ksize = [ 1 , 2 , 2 , 1 ], strides = [ 1 , 2 , 2 , 1 ], padding = 'SAME' , name = name) #輸入層 #請注意 X 的 name,在測試model時會用到它 X = tf.placeholder(tf.float32, [ None , CAPTCHA_IMAGE_WIDHT * CAPTCHA_IMAGE_HEIGHT], name = 'data-input' ) Y = tf.placeholder(tf.float32, [ None , CAPTCHA_LEN * CHAR_SET_LEN], name = 'label-input' ) x_input = tf.reshape(X, [ - 1 , CAPTCHA_IMAGE_HEIGHT, CAPTCHA_IMAGE_WIDHT, 1 ], name = 'x-input' ) #dropout,防止過擬合 #請注意 keep_prob 的 name,在測試model時會用到它 keep_prob = tf.placeholder(tf.float32, name = 'keep-prob' ) #第一層卷積 W_conv1 = weight_variable([ 5 , 5 , 1 , 32 ], 'W_conv1' ) B_conv1 = bias_variable([ 32 ], 'B_conv1' ) conv1 = tf.nn.relu(conv2d(x_input, W_conv1, 'conv1' ) + B_conv1) conv1 = max_pool_2X2(conv1, 'conv1-pool' ) conv1 = tf.nn.dropout(conv1, keep_prob) #第二層卷積 W_conv2 = weight_variable([ 5 , 5 , 32 , 64 ], 'W_conv2' ) B_conv2 = bias_variable([ 64 ], 'B_conv2' ) conv2 = tf.nn.relu(conv2d(conv1, W_conv2, 'conv2' ) + B_conv2) conv2 = max_pool_2X2(conv2, 'conv2-pool' ) conv2 = tf.nn.dropout(conv2, keep_prob) #第三層卷積 W_conv3 = weight_variable([ 5 , 5 , 64 , 64 ], 'W_conv3' ) B_conv3 = bias_variable([ 64 ], 'B_conv3' ) conv3 = tf.nn.relu(conv2d(conv2, W_conv3, 'conv3' ) + B_conv3) conv3 = max_pool_2X2(conv3, 'conv3-pool' ) conv3 = tf.nn.dropout(conv3, keep_prob) #全鏈接層 #每次池化后,圖片的寬度和高度均縮小為原來的一半,進過上面的三次池化,寬度和高度均縮小8倍 W_fc1 = weight_variable([ 20 * 8 * 64 , 1024 ], 'W_fc1' ) B_fc1 = bias_variable([ 1024 ], 'B_fc1' ) fc1 = tf.reshape(conv3, [ - 1 , 20 * 8 * 64 ]) fc1 = tf.nn.relu(tf.add(tf.matmul(fc1, W_fc1), B_fc1)) fc1 = tf.nn.dropout(fc1, keep_prob) #輸出層 W_fc2 = weight_variable([ 1024 , CAPTCHA_LEN * CHAR_SET_LEN], 'W_fc2' ) B_fc2 = bias_variable([CAPTCHA_LEN * CHAR_SET_LEN], 'B_fc2' ) output = tf.add(tf.matmul(fc1, W_fc2), B_fc2, 'output' ) loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = Y, logits = output)) optimizer = tf.train.AdamOptimizer( 0.001 ).minimize(loss) predict = tf.reshape(output, [ - 1 , CAPTCHA_LEN, CHAR_SET_LEN], name = 'predict' ) labels = tf.reshape(Y, [ - 1 , CAPTCHA_LEN, CHAR_SET_LEN], name = 'labels' ) #預測結果 #請注意 predict_max_idx 的 name,在測試model時會用到它 predict_max_idx = tf.argmax(predict, axis = 2 , name = 'predict_max_idx' ) labels_max_idx = tf.argmax(labels, axis = 2 , name = 'labels_max_idx' ) predict_correct_vec = tf.equal(predict_max_idx, labels_max_idx) accuracy = tf.reduce_mean(tf.cast(predict_correct_vec, tf.float32)) saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) steps = 0 for epoch in range ( 6000 ): train_data, train_label = get_next_batch( 64 , 'train' , steps) sess.run(optimizer, feed_dict = {X : train_data, Y : train_label, keep_prob: 0.75 }) if steps % 100 = = 0 : test_data, test_label = get_next_batch( 100 , 'validate' , steps) acc = sess.run(accuracy, feed_dict = {X : test_data, Y : test_label, keep_prob: 1.0 }) print ( "steps=%d, accuracy=%f" % (steps, acc)) if acc > 0.99 : saver.save(sess, MODEL_SAVE_PATH + "crack_captcha.model" , global_step = steps) break steps + = 1 if __name__ = = '__main__' : image_filename_list, total = get_image_file_name(CAPTCHA_IMAGE_PATH) random.seed(time.time()) #打亂順序 random.shuffle(image_filename_list) trainImageNumber = int (total * TRAIN_IMAGE_PERCENT) #分成測試集 TRAINING_IMAGE_NAME = image_filename_list[ : trainImageNumber] #和驗證集 VALIDATION_IMAGE_NAME = image_filename_list[trainImageNumber : ] train_data_with_CNN() print ( 'Training finished' ) |
運行上面的代碼,開始訓練,訓練要花些時間,如果沒有GPU的話,會慢些,
訓練完后,輸出如下結果,經過4100次的迭代,訓練出來的分類器模型在驗證集上識別的準確率為99.5%
生成的模型文件如下,在模型測試時將用到這些文件
3. 測試模型
編寫代碼,對訓練出來的模型進行測試
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
import tensorflow as tf import numpy as np from PIL import Image import os import matplotlib.pyplot as plt CAPTCHA_LEN = 4 MODEL_SAVE_PATH = 'E:/Tensorflow/captcha/models/' TEST_IMAGE_PATH = 'E:/Tensorflow/captcha/test/' def get_image_data_and_name(fileName, filePath = TEST_IMAGE_PATH): pathName = os.path.join(filePath, fileName) img = Image. open (pathName) #轉為灰度圖 img = img.convert( "L" ) image_array = np.array(img) image_data = image_array.flatten() / 255 image_name = fileName[ 0 :CAPTCHA_LEN] return image_data, image_name def digitalStr2Array(digitalStr): digitalList = [] for c in digitalStr: digitalList.append( ord (c) - ord ( '0' )) return np.array(digitalList) def model_test(): nameList = [] for pathName in os.listdir(TEST_IMAGE_PATH): nameList.append(pathName.split( '/' )[ - 1 ]) totalNumber = len (nameList) #加載graph saver = tf.train.import_meta_graph(MODEL_SAVE_PATH + "crack_captcha.model-4100.meta" ) graph = tf.get_default_graph() #從graph取得 tensor,他們的name是在構建graph時定義的(查看上面第2步里的代碼) input_holder = graph.get_tensor_by_name( "data-input:0" ) keep_prob_holder = graph.get_tensor_by_name( "keep-prob:0" ) predict_max_idx = graph.get_tensor_by_name( "predict_max_idx:0" ) with tf.Session() as sess: saver.restore(sess, tf.train.latest_checkpoint(MODEL_SAVE_PATH)) count = 0 for fileName in nameList: img_data, img_name = get_image_data_and_name(fileName, TEST_IMAGE_PATH) predict = sess.run(predict_max_idx, feed_dict = {input_holder:[img_data], keep_prob_holder : 1.0 }) filePathName = TEST_IMAGE_PATH + fileName print (filePathName) img = Image. open (filePathName) plt.imshow(img) plt.axis( 'off' ) plt.show() predictValue = np.squeeze(predict) rightValue = digitalStr2Array(img_name) if np.array_equal(predictValue, rightValue): result = '正確' count + = 1 else : result = '錯誤' print ( '實際值:{}, 預測值:{},測試結果:{}' . format (rightValue, predictValue, result)) print ( '\n' ) print ( '正確率:%.2f%%(%d/%d)' % (count * 100 / totalNumber, count, totalNumber)) if __name__ = = '__main__' : model_test() |
對模型的測試結果如下,在測試集上識別的準確率為 94%
下面是兩個識別錯誤的驗證碼
以上這篇利用Tensorflow構建和訓練自己的CNN來做簡單的驗證碼識別方式就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支持服務器之家。
原文鏈接:https://blog.csdn.net/maliao1123/article/details/79415828