Because of its limitations, the Rosenblatt perceptron performs poorly on nonlinear classification problems. To classify data that no linear classifier can separate, we construct a multilayer perceptron. Its structure is shown below:

[Figure: structure of the multilayer perceptron]
The network consists of an input layer, hidden layers, and an output layer, and can represent a wide variety of nonlinear decision surfaces. Each hidden unit has an activation function: the unit's inputs are multiplied by its weights and summed, giving the induced local field, which is passed through the activation function, and the function's output becomes the unit's output. The activation function plays the same role as a hard limiter, except that the hard limiter is not differentiable at the threshold, while the activation function is differentiable everywhere. The activation used in this program is the tanh function:

$$\tanh(v) = \frac{e^{v} - e^{-v}}{e^{v} + e^{-v}}$$

[Figure: graph of the tanh function]

The specific form used in the program is the scaled hyperbolic tangent

$$\varphi_j(v_j) = a \tanh(b\,v_j), \qquad a = 1.7159,\ b = \tfrac{2}{3},$$

where $v_j$ is the induced local field of neuron $j$.
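A convenient property of this activation is that its derivative can be written entirely in terms of its output: $\varphi'(v) = (b/a)\,(a - \varphi(v))\,(a + \varphi(v))$, which is the identity the backward pass relies on. The following minimal standalone sketch (the helper names phi and phi_prime are illustrative, not part of the program below) verifies it numerically with the same constants:

import numpy as np

a, b = 1.7159, 2.0 / 3.0   # same constants as ann_atanh in the program below

def phi(v):
    # scaled hyperbolic tangent: phi(v) = a * tanh(b * v)
    return a * np.tanh(b * v)

def phi_prime(v):
    # derivative expressed through the output y = phi(v):
    # phi'(v) = (b / a) * (a - y) * (a + y)
    y = phi(v)
    return (b / a) * (a - y) * (a + y)

v = np.linspace(-3.0, 3.0, 7)
numeric = (phi(v + 1e-6) - phi(v - 1e-6)) / 2e-6   # central difference
print(np.max(np.abs(phi_prime(v) - numeric)))      # agrees to about 1e-9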
The local gradient $\delta_j$ of neuron $j$ falls into two cases:

(1) Neuron $j$ is not in a hidden layer (it is an output neuron):

$$\delta_j = e_j\,\varphi_j'(v_j) = (d_j - y_j)\,\varphi_j'(v_j)$$

(2) Neuron $j$ lies in a hidden layer:

$$\delta_j = \varphi_j'(v_j)\sum_k \delta_k\, w_{kj},$$

where $k$ ranges over all units connected to unit $j$ in the following layer.
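Translated directly into NumPy, the two cases look like the following sketch (variable names and sample values are illustrative; the full program below implements the same logic in ann_delta_atanh, with $\varphi'$ expressed through the output as noted above):

import numpy as np

a, b = 1.7159, 2.0 / 3.0

def delta_output(y, d):
    # case (1), output layer: delta_j = (d_j - y_j) * phi'(v_j)
    return (b / a) * (d - y) * (a - y) * (a + y)

def delta_hidden(y, w_next, delta_next):
    # case (2), hidden layer: delta_j = phi'(v_j) * sum_k delta_k * w_kj;
    # w_next[j, k] is the weight from hidden unit j to downstream unit k
    return (b / a) * (a - y) * (a + y) * np.dot(w_next, delta_next)

d = np.array([1.0, 0.0])            # desired response
y_out = np.array([0.3, -0.2])       # outputs of the output layer
d_out = delta_output(y_out, d)

y_hid = np.array([0.5, -0.1, 0.8])  # outputs of the hidden layer
w = np.random.rand(3, 2) - 0.5      # 3 hidden units feeding 2 output units
print(delta_hidden(y_hid, w, d_out))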
Once the local gradients are known, the weight-update rule of incremental gradient descent,

$$\Delta w_{ji}(n) = \alpha\,\Delta w_{ji}(n-1) + \eta\,\delta_j(n)\,y_i(n),$$

with learning rate $\eta$ and momentum constant $\alpha$ (learn_r and train_a in the program below), yields the weights $w$ for the next iteration. After some number of iterations, with a stopping condition on the error, the search reaches a minimum of the error surface in weight space.
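As a minimal sketch of this rule for one layer (illustrative names and values; eta and alpha play the roles of learn_r and train_a, and the momentum bookkeeping here follows the textbook form rather than the program's per-layer variant):

import numpy as np

eta, alpha = 0.002, 0.0024   # learning rate and momentum, as in the program

def update(w, dw_prev, delta, y_prev):
    # generalized delta rule with momentum:
    # dw_ji(n) = alpha * dw_ji(n-1) + eta * delta_j(n) * y_i(n)
    dw = alpha * dw_prev + eta * np.outer(y_prev, delta)
    return w + dw, dw

w = np.zeros((3, 2))                 # weights from 3 inputs to 2 units
dw_prev = np.zeros_like(w)
delta = np.array([0.1, -0.05])       # local gradients of the 2 units
y_prev = np.array([0.5, -0.1, 0.8])  # outputs of the previous layer
w, dw_prev = update(w, dw_prev, delta, y_prev)
print(w)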
The Python (2.x) program follows. So that the results can be visualized, the training data are two-dimensional; each hidden layer has 8 nodes, there are 7 hidden layers, and the output layer has 2 units:
import numpy as np
import random
import copy
import matplotlib.pyplot as plt

# training samples x and desired responses d
train_x = [[1, 6], [3, 12], [3, 9], [3, 21], [2, 16], [3, 15]]
d = [[1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [0, 1]]
warray_txn = len(train_x[0])
warray_n = warray_txn * 4

# basic parameters
oldmse = 10**100
fh = 1
maxtrycount = 500
mycount = 0.0
if maxtrycount >= 20:
    r = maxtrycount / 5
else:
    r = maxtrycount / 2

# sigmoid function slots
ann_sigfun = None
ann_delta_sigfun = None
# total layer count: the nonlinear layers plus one extra linear layer
alllevel_count = warray_txn * 4
# number of nonlinear layers
hidelevel_count = alllevel_count - 1
# learning-rate parameters
learn_r0 = 0.002
learn_r = learn_r0
# momentum parameters
train_a0 = learn_r0 * 1.2
train_a = train_a0
expect_e = 0.05

# preprocess the input: find the maximum of each input component
ann_max = []
for m_ani in xrange(0, warray_txn):
    temp_x = np.array(train_x)
    ann_max.append(np.max(temp_x[:, m_ani]))
ann_max = np.array(ann_max)


def getnowsx(mysx, in_w):
    '''Generate the dimension-expanded input for one sample.
    mysx ==> input sample; in_w ==> weight matrix, one column per neuron.'''
    global warray_n
    mysx = np.array(mysx)
    x_end = []
    for i in xrange(0, warray_n):
        x_end.append(np.dot(mysx, in_w[:, i]))
    return x_end


def get_inlw(my_train_max, w_count, myin_x):
    '''Out of w_count random candidates, pick a weight matrix whose mean is
    close to 0 and whose outputs have variance close to 1.'''
    global warray_txn
    global warray_n
    mylw = []
    y_in = []
    # generate candidate weights
    mylw = np.random.rand(w_count, warray_txn, warray_n)
    for ii in xrange(0, warray_txn):
        mylw[:, ii, :] = mylw[:, ii, :] * 1 / float(my_train_max[ii]) \
                         - 1 / float(my_train_max[ii]) * 0.5
    # compute the outputs
    for i in xrange(0, w_count):
        y_in.append([])
        for xj in xrange(0, len(myin_x)):
            y_in[i].append(getnowsx(myin_x[xj], mylw[i]))
    # pick the candidate whose output variance is closest to 1
    mymin = 10**5
    mychoice = 0
    for i in xrange(0, w_count):
        myvar = np.var(y_in[i])
        if abs(myvar - 1) < mymin:
            mymin = abs(myvar - 1)
            mychoice = i
    return mylw[mychoice]

mylnww = get_inlw(ann_max, 300, train_x)


def get_inputx(mytrain_x, myin_w):
    '''Pass the training data through the weight matrix to expand its dimension.'''
    end_trainx = []
    for i in xrange(0, len(mytrain_x)):
        end_trainx.append(getnowsx(mytrain_x[i], myin_w))
    return end_trainx

x = get_inputx(train_x, mylnww)  # dimension-expanded input data


def get_siminx(sim_x):
    '''Expand the dimension of test data.'''
    global mylnww
    myxx = np.array(sim_x)
    return get_inputx(myxx, mylnww)


def getlevelw(myin_x, wo_n, wi_n, w_count):
    '''Compute the initial weight matrix for one layer.'''
    mylw = []
    y_in = []
    # generate candidate weights in [-1, 1)
    mylw = np.random.rand(w_count, wi_n, wo_n)
    mylw = mylw * 2. - 1
    # compute the outputs
    for i in xrange(0, w_count):
        y_in.append([])
        for xj in xrange(0, len(myin_x)):
            x_end = []
            for myii in xrange(0, wo_n):
                x_end.append(np.dot(myin_x[xj], mylw[i, :, myii]))
            y_in[i].append(x_end)
    # pick the candidate whose output variance is closest to 1
    mymin = 10**3
    mychoice = 0
    for i in xrange(0, w_count):
        myvar = np.var(y_in[i])
        if abs(myvar - 1) < mymin:
            mymin = abs(myvar - 1)
            mychoice = i
    csmylw = mylw[mychoice]
    return csmylw, y_in[mychoice]

ann_w = []


def init_annw():
    '''Initialize the weight matrix of every layer.'''
    global x
    global hidelevel_count
    global warray_n
    global d
    global ann_w
    ann_w = []
    lwyii = np.array(x)
    for myn in xrange(0, hidelevel_count):  # layer index
        ann_w.append([])
        if myn == hidelevel_count - 1:
            for iii in xrange(0, warray_n):
                ann_w[myn].append([])
                for jjj in xrange(0, warray_n):
                    ann_w[myn][iii].append(0.0)
        elif myn == hidelevel_count - 2:
            templw, lwyii = getlevelw(lwyii, len(d[0]), warray_n, 200)
            for xii in xrange(0, warray_n):
                ann_w[myn].append([])
                for xjj in xrange(0, len(d[0])):
                    ann_w[myn][xii].append(templw[xii, xjj])
                for xjj in xrange(len(d[0]), warray_n):
                    ann_w[myn][xii].append(0.0)
        else:
            templw, lwyii = getlevelw(lwyii, warray_n, warray_n, 200)
            for xii in xrange(0, warray_n):
                ann_w[myn].append([])
                for xjj in xrange(0, warray_n):
                    ann_w[myn][xii].append(templw[xii, xjj])
    ann_w = np.array(ann_w)


def generate_lw(trycount):
    global ann_w
    print "generating initial weight matrices",
    meanmin = 1
    myann_w = ann_w
    alltry = 30
    tryc = 0
    while tryc < alltry:
        for i_i in range(trycount):
            print ".",
            init_annw()
            if abs(np.mean(np.array(ann_w))) < meanmin:
                meanmin = abs(np.mean(np.array(ann_w)))
                myann_w = ann_w
        tryc += 1
        if abs(np.mean(np.array(myann_w))) < 0.008:
            break
    ann_w = myann_w
    print
    print "weight matrix mean: %f" % (np.mean(np.array(ann_w)))
    print "weight matrix variance: %f" % (np.var(np.array(ann_w)))

generate_lw(15)

# weights from the previous training pass
ann_oldw = copy.deepcopy(ann_w)

# local-gradient storage; the input (first) layer needs none, so its slot is unused
ann_delta = []
for i in xrange(0, hidelevel_count):
    ann_delta.append([])
    for j in xrange(0, warray_n):
        ann_delta[i].append(0.0)
ann_delta = np.array(ann_delta)

# output matrix yi: first index is the layer (from 0), second is the neuron
ann_yi = []
for i in xrange(0, alllevel_count):
    ann_yi.append([])
    for j in xrange(0, warray_n):
        ann_yi[i].append(0.0)
ann_yi = np.array(ann_yi)


def o_func(myy):
    '''Output-layer function: threshold each output against the mean.'''
    myresult = []
    mymean = np.mean(myy)
    for i in xrange(0, len(myy)):
        if myy[i] >= mymean:
            myresult.append(1.0)
        else:
            myresult.append(0.0)
    return np.array(myresult)


def get_e(myd, myo):
    return np.array(myd - myo)


def ann_atanh(myv):
    '''Scaled tanh activation: a * tanh(b * v).'''
    atanh_a = 1.7159  # > 0
    atanh_b = 2 / float(3)  # > 0
    temp_rs = atanh_a * np.tanh(atanh_b * myv)
    return temp_rs


def ann_delta_atanh(myy, myd, nowlevel, level, n, mydelta, myw):
    '''Local gradient for the scaled tanh activation.'''
    anndelta = []
    atanh_a = 1.7159  # > 0
    atanh_b = 2 / float(3)  # > 0
    if nowlevel == level:
        # output layer: delta = (d - y) * phi'(v)
        anndelta = (float(atanh_b) / atanh_a) * (myd - myy) * (atanh_a - myy) * (atanh_a + myy)
    else:
        # hidden layer: delta = phi'(v) * sum_k w_kj * delta_k
        anndelta = (float(atanh_b) / atanh_a) * (atanh_a - myy) * (atanh_a + myy)
        temp_rs = []
        for j in xrange(0, n):
            temp_rs.append(sum(myw[j] * mydelta))
        anndelta = anndelta * temp_rs
    return anndelta


def sample_train(myx, myd, n, sigmoid_func, delta_sigfun):
    '''Forward and backward pass for one sample.'''
    global ann_yi
    global ann_delta
    global ann_w
    global ann_wj0
    global ann_y0
    global hidelevel_count
    global alllevel_count
    global learn_r
    global train_a
    global ann_oldw
    level = hidelevel_count
    allevel = alllevel_count
    # clear the output-signal array yi
    hidelevel = hidelevel_count
    alllevel = alllevel_count
    for i in xrange(0, alllevel):       # first index: layer
        for j in xrange(0, n):          # second index: neuron
            ann_yi[i][j] = 0.0
    ann_yi = np.array(ann_yi)
    yi = ann_yi
    # clear the delta matrix
    for i in xrange(0, hidelevel - 1):
        for j in xrange(0, n):
            ann_delta[i][j] = 0.0
    delta = ann_delta
    # keep a copy of W for the next iteration
    ann_oldw = copy.deepcopy(ann_w)
    oldw = ann_oldw

    # forward pass, layer by layer, computing the induced local fields
    myo = np.array([])
    for nowlevel in xrange(0, alllevel):
        my_y = []
        myy = yi[nowlevel - 1]
        myw = ann_w[nowlevel - 1]
        if nowlevel == 0:
            # first (input) layer
            my_y = myx
            yi[nowlevel] = my_y
        elif nowlevel == (alllevel - 1):
            # linear output layer
            my_y = o_func(yi[nowlevel - 1, :len(myd)])
            yi[nowlevel, :len(myd)] = my_y
        elif nowlevel == (hidelevel - 1):
            # last nonlinear layer
            for i in xrange(0, len(myd)):
                temp_y = sigmoid_func(np.dot(myw[:, i], myy))
                my_y.append(temp_y)
            yi[nowlevel, :len(myd)] = my_y
        else:
            # intermediate hidden layers
            for i in xrange(0, len(myy)):
                temp_y = sigmoid_func(np.dot(myw[:, i], myy))
                my_y.append(temp_y)
            yi[nowlevel] = my_y

    # error and mean-square error
    myo = yi[hidelevel - 1][:len(myd)]
    myo_end = yi[alllevel - 1][:len(myd)]
    mymse = get_e(myd, myo_end)

    # backward pass: the input layer needs no delta, the output layer no W
    for nowlevel in xrange(level - 1, 0, -1):
        if nowlevel == level - 1:
            mydelta = delta[nowlevel]
            my_n = len(myd)
        else:
            mydelta = delta[nowlevel + 1]
            my_n = n
        myw = ann_w[nowlevel]
        if nowlevel == level - 1:
            # output layer
            mydelta = delta_sigfun(myo, myd, None, None, None, None, None)
        elif nowlevel == level - 2:
            # the layer feeding the output layer: the number of outputs and the
            # number of hidden neurons may differ, so pass output-sized slices
            mydelta = delta_sigfun(yi[nowlevel], myd, nowlevel, level - 1,
                                   my_n, mydelta[:len(myd)], myw[:, :len(myd)])
        else:
            mydelta = delta_sigfun(yi[nowlevel], myd, nowlevel, level - 1,
                                   my_n, mydelta, myw)
        delta[nowlevel][:my_n] = mydelta

    # compute and apply the weight updates; each layer uses its own rates
    for nowlevel in xrange(level - 1, 0, -1):
        if nowlevel == level - 1:
            # output layer
            my_n = len(myd)
            mylearn_r = learn_r * 0.8
            mytrain_a = train_a * 1.6
        elif nowlevel == 1:
            # input layer
            my_n = len(myd)
            mylearn_r = learn_r * 0.9
            mytrain_a = train_a * 0.8
        else:
            # other layers
            my_n = n
            mylearn_r = learn_r
            mytrain_a = train_a
        pre_level_myy = yi[nowlevel - 1]
        pretrain_myww = oldw[nowlevel - 1]
        pretrain_myw = pretrain_myww[:, :my_n]
        # second adjustment term: eta * delta_j * y_i
        temp_i = []
        for i in xrange(0, n):
            temp_i.append([])
            for jj in xrange(0, my_n):
                temp_i[i].append(mylearn_r * delta[nowlevel, jj] * pre_level_myy[i])
        temp_rs2 = np.array(temp_i)
        temp_rs1 = mytrain_a * pretrain_myw
        # total adjustment
        temp_change = temp_rs1 + temp_rs2
        my_ww = ann_w[nowlevel - 1]
        my_ww[:, :my_n] += temp_change
    return mymse


def train_update(level, nowtraincount, sigmoid_func, delta_sigfun):
    '''Read all samples once and run one training iteration over them.'''
    global learn_r
    global train_a
    global train_a0
    global learn_r0
    global r
    global x
    global d
    global maxtrycount
    global oldmse
    # shuffle the sample order
    x_n = len(x)
    ids = range(0, x_n)
    train_ids = []
    sample_x = []
    sample_d = []
    while len(ids) > 0:
        myxz = random.randint(0, len(ids) - 1)
        train_ids.append(ids[myxz])
        del ids[myxz]
    for i in xrange(0, len(train_ids)):
        sample_x.append(x[train_ids[i]])
        sample_d.append(d[train_ids[i]])
    sample_x = np.array(sample_x)
    sample_d = np.array(sample_d)
    # train on every sample of x
    totalmse = 0.0
    mymse = float(10**-10)
    for i in xrange(0, x_n):
        mymse = sample_train(sample_x[i], sample_d[i], warray_n,
                             sigmoid_func, delta_sigfun)
        totalmse += sum(mymse * mymse)
    totalmse = np.sqrt(totalmse / float(x_n))
    print "error: %f" % (totalmse)
    nowtraincount[0] += 1
    # annealed learning rate and momentum
    learn_r = learn_r0 / (1 + float(nowtraincount[0]) / r)
    train_a = train_a0 / (1 + float(nowtraincount[0]) / r)
    if nowtraincount[0] >= maxtrycount:
        return False, True, totalmse
    elif totalmse < expect_e:
        print "training succeeded, verifying"
        totalmse = 0.0
        for i in xrange(0, x_n):
            mytemper = (sample_d[i] - simulate(sample_x[i], sigmoid_func, delta_sigfun))
            totalmse += sum(mytemper * mytemper)
        totalmse = np.sqrt(totalmse / float(x_n))
        if totalmse < expect_e:
            return False, False, totalmse
    oldmse = totalmse
    return True, False, totalmse


def train():
    '''Train on the samples over many iterations.'''
    global hidelevel_count
    nowtraincount = []
    nowtraincount.append(0)
    # choose the sigmoid and its local-gradient function
    delta_sigfun = ann_delta_atanh
    sigmoid_func = ann_atanh
    tryerr = 0
    while True:
        print "------- training pass %d ---------" % (nowtraincount[0] + 1),
        iscontinue, iscountout, mymse = train_update(hidelevel_count, nowtraincount,
                                                     sigmoid_func, delta_sigfun)
        if not iscontinue:
            if iscountout:
                print "iteration limit reached, error: %f" % mymse
                tryerr += 1
                if tryerr > 3:
                    break
                else:
                    print "training failed, retrying (attempt %d)" % tryerr
                    nowtraincount[0] = 0
                    generate_lw(15 + tryerr * 2)
            else:
                print "training succeeded, error: %f" % mymse
                break


def simulate(myx, sigmoid_func, delta_sigfun):
    '''Forward (simulation) pass for one sample.'''
    print "simulating"
    global ann_yi
    global ann_w
    global ann_wj0
    global ann_y0
    global hidelevel_count
    global alllevel_count
    global d
    myd = d[0]
    myx = np.array(myx)
    n = len(myx)
    level = hidelevel_count
    allevel = alllevel_count
    # clear the output-signal array yi
    hidelevel = hidelevel_count
    alllevel = alllevel_count
    for i in xrange(0, alllevel):       # first index: layer
        for j in xrange(0, n):          # second index: neuron
            ann_yi[i][j] = 0.0
    ann_yi = np.array(ann_yi)
    yi = ann_yi
    # forward pass, layer by layer
    myo = np.array([])
    myy = np.array([])
    for nowlevel in xrange(0, alllevel):
        my_y = []
        myy = yi[nowlevel - 1]
        myw = ann_w[nowlevel - 1]
        if nowlevel == 0:
            # first (input) layer
            my_y = myx
            yi[nowlevel] = my_y
        elif nowlevel == (alllevel - 1):
            # linear output layer
            my_y = o_func(yi[nowlevel - 1, :len(myd)])
            yi[nowlevel, :len(myd)] = my_y
        elif nowlevel == (hidelevel - 1):
            # last nonlinear layer
            for i in xrange(0, len(myd)):
                temp_y = sigmoid_func(np.dot(myw[:, i], myy))
                my_y.append(temp_y)
            yi[nowlevel, :len(myd)] = my_y
        else:
            # intermediate hidden layers
            for i in xrange(0, len(myy)):
                temp_y = sigmoid_func(np.dot(myw[:, i], myy))
                my_y.append(temp_y)
            yi[nowlevel] = my_y
    return yi[alllevel - 1, :len(myd)]

train()

# plot the training data
delta_sigfun = ann_delta_atanh
sigmoid_func = ann_atanh
for xn in xrange(0, len(x)):
    if simulate(x[xn], sigmoid_func, delta_sigfun)[0] > 0:
        plt.plot(train_x[xn][0], train_x[xn][1], "bo")
    else:
        plt.plot(train_x[xn][0], train_x[xn][1], "b*")

# generate and classify random test points
temp_x = np.random.rand(20) * 10
temp_y = np.random.rand(20) * 20 + temp_x
myx = temp_x
myy = temp_y
plt.subplot(111)
x_max = np.max(myx) + 5
x_min = np.min(myx) - 5
y_max = np.max(myy) + 5
y_min = np.min(myy) - 5
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
for i in xrange(0, len(myx)):
    # expand the test point; get_siminx returns a list of samples
    test = get_siminx([[myx[i], myy[i]]])[0]
    if simulate(test, sigmoid_func, delta_sigfun)[0] > 0:
        plt.plot(myx[i], myy[i], "ro")
    else:
        plt.plot(myx[i], myy[i], "r*")
plt.show()
In the figure, blue marks the training data and red the test data; circles denote class [1, 0] and stars denote class [0, 1].
Original article: http://blog.csdn.net/cui134/article/details/26823101