-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgpucnn_rpn.cpp
4794 lines (4217 loc) · 151 KB
/
gpucnn_rpn.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include <iostream>
#include <fstream>
#include <cmath>
#include <cstdio>
#include <ctime>
#include <cstdlib>
#include <cstring>
#include <vector>
#include <iomanip>
#include <sstream>
#include <algorithm>
#include <cassert>
#include <stdio.h>
#include <stdlib.h>
/*#include <random> it seems cuda4.0 not support c++ 11 */
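/*
* 2017-06-25 09:44 done:
* forward propagation from image / convolution / pooling layer into a convolution layer,
* forward propagation from convolution layer to pooling layer,
* forward propagation from pooling layer to fully connected layer,
* backward propagation from fully connected layer to pooling layer,
* backward propagation from pooling layer to convolution layer,
* backward propagation from convolution layer to pooling layer, image and convolution layer,
* convolution layer computes the kernel weights sum,
* computation of the averaged update values for bias and weights.
* Next step: integrate the convolution and pooling layers into the whole-network computation!!
*
* 2017-06-26..07-24 here: finish the forward connections of the following convolution,
* pooling and fully connected layers, then finish back-propagation,
* then sum up the changes, then update the weights.
*
* 2017-07-09 fully connected forward propagation and softmax work correctly now;
* checked down to convolution-to-pooling back-propagation and fixed bugs; back-propagation basically works.
* 2017-07-10 changed the random seed. This step is essential:
* every run must be initialised with a different seed,
* otherwise a run that hits a non-converging case will never converge.
* 2017-07-11 fixed softmax bugs
* 2017-07-13 ReLU
* 2017-07-14 Dropout
*
* 2017-08-24 want: compute the feature map
* */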
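/*
* Compiling a .cpp file to .obj with VS2008
* 1. Start the VS Command Prompt
* 2. Run the command: cl /Fo -c cppfullpath.cpp
* 3. The .obj file is written to the working directory of the command prompt
*/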
/*
* Compiling a .cu file to .obj with nvcc
* * first uncomment the USE_GPU_MODE define
* nvcc -c -arch=sm_20 gpucnn.cu
*
*/
/**
After compiling each .obj separately, link them together into an .exe
1. Start the VS2008 (VS9.0) Command Prompt
2. cd into the directory that contains the .obj files
3. Run the following command
cl gpucnn.obj array3d.obj lodepng.obj wftools.obj wImage.obj jsoncpp.obj "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\lib\Win32\cudart.lib"
*
* */
//#define USE_GPU_MODE 1
#include "../../sharedcppcodes/array3d.h"
#include "../../sharedcppcodes/wImage.h"
#include "../../sharedcppcodes/wftools.h"
#include "../../sharedcppcodes/json.h"
#include "../../sharedcppcodes/wfsvgrectreader.h"
#ifndef USE_GPU_MODE
// CPU-only build (USE_GPU_MODE not defined): empty stand-ins for the CUDA
// runtime entry points used in this file. None of them touches its arguments.

// No-op allocation stub; *pPtr is left unchanged.
void cudaMalloc( float** pPtr , int n ){
}
// No-op free stub.
void cudaFree(float* ptr){
}
// Direction values accepted (and ignored) by the cudaMemcpy stub below.
const int cudaMemcpyHostToDevice = 0;
const int cudaMemcpyDeviceToHost = 1;
// No-op copy stub; no data is transferred in either direction.
void cudaMemcpy( float* targetPtr , float* sourcePtr , int n , int mode ) {
}
#endif
// Deletes p with scalar `delete` when it is non-null, then resets it to 0.
// NOTE(review): expands to a bare `if` statement (beware of a following `else`)
// and uses `delete`, not `delete[]`, so it only suits pointers from non-array `new`.
#define SAFE_RELEASE(p) if(p){delete p;p=0;}
// Gaussian random number generator (Marsaglia polar method).
// This generator matters: bias and weights must be initialised with these
// Gaussian values, otherwise training does not converge easily.
//
// muval  : mean of the returned value
// stdval : standard deviation of the returned value
// Draws from rand(). A pair of deviates is generated on every second call and
// kept in function-local statics, so the function is not reentrant.
float gaussrandvalue( float muval , float stdval ){
    static float V1, V2, S;
    static int phase = 0;
    const bool needFreshPair = (phase == 0);
    if (needFreshPair) {
        // Rejection-sample a point inside the unit circle, excluding the origin.
        for (;;) {
            float U1 = (float)rand() / (RAND_MAX+1.f);
            float U2 = (float)rand() / (RAND_MAX+1.f);
            V1 = 2 * U1 - 1;
            V2 = 2 * U2 - 1;
            S = V1 * V1 + V2 * V2;
            if (!(S >= 1 || S == 0)) {
                break;
            }
        }
    }
    // First call of a pair uses V1, the second one re-uses V2 with the same S.
    const float coordinate = needFreshPair ? V1 : V2;
    float X = coordinate * sqrt(-2 * log(S) / S);
    phase = 1 - phase;
    return muval + X * stdval ;
}
// Helper routines that are only declared here; presumably they are defined in
// another translation unit of the project (their bodies are not visible in this part of the file).
extern int writeArrayToFile(const char* prefix1,const char* prefix2,int index1,
const char* mid ,
int index2,
const char* tail,float* array,int nx,int ny,int nz) ;
extern void writeDebugFloatArray( const char* name , int index , float* farr , int nx,int ny,int nz,int nk );
extern void writeDebugLine( const char* name , int index ) ;
extern void writeDebugLineFloat( const char* name , float val ) ;
extern std::string currentDateTimeString( std::string ext);
extern void getTopNIndexFromArray( float* array , int arraysize , int n , int* iarr , float* varr ) ;
// Stores one anchor sample: its values, the anchor box and the sample type
// (0 for negative, 1 for positive).
// NOTE(review): the constructor allocates `values` and the destructor frees it,
// but no copy constructor / assignment operator is declared, so copying an
// instance would make two objects free the same buffer.
class GAnchorBox
{
public:
// Copies `valSize` floats from `valArrToCopy` into a buffer owned by the box.
GAnchorBox(int type,int anchorId1,float x,float y,float w,float h,float* valArrToCopy, int valSize ) ;
~GAnchorBox() ;
int type ;//0 for negative , 1 for positive
int anchorId ;
float x,y,w,h ;
float* values ;// owned copy of the anchor values, `size` floats long
int size ;// number of floats in `values`
};
// Builds an anchor record and takes a private copy of the first `valSize`
// floats of `valArrToCopy`.
GAnchorBox::GAnchorBox(int type1,int anchorId1,float x1,float y1,float w1,float h1,float* valArrToCopy, int valSize)
    : type(type1),
      anchorId(anchorId1),
      x(x1),
      y(y1),
      w(w1),
      h(h1),
      values(new float[valSize]),
      size(valSize)
{
    // Deep copy, so the box does not depend on the caller's array afterwards.
    memcpy( values , valArrToCopy , sizeof(float) * valSize ) ;
}
// Releases the value buffer owned by this box.
// `values` is allocated with new[] in the constructor, so it has to be
// released with delete[]; the scalar delete performed by SAFE_RELEASE is
// undefined behaviour for an array allocation.
GAnchorBox::~GAnchorBox() {
    delete[] values ;
    values = 0 ;
}
// A float value array for host and device switch.
// Fixed-size float buffer kept twice: a host copy (new[]) and a device copy
// (cudaMalloc), with explicit transfers between the two.
class GFloatArray{
public:
    // Allocates `nfloat` floats on the host, requests the same number of bytes
    // through cudaMalloc and pushes the initial host contents to the device.
    //   fillRand == true  : each element is gaussrandvalue(0, sqrt(1/nfloat))
    //   fillRand == false : the host buffer is zero-filled
    inline GFloatArray(int nfloat,bool fillRand):
        m_hostMemory(0),
        m_devMemory(0),
        m_nfloat(nfloat),
        m_nbytes(0)
    {
        m_nbytes = sizeof(float) * m_nfloat ;
        m_hostMemory = new float[m_nfloat] ;
        const float sigma = sqrtf( 1.0f / m_nfloat ) ;
        if( !fillRand ){
            memset( m_hostMemory , 0 , m_nbytes ) ;
        }else{
            for( int k = 0 ; k < m_nfloat ; ++k ){
                m_hostMemory[k] = gaussrandvalue( 0.0f , sigma ) ;
            }
        }
        cudaMalloc( &m_devMemory , m_nbytes ) ;
        copyHost2Device() ;
    }

    // Frees both copies and resets all bookkeeping.
    inline ~GFloatArray(){
        m_nfloat = 0 ;
        m_nbytes = 0 ;
        delete[] m_hostMemory ;
        m_hostMemory = 0 ;
        cudaFree( m_devMemory ) ;
        m_devMemory = 0 ;
    }

    // Copies the host contents of `fromArray` (which must have the same
    // length) into this array; in GPU mode the device copy is refreshed too.
    inline void copyFromArrayHost( GFloatArray* fromArray ){
        assert( this->m_nfloat == fromArray->getNFloat() ) ;
        const float* src = fromArray->getHostMemory() ;
        float* dst = m_hostMemory ;
        for( int k = 0 ; k < m_nfloat ; ++k ){
            dst[k] = src[k] ;
        }
#ifdef USE_GPU_MODE
        copyHost2Device() ;
#endif
    }

    // Plain accessors.
    inline float* getHostMemory(){ return m_hostMemory ; }
    inline float* getDevMemory(){ return m_devMemory ; }
    inline int getNFloat(){ return m_nfloat ; }
    inline int getNBytes(){ return m_nbytes ; }

    // Host -> device transfer of the whole buffer.
    inline void copyHost2Device(){
        cudaMemcpy( m_devMemory , m_hostMemory , m_nbytes , cudaMemcpyHostToDevice ) ;
    }

    // Device -> host transfer of the whole buffer.
    inline void copyDeviceToHost(){
        cudaMemcpy( m_hostMemory , m_devMemory , m_nbytes , cudaMemcpyDeviceToHost ) ;
    }

private:
    float* m_hostMemory ; // host copy, m_nfloat floats
    float* m_devMemory ;  // device copy (stays 0 with the CPU stubs)
    int m_nfloat ;        // element count
    int m_nbytes ;        // m_nfloat * sizeof(float)
} ;
//=======================================================================================
//=======================================================================================
//=======================================================================================
// One labelled sample: a (non-owned here) data buffer, its class label and an id.
class GLabeledData{
public:
    // All three members start from a defined value; m_id used to be left
    // uninitialised, so reading it before an explicit assignment was undefined.
    inline GLabeledData():m_dataPtr(0),m_label(0),m_id(0){}
    GFloatArray* m_dataPtr ; // sample data; this class never deletes it
    int m_label ;            // class label
    int m_id ;               // sample id
} ;
//=======================================================================================
//=======================================================================================
//=======================================================================================
// Layer kind tag stored in GLayer::m_type. The layers write this value to and
// read it from the "layer-type" JSON field as an integer, so the numbering
// must stay stable.
enum GLayerType {
    GLayerTypeNone           = 0 ,
    GLayerTypeFull           = 1 ,
    GLayerTypeConv           = 2 ,
    GLayerTypePool           = 3 ,
    GLayerTypeFastRCNNOutput = 4 ,
    GLayerTypeRPN            = 5
};
class GLayer{
public:
GLayerType m_type ;
std::string m_layerName ;
bool m_fixWeightsAndBias ;
inline GLayerType getType() { return m_type ;} ;
virtual ~GLayer() ;
virtual Json::Value toJsonNode() ;
} ;
// Out-of-line virtual destructor; the body is empty.
GLayer::~GLayer() {
}
// Default serialisation: a default-constructed JSON value.
Json::Value GLayer::toJsonNode() {
    return Json::Value() ;
}
// Fully connected layer. Owns all of its GFloatArray buffers (released in the destructor).
class GLayerFull : public GLayer {
public:
// Creates a layer with `insize` inputs and `outsize` outputs; bias/weights are randomly initialised.
GLayerFull(int insize , int outsize ) ;
// Restores a layer from a JSON node as produced by toJsonNode().
GLayerFull(Json::Value& jsonNode ) ;
~GLayerFull() ;
// Serialises sizes, flags and all bias/weight values.
Json::Value toJsonNode() ;
GFloatArray* m_actiArray ;// m_outsize floats
GFloatArray* m_biasAndWeights ;// m_outsize + m_outsize*m_insize floats
GFloatArray* m_biasAndWeightsChangesSum ;// same length as m_biasAndWeights
GFloatArray* m_errorArray ;// m_outsize floats
GFloatArray* m_lastBiasAndWeightsChanges ;// same length as m_biasAndWeights
GFloatArray* m_dropoutMaskArray ;// m_outsize floats: 0.0 for dropout , 1.0 for active
int m_insize , m_outsize ;
bool m_useDropoutMask ;// when false, shuffleDropoutMaskArray() and setAllMaskOne() do nothing
// Resets the mask to all ones, then zeroes a random half of the entries.
void shuffleDropoutMaskArray() ;
// Sets every mask entry to 1.0.
void setAllMaskOne() ;
} ;
// Frees every buffer owned by the layer and nulls the pointers.
GLayerFull::~GLayerFull(){
    GFloatArray** const owned[] = {
        &m_actiArray ,
        &m_biasAndWeights ,
        &m_biasAndWeightsChangesSum ,
        &m_errorArray ,
        &m_lastBiasAndWeightsChanges ,
        &m_dropoutMaskArray
    } ;
    const int ownedCount = (int)( sizeof(owned) / sizeof(owned[0]) ) ;
    for( int k = 0 ; k < ownedCount ; ++k ){
        delete *owned[k] ; // deleting a null pointer is a no-op
        *owned[k] = 0 ;
    }
}
// Serialises the layer: name, type, input/output sizes, the shape of the
// bias+weight block, both flags, and every bias/weight value under "wb".
// In GPU mode the parameters are fetched from the device first.
Json::Value GLayerFull::toJsonNode() {
    Json::Value out ;
    out["layer-name"] = m_layerName ;
    out["layer-type"] = m_type ;

    // A fully connected layer is described as an (insize x 1 x 1) input
    // and an (outsize x 1 x 1) output.
    out["input-x-size"] = m_insize ;
    out["input-y-size"] = 1 ;
    out["input-z-size"] = 1 ;
    out["output-x-size"] = m_outsize ;
    out["output-y-size"] = 1 ;
    out["output-z-size"] = 1 ;

    // One bias row plus one weight row per input.
    out["wb-x-size"] = m_outsize ;
    out["wb-y-size"] = m_insize + 1 ;
    out["wb-z-size"] = 1 ;
    out["wb-k-size"] = 1 ;

    out["drop-out"] = m_useDropoutMask ;
    out["fix-weights-bias"] = m_fixWeightsAndBias ;

    const int count = m_biasAndWeights->getNFloat() ;
#ifdef USE_GPU_MODE
    m_biasAndWeights->copyDeviceToHost() ;
#endif
    float* host = m_biasAndWeights->getHostMemory() ;
    for( int k = 0 ; k < count ; ++k ){
        out["wb"][k] = host[k] ;
    }
    return out ;
}
// Creates a fully connected layer with `insize` inputs and `outsize` outputs.
// Bias and weights are randomly initialised, every other buffer starts at
// zero, and the dropout mask is set to all ones (dropout disabled).
GLayerFull::GLayerFull(int insize , int outsize ){
    m_type = GLayerTypeFull ;
    m_insize = insize ;
    m_outsize = outsize ;
    m_useDropoutMask = false ;
    m_fixWeightsAndBias = false ;

    // outsize biases followed by outsize*insize weights
    const int paramCount = outsize + outsize * insize ;

    m_actiArray = new GFloatArray( outsize , false ) ;
    m_dropoutMaskArray = new GFloatArray( outsize , false ) ;
    m_biasAndWeights = new GFloatArray( paramCount , true ) ;
    m_biasAndWeightsChangesSum = new GFloatArray( paramCount , false ) ;
    m_errorArray = new GFloatArray( outsize , false ) ;
    m_lastBiasAndWeightsChanges = new GFloatArray( paramCount , false ) ;

    // Mask of all ones: every output is active.
    float* mask = m_dropoutMaskArray->getHostMemory() ;
    const int maskLen = m_dropoutMaskArray->getNFloat() ;
    for( int k = 0 ; k < maskLen ; ++k ){
        mask[k] = 1.0f ;
    }
#ifdef USE_GPU_MODE
    m_dropoutMaskArray->copyHost2Device() ;
#endif
}
// Restores a fully connected layer from a JSON node written by toJsonNode().
//
// Reads "layer-type" (must be GLayerTypeFull), "input-x-size",
// "output-x-size", "layer-name", and the optional "drop-out",
// "fix-weights-bias" and "wb" entries. When "wb" is missing or empty the
// parameters are randomly initialised instead.
//
// Fixes over the previous version:
//  * "drop-out" is stored in m_useDropoutMask. It used to be written into
//    m_fixWeightsAndBias, which left m_useDropoutMask uninitialised.
//  * at most bwsize values are taken from "wb", so an oversized array in the
//    file can no longer write past the end of the parameter buffer.
GLayerFull::GLayerFull(Json::Value& jsonNode ){
    m_type = (GLayerType)jsonNode["layer-type"].asInt() ;
    assert( m_type == GLayerTypeFull ) ;
    m_insize = jsonNode["input-x-size"].asInt() ;
    m_outsize = jsonNode["output-x-size"].asInt() ;
    m_layerName = jsonNode["layer-name"].asString() ;

    m_actiArray = new GFloatArray( m_outsize , false ) ;
    m_dropoutMaskArray = new GFloatArray( m_outsize , false ) ;

    if( jsonNode.isMember("drop-out") ){
        m_useDropoutMask = jsonNode["drop-out"].asBool() ;
    }else{
        m_useDropoutMask = false ;
    }
    if( jsonNode.isMember("fix-weights-bias") ){
        m_fixWeightsAndBias = jsonNode["fix-weights-bias"].asBool() ;
    }else{
        m_fixWeightsAndBias = false ;
    }

    // m_outsize biases followed by m_outsize*m_insize weights
    const int bwsize = m_outsize + m_outsize * m_insize ;

    // bias/weight array
    if( jsonNode.isMember("wb") && jsonNode["wb"].size() > 0 ){
        m_biasAndWeights = new GFloatArray( bwsize , false ) ;
        const int stored = (int)jsonNode["wb"].size() ;
        // Never copy more values than the buffer holds; missing values stay 0.
        const int ncopy = ( stored < bwsize ) ? stored : bwsize ;
        float* host = m_biasAndWeights->getHostMemory() ;
        for( int i = 0 ; i < ncopy ; ++i ){
            host[i] = jsonNode["wb"][i].asFloat() ;
        }
#ifdef USE_GPU_MODE
        m_biasAndWeights->copyHost2Device() ;
#endif
    }else{
        m_biasAndWeights = new GFloatArray( bwsize , true ) ;
    }

    m_biasAndWeightsChangesSum = new GFloatArray( bwsize , false ) ;
    m_errorArray = new GFloatArray( m_outsize , false ) ;
    m_lastBiasAndWeightsChanges = new GFloatArray( bwsize , false ) ;

    // Mask of all ones: every output is active until the mask is shuffled.
    float* mask = m_dropoutMaskArray->getHostMemory() ;
    const int maskLen = m_dropoutMaskArray->getNFloat() ;
    for( int i = 0 ; i < maskLen ; ++i ){
        mask[i] = 1.0f ;
    }
#ifdef USE_GPU_MODE
    m_dropoutMaskArray->copyHost2Device() ;
#endif
}
void GLayerFull::shuffleDropoutMaskArray() {
if( this->m_useDropoutMask ){
int size = this->m_dropoutMaskArray->getNFloat() ;
int halfsize = this->m_dropoutMaskArray->getNFloat()/2 ;
int nMask0 = 0 ;
for(int i = 0 ; i<size ; ++ i ){
this->m_dropoutMaskArray->getHostMemory()[i] = 1.0f ;
}
while( nMask0 < halfsize ){
int rIndex = rand()%size ;
if( this->m_dropoutMaskArray->getHostMemory()[rIndex] > 0.5f ){
this->m_dropoutMaskArray->getHostMemory()[rIndex] = 0.0f ;
++ nMask0 ;
}
}
#ifdef USE_GPU_MODE
this->m_dropoutMaskArray->copyHost2Device() ;
#endif
}
}
void GLayerFull::setAllMaskOne() {
if( this->m_useDropoutMask ){
int size = this->m_dropoutMaskArray->getNFloat() ;
for(int i = 0 ; i<size ; ++ i ){
this->m_dropoutMaskArray->getHostMemory()[i] = 1.0f ;
}
#ifdef USE_GPU_MODE
this->m_dropoutMaskArray->copyHost2Device() ;
#endif
}
}
//=======================================================================================
//=======================================================================================
//=======================================================================================
// Fast R-CNN output layer with two heads fed by the same input:
// a classification head over K+1 classes and a location head with 4 values per class.
// Owns all of its GFloatArray buffers (released in the destructor).
class GLayerFastRCNNOutput : public GLayer {
public:
// insize: number of inputs; outsizeNoBack: number of classes K, not counting the extra (+1) class.
GLayerFastRCNNOutput(int insize , int outsizeNoBack ) ;
~GLayerFastRCNNOutput() ;
GFloatArray* m_actiArraySoftmax ; // K+1 class
GFloatArray* m_biasAndWeightsSoftmax ;// (K+1) + (K+1)*insize floats
GFloatArray* m_biasAndWeightsChangesSumSoftmax ;// same length as m_biasAndWeightsSoftmax
GFloatArray* m_lastBiasAndWeightsChangesSoftmax ;// same length as m_biasAndWeightsSoftmax
GFloatArray* m_actiArrayLoc ;// K class * 4
GFloatArray* m_biasAndWeightsLoc ;// K*4 + K*4*insize floats
GFloatArray* m_biasAndWeightsChangesSumLoc ;// same length as m_biasAndWeightsLoc
GFloatArray* m_lastBiasAndWeightsChangesLoc ;// same length as m_biasAndWeightsLoc
int m_insize , m_outsizeNoBack , m_outsizeWithBack ;// m_outsizeWithBack = m_outsizeNoBack + 1
} ;
// Frees the buffers of both heads and nulls the pointers.
GLayerFastRCNNOutput::~GLayerFastRCNNOutput(){
    // classification head
    delete m_actiArraySoftmax ;
    m_actiArraySoftmax = 0 ;
    delete m_biasAndWeightsSoftmax ;
    m_biasAndWeightsSoftmax = 0 ;
    delete m_biasAndWeightsChangesSumSoftmax ;
    m_biasAndWeightsChangesSumSoftmax = 0 ;
    delete m_lastBiasAndWeightsChangesSoftmax ;
    m_lastBiasAndWeightsChangesSoftmax = 0 ;

    // location head
    delete m_actiArrayLoc ;
    m_actiArrayLoc = 0 ;
    delete m_biasAndWeightsLoc ;
    m_biasAndWeightsLoc = 0 ;
    delete m_biasAndWeightsChangesSumLoc ;
    m_biasAndWeightsChangesSumLoc = 0 ;
    delete m_lastBiasAndWeightsChangesLoc ;
    m_lastBiasAndWeightsChangesLoc = 0 ;
}
// Creates the two output heads for `insize` inputs and `outsizeNoBack`
// classes (one extra class is added for the classification head).
// Parameters are randomly initialised; all other buffers start at zero.
//
// Fix: m_fixWeightsAndBias is now set to false like in every other layer
// constructor; it used to be left uninitialised here.
GLayerFastRCNNOutput::GLayerFastRCNNOutput(int insize , int outsizeNoBack ){
    m_type = GLayerTypeFastRCNNOutput ;
    m_fixWeightsAndBias = false ;
    m_insize = insize ;
    m_outsizeNoBack = outsizeNoBack ;
    m_outsizeWithBack = m_outsizeNoBack + 1 ;

    // classification head: (K+1) biases followed by (K+1)*insize weights
    const int bwsize = m_outsizeWithBack + m_outsizeWithBack * insize ;
    m_actiArraySoftmax = new GFloatArray( m_outsizeWithBack , false ) ;
    m_biasAndWeightsSoftmax = new GFloatArray( bwsize , true ) ;
    m_biasAndWeightsChangesSumSoftmax = new GFloatArray( bwsize , false ) ;
    m_lastBiasAndWeightsChangesSoftmax = new GFloatArray( bwsize , false ) ;

    // location head: 4 outputs per class, K*4 biases followed by K*4*insize weights
    const int bwsizeloc = m_outsizeNoBack * 4 + m_outsizeNoBack * 4 * insize ;
    m_actiArrayLoc = new GFloatArray( m_outsizeNoBack * 4 , false ) ;
    m_biasAndWeightsLoc = new GFloatArray( bwsizeloc , true ) ;
    m_biasAndWeightsChangesSumLoc = new GFloatArray( bwsizeloc , false ) ;
    m_lastBiasAndWeightsChangesLoc = new GFloatArray( bwsizeloc , false ) ;
}
// CPU forward pass from a fully connected layer into the two Fast R-CNN
// output heads. Each output is bias + sum(weight * input), passed through ReLU.
//
// fastSoftmaxArray          [out] fastSoftmaxSize values (index 0 is the background class)
// fastLocArray              [out] fastLocSize values
// fastLocSize               must equal fastSoftmaxSize*4 - 4 (asserted)
// fastSoftmaxSize           number of classification outputs
// fastSoftmaxBiasAndWeights fastSoftmaxSize biases, then for every input one
//                           row of fastSoftmaxSize weights
// fastLocBiasAndWeights     same layout with fastLocSize columns
// prevFullActiArray         prevFullActiSize input activations
//
// NOTE(review): despite the "softmax" naming, no softmax normalisation is
// applied in this function; both heads use ReLU.
//
// ReLU uses fmaxf, as the RPN forward functions in this file do. The previous
// unqualified max(float,float) only exists when CUDA headers or a platform
// macro provide it.
void cpu_fastrcnnOutput_forwardFromFull(
    float* fastSoftmaxArray ,
    float* fastLocArray ,
    int fastLocSize ,
    int fastSoftmaxSize ,
    float* fastSoftmaxBiasAndWeights ,
    float* fastLocBiasAndWeights ,
    float* prevFullActiArray , int prevFullActiSize ){
    assert( fastSoftmaxSize*4 - 4 == fastLocSize ) ;
    // classification head; iout==0 is the background output
    for(int iout = 0 ; iout < fastSoftmaxSize ; ++ iout ){
        float sum1 = fastSoftmaxBiasAndWeights[iout] ;
        int iw = fastSoftmaxSize + iout ; // first weight of this output, just after the bias row
        for( int iprev = 0 ; iprev < prevFullActiSize ; ++ iprev ){
            sum1 += fastSoftmaxBiasAndWeights[iw] * prevFullActiArray[iprev] ;
            iw += fastSoftmaxSize ; // next input row
        }
        fastSoftmaxArray[iout] = fmaxf(0.0f , sum1) ;//ReLU
    }
    // location head
    for(int iloc = 0 ; iloc < fastLocSize ; ++ iloc ){
        float sum1 = fastLocBiasAndWeights[iloc] ;
        int iw = fastLocSize + iloc ;
        for( int iprev = 0 ; iprev < prevFullActiSize ; ++ iprev ){
            sum1 += fastLocBiasAndWeights[iw] * prevFullActiArray[iprev] ;
            iw += fastLocSize ;
        }
        fastLocArray[iloc] = fmaxf(0.0f , sum1) ;//ReLU
    }
}
// Unimplemented placeholder: takes no arguments and does nothing.
void cpu_fastrcnn_backwardErrorFromLabelAndTrueRect( ){
// Could not finish this; going straight to Faster R-CNN instead. 2017-07-20
}
//=======================================================================================
//=======================================================================================
//=======================================================================================
// Convolution layer. Input and output share the same x/y size; the output has
// one band per kernel. Owns all of its GFloatArray buffers (released in the destructor).
class GLayerConv : public GLayer
{
public:
// inx/iny/inz: input size and band count; knx/kny: kernel size; nk: number of kernels.
GLayerConv(int inx,int iny,int inz,int knx,int kny,int nk) ;
// Restores a layer from a JSON node as produced by toJsonNode().
GLayerConv(Json::Value& jsonNode ) ;
~GLayerConv() ;
Json::Value toJsonNode() ;
int m_kernelCount ;// number of kernels
int m_kernelPixelCountPerBand ;// m_kXsize * m_kYsize
int m_inBandCount ;// number of input bands
int m_kXsize , m_kYsize ;// kernel size
int m_ioXsize , m_ioYsize , m_ioPixelCountPerBand ;// input/output image size and m_ioXsize * m_ioYsize
int m_biasStartIndex ;// index of the first bias inside m_kernelWeightsBiasArray (all weights come first)
GFloatArray* m_actiArray ;// m_ioPixelCountPerBand * m_kernelCount floats
GFloatArray* m_reluArray ;// 1.0f where the activation is > 0, otherwise 0.0f; whether this is needed is still to be decided
GFloatArray* m_errorArray ;// same length as m_actiArray
GFloatArray* m_kernelWeightsBiasArray ;// all kernel weights followed by m_kernelCount biases
GFloatArray* m_kernelWeightsBiasChangeSumArray ;// same length as m_kernelWeightsBiasArray
GFloatArray* m_kernelWeightsBiasLastChangeArray ;// same length as m_kernelWeightsBiasArray
} ;
// Pixel layout of an image: pixels are ordered by row first, then column, then band.
//
// Creates a convolution layer for an inx x iny x inz input with nk kernels of
// size knx x kny. Kernel weights and biases are randomly initialised; all
// other buffers start at zero.
GLayerConv::GLayerConv(int inx,int iny,int inz,int knx,int kny,int nk)
{
    m_type = GLayerTypeConv ;
    m_fixWeightsAndBias = false ;

    // input/output image geometry
    m_ioXsize = inx ;
    m_ioYsize = iny ;
    m_ioPixelCountPerBand = m_ioXsize * m_ioYsize ;
    m_inBandCount = inz ;           // number of input bands

    // kernel geometry (x and y size must be odd)
    m_kXsize = knx ;
    m_kYsize = kny ;
    m_kernelPixelCountPerBand = m_kXsize * m_kYsize ;
    m_kernelCount = nk ;            // number of kernels

    // All weights come first, the per-kernel biases follow. (bugfixed 2017-8-26)
    m_biasStartIndex = m_kernelPixelCountPerBand * m_inBandCount * m_kernelCount ;
    const int paramCount = m_kernelPixelCountPerBand * m_inBandCount * m_kernelCount + m_kernelCount ;

    // kernel arrays
    m_kernelWeightsBiasArray = new GFloatArray( paramCount , true ) ;
    m_kernelWeightsBiasChangeSumArray = new GFloatArray( paramCount , false ) ;
    m_kernelWeightsBiasLastChangeArray = new GFloatArray( paramCount , false ) ;

    // activation arrays: one output band per kernel
    const int outputCount = m_ioPixelCountPerBand * m_kernelCount ;
    m_actiArray = new GFloatArray( outputCount , false ) ;
    m_errorArray = new GFloatArray( outputCount , false ) ;
    m_reluArray = new GFloatArray( outputCount , false ) ;
}
// Restores a convolution layer from a JSON node written by toJsonNode().
//
// Reads "layer-type" (must be GLayerTypeConv), "layer-name",
// "input-x-size", "input-y-size", "input-z-size", "wb-x-size", "wb-y-size",
// "wb-k-size", and the optional "fix-weights-bias" and "wb" entries. When
// "wb" is missing or empty the kernels are randomly initialised instead.
//
// Fix: at most nfloatk values are taken from "wb", so an array in the file
// that is longer than the sizes imply can no longer write past the end of
// the parameter buffer.
GLayerConv::GLayerConv(Json::Value& jsonNode ) {
    m_type = (GLayerType) jsonNode["layer-type"].asInt() ;
    assert( m_type == GLayerTypeConv ) ;
    if( jsonNode.isMember("fix-weights-bias") ){
        m_fixWeightsAndBias = jsonNode["fix-weights-bias"].asBool() ;
    }else{
        m_fixWeightsAndBias = false ;
    }
    m_layerName = jsonNode["layer-name"].asString() ;

    // input/output image geometry
    m_ioXsize = jsonNode["input-x-size"].asInt() ;
    m_ioYsize = jsonNode["input-y-size"].asInt() ;
    m_ioPixelCountPerBand = m_ioXsize * m_ioYsize ;
    m_inBandCount = jsonNode["input-z-size"].asInt() ;  // number of input bands

    // kernel geometry (x and y size must be odd)
    m_kXsize = jsonNode["wb-x-size"].asInt() ;
    m_kYsize = jsonNode["wb-y-size"].asInt() ;
    m_kernelPixelCountPerBand = m_kXsize * m_kYsize ;
    m_kernelCount = jsonNode["wb-k-size"].asInt() ;     // number of kernels

    // All weights come first, the per-kernel biases follow. (bugfixed 2017-8-26)
    const int nfloatk = m_kernelPixelCountPerBand * m_inBandCount * m_kernelCount + m_kernelCount ;
    m_biasStartIndex = m_kernelPixelCountPerBand * m_inBandCount * m_kernelCount ;

    // kernel arrays
    if( jsonNode.isMember("wb") && jsonNode["wb"].size() > 0 ){
        m_kernelWeightsBiasArray = new GFloatArray( nfloatk , false ) ;
        const int stored = (int)jsonNode["wb"].size() ;
        // Never copy more values than the buffer holds; missing values stay 0.
        const int ncopy = ( stored < nfloatk ) ? stored : nfloatk ;
        float* host = m_kernelWeightsBiasArray->getHostMemory() ;
        for( int i = 0 ; i < ncopy ; ++i ){
            host[i] = jsonNode["wb"][i].asFloat() ;
        }
#ifdef USE_GPU_MODE
        m_kernelWeightsBiasArray->copyHost2Device() ;
#endif
    }else{
        m_kernelWeightsBiasArray = new GFloatArray( nfloatk , true ) ;
    }
    m_kernelWeightsBiasChangeSumArray = new GFloatArray( nfloatk , false ) ;
    m_kernelWeightsBiasLastChangeArray = new GFloatArray( nfloatk , false ) ;

    // activation arrays: one output band per kernel
    const int nfloatA = m_ioPixelCountPerBand * m_kernelCount ;
    m_actiArray = new GFloatArray( nfloatA , false ) ;
    m_errorArray = new GFloatArray( nfloatA , false ) ;
    m_reluArray = new GFloatArray( nfloatA , false ) ;
}
// Frees every buffer owned by the layer and nulls the pointers.
GLayerConv::~GLayerConv(){
    GFloatArray** const owned[] = {
        &m_kernelWeightsBiasArray ,
        &m_kernelWeightsBiasChangeSumArray ,
        &m_kernelWeightsBiasLastChangeArray ,
        &m_actiArray ,
        &m_errorArray ,
        &m_reluArray
    } ;
    const int ownedCount = (int)( sizeof(owned) / sizeof(owned[0]) ) ;
    for( int k = 0 ; k < ownedCount ; ++k ){
        delete *owned[k] ; // deleting a null pointer is a no-op
        *owned[k] = 0 ;
    }
}
// Serialises the layer: name, type, input/output geometry, kernel geometry,
// the fix flag, and every kernel weight and bias under "wb".
// In GPU mode the parameters are fetched from the device first.
Json::Value GLayerConv::toJsonNode() {
    Json::Value out ;
    out["layer-name"] = m_layerName ;
    out["layer-type"] = m_type ;

    // Output keeps the input x/y size and has one band per kernel.
    out["input-x-size"] = m_ioXsize ;
    out["input-y-size"] = m_ioYsize ;
    out["input-z-size"] = m_inBandCount ;
    out["output-x-size"] = m_ioXsize ;
    out["output-y-size"] = m_ioYsize ;
    out["output-z-size"] = m_kernelCount ;

    out["wb-x-size"] = m_kXsize ;
    out["wb-y-size"] = m_kYsize ;
    out["wb-z-size"] = m_inBandCount ;
    out["wb-k-size"] = m_kernelCount ;
    out["fix-weights-bias"] = m_fixWeightsAndBias ;

    const int count = m_kernelWeightsBiasArray->getNFloat() ;
#ifdef USE_GPU_MODE
    m_kernelWeightsBiasArray->copyDeviceToHost() ;
#endif
    float* host = m_kernelWeightsBiasArray->getHostMemory() ;
    for( int k = 0 ; k < count ; ++k ){
        out["wb"][k] = host[k] ;
    }
    return out ;
}
////////////////////////////////////////////////////////////////////////////////////
// Pooling layer. The output is half the input size in x and y (integer
// division) with the same number of bands. Owns its GFloatArray buffers.
class GLayerPool : public GLayer
{
public:
// inx/iny/inz: input size and band count.
GLayerPool(int inx,int iny,int inz) ;
~GLayerPool() ;
// Restores a layer from a JSON node as produced by toJsonNode().
GLayerPool(Json::Value& jsonNode ) ;
Json::Value toJsonNode() ;
int inXSize , inYSize , bandCount ;
int outXSize , outYSize ;// inXSize/2 , inYSize/2
GFloatArray* m_actiArray ;// outXSize * outYSize * bandCount floats
GFloatArray* m_convIsMaxArray ;// input-sized: 1.0f where the conv activation is the maximum of its 2x2 cell, otherwise 0.0f
GFloatArray* m_errorArray ;// same length as m_actiArray
} ;
// Pixel layout of an image: pixels are ordered by row first, then column, then band.
//
// Creates a pooling layer for an inx x iny x inz input; the output is
// (inx/2) x (iny/2) x inz. All buffers start at zero.
GLayerPool::GLayerPool(int inx,int iny,int inz)
{
    m_type = GLayerTypePool ;
    m_fixWeightsAndBias = false ;

    // input geometry
    inXSize = inx ;
    inYSize = iny ;
    bandCount = inz ;

    // output geometry: halved in both directions
    outXSize = inXSize/2 ;
    outYSize = inYSize/2 ;

    const int inputCount = inXSize * inYSize * bandCount ;
    const int outputCount = outXSize * outYSize * bandCount ;

    // activation arrays
    m_actiArray = new GFloatArray( outputCount , false ) ;
    m_errorArray = new GFloatArray( outputCount , false ) ;
    m_convIsMaxArray = new GFloatArray( inputCount , false ) ;
}
// Restores a pooling layer from a JSON node written by toJsonNode().
// Reads "layer-type" (must be GLayerTypePool), "input-x-size",
// "input-y-size", "input-z-size", "layer-name" and the optional
// "fix-weights-bias" entry. All buffers start at zero.
GLayerPool::GLayerPool(Json::Value& jsonNode )
{
    m_type = (GLayerType) jsonNode["layer-type"].asInt() ;
    assert( m_type == GLayerTypePool ) ;

    // input geometry
    inXSize = jsonNode["input-x-size"].asInt() ;
    inYSize = jsonNode["input-y-size"].asInt() ;
    bandCount = jsonNode["input-z-size"].asInt() ;

    // output geometry: halved in both directions
    outXSize = inXSize/2 ;
    outYSize = inYSize/2 ;

    m_layerName = jsonNode["layer-name"].asString() ;
    m_fixWeightsAndBias = jsonNode.isMember("fix-weights-bias")
        ? jsonNode["fix-weights-bias"].asBool()
        : false ;

    const int inputCount = inXSize * inYSize * bandCount ;
    const int outputCount = outXSize * outYSize * bandCount ;

    // activation arrays
    m_actiArray = new GFloatArray( outputCount , false ) ;
    m_errorArray = new GFloatArray( outputCount , false ) ;
    m_convIsMaxArray = new GFloatArray( inputCount , false ) ;
}
// Frees the three buffers owned by the layer and nulls the pointers.
GLayerPool::~GLayerPool(){
    delete m_actiArray ;       // deleting a null pointer is a no-op
    m_actiArray = 0 ;
    delete m_errorArray ;
    m_errorArray = 0 ;
    delete m_convIsMaxArray ;
    m_convIsMaxArray = 0 ;
}
// Serialises the layer: name, type, input/output geometry and the fix flag.
// A pooling layer has no parameters, so all "wb-*" sizes are written as 0.
Json::Value GLayerPool::toJsonNode() {
    Json::Value out ;
    out["layer-name"] = m_layerName ;
    out["layer-type"] = m_type ;

    out["input-x-size"] = inXSize ;
    out["input-y-size"] = inYSize ;
    out["input-z-size"] = bandCount ;
    out["output-x-size"] = outXSize ;
    out["output-y-size"] = outYSize ;
    out["output-z-size"] = bandCount ;

    static const char* const kParamSizeKeys[] = {
        "wb-x-size" , "wb-y-size" , "wb-z-size" , "wb-k-size"
    } ;
    for( int k = 0 ; k < 4 ; ++k ){
        out[ kParamSizeKeys[k] ] = 0 ;
    }

    out["fix-weights-bias"] = m_fixWeightsAndBias ;
    return out ;
}
////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////
//
//
// Region Proposal Layer for Faster R-CNN
//
//
/////////////////////////////////////////////////////////////////////////////////
// Region proposal layer. Slides a knx x kny window over an inx x iny x inz
// feature map; buffers are sized for one anchor per window position.
// Owns all of its GFloatArray buffers (released in the destructor).
class GLayerRPN : public GLayer {
public:
// inx/iny/inz: input feature-map size; knx/kny: sliding-window size;
// a2imgXScale/a2imgYScale: stored as the anchor-to-image scale factors.
GLayerRPN(int inx,int iny,int inz,int knx,int kny , float a2imgXScale , float a2imgYScale) ;
//GLayerRPN(Json::Value& jsonNode ) ;
~GLayerRPN() ;
//Json::Value toJsonNode() ;
GFloatArray* m_weightsArray ;// 3x3xinz windows filter
GFloatArray* m_weightsChangesSum ;// 3x3xinz windows filter
GFloatArray* m_lastWeightsChanges ;// 3x3xinz windows filter
GFloatArray* m_slidingWindowsOutput ;// m_wndXCount * m_wndYCount * m_inputZSize floats
GFloatArray* m_objArrayOutput ;// 2 floats per window
GFloatArray* m_locArrayOutput ;// 4 floats per window
GFloatArray* m_anchorObjBiasAndWeightsArray ;//full connect 1 (set to 0 by the constructor, not allocated there)
GFloatArray* m_anchorLocBiasAndWeightsArray ;//full connect 2 (set to 0 by the constructor, not allocated there)
int m_inputXSize , m_inputYSize , m_inputZSize ;
int m_kXSize , m_kYSize ;
int m_wndXCount , m_wndYCount ;// input size - window size + 1
float m_anchor2imgXScale , m_anchor2imgYScale ;
} ;
// Frees every buffer owned by the layer and nulls the pointers. The two
// anchor bias/weight arrays may still be null, which delete tolerates.
GLayerRPN::~GLayerRPN(){
    GFloatArray** const owned[] = {
        &m_weightsArray ,
        &m_weightsChangesSum ,
        &m_lastWeightsChanges ,
        &m_slidingWindowsOutput ,
        &m_objArrayOutput ,
        &m_locArrayOutput ,
        &m_anchorObjBiasAndWeightsArray ,
        &m_anchorLocBiasAndWeightsArray
    } ;
    const int ownedCount = (int)( sizeof(owned) / sizeof(owned[0]) ) ;
    for( int k = 0 ; k < ownedCount ; ++k ){
        delete *owned[k] ;
        *owned[k] = 0 ;
    }
}
// Creates the region proposal layer for an inx x iny x inz feature map and a
// knx x kny sliding window. The window filter is randomly initialised; the
// output buffers start at zero. The two anchor bias/weight arrays are left null.
GLayerRPN::GLayerRPN( int inx , int iny , int inz , int knx , int kny , float a2imgXScale , float a2imgYScale){
    m_type = GLayerTypeRPN ;
    m_fixWeightsAndBias = false ;

    // input feature map
    m_inputXSize = inx ;
    m_inputYSize = iny ;
    m_inputZSize = inz ;

    // sliding window (3 x 3)
    m_kXSize = knx ;
    m_kYSize = kny ;

    // number of window positions in each direction
    m_wndXCount = m_inputXSize - m_kXSize + 1;
    m_wndYCount = m_inputYSize - m_kYSize + 1;

    m_anchor2imgXScale = a2imgXScale ;
    m_anchor2imgYScale = a2imgYScale ;

    // window filter: one kx x ky x inz block per anchor, single anchor for now
    const int anchorCount = 1 ;
    const int filterFloats = anchorCount * m_kXSize * m_kYSize * m_inputZSize ;
    m_weightsArray = new GFloatArray( filterFloats , true ) ;
    m_weightsChangesSum = new GFloatArray( filterFloats , false ) ;
    m_lastWeightsChanges = new GFloatArray( filterFloats , false ) ;

    // per-window outputs: 2 objectness values and 4 location values
    const int windowCount = m_wndXCount * m_wndYCount ;
    m_objArrayOutput = new GFloatArray( windowCount * 2 , false ) ;
    m_locArrayOutput = new GFloatArray( windowCount * 4 , false ) ;

    // one inz-long feature vector per window
    const int slidingOutputFloats = m_wndXCount * m_wndYCount * m_inputZSize ;
    m_slidingWindowsOutput = new GFloatArray( slidingOutputFloats , false ) ;

    m_anchorObjBiasAndWeightsArray = 0 ; // these two arrays are not used yet; needs checking
    m_anchorLocBiasAndWeightsArray = 0 ; // TODO: get the RPN running!! 2017-8-24
}
// RPN forward step 1: produces the sliding-window feature array.
//
// For every window position and every input band, the window is multiplied
// element-wise with that band's own slidexsize x slideysize kernel, summed
// and passed through ReLU.
//
// inFeatureMapArray        input, band after band, each band row by row
// inxsize/inysize/inzsize  input size and band count
// slidingKernelWeights     one slidexsize x slideysize kernel per band
// slidexsize/slideysize    window size (3 x 3)
// outSlidingWindowFeatures output, inzsize values per window, windows in
//                          row-major order: v0,v1,...,v0,v1,...
void cpu_rpn_forwardFromImageGenerateSlidingWindowFeatures(
    float* inFeatureMapArray ,
    int inxsize ,
    int inysize ,
    int inzsize ,
    float* slidingKernelWeights ,
    int slidexsize , //3 x 3
    int slideysize ,
    float* outSlidingWindowFeatures /* v0,v1,...v256,v0,v1...v256.... */
    )
{
    const int windowsAcross = inxsize - slidexsize + 1 ;
    const int windowsDown = inysize - slideysize + 1 ;
    const int kernelArea = slidexsize * slideysize ;
    const int planeArea = inxsize * inysize ;

    // Outputs are produced in exactly the order they are stored.
    float* dst = outSlidingWindowFeatures ;
    for( int wy = 0 ; wy < windowsDown ; ++wy ){
        for( int wx = 0 ; wx < windowsAcross ; ++wx ){
            for( int band = 0 ; band < inzsize ; ++band ){
                const float* kernel = slidingKernelWeights + band * kernelArea ;
                // top-left pixel of the current window in this band
                const float* patch = inFeatureMapArray + band * planeArea + wy * inxsize + wx ;
                float acc = 0.0f ;
                for( int ky = 0 ; ky < slideysize ; ++ky ){
                    const float* kernelRow = kernel + ky * slidexsize ;
                    const float* patchRow = patch + ky * inxsize ;
                    for( int kx = 0 ; kx < slidexsize ; ++kx ){
                        acc += kernelRow[kx] * patchRow[kx] ;
                    }
                }
                *dst++ = fmaxf( 0.0f , acc ) ; // relu
            }
        }
    }
}
// RPN forward step 2: takes the sliding-window features and produces the
// anchor output array; every anchor consists of 2 values (objectness) or
// 4 values (location).
//
// slidingWindowFeatures  featureCount feature vectors of featureSize floats each
// biasWeightsArray       outSizePerFeature biases, then for every feature
//                        element one row of outSizePerFeature weights
//                        (nx = outSizePerFeature, ny = featureSize + 1)
// outSizePerFeature      2 for obj, 4 for loc
// anchorOutputArray      [out] outSizePerFeature values per feature: v0,v1,v0,v1,...
// actiFuncType           0 = softmax over the outputs of one feature,
//                        any other value = ReLU
//
// The softmax subtracts the largest logit of the window before calling expf.
// This is mathematically the same softmax, but expf can no longer overflow to
// +inf (which used to turn the whole window into NaN for logits above ~88).
void cpu_rpn_forwardFromSlidingWindowFeaturesToAnchorOutput(
    float* slidingWindowFeatures ,
    int featureCount , // num of features.
    int featureSize , // num of float of a feature.
    float* biasWeightsArray , // nx = outSizePerFeature , ny = featureSize + 1
    int outSizePerFeature , // 2 for obj , 4 for loc.
    float* anchorOutputArray ,// v0,v1,v0,v1,v0,v1,....
    int actiFuncType //0-softmax , 1-relu
    )
{
    for(int iwnd = 0 ; iwnd < featureCount ; ++ iwnd ){
        const float* feature = slidingWindowFeatures + iwnd * featureSize ;
        float* out = anchorOutputArray + iwnd * outSizePerFeature ;

        // Affine part: raw logits are stored in the output slots first.
        for(int iout = 0 ; iout < outSizePerFeature ; ++ iout ){
            float sum1 = biasWeightsArray[iout] ;
            for(int iz = 0 ; iz < featureSize ; ++ iz ){
                sum1 += feature[iz] * biasWeightsArray[(iz+1)*outSizePerFeature+iout] ;
            }
            out[iout] = sum1 ;
        }

        if( actiFuncType == 0 ){
            if( outSizePerFeature <= 0 ){
                continue ; // nothing to normalise
            }
            // Largest logit of this window, used to shift the exponent range.
            float maxLogit = out[0] ;
            for(int iout = 1 ; iout < outSizePerFeature ; ++ iout ){
                maxLogit = fmaxf( maxLogit , out[iout] ) ;
            }
            float softmaxsum = 0.0f ;
            for(int iout = 0 ; iout < outSizePerFeature ; ++ iout ){
                const float expval = expf( out[iout] - maxLogit ) ;
                out[iout] = expval ;
                softmaxsum += expval ;
            }
            // softmaxsum >= 1 because the largest logit contributes expf(0).
            for(int iout = 0 ; iout < outSizePerFeature ; ++ iout ){
                out[iout] = out[iout] / softmaxsum ;
            }
        }else{
            for(int iout = 0 ; iout < outSizePerFeature ; ++ iout ){
                out[iout] = fmaxf( 0.0f , out[iout] ) ;//relu
            }
        }
    }
}
//计算每个anchor是否包含object,如果包含输出对应IoU最大的ground true box
void cpu_rpn_checkEveryAnchorHasObjectWithGroundTrueBoxes(
int anchorXCount ,
int anchorYCount ,
float* groundTrueBoxArray , //ground true box的坐标值在输入影像坐标系下 每个box四个值为x0,y0,wid,hei
int gtCount ,
float scaleAnchorXToImage ,
float scaleAnchorYToImage ,
int* outAnchorTypeArray , //0 discard , 1-no object , 2-has object
float* outAnchorGroundTrueBoxOffset //以anchor坐标中心为原点,对应该anchor最大IoU的groundTrueBox的坐标和长高
)
{
for(int iay = 0 ; iay < anchorYCount ; ++ iay ){
for(int iax = 0 ; iax < anchorXCount ; ++ iax ){
//float anchorCenterXInImage = ( iax + 1.5f ) * scaleAnchorXToImage ; //3x3 sliding window 将anchor坐标值转为输入图像坐标系
//float anchorCenterYInImage = ( iay + 1.5f ) * scaleAnchorYToImage ; //3x3 sliding window 将anchor坐标值转为输入图像坐标系
float anchorX0InImage = iax * scaleAnchorXToImage ;
float anchorY0InImage = iay * scaleAnchorYToImage ;
float anchorWidInImage = 3*scaleAnchorXToImage ;
float anchorHeiInImage = 3*scaleAnchorYToImage ;
float anchorX1InImage = anchorX0InImage + anchorWidInImage ;
float anchorY1InImage = anchorY0InImage + anchorHeiInImage ;
float anchorAreaInImage = anchorWidInImage * anchorHeiInImage ;
//针对每一个groundbox计算IoU 找到该anchor对应最大IoU的groundTrueBox
float theIou = -1.0f ;
float theGtX0 = 0.0f ;
float theGtY0 = 0.0f ;
float theGtWid = 0.0f ;
float theGtHei = 0.0f ;
for(int igt = 0 ; igt < gtCount ; ++ igt ){
float gtx0 = groundTrueBoxArray[igt*4+0] ;
float gty0 = groundTrueBoxArray[igt*4+1] ;
float gtwid = groundTrueBoxArray[igt*4+2] ;
float gthei = groundTrueBoxArray[igt*4+3] ;
float gtx1 = gtx0 + gtwid ;
float gty1 = gty0 + gthei ;
//intersection
float interx0 = fmaxf( anchorX0InImage , gtx0 ) ;
float interx1 = fminf( anchorX1InImage , gtx1 ) ;
float intery0 = fmaxf( anchorY0InImage , gty0 ) ;
float intery1 = fminf( anchorY1InImage , gty1 ) ;
float interwid = interx1 - interx0 ;
float interhei = intery1 - intery0 ;
if( interwid < 0 || interhei < 0 ){
continue ;//没有相交区域,不考虑这个gtbox
}else{
float interArea = interwid * interhei ;
float gtArea = gtwid * gthei ;
//union area
float unionArea = anchorAreaInImage + gtArea - interArea ;
float iou = interArea / unionArea ;
if( iou > theIou ){
theIou = iou ;
theGtX0 = gtx0 ;
theGtY0 = gty0 ;
theGtWid = gtwid ;
theGtHei = gthei ;
}
}
}// end for igt
int anchor1dIndex = iay * anchorXCount + iax ;
if( theIou > 0.7f ){
outAnchorTypeArray[anchor1dIndex] = 2 ;//positive sample
outAnchorGroundTrueBoxOffset[anchor1dIndex*4+0] = ( theGtX0 - anchorX0InImage ) / anchorWidInImage ;
outAnchorGroundTrueBoxOffset[anchor1dIndex*4+1] = ( theGtY0 - anchorY0InImage ) / anchorHeiInImage ;
outAnchorGroundTrueBoxOffset[anchor1dIndex*4+2] = logf( theGtWid / anchorWidInImage ) ;
outAnchorGroundTrueBoxOffset[anchor1dIndex*4+3] = logf( theGtHei / anchorHeiInImage ) ;