00001 #include "nnrbm.h"
00002
00003 extern StreamOutput cout;
00004
00008 NNRBM::NNRBM()
00009 {
00010
00011 m_nrTargets = 0;
00012 m_nrInputs = 0;
00013 m_nrExamplesTrain = 0;
00014 m_nrExamplesProbe = 0;
00015 m_inputsTrain = 0;
00016 m_inputsProbe = 0;
00017 m_targetsTrain = 0;
00018 m_targetsProbe = 0;
00019 m_initWeightFactor = 0;
00020 m_globalEpochs = 0;
00021 m_RPROP_etaPos = 0;
00022 m_RPROP_etaNeg = 0;
00023 m_RPROP_updateMin = 0;
00024 m_RPROP_updateMax = 0;
00025 m_learnRate = 0;
00026 m_learnRateMin = 0;
00027 m_learnrateDecreaseRate = 0;
00028 m_learnrateDecreaseRateEpoch = 0;
00029 m_momentum = 0;
00030 m_weightDecay = 0;
00031 m_minUpdateBound = 0;
00032 m_batchSize = 0;
00033 m_scaleOutputs = 0;
00034 m_offsetOutputs = 0;
00035 m_maxEpochs = 0;
00036 m_useBLAS = 0;
00037 m_enableRPROP = 0;
00038 m_normalTrainStopping = 0;
00039 m_nrLayer = 0;
00040 m_neuronsPerLayer = 0;
00041 m_nrWeights = 0;
00042 m_nrOutputs = 0;
00043 m_nrLayWeights = 0;
00044 m_outputs = 0;
00045 m_outputsTmp = 0;
00046 m_derivates = 0;
00047 m_d1 = 0;
00048 m_weights = 0;
00049 m_weightsTmp0 = 0;
00050 m_weightsTmp1 = 0;
00051 m_weightsTmp2 = 0;
00052 m_weightsBatchUpdate = 0;
00053 m_weightsOld = 0;
00054 m_weightsOldOld = 0;
00055 m_deltaW = 0;
00056 m_deltaWOld = 0;
00057 m_adaptiveRPROPlRate = 0;
00058 m_enableL1Regularization = 0;
00059 m_errorFunctionMAE = 0;
00060 m_activationFunctionPerLayer = 0;
00061 m_enableAutoencoder = 0;
00062 m_nrLayWeightOffsets = 0;
00063 m_sumSquaredError = 0.0;
00064 m_sumSquaredErrorSamples = 0.0;
00065 m_rbmLearnrateWeights = 0.0;
00066 m_rbmLearnrateBiasVis = 0.0;
00067 m_rbmLearnrateBiasHid = 0.0;
00068 m_rbmWeightDecay = 0.0;
00069 m_rbmMaxEpochs = 0;
00070 }
00071
00075 NNRBM::~NNRBM()
00076 {
00077 if ( m_neuronsPerLayer )
00078 delete[] m_neuronsPerLayer;
00079 m_neuronsPerLayer = 0;
00080 if ( m_nrLayWeights )
00081 delete[] m_nrLayWeights;
00082 m_nrLayWeights = 0;
00083 if ( m_outputs )
00084 delete[] m_outputs;
00085 m_outputs = 0;
00086 if ( m_outputsTmp )
00087 delete[] m_outputsTmp;
00088 m_outputsTmp = 0;
00089 if ( m_derivates )
00090 delete[] m_derivates;
00091 m_derivates = 0;
00092 if ( m_d1 )
00093 delete[] m_d1;
00094 m_d1 = 0;
00095 if ( m_weights )
00096 delete[] m_weights;
00097 m_weights = 0;
00098 if ( m_weightsTmp0 )
00099 delete[] m_weightsTmp0;
00100 m_weightsTmp0 = 0;
00101 if ( m_weightsTmp1 )
00102 delete[] m_weightsTmp1;
00103 m_weightsTmp1 = 0;
00104 if ( m_weightsTmp2 )
00105 delete[] m_weightsTmp2;
00106 m_weightsTmp2 = 0;
00107 if ( m_weightsBatchUpdate )
00108 delete[] m_weightsBatchUpdate;
00109 m_weightsBatchUpdate = 0;
00110 if ( m_weightsOld )
00111 delete[] m_weightsOld;
00112 m_weightsOld = 0;
00113 if ( m_weightsOldOld )
00114 delete[] m_weightsOldOld;
00115 m_weightsOldOld = 0;
00116 if ( m_deltaW )
00117 delete[] m_deltaW;
00118 m_deltaW = 0;
00119 if ( m_deltaWOld )
00120 delete[] m_deltaWOld;
00121 m_deltaWOld = 0;
00122 if ( m_adaptiveRPROPlRate )
00123 delete[] m_adaptiveRPROPlRate;
00124 m_adaptiveRPROPlRate = 0;
00125 if ( m_activationFunctionPerLayer )
00126 delete[] m_activationFunctionPerLayer;
00127 m_activationFunctionPerLayer = 0;
00128 if ( m_nrLayWeightOffsets )
00129 delete[] m_nrLayWeightOffsets;
00130 m_nrLayWeightOffsets = 0;
00131 }
00132
00138 void NNRBM::enableErrorFunctionMAE ( bool en )
00139 {
00140 m_errorFunctionMAE = en;
00141 cout<<"errorFunctionMAE:"<<m_errorFunctionMAE<<endl;
00142 }
00143
00149 void NNRBM::setNrTargets ( int n )
00150 {
00151 m_nrTargets = n;
00152 cout<<"nrTargets: "<<m_nrTargets<<endl;
00153 }
00154
00155
00161 void NNRBM::setNrInputs ( int n )
00162 {
00163 m_nrInputs = n;
00164 cout<<"nrInputs: "<<m_nrInputs<<endl;
00165 }
00171 void NNRBM::setNrExamplesTrain ( int n )
00172 {
00173 m_nrExamplesTrain = n;
00174
00175 }
00181 void NNRBM::setNrExamplesProbe ( int n )
00182 {
00183 m_nrExamplesProbe = n;
00184 cout<<"nrExamplesProbe: "<<m_nrExamplesProbe<<endl;
00185 }
00191 void NNRBM::setTrainInputs ( REAL* inputs )
00192 {
00193 m_inputsTrain = inputs;
00194
00195 }
00196
00202 void NNRBM::setTrainTargets ( REAL* targets )
00203 {
00204 m_targetsTrain = targets;
00205
00206 }
00207
00213 void NNRBM::setProbeInputs ( REAL* inputs )
00214 {
00215 m_inputsProbe = inputs;
00216
00217 }
00218
00224 void NNRBM::setProbeTargets ( REAL* targets )
00225 {
00226 m_targetsProbe = targets;
00227
00228 }
00229
00235 void NNRBM::setInitWeightFactor ( REAL factor )
00236 {
00237 m_initWeightFactor = factor;
00238 cout<<"initWeightFactor: "<<m_initWeightFactor<<endl;
00239 }
00240
00246 void NNRBM::setLearnrate ( REAL learnrate )
00247 {
00248 m_learnRate = learnrate;
00249 cout<<"learnRate: "<<m_learnRate<<endl;
00250 }
00251
00257 void NNRBM::setLearnrateMinimum ( REAL learnrateMin )
00258 {
00259 m_learnRateMin = learnrateMin;
00260 cout<<"learnRateMin: "<<m_learnRateMin<<endl;
00261 }
00262
00268 void NNRBM::setLearnrateSubtractionValueAfterEverySample ( REAL learnrateDecreaseRate )
00269 {
00270 m_learnrateDecreaseRate = learnrateDecreaseRate;
00271 cout<<"learnrateDecreaseRate: "<<m_learnrateDecreaseRate<<endl;
00272 }
00273
00274
00280 void NNRBM::setLearnrateSubtractionValueAfterEveryEpoch ( REAL learnrateDecreaseRate )
00281 {
00282 m_learnrateDecreaseRateEpoch = learnrateDecreaseRate;
00283 cout<<"learnrateDecreaseRateEpoch: "<<m_learnrateDecreaseRateEpoch<<endl;
00284 }
00285
00291 void NNRBM::setMomentum ( REAL momentum )
00292 {
00293 m_momentum = momentum;
00294 cout<<"momentum: "<<m_momentum<<endl;
00295 }
00296
00302 void NNRBM::setWeightDecay ( REAL weightDecay )
00303 {
00304 m_weightDecay = weightDecay;
00305 cout<<"weightDecay: "<<m_weightDecay<<endl;
00306 }
00307
00314 void NNRBM::setBatchSize ( int size )
00315 {
00316 m_batchSize = size;
00317 cout<<"batchSize: "<<m_batchSize<<endl;
00318 }
00319
00325 void NNRBM::setMinUpdateErrorBound ( REAL minUpdateBound )
00326 {
00327 m_minUpdateBound = minUpdateBound;
00328 cout<<"minUpdateBound: "<<m_minUpdateBound<<endl;
00329 }
00330
00336 void NNRBM::setMaxEpochs ( int epochs )
00337 {
00338 m_maxEpochs = epochs;
00339 cout<<"maxEpochs: "<<m_maxEpochs<<endl;
00340 }
00341
00353 void NNRBM::setRPROPPosNeg ( REAL etaPos, REAL etaNeg )
00354 {
00355 m_RPROP_etaPos = etaPos;
00356 m_RPROP_etaNeg = etaNeg;
00357 cout<<"RPROP_etaPos: "<<m_RPROP_etaPos<<" RPROP_etaNeg: "<<m_RPROP_etaNeg<<endl;
00358 }
00359
00367 void NNRBM::setRPROPMinMaxUpdate ( REAL min, REAL max )
00368 {
00369 m_RPROP_updateMin = min;
00370 m_RPROP_updateMax = max;
00371 cout<<"RPROP_updateMin: "<<m_RPROP_updateMin<<" RPROP_updateMax: "<<m_RPROP_updateMax<<endl;
00372 }
00373
00382 void NNRBM::setScaleOffset ( REAL scale, REAL offset )
00383 {
00384 m_scaleOutputs = scale;
00385 m_offsetOutputs = offset;
00386 cout<<"scaleOutputs: "<<m_scaleOutputs<<" offsetOutputs: "<<m_offsetOutputs<<" [transformation: output = outputNN * scale + offset]"<<endl;
00387 }
00388
00396 void NNRBM::setNormalTrainStopping ( bool en )
00397 {
00398 m_normalTrainStopping = en;
00399 cout<<"normalTrainStopping: "<<m_normalTrainStopping<<endl;
00400 }
00401
00407 void NNRBM::setL1Regularization ( bool en )
00408 {
00409 m_enableL1Regularization = en;
00410 cout<<"enableL1Regularization: "<<m_enableL1Regularization<<endl;
00411 }
00412
00421 void NNRBM::enableRPROP ( bool en )
00422 {
00423 m_enableRPROP = en;
00424 cout<<"enableRPROP: "<<m_enableRPROP<<endl;
00425 }
00426
00434 void NNRBM::useBLASforTraining ( bool enable )
00435 {
00436 m_useBLAS = enable;
00437 cout<<"useBLAS: "<<m_useBLAS<<endl;
00438 }
00439
00446 void NNRBM::setNNStructure ( int nrLayer, int* neuronsPerLayer, bool lastLinearLayer, int* layerType )
00447 {
00448 m_nrLayer = nrLayer;
00449 cout<<"nrLayer: "<<m_nrLayer<<endl;
00450
00451 cout<<"#layers: "<<m_nrLayer<<" ("<< ( m_nrLayer-1 ) <<" hidden layer, 1 output layer)"<<endl;
00452
00453
00454 m_neuronsPerLayer = new int[m_nrLayer+1];
00455 m_neuronsPerLayer[0] = m_nrInputs;
00456 for ( int i=0;i<m_nrLayer-1;i++ )
00457 m_neuronsPerLayer[1+i] = neuronsPerLayer[i];
00458 m_neuronsPerLayer[m_nrLayer] = m_nrTargets;
00459
00460 cout<<"Neurons per Layer: ";
00461 for ( int i=0;i<m_nrLayer+1;i++ )
00462 cout<<m_neuronsPerLayer[i]<<" ";
00463 cout<<endl;
00464
00465 cout<<"Outputs per Layer: ";
00466 for ( int i=0;i<m_nrLayer+1;i++ )
00467 cout<<m_neuronsPerLayer[i]+1<<" ";
00468 cout<<endl;
00469
00470 cout<<"OutOffsets per Layer: ";
00471 int cnt=0;
00472 for ( int i=0;i<m_nrLayer+1;i++ )
00473 {
00474 cout<<cnt<<" ";
00475 cnt += m_neuronsPerLayer[i]+1;
00476 }
00477 cout<<endl;
00478
00479 cout<<"Act. function per Layer(0=sig, 1=lin, 2=tanh, 3=softmax): ";
00480 m_activationFunctionPerLayer = new int[m_nrLayer+1];
00481 for ( int i=0;i<m_nrLayer+1;i++ )
00482 m_activationFunctionPerLayer[i] = 0;
00483 if ( layerType )
00484 {
00485 for ( int i=0;i<m_nrLayer+1;i++ )
00486 m_activationFunctionPerLayer[i] = layerType[i];
00487 }
00488 if ( m_enableAutoencoder )
00489 m_activationFunctionPerLayer[m_nrLayer/2] = 1;
00490 if ( lastLinearLayer )
00491 m_activationFunctionPerLayer[m_nrLayer] = 1;
00492 for ( int i=0;i<m_nrLayer+1;i++ )
00493 cout<<m_activationFunctionPerLayer[i]<<" ";
00494 cout<<endl;
00495
00496
00497 m_nrWeights = 0;
00498 m_nrOutputs = m_neuronsPerLayer[0] + 1;
00499 m_nrLayWeights = new int[m_nrLayer+1];
00500 m_nrLayWeightOffsets = new int[m_nrLayer+2];
00501 m_nrLayWeights[0] = 0;
00502 for ( int i=0;i<m_nrLayer;i++ )
00503 {
00504 m_nrLayWeights[i+1] = m_neuronsPerLayer[i+1] * ( m_neuronsPerLayer[i]+1 );
00505 m_nrWeights += m_nrLayWeights[i+1];
00506 m_nrOutputs += m_neuronsPerLayer[i+1] + 1;
00507 }
00508
00509
00510 cout<<"Weights per Layer: ";
00511 for ( int i=0;i<m_nrLayer+1;i++ )
00512 cout<<m_nrLayWeights[i]<<" ";
00513 cout<<endl;
00514
00515 cout<<"WeightOffsets per Layer: ";
00516 m_nrLayWeightOffsets[0] = 0;
00517 for ( int i=0;i<m_nrLayer+1;i++ )
00518 {
00519 cout<<m_nrLayWeightOffsets[i]<<" ";
00520 m_nrLayWeightOffsets[i+1] = m_nrLayWeightOffsets[i] + m_nrLayWeights[i];
00521 }
00522 cout<<endl;
00523
00524 cout<<"nrOutputs="<<m_nrOutputs<<" nrWeights="<<m_nrWeights<<endl;
00525
00526
00527 m_outputs = new REAL[m_nrOutputs];
00528 m_outputsTmp = new REAL[m_nrTargets];
00529 m_derivates = new REAL[m_nrOutputs];
00530 m_d1 = new REAL[m_nrOutputs];
00531
00532 for ( int i=0;i<m_nrOutputs;i++ )
00533 {
00534 m_outputs[i] = 1.0;
00535 m_derivates[i] = 0.0;
00536 m_d1[i] = 0.0;
00537 }
00538
00539
00540 m_weights = new REAL[m_nrWeights];
00541 m_weightsTmp0 = new REAL[m_nrWeights];
00542 m_weightsTmp1 = new REAL[m_nrWeights];
00543 m_weightsTmp2 = new REAL[m_nrWeights];
00544 m_weightsBatchUpdate = new REAL[m_nrWeights];
00545 m_weightsOld = new REAL[m_nrWeights];
00546 m_weightsOldOld = new REAL[m_nrWeights];
00547 m_deltaW = new REAL[m_nrWeights];
00548
00549 m_deltaWOld = new REAL[m_nrWeights];
00550 m_adaptiveRPROPlRate = new REAL[m_nrWeights];
00551 for ( int i=0;i<m_nrWeights;i++ )
00552 {
00553 m_deltaWOld[i] = 0.0;
00554 m_adaptiveRPROPlRate[i] = m_learnRate;
00555 }
00556 for ( int i=0;i<m_nrWeights;i++ )
00557 m_weights[i] = m_weightsOld[i] = m_deltaW[i] = m_weightsTmp0[i] = m_weightsTmp1[i] = m_weightsTmp2[i] = 0.0;
00558
00559
00560
00561
00562 }
00563
00571 REAL NNRBM::getInitWeight ( int fanIn )
00572 {
00573 double nr = 2.0* ( rand() / ( double ) RAND_MAX-0.5 );
00574 return ( 1.0/sqrt ( ( double ) fanIn ) ) * nr;
00575 }
00576
00582 void NNRBM::initNNWeights ( time_t seed )
00583 {
00584 srand ( seed );
00585 cout<<"init weights "<<endl;
00586 REAL factor = m_initWeightFactor;
00587 int cnt = 0;
00588 for ( int i=0;i<m_nrLayer;i++ )
00589 {
00590 int n = m_neuronsPerLayer[i+1];
00591 int nprev = m_neuronsPerLayer[i] + 1;
00592 for ( int j=0;j<n;j++ )
00593 {
00594 for ( int k=0;k<nprev;k++ )
00595 {
00596 m_weights[cnt] = m_weightsOld[i] = m_weightsOldOld[i] = getInitWeight ( nprev ) * factor;
00597 cnt++;
00598 }
00599 }
00600 }
00601
00602
00603 if ( cnt != m_nrWeights )
00604 assert ( false );
00605 }
00606
00615 void NNRBM::setRBMLearnParams ( REAL learnrateWeights, REAL learnrateBiasVis, REAL learnrateBiasHid, REAL weightDecay, int maxEpoch )
00616 {
00617 m_rbmLearnrateWeights = learnrateWeights;
00618 m_rbmLearnrateBiasVis = learnrateBiasVis;
00619 m_rbmLearnrateBiasHid = learnrateBiasHid;
00620 m_rbmWeightDecay = weightDecay;
00621 m_rbmMaxEpochs = maxEpoch;
00622 }
00623
/**
 * Map (layer, neuron, weight) to the flat index of that connection weight
 * inside m_weights.
 *
 * Each neuron owns a contiguous block of (nrNeurPrev+1) values. For
 * layer 1 the first slot of the block is skipped ("1 +"), i.e. the bias
 * sits at the front of the block; for all other layers the weights come
 * first and the bias sits at position nrNeurPrev (see getBiasIndex).
 *
 * @param layer  weight layer index, 1-based (layer 0 = inputs, no weights)
 * @param neuron index of the target neuron inside 'layer'
 * @param weight index of the incoming connection (source neuron in layer-1)
 * @return flat index into m_weights; asserts on any out-of-range argument
 */
int NNRBM::getWeightIndex ( int layer, int neuron, int weight )
{
    // the input layer has no incoming weights
    if ( layer == 0 )
        assert ( false );

    int nrNeur = m_neuronsPerLayer[layer];
    int nrNeurPrev = m_neuronsPerLayer[layer-1];
    if ( neuron >= nrNeur )
    {
        cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
        assert ( false );
    }
    if ( weight >= nrNeurPrev )
    {
        cout<<"weight:"<<weight<<" nrNeurPrev:"<<nrNeurPrev<<endl;
        assert ( false );
    }

    // start of this layer's weight block
    int ind = m_nrLayWeightOffsets[layer];
    if ( layer == 1 )
        ind += 1 + weight + neuron* ( nrNeurPrev + 1 );  // skip the leading bias slot
    else
        ind += weight + neuron* ( nrNeurPrev + 1 );

    if ( ind >= m_nrWeights )
    {
        cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
        assert ( false );
    }

    return ind;
}
00665
/**
 * Map (layer, neuron) to the flat index of that neuron's bias weight
 * inside m_weights.
 *
 * Mirrors the layout used by getWeightIndex: for layer 1 the bias is the
 * first slot of the neuron's (nrNeurPrev+1)-sized block; for all other
 * layers the bias is the last slot (offset nrNeurPrev).
 *
 * @param layer  weight layer index, 1-based (layer 0 = inputs, no bias)
 * @param neuron index of the neuron inside 'layer'
 * @return flat index into m_weights; asserts on any out-of-range argument
 */
int NNRBM::getBiasIndex ( int layer, int neuron )
{
    // the input layer has no bias weights
    if ( layer == 0 )
        assert ( false );

    int nrNeur = m_neuronsPerLayer[layer];
    int nrNeurPrev = m_neuronsPerLayer[layer-1];
    if ( neuron >= nrNeur )
    {
        cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
        assert ( false );
    }
    int ind = m_nrLayWeightOffsets[layer];
    if ( layer == 1 )
        ind += neuron* ( nrNeurPrev + 1 );               // bias leads the block
    else
        ind += nrNeurPrev + neuron* ( nrNeurPrev + 1 );  // bias trails the block

    if ( ind >= m_nrWeights )
    {
        cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
        assert ( false );
    }

    return ind;
}
00700
00709 int NNRBM::getOutputIndex ( int layer, int neuron )
00710 {
00711 if ( layer == 0 || layer > m_nrLayer )
00712 assert ( false );
00713
00714 if ( neuron >= m_neuronsPerLayer[layer] )
00715 assert ( false );
00716
00717 int ind = 0;
00718 for ( int i=0;i<layer;i++ )
00719 ind += m_neuronsPerLayer[i] + 1;
00720
00721 return ind + neuron;
00722 }
00723
00729 vector<int> NNRBM::getEncoder()
00730 {
00731 vector<int> v;
00732 cout<<"Encoder:"<<endl;
00733 int midLayer = m_nrLayer/2 + 1;
00734 int weightCnt = 0;
00735 for ( int i=0;i<midLayer;i++ )
00736 {
00737 v.push_back ( m_neuronsPerLayer[i] );
00738 weightCnt += m_nrLayWeights[i];
00739 cout<<"neurons|weights: "<<m_neuronsPerLayer[i]<<"|"<<m_nrLayWeights[i]<<endl;
00740 }
00741 v.push_back ( weightCnt );
00742 return v;
00743 }
00744
00759 void NNRBM::rbmPretraining ( REAL* input, REAL* target, int nSamples, int nTarget, int firstNLayer, int crossRun, int nFinetuningEpochs, double finetuningLearnRate )
00760 {
00761 cout<<endl<<endl<<"=== set structure for fine tuning of the unrolled autoencoder ==="<<endl;
00762 m_nnAuto = new NNRBM();
00763 m_nnAuto->setInitWeightFactor ( 0.0 );
00764 m_nnAuto->setScaleOffset ( m_scaleOutputs, m_offsetOutputs );
00765 m_nnAuto->setNrTargets ( m_nrInputs );
00766 m_nnAuto->setNrInputs ( m_nrInputs );
00767 m_nnAuto->setNrExamplesTrain ( nSamples );
00768 m_nnAuto->setTrainInputs ( input );
00769 m_nnAuto->setTrainTargets ( input );
00770
00771
00772 m_nnAuto->setInitWeightFactor ( m_initWeightFactor );
00773 if ( finetuningLearnRate != 0.0 )
00774 m_nnAuto->setLearnrate ( finetuningLearnRate );
00775 else
00776 m_nnAuto->setLearnrate ( m_learnRate );
00777 m_nnAuto->setLearnrateMinimum ( m_learnRateMin );
00778 m_nnAuto->setLearnrateSubtractionValueAfterEverySample ( m_learnrateDecreaseRate );
00779 m_nnAuto->setLearnrateSubtractionValueAfterEveryEpoch ( m_learnrateDecreaseRateEpoch );
00780 m_nnAuto->setMomentum ( m_momentum );
00781 m_nnAuto->setWeightDecay ( 0.0 );
00782 m_nnAuto->setMinUpdateErrorBound ( m_minUpdateBound );
00783 m_nnAuto->setBatchSize ( m_batchSize );
00784 m_nnAuto->setMaxEpochs ( m_maxEpochs );
00785 m_nnAuto->setL1Regularization ( m_enableL1Regularization );
00786 m_nnAuto->enableErrorFunctionMAE ( m_errorFunctionMAE );
00787 m_nnAuto->setNormalTrainStopping ( true );
00788 m_nnAuto->useBLASforTraining ( m_useBLAS );
00789
00790
00791 int nLayer = m_nrLayer;
00792 if ( firstNLayer != -1 )
00793 nLayer = firstNLayer;
00794
00795 int* layerConfig = new int[2* ( nLayer-1 ) ];
00796 for ( int i=0;i<2* ( nLayer-1 ) - 1;i++ )
00797 {
00798 if ( i < nLayer-1 )
00799 layerConfig[i] = m_neuronsPerLayer[i+1];
00800 else
00801 layerConfig[i] = m_neuronsPerLayer[2* ( nLayer-1 ) - 1 - i];
00802 }
00803 m_nnAuto->m_enableAutoencoder = true;
00804 m_nnAuto->setNNStructure ( 2* ( nLayer-1 ), layerConfig );
00805 m_nnAuto->initNNWeights ( 0 );
00806
00807
00808 cout<<endl<<"=== RBM pretraining ("<<nSamples<<" samples) ==="<<endl;
00809 int weightOffset = 0;
00810 REAL* batchdata = new REAL[nSamples*m_neuronsPerLayer[0]];
00811 for ( int j=0;j<nSamples*m_neuronsPerLayer[0];j++ )
00812 batchdata[j] = input[j];
00813
00814
00815 for ( int i=0;i<nLayer-1;i++ )
00816 {
00817 int nVis = m_neuronsPerLayer[i];
00818 int nHid = m_neuronsPerLayer[i+1];
00819 cout<<endl<<"layer:"<<i<<" nVis:"<<nVis<<" nHid:"<<nHid<<endl;
00820
00821
00822 REAL* batchdataNext = new REAL[nSamples*nHid];
00823 REAL* vishid = new REAL[nVis*nHid];
00824 REAL* visbias = new REAL[nVis];
00825 REAL* hidbias = new REAL[nHid];
00826 REAL* poshidprobs = new REAL[nHid];
00827 REAL* neghidprobs = new REAL[nHid];
00828 REAL* hidbiasinc = new REAL[nHid];
00829 REAL* visbiasinc = new REAL[nVis];
00830 REAL* poshidstates = new REAL[nHid];
00831 REAL* negdata = new REAL[nVis];
00832 REAL* neghidact = new REAL[nHid];
00833 REAL* negvisact = new REAL[nVis];
00834 REAL* poshidact = new REAL[nHid];
00835 REAL* posvisact = new REAL[nVis];
00836 REAL* probs = new REAL[nHid];
00837 REAL constant = 0.0;
00838
00839 for ( int j=0;j<nVis;j++ )
00840 {
00841 visbias[j] = constant;
00842 visbiasinc[j] = constant;
00843 negdata[j] = constant;
00844 }
00845 for ( int j=0;j<nHid;j++ )
00846 {
00847 hidbias[j] = constant;
00848 hidbiasinc[j] = constant;
00849 poshidprobs[j] = constant;
00850 neghidprobs[j] = constant;
00851 poshidstates[j] = constant;
00852 }
00853 for ( int j=0;j<nVis*nHid;j++ )
00854 {
00855 vishid[j] = constant;
00856 }
00857
00858
00859 unsigned int seed = rand();
00860 cout<<"seed:"<<seed<<endl;
00861
00862 GAUSS_DISTRIBUTION ( vishid, nVis*nHid, 0.0, 0.1, &seed );
00863 time_t t0 = time ( 0 );
00864
00865
00866
00867 for ( int epoch=0;epoch<m_rbmMaxEpochs;epoch++ )
00868 {
00869 double errsum = 0.0;
00870 int progress = nSamples/10 + 1, errcnt = 0;
00871
00872 for ( int sample=0;sample<nSamples;sample++ )
00873 {
00874
00875 REAL* data = batchdata + nVis * sample;
00876 REAL min = 1e10, max = -1e10, mean = 0.0;
00877 for ( int k=0;k<nVis;k++ )
00878 {
00879 if ( min > data[k] )
00880 min = data[k];
00881 if ( max < data[k] )
00882 max = data[k];
00883 mean += data[k];
00884 }
00885 mean /= ( double ) nVis;
00886
00887 if ( min > 1.0 || min < 0.0 )
00888 {
00889 cout<<"min:"<<min<<endl;
00890 assert ( false );
00891 }
00892 if ( max > 1.0 || max < 0.0 )
00893 {
00894 cout<<"max:"<<max<<endl;
00895 assert ( false );
00896 }
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, nHid, nVis, 1.0, vishid, nVis, data, 1, 0.0, poshidprobs, 1 );
00916
00917
00918
00919 V_ADD ( nHid, poshidprobs, hidbias, poshidprobs );
00920 V_MULCI ( -1.0, poshidprobs, nHid );
00921 V_EXP ( poshidprobs, nHid );
00922 V_ADDCI ( 1.0, poshidprobs, nHid );
00923 V_INV ( nHid, poshidprobs, poshidprobs );
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
00936 V_COPY ( poshidprobs, poshidact, nHid );
00937
00938
00939
00940
00941 V_COPY ( data, posvisact, nVis );
00942
00943
00944 for ( int k=0;k<nHid;k++ )
00945 probs[k] = ( double ) rand() / ( double ) RAND_MAX;
00946
00947
00948 for ( int k=0;k<nHid;k++ )
00949 {
00950 if ( poshidprobs[k] > probs[k] )
00951 poshidstates[k] = 1.0;
00952 else
00953 poshidstates[k] = 0.0;
00954 }
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970 CBLAS_GEMV ( CblasRowMajor, CblasTrans, nHid, nVis, 1.0, vishid, nVis, poshidstates, 1, 0.0, negdata, 1 );
00971
00972
00973
00974 V_ADD ( nVis, negdata, visbias, negdata );
00975 V_MULCI ( -1.0, negdata, nVis );
00976 V_EXP ( negdata, nVis );
00977 V_ADDCI ( 1.0, negdata, nVis );
00978 V_INV ( nVis, negdata, negdata );
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, nHid, nVis, 1.0, vishid, nVis, negdata, 1, 0.0, neghidprobs, 1 );
00990
00991
00992
00993 V_ADD ( nHid, neghidprobs, hidbias, neghidprobs );
00994 V_MULCI ( -1.0, neghidprobs, nHid );
00995 V_EXP ( neghidprobs, nHid );
00996 V_ADDCI ( 1.0, neghidprobs, nHid );
00997 V_INV ( nHid, neghidprobs, neghidprobs );
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007 V_COPY ( neghidprobs, neghidact, nHid );
01008
01009
01010
01011
01012
01013 V_COPY ( negdata, negvisact, nVis );
01014
01015
01016 double err = 0.0;
01017 for ( int k=0;k<nVis;k++ )
01018 err += ( data[k] - negdata[k] ) * ( data[k] - negdata[k] );
01019
01020 if ( isnan ( err ) || isinf ( err ) )
01021 cout<<endl;
01022
01023 errsum = err + errsum;
01024 errcnt += nVis;
01025
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035 for ( int k=0;k<nHid;k++ )
01036 for ( int l=0;l<nVis;l++ )
01037 vishid[l+k*nVis] += m_rbmLearnrateWeights* ( ( data[l] * poshidprobs[k] - negdata[l] * neghidprobs[k] ) - m_rbmWeightDecay*vishid[l+k*nVis] );
01038
01039
01040
01041
01042 for ( int k=0;k<nVis;k++ )
01043 visbias[k] += m_rbmLearnrateBiasVis * ( posvisact[k]-negvisact[k] );
01044 for ( int k=0;k<nHid;k++ )
01045 hidbias[k] += m_rbmLearnrateBiasHid * ( poshidact[k]-neghidact[k] );
01046
01047 }
01048 for ( int sample=0;sample<nSamples;sample++ )
01049 {
01050 REAL* data = batchdata + nVis * sample;
01051
01052 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, nHid, nVis, 1.0, vishid, nVis, data, 1, 0.0, poshidprobs, 1 );
01053 for ( int k=0;k<nHid;k++ )
01054 poshidprobs[k] = 1.0 / ( 1.0 + exp ( - ( poshidprobs[k]+hidbias[k] ) ) );
01055 REAL* batchPtr = batchdataNext + sample * nHid;
01056 for ( int k=0;k<nHid;k++ )
01057 batchPtr[k] = poshidprobs[k];
01058 }
01059 cout<<"epoch:"<<epoch+1<<"/"<<m_rbmMaxEpochs<<" errsum:"<<errsum<<" errcnt:"<<errcnt<<" mse:"<<errsum/ ( double ) errcnt<<" "<<time ( 0 )-t0<<"[s]"<<endl;
01060 t0 = time ( 0 );
01061 }
01062
01063 int nrWeightsHere = m_nrLayWeights[i];
01064 weightOffset += nrWeightsHere;
01065 int offset = weightOffset;
01066 cout<<"weight offset:"<<offset<<" offsetNext:"<<weightOffset<<endl;
01067 REAL* weightPtr = m_weights + offset;
01068
01069
01070 for ( int j=0;j<nHid;j++ )
01071 for ( int k=0;k<nVis;k++ )
01072 m_weights[getWeightIndex ( i + 1, j, k ) ] = vishid[k + j*nVis];
01073
01074 REAL* nnAutoWeights = m_nnAuto->getWeightPtr();
01075 for ( int j=0;j<nHid;j++ )
01076 for ( int k=0;k<nVis;k++ )
01077 {
01078 nnAutoWeights[m_nnAuto->getWeightIndex ( i + 1, j, k ) ] = vishid[k + j*nVis];
01079 nnAutoWeights[m_nnAuto->getWeightIndex ( 2* ( nLayer-1 ) - i, k, j ) ] = vishid[k + j*nVis];
01080 }
01081
01082
01083 for ( int j=0;j<nHid;j++ )
01084 m_weights[getBiasIndex ( i + 1, j ) ] = hidbias[j];
01085
01086 for ( int j=0;j<nHid;j++ )
01087 nnAutoWeights[m_nnAuto->getBiasIndex ( i + 1, j ) ] = hidbias[j];
01088 for ( int j=0;j<nVis;j++ )
01089 nnAutoWeights[m_nnAuto->getBiasIndex ( 2* ( nLayer-1 ) - i, j ) ] = visbias[j];
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197 delete[] batchdata;
01198 batchdata = new REAL[nSamples*nHid];
01199 for ( int j=0;j<nSamples*nHid;j++ )
01200 batchdata[j] = batchdataNext[j];
01201 delete[] batchdataNext;
01202 delete[] vishid;
01203 delete[] visbias;
01204 delete[] hidbias;
01205 delete[] poshidprobs;
01206 delete[] neghidprobs;
01207 delete[] hidbiasinc;
01208 delete[] visbiasinc;
01209 delete[] poshidstates;
01210 delete[] negdata;
01211 delete[] neghidact;
01212 delete[] negvisact;
01213 delete[] poshidact;
01214 delete[] posvisact;
01215 delete[] probs;
01216 }
01217
01218 delete[] batchdata;
01219
01220
01221 for ( int i=0;i<m_nnAuto->getNrWeights();i++ )
01222 {
01223 REAL* p = m_nnAuto->getWeightPtr();
01224 if ( p[i] == 0.0 || isnan ( p[i] ) )
01225 {
01226 cout<<"p["<<i<<"]:"<<p[i]<<endl;
01227 assert ( false );
01228 }
01229 }
01230
01231 cout<<"Weights summary from neural net:"<<endl;
01232 for ( int i=0;i<m_nrLayer;i++ )
01233 {
01234 int n = m_neuronsPerLayer[i+1];
01235 int nIn = m_neuronsPerLayer[i];
01236 cout<<"#neur:"<<n<<" #neurPrev:"<<nIn<<" ";
01237
01238
01239 REAL min = 1e10, max = -1e10;
01240 double sum = 0.0, cnt = 0.0, sum2 = 0.0;
01241
01242 for ( int j=0;j<n;j++ )
01243 for ( int k=0;k<nIn;k++ )
01244 {
01245 REAL v = m_weights[getWeightIndex ( i + 1, j, k ) ];
01246 if ( min > v )
01247 min = v;
01248 if ( max < v )
01249 max = v;
01250 sum += v;
01251 sum2 += v*v;
01252 cnt += 1.0;
01253 }
01254 cout<<" weights: cnt|min|max|mean|std: "<<cnt<<"|"<<min<<"|"<<max<<"|"<<sum/cnt<<"|"<<sqrt ( sum2/cnt- ( sum/cnt ) * ( sum/cnt ) ) <<endl;
01255
01256 min = 1e10;
01257 max = -1e10;
01258 sum = 0.0;
01259 cnt = 0.0;
01260 sum2 = 0.0;
01261
01262 for ( int j=0;j<n;j++ )
01263 {
01264 REAL v = m_weights[getBiasIndex ( i + 1, j ) ];
01265 if ( min > v )
01266 min = v;
01267 if ( max < v )
01268 max = v;
01269 sum += v;
01270 sum2 += v*v;
01271 cnt += 1.0;
01272 }
01273 cout<<" biases: cnt|min|max|mean|std: "<<cnt<<"|"<<min<<"|"<<max<<"|"<<sum/cnt<<"|"<<sqrt ( sum2/cnt- ( sum/cnt ) * ( sum/cnt ) ) <<endl;
01274
01275 }
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300 if ( nFinetuningEpochs != -1 )
01301 {
01302 cout<<"=== begin: finetuning ==="<<endl;
01303 m_nnAuto->m_normalTrainStopping = false;
01304 m_nnAuto->m_maxEpochs = nFinetuningEpochs;
01305 if ( nFinetuningEpochs > 0 )
01306 m_nnAuto->trainNN();
01307 cout<<"=== end: finetuning ==="<<endl;
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333
01334
01335
01336
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396
01397
01398
01399
01400
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411
01412
01413 cout<<endl<<"Copy the autoencoder weights to the net"<<endl;
01414 cout<<"#weights:"<<m_nrLayWeightOffsets[nLayer]<<endl;
01415 for ( int i=0;i<nLayer;i++ )
01416 {
01417 cout<<"layer "<<i<<": #neurons:"<<m_neuronsPerLayer[i]<<" #neuronsAuto:"<<m_nnAuto->m_neuronsPerLayer[i]<<endl;
01418 if ( i > 0 )
01419 {
01420
01421 for ( int j=0;j<m_neuronsPerLayer[i];j++ )
01422 for ( int k=0;k<m_neuronsPerLayer[i-1];k++ )
01423 m_weights[getWeightIndex ( i,j,k ) ] = m_nnAuto->m_weights[m_nnAuto->getWeightIndex ( i,j,k ) ];
01424
01425 for ( int j=0;j<m_neuronsPerLayer[i];j++ )
01426 m_weights[getBiasIndex ( i,j ) ] = m_nnAuto->m_weights[m_nnAuto->getBiasIndex ( i,j ) ];
01427 }
01428 }
01429
01430 }
01431
01432
01433
01434
01435
01436
01437
01438
01439
01440
01441
01442
01443
01444
01445
01446
01447
01448
01449
01450
01451
01452
01453
01454 }
01455
/**
 * Write, per sample, its targets followed by the autoencoder's middle-layer
 * (code) activations to a whitespace-separated text file.
 *
 * NOTE(review): assumes m_nnAuto has been built by rbmPretraining() before
 * this is called — there is no null check here; verify in the caller.
 *
 * @param fname    output file name
 * @param input    inputs, nSamples x m_nrInputs
 * @param target   targets, nSamples x nTarget
 * @param nSamples number of samples to write
 * @param nTarget  number of target values per sample
 */
void NNRBM::printMiddleLayerToFile(string fname, REAL* input, REAL* target, int nSamples, int nTarget)
{
    fstream f0 ( fname.c_str(),ios::out );
    // prediction buffer; the values are not written out — the forward pass
    // is run only to populate m_nnAuto->m_outputs
    REAL* p = new REAL[m_nrInputs];
    for ( int i=0;i<nSamples;i++ )
    {
        m_nnAuto->predictSingleInput ( input + i*m_nrInputs, p );

        // targets first ...
        for ( int j=0;j<nTarget;j++ )
            f0<<target[j+i*nTarget]<<" ";

        // ... then the code-layer activations of the autoencoder
        int midLayer = m_nrLayer - 1;
        for ( int j=0;j<m_nnAuto->m_neuronsPerLayer[midLayer];j++ )
            f0<<m_nnAuto->m_outputs[m_nnAuto->getOutputIndex ( midLayer, j ) ]<<" ";

        f0<<endl;
    }
    delete[] p;
    f0.close();
}
01488
/**
 * Backpropagation for one sample using BLAS kernels: computes the error
 * signal of the output layer, then propagates deltas backwards through all
 * hidden layers, filling m_d1 (deltas) and m_deltaW (per-weight gradients).
 * Also accumulates the squared training error into m_sumSquaredError.
 *
 * Layout assumption (from the offset arithmetic below): each layer occupies
 * (neurons+1) slots in m_outputs/m_d1 and its weights are a contiguous
 * block of m_nrLayWeights[layer] values in m_weights.
 *
 * @param input       raw input vector of the current sample
 * @param target      target vector (m_nrTargets values)
 * @param updateLayer optional per-layer flags; a layer with flag false
 *                    gets its deltas computed but no gradient written
 */
void NNRBM::backpropBLAS ( REAL* input, REAL* target, bool* updateLayer )
{
    REAL sum0, d1;

    // offsets of the output layer inside the flat activation buffers
    int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1;
    int n0 = m_neuronsPerLayer[m_nrLayer-1];
    int outputOffsetPrev = outputOffset - n0 - 1;
    int outputOffsetNext = outputOffset;

    // offset of the output layer's weight block
    int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer];
    int weightOffsetNext, nP1;

    REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error;

    // ---- output layer: error signal and gradient ----
    for ( int i=0;i<m_nrTargets;i++ )
    {
        REAL out = m_outputs[outputOffset+i];

        // training error in the original (unscaled) target domain
        REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i];
        m_sumSquaredError += errorTrain * errorTrain;
        m_sumSquaredErrorSamples += 1.0;

        // map the target into the network's output domain
        targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs;

        error = out - targetConverted;

        // MAE: use only the sign of the error as gradient
        if ( m_errorFunctionMAE )
            error = error > 0.0? 1.0 : -1.0;

        d1 = error * m_derivates[outputOffset+i];
        m_d1[outputOffset+i] = d1;

        // skip the gradient write if this layer is frozen
        if ( updateLayer )
            if ( updateLayer[m_nrLayer] == false )
                continue;

        deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 );
        if ( m_nrLayer==1 )
        {
            // single-layer net: previous "outputs" are the raw inputs;
            // slot 0 is the bias (input - 1 shifts indices so j=0 maps to it)
            outputsPtr = input - 1;
            deltaWPtr[0] = d1;
            for ( int j=1;j<n0+1;j++ )
                deltaWPtr[j] = d1 * outputsPtr[j];
        }
        else
        {
            outputsPtr = m_outputs + outputOffsetPrev;
            for ( int j=0;j<n0+1;j++ )
                deltaWPtr[j] = d1 * outputsPtr[j];
        }
    }

    // step offsets one layer backwards
    // NOTE(review): for m_nrLayer==1 this reads m_neuronsPerLayer[-1];
    // the value is never used afterwards, but it is an out-of-bounds read —
    // worth guarding.
    outputOffsetNext = outputOffset;
    outputOffset = outputOffsetPrev;
    n0 = m_neuronsPerLayer[m_nrLayer-2];
    outputOffsetPrev -= n0 + 1;
    weightOffset -= m_nrLayWeights[m_nrLayer-1];
    weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer];

    // ---- hidden layers, back to front ----
    for ( int i=m_nrLayer-1;i>0;i-- )
    {
        int n = m_neuronsPerLayer[i];
        int nNext = m_neuronsPerLayer[i+1];
        int nPrev = m_neuronsPerLayer[i-1];
        nP1 = n+1;

        d1Ptr0 = m_d1 + outputOffsetNext;
        derivatesPtr = m_derivates + outputOffset;
        weightsPtr = m_weights + weightOffsetNext;
        d1Ptr = m_d1 + outputOffset;
        deltaWPtr = m_deltaW + weightOffset;
        if ( i==1 )
            outputsPtr = input;
        else
            outputsPtr = m_outputs + outputOffsetPrev;

        // delta_i = (W_{i+1}^T * delta_{i+1}) .* f'(net_i)
        CBLAS_GEMV ( CblasRowMajor, CblasTrans, nNext, n, 1.0, weightsPtr, nP1, d1Ptr0, 1, 0.0, d1Ptr, 1 );
        V_MUL ( n, d1Ptr, derivatesPtr, d1Ptr );

        bool updateWeights = true;
        if ( updateLayer )
            if ( updateLayer[i] == false )
                updateWeights = false;

        // gradient: deltaW = delta_j * previous-layer outputs (incl. bias)
        for ( int j=0;j<n;j++ )
        {
            if ( updateWeights )
            {
                if ( i==1 )
                {
                    // first layer: raw inputs have no bias slot, add it explicitly
                    V_COPY ( outputsPtr, deltaWPtr+1, nPrev );
                    deltaWPtr[0] = 1.0;
                }
                else
                    V_COPY ( outputsPtr, deltaWPtr, nPrev+1 );
                V_MULC ( deltaWPtr, d1Ptr[j], deltaWPtr, nPrev+1 );
            }
            deltaWPtr += nPrev+1;
        }

        // step offsets one more layer backwards
        // NOTE(review): when i==1 this reads m_neuronsPerLayer[-1] (unused
        // afterwards, loop exits) — same out-of-bounds read as above.
        outputOffsetNext = outputOffset;
        outputOffset = outputOffsetPrev;
        n0 = m_neuronsPerLayer[i-2];
        outputOffsetPrev -= n0 + 1;
        weightOffset -= m_nrLayWeights[i-1];
        weightOffsetNext -= m_nrLayWeights[i];
    }
}
01620
/**
 * Backpropagation pass for one training sample (plain C++ loops, non-BLAS variant).
 *
 * Computes the error deltas m_d1 for every layer and the per-weight gradients
 * m_deltaW. Must be called after a forward pass (forwardCalculation or
 * forwardCalculationBLAS) so that m_outputs and m_derivates hold the values
 * for this input.
 *
 * @param input       Input feature vector of the current sample (m_nrInputs values)
 * @param target      Target vector of the current sample (m_nrTargets values)
 * @param updateLayer Optional per-layer flags; if non-zero, gradients are only
 *                    written for layers whose flag is true (deltas are still
 *                    propagated through all layers)
 */
void NNRBM::backprop ( REAL* input, REAL* target, bool* updateLayer )
{
    REAL sum0, d1;

    // offset of the output layer's activations inside m_outputs
    // (the trailing "-1" skips the last layer's bias slot)
    int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1;
    int n0 = m_neuronsPerLayer[m_nrLayer-1];
    int outputOffsetPrev = outputOffset - n0 - 1;
    int outputOffsetNext = outputOffset;

    // offset of the last weight layer inside m_weights
    int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer];
    int weightOffsetNext, nP1;

    REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error;

    // ---- output layer: error signal and gradient of the last weight layer ----
    for ( int i=0;i<m_nrTargets;i++ )
    {
        double out = m_outputs[outputOffset+i];

        // accumulate squared error in the original (unscaled) target domain;
        // consumed elsewhere for RMSE reporting
        REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i];
        m_sumSquaredError += errorTrain * errorTrain;
        m_sumSquaredErrorSamples += 1.0;

        // error in the network's internal output scale
        targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs;
        error = out - targetConverted;

        // MAE error function: only the sign of the error is propagated
        if ( m_errorFunctionMAE )
            error = error > 0.0? 1.0 : -1.0;
        d1 = error * m_derivates[outputOffset+i];
        m_d1[outputOffset+i] = d1;

        // skip gradient writing if the output layer is frozen
        if ( updateLayer )
            if ( updateLayer[m_nrLayer] == false )
                continue;

        deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 );
        if ( m_nrLayer==1 )
        {
            // single-layer net: previous activations are the raw input;
            // "input - 1" aligns index 0 with the bias weight (deltaW[0] = d1 * 1)
            outputsPtr = input - 1;
            deltaWPtr[0] = d1;
            for ( int j=1;j<n0+1;j++ )
                deltaWPtr[j] = d1 * outputsPtr[j];
        }
        else
        {
            // previous activations come from the hidden layer below
            // (n0+1 values including its bias slot)
            outputsPtr = m_outputs + outputOffsetPrev;
            for ( int j=0;j<n0+1;j++ )
                deltaWPtr[j] = d1 * outputsPtr[j];
        }

    }

    // step all offsets one layer down
    outputOffsetNext = outputOffset;
    outputOffset = outputOffsetPrev;
    n0 = m_neuronsPerLayer[m_nrLayer-2];
    outputOffsetPrev -= n0 + 1;
    weightOffset -= m_nrLayWeights[m_nrLayer-1];
    weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer];

    // ---- hidden layers: propagate deltas backwards, layer by layer ----
    for ( int i=m_nrLayer-1;i>0;i-- )
    {
        int n = m_neuronsPerLayer[i];
        int nNext = m_neuronsPerLayer[i+1];
        int nPrev = m_neuronsPerLayer[i-1];
        nP1 = n+1;

        d1Ptr0 = m_d1 + outputOffsetNext;           // deltas of the layer above
        derivatesPtr = m_derivates + outputOffset;  // f'(net) of this layer
        weightsPtr = m_weights + weightOffsetNext;  // weights layer i -> i+1
        d1Ptr = m_d1 + outputOffset;                // deltas of this layer (written here)
        deltaWPtr = m_deltaW + weightOffset;        // gradients of weights feeding layer i
        if ( i==1 )
            outputsPtr = input - 1;  // align raw input with the bias slot (see above)
        else
            outputsPtr = m_outputs + outputOffsetPrev;

        bool updateWeights = true;
        if ( updateLayer )
            if ( updateLayer[i] == false )
                updateWeights = false;

        for ( int j=0;j<n;j++ )
        {
            // delta_j = f'(net_j) * sum_k delta_k * w_kj
            // weightsPtr is advanced once per j (see loop footer), so
            // weightsPtr[k*nP1] walks column j of the row-major weight matrix
            sum0 = 0.0;
            for ( int k=0;k<nNext;k++ )
                sum0 += d1Ptr0[k] * weightsPtr[k*nP1];
            sum0 *= *derivatesPtr;
            d1Ptr[j] = sum0;

            if ( updateWeights )
            {
                // gradient of the weights feeding neuron j
                if ( i==1 )
                {
                    deltaWPtr[0] = sum0;  // bias weight gradient (input layer: bias is column 0)
                    for ( int k=1;k<nPrev+1;k++ )
                        deltaWPtr[k] = sum0 * outputsPtr[k];
                }
                else
                {
                    for ( int k=0;k<nPrev+1;k++ )
                        deltaWPtr[k] = sum0 * outputsPtr[k];
                }
            }

            deltaWPtr += nPrev+1;
            weightsPtr++;
            derivatesPtr++;
        }

        // step all offsets one layer down.
        // NOTE(review): in the final iteration (i==1) this reads
        // m_neuronsPerLayer[-1]; the resulting values are never used, but the
        // read itself looks out-of-bounds -- verify.
        outputOffsetNext = outputOffset;
        outputOffset = outputOffsetPrev;
        n0 = m_neuronsPerLayer[i-2];
        outputOffsetPrev -= n0 + 1;
        weightOffset -= m_nrLayWeights[i-1];
        weightOffsetNext -= m_nrLayWeights[i];
    }

}
01750
01758 void NNRBM::forwardCalculationBLAS ( REAL* input )
01759 {
01760 int outputOffset = m_neuronsPerLayer[0]+1, outputOffsetPrev = 0;
01761 REAL tmp0, tmp1, sum0;
01762 REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights;
01763
01764 for ( int i=0;i<m_nrLayer;i++ )
01765 {
01766 int n = m_neuronsPerLayer[i+1];
01767 int nprev = m_neuronsPerLayer[i] + 1;
01768 int inputOffset = 0;
01769 int activationType = m_activationFunctionPerLayer[i+1];
01770 ptr0 = m_outputs + outputOffset;
01771 ptr1 = m_derivates + outputOffset;
01772 if ( i==0 )
01773 {
01774 outputPtr = input;
01775 inputOffset = 1;
01776 }
01777 else
01778 outputPtr = m_outputs + outputOffsetPrev;
01779
01780
01781 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, n, nprev - inputOffset, 1.0, weightPtr + inputOffset, nprev, outputPtr, 1, 0.0, ptr0, 1 );
01782 if ( inputOffset )
01783 {
01784 for ( int j=0;j<n;j++ )
01785 ptr0[j] += weightPtr[j * nprev];
01786 }
01787
01788
01789 if ( activationType == 0 )
01790 {
01791 for ( int j=0;j<n;j++ )
01792 ptr0[j] = -ptr0[j];
01793 V_EXP ( ptr0, n );
01794 for ( int j=0;j<n;j++ )
01795 {
01796 ptr0[j] = 1.0 / ( 1.0 + ptr0[j] );
01797 ptr1[j] = ptr0[j] * ( 1.0 - ptr0[j] );
01798 }
01799 }
01800 else if ( activationType == 1 )
01801 {
01802 for ( int j=0;j<n;j++ )
01803 ptr1[j] = 1.0;
01804 }
01805 else if ( activationType == 2 )
01806 {
01807 V_TANH ( n, ptr0, ptr0 );
01808 V_SQR ( n, ptr0, ptr1 );
01809 for ( int j=0;j<n;j++ )
01810 ptr1[j] = 1.0 - ptr1[j];
01811 }
01812 else if ( activationType == 3 )
01813 {
01814 V_EXP ( ptr0, n );
01815
01816 REAL sumOut = 0.0, invSumOut;
01817 for ( int j=0;j<n;j++ )
01818 sumOut += ptr0[j];
01819
01820 invSumOut = 1.0 / sumOut;
01821
01822 for ( int j=0;j<n;j++ )
01823 ptr1[j] = ptr0[j] * ( sumOut - ptr0[j] ) * invSumOut * invSumOut;
01824
01825 for ( int j=0;j<n;j++ )
01826 ptr0[j] *= invSumOut;
01827
01828
01829
01830
01831
01832
01833
01834
01835
01836
01837
01838
01839
01840 }
01841 else
01842 assert ( false );
01843
01844
01845 weightPtr += n*nprev;
01846 outputOffset += n+1;
01847 outputOffsetPrev += nprev;
01848
01849 }
01850
01851 }
01852
01860 void NNRBM::forwardCalculation ( REAL* input )
01861 {
01862 int outputOffset = m_neuronsPerLayer[0] + 1, outputOffsetPrev = 0;
01863 REAL tmp0, tmp1, sum0;
01864 REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights;
01865 for ( int i=0;i<m_nrLayer;i++ )
01866 {
01867 int n = m_neuronsPerLayer[i+1];
01868 int nprev = m_neuronsPerLayer[i] + 1;
01869 int loopOffset = i==0? 1 : 0;
01870 int activationType = m_activationFunctionPerLayer[i+1];
01871 ptr0 = m_outputs + outputOffset;
01872 ptr1 = m_derivates + outputOffset;
01873 if ( i==0 )
01874 outputPtr = input - loopOffset;
01875 else
01876 outputPtr = m_outputs + outputOffsetPrev;
01877
01878 for ( int j=0;j<n;j++ )
01879 {
01880 sum0 = i==0? weightPtr[0] : 0.0;
01881 for ( int k=loopOffset;k<nprev;k++ )
01882 sum0 += weightPtr[k] * outputPtr[k];
01883 weightPtr += nprev;
01884 if ( activationType == 0 )
01885 {
01886 tmp0 = exp ( -sum0 );
01887 ptr1[j] = tmp0/ ( ( 1.0 + tmp0 ) * ( 1.0 + tmp0 ) );
01888 ptr0[j] = 1.0 / ( 1.0 + tmp0 );
01889 }
01890 else if ( activationType == 1 )
01891 {
01892 ptr0[j] = sum0;
01893 ptr1[j] = 1.0;
01894 }
01895 else if ( activationType == 2 )
01896 {
01897 tmp0 = tanh ( sum0 );
01898 ptr0[j] = tmp0;
01899 ptr1[j] = ( 1.0 - tmp0*tmp0 );
01900 }
01901 else
01902 assert ( false );
01903 }
01904 outputOffset += n+1;
01905 outputOffsetPrev += nprev;
01906 }
01907 }
01908
01909 void NNRBM::printAutoencoderWeightsToJavascript(string fname)
01910 {
01911 fstream autoF(fname.c_str(),ios::out);
01912 autoF<<"function evalAuto(vIn,vOut)"<<endl;
01913 autoF<<"{"<<endl;
01914
01915 int outputOffset = m_neuronsPerLayer[0] + 1, outputOffsetPrev = 0;
01916 REAL tmp0, tmp1, sum0;
01917 REAL *weightPtr = m_weights;
01918 for ( int i=0;i<m_nrLayer;i++ )
01919 {
01920 int n = m_neuronsPerLayer[i+1];
01921 int nprev = m_neuronsPerLayer[i] + 1;
01922 int loopOffset = i==0? 1 : 0;
01923 int activationType = m_activationFunctionPerLayer[i+1];
01924
01925 if(i >= m_nrLayer/2)
01926 {
01927 autoF<<endl<<"// layer: "<<i<<endl;
01928 autoF<<"outVecPrev = new Array("<<nprev<<");"<<endl;
01929 autoF<<"for(k=0;k<"<<nprev<<";k++)"<<endl;
01930 if(i == m_nrLayer/2)
01931 autoF<<" outVecPrev[k]=vIn[k];"<<endl;
01932 else
01933 autoF<<" outVecPrev[k]=outVec[k];"<<endl;
01934 autoF<<"outVec = new Array("<<n+1<<");"<<endl;
01935 }
01936
01937 for ( int j=0;j<n;j++ )
01938 {
01939 if(i >= m_nrLayer/2)
01940 {
01941 autoF<<"// neuron: "<<j<<endl;
01942 autoF<<"w=[";
01943 for ( int k=loopOffset;k<nprev;k++ )
01944 autoF<<weightPtr[k]<<((k+1<nprev)?",":"");
01945 autoF<<"];"<<endl;
01946 autoF<<"sum=0.0;"<<endl;
01947 autoF<<"for(k=0;k<"<<nprev<<";k++)"<<endl;
01948 autoF<<" sum +=w[k]*outVecPrev[k];"<<endl;
01949 autoF<<"outVec["<<j<<"]=1.0/(1.0+Math.exp(-sum));"<<endl;
01950
01951 }
01952 weightPtr += nprev;
01953 }
01954
01955 if(i >= m_nrLayer/2)
01956 {
01957 autoF<<"outVec["<<n<<"]=1.0;"<<endl;
01958
01959 if(i==m_nrLayer-1)
01960 {
01961 autoF<<"//========= copy to output ========="<<endl;
01962 autoF<<"for(k=0;k<"<<n<<";k++)"<<endl;
01963 autoF<<" vOut[k]=outVec[k];"<<endl;
01964 }
01965 }
01966
01967 outputOffset += n+1;
01968 outputOffsetPrev += nprev;
01969 }
01970
01971 autoF<<"}"<<endl;
01972 autoF.close();
01973 }
01974
01981 int NNRBM::trainNN()
01982 {
01983 cout<<"Train the NN with "<<m_nrExamplesTrain<<" samples"<<endl;
01984 double rmseMin = 1e10, lastUpdate = 1e10, rmseProbeOld = 1e10, rmseTrain = 1e10, rmseProbe = 1e10;
01985 time_t t0 = time ( 0 );
01986 while ( 1 )
01987 {
01988 rmseProbeOld = rmseProbe;
01989 rmseTrain = getRMSETrain();
01990 rmseProbe = getRMSEProbe();
01991 lastUpdate = rmseProbeOld - rmseProbe;
01992
01993 cout<<"e:"<<m_globalEpochs<<" rmseTrain:"<<rmseTrain<<" rmseProbe:"<<rmseProbe<<" "<<flush;
01994
01995 if ( m_normalTrainStopping )
01996 {
01997 if ( rmseProbe < rmseMin )
01998 {
01999 rmseMin = rmseProbe;
02000 saveWeights();
02001 }
02002 else
02003 {
02004 cout<<"rmse rises."<<endl;
02005 return m_globalEpochs;
02006 }
02007 if ( m_minUpdateBound > fabs ( lastUpdate ) )
02008 {
02009 cout<<"min update too small (<"<<m_minUpdateBound<<")."<<endl;
02010 return m_globalEpochs;
02011 }
02012 }
02013 if ( m_maxEpochs == m_globalEpochs )
02014 {
02015 cout<<"max epochs reached."<<endl;
02016 return m_globalEpochs;
02017 }
02018
02019 trainOneEpoch();
02020
02021 cout<<"lRate:"<<m_learnRate<<" ";
02022 cout<<time ( 0 )-t0<<"[s]"<<endl;
02023 t0 = time ( 0 );
02024 }
02025 return -1;
02026 }
02027
/**
 * Train one epoch over all training samples with mini-batch updates.
 *
 * For every sample: forward pass, backward pass (gradients into m_deltaW),
 * then -- once a batch is full or the last sample is reached -- a weight
 * update, either with RPROP (per-weight adaptive step sizes) or with plain
 * gradient descent including optional L1/L2 weight decay and momentum.
 * Also applies the per-sample and per-epoch learn rate decay and increments
 * m_globalEpochs.
 *
 * @param updateLayer Optional per-layer flags; when given, only flagged
 *                    layers receive weight updates (layer-wise training)
 */
void NNRBM::trainOneEpoch ( bool* updateLayer )
{
    int batchCnt = 0;
    V_ZERO ( m_weightsBatchUpdate, m_nrWeights );
    m_sumSquaredError = 0.0;          // accumulated by backprop over this epoch
    m_sumSquaredErrorSamples = 0.0;

    for ( int i=0;i<m_nrExamplesTrain;i++ )
    {
        // clear per-sample gradients when layers can be frozen, because
        // backprop then skips (does not overwrite) the frozen layers' slots
        if ( updateLayer != 0 )
            V_ZERO ( m_deltaW, m_nrWeights );

        REAL* inputPtr = m_inputsTrain + i * m_nrInputs;
        REAL* targetPtr = m_targetsTrain + i * m_nrTargets;

        // forward pass fills m_outputs / m_derivates
        if ( m_useBLAS )
            forwardCalculationBLAS ( inputPtr );
        else
            forwardCalculation ( inputPtr );

        // backward pass fills m_deltaW with this sample's gradient
        if ( m_useBLAS )
            backpropBLAS ( inputPtr, targetPtr, updateLayer );
        else
            backprop ( inputPtr, targetPtr, updateLayer );

        // accumulate the gradient over the current mini-batch
        if ( m_batchSize > 1 )
            V_ADD ( m_nrWeights, m_deltaW, m_weightsBatchUpdate, m_weightsBatchUpdate );

        batchCnt++;

        // apply an update when the batch is full or at the last sample
        if ( batchCnt >= m_batchSize || i == m_nrExamplesTrain - 1 )
        {
            batchCnt = 0;
            if ( m_batchSize > 1 )
            {
                // move the accumulated batch gradient into m_deltaW
                V_COPY ( m_weightsBatchUpdate, m_deltaW, m_nrWeights );
                V_ZERO ( m_weightsBatchUpdate, m_nrWeights );
            }

            if ( m_enableRPROP )
            {
                // RPROP-style update: only the sign of the gradient is used,
                // each weight keeps its own adaptive step size
                REAL dW, dWOld, sign, update, prod;
                for ( int j=0;j<m_nrWeights;j++ )
                {
                    dW = m_deltaW[j];
                    dWOld = m_deltaWOld[j];
                    prod = dW * dWOld;
                    // NOTE(review): for dW == 0.0 this yields sign = -1, so the
                    // prod==0 branch below still moves the weight -- verify intended
                    sign = dW > 0.0? 1.0 : -1.0;
                    if ( prod > 0.0 )
                    {
                        // gradient kept its sign: grow the step (capped at updateMax)
                        m_adaptiveRPROPlRate[j] *= m_RPROP_etaPos;
                        if ( m_adaptiveRPROPlRate[j] > m_RPROP_updateMax )
                            m_adaptiveRPROPlRate[j] = m_RPROP_updateMax;
                        update = sign * m_adaptiveRPROPlRate[j];
                        m_weights[j] -= update + m_weightDecay * m_weights[j];
                        m_deltaWOld[j] = dW;
                    }
                    else if ( prod < 0.0 )
                    {
                        // sign flip: shrink the step (floored at updateMin),
                        // skip the update and force the prod==0 branch next time
                        m_adaptiveRPROPlRate[j] *= m_RPROP_etaNeg;
                        if ( m_adaptiveRPROPlRate[j] < m_RPROP_updateMin )
                            m_adaptiveRPROPlRate[j] = m_RPROP_updateMin;
                        m_deltaWOld[j] = 0.0;
                    }
                    else
                    {
                        // prod == 0 (first batch or after a sign flip): plain step
                        update = sign * m_adaptiveRPROPlRate[j];
                        m_weights[j] -= update + m_weightDecay * m_weights[j];
                        m_deltaWOld[j] = dW;
                    }
                }
            }
            else
            {
                // plain (stochastic) gradient descent branch

                // sanity check: abort loudly on NaN/Inf gradients
                for ( int j=0;j<m_nrWeights;j++ )
                    if ( isnan ( m_deltaW[j] ) || isinf ( m_deltaW[j] ) )
                    {
                        cout<<endl<<"value:"<<m_deltaW[j]<<endl<<"j:"<<j<<" i:"<<i<<endl;
                        assert ( false );
                    }
                // scaled gradient step: tmp0 = learnRate * deltaW
                V_MULC ( m_deltaW, m_learnRate, m_weightsTmp0, m_nrWeights );

                if ( m_weightDecay > 0.0 )
                {
                    if ( m_enableL1Regularization )
                    {
                        // L1 penalty: add weightDecay * learnRate * sign(w)
                        for ( int j=0;j<m_nrWeights;j++ )
                            m_weightsTmp2[j] = 1.0;
                        for ( int j=0;j<m_nrWeights;j++ )
                            if ( m_weights[j]<0.0 )
                                m_weightsTmp2[j] = -1.0;

                        CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weightsTmp2, 1, m_weightsTmp0, 1 );
                    }
                    else
                        // L2 penalty: add weightDecay * learnRate * w
                        CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weights, 1, m_weightsTmp0, 1 );
                }

                if ( m_momentum > 0.0 )
                {
                    // exponential moving average of the update:
                    // tmp0 = (1-momentum)*tmp0 + momentum*tmp1, tmp1 keeps the history
                    V_MULC ( m_weightsTmp0, 1.0 - m_momentum, m_weightsTmp0, m_nrWeights );
                    V_MULC ( m_weightsTmp1, m_momentum, m_weightsTmp1, m_nrWeights );

                    V_ADD ( m_nrWeights, m_weightsTmp0, m_weightsTmp1, m_weightsTmp0 );

                    V_COPY ( m_weightsTmp0, m_weightsTmp1, m_nrWeights );
                }

                // descend: weights -= tmp0
                V_SUB ( m_nrWeights, m_weights, m_weightsTmp0, m_weights );
            }
        }

        // per-sample learn rate decay (floored at m_learnRateMin)
        m_learnRate -= m_learnrateDecreaseRate;
        if ( m_learnRate < m_learnRateMin )
            m_learnRate = m_learnRateMin;

    }

    // per-epoch learn rate decay (floored at m_learnRateMin)
    m_learnRate -= m_learnrateDecreaseRateEpoch;
    if ( m_learnRate < m_learnRateMin )
        m_learnRate = m_learnRateMin;

    m_globalEpochs++;
}
02210
02217 void NNRBM::setGlobalEpochs ( int e )
02218 {
02219 m_globalEpochs = e;
02220 }
02221
02226 void NNRBM::printLearnrate()
02227 {
02228 cout<<"lRate:"<<m_learnRate<<" "<<flush;
02229 }
02230
/**
 * Hook invoked by trainNN() whenever the probe RMSE improves.
 * Currently a no-op: this class keeps no backup of the best weights.
 * NOTE(review): if early stopping is supposed to restore the best epoch's
 * weights, this needs an implementation -- verify the intended behavior.
 */
void NNRBM::saveWeights()
{

}
02239
02247 void NNRBM::predictSingleInput ( REAL* input, REAL* output )
02248 {
02249 REAL* inputPtr = input;
02250
02251
02252 if ( m_useBLAS )
02253 forwardCalculationBLAS ( inputPtr );
02254 else
02255 forwardCalculation ( inputPtr );
02256
02257
02258 REAL* outputPtr = m_outputs + m_nrOutputs - m_nrTargets - 1;
02259 for ( int i=0;i<m_nrTargets;i++ )
02260 output[i] = outputPtr[i] * m_scaleOutputs + m_offsetOutputs;
02261 }
02262
02271 REAL NNRBM::calcRMSE ( REAL* inputs, REAL* targets, int examples )
02272 {
02273 double rmse = 0.0;
02274 for ( int i=0;i<examples;i++ )
02275 {
02276 REAL* inputPtr = inputs + i * m_nrInputs;
02277 REAL* targetPtr = targets + i * m_nrTargets;
02278
02279 predictSingleInput ( inputPtr, m_outputsTmp );
02280
02281 for ( int j=0;j<m_nrTargets;j++ )
02282 rmse += ( m_outputsTmp[j] - targetPtr[j] ) * ( m_outputsTmp[j] - targetPtr[j] );
02283 }
02284 rmse = sqrt ( rmse/ ( double ) ( examples*m_nrTargets ) );
02285 return rmse;
02286 }
02287
02293 REAL NNRBM::getRMSETrain()
02294 {
02295 return calcRMSE ( m_inputsTrain, m_targetsTrain, m_nrExamplesTrain );
02296 }
02297
02303 REAL NNRBM::getRMSEProbe()
02304 {
02305 return calcRMSE ( m_inputsProbe, m_targetsProbe, m_nrExamplesProbe );
02306 }
02307
02313 REAL* NNRBM::getWeightPtr()
02314 {
02315 return m_weights;
02316 }
02317
02323 void NNRBM::setWeights ( REAL* w )
02324 {
02325 cout<<"Set new weights"<<endl;
02326 for ( int i=0;i<m_nrWeights;i++ )
02327 m_weights[i] = w[i];
02328 }
02329
02335 int NNRBM::getNrWeights()
02336 {
02337 return m_nrWeights;
02338 }