#include <nnrbm.h>
Public Member Functions | |
NNRBM () | |
~NNRBM () | |
void | setNrTargets (int n) |
void | setNrInputs (int n) |
void | setNrExamplesTrain (int n) |
void | setNrExamplesProbe (int n) |
void | setTrainInputs (REAL *inputs) |
void | setTrainTargets (REAL *targets) |
void | setProbeInputs (REAL *inputs) |
void | setProbeTargets (REAL *targets) |
void | setInitWeightFactor (REAL factor) |
void | setLearnrate (REAL learnrate) |
void | setLearnrateMinimum (REAL learnrateMin) |
void | setLearnrateSubtractionValueAfterEverySample (REAL learnrateDecreaseRate) |
void | setLearnrateSubtractionValueAfterEveryEpoch (REAL learnrateDecreaseRate) |
void | setMomentum (REAL momentum) |
void | setWeightDecay (REAL weightDecay) |
void | setBatchSize (int size) |
void | setMinUpdateErrorBound (REAL minUpdateBound) |
void | setMaxEpochs (int epochs) |
void | setRPROPPosNeg (REAL etaPos, REAL etaNeg) |
void | setRPROPMinMaxUpdate (REAL min, REAL max) |
void | setL1Regularization (bool en) |
void | initNNWeights (time_t seed) |
void | enableErrorFunctionMAE (bool en) |
void | setRBMLearnParams (REAL learnrateWeights, REAL learnrateBiasVis, REAL learnrateBiasHid, REAL weightDecay, int maxEpoch) |
void | setNNStructure (int nrLayer, int *neuronsPerLayer, bool lastLinearLayer=false, int *layerType=0) |
void | printLearnrate () |
void | setScaleOffset (REAL scale, REAL offset) |
void | setNormalTrainStopping (bool en) |
void | setGlobalEpochs (int e) |
void | enableRPROP (bool en) |
void | useBLASforTraining (bool enable) |
void | trainOneEpoch (bool *updateLayer=0) |
int | trainNN () |
REAL | getRMSETrain () |
REAL | getRMSEProbe () |
void | predictSingleInput (REAL *input, REAL *output) |
REAL * | getWeightPtr () |
void | setWeights (REAL *w) |
int | getNrWeights () |
int | getWeightIndex (int layer, int neuron, int weight) |
int | getBiasIndex (int layer, int neuron) |
int | getOutputIndex (int layer, int neuron) |
void | rbmPretraining (REAL *input, REAL *target, int nSamples, int nTarget, int firstNLayer=-1, int crossRun=-1, int nFinetuningEpochs=-1, double finetuningLearnRate=0.0) |
vector< int > | getEncoder () |
void | printMiddleLayerToFile (string fname, REAL *input, REAL *target, int nSamples, int nTarget) |
void | printAutoencoderWeightsToJavascript (string fname) |
Public Attributes | |
double | m_sumSquaredError |
double | m_sumSquaredErrorSamples |
NNRBM * | m_nnAuto |
Private Member Functions | |
void | saveWeights () |
REAL | calcRMSE (REAL *inputs, REAL *targets, int examples) |
void | forwardCalculation (REAL *input) |
void | forwardCalculationBLAS (REAL *input) |
void | backpropBLAS (REAL *input, REAL *target, bool *updateLayer=0) |
void | backprop (REAL *input, REAL *target, bool *updateLayer=0) |
REAL | getInitWeight (int fanIn) |
Private Attributes | |
int | m_nrTargets |
int | m_nrInputs |
int | m_nrExamplesTrain |
int | m_nrExamplesProbe |
REAL * | m_inputsTrain |
REAL * | m_inputsProbe |
REAL * | m_targetsTrain |
REAL * | m_targetsProbe |
REAL | m_initWeightFactor |
int | m_globalEpochs |
REAL | m_RPROP_etaPos |
REAL | m_RPROP_etaNeg |
REAL | m_RPROP_updateMin |
REAL | m_RPROP_updateMax |
REAL | m_learnRate |
REAL | m_learnRateMin |
REAL | m_learnrateDecreaseRate |
REAL | m_learnrateDecreaseRateEpoch |
REAL | m_momentum |
REAL | m_weightDecay |
REAL | m_minUpdateBound |
int | m_batchSize |
REAL | m_rbmLearnrateWeights |
REAL | m_rbmLearnrateBiasVis |
REAL | m_rbmLearnrateBiasHid |
REAL | m_rbmWeightDecay |
int | m_rbmMaxEpochs |
REAL | m_scaleOutputs |
REAL | m_offsetOutputs |
int | m_maxEpochs |
bool | m_useBLAS |
bool | m_enableRPROP |
bool | m_normalTrainStopping |
bool | m_enableL1Regularization |
bool | m_errorFunctionMAE |
int | m_nrLayer |
int * | m_neuronsPerLayer |
int | m_nrWeights |
int | m_nrOutputs |
int * | m_nrLayWeights |
int * | m_nrLayWeightOffsets |
REAL * | m_outputs |
REAL * | m_outputsTmp |
REAL * | m_derivates |
REAL * | m_d1 |
REAL * | m_weights |
REAL * | m_weightsTmp0 |
REAL * | m_weightsTmp1 |
REAL * | m_weightsTmp2 |
REAL * | m_weightsBatchUpdate |
REAL * | m_weightsOld |
REAL * | m_weightsOldOld |
REAL * | m_deltaW |
REAL * | m_deltaWOld |
REAL * | m_adaptiveRPROPlRate |
int * | m_activationFunctionPerLayer |
bool | m_enableAutoencoder |
The goal of this class is to provide a basic and fast class for training and prediction. It supports the essential training functionality.
Memory allocation and normalization of the data (features + targets) must be done outside; this class only receives pointers to the training and probe (= validation) sets.
Forward and backward calculation can be done either with plain loops or with vector-matrix operations (BLAS). For large nets the BLAS calculation should be used (~2x faster).
Standard training is performed with a global learnrate and stochastic gradient descent. Batch training is also possible if the batchSize is > 1.
This class also implements the RPROP learning algorithm: "Rprop - Description and Implementation Details", Martin Riedmiller, 1994. Technical report.
Definition at line 39 of file nnrbm.h.
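A minimal usage sketch, assuming REAL is the floating-point type from nnrbm.h and that all buffers are allocated and normalized by the caller, as required above. The layer sizes, learning parameters and call order are illustrative, not prescriptive:

#include <nnrbm.h>

void trainAndPredict ( REAL* trainX, REAL* trainY, REAL* probeX, REAL* probeY,
                       int nTrain, int nProbe, int nInputs, int nTargets )
{
    NNRBM net;

    // describe the data (the class only stores the pointers)
    net.setNrInputs ( nInputs );
    net.setNrTargets ( nTargets );
    net.setNrExamplesTrain ( nTrain );
    net.setNrExamplesProbe ( nProbe );
    net.setTrainInputs ( trainX );
    net.setTrainTargets ( trainY );
    net.setProbeInputs ( probeX );
    net.setProbeTargets ( probeY );

    // learning parameters (illustrative values)
    net.setInitWeightFactor ( 1.0 );
    net.setLearnrate ( 1e-3 );
    net.setLearnrateMinimum ( 1e-5 );
    net.setLearnrateSubtractionValueAfterEverySample ( 0.0 );
    net.setLearnrateSubtractionValueAfterEveryEpoch ( 1e-5 );
    net.setMomentum ( 0.0 );
    net.setWeightDecay ( 0.0 );
    net.setBatchSize ( 1 );               // stochastic gradient descent
    net.setMinUpdateErrorBound ( 1e-6 );
    net.setMaxEpochs ( 100 );
    net.setScaleOffset ( 1.0, 0.0 );      // no output rescaling
    net.setNormalTrainStopping ( true );
    net.useBLASforTraining ( true );      // recommended for large nets (~2x faster)

    // structure: two hidden layers with 50 and 20 neurons, sigmoid activations
    int neuronsPerLayer[] = { 50, 20 };
    net.setNNStructure ( 3, neuronsPerLayer );
    net.initNNWeights ( 0 );

    net.trainNN();                        // train until the stop criterion is met

    REAL* prediction = new REAL[nTargets];
    net.predictSingleInput ( probeX, prediction );   // predict the first probe example
    delete[] prediction;
}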
NNRBM::NNRBM | ( | ) |
Constructor
Definition at line 8 of file nnrbm.cpp.
00009 { 00010 // init member vars 00011 m_nrTargets = 0; 00012 m_nrInputs = 0; 00013 m_nrExamplesTrain = 0; 00014 m_nrExamplesProbe = 0; 00015 m_inputsTrain = 0; 00016 m_inputsProbe = 0; 00017 m_targetsTrain = 0; 00018 m_targetsProbe = 0; 00019 m_initWeightFactor = 0; 00020 m_globalEpochs = 0; 00021 m_RPROP_etaPos = 0; 00022 m_RPROP_etaNeg = 0; 00023 m_RPROP_updateMin = 0; 00024 m_RPROP_updateMax = 0; 00025 m_learnRate = 0; 00026 m_learnRateMin = 0; 00027 m_learnrateDecreaseRate = 0; 00028 m_learnrateDecreaseRateEpoch = 0; 00029 m_momentum = 0; 00030 m_weightDecay = 0; 00031 m_minUpdateBound = 0; 00032 m_batchSize = 0; 00033 m_scaleOutputs = 0; 00034 m_offsetOutputs = 0; 00035 m_maxEpochs = 0; 00036 m_useBLAS = 0; 00037 m_enableRPROP = 0; 00038 m_normalTrainStopping = 0; 00039 m_nrLayer = 0; 00040 m_neuronsPerLayer = 0; 00041 m_nrWeights = 0; 00042 m_nrOutputs = 0; 00043 m_nrLayWeights = 0; 00044 m_outputs = 0; 00045 m_outputsTmp = 0; 00046 m_derivates = 0; 00047 m_d1 = 0; 00048 m_weights = 0; 00049 m_weightsTmp0 = 0; 00050 m_weightsTmp1 = 0; 00051 m_weightsTmp2 = 0; 00052 m_weightsBatchUpdate = 0; 00053 m_weightsOld = 0; 00054 m_weightsOldOld = 0; 00055 m_deltaW = 0; 00056 m_deltaWOld = 0; 00057 m_adaptiveRPROPlRate = 0; 00058 m_enableL1Regularization = 0; 00059 m_errorFunctionMAE = 0; 00060 m_activationFunctionPerLayer = 0; 00061 m_enableAutoencoder = 0; 00062 m_nrLayWeightOffsets = 0; 00063 m_sumSquaredError = 0.0; 00064 m_sumSquaredErrorSamples = 0.0; 00065 m_rbmLearnrateWeights = 0.0; 00066 m_rbmLearnrateBiasVis = 0.0; 00067 m_rbmLearnrateBiasHid = 0.0; 00068 m_rbmWeightDecay = 0.0; 00069 m_rbmMaxEpochs = 0; 00070 }
NNRBM::~NNRBM | ( | ) |
Destructor
Definition at line 75 of file nnrbm.cpp.
00076 { 00077 if ( m_neuronsPerLayer ) 00078 delete[] m_neuronsPerLayer; 00079 m_neuronsPerLayer = 0; 00080 if ( m_nrLayWeights ) 00081 delete[] m_nrLayWeights; 00082 m_nrLayWeights = 0; 00083 if ( m_outputs ) 00084 delete[] m_outputs; 00085 m_outputs = 0; 00086 if ( m_outputsTmp ) 00087 delete[] m_outputsTmp; 00088 m_outputsTmp = 0; 00089 if ( m_derivates ) 00090 delete[] m_derivates; 00091 m_derivates = 0; 00092 if ( m_d1 ) 00093 delete[] m_d1; 00094 m_d1 = 0; 00095 if ( m_weights ) 00096 delete[] m_weights; 00097 m_weights = 0; 00098 if ( m_weightsTmp0 ) 00099 delete[] m_weightsTmp0; 00100 m_weightsTmp0 = 0; 00101 if ( m_weightsTmp1 ) 00102 delete[] m_weightsTmp1; 00103 m_weightsTmp1 = 0; 00104 if ( m_weightsTmp2 ) 00105 delete[] m_weightsTmp2; 00106 m_weightsTmp2 = 0; 00107 if ( m_weightsBatchUpdate ) 00108 delete[] m_weightsBatchUpdate; 00109 m_weightsBatchUpdate = 0; 00110 if ( m_weightsOld ) 00111 delete[] m_weightsOld; 00112 m_weightsOld = 0; 00113 if ( m_weightsOldOld ) 00114 delete[] m_weightsOldOld; 00115 m_weightsOldOld = 0; 00116 if ( m_deltaW ) 00117 delete[] m_deltaW; 00118 m_deltaW = 0; 00119 if ( m_deltaWOld ) 00120 delete[] m_deltaWOld; 00121 m_deltaWOld = 0; 00122 if ( m_adaptiveRPROPlRate ) 00123 delete[] m_adaptiveRPROPlRate; 00124 m_adaptiveRPROPlRate = 0; 00125 if ( m_activationFunctionPerLayer ) 00126 delete[] m_activationFunctionPerLayer; 00127 m_activationFunctionPerLayer = 0; 00128 if ( m_nrLayWeightOffsets ) 00129 delete[] m_nrLayWeightOffsets; 00130 m_nrLayWeightOffsets = 0; 00131 }
void NNRBM::backprop | ( | REAL * | input, | |
REAL * | target, | |||
bool * | updateLayer = 0 | |||
) | [private] |
Calculate the weight updates for the whole net with standard formulas (speed optimized), according to the backprop rule. The weight updates are stored in m_deltaW.
input | Input vector | |
target | Target values (vector) |
Definition at line 1629 of file nnrbm.cpp.
01630 { 01631 REAL sum0, d1; 01632 01633 int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1; // -1 for bias and output neuron 01634 int n0 = m_neuronsPerLayer[m_nrLayer-1]; 01635 int outputOffsetPrev = outputOffset - n0 - 1; 01636 int outputOffsetNext = outputOffset; 01637 01638 int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer]; 01639 int weightOffsetNext, nP1; 01640 01641 REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error; 01642 01643 // ================== the output neuron: d(j)=(b-o(j))*Aj' ================== 01644 for ( int i=0;i<m_nrTargets;i++ ) 01645 { 01646 double out = m_outputs[outputOffset+i]; 01647 01648 REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i]; 01649 m_sumSquaredError += errorTrain * errorTrain; 01650 m_sumSquaredErrorSamples += 1.0; 01651 01652 targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs; 01653 error = out - targetConverted; 01654 01655 if ( m_errorFunctionMAE ) 01656 error = error > 0.0? 1.0 : -1.0; 01657 d1 = error * m_derivates[outputOffset+i]; 01658 m_d1[outputOffset+i] = d1; 01659 01660 if ( updateLayer ) 01661 if ( updateLayer[m_nrLayer] == false ) 01662 continue; 01663 01664 deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 ); 01665 if ( m_nrLayer==1 ) 01666 { 01667 outputsPtr = input - 1; 01668 deltaWPtr[0] = d1; 01669 for ( int j=1;j<n0+1;j++ ) 01670 deltaWPtr[j] = d1 * outputsPtr[j]; 01671 } 01672 else 01673 { 01674 outputsPtr = m_outputs + outputOffsetPrev; 01675 for ( int j=0;j<n0+1;j++ ) 01676 deltaWPtr[j] = d1 * outputsPtr[j]; 01677 } 01678 01679 } 01680 01681 // ================== all other neurons in the net ================== 01682 outputOffsetNext = outputOffset; // next to current 01683 outputOffset = outputOffsetPrev; // current to prev 01684 n0 = m_neuronsPerLayer[m_nrLayer-2]; 01685 outputOffsetPrev -= n0 + 1; // prev newnrInputs_ 01686 weightOffset -= m_nrLayWeights[m_nrLayer-1]; // offset to weight pointer 01687 weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer]; 01688 01689 for ( int i=m_nrLayer-1;i>0;i-- ) // all layers from output to input 01690 { 01691 int n = m_neuronsPerLayer[i]; 01692 int nNext = m_neuronsPerLayer[i+1]; 01693 int nPrev = m_neuronsPerLayer[i-1]; 01694 nP1 = n+1; 01695 01696 d1Ptr0 = m_d1 + outputOffsetNext; 01697 derivatesPtr = m_derivates + outputOffset; 01698 weightsPtr = m_weights + weightOffsetNext; 01699 d1Ptr = m_d1 + outputOffset; 01700 deltaWPtr = m_deltaW + weightOffset; 01701 if ( i==1 ) 01702 outputsPtr = input - 1; 01703 else 01704 outputsPtr = m_outputs + outputOffsetPrev; 01705 01706 bool updateWeights = true; 01707 if ( updateLayer ) 01708 if ( updateLayer[i] == false ) 01709 updateWeights = false; 01710 01711 for ( int j=0;j<n;j++ ) // every neuron in the layer 01712 { 01713 // calc d1 01714 sum0 = 0.0; 01715 for ( int k=0;k<nNext;k++ ) // all neurons in the next layer: d(j)=Aj'*Sum(k,d(k)*w(k,j)) 01716 sum0 += d1Ptr0[k] * weightsPtr[k*nP1]; 01717 sum0 *= *derivatesPtr; 01718 d1Ptr[j] = sum0; 01719 01720 if ( updateWeights ) 01721 { 01722 // weight updates 01723 if ( i==1 ) 01724 { 01725 deltaWPtr[0] = sum0; 01726 for ( int k=1;k<nPrev+1;k++ ) 01727 deltaWPtr[k] = sum0 * outputsPtr[k]; 01728 } 01729 else 01730 { 01731 for ( int k=0;k<nPrev+1;k++ ) 01732 deltaWPtr[k] = sum0 * outputsPtr[k]; 01733 } 01734 } 01735 01736 deltaWPtr += nPrev+1; 01737 weightsPtr++; 01738 derivatesPtr++; 01739 } 01740 01741 outputOffsetNext = outputOffset; // next to current 01742 outputOffset = outputOffsetPrev; 
// current to prev 01743 n0 = m_neuronsPerLayer[i-2]; 01744 outputOffsetPrev -= n0 + 1; // prev new 01745 weightOffset -= m_nrLayWeights[i-1]; // offset to weight pointer 01746 weightOffsetNext -= m_nrLayWeights[i]; 01747 } 01748 01749 }
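In compact form, the output-layer block of the listing above computes, for each target i: targetConverted = (target(i) - offset) / scale, error = out(i) - targetConverted (replaced by sign(error) when the MAE error function is enabled), d1(i) = error * A'(i), and deltaW = d1(i) * o(j) for every output o(j) of the previous layer, with o = 1 for the bias weight. The same quantities are computed in backpropBLAS below; only the inner-layer loops are replaced by BLAS calls.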
void NNRBM::backpropBLAS | ( | REAL * | input, | |
REAL * | target, | |||
bool * | updateLayer = 0 | |||
) | [private] |
Calculate the weight updates for the whole net with BLAS (MKL), according to the backprop rule. The weight updates are stored in m_deltaW.
input | Input vector | |
target | Target values (vector) |
Definition at line 1497 of file nnrbm.cpp.
01498 { 01499 REAL sum0, d1; 01500 01501 int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1; // -1 for bias and output neuron 01502 int n0 = m_neuronsPerLayer[m_nrLayer-1]; 01503 int outputOffsetPrev = outputOffset - n0 - 1; 01504 int outputOffsetNext = outputOffset; 01505 01506 int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer]; 01507 int weightOffsetNext, nP1; 01508 01509 REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error; 01510 01511 // ================== the output neuron: d(j)=(b-o(j))*Aj' ================== 01512 for ( int i=0;i<m_nrTargets;i++ ) 01513 { 01514 REAL out = m_outputs[outputOffset+i]; 01515 01516 REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i]; 01517 m_sumSquaredError += errorTrain * errorTrain; 01518 m_sumSquaredErrorSamples += 1.0; 01519 01520 targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs; 01521 01522 // cross-entropy error: E = - target * log(out/target) 01523 // dE/dout = - target * (1/out) * out' 01524 //if(out < 1e-6) 01525 // out = 1e-6; 01526 //error = - targetConverted / out; 01527 01528 /*REAL eps = 1e-6; 01529 out = (out < eps)? eps : out; 01530 out = (out > 1.0-eps)? 1.0-eps : out; 01531 error = (out-targetConverted)/(out*(1.0-out));*/ 01532 01533 error = out - targetConverted; 01534 01535 if ( m_errorFunctionMAE ) 01536 error = error > 0.0? 1.0 : -1.0; 01537 01538 d1 = error * m_derivates[outputOffset+i]; 01539 m_d1[outputOffset+i] = d1; 01540 01541 if ( updateLayer ) 01542 if ( updateLayer[m_nrLayer] == false ) 01543 continue; 01544 01545 deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 ); 01546 if ( m_nrLayer==1 ) 01547 { 01548 outputsPtr = input - 1; 01549 deltaWPtr[0] = d1; 01550 for ( int j=1;j<n0+1;j++ ) 01551 deltaWPtr[j] = d1 * outputsPtr[j]; 01552 } 01553 else 01554 { 01555 outputsPtr = m_outputs + outputOffsetPrev; 01556 for ( int j=0;j<n0+1;j++ ) 01557 deltaWPtr[j] = d1 * outputsPtr[j]; 01558 } 01559 } 01560 01561 // ================== all other neurons in the net ================== 01562 outputOffsetNext = outputOffset; // next to current 01563 outputOffset = outputOffsetPrev; // current to prev 01564 n0 = m_neuronsPerLayer[m_nrLayer-2]; 01565 outputOffsetPrev -= n0 + 1; // prev newnrInputs_ 01566 weightOffset -= m_nrLayWeights[m_nrLayer-1]; // offset to weight pointer 01567 weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer]; 01568 01569 for ( int i=m_nrLayer-1;i>0;i-- ) // all layers from output to input 01570 { 01571 int n = m_neuronsPerLayer[i]; 01572 int nNext = m_neuronsPerLayer[i+1]; 01573 int nPrev = m_neuronsPerLayer[i-1]; 01574 nP1 = n+1; 01575 01576 d1Ptr0 = m_d1 + outputOffsetNext; 01577 derivatesPtr = m_derivates + outputOffset; 01578 weightsPtr = m_weights + weightOffsetNext; 01579 d1Ptr = m_d1 + outputOffset; 01580 deltaWPtr = m_deltaW + weightOffset; 01581 if ( i==1 ) 01582 outputsPtr = input; 01583 else 01584 outputsPtr = m_outputs + outputOffsetPrev; 01585 01586 // d(j) = SUM(d(k)*w(k,j)) 01587 CBLAS_GEMV ( CblasRowMajor, CblasTrans, nNext, n, 1.0, weightsPtr, nP1, d1Ptr0, 1, 0.0, d1Ptr, 1 ); // d1(j) =W_T*d1(k) 01588 V_MUL ( n, d1Ptr, derivatesPtr, d1Ptr ); 01589 01590 bool updateWeights = true; 01591 if ( updateLayer ) 01592 if ( updateLayer[i] == false ) 01593 updateWeights = false; 01594 01595 // every neuron in the layer calc weight update 01596 for ( int j=0;j<n;j++ ) 01597 { 01598 if ( updateWeights ) 01599 { 01600 if ( i==1 ) 01601 { 01602 V_COPY ( outputsPtr, deltaWPtr+1, nPrev ); 01603 deltaWPtr[0] = 
1.0; 01604 } 01605 else 01606 V_COPY ( outputsPtr, deltaWPtr, nPrev+1 ); 01607 V_MULC ( deltaWPtr, d1Ptr[j], deltaWPtr, nPrev+1 ); 01608 } 01609 deltaWPtr += nPrev+1; 01610 } 01611 01612 outputOffsetNext = outputOffset; // next to current 01613 outputOffset = outputOffsetPrev; // current to prev 01614 n0 = m_neuronsPerLayer[i-2]; 01615 outputOffsetPrev -= n0 + 1; // prev new 01616 weightOffset -= m_nrLayWeights[i-1]; // offset to weight pointer 01617 weightOffsetNext -= m_nrLayWeights[i]; 01618 } 01619 }
REAL NNRBM::calcRMSE | ( | REAL * | inputs, | |
REAL * | targets, | |||
int | examples | |||
) | [private] |
Calculate the RMSE over a given input/target set with the current neural net weights.
inputs | Input vectors (row wise) | |
targets | Target vectors (row wise) | |
examples | Number of examples |
Definition at line 2271 of file nnrbm.cpp.
02272 {
02273     double rmse = 0.0;
02274     for ( int i=0;i<examples;i++ )
02275     {
02276         REAL* inputPtr = inputs + i * m_nrInputs;
02277         REAL* targetPtr = targets + i * m_nrTargets;
02278 
02279         predictSingleInput ( inputPtr, m_outputsTmp );
02280 
02281         for ( int j=0;j<m_nrTargets;j++ )
02282             rmse += ( m_outputsTmp[j] - targetPtr[j] ) * ( m_outputsTmp[j] - targetPtr[j] );
02283     }
02284     rmse = sqrt ( rmse/ ( double ) ( examples*m_nrTargets ) );
02285     return rmse;
02286 }
void NNRBM::enableErrorFunctionMAE | ( | bool | en | ) |
void NNRBM::enableRPROP | ( | bool | en | ) |
void NNRBM::forwardCalculation | ( | REAL * | input | ) | [private] |
Forward calculation through the NNRBM (with loops). Outputs are stored in m_outputs, the first derivatives in m_derivates.
input | Input vector |
Definition at line 1860 of file nnrbm.cpp.
01861 { 01862 int outputOffset = m_neuronsPerLayer[0] + 1, outputOffsetPrev = 0; 01863 REAL tmp0, tmp1, sum0; 01864 REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights; 01865 for ( int i=0;i<m_nrLayer;i++ ) // to all layer 01866 { 01867 int n = m_neuronsPerLayer[i+1]; 01868 int nprev = m_neuronsPerLayer[i] + 1; 01869 int loopOffset = i==0? 1 : 0; 01870 int activationType = m_activationFunctionPerLayer[i+1]; 01871 ptr0 = m_outputs + outputOffset; 01872 ptr1 = m_derivates + outputOffset; 01873 if ( i==0 ) 01874 outputPtr = input - loopOffset; 01875 else 01876 outputPtr = m_outputs + outputOffsetPrev; 01877 01878 for ( int j=0;j<n;j++ ) // all neurons in this layer 01879 { 01880 sum0 = i==0? weightPtr[0] : 0.0; // dot product sum, for inputlayer: init with bias 01881 for ( int k=loopOffset;k<nprev;k++ ) // calc dot product 01882 sum0 += weightPtr[k] * outputPtr[k]; 01883 weightPtr += nprev; 01884 if ( activationType == 0 ) // sigmoid: f(x)=1/(1+exp(-x)) 01885 { 01886 tmp0 = exp ( -sum0 ); 01887 ptr1[j] = tmp0/ ( ( 1.0 + tmp0 ) * ( 1.0 + tmp0 ) ); // derivation 01888 ptr0[j] = 1.0 / ( 1.0 + tmp0 ); // activation function 01889 } 01890 else if ( activationType == 1 ) // linear: f(x)=x 01891 { 01892 ptr0[j] = sum0; 01893 ptr1[j] = 1.0; 01894 } 01895 else if ( activationType == 2 ) // tanh: f(x)=tanh(x) 01896 { 01897 tmp0 = tanh ( sum0 ); 01898 ptr0[j] = tmp0; 01899 ptr1[j] = ( 1.0 - tmp0*tmp0 ); 01900 } 01901 else 01902 assert ( false ); 01903 } 01904 outputOffset += n+1; // this points to first neuron in current layer 01905 outputOffsetPrev += nprev; // this points to first neuron in previous layer 01906 } 01907 }
void NNRBM::forwardCalculationBLAS | ( | REAL * | input | ) | [private] |
Forward calculation through the NNRBM with BLAS and VML (MKL). Outputs are stored in m_outputs, the first derivatives in m_derivates.
input | Input vector |
Definition at line 1758 of file nnrbm.cpp.
01759 { 01760 int outputOffset = m_neuronsPerLayer[0]+1, outputOffsetPrev = 0; 01761 REAL tmp0, tmp1, sum0; 01762 REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights; 01763 01764 for ( int i=0;i<m_nrLayer;i++ ) // to all layer 01765 { 01766 int n = m_neuronsPerLayer[i+1]; 01767 int nprev = m_neuronsPerLayer[i] + 1; 01768 int inputOffset = 0; 01769 int activationType = m_activationFunctionPerLayer[i+1]; 01770 ptr0 = m_outputs + outputOffset; 01771 ptr1 = m_derivates + outputOffset; 01772 if ( i==0 ) 01773 { 01774 outputPtr = input; 01775 inputOffset = 1; 01776 } 01777 else 01778 outputPtr = m_outputs + outputOffsetPrev; 01779 01780 // WeightMatrix*InputVec = Outputs 01781 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, n, nprev - inputOffset, 1.0, weightPtr + inputOffset, nprev, outputPtr, 1, 0.0, ptr0, 1 ); 01782 if ( inputOffset ) 01783 { 01784 for ( int j=0;j<n;j++ ) 01785 ptr0[j] += weightPtr[j * nprev]; 01786 } 01787 01788 // activation fkt 01789 if ( activationType == 0 ) // sigmoid: f(x)=1/(1+exp(-x)) 01790 { 01791 for ( int j=0;j<n;j++ ) 01792 ptr0[j] = -ptr0[j]; 01793 V_EXP ( ptr0, n ); 01794 for ( int j=0;j<n;j++ ) 01795 { 01796 ptr0[j] = 1.0 / ( 1.0 + ptr0[j] ); // sigmoid 01797 ptr1[j] = ptr0[j] * ( 1.0 - ptr0[j] ); // derivative 01798 } 01799 } 01800 else if ( activationType == 1 ) // linear: f(x)=x, df(x)/dx=1 01801 { 01802 for ( int j=0;j<n;j++ ) 01803 ptr1[j] = 1.0; 01804 } 01805 else if ( activationType == 2 ) // tanh: f(x)=tanh(x) 01806 { 01807 V_TANH ( n, ptr0, ptr0 ); // m_outputs = tanh(m_outputs) 01808 V_SQR ( n, ptr0, ptr1 ); // ptr1[j] = ptr0[j]*ptr0[j] 01809 for ( int j=0;j<n;j++ ) 01810 ptr1[j] = 1.0 - ptr1[j]; 01811 } 01812 else if ( activationType == 3 ) // softmax: f(x)=exp(x)/sum(exp(xi)) (where xi are all outputs on this layer) 01813 { 01814 V_EXP ( ptr0, n ); 01815 01816 REAL sumOut = 0.0, invSumOut; 01817 for ( int j=0;j<n;j++ ) 01818 sumOut += ptr0[j]; 01819 01820 invSumOut = 1.0 / sumOut; 01821 01822 for ( int j=0;j<n;j++ ) 01823 ptr1[j] = ptr0[j] * ( sumOut - ptr0[j] ) * invSumOut * invSumOut; // derivative 01824 01825 for ( int j=0;j<n;j++ ) 01826 ptr0[j] *= invSumOut; // output 01827 01828 /* 01829 REAL sumOut = 0.0, invSumOut; 01830 for(int j=0;j<n;j++) 01831 sumOut += exp(ptr0[j]); 01832 invSumOut = 1.0 / sumOut; 01833 invSumOut = invSumOut * invSumOut; 01834 for(int j=0;j<n;j++) 01835 ptr1[j] = invSumOut * ( sumOut * exp(ptr0[j]) - exp(ptr0[j])*exp(ptr0[j]) ); // derivative 01836 // normalize output: outputs are probabilites 01837 for(int j=0;j<n;j++) 01838 ptr0[j] = exp(ptr0[j]) / sumOut; 01839 */ 01840 } 01841 else 01842 assert ( false ); 01843 01844 // update index 01845 weightPtr += n*nprev; 01846 outputOffset += n+1; // this points to first neuron in current layer 01847 outputOffsetPrev += nprev; // this points to first neuron in previous layer 01848 01849 } 01850 01851 }
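A note on the two implementations: forwardCalculation computes the sigmoid derivative as exp(-x) / (1 + exp(-x))^2, while forwardCalculationBLAS uses f(x) * (1 - f(x)); with f(x) = 1/(1+exp(-x)) the two forms are algebraically identical. The tanh derivative is 1 - tanh(x)^2, the linear derivative is 1, and the softmax derivative (only available in the BLAS path, the loop version asserts for layer type 3) is s_j * (1 - s_j) with s_j = exp(x_j) / sum_k exp(x_k).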
int NNRBM::getBiasIndex | ( | int | layer, | |
int | neuron | |||
) |
Get the index of the bias weight.
layer | Weight on layer | |
neuron | Neuron number |
Definition at line 674 of file nnrbm.cpp.
00675 {
00676     if ( layer == 0 )
00677         assert ( false );
00678 
00679     int nrNeur = m_neuronsPerLayer[layer];
00680     int nrNeurPrev = m_neuronsPerLayer[layer-1];
00681     if ( neuron >= nrNeur )
00682     {
00683         cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
00684         assert ( false );
00685     }
00686     int ind = m_nrLayWeightOffsets[layer];
00687     if ( layer == 1 ) // input layer
00688         ind += neuron* ( nrNeurPrev + 1 );
00689     else
00690         ind += nrNeurPrev + neuron* ( nrNeurPrev + 1 );
00691 
00692     if ( ind >= m_nrWeights )
00693     {
00694         cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
00695         assert ( false );
00696     }
00697 
00698     return ind;
00699 }
vector< int > NNRBM::getEncoder | ( | ) |
Return the number of neurons per layer of the encoder part of the configuration; the last element of the returned vector is the total number of encoder weights.
Definition at line 729 of file nnrbm.cpp.
00730 {
00731     vector<int> v;
00732     cout<<"Encoder:"<<endl;
00733     int midLayer = m_nrLayer/2 + 1;
00734     int weightCnt = 0;
00735     for ( int i=0;i<midLayer;i++ )
00736     {
00737         v.push_back ( m_neuronsPerLayer[i] );
00738         weightCnt += m_nrLayWeights[i];
00739         cout<<"neurons|weights: "<<m_neuronsPerLayer[i]<<"|"<<m_nrLayWeights[i]<<endl;
00740     }
00741     v.push_back ( weightCnt );
00742     return v;
00743 }
REAL NNRBM::getInitWeight | ( | int | fanIn | ) | [private] |
Return a random (uniform) initial weight for a given number of input connections of this neuron, following the 1/sqrt(fanIn) rule (from Yann LeCun).
fanIn | The number of input connections for this neuron |
Definition at line 571 of file nnrbm.cpp.
00572 {
00573     double nr = 2.0* ( rand() / ( double ) RAND_MAX-0.5 ); // -1 .. +1
00574     return ( 1.0/sqrt ( ( double ) fanIn ) ) * nr;
00575 }
int NNRBM::getNrWeights | ( | ) |
int NNRBM::getOutputIndex | ( | int | layer, | |
int | neuron | |||
) |
Get the index of the output
layer | Output on layer | |
neuron | Neuron number |
Definition at line 709 of file nnrbm.cpp.
00710 {
00711     if ( layer == 0 || layer > m_nrLayer )
00712         assert ( false );
00713 
00714     if ( neuron >= m_neuronsPerLayer[layer] )
00715         assert ( false );
00716 
00717     int ind = 0;
00718     for ( int i=0;i<layer;i++ )
00719         ind += m_neuronsPerLayer[i] + 1;
00720 
00721     return ind + neuron;
00722 }
REAL NNRBM::getRMSEProbe | ( | ) |
REAL NNRBM::getRMSETrain | ( | ) |
int NNRBM::getWeightIndex | ( | int | layer, | |
int | neuron, | |||
int | weight | |||
) |
Get the index of a weight.
layer | Weight on layer | |
neuron | Neuron number | |
weight | Weight number |
Definition at line 633 of file nnrbm.cpp.
00634 {
00635     if ( layer == 0 )
00636         assert ( false );
00637 
00638     int nrNeur = m_neuronsPerLayer[layer];
00639     int nrNeurPrev = m_neuronsPerLayer[layer-1];
00640     if ( neuron >= nrNeur )
00641     {
00642         cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
00643         assert ( false );
00644     }
00645     if ( weight >= nrNeurPrev )
00646     {
00647         cout<<"weight:"<<weight<<" nrNeurPrev:"<<nrNeurPrev<<endl;
00648         assert ( false );
00649     }
00650 
00651     int ind = m_nrLayWeightOffsets[layer];
00652     if ( layer == 1 ) // input layer
00653         ind += 1 + weight + neuron* ( nrNeurPrev + 1 );
00654     else
00655         ind += weight + neuron* ( nrNeurPrev + 1 );
00656 
00657     if ( ind >= m_nrWeights )
00658     {
00659         cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
00660         assert ( false );
00661     }
00662 
00663     return ind;
00664 }
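A small sketch showing how the index helpers combine with getWeightPtr(); it assumes a fully configured net, and the concrete layer/neuron/weight indices are illustrative only:

void inspectWeights ( NNRBM& net )
{
    REAL* w = net.getWeightPtr();               // flat weight array with getNrWeights() entries

    int wIdx = net.getWeightIndex ( 1, 3, 7 );  // layer 1, neuron 3, weight of input connection 7
    int bIdx = net.getBiasIndex   ( 1, 3 );     // bias weight of the same neuron

    cout<<"weight:"<<w[wIdx]<<" bias:"<<w[bIdx]<<endl;
    w[wIdx] *= 0.5;                             // the array can also be edited in place
}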
REAL * NNRBM::getWeightPtr | ( | ) |
void NNRBM::initNNWeights | ( | time_t | seed | ) |
Initialize all weights in the net.
seed | The random seed (the same seed yields the exact same weight initialization) |
Definition at line 582 of file nnrbm.cpp.
00583 {
00584     srand ( seed );
00585     cout<<"init weights "<<endl;
00586     REAL factor = m_initWeightFactor;
00587     int cnt = 0;
00588     for ( int i=0;i<m_nrLayer;i++ ) // through all layers
00589     {
00590         int n = m_neuronsPerLayer[i+1];
00591         int nprev = m_neuronsPerLayer[i] + 1; // +1 for bias
00592         for ( int j=0;j<n;j++ ) // all neurons per layer
00593         {
00594             for ( int k=0;k<nprev;k++ ) // all weights from this neuron
00595             {
00596                 m_weights[cnt] = m_weightsOld[i] = m_weightsOldOld[i] = getInitWeight ( nprev ) * factor;
00597                 cnt++;
00598             }
00599         }
00600     }
00601 
00602     // check the number
00603     if ( cnt != m_nrWeights )
00604         assert ( false );
00605 }
void NNRBM::predictSingleInput | ( | REAL * | input, | |
REAL * | output | |||
) |
Predict the output for an input vector with the neural net. The current m_weights are used for the forward calculation through the net.
input | Input vector (pointer) | |
output | Output vector (pointer) |
Definition at line 2247 of file nnrbm.cpp.
02248 {
02249     REAL* inputPtr = input;
02250 
02251     // forward
02252     if ( m_useBLAS )
02253         forwardCalculationBLAS ( inputPtr );
02254     else
02255         forwardCalculation ( inputPtr );
02256 
02257     // output correction
02258     REAL* outputPtr = m_outputs + m_nrOutputs - m_nrTargets - 1;
02259     for ( int i=0;i<m_nrTargets;i++ )
02260         output[i] = outputPtr[i] * m_scaleOutputs + m_offsetOutputs;
02261 }
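A usage sketch for batch prediction with a trained net, assuming the inputs are stored row-wise as for setProbeInputs; the output buffer is owned by the caller:

void predictAll ( NNRBM& net, REAL* probeX, int nProbe, int nrInputs, int nrTargets )
{
    REAL* output = new REAL[nrTargets];                          // caller-owned output buffer
    for ( int i=0;i<nProbe;i++ )
    {
        net.predictSingleInput ( probeX + i*nrInputs, output );  // one row at a time
        cout<<"example "<<i<<" output[0]:"<<output[0]<<endl;
    }
    delete[] output;
}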
void NNRBM::printLearnrate | ( | ) |
void NNRBM::printMiddleLayerToFile | ( | string | fname, | |
REAL * | input, | |||
REAL * | target, | |||
int | nSamples, | |||
int | nTarget | |||
) |
Dummy function for printing the predictions from the middle (code) layer
fname | Filename | |
input | input matrix | |
target | target matrix | |
nSamples | nr of samples (rows) | |
nTarget | nr of targets (cols of target) |
Definition at line 1465 of file nnrbm.cpp.
01466 {
01467     fstream f0 ( fname.c_str(),ios::out );
01468     REAL* p = new REAL[m_nrInputs];
01469     for ( int i=0;i<nSamples;i++ )
01470     {
01471         m_nnAuto->predictSingleInput ( input + i*m_nrInputs, p );
01472 
01473         for ( int j=0;j<nTarget;j++ )
01474             f0<<target[j+i*nTarget]<<" ";
01475 
01476         int midLayer = m_nrLayer - 1;
01477         for ( int j=0;j<m_nnAuto->m_neuronsPerLayer[midLayer];j++ )
01478             f0<<m_nnAuto->m_outputs[m_nnAuto->getOutputIndex ( midLayer, j ) ]<<" ";
01479 
01480         //for ( int j=0;j<m_nrInputs;j++ )
01481         //    f0<<p[j]<<" ";
01482 
01483         f0<<endl;
01484     }
01485     delete[] p;
01486     f0.close();
01487 }
void NNRBM::rbmPretraining | ( | REAL * | input, | |
REAL * | target, | |||
int | nSamples, | |||
int | nTarget, | |||
int | firstNLayer = -1 , |
|||
int | crossRun = -1 , |
|||
int | nFinetuningEpochs = -1 , |
|||
double | finetuningLearnRate = 0.0 | |||
) |
Greedy layer-wise pretraining, see G. Hinton: http://www.cs.toronto.edu/~hinton/MatlabForSciencePaper.html. This is directly translated from the Matlab code. A short usage sketch follows the code listing below.
input | input matrix (row wise) | |
target | target matrix | |
nSamples | number of samples | |
nTarget | number of targets | |
firstNLayer | Layer where the autoencoder ends | 
crossRun | Index of the current cross-validation run | 
nFinetuningEpochs | Number of epochs for the gradient-descent-based finetuning | 
finetuningLearnRate | learnrate in the finetuning phase |
Definition at line 759 of file nnrbm.cpp.
00760 { 00761 cout<<endl<<endl<<"=== set structure for fine tuning of the unrolled autoencoder ==="<<endl; 00762 m_nnAuto = new NNRBM(); 00763 m_nnAuto->setInitWeightFactor ( 0.0 ); 00764 m_nnAuto->setScaleOffset ( m_scaleOutputs, m_offsetOutputs ); 00765 m_nnAuto->setNrTargets ( m_nrInputs ); 00766 m_nnAuto->setNrInputs ( m_nrInputs ); 00767 m_nnAuto->setNrExamplesTrain ( nSamples ); 00768 m_nnAuto->setTrainInputs ( input ); 00769 m_nnAuto->setTrainTargets ( input ); 00770 00771 // learn parameters 00772 m_nnAuto->setInitWeightFactor ( m_initWeightFactor ); 00773 if ( finetuningLearnRate != 0.0 ) 00774 m_nnAuto->setLearnrate ( finetuningLearnRate ); 00775 else 00776 m_nnAuto->setLearnrate ( m_learnRate ); 00777 m_nnAuto->setLearnrateMinimum ( m_learnRateMin ); 00778 m_nnAuto->setLearnrateSubtractionValueAfterEverySample ( m_learnrateDecreaseRate ); 00779 m_nnAuto->setLearnrateSubtractionValueAfterEveryEpoch ( m_learnrateDecreaseRateEpoch ); 00780 m_nnAuto->setMomentum ( m_momentum ); 00781 m_nnAuto->setWeightDecay ( 0.0 ); 00782 m_nnAuto->setMinUpdateErrorBound ( m_minUpdateBound ); 00783 m_nnAuto->setBatchSize ( m_batchSize ); 00784 m_nnAuto->setMaxEpochs ( m_maxEpochs ); 00785 m_nnAuto->setL1Regularization ( m_enableL1Regularization ); 00786 m_nnAuto->enableErrorFunctionMAE ( m_errorFunctionMAE ); 00787 m_nnAuto->setNormalTrainStopping ( true ); 00788 m_nnAuto->useBLASforTraining ( m_useBLAS ); 00789 00790 00791 int nLayer = m_nrLayer; 00792 if ( firstNLayer != -1 ) 00793 nLayer = firstNLayer; 00794 00795 int* layerConfig = new int[2* ( nLayer-1 ) ]; 00796 for ( int i=0;i<2* ( nLayer-1 ) - 1;i++ ) 00797 { 00798 if ( i < nLayer-1 ) 00799 layerConfig[i] = m_neuronsPerLayer[i+1]; 00800 else 00801 layerConfig[i] = m_neuronsPerLayer[2* ( nLayer-1 ) - 1 - i]; 00802 } 00803 m_nnAuto->m_enableAutoencoder = true; 00804 m_nnAuto->setNNStructure ( 2* ( nLayer-1 ), layerConfig ); 00805 m_nnAuto->initNNWeights ( 0 ); 00806 00807 00808 cout<<endl<<"=== RBM pretraining ("<<nSamples<<" samples) ==="<<endl; 00809 int weightOffset = 0; 00810 REAL* batchdata = new REAL[nSamples*m_neuronsPerLayer[0]]; 00811 for ( int j=0;j<nSamples*m_neuronsPerLayer[0];j++ ) 00812 batchdata[j] = input[j]; 00813 00814 // all layers from output to input 00815 for ( int i=0;i<nLayer-1;i++ ) 00816 { 00817 int nVis = m_neuronsPerLayer[i]; 00818 int nHid = m_neuronsPerLayer[i+1]; 00819 cout<<endl<<"layer:"<<i<<" nVis:"<<nVis<<" nHid:"<<nHid<<endl; 00820 00821 // probabilities, biases and other 00822 REAL* batchdataNext = new REAL[nSamples*nHid]; 00823 REAL* vishid = new REAL[nVis*nHid]; 00824 REAL* visbias = new REAL[nVis]; 00825 REAL* hidbias = new REAL[nHid]; 00826 REAL* poshidprobs = new REAL[nHid]; 00827 REAL* neghidprobs = new REAL[nHid]; 00828 REAL* hidbiasinc = new REAL[nHid]; 00829 REAL* visbiasinc = new REAL[nVis]; 00830 REAL* poshidstates = new REAL[nHid]; 00831 REAL* negdata = new REAL[nVis]; 00832 REAL* neghidact = new REAL[nHid]; 00833 REAL* negvisact = new REAL[nVis]; 00834 REAL* poshidact = new REAL[nHid]; 00835 REAL* posvisact = new REAL[nVis]; 00836 REAL* probs = new REAL[nHid]; 00837 REAL constant = 0.0; 00838 //REAL constant = -1.0; 00839 for ( int j=0;j<nVis;j++ ) 00840 { 00841 visbias[j] = constant; 00842 visbiasinc[j] = constant; 00843 negdata[j] = constant; 00844 } 00845 for ( int j=0;j<nHid;j++ ) 00846 { 00847 hidbias[j] = constant; 00848 hidbiasinc[j] = constant; 00849 poshidprobs[j] = constant; 00850 neghidprobs[j] = constant; 00851 poshidstates[j] = constant; 00852 } 00853 for ( int j=0;j<nVis*nHid;j++ 
) 00854 { 00855 vishid[j] = constant; 00856 } 00857 00858 // init weights 00859 unsigned int seed = rand(); 00860 cout<<"seed:"<<seed<<endl; 00861 //GAUSS_DISTRIBUTION(vishid, nVis*nHid, 0.0, 1.0/(double)sqrt((double)nVis), &seed); 00862 GAUSS_DISTRIBUTION ( vishid, nVis*nHid, 0.0, 0.1, &seed ); 00863 time_t t0 = time ( 0 ); 00864 00865 00866 // layer-wise training 00867 for ( int epoch=0;epoch<m_rbmMaxEpochs;epoch++ ) 00868 { 00869 double errsum = 0.0; 00870 int progress = nSamples/10 + 1, errcnt = 0; 00871 00872 for ( int sample=0;sample<nSamples;sample++ ) 00873 { 00874 // data vector 00875 REAL* data = batchdata + nVis * sample; 00876 REAL min = 1e10, max = -1e10, mean = 0.0; 00877 for ( int k=0;k<nVis;k++ ) 00878 { 00879 if ( min > data[k] ) 00880 min = data[k]; 00881 if ( max < data[k] ) 00882 max = data[k]; 00883 mean += data[k]; 00884 } 00885 mean /= ( double ) nVis; 00886 00887 if ( min > 1.0 || min < 0.0 ) 00888 { 00889 cout<<"min:"<<min<<endl; 00890 assert ( false ); 00891 } 00892 if ( max > 1.0 || max < 0.0 ) 00893 { 00894 cout<<"max:"<<max<<endl; 00895 assert ( false ); 00896 } 00897 00898 00899 // ===== POSITIVE PHASE ===== 00900 // this is like forward calculation 00901 // after calc the weighted sum per neuron, the values of the neurons get sampled 00902 00903 // poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1))); 00904 00905 // calc: poshidprobs = vishid * data 00906 00907 //for(int k=0;k<nHid;k++) 00908 //{ 00909 // REAL sum = 0.0; 00910 // for(int l=0;l<nVis;l++) 00911 // sum += data[l] * vishid[l + k*nVis]; 00912 // poshidprobs[k] = 1.0 / (1.0 + exp(-(sum+hidbias[k]))); 00913 //} 00914 00915 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, nHid, nVis, 1.0, vishid, nVis, data, 1, 0.0, poshidprobs, 1 ); 00916 00917 //for(int k=0;k<nHid;k++) 00918 // poshidprobs[k] = 1.0 / (1.0 + exp(-(poshidprobs[k]+hidbias[k]))); 00919 V_ADD ( nHid, poshidprobs, hidbias, poshidprobs ); // poshidprobs[k]+hidbias[k] 00920 V_MULCI ( -1.0, poshidprobs, nHid ); // *(-1) 00921 V_EXP ( poshidprobs, nHid ); // exp(x) 00922 V_ADDCI ( 1.0, poshidprobs, nHid ); // +1 00923 V_INV ( nHid, poshidprobs, poshidprobs ); // inv(x) 00924 00925 // batchposhidprobs(:,:,batch)=poshidprobs; 00926 00927 // posprods = data' * poshidprobs; 00928 //for(int k=0;k<nHid;k++) 00929 // for(int l=0;l<nVis;l++) 00930 // posprods[l+k*nVis] = data[l] * poshidprobs[k]; 00931 00932 // poshidact = sum(poshidprobs); 00933 00934 //for(int k=0;k<nHid;k++) 00935 // poshidact[k] = poshidprobs[k]; 00936 V_COPY ( poshidprobs, poshidact, nHid ); // poshidact = poshidprobs 00937 00938 // posvisact = sum(data); 00939 //for(int k=0;k<nVis;k++) 00940 // posvisact[k] = data[k]; 00941 V_COPY ( data, posvisact, nVis ); // posvisact = data 00942 00943 // poshidstates = poshidprobs > rand(numcases,numhid); 00944 for ( int k=0;k<nHid;k++ ) 00945 probs[k] = ( double ) rand() / ( double ) RAND_MAX; 00946 //UNIFORM_DISTRIBUTION(probs, nHid, 0.0, 1.0, seed); 00947 00948 for ( int k=0;k<nHid;k++ ) 00949 { 00950 if ( poshidprobs[k] > probs[k] ) 00951 poshidstates[k] = 1.0; 00952 else 00953 poshidstates[k] = 0.0; 00954 } 00955 00956 00957 // ===== NEGATIVE PHASE ===== 00958 // reconstruct the input sample, based on binary representation in the hidden layer 00959 // use the reconstruction error as signal for learning 00960 00961 //negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1))); 00962 //for(int k=0;k<nVis;k++) 00963 //{ 00964 // REAL sum = 0.0; 00965 // for(int l=0;l<nHid;l++) 00966 // sum += poshidstates[l] 
* vishid[k + l*nVis]; 00967 // negdata[k] = sum;//1.0 / (1.0 + exp(-(sum+visbias[k]))); 00968 //} 00969 00970 CBLAS_GEMV ( CblasRowMajor, CblasTrans, nHid, nVis, 1.0, vishid, nVis, poshidstates, 1, 0.0, negdata, 1 ); 00971 00972 //for(int k=0;k<nVis;k++) 00973 // negdata[k] = 1.0 / (1.0 + exp(-(negdata[k]+visbias[k]))); 00974 V_ADD ( nVis, negdata, visbias, negdata ); // negdata[k]+visbias[k] 00975 V_MULCI ( -1.0, negdata, nVis ); // *(-1) 00976 V_EXP ( negdata, nVis ); // exp(x) 00977 V_ADDCI ( 1.0, negdata, nVis ); // +1 00978 V_INV ( nVis, negdata, negdata ); // inv(x) 00979 00980 //neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1))); 00981 //for(int k=0;k<nHid;k++) 00982 //{ 00983 // REAL sum = 0.0; 00984 // for(int l=0;l<nVis;l++) 00985 // sum += negdata[l] * vishid[l + k*nVis]; 00986 // neghidprobs[k] = 1.0 / (1.0 + exp(-(sum+hidbias[k]))); 00987 //} 00988 00989 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, nHid, nVis, 1.0, vishid, nVis, negdata, 1, 0.0, neghidprobs, 1 ); 00990 00991 //for(int k=0;k<nHid;k++) 00992 // neghidprobs[k] = 1.0 / (1.0 + exp(-(neghidprobs[k]+hidbias[k]))); 00993 V_ADD ( nHid, neghidprobs, hidbias, neghidprobs ); // neghidprobs[k]+hidbias[k] 00994 V_MULCI ( -1.0, neghidprobs, nHid ); // *(-1) 00995 V_EXP ( neghidprobs, nHid ); // exp(x) 00996 V_ADDCI ( 1.0, neghidprobs, nHid ); // +1 00997 V_INV ( nHid, neghidprobs, neghidprobs ); // inv(x) 00998 00999 //negprods = negdata'*neghidprobs; 01000 //for(int k=0;k<nHid;k++) 01001 // for(int l=0;l<nVis;l++) 01002 // negprods[l+k*nVis] = negdata[l] * neghidprobs[k]; 01003 01004 //neghidact = sum(neghidprobs); 01005 //for(int k=0;k<nHid;k++) 01006 // neghidact[k] = neghidprobs[k]; 01007 V_COPY ( neghidprobs, neghidact, nHid ); // neghidact = neghidprobs 01008 01009 //negvisact = sum(negdata); 01010 //REAL negvisact = 0.0; 01011 //for(int k=0;k<nVis;k++) 01012 // negvisact[k] = negdata[k]; 01013 V_COPY ( negdata, negvisact, nVis ); // negvisact = negdata 01014 01015 //err= sum(sum( (data-negdata).^2 )); 01016 double err = 0.0; 01017 for ( int k=0;k<nVis;k++ ) 01018 err += ( data[k] - negdata[k] ) * ( data[k] - negdata[k] ); 01019 01020 if ( isnan ( err ) || isinf ( err ) ) 01021 cout<<endl; 01022 01023 errsum = err + errsum; 01024 errcnt += nVis; 01025 01026 01027 // ===== UPDATE WEIGHTS ===== 01028 //vishidinc = momentum*vishidinc + epsilonw*( (posprods-negprods)/numcases - weightcost*vishid); 01029 //visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact); 01030 //hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact); 01031 //vishid = vishid + vishidinc; 01032 //visbiases = visbiases + visbiasinc; 01033 //hidbiases = hidbiases + hidbiasinc; 01034 01035 for ( int k=0;k<nHid;k++ ) 01036 for ( int l=0;l<nVis;l++ ) 01037 vishid[l+k*nVis] += m_rbmLearnrateWeights* ( ( data[l] * poshidprobs[k] - negdata[l] * neghidprobs[k] ) - m_rbmWeightDecay*vishid[l+k*nVis] ); 01038 01039 //for(int k=0;k<nHid*nVis;k++) 01040 // vishid[k] += m_rbmLearnrateWeights*(posprods[k]-negprods[k] - m_rbmWeightDecay*vishid[k]); 01041 01042 for ( int k=0;k<nVis;k++ ) 01043 visbias[k] += m_rbmLearnrateBiasVis * ( posvisact[k]-negvisact[k] ); 01044 for ( int k=0;k<nHid;k++ ) 01045 hidbias[k] += m_rbmLearnrateBiasHid * ( poshidact[k]-neghidact[k] ); 01046 01047 } 01048 for ( int sample=0;sample<nSamples;sample++ ) 01049 { 01050 REAL* data = batchdata + nVis * sample; 01051 // save for next batch 01052 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, nHid, nVis, 1.0, vishid, nVis, data, 1, 0.0, 
poshidprobs, 1 ); 01053 for ( int k=0;k<nHid;k++ ) 01054 poshidprobs[k] = 1.0 / ( 1.0 + exp ( - ( poshidprobs[k]+hidbias[k] ) ) ); 01055 REAL* batchPtr = batchdataNext + sample * nHid; 01056 for ( int k=0;k<nHid;k++ ) 01057 batchPtr[k] = poshidprobs[k]; 01058 } 01059 cout<<"epoch:"<<epoch+1<<"/"<<m_rbmMaxEpochs<<" errsum:"<<errsum<<" errcnt:"<<errcnt<<" mse:"<<errsum/ ( double ) errcnt<<" "<<time ( 0 )-t0<<"[s]"<<endl; 01060 t0 = time ( 0 ); 01061 } 01062 01063 int nrWeightsHere = m_nrLayWeights[i]; 01064 weightOffset += nrWeightsHere; 01065 int offset = weightOffset; 01066 cout<<"weight offset:"<<offset<<" offsetNext:"<<weightOffset<<endl; 01067 REAL* weightPtr = m_weights + offset; 01068 01069 // copy weights to net 01070 for ( int j=0;j<nHid;j++ ) 01071 for ( int k=0;k<nVis;k++ ) 01072 m_weights[getWeightIndex ( i + 1, j, k ) ] = vishid[k + j*nVis]; 01073 // copy weights to nnAuto 01074 REAL* nnAutoWeights = m_nnAuto->getWeightPtr(); 01075 for ( int j=0;j<nHid;j++ ) 01076 for ( int k=0;k<nVis;k++ ) 01077 { 01078 nnAutoWeights[m_nnAuto->getWeightIndex ( i + 1, j, k ) ] = vishid[k + j*nVis]; 01079 nnAutoWeights[m_nnAuto->getWeightIndex ( 2* ( nLayer-1 ) - i, k, j ) ] = vishid[k + j*nVis]; 01080 } 01081 01082 // copy biases to net 01083 for ( int j=0;j<nHid;j++ ) 01084 m_weights[getBiasIndex ( i + 1, j ) ] = hidbias[j]; 01085 // copy biases to nnAuto 01086 for ( int j=0;j<nHid;j++ ) 01087 nnAutoWeights[m_nnAuto->getBiasIndex ( i + 1, j ) ] = hidbias[j]; 01088 for ( int j=0;j<nVis;j++ ) 01089 nnAutoWeights[m_nnAuto->getBiasIndex ( 2* ( nLayer-1 ) - i, j ) ] = visbias[j]; 01090 01091 /* 01092 // check it forward calc 01093 int noutOffset = 0; 01094 for(int j=0;j<i;j++) 01095 noutOffset += m_neuronsPerLayer[j] + 1; 01096 int sample = rand() % nSamples; 01097 cout<<"sample:"<<sample<<endl; 01098 // input and data vector 01099 REAL* data = batchdata + nVis * sample; 01100 REAL* in = input + m_neuronsPerLayer[0] * sample; 01101 // print NN outputs 01102 cout<<"===NN Outputs==="<<endl; 01103 forwardCalculationBLAS(in); 01104 int of = m_neuronsPerLayer[i]+1 + noutOffset; 01105 for(int j=0;j<nHid;j++) 01106 cout<<m_outputs[j+of]<<" "; 01107 cout<<endl; 01108 // print RBM output 01109 REAL* batchPtr = batchdataNext + sample * nHid; 01110 cout<<"===Poshidprobs==="<<endl; 01111 for(int k=0;k<nHid;k++) 01112 cout<<batchPtr[k]<<" "; 01113 cout<<endl<<endl; 01114 */ 01115 01116 01117 /* 01118 if(i==0) // input layer 01119 { 01120 // copy biases 01121 for(int j=0;j<nHid;j++) 01122 { 01123 int ind = j*(nVis+1); 01124 assert(ind+offset<m_nrWeights); 01125 weightPtr[ind] = hidbias[j]; 01126 } 01127 // copy weights 01128 for(int j=0;j<nHid;j++) 01129 for(int k=0;k<nVis;k++) 01130 { 01131 int ind = 1 + k + j*(nVis+1); 01132 assert(ind+offset<m_nrWeights); 01133 weightPtr[ind] = vishid[k + j*nVis]; 01134 } 01135 } 01136 else 01137 { 01138 // copy biases 01139 for(int j=0;j<nHid;j++) 01140 { 01141 int ind = nVis + j*(nVis+1); 01142 assert(ind+offset<m_nrWeights); 01143 weightPtr[ind] = hidbias[j]; 01144 } 01145 // copy weights 01146 for(int j=0;j<nHid;j++) 01147 for(int k=0;k<nVis;k++) 01148 { 01149 int ind = k + j*(nVis+1); 01150 assert(ind+offset<m_nrWeights); 01151 weightPtr[ind] = vishid[k + j*nVis]; 01152 } 01153 } 01154 */ 01155 01156 /* 01157 int checkEnd = offset + m_nrLayWeights[i+1]; 01158 cout<<"check end:"<<checkEnd<<endl; 01159 for(int j=0;j<checkEnd;j++) 01160 if(m_weights[j] == (float)0.01) 01161 { 01162 cout<<"j:"<<j<<endl; 01163 assert(false); 01164 } 01165 */ 01166 01167 /* 01168 // check it 
forward calc 01169 noutOffset = 0; 01170 for(int j=0;j<i;j++) 01171 noutOffset += m_neuronsPerLayer[j] + 1; 01172 //for(int sample=0;sample<nSamples&&sample<3;sample++) 01173 cout<<"sample:"<<sample<<endl; 01174 { 01175 // input and data vector 01176 REAL* data = batchdata + nVis * sample; 01177 REAL* in = input + m_neuronsPerLayer[0] * sample; 01178 01179 // print NN outputs 01180 cout<<"===NN Outputs==="<<endl; 01181 forwardCalculationBLAS(in); 01182 int of = m_neuronsPerLayer[i]+1 + noutOffset; 01183 for(int j=0;j<nHid;j++) 01184 cout<<m_outputs[j+of]<<" "; 01185 cout<<endl; 01186 01187 // print RBM output 01188 REAL* batchPtr = batchdataNext + sample * nHid; 01189 cout<<"===Poshidprobs==="<<endl; 01190 for(int k=0;k<nHid;k++) 01191 cout<<batchPtr[k]<<" "; 01192 01193 cout<<endl<<endl; 01194 } 01195 */ 01196 01197 delete[] batchdata; 01198 batchdata = new REAL[nSamples*nHid]; 01199 for ( int j=0;j<nSamples*nHid;j++ ) 01200 batchdata[j] = batchdataNext[j]; 01201 delete[] batchdataNext; 01202 delete[] vishid; 01203 delete[] visbias; 01204 delete[] hidbias; 01205 delete[] poshidprobs; 01206 delete[] neghidprobs; 01207 delete[] hidbiasinc; 01208 delete[] visbiasinc; 01209 delete[] poshidstates; 01210 delete[] negdata; 01211 delete[] neghidact; 01212 delete[] negvisact; 01213 delete[] poshidact; 01214 delete[] posvisact; 01215 delete[] probs; 01216 } 01217 01218 delete[] batchdata; 01219 01220 // check if all weights have values 01221 for ( int i=0;i<m_nnAuto->getNrWeights();i++ ) 01222 { 01223 REAL* p = m_nnAuto->getWeightPtr(); 01224 if ( p[i] == 0.0 || isnan ( p[i] ) ) 01225 { 01226 cout<<"p["<<i<<"]:"<<p[i]<<endl; 01227 assert ( false ); 01228 } 01229 } 01230 01231 cout<<"Weights summary from neural net:"<<endl; 01232 for ( int i=0;i<m_nrLayer;i++ ) 01233 { 01234 int n = m_neuronsPerLayer[i+1]; 01235 int nIn = m_neuronsPerLayer[i]; 01236 cout<<"#neur:"<<n<<" #neurPrev:"<<nIn<<" "; 01237 01238 // weights 01239 REAL min = 1e10, max = -1e10; 01240 double sum = 0.0, cnt = 0.0, sum2 = 0.0; 01241 01242 for ( int j=0;j<n;j++ ) 01243 for ( int k=0;k<nIn;k++ ) 01244 { 01245 REAL v = m_weights[getWeightIndex ( i + 1, j, k ) ]; 01246 if ( min > v ) 01247 min = v; 01248 if ( max < v ) 01249 max = v; 01250 sum += v; 01251 sum2 += v*v; 01252 cnt += 1.0; 01253 } 01254 cout<<" weights: cnt|min|max|mean|std: "<<cnt<<"|"<<min<<"|"<<max<<"|"<<sum/cnt<<"|"<<sqrt ( sum2/cnt- ( sum/cnt ) * ( sum/cnt ) ) <<endl; 01255 01256 min = 1e10; 01257 max = -1e10; 01258 sum = 0.0; 01259 cnt = 0.0; 01260 sum2 = 0.0; 01261 // biases 01262 for ( int j=0;j<n;j++ ) 01263 { 01264 REAL v = m_weights[getBiasIndex ( i + 1, j ) ]; 01265 if ( min > v ) 01266 min = v; 01267 if ( max < v ) 01268 max = v; 01269 sum += v; 01270 sum2 += v*v; 01271 cnt += 1.0; 01272 } 01273 cout<<" biases: cnt|min|max|mean|std: "<<cnt<<"|"<<min<<"|"<<max<<"|"<<sum/cnt<<"|"<<sqrt ( sum2/cnt- ( sum/cnt ) * ( sum/cnt ) ) <<endl; 01274 01275 } 01276 01277 // dummy print of middle layer 01278 /*if ( crossRun == 0 ) 01279 { 01280 fstream f0 ( "tmp/r0.txt",ios::out ); 01281 REAL* p = new REAL[m_nrInputs]; 01282 for ( int i=0;i<nSamples;i++ ) 01283 { 01284 m_nnAuto->predictSingleInput ( input + i*m_nrInputs, p ); 01285 01286 for ( int j=0;j<nTarget;j++ ) 01287 f0<<target[j+i*nTarget]<<" "; 01288 01289 int midLayer = nLayer - 1; 01290 for ( int j=0;j<m_nnAuto->m_neuronsPerLayer[midLayer];j++ ) 01291 f0<<m_nnAuto->m_outputs[m_nnAuto->getOutputIndex ( midLayer, j ) ]<<" "; 01292 f0<<endl; 01293 01294 } 01295 delete[] p; 01296 f0.close(); 01297 }*/ 01298 01299 // 
finetuning of the autoencoder with backprop 01300 if ( nFinetuningEpochs != -1 ) 01301 { 01302 cout<<"=== begin: finetuning ==="<<endl; 01303 m_nnAuto->m_normalTrainStopping = false; 01304 m_nnAuto->m_maxEpochs = nFinetuningEpochs; 01305 if ( nFinetuningEpochs > 0 ) 01306 m_nnAuto->trainNN(); 01307 cout<<"=== end: finetuning ==="<<endl; 01308 /* 01309 cout<<endl<<"Adjust bias and weights in coding layer to met the mean=0 and std=1 condition"<<endl; 01310 int nCodeNeurons = m_neuronsPerLayer[nLayer-1]; 01311 cout<<"#codeNeurons:"<<nCodeNeurons<<endl; 01312 double* mean = new double[nCodeNeurons]; 01313 double* std = new double[nCodeNeurons]; 01314 REAL* p = new REAL[m_nrInputs]; 01315 for(int i=0;i<nCodeNeurons;i++) 01316 { 01317 mean[i] = 0.0; 01318 std[i] = 0.0; 01319 } 01320 for(int i=0;i<nSamples;i++) 01321 { 01322 m_nnAuto->predictSingleInput(input + i*m_nrInputs, p); 01323 for(int j=0;j<nCodeNeurons;j++) 01324 { 01325 REAL v = m_nnAuto->m_outputs[m_nnAuto->getOutputIndex(nLayer-1, j)]; 01326 mean[j] += v; 01327 std[j] += v*v; 01328 } 01329 } 01330 for(int i=0;i<nCodeNeurons;i++) 01331 { 01332 mean[i] /= (double)nSamples; 01333 std[i] /= (double)nSamples; 01334 std[i] = sqrt(std[i] - mean[i]*mean[i]); 01335 } 01336 cout<<endl<<"[no correction]:"<<endl; 01337 for(int i=0;i<nCodeNeurons;i++) 01338 cout<<"codeLayer "<<i<<": mean:"<<mean[i]<<" std:"<<std[i]<<endl; 01339 01340 // correct the coding weights 01341 // copy weights : from nnAuto to net 01342 for(int j=0;j<nCodeNeurons;j++) 01343 for(int k=0;k<m_neuronsPerLayer[nLayer-2];k++) 01344 m_nnAuto->m_weights[m_nnAuto->getWeightIndex(nLayer-1,j,k)] /= std[j]; 01345 // copy biases : from nnAuto to net 01346 //for(int j=0;j<nCodeNeurons;j++) 01347 // m_nnAuto->m_weights[getBiasIndex(nLayer-1,j)] -= mean[j]/std[j]; 01348 01349 for(int i=0;i<nCodeNeurons;i++) 01350 { 01351 mean[i] = 0.0; 01352 std[i] = 0.0; 01353 } 01354 for(int i=0;i<nSamples;i++) 01355 { 01356 m_nnAuto->predictSingleInput(input + i*m_nrInputs, p); 01357 for(int j=0;j<nCodeNeurons;j++) 01358 { 01359 REAL v = m_nnAuto->m_outputs[m_nnAuto->getOutputIndex(nLayer-1, j)]; 01360 mean[j] += v; 01361 std[j] += v*v; 01362 } 01363 } 01364 for(int i=0;i<nCodeNeurons;i++) 01365 { 01366 mean[i] /= (double)nSamples; 01367 std[i] /= (double)nSamples; 01368 std[i] = sqrt(std[i] - mean[i]*mean[i]); 01369 } 01370 cout<<endl<<"[with corrected std]:"<<endl; 01371 for(int i=0;i<nCodeNeurons;i++) 01372 cout<<"codeLayer "<<i<<": mean:"<<mean[i]<<" std:"<<std[i]<<endl; 01373 01374 // correct the coding weights 01375 // copy weights : from nnAuto to net 01376 //for(int j=0;j<nCodeNeurons;j++) 01377 // for(int k=0;k<m_neuronsPerLayer[nLayer-2];k++) 01378 // m_nnAuto->m_weights[m_nnAuto->getWeightIndex(nLayer-1,j,k)] /= std[j]; 01379 // copy biases : from nnAuto to net 01380 for(int j=0;j<nCodeNeurons;j++) 01381 m_nnAuto->m_weights[getBiasIndex(nLayer-1,j)] -= mean[j]; 01382 01383 for(int i=0;i<nCodeNeurons;i++) 01384 { 01385 mean[i] = 0.0; 01386 std[i] = 0.0; 01387 } 01388 for(int i=0;i<nSamples;i++) 01389 { 01390 m_nnAuto->predictSingleInput(input + i*m_nrInputs, p); 01391 for(int j=0;j<nCodeNeurons;j++) 01392 { 01393 REAL v = m_nnAuto->m_outputs[m_nnAuto->getOutputIndex(nLayer-1, j)]; 01394 mean[j] += v; 01395 std[j] += v*v; 01396 } 01397 } 01398 for(int i=0;i<nCodeNeurons;i++) 01399 { 01400 mean[i] /= (double)nSamples; 01401 std[i] /= (double)nSamples; 01402 std[i] = sqrt(std[i] - mean[i]*mean[i]); 01403 } 01404 cout<<endl<<"[with corrected std and mean]:"<<endl; 01405 for(int 
i=0;i<nCodeNeurons;i++) 01406 cout<<"codeLayer "<<i<<": mean:"<<mean[i]<<" std:"<<std[i]<<endl; 01407 01408 delete[] p; 01409 delete[] mean; 01410 delete[] std; 01411 */ 01412 01413 cout<<endl<<"Copy the autoencoder weights to the net"<<endl; 01414 cout<<"#weights:"<<m_nrLayWeightOffsets[nLayer]<<endl; 01415 for ( int i=0;i<nLayer;i++ ) 01416 { 01417 cout<<"layer "<<i<<": #neurons:"<<m_neuronsPerLayer[i]<<" #neuronsAuto:"<<m_nnAuto->m_neuronsPerLayer[i]<<endl; 01418 if ( i > 0 ) 01419 { 01420 // copy weights : from nnAuto to net 01421 for ( int j=0;j<m_neuronsPerLayer[i];j++ ) 01422 for ( int k=0;k<m_neuronsPerLayer[i-1];k++ ) 01423 m_weights[getWeightIndex ( i,j,k ) ] = m_nnAuto->m_weights[m_nnAuto->getWeightIndex ( i,j,k ) ]; 01424 // copy biases : from nnAuto to net 01425 for ( int j=0;j<m_neuronsPerLayer[i];j++ ) 01426 m_weights[getBiasIndex ( i,j ) ] = m_nnAuto->m_weights[m_nnAuto->getBiasIndex ( i,j ) ]; 01427 } 01428 } 01429 01430 } 01431 01432 // dummy print of middle layer 01433 /*if ( crossRun == 0 ) 01434 { 01435 fstream f0 ( "tmp/r1.txt",ios::out ); 01436 REAL* p = new REAL[m_nrInputs]; 01437 for ( int i=0;i<nSamples;i++ ) 01438 { 01439 m_nnAuto->predictSingleInput ( input + i*m_nrInputs, p ); 01440 01441 for ( int j=0;j<nTarget;j++ ) 01442 f0<<target[j+i*nTarget]<<" "; 01443 01444 int midLayer = nLayer - 1; 01445 for ( int j=0;j<m_nnAuto->m_neuronsPerLayer[midLayer];j++ ) 01446 f0<<m_nnAuto->m_outputs[m_nnAuto->getOutputIndex ( midLayer, j ) ]<<" "; 01447 f0<<endl; 01448 01449 } 01450 delete[] p; 01451 f0.close(); 01452 } 01453 */ 01454 }
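A sketch of how pretraining and finetuning might be driven, assuming the net structure and training data were already set up as in the class-level example above; all numeric values and the encoder depth are illustrative:

// RBM learn parameters: learnrate for weights, visible biases, hidden biases,
// weight decay, and number of RBM epochs per layer (illustrative values)
net.setRBMLearnParams ( 0.1, 0.1, 0.1, 2e-4, 10 );

// greedy layer-wise pretraining of the first 4 layers (the encoder part),
// followed by 30 epochs of backprop finetuning of the unrolled autoencoder
net.rbmPretraining ( trainX, trainY, nTrain, nTargets,
                     4,       // firstNLayer: where the autoencoder ends
                     0,       // crossRun
                     30,      // nFinetuningEpochs
                     1e-4 );  // finetuningLearnRate

vector<int> enc = net.getEncoder();   // encoder layer sizes plus total encoder weight count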
void NNRBM::saveWeights | ( | ) | [private] |
void NNRBM::setBatchSize | ( | int | size | ) |
void NNRBM::setGlobalEpochs | ( | int | e | ) |
void NNRBM::setInitWeightFactor | ( | REAL | factor | ) |
void NNRBM::setL1Regularization | ( | bool | en | ) |
void NNRBM::setLearnrate | ( | REAL | learnrate | ) |
void NNRBM::setLearnrateMinimum | ( | REAL | learnrateMin | ) |
void NNRBM::setLearnrateSubtractionValueAfterEveryEpoch | ( | REAL | learnrateDecreaseRate | ) |
Set the value that is subtracted from the learning rate after every training epoch.
learnrateDecreaseRate | This value is subtracted from the learnrate after every training epoch |
Definition at line 280 of file nnrbm.cpp.
00281 {
00282     m_learnrateDecreaseRateEpoch = learnrateDecreaseRate;
00283     cout<<"learnrateDecreaseRateEpoch: "<<m_learnrateDecreaseRateEpoch<<endl;
00284 }
void NNRBM::setLearnrateSubtractionValueAfterEverySample | ( | REAL | learnrateDecreaseRate | ) |
Set the value that is subtracted from the learning rate after every training example.
learnrateDecreaseRate | This value is subtracted from the learnrate after every training example |
Definition at line 268 of file nnrbm.cpp.
00269 {
00270     m_learnrateDecreaseRate = learnrateDecreaseRate;
00271     cout<<"learnrateDecreaseRate: "<<m_learnrateDecreaseRate<<endl;
00272 }
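Taken together, the two subtraction values and the learnrate minimum describe a simple linear decay schedule. The following sketch restates it under the assumption that the decrements are applied additively and that the minimum acts as a lower clamp (the actual schedule is applied inside the training loop, which is not listed here):

// effective learnrate after a number of processed samples and finished epochs (sketch)
REAL effectiveLearnrate ( REAL lr0, REAL lrMin,
                          REAL decPerSample, REAL decPerEpoch,
                          long samplesSeen, int epochsSeen )
{
    REAL lr = lr0 - decPerSample * samplesSeen - decPerEpoch * epochsSeen;
    return lr < lrMin ? lrMin : lr;   // never fall below the configured minimum
}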
void NNRBM::setMaxEpochs | ( | int | epochs | ) |
void NNRBM::setMinUpdateErrorBound | ( | REAL | minUpdateBound | ) |
Set the minimal improvement between two successive training epochs below which training stops.
minUpdateBound | The minimum RMSE improvement per epoch below which training stops |
Definition at line 325 of file nnrbm.cpp.
00326 {
00327     m_minUpdateBound = minUpdateBound;
00328     cout<<"minUpdateBound: "<<m_minUpdateBound<<endl;
00329 }
void NNRBM::setMomentum | ( | REAL | momentum | ) |
void NNRBM::setNNStructure | ( | int | nrLayer, | |
int * | neuronsPerLayer, | |||
bool | lastLinearLayer = false , |
|||
int * | layerType = 0 | |||
) |
Set the inner structure: layers and how many neurons per layer
nrLayer | Number of layers (1=only the output layer, 2=one hidden layer, 3=two hidden layers) | 
neuronsPerLayer | Integer pointer to the number of neurons per layer |
Definition at line 446 of file nnrbm.cpp.
{
    m_nrLayer = nrLayer;
    cout<<"nrLayer: "<<m_nrLayer<<endl;

    cout<<"#layers: "<<m_nrLayer<<" ("<< ( m_nrLayer-1 ) <<" hidden layer, 1 output layer)"<<endl;

    // alloc space for structure variables
    m_neuronsPerLayer = new int[m_nrLayer+1];
    m_neuronsPerLayer[0] = m_nrInputs;                 // number of inputs
    for ( int i=0;i<m_nrLayer-1;i++ )
        m_neuronsPerLayer[1+i] = neuronsPerLayer[i];
    m_neuronsPerLayer[m_nrLayer] = m_nrTargets;        // one output

    cout<<"Neurons per Layer: ";
    for ( int i=0;i<m_nrLayer+1;i++ )
        cout<<m_neuronsPerLayer[i]<<" ";
    cout<<endl;

    cout<<"Outputs per Layer: ";
    for ( int i=0;i<m_nrLayer+1;i++ )
        cout<<m_neuronsPerLayer[i]+1<<" ";
    cout<<endl;

    cout<<"OutOffsets per Layer: ";
    int cnt=0;
    for ( int i=0;i<m_nrLayer+1;i++ )
    {
        cout<<cnt<<" ";
        cnt += m_neuronsPerLayer[i]+1;
    }
    cout<<endl;

    cout<<"Act. function per Layer(0=sig, 1=lin, 2=tanh, 3=softmax): ";
    m_activationFunctionPerLayer = new int[m_nrLayer+1];
    for ( int i=0;i<m_nrLayer+1;i++ )
        m_activationFunctionPerLayer[i] = 0;
    if ( layerType )
    {
        for ( int i=0;i<m_nrLayer+1;i++ )
            m_activationFunctionPerLayer[i] = layerType[i];
    }
    if ( m_enableAutoencoder )
        m_activationFunctionPerLayer[m_nrLayer/2] = 1;
    if ( lastLinearLayer )
        m_activationFunctionPerLayer[m_nrLayer] = 1;
    for ( int i=0;i<m_nrLayer+1;i++ )
        cout<<m_activationFunctionPerLayer[i]<<" ";
    cout<<endl;

    // init the total number of weights and outputs
    m_nrWeights = 0;
    m_nrOutputs = m_neuronsPerLayer[0] + 1;
    m_nrLayWeights = new int[m_nrLayer+1];
    m_nrLayWeightOffsets = new int[m_nrLayer+2];
    m_nrLayWeights[0] = 0;
    for ( int i=0;i<m_nrLayer;i++ )
    {
        m_nrLayWeights[i+1] = m_neuronsPerLayer[i+1] * ( m_neuronsPerLayer[i]+1 );  // +1 for input bias
        m_nrWeights += m_nrLayWeights[i+1];
        m_nrOutputs += m_neuronsPerLayer[i+1] + 1;                                  // +1 for input bias
    }

    // print it
    cout<<"Weights per Layer: ";
    for ( int i=0;i<m_nrLayer+1;i++ )
        cout<<m_nrLayWeights[i]<<" ";
    cout<<endl;

    cout<<"WeightOffsets per Layer: ";
    m_nrLayWeightOffsets[0] = 0;
    for ( int i=0;i<m_nrLayer+1;i++ )
    {
        cout<<m_nrLayWeightOffsets[i]<<" ";
        m_nrLayWeightOffsets[i+1] = m_nrLayWeightOffsets[i] + m_nrLayWeights[i];
    }
    cout<<endl;

    cout<<"nrOutputs="<<m_nrOutputs<<" nrWeights="<<m_nrWeights<<endl;

    // allocate the inner calculation structure
    m_outputs = new REAL[m_nrOutputs];
    m_outputsTmp = new REAL[m_nrTargets];
    m_derivates = new REAL[m_nrOutputs];
    m_d1 = new REAL[m_nrOutputs];

    for ( int i=0;i<m_nrOutputs;i++ )  // init as biases
    {
        m_outputs[i] = 1.0;
        m_derivates[i] = 0.0;
        m_d1[i] = 0.0;
    }

    // allocate weights and temp vars
    m_weights = new REAL[m_nrWeights];
    m_weightsTmp0 = new REAL[m_nrWeights];
    m_weightsTmp1 = new REAL[m_nrWeights];
    m_weightsTmp2 = new REAL[m_nrWeights];
    m_weightsBatchUpdate = new REAL[m_nrWeights];
    m_weightsOld = new REAL[m_nrWeights];
    m_weightsOldOld = new REAL[m_nrWeights];
    m_deltaW = new REAL[m_nrWeights];

    m_deltaWOld = new REAL[m_nrWeights];
    m_adaptiveRPROPlRate = new REAL[m_nrWeights];
    for ( int i=0;i<m_nrWeights;i++ )
    {
        m_deltaWOld[i] = 0.0;
        m_adaptiveRPROPlRate[i] = m_learnRate;
    }
    for ( int i=0;i<m_nrWeights;i++ )
        m_weights[i] = m_weightsOld[i] = m_deltaW[i] = m_weightsTmp0[i] = m_weightsTmp1[i] = m_weightsTmp2[i] = 0.0;

    // this should be implemented (LeCun suggests such a linear factor in the activation function)
    //m_linFac = 0.01;
    //cout<<"linFac="<<m_linFac<<" (no active, just tanh used)"<<endl;
}
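For illustration, a minimal call sketch (variable names and sizes are placeholders, not part of the library): a network with two hidden layers and a linear output layer, assuming setNrInputs() and setNrTargets() were already called on the NNRBM instance net.

    int hiddenNeurons[2] = { 100, 50 };   // neurons in the two hidden layers
    net.setNNStructure ( 3,               // nrLayer: 2 hidden layers + 1 output layer
                         hiddenNeurons,   // neurons per hidden layer (nrLayer-1 entries)
                         true );          // lastLinearLayer: linear output activation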
void NNRBM::setNormalTrainStopping | ( | bool | en | ) |
Set the training stop criterion.
en=0: training stops only when maxEpochs is reached.
en=1: training stops when maxEpochs is reached, when the probe error rises, or when the probe error improvement becomes too small.
en | Train stop criteria (0 is used for retraining) |
Definition at line 396 of file nnrbm.cpp.
{
    m_normalTrainStopping = en;
    cout<<"normalTrainStopping: "<<m_normalTrainStopping<<endl;
}
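A usage sketch of the retraining case (en=0); net and bestEpochs are illustrative names, not part of the library:

    net.setNormalTrainStopping ( false );  // stop only when maxEpochs is reached
    net.setMaxEpochs ( bestEpochs );       // e.g. the epoch count found earlier with early stopping
    net.trainNN();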
void NNRBM::setNrExamplesProbe | ( | int | n | ) |
void NNRBM::setNrExamplesTrain | ( | int | n | ) |
void NNRBM::setNrInputs | ( | int | n | ) |
void NNRBM::setNrTargets | ( | int | n | ) |
void NNRBM::setProbeInputs | ( | REAL * | inputs | ) |
void NNRBM::setProbeTargets | ( | REAL * | targets | ) |
void NNRBM::setRBMLearnParams | ( | REAL | learnrateWeights, | |
REAL | learnrateBiasVis, | |||
REAL | learnrateBiasHid, | |||
REAL | weightDecay, | |||
int | maxEpoch | |||
) |
Set the learning parameters for RBM pretraining
learnrateWeights | Learning rate for the weights |
learnrateBiasVis | Learning rate for the visible-unit biases |
learnrateBiasHid | Learning rate for the hidden-unit biases |
weightDecay | Weight decay applied during pretraining |
maxEpoch | Number of pretraining epochs |
Definition at line 615 of file nnrbm.cpp.
{
    m_rbmLearnrateWeights = learnrateWeights;
    m_rbmLearnrateBiasVis = learnrateBiasVis;
    m_rbmLearnrateBiasHid = learnrateBiasHid;
    m_rbmWeightDecay = weightDecay;
    m_rbmMaxEpochs = maxEpoch;
}
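An illustrative call; the numeric values below are placeholders, not library defaults:

    net.setRBMLearnParams ( 0.01,    // learnrateWeights
                            0.01,    // learnrateBiasVis
                            0.01,    // learnrateBiasHid
                            0.0002,  // weightDecay
                            20 );    // maxEpoch: number of pretraining epochs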
void NNRBM::setRPROPMinMaxUpdate | ( | REAL | min, | |
REAL | max | |||
) |
Set the min. and max. update values for the sign update in RPROP. Weight updates can never be larger than max. or smaller than min.
min | Min. weight update value | |
max | Max. weight update value |
Definition at line 367 of file nnrbm.cpp.
{
    m_RPROP_updateMin = min;
    m_RPROP_updateMax = max;
    cout<<"RPROP_updateMin: "<<m_RPROP_updateMin<<" RPROP_updateMax: "<<m_RPROP_updateMax<<endl;
}
void NNRBM::setRPROPPosNeg | ( | REAL | etaPos, | |
REAL | etaNeg | |||
) |
Set the etaNeg and etaPos parameters in the RPROP learning algorithm
Learnrate adaption:
adaptiveRPROPlRate = { if (dE/dW_old * dE/dW) > 0 then adaptiveRPROPlRate * RPROP_etaPos
                       if (dE/dW_old * dE/dW) < 0 then adaptiveRPROPlRate * RPROP_etaNeg
                       if (dE/dW_old * dE/dW) = 0 then adaptiveRPROPlRate }
etaPos | etaPos parameter | |
etaNeg | etaNeg parameter |
Definition at line 353 of file nnrbm.cpp.
{
    m_RPROP_etaPos = etaPos;
    m_RPROP_etaNeg = etaNeg;
    cout<<"RPROP_etaPos: "<<m_RPROP_etaPos<<" RPROP_etaNeg: "<<m_RPROP_etaNeg<<endl;
}
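As a standalone sketch of the adaption rule above (not a drop-in piece of the library), the per-weight step size evolves as follows; the clamping bounds correspond to setRPROPMinMaxUpdate(), and REAL is assumed to be the library's floating-point typedef from nnrbm.h.

    #include <algorithm>

    // Illustration only: adapt one weight's RPROP step size from the current
    // and previous gradient. Function and parameter names are hypothetical.
    void adaptRPROPRate ( REAL dW, REAL dWOld, REAL &lRate,
                          REAL etaPos, REAL etaNeg, REAL updateMin, REAL updateMax )
    {
        REAL prod = dW * dWOld;
        if ( prod > 0.0 )       // gradient kept its sign: enlarge the step
            lRate = std::min ( lRate * etaPos, updateMax );
        else if ( prod < 0.0 )  // gradient changed sign: shrink the step
            lRate = std::max ( lRate * etaNeg, updateMin );
        // prod == 0.0: step size stays unchanged
    }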
void NNRBM::setScaleOffset | ( | REAL | scale, | |
REAL | offset | |||
) |
Set the scale and offset of the output of the NNRBM.
targets transformation: target = (targetOld - offset) / scale
outputs transformation: output = outputNN * scale + offset
scale | Output scaling | |
offset | Output offset |
Definition at line 382 of file nnrbm.cpp.
{
    m_scaleOutputs = scale;
    m_offsetOutputs = offset;
    cout<<"scaleOutputs: "<<m_scaleOutputs<<" offsetOutputs: "<<m_offsetOutputs<<" [transformation: output = outputNN * scale + offset]"<<endl;
}
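A worked example (values illustrative): mapping raw targets in the range [1, 5] onto the network's output range and back.

    REAL scale  = 4.0;   // 5 - 1: spread of the raw targets
    REAL offset = 1.0;   // minimum raw target
    net.setScaleOffset ( scale, offset );
    // a raw target of 3.0 is presented to the net as (3.0 - 1.0) / 4.0 = 0.5
    // a network output of 0.5 is reported back as 0.5 * 4.0 + 1.0 = 3.0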
void NNRBM::setTrainInputs | ( | REAL * | inputs | ) |
void NNRBM::setTrainTargets | ( | REAL * | targets | ) |
void NNRBM::setWeightDecay | ( | REAL | weightDecay | ) |
void NNRBM::setWeights | ( | REAL * | w | ) |
int NNRBM::trainNN | ( | ) |
Train the whole neural network until the break criterion is reached. This method calls trainOneEpoch() to train one epoch at a time.
Definition at line 1981 of file nnrbm.cpp.
{
    cout<<"Train the NN with "<<m_nrExamplesTrain<<" samples"<<endl;
    double rmseMin = 1e10, lastUpdate = 1e10, rmseProbeOld = 1e10, rmseTrain = 1e10, rmseProbe = 1e10;
    time_t t0 = time ( 0 );
    while ( 1 )
    {
        rmseProbeOld = rmseProbe;
        rmseTrain = getRMSETrain();
        rmseProbe = getRMSEProbe();
        lastUpdate = rmseProbeOld - rmseProbe;

        cout<<"e:"<<m_globalEpochs<<" rmseTrain:"<<rmseTrain<<" rmseProbe:"<<rmseProbe<<" "<<flush;

        if ( m_normalTrainStopping )
        {
            if ( rmseProbe < rmseMin )
            {
                rmseMin = rmseProbe;
                saveWeights();
            }
            else
            {
                cout<<"rmse rises."<<endl;
                return m_globalEpochs;
            }
            if ( m_minUpdateBound > fabs ( lastUpdate ) )
            {
                cout<<"min update too small (<"<<m_minUpdateBound<<")."<<endl;
                return m_globalEpochs;
            }
        }
        if ( m_maxEpochs == m_globalEpochs )
        {
            cout<<"max epochs reached."<<endl;
            return m_globalEpochs;
        }

        trainOneEpoch();

        cout<<"lRate:"<<m_learnRate<<" ";
        cout<<time ( 0 )-t0<<"[s]"<<endl;
        t0 = time ( 0 );
    }
    return -1;
}
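An end-to-end sketch of a typical call sequence (all variable names, sizes, and hyperparameter values are illustrative; the caller owns the REAL arrays):

    NNRBM net;
    net.setNrInputs ( nInput );
    net.setNrTargets ( nTarget );
    net.setNrExamplesTrain ( nTrain );
    net.setNrExamplesProbe ( nProbe );
    net.setTrainInputs ( trainX );
    net.setTrainTargets ( trainY );
    net.setProbeInputs ( probeX );
    net.setProbeTargets ( probeY );

    int hidden[1] = { 50 };
    net.setNNStructure ( 2, hidden );      // one hidden layer, one output layer
    net.setLearnrate ( 0.01 );
    net.setMaxEpochs ( 1000 );
    net.setNormalTrainStopping ( true );   // stop when the probe RMSE rises
    net.initNNWeights ( time ( 0 ) );      // seed from the current time

    int stoppedAt = net.trainNN();         // returns the epoch at which training stopped
    cout<<"stopped after "<<stoppedAt<<" epochs, probe RMSE "<<net.getRMSEProbe()<<endl;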
void NNRBM::trainOneEpoch | ( | bool * | updateLayer = 0 | ) |
Train the whole neural network for one epoch through the training set with gradient descent.
Definition at line 2032 of file nnrbm.cpp.
{
    int batchCnt = 0;
    V_ZERO ( m_weightsBatchUpdate, m_nrWeights );
    m_sumSquaredError = 0.0;
    m_sumSquaredErrorSamples = 0.0;

    for ( int i=0;i<m_nrExamplesTrain;i++ )
    {
        if ( updateLayer != 0 )
            V_ZERO ( m_deltaW, m_nrWeights );  // clear the weight update, it might be that not every layer gets updated

        REAL* inputPtr = m_inputsTrain + i * m_nrInputs;
        REAL* targetPtr = m_targetsTrain + i * m_nrTargets;

        // forward
        if ( m_useBLAS )
            forwardCalculationBLAS ( inputPtr );
        else
            forwardCalculation ( inputPtr );

        // backward: calc weight update
        if ( m_useBLAS )
            backpropBLAS ( inputPtr, targetPtr, updateLayer );
        else
            backprop ( inputPtr, targetPtr, updateLayer );

        // accumulate the weight updates
        if ( m_batchSize > 1 )
            V_ADD ( m_nrWeights, m_deltaW, m_weightsBatchUpdate, m_weightsBatchUpdate );

        batchCnt++;

        // if batch size is reached, or the last element in training list
        if ( batchCnt >= m_batchSize || i == m_nrExamplesTrain - 1 )
        {
            // batch init
            batchCnt = 0;
            if ( m_batchSize > 1 )
            {
                V_COPY ( m_weightsBatchUpdate, m_deltaW, m_nrWeights );  // deltaW = weightsBatchUpdate
                V_ZERO ( m_weightsBatchUpdate, m_nrWeights );
            }

            if ( m_enableRPROP )
            {
                // weight update:
                // deltaW = { if dE/dW>0 then -adaptiveRPROPlRate
                //            if dE/dW<0 then +adaptiveRPROPlRate
                //            if dE/dW=0 then 0 }
                // learnrate adaption:
                // adaptiveRPROPlRate = { if (dE/dW_old * dE/dW)>0 then adaptiveRPROPlRate*RPROP_etaPos
                //                        if (dE/dW_old * dE/dW)<0 then adaptiveRPROPlRate*RPROP_etaNeg
                //                        if (dE/dW_old * dE/dW)=0 then adaptiveRPROPlRate }
                REAL dW, dWOld, sign, update, prod;
                for ( int j=0;j<m_nrWeights;j++ )
                {
                    dW = m_deltaW[j];
                    dWOld = m_deltaWOld[j];
                    prod = dW * dWOld;
                    sign = dW > 0.0? 1.0 : -1.0;
                    if ( prod > 0.0 )
                    {
                        m_adaptiveRPROPlRate[j] *= m_RPROP_etaPos;
                        if ( m_adaptiveRPROPlRate[j] > m_RPROP_updateMax )
                            m_adaptiveRPROPlRate[j] = m_RPROP_updateMax;
                        update = sign * m_adaptiveRPROPlRate[j];
                        m_weights[j] -= update + m_weightDecay * m_weights[j];  // weight update and weight decay
                        m_deltaWOld[j] = dW;
                    }
                    else if ( prod < 0.0 )
                    {
                        m_adaptiveRPROPlRate[j] *= m_RPROP_etaNeg;
                        if ( m_adaptiveRPROPlRate[j] < m_RPROP_updateMin )
                            m_adaptiveRPROPlRate[j] = m_RPROP_updateMin;
                        m_deltaWOld[j] = 0.0;
                    }
                    else  // prod == 0.0
                    {
                        update = sign * m_adaptiveRPROPlRate[j];
                        m_weights[j] -= update + m_weightDecay * m_weights[j];  // weight update and weight decay
                        m_deltaWOld[j] = dW;
                    }
                }
            }
            else  // stochastic gradient descent (batch-size: m_batchSize)
            {
                //=========== slower stochastic updates (without vector libraries) ===========
                // update weights + weight decay
                // formula: weights -= eta * (dE(w)/dw + lambda * w)
                //if(m_momentum > 0.0)
                //{
                //    for(int j=0;j<m_nrWeights;j++)
                //        m_weightsOldOld[j] = m_weightsOld[j];
                //    for(int j=0;j<m_nrWeights;j++)
                //        m_weightsOld[j] = m_weights[j];
                //}
                //
                //if(m_momentum > 0.0)
                //{
                //    for(int j=0;j<m_nrWeights;j++)
                //    {
                //        m_weightsTmp0[j] = (1.0 - m_momentum)*m_weightsTmp0[j] + m_momentum*m_weightsTmp1[j];
                //        m_weightsTmp1[j] = m_weightsTmp0[j];
                //        m_weights[j] -= m_weightsTmp0[j];
                //    }
                //}
                //for(int j=0;j<m_nrWeights;j++)
                //    m_weights[j] -= m_learnRate * (m_deltaW[j] + m_weightDecay * m_weights[j]);


                // update weights + weight decay (L2 reg.)
                // formula: weights = weights - eta * (dE(w)/dw + lambda * weights)
                //          weights = weights - (eta*deltaW + eta*lambda*weights)
                for ( int j=0;j<m_nrWeights;j++ )
                    if ( isnan ( m_deltaW[j] ) || isinf ( m_deltaW[j] ) )
                    {
                        cout<<endl<<"value:"<<m_deltaW[j]<<endl<<"j:"<<j<<" i:"<<i<<endl;
                        assert ( false );
                    }
                V_MULC ( m_deltaW, m_learnRate, m_weightsTmp0, m_nrWeights );  // tmp0 = learnrate * deltaW

                // if weight decay enabled
                if ( m_weightDecay > 0.0 )
                {
                    if ( m_enableL1Regularization )
                    {
                        // update weights + L1 reg.
                        // formula: weights = weights - eta * (dE(w)/dw + lambda*sign(w))
                        //          weights = weights - (eta*deltaW + eta*lambda*sign(w))
                        for ( int j=0;j<m_nrWeights;j++ )
                            m_weightsTmp2[j] = 1.0;
                        for ( int j=0;j<m_nrWeights;j++ )
                            if ( m_weights[j]<0.0 )
                                m_weightsTmp2[j] = -1.0;
                        //REAL c = m_weightDecay * m_learnRate;
                        //for(int j=0;j<m_nrWeights;j++)
                        //    m_weightsTmp0[j] += c * m_weightsTmp2[j];
                        CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weightsTmp2, 1, m_weightsTmp0, 1 );  // tmp0 = reg*learnrate*sign(weights)+tmp0
                    }
                    else
                        //saxpy(n, a, x, incx, y, incy)
                        //y := a*x + y
                        CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weights, 1, m_weightsTmp0, 1 );      // tmp0 = reg*learnrate*weights+tmp0
                }

                // if momentum is used
                if ( m_momentum > 0.0 )
                {
                    V_MULC ( m_weightsTmp0, 1.0 - m_momentum, m_weightsTmp0, m_nrWeights );  // tmp0 = tmp0 * (1 - momentum) [actual update]
                    V_MULC ( m_weightsTmp1, m_momentum, m_weightsTmp1, m_nrWeights );        // tmp1 = tmp1 * momentum [last update]

                    // sum updates
                    V_ADD ( m_nrWeights, m_weightsTmp0, m_weightsTmp1, m_weightsTmp0 );      // tmp0 = tmp0 + tmp1

                    V_COPY ( m_weightsTmp0, m_weightsTmp1, m_nrWeights );                    // tmp1 = tmp0
                }

                // standard weight update in the NNRBM
                V_SUB ( m_nrWeights, m_weights, m_weightsTmp0, m_weights );  // weights = weights - tmp0
            }
        }

        // make the learnrate smaller (per sample)
        m_learnRate -= m_learnrateDecreaseRate;
        if ( m_learnRate < m_learnRateMin )
            m_learnRate = m_learnRateMin;
    }

    // make the learnrate smaller (per epoch)
    m_learnRate -= m_learnrateDecreaseRateEpoch;
    if ( m_learnRate < m_learnRateMin )
        m_learnRate = m_learnRateMin;

    // epoch counter
    m_globalEpochs++;
}
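A sketch of driving epochs manually with a layer-update mask (names illustrative; the mask is assumed to carry one flag per weight layer, and a layer whose flag is false keeps its weights fixed):

    bool updateLayer[2] = { false, true };   // freeze the first weight layer, train only the output layer
    for ( int e = 0; e < 10; e++ )
    {
        net.trainOneEpoch ( updateLayer );
        cout<<"epoch "<<e<<"  rmseTrain:"<<net.getRMSETrain()<<"  rmseProbe:"<<net.getRMSEProbe()<<endl;
    }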
void NNRBM::useBLASforTraining | ( | bool | enable | ) |