NNRBM Class Reference

#include <nnrbm.h>

Inheritance diagram for NNRBM:

Framework


Public Member Functions

 NNRBM ()
 ~NNRBM ()
void setNrTargets (int n)
void setNrInputs (int n)
void setNrExamplesTrain (int n)
void setNrExamplesProbe (int n)
void setTrainInputs (REAL *inputs)
void setTrainTargets (REAL *targets)
void setProbeInputs (REAL *inputs)
void setProbeTargets (REAL *targets)
void setInitWeightFactor (REAL factor)
void setLearnrate (REAL learnrate)
void setLearnrateMinimum (REAL learnrateMin)
void setLearnrateSubtractionValueAfterEverySample (REAL learnrateDecreaseRate)
void setLearnrateSubtractionValueAfterEveryEpoch (REAL learnrateDecreaseRate)
void setMomentum (REAL momentum)
void setWeightDecay (REAL weightDecay)
void setBatchSize (int size)
void setMinUpdateErrorBound (REAL minUpdateBound)
void setMaxEpochs (int epochs)
void setRPROPPosNeg (REAL etaPos, REAL etaNeg)
void setRPROPMinMaxUpdate (REAL min, REAL max)
void setL1Regularization (bool en)
void initNNWeights (time_t seed)
void enableErrorFunctionMAE (bool en)
void setRBMLearnParams (REAL learnrateWeights, REAL learnrateBiasVis, REAL learnrateBiasHid, REAL weightDecay, int maxEpoch)
void setNNStructure (int nrLayer, int *neuronsPerLayer, bool lastLinearLayer=false, int *layerType=0)
void printLearnrate ()
void setScaleOffset (REAL scale, REAL offset)
void setNormalTrainStopping (bool en)
void setGlobalEpochs (int e)
void enableRPROP (bool en)
void useBLASforTraining (bool enable)
void trainOneEpoch (bool *updateLayer=0)
int trainNN ()
REAL getRMSETrain ()
REAL getRMSEProbe ()
void predictSingleInput (REAL *input, REAL *output)
REAL * getWeightPtr ()
void setWeights (REAL *w)
int getNrWeights ()
int getWeightIndex (int layer, int neuron, int weight)
int getBiasIndex (int layer, int neuron)
int getOutputIndex (int layer, int neuron)
void rbmPretraining (REAL *input, REAL *target, int nSamples, int nTarget, int firstNLayer=-1, int crossRun=-1, int nFinetuningEpochs=-1, double finetuningLearnRate=0.0)
vector< int > getEncoder ()
void printMiddleLayerToFile (string fname, REAL *input, REAL *target, int nSamples, int nTarget)
void printAutoencoderWeightsToJavascript (string fname)

Public Attributes

double m_sumSquaredError
double m_sumSquaredErrorSamples
NNRBM * m_nnAuto

Private Member Functions

void saveWeights ()
REAL calcRMSE (REAL *inputs, REAL *targets, int examples)
void forwardCalculation (REAL *input)
void forwardCalculationBLAS (REAL *input)
void backpropBLAS (REAL *input, REAL *target, bool *updateLayer=0)
void backprop (REAL *input, REAL *target, bool *updateLayer=0)
REAL getInitWeight (int fanIn)

Private Attributes

int m_nrTargets
int m_nrInputs
int m_nrExamplesTrain
int m_nrExamplesProbe
REAL * m_inputsTrain
REAL * m_inputsProbe
REAL * m_targetsTrain
REAL * m_targetsProbe
REAL m_initWeightFactor
int m_globalEpochs
REAL m_RPROP_etaPos
REAL m_RPROP_etaNeg
REAL m_RPROP_updateMin
REAL m_RPROP_updateMax
REAL m_learnRate
REAL m_learnRateMin
REAL m_learnrateDecreaseRate
REAL m_learnrateDecreaseRateEpoch
REAL m_momentum
REAL m_weightDecay
REAL m_minUpdateBound
int m_batchSize
REAL m_rbmLearnrateWeights
REAL m_rbmLearnrateBiasVis
REAL m_rbmLearnrateBiasHid
REAL m_rbmWeightDecay
int m_rbmMaxEpochs
REAL m_scaleOutputs
REAL m_offsetOutputs
int m_maxEpochs
bool m_useBLAS
bool m_enableRPROP
bool m_normalTrainStopping
bool m_enableL1Regularization
bool m_errorFunctionMAE
int m_nrLayer
int * m_neuronsPerLayer
int m_nrWeights
int m_nrOutputs
int * m_nrLayWeights
int * m_nrLayWeightOffsets
REAL * m_outputs
REAL * m_outputsTmp
REAL * m_derivates
REAL * m_d1
REAL * m_weights
REAL * m_weightsTmp0
REAL * m_weightsTmp1
REAL * m_weightsTmp2
REAL * m_weightsBatchUpdate
REAL * m_weightsOld
REAL * m_weightsOldOld
REAL * m_deltaW
REAL * m_deltaWOld
REAL * m_adaptiveRPROPlRate
int * m_activationFunctionPerLayer
bool m_enableAutoencoder


Detailed Description

This is a neural network implementation with an RBM pretraining method.

The goal of this class is to provide a basic and fast class for training and prediction. It supports basic training functionality.

Memory allocation and normalization of the data (features + targets) must be done outside. This class only receives pointers to the training and probe (= validation) sets.

Forward and backward calculation can be done with loops or with vector-matrix operations (BLAS). For large nets the BLAS calculation should be used (~2x faster).

Standard training is performed with a global learnrate and stochastic gradient descent. Batch training is also possible if batchSize > 1.

This class also implements the RPROP learning algorithm. Ref: "RPROP - Description and Implementation Details", Martin Riedmiller, 1994. Technical report.

Definition at line 39 of file nnrbm.h.
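A minimal usage sketch (not part of the original documentation): the calls below only use setters and methods from the member list above. The layer sizes, learn parameters, the caller-provided data pointers, and the assumption that setNNStructure takes the number of weight layers plus an array of hidden-layer sizes are illustrative guesses; REAL is the library's floating-point typedef.

    #include <nnrbm.h>
    #include <iostream>
    using namespace std;

    // Hypothetical example: train a small net on caller-allocated, normalized data.
    // nTrain/nProbe and the four data pointers are assumed to be prepared by the
    // caller (this class never allocates or normalizes data itself).
    void trainExample ( REAL *trainInputs, REAL *trainTargets, int nTrain,
                        REAL *probeInputs, REAL *probeTargets, int nProbe )
    {
        NNRBM nn;
        nn.setNrInputs ( 100 );                 // 100 features (illustrative)
        nn.setNrTargets ( 1 );
        nn.setNrExamplesTrain ( nTrain );
        nn.setTrainInputs ( trainInputs );      // row-wise: nTrain x 100
        nn.setTrainTargets ( trainTargets );    // row-wise: nTrain x 1
        nn.setNrExamplesProbe ( nProbe );
        nn.setProbeInputs ( probeInputs );
        nn.setProbeTargets ( probeTargets );

        nn.setInitWeightFactor ( 1.0 );
        nn.setLearnrate ( 1e-3 );
        nn.setMaxEpochs ( 100 );
        nn.setScaleOffset ( 1.0, 0.0 );         // outputs used as-is
        nn.setNormalTrainStopping ( true );
        nn.useBLASforTraining ( true );         // ~2x faster for large nets

        int hidden[] = { 50, 50 };              // assumed: hidden-layer sizes only
        nn.setNNStructure ( 3, hidden, true );  // assumed: 3 weight layers, linear output
        nn.initNNWeights ( 0 );

        nn.trainNN();
        cout << "RMSE train: " << nn.getRMSETrain()
             << "  RMSE probe: " << nn.getRMSEProbe() << endl;

        REAL output[1];
        nn.predictSingleInput ( probeInputs, output );  // predict first probe row
    }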


Constructor & Destructor Documentation

NNRBM::NNRBM (  ) 

Constructor

Definition at line 8 of file nnrbm.cpp.

00009 {
00010     // init member vars
00011     m_nrTargets = 0;
00012     m_nrInputs = 0;
00013     m_nrExamplesTrain = 0;
00014     m_nrExamplesProbe = 0;
00015     m_inputsTrain = 0;
00016     m_inputsProbe = 0;
00017     m_targetsTrain = 0;
00018     m_targetsProbe = 0;
00019     m_initWeightFactor = 0;
00020     m_globalEpochs = 0;
00021     m_RPROP_etaPos = 0;
00022     m_RPROP_etaNeg = 0;
00023     m_RPROP_updateMin = 0;
00024     m_RPROP_updateMax = 0;
00025     m_learnRate = 0;
00026     m_learnRateMin = 0;
00027     m_learnrateDecreaseRate = 0;
00028     m_learnrateDecreaseRateEpoch = 0;
00029     m_momentum = 0;
00030     m_weightDecay = 0;
00031     m_minUpdateBound = 0;
00032     m_batchSize = 0;
00033     m_scaleOutputs = 0;
00034     m_offsetOutputs = 0;
00035     m_maxEpochs = 0;
00036     m_useBLAS = 0;
00037     m_enableRPROP = 0;
00038     m_normalTrainStopping = 0;
00039     m_nrLayer = 0;
00040     m_neuronsPerLayer = 0;
00041     m_nrWeights = 0;
00042     m_nrOutputs = 0;
00043     m_nrLayWeights = 0;
00044     m_outputs = 0;
00045     m_outputsTmp = 0;
00046     m_derivates = 0;
00047     m_d1 = 0;
00048     m_weights = 0;
00049     m_weightsTmp0 = 0;
00050     m_weightsTmp1 = 0;
00051     m_weightsTmp2 = 0;
00052     m_weightsBatchUpdate = 0;
00053     m_weightsOld = 0;
00054     m_weightsOldOld = 0;
00055     m_deltaW = 0;
00056     m_deltaWOld = 0;
00057     m_adaptiveRPROPlRate = 0;
00058     m_enableL1Regularization = 0;
00059     m_errorFunctionMAE = 0;
00060     m_activationFunctionPerLayer = 0;
00061     m_enableAutoencoder = 0;
00062     m_nrLayWeightOffsets = 0;
00063     m_sumSquaredError = 0.0;
00064     m_sumSquaredErrorSamples = 0.0;
00065     m_rbmLearnrateWeights = 0.0;
00066     m_rbmLearnrateBiasVis = 0.0;
00067     m_rbmLearnrateBiasHid = 0.0;
00068     m_rbmWeightDecay = 0.0;
00069     m_rbmMaxEpochs = 0;
00070 }

NNRBM::~NNRBM (  ) 

Destructor

Definition at line 75 of file nnrbm.cpp.

00076 {
00077     if ( m_neuronsPerLayer )
00078         delete[] m_neuronsPerLayer;
00079     m_neuronsPerLayer = 0;
00080     if ( m_nrLayWeights )
00081         delete[] m_nrLayWeights;
00082     m_nrLayWeights = 0;
00083     if ( m_outputs )
00084         delete[] m_outputs;
00085     m_outputs = 0;
00086     if ( m_outputsTmp )
00087         delete[] m_outputsTmp;
00088     m_outputsTmp = 0;
00089     if ( m_derivates )
00090         delete[] m_derivates;
00091     m_derivates = 0;
00092     if ( m_d1 )
00093         delete[] m_d1;
00094     m_d1 = 0;
00095     if ( m_weights )
00096         delete[] m_weights;
00097     m_weights = 0;
00098     if ( m_weightsTmp0 )
00099         delete[] m_weightsTmp0;
00100     m_weightsTmp0 = 0;
00101     if ( m_weightsTmp1 )
00102         delete[] m_weightsTmp1;
00103     m_weightsTmp1 = 0;
00104     if ( m_weightsTmp2 )
00105         delete[] m_weightsTmp2;
00106     m_weightsTmp2 = 0;
00107     if ( m_weightsBatchUpdate )
00108         delete[] m_weightsBatchUpdate;
00109     m_weightsBatchUpdate = 0;
00110     if ( m_weightsOld )
00111         delete[] m_weightsOld;
00112     m_weightsOld = 0;
00113     if ( m_weightsOldOld )
00114         delete[] m_weightsOldOld;
00115     m_weightsOldOld = 0;
00116     if ( m_deltaW )
00117         delete[] m_deltaW;
00118     m_deltaW = 0;
00119     if ( m_deltaWOld )
00120         delete[] m_deltaWOld;
00121     m_deltaWOld = 0;
00122     if ( m_adaptiveRPROPlRate )
00123         delete[] m_adaptiveRPROPlRate;
00124     m_adaptiveRPROPlRate = 0;
00125     if ( m_activationFunctionPerLayer )
00126         delete[] m_activationFunctionPerLayer;
00127     m_activationFunctionPerLayer = 0;
00128     if ( m_nrLayWeightOffsets )
00129         delete[] m_nrLayWeightOffsets;
00130     m_nrLayWeightOffsets = 0;
00131 }


Member Function Documentation

void NNRBM::backprop ( REAL *  input,
REAL *  target,
bool *  updateLayer = 0 
) [private]

Calculate the weight updates in the whole net with standard formulas (speed optimized), according to the backprop rule. Weight updates are stored in m_deltaW

Parameters:
input Input vector
target Target values (vector)
updateLayer Optional per-layer flags; if updateLayer[i] is false, the weight updates of layer i are skipped
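In the notation of the code comments below (A_j' is the activation derivative, o the neuron output, t the scaled target), the stored deltas correspond to the standard backprop rule:

\delta_j = (o_j - t_j)\,A_j' \qquad \text{(output layer)}
\delta_j = A_j' \sum_k \delta_k\, w_{kj} \qquad \text{(hidden layers)}
\Delta w_{ji} = \delta_j\, o_i \qquad \text{(entry stored in m\_deltaW)}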

Definition at line 1629 of file nnrbm.cpp.

01630 {
01631     REAL sum0, d1;
01632 
01633     int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1;  // -1 for bias and output neuron
01634     int n0 = m_neuronsPerLayer[m_nrLayer-1];
01635     int outputOffsetPrev = outputOffset - n0 - 1;
01636     int outputOffsetNext = outputOffset;
01637 
01638     int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer];
01639     int weightOffsetNext, nP1;
01640 
01641     REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error;
01642 
01643     // ================== the output neuron:  d(j)=(b-o(j))*Aj' ==================
01644     for ( int i=0;i<m_nrTargets;i++ )
01645     {
01646         double out = m_outputs[outputOffset+i];
01647 
01648         REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i];
01649         m_sumSquaredError += errorTrain * errorTrain;
01650         m_sumSquaredErrorSamples += 1.0;
01651 
01652         targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs;
01653         error = out - targetConverted;
01654 
01655         if ( m_errorFunctionMAE )
01656             error = error > 0.0? 1.0 : -1.0;
01657         d1 = error * m_derivates[outputOffset+i];
01658         m_d1[outputOffset+i] = d1;
01659 
01660         if ( updateLayer )
01661             if ( updateLayer[m_nrLayer] == false )
01662                 continue;
01663 
01664         deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 );
01665         if ( m_nrLayer==1 )
01666         {
01667             outputsPtr = input - 1;
01668             deltaWPtr[0] = d1;
01669             for ( int j=1;j<n0+1;j++ )
01670                 deltaWPtr[j] = d1 * outputsPtr[j];
01671         }
01672         else
01673         {
01674             outputsPtr = m_outputs + outputOffsetPrev;
01675             for ( int j=0;j<n0+1;j++ )
01676                 deltaWPtr[j] = d1 * outputsPtr[j];
01677         }
01678 
01679     }
01680 
01681     // ================== all other neurons in the net ==================
01682     outputOffsetNext = outputOffset;  // next to current
01683     outputOffset = outputOffsetPrev;  // current to prev
01684     n0 = m_neuronsPerLayer[m_nrLayer-2];
01685     outputOffsetPrev -= n0 + 1;  // prev newnrInputs_
01686     weightOffset -= m_nrLayWeights[m_nrLayer-1];  // offset to weight pointer
01687     weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer];
01688 
01689     for ( int i=m_nrLayer-1;i>0;i-- ) // all layers from output to input
01690     {
01691         int n = m_neuronsPerLayer[i];
01692         int nNext = m_neuronsPerLayer[i+1];
01693         int nPrev = m_neuronsPerLayer[i-1];
01694         nP1 = n+1;
01695 
01696         d1Ptr0 = m_d1 + outputOffsetNext;
01697         derivatesPtr = m_derivates + outputOffset;
01698         weightsPtr = m_weights + weightOffsetNext;
01699         d1Ptr = m_d1 + outputOffset;
01700         deltaWPtr = m_deltaW + weightOffset;
01701         if ( i==1 )
01702             outputsPtr = input - 1;
01703         else
01704             outputsPtr = m_outputs + outputOffsetPrev;
01705 
01706         bool updateWeights = true;
01707         if ( updateLayer )
01708             if ( updateLayer[i] == false )
01709                 updateWeights = false;
01710 
01711         for ( int j=0;j<n;j++ ) // every neuron in the layer
01712         {
01713             // calc d1
01714             sum0 = 0.0;
01715             for ( int k=0;k<nNext;k++ ) // all neurons in the next layer:  d(j)=Aj'*Sum(k,d(k)*w(k,j))
01716                 sum0 += d1Ptr0[k] * weightsPtr[k*nP1];
01717             sum0 *= *derivatesPtr;
01718             d1Ptr[j] = sum0;
01719 
01720             if ( updateWeights )
01721             {
01722                 // weight updates
01723                 if ( i==1 )
01724                 {
01725                     deltaWPtr[0] = sum0;
01726                     for ( int k=1;k<nPrev+1;k++ )
01727                         deltaWPtr[k] = sum0 * outputsPtr[k];
01728                 }
01729                 else
01730                 {
01731                     for ( int k=0;k<nPrev+1;k++ )
01732                         deltaWPtr[k] = sum0 * outputsPtr[k];
01733                 }
01734             }
01735 
01736             deltaWPtr += nPrev+1;
01737             weightsPtr++;
01738             derivatesPtr++;
01739         }
01740 
01741         outputOffsetNext = outputOffset;  // next to current
01742         outputOffset = outputOffsetPrev;  // current to prev
01743         n0 = m_neuronsPerLayer[i-2];
01744         outputOffsetPrev -= n0 + 1;  // prev new
01745         weightOffset -= m_nrLayWeights[i-1];  // offset to weight pointer
01746         weightOffsetNext -= m_nrLayWeights[i];
01747     }
01748 
01749 }

void NNRBM::backpropBLAS ( REAL *  input,
REAL *  target,
bool *  updateLayer = 0 
) [private]

Calculate the weight updates in the whole net with BLAS (MKL), according to the backprop rule. Weight updates are stored in m_deltaW

Parameters:
input Input vector
target Target values (vector)
updateLayer Optional per-layer flags; if updateLayer[i] is false, the weight updates of layer i are skipped

Definition at line 1497 of file nnrbm.cpp.

01498 {
01499     REAL sum0, d1;
01500 
01501     int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1;  // -1 for bias and output neuron
01502     int n0 = m_neuronsPerLayer[m_nrLayer-1];
01503     int outputOffsetPrev = outputOffset - n0 - 1;
01504     int outputOffsetNext = outputOffset;
01505 
01506     int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer];
01507     int weightOffsetNext, nP1;
01508 
01509     REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error;
01510 
01511     // ================== the output neuron:  d(j)=(b-o(j))*Aj' ==================
01512     for ( int i=0;i<m_nrTargets;i++ )
01513     {
01514         REAL out = m_outputs[outputOffset+i];
01515 
01516         REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i];
01517         m_sumSquaredError += errorTrain * errorTrain;
01518         m_sumSquaredErrorSamples += 1.0;
01519 
01520         targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs;
01521 
01522         // cross-entropy error: E = - target * log(out/target)
01523         // dE/dout = - target * (1/out) * out'
01524         //if(out < 1e-6)
01525         //    out = 1e-6;
01526         //error = - targetConverted / out;
01527 
01528         /*REAL eps = 1e-6;
01529         out = (out < eps)? eps : out;
01530         out = (out > 1.0-eps)? 1.0-eps : out;
01531         error = (out-targetConverted)/(out*(1.0-out));*/
01532 
01533         error = out - targetConverted;
01534 
01535         if ( m_errorFunctionMAE )
01536             error = error > 0.0? 1.0 : -1.0;
01537 
01538         d1 = error * m_derivates[outputOffset+i];
01539         m_d1[outputOffset+i] = d1;
01540 
01541         if ( updateLayer )
01542             if ( updateLayer[m_nrLayer] == false )
01543                 continue;
01544 
01545         deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 );
01546         if ( m_nrLayer==1 )
01547         {
01548             outputsPtr = input - 1;
01549             deltaWPtr[0] = d1;
01550             for ( int j=1;j<n0+1;j++ )
01551                 deltaWPtr[j] = d1 * outputsPtr[j];
01552         }
01553         else
01554         {
01555             outputsPtr = m_outputs + outputOffsetPrev;
01556             for ( int j=0;j<n0+1;j++ )
01557                 deltaWPtr[j] = d1 * outputsPtr[j];
01558         }
01559     }
01560 
01561     // ================== all other neurons in the net ==================
01562     outputOffsetNext = outputOffset;  // next to current
01563     outputOffset = outputOffsetPrev;  // current to prev
01564     n0 = m_neuronsPerLayer[m_nrLayer-2];
01565     outputOffsetPrev -= n0 + 1;  // prev newnrInputs_
01566     weightOffset -= m_nrLayWeights[m_nrLayer-1];  // offset to weight pointer
01567     weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer];
01568 
01569     for ( int i=m_nrLayer-1;i>0;i-- ) // all layers from output to input
01570     {
01571         int n = m_neuronsPerLayer[i];
01572         int nNext = m_neuronsPerLayer[i+1];
01573         int nPrev = m_neuronsPerLayer[i-1];
01574         nP1 = n+1;
01575 
01576         d1Ptr0 = m_d1 + outputOffsetNext;
01577         derivatesPtr = m_derivates + outputOffset;
01578         weightsPtr = m_weights + weightOffsetNext;
01579         d1Ptr = m_d1 + outputOffset;
01580         deltaWPtr = m_deltaW + weightOffset;
01581         if ( i==1 )
01582             outputsPtr = input;
01583         else
01584             outputsPtr = m_outputs + outputOffsetPrev;
01585 
01586         // d(j) = SUM(d(k)*w(k,j))
01587         CBLAS_GEMV ( CblasRowMajor, CblasTrans, nNext, n, 1.0, weightsPtr, nP1, d1Ptr0, 1, 0.0, d1Ptr, 1 );  // d1(j) =W_T*d1(k)
01588         V_MUL ( n, d1Ptr, derivatesPtr, d1Ptr );
01589 
01590         bool updateWeights = true;
01591         if ( updateLayer )
01592             if ( updateLayer[i] == false )
01593                 updateWeights = false;
01594 
01595         // every neuron in the layer calc weight update
01596         for ( int j=0;j<n;j++ )
01597         {
01598             if ( updateWeights )
01599             {
01600                 if ( i==1 )
01601                 {
01602                     V_COPY ( outputsPtr, deltaWPtr+1, nPrev );
01603                     deltaWPtr[0] = 1.0;
01604                 }
01605                 else
01606                     V_COPY ( outputsPtr, deltaWPtr, nPrev+1 );
01607                 V_MULC ( deltaWPtr, d1Ptr[j], deltaWPtr, nPrev+1 );
01608             }
01609             deltaWPtr += nPrev+1;
01610         }
01611 
01612         outputOffsetNext = outputOffset;  // next to current
01613         outputOffset = outputOffsetPrev;  // current to prev
01614         n0 = m_neuronsPerLayer[i-2];
01615         outputOffsetPrev -= n0 + 1;  // prev new
01616         weightOffset -= m_nrLayWeights[i-1];  // offset to weight pointer
01617         weightOffsetNext -= m_nrLayWeights[i];
01618     }
01619 }

REAL NNRBM::calcRMSE ( REAL *  inputs,
REAL *  targets,
int  examples 
) [private]

Calculate the RMSE over a given input/target set with the current neural net weights

Parameters:
inputs Input vectors (row wise)
targets Target vectors (row wise)
examples Number of examples
Returns:
RMSE on this set
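For reference, the value computed over the set is

\mathrm{RMSE} = \sqrt{\frac{1}{N \cdot T}\sum_{i=1}^{N}\sum_{j=1}^{T}\bigl(\hat{y}_{ij} - y_{ij}\bigr)^2}

with N the number of examples, T the number of targets, and \hat{y} the predictions obtained from predictSingleInput.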

Definition at line 2271 of file nnrbm.cpp.

02272 {
02273     double rmse = 0.0;
02274     for ( int i=0;i<examples;i++ )
02275     {
02276         REAL* inputPtr = inputs + i * m_nrInputs;
02277         REAL* targetPtr = targets + i * m_nrTargets;
02278 
02279         predictSingleInput ( inputPtr, m_outputsTmp );
02280 
02281         for ( int j=0;j<m_nrTargets;j++ )
02282             rmse += ( m_outputsTmp[j] - targetPtr[j] ) * ( m_outputsTmp[j] - targetPtr[j] );
02283     }
02284     rmse = sqrt ( rmse/ ( double ) ( examples*m_nrTargets ) );
02285     return rmse;
02286 }

void NNRBM::enableErrorFunctionMAE ( bool  en  ) 

Enable the mean absolute error (MAE) error function

Parameters:
en Enable flag

Definition at line 138 of file nnrbm.cpp.

00139 {
00140     m_errorFunctionMAE = en;
00141     cout<<"errorFunctionMAE:"<<m_errorFunctionMAE<<endl;
00142 }

void NNRBM::enableRPROP ( bool  en  ) 

Enable the RPROP learning algorithm (1st-order type). Ref: "RPROP - Description and Implementation Details", Martin Riedmiller, 1994

Attention: This must be called before setNNStructure

Parameters:
en Enables RPROP learning schema
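As a reminder of the referenced algorithm (a sketch of the standard RPROP rule, not verified line by line against nnrbm.cpp), each weight keeps an individual step size \Delta_{ij} that is adapted from the sign of successive gradients g_{ij} = \partial E / \partial w_{ij}:

\Delta_{ij}^{(t)} = \begin{cases}
  \min\bigl(\eta^{+}\,\Delta_{ij}^{(t-1)},\ \Delta_{\max}\bigr) & \text{if } g_{ij}^{(t-1)} g_{ij}^{(t)} > 0\\
  \max\bigl(\eta^{-}\,\Delta_{ij}^{(t-1)},\ \Delta_{\min}\bigr) & \text{if } g_{ij}^{(t-1)} g_{ij}^{(t)} < 0\\
  \Delta_{ij}^{(t-1)} & \text{otherwise}
\end{cases}
\qquad
w_{ij} \leftarrow w_{ij} - \operatorname{sign}\bigl(g_{ij}^{(t)}\bigr)\,\Delta_{ij}^{(t)}

Here \eta^{+}/\eta^{-} are set via setRPROPPosNeg and \Delta_{\min}/\Delta_{\max} via setRPROPMinMaxUpdate.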

Definition at line 421 of file nnrbm.cpp.

00422 {
00423     m_enableRPROP = en;
00424     cout<<"enableRPROP: "<<m_enableRPROP<<endl;
00425 }

void NNRBM::forwardCalculation ( REAL *  input  )  [private]

Forward calculation through the net (with loops). Outputs are stored in m_outputs, the first derivatives are stored in m_derivates

Parameters:
input Input vector
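Per neuron, the loop below computes the weighted sum plus bias and applies the layer's activation function; with the activation types used in the code:

a_j = f\Bigl(b_j + \sum_i w_{ji}\,x_i\Bigr), \qquad
f(x) = \frac{1}{1+e^{-x}}\ \ (\text{type 0},\ f' = f(1-f)), \quad
f(x) = x\ \ (\text{type 1},\ f' = 1), \quad
f(x) = \tanh(x)\ \ (\text{type 2},\ f' = 1 - f^2)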

Definition at line 1860 of file nnrbm.cpp.

01861 {
01862     int outputOffset = m_neuronsPerLayer[0] + 1, outputOffsetPrev = 0;
01863     REAL tmp0, tmp1, sum0;
01864     REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights;
01865     for ( int i=0;i<m_nrLayer;i++ ) // to all layer
01866     {
01867         int n = m_neuronsPerLayer[i+1];
01868         int nprev = m_neuronsPerLayer[i] + 1;
01869         int loopOffset = i==0? 1 : 0;
01870         int activationType = m_activationFunctionPerLayer[i+1];
01871         ptr0 = m_outputs + outputOffset;
01872         ptr1 = m_derivates + outputOffset;
01873         if ( i==0 )
01874             outputPtr = input - loopOffset;
01875         else
01876             outputPtr = m_outputs + outputOffsetPrev;
01877         
01878         for ( int j=0;j<n;j++ ) // all neurons in this layer
01879         {
01880             sum0 = i==0? weightPtr[0] : 0.0;  // dot product sum, for inputlayer: init with bias
01881             for ( int k=loopOffset;k<nprev;k++ ) // calc dot product
01882                 sum0 += weightPtr[k] * outputPtr[k];
01883             weightPtr += nprev;
01884             if ( activationType == 0 ) // sigmoid: f(x)=1/(1+exp(-x))
01885             {
01886                 tmp0 = exp ( -sum0 );
01887                 ptr1[j] = tmp0/ ( ( 1.0 + tmp0 ) * ( 1.0 + tmp0 ) );  // derivation
01888                 ptr0[j] = 1.0 / ( 1.0 + tmp0 );  // activation function
01889             }
01890             else if ( activationType == 1 ) // linear: f(x)=x
01891             {
01892                 ptr0[j] = sum0;
01893                 ptr1[j] = 1.0;
01894             }
01895             else if ( activationType == 2 ) // tanh: f(x)=tanh(x)
01896             {
01897                 tmp0 = tanh ( sum0 );
01898                 ptr0[j] = tmp0;
01899                 ptr1[j] = ( 1.0 - tmp0*tmp0 );
01900             }
01901             else
01902                 assert ( false );
01903         }
01904         outputOffset += n+1;  // this points to first neuron in current layer
01905         outputOffsetPrev += nprev;  // this points to first neuron in previous layer
01906     }
01907 }

void NNRBM::forwardCalculationBLAS ( REAL *  input  )  [private]

Forward calculation through the net with BLAS and VML (MKL). Outputs are stored in m_outputs, the first derivatives are stored in m_derivates

Parameters:
input Input vector

Definition at line 1758 of file nnrbm.cpp.

01759 {
01760     int outputOffset = m_neuronsPerLayer[0]+1, outputOffsetPrev = 0;
01761     REAL tmp0, tmp1, sum0;
01762     REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights;
01763 
01764     for ( int i=0;i<m_nrLayer;i++ ) // to all layer
01765     {
01766         int n = m_neuronsPerLayer[i+1];
01767         int nprev = m_neuronsPerLayer[i] + 1;
01768         int inputOffset = 0;
01769         int activationType = m_activationFunctionPerLayer[i+1];
01770         ptr0 = m_outputs + outputOffset;
01771         ptr1 = m_derivates + outputOffset;
01772         if ( i==0 )
01773         {
01774             outputPtr = input;
01775             inputOffset = 1;
01776         }
01777         else
01778             outputPtr = m_outputs + outputOffsetPrev;
01779 
01780         // WeightMatrix*InputVec = Outputs
01781         CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, n, nprev - inputOffset, 1.0, weightPtr + inputOffset, nprev, outputPtr, 1, 0.0, ptr0, 1 );
01782         if ( inputOffset )
01783         {
01784             for ( int j=0;j<n;j++ )
01785                 ptr0[j] += weightPtr[j * nprev];
01786         }
01787 
01788         // activation fkt
01789         if ( activationType == 0 ) // sigmoid: f(x)=1/(1+exp(-x))
01790         {
01791             for ( int j=0;j<n;j++ )
01792                 ptr0[j] = -ptr0[j];
01793             V_EXP ( ptr0, n );
01794             for ( int j=0;j<n;j++ )
01795             {
01796                 ptr0[j] = 1.0 / ( 1.0 + ptr0[j] );  // sigmoid
01797                 ptr1[j] = ptr0[j] * ( 1.0 - ptr0[j] );  // derivative
01798             }
01799         }
01800         else if ( activationType == 1 ) // linear: f(x)=x,  df(x)/dx=1
01801         {
01802             for ( int j=0;j<n;j++ )
01803                 ptr1[j] = 1.0;
01804         }
01805         else if ( activationType == 2 ) // tanh: f(x)=tanh(x)
01806         {
01807             V_TANH ( n, ptr0, ptr0 );  // m_outputs = tanh(m_outputs)
01808             V_SQR ( n, ptr0, ptr1 );  // ptr1[j] = ptr0[j]*ptr0[j]
01809             for ( int j=0;j<n;j++ )
01810                 ptr1[j] = 1.0 - ptr1[j];
01811         }
01812         else if ( activationType == 3 ) // softmax: f(x)=exp(x)/sum(exp(xi))  (where xi are all outputs on this layer)
01813         {
01814             V_EXP ( ptr0, n );
01815 
01816             REAL sumOut = 0.0, invSumOut;
01817             for ( int j=0;j<n;j++ )
01818                 sumOut += ptr0[j];
01819 
01820             invSumOut = 1.0 / sumOut;
01821 
01822             for ( int j=0;j<n;j++ )
01823                 ptr1[j] = ptr0[j] * ( sumOut - ptr0[j] ) * invSumOut * invSumOut;  // derivative
01824 
01825             for ( int j=0;j<n;j++ )
01826                 ptr0[j] *= invSumOut;  // output
01827 
01828             /*
01829             REAL sumOut = 0.0, invSumOut;
01830             for(int j=0;j<n;j++)
01831                 sumOut += exp(ptr0[j]);
01832             invSumOut = 1.0 / sumOut;
01833             invSumOut = invSumOut * invSumOut;
01834             for(int j=0;j<n;j++)
01835                 ptr1[j] = invSumOut * ( sumOut * exp(ptr0[j]) - exp(ptr0[j])*exp(ptr0[j]) );  // derivative
01836             // normalize output: outputs are probabilites
01837             for(int j=0;j<n;j++)
01838                 ptr0[j] = exp(ptr0[j]) / sumOut;
01839             */
01840         }
01841         else
01842             assert ( false );
01843 
01844         // update index
01845         weightPtr += n*nprev;
01846         outputOffset += n+1;  // this points to first neuron in current layer
01847         outputOffsetPrev += nprev;  // this points to first neuron in previous layer
01848 
01849     }
01850 
01851 }

int NNRBM::getBiasIndex ( int  layer,
int  neuron 
)

Get the index of a bias weight:

  • m_weights[ind]

Parameters:
layer Layer of the weight
neuron Neuron number
Returns:
ind, the index into m_weights

Definition at line 674 of file nnrbm.cpp.

00675 {
00676     if ( layer == 0 )
00677         assert ( false );
00678 
00679     int nrNeur = m_neuronsPerLayer[layer];
00680     int nrNeurPrev = m_neuronsPerLayer[layer-1];
00681     if ( neuron >= nrNeur )
00682     {
00683         cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
00684         assert ( false );
00685     }
00686     int ind = m_nrLayWeightOffsets[layer];
00687     if ( layer == 1 ) // input layer
00688         ind += neuron* ( nrNeurPrev + 1 );
00689     else
00690         ind += nrNeurPrev + neuron* ( nrNeurPrev + 1 );
00691 
00692     if ( ind >= m_nrWeights )
00693     {
00694         cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
00695         assert ( false );
00696     }
00697 
00698     return ind;
00699 }

vector< int > NNRBM::getEncoder (  ) 

Return the neuron counts per layer of the encoder part of the configuration (input layer up to the code layer); the last element holds the total number of encoder weights

Returns:
vector of int with the neuron counts, followed by the encoder weight count
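A short sketch of interpreting the returned vector (nn is an NNRBM instance; the split into neuron counts and the trailing weight count follows the implementation below):

    vector<int> enc = nn.getEncoder();
    // enc[0 .. enc.size()-2]: neurons per layer up to the code (middle) layer
    // enc.back():             total number of weights in the encoder part
    int nrEncoderWeights = enc.back();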

Definition at line 729 of file nnrbm.cpp.

00730 {
00731     vector<int> v;
00732     cout<<"Encoder:"<<endl;
00733     int midLayer = m_nrLayer/2 + 1;
00734     int weightCnt = 0;
00735     for ( int i=0;i<midLayer;i++ )
00736     {
00737         v.push_back ( m_neuronsPerLayer[i] );
00738         weightCnt += m_nrLayWeights[i];
00739         cout<<"neurons|weights: "<<m_neuronsPerLayer[i]<<"|"<<m_nrLayWeights[i]<<endl;
00740     }
00741     v.push_back ( weightCnt );
00742     return v;
00743 }

REAL NNRBM::getInitWeight ( int  fanIn  )  [private]

Return a random (uniform) initial weight for a given number of input connections to this neuron, following the 1/sqrt(fanIn) rule (from Yann LeCun)

Parameters:
fanIn The number of input connections for this neuron
Returns:
Weight init value (uniform random)
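In formula form (the factor m_initWeightFactor is applied afterwards in initNNWeights):

w \sim \mathcal{U}\!\left(-\frac{1}{\sqrt{\mathrm{fanIn}}},\ +\frac{1}{\sqrt{\mathrm{fanIn}}}\right)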

Definition at line 571 of file nnrbm.cpp.

00572 {
00573     double nr = 2.0* ( rand() / ( double ) RAND_MAX-0.5 );  // -1 .. +1
00574     return ( 1.0/sqrt ( ( double ) fanIn ) ) * nr;
00575 }

int NNRBM::getNrWeights (  ) 

Returns the total number of weights

Returns:
Number of weights in this net (total, including bias weights)

Definition at line 2335 of file nnrbm.cpp.

02336 {
02337     return m_nrWeights;
02338 }

int NNRBM::getOutputIndex ( int  layer,
int  neuron 
)

Get the index of the output

  • m_outputs[ind]

Parameters:
layer Output on layer
neuron Neuron number
Returns:
ind, the index into m_outputs

Definition at line 709 of file nnrbm.cpp.

00710 {
00711     if ( layer == 0 || layer > m_nrLayer )
00712         assert ( false );
00713 
00714     if ( neuron >= m_neuronsPerLayer[layer] )
00715         assert ( false );
00716 
00717     int ind = 0;
00718     for ( int i=0;i<layer;i++ )
00719         ind += m_neuronsPerLayer[i] + 1;
00720 
00721     return ind + neuron;
00722 }

REAL NNRBM::getRMSEProbe (  ) 

Evaluate the probe error

Returns:
RMSE on the probe set

Definition at line 2303 of file nnrbm.cpp.

02304 {
02305     return calcRMSE ( m_inputsProbe, m_targetsProbe, m_nrExamplesProbe );
02306 }

REAL NNRBM::getRMSETrain (  ) 

Evaluate the train error

Returns:
RMSE on training set

Definition at line 2293 of file nnrbm.cpp.

02294 {
02295     return calcRMSE ( m_inputsTrain, m_targetsTrain, m_nrExamplesTrain );
02296 }

int NNRBM::getWeightIndex ( int  layer,
int  neuron,
int  weight 
)

Get the index of a weight:

  • m_weights[ind]

Parameters:
layer Layer of the weight
neuron Neuron number
weight Weight number (index of the source neuron in the previous layer)
Returns:
ind, the index into m_weights
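A small sketch of reading a single weight and the corresponding bias through these index helpers (nn and the layer/neuron numbers are illustrative):

    REAL* w = nn.getWeightPtr();
    // weight from neuron 3 of layer 0 into neuron 7 of layer 1
    REAL weight17_3 = w[ nn.getWeightIndex ( 1, 7, 3 ) ];
    // bias of neuron 7 in layer 1
    REAL bias17 = w[ nn.getBiasIndex ( 1, 7 ) ];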

Definition at line 633 of file nnrbm.cpp.

00634 {
00635     if ( layer == 0 )
00636         assert ( false );
00637 
00638     int nrNeur = m_neuronsPerLayer[layer];
00639     int nrNeurPrev = m_neuronsPerLayer[layer-1];
00640     if ( neuron >= nrNeur )
00641     {
00642         cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
00643         assert ( false );
00644     }
00645     if ( weight >= nrNeurPrev )
00646     {
00647         cout<<"weight:"<<weight<<" nrNeurPrev:"<<nrNeurPrev<<endl;
00648         assert ( false );
00649     }
00650 
00651     int ind = m_nrLayWeightOffsets[layer];
00652     if ( layer == 1 ) // input layer
00653         ind += 1 + weight + neuron* ( nrNeurPrev + 1 );
00654     else
00655         ind += weight + neuron* ( nrNeurPrev + 1 );
00656 
00657     if ( ind >= m_nrWeights )
00658     {
00659         cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
00660         assert ( false );
00661     }
00662 
00663     return ind;
00664 }

REAL * NNRBM::getWeightPtr (  ) 

Returns the pointer to the neural net weights (linearly aligned from input to output)

Returns:
Pointer to NNRBM weights

Definition at line 2313 of file nnrbm.cpp.

02314 {
02315     return m_weights;
02316 }

void NNRBM::initNNWeights ( time_t  seed  ) 

Initialize all weights in the net

Parameters:
seed The random seed (the same seed gives exactly the same weight initialization)

Definition at line 582 of file nnrbm.cpp.

00583 {
00584     srand ( seed );
00585     cout<<"init weights "<<endl;
00586     REAL factor = m_initWeightFactor;
00587     int cnt = 0;
00588     for ( int i=0;i<m_nrLayer;i++ ) // through all layers
00589     {
00590         int n = m_neuronsPerLayer[i+1];
00591         int nprev = m_neuronsPerLayer[i] + 1;  // +1 for bias
00592         for ( int j=0;j<n;j++ ) // all neurons per layer
00593         {
00594             for ( int k=0;k<nprev;k++ ) // all weights from this neuron
00595             {
00596                 m_weights[cnt] = m_weightsOld[i] = m_weightsOldOld[i] = getInitWeight ( nprev ) * factor;
00597                 cnt++;
00598             }
00599         }
00600     }
00601 
00602     // check the number
00603     if ( cnt != m_nrWeights )
00604         assert ( false );
00605 }

void NNRBM::predictSingleInput ( REAL *  input,
REAL *  output 
)

Predict the output for an input vector with the neural net. The current m_weights are used to perform the forward calculation through the net

Parameters:
input Input vector (pointer)
output Output vector (pointer)
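A minimal prediction sketch (nn is a trained NNRBM instance; the buffer sizes must match the configured net, the sizes shown are illustrative):

    REAL input[100];   // m_nrInputs values, already normalized by the caller
    REAL output[1];    // m_nrTargets values
    // ... fill input with feature values ...
    nn.predictSingleInput ( input, output );
    cout << "prediction: " << output[0] << endl;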

Definition at line 2247 of file nnrbm.cpp.

02248 {
02249     REAL* inputPtr = input;
02250 
02251     // forward
02252     if ( m_useBLAS )
02253         forwardCalculationBLAS ( inputPtr );
02254     else
02255         forwardCalculation ( inputPtr );
02256 
02257     // output correction
02258     REAL* outputPtr = m_outputs + m_nrOutputs - m_nrTargets - 1;
02259     for ( int i=0;i<m_nrTargets;i++ )
02260         output[i] = outputPtr[i] * m_scaleOutputs + m_offsetOutputs;
02261 }

void NNRBM::printLearnrate (  ) 

Print the learn rate

Definition at line 2226 of file nnrbm.cpp.

02227 {
02228     cout<<"lRate:"<<m_learnRate<<" "<<flush;
02229 }

void NNRBM::printMiddleLayerToFile ( string  fname,
REAL *  input,
REAL *  target,
int  nSamples,
int  nTarget 
)

Dummy function for printing the predictions from the middle (code) layer

Parameters:
fname Filename
input input matrix
target target matrix
nSamples nr of samples (rows)
nTarget nr of targets (cols of target)

Definition at line 1465 of file nnrbm.cpp.

01466 {
01467     fstream f0 ( fname.c_str(),ios::out );
01468     REAL* p = new REAL[m_nrInputs];
01469     for ( int i=0;i<nSamples;i++ )
01470     {
01471         m_nnAuto->predictSingleInput ( input + i*m_nrInputs, p );
01472         
01473         for ( int j=0;j<nTarget;j++ )
01474             f0<<target[j+i*nTarget]<<" ";
01475         
01476         int midLayer = m_nrLayer - 1;
01477         for ( int j=0;j<m_nnAuto->m_neuronsPerLayer[midLayer];j++ )
01478             f0<<m_nnAuto->m_outputs[m_nnAuto->getOutputIndex ( midLayer, j ) ]<<" ";
01479         
01480         //for ( int j=0;j<m_nrInputs;j++ )
01481         //    f0<<p[j]<<" ";
01482         
01483         f0<<endl;
01484     }
01485     delete[] p;
01486     f0.close();
01487 }

void NNRBM::rbmPretraining ( REAL *  input,
REAL *  target,
int  nSamples,
int  nTarget,
int  firstNLayer = -1,
int  crossRun = -1,
int  nFinetuningEpochs = -1,
double  finetuningLearnRate = 0.0 
)

Greedy layer-wise pretraining. See G. Hinton, http://www.cs.toronto.edu/~hinton/MatlabForSciencePaper.html. This is directly translated from Matlab code

Parameters:
input input matrix (row wise)
target target matrix
nSamples number of samples
nTarget number of targets
firstNLayer layer at which the autoencoder ends
crossRun index of the current cross run
nFinetuningEpochs number of epochs for gradient-descent-based finetuning
finetuningLearnRate learnrate in the finetuning phase
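The per-sample weight and bias updates applied in the layer-wise training loop below (see the UPDATE WEIGHTS block) correspond to one contrastive-divergence step, with v the data vector, h = poshidprobs, \tilde{v} = negdata (the reconstruction) and \tilde{h} = neghidprobs:

\Delta w_{ji} = \epsilon_w\bigl(v_i h_j - \tilde{v}_i \tilde{h}_j - \lambda\, w_{ji}\bigr), \qquad
\Delta b^{vis}_i = \epsilon_{vb}\,(v_i - \tilde{v}_i), \qquad
\Delta b^{hid}_j = \epsilon_{hb}\,(h_j - \tilde{h}_j)

where the learnrates \epsilon_w, \epsilon_{vb}, \epsilon_{hb} and the weight decay \lambda are set via setRBMLearnParams.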

Definition at line 759 of file nnrbm.cpp.

00760 {
00761     cout<<endl<<endl<<"=== set structure for fine tuning of the unrolled autoencoder ==="<<endl;
00762     m_nnAuto = new NNRBM();
00763     m_nnAuto->setInitWeightFactor ( 0.0 );
00764     m_nnAuto->setScaleOffset ( m_scaleOutputs, m_offsetOutputs );
00765     m_nnAuto->setNrTargets ( m_nrInputs );
00766     m_nnAuto->setNrInputs ( m_nrInputs );
00767     m_nnAuto->setNrExamplesTrain ( nSamples );
00768     m_nnAuto->setTrainInputs ( input );
00769     m_nnAuto->setTrainTargets ( input );
00770 
00771     // learn parameters
00772     m_nnAuto->setInitWeightFactor ( m_initWeightFactor );
00773     if ( finetuningLearnRate != 0.0 )
00774         m_nnAuto->setLearnrate ( finetuningLearnRate );
00775     else
00776         m_nnAuto->setLearnrate ( m_learnRate );
00777     m_nnAuto->setLearnrateMinimum ( m_learnRateMin );
00778     m_nnAuto->setLearnrateSubtractionValueAfterEverySample ( m_learnrateDecreaseRate );
00779     m_nnAuto->setLearnrateSubtractionValueAfterEveryEpoch ( m_learnrateDecreaseRateEpoch );
00780     m_nnAuto->setMomentum ( m_momentum );
00781     m_nnAuto->setWeightDecay ( 0.0 );
00782     m_nnAuto->setMinUpdateErrorBound ( m_minUpdateBound );
00783     m_nnAuto->setBatchSize ( m_batchSize );
00784     m_nnAuto->setMaxEpochs ( m_maxEpochs );
00785     m_nnAuto->setL1Regularization ( m_enableL1Regularization );
00786     m_nnAuto->enableErrorFunctionMAE ( m_errorFunctionMAE );
00787     m_nnAuto->setNormalTrainStopping ( true );
00788     m_nnAuto->useBLASforTraining ( m_useBLAS );
00789 
00790 
00791     int nLayer = m_nrLayer;
00792     if ( firstNLayer != -1 )
00793         nLayer = firstNLayer;
00794 
00795     int* layerConfig = new int[2* ( nLayer-1 ) ];
00796     for ( int i=0;i<2* ( nLayer-1 ) - 1;i++ )
00797     {
00798         if ( i < nLayer-1 )
00799             layerConfig[i] = m_neuronsPerLayer[i+1];
00800         else
00801             layerConfig[i] = m_neuronsPerLayer[2* ( nLayer-1 ) - 1 - i];
00802     }
00803     m_nnAuto->m_enableAutoencoder = true;
00804     m_nnAuto->setNNStructure ( 2* ( nLayer-1 ), layerConfig );
00805     m_nnAuto->initNNWeights ( 0 );
00806 
00807 
00808     cout<<endl<<"=== RBM pretraining ("<<nSamples<<" samples) ==="<<endl;
00809     int weightOffset = 0;
00810     REAL* batchdata = new REAL[nSamples*m_neuronsPerLayer[0]];
00811     for ( int j=0;j<nSamples*m_neuronsPerLayer[0];j++ )
00812         batchdata[j] = input[j];
00813 
00814     // all layers from output to input
00815     for ( int i=0;i<nLayer-1;i++ )
00816     {
00817         int nVis = m_neuronsPerLayer[i];
00818         int nHid = m_neuronsPerLayer[i+1];
00819         cout<<endl<<"layer:"<<i<<" nVis:"<<nVis<<" nHid:"<<nHid<<endl;
00820 
00821         // probabilities, biases and other
00822         REAL* batchdataNext = new REAL[nSamples*nHid];
00823         REAL* vishid = new REAL[nVis*nHid];
00824         REAL* visbias = new REAL[nVis];
00825         REAL* hidbias = new REAL[nHid];
00826         REAL* poshidprobs = new REAL[nHid];
00827         REAL* neghidprobs = new REAL[nHid];
00828         REAL* hidbiasinc = new REAL[nHid];
00829         REAL* visbiasinc = new REAL[nVis];
00830         REAL* poshidstates = new REAL[nHid];
00831         REAL* negdata = new REAL[nVis];
00832         REAL* neghidact = new REAL[nHid];
00833         REAL* negvisact = new REAL[nVis];
00834         REAL* poshidact = new REAL[nHid];
00835         REAL* posvisact = new REAL[nVis];
00836         REAL* probs = new REAL[nHid];
00837         REAL constant = 0.0;
00838         //REAL constant = -1.0;
00839         for ( int j=0;j<nVis;j++ )
00840         {
00841             visbias[j] = constant;
00842             visbiasinc[j] = constant;
00843             negdata[j] = constant;
00844         }
00845         for ( int j=0;j<nHid;j++ )
00846         {
00847             hidbias[j] = constant;
00848             hidbiasinc[j] = constant;
00849             poshidprobs[j] = constant;
00850             neghidprobs[j] = constant;
00851             poshidstates[j] = constant;
00852         }
00853         for ( int j=0;j<nVis*nHid;j++ )
00854         {
00855             vishid[j] = constant;
00856         }
00857 
00858         // init weights
00859         unsigned int seed = rand();
00860         cout<<"seed:"<<seed<<endl;
00861         //GAUSS_DISTRIBUTION(vishid, nVis*nHid, 0.0, 1.0/(double)sqrt((double)nVis), &seed);
00862         GAUSS_DISTRIBUTION ( vishid, nVis*nHid, 0.0, 0.1, &seed );
00863         time_t t0 = time ( 0 );
00864 
00865 
00866         // layer-wise training
00867         for ( int epoch=0;epoch<m_rbmMaxEpochs;epoch++ )
00868         {
00869             double errsum = 0.0;
00870             int progress = nSamples/10 + 1, errcnt = 0;
00871 
00872             for ( int sample=0;sample<nSamples;sample++ )
00873             {
00874                 // data vector
00875                 REAL* data = batchdata + nVis * sample;
00876                 REAL min = 1e10, max = -1e10, mean = 0.0;
00877                 for ( int k=0;k<nVis;k++ )
00878                 {
00879                     if ( min > data[k] )
00880                         min = data[k];
00881                     if ( max < data[k] )
00882                         max = data[k];
00883                     mean += data[k];
00884                 }
00885                 mean /= ( double ) nVis;
00886 
00887                 if ( min > 1.0 || min < 0.0 )
00888                 {
00889                     cout<<"min:"<<min<<endl;
00890                     assert ( false );
00891                 }
00892                 if ( max > 1.0 || max < 0.0 )
00893                 {
00894                     cout<<"max:"<<max<<endl;
00895                     assert ( false );
00896                 }
00897 
00898 
00899                 // ===== POSITIVE PHASE =====
00900                 // this is like forward calculation
00901                 // after calc the weighted sum per neuron, the values of the neurons get sampled
00902 
00903                 // poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1)));
00904 
00905                 // calc: poshidprobs = vishid * data
00906 
00907                 //for(int k=0;k<nHid;k++)
00908                 //{
00909                 //    REAL sum = 0.0;
00910                 //    for(int l=0;l<nVis;l++)
00911                 //        sum += data[l] * vishid[l + k*nVis];
00912                 //    poshidprobs[k] = 1.0 / (1.0 + exp(-(sum+hidbias[k])));
00913                 //}
00914 
00915                 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, nHid, nVis, 1.0, vishid, nVis, data, 1, 0.0, poshidprobs, 1 );
00916 
00917                 //for(int k=0;k<nHid;k++)
00918                 //    poshidprobs[k] = 1.0 / (1.0 + exp(-(poshidprobs[k]+hidbias[k])));
00919                 V_ADD ( nHid, poshidprobs, hidbias, poshidprobs );  // poshidprobs[k]+hidbias[k]
00920                 V_MULCI ( -1.0, poshidprobs, nHid );             // *(-1)
00921                 V_EXP ( poshidprobs, nHid );                     // exp(x)
00922                 V_ADDCI ( 1.0, poshidprobs, nHid );              // +1
00923                 V_INV ( nHid, poshidprobs, poshidprobs );        // inv(x)
00924 
00925                 // batchposhidprobs(:,:,batch)=poshidprobs;
00926 
00927                 // posprods    = data' * poshidprobs;
00928                 //for(int k=0;k<nHid;k++)
00929                 //    for(int l=0;l<nVis;l++)
00930                 //        posprods[l+k*nVis] = data[l] * poshidprobs[k];
00931 
00932                 // poshidact   = sum(poshidprobs);
00933 
00934                 //for(int k=0;k<nHid;k++)
00935                 //    poshidact[k] = poshidprobs[k];
00936                 V_COPY ( poshidprobs, poshidact, nHid );   // poshidact = poshidprobs
00937 
00938                 // posvisact = sum(data);
00939                 //for(int k=0;k<nVis;k++)
00940                 //    posvisact[k] = data[k];
00941                 V_COPY ( data, posvisact, nVis );   // posvisact = data
00942 
00943                 // poshidstates = poshidprobs > rand(numcases,numhid);
00944                 for ( int k=0;k<nHid;k++ )
00945                     probs[k] = ( double ) rand() / ( double ) RAND_MAX;
00946                 //UNIFORM_DISTRIBUTION(probs, nHid, 0.0, 1.0, seed);
00947 
00948                 for ( int k=0;k<nHid;k++ )
00949                 {
00950                     if ( poshidprobs[k] > probs[k] )
00951                         poshidstates[k] = 1.0;
00952                     else
00953                         poshidstates[k] = 0.0;
00954                 }
00955 
00956 
00957                 // ===== NEGATIVE PHASE =====
00958                 // reconstruct the input sample, based on binary representation in the hidden layer
00959                 // use the reconstruction error as signal for learning
00960 
00961                 //negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1)));
00962                 //for(int k=0;k<nVis;k++)
00963                 //{
00964                 //    REAL sum = 0.0;
00965                 //    for(int l=0;l<nHid;l++)
00966                 //        sum += poshidstates[l] * vishid[k + l*nVis];
00967                 //    negdata[k] = sum;//1.0 / (1.0 + exp(-(sum+visbias[k])));
00968                 //}
00969 
00970                 CBLAS_GEMV ( CblasRowMajor, CblasTrans, nHid, nVis, 1.0, vishid, nVis, poshidstates, 1, 0.0, negdata, 1 );
00971 
00972                 //for(int k=0;k<nVis;k++)
00973                 //    negdata[k] = 1.0 / (1.0 + exp(-(negdata[k]+visbias[k])));
00974                 V_ADD ( nVis, negdata, visbias, negdata );  // negdata[k]+visbias[k]
00975                 V_MULCI ( -1.0, negdata, nVis );             // *(-1)
00976                 V_EXP ( negdata, nVis );                     // exp(x)
00977                 V_ADDCI ( 1.0, negdata, nVis );              // +1
00978                 V_INV ( nVis, negdata, negdata );        // inv(x)
00979 
00980                 //neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1)));
00981                 //for(int k=0;k<nHid;k++)
00982                 //{
00983                 //    REAL sum = 0.0;
00984                 //    for(int l=0;l<nVis;l++)
00985                 //        sum += negdata[l] * vishid[l + k*nVis];
00986                 //    neghidprobs[k] = 1.0 / (1.0 + exp(-(sum+hidbias[k])));
00987                 //}
00988 
00989                 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, nHid, nVis, 1.0, vishid, nVis, negdata, 1, 0.0, neghidprobs, 1 );
00990 
00991                 //for(int k=0;k<nHid;k++)
00992                 //    neghidprobs[k] = 1.0 / (1.0 + exp(-(neghidprobs[k]+hidbias[k])));
00993                 V_ADD ( nHid, neghidprobs, hidbias, neghidprobs );  // neghidprobs[k]+hidbias[k]
00994                 V_MULCI ( -1.0, neghidprobs, nHid );             // *(-1)
00995                 V_EXP ( neghidprobs, nHid );                     // exp(x)
00996                 V_ADDCI ( 1.0, neghidprobs, nHid );              // +1
00997                 V_INV ( nHid, neghidprobs, neghidprobs );        // inv(x)
00998 
00999                 //negprods  = negdata'*neghidprobs;
01000                 //for(int k=0;k<nHid;k++)
01001                 //    for(int l=0;l<nVis;l++)
01002                 //        negprods[l+k*nVis] = negdata[l] * neghidprobs[k];
01003 
01004                 //neghidact = sum(neghidprobs);
01005                 //for(int k=0;k<nHid;k++)
01006                 //    neghidact[k] = neghidprobs[k];
01007                 V_COPY ( neghidprobs, neghidact, nHid );   // neghidact = neghidprobs
01008 
01009                 //negvisact = sum(negdata);
01010                 //REAL negvisact = 0.0;
01011                 //for(int k=0;k<nVis;k++)
01012                 //    negvisact[k] = negdata[k];
01013                 V_COPY ( negdata, negvisact, nVis );   // negvisact = negdata
01014 
01015                 //err= sum(sum( (data-negdata).^2 ));
01016                 double err = 0.0;
01017                 for ( int k=0;k<nVis;k++ )
01018                     err += ( data[k] - negdata[k] ) * ( data[k] - negdata[k] );
01019 
01020                 if ( isnan ( err ) || isinf ( err ) )
01021                     cout<<endl;
01022 
01023                 errsum = err + errsum;
01024                 errcnt += nVis;
01025 
01026 
01027                 // ===== UPDATE WEIGHTS =====
01028                 //vishidinc = momentum*vishidinc + epsilonw*( (posprods-negprods)/numcases - weightcost*vishid);
01029                 //visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact);
01030                 //hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact);
01031                 //vishid = vishid + vishidinc;
01032                 //visbiases = visbiases + visbiasinc;
01033                 //hidbiases = hidbiases + hidbiasinc;
01034 
01035                 for ( int k=0;k<nHid;k++ )
01036                     for ( int l=0;l<nVis;l++ )
01037                         vishid[l+k*nVis] += m_rbmLearnrateWeights* ( ( data[l] * poshidprobs[k] - negdata[l] * neghidprobs[k] ) - m_rbmWeightDecay*vishid[l+k*nVis] );
01038 
01039                 //for(int k=0;k<nHid*nVis;k++)
01040                 //    vishid[k] += m_rbmLearnrateWeights*(posprods[k]-negprods[k] - m_rbmWeightDecay*vishid[k]);
01041 
01042                 for ( int k=0;k<nVis;k++ )
01043                     visbias[k] += m_rbmLearnrateBiasVis * ( posvisact[k]-negvisact[k] );
01044                 for ( int k=0;k<nHid;k++ )
01045                     hidbias[k] += m_rbmLearnrateBiasHid * ( poshidact[k]-neghidact[k] );
01046 
01047             }
01048             for ( int sample=0;sample<nSamples;sample++ )
01049             {
01050                 REAL* data = batchdata + nVis * sample;
01051                 // save for next batch
01052                 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, nHid, nVis, 1.0, vishid, nVis, data, 1, 0.0, poshidprobs, 1 );
01053                 for ( int k=0;k<nHid;k++ )
01054                     poshidprobs[k] = 1.0 / ( 1.0 + exp ( - ( poshidprobs[k]+hidbias[k] ) ) );
01055                 REAL* batchPtr = batchdataNext + sample * nHid;
01056                 for ( int k=0;k<nHid;k++ )
01057                     batchPtr[k] = poshidprobs[k];
01058             }
01059             cout<<"epoch:"<<epoch+1<<"/"<<m_rbmMaxEpochs<<"  errsum:"<<errsum<<"  errcnt:"<<errcnt<<"  mse:"<<errsum/ ( double ) errcnt<<"  "<<time ( 0 )-t0<<"[s]"<<endl;
01060             t0 = time ( 0 );
01061         }
01062 
01063         int nrWeightsHere = m_nrLayWeights[i];
01064         weightOffset += nrWeightsHere;
01065         int offset = weightOffset;
01066         cout<<"weight offset:"<<offset<<"  offsetNext:"<<weightOffset<<endl;
01067         REAL* weightPtr = m_weights + offset;
01068 
01069         // copy weights to net
01070         for ( int j=0;j<nHid;j++ )
01071             for ( int k=0;k<nVis;k++ )
01072                 m_weights[getWeightIndex ( i + 1, j, k ) ] = vishid[k + j*nVis];
01073         // copy weights to nnAuto
01074         REAL* nnAutoWeights = m_nnAuto->getWeightPtr();
01075         for ( int j=0;j<nHid;j++ )
01076             for ( int k=0;k<nVis;k++ )
01077             {
01078                 nnAutoWeights[m_nnAuto->getWeightIndex ( i + 1, j, k ) ] = vishid[k + j*nVis];
01079                 nnAutoWeights[m_nnAuto->getWeightIndex ( 2* ( nLayer-1 ) - i, k, j ) ] = vishid[k + j*nVis];
01080             }
01081 
01082         // copy biases to net
01083         for ( int j=0;j<nHid;j++ )
01084             m_weights[getBiasIndex ( i + 1, j ) ] = hidbias[j];
01085         // copy biases to nnAuto
01086         for ( int j=0;j<nHid;j++ )
01087             nnAutoWeights[m_nnAuto->getBiasIndex ( i + 1, j ) ] = hidbias[j];
01088         for ( int j=0;j<nVis;j++ )
01089             nnAutoWeights[m_nnAuto->getBiasIndex ( 2* ( nLayer-1 ) - i, j ) ] = visbias[j];
01090 
01091         /*
01092         // check it forward calc
01093         int noutOffset = 0;
01094         for(int j=0;j<i;j++)
01095             noutOffset += m_neuronsPerLayer[j] + 1;
01096         int sample = rand() % nSamples;
01097         cout<<"sample:"<<sample<<endl;
01098         // input and data vector
01099         REAL* data = batchdata + nVis * sample;
01100         REAL* in = input + m_neuronsPerLayer[0] * sample;
01101         // print NN outputs
01102         cout<<"===NN Outputs==="<<endl;
01103         forwardCalculationBLAS(in);
01104         int of = m_neuronsPerLayer[i]+1 + noutOffset;
01105         for(int j=0;j<nHid;j++)
01106             cout<<m_outputs[j+of]<<" ";
01107         cout<<endl;
01108         // print RBM output
01109         REAL* batchPtr = batchdataNext + sample * nHid;
01110         cout<<"===Poshidprobs==="<<endl;
01111         for(int k=0;k<nHid;k++)
01112             cout<<batchPtr[k]<<" ";
01113         cout<<endl<<endl;
01114         */
01115 
01116 
01117         /*
01118         if(i==0) // input layer
01119         {
01120             // copy biases
01121             for(int j=0;j<nHid;j++)
01122             {
01123                 int ind = j*(nVis+1);
01124                 assert(ind+offset<m_nrWeights);
01125                 weightPtr[ind] = hidbias[j];
01126             }
01127             // copy weights
01128             for(int j=0;j<nHid;j++)
01129                 for(int k=0;k<nVis;k++)
01130                 {
01131                     int ind = 1 + k + j*(nVis+1);
01132                     assert(ind+offset<m_nrWeights);
01133                     weightPtr[ind] = vishid[k + j*nVis];
01134                 }
01135         }
01136         else
01137         {
01138             // copy biases
01139             for(int j=0;j<nHid;j++)
01140             {
01141                 int ind = nVis + j*(nVis+1);
01142                 assert(ind+offset<m_nrWeights);
01143                 weightPtr[ind] = hidbias[j];
01144             }
01145             // copy weights
01146             for(int j=0;j<nHid;j++)
01147                 for(int k=0;k<nVis;k++)
01148                 {
01149                     int ind = k + j*(nVis+1);
01150                     assert(ind+offset<m_nrWeights);
01151                     weightPtr[ind] = vishid[k + j*nVis];
01152                 }
01153         }
01154         */
01155 
01156         /*
01157         int checkEnd = offset + m_nrLayWeights[i+1];
01158         cout<<"check end:"<<checkEnd<<endl;
01159         for(int j=0;j<checkEnd;j++)
01160         if(m_weights[j] == (float)0.01)
01161         {
01162             cout<<"j:"<<j<<endl;
01163             assert(false);
01164         }
01165         */
01166 
01167         /*
01168         // check it forward calc
01169         noutOffset = 0;
01170         for(int j=0;j<i;j++)
01171             noutOffset += m_neuronsPerLayer[j] + 1;
01172         //for(int sample=0;sample<nSamples&&sample<3;sample++)
01173         cout<<"sample:"<<sample<<endl;
01174         {
01175             // input and data vector
01176             REAL* data = batchdata + nVis * sample;
01177             REAL* in = input + m_neuronsPerLayer[0] * sample;
01178 
01179             // print NN outputs
01180             cout<<"===NN Outputs==="<<endl;
01181             forwardCalculationBLAS(in);
01182             int of = m_neuronsPerLayer[i]+1 + noutOffset;
01183             for(int j=0;j<nHid;j++)
01184                 cout<<m_outputs[j+of]<<" ";
01185             cout<<endl;
01186 
01187             // print RBM output
01188             REAL* batchPtr = batchdataNext + sample * nHid;
01189             cout<<"===Poshidprobs==="<<endl;
01190             for(int k=0;k<nHid;k++)
01191                 cout<<batchPtr[k]<<" ";
01192 
01193             cout<<endl<<endl;
01194         }
01195         */
01196 
01197         delete[] batchdata;
01198         batchdata = new REAL[nSamples*nHid];
01199         for ( int j=0;j<nSamples*nHid;j++ )
01200             batchdata[j] = batchdataNext[j];
01201         delete[] batchdataNext;
01202         delete[] vishid;
01203         delete[] visbias;
01204         delete[] hidbias;
01205         delete[] poshidprobs;
01206         delete[] neghidprobs;
01207         delete[] hidbiasinc;
01208         delete[] visbiasinc;
01209         delete[] poshidstates;
01210         delete[] negdata;
01211         delete[] neghidact;
01212         delete[] negvisact;
01213         delete[] poshidact;
01214         delete[] posvisact;
01215         delete[] probs;
01216     }
01217 
01218     delete[] batchdata;
01219 
01220     // check if all weights have values
01221     for ( int i=0;i<m_nnAuto->getNrWeights();i++ )
01222     {
01223         REAL* p = m_nnAuto->getWeightPtr();
01224         if ( p[i] == 0.0 || isnan ( p[i] ) )
01225         {
01226             cout<<"p["<<i<<"]:"<<p[i]<<endl;
01227             assert ( false );
01228         }
01229     }
01230 
01231     cout<<"Weights summary from neural net:"<<endl;
01232     for ( int i=0;i<m_nrLayer;i++ )
01233     {
01234         int n = m_neuronsPerLayer[i+1];
01235         int nIn = m_neuronsPerLayer[i];
01236         cout<<"#neur:"<<n<<" #neurPrev:"<<nIn<<" ";
01237 
01238         // weights
01239         REAL min = 1e10, max = -1e10;
01240         double sum = 0.0, cnt = 0.0, sum2 = 0.0;
01241 
01242         for ( int j=0;j<n;j++ )
01243             for ( int k=0;k<nIn;k++ )
01244             {
01245                 REAL v = m_weights[getWeightIndex ( i + 1, j, k ) ];
01246                 if ( min > v )
01247                     min = v;
01248                 if ( max < v )
01249                     max = v;
01250                 sum += v;
01251                 sum2 += v*v;
01252                 cnt += 1.0;
01253             }
01254         cout<<" weights: cnt|min|max|mean|std: "<<cnt<<"|"<<min<<"|"<<max<<"|"<<sum/cnt<<"|"<<sqrt ( sum2/cnt- ( sum/cnt ) * ( sum/cnt ) ) <<endl;
01255 
01256         min = 1e10;
01257         max = -1e10;
01258         sum = 0.0;
01259         cnt = 0.0;
01260         sum2 = 0.0;
01261         // biases
01262         for ( int j=0;j<n;j++ )
01263         {
01264             REAL v = m_weights[getBiasIndex ( i + 1, j ) ];
01265             if ( min > v )
01266                 min = v;
01267             if ( max < v )
01268                 max = v;
01269             sum += v;
01270             sum2 += v*v;
01271             cnt += 1.0;
01272         }
01273         cout<<"                       biases: cnt|min|max|mean|std: "<<cnt<<"|"<<min<<"|"<<max<<"|"<<sum/cnt<<"|"<<sqrt ( sum2/cnt- ( sum/cnt ) * ( sum/cnt ) ) <<endl;
01274 
01275     }
01276 
01277     // dummy print of middle layer
01278     /*if ( crossRun == 0 )
01279     {
01280         fstream f0 ( "tmp/r0.txt",ios::out );
01281         REAL* p = new REAL[m_nrInputs];
01282         for ( int i=0;i<nSamples;i++ )
01283         {
01284             m_nnAuto->predictSingleInput ( input + i*m_nrInputs, p );
01285 
01286             for ( int j=0;j<nTarget;j++ )
01287                 f0<<target[j+i*nTarget]<<" ";
01288 
01289             int midLayer = nLayer - 1;
01290             for ( int j=0;j<m_nnAuto->m_neuronsPerLayer[midLayer];j++ )
01291                 f0<<m_nnAuto->m_outputs[m_nnAuto->getOutputIndex ( midLayer, j ) ]<<" ";
01292             f0<<endl;
01293 
01294         }
01295         delete[] p;
01296         f0.close();
01297     }*/
01298 
01299     // finetuning of the autoencoder with backprop
01300     if ( nFinetuningEpochs != -1 )
01301     {
01302         cout<<"=== begin: finetuning ==="<<endl;
01303         m_nnAuto->m_normalTrainStopping = false;
01304         m_nnAuto->m_maxEpochs = nFinetuningEpochs;
01305         if ( nFinetuningEpochs > 0 )
01306             m_nnAuto->trainNN();
01307         cout<<"=== end: finetuning ==="<<endl;
01308         /*
01309         cout<<endl<<"Adjust bias and weights in coding layer to met the mean=0 and std=1 condition"<<endl;
01310         int nCodeNeurons = m_neuronsPerLayer[nLayer-1];
01311         cout<<"#codeNeurons:"<<nCodeNeurons<<endl;
01312         double* mean = new double[nCodeNeurons];
01313         double* std = new double[nCodeNeurons];
01314         REAL* p = new REAL[m_nrInputs];
01315         for(int i=0;i<nCodeNeurons;i++)
01316         {
01317             mean[i] = 0.0;
01318             std[i] = 0.0;
01319         }
01320         for(int i=0;i<nSamples;i++)
01321         {
01322             m_nnAuto->predictSingleInput(input + i*m_nrInputs, p);
01323             for(int j=0;j<nCodeNeurons;j++)
01324             {
01325                 REAL v = m_nnAuto->m_outputs[m_nnAuto->getOutputIndex(nLayer-1, j)];
01326                 mean[j] += v;
01327                 std[j] += v*v;
01328             }
01329         }
01330         for(int i=0;i<nCodeNeurons;i++)
01331         {
01332             mean[i] /= (double)nSamples;
01333             std[i] /= (double)nSamples;
01334             std[i] = sqrt(std[i] - mean[i]*mean[i]);
01335         }
01336         cout<<endl<<"[no correction]:"<<endl;
01337         for(int i=0;i<nCodeNeurons;i++)
01338             cout<<"codeLayer "<<i<<":  mean:"<<mean[i]<<"  std:"<<std[i]<<endl;
01339 
01340         // correct the coding weights
01341         // copy weights : from nnAuto to net
01342         for(int j=0;j<nCodeNeurons;j++)
01343             for(int k=0;k<m_neuronsPerLayer[nLayer-2];k++)
01344                 m_nnAuto->m_weights[m_nnAuto->getWeightIndex(nLayer-1,j,k)] /= std[j];
01345         // copy biases : from nnAuto to net
01346         //for(int j=0;j<nCodeNeurons;j++)
01347         //    m_nnAuto->m_weights[getBiasIndex(nLayer-1,j)] -= mean[j]/std[j];
01348 
01349         for(int i=0;i<nCodeNeurons;i++)
01350         {
01351             mean[i] = 0.0;
01352             std[i] = 0.0;
01353         }
01354         for(int i=0;i<nSamples;i++)
01355         {
01356             m_nnAuto->predictSingleInput(input + i*m_nrInputs, p);
01357             for(int j=0;j<nCodeNeurons;j++)
01358             {
01359                 REAL v = m_nnAuto->m_outputs[m_nnAuto->getOutputIndex(nLayer-1, j)];
01360                 mean[j] += v;
01361                 std[j] += v*v;
01362             }
01363         }
01364         for(int i=0;i<nCodeNeurons;i++)
01365         {
01366             mean[i] /= (double)nSamples;
01367             std[i] /= (double)nSamples;
01368             std[i] = sqrt(std[i] - mean[i]*mean[i]);
01369         }
01370         cout<<endl<<"[with corrected std]:"<<endl;
01371         for(int i=0;i<nCodeNeurons;i++)
01372             cout<<"codeLayer "<<i<<":  mean:"<<mean[i]<<"  std:"<<std[i]<<endl;
01373 
01374         // correct the coding weights
01375         // copy weights : from nnAuto to net
01376         //for(int j=0;j<nCodeNeurons;j++)
01377         //    for(int k=0;k<m_neuronsPerLayer[nLayer-2];k++)
01378         //        m_nnAuto->m_weights[m_nnAuto->getWeightIndex(nLayer-1,j,k)] /= std[j];
01379         // copy biases : from nnAuto to net
01380         for(int j=0;j<nCodeNeurons;j++)
01381             m_nnAuto->m_weights[getBiasIndex(nLayer-1,j)] -= mean[j];
01382 
01383         for(int i=0;i<nCodeNeurons;i++)
01384         {
01385             mean[i] = 0.0;
01386             std[i] = 0.0;
01387         }
01388         for(int i=0;i<nSamples;i++)
01389         {
01390             m_nnAuto->predictSingleInput(input + i*m_nrInputs, p);
01391             for(int j=0;j<nCodeNeurons;j++)
01392             {
01393                 REAL v = m_nnAuto->m_outputs[m_nnAuto->getOutputIndex(nLayer-1, j)];
01394                 mean[j] += v;
01395                 std[j] += v*v;
01396             }
01397         }
01398         for(int i=0;i<nCodeNeurons;i++)
01399         {
01400             mean[i] /= (double)nSamples;
01401             std[i] /= (double)nSamples;
01402             std[i] = sqrt(std[i] - mean[i]*mean[i]);
01403         }
01404         cout<<endl<<"[with corrected std and mean]:"<<endl;
01405         for(int i=0;i<nCodeNeurons;i++)
01406             cout<<"codeLayer "<<i<<":  mean:"<<mean[i]<<"  std:"<<std[i]<<endl;
01407 
01408         delete[] p;
01409         delete[] mean;
01410         delete[] std;
01411         */
01412 
01413         cout<<endl<<"Copy the autoencoder weights to the net"<<endl;
01414         cout<<"#weights:"<<m_nrLayWeightOffsets[nLayer]<<endl;
01415         for ( int i=0;i<nLayer;i++ )
01416         {
01417             cout<<"layer "<<i<<": #neurons:"<<m_neuronsPerLayer[i]<<"  #neuronsAuto:"<<m_nnAuto->m_neuronsPerLayer[i]<<endl;
01418             if ( i > 0 )
01419             {
01420                 // copy weights : from nnAuto to net
01421                 for ( int j=0;j<m_neuronsPerLayer[i];j++ )
01422                     for ( int k=0;k<m_neuronsPerLayer[i-1];k++ )
01423                         m_weights[getWeightIndex ( i,j,k ) ] = m_nnAuto->m_weights[m_nnAuto->getWeightIndex ( i,j,k ) ];
01424                 // copy biases : from nnAuto to net
01425                 for ( int j=0;j<m_neuronsPerLayer[i];j++ )
01426                     m_weights[getBiasIndex ( i,j ) ] = m_nnAuto->m_weights[m_nnAuto->getBiasIndex ( i,j ) ];
01427             }
01428         }
01429 
01430     }
01431 
01432     // dummy print of middle layer
01433     /*if ( crossRun == 0 )
01434     {
01435         fstream f0 ( "tmp/r1.txt",ios::out );
01436         REAL* p = new REAL[m_nrInputs];
01437         for ( int i=0;i<nSamples;i++ )
01438         {
01439             m_nnAuto->predictSingleInput ( input + i*m_nrInputs, p );
01440 
01441             for ( int j=0;j<nTarget;j++ )
01442                 f0<<target[j+i*nTarget]<<" ";
01443 
01444             int midLayer = nLayer - 1;
01445             for ( int j=0;j<m_nnAuto->m_neuronsPerLayer[midLayer];j++ )
01446                 f0<<m_nnAuto->m_outputs[m_nnAuto->getOutputIndex ( midLayer, j ) ]<<" ";
01447             f0<<endl;
01448 
01449         }
01450         delete[] p;
01451         f0.close();
01452     }
01453     */
01454 }

void NNRBM::saveWeights (  )  [private]

Save the network weights (currently a placeholder; the function body is empty)

Definition at line 2235 of file nnrbm.cpp.

02236 {
02237     // nothing here
02238 }

void NNRBM::setBatchSize ( int  size  ) 

Set the batch size. Weights are updated after the gradients of a whole batch have been summed. If the batch size is smaller than 2, training is plain stochastic gradient descent

Parameters:
size The batch size (1..trainExamples)

Definition at line 314 of file nnrbm.cpp.

00315 {
00316     m_batchSize = size;
00317     cout<<"batchSize: "<<m_batchSize<<endl;
00318 }
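
A rough illustration of the two modes (assuming an NNRBM instance net whose data and structure are set up elsewhere; the batch size of 32 is purely illustrative):

    NNRBM net;
    // ... data and structure setup omitted ...
    net.setBatchSize ( 1 );    // < 2: pure stochastic gradient descent, update per sample
    // or
    net.setBatchSize ( 32 );   // mini-batch: weights updated after 32 accumulated gradients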

void NNRBM::setGlobalEpochs ( int  e  ) 

Set the global epoch counter. This can be used to reset the number of epochs to 0

Parameters:
e Number of epochs

Definition at line 2217 of file nnrbm.cpp.

02218 {
02219     m_globalEpochs = e;
02220 }

void NNRBM::setInitWeightFactor ( REAL  factor  ) 

Set the initial weight factor (scales the 1/sqrt(fanIn) initialization rule)

Parameters:
factor The correction factor

Definition at line 235 of file nnrbm.cpp.

00236 {
00237     m_initWeightFactor = factor;
00238     cout<<"initWeightFactor: "<<m_initWeightFactor<<endl;
00239 }
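
A minimal sketch of the assumed initialization rule; the exact random distribution used by getInitWeight() is not shown in this section, so the function below (initWeightSketch) is an illustration only and assumes the REAL typedef from nnrbm.h:

    #include <cstdlib>
    #include <cmath>

    // Assumed form of the fan-in rule: w = factor * r / sqrt(fanIn),
    // with r a small random number in [-1, 1] (the real distribution may differ).
    REAL initWeightSketch ( int fanIn, REAL factor )
    {
        REAL r = 2.0 * ( ( REAL ) rand() / RAND_MAX ) - 1.0;
        return factor * r / sqrt ( ( REAL ) fanIn );
    }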

void NNRBM::setL1Regularization ( bool  en  ) 

Enable L1 regularization (disables the L2 weight decay)

Parameters:
en true=enabled

Definition at line 407 of file nnrbm.cpp.

00408 {
00409     m_enableL1Regularization = en;
00410     cout<<"enableL1Regularization: "<<m_enableL1Regularization<<endl;
00411 }

void NNRBM::setLearnrate ( REAL  learnrate  ) 

Set the global learnrate eta

Parameters:
learnrate Learnrate eta

Definition at line 246 of file nnrbm.cpp.

00247 {
00248     m_learnRate = learnrate;
00249     cout<<"learnRate: "<<m_learnRate<<endl;
00250 }

void NNRBM::setLearnrateMinimum ( REAL  learnrateMin  ) 

Set the lower bound of the learnrate when it is decreased per sample or per epoch

Parameters:
learnrateMin Lower bound of learnrate

Definition at line 257 of file nnrbm.cpp.

00258 {
00259     m_learnRateMin = learnrateMin;
00260     cout<<"learnRateMin: "<<m_learnRateMin<<endl;
00261 }

void NNRBM::setLearnrateSubtractionValueAfterEveryEpoch ( REAL  learnrateDecreaseRate  ) 

Set the value that is subtracted from the learning rate after every training epoch

Parameters:
learnrateDecreaseRate This value is subtracted from the learnrate after every training epoch

Definition at line 280 of file nnrbm.cpp.

00281 {
00282     m_learnrateDecreaseRateEpoch = learnrateDecreaseRate;
00283     cout<<"learnrateDecreaseRateEpoch: "<<m_learnrateDecreaseRateEpoch<<endl;
00284 }

void NNRBM::setLearnrateSubtractionValueAfterEverySample ( REAL  learnrateDecreaseRate  ) 

Set the value that is subtracted from the learning rate after every training example

Parameters:
learnrateDecreaseRate This value is subtracted from the learnrate after every training example

Definition at line 268 of file nnrbm.cpp.

00269 {
00270     m_learnrateDecreaseRate = learnrateDecreaseRate;
00271     cout<<"learnrateDecreaseRate: "<<m_learnrateDecreaseRate<<endl;
00272 }
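
Together with setLearnrate() and setLearnrateMinimum(), these setters define a simple linear decay schedule. A sketch with illustrative values, assuming an already configured NNRBM instance net:

    net.setLearnrate ( 1e-3 );                                   // starting eta
    net.setLearnrateMinimum ( 1e-5 );                            // eta never drops below this
    net.setLearnrateSubtractionValueAfterEverySample ( 0.0 );    // no per-sample decrease
    net.setLearnrateSubtractionValueAfterEveryEpoch ( 1e-5 );    // eta -= 1e-5 after each epoch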

void NNRBM::setMaxEpochs ( int  epochs  ) 

Set the maximum number of training epochs; when maxEpochs is reached, training stops

Parameters:
epochs Max. number of train epochs on trainingset

Definition at line 336 of file nnrbm.cpp.

00337 {
00338     m_maxEpochs = epochs;
00339     cout<<"maxEpochs: "<<m_maxEpochs<<endl;
00340 }

void NNRBM::setMinUpdateErrorBound ( REAL  minUpdateBound  ) 

Set the minimum RMSE improvement between two successive training epochs; if the improvement falls below this bound, training stops

Parameters:
minUpdateBound The minimum RMSE improvement below which training stops

Definition at line 325 of file nnrbm.cpp.

00326 {
00327     m_minUpdateBound = minUpdateBound;
00328     cout<<"minUpdateBound: "<<m_minUpdateBound<<endl;
00329 }
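
The stop criteria are typically set together. A sketch with illustrative values (net is an assumed, already configured NNRBM instance):

    net.setMaxEpochs ( 300 );              // hard upper bound on the number of epochs
    net.setMinUpdateErrorBound ( 1e-6 );   // stop when the probe RMSE improves by less than this
    net.setNormalTrainStopping ( true );   // enable early stopping on the probe set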

void NNRBM::setMomentum ( REAL  momentum  ) 

Set the momentum value. The momentum term blends the weight update of the previous step into the current one

Parameters:
momentum The momentum value (0..1). Typical value is 0.1

Definition at line 291 of file nnrbm.cpp.

00292 {
00293     m_momentum = momentum;
00294     cout<<"momentum: "<<m_momentum<<endl;
00295 }
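
The momentum blending applied in trainOneEpoch() can be summarized per weight as follows; the sketch below only names the quantities (update = current scaled gradient step, lastUpdate = remembered previous step), the value 0.1 is the typical choice mentioned above:

    // update[j]     = (1 - momentum) * update[j] + momentum * lastUpdate[j];
    // lastUpdate[j] = update[j];
    // weights[j]   -= update[j];
    net.setMomentum ( 0.1 );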

void NNRBM::setNNStructure ( int  nrLayer,
int *  neuronsPerLayer,
bool  lastLinearLayer = false,
int *  layerType = 0 
)

Set the inner structure: the number of layers and the number of neurons per layer

Parameters:
nrLayer Number of layers (1=output layer only, 2=one hidden layer, 3=two hidden layers)
neuronsPerLayer Integer pointer to the number of neurons per hidden layer
lastLinearLayer If true, the output layer uses a linear activation function
layerType Optional pointer to per-layer activation types (0=sigmoid, 1=linear, 2=tanh, 3=softmax)

Definition at line 446 of file nnrbm.cpp.

00447 {
00448     m_nrLayer = nrLayer;
00449     cout<<"nrLayer: "<<m_nrLayer<<endl;
00450 
00451     cout<<"#layers: "<<m_nrLayer<<" ("<< ( m_nrLayer-1 ) <<" hidden layer, 1 output layer)"<<endl;
00452 
00453     // alloc space for structure variables
00454     m_neuronsPerLayer = new int[m_nrLayer+1];
00455     m_neuronsPerLayer[0] = m_nrInputs;  // number of inputs
00456     for ( int i=0;i<m_nrLayer-1;i++ )
00457         m_neuronsPerLayer[1+i] = neuronsPerLayer[i];
00458     m_neuronsPerLayer[m_nrLayer] = m_nrTargets;  // one output
00459 
00460     cout<<"Neurons    per Layer: ";
00461     for ( int i=0;i<m_nrLayer+1;i++ )
00462         cout<<m_neuronsPerLayer[i]<<" ";
00463     cout<<endl;
00464 
00465     cout<<"Outputs    per Layer: ";
00466     for ( int i=0;i<m_nrLayer+1;i++ )
00467         cout<<m_neuronsPerLayer[i]+1<<" ";
00468     cout<<endl;
00469 
00470     cout<<"OutOffsets per Layer: ";
00471     int cnt=0;
00472     for ( int i=0;i<m_nrLayer+1;i++ )
00473     {
00474         cout<<cnt<<" ";
00475         cnt += m_neuronsPerLayer[i]+1;
00476     }
00477     cout<<endl;
00478 
00479     cout<<"Act. function per Layer(0=sig, 1=lin, 2=tanh, 3=softmax): ";
00480     m_activationFunctionPerLayer = new int[m_nrLayer+1];
00481     for ( int i=0;i<m_nrLayer+1;i++ )
00482         m_activationFunctionPerLayer[i] = 0;
00483     if ( layerType )
00484     {
00485         for ( int i=0;i<m_nrLayer+1;i++ )
00486             m_activationFunctionPerLayer[i] = layerType[i];
00487     }
00488     if ( m_enableAutoencoder )
00489         m_activationFunctionPerLayer[m_nrLayer/2] = 1;
00490     if ( lastLinearLayer )
00491         m_activationFunctionPerLayer[m_nrLayer] = 1;
00492     for ( int i=0;i<m_nrLayer+1;i++ )
00493         cout<<m_activationFunctionPerLayer[i]<<" ";
00494     cout<<endl;
00495 
00496     // init the total number of weights and outputs
00497     m_nrWeights = 0;
00498     m_nrOutputs = m_neuronsPerLayer[0] + 1;
00499     m_nrLayWeights = new int[m_nrLayer+1];
00500     m_nrLayWeightOffsets = new int[m_nrLayer+2];
00501     m_nrLayWeights[0] = 0;
00502     for ( int i=0;i<m_nrLayer;i++ )
00503     {
00504         m_nrLayWeights[i+1] = m_neuronsPerLayer[i+1] * ( m_neuronsPerLayer[i]+1 );  // +1 for input bias
00505         m_nrWeights += m_nrLayWeights[i+1];
00506         m_nrOutputs += m_neuronsPerLayer[i+1] + 1;  // +1 for input bias
00507     }
00508 
00509     // print it
00510     cout<<"Weights       per Layer: ";
00511     for ( int i=0;i<m_nrLayer+1;i++ )
00512         cout<<m_nrLayWeights[i]<<" ";
00513     cout<<endl;
00514 
00515     cout<<"WeightOffsets per Layer: ";
00516     m_nrLayWeightOffsets[0] = 0;
00517     for ( int i=0;i<m_nrLayer+1;i++ )
00518     {
00519         cout<<m_nrLayWeightOffsets[i]<<" ";
00520         m_nrLayWeightOffsets[i+1] = m_nrLayWeightOffsets[i] + m_nrLayWeights[i];
00521     }
00522     cout<<endl;
00523 
00524     cout<<"nrOutputs="<<m_nrOutputs<<"  nrWeights="<<m_nrWeights<<endl;
00525 
00526     // allocate the inner calculation structure
00527     m_outputs = new REAL[m_nrOutputs];
00528     m_outputsTmp = new REAL[m_nrTargets];
00529     m_derivates = new REAL[m_nrOutputs];
00530     m_d1 = new REAL[m_nrOutputs];
00531 
00532     for ( int i=0;i<m_nrOutputs;i++ ) // init as biases
00533     {
00534         m_outputs[i] = 1.0;
00535         m_derivates[i] = 0.0;
00536         m_d1[i] = 0.0;
00537     }
00538 
00539     // allocate weights and temp vars
00540     m_weights = new REAL[m_nrWeights];
00541     m_weightsTmp0 = new REAL[m_nrWeights];
00542     m_weightsTmp1 = new REAL[m_nrWeights];
00543     m_weightsTmp2 = new REAL[m_nrWeights];
00544     m_weightsBatchUpdate = new REAL[m_nrWeights];
00545     m_weightsOld = new REAL[m_nrWeights];
00546     m_weightsOldOld = new REAL[m_nrWeights];
00547     m_deltaW = new REAL[m_nrWeights];
00548 
00549     m_deltaWOld = new REAL[m_nrWeights];
00550     m_adaptiveRPROPlRate = new REAL[m_nrWeights];
00551     for ( int i=0;i<m_nrWeights;i++ )
00552     {
00553         m_deltaWOld[i] = 0.0;
00554         m_adaptiveRPROPlRate[i] = m_learnRate;
00555     }
00556     for ( int i=0;i<m_nrWeights;i++ )
00557         m_weights[i] = m_weightsOld[i] = m_deltaW[i] = m_weightsTmp0[i] = m_weightsTmp1[i] = m_weightsTmp2[i] = 0.0;
00558 
00559     // this should be implemented (LeCun suggest such a linear factor in the activation function)
00560     //m_linFac = 0.01;
00561     //cout<<"linFac="<<m_linFac<<" (no active, just tanh used)"<<endl;
00562 }
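
A sketch of building a small network; the layer sizes are illustrative, and setNrInputs()/setNrTargets() are called first so that the input and output layer sizes are known when setNNStructure() runs:

    NNRBM net;
    net.setNrInputs ( 64 );
    net.setNrTargets ( 1 );
    int hiddenNeurons[2] = { 100, 50 };              // two hidden layers
    net.setNNStructure ( 3, hiddenNeurons, true );   // nrLayer=3, linear output layer
    net.initNNWeights ( time ( 0 ) );                // random init, time(0) as seed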

void NNRBM::setNormalTrainStopping ( bool  en  ) 

Set the training stop criterion. en=0: training stops only when maxEpochs is reached. en=1: training stops when maxEpochs is reached, when the probe error rises, or when the probe error improvement becomes too small

Parameters:
en Train stop criteria (0 is used for retraining)

Definition at line 396 of file nnrbm.cpp.

00397 {
00398     m_normalTrainStopping = en;
00399     cout<<"normalTrainStopping: "<<m_normalTrainStopping<<endl;
00400 }
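
A retraining sketch, the usual use of en=0 mentioned above (this workflow is an assumption for illustration; net is an already configured NNRBM instance):

    int bestEpochs = net.trainNN();         // early stopping on the probe set
    net.setNormalTrainStopping ( false );   // now stop only at maxEpochs
    net.setGlobalEpochs ( 0 );              // reset the epoch counter
    net.setMaxEpochs ( bestEpochs );        // retrain for the previously best epoch count
    net.trainNN();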

void NNRBM::setNrExamplesProbe ( int  n  ) 

Set the number of examples in the probe (validation) set

Parameters:
n Number of examples in the probe set

Definition at line 181 of file nnrbm.cpp.

00182 {
00183     m_nrExamplesProbe = n;
00184     cout<<"nrExamplesProbe: "<<m_nrExamplesProbe<<endl;
00185 }

void NNRBM::setNrExamplesTrain ( int  n  ) 

Set the number of examples in the training set

Parameters:
n Number of examples in the training set

Definition at line 171 of file nnrbm.cpp.

00172 {
00173     m_nrExamplesTrain = n;
00174     //cout<<"nrExamplesTrain: "<<m_nrExamplesTrain<<endl;
00175 }

void NNRBM::setNrInputs ( int  n  ) 

Set the number of inputs (input features)

Parameters:
n The number of inputs

Definition at line 161 of file nnrbm.cpp.

00162 {
00163     m_nrInputs = n;
00164     cout<<"nrInputs: "<<m_nrInputs<<endl;
00165 }

void NNRBM::setNrTargets ( int  n  ) 

Set the number of targets (outputs)

Parameters:
n Number of target values

Definition at line 149 of file nnrbm.cpp.

00150 {
00151     m_nrTargets = n;
00152     cout<<"nrTargets: "<<m_nrTargets<<endl;
00153 }

void NNRBM::setProbeInputs ( REAL *  inputs  ) 

Set the probe input data (REAL pointer)

Parameters:
inputs Pointer to the probe inputs (row wise)

Definition at line 213 of file nnrbm.cpp.

00214 {
00215     m_inputsProbe = inputs;
00216     //cout<<"inputsProbe: "<<m_inputsProbe<<endl;
00217 }

void NNRBM::setProbeTargets ( REAL *  targets  ) 

Set the probe target values (REAL pointer)

Parameters:
targets Pointer to the probe target values (row wise)

Definition at line 224 of file nnrbm.cpp.

00225 {
00226     m_targetsProbe = targets;
00227     //cout<<"targetsProbe: "<<m_targetsProbe<<endl;
00228 }

void NNRBM::setRBMLearnParams ( REAL  learnrateWeights,
REAL  learnrateBiasVis,
REAL  learnrateBiasHid,
REAL  weightDecay,
int  maxEpoch 
)

Set learn parameters for RBM pretraining

Parameters:
learnrateWeights Learnrate for the visible-hidden weights
learnrateBiasVis Learnrate for the visible biases
learnrateBiasHid Learnrate for the hidden biases
weightDecay Weight decay applied during pretraining
maxEpoch Maximum number of pretraining epochs per RBM

Definition at line 615 of file nnrbm.cpp.

00616 {
00617     m_rbmLearnrateWeights = learnrateWeights;
00618     m_rbmLearnrateBiasVis = learnrateBiasVis;
00619     m_rbmLearnrateBiasHid = learnrateBiasHid;
00620     m_rbmWeightDecay = weightDecay;
00621     m_rbmMaxEpochs = maxEpoch;
00622 }
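
A sketch with purely illustrative parameter values (not library defaults):

    net.setRBMLearnParams ( 0.1,     // learnrate for the weights
                            0.1,     // learnrate for the visible biases
                            0.1,     // learnrate for the hidden biases
                            2e-4,    // weight decay during pretraining
                            10 );    // max. pretraining epochs per RBM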

void NNRBM::setRPROPMinMaxUpdate ( REAL  min,
REAL  max 
)

Set the minimum and maximum update values for the sign-based update in RPROP. Weight updates can never become larger than max or smaller than min

Parameters:
min Min. weight update value
max Max. weight update value

Definition at line 367 of file nnrbm.cpp.

00368 {
00369     m_RPROP_updateMin = min;
00370     m_RPROP_updateMax = max;
00371     cout<<"RPROP_updateMin: "<<m_RPROP_updateMin<<"  RPROP_updateMax: "<<m_RPROP_updateMax<<endl;
00372 }

void NNRBM::setRPROPPosNeg ( REAL  etaPos,
REAL  etaNeg 
)

Set the etaNeg and etaPos parameters in the RPROP learning algorithm

Learnrate adaption:

adaptiveRPROPlRate = { if (dE/dW_old * dE/dW) > 0 then adaptiveRPROPlRate * RPROP_etaPos
                       if (dE/dW_old * dE/dW) < 0 then adaptiveRPROPlRate * RPROP_etaNeg
                       if (dE/dW_old * dE/dW) = 0 then adaptiveRPROPlRate }

Parameters:
etaPos etaPos parameter
etaNeg etaNeg parameter

Definition at line 353 of file nnrbm.cpp.

00354 {
00355     m_RPROP_etaPos = etaPos;
00356     m_RPROP_etaNeg = etaNeg;
00357     cout<<"RPROP_etaPos: "<<m_RPROP_etaPos<<"  RPROP_etaNeg: "<<m_RPROP_etaNeg<<endl;
00358 }
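
A sketch that enables RPROP; the parameter values are illustrative (commonly used choices), not library defaults:

    net.enableRPROP ( true );
    net.setRPROPPosNeg ( 1.2, 0.5 );           // etaPos, etaNeg
    net.setRPROPMinMaxUpdate ( 1e-6, 50.0 );   // clamp the adaptive step size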

void NNRBM::setScaleOffset ( REAL  scale,
REAL  offset 
)

Set the scale and offset of the NNRBM output.

Target transformation: target = (targetOld - offset) / scale
Output transformation: output = outputNN * scale + offset

Parameters:
scale Output scaling
offset Output offset

Definition at line 382 of file nnrbm.cpp.

00383 {
00384     m_scaleOutputs = scale;
00385     m_offsetOutputs = offset;
00386     cout<<"scaleOutputs: "<<m_scaleOutputs<<"   offsetOutputs: "<<m_offsetOutputs<<"  [transformation: output = outputNN * scale + offset]"<<endl;
00387 }
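
A sketch: with targets assumed to lie in [1, 5] (an illustrative range), scale and offset map them into [0, 1] inside the net and back on output:

    REAL minT = 1.0, maxT = 5.0;               // assumed target range
    net.setScaleOffset ( maxT - minT, minT );  // target = (t - 1) / 4, output = out * 4 + 1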

void NNRBM::setTrainInputs ( REAL *  inputs  ) 

Set the training input data (REAL pointer)

Parameters:
inputs Pointer to the train inputs (row wise)

Definition at line 191 of file nnrbm.cpp.

00192 {
00193     m_inputsTrain = inputs;
00194     //cout<<"inputsTrain: "<<m_inputsTrain<<endl;
00195 }

void NNRBM::setTrainTargets ( REAL *  targets  ) 

Set the training target values (REAL pointer)

Parameters:
targets Pointer to the train target values (row wise)

Definition at line 202 of file nnrbm.cpp.

00203 {
00204     m_targetsTrain = targets;
00205     //cout<<"targetsTrain: "<<m_targetsTrain<<endl;
00206 }

void NNRBM::setWeightDecay ( REAL  weightDecay  ) 

Set the weight decay factor. This is L2 regularization of the weights; it penalizes large weights

Parameters:
weightDecay Weight decay factor (0=no regularization)

Definition at line 302 of file nnrbm.cpp.

00303 {
00304     m_weightDecay = weightDecay;
00305     cout<<"weightDecay: "<<m_weightDecay<<endl;
00306 }
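
The two regularized update rules used in trainOneEpoch() can be summarized per weight w; a sketch of selecting between them (the lambda value is illustrative):

    // L2 (weight decay):  w -= eta * ( dE/dw + lambda * w )
    // L1 (if enabled):    w -= eta * ( dE/dw + lambda * sign(w) )
    net.setWeightDecay ( 1e-5 );          // lambda
    net.setL1Regularization ( false );    // keep the L2 penalty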

void NNRBM::setWeights ( REAL *  w  ) 

Load external weights into the NNRBM

Parameters:
w Pointer to new weights

Definition at line 2323 of file nnrbm.cpp.

02324 {
02325     cout<<"Set new weights"<<endl;
02326     for ( int i=0;i<m_nrWeights;i++ )
02327         m_weights[i] = w[i];
02328 }
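
A sketch of transferring weights between two nets with identical structure (the identical structure setup on both nets is assumed and omitted here):

    NNRBM netA, netB;
    // ... identical setNrInputs/setNrTargets/setNNStructure calls on both nets ...
    netB.setWeights ( netA.getWeightPtr() );   // copies getNrWeights() values into netB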

int NNRBM::trainNN (  ) 

Train the whole neural network until a stop criterion is reached. This method calls trainOneEpoch() once per epoch.

Returns:
The number of epochs at which the probe RMSE is minimal

Definition at line 1981 of file nnrbm.cpp.

01982 {
01983     cout<<"Train the NN with "<<m_nrExamplesTrain<<" samples"<<endl;
01984     double rmseMin = 1e10, lastUpdate = 1e10, rmseProbeOld = 1e10, rmseTrain = 1e10, rmseProbe = 1e10;
01985     time_t t0 = time ( 0 );
01986     while ( 1 )
01987     {
01988         rmseProbeOld = rmseProbe;
01989         rmseTrain = getRMSETrain();
01990         rmseProbe = getRMSEProbe();
01991         lastUpdate = rmseProbeOld - rmseProbe;
01992 
01993         cout<<"e:"<<m_globalEpochs<<"  rmseTrain:"<<rmseTrain<<"  rmseProbe:"<<rmseProbe<<"  "<<flush;
01994 
01995         if ( m_normalTrainStopping )
01996         {
01997             if ( rmseProbe < rmseMin )
01998             {
01999                 rmseMin = rmseProbe;
02000                 saveWeights();
02001             }
02002             else
02003             {
02004                 cout<<"rmse rises."<<endl;
02005                 return m_globalEpochs;
02006             }
02007             if ( m_minUpdateBound > fabs ( lastUpdate ) )
02008             {
02009                 cout<<"min update too small (<"<<m_minUpdateBound<<")."<<endl;
02010                 return m_globalEpochs;
02011             }
02012         }
02013         if ( m_maxEpochs == m_globalEpochs )
02014         {
02015             cout<<"max epochs reached."<<endl;
02016             return m_globalEpochs;
02017         }
02018 
02019         trainOneEpoch();
02020 
02021         cout<<"lRate:"<<m_learnRate<<"  ";
02022         cout<<time ( 0 )-t0<<"[s]"<<endl;
02023         t0 = time ( 0 );
02024     }
02025     return -1;
02026 }
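
A minimal end-to-end sketch; all sizes and parameter values below are assumptions for illustration, not library defaults, and the data pointers are expected to be filled by the caller:

    #include <nnrbm.h>
    #include <ctime>

    void trainSketch ( REAL* trainIn, REAL* trainTgt, REAL* probeIn, REAL* probeTgt )
    {
        NNRBM net;
        net.setNrInputs ( 64 );
        net.setNrTargets ( 1 );
        net.setNrExamplesTrain ( 1000 );
        net.setNrExamplesProbe ( 200 );
        net.setTrainInputs ( trainIn );
        net.setTrainTargets ( trainTgt );
        net.setProbeInputs ( probeIn );
        net.setProbeTargets ( probeTgt );

        int hidden[1] = { 32 };
        net.setNNStructure ( 2, hidden );     // one hidden layer
        net.initNNWeights ( time ( 0 ) );
        net.setLearnrate ( 1e-3 );
        net.setBatchSize ( 1 );
        net.setMaxEpochs ( 100 );
        net.useBLASforTraining ( true );

        int bestEpoch = net.trainNN();        // epoch count with minimal probe RMSE
        ( void ) bestEpoch;
    }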

void NNRBM::trainOneEpoch ( bool *  updateLayer = 0  ) 

Train the whole neural network for one epoch over the training set with gradient descent

Definition at line 2032 of file nnrbm.cpp.

02033 {
02034     int batchCnt = 0;
02035     V_ZERO ( m_weightsBatchUpdate, m_nrWeights );
02036     m_sumSquaredError = 0.0;
02037     m_sumSquaredErrorSamples = 0.0;
02038 
02039     for ( int i=0;i<m_nrExamplesTrain;i++ )
02040     {
02041         if ( updateLayer != 0 )
02042             V_ZERO ( m_deltaW, m_nrWeights );  // clear the weight update, it might be that not every layer gets updates
02043 
02044         REAL* inputPtr = m_inputsTrain + i * m_nrInputs;
02045         REAL* targetPtr = m_targetsTrain + i * m_nrTargets;
02046 
02047         // forward
02048         if ( m_useBLAS )
02049             forwardCalculationBLAS ( inputPtr );
02050         else
02051             forwardCalculation ( inputPtr );
02052 
02053         // backward: calc weight update
02054         if ( m_useBLAS )
02055             backpropBLAS ( inputPtr, targetPtr, updateLayer );
02056         else
02057             backprop ( inputPtr, targetPtr, updateLayer );
02058 
02059         // accumulate the weight updates
02060         if ( m_batchSize > 1 )
02061             V_ADD ( m_nrWeights, m_deltaW, m_weightsBatchUpdate, m_weightsBatchUpdate );
02062 
02063         batchCnt++;
02064 
02065         // if batch size is reached, or the last element in training list
02066         if ( batchCnt >= m_batchSize || i == m_nrExamplesTrain - 1 )
02067         {
02068             // batch init
02069             batchCnt = 0;
02070             if ( m_batchSize > 1 )
02071             {
02072                 V_COPY ( m_weightsBatchUpdate, m_deltaW, m_nrWeights ); // deltaW = weightsBatchUpdate
02073                 V_ZERO ( m_weightsBatchUpdate, m_nrWeights );
02074             }
02075 
02076             if ( m_enableRPROP )
02077             {
02078                 // weight update:
02079                 // deltaW = {  if dE/dW>0  then  -adaptiveRPROPlRate
02080                 //             if dE/dW<0  then  +adaptiveRPROPlRate
02081                 //             if dE/dW=0  then   0  }
02082                 // learnrate adaption:
02083                 // adaptiveRPROPlRate = {  if (dE/dW_old * dE/dW)>0  then  adaptiveRPROPlRate*RPROP_etaPos
02084                 //                         if (dE/dW_old * dE/dW)<0  then  adaptiveRPROPlRate*RPROP_etaNeg
02085                 //                         if (dE/dW_old * dE/dW)=0  then  adaptiveRPROPlRate  }
02086                 REAL dW, dWOld, sign, update, prod;
02087                 for ( int j=0;j<m_nrWeights;j++ )
02088                 {
02089                     dW = m_deltaW[j];
02090                     dWOld = m_deltaWOld[j];
02091                     prod = dW * dWOld;
02092                     sign = dW > 0.0? 1.0 : -1.0;
02093                     if ( prod > 0.0 )
02094                     {
02095                         m_adaptiveRPROPlRate[j] *= m_RPROP_etaPos;
02096                         if ( m_adaptiveRPROPlRate[j] > m_RPROP_updateMax )
02097                             m_adaptiveRPROPlRate[j] = m_RPROP_updateMax;
02098                         update = sign * m_adaptiveRPROPlRate[j];
02099                         m_weights[j] -= update + m_weightDecay * m_weights[j];   // weight update and weight decay
02100                         m_deltaWOld[j] = dW;
02101                     }
02102                     else if ( prod < 0.0 )
02103                     {
02104                         m_adaptiveRPROPlRate[j] *= m_RPROP_etaNeg;
02105                         if ( m_adaptiveRPROPlRate[j] < m_RPROP_updateMin )
02106                             m_adaptiveRPROPlRate[j] = m_RPROP_updateMin;
02107                         m_deltaWOld[j] = 0.0;
02108                     }
02109                     else // prod == 0.0
02110                     {
02111                         update = sign * m_adaptiveRPROPlRate[j];
02112                         m_weights[j] -= update + m_weightDecay * m_weights[j];   // weight update and weight decay
02113                         m_deltaWOld[j] = dW;
02114                     }
02115                 }
02116             }
02117             else  // stochastic gradient decent (batch-size: m_batchSize)
02118             {
02119                 //=========== slower stochastic updates (without vector libraries) ===========
02120                 // update weights + weight decay
02121                 // formula: weights -= eta * (dE(w)/dw + lambda * w)
02122                 //if(m_momentum > 0.0)
02123                 //{
02124                 //    for(int j=0;j<m_nrWeights;j++)
02125                 //        m_weightsOldOld[j] = m_weightsOld[j];
02126                 //    for(int j=0;j<m_nrWeights;j++)
02127                 //        m_weightsOld[j] = m_weights[j];
02128                 //}
02129                 //
02130                 //if(m_momentum > 0.0)
02131                 //{
02132                 //    for(int j=0;j<m_nrWeights;j++)
02133                 //    {
02134                 //        m_weightsTmp0[j] = (1.0 - m_momentum)*m_weightsTmp0[j] + m_momentum*m_weightsTmp1[j];
02135                 //        m_weightsTmp1[j] = m_weightsTmp0[j];
02136                 //        m_weights[j] -= m_weightsTmp0[j];
02137                 //    }
02138                 //}
02139                 //for(int j=0;j<m_nrWeights;j++)
02140                 //    m_weights[j] -= m_learnRate * (m_deltaW[j] + m_weightDecay * m_weights[j]);
02141 
02142 
02143                 // update weights + weight decay(L2 reg.)
02144                 // formula: weights = weights - eta * (dE(w)/dw + lambda * weights)
02145                 //          weights = weights - (eta*deltaW + eta*lambda*weights)
02146                 for ( int j=0;j<m_nrWeights;j++ )
02147                     if ( isnan ( m_deltaW[j] ) || isinf ( m_deltaW[j] ) )
02148                     {
02149                         cout<<endl<<"value:"<<m_deltaW[j]<<endl<<"j:"<<j<<" i:"<<i<<endl;
02150                         assert ( false );
02151                     }
02152                 V_MULC ( m_deltaW, m_learnRate, m_weightsTmp0, m_nrWeights );     // tmp0 = learnrate * deltaW
02153 
02154                 // if weight decay enabled
02155                 if ( m_weightDecay > 0.0 )
02156                 {
02157                     if ( m_enableL1Regularization )
02158                     {
02159                         // update weights + L1 reg.
02160                         // formula: weights = weights - eta * (dE(w)/dw + lambda*sign(w))
02161                         //          weights = weights - (eta*deltaW + eta*lambda*sign(w))
02162                         for ( int j=0;j<m_nrWeights;j++ )
02163                             m_weightsTmp2[j] = 1.0;
02164                         for ( int j=0;j<m_nrWeights;j++ )
02165                             if ( m_weights[j]<0.0 )
02166                                 m_weightsTmp2[j] = -1.0;
02167                         //REAL c = m_weightDecay * m_learnRate;
02168                         //for(int j=0;j<m_nrWeights;j++)
02169                         //    m_weightsTmp0[j] += c * m_weightsTmp2[j];
02170                         CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weightsTmp2, 1, m_weightsTmp0, 1 );  // tmp0 = reg*learnrate*weights+tmp0
02171                     }
02172                     else
02173                         //saxpy(n, a, x, incx, y, incy)
02174                         //y := a*x + y
02175                         CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weights, 1, m_weightsTmp0, 1 );  // tmp0 = reg*learnrate*weights+tmp0
02176                 }
02177 
02178                 // if momentum is used
02179                 if ( m_momentum > 0.0 )
02180                 {
02181                     V_MULC ( m_weightsTmp0, 1.0 - m_momentum, m_weightsTmp0, m_nrWeights ); // tmp0 = tmp0 * (1 - momentum)   [actual update]
02182                     V_MULC ( m_weightsTmp1, m_momentum, m_weightsTmp1, m_nrWeights );    // tmp1 = tmp1 * momentum         [last update]
02183 
02184                     // sum updates
02185                     V_ADD ( m_nrWeights, m_weightsTmp0, m_weightsTmp1, m_weightsTmp0 ); // tmp0 = tmp0 + tmp1
02186 
02187                     V_COPY ( m_weightsTmp0, m_weightsTmp1, m_nrWeights );               // tmp1 = tmp0
02188                 }
02189 
02190                 // standard weight update in the NNRBM
02191                 V_SUB ( m_nrWeights, m_weights, m_weightsTmp0, m_weights );       // weights = weights - tmp0
02192             }
02193         }
02194 
02195         // make the learnrate smaller (per sample)
02196         m_learnRate -= m_learnrateDecreaseRate;
02197         if ( m_learnRate < m_learnRateMin )
02198             m_learnRate = m_learnRateMin;
02199 
02200     }
02201 
02202     // make the learnrate smaller (per epoch)
02203     m_learnRate -= m_learnrateDecreaseRateEpoch;
02204     if ( m_learnRate < m_learnRateMin )
02205         m_learnRate = m_learnRateMin;
02206 
02207     // epoch counter
02208     m_globalEpochs++;
02209 }

void NNRBM::useBLASforTraining ( bool  enable  ) 

Set the forward/backward calculation type. enable=1: BLAS Level 2 routines (MKL) are used for the vector-matrix operations to speed up training. enable=0: standard loops are used for the calculation

Parameters:
enable Enables BLAS usage to speed up training of large nets

Definition at line 434 of file nnrbm.cpp.

00435 {
00436     m_useBLAS = enable;
00437     cout<<"useBLAS: "<<m_useBLAS<<endl;
00438 }


The documentation for this class was generated from the following files:

nnrbm.h
nnrbm.cpp

Generated on Tue Jan 26 09:21:15 2010 for ELF by  doxygen 1.5.8