NN Class Reference

#include <nn.h>

Inheritance diagram for NN:


List of all members.

Public Member Functions

 NN ()
 ~NN ()
void setNrTargets (int n)
void setNrInputs (int n)
void setNrExamplesTrain (int n)
void setNrExamplesProbe (int n)
void setTrainInputs (REAL *inputs)
void setTrainTargets (REAL *targets)
void setProbeInputs (REAL *inputs)
void setProbeTargets (REAL *targets)
void setInitWeightFactor (REAL factor)
void setLearnrate (REAL learnrate)
void setLearnrateMinimum (REAL learnrateMin)
void setLearnrateSubtractionValueAfterEverySample (REAL learnrateDecreaseRate)
void setLearnrateSubtractionValueAfterEveryEpoch (REAL learnrateDecreaseRate)
void setMomentum (REAL momentum)
void setWeightDecay (REAL weightDecay)
void setBatchSize (int size)
void setMinUpdateErrorBound (REAL minUpdateBound)
void setMaxEpochs (int epochs)
void setRPROPPosNeg (REAL etaPos, REAL etaNeg)
void setRPROPMinMaxUpdate (REAL min, REAL max)
void setL1Regularization (bool en)
void initNNWeights (time_t seed)
void enableErrorFunctionMAE (bool en)
void setActivationFunctionType (int type)
void setNNStructure (int nrLayer, int *neuronsPerLayer)
void printLearnrate ()
void setScaleOffset (REAL scale, REAL offset)
void setNormalTrainStopping (bool en)
void setGlobalEpochs (int e)
void enableRPROP (bool en)
void useBLASforTraining (bool enable)
void trainOneEpoch ()
int trainNN ()
REAL getRMSETrain ()
REAL getRMSEProbe ()
void predictSingleInput (REAL *input, REAL *output)
REAL * getWeightPtr ()
void setWeights (REAL *w)
int getNrWeights ()
int getWeightIndex (int layer, int neuron, int weight)
int getBiasIndex (int layer, int neuron)
int getOutputIndex (int layer, int neuron)

Public Attributes

double m_sumSquaredError
double m_sumSquaredErrorSamples

Private Member Functions

void saveWeights ()
REAL calcRMSE (REAL *inputs, REAL *targets, int examples)
void forwardCalculation (REAL *input)
void forwardCalculationBLAS (REAL *input)
void backpropBLAS (REAL *input, REAL *target)
void backprop (REAL *input, REAL *target)
REAL getInitWeight (int fanIn)

Private Attributes

int m_nrTargets
int m_nrInputs
int m_nrExamplesTrain
int m_nrExamplesProbe
REAL * m_inputsTrain
REAL * m_inputsProbe
REAL * m_targetsTrain
REAL * m_targetsProbe
REAL m_initWeightFactor
int m_globalEpochs
REAL m_RPROP_updateMin
REAL m_RPROP_updateMax
REAL m_learnRate
REAL m_learnRateMin
REAL m_learnrateDecreaseRate
REAL m_learnrateDecreaseRateEpoch
REAL m_momentum
REAL m_weightDecay
REAL m_minUpdateBound
int m_batchSize
int m_activationFunctionType
REAL m_scaleOutputs
REAL m_offsetOutputs
int m_maxEpochs
bool m_useBLAS
bool m_enableRPROP
bool m_normalTrainStopping
bool m_enableL1Regularization
bool m_errorFunctionMAE
int m_nrLayer
int * m_neuronsPerLayer
int m_nrWeights
int m_nrOutputs
int * m_nrLayWeights
int * m_nrLayWeightOffsets
REAL * m_outputs
REAL * m_outputsTmp
REAL * m_derivates
REAL * m_d1
REAL * m_weights
REAL * m_weightsTmp0
REAL * m_weightsTmp1
REAL * m_weightsTmp2
REAL * m_weightsBatchUpdate
REAL * m_weightsOld
REAL * m_weightsOldOld
REAL * m_deltaW
REAL * m_deltaWOld
REAL * m_adaptiveRPROPlRate

Detailed Description

This is a Neural Network implementation

The goal of this class is to provide a basic and fast implementation for training and prediction. It supports basic training functionality.

Memory allocation and normalization of the data (features + targets) must be done outside this class. The class only receives pointers to the training and probe (= validation) sets.

Forward and backward calculation can be done either in loops or with vector-matrix operations (BLAS). For large nets the BLAS calculation should be used (~2x faster).

Standard training is performed with a global learnrate and stochastic gradient descent. Batch training is also possible if batchSize > 1.

This class also implements the RPROP learning algorithm. Reference: "Rprop - Description and Implementation Details", Martin Riedmiller, 1994. Technical report.

Definition at line 38 of file nn.h.

Constructor & Destructor Documentation

NN::NN (  ) 


Definition at line 8 of file nn.cpp.

00009 {
00010     // init member vars
00011     m_nrTargets = 0;
00012     m_nrInputs = 0;
00013     m_nrExamplesTrain = 0;
00014     m_nrExamplesProbe = 0;
00015     m_inputsTrain = 0;
00016     m_inputsProbe = 0;
00017     m_targetsTrain = 0;
00018     m_targetsProbe = 0;
00019     m_initWeightFactor = 0;
00020     m_globalEpochs = 0;
00021     m_RPROP_etaPos = 0;
00022     m_RPROP_etaNeg = 0;
00023     m_RPROP_updateMin = 0;
00024     m_RPROP_updateMax = 0;
00025     m_learnRate = 0;
00026     m_learnRateMin = 0;
00027     m_learnrateDecreaseRate = 0;
00028     m_learnrateDecreaseRateEpoch = 0;
00029     m_momentum = 0;
00030     m_weightDecay = 0;
00031     m_minUpdateBound = 0;
00032     m_batchSize = 0;
00033     m_scaleOutputs = 0;
00034     m_offsetOutputs = 0;
00035     m_maxEpochs = 0;
00036     m_useBLAS = 0;
00037     m_enableRPROP = 0;
00038     m_normalTrainStopping = 0;
00039     m_nrLayer = 0;
00040     m_neuronsPerLayer = 0;
00041     m_nrWeights = 0;
00042     m_nrOutputs = 0;
00043     m_nrLayWeights = 0;
00044     m_outputs = 0;
00045     m_outputsTmp = 0;
00046     m_derivates = 0;
00047     m_d1 = 0;
00048     m_weights = 0;
00049     m_weightsTmp0 = 0;
00050     m_weightsTmp1 = 0;
00051     m_weightsTmp2 = 0;
00052     m_weightsBatchUpdate = 0;
00053     m_weightsOld = 0;
00054     m_weightsOldOld = 0;
00055     m_deltaW = 0;
00056     m_deltaWOld = 0;
00057     m_adaptiveRPROPlRate = 0;
00058     m_enableL1Regularization = 0;
00059     m_errorFunctionMAE = 0;
00060     m_sumSquaredError = 0.0;
00061     m_sumSquaredErrorSamples = 0;
00062     m_nrLayWeightOffsets = 0;
00063     m_sumSquaredError = 0.0;
00064     m_sumSquaredErrorSamples = 0.0;
00065     m_activationFunctionType = 0;
00066 }

NN::~NN (  ) 


Definition at line 71 of file nn.cpp.

00072 {
00073     if ( m_neuronsPerLayer )
00074         delete[] m_neuronsPerLayer;
00075     m_neuronsPerLayer = 0;
00076     if ( m_nrLayWeights )
00077         delete[] m_nrLayWeights;
00078     m_nrLayWeights = 0;
00079     if ( m_outputs )
00080         delete[] m_outputs;
00081     m_outputs = 0;
00082     if ( m_outputsTmp )
00083         delete[] m_outputsTmp;
00084     m_outputsTmp = 0;
00085     if ( m_derivates )
00086         delete[] m_derivates;
00087     m_derivates = 0;
00088     if ( m_d1 )
00089         delete[] m_d1;
00090     m_d1 = 0;
00091     if ( m_weights )
00092         delete[] m_weights;
00093     m_weights = 0;
00094     if ( m_weightsTmp0 )
00095         delete[] m_weightsTmp0;
00096     m_weightsTmp0 = 0;
00097     if ( m_weightsTmp1 )
00098         delete[] m_weightsTmp1;
00099     m_weightsTmp1 = 0;
00100     if ( m_weightsTmp2 )
00101         delete[] m_weightsTmp2;
00102     m_weightsTmp2 = 0;
00103     if ( m_weightsBatchUpdate )
00104         delete[] m_weightsBatchUpdate;
00105     m_weightsBatchUpdate = 0;
00106     if ( m_weightsOld )
00107         delete[] m_weightsOld;
00108     m_weightsOld = 0;
00109     if ( m_weightsOldOld )
00110         delete[] m_weightsOldOld;
00111     m_weightsOldOld = 0;
00112     if ( m_deltaW )
00113         delete[] m_deltaW;
00114     m_deltaW = 0;
00115     if ( m_deltaWOld )
00116         delete[] m_deltaWOld;
00117     m_deltaWOld = 0;
00118     if ( m_adaptiveRPROPlRate )
00119         delete[] m_adaptiveRPROPlRate;
00120     m_adaptiveRPROPlRate = 0;
00121     if ( m_nrLayWeightOffsets )
00122         delete[] m_nrLayWeightOffsets;
00123     m_nrLayWeightOffsets = 0;
00124 }

Member Function Documentation

void NN::backprop ( REAL *  input,
REAL *  target 
) [private]

Calculate the weight update for the whole net with standard formulas (speed optimized), according to the backprop rule. Weight updates are stored in m_deltaW.

input Input vector
target Target values (vector)

Definition at line 823 of file nn.cpp.

00824 {
00825     REAL sum0, d1;
00827     int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1;  // -1 for bias and output neuron
00828     int n0 = m_neuronsPerLayer[m_nrLayer-1];
00829     int outputOffsetPrev = outputOffset - n0 - 1;
00830     int outputOffsetNext = outputOffset;
00832     int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer];
00833     int weightOffsetNext, nP1;
00835     REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error;
00837     // ================== the output neuron:  d(j)=(b-o(j))*Aj' ==================
00838     for ( int i=0;i<m_nrTargets;i++ )
00839     {
00840         double out = m_outputs[outputOffset+i];
00842         REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i];
00843         m_sumSquaredError += errorTrain * errorTrain;
00844         m_sumSquaredErrorSamples++;
00846         targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs;
00847         error = out - targetConverted;
00849         if ( m_errorFunctionMAE )
00850             error = error > 0.0? 1.0 : -1.0;
00851         d1 = error * m_derivates[outputOffset+i];
00852         m_d1[outputOffset+i] = d1;
00853         deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 );
00854         if ( m_nrLayer==1 )
00855         {
00856             outputsPtr = input - 1;
00857             deltaWPtr[0] = d1;
00858             for ( int j=1;j<n0+1;j++ )
00859                 deltaWPtr[j] = d1 * outputsPtr[j];
00860         }
00861         else
00862         {
00863             outputsPtr = m_outputs + outputOffsetPrev;
00864             for ( int j=0;j<n0+1;j++ )
00865                 deltaWPtr[j] = d1 * outputsPtr[j];
00866         }
00868     }
00870     // ================== all other neurons in the net ==================
00871     outputOffsetNext = outputOffset;  // next to current
00872     outputOffset = outputOffsetPrev;  // current to prev
00873     n0 = m_neuronsPerLayer[m_nrLayer-2];
00874     outputOffsetPrev -= n0 + 1;  // prev newnrInputs_
00875     weightOffset -= m_nrLayWeights[m_nrLayer-1];  // offset to weight pointer
00876     weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer];
00878     for ( int i=m_nrLayer-1;i>0;i-- ) // all layers from output to input
00879     {
00880         int n = m_neuronsPerLayer[i];
00881         int nNext = m_neuronsPerLayer[i+1];
00882         int nPrev = m_neuronsPerLayer[i-1];
00883         nP1 = n+1;
00885         d1Ptr0 = m_d1 + outputOffsetNext;
00886         derivatesPtr = m_derivates + outputOffset;
00887         weightsPtr = m_weights + weightOffsetNext;
00888         d1Ptr = m_d1 + outputOffset;
00889         deltaWPtr = m_deltaW + weightOffset;
00890         if ( i==1 )
00891             outputsPtr = input - 1;
00892         else
00893             outputsPtr = m_outputs + outputOffsetPrev;
00895         for ( int j=0;j<n;j++ ) // every neuron in the layer
00896         {
00897             // calc d1
00898             sum0 = 0.0;
00899             for ( int k=0;k<nNext;k++ ) // all neurons in the next layer:  d(j)=Aj'*Sum(k,d(k)*w(k,j))
00900                 sum0 += d1Ptr0[k] * weightsPtr[k*nP1];
00901             sum0 *= *derivatesPtr;
00902             d1Ptr[j] = sum0;
00904             // weight updates
00905             if ( i==1 )
00906             {
00907                 deltaWPtr[0] = sum0;
00908                 for ( int k=1;k<nPrev+1;k++ )
00909                     deltaWPtr[k] = sum0 * outputsPtr[k];
00910             }
00911             else
00912             {
00913                 for ( int k=0;k<nPrev+1;k++ )
00914                     deltaWPtr[k] = sum0 * outputsPtr[k];
00915             }
00916             deltaWPtr += nPrev+1;
00917             weightsPtr ++;
00918             derivatesPtr++;
00919         }
00921         outputOffsetNext = outputOffset;  // next to current
00922         outputOffset = outputOffsetPrev;  // current to prev
00923         n0 = m_neuronsPerLayer[i-2];
00924         outputOffsetPrev -= n0 + 1;  // prev new
00925         weightOffset -= m_nrLayWeights[i-1];  // offset to weight pointer
00926         weightOffsetNext -= m_nrLayWeights[i];
00927     }
00929 }

void NN::backpropBLAS ( REAL *  input,
REAL *  target 
) [private]

Calculate the weight update for the whole net with BLAS (MKL), according to the backprop rule. Weight updates are stored in m_deltaW.

input Input vector
target Target values (vector)

Definition at line 717 of file nn.cpp.

00718 {
00719     REAL sum0, d1;
00721     int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1;  // -1 for bias and output neuron
00722     int n0 = m_neuronsPerLayer[m_nrLayer-1];
00723     int outputOffsetPrev = outputOffset - n0 - 1;
00724     int outputOffsetNext = outputOffset;
00726     int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer];
00727     int weightOffsetNext, nP1;
00729     REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error;
00731     // ================== the output neuron:  d(j)=(b-o(j))*Aj' ==================
00732     for ( int i=0;i<m_nrTargets;i++ )
00733     {
00734         REAL out = m_outputs[outputOffset+i];
00736         REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i];
00737         m_sumSquaredError += errorTrain * errorTrain;
00738         m_sumSquaredErrorSamples += 1.0;
00740         targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs;
00741         error = out - targetConverted;
00743         if ( m_errorFunctionMAE )
00744             error = error > 0.0? 1.0 : -1.0;
00745         d1 = error * m_derivates[outputOffset+i];
00746         m_d1[outputOffset+i] = d1;
00747         deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 );
00748         if ( m_nrLayer==1 )
00749         {
00750             outputsPtr = input - 1;
00751             deltaWPtr[0] = d1;
00752             for ( int j=1;j<n0+1;j++ )
00753                 deltaWPtr[j] = d1 * outputsPtr[j];
00754         }
00755         else
00756         {
00757             outputsPtr = m_outputs + outputOffsetPrev;
00758             for ( int j=0;j<n0+1;j++ )
00759                 deltaWPtr[j] = d1 * outputsPtr[j];
00760         }
00761     }
00763     // ================== all other neurons in the net ==================
00764     outputOffsetNext = outputOffset;  // next to current
00765     outputOffset = outputOffsetPrev;  // current to prev
00766     n0 = m_neuronsPerLayer[m_nrLayer-2];
00767     outputOffsetPrev -= n0 + 1;  // prev newnrInputs_
00768     weightOffset -= m_nrLayWeights[m_nrLayer-1];  // offset to weight pointer
00769     weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer];
00771     for ( int i=m_nrLayer-1;i>0;i-- ) // all layers from output to input
00772     {
00773         int n = m_neuronsPerLayer[i];
00774         int nNext = m_neuronsPerLayer[i+1];
00775         int nPrev = m_neuronsPerLayer[i-1];
00776         nP1 = n+1;
00778         d1Ptr0 = m_d1 + outputOffsetNext;
00779         derivatesPtr = m_derivates + outputOffset;
00780         weightsPtr = m_weights + weightOffsetNext;
00781         d1Ptr = m_d1 + outputOffset;
00782         deltaWPtr = m_deltaW + weightOffset;
00783         if ( i==1 )
00784             outputsPtr = input;
00785         else
00786             outputsPtr = m_outputs + outputOffsetPrev;
00788         // d(j) = SUM(d(k)*w(k,j))
00789         CBLAS_GEMV ( CblasRowMajor, CblasTrans, nNext, n, 1.0, weightsPtr, nP1, d1Ptr0, 1, 0.0, d1Ptr, 1 );  // d1(j) =W_T*d1(k)
00790         V_MUL ( n, d1Ptr, derivatesPtr, d1Ptr );
00792         // every neuron in the layer calc weight update
00793         for ( int j=0;j<n;j++ )
00794         {
00795             if ( i==1 )
00796             {
00797                 V_COPY ( outputsPtr, deltaWPtr+1, nPrev );
00798                 deltaWPtr[0] = 1.0;
00799             }
00800             else
00801                 V_COPY ( outputsPtr, deltaWPtr, nPrev+1 );
00802             V_MULC ( deltaWPtr, d1Ptr[j], deltaWPtr, nPrev+1 );
00803             deltaWPtr += nPrev+1;
00804         }
00806         outputOffsetNext = outputOffset;  // next to current
00807         outputOffset = outputOffsetPrev;  // current to prev
00808         n0 = m_neuronsPerLayer[i-2];
00809         outputOffsetPrev -= n0 + 1;  // prev new
00810         weightOffset -= m_nrLayWeights[i-1];  // offset to weight pointer
00811         weightOffsetNext -= m_nrLayWeights[i];
00812     }
00813 }

REAL NN::calcRMSE ( REAL *  inputs,
REAL *  targets,
int  examples 
) [private]

Calculate the RMSE over a given input/target set with the current neural net weight set.

inputs Input vectors (row wise)
targets Target vectors (row wise)
examples Number of examples
Returns the RMSE on this set

Definition at line 1401 of file nn.cpp.

01402 {
01403     double rmse = 0.0;
01404     for ( int i=0;i<examples;i++ )
01405     {
01406         REAL* inputPtr = inputs + i * m_nrInputs;
01407         REAL* targetPtr = targets + i * m_nrTargets;
01409         predictSingleInput ( inputPtr, m_outputsTmp );
01411         for ( int j=0;j<m_nrTargets;j++ )
01412             rmse += ( m_outputsTmp[j] - targetPtr[j] ) * ( m_outputsTmp[j] - targetPtr[j] );
01413     }
01414     rmse = sqrt ( rmse/ ( double ) ( examples*m_nrTargets ) );
01415     return rmse;
01416 }

void NN::enableErrorFunctionMAE ( bool  en  ) 

Enable MeanAbsoluteError function


Definition at line 131 of file nn.cpp.

00132 {
00133     m_errorFunctionMAE = en;
00134     cout<<"errorFunctionMAE:"<<m_errorFunctionMAE<<endl;
00135 }

void NN::enableRPROP ( bool  en  ) 

Enable the RPROP learning algorithm (1st order type). Ref: "RPROP - Description and Implementation Details", Martin Riedmiller, 1994.

Attention: This must be called before setNNStructure.

en Enables RPROP learning schema

Definition at line 414 of file nn.cpp.

00415 {
00416     m_enableRPROP = en;
00417     cout<<"enableRPROP: "<<m_enableRPROP<<endl;
00418 }

void NN::forwardCalculation ( REAL *  input  )  [private]

Forward calculation through the NN (with loops). Outputs are stored in m_outputs; first derivatives are stored in m_derivates.

input Input vector

Definition at line 1037 of file nn.cpp.

01038 {
01039     int outputOffset = m_neuronsPerLayer[0] + 1, outputOffsetPrev = 0;
01040     REAL tmp0, tmp1, sum0;
01041     REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights;
01042     for ( int i=0;i<m_nrLayer;i++ ) // to all layer
01043     {
01044         int n = m_neuronsPerLayer[i+1];
01045         int nprev = m_neuronsPerLayer[i] + 1;
01046         int loopOffset = i==0? 1 : 0;
01047         ptr0 = m_outputs + outputOffset;
01048         ptr1 = m_derivates + outputOffset;
01049         if ( i==0 )
01050             outputPtr = input - loopOffset;
01051         else
01052             outputPtr = m_outputs + outputOffsetPrev;
01053         for ( int j=0;j<n;j++ ) // all neurons in this layer
01054         {
01055             sum0 = i==0? weightPtr[0] : 0.0;  // dot product sum, for inputlayer: init with bias
01056             for ( int k=loopOffset;k<nprev;k++ ) // calc dot product
01057                 sum0 += weightPtr[k] * outputPtr[k];
01058             weightPtr += nprev;
01060             if(m_activationFunctionType == 0)
01061             {
01062                 // activation fkt: f(x)=tanh(x)
01063                 tmp0 = tanh ( sum0 );
01064                 ptr0[j] = tmp0;
01065                 ptr1[j] = ( 1.0 - tmp0*tmp0 );
01066             }
01067             else if(m_activationFunctionType == 1)
01068             {
01069                 // activation fkt: f(x)=sin(x)+0.01*x
01070                 REAL piHalf = 1.570796326794897;
01071                 REAL v = ptr0[j], sign = v>0.0? 1.0 : -1.0;
01072                 if(v > -piHalf && v < piHalf)
01073                 {
01074                     ptr0[j] = sin(v) + v * 0.01;
01075                     ptr1[j] = cos(v) + sign * 0.01;
01076                 }
01077                 else  // sumWeights is outside a half periode +/-pi/2
01078                 {
01079                     ptr0[j] = sign + v * 0.01;
01080                     ptr1[j] = sign * 0.01;
01081                 }
01082             }
01083             else if(m_activationFunctionType == 2)
01084             {
01085                 // activation fkt: f(x)= wenn x>0: f(x)=x^(1+tanh(v)*mul)
01086                 //                       wenn x<0: f(x)=-(-x)^(1+tanh(-v)*mul)
01087                 REAL mul = 0.5;  // 0.25 : swing: [-1.5...+1.5]   // 0.5 : swing: [-1.18195...+1.18195]
01088                 REAL v = ptr0[j], tanhV = tanh(v), tanhVNeg = -tanhV;
01089                 if(v >= 0.0)
01090                 {
01091                     ptr0[j] = pow(v,1.0+tanhV*mul); //pow(v,0.3);
01092                     ptr1[j] = pow(v,tanhV*mul)*(1.0+tanhV*mul)+ptr0[j]*log(v)*mul*(1.0-tanhV*tanhV);
01093                     if(isnan(ptr1[j]) || isinf(ptr1[j]))
01094                         ptr1[j] = 1.0;
01095                 }
01096                 else
01097                 {
01098                     ptr0[j] = -pow(-v,1.0+tanhVNeg*mul); //-pow(-v,0.3);
01099                     ptr1[j] = -pow(-v,tanhVNeg*mul)*(1.0+tanhVNeg*mul)*(-1.0)+ptr0[j]*log(-v)*mul*(1.0-tanhV*tanhV)*(-1.0);
01100                     if(isnan(ptr1[j]) || isinf(ptr1[j]))
01101                         ptr1[j] = -1.0;
01102                 }
01103             }
01104             else
01105                 assert(false);
01106         }
01107         outputOffset += n+1;  // this points to first neuron in current layer
01108         outputOffsetPrev += nprev;  // this points to first neuron in previous layer
01109     }
01111 }

void NN::forwardCalculationBLAS ( REAL *  input  )  [private]

Forward calculation through the NN with BLAS and VML (MKL). Outputs are stored in m_outputs; first derivatives are stored in m_derivates.

input Input vector

Definition at line 938 of file nn.cpp.

00939 {
00940     int outputOffset = m_neuronsPerLayer[0]+1, outputOffsetPrev = 0;
00941     REAL tmp0, tmp1, sum0;
00942     REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights;
00944     for ( int i=0;i<m_nrLayer;i++ ) // to all layer
00945     {
00946         int n = m_neuronsPerLayer[i+1];
00947         int nprev = m_neuronsPerLayer[i] + 1;
00948         int inputOffset = 0;
00949         ptr0 = m_outputs + outputOffset;
00950         ptr1 = m_derivates + outputOffset;
00951         if ( i==0 )
00952         {
00953             outputPtr = input;
00954             inputOffset = 1;
00955         }
00956         else
00957             outputPtr = m_outputs + outputOffsetPrev;
00959         // WeightMatrix*InputVec = Outputs
00960         CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, n, nprev - inputOffset, 1.0, weightPtr + inputOffset, nprev, outputPtr, 1, 0.0, ptr0, 1 );
00961         if ( inputOffset )
00962         {
00963             for ( int j=0;j<n;j++ )
00964                 ptr0[j] += weightPtr[j * nprev];
00965         }
00967         if(m_activationFunctionType == 0)
00968         {
00969             // activation fkt: f(x)=tanh(x)
00970             V_TANH ( n, ptr0, ptr0 );  // m_outputs = tanh(m_outputs)
00971             V_SQR ( n, ptr0, ptr1 );   // m_derivates = tanh(m_outputs) * tanh(m_outputs)
00972             for ( int j=0;j<n;j++ )
00973                 ptr1[j] = 1.0 - ptr1[j];
00974         }
00975         else if(m_activationFunctionType == 1)
00976         {
00977             // activation fkt: f(x)=sin(x)+0.01*x
00978             REAL piHalf = 1.570796326794897;
00979             for(int j=0;j<n;j++)
00980             {
00981                 REAL v = ptr0[j], sign = v>0.0? 1.0 : -1.0;
00982                 if(v > -piHalf && v < piHalf)
00983                 {
00984                     ptr0[j] = sin(v) + v * 0.01;
00985                     ptr1[j] = cos(v) + sign * 0.01;
00986                 }
00987                 else  // sumWeights is outside a half periode +/-pi/2
00988                 {
00989                     ptr0[j] = sign + v * 0.01;
00990                     ptr1[j] = sign * 0.01;
00991                 }
00992             }
00993         }
00994         else if(m_activationFunctionType == 2)
00995         {
00996             // activation fkt: f(x)= wenn x>0: f(x)=x^(1+tanh(v)*mul)
00997             //                       wenn x<0: f(x)=-(-x)^(1+tanh(-v)*mul)
00998             REAL mul = 0.5;  // 0.25 : swing: [-1.5...+1.5]
00999             for(int j=0;j<n;j++)
01000             {
01001                 REAL v = ptr0[j], tanhV = tanh(v), tanhVNeg = -tanhV;
01002                 if(v >= 0.0)
01003                 {
01004                     ptr0[j] = pow(v,1.0+tanhV*mul); //pow(v,0.3);
01005                     ptr1[j] = pow(v,tanhV*mul)*(1.0+tanhV*mul)+ptr0[j]*log(v)*mul*(1.0-tanhV*tanhV);
01006                     if(isnan(ptr1[j]) || isinf(ptr1[j]))
01007                         ptr1[j] = 1.0;
01008                 }
01009                 else
01010                 {
01011                     ptr0[j] = -pow(-v,1.0+tanhVNeg*mul); //-pow(-v,0.3);
01012                     ptr1[j] = -pow(-v,tanhVNeg*mul)*(1.0+tanhVNeg*mul)*(-1.0)+ptr0[j]*log(-v)*mul*(1.0-tanhV*tanhV)*(-1.0);
01013                     if(isnan(ptr1[j]) || isinf(ptr1[j]))
01014                         ptr1[j] = -1.0;
01015                 }
01016             }
01017         }
01018         else
01019             assert(false);
01021         // update index
01022         weightPtr += n*nprev;
01023         outputOffset += n+1;  // this points to first neuron in current layer
01024         outputOffsetPrev += nprev;  // this points to first neuron in previous layer
01026     }
01028 }

int NN::getBiasIndex ( int  layer,
int  neuron 
) 

Get the index to the bias weight:

  • m_weights[ind]

layer Weight on layer
neuron Neuron number

Definition at line 509 of file nn.cpp.

00510 {
00511     if ( layer == 0 )
00512         assert ( false );
00514     int nrNeur = m_neuronsPerLayer[layer];
00515     int nrNeurPrev = m_neuronsPerLayer[layer-1];
00516     if ( neuron >= nrNeur )
00517     {
00518         cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
00519         assert ( false );
00520     }
00521     int ind = m_nrLayWeightOffsets[layer];
00522     if ( layer == 1 ) // input layer
00523         ind += neuron* ( nrNeurPrev + 1 );
00524     else
00525         ind += nrNeurPrev + neuron* ( nrNeurPrev + 1 );
00527     if ( ind >= m_nrWeights )
00528     {
00529         cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
00530         assert ( false );
00531     }
00533     return ind;
00534 }

REAL NN::getInitWeight ( int  fanIn  )  [private]

Return a random (uniform) initial weight for a neuron with the given number of input connections, using the 1/sqrt(fanIn) rule (from Yann LeCun).

fanIn The number of input connections for this neuron
Returns the weight init value (uniform random)

Definition at line 673 of file nn.cpp.

00674 {
00675     double nr = 2.0* ( rand() / ( double ) RAND_MAX-0.5 );  // -1 .. +1
00676     return ( 1.0/sqrt ( ( double ) fanIn ) ) * nr;
00677 }

int NN::getNrWeights (  ) 

Returns the total number of weights

Number of weights in this net (total number with bias weights)

Definition at line 1465 of file nn.cpp.

01466 {
01467     return m_nrWeights;
01468 }

int NN::getOutputIndex ( int  layer,
int  neuron 
) 

Get the index of the output

  • m_outputs[ind]

layer Output on layer
neuron Neuron number
Returns ind, the index into m_outputs

Definition at line 544 of file nn.cpp.

00545 {
00546     if ( layer == 0 || layer > m_nrLayer )
00547         assert ( false );
00549     if ( neuron >= m_neuronsPerLayer[layer] )
00550         assert ( false );
00552     int ind = 0;
00553     for ( int i=0;i<layer;i++ )
00554         ind += m_neuronsPerLayer[i] + 1;
00556     return ind + neuron;
00557 }

REAL NN::getRMSEProbe (  ) 

Evaluate the probe error

RMSE on the probe set

Definition at line 1433 of file nn.cpp.

01434 {
01435     return calcRMSE ( m_inputsProbe, m_targetsProbe, m_nrExamplesProbe );
01436 }

REAL NN::getRMSETrain (  ) 

Evaluate the train error

RMSE on training set

Definition at line 1423 of file nn.cpp.

01424 {
01425     return calcRMSE ( m_inputsTrain, m_targetsTrain, m_nrExamplesTrain );
01426 }

int NN::getWeightIndex ( int  layer,
int  neuron,
int  weight 
) 

Get the index to the weights:

  • m_weights[ind]

layer Weight on layer
neuron Neuron number
weight Weight number

Definition at line 468 of file nn.cpp.

00469 {
00470     if ( layer == 0 )
00471         assert ( false );
00473     int nrNeur = m_neuronsPerLayer[layer];
00474     int nrNeurPrev = m_neuronsPerLayer[layer-1];
00475     if ( neuron >= nrNeur )
00476     {
00477         cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
00478         assert ( false );
00479     }
00480     if ( weight >= nrNeurPrev )
00481     {
00482         cout<<"weight:"<<weight<<" nrNeurPrev:"<<nrNeurPrev<<endl;
00483         assert ( false );
00484     }
00486     int ind = m_nrLayWeightOffsets[layer];
00487     if ( layer == 1 ) // input layer
00488         ind += 1 + weight + neuron* ( nrNeurPrev + 1 );
00489     else
00490         ind += weight + neuron* ( nrNeurPrev + 1 );
00492     if ( ind >= m_nrWeights )
00493     {
00494         cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
00495         assert ( false );
00496     }
00498     return ind;
00499 }

REAL * NN::getWeightPtr (  ) 

Returns the pointer to the neural net weights (stored linearly from input to output).

Pointer to NN weights

Definition at line 1443 of file nn.cpp.

01444 {
01445     return m_weights;
01446 }

void NN::initNNWeights ( time_t  seed  ) 

Initialize all weights in the net.

seed The random seed (the same seed gives exactly the same weight initialization)

Definition at line 684 of file nn.cpp.

00685 {
00686     srand ( seed );
00687     cout<<"init weights ";
00688     REAL factor = m_initWeightFactor;
00689     int cnt = 0;
00690     for ( int i=0;i<m_nrLayer;i++ ) // through all layers
00691     {
00692         int n = m_neuronsPerLayer[i+1];
00693         int nprev = m_neuronsPerLayer[i] + 1;  // +1 for bias
00694         for ( int j=0;j<n;j++ ) // all neurons per layer
00695         {
00696             for ( int k=0;k<nprev;k++ ) // all weights from this neuron
00697             {
00698                 m_weights[cnt] = m_weightsOld[i] = m_weightsOldOld[i] = getInitWeight ( nprev ) * factor;
00699                 cnt++;
00700             }
00701         }
00702     }
00704     // check the number
00705     if ( cnt != m_nrWeights )
00706         assert ( false );
00707 }

void NN::predictSingleInput ( REAL *  input,
REAL *  output 
) 

Predict the output for an input vector with the neural net. The current m_weights are used to perform the forward calculation through the net.

input Input vector (pointer)
output Output vector (pointer)

Definition at line 1377 of file nn.cpp.

01378 {
01379     REAL* inputPtr = input;
01381     // forward
01382     if ( m_useBLAS )
01383         forwardCalculationBLAS ( inputPtr );
01384     else
01385         forwardCalculation ( inputPtr );
01387     // output correction
01388     REAL* outputPtr = m_outputs + m_nrOutputs - m_nrTargets - 1;
01389     for ( int i=0;i<m_nrTargets;i++ )
01390         output[i] = outputPtr[i] * m_scaleOutputs + m_offsetOutputs;
01391 }

void NN::printLearnrate (  ) 

Print the learn rate

Definition at line 1356 of file nn.cpp.

01357 {
01358     cout<<"lRate:"<<m_learnRate<<" "<<flush;
01359 }

void NN::saveWeights (  )  [private]

Weights saving

Definition at line 1365 of file nn.cpp.

01366 {
01367     // nothing here
01368 }

void NN::setActivationFunctionType ( int  type  ) 

Set the type of activation function in all layers

type 0=tanh, 1=sin, 2=tanhMod0

Definition at line 438 of file nn.cpp.

00439 {
00440     if(type==0)
00441     {
00442         cout<<"activationFunctionType: tanh"<<endl;
00443         m_activationFunctionType = 0;
00444     }
00445     else if(type==1)
00446     {
00447         cout<<"activationFunctionType: sin"<<endl;
00448         m_activationFunctionType = 1;
00449     }
00450     else if(type==2)
00451     {
00452         cout<<"activationFunctionType: tanhMod0"<<endl;
00453         m_activationFunctionType = 2;
00454     }
00455     else
00456         assert(false);
00457 }

void NN::setBatchSize ( int  size  ) 

Set the batch size. Weights are updated after each batch gradient sum. If the batch size is smaller than 2, the training is stochastic gradient descent.

size The batch size (1..trainExamples)

Definition at line 307 of file nn.cpp.

00308 {
00309     m_batchSize = size;
00310     cout<<"batchSize: "<<m_batchSize<<endl;
00311 }

void NN::setGlobalEpochs ( int  e  ) 

Set the global epoch counter. This can be used to reset the number of epochs to 0.

e Number of epochs

Definition at line 1347 of file nn.cpp.

01348 {
01349     m_globalEpochs = e;
01350 }

void NN::setInitWeightFactor ( REAL  factor  ) 

Set the init weight factor (in 1/sqrt(fanIn) rule)

factor The correction factor

Definition at line 228 of file nn.cpp.

00229 {
00230     m_initWeightFactor = factor;
00231     cout<<"initWeightFactor: "<<m_initWeightFactor<<endl;
00232 }

void NN::setL1Regularization ( bool  en  ) 

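Enables L1 regularization in place of L2 weight decay for the gradient-descent weight update. The weight decay factor is used as the L1 strength, so this only has an effect when the weight decay is > 0. The RPROP update always applies L2 weight decay.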

en true=enabled

Definition at line 400 of file nn.cpp.

00401 {
00402     m_enableL1Regularization = en;
00403     cout<<"enableL1Regularization: "<<m_enableL1Regularization<<endl;
00404 }

void NN::setLearnrate ( REAL  learnrate  ) 

Set the global learnrate eta

learnrate Learnrate eta

Definition at line 239 of file nn.cpp.

00240 {
00241     m_learnRate = learnrate;
00242     cout<<"learnRate: "<<m_learnRate<<endl;
00243 }

void NN::setLearnrateMinimum ( REAL  learnrateMin  ) 

Set the lower bound of the learnrate. Neither the per-sample nor the per-epoch learnrate decrease lowers the learnrate below this value.

learnrateMin Lower bound of learnrate

Definition at line 250 of file nn.cpp.

00251 {
00252     m_learnRateMin = learnrateMin;
00253     cout<<"learnRateMin: "<<m_learnRateMin<<endl;
00254 }

void NN::setLearnrateSubtractionValueAfterEveryEpoch ( REAL  learnrateDecreaseRate  ) 

Set the subtraction value per train epoch of the learning rate

learnrateDecreaseRate The learnrate is subtracted by this value every train epoch

Definition at line 273 of file nn.cpp.

00274 {
00275     m_learnrateDecreaseRateEpoch = learnrateDecreaseRate;
00276     cout<<"learnrateDecreaseRateEpoch: "<<m_learnrateDecreaseRateEpoch<<endl;
00277 }

void NN::setLearnrateSubtractionValueAfterEverySample ( REAL  learnrateDecreaseRate  ) 

Set the subtraction value per train example of the learning rate

learnrateDecreaseRate The learnrate is subtracted by this value every train example

Definition at line 261 of file nn.cpp.

00262 {
00263     m_learnrateDecreaseRate = learnrateDecreaseRate;
00264     cout<<"learnrateDecreaseRate: "<<m_learnrateDecreaseRate<<endl;
00265 }

void NN::setMaxEpochs ( int  epochs  ) 

Set the maximum number of training epochs. When maxEpochs is reached, the training stops.

epochs Max. number of train epochs on trainingset

Definition at line 329 of file nn.cpp.

00330 {
00331     m_maxEpochs = epochs;
00332     cout<<"maxEpochs: "<<m_maxEpochs<<endl;
00333 }

void NN::setMinUpdateErrorBound ( REAL  minUpdateBound  ) 

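Set the minimal difference of the probe rmse between two successive training epochs. If the absolute change falls below this bound, the training stops (only checked when normal train stopping is enabled).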

minUpdateBound The min. rmse update until training breaks

Definition at line 318 of file nn.cpp.

00319 {
00320     m_minUpdateBound = minUpdateBound;
00321     cout<<"minUpdateBound: "<<m_minUpdateBound<<endl;
00322 }

void NN::setMomentum ( REAL  momentum  ) 

Set the momentum value. The momentum term blends the previous weight update into the current one: update = (1 - momentum) * currentUpdate + momentum * lastUpdate

momentum The momentum value (0..1). Typical value is 0.1

Definition at line 284 of file nn.cpp.

00285 {
00286     m_momentum = momentum;
00287     cout<<"momentum: "<<m_momentum<<endl;
00288 }

void NN::setNNStructure ( int  nrLayer,
int *  neuronsPerLayer 

Set the inner structure: layers and how many neurons per layer

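nrLayer Number of layers (1=only output layer, 2=one hidden layer, 3=two hidden layers)
neuronsPerLayer Integer pointer to the number of neurons per hidden layer (nrLayer-1 entries are read; the size of the output layer is taken from the number of targets)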

Definition at line 565 of file nn.cpp.

00566 {  // builds the layer layout from nrLayer and neuronsPerLayer and allocates all output and weight buffers
00567     m_nrLayer = nrLayer;
00568     cout<<"nrLayer: "<<m_nrLayer<<endl;
00570     cout<<"#layers: "<<m_nrLayer<<" ("<< ( m_nrLayer-1 ) <<" hidden layer, 1 output layer)"<<endl;
00572     // alloc space for structure variables
00573     m_neuronsPerLayer = new int[m_nrLayer+1];
00574     m_neuronsPerLayer[0] = m_nrInputs;  // number of inputs
00575     for ( int i=0;i<m_nrLayer-1;i++ )  // only the hidden layer sizes are read from neuronsPerLayer
00576         m_neuronsPerLayer[1+i] = neuronsPerLayer[i];
00577     m_neuronsPerLayer[m_nrLayer] = m_nrTargets;  // output layer: one neuron per target
00579     cout<<"Neurons    per Layer: ";
00580     for ( int i=0;i<m_nrLayer+1;i++ )
00581         cout<<m_neuronsPerLayer[i]<<" ";
00582     cout<<endl;
00584     cout<<"Outputs    per Layer: ";
00585     for ( int i=0;i<m_nrLayer+1;i++ )
00586         cout<<m_neuronsPerLayer[i]+1<<" ";
00587     cout<<endl;
00589     cout<<"OutOffsets per Layer: ";
00590     int cnt=0;
00591     for ( int i=0;i<m_nrLayer+1;i++ )
00592     {
00593         cout<<cnt<<" ";
00594         cnt += m_neuronsPerLayer[i]+1;
00595     }
00596     cout<<endl;
00598     // init the total number of weights and outputs
00599     m_nrWeights = 0;
00600     m_nrOutputs = m_neuronsPerLayer[0] + 1;
00601     m_nrLayWeights = new int[m_nrLayer+1];
00602     m_nrLayWeightOffsets = new int[m_nrLayer+2];
00603     m_nrLayWeights[0] = 0;  // the input layer has no incoming weights
00604     for ( int i=0;i<m_nrLayer;i++ )
00605     {
00606         m_nrLayWeights[i+1] = m_neuronsPerLayer[i+1] * ( m_neuronsPerLayer[i]+1 );  // +1 for input bias
00607         m_nrWeights += m_nrLayWeights[i+1];
00608         m_nrOutputs += m_neuronsPerLayer[i+1] + 1;  // +1 for input bias
00609     }
00611     // print it
00612     cout<<"Weights       per Layer: ";
00613     for ( int i=0;i<m_nrLayer+1;i++ )
00614         cout<<m_nrLayWeights[i]<<" ";
00615     cout<<endl;
00617     cout<<"WeightOffsets per Layer: ";
00618     m_nrLayWeightOffsets[0] = 0;
00619     for ( int i=0;i<m_nrLayer+1;i++ )
00620     {
00621         cout<<m_nrLayWeightOffsets[i]<<" ";
00622         m_nrLayWeightOffsets[i+1] = m_nrLayWeightOffsets[i] + m_nrLayWeights[i];  // running sum; fills entries up to index m_nrLayer+1
00623     }
00624     cout<<endl;
00626     cout<<"nrOutputs="<<m_nrOutputs<<"  nrWeights="<<m_nrWeights<<endl;
00628     // allocate the inner calculation structure
00629     m_outputs = new REAL[m_nrOutputs];
00630     m_outputsTmp = new REAL[m_nrTargets];
00631     m_derivates = new REAL[m_nrOutputs];
00632     m_d1 = new REAL[m_nrOutputs];
00634     for ( int i=0;i<m_nrOutputs;i++ ) // init as biases
00635     {
00636         m_outputs[i] = 1.0;
00637         m_derivates[i] = 0.0;
00638         m_d1[i] = 0.0;
00639     }
00641     // allocate weights and temp vars
00642     m_weights = new REAL[m_nrWeights];
00643     m_weightsTmp0 = new REAL[m_nrWeights];
00644     m_weightsTmp1 = new REAL[m_nrWeights];
00645     m_weightsTmp2 = new REAL[m_nrWeights];
00646     m_weightsBatchUpdate = new REAL[m_nrWeights];
00647     m_weightsOld = new REAL[m_nrWeights];
00648     m_weightsOldOld = new REAL[m_nrWeights];
00649     m_deltaW = new REAL[m_nrWeights];
00651     m_deltaWOld = new REAL[m_nrWeights];
00652     m_adaptiveRPROPlRate = new REAL[m_nrWeights];
00653     for ( int i=0;i<m_nrWeights;i++ )
00654     {
00655         m_deltaWOld[i] = 0.0;
00656         m_adaptiveRPROPlRate[i] = m_learnRate;  // takes the value m_learnRate has at this point
00657     }
00658     for ( int i=0;i<m_nrWeights;i++ )  // zero every weight buffer, including m_weightsOldOld and m_weightsBatchUpdate, so none holds indeterminate values
00659         m_weights[i] = m_weightsOld[i] = m_weightsOldOld[i] = m_deltaW[i] = m_weightsBatchUpdate[i] = m_weightsTmp0[i] = m_weightsTmp1[i] = m_weightsTmp2[i] = 0.0;
00661     // this should be implemented (LeCun suggest such a linear factor in the activation function)
00662     //m_linFac = 0.01;
00663     //cout<<"linFac="<<m_linFac<<" (no active, just tanh used)"<<endl;
00664 }

void NN::setNormalTrainStopping ( bool  en  ) 

Set the train stop criteria. en=0: training stops at maxEpochs. en=1: training stops at maxEpochs, or when the probe error rises, or when the change of the probe error is too small.

en Train stop criteria (0 is used for retraining)

Definition at line 389 of file nn.cpp.

00390 {
00391     m_normalTrainStopping = en;
00392     cout<<"normalTrainStopping: "<<m_normalTrainStopping<<endl;
00393 }

void NN::setNrExamplesProbe ( int  n  ) 

Set the number of examples in the probe (validation) set

n Number of examples in the probe set

Definition at line 174 of file nn.cpp.

00175 {
00176     m_nrExamplesProbe = n;
00177     cout<<"nrExamplesProbe: "<<m_nrExamplesProbe<<endl;
00178 }

void NN::setNrExamplesTrain ( int  n  ) 

Set the number of examples in the training set

n Number of examples in the training set

Definition at line 164 of file nn.cpp.

00165 {
00166     m_nrExamplesTrain = n;
00167     //cout<<"nrExamplesTrain: "<<m_nrExamplesTrain<<endl;
00168 }

void NN::setNrInputs ( int  n  ) 

Set the number of inputs (input features)

n The number of inputs

Definition at line 154 of file nn.cpp.

00155 {
00156     m_nrInputs = n;
00157     cout<<"nrInputs: "<<m_nrInputs<<endl;
00158 }

void NN::setNrTargets ( int  n  ) 

Set the number of targets (outputs)

n Number of target values

Definition at line 142 of file nn.cpp.

00143 {
00144     m_nrTargets = n;
00145     cout<<"nrTargets: "<<m_nrTargets<<endl;
00146 }

void NN::setProbeInputs ( REAL *  inputs  ) 

Set the probe input data (REAL pointer)

inputs Pointer to the probe inputs (row wise)

Definition at line 206 of file nn.cpp.

00207 {
00208     m_inputsProbe = inputs;
00209     //cout<<"inputsProbe: "<<m_inputsProbe<<endl;
00210 }

void NN::setProbeTargets ( REAL *  targets  ) 

Set the probe target values (REAL pointer)

targets Pointer to the probe target values (row wise)

Definition at line 217 of file nn.cpp.

00218 {
00219     m_targetsProbe = targets;
00220     //cout<<"targetsProbe: "<<m_targetsProbe<<endl;
00221 }

void NN::setRPROPMinMaxUpdate ( REAL  min,
REAL  max 

Set the min. and max. update values for the sign update in RPROP. Weight updates can never be larger than max. or smaller than min.

min Min. weight update value
max Max. weight update value

Definition at line 360 of file nn.cpp.

00361 {
00362     m_RPROP_updateMin = min;
00363     m_RPROP_updateMax = max;
00364     cout<<"RPROP_updateMin: "<<m_RPROP_updateMin<<"  RPROP_updateMax: "<<m_RPROP_updateMax<<endl;
00365 }

void NN::setRPROPPosNeg ( REAL  etaPos,
REAL  etaNeg 

Set the etaNeg and etaPos parameters in the RPROP learning algorithm

Learnrate adaption:
adaptiveRPROPlRate = adaptiveRPROPlRate*RPROP_etaPos   if (dE/dW_old * dE/dW) > 0
adaptiveRPROPlRate = adaptiveRPROPlRate*RPROP_etaNeg   if (dE/dW_old * dE/dW) < 0
adaptiveRPROPlRate = adaptiveRPROPlRate                if (dE/dW_old * dE/dW) = 0

etaPos etaPos parameter
etaNeg etaNeg parameter

Definition at line 346 of file nn.cpp.

00347 {
00348     m_RPROP_etaPos = etaPos;
00349     m_RPROP_etaNeg = etaNeg;
00350     cout<<"RPROP_etaPos: "<<m_RPROP_etaPos<<"  RPROP_etaNeg: "<<m_RPROP_etaNeg<<endl;
00351 }

void NN::setScaleOffset ( REAL  scale,
REAL  offset 

Set the scale and offset of the output of the NN. Targets transformation: target = (targetOld - offset) / scale. Outputs transformation: output = outputNN * scale + offset.

scale Output scaling
offset Output offset

Definition at line 375 of file nn.cpp.

00376 {
00377     m_scaleOutputs = scale;
00378     m_offsetOutputs = offset;
00379     cout<<"scaleOutputs: "<<m_scaleOutputs<<"   offsetOutputs: "<<m_offsetOutputs<<"  [transformation: output = outputNN * scale + offset]"<<endl;
00380 }

void NN::setTrainInputs ( REAL *  inputs  ) 

Set the training input data (REAL pointer)

inputs Pointer to the train inputs (row wise)

Definition at line 184 of file nn.cpp.

00185 {
00186     m_inputsTrain = inputs;
00187     //cout<<"inputsTrain: "<<m_inputsTrain<<endl;
00188 }

void NN::setTrainTargets ( REAL *  targets  ) 

Set the training target values (REAL pointer)

targets Pointer to the train target values (row wise)

Definition at line 195 of file nn.cpp.

00196 {
00197     m_targetsTrain = targets;
00198     //cout<<"targetsTrain: "<<m_targetsTrain<<endl;
00199 }

void NN::setWeightDecay ( REAL  weightDecay  ) 

Set the weight decay factor. This is L2 regularization of weights. Penalizes large weights

weightDecay Weight decay factor (0=no regularization)

Definition at line 295 of file nn.cpp.

00296 {
00297     m_weightDecay = weightDecay;
00298     cout<<"weightDecay: "<<m_weightDecay<<endl;
00299 }

void NN::setWeights ( REAL *  w  ) 

Load external weights into the NN weights

w Pointer to new weights

Definition at line 1453 of file nn.cpp.

01454 {
01455     cout<<"Set new weights"<<endl;
01456     for ( int i=0;i<m_nrWeights;i++ )
01457         m_weights[i] = w[i];
01458 }

int NN::trainNN (  ) 

Train the whole Neural Network until a break criterion is reached. This method calls trainOneEpoch() to train one epoch.

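Returns: the global epoch counter at the moment a stop criterion fires; with normal train stopping this is meant to indicate the number of epochs where the probe rmse is minimal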

Definition at line 1119 of file nn.cpp.

01120 {  // evaluates train/probe rmse, checks the stop criteria, then trains one more epoch; returns m_globalEpochs when a criterion fires
01121     cout<<"Train the NN with "<<m_nrExamplesTrain<<" samples"<<endl;
01122     double rmseMin = 1e10, lastUpdate = 1e10, rmseProbeOld = 1e10, rmseTrain = 1e10, rmseProbe = 1e10;
01123     time_t t0 = time ( 0 );
01124     while ( 1 )
01125     {
01126         rmseProbeOld = rmseProbe;
01127         rmseTrain = getRMSETrain();
01128         rmseProbe = getRMSEProbe();
01129         lastUpdate = rmseProbeOld - rmseProbe;  // change of the probe rmse since the previous evaluation
01131         cout<<"e:"<<m_globalEpochs<<"  rmseTrain:"<<rmseTrain<<"  rmseProbe:"<<rmseProbe<<"  "<<flush;
01133         if ( m_normalTrainStopping )
01134         {
01135             if ( rmseProbe < rmseMin )
01136             {
01137                 rmseMin = rmseProbe;
01138                 saveWeights();
01139             }
01140             else
01141             {
01142                 cout<<"rmse rises."<<endl;
01143                 return m_globalEpochs;
01144             }
01145             if ( m_minUpdateBound > fabs ( lastUpdate ) )
01146             {
01147                 cout<<"min update too small (<"<<m_minUpdateBound<<")."<<endl;
01148                 return m_globalEpochs;
01149             }
01150         }
01151         if ( m_maxEpochs == m_globalEpochs || ( !m_normalTrainStopping && m_globalEpochs > m_maxEpochs ) )  // second term: without normal stopping a counter already past maxEpochs could never hit '==' and the loop would not end
01152         {
01153             cout<<"max epochs reached."<<endl;
01154             return m_globalEpochs;
01155         }
01157         trainOneEpoch();
01159         cout<<"lRate:"<<m_learnRate<<"  ";
01160         cout<<time ( 0 )-t0<<"[s]"<<endl;  // wall-clock seconds spent since the previous report
01161         t0 = time ( 0 );
01162     }
01163     return -1;  // not reached: the loop above only exits via return
01164 }

void NN::trainOneEpoch (  ) 

Train the whole Neural Network for one epoch through the training set with gradient descent

Definition at line 1170 of file nn.cpp.

01171 {  // one pass over all training examples: forward, backprop, then a weight update per batch (RPROP or gradient descent)
01172     int batchCnt = 0;
01173     V_ZERO ( m_weightsBatchUpdate, m_nrWeights );
01174     m_sumSquaredError = 0.0;
01175     m_sumSquaredErrorSamples = 0.0;
01177     for ( int i=0;i<m_nrExamplesTrain;i++ )
01178     {
01179         REAL* inputPtr = m_inputsTrain + i * m_nrInputs;
01180         REAL* targetPtr = m_targetsTrain + i * m_nrTargets;
01182         // forward
01183         if ( m_useBLAS )
01184             forwardCalculationBLAS ( inputPtr );
01185         else
01186             forwardCalculation ( inputPtr );
01188         // backward: calc weight update
01189         if ( m_useBLAS )
01190             backpropBLAS ( inputPtr, targetPtr );
01191         else
01192             backprop ( inputPtr, targetPtr );
01194         // accumulate the weight updates
01195         if ( m_batchSize > 1 )
01196             V_ADD ( m_nrWeights, m_deltaW, m_weightsBatchUpdate, m_weightsBatchUpdate );
01198         batchCnt++;
01200         // if batch size is reached, or the last element in training list
01201         if ( batchCnt >= m_batchSize || i == m_nrExamplesTrain - 1 )
01202         {
01203             // batch init
01204             batchCnt = 0;
01205             if ( m_batchSize > 1 )
01206             {
01207                 V_COPY ( m_weightsBatchUpdate, m_deltaW, m_nrWeights ); // deltaW = weightsBatchUpdate
01208                 V_ZERO ( m_weightsBatchUpdate, m_nrWeights );
01209             }
01211             if ( m_enableRPROP )
01212             {
01213                 // weight update:
01214                 // deltaW = {  if dE/dW>0  then  -adaptiveRPROPlRate
01215                 //             if dE/dW<0  then  +adaptiveRPROPlRate
01216                 //             if dE/dW=0  then   0  }
01217                 // learnrate adaption:
01218                 // adaptiveRPROPlRate = {  if (dE/dW_old * dE/dW)>0  then  adaptiveRPROPlRate*RPROP_etaPos
01219                 //                         if (dE/dW_old * dE/dW)<0  then  adaptiveRPROPlRate*RPROP_etaNeg
01220                 //                         if (dE/dW_old * dE/dW)=0  then  adaptiveRPROPlRate  }
01221                 REAL dW, dWOld, sign, update, prod;
01222                 for ( int j=0;j<m_nrWeights;j++ )
01223                 {
01224                     dW = m_deltaW[j];
01225                     dWOld = m_deltaWOld[j];
01226                     prod = dW * dWOld;
01227                     sign = dW > 0.0 ? 1.0 : ( dW < 0.0 ? -1.0 : 0.0 );  // sign(0) = 0: a zero gradient must not move the weight (see rule above)
01228                     if ( prod > 0.0 )
01229                     {
01230                         m_adaptiveRPROPlRate[j] *= m_RPROP_etaPos;
01231                         if ( m_adaptiveRPROPlRate[j] > m_RPROP_updateMax )
01232                             m_adaptiveRPROPlRate[j] = m_RPROP_updateMax;
01233                         update = sign * m_adaptiveRPROPlRate[j];
01234                         m_weights[j] -= update + m_weightDecay * m_weights[j];   // weight update and weight decay
01235                         m_deltaWOld[j] = dW;
01236                     }
01237                     else if ( prod < 0.0 )
01238                     {
01239                         m_adaptiveRPROPlRate[j] *= m_RPROP_etaNeg;
01240                         if ( m_adaptiveRPROPlRate[j] < m_RPROP_updateMin )
01241                             m_adaptiveRPROPlRate[j] = m_RPROP_updateMin;
01242                         m_deltaWOld[j] = 0.0;  // no weight step on a sign change; forces prod == 0 on the next update
01243                     }
01244                     else // prod == 0.0
01245                     {
01246                         update = sign * m_adaptiveRPROPlRate[j];
01247                         m_weights[j] -= update + m_weightDecay * m_weights[j];   // weight update and weight decay
01248                         m_deltaWOld[j] = dW;
01249                     }
01250                 }
01251             }
01252             else  // stochastic gradient decent (batch-size: m_batchSize)
01253             {
01254                 //=========== slower stochastic updates (without vector libraries) ===========
01255                 // update weights + weight decay
01256                 // formula: weights -= eta * (dE(w)/dw + lambda * w)
01257                 //if(m_momentum > 0.0)
01258                 //{
01259                 //    for(int j=0;j<m_nrWeights;j++)
01260                 //        m_weightsOldOld[j] = m_weightsOld[j];
01261                 //    for(int j=0;j<m_nrWeights;j++)
01262                 //        m_weightsOld[j] = m_weights[j];
01263                 //}
01264                 //
01265                 //if(m_momentum > 0.0)
01266                 //{
01267                 //    for(int j=0;j<m_nrWeights;j++)
01268                 //    {
01269                 //        m_weightsTmp0[j] = (1.0 - m_momentum)*m_weightsTmp0[j] + m_momentum*m_weightsTmp1[j];
01270                 //        m_weightsTmp1[j] = m_weightsTmp0[j];
01271                 //        m_weights[j] -= m_weightsTmp0[j];
01272                 //    }
01273                 //}
01274                 //for(int j=0;j<m_nrWeights;j++)
01275                 //    m_weights[j] -= m_learnRate * (m_deltaW[j] + m_weightDecay * m_weights[j]);
01278                 // update weights + weight decay(L2 reg.)
01279                 // formula: weights = weights - eta * (dE(w)/dw + lambda * weights)
01280                 //          weights = weights - (eta*deltaW + eta*lambda*weights)
01281                 V_MULC ( m_deltaW, m_learnRate, m_weightsTmp0, m_nrWeights );     // tmp0 = learnrate * deltaW
01283                 // if weight decay enabled
01284                 if ( m_weightDecay > 0.0 )
01285                 {
01286                     if ( m_enableL1Regularization )
01287                     {
01288                         // update weights + L1 reg.
01289                         // formula: weights = weights - eta * (dE(w)/dw + lambda*sign(w))
01290                         //          weights = weights - (eta*deltaW + eta*lambda*sign(w))
01291                         for ( int j=0;j<m_nrWeights;j++ )
01292                             m_weightsTmp2[j] = 1.0;
01293                         for ( int j=0;j<m_nrWeights;j++ )
01294                             if ( m_weights[j]<0.0 )
01295                                 m_weightsTmp2[j] = -1.0;
01296                         //REAL c = m_weightDecay * m_learnRate;
01297                         //for(int j=0;j<m_nrWeights;j++)
01298                         //    m_weightsTmp0[j] += c * m_weightsTmp2[j];
01299                         CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weightsTmp2, 1, m_weightsTmp0, 1 );  // tmp0 = reg*learnrate*sign(weights)+tmp0
01300                     }
01301                     else
01302                         //saxpy(n, a, x, incx, y, incy)
01303                         //y := a*x + y
01304                         CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weights, 1, m_weightsTmp0, 1 );  // tmp0 = reg*learnrate*weights+tmp0
01305                 }
01307                 // if momentum is used
01308                 if ( m_momentum > 0.0 )
01309                 {
01310                     V_MULC ( m_weightsTmp0, 1.0 - m_momentum, m_weightsTmp0, m_nrWeights ); // tmp0 = tmp0 * (1 - momentum)   [actual update]
01311                     V_MULC ( m_weightsTmp1, m_momentum, m_weightsTmp1, m_nrWeights );    // tmp1 = tmp1 * momentum         [last update]
01313                     // sum updates
01314                     V_ADD ( m_nrWeights, m_weightsTmp0, m_weightsTmp1, m_weightsTmp0 ); // tmp0 = tmp0 + tmp1
01316                     V_COPY ( m_weightsTmp0, m_weightsTmp1, m_nrWeights );               // tmp1 = tmp0
01317                 }
01319                 // standard weight update in the NN
01320                 V_SUB ( m_nrWeights, m_weights, m_weightsTmp0, m_weights );       // weights = weights - tmp0
01321             }
01322         }
01324         // make the learnrate smaller (per sample)
01325         m_learnRate -= m_learnrateDecreaseRate;
01326         if ( m_learnRate < m_learnRateMin )
01327             m_learnRate = m_learnRateMin;
01329     }
01331     // make the learnrate smaller (per epoch)
01332     m_learnRate -= m_learnrateDecreaseRateEpoch;
01333     if ( m_learnRate < m_learnRateMin )
01334         m_learnRate = m_learnRateMin;
01336     // epoch counter
01337     m_globalEpochs++;
01339 }

void NN::useBLASforTraining ( bool  enable  ) 

Set the forward/backward calculation type. enable=1: BLAS Level 2 from MKL is used to perform vector-matrix operations to speed up training. enable=0: standard loops are used for the calculation.

enable Enables BLAS usage for speedup large nets

Definition at line 427 of file nn.cpp.

00428 {
00429     m_useBLAS = enable;
00430     cout<<"useBLAS: "<<m_useBLAS<<endl;
00431 }

The documentation for this class was generated from the following files:

Generated on Tue Jan 26 09:21:13 2010 for ELF by  doxygen 1.5.8