#include <nn.h>
Public Member Functions
NN () | |
~NN () | |
void | setNrTargets (int n) |
void | setNrInputs (int n) |
void | setNrExamplesTrain (int n) |
void | setNrExamplesProbe (int n) |
void | setTrainInputs (REAL *inputs) |
void | setTrainTargets (REAL *targets) |
void | setProbeInputs (REAL *inputs) |
void | setProbeTargets (REAL *targets) |
void | setInitWeightFactor (REAL factor) |
void | setLearnrate (REAL learnrate) |
void | setLearnrateMinimum (REAL learnrateMin) |
void | setLearnrateSubtractionValueAfterEverySample (REAL learnrateDecreaseRate) |
void | setLearnrateSubtractionValueAfterEveryEpoch (REAL learnrateDecreaseRate) |
void | setMomentum (REAL momentum) |
void | setWeightDecay (REAL weightDecay) |
void | setBatchSize (int size) |
void | setMinUpdateErrorBound (REAL minUpdateBound) |
void | setMaxEpochs (int epochs) |
void | setRPROPPosNeg (REAL etaPos, REAL etaNeg) |
void | setRPROPMinMaxUpdate (REAL min, REAL max) |
void | setL1Regularization (bool en) |
void | initNNWeights (time_t seed) |
void | enableErrorFunctionMAE (bool en) |
void | setActivationFunctionType (int type) |
void | setNNStructure (int nrLayer, int *neuronsPerLayer) |
void | printLearnrate () |
void | setScaleOffset (REAL scale, REAL offset) |
void | setNormalTrainStopping (bool en) |
void | setGlobalEpochs (int e) |
void | enableRPROP (bool en) |
void | useBLASforTraining (bool enable) |
void | trainOneEpoch () |
int | trainNN () |
REAL | getRMSETrain () |
REAL | getRMSEProbe () |
void | predictSingleInput (REAL *input, REAL *output) |
REAL * | getWeightPtr () |
void | setWeights (REAL *w) |
int | getNrWeights () |
int | getWeightIndex (int layer, int neuron, int weight) |
int | getBiasIndex (int layer, int neuron) |
int | getOutputIndex (int layer, int neuron) |
Public Attributes
double | m_sumSquaredError |
double | m_sumSquaredErrorSamples |
Private Member Functions
void | saveWeights () |
REAL | calcRMSE (REAL *inputs, REAL *targets, int examples) |
void | forwardCalculation (REAL *input) |
void | forwardCalculationBLAS (REAL *input) |
void | backpropBLAS (REAL *input, REAL *target) |
void | backprop (REAL *input, REAL *target) |
REAL | getInitWeight (int fanIn) |
Private Attributes
int | m_nrTargets |
int | m_nrInputs |
int | m_nrExamplesTrain |
int | m_nrExamplesProbe |
REAL * | m_inputsTrain |
REAL * | m_inputsProbe |
REAL * | m_targetsTrain |
REAL * | m_targetsProbe |
REAL | m_initWeightFactor |
int | m_globalEpochs |
REAL | m_RPROP_etaPos |
REAL | m_RPROP_etaNeg |
REAL | m_RPROP_updateMin |
REAL | m_RPROP_updateMax |
REAL | m_learnRate |
REAL | m_learnRateMin |
REAL | m_learnrateDecreaseRate |
REAL | m_learnrateDecreaseRateEpoch |
REAL | m_momentum |
REAL | m_weightDecay |
REAL | m_minUpdateBound |
int | m_batchSize |
int | m_activationFunctionType |
REAL | m_scaleOutputs |
REAL | m_offsetOutputs |
int | m_maxEpochs |
bool | m_useBLAS |
bool | m_enableRPROP |
bool | m_normalTrainStopping |
bool | m_enableL1Regularization |
bool | m_errorFunctionMAE |
int | m_nrLayer |
int * | m_neuronsPerLayer |
int | m_nrWeights |
int | m_nrOutputs |
int * | m_nrLayWeights |
int * | m_nrLayWeightOffsets |
REAL * | m_outputs |
REAL * | m_outputsTmp |
REAL * | m_derivates |
REAL * | m_d1 |
REAL * | m_weights |
REAL * | m_weightsTmp0 |
REAL * | m_weightsTmp1 |
REAL * | m_weightsTmp2 |
REAL * | m_weightsBatchUpdate |
REAL * | m_weightsOld |
REAL * | m_weightsOldOld |
REAL * | m_deltaW |
REAL * | m_deltaWOld |
REAL * | m_adaptiveRPROPlRate |
The goal of this class is to provide a basic and fast class for training and prediction. It supports basic training functionality.
Memory allocation and normalization of the data (features + targets) must be done outside; this class only receives pointers to the training and probe (= validation) sets.
Forward and backward calculation can be done with plain loops or with vector-matrix operations (BLAS). For large nets the BLAS path should be used (~2x faster).
Standard training is performed with a global learnrate and stochastic gradient descent. Batch training is also possible if batchSize > 1.
This class also implements the RPROP learning algorithm (Rprop - Description and Implementation Details, Martin Riedmiller, 1994, technical report).
Definition at line 38 of file nn.h.
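A minimal usage sketch (not part of the original documentation; the function name, layer sizes, hyperparameters and sample counts are made-up values, REAL is the float/double typedef from nn.h, and all data buffers must be allocated and normalized by the caller):

    #include <nn.h>

    // Hypothetical data: 1000 train / 200 probe samples, 50 inputs, 1 target (row-wise buffers).
    void exampleTraining ( REAL *trainX, REAL *trainY, REAL *probeX, REAL *probeY )
    {
        NN net;
        net.setNrInputs ( 50 );
        net.setNrTargets ( 1 );
        net.setNrExamplesTrain ( 1000 );
        net.setNrExamplesProbe ( 200 );
        net.setTrainInputs ( trainX );
        net.setTrainTargets ( trainY );
        net.setProbeInputs ( probeX );
        net.setProbeTargets ( probeY );

        net.setLearnrate ( 1e-3 );
        net.setLearnrateMinimum ( 1e-5 );
        net.setBatchSize ( 1 );
        net.setMaxEpochs ( 100 );
        net.setScaleOffset ( 1.0, 0.0 );       // targets are already in the net's output range
        net.setNormalTrainStopping ( true );   // stop when the probe RMSE rises
        net.setInitWeightFactor ( 1.0 );
        net.setActivationFunctionType ( 0 );   // tanh
        net.useBLASforTraining ( true );

        int hidden[] = { 30 };                 // one hidden layer with 30 neurons
        net.setNNStructure ( 2, hidden );      // 2 = one hidden layer + one output layer
        net.initNNWeights ( 0 );               // fixed seed for a reproducible initialization

        net.trainNN();                         // trains until a stop criterion is reached

        REAL prediction[1];
        net.predictSingleInput ( probeX, prediction ); // predict the first probe sample
    }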
NN::NN ( )
Constructor
Definition at line 8 of file nn.cpp.
{
    // init member vars
    m_nrTargets = 0;
    m_nrInputs = 0;
    m_nrExamplesTrain = 0;
    m_nrExamplesProbe = 0;
    m_inputsTrain = 0;
    m_inputsProbe = 0;
    m_targetsTrain = 0;
    m_targetsProbe = 0;
    m_initWeightFactor = 0;
    m_globalEpochs = 0;
    m_RPROP_etaPos = 0;
    m_RPROP_etaNeg = 0;
    m_RPROP_updateMin = 0;
    m_RPROP_updateMax = 0;
    m_learnRate = 0;
    m_learnRateMin = 0;
    m_learnrateDecreaseRate = 0;
    m_learnrateDecreaseRateEpoch = 0;
    m_momentum = 0;
    m_weightDecay = 0;
    m_minUpdateBound = 0;
    m_batchSize = 0;
    m_scaleOutputs = 0;
    m_offsetOutputs = 0;
    m_maxEpochs = 0;
    m_useBLAS = 0;
    m_enableRPROP = 0;
    m_normalTrainStopping = 0;
    m_nrLayer = 0;
    m_neuronsPerLayer = 0;
    m_nrWeights = 0;
    m_nrOutputs = 0;
    m_nrLayWeights = 0;
    m_outputs = 0;
    m_outputsTmp = 0;
    m_derivates = 0;
    m_d1 = 0;
    m_weights = 0;
    m_weightsTmp0 = 0;
    m_weightsTmp1 = 0;
    m_weightsTmp2 = 0;
    m_weightsBatchUpdate = 0;
    m_weightsOld = 0;
    m_weightsOldOld = 0;
    m_deltaW = 0;
    m_deltaWOld = 0;
    m_adaptiveRPROPlRate = 0;
    m_enableL1Regularization = 0;
    m_errorFunctionMAE = 0;
    m_sumSquaredError = 0.0;
    m_sumSquaredErrorSamples = 0;
    m_nrLayWeightOffsets = 0;
    m_sumSquaredError = 0.0;
    m_sumSquaredErrorSamples = 0.0;
    m_activationFunctionType = 0;
}
NN::~NN ( )
Destructor
Definition at line 71 of file nn.cpp.
{
    if ( m_neuronsPerLayer )
        delete[] m_neuronsPerLayer;
    m_neuronsPerLayer = 0;
    if ( m_nrLayWeights )
        delete[] m_nrLayWeights;
    m_nrLayWeights = 0;
    if ( m_outputs )
        delete[] m_outputs;
    m_outputs = 0;
    if ( m_outputsTmp )
        delete[] m_outputsTmp;
    m_outputsTmp = 0;
    if ( m_derivates )
        delete[] m_derivates;
    m_derivates = 0;
    if ( m_d1 )
        delete[] m_d1;
    m_d1 = 0;
    if ( m_weights )
        delete[] m_weights;
    m_weights = 0;
    if ( m_weightsTmp0 )
        delete[] m_weightsTmp0;
    m_weightsTmp0 = 0;
    if ( m_weightsTmp1 )
        delete[] m_weightsTmp1;
    m_weightsTmp1 = 0;
    if ( m_weightsTmp2 )
        delete[] m_weightsTmp2;
    m_weightsTmp2 = 0;
    if ( m_weightsBatchUpdate )
        delete[] m_weightsBatchUpdate;
    m_weightsBatchUpdate = 0;
    if ( m_weightsOld )
        delete[] m_weightsOld;
    m_weightsOld = 0;
    if ( m_weightsOldOld )
        delete[] m_weightsOldOld;
    m_weightsOldOld = 0;
    if ( m_deltaW )
        delete[] m_deltaW;
    m_deltaW = 0;
    if ( m_deltaWOld )
        delete[] m_deltaWOld;
    m_deltaWOld = 0;
    if ( m_adaptiveRPROPlRate )
        delete[] m_adaptiveRPROPlRate;
    m_adaptiveRPROPlRate = 0;
    if ( m_nrLayWeightOffsets )
        delete[] m_nrLayWeightOffsets;
    m_nrLayWeightOffsets = 0;
}
void NN::backprop ( REAL * input, REAL * target ) [private]
Calculates the weight updates for the whole net with standard formulas (speed optimized), following the backprop rule. The weight updates are stored in m_deltaW.
input | Input vector | |
target | Target values (vector) |
Definition at line 823 of file nn.cpp.
00824 { 00825 REAL sum0, d1; 00826 00827 int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1; // -1 for bias and output neuron 00828 int n0 = m_neuronsPerLayer[m_nrLayer-1]; 00829 int outputOffsetPrev = outputOffset - n0 - 1; 00830 int outputOffsetNext = outputOffset; 00831 00832 int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer]; 00833 int weightOffsetNext, nP1; 00834 00835 REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error; 00836 00837 // ================== the output neuron: d(j)=(b-o(j))*Aj' ================== 00838 for ( int i=0;i<m_nrTargets;i++ ) 00839 { 00840 double out = m_outputs[outputOffset+i]; 00841 00842 REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i]; 00843 m_sumSquaredError += errorTrain * errorTrain; 00844 m_sumSquaredErrorSamples++; 00845 00846 targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs; 00847 error = out - targetConverted; 00848 00849 if ( m_errorFunctionMAE ) 00850 error = error > 0.0? 1.0 : -1.0; 00851 d1 = error * m_derivates[outputOffset+i]; 00852 m_d1[outputOffset+i] = d1; 00853 deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 ); 00854 if ( m_nrLayer==1 ) 00855 { 00856 outputsPtr = input - 1; 00857 deltaWPtr[0] = d1; 00858 for ( int j=1;j<n0+1;j++ ) 00859 deltaWPtr[j] = d1 * outputsPtr[j]; 00860 } 00861 else 00862 { 00863 outputsPtr = m_outputs + outputOffsetPrev; 00864 for ( int j=0;j<n0+1;j++ ) 00865 deltaWPtr[j] = d1 * outputsPtr[j]; 00866 } 00867 00868 } 00869 00870 // ================== all other neurons in the net ================== 00871 outputOffsetNext = outputOffset; // next to current 00872 outputOffset = outputOffsetPrev; // current to prev 00873 n0 = m_neuronsPerLayer[m_nrLayer-2]; 00874 outputOffsetPrev -= n0 + 1; // prev newnrInputs_ 00875 weightOffset -= m_nrLayWeights[m_nrLayer-1]; // offset to weight pointer 00876 weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer]; 00877 00878 for ( int i=m_nrLayer-1;i>0;i-- ) // all layers from output to input 00879 { 00880 int n = m_neuronsPerLayer[i]; 00881 int nNext = m_neuronsPerLayer[i+1]; 00882 int nPrev = m_neuronsPerLayer[i-1]; 00883 nP1 = n+1; 00884 00885 d1Ptr0 = m_d1 + outputOffsetNext; 00886 derivatesPtr = m_derivates + outputOffset; 00887 weightsPtr = m_weights + weightOffsetNext; 00888 d1Ptr = m_d1 + outputOffset; 00889 deltaWPtr = m_deltaW + weightOffset; 00890 if ( i==1 ) 00891 outputsPtr = input - 1; 00892 else 00893 outputsPtr = m_outputs + outputOffsetPrev; 00894 00895 for ( int j=0;j<n;j++ ) // every neuron in the layer 00896 { 00897 // calc d1 00898 sum0 = 0.0; 00899 for ( int k=0;k<nNext;k++ ) // all neurons in the next layer: d(j)=Aj'*Sum(k,d(k)*w(k,j)) 00900 sum0 += d1Ptr0[k] * weightsPtr[k*nP1]; 00901 sum0 *= *derivatesPtr; 00902 d1Ptr[j] = sum0; 00903 00904 // weight updates 00905 if ( i==1 ) 00906 { 00907 deltaWPtr[0] = sum0; 00908 for ( int k=1;k<nPrev+1;k++ ) 00909 deltaWPtr[k] = sum0 * outputsPtr[k]; 00910 } 00911 else 00912 { 00913 for ( int k=0;k<nPrev+1;k++ ) 00914 deltaWPtr[k] = sum0 * outputsPtr[k]; 00915 } 00916 deltaWPtr += nPrev+1; 00917 weightsPtr ++; 00918 derivatesPtr++; 00919 } 00920 00921 outputOffsetNext = outputOffset; // next to current 00922 outputOffset = outputOffsetPrev; // current to prev 00923 n0 = m_neuronsPerLayer[i-2]; 00924 outputOffsetPrev -= n0 + 1; // prev new 00925 weightOffset -= m_nrLayWeights[i-1]; // offset to weight pointer 00926 weightOffsetNext -= m_nrLayWeights[i]; 00927 } 00928 00929 }
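For reference, the recursion these loops implement can be written in standard backprop notation (a_j is the pre-activation, o_j the output, t_j the target and delta_j the local error of a neuron; this is a restatement, not code from nn.cpp):

    \delta_j^{\mathrm{out}} = (o_j - t_j)\,f'(a_j), \qquad
    \delta_j^{(l)} = f'(a_j)\sum_k \delta_k^{(l+1)} w_{kj}, \qquad
    \frac{\partial E}{\partial w_{ji}} = \delta_j\, o_i^{(l-1)}

The last expression is what ends up in m_deltaW for every weight.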
void NN::backpropBLAS ( REAL * input, REAL * target ) [private]
Calculates the weight updates for the whole net with BLAS (MKL), following the backprop rule. The weight updates are stored in m_deltaW.
input | Input vector | |
target | Target values (vector) |
Definition at line 717 of file nn.cpp.
00718 { 00719 REAL sum0, d1; 00720 00721 int outputOffset = m_nrOutputs - m_neuronsPerLayer[m_nrLayer] - 1; // -1 for bias and output neuron 00722 int n0 = m_neuronsPerLayer[m_nrLayer-1]; 00723 int outputOffsetPrev = outputOffset - n0 - 1; 00724 int outputOffsetNext = outputOffset; 00725 00726 int weightOffset = m_nrWeights - m_nrLayWeights[m_nrLayer]; 00727 int weightOffsetNext, nP1; 00728 00729 REAL *deltaWPtr, *derivatesPtr, *weightsPtr, *outputsPtr, *d1Ptr, *d1Ptr0, targetConverted, error; 00730 00731 // ================== the output neuron: d(j)=(b-o(j))*Aj' ================== 00732 for ( int i=0;i<m_nrTargets;i++ ) 00733 { 00734 REAL out = m_outputs[outputOffset+i]; 00735 00736 REAL errorTrain = out * m_scaleOutputs + m_offsetOutputs - target[i]; 00737 m_sumSquaredError += errorTrain * errorTrain; 00738 m_sumSquaredErrorSamples += 1.0; 00739 00740 targetConverted = ( target[i] - m_offsetOutputs ) / m_scaleOutputs; 00741 error = out - targetConverted; 00742 00743 if ( m_errorFunctionMAE ) 00744 error = error > 0.0? 1.0 : -1.0; 00745 d1 = error * m_derivates[outputOffset+i]; 00746 m_d1[outputOffset+i] = d1; 00747 deltaWPtr = m_deltaW + weightOffset + i* ( n0+1 ); 00748 if ( m_nrLayer==1 ) 00749 { 00750 outputsPtr = input - 1; 00751 deltaWPtr[0] = d1; 00752 for ( int j=1;j<n0+1;j++ ) 00753 deltaWPtr[j] = d1 * outputsPtr[j]; 00754 } 00755 else 00756 { 00757 outputsPtr = m_outputs + outputOffsetPrev; 00758 for ( int j=0;j<n0+1;j++ ) 00759 deltaWPtr[j] = d1 * outputsPtr[j]; 00760 } 00761 } 00762 00763 // ================== all other neurons in the net ================== 00764 outputOffsetNext = outputOffset; // next to current 00765 outputOffset = outputOffsetPrev; // current to prev 00766 n0 = m_neuronsPerLayer[m_nrLayer-2]; 00767 outputOffsetPrev -= n0 + 1; // prev newnrInputs_ 00768 weightOffset -= m_nrLayWeights[m_nrLayer-1]; // offset to weight pointer 00769 weightOffsetNext = m_nrWeights - m_nrLayWeights[m_nrLayer]; 00770 00771 for ( int i=m_nrLayer-1;i>0;i-- ) // all layers from output to input 00772 { 00773 int n = m_neuronsPerLayer[i]; 00774 int nNext = m_neuronsPerLayer[i+1]; 00775 int nPrev = m_neuronsPerLayer[i-1]; 00776 nP1 = n+1; 00777 00778 d1Ptr0 = m_d1 + outputOffsetNext; 00779 derivatesPtr = m_derivates + outputOffset; 00780 weightsPtr = m_weights + weightOffsetNext; 00781 d1Ptr = m_d1 + outputOffset; 00782 deltaWPtr = m_deltaW + weightOffset; 00783 if ( i==1 ) 00784 outputsPtr = input; 00785 else 00786 outputsPtr = m_outputs + outputOffsetPrev; 00787 00788 // d(j) = SUM(d(k)*w(k,j)) 00789 CBLAS_GEMV ( CblasRowMajor, CblasTrans, nNext, n, 1.0, weightsPtr, nP1, d1Ptr0, 1, 0.0, d1Ptr, 1 ); // d1(j) =W_T*d1(k) 00790 V_MUL ( n, d1Ptr, derivatesPtr, d1Ptr ); 00791 00792 // every neuron in the layer calc weight update 00793 for ( int j=0;j<n;j++ ) 00794 { 00795 if ( i==1 ) 00796 { 00797 V_COPY ( outputsPtr, deltaWPtr+1, nPrev ); 00798 deltaWPtr[0] = 1.0; 00799 } 00800 else 00801 V_COPY ( outputsPtr, deltaWPtr, nPrev+1 ); 00802 V_MULC ( deltaWPtr, d1Ptr[j], deltaWPtr, nPrev+1 ); 00803 deltaWPtr += nPrev+1; 00804 } 00805 00806 outputOffsetNext = outputOffset; // next to current 00807 outputOffset = outputOffsetPrev; // current to prev 00808 n0 = m_neuronsPerLayer[i-2]; 00809 outputOffsetPrev -= n0 + 1; // prev new 00810 weightOffset -= m_nrLayWeights[i-1]; // offset to weight pointer 00811 weightOffsetNext -= m_nrLayWeights[i]; 00812 } 00813 }
REAL NN::calcRMSE ( REAL * inputs, REAL * targets, int examples ) [private]
Calculates the RMSE over a given input/target set with the current neural net weights.
inputs | Input vectors (row wise) | |
targets | Target vectors (row wise) | |
examples | Number of examples |
Definition at line 1401 of file nn.cpp.
{
    double rmse = 0.0;
    for ( int i=0;i<examples;i++ )
    {
        REAL* inputPtr = inputs + i * m_nrInputs;
        REAL* targetPtr = targets + i * m_nrTargets;

        predictSingleInput ( inputPtr, m_outputsTmp );

        for ( int j=0;j<m_nrTargets;j++ )
            rmse += ( m_outputsTmp[j] - targetPtr[j] ) * ( m_outputsTmp[j] - targetPtr[j] );
    }
    rmse = sqrt ( rmse/ ( double ) ( examples*m_nrTargets ) );
    return rmse;
}
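Written out, with N = examples and T = m_nrTargets (matching the division by examples*m_nrTargets above):

    \mathrm{RMSE} = \sqrt{\frac{1}{N\,T}\sum_{i=1}^{N}\sum_{j=1}^{T}\left(\hat{y}_{ij} - t_{ij}\right)^2}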
void NN::enableErrorFunctionMAE ( bool en )
void NN::enableRPROP ( bool en )
void NN::forwardCalculation ( REAL * input ) [private]
Forward calculation through the NN (with loops). Outputs are stored in m_outputs; the first derivatives are stored in m_derivates.
input | Input vector |
Definition at line 1037 of file nn.cpp.
01038 { 01039 int outputOffset = m_neuronsPerLayer[0] + 1, outputOffsetPrev = 0; 01040 REAL tmp0, tmp1, sum0; 01041 REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights; 01042 for ( int i=0;i<m_nrLayer;i++ ) // to all layer 01043 { 01044 int n = m_neuronsPerLayer[i+1]; 01045 int nprev = m_neuronsPerLayer[i] + 1; 01046 int loopOffset = i==0? 1 : 0; 01047 ptr0 = m_outputs + outputOffset; 01048 ptr1 = m_derivates + outputOffset; 01049 if ( i==0 ) 01050 outputPtr = input - loopOffset; 01051 else 01052 outputPtr = m_outputs + outputOffsetPrev; 01053 for ( int j=0;j<n;j++ ) // all neurons in this layer 01054 { 01055 sum0 = i==0? weightPtr[0] : 0.0; // dot product sum, for inputlayer: init with bias 01056 for ( int k=loopOffset;k<nprev;k++ ) // calc dot product 01057 sum0 += weightPtr[k] * outputPtr[k]; 01058 weightPtr += nprev; 01059 01060 if(m_activationFunctionType == 0) 01061 { 01062 // activation fkt: f(x)=tanh(x) 01063 tmp0 = tanh ( sum0 ); 01064 ptr0[j] = tmp0; 01065 ptr1[j] = ( 1.0 - tmp0*tmp0 ); 01066 } 01067 else if(m_activationFunctionType == 1) 01068 { 01069 // activation fkt: f(x)=sin(x)+0.01*x 01070 REAL piHalf = 1.570796326794897; 01071 REAL v = ptr0[j], sign = v>0.0? 1.0 : -1.0; 01072 if(v > -piHalf && v < piHalf) 01073 { 01074 ptr0[j] = sin(v) + v * 0.01; 01075 ptr1[j] = cos(v) + sign * 0.01; 01076 } 01077 else // sumWeights is outside a half periode +/-pi/2 01078 { 01079 ptr0[j] = sign + v * 0.01; 01080 ptr1[j] = sign * 0.01; 01081 } 01082 } 01083 else if(m_activationFunctionType == 2) 01084 { 01085 // activation fkt: f(x)= wenn x>0: f(x)=x^(1+tanh(v)*mul) 01086 // wenn x<0: f(x)=-(-x)^(1+tanh(-v)*mul) 01087 REAL mul = 0.5; // 0.25 : swing: [-1.5...+1.5] // 0.5 : swing: [-1.18195...+1.18195] 01088 REAL v = ptr0[j], tanhV = tanh(v), tanhVNeg = -tanhV; 01089 if(v >= 0.0) 01090 { 01091 ptr0[j] = pow(v,1.0+tanhV*mul); //pow(v,0.3); 01092 ptr1[j] = pow(v,tanhV*mul)*(1.0+tanhV*mul)+ptr0[j]*log(v)*mul*(1.0-tanhV*tanhV); 01093 if(isnan(ptr1[j]) || isinf(ptr1[j])) 01094 ptr1[j] = 1.0; 01095 } 01096 else 01097 { 01098 ptr0[j] = -pow(-v,1.0+tanhVNeg*mul); //-pow(-v,0.3); 01099 ptr1[j] = -pow(-v,tanhVNeg*mul)*(1.0+tanhVNeg*mul)*(-1.0)+ptr0[j]*log(-v)*mul*(1.0-tanhV*tanhV)*(-1.0); 01100 if(isnan(ptr1[j]) || isinf(ptr1[j])) 01101 ptr1[j] = -1.0; 01102 } 01103 } 01104 else 01105 assert(false); 01106 } 01107 outputOffset += n+1; // this points to first neuron in current layer 01108 outputOffsetPrev += nprev; // this points to first neuron in previous layer 01109 } 01110 01111 }
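Per neuron the loop evaluates the usual affine combination followed by the activation, where w_{j0} is the bias weight handled by the loopOffset logic above:

    o_j = f\left(w_{j0} + \sum_i w_{ji}\, o_i^{\mathrm{prev}}\right)

The derivative f'(.) is stored alongside in m_derivates for the backward pass.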
void NN::forwardCalculationBLAS ( REAL * input ) [private]
Forward calculation through the NN with BLAS and VML (MKL). Outputs are stored in m_outputs; the first derivatives are stored in m_derivates.
input | Input vector |
Definition at line 938 of file nn.cpp.
00939 { 00940 int outputOffset = m_neuronsPerLayer[0]+1, outputOffsetPrev = 0; 00941 REAL tmp0, tmp1, sum0; 00942 REAL *outputPtr, *ptr0, *ptr1, *weightPtr = m_weights; 00943 00944 for ( int i=0;i<m_nrLayer;i++ ) // to all layer 00945 { 00946 int n = m_neuronsPerLayer[i+1]; 00947 int nprev = m_neuronsPerLayer[i] + 1; 00948 int inputOffset = 0; 00949 ptr0 = m_outputs + outputOffset; 00950 ptr1 = m_derivates + outputOffset; 00951 if ( i==0 ) 00952 { 00953 outputPtr = input; 00954 inputOffset = 1; 00955 } 00956 else 00957 outputPtr = m_outputs + outputOffsetPrev; 00958 00959 // WeightMatrix*InputVec = Outputs 00960 CBLAS_GEMV ( CblasRowMajor, CblasNoTrans, n, nprev - inputOffset, 1.0, weightPtr + inputOffset, nprev, outputPtr, 1, 0.0, ptr0, 1 ); 00961 if ( inputOffset ) 00962 { 00963 for ( int j=0;j<n;j++ ) 00964 ptr0[j] += weightPtr[j * nprev]; 00965 } 00966 00967 if(m_activationFunctionType == 0) 00968 { 00969 // activation fkt: f(x)=tanh(x) 00970 V_TANH ( n, ptr0, ptr0 ); // m_outputs = tanh(m_outputs) 00971 V_SQR ( n, ptr0, ptr1 ); // m_derivates = tanh(m_outputs) * tanh(m_outputs) 00972 for ( int j=0;j<n;j++ ) 00973 ptr1[j] = 1.0 - ptr1[j]; 00974 } 00975 else if(m_activationFunctionType == 1) 00976 { 00977 // activation fkt: f(x)=sin(x)+0.01*x 00978 REAL piHalf = 1.570796326794897; 00979 for(int j=0;j<n;j++) 00980 { 00981 REAL v = ptr0[j], sign = v>0.0? 1.0 : -1.0; 00982 if(v > -piHalf && v < piHalf) 00983 { 00984 ptr0[j] = sin(v) + v * 0.01; 00985 ptr1[j] = cos(v) + sign * 0.01; 00986 } 00987 else // sumWeights is outside a half periode +/-pi/2 00988 { 00989 ptr0[j] = sign + v * 0.01; 00990 ptr1[j] = sign * 0.01; 00991 } 00992 } 00993 } 00994 else if(m_activationFunctionType == 2) 00995 { 00996 // activation fkt: f(x)= wenn x>0: f(x)=x^(1+tanh(v)*mul) 00997 // wenn x<0: f(x)=-(-x)^(1+tanh(-v)*mul) 00998 REAL mul = 0.5; // 0.25 : swing: [-1.5...+1.5] 00999 for(int j=0;j<n;j++) 01000 { 01001 REAL v = ptr0[j], tanhV = tanh(v), tanhVNeg = -tanhV; 01002 if(v >= 0.0) 01003 { 01004 ptr0[j] = pow(v,1.0+tanhV*mul); //pow(v,0.3); 01005 ptr1[j] = pow(v,tanhV*mul)*(1.0+tanhV*mul)+ptr0[j]*log(v)*mul*(1.0-tanhV*tanhV); 01006 if(isnan(ptr1[j]) || isinf(ptr1[j])) 01007 ptr1[j] = 1.0; 01008 } 01009 else 01010 { 01011 ptr0[j] = -pow(-v,1.0+tanhVNeg*mul); //-pow(-v,0.3); 01012 ptr1[j] = -pow(-v,tanhVNeg*mul)*(1.0+tanhVNeg*mul)*(-1.0)+ptr0[j]*log(-v)*mul*(1.0-tanhV*tanhV)*(-1.0); 01013 if(isnan(ptr1[j]) || isinf(ptr1[j])) 01014 ptr1[j] = -1.0; 01015 } 01016 } 01017 } 01018 else 01019 assert(false); 01020 01021 // update index 01022 weightPtr += n*nprev; 01023 outputOffset += n+1; // this points to first neuron in current layer 01024 outputOffsetPrev += nprev; // this points to first neuron in previous layer 01025 01026 } 01027 01028 }
int NN::getBiasIndex ( int layer, int neuron )
Returns the index of the bias weight of a neuron.
layer | Weight on layer | |
neuron | Neuron number |
Definition at line 509 of file nn.cpp.
{
    if ( layer == 0 )
        assert ( false );

    int nrNeur = m_neuronsPerLayer[layer];
    int nrNeurPrev = m_neuronsPerLayer[layer-1];
    if ( neuron >= nrNeur )
    {
        cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
        assert ( false );
    }
    int ind = m_nrLayWeightOffsets[layer];
    if ( layer == 1 )  // input layer
        ind += neuron* ( nrNeurPrev + 1 );
    else
        ind += nrNeurPrev + neuron* ( nrNeurPrev + 1 );

    if ( ind >= m_nrWeights )
    {
        cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
        assert ( false );
    }

    return ind;
}
REAL NN::getInitWeight ( int fanIn ) [private]
Returns a random (uniform) initial weight for a given number of input connections of a neuron, following the 1/sqrt(fanIn) rule (from Yann LeCun).
fanIn | The number of input connections for this neuron |
Definition at line 673 of file nn.cpp.
{
    double nr = 2.0* ( rand() / ( double ) RAND_MAX-0.5 );  // -1 .. +1
    return ( 1.0/sqrt ( ( double ) fanIn ) ) * nr;
}
int NN::getNrWeights ( )
int NN::getOutputIndex ( int layer, int neuron )
Returns the index of a neuron's output value.
layer | Output on layer | |
neuron | Neuron number |
Definition at line 544 of file nn.cpp.
{
    if ( layer == 0 || layer > m_nrLayer )
        assert ( false );

    if ( neuron >= m_neuronsPerLayer[layer] )
        assert ( false );

    int ind = 0;
    for ( int i=0;i<layer;i++ )
        ind += m_neuronsPerLayer[i] + 1;

    return ind + neuron;
}
REAL NN::getRMSEProbe ( )
REAL NN::getRMSETrain ( )
int NN::getWeightIndex ( int layer, int neuron, int weight )
Returns the index of a particular weight.
layer | Weight on layer | |
neuron | Neuron number | |
weight | Weight number |
Definition at line 468 of file nn.cpp.
{
    if ( layer == 0 )
        assert ( false );

    int nrNeur = m_neuronsPerLayer[layer];
    int nrNeurPrev = m_neuronsPerLayer[layer-1];
    if ( neuron >= nrNeur )
    {
        cout<<"neuron:"<<neuron<<" nrNeur:"<<nrNeur<<endl;
        assert ( false );
    }
    if ( weight >= nrNeurPrev )
    {
        cout<<"weight:"<<weight<<" nrNeurPrev:"<<nrNeurPrev<<endl;
        assert ( false );
    }

    int ind = m_nrLayWeightOffsets[layer];
    if ( layer == 1 )  // input layer
        ind += 1 + weight + neuron* ( nrNeurPrev + 1 );
    else
        ind += weight + neuron* ( nrNeurPrev + 1 );

    if ( ind >= m_nrWeights )
    {
        cout<<"ind:"<<ind<<" m_nrWeights:"<<m_nrWeights<<endl;
        assert ( false );
    }

    return ind;
}
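A small hypothetical sketch of how the index helpers combine with getWeightPtr() to inspect single weights of a configured net (the layer/neuron/weight numbers are made up and must lie within the structure set by setNNStructure()):

    REAL *w = net.getWeightPtr();
    REAL weightTo3From7 = w[ net.getWeightIndex ( 1, 3, 7 ) ]; // layer 1, neuron 3, input weight 7
    REAL biasOfNeuron3  = w[ net.getBiasIndex ( 1, 3 ) ];      // bias of the same neuron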
REAL * NN::getWeightPtr ( )
void NN::initNNWeights ( time_t seed )
Initializes all weights in the net.
seed | The random seed (the same seed gives exactly the same weight initialization) |
Definition at line 684 of file nn.cpp.
{
    srand ( seed );
    cout<<"init weights ";
    REAL factor = m_initWeightFactor;
    int cnt = 0;
    for ( int i=0;i<m_nrLayer;i++ )  // through all layers
    {
        int n = m_neuronsPerLayer[i+1];
        int nprev = m_neuronsPerLayer[i] + 1;  // +1 for bias
        for ( int j=0;j<n;j++ )  // all neurons per layer
        {
            for ( int k=0;k<nprev;k++ )  // all weights from this neuron
            {
                m_weights[cnt] = m_weightsOld[i] = m_weightsOldOld[i] = getInitWeight ( nprev ) * factor;
                cnt++;
            }
        }
    }

    // check the number
    if ( cnt != m_nrWeights )
        assert ( false );
}
void NN::predictSingleInput ( REAL * input, REAL * output )
Predicts the output for an input vector with the neural net. The current m_weights are used for the forward calculation through the net.
input | Input vector (pointer) | |
output | Output vector (pointer) |
Definition at line 1377 of file nn.cpp.
{
    REAL* inputPtr = input;

    // forward
    if ( m_useBLAS )
        forwardCalculationBLAS ( inputPtr );
    else
        forwardCalculation ( inputPtr );

    // output correction
    REAL* outputPtr = m_outputs + m_nrOutputs - m_nrTargets - 1;
    for ( int i=0;i<m_nrTargets;i++ )
        output[i] = outputPtr[i] * m_scaleOutputs + m_offsetOutputs;
}
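A short usage sketch (buffer sizes are assumptions; the input must be normalized in the same way as the training data):

    REAL input[50];     // one feature vector, filled by the caller
    REAL prediction[1]; // receives the output, already rescaled with scale/offset
    net.predictSingleInput ( input, prediction );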
void NN::printLearnrate ( )
void NN::saveWeights ( ) [private]
void NN::setActivationFunctionType ( int type )
Sets the type of the activation function used in all layers.
type | 0=tanh, 1=sin, 2=tanhMod0 |
Definition at line 438 of file nn.cpp.
{
    if(type==0)
    {
        cout<<"activationFunctionType: tanh"<<endl;
        m_activationFunctionType = 0;
    }
    else if(type==1)
    {
        cout<<"activationFunctionType: sin"<<endl;
        m_activationFunctionType = 1;
    }
    else if(type==2)
    {
        cout<<"activationFunctionType: tanhMod0"<<endl;
        m_activationFunctionType = 2;
    }
    else
        assert(false);
}
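Restated from forwardCalculation()/forwardCalculationBLAS(), the three types correspond to:

    f_0(x) = \tanh(x)
    f_1(x) = \sin(x) + 0.01\,x   (for |x| < \pi/2; clamped to \pm 1 + 0.01\,x outside)
    f_2(x) = \mathrm{sign}(x)\,|x|^{\,1 + 0.5\tanh(|x|)}   (tanhMod0)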
void NN::setBatchSize ( int size )
void NN::setGlobalEpochs ( int e )
void NN::setInitWeightFactor ( REAL factor )
void NN::setL1Regularization ( bool en )
void NN::setLearnrate ( REAL learnrate )
void NN::setLearnrateMinimum ( REAL learnrateMin )
void NN::setLearnrateSubtractionValueAfterEveryEpoch ( REAL learnrateDecreaseRate )
Sets the value subtracted from the learning rate after every training epoch.
learnrateDecreaseRate | This value is subtracted from the learnrate after every training epoch |
Definition at line 273 of file nn.cpp.
{
    m_learnrateDecreaseRateEpoch = learnrateDecreaseRate;
    cout<<"learnrateDecreaseRateEpoch: "<<m_learnrateDecreaseRateEpoch<<endl;
}
void NN::setLearnrateSubtractionValueAfterEverySample ( REAL learnrateDecreaseRate )
Sets the value subtracted from the learning rate after every training example.
learnrateDecreaseRate | This value is subtracted from the learnrate after every training example |
Definition at line 261 of file nn.cpp.
{
    m_learnrateDecreaseRate = learnrateDecreaseRate;
    cout<<"learnrateDecreaseRate: "<<m_learnrateDecreaseRate<<endl;
}
void NN::setMaxEpochs ( int epochs )
void NN::setMinUpdateErrorBound ( REAL minUpdateBound )
Sets the minimal difference between two successive training epochs below which training stops.
minUpdateBound | The minimum RMSE improvement; training stops when the improvement falls below it |
Definition at line 318 of file nn.cpp.
{
    m_minUpdateBound = minUpdateBound;
    cout<<"minUpdateBound: "<<m_minUpdateBound<<endl;
}
void NN::setMomentum ( REAL momentum )
void NN::setNNStructure ( int nrLayer, int * neuronsPerLayer )
Set the inner structure: layers and how many neurons per layer
nrLayer | Number of layers (1=output layer only, 2=one hidden layer, 3=two hidden layers) |
neuronsPerLayer | Integer pointer to the number of neurons per layer |
Definition at line 565 of file nn.cpp.
00566 { 00567 m_nrLayer = nrLayer; 00568 cout<<"nrLayer: "<<m_nrLayer<<endl; 00569 00570 cout<<"#layers: "<<m_nrLayer<<" ("<< ( m_nrLayer-1 ) <<" hidden layer, 1 output layer)"<<endl; 00571 00572 // alloc space for structure variables 00573 m_neuronsPerLayer = new int[m_nrLayer+1]; 00574 m_neuronsPerLayer[0] = m_nrInputs; // number of inputs 00575 for ( int i=0;i<m_nrLayer-1;i++ ) 00576 m_neuronsPerLayer[1+i] = neuronsPerLayer[i]; 00577 m_neuronsPerLayer[m_nrLayer] = m_nrTargets; // one output 00578 00579 cout<<"Neurons per Layer: "; 00580 for ( int i=0;i<m_nrLayer+1;i++ ) 00581 cout<<m_neuronsPerLayer[i]<<" "; 00582 cout<<endl; 00583 00584 cout<<"Outputs per Layer: "; 00585 for ( int i=0;i<m_nrLayer+1;i++ ) 00586 cout<<m_neuronsPerLayer[i]+1<<" "; 00587 cout<<endl; 00588 00589 cout<<"OutOffsets per Layer: "; 00590 int cnt=0; 00591 for ( int i=0;i<m_nrLayer+1;i++ ) 00592 { 00593 cout<<cnt<<" "; 00594 cnt += m_neuronsPerLayer[i]+1; 00595 } 00596 cout<<endl; 00597 00598 // init the total number of weights and outputs 00599 m_nrWeights = 0; 00600 m_nrOutputs = m_neuronsPerLayer[0] + 1; 00601 m_nrLayWeights = new int[m_nrLayer+1]; 00602 m_nrLayWeightOffsets = new int[m_nrLayer+2]; 00603 m_nrLayWeights[0] = 0; 00604 for ( int i=0;i<m_nrLayer;i++ ) 00605 { 00606 m_nrLayWeights[i+1] = m_neuronsPerLayer[i+1] * ( m_neuronsPerLayer[i]+1 ); // +1 for input bias 00607 m_nrWeights += m_nrLayWeights[i+1]; 00608 m_nrOutputs += m_neuronsPerLayer[i+1] + 1; // +1 for input bias 00609 } 00610 00611 // print it 00612 cout<<"Weights per Layer: "; 00613 for ( int i=0;i<m_nrLayer+1;i++ ) 00614 cout<<m_nrLayWeights[i]<<" "; 00615 cout<<endl; 00616 00617 cout<<"WeightOffsets per Layer: "; 00618 m_nrLayWeightOffsets[0] = 0; 00619 for ( int i=0;i<m_nrLayer+1;i++ ) 00620 { 00621 cout<<m_nrLayWeightOffsets[i]<<" "; 00622 m_nrLayWeightOffsets[i+1] = m_nrLayWeightOffsets[i] + m_nrLayWeights[i]; 00623 } 00624 cout<<endl; 00625 00626 cout<<"nrOutputs="<<m_nrOutputs<<" nrWeights="<<m_nrWeights<<endl; 00627 00628 // allocate the inner calculation structure 00629 m_outputs = new REAL[m_nrOutputs]; 00630 m_outputsTmp = new REAL[m_nrTargets]; 00631 m_derivates = new REAL[m_nrOutputs]; 00632 m_d1 = new REAL[m_nrOutputs]; 00633 00634 for ( int i=0;i<m_nrOutputs;i++ ) // init as biases 00635 { 00636 m_outputs[i] = 1.0; 00637 m_derivates[i] = 0.0; 00638 m_d1[i] = 0.0; 00639 } 00640 00641 // allocate weights and temp vars 00642 m_weights = new REAL[m_nrWeights]; 00643 m_weightsTmp0 = new REAL[m_nrWeights]; 00644 m_weightsTmp1 = new REAL[m_nrWeights]; 00645 m_weightsTmp2 = new REAL[m_nrWeights]; 00646 m_weightsBatchUpdate = new REAL[m_nrWeights]; 00647 m_weightsOld = new REAL[m_nrWeights]; 00648 m_weightsOldOld = new REAL[m_nrWeights]; 00649 m_deltaW = new REAL[m_nrWeights]; 00650 00651 m_deltaWOld = new REAL[m_nrWeights]; 00652 m_adaptiveRPROPlRate = new REAL[m_nrWeights]; 00653 for ( int i=0;i<m_nrWeights;i++ ) 00654 { 00655 m_deltaWOld[i] = 0.0; 00656 m_adaptiveRPROPlRate[i] = m_learnRate; 00657 } 00658 for ( int i=0;i<m_nrWeights;i++ ) 00659 m_weights[i] = m_weightsOld[i] = m_deltaW[i] = m_weightsTmp0[i] = m_weightsTmp1[i] = m_weightsTmp2[i] = 0.0; 00660 00661 // this should be implemented (LeCun suggest such a linear factor in the activation function) 00662 //m_linFac = 0.01; 00663 //cout<<"linFac="<<m_linFac<<" (no active, just tanh used)"<<endl; 00664 }
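A small sketch of how the arguments fit together (the sizes are made up): nrLayer counts the hidden layers plus the output layer, and neuronsPerLayer lists only the hidden layer sizes, i.e. nrLayer-1 entries; setNrInputs() and setNrTargets() must have been called beforehand, because the input and output layer sizes are taken from them.

    net.setNrInputs ( 50 );
    net.setNrTargets ( 1 );
    int hidden[] = { 100, 30 };        // two hidden layers
    net.setNNStructure ( 3, hidden );  // 3 = two hidden layers + one output layer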
void NN::setNormalTrainStopping ( bool en )
Sets the training stop criterion. en=0: training stops at maxEpochs. en=1: training stops at maxEpochs, when the probe error rises, or when the probe error improvement is too small.
en | Train stop criteria (0 is used for retraining) |
Definition at line 389 of file nn.cpp.
{
    m_normalTrainStopping = en;
    cout<<"normalTrainStopping: "<<m_normalTrainStopping<<endl;
}
void NN::setNrExamplesProbe ( int n )
void NN::setNrExamplesTrain ( int n )
void NN::setNrInputs ( int n )
void NN::setNrTargets ( int n )
void NN::setProbeInputs ( REAL * inputs )
void NN::setProbeTargets ( REAL * targets )
void NN::setRPROPMinMaxUpdate ( REAL min, REAL max )
Sets the min. and max. update values for the sign update in RPROP. Weight updates can never be larger than max. or smaller than min.
min | Min. weight update value | |
max | Max. weight update value |
Definition at line 360 of file nn.cpp.
{
    m_RPROP_updateMin = min;
    m_RPROP_updateMax = max;
    cout<<"RPROP_updateMin: "<<m_RPROP_updateMin<<" RPROP_updateMax: "<<m_RPROP_updateMax<<endl;
}
void NN::setRPROPPosNeg ( REAL etaPos, REAL etaNeg )
Set the etaNeg and etaPos parameters in the RPROP learning algorithm
Learnrate adaption:
adaptiveRPROPlRate = adaptiveRPROPlRate * RPROP_etaPos   if (dE/dW_old * dE/dW) > 0
adaptiveRPROPlRate = adaptiveRPROPlRate * RPROP_etaNeg   if (dE/dW_old * dE/dW) < 0
adaptiveRPROPlRate unchanged                             if (dE/dW_old * dE/dW) = 0
etaPos | etaPos parameter | |
etaNeg | etaNeg parameter |
Definition at line 346 of file nn.cpp.
{
    m_RPROP_etaPos = etaPos;
    m_RPROP_etaNeg = etaNeg;
    cout<<"RPROP_etaPos: "<<m_RPROP_etaPos<<" RPROP_etaNeg: "<<m_RPROP_etaNeg<<endl;
}
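Together with setRPROPMinMaxUpdate(), this yields the standard Riedmiller update; in the usual notation, with Delta_j the per-weight step size stored in m_adaptiveRPROPlRate (a restatement of the rule implemented in trainOneEpoch(), ignoring the additional weight decay term):

    \Delta_j \leftarrow \begin{cases}
      \min(\Delta_j\,\eta^{+},\ \Delta_{\max}) & \text{if } \tfrac{\partial E}{\partial w_j}^{(t-1)} \cdot \tfrac{\partial E}{\partial w_j}^{(t)} > 0 \\
      \max(\Delta_j\,\eta^{-},\ \Delta_{\min}) & \text{if } \tfrac{\partial E}{\partial w_j}^{(t-1)} \cdot \tfrac{\partial E}{\partial w_j}^{(t)} < 0 \\
      \Delta_j & \text{otherwise}
    \end{cases}
    \qquad
    w_j \leftarrow w_j - \mathrm{sign}\!\left(\tfrac{\partial E}{\partial w_j}\right) \Delta_j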
void NN::setScaleOffset ( REAL scale, REAL offset )
Sets the scale and offset of the NN output. Target transformation: target = (targetOld - offset) / scale. Output transformation: output = outputNN * scale + offset.
scale | Output scaling | |
offset | Output offset |
Definition at line 375 of file nn.cpp.
{
    m_scaleOutputs = scale;
    m_offsetOutputs = offset;
    cout<<"scaleOutputs: "<<m_scaleOutputs<<" offsetOutputs: "<<m_offsetOutputs<<" [transformation: output = outputNN * scale + offset]"<<endl;
}
void NN::setTrainInputs ( REAL * inputs )
void NN::setTrainTargets ( REAL * targets )
void NN::setWeightDecay ( REAL weightDecay )
void NN::setWeights ( REAL * w )
int NN::trainNN ( )
Trains the whole neural network until a stop criterion is reached. This method calls trainOneEpoch() to train one epoch at a time.
Definition at line 1119 of file nn.cpp.
{
    cout<<"Train the NN with "<<m_nrExamplesTrain<<" samples"<<endl;
    double rmseMin = 1e10, lastUpdate = 1e10, rmseProbeOld = 1e10, rmseTrain = 1e10, rmseProbe = 1e10;
    time_t t0 = time ( 0 );
    while ( 1 )
    {
        rmseProbeOld = rmseProbe;
        rmseTrain = getRMSETrain();
        rmseProbe = getRMSEProbe();
        lastUpdate = rmseProbeOld - rmseProbe;

        cout<<"e:"<<m_globalEpochs<<" rmseTrain:"<<rmseTrain<<" rmseProbe:"<<rmseProbe<<" "<<flush;

        if ( m_normalTrainStopping )
        {
            if ( rmseProbe < rmseMin )
            {
                rmseMin = rmseProbe;
                saveWeights();
            }
            else
            {
                cout<<"rmse rises."<<endl;
                return m_globalEpochs;
            }
            if ( m_minUpdateBound > fabs ( lastUpdate ) )
            {
                cout<<"min update too small (<"<<m_minUpdateBound<<")."<<endl;
                return m_globalEpochs;
            }
        }
        if ( m_maxEpochs == m_globalEpochs )
        {
            cout<<"max epochs reached."<<endl;
            return m_globalEpochs;
        }

        trainOneEpoch();

        cout<<"lRate:"<<m_learnRate<<" ";
        cout<<time ( 0 )-t0<<"[s]"<<endl;
        t0 = time ( 0 );
    }
    return -1;
}
void NN::trainOneEpoch | ( | ) |
Trains the whole neural network for one epoch through the training set with gradient descent.
Definition at line 1170 of file nn.cpp.
01171 { 01172 int batchCnt = 0; 01173 V_ZERO ( m_weightsBatchUpdate, m_nrWeights ); 01174 m_sumSquaredError = 0.0; 01175 m_sumSquaredErrorSamples = 0.0; 01176 01177 for ( int i=0;i<m_nrExamplesTrain;i++ ) 01178 { 01179 REAL* inputPtr = m_inputsTrain + i * m_nrInputs; 01180 REAL* targetPtr = m_targetsTrain + i * m_nrTargets; 01181 01182 // forward 01183 if ( m_useBLAS ) 01184 forwardCalculationBLAS ( inputPtr ); 01185 else 01186 forwardCalculation ( inputPtr ); 01187 01188 // backward: calc weight update 01189 if ( m_useBLAS ) 01190 backpropBLAS ( inputPtr, targetPtr ); 01191 else 01192 backprop ( inputPtr, targetPtr ); 01193 01194 // accumulate the weight updates 01195 if ( m_batchSize > 1 ) 01196 V_ADD ( m_nrWeights, m_deltaW, m_weightsBatchUpdate, m_weightsBatchUpdate ); 01197 01198 batchCnt++; 01199 01200 // if batch size is reached, or the last element in training list 01201 if ( batchCnt >= m_batchSize || i == m_nrExamplesTrain - 1 ) 01202 { 01203 // batch init 01204 batchCnt = 0; 01205 if ( m_batchSize > 1 ) 01206 { 01207 V_COPY ( m_weightsBatchUpdate, m_deltaW, m_nrWeights ); // deltaW = weightsBatchUpdate 01208 V_ZERO ( m_weightsBatchUpdate, m_nrWeights ); 01209 } 01210 01211 if ( m_enableRPROP ) 01212 { 01213 // weight update: 01214 // deltaW = { if dE/dW>0 then -adaptiveRPROPlRate 01215 // if dE/dW<0 then +adaptiveRPROPlRate 01216 // if dE/dW=0 then 0 } 01217 // learnrate adaption: 01218 // adaptiveRPROPlRate = { if (dE/dW_old * dE/dW)>0 then adaptiveRPROPlRate*RPROP_etaPos 01219 // if (dE/dW_old * dE/dW)<0 then adaptiveRPROPlRate*RPROP_etaNeg 01220 // if (dE/dW_old * dE/dW)=0 then adaptiveRPROPlRate } 01221 REAL dW, dWOld, sign, update, prod; 01222 for ( int j=0;j<m_nrWeights;j++ ) 01223 { 01224 dW = m_deltaW[j]; 01225 dWOld = m_deltaWOld[j]; 01226 prod = dW * dWOld; 01227 sign = dW > 0.0? 
1.0 : -1.0; 01228 if ( prod > 0.0 ) 01229 { 01230 m_adaptiveRPROPlRate[j] *= m_RPROP_etaPos; 01231 if ( m_adaptiveRPROPlRate[j] > m_RPROP_updateMax ) 01232 m_adaptiveRPROPlRate[j] = m_RPROP_updateMax; 01233 update = sign * m_adaptiveRPROPlRate[j]; 01234 m_weights[j] -= update + m_weightDecay * m_weights[j]; // weight update and weight decay 01235 m_deltaWOld[j] = dW; 01236 } 01237 else if ( prod < 0.0 ) 01238 { 01239 m_adaptiveRPROPlRate[j] *= m_RPROP_etaNeg; 01240 if ( m_adaptiveRPROPlRate[j] < m_RPROP_updateMin ) 01241 m_adaptiveRPROPlRate[j] = m_RPROP_updateMin; 01242 m_deltaWOld[j] = 0.0; 01243 } 01244 else // prod == 0.0 01245 { 01246 update = sign * m_adaptiveRPROPlRate[j]; 01247 m_weights[j] -= update + m_weightDecay * m_weights[j]; // weight update and weight decay 01248 m_deltaWOld[j] = dW; 01249 } 01250 } 01251 } 01252 else // stochastic gradient decent (batch-size: m_batchSize) 01253 { 01254 //=========== slower stochastic updates (without vector libraries) =========== 01255 // update weights + weight decay 01256 // formula: weights -= eta * (dE(w)/dw + lambda * w) 01257 //if(m_momentum > 0.0) 01258 //{ 01259 // for(int j=0;j<m_nrWeights;j++) 01260 // m_weightsOldOld[j] = m_weightsOld[j]; 01261 // for(int j=0;j<m_nrWeights;j++) 01262 // m_weightsOld[j] = m_weights[j]; 01263 //} 01264 // 01265 //if(m_momentum > 0.0) 01266 //{ 01267 // for(int j=0;j<m_nrWeights;j++) 01268 // { 01269 // m_weightsTmp0[j] = (1.0 - m_momentum)*m_weightsTmp0[j] + m_momentum*m_weightsTmp1[j]; 01270 // m_weightsTmp1[j] = m_weightsTmp0[j]; 01271 // m_weights[j] -= m_weightsTmp0[j]; 01272 // } 01273 //} 01274 //for(int j=0;j<m_nrWeights;j++) 01275 // m_weights[j] -= m_learnRate * (m_deltaW[j] + m_weightDecay * m_weights[j]); 01276 01277 01278 // update weights + weight decay(L2 reg.) 01279 // formula: weights = weights - eta * (dE(w)/dw + lambda * weights) 01280 // weights = weights - (eta*deltaW + eta*lambda*weights) 01281 V_MULC ( m_deltaW, m_learnRate, m_weightsTmp0, m_nrWeights ); // tmp0 = learnrate * deltaW 01282 01283 // if weight decay enabled 01284 if ( m_weightDecay > 0.0 ) 01285 { 01286 if ( m_enableL1Regularization ) 01287 { 01288 // update weights + L1 reg. 
01289 // formula: weights = weights - eta * (dE(w)/dw + lambda*sign(w)) 01290 // weights = weights - (eta*deltaW + eta*lambda*sign(w)) 01291 for ( int j=0;j<m_nrWeights;j++ ) 01292 m_weightsTmp2[j] = 1.0; 01293 for ( int j=0;j<m_nrWeights;j++ ) 01294 if ( m_weights[j]<0.0 ) 01295 m_weightsTmp2[j] = -1.0; 01296 //REAL c = m_weightDecay * m_learnRate; 01297 //for(int j=0;j<m_nrWeights;j++) 01298 // m_weightsTmp0[j] += c * m_weightsTmp2[j]; 01299 CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weightsTmp2, 1, m_weightsTmp0, 1 ); // tmp0 = reg*learnrate*weights+tmp0 01300 } 01301 else 01302 //saxpy(n, a, x, incx, y, incy) 01303 //y := a*x + y 01304 CBLAS_AXPY ( m_nrWeights, m_weightDecay * m_learnRate, m_weights, 1, m_weightsTmp0, 1 ); // tmp0 = reg*learnrate*weights+tmp0 01305 } 01306 01307 // if momentum is used 01308 if ( m_momentum > 0.0 ) 01309 { 01310 V_MULC ( m_weightsTmp0, 1.0 - m_momentum, m_weightsTmp0, m_nrWeights ); // tmp0 = tmp0 * (1 - momentum) [actual update] 01311 V_MULC ( m_weightsTmp1, m_momentum, m_weightsTmp1, m_nrWeights ); // tmp1 = tmp1 * momentum [last update] 01312 01313 // sum updates 01314 V_ADD ( m_nrWeights, m_weightsTmp0, m_weightsTmp1, m_weightsTmp0 ); // tmp0 = tmp0 + tmp1 01315 01316 V_COPY ( m_weightsTmp0, m_weightsTmp1, m_nrWeights ); // tmp1 = tmp0 01317 } 01318 01319 // standard weight update in the NN 01320 V_SUB ( m_nrWeights, m_weights, m_weightsTmp0, m_weights ); // weights = weights - tmp0 01321 } 01322 } 01323 01324 // make the learnrate smaller (per sample) 01325 m_learnRate -= m_learnrateDecreaseRate; 01326 if ( m_learnRate < m_learnRateMin ) 01327 m_learnRate = m_learnRateMin; 01328 01329 } 01330 01331 // make the learnrate smaller (per epoch) 01332 m_learnRate -= m_learnrateDecreaseRateEpoch; 01333 if ( m_learnRate < m_learnRateMin ) 01334 m_learnRate = m_learnRateMin; 01335 01336 // epoch counter 01337 m_globalEpochs++; 01338 01339 }
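For the non-RPROP branch above, the vectorized operations implement the update written in the code comments; restated with eta = learnrate, lambda = weightDecay and mu = momentum:

    \Delta = \eta\left(\frac{\partial E}{\partial w} + \lambda\, r(w)\right), \qquad
    r(w) = \begin{cases} w & \text{L2 regularization} \\ \mathrm{sign}(w) & \text{L1 regularization} \end{cases}

    u_t = (1-\mu)\,\Delta + \mu\, u_{t-1}, \qquad w \leftarrow w - u_t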
void NN::useBLASforTraining ( bool enable )