NeuralNetwork.cpp

#include "NeuralNetwork.h"

extern StreamOutput cout;

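/**
 * Constructor
 *
 * Zero-init all hyperparameters and the network pointer array; the
 * actual values are read later from the description file in
 * readSpecificMaps().
 */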
NeuralNetwork::NeuralNetwork()
{
    cout<<"NeuralNetwork"<<endl;
    // init member vars
    m_inputs = 0;
    m_nn = 0;
    m_epoch = 0;
    m_nrLayer = 0;
    m_batchSize = 0;
    m_offsetOutputs = 0;
    m_scaleOutputs = 0;
    m_initWeightFactor = 0;
    m_learnrate = 0;
    m_learnrateMinimum = 0;
    m_learnrateSubtractionValueAfterEverySample = 0;
    m_learnrateSubtractionValueAfterEveryEpoch = 0;
    m_momentum = 0;
    m_weightDecay = 0;
    m_minUpdateErrorBound = 0;
    m_etaPosRPROP = 0;
    m_etaNegRPROP = 0;
    m_minUpdateRPROP = 0;
    m_maxUpdateRPROP = 0;
    m_enableRPROP = 0;
    m_useBLASforTraining = 0;
    m_enableL1Regularization = 0;
    m_enableErrorFunctionMAE = 0;
}

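/**
 * Destructor
 *
 * Free the m_nCross+1 networks (one per cross-validation fold plus one
 * for retraining on the full training set) and the pointer array itself.
 */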
NeuralNetwork::~NeuralNetwork()
{
    cout<<"destructor NeuralNetwork"<<endl;

    if ( m_nn )  // m_nn may still be 0 if modelInit() was never called
    {
        for ( int i=0;i<m_nCross+1;i++ )
        {
            delete m_nn[i];  // deleting a null pointer is a safe no-op
            m_nn[i] = 0;
        }
        delete[] m_nn;
        m_nn = 0;
    }
}

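/**
 * Read the algorithm-specific parameters from the description-file maps
 * (m_intMap, m_doubleMap, m_boolMap, m_stringMap) into member variables.
 */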
void NeuralNetwork::readSpecificMaps()
{
    cout<<"Read specific maps"<<endl;

    // read dsc vars
    m_nrLayer = m_intMap["nrLayer"];
    m_batchSize = m_intMap["batchSize"];
    m_offsetOutputs = m_doubleMap["offsetOutputs"];
    m_scaleOutputs = m_doubleMap["scaleOutputs"];
    m_initWeightFactor = m_doubleMap["initWeightFactor"];
    m_learnrate = m_doubleMap["learnrate"];
    m_learnrateMinimum = m_doubleMap["learnrateMinimum"];
    m_learnrateSubtractionValueAfterEverySample = m_doubleMap["learnrateSubtractionValueAfterEverySample"];
    m_learnrateSubtractionValueAfterEveryEpoch = m_doubleMap["learnrateSubtractionValueAfterEveryEpoch"];
    m_momentum = m_doubleMap["momentum"];
    m_weightDecay = m_doubleMap["weightDecay"];
    m_minUpdateErrorBound = m_doubleMap["minUpdateErrorBound"];
    m_etaPosRPROP = m_doubleMap["etaPosRPROP"];
    m_etaNegRPROP = m_doubleMap["etaNegRPROP"];
    m_minUpdateRPROP = m_doubleMap["minUpdateRPROP"];
    m_maxUpdateRPROP = m_doubleMap["maxUpdateRPROP"];
    m_enableL1Regularization = m_boolMap["enableL1Regularization"];
    m_enableErrorFunctionMAE = m_boolMap["enableErrorFunctionMAE"];
    m_enableRPROP = m_boolMap["enableRPROP"];
    m_useBLASforTraining = m_boolMap["useBLASforTraining"];
    m_neuronsPerLayer = m_stringMap["neuronsPerLayer"];
    m_activationFunction = m_stringMap["activationFunction"];
}

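/**
 * Standalone demo: train one net on uniform random data in [-0.5,0.5]
 * with MNIST-sized dimensions (784 inputs, 10 targets, 60000 train and
 * 100 test examples). Serves as a minimal usage example of the NN class.
 */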
void NeuralNetwork::demo()
{
    int inputs = 784, targets = 10, exampleTrain = 60000, exampleTest = 100;
    REAL* trainInput = new REAL[inputs * exampleTrain];
    REAL* testInput = new REAL[inputs * exampleTest];
    REAL* trainTarget = new REAL[targets * exampleTrain];
    REAL* testTarget = new REAL[targets * exampleTest];

    // fill inputs and targets with uniform random values in [-0.5,0.5]
    for ( int i=0;i<inputs*exampleTrain;i++ )
        trainInput[i] = ( double ) rand() / ( double ) RAND_MAX - 0.5;
    for ( int i=0;i<inputs*exampleTest;i++ )
        testInput[i] = ( double ) rand() / ( double ) RAND_MAX - 0.5;
    for ( int i=0;i<targets*exampleTrain;i++ )
        trainTarget[i] = ( double ) rand() / ( double ) RAND_MAX - 0.5;
    for ( int i=0;i<targets*exampleTest;i++ )
        testTarget[i] = ( double ) rand() / ( double ) RAND_MAX - 0.5;

    NN nn;
    nn.setNrTargets ( targets );
    nn.setNrInputs ( inputs );
    nn.setNrExamplesTrain ( exampleTrain );
    nn.setNrExamplesProbe ( exampleTest );
    nn.setTrainInputs ( trainInput );
    nn.setTrainTargets ( trainTarget );
    nn.setProbeInputs ( testInput );
    nn.setProbeTargets ( testTarget );

    // learn parameters
    nn.setInitWeightFactor ( m_initWeightFactor );
    nn.setLearnrate ( m_learnrate );
    nn.setLearnrateMinimum ( m_learnrateMinimum );
    nn.setLearnrateSubtractionValueAfterEverySample ( m_learnrateSubtractionValueAfterEverySample );
    nn.setLearnrateSubtractionValueAfterEveryEpoch ( m_learnrateSubtractionValueAfterEveryEpoch );
    nn.setMomentum ( m_momentum );
    nn.setWeightDecay ( m_weightDecay );
    nn.setMinUpdateErrorBound ( m_minUpdateErrorBound );
    nn.setBatchSize ( m_batchSize );
    nn.setMaxEpochs ( m_maxTuninigEpochs );
    nn.setL1Regularization ( m_enableL1Regularization );
    nn.enableErrorFunctionMAE ( m_enableErrorFunctionMAE );

    // set net inner structure
    int nrLayer = m_nrLayer;
    int* neuronsPerLayer = Data::splitStringToIntegerList ( m_neuronsPerLayer, ',' );
    nn.enableRPROP ( m_enableRPROP );
    nn.setNNStructure ( nrLayer, neuronsPerLayer );
    nn.useBLASforTraining ( m_useBLASforTraining );
    nn.setScaleOffset ( 1.0, 0.0 );
    nn.setRPROPPosNeg ( m_etaPosRPROP, m_etaNegRPROP );
    nn.setRPROPMinMaxUpdate ( m_minUpdateRPROP, m_maxUpdateRPROP );
    nn.setNormalTrainStopping ( true );
    nn.initNNWeights ( 0 );
    nn.setActivationFunctionType ( 0 );  // 0 = tanh (see modelInit)
    delete[] neuronsPerLayer;

    // training
    nn.trainNN();

    delete[] trainInput;
    delete[] testInput;
    delete[] trainTarget;
    delete[] testTarget;
}

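/**
 * Init the model
 *
 * Register the number of training epochs as the tunable parameter and
 * construct m_nCross+1 identically configured networks; the extra net
 * (index m_nCross) is used for retraining on the full training set.
 */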
void NeuralNetwork::modelInit()
{
    // demonstration training on random data
    //demo();

    // add tunable parameters
    m_epoch = 0;
    paramEpochValues.push_back ( &m_epoch );
    paramEpochNames.push_back ( "epoch" );


    // set up NNs
    // nCross + 1 (for retraining)
    if ( m_nn == 0 )
    {
        m_nn = new NN*[m_nCross+1];
        for ( int i=0;i<m_nCross+1;i++ )
            m_nn[i] = 0;
    }
    for ( int i=0;i<m_nCross+1;i++ )
    {
        cout<<"Create a Neural Network ("<<i+1<<"/"<<m_nCross+1<<")"<<endl;
        if ( m_nn[i] == 0 )
            m_nn[i] = new NN();
        m_nn[i]->setNrTargets ( m_nClass*m_nDomain );
        m_nn[i]->setNrInputs ( m_nFeatures );
        m_nn[i]->setNrExamplesTrain ( 0 );
        m_nn[i]->setNrExamplesProbe ( 0 );
        m_nn[i]->setTrainInputs ( 0 );
        m_nn[i]->setTrainTargets ( 0 );
        m_nn[i]->setProbeInputs ( 0 );
        m_nn[i]->setProbeTargets ( 0 );
        m_nn[i]->setGlobalEpochs ( 0 );

        // learn parameters
        m_nn[i]->setInitWeightFactor ( m_initWeightFactor );
        m_nn[i]->setLearnrate ( m_learnrate );
        m_nn[i]->setLearnrateMinimum ( m_learnrateMinimum );
        m_nn[i]->setLearnrateSubtractionValueAfterEverySample ( m_learnrateSubtractionValueAfterEverySample );
        m_nn[i]->setLearnrateSubtractionValueAfterEveryEpoch ( m_learnrateSubtractionValueAfterEveryEpoch );
        m_nn[i]->setMomentum ( m_momentum );
        m_nn[i]->setWeightDecay ( m_weightDecay );
        m_nn[i]->setMinUpdateErrorBound ( m_minUpdateErrorBound );
        m_nn[i]->setBatchSize ( m_batchSize );
        m_nn[i]->setMaxEpochs ( m_maxTuninigEpochs );
        m_nn[i]->setL1Regularization ( m_enableL1Regularization );
        m_nn[i]->enableErrorFunctionMAE ( m_enableErrorFunctionMAE );

        // map the activation-function name to the internal type id
        if ( m_activationFunction=="tanh" )
            m_nn[i]->setActivationFunctionType ( 0 );
        else if ( m_activationFunction=="sin" )
            m_nn[i]->setActivationFunctionType ( 1 );
        else if ( m_activationFunction=="tanhMod0" )
            m_nn[i]->setActivationFunctionType ( 2 );
        else
            assert ( false );  // unknown activation function in the description file

        // set net inner structure
        int nrLayer = m_nrLayer;
        int* neuronsPerLayer = Data::splitStringToIntegerList ( m_neuronsPerLayer, ',' );
        m_nn[i]->enableRPROP ( m_enableRPROP );
        m_nn[i]->setNNStructure ( nrLayer, neuronsPerLayer );

        m_nn[i]->setScaleOffset ( m_scaleOutputs, m_offsetOutputs );
        m_nn[i]->setRPROPPosNeg ( m_etaPosRPROP, m_etaNegRPROP );
        m_nn[i]->setRPROPMinMaxUpdate ( m_minUpdateRPROP, m_maxUpdateRPROP );
        m_nn[i]->setNormalTrainStopping ( true );
        m_nn[i]->useBLASforTraining ( m_useBLASforTraining );
        m_nn[i]->initNNWeights ( m_randSeed );
        delete[] neuronsPerLayer;

        cout<<endl<<endl;
    }
}

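/**
 * Predict the outputs for a block of samples by applying the net of the
 * given cross-validation run to each sample in turn.
 *
 * A minimal usage sketch (buffer names are illustrative only):
 * @code
 * REAL* in  = new REAL[nSamples * m_nFeatures];          // filled by the caller
 * REAL* out = new REAL[nSamples * m_nClass * m_nDomain]; // written here
 * predictAllOutputs ( in, out, nSamples, 0 );            // use the fold-0 net
 * @endcode
 *
 * @param rawInputs Input features, row-wise per sample (nSamples x m_nFeatures)
 * @param outputs Output buffer (nSamples x m_nClass*m_nDomain)
 * @param nSamples Number of samples to predict
 * @param crossRun Index of the cross-validation net to use
 */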
void NeuralNetwork::predictAllOutputs ( REAL* rawInputs, REAL* outputs, uint nSamples, uint crossRun )
{
    // predict all samples
    for ( uint i=0;i<nSamples;i++ )
        m_nn[crossRun]->predictSingleInput ( rawInputs + i*m_nFeatures, outputs + i*m_nClass*m_nDomain );
}

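/**
 * Update the model
 *
 * During cross validation (crossRun < m_nCross) one gradient-descent
 * epoch is performed per call and the train RMSE is printed. In the
 * retraining run the net is trained for the best epoch count found
 * during cross validation (at least one epoch).
 *
 * @param input Train input features (nSamples x m_nFeatures)
 * @param target Train targets (nSamples x m_nClass*m_nDomain)
 * @param nSamples Number of training samples
 * @param crossRun Cross-validation run index (m_nCross = retraining)
 */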
void NeuralNetwork::modelUpdate ( REAL* input, REAL* target, uint nSamples, uint crossRun )
{
    m_nn[crossRun]->setTrainInputs ( input );
    m_nn[crossRun]->setTrainTargets ( target );
    m_nn[crossRun]->setNrExamplesTrain ( nSamples );

    if ( crossRun < m_nCross || ( crossRun==0 && m_validationType=="ValidationSet" ) )
    {
        // one gradient descent step (one epoch)
        m_nn[crossRun]->trainOneEpoch();

        // print the train RMSE of this epoch
        stringstream s;
        s<<"[t:"<<sqrt ( m_nn[crossRun]->m_sumSquaredError/ ( double ) m_nn[crossRun]->m_sumSquaredErrorSamples ) <<"]";
        cout<<s.str();
        if ( crossRun == m_nCross - 1 || ( crossRun==0 && m_validationType=="ValidationSet" ) )
            m_nn[crossRun]->printLearnrate();
    }
    else
    {
        // retraining with a fixed number of epochs
        cout<<endl<<"Tune: Training of full trainset "<<endl;
        m_nn[crossRun]->setNormalTrainStopping ( false );
        int maxEpochs = m_epochParamBest[0];
        if ( maxEpochs == 0 )
            maxEpochs = 1;  // train at least one epoch
        cout<<"Best #epochs (on cross validation): "<<maxEpochs<<endl;
        m_nn[crossRun]->setMaxEpochs ( maxEpochs );

        // train the net
        m_nn[crossRun]->trainNN();
        cout<<endl;
    }
}

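/**
 * Save the weights of the net of one cross-validation fold, together
 * with the dataset dimensions, to the binary weight file
 * <datasetPath>/<tempPath>/<weightFile>.<cross>.
 *
 * @param cross Index of the net whose weights are written
 */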
void NeuralNetwork::saveWeights ( int cross )
{
    char buf[1024];
    snprintf ( buf, sizeof ( buf ), "%02d", cross );
    string name = m_datasetPath + "/" + m_tempPath + "/" + m_weightFile + "." + buf;
    if ( m_inRetraining )
        cout<<"Save:"<<name<<endl;
    int n = m_nn[cross]->getNrWeights();
    REAL* w = m_nn[cross]->getWeightPtr();

    // raw ints and REALs are written, so open the file in binary mode
    fstream f ( name.c_str(), ios::out | ios::binary );
    f.write ( ( char* ) &m_nTrain, sizeof ( int ) );
    f.write ( ( char* ) &m_nFeatures, sizeof ( int ) );
    f.write ( ( char* ) &m_nClass, sizeof ( int ) );
    f.write ( ( char* ) &m_nDomain, sizeof ( int ) );
    f.write ( ( char* ) &n, sizeof ( int ) );
    f.write ( ( char* ) w, sizeof ( REAL ) *n );
    f.write ( ( char* ) &m_maxSwing, sizeof ( double ) );
    f.close();
}

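/**
 * Load the dataset dimensions and weights for one fold from the binary
 * weight file, rebuild the corresponding net with the stored structure
 * parameters and copy the weights into it. Only m_nn[cross] is created;
 * the other slots stay empty.
 *
 * @param cross Index of the net to restore
 */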
void NeuralNetwork::loadWeights ( int cross )
{
    // open the weight file (binary, same layout as written in saveWeights)
    char buf[1024];
    snprintf ( buf, sizeof ( buf ), "%02d", cross );
    string name = m_datasetPath + "/" + m_tempPath + "/" + m_weightFile + "." + buf;
    cout<<"Load:"<<name<<endl;
    fstream f ( name.c_str(), ios::in | ios::binary );
    assert ( f.is_open() );
    f.read ( ( char* ) &m_nTrain, sizeof ( int ) );
    f.read ( ( char* ) &m_nFeatures, sizeof ( int ) );
    f.read ( ( char* ) &m_nClass, sizeof ( int ) );
    f.read ( ( char* ) &m_nDomain, sizeof ( int ) );

    // set up NNs (only the one with index 'cross' is used)
    m_nn = new NN*[m_nCross+1];
    for ( int i=0;i<m_nCross+1;i++ )
        m_nn[i] = 0;
    m_nn[cross] = new NN();
    m_nn[cross]->setNrTargets ( m_nClass*m_nDomain );
    m_nn[cross]->setNrInputs ( m_nFeatures );
    m_nn[cross]->setNrExamplesTrain ( 0 );
    m_nn[cross]->setNrExamplesProbe ( 0 );
    m_nn[cross]->setTrainInputs ( 0 );
    m_nn[cross]->setTrainTargets ( 0 );
    m_nn[cross]->setProbeInputs ( 0 );
    m_nn[cross]->setProbeTargets ( 0 );

    // learn parameters
    m_nn[cross]->setInitWeightFactor ( m_initWeightFactor );
    m_nn[cross]->setLearnrate ( m_learnrate );
    m_nn[cross]->setLearnrateMinimum ( m_learnrateMinimum );
    m_nn[cross]->setLearnrateSubtractionValueAfterEverySample ( m_learnrateSubtractionValueAfterEverySample );
    m_nn[cross]->setLearnrateSubtractionValueAfterEveryEpoch ( m_learnrateSubtractionValueAfterEveryEpoch );
    m_nn[cross]->setMomentum ( m_momentum );
    m_nn[cross]->setWeightDecay ( m_weightDecay );
    m_nn[cross]->setMinUpdateErrorBound ( m_minUpdateErrorBound );
    m_nn[cross]->setBatchSize ( m_batchSize );
    m_nn[cross]->setMaxEpochs ( m_maxTuninigEpochs );
    m_nn[cross]->setL1Regularization ( m_enableL1Regularization );
    m_nn[cross]->enableErrorFunctionMAE ( m_enableErrorFunctionMAE );
    if ( m_activationFunction=="tanh" )
        m_nn[cross]->setActivationFunctionType ( 0 );
    else if ( m_activationFunction=="sin" )
        m_nn[cross]->setActivationFunctionType ( 1 );
    else if ( m_activationFunction=="tanhMod0" )
        m_nn[cross]->setActivationFunctionType ( 2 );
    else
        assert ( false );  // unknown activation function

    // set net inner structure
    int nrLayer = m_nrLayer;
    int* neuronsPerLayer = Data::splitStringToIntegerList ( m_neuronsPerLayer, ',' );
    m_nn[cross]->setNNStructure ( nrLayer, neuronsPerLayer );

    m_nn[cross]->setRPROPPosNeg ( m_etaPosRPROP, m_etaNegRPROP );
    m_nn[cross]->setRPROPMinMaxUpdate ( m_minUpdateRPROP, m_maxUpdateRPROP );
    m_nn[cross]->setScaleOffset ( m_scaleOutputs, m_offsetOutputs );
    m_nn[cross]->setNormalTrainStopping ( true );
    m_nn[cross]->enableRPROP ( m_enableRPROP );
    m_nn[cross]->useBLASforTraining ( m_useBLASforTraining );
    m_nn[cross]->initNNWeights ( m_randSeed );
    delete[] neuronsPerLayer;

    // read the weight vector and copy it into the net
    int n = 0;
    f.read ( ( char* ) &n, sizeof ( int ) );

    REAL* w = new REAL[n];

    f.read ( ( char* ) w, sizeof ( REAL ) *n );
    f.read ( ( char* ) &m_maxSwing, sizeof ( double ) );
    f.close();

    // init the NN weights (setWeights copies, so the local buffer can be freed)
    m_nn[cross]->setWeights ( w );

    delete[] w;
    w = 0;
}

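/**
 * Load meta weights: nothing to do for a gradient-descent based algorithm.
 *
 * @param cross Index of the cross-validation net (unused)
 */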
void NeuralNetwork::loadMetaWeights ( int cross )
{
    // nothing to do in a gradient-descent based algorithm
}

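/**
 * Generate a description-file template with default values for all
 * parameters of this algorithm.
 *
 * @param id Algorithm ID in the ensemble
 * @param preEffect Name of the full predictor this algorithm trains on
 * @param nameID Counter used to build unique weight/prediction file names
 * @param blendStop If true, stop training on the blend error, otherwise
 *        on the probe (validation) error
 * @return The template as one string
 */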
string NeuralNetwork::templateGenerator ( int id, string preEffect, int nameID, bool blendStop )
{
    stringstream s;
    s<<"ALGORITHM=NeuralNetwork"<<endl;
    s<<"ID="<<id<<endl;
    s<<"TRAIN_ON_FULLPREDICTOR="<<preEffect<<endl;
    s<<"DISABLE=0"<<endl;
    s<<endl;
    // note: some key names below are misspelled ("minTuninigEpochs",
    // "minimzeProbe", ...); they are kept verbatim because the
    // description-file parser presumably matches them literally
    s<<"[int]"<<endl;
    s<<"nrLayer=3"<<endl;
    s<<"batchSize=1"<<endl;
    s<<"minTuninigEpochs=30"<<endl;
    s<<"maxTuninigEpochs=100"<<endl;
    s<<endl;
    s<<"[double]"<<endl;
    s<<"initMaxSwing=1.0"<<endl;
    s<<endl;
    s<<"offsetOutputs=0.0"<<endl;
    s<<"scaleOutputs=1.2"<<endl;
    s<<endl;
    s<<"etaPosRPROP=1.005"<<endl;
    s<<"etaNegRPROP=0.99"<<endl;
    s<<"minUpdateRPROP=1e-8"<<endl;
    s<<"maxUpdateRPROP=1e-2"<<endl;
    s<<endl;
    s<<"initWeightFactor=1.0"<<endl;
    s<<"learnrate=1e-3"<<endl;
    s<<"learnrateMinimum=1e-5"<<endl;
    s<<"learnrateSubtractionValueAfterEverySample=0.0"<<endl;
    s<<"learnrateSubtractionValueAfterEveryEpoch=0.0"<<endl;
    s<<"momentum=0.0"<<endl;
    s<<"weightDecay=0.0"<<endl;
    s<<"minUpdateErrorBound=1e-6"<<endl;
    s<<endl;
    s<<"[bool]"<<endl;
    s<<"enableErrorFunctionMAE=0"<<endl;
    s<<"enableL1Regularization=0"<<endl;
    s<<"enableClipping=1"<<endl;
    s<<"enableTuneSwing=0"<<endl;
    s<<"useBLASforTraining=1"<<endl;
    s<<"enableRPROP=0"<<endl;
    s<<endl;
    s<<"minimzeProbe="<< ( !blendStop ) <<endl;
    s<<"minimzeProbeClassificationError=0"<<endl;
    s<<"minimzeBlend="<<blendStop<<endl;
    s<<"minimzeBlendClassificationError=0"<<endl;
    s<<endl;
    s<<"[string]"<<endl;
    s<<"activationFunction=tanh"<<endl;
    s<<"neuronsPerLayer=30,20,40,30,100,-1"<<endl;
    s<<"weightFile=NeuralNetwork_"<<nameID<<"_weights.dat"<<endl;
    s<<"fullPrediction=NeuralNetwork_"<<nameID<<".dat"<<endl;

    return s.str();
}
