#include <Autoencoder.h>
Public Member Functions

    Autoencoder ()
    ~Autoencoder ()
    virtual double train ()
    void readMaps ()
    void modelInit ()
    void modelUpdate (REAL *input, REAL *target, uint nSamples, uint crossRun)
    void predictAllOutputs (REAL *rawInputs, REAL *outputs, uint nSamples, uint crossRun)
    void readSpecificMaps ()
    void saveWeights ()
    void loadWeights ()
    virtual double calcRMSEonProbe ()
    virtual double calcRMSEonBlend ()
    virtual void setPredictionMode (int cross)
    virtual void predictMultipleOutputs (REAL *rawInput, REAL *effect, REAL *output, int *label, int nSamples, int crossRun)
    virtual void saveBestPrediction ()
    void readDataset (Data *data, string datasetName)
    void loadNormalizations ()

Static Public Member Functions

    static string templateGenerator (int id, string preEffect, int nameID, bool blendStop)

Private Attributes

    REAL ** m_inputs
    NNRBM ** m_nn
    int m_epoch
    bool * m_isFirstEpoch
    int m_maxTuninigEpochs
    int m_minTuninigEpochs
    int m_nrLayer
    int m_batchSize
    int m_nFixEpochs
    double m_offsetOutputs
    double m_scaleOutputs
    double m_initWeightFactor
    double m_learnrate
    double m_learnrateMinimum
    double m_learnrateSubtractionValueAfterEverySample
    double m_learnrateSubtractionValueAfterEveryEpoch
    double m_momentum
    double m_weightDecay
    double m_minUpdateErrorBound
    double m_etaPosRPROP
    double m_etaNegRPROP
    double m_minUpdateRPROP
    double m_maxUpdateRPROP
    bool m_enableL1Regularization
    bool m_enableErrorFunctionMAE
    bool m_enableRPROP
    bool m_useBLASforTraining
    string m_neuronsPerLayer
    REAL * m_meanRBM
    REAL * m_stdRBM
This class is used for preprocessing the dataset. Here, an RBM is used to initialize the weights of a deep neural network. Finally, the net is fine-tuned by standard backpropagation with stochastic gradient descent. The stopping criterion for fine-tuning is the minimum reconstruction RMSE on the cross-validation set.
Definition at line 22 of file Autoencoder.h.
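A minimal training sketch (hypothetical driver code; it assumes the surrounding framework has already filled the description-file maps and the cross-validation data, which this page does not show):

    Autoencoder ae;    // prints "Autoencoder"
    ae.train();        // reads the specific maps, tunes #epochs by cross validation,
                       // retrains on the full training set and calls saveWeights()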
Autoencoder::Autoencoder ( )
Constructor
Definition at line 8 of file Autoencoder.cpp.
{
    cout<<"Autoencoder"<<endl;
    // init member vars
    m_inputs = 0;
    m_nn = 0;
    m_epoch = 0;
    m_nrLayer = 0;
    m_batchSize = 0;
    m_offsetOutputs = 0;
    m_scaleOutputs = 0;
    m_initWeightFactor = 0;
    m_learnrate = 0;
    m_learnrateMinimum = 0;
    m_learnrateSubtractionValueAfterEverySample = 0;
    m_learnrateSubtractionValueAfterEveryEpoch = 0;
    m_momentum = 0;
    m_weightDecay = 0;
    m_minUpdateErrorBound = 0;
    m_etaPosRPROP = 0;
    m_etaNegRPROP = 0;
    m_minUpdateRPROP = 0;
    m_maxUpdateRPROP = 0;
    m_enableRPROP = 0;
    m_useBLASforTraining = 0;
    m_enableL1Regularization = 0;
    m_enableErrorFunctionMAE = 0;
    m_isFirstEpoch = 0;
    m_meanRBM = 0;
    m_stdRBM = 0;
    m_minTuninigEpochs = 0;
    m_maxTuninigEpochs = 0;
    m_nFixEpochs = -1;
}
Autoencoder::~Autoencoder ( )
Destructor
Definition at line 46 of file Autoencoder.cpp.
{
    cout<<"descructor Autoencoder"<<endl;

    for ( int i=0;i<m_nCross+1;i++ )
    {
        if ( m_nn[i] )
            delete m_nn[i];
        m_nn[i] = 0;
    }
    delete[] m_nn;

    if ( m_isFirstEpoch )
        delete[] m_isFirstEpoch;
    m_isFirstEpoch = 0;
}
double Autoencoder::calcRMSEonProbe ( )  [virtual]
Calculate the RMSE on all probe sets with cross validation
Implements Algorithm.
Definition at line 245 of file Autoencoder.cpp.
{
    double rmse = 0.0;
    uint rmseCnt = 0;
    int nThreads = m_maxThreadsInCross; // get #available threads

    for ( int i=0;i<m_nCross;i+=nThreads ) // all cross validation sets
    {
        // predict the probeset
        int* nSamples = new int[nThreads];
        double* rmses = new double[nThreads];
        uint* rmseCnts = new uint[nThreads];
        REAL** predictionProbe = new REAL*[nThreads];
        for ( int t=0;t<nThreads;t++ )
        {
            nSamples[t] = m_probeSize[i+t];
            rmses[t] = 0.0;
            rmseCnts[t] = 0;
            predictionProbe[t] = new REAL[nSamples[t]*m_nFeatures];
        }

        // parallel training of the cross-validation sets with OPENMP
        #pragma omp parallel for
        for ( int t=0;t<nThreads;t++ )
        {
            cout<<"."<<flush;
            if ( m_enableSaveMemory )
                fillNCrossValidationSet ( i+t );
            modelUpdate ( m_train[i+t], m_trainTarget[i+t], m_trainSize[i+t], i+t );

            // predict all samples
            for ( int j=0;j<nSamples[t];j++ )
                m_nn[i+t]->m_nnAuto->predictSingleInput ( m_probe[i+t] + j*m_nFeatures, predictionProbe[t] + j*m_nFeatures );

            for ( int j=0;j<nSamples[t]*m_nFeatures;j++ ) // error over all probe samples
            {
                REAL err = predictionProbe[t][j] - m_probe[i+t][j];
                rmses[t] += err * err;
                rmseCnts[t]++;
            }
            //cout<<"[p:"<<sqrt(rmses[t]/(double)rmseCnts[t])<<"]"<<flush;

            if ( m_enableSaveMemory )
                freeNCrossValidationSet ( i+t );
        }

        // calc rmse sums
        for ( int t=0;t<nThreads;t++ )
        {
            rmse += rmses[t];
            rmseCnt += rmseCnts[t];
        }

        delete[] nSamples;
        delete[] rmses;
        delete[] rmseCnts;
        for ( int j=0;j<nThreads;j++ )
            delete[] predictionProbe[j];
    }

    return sqrt ( rmse/ ( double ) rmseCnt );
}
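Written out, the value returned is the per-element reconstruction RMSE over all probe samples and all features:

    \mathrm{RMSE} = \sqrt{\frac{1}{N \cdot F}\sum_{i=1}^{N}\sum_{j=1}^{F}\left(\hat{x}_{ij} - x_{ij}\right)^{2}}

where x_{ij} is feature j of probe sample i, \hat{x}_{ij} its reconstruction by the autoencoder, N the total number of probe samples over all cross-validation sets, and F = m_nFeatures.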
void Autoencoder::loadNormalizations ( )
Load mean and std of the dataset
Definition at line 221 of file Autoencoder.cpp.
{
    m_meanRBM = new REAL[m_nFeatures];
    m_stdRBM = new REAL[m_nFeatures];

    cout<<"load the 0..1 normalizations (length "<<m_nFeatures<<")"<<endl;
    string meanName = m_datasetPath + "/" + m_tempPath + "/AutoencoderDataMean.dat";
    string stdName = m_datasetPath + "/" + m_tempPath + "/AutoencoderDataStd.dat";
    cout<<"meanName:"<<meanName<<endl<<"stdName:"<<stdName<<endl;
    fstream fMean ( meanName.c_str(),ios::in );
    fstream fStd ( stdName.c_str(),ios::in );
    if ( fMean.is_open() == false || fStd.is_open() == false )
        assert ( false );
    fMean.read ( ( char* ) m_meanRBM, sizeof ( REAL ) *m_nFeatures );
    fStd.read ( ( char* ) m_stdRBM, sizeof ( REAL ) *m_nFeatures );
    fMean.close();
    fStd.close();
}
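Each of the two files holds m_nFeatures REAL values. Applying them is a plain per-feature standardization, as predictAllOutputs() and readDataset() do (raw and in are hypothetical buffers of length m_nFeatures):

    // feature-wise: z = (x - mean) / std
    for ( int j = 0; j < m_nFeatures; j++ )
        in[j] = ( raw[j] - m_meanRBM[j] ) / m_stdRBM[j];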
void Autoencoder::loadWeights ( )
Load the weights and all other parameters and make the model ready to predict
Definition at line 560 of file Autoencoder.cpp.
{
    // load weights
    string name = m_datasetPath + "/" + m_tempPath + "/AutoencoderWeights.dat";
    cout<<"Load:"<<name<<endl;
    fstream f ( name.c_str(), ios::in );
    if ( f.is_open() == false )
        assert ( false );

    // new net
    m_nn = new NNRBM*[m_nCross+1];
    for ( int i=0;i<m_nCross+1;i++ )
        m_nn[i] = 0;
    m_nn[0] = new NNRBM();
    m_nn[0]->m_nnAuto = new NNRBM();

    // #layers
    int l;
    f.read ( ( char* ) &l, sizeof ( int ) );
    int* neur = new int[l-1];

    // #features
    int in;
    f.read ( ( char* ) &in, sizeof ( int ) );
    m_nn[0]->m_nnAuto->setNrInputs ( in );
    m_nFeatures = in;

    // neurons per layer
    for ( int i=0;i<l-1;i++ )
    {
        int n;
        f.read ( ( char* ) &n, sizeof ( int ) );
        neur[i] = n;
    }
    int ntar;
    f.read ( ( char* ) &ntar, sizeof ( int ) );
    m_nn[0]->m_nnAuto->setNrTargets ( ntar );
    m_nClass = ntar;
    m_nDomain = 1;

    // net scale/offset
    f.read ( ( char* ) &m_scaleOutputs, sizeof ( double ) );
    f.read ( ( char* ) &m_offsetOutputs, sizeof ( double ) );
    f.read ( ( char* ) &m_useBLASforTraining, sizeof ( bool ) );

    m_nn[0]->m_nnAuto->setNNStructure ( l, neur, true );
    m_nn[0]->m_nnAuto->setScaleOffset ( m_scaleOutputs, m_offsetOutputs );
    m_nn[0]->m_nnAuto->useBLASforTraining ( m_useBLASforTraining );
    m_nn[0]->m_nnAuto->setInitWeightFactor ( 0.0 );
    m_nn[0]->m_nnAuto->initNNWeights ( 0 );

    // number of weights
    int nw = m_nn[0]->m_nnAuto->getNrWeights();
    int nwFile;
    f.read ( ( char* ) &nwFile, sizeof ( int ) );
    if ( nw != nwFile )
        assert ( false );

    // weights
    REAL* ptr = m_nn[0]->m_nnAuto->getWeightPtr();
    f.read ( ( char* ) ptr, sizeof ( REAL ) *nw );

    delete[] neur;
    f.close();
}
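For reference, the sequential binary layout of AutoencoderWeights.dat, as traced from the reads above (and mirrored by the writes in saveWeights() further down):

    // AutoencoderWeights.dat, sequential binary layout:
    //   int    l              // number of layers
    //   int    nInputs        // becomes m_nFeatures
    //   int    neurons[l-1]   // neurons per hidden layer
    //   int    nTargets       // becomes m_nClass, the reduced feature count used by readDataset()
    //   double scaleOutputs
    //   double offsetOutputs
    //   bool   useBLASforTraining
    //   int    nWeights
    //   REAL   weights[nWeights]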
void Autoencoder::modelInit ( )
Init the NN model
Definition at line 348 of file Autoencoder.cpp.
{
    // set up NNs
    // nCross + 1 (for retraining)
    if ( m_nn == 0 )
    {
        m_nn = new NNRBM*[m_nCross+1];
        for ( int i=0;i<m_nCross+1;i++ )
            m_nn[i] = 0;
    }
    for ( int i=0;i<m_nCross+1;i++ )
    {
        cout<<"Create a Neural Network ("<<i+1<<"/"<<m_nCross+1<<")"<<endl;
        if ( m_nn[i] == 0 )
            m_nn[i] = new NNRBM();
        m_nn[i]->setNrTargets ( m_nClass*m_nDomain );
        m_nn[i]->setNrInputs ( m_nFeatures );
        m_nn[i]->setNrExamplesTrain ( 0 );
        m_nn[i]->setNrExamplesProbe ( 0 );
        m_nn[i]->setTrainInputs ( 0 );
        m_nn[i]->setTrainTargets ( 0 );
        m_nn[i]->setProbeInputs ( 0 );
        m_nn[i]->setProbeTargets ( 0 );
        m_nn[i]->setGlobalEpochs ( 0 );

        // learn parameters
        m_nn[i]->setInitWeightFactor ( m_initWeightFactor );
        m_nn[i]->setLearnrate ( m_learnrate );
        m_nn[i]->setLearnrateMinimum ( m_learnrateMinimum );
        m_nn[i]->setLearnrateSubtractionValueAfterEverySample ( m_learnrateSubtractionValueAfterEverySample );
        m_nn[i]->setLearnrateSubtractionValueAfterEveryEpoch ( m_learnrateSubtractionValueAfterEveryEpoch );
        m_nn[i]->setMomentum ( m_momentum );
        m_nn[i]->setWeightDecay ( m_weightDecay );
        m_nn[i]->setMinUpdateErrorBound ( m_minUpdateErrorBound );
        m_nn[i]->setBatchSize ( m_batchSize );
        m_nn[i]->setMaxEpochs ( m_maxTuninigEpochs );
        m_nn[i]->setL1Regularization ( m_enableL1Regularization );
        m_nn[i]->enableErrorFunctionMAE ( m_enableErrorFunctionMAE );

        m_nn[i]->setRBMLearnParams ( m_doubleMap["rbmLearnrateWeights"], m_doubleMap["rbmLearnrateBiasVis"], m_doubleMap["rbmLearnrateBiasHid"], m_doubleMap["rbmWeightDecay"], m_doubleMap["rbmMaxEpochs"] );

        // set net inner stucture
        int nrLayer = m_nrLayer;
        int* neuronsPerLayer = Data::splitStringToIntegerList ( m_neuronsPerLayer, ',' );
        m_nn[i]->enableRPROP ( m_enableRPROP );
        m_nn[i]->setNNStructure ( nrLayer, neuronsPerLayer, true );

        m_nn[i]->setScaleOffset ( m_scaleOutputs, m_offsetOutputs );
        m_nn[i]->setRPROPPosNeg ( m_etaPosRPROP, m_etaNegRPROP );
        m_nn[i]->setRPROPMinMaxUpdate ( m_minUpdateRPROP, m_maxUpdateRPROP );
        m_nn[i]->setNormalTrainStopping ( true );
        m_nn[i]->useBLASforTraining ( m_useBLASforTraining );
        m_nn[i]->initNNWeights ( m_randSeed );
        delete[] neuronsPerLayer;

        cout<<endl<<endl;
    }

    if ( m_isFirstEpoch == 0 )
        m_isFirstEpoch = new bool[m_nCross+1];
    for ( int i=0;i<m_nCross+1;i++ )
        m_isFirstEpoch[i] = false;
}
void Autoencoder::modelUpdate ( REAL * input, REAL * target, uint nSamples, uint crossRun )
Perform a model update: set the new cross-validation set, or set the whole training set for retraining
Parameters:
    input      Pointer to input (can be a cross validation set, or the whole training set) (rows x nFeatures)
    target     The targets (can be cross validation targets)
    nSamples   The sample size (rows) in input
    crossRun   The cross validation run (for training)
Definition at line 450 of file Autoencoder.cpp.
{
    if ( m_isFirstEpoch[crossRun] == true )
    {
        m_nn[crossRun]->m_nnAuto->setTrainInputs ( input );
        m_nn[crossRun]->m_nnAuto->setTrainTargets ( input );
        m_nn[crossRun]->m_nnAuto->setNrExamplesTrain ( nSamples );
    }
    else
    {
        m_nn[crossRun]->setTrainInputs ( input );
        m_nn[crossRun]->setTrainTargets ( target );
        m_nn[crossRun]->setNrExamplesTrain ( nSamples );
    }

    if ( crossRun < m_nCross )
    {
        if ( m_isFirstEpoch[crossRun] == false )
        {
            m_isFirstEpoch[crossRun] = true;

            // start the layerwise RBM pretraining
            m_nn[crossRun]->rbmPretraining ( input, target, nSamples, m_nDomain*m_nClass, -1, crossRun );
        }
        else
        {
            // one gradient descent step (one epoch)
            m_nn[crossRun]->m_nnAuto->trainOneEpoch();
            stringstream s;
            s<<"[t:"<<sqrt ( m_nn[crossRun]->m_nnAuto->m_sumSquaredError/ ( double ) m_nn[crossRun]->m_nnAuto->m_sumSquaredErrorSamples ) <<"] ";
            cout<<s.str() <<flush;
            if ( crossRun == m_nCross - 1 )
                m_nn[crossRun]->m_nnAuto->printLearnrate();
        }
    }
    else
    {
        cout<<endl<<"Tune: Training of full trainset "<<endl;

        if ( m_isFirstEpoch[crossRun] == false )
        {
            m_isFirstEpoch[crossRun] = true;

            // start the layerwise RBM pretraining
            m_nn[crossRun]->rbmPretraining ( input, target, nSamples, m_nDomain*m_nClass );
            //m_nn[crossRun]->printMiddleLayerToFile("tmp/pre.txt", input, target, nSamples, m_nDomain*m_nClass);
            //m_nn[crossRun]->m_nnAuto->printAutoencoderWeightsToJavascript("tmp/autoPre.txt");
        }

        // retraining with fix number of epochs
        m_nn[crossRun]->m_nnAuto->setNormalTrainStopping ( false );
        int maxEpochs;
        if ( m_nFixEpochs != -1 )
            maxEpochs = m_nFixEpochs;
        else
            maxEpochs = m_epochParamBest[0];
        if ( maxEpochs == 0 )
            maxEpochs = 1; // train at least one epoch
        cout<<"Best #epochs (on cross validation): "<<maxEpochs<<endl;
        m_nn[crossRun]->m_nnAuto->setMaxEpochs ( maxEpochs );

        // train the net
        int epochs = m_nn[crossRun]->m_nnAuto->trainNN();
        //m_nn[crossRun]->printMiddleLayerToFile("tmp/fine.txt", input, target, nSamples, m_nDomain*m_nClass);
        //m_nn[crossRun]->m_nnAuto->printAutoencoderWeightsToJavascript("tmp/autoFine.txt");
        cout<<endl;
    }
}
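Note the calling pattern: the first call for a given crossRun triggers the layer-wise RBM pretraining, every later call with crossRun < m_nCross performs exactly one fine-tuning epoch, and the call with crossRun == m_nCross does the full retraining. A simplified, single-threaded sketch of the per-epoch driver (the real driver is calcRMSEonProbe() above):

    // hypothetical simplification of one tuning epoch
    for ( int cross = 0; cross < m_nCross; cross++ )
    {
        // first call: RBM pretraining; later calls: one backprop epoch
        modelUpdate ( m_train[cross], m_trainTarget[cross], m_trainSize[cross], cross );
        // ... then reconstruct the probe set and accumulate the squared error
    }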
void Autoencoder::predictAllOutputs ( REAL * rawInputs, REAL * outputs, uint nSamples, uint crossRun )
Prediction for outside use: predicts outputs based on raw (unnormalized) input values
Parameters:
    rawInputs   The input features, without normalization (raw)
    outputs     The output values (prediction of the targets)
    nSamples    The input size (number of rows)
    crossRun    Number of the cross validation run (in training)
Definition at line 420 of file Autoencoder.cpp.
{
    if ( m_meanRBM == 0 && m_stdRBM == 0 )
    {
        // predict all samples
        for ( int i=0;i<nSamples;i++ )
            m_nn[crossRun]->m_nnAuto->predictSingleInput ( rawInputs + i*m_nFeatures, outputs + i*m_nClass*m_nDomain );
    }
    else
    {
        REAL* in = new REAL[m_nFeatures];
        for ( int i=0;i<nSamples;i++ )
        {
            for ( int j=0;j<m_nFeatures;j++ )
                in[j] = ( rawInputs[i*m_nFeatures+j] - m_meanRBM[j] ) / m_stdRBM[j];
            m_nn[crossRun]->m_nnAuto->predictSingleInput ( in, outputs + i*m_nClass*m_nDomain );
        }
        delete[] in;
    }
}
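A sketch of stand-alone prediction with a previously trained model (hypothetical; rawInputs and nSamples stand for caller-provided data, and cross run 0 is where loadWeights() places the net, as readDataset() below also assumes):

    loadWeights();           // restore net structure and weights
    loadNormalizations();    // restore m_meanRBM / m_stdRBM
    REAL* outputs = new REAL[nSamples * m_nClass];          // m_nClass values per sample (m_nDomain == 1 after loadWeights)
    predictAllOutputs ( rawInputs, outputs, nSamples, 0 );  // normalizes internally, then predicts
    delete[] outputs;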
void Autoencoder::readDataset ( Data * data, string datasetName )
Read a dataset and reduce its dimensionality with the trained autoencoder
Parameters:
    data          Data object
    datasetName   dataset name
Definition at line 121 of file Autoencoder.cpp.
{
    cout<<"Read data set and run the trained autoencoder"<<endl;

    data->readDataset ( datasetName );

    // load the normalizations
    cout<<"load the 0..1 normalizations"<<endl;
    REAL* mean = new REAL[data->m_nFeatures];
    REAL* std = new REAL[data->m_nFeatures];
    string meanName = data->m_datasetPath + "/" + data->m_tempPath + "/AutoencoderDataMean.dat";
    string stdName = data->m_datasetPath + "/" + data->m_tempPath + "/AutoencoderDataStd.dat";
    cout<<"meanName:"<<meanName<<endl<<"stdName:"<<stdName<<endl;
    fstream fMean ( meanName.c_str(),ios::in );
    fstream fStd ( stdName.c_str(),ios::in );
    if ( fMean.is_open() == false || fStd.is_open() == false )
        assert ( false );
    fMean.read ( ( char* ) mean, sizeof ( REAL ) *data->m_nFeatures );
    fStd.read ( ( char* ) std, sizeof ( REAL ) *data->m_nFeatures );
    fMean.close();
    fStd.close();

    // normalize train data
    cout<<"normalize train data"<<endl;
    for ( int i=0;i<data->m_nTrain;i++ )
        for ( int j=0;j<data->m_nFeatures;j++ )
        {
            data->m_trainOrig[j+i*data->m_nFeatures] = ( data->m_trainOrig[j+i*data->m_nFeatures] - mean[j] ) / std[j];
            REAL v = data->m_trainOrig[j+i*data->m_nFeatures];
            if ( v > 1.0 || v < 0.0 )
                cout<<"warning (0>v>1), transformed train value:"<<v<<endl;
        }

    // normalize test data
    cout<<"normalize test data"<<endl;
    for ( int i=0;i<data->m_nTest;i++ )
        for ( int j=0;j<data->m_nFeatures;j++ )
        {
            data->m_testOrig[j+i*data->m_nFeatures] = ( data->m_testOrig[j+i*data->m_nFeatures] - mean[j] ) / std[j];
            REAL v = data->m_testOrig[j+i*data->m_nFeatures];
            if ( v > 1.0 || v < 0.0 )
                cout<<"warning (0>v>1), transformed test value:"<<v<<endl;
        }

    delete[] mean;
    delete[] std;

    loadWeights();

    // new data
    cout<<"allocate new data matrices: #features:"<<m_nClass<<endl;
    REAL* trainOrig = new REAL[m_nClass * data->m_nTrain];
    REAL* testOrig = new REAL[m_nClass * data->m_nTest];

    // calculate dim. reduction
    time_t t0 = time ( 0 );
    cout<<"calculate dim. reduction: train set (size:"<<data->m_nTrain<<") "<<flush;
    predictAllOutputs ( data->m_trainOrig, trainOrig, data->m_nTrain, 0 );
    cout<<time ( 0 )-t0<<"[s]"<<endl;
    cout<<"calculate dim. reduction: test set (size:"<<data->m_nTest<<") "<<flush;
    t0 = time ( 0 );
    predictAllOutputs ( data->m_testOrig, testOrig, data->m_nTest, 0 );
    cout<<time ( 0 )-t0<<"[s]"<<endl;

    delete[] data->m_trainOrig;
    delete[] data->m_testOrig;

    data->m_trainOrig = trainOrig;
    data->m_testOrig = testOrig;
    data->m_nFeatures = m_nClass;

    // write new datasets
    // load the normalizations
    cout<<"write new datasets (AutoencoderDataTrain/Test/Targets)"<<endl;

    string trainName = data->m_datasetPath + "/" + data->m_tempPath + "/AutoencoderDataTrain.dat";
    fstream fTrain ( trainName.c_str(),ios::out );
    fTrain.write ( ( char* ) trainOrig, sizeof ( REAL ) *m_nClass * data->m_nTrain );
    fTrain.close();

    string testName = data->m_datasetPath + "/" + data->m_tempPath + "/AutoencoderDataTest.dat";
    fstream fTest ( testName.c_str(),ios::out );
    fTest.write ( ( char* ) testOrig, sizeof ( REAL ) *m_nClass * data->m_nTest );
    fTest.close();

    string trainTargetName = data->m_datasetPath + "/" + data->m_tempPath + "/AutoencoderDataTrainTarget.dat";
    fstream fTrainTarget ( trainTargetName.c_str(),ios::out );
    fTrainTarget.write ( ( char* ) data->m_trainTargetOrig, sizeof ( REAL ) *data->m_nClass * data->m_nTrain );
    fTrainTarget.close();

    string testTargetName = data->m_datasetPath + "/" + data->m_tempPath + "/AutoencoderDataTestTarget.dat";
    fstream fTestTarget ( testTargetName.c_str(),ios::out );
    fTestTarget.write ( ( char* ) data->m_testTargetOrig, sizeof ( REAL ) *data->m_nClass * data->m_nTest );
    fTestTarget.close();
}
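A hedged usage sketch for the dimensionality reduction (the dataset name is hypothetical, and the Data object is assumed to be configured by the surrounding framework):

    Autoencoder ae;
    Data data;                              // assumed to be set up elsewhere
    ae.readDataset ( &data, "MyDataset" );  // hypothetical name: reads, normalizes, reduces to m_nClass
                                            // features and writes the AutoencoderData*.dat files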
void Autoencoder::readMaps ( )
void Autoencoder::readSpecificMaps ( )
Read the Algorithm-specific values from the description file
Definition at line 312 of file Autoencoder.cpp.
{
    cout<<"Read specific maps"<<endl;

    m_minTuninigEpochs = m_intMap["minTuninigEpochs"];
    m_maxTuninigEpochs = m_intMap["maxTuninigEpochs"];

    // read dsc vars
    m_nrLayer = m_intMap["nrLayer"];
    m_batchSize = m_intMap["batchSize"];
    m_offsetOutputs = m_doubleMap["offsetOutputs"];
    m_scaleOutputs = m_doubleMap["scaleOutputs"];
    m_initWeightFactor = m_doubleMap["initWeightFactor"];
    m_learnrate = m_doubleMap["learnrate"];
    m_learnrateMinimum = m_doubleMap["learnrateMinimum"];
    m_learnrateSubtractionValueAfterEverySample = m_doubleMap["learnrateSubtractionValueAfterEverySample"];
    m_learnrateSubtractionValueAfterEveryEpoch = m_doubleMap["learnrateSubtractionValueAfterEveryEpoch"];
    m_momentum = m_doubleMap["momentum"];
    m_weightDecay = m_doubleMap["weightDecay"];
    m_minUpdateErrorBound = m_doubleMap["minUpdateErrorBound"];
    m_etaPosRPROP = m_doubleMap["etaPosRPROP"];
    m_etaNegRPROP = m_doubleMap["etaNegRPROP"];
    m_minUpdateRPROP = m_doubleMap["minUpdateRPROP"];
    m_maxUpdateRPROP = m_doubleMap["maxUpdateRPROP"];
    m_enableL1Regularization = m_boolMap["enableL1Regularization"];
    m_enableErrorFunctionMAE = m_boolMap["enableErrorFunctionMAE"];
    m_enableRPROP = m_boolMap["enableRPROP"];
    m_useBLASforTraining = m_boolMap["useBLASforTraining"];
    m_neuronsPerLayer = m_stringMap["neuronsPerLayer"];
    m_nFixEpochs = m_intMap["nFixEpochs"];
}
void Autoencoder::saveWeights ( )
Save the weights and all other parameters needed to load the complete prediction model
Definition at line 523 of file Autoencoder.cpp.
{
    string name = m_datasetPath + "/" + m_tempPath + "/AutoencoderWeights.dat";
    if ( m_inRetraining )
        cout<<"Save:"<<name<<endl;
    REAL* w = m_nn[m_nCross]->m_nnAuto->getWeightPtr();
    vector<int> v = m_nn[m_nCross]->m_nnAuto->getEncoder();

    fstream f ( name.c_str(), ios::out );

    // #layers
    int l = v.size() - 2;
    f.write ( ( char* ) &l, sizeof ( int ) );

    // neurons per layer
    for ( int i=0;i<v.size()-1;i++ )
        f.write ( ( char* ) &v[i], sizeof ( int ) );

    // net scale/offset
    f.write ( ( char* ) &m_scaleOutputs, sizeof ( double ) );
    f.write ( ( char* ) &m_offsetOutputs, sizeof ( double ) );
    f.write ( ( char* ) &m_useBLASforTraining, sizeof ( bool ) );

    // number of weights
    int n = v[v.size()-1];
    f.write ( ( char* ) &n, sizeof ( int ) );

    // weights
    f.write ( ( char* ) w, sizeof ( REAL ) *n );

    f.close();
}
string Autoencoder::templateGenerator ( int id, string preEffect, int nameID, bool blendStop )  [static]
Generate a template of the description file
Definition at line 631 of file Autoencoder.cpp.
{
    stringstream s;
    s<<"ALGORITHM=Autoencoder"<<endl;
    s<<"ID="<<id<<endl;
    s<<"TRAIN_ON_FULLPREDICTOR="<<preEffect<<endl;
    s<<"DISABLE=0"<<endl;
    s<<endl;
    s<<"[int]"<<endl;
    s<<"nrLayer=4"<<endl;
    s<<"batchSize=1"<<endl;
    s<<"minTuninigEpochs=30"<<endl;
    s<<"maxTuninigEpochs=100"<<endl;
    s<<"nFixEpochs=-1"<<endl;
    s<<endl;
    s<<"[double]"<<endl;
    s<<"initMaxSwing=1.0"<<endl;
    s<<endl;
    s<<"offsetOutputs=0.0"<<endl;
    s<<"scaleOutputs=1.2"<<endl;
    s<<endl;
    s<<"etaPosRPROP=1.005"<<endl;
    s<<"etaNegRPROP=0.99"<<endl;
    s<<"minUpdateRPROP=1e-8"<<endl;
    s<<"maxUpdateRPROP=1e-2"<<endl;
    s<<endl;
    s<<"initWeightFactor=1.0"<<endl;
    s<<"learnrate=1e-3"<<endl;
    s<<"learnrateMinimum=1e-5"<<endl;
    s<<"learnrateSubtractionValueAfterEverySample=0.0"<<endl;
    s<<"learnrateSubtractionValueAfterEveryEpoch=0.0"<<endl;
    s<<"momentum=0.0"<<endl;
    s<<"weightDecay=0.0"<<endl;
    s<<"minUpdateErrorBound=1e-6"<<endl;
    s<<endl;
    s<<"[bool]"<<endl;
    s<<"enableErrorFunctionMAE=0"<<endl;
    s<<"enableL1Regularization=0"<<endl;
    s<<"enableClipping=1"<<endl;
    s<<"enableTuneSwing=0"<<endl;
    s<<"useBLASforTraining=1"<<endl;
    s<<"enableRPROP=0"<<endl;
    s<<endl;
    s<<"minimzeProbe="<< ( !blendStop ) <<endl;
    s<<"minimzeProbeClassificationError=0"<<endl;
    s<<"minimzeBlend="<<blendStop<<endl;
    s<<"minimzeBlendClassificationError=0"<<endl;
    s<<endl;
    s<<"[string]"<<endl;
    s<<"neuronsPerLayer=30,20,40,30,100,-1"<<endl;

    return s.str();
}
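Because the function is static, a description-file template can be produced without an Autoencoder instance; a small sketch (arguments and the output file name are hypothetical):

    string dsc = Autoencoder::templateGenerator ( 1, "", 0, false );
    fstream f ( "A1_Autoencoder.dsc", ios::out );   // hypothetical file name
    f << dsc;
    f.close();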
double Autoencoder::train ( )  [virtual]
Train the autoencoder
Implements Algorithm.
Definition at line 78 of file Autoencoder.cpp.
{
    cout<<"Start train Autoencoder"<<endl;

    // read standard and specific values
    readSpecificMaps();

    modelInit();

    if ( m_nFixEpochs == -1 )
    {
        cout<<endl<<"============================ START TRAIN (param tuning) ============================="<<endl<<endl;
        cout<<"Parameters to tune:"<<endl;

        addEpochParameter ( &m_epoch, "epoch" );

        // start the structured searcher
        cout<<"(min|max. epochs: "<<m_minTuninigEpochs<<"|"<<m_maxTuninigEpochs<<")"<<endl;
        expSearcher ( m_minTuninigEpochs, m_maxTuninigEpochs, 3, 1, 0.8, true, false );

        cout<<endl<<"============================ END auto-optimize ============================="<<endl<<endl;
    }

    cout<<"Update model on whole training set"<<endl<<endl;
    // retrain the model with whole trainingset (disable cross validation)
    if ( m_enableSaveMemory )
        fillNCrossValidationSet ( m_nCross );

    modelUpdate ( m_train[m_nCross], m_trainTarget[m_nCross], m_nTrain, m_nCross );
    saveWeights();

    if ( m_enableSaveMemory )
        freeNCrossValidationSet ( m_nCross );

    return 0.0;
}