#include "AlgorithmExploration.h"

extern StreamOutput cout;
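
/**
 * Constructor.
 */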
AlgorithmExploration::AlgorithmExploration()
{
    cout<<"AlgorithmExploration"<<endl;
}
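
/**
 * Destructor.
 */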
AlgorithmExploration::~AlgorithmExploration()
{
    cout<<"destructor AlgorithmExploration"<<endl;
}
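
/**
 * Run the exploration: build all combinations of the base learners and
 * evaluate the residual, residual-cascade, cascade and stacking ensembles on
 * every dataset, logging RMSE and classification error per random split.
 */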
void AlgorithmExploration::start()
{
    time_t t0 = time ( 0 );
    cout.setOutputFile ( "out.txt" );
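
    // base learners to combine (keys into the .dsc/.dat maps below)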
    vector<string> algos;
    algos.push_back ( "LM" );
    algos.push_back ( "NN" );
    algos.push_back ( "KNN" );
    algos.push_back ( "KRR" );
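
    // algorithm shortcut -> description (.dsc) template filename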
    map<string,string> algoDscMap;
    algoDscMap["LM"] = "LinearModel_1.dsc";
    algoDscMap["NN"] = "NeuralNetwork_1.dsc";
    algoDscMap["KNN"] = "KNearestNeighbor_1.dsc";
    algoDscMap["PR"] = "PolynomialRegression_1.dsc";
    algoDscMap["KRR"] = "KernelRidgeRegression_1.dsc";
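
    // algorithm shortcut -> prediction output (.dat) filename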
    map<string,string> algoPredMap;
    algoPredMap["LM"] = "LinearModel_1.dat";
    algoPredMap["NN"] = "NeuralNetwork_1.dat";
    algoPredMap["KNN"] = "KNearestNeighbor_1.dat";
    algoPredMap["PR"] = "PolynomialRegression_1.dat";
    algoPredMap["KRR"] = "KernelRidgeRegression_1.dat";
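
    // datasets to evaluate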
    vector<string> datasets;
    datasets.push_back ( "HEPATITIS" );
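
    // log files (written into the dataset directory), one per ensemble scheme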
    string logResidualFilename = "logResidual.txt";
    string logResidualCascadeFilename = "logResidualCascade.txt";
    string logCascadeFilename = "logCascade.txt";
    string logStackingFilename = "logStacking.txt";
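
    // which ensemble schemes to run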
    bool enableResidual = false;
    bool enableResidualCascade = false;
    bool enableCascade = false;
    bool enableStacking = true;

    int nTestSplits = 100;
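
    // build every ordered sequence of distinct algorithms up to full depth;
    // randPerm() appends each sequence to m_trainList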
    m_trainList.clear();
    for ( int depth=1;depth<algos.size() +1;depth++ )
    {
        vector<string> stack;
        randPerm ( stack, algos, depth );
    }
    cout<<endl<<"Residual/Cascade train list"<<endl;
    for ( int i=0;i<m_trainList.size();i++ )
    {
        for ( int j=0;j<m_trainList[i].size();j++ )
            cout<<m_trainList[i][j]<<" ";
        cout<<endl;
    }
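
    // for stacking the order of the base learners is irrelevant, so keep only
    // unique unordered combinations (sequences are compared in sorted order)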
    cout<<endl<<"Stacking train list"<<endl;
    vector<vector<string> > stackingTrainListTmp;
    vector<vector<string> > stackingTrainList;
    for ( int i=0;i<m_trainList.size();i++ )
    {
        stackingTrainListTmp.push_back ( m_trainList[i] );
        sort ( stackingTrainListTmp[i].begin(), stackingTrainListTmp[i].end() );

        bool found = false;
        for ( int j=0;j<stackingTrainList.size();j++ )
        {
            if ( stackingTrainList[j] == stackingTrainListTmp[i] )
                found = true;
        }
        if ( found == false )
        {
            stackingTrainList.push_back ( m_trainList[i] );
            int s = stackingTrainList.size();
            sort ( stackingTrainList[s-1].begin(), stackingTrainList[s-1].end() );
        }
    }
    for ( int i=0;i<stackingTrainList.size();i++ )
    {
        for ( int j=0;j<stackingTrainList[i].size();j++ )
            cout<<stackingTrainList[i][j]<<" ";
        cout<<endl;
    }

    cout.disableAllOutputs();
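
    // loop over all datasets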
    for ( int datasetCnt=0;datasetCnt<datasets.size();datasetCnt++ )
    {
        string path = datasets[datasetCnt] + "/";
        printf ( "\nLog: %s (nTestsets:%d)\n", ( path + logResidualFilename ).c_str(),nTestSplits );
        printf ( "Log: %s (nTestsets:%d)\n", ( path + logResidualCascadeFilename ).c_str(),nTestSplits );
        printf ( "Log: %s (nTestsets:%d)\n", ( path + logCascadeFilename ).c_str(),nTestSplits );
        printf ( "Log: %s (nTestsets:%d)\n", ( path + logStackingFilename ).c_str(),nTestSplits );
        fstream fAnalyzeResidual ( ( path + logResidualFilename ).c_str(),ios::out );
        fstream fAnalyzeResidualCascade ( ( path + logResidualCascadeFilename ).c_str(),ios::out );
        fstream fAnalyzeCascade ( ( path + logCascadeFilename ).c_str(),ios::out );
        fstream fAnalyzeStacking ( ( path + logStackingFilename ).c_str(),ios::out );
        fstream f;

        time_t runTime = time ( 0 );

        for ( int testCnt=0;testCnt<nTestSplits;testCnt++ )
        {
            printf ( " %d ",testCnt );
            fflush ( stdout );

            uint randomSeedSplit = time ( 0 ) + testCnt;
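
            // 1) residual chain: each algorithm receives the previous
            //    algorithm's prediction file as preEffect; the master template
            //    uses "LinearRegression", cascade disabled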
            for ( int run=0;run<m_trainList.size() && enableResidual;run++ )
            {
                uint randomSeed = randomSeedSplit + run;

                printf ( "r" );
                fflush ( stdout );

                string master;
                vector<string> algoDscList;
                for ( int i=0;i<m_trainList[run].size();i++ )
                    algoDscList.push_back ( algoDscMap[m_trainList[run][i]] );
                bool clas = true;
                bool cascade = false;
                master = Scheduler::masterDscTemplateGenerator ( datasets[datasetCnt], clas, algoDscList, randomSeed, "LinearRegression", cascade );
                f.open ( ( path+"Master.dsc" ).c_str(),ios::out );
                f<<master;
                f.close();
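
                // write one .dsc template per algorithm in the chain; preEffect
                // carries the prediction file of the preceding algorithm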
                string preEffect = "";
                for ( int i=0;i<m_trainList[run].size();i++ )
                {
                    string algoStr;
                    if ( m_trainList[run][i] == "LM" )
                        algoStr = LinearModel::templateGenerator ( i+1, preEffect, 1, true );
                    else if ( m_trainList[run][i] == "NN" )
                        algoStr = NeuralNetwork::templateGenerator ( i+1, preEffect, 1, true );
                    else if ( m_trainList[run][i] == "KNN" )
                        algoStr = KNearestNeighbor::templateGenerator ( i+1, preEffect, 1, true );
                    else if ( m_trainList[run][i] == "PR" )
                        algoStr = PolynomialRegression::templateGenerator ( i+1, preEffect, 1, true );
                    else if ( m_trainList[run][i] == "KRR" )
                        algoStr = KernelRidgeRegression::templateGenerator ( i+1, preEffect, 1, true );
                    else
                        assert ( false );
                    preEffect = algoPredMap[m_trainList[run][i]];

                    f.open ( ( path+algoDscMap[m_trainList[run][i]] ).c_str(),ios::out );
                    f<<algoStr;
                    f.close();
                }

                Scheduler s;
                s.readMasterDscFile ( datasets[datasetCnt], "Master.dsc" );
                s.train();
                s.predict();

                REAL rmse = s.getPredictionRMSE();
                REAL classErr = s.getClassificationError();
                fAnalyzeResidual<<rmse<<" "<<classErr<<" ";
            }
            fAnalyzeResidual<<endl;
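
            // 2) residual chain with cascade learning enabled (cascade=true),
            //    master template uses "LinearRegression"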
            for ( int run=0;run<m_trainList.size() && enableResidualCascade;run++ )
            {
                uint randomSeed = randomSeedSplit + run;

                printf ( "m" );
                fflush ( stdout );

                string master;
                vector<string> algoDscList;
                for ( int i=0;i<m_trainList[run].size();i++ )
                    algoDscList.push_back ( algoDscMap[m_trainList[run][i]] );
                bool clas = true;
                bool cascade = true;
                master = Scheduler::masterDscTemplateGenerator ( datasets[datasetCnt], clas, algoDscList, randomSeed, "LinearRegression", cascade );
                f.open ( ( path+"Master.dsc" ).c_str(),ios::out );
                f<<master;
                f.close();

                string preEffect = "";
                for ( int i=0;i<m_trainList[run].size();i++ )
                {
                    string algoStr;
                    if ( m_trainList[run][i] == "LM" )
                        algoStr = LinearModel::templateGenerator ( i+1, preEffect, 1, true );
                    else if ( m_trainList[run][i] == "NN" )
                        algoStr = NeuralNetwork::templateGenerator ( i+1, preEffect, 1, true );
                    else if ( m_trainList[run][i] == "KNN" )
                        algoStr = KNearestNeighbor::templateGenerator ( i+1, preEffect, 1, true );
                    else if ( m_trainList[run][i] == "PR" )
                        algoStr = PolynomialRegression::templateGenerator ( i+1, preEffect, 1, true );
                    else if ( m_trainList[run][i] == "KRR" )
                        algoStr = KernelRidgeRegression::templateGenerator ( i+1, preEffect, 1, true );
                    else
                        assert ( false );
                    preEffect = algoPredMap[m_trainList[run][i]];

                    f.open ( ( path+algoDscMap[m_trainList[run][i]] ).c_str(),ios::out );
                    f<<algoStr;
                    f.close();
                }

                Scheduler s;
                s.readMasterDscFile ( datasets[datasetCnt], "Master.dsc" );
                s.train();
                s.predict();

                REAL rmse = s.getPredictionRMSE();
                REAL classErr = s.getClassificationError();
                fAnalyzeResidualCascade<<rmse<<" "<<classErr<<" ";
            }
            fAnalyzeResidualCascade<<endl;
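
            // 3) pure cascade (cascade=true): no residual chaining, preEffect
            //    stays empty; master template uses "TakeLast"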
            for ( int run=0;run<m_trainList.size() && enableCascade;run++ )
            {
                uint randomSeed = randomSeedSplit + run;

                printf ( "c" );
                fflush ( stdout );

                string master;
                vector<string> algoDscList;
                for ( int i=0;i<m_trainList[run].size();i++ )
                    algoDscList.push_back ( algoDscMap[m_trainList[run][i]] );
                bool clas = true;
                bool cascade = true;
                master = Scheduler::masterDscTemplateGenerator ( datasets[datasetCnt], clas, algoDscList, randomSeed, "TakeLast", cascade );
                f.open ( ( path+"Master.dsc" ).c_str(),ios::out );
                f<<master;
                f.close();

                string preEffect = "";
                for ( int i=0;i<m_trainList[run].size();i++ )
                {
                    string algoStr;
                    if ( m_trainList[run][i] == "LM" )
                        algoStr = LinearModel::templateGenerator ( i+1, preEffect, 1, false );
                    else if ( m_trainList[run][i] == "NN" )
                        algoStr = NeuralNetwork::templateGenerator ( i+1, preEffect, 1, false );
                    else if ( m_trainList[run][i] == "KNN" )
                        algoStr = KNearestNeighbor::templateGenerator ( i+1, preEffect, 1, false );
                    else if ( m_trainList[run][i] == "PR" )
                        algoStr = PolynomialRegression::templateGenerator ( i+1, preEffect, 1, false );
                    else if ( m_trainList[run][i] == "KRR" )
                        algoStr = KernelRidgeRegression::templateGenerator ( i+1, preEffect, 1, false );
                    else
                        assert ( false );

                    f.open ( ( path+algoDscMap[m_trainList[run][i]] ).c_str(),ios::out );
                    f<<algoStr;
                    f.close();
                }

                Scheduler s;
                s.readMasterDscFile ( datasets[datasetCnt], "Master.dsc" );
                s.train();
                s.predict();

                REAL rmse = s.getPredictionRMSE();
                REAL classErr = s.getClassificationError();
                fAnalyzeCascade<<rmse<<" "<<classErr<<" ";
            }
            fAnalyzeCascade<<endl;
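
            // 4) stacking: unordered algorithm combinations from
            //    stackingTrainList, no chaining; master template uses
            //    "LinearRegressionNonNeg"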
            for ( int run=0;run<stackingTrainList.size() && enableStacking;run++ )
            {
                uint randomSeed = randomSeedSplit + run;

                printf ( "s" );
                fflush ( stdout );

                string master;
                vector<string> algoDscList;
                for ( int i=0;i<stackingTrainList[run].size();i++ )
                    algoDscList.push_back ( algoDscMap[stackingTrainList[run][i]] );
                bool clas = true;
                bool cascade = false;
                master = Scheduler::masterDscTemplateGenerator ( datasets[datasetCnt], clas, algoDscList, randomSeed, "LinearRegressionNonNeg", cascade );
                f.open ( ( path+"Master.dsc" ).c_str(),ios::out );
                f<<master;
                f.close();

                string preEffect = "";
                for ( int i=0;i<stackingTrainList[run].size();i++ )
                {
                    string algoStr;
                    if ( stackingTrainList[run][i] == "LM" )
                        algoStr = LinearModel::templateGenerator ( i+1, preEffect, 1, false );
                    else if ( stackingTrainList[run][i] == "NN" )
                        algoStr = NeuralNetwork::templateGenerator ( i+1, preEffect, 1, false );
                    else if ( stackingTrainList[run][i] == "KNN" )
                        algoStr = KNearestNeighbor::templateGenerator ( i+1, preEffect, 1, false );
                    else if ( stackingTrainList[run][i] == "PR" )
                        algoStr = PolynomialRegression::templateGenerator ( i+1, preEffect, 1, false );
                    else if ( stackingTrainList[run][i] == "KRR" )
                        algoStr = KernelRidgeRegression::templateGenerator ( i+1, preEffect, 1, false );
                    else
                        assert ( false );

                    f.open ( ( path+algoDscMap[stackingTrainList[run][i]] ).c_str(),ios::out );
                    f<<algoStr;
                    f.close();
                }

                Scheduler s;
                s.readMasterDscFile ( datasets[datasetCnt], "Master.dsc" );
                s.train();
                s.predict();

                REAL rmse = s.getPredictionRMSE();
                REAL classErr = s.getClassificationError();
                fAnalyzeStacking<<rmse<<" "<<classErr<<" ";
            }
            fAnalyzeStacking<<endl;

        }

        printf ( " run: %d[s]\n", ( int ) ( time ( 0 )-runTime ) );

        fAnalyzeResidual.close();
        fAnalyzeResidualCascade.close();
        fAnalyzeCascade.close();
        fAnalyzeStacking.close();
    }

    printf ( "Finished in: %d[s]\n", ( int ) ( time ( 0 )-t0 ) );
}
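
/**
 * Recursively enumerate all ordered sequences of length maxDepth drawn from
 * availableAlgorithms (without repetition) and append each one to m_trainList.
 */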
void AlgorithmExploration::randPerm ( vector<string> algorithmStack, vector<string> availableAlgorithms, int maxDepth )
{
    if ( maxDepth == 0 )
    {
        m_trainList.push_back ( algorithmStack );
        return;
    }

    int size = availableAlgorithms.size();
    for ( int i=0;i<size;i++ )
    {
        vector<string> stack = algorithmStack;
        stack.push_back ( availableAlgorithms[i] );
        vector<string> algos;
        for ( int j=0;j<size;j++ )
            if ( i != j )
                algos.push_back ( availableAlgorithms[j] );
        randPerm ( stack, algos, maxDepth-1 );
    }
}