StandardAlgorithm.cpp

00001 #include "StandardAlgorithm.h"
00002 
00003 extern StreamOutput cout;
00004 
00008 StandardAlgorithm::StandardAlgorithm()
00009 {
00010     cout<<"StandardAlgorithm"<<endl;
00011     // init member vars
00012     m_blendStop = 0;
00013     m_maxSwing = 0;
00014     m_crossValidationPrediction = 0;
00015     m_prediction = 0;
00016     m_predictionBest = 0;
00017     m_predictionProbe = 0;
00018     m_singlePrediction = 0;
00019     m_labelPrediction = 0;
00020     m_wrongLabelCnt = 0;
00021     m_maxTuninigEpochs = 0;
00022     m_minTuninigEpochs = 0;
00023     m_enableClipping = 0;
00024     m_enableTuneSwing = 0;
00025     m_minimzeProbe = 0;
00026     m_minimzeProbeClassificationError = 0;
00027     m_minimzeBlend = 0;
00028     m_minimzeBlendClassificationError = 0;
00029     m_initMaxSwing = 0;
00030     m_outOfBagEstimate = 0;
00031     m_outOfBagEstimateCnt = 0;
00032 
00033 }
00034 
00038 StandardAlgorithm::~StandardAlgorithm()
00039 {
00040     cout<<"descructor StandardAlgorithm"<<endl;
00041 
00042     if ( m_blendStop )
00043         delete m_blendStop;
00044     m_blendStop = 0;
00045 
00046     if ( m_prediction )
00047         delete[] m_prediction;
00048     m_prediction = 0;
00049     if ( m_predictionBest )
00050         delete[] m_predictionBest;
00051     m_predictionBest = 0;
00052     for ( int i=0;i<m_maxThreadsInCross;i++ )
00053     {
00054         if ( m_predictionProbe )
00055         {
00056             if ( m_predictionProbe[i] )
00057                 delete[] m_predictionProbe[i];
00058             m_predictionProbe[i] = 0;
00059         }
00060     }
00061     if ( m_predictionProbe )
00062         delete[] m_predictionProbe;
00063     m_predictionProbe = 0;
00064     if ( m_labelPrediction )
00065         delete[] m_labelPrediction;
00066     m_labelPrediction = 0;
00067     if ( m_singlePrediction )
00068         delete[] m_singlePrediction;
00069     m_singlePrediction = 0;
00070 
00071     if ( m_crossValidationPrediction )
00072         delete[] m_crossValidationPrediction;
00073     m_crossValidationPrediction = 0;
00074     if ( m_wrongLabelCnt )
00075         delete[] m_wrongLabelCnt;
00076     m_wrongLabelCnt = 0;
00077     if(m_outOfBagEstimate)
00078         delete[] m_outOfBagEstimate;
00079     m_outOfBagEstimate = 0;
00080     if(m_outOfBagEstimateCnt)
00081         delete[] m_outOfBagEstimateCnt;
00082     m_outOfBagEstimateCnt = 0;
00083 }
00084 
00090 double StandardAlgorithm::train()
00091 {
00092     cout<<"Start train StandardAlgorithm"<<endl;
00093 
00094     init();
00095 
00096     modelInit();
00097 
00098     double rmse = m_blendStop->calcBlending();
00099     cout<<endl<<"ERR Blend:"<<rmse<<endl;
00100 
00101     cout<<endl<<"============================ START TRAIN (param tuning) ============================="<<endl<<endl;
00102     cout<<"Parameters to tune:"<<endl;
00103 
00104     // automatically tune parameters
00105     m_maxSwing = m_initMaxSwing;
00106     for ( int i=0;i<paramEpochValues.size();i++ )
00107     {
00108         addEpochParameter ( paramEpochValues[i], paramEpochNames[i] );
00109         cout<<"[EPOCH] name:"<<paramEpochNames[i]<<"   initValue:"<<*paramEpochValues[i]<<endl;
00110     }
00111     for ( int i=0;i<paramDoubleValues.size();i++ )
00112     {
00113         addDoubleParameter ( paramDoubleValues[i], paramDoubleNames[i] );
00114         cout<<"[REAL] name:"<<paramDoubleNames[i]<<"   initValue:"<<*paramDoubleValues[i]<<endl;
00115     }
00116     for ( int i=0;i<paramIntValues.size();i++ )
00117     {
00118         addIntegerParameter ( paramIntValues[i], paramIntNames[i] );
00119         cout<<"[INT]  name:"<<paramIntNames[i]<<"   initValue:"<<*paramIntValues[i]<<endl;
00120     }
00121     if ( m_enableTuneSwing )
00122     {
00123         addDoubleParameter ( &m_maxSwing, "swing" );
00124         cout<<"[REAL] name:"<<"swing"<<"   initValue:"<<m_maxSwing<<endl;
00125     }
00126 
00127     // when in multiple optimization loop
00128     if ( m_loadWeightsBeforeTraining )
00129         loadMetaWeights ( m_nCross );
00130 
00131     // start the structured searcher
00132     cout<<"(min|max. epochs: "<<m_minTuninigEpochs<<"|"<<m_maxTuninigEpochs<<")"<<endl;
00133     expSearcher ( m_minTuninigEpochs, m_maxTuninigEpochs, 3, 1, 0.8, m_minimzeProbe, m_minimzeBlend );
00134 
00135     // remove the parameters from the searchers
00136     for ( int i=0;i<paramEpochValues.size();i++ )
00137         removeEpochParameter ( paramEpochNames[i] );
00138     for ( int i=0;i<paramDoubleValues.size();i++ )
00139         removeDoubleParameter ( paramDoubleNames[i] );
00140     for ( int i=0;i<paramIntValues.size();i++ )
00141         removeIntegerParameter ( paramIntNames[i] );
00142     if ( m_enableTuneSwing )
00143         removeDoubleParameter ( "swing" );
00144 
00145     paramEpochValues.clear();
00146     paramEpochNames.clear();
00147     paramDoubleValues.clear();
00148     paramDoubleNames.clear();
00149     paramIntValues.clear();
00150     paramIntNames.clear();
00151 
00152 
00153     cout<<endl<<"============================ END auto-optimize ============================="<<endl<<endl;
00154 
00155     // calculate all train targets with cross validation
00156     calculateFullPrediction();
00157 
00158     return expSearchGetLowestError();
00159 }
00160 
00165 void StandardAlgorithm::readMaps()
00166 {
00167     cout<<"Read dsc maps (standard values)"<<endl;
00168     m_minTuninigEpochs = m_intMap["minTuninigEpochs"];
00169     m_maxTuninigEpochs = m_intMap["maxTuninigEpochs"];
00170     m_initMaxSwing = m_doubleMap["initMaxSwing"];
00171     m_enableClipping = m_boolMap["enableClipping"];
00172     m_enableTuneSwing = m_boolMap["enableTuneSwing"];
00173     m_minimzeProbe = m_boolMap["minimzeProbe"];
00174     m_minimzeProbeClassificationError = m_boolMap["minimzeProbeClassificationError"];
00175     m_minimzeBlend = m_boolMap["minimzeBlend"];
00176     m_minimzeBlendClassificationError = m_boolMap["minimzeBlendClassificationError"];
00177     m_weightFile = m_stringMap["weightFile"];
00178     m_fullPrediction = m_stringMap["fullPrediction"];
00179 }
00180 
00184 void StandardAlgorithm::init()
00185 {
00186     cout<<"Init standard algorithm"<<endl;
00187 
00188     // read standard and specific values
00189     readMaps();
00190     readSpecificMaps();
00191 
00192     if ( m_blendStop == 0 )
00193     {
00194         // init blendStop data
00195         m_blendStop = new BlendStopping ( this, "tune" );
00196         m_blendStop->setRegularization ( m_blendingRegularization );
00197 
00198         m_wrongLabelCnt = new int[m_nDomain];
00199         m_singlePrediction = new REAL[m_nClass * m_nDomain];
00200 
00201         if(m_validationType == "ValidationSet")
00202         {
00203             m_prediction = new REAL[m_validSize * m_nClass * m_nDomain];
00204             m_predictionBest = new REAL[m_validSize * m_nClass * m_nDomain];
00205             m_labelPrediction = new int[m_validSize * m_nDomain];
00206             return;
00207         }
00208         
00209         // prediction on trainset for all classes
00210         m_prediction = new REAL[m_nTrain * m_nClass * m_nDomain];
00211         m_predictionBest = new REAL[m_nTrain * m_nClass * m_nDomain];
00212         m_predictionProbe = new REAL*[m_maxThreadsInCross];
00213         for ( int i=0;i<m_maxThreadsInCross;i++ )
00214             m_predictionProbe[i] = new REAL[m_nTrain * m_nClass * m_nDomain];
00215         m_labelPrediction = new int[m_nTrain * m_nDomain];
00216         
00217         // cross validation training
00218         m_crossValidationPrediction = new REAL[m_nTrain*m_nClass*m_nDomain];
00219 
00220         if(m_validationType == "Bagging")
00221         {
00222             m_outOfBagEstimate = new REAL[m_nTrain * m_nClass * m_nDomain];
00223             m_outOfBagEstimateCnt = new int[m_nTrain];
00224         }
00225     }
00226 }
00227 
00233 double StandardAlgorithm::calcRMSEonProbe()
00234 {
00235     double rmse = 0.0, mae = 0.0;
00236     int nThreads = m_maxThreadsInCross;  // get #available threads
00237     for ( int d=0;d<m_nDomain;d++ )
00238         m_wrongLabelCnt[d] = 0;
00239 
00240     if(m_validationType == "ValidationSet")
00241     {
00242         // train the model on the train set
00243         modelUpdate ( m_trainOrig, m_trainTargetOrig, m_nTrain, 0 );
00244         
00245         // predict the validation set
00246         REAL* effect = new REAL[m_validSize * m_nClass * m_nDomain];
00247         for(uint i=0;i<m_validSize * m_nClass * m_nDomain;i++)
00248             effect[i] = 0.0;
00249         predictMultipleOutputs ( m_valid, effect, m_prediction, m_labelPrediction, m_validSize, 0 );
00250         delete[] effect;
00251         
00252         // calculate the error on the validation set
00253         for ( int i=0;i<m_validSize;i++)
00254         {
00255             // copy to blending vector and to internal prediction
00256             for ( int j=0;j<m_nClass*m_nDomain;j++ )
00257             {
00258                 REAL prediction = m_prediction[i*m_nClass*m_nDomain + j];
00259                 m_blendStop->m_newPrediction[j][i] = prediction;
00260                 m_prediction[i*m_nClass*m_nDomain + j] = prediction;
00261                 
00262                 rmse += ( prediction - m_validTarget[i*m_nClass*m_nDomain + j] ) * ( prediction - m_validTarget[i*m_nClass*m_nDomain + j] );
00263                 mae += fabs ( prediction - m_validTarget[i*m_nClass*m_nDomain + j] );
00264             }
00265             
00266             // count wrong labeled examples
00267             if ( Framework::getDatasetType() )
00268             {
00269                 for ( int d=0;d<m_nDomain;d++ )
00270                     if ( m_labelPrediction[d+i*m_nDomain] != m_validLabel[d+i*m_nDomain] )
00271                         m_wrongLabelCnt[d]++;
00272             }
00273         }
00274         
00275         // print the classification error rate
00276         double classificationError = 1.0;
00277         if ( Framework::getDatasetType() )
00278         {
00279             int nWrong = 0;
00280             for ( int d=0;d<m_nDomain;d++ )
00281             {
00282                 nWrong += m_wrongLabelCnt[d];
00283                 //if(m_nDomain > 1)
00284                 //    cout<<"["<<(double)m_wrongLabelCnt[d]/(double)m_validSize<<"] ";
00285             }
00286             classificationError = ( double ) nWrong/ ( ( double ) m_validSize*m_nDomain );
00287             cout<<" [classErr:"<<100.0*classificationError<<"%] ";
00288         }
00289         if ( m_minimzeProbeClassificationError )
00290             return classificationError;
00291 
00292         rmse = sqrt ( rmse/ ( ( double ) m_validSize * ( double ) m_nClass * ( double ) m_nDomain ) );
00293         mae = mae/ ( ( double ) m_validSize * ( double ) m_nClass * ( double ) m_nDomain );
00294 
00295         if ( m_errorFunction=="MAE" )
00296             return mae;
00297         else if ( m_errorFunction=="AUC" && Framework::getDatasetType() )
00298         {
00299             cout<<"[rmse:"<<rmse<<"]"<<flush;
00300 
00301             // calc area under curve
00302             REAL* tmp = new REAL[m_validSize*m_nClass*m_nDomain];
00303             for ( int i=0;i<m_validSize;i++ )
00304                 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00305                     tmp[i + j*m_validSize] = m_prediction[j + i*m_nClass*m_nDomain];
00306             REAL auc = getAUC ( tmp, m_validLabel, m_nClass, m_nDomain, m_validSize );
00307             delete[] tmp;
00308             return auc;
00309         }
00310 
00311         return rmse;
00312     }
00313     
00314     if(m_validationType == "Bagging")
00315     {
00316         for(int i=0;i<m_nTrain * m_nClass * m_nDomain;i++)
00317             m_outOfBagEstimate[i] = 0.0;
00318         for(int i=0;i<m_nTrain;i++)
00319             m_outOfBagEstimateCnt[i] = 0;
00320         for(int i=0;i<m_nTrain;i++)
00321             for(int j=0;j<m_nClass * m_nDomain;j++)
00322                 m_prediction[i*m_nClass*m_nDomain+j] = m_targetMean[j];  // some of the out-of-bag estimates are not predicted
00323     }
00324     
00325     for ( int i=0;i<m_nCross;i+=nThreads ) // all cross validation sets
00326     {
00327         // predict the probeset
00328         int* nSamples = new int[nThreads];
00329         int** labels = new int*[nThreads];
00330         for ( int j=0;j<nThreads;j++ )
00331         {
00332             nSamples[j] = m_probeSize[i+j];
00333             labels[j] = new int[nSamples[j]*m_nDomain];
00334         }
00335 
00336         if ( nThreads > 1 )
00337         {
00338             // parallel training of the cross-validation sets with OPENMP
00339 #pragma omp parallel for
00340             for ( int t=0;t<nThreads;t++ )
00341             {
00342                 cout<<"."<<flush;
00343                 if ( m_enableSaveMemory )
00344                     fillNCrossValidationSet ( i+t );
00345                 modelUpdate ( m_train[i+t], m_trainTargetResidual[i+t], m_trainSize[i+t], i+t );
00346                 predictMultipleOutputs ( m_probe[i+t], m_probeTargetEffect[i+t], m_predictionProbe[t], labels[t], nSamples[t], i+t );
00347                 if ( m_enableSaveMemory )
00348                     freeNCrossValidationSet ( i+t );
00349             }
00350         }
00351         else
00352         {
00353             cout<<"."<<flush;
00354             if ( m_enableSaveMemory )
00355                 fillNCrossValidationSet ( i );
00356             modelUpdate ( m_train[i], m_trainTargetResidual[i], m_trainSize[i], i );
00357             predictMultipleOutputs ( m_probe[i], m_probeTargetEffect[i], m_predictionProbe[0], labels[0], nSamples[0], i );
00358             if ( m_enableSaveMemory )
00359                 freeNCrossValidationSet ( i );
00360         }
00361 
00362         // merge the probe predictions
00363         if(m_validationType == "Bagging")
00364         {
00365             for ( int thread=0;thread<nThreads;thread++ )
00366             {
00367                 for ( int j=0;j<nSamples[thread];j++ )
00368                 {
00369                     int idx = m_probeIndex[i+thread][j];
00370                     for(int k=0;k<m_nClass * m_nDomain;k++)
00371                     {
00372                         REAL prediction = m_predictionProbe[thread][j*m_nClass*m_nDomain + k];
00373                         m_outOfBagEstimate[idx*m_nClass*m_nDomain + k] += prediction;
00374                     }
00375                     m_outOfBagEstimateCnt[idx]++;
00376                 }
00377             }
00378         }
00379         else
00380         {
00381             for ( int thread=0;thread<nThreads;thread++ )
00382             {
00383                 for ( int j=0;j<nSamples[thread];j++ ) // for all samples in this set
00384                 {
00385                     int idx = m_probeIndex[i+thread][j];
00386     
00387                     // copy to blending vector and to internal prediction
00388                     for ( int k=0;k<m_nClass*m_nDomain;k++ )
00389                     {
00390                         REAL prediction = m_predictionProbe[thread][j*m_nClass*m_nDomain + k];
00391                         m_blendStop->m_newPrediction[k][idx] = prediction;
00392                         m_prediction[idx*m_nClass*m_nDomain + k] = prediction;
00393                         rmse += ( prediction - m_probeTarget[i+thread][m_nClass*m_nDomain*j + k] ) * ( prediction - m_probeTarget[i+thread][m_nClass*m_nDomain*j + k] );
00394                         mae += fabs ( prediction - m_probeTarget[i+thread][m_nClass*m_nDomain*j + k] );
00395                     }
00396     
00397                     // count wrong labeled examples
00398                     if ( Framework::getDatasetType() )
00399                     {
00400                         for ( int d=0;d<m_nDomain;d++ )
00401                             if ( labels[thread][d+j*m_nDomain] != m_probeLabel[i+thread][d+j*m_nDomain] )
00402                                 m_wrongLabelCnt[d]++;
00403                     }
00404                 }
00405             }
00406         }
00407         
00408         // free memory
00409         for ( int j=0;j<nThreads;j++ )
00410         {
00411             if ( labels[j] )
00412                 delete[] labels[j];
00413             labels[j] = 0;
00414         }
00415         if ( nSamples )
00416             delete[] nSamples;
00417         nSamples = 0;
00418         if ( labels )
00419             delete[] labels;
00420         labels = 0;
00421 
00422     }
00423 
00424     if(m_validationType == "Bagging")
00425     {
00426         for(int i=0;i<m_nTrain;i++)
00427         {
00428             int c = m_outOfBagEstimateCnt[i];
00429             for(int j=0;j<m_nClass*m_nDomain;j++)
00430                 m_prediction[i*m_nClass*m_nDomain+j] = (c==0 ? m_targetMean[j] : (m_outOfBagEstimate[i*m_nClass*m_nDomain+j] / (REAL)c));
00431             
00432             for(int j=0;j<m_nClass*m_nDomain;j++)
00433             {
00434                 REAL prediction = m_prediction[i*m_nClass*m_nDomain+j];
00435                 m_blendStop->m_newPrediction[j][i] = prediction;
00436                 rmse += ( prediction - m_trainTargetOrig[m_nClass*m_nDomain*i + j] ) * ( prediction - m_trainTargetOrig[m_nClass*m_nDomain*i + j] );
00437                 mae += fabs ( prediction - m_trainTargetOrig[m_nClass*m_nDomain*i + j] );
00438             }
00439             
00440              // count wrong labeled examples
00441             if ( Framework::getDatasetType() )
00442             {
00443                 for(int j=0;j<m_nDomain;j++)
00444                 {
00445                     int indBest = -1;
00446                     REAL max = -1e10;
00447                     for(int k=0;k<m_nClass;k++)
00448                     {
00449                         if(max < m_prediction[i*m_nClass + j*m_nClass + k])
00450                         {
00451                             max = m_prediction[i*m_nClass + j*m_nClass + k];
00452                             indBest = k;
00453                         }
00454                     }
00455                     if(indBest != m_trainLabelOrig[i+j*m_nClass])
00456                         m_wrongLabelCnt[j]++;
00457                 }
00458             }
00459         }
00460     }
00461     
00462     // print the classification error rate
00463     double classificationError = 1.0;
00464     if ( Framework::getDatasetType() )
00465     {
00466         int nWrong = 0;
00467         for ( int d=0;d<m_nDomain;d++ )
00468         {
00469             nWrong += m_wrongLabelCnt[d];
00470             //if(m_nDomain > 1)
00471             //    cout<<"["<<(double)m_wrongLabelCnt[d]/(double)m_nTrain<<"] ";
00472         }
00473         classificationError = ( double ) nWrong/ ( ( double ) m_nTrain*m_nDomain );
00474         cout<<" [classErr:"<<100.0*classificationError<<"%] ";
00475     }
00476     if ( m_minimzeProbeClassificationError )
00477         return classificationError;
00478 
00479     rmse = sqrt ( rmse/ ( ( double ) m_nTrain * ( double ) m_nClass * ( double ) m_nDomain ) );
00480     mae = mae/ ( ( double ) m_nTrain * ( double ) m_nClass * ( double ) m_nDomain );
00481 
00482     if ( m_errorFunction=="MAE" )
00483         return mae;
00484     else if ( m_errorFunction=="AUC" && Framework::getDatasetType() )
00485     {
00486         cout<<"[rmse:"<<rmse<<"]"<<flush;
00487 
00488         // calc area under curve
00489         REAL* tmp = new REAL[m_nTrain*m_nClass*m_nDomain];
00490         for ( int i=0;i<m_nTrain;i++ )
00491             for ( int j=0;j<m_nClass*m_nDomain;j++ )
00492                 tmp[i + j*m_nTrain] = m_prediction[j + i*m_nClass*m_nDomain];
00493         REAL auc = getAUC ( tmp, m_trainLabelOrig, m_nClass, m_nDomain, m_nTrain );
00494         delete[] tmp;
00495         return auc;
00496     }
00497 
00498     return rmse;
00499 }
00500 
00505 void StandardAlgorithm::saveBestPrediction()
00506 {
00507     cout<<"[saveBest]";
00508     memcpy ( m_predictionBest, m_prediction, sizeof ( REAL ) * (m_validationType == "ValidationSet" ? m_validSize : m_nTrain) * m_nClass * m_nDomain );
00509     m_blendStop->saveTmpBestWeights();
00510     if(m_validationType == "ValidationSet")
00511         saveWeights(0);
00512     else
00513     {
00514         if ( m_validationType == "CrossFoldMean" || m_validationType == "Bagging" )
00515             for ( int i=0;i<m_nCross;i++ )
00516                 saveWeights ( i );
00517     }
00518 }
00519 
00523 double StandardAlgorithm::calcRMSEonBlend()
00524 {
00525     double rmse = calcRMSEonProbe();
00526     cout<<" [probe:"<<rmse<<"] ";
00527     double rmseBlend = m_blendStop->calcBlending();
00528     if ( m_minimzeBlendClassificationError )
00529         return m_blendStop->getClassificationError();
00530     return rmseBlend;
00531 }
00532 
00537 void StandardAlgorithm::setPredictionMode ( int cross )
00538 {
00539     cout<<"Set algorithm in prediction mode"<<endl;
00540     readMaps();
00541     readSpecificMaps();
00542     loadWeights ( cross );
00543 }
00544 
00554 void StandardAlgorithm::predictMultipleOutputs ( REAL* rawInput, REAL* effect, REAL* output, int* labels, int nSamples, int crossRun )
00555 {
00556     // model prediction
00557     predictAllOutputs ( rawInput, output, nSamples, crossRun );
00558 
00559     if ( m_enableTuneSwing ) // clip the output (clip swing)
00560     {
00561         IPPS_THRESHOLD ( output, output, nSamples*m_nClass*m_nDomain, -m_maxSwing, ippCmpLess );  // clip negative
00562         IPPS_THRESHOLD ( output, output, nSamples*m_nClass*m_nDomain, +m_maxSwing, ippCmpGreater );  // clip positive
00563     }
00564 
00565     // add the output from the preprocessor (=effect)
00566     V_ADD ( nSamples*m_nClass*m_nDomain, output, effect, output );
00567 
00568     // calc output labels (for classification dataset)
00569     if ( Framework::getDatasetType() )
00570     {
00571         // in all domains
00572         for ( int d=0;d<m_nDomain;d++ )
00573         {
00574             // calc output labels
00575             for ( int i=0;i<nSamples;i++ )
00576             {
00577                 // find max. output value
00578                 int indMax = -1;
00579                 REAL max = -1e10;
00580                 for ( int j=0;j<m_nClass;j++ )
00581                 {
00582                     if ( max < output[d*m_nClass + i*m_nDomain*m_nClass + j] )
00583                     {
00584                         max = output[d*m_nClass + i*m_nDomain*m_nClass + j];
00585                         indMax = j;
00586                     }
00587                 }
00588                 labels[d+i*m_nDomain] = indMax;
00589             }
00590         }
00591     }
00592 
00593     // clip final outputs
00594     if ( m_enableClipping )
00595     {
00596         IPPS_THRESHOLD ( output, output, nSamples*m_nClass*m_nDomain, m_negativeTarget, ippCmpLess );  // clip negative
00597         IPPS_THRESHOLD ( output, output, nSamples*m_nClass*m_nDomain, m_positiveTarget, ippCmpGreater );  // clip positive
00598     }
00599 
00600     // add small noise
00601     if ( m_addOutputNoise > 0.0 )
00602         for ( int i=0;i<nSamples*m_nClass*m_nDomain;i++ )
00603             output[i] += NumericalTools::getNormRandomNumber ( 0.0, m_addOutputNoise );
00604 
00605 }
00606 
00611 void StandardAlgorithm::writeFullPrediction(int nSamples)
00612 {
00613     // calc train RMSE
00614     double rmse = 0.0, err;
00615     for ( int i=0;i<nSamples;i++ )
00616     {
00617         for ( int j=0;j<m_nClass*m_nDomain;j++ )
00618         {
00619             err = m_prediction[j+i*m_nClass*m_nDomain] - m_trainTargetOrig[j+i*m_nClass*m_nDomain];
00620             rmse += err*err;
00621         }
00622     }
00623 
00624     // write file
00625     string name = m_datasetPath + "/" + m_fullPredPath + "/" + m_fullPrediction;
00626     cout<<"Write full prediction: "<<name<<" (RMSE:"<<sqrt ( rmse/ ( double ) ( nSamples*m_nClass*m_nDomain ) ) <<")";
00627     fstream f;
00628     f.open ( name.c_str(),ios::out );
00629     f.write ( ( char* ) m_prediction, sizeof ( REAL ) *nSamples*m_nClass*m_nDomain );
00630     f.close();
00631     cout<<endl;
00632 }
00633 
00641 void StandardAlgorithm::calculateFullPrediction()
00642 {
00643     double rmse = 0.0;
00644     cout<<endl<<"Calculate FullPrediction (write the prediction of the trainingset with cross validation)"<<endl<<endl;
00645 
00646     // re-calculate the blending weights (necessary for minimize only probe)
00647     if ( m_minimzeProbe )
00648     {
00649         double rmseBlend = m_blendStop->calcBlending();
00650         m_blendStop->saveTmpBestWeights();
00651         cout<<"rmseBlend:"<<rmseBlend<<endl;
00652     }
00653 
00654     // save linear blending weights
00655     m_blendStop->saveBlendingWeights ( m_datasetPath + "/" + m_tempPath, true );
00656     cout<<endl;
00657 
00658     memcpy ( m_prediction, m_predictionBest, sizeof ( REAL ) * (m_validationType == "ValidationSet"?m_validSize:m_nTrain) * m_nClass * m_nDomain );
00659     writeFullPrediction(m_validationType == "ValidationSet"?m_validSize:m_nTrain);
00660 
00661     m_inRetraining = true;
00662 
00663     if ( m_validationType == "Retraining" )
00664     {
00665         cout<<"Validation type: Retraining"<<endl;
00666         cout<<"Update model on whole training set"<<endl<<endl;
00667         time_t retrainTime = time ( 0 );
00668 
00669         // retrain the model with whole trainingset (disable cross validation)
00670         if ( m_enableSaveMemory )
00671             fillNCrossValidationSet ( m_nCross );
00672 
00673         // tmp variables, used for bagging
00674         REAL* trainOrig = 0;
00675         REAL* targetOrig = 0;
00676         REAL* targetEffectOrig = 0;
00677         REAL* targetResidualOrig = 0;
00678         int* labelOrig = 0;
00679 
00680         if ( m_enableBagging )
00681         {
00682             cout<<"Save orig data, create boostrap sample for retraining"<<endl;
00683             trainOrig = m_train[m_nCross];
00684             targetOrig = m_trainTarget[m_nCross];
00685             targetEffectOrig = m_trainTargetEffect[m_nCross];
00686             targetResidualOrig = m_trainTargetResidual[m_nCross];
00687             labelOrig = m_trainLabel[m_nCross];
00688             doBootstrapSampling ( 0, m_train[m_nCross], m_trainTarget[m_nCross], m_trainTargetEffect[m_nCross], m_trainTargetResidual[m_nCross], m_trainLabel[m_nCross] ); // bootstrap sample
00689             //doBootstrapSampling(0, m_train[m_nCross], m_trainTarget[m_nCross], m_trainTargetEffect[m_nCross], m_trainTargetResidual[m_nCross], m_trainLabel[m_nCross], m_nTrain * 0.8);
00690         }
00691 
00692         modelUpdate ( m_train[m_nCross], m_trainTargetResidual[m_nCross], m_nTrain, m_nCross );
00693         saveWeights ( m_nCross );
00694 
00695         // calc retrain rmse
00696         cout<<"Calculate retrain RMSE (on trainset)"<<endl;
00697         rmse = 0.0;
00698         memset ( m_prediction, 0, sizeof ( REAL ) *m_nTrain*m_nClass*m_nDomain );
00699         predictMultipleOutputs ( m_train[m_nCross], m_trainTargetEffect[m_nCross], m_prediction, m_labelPrediction, m_nTrain, m_nCross );
00700         for ( int i=0;i<m_nTrain*m_nClass*m_nDomain;i++ )
00701             rmse += ( m_prediction[i] - m_trainTarget[m_nCross][i] ) * ( m_prediction[i] - m_trainTarget[m_nCross][i] );
00702         rmse = sqrt ( rmse/ ( double ) ( m_nTrain * m_nClass * m_nDomain ) );
00703         cout<<"Train of this algorithm (RMSE after retraining): "<<rmse<<endl;
00704 
00705         if ( m_enableBagging )
00706         {
00707             cout<<"Restore orig data"<<endl;
00708             if ( m_train[m_nCross] )
00709                 delete[] m_train[m_nCross];
00710             if ( m_trainTarget[m_nCross] )
00711                 delete[] m_trainTarget[m_nCross];
00712             if ( m_trainTargetEffect[m_nCross] )
00713                 delete[] m_trainTargetEffect[m_nCross];
00714             if ( m_trainTargetResidual[m_nCross] )
00715                 delete[] m_trainTargetResidual[m_nCross];
00716             if ( m_trainLabel[m_nCross] )
00717                 delete[] m_trainLabel[m_nCross];
00718             m_train[m_nCross] = trainOrig;
00719             m_trainTarget[m_nCross] = targetOrig;
00720             m_trainTargetEffect[m_nCross] = targetEffectOrig;
00721             m_trainTargetResidual[m_nCross] = targetResidualOrig;
00722             m_trainLabel[m_nCross] = labelOrig;
00723         }
00724 
00725         if ( m_enableSaveMemory )
00726             freeNCrossValidationSet ( m_nCross );
00727 
00728         cout<<"Total retrain time:"<<time ( 0 )-retrainTime<<"[s]"<<endl;
00729     }
00730     /*else if(m_validationType == "CrossFoldMean" || m_validationType == "Bagging")
00731     {
00732         cout<<"Validation type: "<<m_validationType<<endl;
00733         for(int i=0;i<m_nCross;i++)
00734             saveWeights(i);
00735     }
00736     else
00737         assert(false);
00738     */
00739 
00740     // print summary
00741     cout<<endl<<"==========================================================================="<<endl;
00742     BlendStopping bb ( this, m_fullPrediction );
00743     bb.setRegularization ( m_blendingRegularization );
00744     if ( m_datasetName=="NETFLIX" && Framework::getAdditionalStartupParameter() >= 0 )
00745     {
00746         cout<<"Dataset:NETFLIX, slot:"<<Framework::getAdditionalStartupParameter() <<" ";
00747         char buf[512];
00748         sprintf ( buf,"p%d",Framework::getAdditionalStartupParameter() );
00749         string pName = string ( NETFLIX_SLOTDATA_ROOT_DIR ) + buf + "/trainPrediction.data";
00750         cout<<"pName:"<<pName<<endl;
00751         rmse = bb.calcBlending ( ( char* ) pName.c_str() );
00752     }
00753     else
00754         rmse = bb.calcBlending ( ( char* ) ( m_datasetPath + "/" + m_tempPath + "/trainPrediction.data" ).c_str() );
00755     bb.saveBlendingWeights ( m_datasetPath + "/" + m_tempPath );
00756     cout<<endl<<"BLEND RMSE OF ACTUAL FULLPREDICTION PATH:"<<rmse<<endl;
00757     cout<<"==========================================================================="<<endl<<endl;
00758 
00759 }

Generated on Tue Jan 26 09:20:59 2010 for ELF by  doxygen 1.5.8