00001 #include "BlendStopping.h"
00002
00003 extern StreamOutput cout;
00004
00008 BlendStopping::BlendStopping()
00009 {
00010 cout<<"Constructor BlendStopping"<<endl;
00011
00012
00013 m_newPrediction = 0;
00014 m_nPredictors = 0;
00015 m_nClass = 0;
00016 m_nDomain = 0;
00017 m_nTrain = 0;
00018 m_blendingRegularization = 0;
00019 m_classificationError = 0;
00020 m_singlePrediction = 0;
00021 m_prediction = 0;
00022 m_A = 0;
00023 m_b = 0;
00024 m_x = 0;
00025 m_xBest = 0;
00026 m_data = 0;
00027 m_paramsAUC = 0;
00028 m_optimizeRMSE = 0;
00029 m_globalWeights = 0;
00030 }
00031
00037 BlendStopping::BlendStopping ( Algorithm *data )
00038 {
00039 cout<<"Constructor BlendStopping"<<endl;
00040
00041 m_newPrediction = 0;
00042 m_nPredictors = 0;
00043 m_nClass = 0;
00044 m_nDomain = 0;
00045 m_nTrain = 0;
00046 m_blendingRegularization = 0;
00047 m_classificationError = 0;
00048 m_singlePrediction = 0;
00049 m_prediction = 0;
00050 m_A = 0;
00051 m_b = 0;
00052 m_x = 0;
00053 m_xBest = 0;
00054 m_data = 0;
00055 m_paramsAUC = 0;
00056 m_optimizeRMSE = 0;
00057
00058 m_data = data;
00059 m_nClass = m_data->m_nClass;
00060 m_nDomain = m_data->m_nDomain;
00061 m_nTrain = m_data->m_nTrain;
00062 m_globalWeights = m_data->m_enableGlobalBlendingWeights;
00063 }
00064
00071 BlendStopping::BlendStopping ( Algorithm *data, string algorithmFullPrediction )
00072 {
00073 cout<<"Constructor BlendStopping"<<endl;
00074
00075 m_newPrediction = 0;
00076 m_nPredictors = 0;
00077 m_nClass = 0;
00078 m_nDomain = 0;
00079 m_nTrain = 0;
00080 m_blendingRegularization = 0;
00081 m_classificationError = 0;
00082 m_singlePrediction = 0;
00083 m_prediction = 0;
00084 m_A = 0;
00085 m_b = 0;
00086 m_x = 0;
00087 m_xBest = 0;
00088 m_data = 0;
00089 m_paramsAUC = 0;
00090 m_optimizeRMSE = 0;
00091
00092 m_data = data;
00093 m_nClass = m_data->m_nClass;
00094 m_nDomain = m_data->m_nDomain;
00095 m_nTrain = m_data->m_validationType=="ValidationSet"? m_data->m_validSize : m_data->m_nTrain;
00096 m_globalWeights = m_data->m_enableGlobalBlendingWeights;
00097
00098 m_algorithmFullPrediction = algorithmFullPrediction;
00099 string directory = m_data->m_datasetPath + "/" + m_data->m_fullPredPath + "/";
00100
00101 vector<string> files = m_data->m_algorithmNameList;
00102
00103
00104 string additionalFullPrediction = m_data->m_datasetPath + "/" + m_data->m_fullPredPath + "/" + m_algorithmFullPrediction;
00105 fstream f ( additionalFullPrediction.c_str(), ios::in );
00106 if ( f.is_open() )
00107 {
00108 cout<<"ADD:"<<additionalFullPrediction<<" ";
00109 files.push_back ( additionalFullPrediction );
00110 m_algorithmFullPrediction = "";
00111 }
00112 f.close();
00113
00114 m_nPredictors = 0;
00115 for ( int i=0;i<files.size();i++ )
00116 {
00117 if ( files[i].at ( files[i].size()-1 ) != '.' && files[i].find ( ".dat" ) == files[i].length()-4 )
00118 {
00119
00120 m_usedFiles.push_back ( files[i] );
00121 m_nPredictors++;
00122 }
00123 }
00124
00125
00126 m_nPredictors++;
00127
00128
00129 if ( m_algorithmFullPrediction=="tune" )
00130 {
00131 m_nPredictors++;
00132 cout<<"Number of predictors for blendStopping: "<<m_nPredictors<<" (+1 const, +1 new)"<<endl;
00133 }
00134 else
00135 cout<<"Number of predictors for blendStopping: "<<m_nPredictors<<" (+1 const)"<<endl;
00136
00137
00138
00139 m_singlePrediction = new REAL[m_nTrain];
00140 m_prediction = new REAL[m_nTrain*m_nClass*m_nDomain];
00141 m_A = new REAL*[m_nClass*m_nDomain];
00142 m_b = new REAL*[m_nClass*m_nDomain];
00143 m_x = new REAL*[m_nClass*m_nDomain];
00144 m_xBest = new REAL*[m_nClass*m_nDomain];
00145 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00146 {
00147 m_A[i] = new REAL[m_nPredictors * m_nTrain];
00148 m_b[i] = new REAL[m_nTrain];
00149 m_x[i] = new REAL[m_nPredictors];
00150 m_xBest[i] = new REAL[m_nPredictors];
00151 for ( int j=0;j<m_nPredictors;j++ )
00152 {
00153 m_x[i][j] = 0.0;
00154 m_xBest[i][j] = 0.0;
00155 }
00156 }
00157
00158
00159 for ( int i=0;i<m_nTrain;i++ )
00160 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00161 {
00162 REAL t = m_data->m_trainTargetOrig[i*m_nClass*m_nDomain + j];
00163 if(m_data->m_validationType == "ValidationSet")
00164 t = m_data->m_validTarget[i*m_nClass*m_nDomain + j];
00165
00166 m_b[j][i] = t;
00167 }
00168
00169
00170 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00171 for ( int j=0;j<m_nTrain;j++ )
00172 m_A[i][j] = 1.0;
00173
00174
00175 cout<<endl;
00176 for ( int i=0;i<m_usedFiles.size();i++ )
00177 {
00178 fstream f ( m_usedFiles[i].c_str(), ios::in );
00179 if ( f.is_open() == false )
00180 {
00181 cout<<"Cannot open:"<<m_usedFiles[i]<<endl;
00182 assert ( false );
00183 }
00184 REAL* cache0 = new REAL[m_nTrain*m_nClass*m_nDomain];
00185 f.read ( ( char* ) cache0, sizeof ( REAL ) *m_nTrain*m_nClass*m_nDomain );
00186 f.close();
00187 double rmse0 = 0.0, err;
00188 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00189 {
00190 for ( int k=0;k<m_nTrain;k++ )
00191 {
00192 m_A[j][k + ( i+1 ) *m_nTrain] = cache0[j + k*m_nClass*m_nDomain];
00193 err = m_A[j][k + ( i+1 ) *m_nTrain] - m_b[j][k];
00194 rmse0 += err * err;
00195 }
00196 }
00197 cout<<"File:"<<m_usedFiles[i]<<" RMSE:"<<sqrt ( rmse0/ ( double ) ( m_nClass*m_nTrain*m_nDomain ) ) <<endl;
00198 if ( cache0 )
00199 delete[] cache0;
00200 cache0 = 0;
00201 }
00202 cout<<endl;
00203
00204
00205 if ( m_algorithmFullPrediction=="tune" )
00206 {
00207 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00208 for ( int j=0;j<m_nTrain;j++ )
00209 m_A[i][ ( m_nPredictors - 1 ) *m_nTrain + j] = 0.0;
00210
00211
00212 m_newPrediction = new REAL*[m_nClass*m_nDomain];
00213 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00214 m_newPrediction[i] = m_A[i] + ( m_nPredictors - 1 ) *m_nTrain;
00215 }
00216
00217 if ( m_data->m_errorFunction=="AUC" )
00218 m_paramsAUC = new double[m_nClass*m_nPredictors*m_nDomain];
00219 }
00220
00224 BlendStopping::~BlendStopping()
00225 {
00226 cout<<"destructor BlendStopping"<<endl;
00227 if ( m_singlePrediction )
00228 delete[] m_singlePrediction;
00229 m_singlePrediction = 0;
00230 if ( m_prediction )
00231 delete[] m_prediction;
00232 m_prediction = 0;
00233 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00234 {
00235 if ( m_A )
00236 {
00237 if ( m_A[i] )
00238 delete[] m_A[i];
00239 m_A[i] = 0;
00240 }
00241 if ( m_b )
00242 {
00243 if ( m_b[i] )
00244 delete[] m_b[i];
00245 m_b[i] = 0;
00246 }
00247 if ( m_x )
00248 {
00249 if ( m_x[i] )
00250 delete[] m_x[i];
00251 m_x[i] = 0;
00252 }
00253 if ( m_xBest )
00254 {
00255 if ( m_xBest[i] )
00256 delete[] m_xBest[i];
00257 m_xBest[i] = 0;
00258 }
00259 }
00260 if ( m_A )
00261 delete[] m_A;
00262 m_A = 0;
00263 if ( m_b )
00264 delete[] m_b;
00265 m_b = 0;
00266 if ( m_x )
00267 delete[] m_x;
00268 m_x = 0;
00269 if ( m_xBest )
00270 delete[] m_xBest;
00271 m_xBest = 0;
00272 if ( m_newPrediction )
00273 delete[] m_newPrediction;
00274 m_newPrediction = 0;
00275 if ( m_paramsAUC )
00276 delete[] m_paramsAUC;
00277 m_paramsAUC = 0;
00278 }
00279
00284 void BlendStopping::saveTmpBestWeights()
00285 {
00286 cout<<"[SB]";
00287 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00288 for ( int j=0;j<m_nPredictors;j++ )
00289 m_xBest[i][j] = m_x[i][j];
00290 }
00291
00298 double BlendStopping::calcBlending ( char* fname )
00299 {
00300 cout<<" [CalcBlend] ";
00301 double rmse = 0.0, mae = 0.0, err;
00302 double* rmseClass = new double[m_nClass*m_nDomain];
00303
00304
00305 if ( m_data->m_blendingEnableCrossValidation == true && m_data->m_blendingAlgorithm != "Average" && m_data->m_blendingAlgorithm != "TakeLast" )
00306 {
00307 if ( m_data->m_errorFunction=="AUC" && Framework::getDatasetType() && m_data->m_blendingAlgorithm=="NelderMead" )
00308 {
00309
00310 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00311 for ( int j=0;j<m_nPredictors;j++ )
00312 m_x[i][j] = m_xBest[i][j] = ( j+1==m_nPredictors ) ? 0.1 : 1.0;
00313
00314 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00315 {
00316 for ( int j=0;j<m_nPredictors;j++ )
00317 {
00318 char buf[1024];
00319 sprintf ( buf,"w-t%d-p%d",i,j );
00320 m_paramsAUC[i*m_nPredictors+j] = m_x[i][j];
00321 addDoubleParameter ( & ( m_paramsAUC[i*m_nPredictors+j] ), buf, -100.0, 100.0 );
00322 }
00323 }
00324
00325
00326 setDebug ( 0 );
00327
00328
00329 NelderMeadSearch ( 500 );
00330
00331
00332 int cnt = 0;
00333 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00334 {
00335 for ( int j=0;j<m_nPredictors;j++ )
00336 {
00337 char buf[1024];
00338 sprintf ( buf,"w-t%d-p%d",i,j );
00339
00340 m_x[i][j] = m_paramsAUC[i*m_nPredictors+j];
00341 m_xBest[i][j] = m_paramsAUC[i*m_nPredictors+j];
00342 removeDoubleParameter ( buf );
00343 }
00344 }
00345 }
00346 else
00347 {
00348 setDebug ( 0 );
00349 addDoubleParameter ( &m_blendingRegularization, "lambda", 1e-10, 1e3 );
00350
00351
00352 int minTuninigEpochs = 0, maxTuninigEpochs = 30;
00353 m_optimizeRMSE = 1;
00354 expSearcher ( minTuninigEpochs, maxTuninigEpochs, 3, 1, 0.8, true, false );
00355 m_optimizeRMSE = 0;
00356
00357 removeDoubleParameter ( "lambda" );
00358
00359 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00360 for ( int j=0;j<m_nPredictors;j++ )
00361 m_xBest[i][j] = m_x[i][j];
00362
00363 if ( fname == 0 )
00364 {
00365
00366
00367 }
00368 else
00369 cout<<endl<<"Blending-CV-error:"<<m_expSearchErrorBest<<endl;
00370 }
00371 }
00372
00373 cout<<"lambda:"<<m_blendingRegularization<<" ";
00374
00375 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00376 {
00377 rmseClass[i] = 0.0;
00378
00379 if ( m_data->m_blendingAlgorithm=="Average" )
00380 {
00381
00382 m_x[i][0] = 0.0;
00383 REAL normalizer = 1.0 / ( double ) ( m_nPredictors-1 );
00384 for ( int j=1;j<m_nPredictors;j++ )
00385 m_x[i][j] = normalizer;
00386 }
00387 else if ( m_data->m_blendingAlgorithm=="TakeLast" )
00388 {
00389
00390 for ( int j=0;j<m_nPredictors;j++ )
00391 m_x[i][j] = 0.0;
00392 m_x[i][m_nPredictors-1] = 1.0;
00393 }
00394 else if ( m_data->m_blendingAlgorithm=="LinearRegression" )
00395 {
00396 if ( m_globalWeights )
00397 {
00398 REAL* A = new REAL[m_nTrain*m_nPredictors*m_nClass*m_nDomain], *b = new REAL[m_nTrain*m_nClass*m_nDomain];
00399 for ( int j=0;j<m_nPredictors;j++ )
00400 for ( int k=0;k<m_nClass*m_nDomain;k++ )
00401 for ( int l=0;l<m_nTrain;l++ )
00402 A[l + k*m_nTrain + j*m_nTrain*m_nClass*m_nDomain] = m_A[k][l+j*m_nTrain];
00403 for ( int k=0;k<m_nClass*m_nDomain;k++ )
00404 for ( int l=0;l<m_nTrain;l++ )
00405 b[l + k*m_nTrain] = m_b[k][l];
00406 m_numTools.RidgeRegressionMultisolutionSinglecallGELSS ( A, b, m_x[i], m_nTrain*m_nClass*m_nDomain, m_nPredictors, 1, m_blendingRegularization, false );
00407 delete[] A;
00408 delete[] b;
00409 }
00410 else
00411 m_numTools.RidgeRegressionMultisolutionSinglecallGELSS ( m_A[i], m_b[i], m_x[i], m_nTrain, m_nPredictors, 1, m_blendingRegularization, false );
00412 }
00413 else if ( m_data->m_blendingAlgorithm=="LinearRegressionNonNeg" )
00414 {
00415 if ( m_globalWeights )
00416 {
00417 REAL* A = new REAL[m_nTrain*m_nPredictors*m_nClass*m_nDomain], *b = new REAL[m_nTrain*m_nClass*m_nDomain];
00418 for ( int j=0;j<m_nPredictors;j++ )
00419 for ( int k=0;k<m_nClass*m_nDomain;k++ )
00420 for ( int l=0;l<m_nTrain;l++ )
00421 A[l + k*m_nTrain + j*m_nTrain*m_nClass*m_nDomain] = m_A[k][l+j*m_nTrain];
00422 for ( int k=0;k<m_nClass*m_nDomain;k++ )
00423 for ( int l=0;l<m_nTrain;l++ )
00424 b[l + k*m_nTrain] = m_b[k][l];
00425 m_numTools.RidgeRegressionNonNegSinglecall ( A, b, m_x[i], m_nTrain, m_nPredictors, m_blendingRegularization, 1e-10, 1000, false );
00426 delete[] A;
00427 delete[] b;
00428 }
00429 else
00430 m_numTools.RidgeRegressionNonNegSinglecall ( m_A[i], m_b[i], m_x[i], m_nTrain, m_nPredictors, m_blendingRegularization, 1e-10, 1000, false );
00431
00432
00433 REAL sumX = 0.0;
00434 for ( int j=1;j<m_nPredictors;j++ )
00435 sumX += m_x[i][j];
00436 if ( sumX == 0.0 )
00437 {
00438 if ( i > 0 )
00439 for ( int j=0;j<m_nPredictors;j++ )
00440 m_x[i][j] = m_x[i-1][j];
00441 else
00442 {
00443 for ( int j=0;j<m_nPredictors;j++ )
00444 m_x[i][j] = 0.0;
00445 m_x[i][m_nPredictors-1] = 1.0;
00446 }
00447 }
00448 }
00449 else if ( m_data->m_blendingAlgorithm=="NelderMead" )
00450 {
00451
00452
00453
00454 }
00455
00456
00457 CBLAS_GEMM ( CblasColMajor, CblasNoTrans, CblasNoTrans, m_nTrain, 1, m_nPredictors , 1.0, m_A[i], m_nTrain, m_x[i], m_nPredictors, 0.0, m_singlePrediction, m_nTrain );
00458
00459 if ( m_data->m_enablePostBlendClipping )
00460 {
00461 IPPS_THRESHOLD ( m_singlePrediction, m_singlePrediction, m_nTrain, m_data->m_negativeTarget, ippCmpLess );
00462 IPPS_THRESHOLD ( m_singlePrediction, m_singlePrediction, m_nTrain, m_data->m_positiveTarget, ippCmpGreater );
00463 }
00464
00465 memcpy ( ( char* ) ( m_prediction+m_nTrain*i ), m_singlePrediction, sizeof ( REAL ) *m_nTrain );
00466 for ( int j=0;j<m_nTrain;j++ )
00467 {
00468 rmse += ( m_singlePrediction[j] - m_b[i][j] ) * ( m_singlePrediction[j] - m_b[i][j] );
00469 mae += fabs ( m_singlePrediction[j] - m_b[i][j] );
00470 rmseClass[i] += ( m_singlePrediction[j] - m_b[i][j] ) * ( m_singlePrediction[j] - m_b[i][j] );
00471 }
00472 }
00473
00474
00475
00476
00477
00478 if ( fname )
00479 printWeights ( false );
00480
00481 if ( fname )
00482 {
00483 cout<<"[Write train prediction:"<<fname<<"]"<<" nSamples:"<<m_nTrain<<endl;
00484 fstream f ( fname, ios::out );
00485 if ( f.is_open() == false )
00486 assert ( false );
00487 REAL* tmpOut = new REAL[m_nTrain*m_nClass*m_nDomain];
00488 for ( int i=0;i<m_nTrain;i++ )
00489 {
00490 int realIndex = m_data->m_mixDatasetIndices[i];
00491 if(m_data->m_validationType=="ValidationSet")
00492 realIndex = i;
00493 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00494 tmpOut[j*m_nTrain + realIndex] = m_prediction[j*m_nTrain + i];
00495 }
00496 for ( int i=0;i<m_nTrain;i++ )
00497 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00498 {
00499 float predictionSP = tmpOut[j*m_nTrain + i];
00500 f.write ( ( char* ) &predictionSP,sizeof ( float ) );
00501 }
00502 f.close();
00503 if ( tmpOut )
00504 delete[] tmpOut;
00505 tmpOut = 0;
00506 }
00507
00508
00509 if ( Framework::getDatasetType() )
00510 {
00511 int* wrongLabelCnt = new int[m_nDomain];
00512 for ( int d=0;d<m_nDomain;d++ )
00513 wrongLabelCnt[d] = 0;
00514
00515
00516 for ( int d=0;d<m_nDomain;d++ )
00517 {
00518
00519 for ( int i=0;i<m_nTrain;i++ )
00520 {
00521 REAL max = -1e10;
00522 int indBest = -1;
00523 for ( int j=0;j<m_nClass;j++ )
00524 {
00525 if ( max < m_prediction[d*m_nClass*m_nTrain + j*m_nTrain + i] )
00526 {
00527 max = m_prediction[d*m_nClass*m_nTrain + j*m_nTrain + i];
00528 indBest = j;
00529 }
00530 }
00531 if (m_data->m_validationType == "ValidationSet")
00532 {
00533 if ( indBest != m_data->m_validLabel[d + i*m_nDomain] )
00534 wrongLabelCnt[d]++;
00535 }
00536 else
00537 {
00538 if ( indBest != m_data->m_trainLabelOrig[d + i*m_nDomain] )
00539 wrongLabelCnt[d]++;
00540 }
00541 }
00542 }
00543
00544 int nWrong = 0;
00545 for ( int d=0;d<m_nDomain;d++ )
00546 {
00547 nWrong += wrongLabelCnt[d];
00548
00549
00550 }
00551 m_classificationError = ( double ) nWrong/ ( ( double ) m_nTrain*m_nDomain );
00552 cout<<" [classErr:"<<100.0*m_classificationError<<"%] ";
00553
00554 delete[] wrongLabelCnt;
00555 }
00556
00557 if ( rmseClass )
00558 delete[] rmseClass;
00559 rmseClass = 0;
00560
00561 rmse = sqrt ( rmse/ ( ( double ) m_nTrain * ( double ) m_nClass * ( double ) m_nDomain ) );
00562 mae = mae/ ( ( double ) m_nTrain * ( double ) m_nClass * ( double ) m_nDomain );
00563
00564 if ( m_data->m_errorFunction=="AUC" && Framework::getDatasetType() )
00565 {
00566 cout<<"[rmse:"<<rmse<<"] ";
00567
00568
00569 REAL auc = getAUC ( m_prediction, m_data->m_trainLabelOrig, m_data->m_nClass, m_data->m_nDomain, m_data->m_nTrain );
00570 return auc;
00571 }
00572 else if ( m_data->m_errorFunction=="MAE" )
00573 return mae;
00574
00575 return rmse;
00576 }
00577
00583 double BlendStopping::getClassificationError()
00584 {
00585 return m_classificationError;
00586 }
00587
00593 void BlendStopping::saveBlendingWeights ( string path, bool saveBest )
00594 {
00595 printWeights ( saveBest );
00596
00597 char buf[1024];
00598 sprintf ( buf, "blendingWeights_%02d.dat", m_nPredictors );
00599 string name = path + "/" + string ( buf );
00600 cout<<"Save blending weights: "<<name<<endl;
00601 fstream f ( name.c_str(), ios::out );
00602 f.write ( ( char* ) &m_nClass, sizeof ( int ) );
00603 f.write ( ( char* ) &m_nDomain, sizeof ( int ) );
00604 f.write ( ( char* ) &m_nPredictors, sizeof ( int ) );
00605
00606
00607 REAL** xPtr = (saveBest==true?m_xBest:m_x);
00608 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00609 f.write ( ( char* ) xPtr[i], sizeof ( REAL ) *m_nPredictors );
00610
00611 f.close();
00612 }
00613
00620 void BlendStopping::loadBlendingWeights ( string path, int nPredictors )
00621 {
00622 char buf[1024];
00623 sprintf ( buf, "blendingWeights_%02d.dat", nPredictors );
00624 string name = path + "/" + string ( buf );
00625
00626 cout<<"Load blending weights: "<<name<<endl;
00627 fstream f ( name.c_str(), ios::in );
00628 if ( f.is_open() == false )
00629 assert ( false );
00630 f.read ( ( char* ) &m_nClass, sizeof ( int ) );
00631 f.read ( ( char* ) &m_nDomain, sizeof ( int ) );
00632 f.read ( ( char* ) &m_nPredictors, sizeof ( int ) );
00633
00634
00635 if ( m_x == 0 )
00636 {
00637 m_x = new REAL*[m_nClass*m_nDomain];
00638 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00639 m_x[i] = new REAL[m_nPredictors];
00640 }
00641
00642
00643 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00644 f.read ( ( char* ) m_x[i], sizeof ( REAL ) *m_nPredictors );
00645
00646 printWeights ( false );
00647
00648 f.close();
00649 }
00650
00659 void BlendStopping::predictEnsembleOutput ( REAL** predictions, REAL* output )
00660 {
00661 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00662 output[i] = 0.0;
00663 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00664 {
00665 for ( int j=0;j<m_nPredictors;j++ )
00666 output[i] += m_x[i][j] * predictions[j][i];
00667
00668
00669 if ( m_data->m_enablePostBlendClipping )
00670 output[i] = NumericalTools::clipValue ( output[i], m_data->m_negativeTarget, m_data->m_positiveTarget );
00671 }
00672 }
00673
00678 void BlendStopping::printWeights ( bool printBest )
00679 {
00680 cout<<"Blending weights (row: classes, col: predictors[1.col=const predictor])"<<endl;
00681 REAL** xPtr = printBest==true?m_xBest : m_x;
00682 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00683 {
00684 if ( cout.m_enableOutput )
00685 {
00686 for ( int j=0;j<m_nPredictors;j++ )
00687 printf ( "%1.3f\t",xPtr[i][j] );
00688 printf ( "\n" );
00689 }
00690 }
00691 }
00692
00699 void BlendStopping::setRegularization ( REAL reg )
00700 {
00701 cout<<"Blending regularization: "<<reg<<endl;
00702 m_blendingRegularization = reg;
00703 }
00704
00705
00712 double BlendStopping::calcRMSEonProbe()
00713 {
00714 if ( m_optimizeRMSE )
00715 {
00716 int *randomIndex = new int[m_nTrain];
00717 REAL* tmpA = new REAL[m_nTrain * m_nPredictors];
00718 REAL* tmpB = new REAL[m_nTrain];
00719
00720
00721 for ( int i=0;i<m_nTrain;i++ )
00722 {
00723 if(m_data->m_validationType=="Bagging")
00724 randomIndex[i] = rand() % m_data->m_nCross;
00725 else
00726 randomIndex[i] = m_data->m_crossIndex[i];
00727 }
00728
00729 double rmse = 0.0;
00730 int cnt = 0;
00731
00732 for ( int cross=0;cross<m_data->m_nCross;cross++ )
00733 {
00734 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00735 {
00736
00737 for ( int j=0;j<m_nTrain;j++ )
00738 {
00739 if ( randomIndex[j] == cross )
00740 {
00741 for ( int k=0;k<m_nPredictors;k++ )
00742 {
00743 tmpA[j + k*m_nTrain] = m_A[i][j + k*m_nTrain];
00744 m_A[i][j + k*m_nTrain] = 0.0;
00745 }
00746 tmpB[j] = m_b[i][j];
00747 m_b[i][j] = 0.0;
00748 }
00749 }
00750
00751
00752 if ( m_data->m_blendingAlgorithm=="LinearRegression" || m_data->m_blendingAlgorithm=="NelderMead" )
00753 {
00754 if ( m_globalWeights )
00755 {
00756 REAL* A = new REAL[m_nTrain*m_nPredictors*m_nClass*m_nDomain], *b = new REAL[m_nTrain*m_nClass*m_nDomain];
00757 for ( int j=0;j<m_nPredictors;j++ )
00758 for ( int k=0;k<m_nClass*m_nDomain;k++ )
00759 for ( int l=0;l<m_nTrain;l++ )
00760 A[l + k*m_nTrain + j*m_nTrain*m_nClass*m_nDomain] = m_A[k][l+j*m_nTrain];
00761 for ( int k=0;k<m_nClass*m_nDomain;k++ )
00762 for ( int l=0;l<m_nTrain;l++ )
00763 b[l + k*m_nTrain] = m_b[k][l];
00764 m_numTools.RidgeRegressionMultisolutionSinglecallGELSS ( A, b, m_x[i], m_nTrain*m_nClass*m_nDomain, m_nPredictors, 1, m_blendingRegularization, false );
00765 delete[] A;
00766 delete[] b;
00767 }
00768 else
00769 m_numTools.RidgeRegressionMultisolutionSinglecallGELSS ( m_A[i], m_b[i], m_x[i], m_nTrain, m_nPredictors, 1, m_blendingRegularization, false );
00770 }
00771 else if ( m_data->m_blendingAlgorithm=="LinearRegressionNonNeg" )
00772 {
00773 if ( m_globalWeights )
00774 {
00775 REAL* A = new REAL[m_nTrain*m_nPredictors*m_nClass*m_nDomain], *b = new REAL[m_nTrain*m_nClass*m_nDomain];
00776 for ( int j=0;j<m_nPredictors;j++ )
00777 for ( int k=0;k<m_nClass*m_nDomain;k++ )
00778 for ( int l=0;l<m_nTrain;l++ )
00779 A[l + k*m_nTrain + j*m_nTrain*m_nClass*m_nDomain] = m_A[k][l+j*m_nTrain];
00780 for ( int k=0;k<m_nClass*m_nDomain;k++ )
00781 for ( int l=0;l<m_nTrain;l++ )
00782 b[l + k*m_nTrain] = m_b[k][l];
00783 m_numTools.RidgeRegressionNonNegSinglecall ( A, b, m_x[i], m_nTrain, m_nPredictors, m_blendingRegularization, 1e-10, 1000, false );
00784 delete[] A;
00785 delete[] b;
00786 }
00787 else
00788 m_numTools.RidgeRegressionNonNegSinglecall ( m_A[i], m_b[i], m_x[i], m_nTrain, m_nPredictors, m_blendingRegularization, 1e-10, 1000, false );
00789 }
00790
00791
00792 for ( int j=0;j<m_nTrain;j++ )
00793 {
00794 if ( randomIndex[j] == cross )
00795 {
00796 for ( int k=0;k<m_nPredictors;k++ )
00797 m_A[i][j + k*m_nTrain] = tmpA[j + k*m_nTrain];
00798 m_b[i][j] = tmpB[j];
00799 }
00800 }
00801
00802
00803 CBLAS_GEMM ( CblasColMajor, CblasNoTrans, CblasNoTrans, m_nTrain, 1, m_nPredictors , 1.0, m_A[i], m_nTrain, m_x[i], m_nPredictors, 0.0, m_singlePrediction, m_nTrain );
00804 for ( int j=0;j<m_nTrain;j++ )
00805 {
00806 if ( randomIndex[j] == cross )
00807 {
00808 if ( m_data->m_enablePostBlendClipping )
00809 {
00810 if ( m_singlePrediction[j] < m_data->m_negativeTarget )
00811 m_singlePrediction[j] = m_data->m_negativeTarget;
00812 if ( m_singlePrediction[j] > m_data->m_positiveTarget )
00813 m_singlePrediction[j] = m_data->m_positiveTarget;
00814 }
00815 m_prediction[j + m_nTrain*i] = m_singlePrediction[j];
00816 rmse += ( m_singlePrediction[j] - m_b[i][j] ) * ( m_singlePrediction[j] - m_b[i][j] );
00817 cnt++;
00818 }
00819 }
00820
00821 }
00822 }
00823 if ( randomIndex )
00824 delete[] randomIndex;
00825 randomIndex = 0;
00826 if ( tmpA )
00827 delete[] tmpA;
00828 tmpA = 0;
00829 if ( tmpB )
00830 delete[] tmpB;
00831 tmpB = 0;
00832 rmse = sqrt ( rmse/ ( double ) cnt );
00833 return rmse;
00834 }
00835 if ( m_optimizeRMSE == 0 )
00836 {
00837
00838 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00839 for ( int j=0;j<m_nPredictors;j++ )
00840 m_x[i][j] = m_paramsAUC[j + i*m_nPredictors];
00841
00842
00843 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00844 for ( int j=0;j<m_nTrain;j++ )
00845 {
00846 REAL out = 0.0;
00847 for ( int k=0;k<m_nPredictors;k++ )
00848 out += m_A[i][j+k*m_nTrain] * m_x[i][k];
00849 m_prediction[j+i*m_nTrain] = out;
00850 }
00851
00852
00853 REAL auc = getAUC ( m_prediction, m_data->m_trainLabelOrig, m_data->m_nClass, m_data->m_nDomain, m_data->m_nTrain );
00854 return auc;
00855 }
00856
00857 assert ( false );
00858 return 0;
00859 }
00860
00864 double BlendStopping::calcRMSEonBlend()
00865 {
00866 assert ( false );
00867 return 0.0;
00868 }
00869
00873 void BlendStopping::saveBestPrediction()
00874 {
00875
00876 }