00001 #include "KNearestNeighbor.h"
00002
00003 extern StreamOutput cout;
00004
00008 KNearestNeighbor::KNearestNeighbor()
00009 {
00010 cout<<"KNearestNeighbor"<<endl;
00011
00012 m_targetActualPtr = 0;
00013 m_lengthActual = 0;
00014 m_isLoadWeights = 0;
00015 m_xNormalized = 0;
00016 m_predictionAllOutputs = 0;
00017 m_predictionAllOutputsAllTargets = 0;
00018 m_scale = 0;
00019 m_scale2 = 0;
00020 m_offset = 0;
00021 m_globalOffset = 0;
00022 m_power = 0;
00023 m_k = 0;
00024 m_euclideanPower = 1.0;
00025 m_euclideanLimit = 0.01;
00026 m_evaluationBlockSize = 0;
00027 m_enableAdvancedKNN = 0;
00028 m_dataDenormalization = 0;
00029 m_takeEuclideanDistance = 0;
00030 m_optimizeEuclideanPower = 0;
00031 }
00032
00036 KNearestNeighbor::~KNearestNeighbor()
00037 {
00038 cout<<"descructor KNearestNeighbor"<<endl;
00039
00040 if ( m_xNormalized )
00041 {
00042 for ( int i=0;i<m_nCross+1;i++ )
00043 if ( m_xNormalized[i] )
00044 delete[] m_xNormalized[i];
00045 delete[] m_xNormalized;
00046 }
00047 m_xNormalized = 0;
00048 if ( m_targetActualPtr )
00049 delete[] m_targetActualPtr;
00050 m_targetActualPtr = 0;
00051 if ( m_lengthActual )
00052 delete[] m_lengthActual;
00053 m_lengthActual = 0;
00054 }
00055
00060 void KNearestNeighbor::readSpecificMaps()
00061 {
00062 cout<<"Read specific maps"<<endl;
00063
00064
00065 m_evaluationBlockSize = m_intMap["evaluationBlockSize"];
00066 m_k = m_intMap["kInit"];
00067 m_euclideanPower = m_doubleMap["euclideanPower"];
00068 m_euclideanLimit = m_doubleMap["euclideanLimit"];
00069 m_enableAdvancedKNN = m_boolMap["enableAdvancedKNN"];
00070 m_takeEuclideanDistance = m_boolMap["takeEuclideanDistance"];
00071 m_dataDenormalization = m_boolMap["dataDenormalization"];
00072 m_optimizeEuclideanPower = m_boolMap["optimizeEuclideanPower"];
00073 m_scale = m_doubleMap["initScale"];
00074 m_scale2 = m_doubleMap["initScale2"];
00075 m_offset = m_doubleMap["initOffset"];
00076 m_globalOffset = m_doubleMap["initGlobalOffset"];
00077 m_power = m_doubleMap["initPower"];
00078 }
00079
00084 void KNearestNeighbor::modelInit()
00085 {
00086
00087
00088
00089 if ( m_enableAdvancedKNN )
00090 {
00091 paramDoubleValues.push_back ( &m_globalOffset );
00092 paramDoubleNames.push_back ( "globalOffset" );
00093 paramDoubleValues.push_back ( &m_scale );
00094 paramDoubleNames.push_back ( "scale" );
00095 paramDoubleValues.push_back ( &m_scale2 );
00096 paramDoubleNames.push_back ( "scale2" );
00097 paramDoubleValues.push_back ( &m_offset );
00098 paramDoubleNames.push_back ( "offset" );
00099 paramDoubleValues.push_back ( &m_power );
00100 paramDoubleNames.push_back ( "power" );
00101 }
00102
00103
00104 paramIntValues.push_back ( &m_k );
00105 paramIntNames.push_back ( "k" );
00106
00107 if ( m_takeEuclideanDistance && m_optimizeEuclideanPower )
00108 {
00109 paramDoubleValues.push_back ( &m_euclideanPower );
00110 paramDoubleNames.push_back ( "euclideanPower" );
00111 paramDoubleValues.push_back ( &m_euclideanLimit );
00112 paramDoubleNames.push_back ( "euclideanLimit" );
00113 }
00114
00115
00116 if ( m_xNormalized == 0 )
00117 {
00118 m_xNormalized = new REAL*[m_nCross+1];
00119 m_targetActualPtr = new REAL*[m_nCross+1];
00120 m_lengthActual = new int[m_nCross+1];
00121 for ( int i=0;i<m_nCross+1;i++ )
00122 {
00123 m_xNormalized[i] = 0;
00124 m_targetActualPtr[i] = 0;
00125 m_lengthActual[i] = m_trainSize[i];
00126 }
00127 }
00128 }
00129
00138 void KNearestNeighbor::predictAllOutputs ( REAL* rawInputs, REAL* outputs, uint nSamples, uint crossRun )
00139 {
00140 int lengthActual = m_lengthActual[crossRun];
00141 REAL* targetActualPtr = m_targetActualPtr[crossRun];
00142
00143
00144 for ( int i=0;i<nSamples;i++ )
00145 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00146 outputs[i*m_nClass*m_nDomain + j] = 0.0;
00147
00148 REAL* allCorrelations = new REAL[m_evaluationBlockSize * lengthActual];
00149 REAL* allCorrelationsTransposed = new REAL[m_evaluationBlockSize * lengthActual];
00150 REAL* euclTmp = new REAL[m_nFeatures];
00151 REAL* squaredSumInputs = new REAL[m_evaluationBlockSize];
00152 REAL* squaredSumData = new REAL[lengthActual];
00153
00154
00155 int sampleOffset = 0;
00156 bool breakLoop = true;
00157 while ( breakLoop )
00158 {
00159 int blockSize = nSamples - sampleOffset;
00160 if ( blockSize >= m_evaluationBlockSize )
00161 blockSize = m_evaluationBlockSize;
00162 if ( sampleOffset + blockSize == nSamples )
00163 breakLoop = false;
00164
00165
00166 REAL* zScores = new REAL[blockSize*m_nFeatures];
00167 for ( int i=0;i<blockSize;i++ )
00168 {
00169 REAL* ptr0 = zScores + i*m_nFeatures;
00170 REAL* ptr1 = rawInputs + ( i+sampleOffset ) *m_nFeatures;
00171
00172 REAL mean = 0.0, std = 0.0, denormalized;
00173 for ( int j=0;j<m_nFeatures;j++ )
00174 {
00175 denormalized = ptr1[j];
00176 if ( m_dataDenormalization )
00177 denormalized = denormalized * m_std[j] + m_mean[j];
00178 mean += denormalized;
00179 }
00180 mean /= ( REAL ) m_nFeatures;
00181 if ( m_takeEuclideanDistance )
00182 mean = 0.0;
00183 for ( int j=0;j<m_nFeatures;j++ )
00184 {
00185 denormalized = ptr1[j];
00186 if ( m_dataDenormalization )
00187 denormalized = denormalized * m_std[j] + m_mean[j];
00188 std += ( denormalized - mean ) * ( denormalized - mean );
00189 }
00190 std = sqrt ( std / ( REAL ) ( m_nFeatures-1 ) );
00191 if ( m_takeEuclideanDistance )
00192 std = 1.0;
00193 for ( int j=0;j<m_nFeatures;j++ )
00194 {
00195 denormalized = ptr1[j];
00196 if ( m_dataDenormalization )
00197 denormalized = denormalized * m_std[j] + m_mean[j];
00198 ptr0[j] = ( denormalized - mean ) / std;
00199 }
00200 }
00201
00202
00203 if ( m_takeEuclideanDistance )
00204 {
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226 for ( uint i=0;i<blockSize;i++ )
00227 {
00228 double sum = 0.0;
00229 REAL *ptr = zScores + i*m_nFeatures;
00230 for ( uint k=0;k<m_nFeatures;k++ )
00231 sum += ptr[k] * ptr[k];
00232 squaredSumInputs[i] = sum;
00233 }
00234 for ( uint i=0;i<lengthActual;i++ )
00235 {
00236 double sum = 0.0;
00237 REAL *ptr = m_xNormalized[crossRun] + i*m_nFeatures;
00238 for ( uint k=0;k<m_nFeatures;k++ )
00239 sum += ptr[k] * ptr[k];
00240 squaredSumData[i] = sum;
00241 }
00242
00243
00244 CBLAS_GEMM ( CblasRowMajor, CblasNoTrans, CblasTrans, lengthActual, blockSize, m_nFeatures, 1.0, m_xNormalized[crossRun], m_nFeatures, zScores, m_nFeatures, 0.0, allCorrelations, blockSize );
00245
00246
00247
00248 for ( int i=0;i<lengthActual;i++ )
00249 for ( int j=0;j<blockSize;j++ )
00250 allCorrelationsTransposed[i + j*lengthActual] = allCorrelations[i*blockSize + j];
00251
00252 for ( int i=0;i<blockSize;i++ )
00253 {
00254
00255
00256
00257
00258
00259 REAL* corrPtr = allCorrelationsTransposed + i*lengthActual;
00260 REAL sqSum = squaredSumInputs[i];
00261
00262 for ( int j=0;j<lengthActual;j++ )
00263 {
00264 corrPtr[j] = squaredSumData[j] + sqSum - 2.0 * corrPtr[j];
00265
00266 }
00267 }
00268
00269 V_SQRT ( blockSize*lengthActual, allCorrelationsTransposed, allCorrelationsTransposed );
00270 for ( uint i=0;i<blockSize*lengthActual;i++ )
00271 allCorrelationsTransposed[i] += m_euclideanLimit;
00272 V_INV ( blockSize*lengthActual, allCorrelationsTransposed, allCorrelationsTransposed );
00273 V_POWC ( blockSize*lengthActual, allCorrelationsTransposed, m_euclideanPower, allCorrelationsTransposed );
00274 }
00275 else
00276 {
00277
00278
00279 CBLAS_GEMM ( CblasRowMajor, CblasNoTrans, CblasTrans, lengthActual, blockSize, m_nFeatures, 1.0, m_xNormalized[crossRun], m_nFeatures, zScores, m_nFeatures, 0.0, allCorrelations, blockSize );
00280
00281 REAL scale = 1.0 / ( REAL ) ( m_nFeatures );
00282 V_MULC ( allCorrelations, scale, allCorrelations, blockSize * lengthActual );
00283 IPPS_THRESHOLD ( allCorrelations, allCorrelations, blockSize * lengthActual, -1.0, ippCmpLess );
00284 IPPS_THRESHOLD ( allCorrelations, allCorrelations, blockSize * lengthActual, +1.0, ippCmpGreater );
00285
00286
00287
00288 for ( int i=0;i<lengthActual;i++ )
00289 for ( int j=0;j<blockSize;j++ )
00290 allCorrelationsTransposed[i + j*lengthActual] = allCorrelations[i*blockSize + j];
00291 }
00292
00293 int* indexKBest = new int[m_k];
00294 REAL* valueKBest = new REAL[m_k];
00295
00296
00297 for ( int i=0;i<blockSize;i++ )
00298 {
00299 REAL* predictionPtr = outputs + sampleOffset*m_nClass*m_nDomain + i*m_nClass*m_nDomain;
00300 REAL* corrPtr = allCorrelationsTransposed + i*lengthActual;
00301
00302 REAL worstValue = 1e10;
00303 int worstIdx = -1;
00304 int topKLength = 0;
00305
00306
00307 for ( int j=0;j<lengthActual;j++ )
00308 {
00309 REAL c = corrPtr[j];
00310
00311
00312 if ( topKLength < m_k )
00313 {
00314 indexKBest[topKLength] = j;
00315 valueKBest[topKLength] = c;
00316 if ( worstValue > c )
00317 {
00318 worstValue = c;
00319 worstIdx = topKLength;
00320 }
00321 topKLength++;
00322 }
00323 else if ( c > worstValue )
00324 {
00325 indexKBest[worstIdx] = j;
00326 valueKBest[worstIdx] = c;
00327
00328
00329 worstIdx = -1;
00330 worstValue = 1e10;
00331 for ( int k=0;k<m_k;k++ )
00332 if ( worstValue > valueKBest[k] )
00333 {
00334 worstValue = valueKBest[k];
00335 worstIdx = k;
00336 }
00337 }
00338 }
00339
00340
00341 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00342 {
00343 REAL sumCV = 0.0, sumC = 0.0, v;
00344 for ( int k=0;k<topKLength;k++ )
00345 {
00346 int idx = indexKBest[k];
00347 REAL c = corrPtr[idx];
00348 if ( c > 1e6 && m_takeEuclideanDistance )
00349 continue;
00350 REAL v = targetActualPtr[j + idx*m_nClass*m_nDomain];
00351
00352
00353 if ( m_enableAdvancedKNN )
00354 {
00355 REAL sign = c>0.0? 1.0 : -1.0;
00356 c = pow ( fabs ( c ), ( float ) m_power ) * sign;
00357 c = m_scale2 * 1.0 / ( 1.0 + exp ( - ( m_scale * c - m_offset ) ) ) + m_globalOffset;
00358 }
00359
00360 sumCV += c * v;
00361 sumC += fabs ( c );
00362 }
00363 v = sumCV / sumC;
00364 if ( isnan ( v ) || isinf ( v ) )
00365 v = 0.0;
00366 predictionPtr[j] = v;
00367 }
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418 }
00419
00420 sampleOffset += blockSize;
00421
00422 delete[] indexKBest;
00423 delete[] valueKBest;
00424
00425 if ( zScores )
00426 delete[] zScores;
00427 zScores = 0;
00428 }
00429
00430 delete[] allCorrelations;
00431 delete[] allCorrelationsTransposed;
00432 delete[] euclTmp;
00433 delete[] squaredSumInputs;
00434 delete[] squaredSumData;
00435 }
00436
00452 void KNearestNeighbor::modelUpdate ( REAL* input, REAL* target, uint nSamples, uint crossRun )
00453 {
00454 m_targetActualPtr[crossRun] = target;
00455 m_lengthActual[crossRun] = nSamples;
00456
00457
00458 if ( m_xNormalized[crossRun] == 0 )
00459 m_xNormalized[crossRun] = new REAL[nSamples*m_nFeatures];
00460
00461
00462 for ( int i=0;i<nSamples;i++ )
00463 {
00464 REAL* ptr0 = m_xNormalized[crossRun] + i*m_nFeatures;
00465 REAL* ptr1 = input + i*m_nFeatures;
00466
00467
00468 REAL mean = 0.0, std = 0.0, denormalized;
00469 for ( int j=0;j<m_nFeatures;j++ )
00470 {
00471 denormalized = ptr1[j];
00472 if ( m_dataDenormalization )
00473 denormalized = denormalized * m_std[j] + m_mean[j];
00474 mean += denormalized;
00475 }
00476 mean = mean / ( REAL ) m_nFeatures;
00477 if ( m_takeEuclideanDistance )
00478 mean = 0.0;
00479 for ( int j=0;j<m_nFeatures;j++ )
00480 {
00481 denormalized = ptr1[j];
00482 if ( m_dataDenormalization )
00483 denormalized = denormalized * m_std[j] + m_mean[j];
00484 std += ( denormalized - mean ) * ( denormalized - mean );
00485 }
00486 if ( std == 0.0 )
00487 std = 1.0;
00488 else
00489 std = sqrt ( std / ( REAL ) ( m_nFeatures-1 ) );
00490 if ( m_takeEuclideanDistance )
00491 std = 1.0;
00492 for ( int j=0;j<m_nFeatures;j++ )
00493 {
00494 denormalized = ptr1[j];
00495 if ( m_dataDenormalization )
00496 denormalized = denormalized * m_std[j] + m_mean[j];
00497 ptr0[j] = ( denormalized - mean ) / std;
00498 }
00499 }
00500 }
00501
00506 void KNearestNeighbor::saveWeights ( int cross )
00507 {
00508 char buf[1024];
00509 sprintf ( buf,"%02d",cross );
00510 string name = m_datasetPath + "/" + m_tempPath + "/" + m_weightFile + "." + buf;
00511 if ( m_inRetraining )
00512 cout<<"Save:"<<name<<endl;
00513 fstream f ( name.c_str(), ios::out );
00514 f.write ( ( char* ) &m_nTrain, sizeof ( int ) );
00515 f.write ( ( char* ) &m_nFeatures, sizeof ( int ) );
00516 f.write ( ( char* ) &m_nClass, sizeof ( int ) );
00517 f.write ( ( char* ) &m_nDomain, sizeof ( int ) );
00518 f.write ( ( char* ) &m_lengthActual[cross], sizeof ( int ) );
00519 f.write ( ( char* ) m_xNormalized[cross], sizeof ( REAL ) *m_lengthActual[cross]*m_nFeatures );
00520 f.write ( ( char* ) m_targetActualPtr[cross], sizeof ( REAL ) *m_lengthActual[cross]*m_nClass*m_nDomain );
00521 f.write ( ( char* ) &m_scale, sizeof ( double ) );
00522 f.write ( ( char* ) &m_scale2, sizeof ( double ) );
00523 f.write ( ( char* ) &m_offset, sizeof ( double ) );
00524 f.write ( ( char* ) &m_globalOffset, sizeof ( double ) );
00525 f.write ( ( char* ) &m_k, sizeof ( int ) );
00526 f.write ( ( char* ) &m_euclideanPower, sizeof ( double ) );
00527 f.write ( ( char* ) &m_euclideanLimit, sizeof ( double ) );
00528 f.write ( ( char* ) &m_maxSwing, sizeof ( double ) );
00529 f.close();
00530 }
00531
00536 void KNearestNeighbor::loadWeights ( int cross )
00537 {
00538 char buf[1024];
00539 sprintf ( buf,"%02d",cross );
00540 string name = m_datasetPath + "/" + m_tempPath + "/" + m_weightFile + "." + buf;
00541 cout<<"Load:"<<name<<endl;
00542 fstream f ( name.c_str(), ios::in );
00543 if ( f.is_open() == false )
00544 assert ( false );
00545 f.read ( ( char* ) &m_nTrain, sizeof ( int ) );
00546 f.read ( ( char* ) &m_nFeatures, sizeof ( int ) );
00547 f.read ( ( char* ) &m_nClass, sizeof ( int ) );
00548 f.read ( ( char* ) &m_nDomain, sizeof ( int ) );
00549
00550 m_xNormalized = new REAL*[m_nCross+1];
00551 m_targetActualPtr = new REAL*[m_nCross+1];
00552 m_lengthActual = new int[m_nCross+1];
00553 for ( int i=0;i<m_nCross+1;i++ )
00554 {
00555 m_xNormalized[i] = 0;
00556 m_targetActualPtr[i] = 0;
00557 m_lengthActual[i] = 0;
00558 }
00559
00560 f.read ( ( char* ) &m_lengthActual[cross], sizeof ( int ) );
00561
00562
00563 m_xNormalized[cross] = new REAL[m_lengthActual[cross]*m_nFeatures];
00564 m_targetActualPtr[cross] = new REAL[m_lengthActual[cross]*m_nClass*m_nDomain];
00565
00566 f.read ( ( char* ) m_xNormalized[cross], sizeof ( REAL ) *m_lengthActual[cross]*m_nFeatures );
00567 f.read ( ( char* ) m_targetActualPtr[cross], sizeof ( REAL ) *m_lengthActual[cross]*m_nClass*m_nDomain );
00568 f.read ( ( char* ) &m_scale, sizeof ( double ) );
00569 f.read ( ( char* ) &m_scale2, sizeof ( double ) );
00570 f.read ( ( char* ) &m_offset, sizeof ( double ) );
00571 f.read ( ( char* ) &m_globalOffset, sizeof ( double ) );
00572 f.read ( ( char* ) &m_k, sizeof ( int ) );
00573 f.read ( ( char* ) &m_euclideanPower, sizeof ( double ) );
00574 f.read ( ( char* ) &m_euclideanLimit, sizeof ( double ) );
00575 f.read ( ( char* ) &m_maxSwing, sizeof ( double ) );
00576 f.close();
00577
00578 m_isLoadWeights = 1;
00579 }
00580
00584 void KNearestNeighbor::loadMetaWeights ( int cross )
00585 {
00586 char buf[1024];
00587 sprintf ( buf,"%02d",cross );
00588 string name = m_datasetPath + "/" + m_tempPath + "/" + m_weightFile + "." + buf;
00589 cout<<"LoadMeta:"<<name<<endl;
00590 fstream f ( name.c_str(), ios::in );
00591 if ( f.is_open() == false )
00592 assert ( false );
00593 f.read ( ( char* ) &m_nTrain, sizeof ( int ) );
00594 f.read ( ( char* ) &m_nFeatures, sizeof ( int ) );
00595 f.read ( ( char* ) &m_nClass, sizeof ( int ) );
00596 f.read ( ( char* ) &m_nDomain, sizeof ( int ) );
00597
00598 int lengthActual;
00599 f.read ( ( char* ) &lengthActual, sizeof ( int ) );
00600
00601 REAL* tmp = new REAL[lengthActual*m_nFeatures];
00602 f.read ( ( char* ) tmp, sizeof ( REAL ) *lengthActual*m_nFeatures );
00603 delete[] tmp;
00604 tmp = new REAL[lengthActual*m_nClass*m_nDomain];
00605 f.read ( ( char* ) tmp, sizeof ( REAL ) *lengthActual*m_nClass*m_nDomain );
00606 delete[] tmp;
00607 f.read ( ( char* ) &m_scale, sizeof ( double ) );
00608 f.read ( ( char* ) &m_scale2, sizeof ( double ) );
00609 f.read ( ( char* ) &m_offset, sizeof ( double ) );
00610 f.read ( ( char* ) &m_globalOffset, sizeof ( double ) );
00611 f.read ( ( char* ) &m_k, sizeof ( int ) );
00612 f.read ( ( char* ) &m_euclideanPower, sizeof ( double ) );
00613 f.read ( ( char* ) &m_euclideanLimit, sizeof ( double ) );
00614 f.read ( ( char* ) &m_maxSwing, sizeof ( double ) );
00615 f.close();
00616 }
00617
00623 string KNearestNeighbor::templateGenerator ( int id, string preEffect, int nameID, bool blendStop )
00624 {
00625 stringstream s;
00626 s<<"ALGORITHM=KNearestNeighbor"<<endl;
00627 s<<"ID="<<id<<endl;
00628 s<<"TRAIN_ON_FULLPREDICTOR="<<preEffect<<endl;
00629 s<<"DISABLE=0"<<endl;
00630 s<<endl;
00631 s<<"[int]"<<endl;
00632 s<<"kInit=10"<<endl;
00633 s<<"maxTuninigEpochs=10"<<endl;
00634 s<<"evaluationBlockSize=1000"<<endl;
00635 s<<endl;
00636 s<<"[double]"<<endl;
00637 s<<"initMaxSwing=1.0"<<endl;
00638 s<<"initScale=5.0"<<endl;
00639 s<<"initScale2=1.0"<<endl;
00640 s<<"initOffset=2.0"<<endl;
00641 s<<"initGlobalOffset=-0.05"<<endl;
00642 s<<"initPower=1.0"<<endl;
00643 s<<"euclideanPower=1.0"<<endl;
00644 s<<"euclideanLimit=0.01"<<endl;
00645 s<<endl;
00646 s<<"[bool]"<<endl;
00647 s<<"takeEuclideanDistance=1"<<endl;
00648 s<<"dataDenormalization=0"<<endl;
00649 s<<"optimizeEuclideanPower=0"<<endl;
00650 s<<endl;
00651 s<<"enableAdvancedKNN=0"<<endl;
00652 s<<endl;
00653 s<<"enableClipping=1"<<endl;
00654 s<<"enableTuneSwing=0"<<endl;
00655 s<<endl;
00656 s<<"minimzeProbe="<< ( !blendStop ) <<endl;
00657 s<<"minimzeProbeClassificationError=0"<<endl;
00658 s<<"minimzeBlend="<<blendStop<<endl;
00659 s<<"minimzeBlendClassificationError=0"<<endl;
00660 s<<endl;
00661 s<<"[string]"<<endl;
00662 s<<"weightFile=KNearestNeighbor_"<<nameID<<"_weights.dat"<<endl;
00663 s<<"fullPrediction=KNearestNeighbor_"<<nameID<<".dat"<<endl;
00664
00665 return s.str();
00666 }