00001 #include "GBDT.h"
00002
00003 extern StreamOutput cout;
00004
00012 bool compareNodeReduced ( nodeReduced n0, nodeReduced n1 )
00013 {
00014 return n0.m_size < n1.m_size;
00015 }
00016
00020 GBDT::GBDT()
00021 {
00022 cout<<"GBDT"<<endl;
00023
00024 m_featureSubspaceSize = 0;
00025 m_maxTreeLeafes = 0;
00026 m_useOptSplitPoint = true;
00027 m_lRate = 0.0;
00028 m_globalMean = 0;
00029 m_calculateGlobalMean = true;
00030 m_treeTargets = 0;
00031 }
00032
00036 GBDT::~GBDT()
00037 {
00038 cout<<"descructor GBDT"<<endl;
00039 }
00040
00045 void GBDT::readSpecificMaps()
00046 {
00047 m_featureSubspaceSize = m_intMap["featureSubspaceSize"];
00048 m_maxTreeLeafes = m_intMap["maxTreeLeafes"];
00049 m_useOptSplitPoint = m_boolMap["useOptSplitPoint"];
00050 m_calculateGlobalMean = m_boolMap["calculateGlobalMean"];
00051 m_lRate = m_doubleMap["lRate"];
00052 }
00053
00058 void GBDT::modelInit()
00059 {
00060
00061 m_epoch = 0;
00062 paramEpochValues.push_back ( &m_epoch );
00063 paramEpochNames.push_back ( "epoch" );
00064
00065 if ( m_featureSubspaceSize > m_nFeatures )
00066 m_featureSubspaceSize = m_nFeatures;
00067
00068
00069 m_globalMean = new REAL*[m_nCross+1];
00070 for ( int i=0;i<m_nCross+1;i++ )
00071 {
00072 m_globalMean[i] = new REAL[m_nClass*m_nDomain];
00073 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00074 m_globalMean[i][j] = 0.0;
00075 }
00076
00077
00078 m_treeTargets = new REAL*[m_nCross+1];
00079 for ( int i=0;i<m_nCross;i++ )
00080 m_treeTargets[i] = new REAL[m_trainSize[i]*m_nClass*m_nDomain];
00081 m_treeTargets[m_nCross] = new REAL[m_trainSize[m_nCross]*m_nClass*m_nDomain];
00082
00083
00084 m_trees = new node**[m_nCross+1];
00085 for ( int i=0;i<m_nCross+1;i++ )
00086 {
00087 m_trees[i] = new node*[m_nClass*m_nDomain];
00088 for ( uint j=0;j<m_nClass*m_nDomain;j++ )
00089 {
00090 m_trees[i][j] = new node[m_maxTuninigEpochs];
00091 for ( uint k=0;k<m_maxTuninigEpochs;k++ )
00092 {
00093 m_trees[i][j][k].m_featureNr = -1;
00094 m_trees[i][j][k].m_value = 1e10;
00095 m_trees[i][j][k].m_toSmallerEqual = 0;
00096 m_trees[i][j][k].m_toLarger = 0;
00097 m_trees[i][j][k].m_trainSamples = 0;
00098 m_trees[i][j][k].m_nSamples = -1;
00099 }
00100 }
00101 }
00102
00103
00104 if(m_validationType == "ValidationSet")
00105 {
00106 m_validationPredictions = new double*[1];
00107 m_validationPredictions[0] = new double[m_nClass*m_nDomain*m_validSize];
00108 for(int i=0;i<m_nClass*m_nDomain*m_validSize;i++)
00109 m_validationPredictions[0][i] = 0.0;
00110 }
00111 else
00112 {
00113 m_validationPredictions = new double*[m_nCross];
00114 for(int i=0;i<m_nCross;i++)
00115 {
00116 m_validationPredictions[i] = new double[m_nClass*m_nDomain*m_probeSize[i]];
00117 for(int j=0;j<m_nClass*m_nDomain*m_probeSize[i];j++)
00118 m_validationPredictions[i][j] = 0.0;
00119 }
00120 }
00121
00122 srand ( Framework::getRandomSeed() );
00123 }
00124
00133 void GBDT::predictAllOutputs ( REAL* rawInputs, REAL* outputs, uint nSamples, uint crossRun )
00134 {
00135 bool mode = Framework::getFrameworkMode();
00136
00137 for ( uint i=0;i<nSamples;i++ )
00138 {
00139 REAL* ptr = rawInputs + i * m_nFeatures;
00140
00141
00142 for ( uint j=0;j<m_nClass*m_nDomain;j++ )
00143 {
00144 if((crossRun >= m_nCross && m_validationType != "ValidationSet") || mode)
00145 {
00146 double sum = m_globalMean[crossRun][j];
00147
00148 for ( int k=0;k<m_epoch+1;k++ )
00149 {
00150 REAL v = predictSingleTree ( & ( m_trees[crossRun][j][k] ), ptr );
00151 sum += m_lRate * v;
00152 }
00153 outputs[i*m_nClass*m_nDomain + j] = sum;
00154 }
00155 else
00156 {
00157
00158 double sum = m_validationPredictions[crossRun][i*m_nClass*m_nDomain + j];
00159 double v = predictSingleTree ( & ( m_trees[crossRun][j][m_epoch] ), ptr );
00160 sum += m_lRate * v;
00161 m_validationPredictions[crossRun][i*m_nClass*m_nDomain + j] = sum;
00162 outputs[i*m_nClass*m_nDomain + j] = sum + (double)m_globalMean[crossRun][j];
00163 }
00164 }
00165 }
00166 }
00167
00175 REAL GBDT::predictSingleTree ( node* n, REAL* input )
00176 {
00177
00178 if ( n->m_toSmallerEqual == 0 && n->m_toLarger == 0 )
00179 return n->m_value;
00180
00181 int nr = n->m_featureNr;
00182
00183
00184
00185
00186
00187 REAL thresh = n->m_value;
00188 REAL feature = input[nr];
00189
00190 if ( feature <= thresh )
00191 return predictSingleTree ( n->m_toSmallerEqual, input );
00192 return predictSingleTree ( n->m_toLarger, input );
00193 }
00194
00204 void GBDT::modelUpdate ( REAL* input, REAL* target, uint nSamples, uint crossRun )
00205 {
00206 REAL* singleTarget = new REAL[nSamples];
00207 REAL* inputTmp = new REAL[nSamples*m_featureSubspaceSize];
00208 REAL* inputTargetsSort = new REAL[nSamples*m_featureSubspaceSize];
00209 bool* usedFeatures = new bool[m_nFeatures];
00210 int* sortIndex = new int[nSamples];
00211 int* radixTmp0 = new int[nSamples];
00212 REAL* radixTmp1 = new REAL[nSamples];
00213
00214 if ( crossRun == m_nCross && m_validationType != "ValidationSet")
00215 m_epoch = 0;
00216
00217 uint loopEnd = 1;
00218 if(crossRun==m_nCross && m_validationType != "ValidationSet")
00219 loopEnd = m_epochParamBest[0]+1;
00220
00221 for ( uint boostLoop=0;boostLoop < loopEnd;boostLoop++ )
00222 {
00223 time_t t0 = time ( 0 );
00224 if ( crossRun == m_nCross && m_validationType != "ValidationSet" )
00225 cout<<"epoch:"<<m_epoch<<" "<<flush;
00226
00227
00228 for ( uint i=0;i<m_nClass*m_nDomain;i++ )
00229 {
00230
00231 if ( m_epoch == 0 )
00232 {
00233 double sum = 0.0;
00234 if ( m_calculateGlobalMean )
00235 {
00236 for ( uint j=0;j<nSamples;j++ )
00237 sum += target[j*m_nClass*m_nDomain+i];
00238 sum /= ( double ) nSamples;
00239 }
00240 m_globalMean[crossRun][i] = sum;
00241 cout<<"globalMean:"<<sum<<" "<<flush;
00242
00243 for ( uint j=0;j<nSamples;j++ )
00244 m_treeTargets[crossRun][i*nSamples+j] = target[j*m_nClass*m_nDomain+i] - sum;
00245 }
00246
00247 for ( uint j=0;j<nSamples;j++ )
00248 singleTarget[j] = m_treeTargets[crossRun][i*nSamples+j];
00249
00250 deque<nodeReduced> largestNodes;
00251 for ( uint j=0;j<m_nFeatures;j++ )
00252 usedFeatures[j] = false;
00253
00254
00255 m_trees[crossRun][i][m_epoch].m_trainSamples = new int[nSamples];
00256 m_trees[crossRun][i][m_epoch].m_nSamples = nSamples;
00257 int* ptr = m_trees[crossRun][i][m_epoch].m_trainSamples;
00258 for ( uint j=0;j<nSamples;j++ )
00259 ptr[j] = j;
00260
00261 nodeReduced firstNode;
00262 firstNode.m_node = & ( m_trees[crossRun][i][m_epoch] );
00263 firstNode.m_size = nSamples;
00264 largestNodes.push_back ( firstNode );
00265 push_heap ( largestNodes.begin(), largestNodes.end(), compareNodeReduced );
00266
00267
00268
00269 for ( int j=0;j<m_maxTreeLeafes;j++ )
00270 {
00271 node* largestNode = largestNodes[0].m_node;
00272 trainSingleTree ( largestNode, largestNodes, input, inputTmp, inputTargetsSort, singleTarget, usedFeatures, nSamples, sortIndex, radixTmp0, radixTmp1 );
00273 }
00274
00275
00276
00277
00278 cleanTree ( & ( m_trees[crossRun][i][m_epoch] ) );
00279
00280
00281 double trainRMSE = 0.0;
00282
00283 for ( uint j=0;j<nSamples;j++ )
00284 {
00285 REAL p = predictSingleTree ( & ( m_trees[crossRun][i][m_epoch] ), input+j*m_nFeatures );
00286
00287 m_treeTargets[crossRun][i*nSamples+j] -= m_lRate * p;
00288 double err = m_treeTargets[crossRun][i*nSamples+j];
00289 trainRMSE += err * err;
00290 }
00291
00292 if ( crossRun == m_nCross )
00293 cout<<"tRMSE["<<i<<"]:"<<sqrt ( trainRMSE/ ( double ) nSamples ) <<" "<<flush;
00294
00295 }
00296
00297 if ( crossRun == m_nCross && boostLoop < m_epochParamBest[0] && m_validationType != "ValidationSet")
00298 {
00299 cout<<time ( 0 )-t0<<"[s]"<<endl;
00300 m_epoch++;
00301 }
00302 }
00303 delete[] usedFeatures;
00304 delete[] singleTarget;
00305 delete[] inputTmp;
00306 delete[] inputTargetsSort;
00307 delete[] sortIndex;
00308 delete[] radixTmp0;
00309 delete[] radixTmp1;
00310
00311 }
00312
00319 void GBDT::cleanTree ( node* n )
00320 {
00321 if ( n->m_trainSamples )
00322 {
00323 delete[] n->m_trainSamples;
00324 n->m_trainSamples = 0;
00325 }
00326 n->m_nSamples = 0;
00327
00328 if ( n->m_toSmallerEqual )
00329 cleanTree ( n->m_toSmallerEqual );
00330 if ( n->m_toLarger )
00331 cleanTree ( n->m_toLarger );
00332 }
00333
00347 void GBDT::trainSingleTree ( node* n, deque<nodeReduced> &largestNodes, REAL* input, REAL* inputTmp, REAL* inputTargetsSort, REAL* singleTarget, bool* usedFeatures, int nSamples, int* sortIndex, int* radixTmp0, REAL* radixTmp1)
00348 {
00349
00350 int nS = largestNodes.size();
00351 if ( nS >= m_maxTreeLeafes || n->m_nSamples <= 1 )
00352 return;
00353
00354
00355 if ( largestNodes.size() > 0 )
00356 {
00357
00358 pop_heap ( largestNodes.begin(),largestNodes.end(),compareNodeReduced );
00359 largestNodes.pop_back();
00360 }
00361
00362
00363 int nNodeSamples = n->m_nSamples;
00364
00365
00366 int *randFeatureIDs = new int[m_featureSubspaceSize];
00367 if ( m_featureSubspaceSize < m_nFeatures )
00368 {
00369 for ( uint i=0;i<m_featureSubspaceSize;i++ )
00370 {
00371 uint idx = rand() % m_nFeatures;
00372 while ( usedFeatures[idx] )
00373 idx = rand() % m_nFeatures;
00374 randFeatureIDs[i] = idx;
00375 usedFeatures[idx] = true;
00376 }
00377 }
00378 else
00379 for ( uint i=0;i<m_featureSubspaceSize;i++ )
00380 randFeatureIDs[i] = i;
00381
00382
00383 double sumTarget = 0.0, sum2Target = 0.0;
00384 for ( uint j=0;j<nNodeSamples;j++ )
00385 {
00386 REAL v = singleTarget[n->m_trainSamples[j]];
00387 sumTarget += v;
00388 sum2Target += v*v;
00389 }
00390
00391 int bestFeature = -1, bestFeaturePos = -1;
00392 double bestFeatureRMSE = 1e10;
00393 REAL bestFeatureLow = 1e10, bestFeatureHi = 1e10;
00394 REAL optFeatureSplitValue = 1e10;
00395
00396
00397 for ( int i=0;i<m_featureSubspaceSize;i++ )
00398 {
00399
00400 REAL optimalSplitValue = 0.0;
00401 double rmseBest = 1e10;
00402 REAL meanLowBest = 1e10, meanHiBest = 1e10;
00403 int bestPos = -1;
00404 double sumLow = 0.0, sum2Low = 0.0, sumHi = sumTarget, sum2Hi = sum2Target, cntLow = 0.0, cntHi = nNodeSamples;
00405 REAL* ptrInput = inputTmp + i * nNodeSamples;
00406 REAL* ptrTarget = inputTargetsSort + i * nNodeSamples;
00407
00408
00409 int nr = randFeatureIDs[i];
00410 for ( int j=0;j<nNodeSamples;j++ )
00411 ptrInput[j] = input[nr+n->m_trainSamples[j]*m_nFeatures];
00412
00413 if ( m_useOptSplitPoint == false )
00414 {
00415 for ( int j=0;j<nNodeSamples;j++ )
00416 ptrTarget[j] = singleTarget[n->m_trainSamples[j]];
00417
00418 REAL* ptrInput = inputTmp + i * nNodeSamples;
00419 bestPos = rand() % nNodeSamples;
00420 optimalSplitValue = ptrInput[bestPos];
00421 sumLow = 0.0;
00422 sum2Low = 0.0;
00423 cntLow = 0.0;
00424 sumHi = 0.0;
00425 sum2Hi = 0.0;
00426 cntHi = 0.0;
00427 for ( int j=0;j<nNodeSamples;j++ )
00428 {
00429 REAL v = ptrInput[j];
00430 REAL t = ptrTarget[j];
00431 if ( ptrInput[j] <= optimalSplitValue )
00432 {
00433 sumLow += t;
00434 sum2Low += t*t;
00435 cntLow += 1.0;
00436 }
00437 else
00438 {
00439 sumHi += t;
00440 sum2Hi += t*t;
00441 cntHi += 1.0;
00442 }
00443 }
00444 rmseBest = ( sum2Low/cntLow - ( sumLow/cntLow ) * ( sumLow/cntLow ) ) *cntLow;
00445 rmseBest += ( sum2Hi/cntHi - ( sumHi/cntHi ) * ( sumHi/cntHi ) ) *cntHi;
00446 rmseBest = sqrt ( rmseBest/ ( cntLow+cntHi ) );
00447 meanLowBest = sumLow/cntLow;
00448 meanHiBest = sumHi/cntHi;
00449 }
00450 else
00451 {
00452
00453 for ( int j=0;j<nNodeSamples;j++ )
00454 sortIndex[j] = j;
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476 SORT_ASCEND_RADIX_I ( ptrInput, sizeof(REAL), sortIndex, radixTmp0, nNodeSamples);
00477 for(int j=0;j<nNodeSamples;j++)
00478 radixTmp1[j] = ptrInput[j];
00479 for(int j=0;j<nNodeSamples;j++)
00480 {
00481 int idx = sortIndex[j];
00482 ptrInput[j] = radixTmp1[idx];
00483 ptrTarget[j] = singleTarget[n->m_trainSamples[idx]];
00484 }
00485
00486 int j = 0;
00487 while ( j < nNodeSamples-1 )
00488 {
00489 REAL t = ptrTarget[j];
00490 sumLow += t;
00491 sum2Low += t*t;
00492 sumHi -= t;
00493 sum2Hi -= t*t;
00494 cntLow += 1.0;
00495 cntHi -= 1.0;
00496
00497 REAL v0 = ptrInput[j], v1 = 1e10;
00498 if ( j < nNodeSamples -1 )
00499 v1 = ptrInput[j+1];
00500 if ( v0 == v1 )
00501 {
00502 j++;
00503 continue;
00504 }
00505
00506 double rmse = ( sum2Low/cntLow - ( sumLow/cntLow ) * ( sumLow/cntLow ) ) *cntLow;
00507 rmse += ( sum2Hi/cntHi - ( sumHi/cntHi ) * ( sumHi/cntHi ) ) *cntHi;
00508 rmse = sqrt ( rmse/ ( cntLow+cntHi ) );
00509
00510 if ( rmse < rmseBest )
00511 {
00512 optimalSplitValue = v0;
00513 rmseBest = rmse;
00514 bestPos = j+1;
00515 meanLowBest = sumLow/cntLow;
00516 meanHiBest = sumHi/cntHi;
00517 }
00518
00519 j++;
00520 }
00521 }
00522
00523 if ( rmseBest < bestFeatureRMSE )
00524 {
00525 bestFeature = i;
00526 bestFeaturePos = bestPos;
00527 bestFeatureRMSE = rmseBest;
00528 optFeatureSplitValue = optimalSplitValue;
00529 bestFeatureLow = meanLowBest;
00530 bestFeatureHi = meanHiBest;
00531 }
00532 }
00533
00534
00535 for ( uint i=0;i<m_nFeatures;i++ )
00536 usedFeatures[i] = false;
00537
00538 n->m_featureNr = randFeatureIDs[bestFeature];
00539 n->m_value = optFeatureSplitValue;
00540
00541 delete[] randFeatureIDs;
00542
00543 if ( n->m_featureNr < 0 || n->m_featureNr >= m_nFeatures )
00544 {
00545 cout<<"f="<<n->m_featureNr<<endl;
00546 assert ( false );
00547 }
00548
00549
00550 int cnt = 0;
00551 for ( int i=0;i<nNodeSamples;i++ )
00552 {
00553 int nr = n->m_featureNr;
00554 if ( input[nr + n->m_trainSamples[i]*m_nFeatures] <= optFeatureSplitValue )
00555 cnt++;
00556 }
00557
00558 int* lowList = new int[cnt];
00559 int* hiList = new int[nNodeSamples-cnt];
00560 if ( cnt == 0 )
00561 lowList = 0;
00562 if ( nNodeSamples-cnt == 0 )
00563 hiList = 0;
00564
00565 int lowCnt = 0, hiCnt = 0;
00566 double lowMean = 0.0, hiMean = 0.0;
00567 for ( int i=0;i<nNodeSamples;i++ )
00568 {
00569 int nr = n->m_featureNr;
00570 if ( input[nr + n->m_trainSamples[i]*m_nFeatures] <= optFeatureSplitValue )
00571 {
00572 lowList[lowCnt] = n->m_trainSamples[i];
00573 lowMean += singleTarget[n->m_trainSamples[i]];
00574 lowCnt++;
00575 }
00576 else
00577 {
00578 hiList[hiCnt] = n->m_trainSamples[i];
00579 hiMean += singleTarget[n->m_trainSamples[i]];
00580 hiCnt++;
00581 }
00582 }
00583 lowMean /= lowCnt;
00584 hiMean /= hiCnt;
00585
00586 if ( hiCnt+lowCnt != nNodeSamples || lowCnt != cnt )
00587 assert ( false );
00588
00589
00590
00591
00592 if ( lowCnt < 1 || hiCnt < 1 )
00593 {
00594 n->m_featureNr = -1;
00595 n->m_value = lowCnt < 1 ? hiMean : lowMean;
00596 n->m_toSmallerEqual = 0;
00597 n->m_toLarger = 0;
00598 if ( n->m_trainSamples )
00599 delete[] n->m_trainSamples;
00600 n->m_trainSamples = 0;
00601 n->m_nSamples = 0;
00602
00603 nodeReduced currentNode;
00604 currentNode.m_node = n;
00605 currentNode.m_size = 0;
00606 largestNodes.push_back ( currentNode );
00607 push_heap ( largestNodes.begin(), largestNodes.end(), compareNodeReduced );
00608
00609 return;
00610 }
00611
00612
00613 n->m_toSmallerEqual = new node;
00614 n->m_toSmallerEqual->m_featureNr = -1;
00615 n->m_toSmallerEqual->m_value = lowMean;
00616 n->m_toSmallerEqual->m_toSmallerEqual = 0;
00617 n->m_toSmallerEqual->m_toLarger = 0;
00618 n->m_toSmallerEqual->m_trainSamples = lowList;
00619 n->m_toSmallerEqual->m_nSamples = lowCnt;
00620
00621
00622 n->m_toLarger = new node;
00623 n->m_toLarger->m_featureNr = -1;
00624 n->m_toLarger->m_value = hiMean;
00625 n->m_toLarger->m_toSmallerEqual = 0;
00626 n->m_toLarger->m_toLarger = 0;
00627 n->m_toLarger->m_trainSamples = hiList;
00628 n->m_toLarger->m_nSamples = hiCnt;
00629
00630
00631 nodeReduced lowNode, hiNode;
00632 lowNode.m_node = n->m_toSmallerEqual;
00633 lowNode.m_size = lowCnt;
00634 hiNode.m_node = n->m_toLarger;
00635 hiNode.m_size = hiCnt;
00636
00637 largestNodes.push_back ( lowNode );
00638 push_heap ( largestNodes.begin(), largestNodes.end(), compareNodeReduced );
00639
00640 largestNodes.push_back ( hiNode );
00641 push_heap ( largestNodes.begin(), largestNodes.end(), compareNodeReduced );
00642 }
00643
00648 void GBDT::saveWeights ( int cross )
00649 {
00650 char buf[1024];
00651 sprintf ( buf,"%02d",cross );
00652 string name = m_datasetPath + "/" + m_tempPath + "/" + m_weightFile + "." + buf;
00653 if ( m_inRetraining )
00654 cout<<"Save:"<<name<<endl;
00655 fstream f ( name.c_str(), ios::out );
00656
00657
00658 f.write ( ( char* ) &m_lRate, sizeof ( double ) );
00659
00660
00661 f.write ( ( char* ) &m_epoch, sizeof ( int ) );
00662
00663
00664 f.write ( ( char* ) m_globalMean[cross], sizeof ( REAL ) *m_nClass*m_nDomain );
00665
00666
00667 for ( int i=0;i<m_nClass*m_nDomain;i++ )
00668 for ( int j=0;j<m_epoch+1;j++ )
00669 saveTreeRecursive ( & ( m_trees[cross][i][j] ), f );
00670
00671 f.close();
00672 }
00673
00680 void GBDT::saveTreeRecursive ( node* n, fstream &f )
00681 {
00682 f.write ( ( char* ) n, sizeof ( node ) );
00683 if ( n->m_toSmallerEqual )
00684 saveTreeRecursive ( n->m_toSmallerEqual, f );
00685 if ( n->m_toLarger )
00686 saveTreeRecursive ( n->m_toLarger, f );
00687 }
00688
00693 void GBDT::loadWeights ( int cross )
00694 {
00695 char buf[1024];
00696 sprintf ( buf,"%02d",cross );
00697 string name = m_datasetPath + "/" + m_tempPath + "/" + m_weightFile + "." + buf;
00698 cout<<"Load:"<<name<<endl;
00699 fstream f ( name.c_str(), ios::in );
00700 if ( f.is_open() == false )
00701 assert ( false );
00702
00703
00704 f.read ( ( char* ) &m_lRate, sizeof ( double ) );
00705
00706
00707 f.read ( ( char* ) &m_epoch, sizeof ( int ) );
00708
00709
00710 m_globalMean = new REAL*[m_nCross+1];
00711 m_globalMean[cross] = new REAL[m_nClass*m_nDomain];
00712 f.read ( ( char* ) m_globalMean[cross], sizeof ( REAL ) *m_nClass*m_nDomain );
00713
00714
00715 m_trees = new node**[m_nCross+1];
00716 m_trees[cross] = new node*[m_nClass*m_nDomain];
00717 for ( uint i=0;i<m_nClass*m_nDomain;i++ )
00718 {
00719 m_trees[cross][i] = new node[m_epoch+1];
00720 for ( uint j=0;j<m_epoch+1;j++ )
00721 loadTreeRecursive ( & ( m_trees[cross][i][j] ), f );
00722 }
00723
00724 f.close();
00725 }
00726
00730 void GBDT::loadMetaWeights ( int cross )
00731 {
00732
00733 }
00734
00741 void GBDT::loadTreeRecursive ( node* n, fstream &f )
00742 {
00743 f.read ( ( char* ) n, sizeof ( node ) );
00744
00745 if ( n->m_toSmallerEqual )
00746 {
00747 n->m_toSmallerEqual = new node;
00748 loadTreeRecursive ( n->m_toSmallerEqual, f );
00749 }
00750 if ( n->m_toLarger )
00751 {
00752 n->m_toLarger = new node;
00753 loadTreeRecursive ( n->m_toLarger, f );
00754 }
00755 }
00756
00762 string GBDT::templateGenerator ( int id, string preEffect, int nameID, bool blendStop )
00763 {
00764 stringstream s;
00765 s<<"ALGORITHM=GBDT"<<endl;
00766 s<<"ID="<<id<<endl;
00767 s<<"TRAIN_ON_FULLPREDICTOR="<<preEffect<<endl;
00768 s<<"DISABLE=0"<<endl;
00769 s<<endl;
00770 s<<"[int]"<<endl;
00771 s<<"minTuninigEpochs=20"<<endl;
00772 s<<"maxTuninigEpochs=100"<<endl;
00773 s<<"featureSubspaceSize=10"<<endl;
00774 s<<"maxTreeLeafs=100"<<endl;
00775 s<<endl;
00776 s<<"[double]"<<endl;
00777 s<<"initMaxSwing=1.0"<<endl;
00778 s<<"lRate=0.1"<<endl;
00779 s<<endl;
00780 s<<"[bool]"<<endl;
00781 s<<"calculateGlobalMean=1"<<endl;
00782 s<<"useOptSplitPoint=1"<<endl;
00783 s<<"enableClipping=0"<<endl;
00784 s<<"enableTuneSwing=0"<<endl;
00785 s<<endl;
00786 s<<"minimzeProbe="<< ( !blendStop ) <<endl;
00787 s<<"minimzeProbeClassificationError=0"<<endl;
00788 s<<"minimzeBlend="<<blendStop<<endl;
00789 s<<"minimzeBlendClassificationError=0"<<endl;
00790 s<<endl;
00791 s<<"[string]"<<endl;
00792 s<<"weightFile="<<"GBDT_"<<nameID<<"_weights.dat"<<endl;
00793 s<<"fullPrediction=GBDT_"<<nameID<<".dat"<<endl;
00794
00795 return s.str();
00796 }