00001 #include "Data.h"
00002
00003 extern StreamOutput cout;
00004
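/**
 * Constructor
 *
 * Initializes all member variables with zero/default values.
 */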
00008 Data::Data()
00009 {
00010 cout<<"Constructor Data"<<endl;
00011
00012
00013 m_algorithmID = 0;
00014 m_randSeed = 0;
00015 m_nMixDataset = 0;
00016 m_nMixTrainList = 0;
00017 m_nCross = 0;
00018 m_validationType = "Retraining";
00019 m_maxThreadsInCross = 0;
00020 m_enableGlobalMeanStdEstimate = 0;
00021 m_positiveTarget = 0;
00022 m_negativeTarget = 0;
00023 m_blendingRegularization = 0;
00024 m_enableGlobalBlendingWeights = 0;
00025 m_blendingEnableCrossValidation = 0;
00026 m_enablePostNNBlending = 0;
00027 m_enableCascadeLearning = 0;
00028 m_nCascadeInputs = 0;
00029 m_cascadeInputs = 0;
00030 m_nFeatures = 0;
00031 m_nClass = 0;
00032 m_nDomain = 0;
00033 m_mixDatasetIndices = 0;
00034 m_mixList = 0;
00035 m_crossIndex = 0;
00036 m_nTrain = 0;
00037 m_trainOrig = 0;
00038 m_trainTargetOrig = 0;
00039 m_trainTargetOrigEffect = 0;
00040 m_trainTargetOrigResidual = 0;
00041 m_trainLabelOrig = 0;
00042 m_trainBaggingIndex = 0;
00043 m_nTest = 0;
00044 m_testOrig = 0;
00045 m_testTargetOrig = 0;
00046 m_testLabelOrig = 0;
00047 m_slotBoundaries = 0;
00048 m_trainSize = 0;
00049 m_train = 0;
00050 m_trainTarget = 0;
00051 m_trainTargetEffect = 0;
00052 m_trainTargetResidual = 0;
00053 m_trainLabel = 0;
00054 m_probeSize = 0;
00055 m_probe = 0;
00056 m_probeTarget = 0;
00057 m_probeTargetEffect = 0;
00058 m_probeTargetResidual = 0;
00059 m_probeLabel = 0;
00060 m_probeIndex = 0;
00061 m_validSize = 0;
00062 m_valid = 0;
00063 m_validTarget = 0;
00064 m_validLabel = 0;
00065 m_mean = 0;
00066 m_std = 0;
00067 m_standardDeviationMin = 0;
00068 m_targetMean = 0;
00069 m_enableSaveMemory = 0;
00070 m_support = 0;
00071 m_enablePostBlendClipping = 0;
00072 m_addOutputNoise = 0;
00073 m_enableFeatureSelection = 0;
00074 m_featureSelectionWriteBinaryDataset = 0;
00075 m_enableBagging = 0;
00076 m_randomSeedBagging = 0;
00077 m_enableStaticNormalization = 0;
00078 m_staticMeanNormalization = 0.0;
00079 m_staticStdNormalization = 1.0;
00080 m_enableProbablisticNormalization = 0;
00081 m_dimensionalityReduction = "";
00082 m_subsampleTrainSet = 1.0;
00083 m_subsampleFeatures = 1.0;
00084 m_disableTraining = false;
00085 m_globalTrainingLoops = 1;
00086 m_addConstantInput = 0;
00087 m_loadWeightsBeforeTraining = false;
00088 }
00089
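/**
 * Destructor
 */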
00093 Data::~Data()
00094 {
cout<<"Destructor Data"<<endl;
00096
00097 }
00098
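/**
 * Free all internally allocated memory (original train/test buffers,
 * normalization vectors and the per-cross-validation-set buffers)
 * and reset the corresponding pointers to 0.
 */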
00104 void Data::deleteMemory()
00105 {
00106 cout<<"Delete internal memory"<<endl;
00107
00108
00109 if ( m_trainOrig )
00110 delete[] m_trainOrig;
00111 m_trainOrig = 0;
00112 if ( m_trainTargetOrig )
00113 delete[] m_trainTargetOrig;
00114 m_trainTargetOrig = 0;
00115 if ( m_trainLabelOrig )
00116 delete[] m_trainLabelOrig;
00117 m_trainLabelOrig = 0;
00118 if ( m_testOrig )
00119 delete[] m_testOrig;
00120 m_testOrig = 0;
00121 if ( m_testTargetOrig )
00122 delete[] m_testTargetOrig;
00123 m_testTargetOrig = 0;
00124 if ( m_testLabelOrig )
00125 delete[] m_testLabelOrig;
00126 m_testLabelOrig = 0;
00127
00128
00129 if ( m_mean )
00130 delete[] m_mean;
00131 m_mean = 0;
00132 if ( m_std )
00133 delete[] m_std;
00134 m_std = 0;
00135 if ( m_trainTargetOrigEffect )
00136 delete[] m_trainTargetOrigEffect;
00137 m_trainTargetOrigEffect = 0;
00138 if ( m_trainTargetOrigResidual )
00139 delete[] m_trainTargetOrigResidual;
00140 m_trainTargetOrigResidual = 0;
00141
00142 for ( int i=0;i<m_nCross+1;i++ )
00143 {
00144 if ( m_train )
00145 {
00146 if ( m_train[i] )
00147 delete[] m_train[i];
00148 m_train[i] = 0;
00149 }
00150 if ( m_trainTarget )
00151 {
00152 if ( m_trainTarget[i] )
00153 delete[] m_trainTarget[i];
00154 m_trainTarget[i] = 0;
00155 }
00156 if ( m_trainTargetEffect )
00157 {
00158 if ( m_trainTargetEffect[i] )
00159 delete[] m_trainTargetEffect[i];
00160 m_trainTargetEffect[i] = 0;
00161 }
00162 if ( m_trainTargetResidual )
00163 {
00164 if ( m_trainTargetResidual[i] )
00165 delete[] m_trainTargetResidual[i];
00166 m_trainTargetResidual[i] = 0;
00167 }
00168 if ( m_trainLabel )
00169 {
00170 if ( m_trainLabel[i] )
00171 delete[] m_trainLabel[i];
00172 m_trainLabel[i] = 0;
00173 }
00174 if ( m_validationType == "Bagging" )
00175 {
00176 if( m_trainBaggingIndex )
00177 {
00178 if ( m_trainBaggingIndex[i] )
00179 delete[] m_trainBaggingIndex[i];
00180 m_trainBaggingIndex[i] = 0;
00181 }
00182 }
00183 if ( m_probe )
00184 {
00185 if ( m_probe[i] )
00186 delete[] m_probe[i];
00187 m_probe[i] = 0;
00188 }
00189 if ( m_probeTarget )
00190 {
00191 if ( m_probeTarget[i] )
00192 delete[] m_probeTarget[i];
00193 m_probeTarget[i] = 0;
00194 }
00195 if ( m_probeTargetEffect )
00196 {
00197 if ( m_probeTargetEffect[i] )
00198 delete[] m_probeTargetEffect[i];
00199 m_probeTargetEffect[i] = 0;
00200 }
00201 if ( m_probeTargetResidual )
00202 {
00203 if ( m_probeTargetResidual[i] )
00204 delete[] m_probeTargetResidual[i];
00205 m_probeTargetResidual[i] = 0;
00206 }
00207 if ( m_probeLabel )
00208 {
00209 if ( m_probeLabel[i] )
00210 delete[] m_probeLabel[i];
00211 m_probeLabel[i] = 0;
00212 }
00213 if ( m_probeIndex )
00214 {
00215 if ( m_probeIndex[i] )
00216 delete[] m_probeIndex[i];
00217 m_probeIndex[i] = 0;
00218 }
00219 }
00220 if ( m_train )
00221 delete[] m_train;
00222 m_train = 0;
00223 if ( m_trainTarget )
00224 delete[] m_trainTarget;
00225 m_trainTarget = 0;
00226 if ( m_trainTargetEffect )
00227 delete[] m_trainTargetEffect;
00228 m_trainTargetEffect = 0;
00229 if ( m_trainTargetResidual )
00230 delete[] m_trainTargetResidual;
00231 m_trainTargetResidual = 0;
00232 if ( m_trainLabel )
00233 delete[] m_trainLabel;
00234 m_trainLabel = 0;
00235 if(m_validationType == "Bagging")
00236 {
00237 if(m_trainBaggingIndex)
00238 delete[] m_trainBaggingIndex;
00239 m_trainBaggingIndex = 0;
00240 }
00241 if ( m_probe )
00242 delete[] m_probe;
00243 m_probe = 0;
00244 if ( m_probeTarget )
00245 delete[] m_probeTarget;
00246 m_probeTarget = 0;
00247 if ( m_probeTargetEffect )
00248 delete[] m_probeTargetEffect;
00249 m_probeTargetEffect = 0;
00250 if ( m_probeTargetResidual )
00251 delete[] m_probeTargetResidual;
00252 m_probeTargetResidual = 0;
00253 if ( m_probeLabel )
00254 delete[] m_probeLabel;
00255 m_probeLabel = 0;
00256 if ( m_probeIndex )
00257 delete[] m_probeIndex;
00258 m_probeIndex = 0;
00259
00260 if ( m_trainSize )
00261 delete[] m_trainSize;
00262 m_trainSize = 0;
00263 if ( m_probeSize )
00264 delete[] m_probeSize;
00265 m_probeSize = 0;
00266
00267 if ( m_mixDatasetIndices )
00268 delete[] m_mixDatasetIndices;
00269 m_mixDatasetIndices = 0;
00270 if ( m_mixList )
00271 delete[] m_mixList;
00272 m_mixList = 0;
00273 if ( m_slotBoundaries )
00274 delete[] m_slotBoundaries;
00275 m_slotBoundaries = 0;
00276 if ( m_crossIndex )
00277 delete[] m_crossIndex;
00278 m_crossIndex = 0;
00279
00280 if ( m_cascadeInputs )
00281 delete[] m_cascadeInputs;
00282 m_cascadeInputs = 0;
00283
00284 if ( m_targetMean )
00285 delete[] m_targetMean;
00286 m_targetMean = 0;
00287
00288 }
00289
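/**
 * Read the dataset with the given name from m_datasetPath/m_dataPath using
 * the matching DatasetReader method. Afterwards the optional constant input,
 * training-set subsampling, feature subsampling and dataset mixing steps are
 * applied. Exits if the dataset name is unknown.
 *
 * @param name Dataset identifier, e.g. "MNIST", "CSV" or "ARFF"
 */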
00296 void Data::readDataset ( string name )
00297 {
00298
00299 if ( name == "MNIST" )
00300 {
00301 DatasetReader r;
00302
00303 r.readMNIST ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00304 }
00305 else if ( name == "NETFLIX" )
00306 {
00307 DatasetReader r;
00308
00309 r.readNETFLIX ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00310 }
00311 else if ( name == "AusDM2009" )
00312 {
00313 DatasetReader r;
00314
00315 r.readAusDM2009 ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00316 }
00317 else if ( name == "KDDCup09Large" )
00318 {
00319 DatasetReader r;
00320
00321 r.readKDDCup09Large ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00322 }
00323 else if ( name == "KDDCup09Small" )
00324 {
00325 DatasetReader r;
00326
00327 r.readKDDCup09Small ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00328 }
00329 else if ( name == "BINARY" )
00330 {
00331 DatasetReader r;
00332
00333 r.readBINARY ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00334 }
00335 else if ( name == "CSV" )
00336 {
00337 DatasetReader r;
00338
00339 r.readCSV ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00340 }
00341 else if ( name == "ARFF" )
00342 {
00343 DatasetReader r;
00344
00345 r.readARFF ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00346 }
00347 else if ( name == "PRUDSYS_DMC2009" )
00348 {
00349 DatasetReader r;
00350
00351 r.readPRUDSYS ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00352 }
00353 else if ( name == "ADULT" )
00354 {
00355 DatasetReader r;
00356
00357 r.readADULT ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00358 }
00359 else if ( name == "AUSTRALIAN" )
00360 {
00361 DatasetReader r;
00362
00363 r.readAUSTRALIAN ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00364 }
00365 else if ( name == "BALANCE" )
00366 {
00367 DatasetReader r;
00368
00369 r.readBALANCE ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00370 }
00371 else if ( name == "CYLINDER-BANDS" )
00372 {
00373 DatasetReader r;
00374
00375 r.readCYLINDERBANDS ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00376 }
00377 else if ( name == "BREAST" )
00378 {
00379 DatasetReader r;
00380
00381 r.readBREASTCANCERWISCONSIN ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00382 }
00383 else if ( name == "CREDIT" )
00384 {
00385 DatasetReader r;
00386
00387 r.readAUSTRALIANCREDIT ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00388 }
00389 else if ( name == "DIABETES" )
00390 {
00391 DatasetReader r;
00392
00393 r.readDIABETES ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00394 }
00395 else if ( name == "GERMAN" )
00396 {
00397 DatasetReader r;
00398
00399 r.readGERMAN ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00400 }
00401 else if ( name == "GLASS" )
00402 {
00403 DatasetReader r;
00404
00405 r.readGLASS ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00406 }
00407 else if ( name == "HEART-SPECTF" )
00408 {
00409 DatasetReader r;
00410
00411 r.readHEART ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00412 }
00413 else if ( name == "HEPATITIS" )
00414 {
00415 DatasetReader r;
00416
00417 r.readHEPATITIS ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00418 }
00419 else if ( name == "IONOSPHERE" )
00420 {
00421 DatasetReader r;
00422
00423 r.readIONOSPHERE ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00424 }
00425 else if ( name == "IRIS" )
00426 {
00427 DatasetReader r;
00428
00429 r.readIRIS ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00430 }
00431 else if ( name == "LETTER" )
00432 {
00433 DatasetReader r;
00434
00435 r.readLETTER ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00436 }
00437 else if ( name == "MONKS-1" )
00438 {
00439 DatasetReader r;
00440
00441 r.readMONKS1 ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00442 }
00443 else if ( name == "MONKS-2" )
00444 {
00445 DatasetReader r;
00446
00447 r.readMONKS2 ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00448 }
00449 else if ( name == "MONKS-3" )
00450 {
00451 DatasetReader r;
00452
00453 r.readMONKS3 ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00454 }
00455 else if ( name == "MUSHROOM" )
00456 {
00457 DatasetReader r;
00458
00459 r.readMUSHROOM ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00460 }
00461 else if ( name == "SATIMAGE" )
00462 {
00463 DatasetReader r;
00464
00465 r.readSATIMAGE ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00466 }
00467 else if ( name == "SEGMENTATION" )
00468 {
00469 DatasetReader r;
00470
00471 r.readSEGMENTATION ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00472 }
00473 else if ( name == "SONAR" )
00474 {
00475 DatasetReader r;
00476
00477 r.readSONAR ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00478 }
00479 else if ( name == "VEHICLE" )
00480 {
00481 DatasetReader r;
00482
00483 r.readVEHICLE ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00484 }
00485 else if ( name == "VOTES" )
00486 {
00487 DatasetReader r;
00488
00489 r.readVOTES ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00490 }
00491 else if ( name == "WINE" )
00492 {
00493 DatasetReader r;
00494
00495 r.readWINE ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00496 }
00497 else if ( name == "POKER" )
00498 {
00499 DatasetReader r;
00500
00501 r.readPOKER ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00502 }
00503 else if ( name == "YEAST" )
00504 {
00505 DatasetReader r;
00506
00507 r.readYEAST ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00508 }
00509 else if ( name == "SURVIVAL" )
00510 {
00511 DatasetReader r;
00512
00513 r.readSURVIVAL ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00514 }
00515 else if ( name == "SPIDER" )
00516 {
00517 DatasetReader r;
00518
00519 r.readSPIDER ( m_datasetPath+"/"+m_dataPath, m_trainOrig, m_trainTargetOrig, m_trainLabelOrig, m_testOrig, m_testTargetOrig, m_testLabelOrig, m_nTrain, m_nTest, m_nClass, m_nDomain, m_nFeatures, m_positiveTarget, m_negativeTarget );
00520 }
00521 else
00522 {
cout<<"Dataset not found: "<<name<<endl;
exit ( 1 );
00525 }
00526
00527 if(m_addConstantInput)
00528 addConstantInput();
00529
00530
00531 reduceTrainingSetSize ( m_subsampleTrainSet );
00532
00533
00534 int nFeatOrig = m_nFeatures;
00535 reduceFeatureSize ( m_trainOrig, m_nTrain, m_nFeatures, m_subsampleFeatures, Framework::getFrameworkMode() );
00536 reduceFeatureSize ( m_testOrig, m_nTest, nFeatOrig, m_subsampleFeatures, true );
00537
00538
00539 if ( m_featureSelectionWriteBinaryDataset )
00540 {
00541 makeBinaryDataset();
00542 exit ( 0 );
00543 }
00544
00545
00546 mixDataset();
00547 }
00548
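/**
 * Draw a bootstrap sample (sampling with replacement) of the training set,
 * or a simple random subset without replacement if 0 < nTrainNew < m_nTrain.
 * The given references are redirected to newly allocated, resampled buffers.
 *
 * @param probs Optional per-sample sampling probabilities (0 = uniform)
 * @param nTrainNew Size of a random subset, or 0 for a full bootstrap sample
 */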
00557 void Data::doBootstrapSampling ( REAL* probs, REAL* &train, REAL* &target, REAL* &targetEff, REAL* &targetRes, int* &label, int nTrainNew )
00558 {
cout<<endl<<"Do bootstrap sampling of the dataset (size:"<<m_nTrain<<")"<<endl;
00560 cout<<"Random seed:"<<m_randomSeedBagging<<endl;
00561 srand ( m_randomSeedBagging );
00562
00563 if ( nTrainNew > 0 && nTrainNew < m_nTrain )
cout<<"Not drawing a bootstrap sample, drawing a simple random subset ("<<100.0* ( double ) nTrainNew/ ( double ) m_nTrain<<"%)"<<endl;
00565
00566 REAL* trainNew = 0, *ptr0, *ptr1;
00567 if ( train )
00568 trainNew = new REAL[m_nFeatures*m_nTrain];
00569 REAL* targetNew = 0;
00570 if ( target )
00571 targetNew = new REAL[m_nClass*m_nDomain*m_nTrain];
00572 REAL* targetEffNew = 0;
00573 if ( targetEff )
00574 targetEffNew = new REAL[m_nClass*m_nDomain*m_nTrain];
00575 REAL* targetResNew = 0;
00576 if ( targetRes )
00577 targetResNew = new REAL[m_nClass*m_nDomain*m_nTrain];
00578 int* labelNew = 0;
00579 if ( Framework::getDatasetType() ==true )
00580 labelNew = new int[m_nDomain*m_nTrain];
00581 int* replicateCnt = new int[m_nTrain];
00582 for ( int i=0;i<m_nTrain;i++ )
00583 replicateCnt[i] = 0;
00584
00585 int sampleCnt = 0;
00586 while ( ( sampleCnt < m_nTrain && nTrainNew == 0 ) || ( sampleCnt < nTrainNew && nTrainNew > 0 && nTrainNew < m_nTrain ) )
00587
00588 {
00589
00590 int ind;
00591 if ( nTrainNew == 0 || nTrainNew >= m_nTrain )
00592 {
00593 if ( probs == 0 )
00594 ind = rand() %m_nTrain;
00595 else
00596 ind = vectorSampling ( probs, m_nTrain );
00597 }
00598 else
00599 {
00600 ind = rand() %m_nTrain;
00601 while ( replicateCnt[ind] )
00602 ind = rand() %m_nTrain;
00603 }
00604 replicateCnt[ind]++;
00605
00606
00607 if ( train )
00608 {
00609 ptr0 = train + ind * m_nFeatures;
00610 ptr1 = trainNew + sampleCnt * m_nFeatures;
00611 for ( int j=0;j<m_nFeatures;j++ )
00612 ptr1[j] = ptr0[j];
00613 }
00614
00615
00616 if ( target )
00617 {
00618 ptr0 = target + ind * m_nClass*m_nDomain;
00619 ptr1 = targetNew + sampleCnt * m_nClass*m_nDomain;
00620 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00621 ptr1[j] = ptr0[j];
00622 }
00623
00624
00625 if ( targetEff )
00626 {
00627 ptr0 = targetEff + ind * m_nClass*m_nDomain;
00628 ptr1 = targetEffNew + sampleCnt * m_nClass*m_nDomain;
00629 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00630 ptr1[j] = ptr0[j];
00631 }
00632
00633
00634 if ( targetRes )
00635 {
00636 ptr0 = targetRes + ind * m_nClass*m_nDomain;
00637 ptr1 = targetResNew + sampleCnt * m_nClass*m_nDomain;
00638 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00639 ptr1[j] = ptr0[j];
00640 }
00641
00642
00643 if ( Framework::getDatasetType() ==true )
00644 for ( int d=0;d<m_nDomain;d++ )
00645 labelNew[d+sampleCnt*m_nDomain] = label[d+ind*m_nDomain];
00646
00647 sampleCnt++;
00648 }
00649
00650 int nonReplicates = 0, notUsed = 0, replicates = 0;
00651 for ( int i=0;i<m_nTrain;i++ )
00652 {
00653 if ( replicateCnt[i] == 0 )
00654 notUsed++;
00655 if ( replicateCnt[i] == 1 )
00656 nonReplicates++;
00657 if ( replicateCnt[i] > 1 )
00658 replicates++;
00659 }
00660 cout<<"notUsed:"<<notUsed<<" nonReplicates:"<<nonReplicates<<" replicates:"<<replicates;
00661 cout<<" ("<<100.0* ( REAL ) ( nonReplicates+replicates ) / ( REAL ) m_nTrain<<"%)"<<endl<<endl;
00662
00663 delete[] replicateCnt;
00664
00665
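// redirect the caller's references to the resampled buffers (the previous buffers are not freed here)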
00666 train = trainNew;
00667 target = targetNew;
00668 targetEff = targetEffNew;
00669 targetRes = targetResNew;
00670 label = labelNew;
00671 }
00672
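/**
 * Sample an index in [0, length) with probability proportional to probs[i].
 *
 * @return The sampled index
 */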
00680 int Data::vectorSampling ( REAL* probs, int length )
00681 {
00682 double sum = 0.0;
00683 for ( int i=0;i<length;i++ )
00684 sum += probs[i];
00685
00686 double value = sum * ( ( double ) rand() / ( double ) RAND_MAX );
00687
00688 sum = 0.0;
00689 for ( int i=0;i<length;i++ )
00690 {
00691 sum += probs[i];
00692 if ( sum >= value )
00693 return i;
00694 }
00695 cout<<"value:"<<value<<endl<<"length:"<<length<<endl<<"sum:"<<sum<<endl;
00696 for ( int i=0;i<length;i++ )
00697 cout<<probs[i]<<" "<<flush;
00698 assert ( false );
00699 return -1;
00700 }
00701
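/**
 * Write the feature columns listed in FEATURE_TXT_FILE to a binary dataset
 * file ("binary.train" or "binary.test", depending on the framework mode),
 * preceded by a header with #lines, nClass, nDomain and nValidFeatures,
 * followed by the labels.
 */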
00707 void Data::makeBinaryDataset()
00708 {
00709 cout<<endl;
00710 cout<<"Make binary dataset from selected features"<<endl;
00711 cout<<"Open features:"<<FEATURE_TXT_FILE<<endl;
00712
00713
00714 fstream f;
00715 vector<int> features;
00716 f.open ( FEATURE_TXT_FILE,ios::in );
00717 if ( f.is_open() ==false )
00718 assert ( false );
00719 int value, nValidFeatures = 0;
00720 while ( f>>value )
00721 features.push_back ( value );
00722 f.close();
00723
00724
00725 for ( int j=0;j<features.size();j++ )
00726 if ( features[j] >= m_nFeatures || features[j] == -1 )
00727 assert ( false );
00728 else
00729 nValidFeatures++;
00730
00731 cout<<"nValidFeatures:"<<nValidFeatures<<endl;
00732 REAL* feat;
00733 int* label, N;
00734
00735 if ( Framework::getFrameworkMode() == 1 )
00736 {
00737 cout<<"Write: binary.test"<<endl;
00738 f.open ( "binary.test", ios::out );
00739 feat = m_testOrig;
00740 label = m_testLabelOrig;
00741 N = m_nTest;
00742 }
00743 else
00744 {
00745 cout<<"Write: binary.train"<<endl;
00746 f.open ( "binary.train", ios::out );
00747 feat = m_trainOrig;
00748 label = m_trainLabelOrig;
00749 N = m_nTrain;
00750 }
00751
00752 cout<<"#lines:"<<N<<endl;
00753
00754
00755 f.write ( ( char* ) &N, sizeof ( int ) );
00756 f.write ( ( char* ) &m_nClass, sizeof ( int ) );
00757 f.write ( ( char* ) &m_nDomain, sizeof ( int ) );
00758 f.write ( ( char* ) &nValidFeatures, sizeof ( int ) );
00759
00760
00761 for ( int i=0;i<N;i++ )
00762 for ( int j=0;j<features.size();j++ )
00763 f.write ( ( char* ) & ( feat[i*m_nFeatures + features[j]] ), sizeof ( REAL ) );
00764
00765
00766 f.write ( ( char* ) label, sizeof ( int ) *N*m_nDomain );
00767 f.close();
00768
00769 }
00770
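/**
 * Randomly permute the training set in place with m_nMixDataset*m_nTrain
 * line swaps (features, targets and labels) and record the permutation in
 * m_mixDatasetIndices. Does nothing if the training set is empty.
 */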
00775 void Data::mixDataset()
00776 {
00777 if ( m_nTrain )
00778 {
00779 m_mixDatasetIndices = new int[m_nTrain];
00780 for ( int i=0;i<m_nTrain;i++ )
00781 m_mixDatasetIndices[i] = i;
00782 }
00783 else
00784 {
cout<<"Do not mix the dataset."<<endl;
00786 m_mixDatasetIndices = 0;
00787 return;
00788 }
00789 cout<<"Randomize the dataset: "<<m_nMixDataset*m_nTrain<<" line swaps [";
00790
00791 int progress = m_nTrain*m_nMixDataset/10 + 1;
00792 REAL* tmp0 = new REAL[m_nFeatures];
00793 REAL* tmp1 = new REAL[m_nClass*m_nDomain];
00794 for ( int i=0;i<m_nTrain*m_nMixDataset;i++ )
00795 {
00796 if ( i%progress==0 )
00797 cout<<"."<<flush;
00798
00799
00800 int ind0 = rand() %m_nTrain;
00801 int ind1 = rand() %m_nTrain;
00802
00803
00804 REAL* ptr0 = m_trainOrig + ind0 * m_nFeatures;
00805 REAL* ptr1 = m_trainOrig + ind1 * m_nFeatures;
00806 for ( int j=0;j<m_nFeatures;j++ )
00807 {
00808 tmp0[j] = ptr0[j];
00809 ptr0[j] = ptr1[j];
00810 ptr1[j] = tmp0[j];
00811 }
00812
00813
00814 ptr0 = m_trainTargetOrig + ind0 * m_nClass * m_nDomain;
00815 ptr1 = m_trainTargetOrig + ind1 * m_nClass * m_nDomain;
00816 for ( int j=0;j<m_nClass*m_nDomain;j++ )
00817 {
00818 tmp1[j] = ptr0[j];
00819 ptr0[j] = ptr1[j];
00820 ptr1[j] = tmp1[j];
00821 }
00822
00823
00824 if ( Framework::getDatasetType() ==true )
00825 {
00826 for ( int d=0;d<m_nDomain;d++ )
00827 {
00828 int tmp = m_trainLabelOrig[d+ind0*m_nDomain];
00829 m_trainLabelOrig[d+ind0*m_nDomain] = m_trainLabelOrig[d+ind1*m_nDomain];
00830 m_trainLabelOrig[d+ind1*m_nDomain] = tmp;
00831 }
00832 }
00833
00834
00835 int tmp = m_mixDatasetIndices[ind0];
00836 m_mixDatasetIndices[ind0] = m_mixDatasetIndices[ind1];
00837 m_mixDatasetIndices[ind1] = tmp;
00838 }
00839 if ( tmp0 )
00840 delete[] tmp0;
00841 tmp0 = 0;
00842 if ( tmp1 )
00843 delete[] tmp1;
00844 tmp1 = 0;
00845
00846 cout<<"] "<<"mixInd[0]:"<<m_mixDatasetIndices[0]<<" mixInd["<<m_nTrain-1<<"]:"<<m_mixDatasetIndices[m_nTrain-1]<<endl;
00847 }
00848
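/**
 * Load the per-feature mean and std vectors from
 * <datasetPath>/<tempPath>/normalization.dat.add<nCascade> and report their
 * min/max values.
 */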
00853 void Data::loadNormalization ( int nCascade )
00854 {
00855
00856 char buf[1024];
00857 sprintf ( buf,"%s/%s/normalization.dat.add%d",m_datasetPath.c_str(), m_tempPath.c_str(), nCascade );
00858 cout<<"Load mean and std: "<<buf<<endl;
00859 fstream f ( buf, ios::in );
00860 if ( f.is_open() == false )
00861 assert ( false );
00862 int n;
00863 f.read ( ( char* ) &n, sizeof ( int ) );
00864 if ( m_mean == 0 )
00865 m_mean = new REAL[n];
00866 if ( m_std == 0 )
00867 m_std = new REAL[n];
00868 f.read ( ( char* ) m_mean, sizeof ( REAL ) *n );
00869 f.read ( ( char* ) m_std, sizeof ( REAL ) *n );
00870 REAL min = 1e10, max = -1e10;
00871 for ( int i=0;i<n;i++ )
00872 {
00873 if ( min > m_mean[i] )
00874 min = m_mean[i];
00875 if ( max < m_mean[i] )
00876 max = m_mean[i];
00877 }
00878 cout<<"Mean: min|max:"<<min<<"|"<<max<<endl;
00879 min = 1e10;
00880 max = -1e10;
00881 for ( int i=0;i<n;i++ )
00882 {
00883 if ( min > m_std[i] )
00884 min = m_std[i];
00885 if ( max < m_std[i] )
00886 max = m_std[i];
00887 }
00888 cout<<"Std: min|max:"<<min<<"|"<<max<<endl;
00889 f.close();
00890 }
00891
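/**
 * Allocate the per-fold train/probe buffers and prepare the splits:
 * compute the per-feature mean and std over the training set (with special
 * handling of constant features and the optional static, global or
 * probabilistic normalization variants), save them to disk, build the
 * randomized sample index list and determine the fold boundaries
 * (Retraining/CrossFoldMean) or the bagging indices (Bagging).
 */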
00901 void Data::allocMemForCrossValidationSets()
00902 {
00903 cout<<"Alloc mem for cross validation data sets"<<endl;
00904 m_mean = new REAL[m_nFeatures];
00905 m_std = new REAL[m_nFeatures];
00906
00907 if(m_validationType == "ValidationSet")
00908 m_nCross = 0;
00909 else
00910 {
00911
00912 if ( m_nCross > m_nTrain )
00913 {
00914 cout<<"Limit: nCross=nTrain"<<endl;
00915 m_nCross = m_nTrain;
00916 }
00917 cout<<"Cross-validation settings: "<<m_nCross<<" sets"<<endl;
00918 }
00919
00920
00921 cout<<"Calculating mean and std per input"<<endl;
00922 double minStd = 1e10, maxStd = -1e10, minMean = 1e10, maxMean = -1e10, minValue = 1e10, maxValue = -1e10;
00923 for ( int i=0;i<m_nFeatures;i++ )
00924 {
00925
00926 double mean = 0.0;
00927 for ( int j=0;j<m_nTrain;j++ )
00928 {
00929 REAL v = m_trainOrig[j*m_nFeatures + i];
00930 mean += v;
00931 if ( minValue > v )
00932 minValue = v;
00933 if ( maxValue < v )
00934 maxValue = v;
00935 }
00936 mean /= ( double ) m_nTrain;
00937
00938
00939 double std = 0.0;
00940 for ( int j=0;j<m_nTrain;j++ )
00941 std += ( mean - m_trainOrig[j*m_nFeatures + i] ) * ( mean - m_trainOrig[j*m_nFeatures + i] );
00942 std = sqrt ( std/ ( double ) ( m_nTrain-1 ) );
00943
00944 if ( m_datasetName=="KDDCup09Large" || m_datasetName=="KDDCup09Small" )
00945 {
00946 double max = -1e10;
00947 for ( int j=0;j<m_nTrain;j++ )
00948 if ( max < fabs ( m_trainOrig[j*m_nFeatures + i]-mean ) )
00949 max = fabs ( m_trainOrig[j*m_nFeatures + i]-mean );
00950 std = max;
00951 }
00952
00953 if ( fabs ( std ) < 1e-9 && mean == 0.0 )
00954 {
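// feature i is constant zero: use a huge std so it stays ~0 after normalization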
00955
00956 cout<<"f:"<<i<<"=0 "<<flush;
00957 std = 1e10;
00958 }
00959 if ( fabs ( std ) < 1e-9 && mean != 0.0 )
00960 {
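// feature i is a non-zero constant: map it to 1 (std = mean, mean = 0)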
00961
00962 cout<<"f:"<<i<<"=c "<<flush;
00963 std = mean;
00964 mean = 0.0;
00965 }
00966 if ( mean==1.0 )
00967 {
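// mean of exactly 1: keep the values unscaled (std = 1, mean = 0)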
00968
00969 cout<<"f:"<<i<<"=1 "<<flush;
00970 std = 1.0;
00971 mean = 0.0;
00972 }
00973 if ( std < m_standardDeviationMin )
00974 {
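// enforce the configured lower bound on the standard deviation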
00975
00976 cout<<"f:"<<i<<"lim "<<flush;
00977 std = m_standardDeviationMin;
00978 }
00979
00980 minStd = minStd > std? std : minStd;
00981 maxStd = maxStd < std? std : maxStd;
00982 minMean = minMean > mean? mean : minMean;
00983 maxMean = maxMean < mean? mean : maxMean;
00984
00985
00986 m_mean[i] = mean;
00987 m_std[i] = std;
00988 }
00989 if ( m_enableStaticNormalization )
00990 {
00991 cout<<"Static mean:"<<m_staticMeanNormalization<<" and std:"<<m_staticStdNormalization<<endl;
00992 for ( int i=0;i<m_nFeatures;i++ )
00993 {
00994 m_mean[i] = m_staticMeanNormalization;
00995 m_std[i] = m_staticStdNormalization;
00996 }
00997 minMean = m_staticMeanNormalization;
00998 maxMean = m_staticMeanNormalization;
00999 minStd = m_staticStdNormalization;
01000 maxStd = m_staticStdNormalization;
01001 }
01002 if ( m_enableGlobalMeanStdEstimate )
01003 {
01004 cout<<"Calc average of mean and std"<<endl;
01005 double mean = 0.0;
01006 for ( int i=0;i<m_nFeatures;i++ )
01007 mean += m_mean[i];
01008 mean /= ( double ) m_nFeatures;
01009 for ( int i=0;i<m_nFeatures;i++ )
01010 m_mean[i] = mean;
01011 minMean = maxMean = mean;
01012
01013 double std = 0.0;
01014 int stdCnt = 0;
01015 for ( int i=0;i<m_nFeatures;i++ )
01016 {
01017 if ( m_std[i] != 1e10 )
01018 {
01019 std += m_std[i];
01020 stdCnt++;
01021 }
01022 }
01023 if ( stdCnt == 0 )
01024 assert ( false );
01025 std /= ( double ) stdCnt;
01026 for ( int i=0;i<m_nFeatures;i++ )
01027 m_std[i] = std;
01028 minStd = maxStd = std;
01029 }
01030 if ( m_enableProbablisticNormalization )
01031 {
cout<<"Calc probabilistic normalization"<<endl;
01033 minStd = 1e10;
01034 maxStd = -1e10;
01035 minMean = 1e10;
01036 maxMean = -1e10;
01037 for ( int i=0;i<m_nFeatures;i++ )
01038 {
01039 REAL min = 1e10, max = -1e10;
01040 for ( int j=0;j<m_nTrain;j++ )
01041 {
01042 REAL v = m_trainOrig[i + j*m_nFeatures];
01043 if ( min > v )
01044 min = v;
01045 if ( max < v )
01046 max = v;
01047 }
01048 REAL diff = max - min;
01049 m_mean[i] = min;
01050 m_std[i] = diff;
01051 if ( m_std[i] < 1e-6 )
01052 m_std[i] = 1.0;
01053
01054 minStd = minStd > m_std[i]? m_std[i] : minStd;
01055 maxStd = maxStd < m_std[i]? m_std[i] : maxStd;
01056 minMean = minMean > m_mean[i]? m_mean[i] : minMean;
01057 maxMean = maxMean < m_mean[i]? m_mean[i] : maxMean;
01058 }
01059 cout<<"mean|std:"<<endl;
01060 for ( int i=0;i<m_nFeatures;i++ )
01061 cout<<m_mean[i]<<"|"<<m_std[i]<<" ";
01062 cout<<endl;
01063 }
01064 cout<<"Min|Max mean: "<<minMean<<"|"<<maxMean<<" Min|Max std: "<<minStd<<"|"<<maxStd<<" Min|Max value: "<<minValue<<"|"<<maxValue<<endl;
01065
01066
01067 cout<<"Target means: "<<flush;
01068 for ( int i=0;i<m_nClass*m_nDomain;i++ )
01069 {
01070 double mean = 0.0;
// average target column i over all training samples
for ( int j=0;j<m_nTrain;j++ )
mean += m_trainTargetOrig[j*m_nClass*m_nDomain + i];
01074 cout<<i<<":"<<mean/ ( double ) ( m_nTrain ) <<" ";
01075 }
01076 cout<<endl;
01077
01078
01079 char buf[1024];
01080 sprintf ( buf,"%s/%s/normalization.dat.add%d",m_datasetPath.c_str(), m_tempPath.c_str(), m_nCascadeInputs );
01081 cout<<"Save mean and std: "<<buf<<endl;
01082 fstream f ( buf, ios::out );
01083 f.write ( ( char* ) &m_nFeatures, sizeof ( int ) );
01084 f.write ( ( char* ) m_mean, sizeof ( REAL ) *m_nFeatures );
01085 f.write ( ( char* ) m_std, sizeof ( REAL ) *m_nFeatures );
01086 f.close();
01087
01088 m_mixList = new int[m_nTrain];
01089
01090
01091 for ( int i=0;i<m_nTrain;i++ )
01092 m_mixList[i] = i;
01093
01094
01095 cout<<"Random seed:"<<m_randSeed<<endl;
01096 srand ( m_randSeed );
01097
01098 cout<<"nFeatures:"<<m_nFeatures<<endl;
01099 cout<<"nClass:"<<m_nClass<<endl;
01100 cout<<"nDomain:"<<m_nDomain<<endl;
01101
01102 if ( m_validationType == "ValidationSet" )
01103 {
01104
01105 m_trainSize = new int[1];
01106 m_trainSize[0] = m_nTrain;
01107 return;
01108 }
01109
01110
01111 m_trainTargetOrigEffect = new REAL[m_nClass*m_nDomain*m_nTrain];
01112 m_trainTargetOrigResidual = new REAL[m_nClass*m_nDomain*m_nTrain];
01113
01114
01115 m_trainSize = new int[m_nCross+1];
01116 m_train = new REAL*[m_nCross+1];
01117 m_trainTarget = new REAL*[m_nCross+1];
01118 m_trainTargetEffect = new REAL*[m_nCross+1];
01119 m_trainTargetResidual = new REAL*[m_nCross+1];
01120 m_trainLabel = new int*[m_nCross+1];
01121 if(m_validationType == "Bagging")
01122 m_trainBaggingIndex = new int*[m_nCross+1];
01123
01124 m_probeSize = new int[m_nCross+1];
01125 m_probe = new REAL*[m_nCross+1];
01126 m_probeTarget = new REAL*[m_nCross+1];
01127 m_probeTargetEffect = new REAL*[m_nCross+1];
01128 m_probeTargetResidual = new REAL*[m_nCross+1];
01129 m_probeLabel = new int*[m_nCross+1];
01130 m_probeIndex = new int*[m_nCross+1];
01131
01132
01133
01134 int index0, index1, tmp;
01135 cout<<"Make "<<m_nTrain*m_nMixTrainList<<" index swaps (randomize sample index list)"<<endl;
01136 for ( int i=0;i<m_nTrain*m_nMixTrainList;i++ )
01137 {
01138 index0 = rand() % m_nTrain;
01139 index1 = rand() % m_nTrain;
01140
01141
01142 tmp = m_mixList[index0];
01143 m_mixList[index0] = m_mixList[index1];
01144 m_mixList[index1] = tmp;
01145 }
01146
01147 if( m_validationType == "Retraining" || m_validationType == "CrossFoldMean" )
01148 {
01149 m_slotBoundaries = new int[m_nCross+2];
01150
01151 double partitionSize = ( double ) m_nTrain / ( double ) m_nCross;
01152 double accumulatedSize = partitionSize;
01153 int cnt = 0, currentSize = -1;
01154 m_slotBoundaries[0] = 0;
01155 m_slotBoundaries[m_nCross+1] = m_nTrain;
01156 cout<<"partition size: "<<partitionSize<<endl;
01157
01158
01159 for ( int i=0;i<=m_nTrain;i++ )
01160 {
01161 currentSize++;
01162 if ( cnt < m_nCross )
01163 {
01164 if ( i == ( int ) round ( accumulatedSize ) || i==m_nTrain )
01165 {
01166 m_slotBoundaries[cnt+1] = i;
01167 m_probeSize[cnt] = currentSize;
01168 m_trainSize[cnt] = m_nTrain - currentSize;
01169 currentSize = 0;
01170 accumulatedSize += partitionSize;
01171 cnt++;
01172 }
01173 }
01174 }
01175 m_trainSize[m_nCross] = m_nTrain;
01176 m_probeSize[m_nCross] = 0;
01177
01178
01179 int sum = 0;
01180 cout<<"slot: TRAIN | PROBE"<<endl<<"==================="<<endl;
01181 for ( int i=0;i<m_nCross+1;i++ )
01182 {
01183 cout<<i<<": "<<m_trainSize[i]<<" | "<<m_probeSize[i]<<endl;
01184 sum += m_probeSize[i];
01185 }
01186 cout<<"probe sum:"<<sum<<endl;
01187 }
01188 else if ( m_validationType == "Bagging" )
01189 {
01190 bool* bagSamples = new bool[m_nTrain];
01191 cout<<"Bagging sizes: TRAIN | PROBE"<<endl<<"============================"<<endl;
01192 for(int i=0;i<m_nCross;i++)
01193 {
01194 m_trainBaggingIndex[i] = new int[m_nTrain];
01195
01196
01197 srand(Framework::getRandomSeed() + i);
01198 int cnt = 0;
01199 for(int j=0;j<m_nTrain;j++)
01200 bagSamples[j] = 0;
01201 for(int j=0;j<m_nTrain;j++)
01202 {
01203 int ind = rand() % m_nTrain;
01204 bagSamples[ind] = 1;
01205 m_trainBaggingIndex[i][j] = ind;
01206 }
01207 for(int j=0;j<m_nTrain;j++)
01208 cnt += bagSamples[j];
01209 m_trainSize[i] = m_nTrain;
01210 m_probeSize[i] = m_nTrain - cnt;
01211
01212 m_probeIndex[i] = new int[m_probeSize[i]];
01213 cnt = 0;
01214 for(int j=0;j<m_nTrain;j++)
01215 {
01216 if(bagSamples[j] == false)
01217 {
01218 m_probeIndex[i][cnt] = j;
01219 cnt++;
01220 }
01221 }
01222 cout<<i<<": "<<m_trainSize[i]<<" | "<<m_probeSize[i]<<" ("<<100.0*(double)m_probeSize[i]/(double)m_nTrain<<"% in probe)"<<endl;
01223 }
01224 m_trainSize[m_nCross] = 0;
01225 m_probeSize[m_nCross] = 0;
01226 m_probeIndex[m_nCross] = 0;
01227 m_trainBaggingIndex[m_nCross] = 0;
01228 delete[] bagSamples;
01229
01230
01231 int* bagCnt = new int[m_nTrain];
01232 for(int i=0;i<m_nTrain;i++)
01233 bagCnt[i] = 0;
01234 for(int i=0;i<m_nCross;i++)
01235 for(int j=0;j<m_nTrain;j++)
01236 bagCnt[m_trainBaggingIndex[i][j]]++;
cout<<"Bagging summary (n:number of draws per sample | #samples drawn that often):"<<endl;
01238 for(int nr=0;nr<2*m_nCross;nr++)
01239 {
01240 int cnt = 0;
01241 for(int i=0;i<m_nTrain;i++)
01242 if(bagCnt[i] == nr)
01243 cnt++;
01244 cout<<"n:"<<nr<<"|#"<<cnt<<" ";
01245 }
01246 cout<<endl;
01247 delete[] bagCnt;
01248 }
01249 else
01250 assert(false);
01251
01252
01253 for ( int i=0;i<m_nCross+1;i++ )
01254 {
01255
01256 int nTrain = m_trainSize[i];
01257 if ( m_enableSaveMemory == false )
01258 m_train[i] = new REAL[nTrain * m_nFeatures];
01259 else
01260 m_train[i] = 0;
01261 m_trainTarget[i] = new REAL[nTrain * m_nClass * m_nDomain];
01262 m_trainTargetEffect[i] = new REAL[nTrain * m_nClass * m_nDomain];
01263 m_trainTargetResidual[i] = new REAL[nTrain * m_nClass * m_nDomain];
01264 m_trainLabel[i] = new int[nTrain*m_nDomain];
01265
01266
01267 int nProbe = m_probeSize[i];
01268 if ( nProbe )
01269 {
01270 if ( m_enableSaveMemory == false )
01271 m_probe[i] = new REAL[nProbe * m_nFeatures];
01272 else
01273 m_probe[i] = 0;
01274 m_probeTarget[i] = new REAL[nProbe * m_nClass * m_nDomain];
01275 m_probeTargetEffect[i] = new REAL[nProbe * m_nClass * m_nDomain];
01276 m_probeTargetResidual[i] = new REAL[nProbe * m_nClass * m_nDomain];
01277 m_probeLabel[i] = new int[nProbe*m_nDomain];
01278 if ( m_validationType != "Bagging" )
01279 m_probeIndex[i] = new int[nProbe];
01280 }
01281 else
01282 {
01283 m_probe[i] = 0;
01284 m_probeTarget[i] = 0;
01285 m_probeTargetEffect[i] = 0;
01286 m_probeTargetResidual[i] = 0;
01287 m_probeLabel[i] = 0;
01288 m_probeIndex[i] = 0;
01289 }
01290 }
01291
01292
01293 m_crossIndex = new int[m_nTrain];
01294 for ( int i=0;i<m_nTrain;i++ )
01295 m_crossIndex[i] = -1;
01296
01297 }
01298
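/**
 * Read the fullPredictor file given by TRAIN_ON_FULLPREDICTOR (the "effect",
 * i.e. the prediction of a previously trained model) into
 * m_trainTargetOrigEffect and initialize the residual targets as
 * target - effect. Does nothing for the ValidationSet validation type.
 */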
01306 void Data::readEffectFile()
01307 {
01308 if(m_validationType == "ValidationSet")
01309 return;
01310
01311 for ( int i=0;i<m_nClass*m_nDomain*m_nTrain;i++ )
01312 m_trainTargetOrigEffect[i] = 0.0;
01313
01314 string name = m_datasetPath + "/" + m_fullPredPath + "/" + m_trainOnFullPredictorFile;
01315 fstream f ( name.c_str(), ios::in );
01316 if ( f.is_open() && m_trainOnFullPredictorFile!="" )
01317 {
01318 cout<<"Read fullPredictor:"<<name<<" ";
01319 f.read ( ( char* ) m_trainTargetOrigEffect, sizeof ( REAL ) *m_nClass*m_nDomain*m_nTrain );
01320
01321 double rmse0 = 0.0, rmse1 = 0.0, err;
01322 for ( int i=0;i<m_nClass*m_nDomain;i++ )
01323 {
01324 for ( int j=0;j<m_nTrain;j++ )
01325 {
01326 err = m_trainTargetOrigEffect[j*m_nClass*m_nDomain + i] - m_trainTargetOrig[j*m_nClass*m_nDomain + i];
01327 rmse0 += err * err;
01328 }
01329 }
01330 cout<<"RMSE:"<<sqrt ( rmse0/ ( double ) ( m_nClass*m_nDomain*m_nTrain ) ) <<"(retrain:"<<sqrt ( rmse1/ ( double ) ( m_nClass*m_nDomain*m_nTrain ) ) <<")"<<endl;
01331
01332 f.close();
01333 }
01334 else
cout<<"Cannot open effect file:"<<name<<endl;
01336
01337
01338 cout<<"Init residuals"<<endl;
01339 for ( int i=0;i<m_nClass*m_nDomain*m_nTrain;i++ )
01340 m_trainTargetOrigResidual[i] = m_trainTargetOrig[i] - m_trainTargetOrigEffect[i];
01341 }
01342
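/**
 * Allocate and fill the train and probe feature matrices of cross-validation
 * set n from m_trainOrig, using either the bagging indices or the slot
 * boundaries and the mixed index list. Typically used when
 * m_enableSaveMemory is set, to build one fold at a time.
 *
 * @param n Index of the cross-validation set
 */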
01348 void Data::fillNCrossValidationSet ( int n )
01349 {
01350
01351 if ( m_train[n] )
01352 delete[] m_train[n];
01353 m_train[n] = 0;
01354 m_train[n] = new REAL[m_trainSize[n]*m_nFeatures];
01355 for ( int i=0;i<m_trainSize[n]*m_nFeatures;i++ )
01356 m_train[n][i] = 0.0;
01357 if ( m_probe[n] )
01358 delete[] m_probe[n];
01359 m_probe[n] = 0;
01360 if ( m_probeSize[n] )
01361 m_probe[n] = new REAL[m_probeSize[n]*m_nFeatures];
01362 for ( int i=0;i<m_probeSize[n]*m_nFeatures;i++ )
01363 m_probe[n][i] = 0.0;
01364
01365 if(m_validationType == "Bagging")
01366 {
01367 bool* bagSamples = new bool[m_nTrain];
01368 for(int i=0;i<m_nTrain;i++)
01369 bagSamples[i] = 0;
01370 for(int i=0;i<m_nTrain;i++)
01371 {
01372 int ind = m_trainBaggingIndex[n][i];
01373 bagSamples[ind] = 1;
01374 for(int j=0;j<m_nFeatures;j++)
01375 m_train[n][i*m_nFeatures+j] = m_trainOrig[ind*m_nFeatures + j];
01376 }
01377 int cnt = 0;
01378 for(int i=0;i<m_nTrain;i++)
01379 {
01380 if(bagSamples[i] == false)
01381 {
01382 for(int j=0;j<m_nFeatures;j++)
01383 m_probe[n][cnt*m_nFeatures+j] = m_trainOrig[i*m_nFeatures + j];
01384 cnt++;
01385 }
01386 }
01387 if(cnt != m_probeSize[n])
01388 {
cout<<"cnt:"<<cnt<<" probeSize:"<<m_probeSize[n]<<endl;
01390 assert(false);
01391 }
01392 delete[] bagSamples;
01393 }
01394 else
01395 {
01396
01397 int begin = m_slotBoundaries[n];
01398 int end = m_slotBoundaries[n+1];
01399
01400 int probeCnt = 0, trainCnt = 0;
01401
01402
01403 for ( int j=0;j<m_nTrain;j++ )
01404 {
01405 int index = m_mixList[j];
01406
01407
01408 if ( j>=begin && j <end )
01409 {
01410 for ( int k=0;k<m_nFeatures;k++ )
01411 m_probe[n][probeCnt*m_nFeatures + k] = m_trainOrig[index*m_nFeatures + k];
01412 probeCnt++;
01413 }
01414 else
01415 {
01416 for ( int k=0;k<m_nFeatures;k++ )
01417 m_train[n][trainCnt*m_nFeatures + k] = m_trainOrig[index*m_nFeatures + k];
01418 trainCnt++;
01419 }
01420 }
01421
01422 if ( probeCnt != m_probeSize[n] || trainCnt != m_trainSize[n] )
01423 assert ( false );
01424 }
01425 }
01426
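/**
 * Free the train and probe feature matrices of cross-validation set n.
 *
 * @param n Index of the cross-validation set
 */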
01432 void Data::freeNCrossValidationSet ( int n )
01433 {
01434 if ( m_train[n] )
01435 delete[] m_train[n];
01436 m_train[n] = 0;
01437 if ( m_probe[n] )
01438 delete[] m_probe[n];
01439 m_probe[n] = 0;
01440 }
01441
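/**
 * Run the input feature selection on the original training set.
 */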
01445 void Data::doFeatureSelection()
01446 {
01447 bool* selectedFeatures = new bool[m_nFeatures];
01448 InputFeatureSelector::selectFeatures ( selectedFeatures, m_trainOrig, m_nFeatures, m_nTrain, m_trainLabelOrig, m_trainTargetOrigResidual, m_nClass, m_nDomain );
01449
01450 delete[] selectedFeatures;
01451 }
01452
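/**
 * Normalize the training features with the precomputed mean/std, compute the
 * target means and distribute the samples (features, targets, effects,
 * residuals and labels) to the train/probe buffers of all cross-validation
 * sets, according to the selected validation type.
 */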
01458 void Data::partitionDatasetToCrossValidationSets()
01459 {
01460 cout<<"Partition dataset to cross validation sets"<<endl;
01461
01462
01463 readEffectFile();
01464
01465
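// debug dump: write the first 1000 rows of the train/test/validation features to text files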
01466 if(m_trainOrig)
01467 { fstream f("Atrain.txt",ios::out); for ( int i=0;i<m_nTrain && i < 1000;i++ ){for ( int j=0;j<m_nFeatures;j++ ) f<<m_trainOrig[i*m_nFeatures + j]<<" ";f<<endl;}f.close();}
01468 if(m_testOrig)
01469 { fstream f("Atest.txt",ios::out); for ( int i=0;i<m_nTest && i < 1000;i++ ){for ( int j=0;j<m_nFeatures;j++ ) f<<m_testOrig[i*m_nFeatures + j]<<" ";f<<endl;}f.close();}
01470 if(m_valid)
01471 { fstream f("Avalid.txt",ios::out); for ( int i=0;i<m_validSize && i < 1000;i++ ){for ( int j=0;j<m_nFeatures;j++ ) f<<m_valid[i*m_nFeatures + j]<<" ";f<<endl;}f.close();}
01472
01473
01474 cout<<"Apply mean and std correction to train input features"<<endl;
01475 for ( int i=0;i<m_nTrain;i++ )
01476 for ( int j=0;j<m_nFeatures;j++ )
01477 m_trainOrig[i*m_nFeatures + j] = ( m_trainOrig[i*m_nFeatures + j] - m_mean[j] ) / m_std[j];
01478
01479
01480 REAL min = 1e10, max = -1e10;
01481 for ( int i=0;i<m_nTrain;i++ )
01482 for ( int j=0;j<m_nFeatures;j++ )
01483 {
01484 if ( min > m_trainOrig[i*m_nFeatures + j] )
01485 min = m_trainOrig[i*m_nFeatures + j];
01486 if ( max < m_trainOrig[i*m_nFeatures + j] )
01487 max = m_trainOrig[i*m_nFeatures + j];
01488 }
01489 cout<<"Min/Max after apply mean/std: "<<min<<"/"<<max<<endl;
01490
01491
01492 min = 1e10;
01493 max = -1e10;
01494 m_targetMean = new REAL[m_nClass*m_nDomain];
01495 double* targetMean = new double[m_nClass*m_nDomain];
01496 for(int i=0;i<m_nClass*m_nDomain;i++)
01497 targetMean[i] = 0.0;
01498 for ( int i=0;i<m_nTrain;i++ )
01499 for ( int j=0;j<m_nClass*m_nDomain;j++ )
01500 {
01501 targetMean[j] += m_trainTargetOrig[i*m_nClass*m_nDomain + j];
01502 if ( min > m_trainTargetOrig[i*m_nClass*m_nDomain + j] )
01503 min = m_trainTargetOrig[i*m_nClass*m_nDomain + j];
01504 if ( max < m_trainTargetOrig[i*m_nClass*m_nDomain + j] )
01505 max = m_trainTargetOrig[i*m_nClass*m_nDomain + j];
01506 }
01507 for(int i=0;i<m_nClass*m_nDomain;i++)
01508 m_targetMean[i] = targetMean[i]/(double)m_nTrain;
01509 delete[] targetMean;
01510
01511 cout<<"Min/Max target: "<<min<<"/"<<max<<endl<<"Mean target: ";
01512 for(int i=0;i<m_nClass*m_nDomain;i++)
01513 cout<<m_targetMean[i]<<" ";
01514 cout<<endl<<endl;
01515
01516 if(m_validationType == "Retraining" || m_validationType == "CrossFoldMean")
01517 {
01518 int* labels = new int[m_nDomain];
01519
01520
01521 for ( int i=0;i<m_nCross+1;i++ )
01522 {
01523
01524 int begin = m_slotBoundaries[i];
01525 int end = m_slotBoundaries[i+1];
01526
01527 int probeCnt = 0, trainCnt = 0;
01528
01529
01530 for ( int j=0;j<m_nTrain;j++ )
01531 {
01532 int index = m_mixList[j];
01533 if ( Framework::getDatasetType() )
01534 {
01535 for ( int d=0;d<m_nDomain;d++ )
01536 labels[d] = m_trainLabelOrig[d+index*m_nDomain];
01537 }
01538
01539
01540 if ( j>=begin && j <end )
01541 {
01542 m_probeIndex[i][probeCnt] = index;
01543 for ( int d=0;d<m_nDomain;d++ )
01544 m_probeLabel[i][d+probeCnt*m_nDomain] = labels[d];
01545 for ( int k=0;k<m_nFeatures;k++ )
01546 if ( m_enableSaveMemory == false )
01547 m_probe[i][probeCnt*m_nFeatures + k] = m_trainOrig[index*m_nFeatures + k];
01548 for ( int k=0;k<m_nClass*m_nDomain;k++ )
01549 {
01550 m_probeTarget[i][probeCnt*m_nClass*m_nDomain + k] = m_trainTargetOrig[index*m_nClass*m_nDomain + k];
01551 m_probeTargetEffect[i][probeCnt*m_nClass*m_nDomain + k] = m_trainTargetOrigEffect[index*m_nClass*m_nDomain + k];
01552 m_probeTargetResidual[i][probeCnt*m_nClass*m_nDomain + k] = m_trainTargetOrigResidual[index*m_nClass*m_nDomain + k];
01553 }
01554 probeCnt++;
01555 m_crossIndex[j] = i;
01556 }
01557 else
01558 {
01559 for ( int d=0;d<m_nDomain;d++ )
01560 m_trainLabel[i][d+trainCnt*m_nDomain] = labels[d];
01561 for ( int k=0;k<m_nFeatures;k++ )
01562 if ( m_enableSaveMemory == false )
01563 m_train[i][trainCnt*m_nFeatures + k] = m_trainOrig[index*m_nFeatures + k];
01564 for ( int k=0;k<m_nClass*m_nDomain;k++ )
01565 {
01566 m_trainTarget[i][trainCnt*m_nClass*m_nDomain + k] = m_trainTargetOrig[index*m_nClass*m_nDomain + k];
01567 m_trainTargetEffect[i][trainCnt*m_nClass*m_nDomain + k] = m_trainTargetOrigEffect[index*m_nClass*m_nDomain + k];
01568 m_trainTargetResidual[i][trainCnt*m_nClass*m_nDomain + k] = m_trainTargetOrigResidual[index*m_nClass*m_nDomain + k];
01569 }
01570 trainCnt++;
01571 }
01572 }
01573 if ( probeCnt != m_probeSize[i] || trainCnt != m_trainSize[i] )
01574 assert ( false );
01575 }
01576
01577 if ( labels )
01578 delete[] labels;
01579
01580 for ( int i=0;i<m_nTrain;i++ )
01581 if ( m_crossIndex[i] == -1 )
01582 assert ( false );
01583 }
01584 else if(m_validationType == "Bagging")
01585 {
01586 bool* bagSamples = new bool[m_nTrain];
01587 for ( int i=0;i<m_nCross;i++ )
01588 {
01589
01590 for(int j=0;j<m_nTrain;j++)
01591 bagSamples[j] = 0;
01592 for(int j=0;j<m_nTrain;j++)
01593 {
01594 uint ind = m_trainBaggingIndex[i][j];
01595 bagSamples[ind] = 1;
01596
01597 if ( Framework::getDatasetType() )
01598 for ( int d=0;d<m_nDomain;d++ )
01599 m_trainLabel[i][d+j*m_nDomain] = m_trainLabelOrig[d+ind*m_nDomain];
01600 for ( int k=0;k<m_nFeatures;k++ )
01601 if ( m_enableSaveMemory == false )
01602 m_train[i][j*m_nFeatures + k] = m_trainOrig[ind*m_nFeatures + k];
01603 for ( int k=0;k<m_nClass*m_nDomain;k++ )
01604 {
01605 m_trainTarget[i][j*m_nClass*m_nDomain + k] = m_trainTargetOrig[ind*m_nClass*m_nDomain + k];
01606 m_trainTargetEffect[i][j*m_nClass*m_nDomain + k] = m_trainTargetOrigEffect[ind*m_nClass*m_nDomain + k];
01607 m_trainTargetResidual[i][j*m_nClass*m_nDomain + k] = m_trainTargetOrigResidual[ind*m_nClass*m_nDomain + k];
01608 }
01609 }
01610
01611
01612 int cnt = 0;
01613 for(int j=0;j<m_nTrain;j++)
01614 cnt += bagSamples[j];
01615 if(m_nTrain - cnt != m_probeSize[i])
01616 assert(false);
01617 cnt = 0;
01618 for(int j=0;j<m_nTrain;j++)
01619 {
01620 if(bagSamples[j] == false)
01621 {
01622 if ( Framework::getDatasetType() )
01623 for ( int d=0;d<m_nDomain;d++ )
01624 m_probeLabel[i][d+cnt*m_nDomain] = m_trainLabelOrig[d+j*m_nDomain];
01625 for ( int k=0;k<m_nFeatures;k++ )
01626 if ( m_enableSaveMemory == false )
01627 m_probe[i][cnt*m_nFeatures + k] = m_trainOrig[j*m_nFeatures + k];
01628 for ( int k=0;k<m_nClass*m_nDomain;k++ )
01629 {
01630 m_probeTarget[i][cnt*m_nClass*m_nDomain + k] = m_trainTargetOrig[j*m_nClass*m_nDomain + k];
01631 m_probeTargetEffect[i][cnt*m_nClass*m_nDomain + k] = m_trainTargetOrigEffect[j*m_nClass*m_nDomain + k];
01632 m_probeTargetResidual[i][cnt*m_nClass*m_nDomain + k] = m_trainTargetOrigResidual[j*m_nClass*m_nDomain + k];
01633 }
01634 cnt++;
01635 }
01636 }
01637 if(cnt != m_probeSize[i])
01638 assert(false);
01639 }
01640 delete[] bagSamples;
01641 }
01642 else if(m_validationType == "ValidationSet")
01643 {
01644 ;
01645 }
01646 else
01647 assert(false);
01648 }
01649
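/**
 * Load the full training predictions of the previously trained algorithms
 * (the ".dat" files in the algorithm name list) into m_cascadeInputs, report
 * their RMSE against the original targets and set m_nCascadeInputs.
 */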
01656 void Data::fillCascadeLearningInputs()
01657 {
01658 cout<<endl<<"Add effects (predictions of previous algorithms) as inputs to dataset"<<endl;
01659
01660
01661 vector<string> files = m_algorithmNameList;
01662 vector<string> m_usedFiles;
01663
01664 for ( int i=0;i<files.size();i++ )
01665 if ( files[i].at ( files[i].size()-1 ) != '.' && files[i].find ( ".dat" ) == files[i].length()-4 )
01666 m_usedFiles.push_back ( files[i] );
01667 int size = m_usedFiles.size();
01668
01669
01670 m_cascadeInputs = new REAL[size*m_nClass*m_nDomain*m_nTrain];
01671 for ( int i=0;i<size*m_nClass*m_nDomain*m_nTrain;i++ )
01672 m_cascadeInputs[i] = 1e10;
01673
01674
01675 for ( int i=0;i<size;i++ )
01676 {
01677 fstream f ( m_usedFiles[i].c_str(), ios::in );
01678 if ( f.is_open() == false )
01679 assert ( false );
01680 REAL* cache = new REAL[m_nTrain*m_nClass*m_nDomain];
01681 f.read ( ( char* ) cache, sizeof ( REAL ) *m_nTrain*m_nClass*m_nDomain );
01682 f.close();
01683
01684 for ( int j=0;j<m_nTrain;j++ )
01685 for ( int k=0;k<m_nClass*m_nDomain;k++ )
01686 m_cascadeInputs[j*m_nClass*m_nDomain*size + i*m_nClass*m_nDomain + k] = cache[j*m_nClass*m_nDomain + k];
01687
01688 if ( cache )
01689 delete[] cache;
01690 cache = 0;
01691 }
01692 for ( int i=0;i<size;i++ )
01693 {
01694 double rmse = 0.0, err;
01695 for ( int j=0;j<m_nTrain;j++ )
01696 for ( int k=0;k<m_nClass*m_nDomain;k++ )
01697 {
01698 err = m_cascadeInputs[j*m_nClass*m_nDomain*size + i*m_nClass*m_nDomain + k] - m_trainTargetOrig[k + j*m_nClass*m_nDomain];
01699 rmse += err*err;
01700 }
01701 cout<<"File:"<<m_usedFiles[i]<<" RMSE:"<<sqrt ( rmse/ ( double ) ( m_nClass*m_nTrain*m_nDomain ) ) <<endl;
01702 }
01703 if ( size == 0 )
01704 cout<<"Nothing to do here"<<endl;
01705 cout<<endl;
01706
01707 m_nCascadeInputs = size;
01708 cout<<"nCascadeInputs:"<<m_nCascadeInputs<<endl;
01709 }
01710
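/**
 * Append the cascade inputs (predictions of previous algorithms) as
 * additional columns to the original train and test feature matrices and
 * increase m_nFeatures accordingly.
 */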
01716 void Data::extendTrainDataWithCascadeInputs()
01717 {
01718 if ( m_nCascadeInputs == 0 )
01719 return;
01720
01721 cout<<"Extend the train data with cascade inputs"<<endl;
01722
01723 if ( m_trainOrig )
01724 {
// local buffer with the extended row width (not a member, despite the similar name elsewhere)
REAL* trainOrigNew = new REAL[m_nTrain* ( m_nFeatures+m_nCascadeInputs*m_nClass*m_nDomain ) ];
for ( int i=0;i<m_nTrain;i++ )
{
// copy the original features of sample i
REAL* ptr0 = trainOrigNew + i* ( m_nFeatures+m_nCascadeInputs*m_nClass*m_nDomain );
REAL* ptr1 = m_trainOrig + i*m_nFeatures;
for ( int j=0;j<m_nFeatures;j++ )
ptr0[j] = ptr1[j];
// append the cascade inputs of sample i
ptr0 = trainOrigNew + i* ( m_nFeatures+m_nCascadeInputs*m_nClass*m_nDomain ) + m_nFeatures;
ptr1 = m_cascadeInputs + i*m_nCascadeInputs*m_nClass*m_nDomain;
for ( int j=0;j<m_nCascadeInputs*m_nClass*m_nDomain;j++ )
ptr0[j] = ptr1[j];
}
delete[] m_trainOrig;
m_trainOrig = trainOrigNew;
01740 }
01741
01742 if ( m_testOrig )
01743 {
// local buffer with the extended row width
REAL* testOrigNew = new REAL[m_nTest* ( m_nFeatures+m_nCascadeInputs*m_nClass*m_nDomain ) ];
for ( int i=0;i<m_nTest;i++ )
{
// copy the original features of sample i
REAL* ptr0 = testOrigNew + i* ( m_nFeatures+m_nCascadeInputs*m_nClass*m_nDomain );
REAL* ptr1 = m_testOrig + i*m_nFeatures;
for ( int j=0;j<m_nFeatures;j++ )
ptr0[j] = ptr1[j];
// append the cascade inputs of sample i
ptr0 = testOrigNew + i* ( m_nFeatures+m_nCascadeInputs*m_nClass*m_nDomain ) + m_nFeatures;
ptr1 = m_cascadeInputs + i*m_nCascadeInputs*m_nClass*m_nDomain;
for ( int j=0;j<m_nCascadeInputs*m_nClass*m_nDomain;j++ )
ptr0[j] = ptr1[j];
}
delete[] m_testOrig;
m_testOrig = testOrigNew;
01759 }
01760
01761 int nFeaturesBefore = m_nFeatures;
01762 m_nFeatures += m_nCascadeInputs*m_nClass*m_nDomain;
01763 cout<<"nFeatures: "<<m_nFeatures<<" (before: "<<nFeaturesBefore<<")"<<endl;
01764 }
01765
01766
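/**
 * Set the temp, dsc, fullPrediction and data paths.
 */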
01775 void Data::setPathes ( string temp, string dsc, string fullPred, string data )
01776 {
01777 m_tempPath = temp;
01778 m_dscPath = dsc;
01779 m_fullPredPath = fullPred;
01780 m_dataPath = data;
01781 }
01782
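/**
 * Parse one "name=value" line of a descriptor file and store it either in
 * the meta fields (mode -1: ALGORITHM, ID, TRAIN_ON_FULLPREDICTOR, DISABLE)
 * or in the int/double/string/bool parameter map (mode 0/1/2/3).
 */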
01789 void Data::readParameter ( string line, int mode )
01790 {
01791
01792 int pos = line.find ( "=" );
01793 string name = line.substr ( 0, pos );
01794 string value = line.substr ( pos+1 );
01795
01796 if ( mode==-1 )
01797 {
01798 if ( name=="ALGORITHM" )
01799 m_algorithmName = value;
01800 if ( name=="ID" )
01801 m_algorithmID = atoi ( value.c_str() );
01802 if ( name=="TRAIN_ON_FULLPREDICTOR" )
01803 {
01804 if(m_validationType == "ValidationSet")
01805 assert(false);
01806 m_trainOnFullPredictorFile = value;
01807 }
01808 if ( name=="DISABLE" )
01809 m_disableTraining = atoi ( value.c_str() );
01810 cout<<"[META] ";
01811 }
01812
01813 if ( mode==0 )
01814 m_intMap[name] = atoi ( value.c_str() );
01815
01816 if ( mode==1 )
01817 m_doubleMap[name] = atof ( value.c_str() );
01818
01819 if ( mode==2 )
01820 m_stringMap[name] = value;
01821
01822 if ( mode==3 )
01823 m_boolMap[name] = atoi ( value.c_str() );
01824
01825 cout<<name<<": "<<value<<endl;
01826 }
01827
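/**
 * Read an algorithm descriptor (dsc) file line by line. Lines starting with
 * '#' are comments, the section markers [int], [double], [string] and [bool]
 * switch the parameter type, and every "name=value" line is handed to
 * readParameter(). Illustrative sketch of the layout (the parameter names
 * and values below are placeholders, not a fixed schema):
 *
 *   ALGORITHM=SomeAlgorithm
 *   ID=0
 *   DISABLE=0
 *   [int]
 *   someIntParameter=10
 *   [double]
 *   someDoubleParameter=0.001
 *   [bool]
 *   someBoolParameter=1
 */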
01833 void Data::readDscFile ( string name )
01834 {
01835 cout<<"Load descriptor file: "<<name<<endl;
01836 fstream f ( name.c_str(), ios::in );
01837
01838 if ( f.is_open() ==false )
01839 {
cout<<"Cannot open file:"<<name<<endl;
01841 assert ( false );
01842 }
01843
01844 int mode = -1;
01845
01846 char buf[256];
01847 while ( f.getline ( buf, 256 ) )
01848 {
01849 string line ( buf );
01850 if ( line[0]=='#' )
01851 continue;
01852 if ( line.find ( "[int]" ) != string::npos )
01853 mode = 0;
01854 if ( line.find ( "[double]" ) != string::npos )
01855 mode = 1;
01856 if ( line.find ( "[string]" ) != string::npos )
01857 mode = 2;
01858 if ( line.find ( "[bool]" ) != string::npos )
01859 mode = 3;
01860
01861
01862 if ( line.find ( "=" ) != string::npos )
01863 readParameter ( line, mode );
01864 }
01865
01866 f.close();
01867 }
01868
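/**
 * List all entries of the given directory.
 *
 * @return Vector of path + entry name strings (empty on error)
 */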
01873 vector<string> Data::getDirectoryFileList ( string path )
01874 {
01875 vector<string> v;
01876 DIR *dp;
01877 struct dirent *dirp;
01878 if ( ( dp = opendir ( path.c_str() ) ) == NULL )
01879 {
01880 cout << "Error opening " << path << endl;
01881 return v;
01882 }
01883 while ( ( dirp = readdir ( dp ) ) != NULL )
01884 v.push_back ( path + string ( dirp->d_name ) );
01885 closedir ( dp );
01886 return v;
01887 }
01888
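/**
 * Split a delimiter-separated list of integers, e.g. "1,2,3".
 * Tokenization temporarily writes '\0' characters into the buffer of the
 * local string copy (via the cast of c_str()).  Returns a new[]-allocated
 * int array owned by the caller; the number of elements is not returned,
 * so the caller must know it from the context.
 */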
01897 int* Data::splitStringToIntegerList ( string str, char delimiter )
01898 {
01899 vector<int> v;
01900 int number;
01901 char *begin = ( char* ) str.c_str(), *end = ( char* ) str.c_str(), tmp;
01902 for ( int i=0;i<str.length();i++ )
01903 {
01904 end++;
01905 if ( *end==delimiter || *end==0 )
01906 {
01907 tmp = *end;
01908 *end = 0;
01909 sscanf ( begin, "%d", &number );
01910 begin = end + 1;
01911 *end = tmp;
01912 v.push_back ( number );
01913 }
01914 }
01915 int* returnList = new int[v.size() ];
01916 for ( int i=0;i<v.size();i++ )
01917 returnList[i] = v[i];
01918 return returnList;
01919 }
01920
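/**
 * Split a delimiter-separated list into substrings, using the same in-place
 * tokenization as splitStringToIntegerList.  The input is passed by value,
 * so the caller's string is not modified.
 */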
01929 vector<string> Data::splitStringToStringList ( string str, char delimiter )
01930 {
01931 vector<string> v;
01933 char *begin = ( char* ) str.c_str(), *end = ( char* ) str.c_str(), tmp;
01934 for ( int i=0;i<str.length();i++ )
01935 {
01936 end++;
01937 if ( *end==delimiter || *end==0 )
01938 {
01939 tmp = *end;
01940 *end = 0;
01941 v.push_back ( begin );
01942 begin = end + 1;
01943 *end = tmp;
01944 }
01945 }
01946 return v;
01947 }
01948
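/**
 * Copy configuration values and dataset pointers from another Data object.
 * Pointers are copied as-is (no deep copies are made), so both objects
 * reference the same underlying arrays.
 */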
01954 void Data::setDataPointers ( Data* data )
01955 {
01956 cout<<"Set data pointers"<<endl;
01957
01958
01959 m_intMap = data->m_intMap;
01960 m_doubleMap = data->m_doubleMap;
01961 m_boolMap = data->m_boolMap;
01962 m_stringMap = data->m_stringMap;
01963
01964 m_algorithmName = data->m_algorithmName;
01965 m_algorithmID = data->m_algorithmID;
01966 m_trainOnFullPredictorFile = data->m_trainOnFullPredictorFile;
01967 m_disableTraining = data->m_disableTraining;
01968
01969 m_randSeed = data->m_randSeed;
01970 m_positiveTarget = data->m_positiveTarget;
01971 m_negativeTarget = data->m_negativeTarget;
01972
01973 m_mixList = data->m_mixList;
01974
01975
01976 m_datasetPath = data->m_datasetPath;
01977 m_datasetName = data->m_datasetName;
01978 m_tempPath = data->m_tempPath;
01979 m_dscPath = data->m_dscPath;
01980 m_fullPredPath = data->m_fullPredPath;
01981 m_dataPath = data->m_dataPath;
01982
01983
01984 m_nFeatures = data->m_nFeatures;
01985 m_nClass = data->m_nClass;
01986 m_nDomain = data->m_nDomain;
01987 m_nMixTrainList = data->m_nMixTrainList;
01988
01989
01990 m_nCross = data->m_nCross;
01991 m_validationType = data->m_validationType;
01992
01993
01994 m_mean = data->m_mean;
01995 m_std = data->m_std;
01996 m_standardDeviationMin = data->m_standardDeviationMin;
01997 m_targetMean = data->m_targetMean;
01998
01999
02000 m_nTrain = data->m_nTrain;
02001 m_trainOrig = data->m_trainOrig;
02002 m_trainTargetOrig = data->m_trainTargetOrig;
02003 m_trainTargetOrigEffect = data->m_trainTargetOrigEffect;
02004 m_trainTargetOrigResidual = data->m_trainTargetOrigResidual;
02005 m_trainLabelOrig = data->m_trainLabelOrig;
02006 m_trainBaggingIndex = data->m_trainBaggingIndex;
02007
02008
02009 m_validSize = data->m_validSize;
02010 m_valid = data->m_valid;
02011 m_validTarget = data->m_validTarget;
02012 m_validLabel = data->m_validLabel;
02013
02014
02015 m_nTest = data->m_nTest;
02016 m_testOrig = data->m_testOrig;
02017 m_testTargetOrig = data->m_testTargetOrig;
02018 m_testLabelOrig = data->m_testLabelOrig;
02019
02020
02021 m_slotBoundaries = data->m_slotBoundaries;
02022
02023
02024 m_trainSize = data->m_trainSize;
02025 m_train = data->m_train;
02026 m_trainTarget = data->m_trainTarget;
02027 m_trainTargetEffect = data->m_trainTargetEffect;
02028 m_trainTargetResidual = data->m_trainTargetResidual;
02029 m_trainLabel = data->m_trainLabel;
02030
02031
02032 m_probeSize = data->m_probeSize;
02033 m_probe = data->m_probe;
02034 m_probeTarget = data->m_probeTarget;
02035 m_probeTargetEffect = data->m_probeTargetEffect;
02036 m_probeTargetResidual = data->m_probeTargetResidual;
02037 m_probeLabel = data->m_probeLabel;
02038 m_probeIndex = data->m_probeIndex;
02039
02040 m_crossIndex = data->m_crossIndex;
02041
02042
02043 m_blendingRegularization = data->m_blendingRegularization;
02044 m_enableGlobalBlendingWeights = data->m_enableGlobalBlendingWeights;
02045 m_blendingEnableCrossValidation = data->m_blendingEnableCrossValidation;
02046 m_enablePostNNBlending = data->m_enablePostNNBlending;
02047 m_blendingAlgorithm = data->m_blendingAlgorithm;
02048
02049
02050 m_enableCascadeLearning = data->m_enableCascadeLearning;
02051 m_nCascadeInputs = data->m_nCascadeInputs;
02052 m_cascadeInputs = data->m_cascadeInputs;
02053
02054
02055 m_enableGlobalMeanStdEstimate = data->m_enableGlobalMeanStdEstimate;
02056
02057
02058 m_maxThreadsInCross = data->m_maxThreadsInCross;
02059
02060
02061 m_enableSaveMemory = data->m_enableSaveMemory;
02062
02063
02064 m_errorFunction = data->m_errorFunction;
02065
02066
02067 m_mixDatasetIndices = data->m_mixDatasetIndices;
02068
02069
02070 m_algorithmNameList = data->m_algorithmNameList;
02071
02072
02073 m_enablePostBlendClipping = data->m_enablePostBlendClipping;
02074
02075
02076 m_addOutputNoise = data->m_addOutputNoise;
02077
02078
02079 m_enableFeatureSelection = data->m_enableFeatureSelection;
02080 m_featureSelectionWriteBinaryDataset = data->m_featureSelectionWriteBinaryDataset;
02081
02082
02083 m_enableBagging = data->m_enableBagging;
02084 m_randomSeedBagging = data->m_randomSeedBagging;
02085
02086
02087 m_disableWriteDscFile = data->m_disableWriteDscFile;
02088
02089
02090 m_enableStaticNormalization = data->m_enableStaticNormalization;
02091 m_staticMeanNormalization = data->m_staticMeanNormalization;
02092 m_staticStdNormalization = data->m_staticStdNormalization;
02093 m_enableProbablisticNormalization = data->m_enableProbablisticNormalization;
02094
02095
02096 m_dimensionalityReduction = data->m_dimensionalityReduction;
02097
02098
02099 m_loadWeightsBeforeTraining = data->m_loadWeightsBeforeTraining;
02100
02101 m_subsampleTrainSet = data->m_subsampleTrainSet;
02102 m_subsampleFeatures = data->m_subsampleFeatures;
02103 m_globalTrainingLoops = data->m_globalTrainingLoops;
02104 m_addConstantInput = data->m_addConstantInput;
02105 }
02106
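/**
 * Store the list of already trained predictors and expand every entry to the
 * full path of its prediction file:
 * m_datasetPath + "/" + m_fullPredPath + "/" + <name up to the first '.'> + ".dat"
 * A name that starts with '.' aborts the program.
 */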
02113 void Data::setAlgorithmList ( vector<string> algorithmNameList )
02114 {
02115 cout<<"Set algorithm list (nTrained:"<< ( int ) algorithmNameList.size() <<")"<<endl;
02116 m_algorithmNameList = algorithmNameList;
02117 for ( int i=0;i<m_algorithmNameList.size();i++ )
02118 {
02119 int pos = m_algorithmNameList[i].find_first_of ( ".",0 );
02120 if ( pos == 0 )
02121 assert ( false );
02122 m_algorithmNameList[i] = m_datasetPath + "/" + m_fullPredPath + "/" + m_algorithmNameList[i].substr ( 0,pos ) + ".dat";
02123 cout<<"m_algorithmNameList["<<i<<"]:"<<m_algorithmNameList[i]<<endl;
02124 }
02125 }
02126
02132 void Data::enableBagging ( bool en )
02133 {
02134 cout<<"Enable bagging:"<<en<<endl;
02135 m_enableBagging = en;
02136 }
02137
02143 void Data::baggingRandomSeed ( uint seed )
02144 {
02145 m_randomSeedBagging = seed;
02146 }
02147
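/**
 * Append the test set to the training set: features, targets and labels are
 * copied into newly allocated arrays, the old training arrays are freed and
 * m_nTrain grows by m_nTest.  Does nothing when no test set is present.
 */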
02155 void Data::mergeTrainAndTest()
02156 {
02157 cout<<"trainSet = {trainSet(#"<<m_nTrain<<") + testSet(#"<<m_nTest<<")}"<<endl;
02158 if ( m_nTest == 0 )
02159 return;
02160
02161 REAL* train = new REAL[ ( m_nTrain + m_nTest ) *m_nFeatures];
02162 REAL* trainTarget = new REAL[ ( m_nTrain + m_nTest ) *m_nClass*m_nDomain];
02163 int* trainLabel = new int[ ( m_nTrain + m_nTest ) *m_nDomain];
02164
02165 memcpy ( train, m_trainOrig, sizeof ( REAL ) *m_nTrain*m_nFeatures );
02166 memcpy ( train + m_nTrain*m_nFeatures, m_testOrig, sizeof ( REAL ) *m_nTest*m_nFeatures );
02167
02168 memcpy ( trainTarget, m_trainTargetOrig, sizeof ( REAL ) *m_nTrain*m_nClass*m_nDomain );
02169 memcpy ( trainTarget + m_nTrain*m_nClass*m_nDomain, m_testTargetOrig, sizeof ( REAL ) *m_nTest*m_nClass*m_nDomain );
02170
02171 memcpy ( trainLabel, m_trainLabelOrig, sizeof ( int ) *m_nTrain*m_nDomain );  // labels are int, not REAL
02172 memcpy ( trainLabel + m_nTrain*m_nDomain, m_testLabelOrig, sizeof ( int ) *m_nTest*m_nDomain );
02173
02174 delete[] m_trainOrig;
02175 delete[] m_trainTargetOrig;
02176 delete[] m_trainLabelOrig;
02177
02178 m_trainOrig = train;
02179 m_trainTargetOrig = trainTarget;
02180 m_trainLabelOrig = trainLabel;
02181
02182 m_nTrain = m_nTrain + m_nTest;
02183 }
02184
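/**
 * Scale every training feature to the [0,1] range (used for autoencoder
 * pretraining).  Despite the names, mean[] holds the per-feature minimum and
 * std[] the range max-min (ranges below 1e-2 are clamped to 1.0); the global
 * m_mean/m_std are reset to 0/1, and static normalization is applied on top
 * when enabled.  The per-feature values are written as raw REAL arrays to
 * AutoencoderDataMean.dat and AutoencoderDataStd.dat in the temp directory,
 * and the program aborts if a scaled value leaves [0,1].
 */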
02188 void Data::normalizeZeroOne()
02189 {
02190 cout<<"Autoencoder: Normalize train between 0 and 1"<<endl;
02191
02192 REAL* mean = new REAL[m_nFeatures];
02193 REAL* std = new REAL[m_nFeatures];
02194
02195 for ( int i=0;i<m_nFeatures;i++ )
02196 {
02197 double min = 1e10, max = -1e10;
02198 for ( int j=0;j<m_nTrain;j++ )
02199 {
02200 REAL v = m_trainOrig[i+j*m_nFeatures];
02202 if ( min > v )
02203 min = v;
02204 if ( max < v )
02205 max = v;
02206 }
02207 mean[i] = min;
02208 std[i] = max - min;
02209 if ( std[i] <= 1e-2 )
02210 std[i] = 1.0;
02211 m_mean[i] = 0.0;
02212 m_std[i] = 1.0;
02213
02214 if ( m_enableStaticNormalization )
02215 {
02216 mean[i] += m_staticMeanNormalization;
02217 std[i] *= m_staticStdNormalization;
02218 }
02219 }
02220 for ( int i=0;i<m_nTrain;i++ )
02221 for ( int j=0;j<m_nFeatures;j++ )
02222 {
02223 m_trainOrig[j+i*m_nFeatures] = ( m_trainOrig[j+i*m_nFeatures] - mean[j] ) / std[j];
02224 REAL v = m_trainOrig[j+i*m_nFeatures];
02225 if ( v > 1.0 || v < 0.0 )
02226 {
02227 cout<<"v:"<<v<<endl;
02228 assert ( false );
02229 }
02230 }
02231
02232
02233 for ( int j=0;j<m_nFeatures;j++ )
02234 cout<<mean[j]<<"|"<<std[j]<<" ";
02235 cout<<endl;
02236
02237
02238 cout<<"save the 0..1 normalizations"<<endl;
02239 string meanName = m_datasetPath + "/" + m_tempPath + "/AutoencoderDataMean.dat";
02240 string stdName = m_datasetPath + "/" + m_tempPath + "/AutoencoderDataStd.dat";
02241 cout<<"meanName:"<<meanName<<endl<<"stdName:"<<stdName<<endl;
02242 fstream fMean ( meanName.c_str(),ios::out );
02243 fstream fStd ( stdName.c_str(),ios::out );
02244 fMean.write ( ( char* ) mean, sizeof ( REAL ) *m_nFeatures );
02245 fStd.write ( ( char* ) std, sizeof ( REAL ) *m_nFeatures );
02246 fMean.close();
02247 fStd.close();
02248
02249 delete[] mean;
02250 delete[] std;
02251 }
02252
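/**
 * Randomly subsample the training set to roughly percent*100 percent of its
 * rows.  The selection is made in two passes driven by the same random seed:
 * the first pass only counts the accepted rows, the second pass copies them.
 * Values of percent outside (0,1) leave the training set untouched.
 */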
02259 void Data::reduceTrainingSetSize ( REAL percent )
02260 {
02261 cout<<"reduce training set (current size:"<<m_nTrain<<") to "<<percent*100.0<<"% of its original size"<<flush;
02262 if ( percent <= 0.0 || percent >= 1.0 )
02263 {
02264 cout<<" [nothing to do]"<<endl;
02265 return;
02266 }
02267 cout<<endl;
02268
02269 srand ( Framework::getRandomSeed() );
02270 int cnt = 0;
02271 for ( int i=0;i<m_nTrain;i++ )
02272 if ( ( double ) rand() / ( double ) RAND_MAX < percent )
02273 cnt++;
02274
02275 cout<<"allocate new training set, size:"<<cnt<<endl;
02276
02277 REAL* train = new REAL[cnt*m_nFeatures];
02278 REAL* trainTarget = new REAL[cnt*m_nClass*m_nDomain];
02279
02280 int* trainLabel = 0;
02281 if ( m_trainLabelOrig )
02282 trainLabel = new int[cnt*m_nDomain];
02283
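// Reseed with the same seed so this pass reproduces the accept/reject decisions counted above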
02284 srand ( Framework::getRandomSeed() );
02285 cnt = 0;
02286 for ( int i=0;i<m_nTrain;i++ )
02287 {
02288 if ( ( double ) rand() / ( double ) RAND_MAX < percent )
02289 {
02290 for ( int j=0;j<m_nFeatures;j++ )
02291 train[j+cnt*m_nFeatures] = m_trainOrig[j+i*m_nFeatures];
02292 for ( int j=0;j<m_nClass*m_nDomain;j++ )
02293 trainTarget[j+cnt*m_nClass*m_nDomain] = m_trainTargetOrig[j+i*m_nClass*m_nDomain];
02294 if ( m_trainLabelOrig )
02295 {
02296 for ( int j=0;j<m_nDomain;j++ )
02297 trainLabel[j+cnt*m_nDomain] = m_trainLabelOrig[j+i*m_nDomain];
02298 }
02299 cnt++;
02300 }
02301 }
02302
02303 delete[] m_trainOrig;
02304 delete[] m_trainTargetOrig;
02305 if ( m_trainLabelOrig )
02306 delete[] m_trainLabelOrig;
02307
02308 m_trainOrig = train;
02309 m_trainTargetOrig = trainTarget;
02310 // trainLabel is 0 when no labels were allocated, so the unconditional assignment is safe
02311 m_trainLabelOrig = trainLabel;
02312
02313 m_nTrain = cnt;
02314 }
02315
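/**
 * Randomly subsample the columns of a row-major table to roughly
 * percent*100 percent; columns that are constant 1 (bias columns) are always
 * kept.  With loadColumnSet the selection mask is read from subspace.txt in
 * the DATA_PATH directory under m_datasetPath, otherwise the freshly drawn
 * mask is written to that file.  The table pointer and the column count are
 * updated in place.
 */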
02322 void Data::reduceFeatureSize ( REAL* &table, int tableRows, int &tableCols, REAL percent, bool loadColumnSet )
02323 {
02324 cout<<"subsample the columns (current:"<<tableCols<<") to "<<percent*100.0<<"% of columns (skip constant 1 features)"<<flush;
02325 if ( percent <= 0.0 || percent >= 1.0 )
02326 {
02327 cout<<" [nothing to do]"<<endl;
02328 return;
02329 }
02330 cout<<endl;
02331
02332
02333 bool* isConstantOne = new bool[tableCols];
02334 bool* selectedCols = new bool[tableCols];
02335 for ( int i=0;i<tableCols;i++ )
02336 {
02337 isConstantOne[i] = true;
02338 selectedCols[i] = false;
02339 }
02340 for ( int i=0;i<tableRows;i++ )
02341 for ( int j=0;j<tableCols;j++ )
02342 isConstantOne[j] &= table[j+i*tableCols]==1.0;
02343
02344 srand ( Framework::getRandomSeed() );
02345 int cnt = 0;
02346 for ( int i=0;i<tableCols;i++ )
02347 if ( ( double ) rand() / ( double ) RAND_MAX < percent || isConstantOne[i] )
02348 {
02349 selectedCols[i] = true;
02350 cnt++;
02351 }
02352 delete[] isConstantOne;
02353
02354 if ( loadColumnSet )
02355 {
02356 string fname = m_datasetPath + "/" + string ( DATA_PATH ) + "/subspace.txt";
02357 cout<<"load subspace file:"<<fname<<endl;
02358 fstream f ( fname.c_str(),ios::in );
02359 cnt = 0;
02360 for ( int i=0;i<tableCols;i++ )
02361 {
02362 f>>selectedCols[i];
02363 cnt += selectedCols[i];
02364 }
02365 f.close();
02366 }
02367 else
02368 {
02369 string fname = m_datasetPath + "/" + string ( DATA_PATH ) + "/subspace.txt";
02370 cout<<"write subspace file:"<<fname<<endl;
02371 fstream f ( fname.c_str(),ios::out );
02372 for ( int i=0;i<tableCols;i++ )
02373 f<<selectedCols[i]<<endl;
02374 f.close();
02375 }
02376
02377 cout<<"allocate new table set, column size:"<<cnt<<endl;
02378 REAL* newTable = new REAL[cnt*tableRows];
02379
02380 srand ( Framework::getRandomSeed() );
02381 for ( int i=0;i<tableRows;i++ )
02382 {
02383 int c = 0;
02384 for ( int j=0;j<tableCols;j++ )
02385 {
02386 if ( selectedCols[j] )
02387 {
02388 newTable[c+i*cnt] = table[j+i*tableCols];
02389 c++;
02390 }
02391 }
02392 }
02393
02394 delete[] table;
02395 delete[] selectedCols;
02396 table = newTable;
02397 tableCols = cnt;
02398 }
02399
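/**
 * Append a constant 1.0 column (bias input) to the train and test feature
 * matrices and increment m_nFeatures.
 */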
02403 void Data::addConstantInput()
02404 {
02405 if(m_trainOrig)
02406 {
02407 cout<<"Add a constant 1 column to the train feature matrix"<<endl;
02408 REAL* trainTmp = new REAL[m_nTrain*(m_nFeatures+1)];
02409 for(int i=0;i<m_nTrain;i++)
02410 {
02411 for(int j=0;j<m_nFeatures;j++)
02412 trainTmp[i*(m_nFeatures+1)+j] = m_trainOrig[i*m_nFeatures+j];
02413 trainTmp[i*(m_nFeatures+1)+m_nFeatures] = 1.0;
02414 }
02415 delete[] m_trainOrig;
02416 m_trainOrig = trainTmp;
02417 }
02418 if(m_testOrig)
02419 {
02420 cout<<"Add a constant 1 column to the test feature matrix"<<endl;
02421 REAL* testTmp = new REAL[m_nTest*(m_nFeatures+1)];
02422 for(int i=0;i<m_nTest;i++)
02423 {
02424 for(int j=0;j<m_nFeatures;j++)
02425 testTmp[i*(m_nFeatures+1)+j] = m_testOrig[i*m_nFeatures+j];
02426 testTmp[i*(m_nFeatures+1)+m_nFeatures] = 1.0;
02427 }
02428 delete[] m_testOrig;
02429 m_testOrig = testTmp;
02430 }
02431 m_nFeatures++;
02432 }