00001 #ifndef __DATASETREADER_
00002 #define __DATASETREADER_
00003
00004 #include "Data.h"
00005 #include "Framework.h"
00006
00007 #include <math.h>
00008 #include <stdio.h>
00009 #include <stdlib.h>
00010 #include <sstream>
00011 #include <vector>
00012 #include <set>
00013 #include <algorithm>
00014 #include <string.h>
00015 #include <cstring>
00016
00017
00018 #define NETFLIX_DATA_DIR "./NETFLIX/DataFiles/tmp/"
00019 #define NETFLIX_SLOTDATA_ROOT_DIR "./NETFLIX/DataFiles/tmp2/"
00020
00021 using namespace std;
00022
00030 class DatasetReader : public Framework
00031 {
00032 public:
00033 DatasetReader();
00034 ~DatasetReader();
00035
00036
00037 void getDataBounds ( const char** filenames, string delimiter, int& nFeat, int& nClass, uint& nLines, char* columnType, char* enabledCol, int targetColumn, int filenameID, bool fillData = false, REAL* data = 0, int* labels = 0, bool addConstantOne = true, bool skipFirstLine = false );
00038
00039
00040 void splitRandomTestset ( REAL percentTest, REAL* data, int* labels, int nData, int nFeat, int nClass, REAL* &train, int* &trainLabel, REAL* &trainTarget, REAL* &test, int* &testLabel, REAL* &testTarget, uint& nTrain, uint& nTest, REAL positiveTarget, REAL negativeTarget, bool noRandom = false );
00041
00042
00043 void makeNumericTrainAndTestTargets ( int nClass, int nTrain, int nTest, REAL positiveTarget, REAL negativeTarget, int* trainLabel, int* testLabel, REAL* &trainTarget, REAL* &testTarget );
00044
00045
00046 void readMNIST ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00047
00048
00049 void readNETFLIX ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00050
00051
00052 void readKDDCup09Large ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00053 void readKDDCup09LargeBin ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00054 void readKDDCup09Small ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00055
00056
00057 void readAusDM2009 ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00058
00059
00060 void readBINARY ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00061
00062
00063 void readCSV ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00064
00065
00066 void readARFF ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00067
00068
00069 void readPRUDSYS ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00070
00071
00072
00073 void readADULT ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00074
00075 void readAUSTRALIAN ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00076
00077 void readBALANCE ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00078
00079 void readCYLINDERBANDS ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00080
00081 void readBREASTCANCERWISCONSIN ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00082
00083 void readAUSTRALIANCREDIT ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00084
00085 void readDIABETES ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00086
00087 void readGERMAN ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00088
00089 void readGLASS ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00090
00091 void readHEART ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00092
00093 void readHEPATITIS ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00094
00095 void readIONOSPHERE ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00096
00097 void readIRIS ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00098
00099 void readLETTER ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00100
00101 void readMONKS1 ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00102
00103 void readMONKS2 ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00104
00105 void readMONKS3 ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00106
00107 void readMUSHROOM ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00108
00109 void readSATIMAGE ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00110
00111 void readSEGMENTATION ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00112
00113 void readSONAR ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00114
00115 void readVEHICLE ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00116
00117 void readVOTES ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00118
00119 void readWINE ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00120
00121 void readPOKER ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00122
00123 void readYEAST ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00124
00125 void readSURVIVAL ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00126
00127
00128 void readSPIDER ( string path, REAL* &train, REAL* &trainTarget, int* &trainLabel, REAL* &test, REAL* &testTarget, int* &testLabel, uint& nTrain, uint& nTest, int& nClass, int& nDomain, int& nFeat, REAL positiveTarget = 1.0, REAL negativeTarget = -1.0 );
00129 };
00130
00131
00132 #endif