PolynomialRegression Class Reference

#include <PolynomialRegression.h>

Inheritance diagram for PolynomialRegression:

StandardAlgorithm Framework Algorithm AutomaticParameterTuner AUC Framework Framework Data Framework Framework Framework

List of all members.

Public Member Functions

 PolynomialRegression ()
 ~PolynomialRegression ()
virtual void modelInit ()
virtual void modelUpdate (REAL *input, REAL *target, uint nSamples, uint crossRun)
virtual void predictAllOutputs (REAL *rawInputs, REAL *outputs, uint nSamples, uint crossRun)
virtual void readSpecificMaps ()
virtual void saveWeights (int cross)
virtual void loadWeights (int cross)
virtual void loadMetaWeights (int cross)

Static Public Member Functions

static string templateGenerator (int id, string preEffect, int nameID, bool blendStop)

Private Member Functions

REAL power (REAL x, int e)

Private Attributes

REAL ** m_x
double m_reg
int m_inputDim
NumericalTools solver
REAL * m_polyMean
REAL * m_polyStd
int m_polyOrder
bool m_enableCrossInteractions


Detailed Description

Polynomial prediction model. This is nothing more than linear regression with an extension of the feature space. Cross interactions enable interaction between input features, but increase the dimensionality with O(n^2), so this only works for a few input features.

Normalization of the extended feature space is done internally.

The only tunable parameter is the regularization constant.

Definition at line 19 of file PolynomialRegression.h.


Constructor & Destructor Documentation

PolynomialRegression::PolynomialRegression (  ) 

Constructor

Definition at line 8 of file PolynomialRegression.cpp.

00009 {
          // Constructor: prints the class name to stdout and puts every member
          // declared by this class into a "nothing allocated / not configured" state.
00010     cout<<"PolynomialRegression"<<endl;
00011     // init member vars
          // m_x doubles as the "buffers already allocated" flag: modelInit() only
          // allocates when m_x == 0, and the destructor only frees non-null pointers.
00012     m_x = 0;
00013     m_reg = 0;
00014     m_polyMean = 0;
00015     m_polyStd = 0;
00016     m_polyOrder = 0;
          // bool member initialised from the integer literal 0 (i.e. false)
00017     m_enableCrossInteractions = 0;
00018     m_inputDim = 0;
00019 }

PolynomialRegression::~PolynomialRegression (  ) 

Destructor

Definition at line 24 of file PolynomialRegression.cpp.

00025 {
          // Destructor: releases the per-cross-run weight arrays, the array of
          // pointers holding them, and the mean/std normalisation buffers.
          // The "descructor" spelling below is part of the emitted log text.
00026     cout<<"descructor PolynomialRegression"<<endl;
          // m_x holds m_nCross+1 slots (same count used when it is allocated in
          // modelInit()/loadWeights()). m_nCross is not declared in this class;
          // presumably inherited from a base class - verify.
00027     for ( int i=0;i<m_nCross+1;i++ )
00028     {
              // m_x is loop-invariant; the test is repeated per iteration only to
              // guard the m_x[i] access when nothing was ever allocated.
00029         if ( m_x )
00030         {
                  // delete[] on a null pointer is a no-op, so this inner check is
                  // redundant but harmless (slots may be null after loadWeights()).
00031             if ( m_x[i] )
00032                 delete[] m_x[i];
00033             m_x[i] = 0;
00034         }
00035     }
00036     if ( m_x )
00037         delete[] m_x;
00038     m_x = 0;
00039     if ( m_polyMean )
00040         delete[] m_polyMean;
00041     m_polyMean = 0;
00042     if ( m_polyStd )
00043         delete[] m_polyStd;
00044     m_polyStd = 0;
00045 }


Member Function Documentation

void PolynomialRegression::loadWeights ( int  cross  )  [virtual]

Load the weights and all other parameters, making the model ready to predict.

Implements StandardAlgorithm.

Definition at line 410 of file PolynomialRegression.cpp.

00411 {
          // Restores the model for cross-validation run `cross` from the file
          // <m_datasetPath>/<m_tempPath>/<m_weightFile>.<cross as 2 digits>.
          // The fields are read in exactly the order saveWeights() writes them:
          // m_nTrain, m_nFeatures, m_nClass, m_nDomain, m_inputDim, the weight
          // matrix for this run, m_polyMean, m_polyStd, m_maxSwing, m_reg.
00412     char buf[1024];
00413     sprintf ( buf,"%02d",cross );
00414     string name = m_datasetPath + "/" + m_tempPath + "/" + m_weightFile + "." + buf;
00415     cout<<"Load:"<<name<<endl;
          // NOTE(review): the stream is opened without ios::binary although raw
          // bytes are read; this matches saveWeights(), so change both together.
00416     fstream f ( name.c_str(), ios::in );
          // NOTE(review): with NDEBUG defined this assert vanishes and the reads
          // below operate on an unopened stream - consider a hard error instead.
00417     if ( f.is_open() == false )
00418         assert ( false );
00419     f.read ( ( char* ) &m_nTrain, sizeof ( int ) );
00420     f.read ( ( char* ) &m_nFeatures, sizeof ( int ) );
00421     f.read ( ( char* ) &m_nClass, sizeof ( int ) );
00422     f.read ( ( char* ) &m_nDomain, sizeof ( int ) );
00423     f.read ( ( char* ) &m_inputDim, sizeof ( int ) );
00424 
          // Fix: free buffers left over from modelInit() or a previous
          // loadWeights() call before overwriting the pointers; the original
          // reallocated unconditionally and leaked them. delete[] on a null
          // pointer is a no-op, so only the m_x[i] dereference needs a guard.
          if ( m_x )
          {
              for ( int i=0;i<m_nCross+1;i++ )
                  delete[] m_x[i];
              delete[] m_x;
          }
          delete[] m_polyMean;
          delete[] m_polyStd;

          // Only the slot for `cross` receives weights; every other slot stays
          // null, which the destructor tolerates.
00425     m_x = new REAL*[m_nCross+1];
00426     for ( int i=0;i<m_nCross+1;i++ )
00427         m_x[i] = 0;
          // (m_inputDim + 1): one weight per expanded feature plus the bias,
          // for each of the m_nClass * m_nDomain outputs.
00428     m_x[cross] = new REAL[ ( m_inputDim + 1 ) * m_nClass * m_nDomain];
00429     m_polyMean = new REAL[m_inputDim];
00430     m_polyStd = new REAL[m_inputDim];
00431 
00432     f.read ( ( char* ) m_x[cross], sizeof ( REAL ) * ( m_inputDim+1 ) *m_nClass*m_nDomain );
00433     f.read ( ( char* ) m_polyMean, sizeof ( REAL ) *m_inputDim );
00434     f.read ( ( char* ) m_polyStd, sizeof ( REAL ) *m_inputDim );
00435     f.read ( ( char* ) &m_maxSwing, sizeof ( double ) );
00436     f.read ( ( char* ) &m_reg, sizeof ( double ) );
00437     f.close();
00438 }

void PolynomialRegression::modelInit (  )  [virtual]

Init the PolyReg Model

Implements StandardAlgorithm.

Definition at line 62 of file PolynomialRegression.cpp.

00063 {
          // Registers the tunable parameter, derives the size of the expanded
          // feature space (m_inputDim) and allocates weight/normalisation buffers.
00064     // add the tunable parameter
          // The vectors receive a pointer to m_reg and its name "reg".
          // NOTE(review): entries are appended on every call; presumably
          // modelInit() runs once per object - verify against the caller.
00065     paramDoubleValues.push_back ( &m_reg );
00066     paramDoubleNames.push_back ( "reg" );
00067 
          // Without cross interactions: one column per (order, feature) pair.
00068     m_inputDim = m_polyOrder * m_nFeatures;
00069 
00070     if ( m_enableCrossInteractions )
00071     {
00072         m_inputDim = 0;
00073 
00074         // cross-interactions
              // Counts the (i, j) pairs with j >= i, where i runs over D values
              // and j over D+1 values (D = m_polyOrder*m_nFeatures). This yields
              // D*(D+3)/2 columns and must enumerate exactly like the loops in
              // modelUpdate() and predictAllOutputs(), which assert on the count.
00075         for ( int i=0;i<m_polyOrder*m_nFeatures;i++ )
00076             for ( int j=0;j<m_polyOrder*m_nFeatures+1;j++ )
00077                 if ( j >= i )
00078                     m_inputDim++;
00079     }
00080 
00081     cout<<"Input dimension:"<<m_inputDim<<endl;
00082 
00083     // alloc mem for weights
          // Allocation happens only while m_x is still null.
          // NOTE(review): if m_x is already set, buffers sized for an earlier
          // m_inputDim are kept even when m_inputDim has just changed.
00084     if ( m_x == 0 )
00085     {
              // One weight block per cross-validation run plus one extra slot
              // (m_nCross+1); each holds (m_inputDim + 1) weights per output,
              // the +1 being the constant/bias input.
00086         m_x = new REAL*[m_nCross+1];
00087         for ( int i=0;i<m_nCross+1;i++ )
00088             m_x[i] = new REAL[ ( m_inputDim + 1 ) * m_nClass * m_nDomain];
00089 
00090         // new mean/std
00091         m_polyMean = new REAL[m_inputDim];
00092         m_polyStd = new REAL[m_inputDim];
00093     }
00094 }

void PolynomialRegression::modelUpdate ( REAL *  input,
REAL *  target,
uint  nSamples,
uint  crossRun 
) [virtual]

Make a model update, set the new cross validation set or set the whole training set for retraining

Parameters:
input Pointer to input (can be cross validation set, or whole training set) (rows x nFeatures)
target The targets (can be cross validation targets)
nSamples The sample size (rows) in input
crossRun The cross validation run (for training)

Implements StandardAlgorithm.

Definition at line 181 of file PolynomialRegression.cpp.

00182 {
          // Fits the weights for cross-validation run `crossRun`:
          //   1. compute mean and standard deviation of every expanded feature,
          //   2. build the normalised design matrix (plus a constant 1.0 column),
          //   3. hand it to the ridge-regression solver, which fills m_x[crossRun].
          // m_polyMean / m_polyStd are overwritten on every call, so they always
          // describe the data of the most recent modelUpdate().
00183     REAL x,y;
00184     REAL* crossTrain = 0;
00185 
00186     if ( m_enableCrossInteractions )
00187     {
              // min/max trackers only feed the commented-out debug print below.
00188         double minStd = 1e10, maxStd = -1e10, minMean = 1e10, maxMean = -1e10;
00189 
00190         // cross-interactions
              // Enumerates pairs (ii, jj) with jj >= ii; `pos` is the column index
              // of the resulting feature. jj == m_polyOrder*m_nFeatures is the
              // extra column where the ii-term is used alone (no product).
00191         int pos = 0;
00192         for ( int ii=0;ii<m_polyOrder*m_nFeatures;ii++ )
00193             for ( int jj=0;jj<m_polyOrder*m_nFeatures+1;jj++ )
00194                 if ( jj >= ii )
00195                 {
                          // NOTE(review): feature index and order are both taken
                          // as remainders of the same counter (ii % m_nFeatures,
                          // ii % m_polyOrder), so distinct ii can map to the same
                          // (feature, order) pair, e.g. when both moduli are 2.
                          // predictAllOutputs() uses the identical mapping, so
                          // training and prediction agree; confirm it is intended.
00196                     int index0 = ii%m_nFeatures;
00197                     int index1 = jj%m_nFeatures;
00198 
00199                     int order0 = ( ii%m_polyOrder ) + 1;
00200                     int order1 = ( jj%m_polyOrder ) + 1;
00201 
00202                     // mean
                          // `val` is declared here but never used in this branch.
00203                     double mean = 0.0, val;
                          // int j is compared against the unsigned nSamples.
00204                     for ( int j=0;j<nSamples;j++ )
00205                     {
00206                         x = input[j*m_nFeatures + index0];
00207                         y = input[j*m_nFeatures + index1];
00208 
00209                         x = power ( x,order0 );
00210                         y = power ( y,order1 );
00211 
                              // y is computed but ignored for the extra column.
00212                         if ( jj < m_polyOrder*m_nFeatures )
00213                             x = x * y;
00214 
00215                         mean += x;
00216                     }
00217                     mean /= ( double ) nSamples;
00218 
00219                     // standard deviation
00220                     double std = 0.0;
00221                     for ( int j=0;j<nSamples;j++ )
00222                     {
00223                         x = input[j*m_nFeatures + index0];
00224                         y = input[j*m_nFeatures + index1];
00225 
00226                         x = power ( x,order0 );
00227                         y = power ( y,order1 );
00228 
00229                         if ( jj < m_polyOrder*m_nFeatures )
00230                             x = x * y;
00231 
00232                         std += ( mean - x ) * ( mean - x );
00233                     }
                          // Sample standard deviation (divides by nSamples-1).
                          // NOTE(review): nSamples is unsigned; nSamples == 1 gives
                          // 0/0 and nSamples == 0 wraps around.
00234                     std = sqrt ( std/ ( double ) ( nSamples-1 ) );
                          // Lower bound so the later division by std is safe.
00235                     if ( std < m_standardDeviationMin )
00236                         std = m_standardDeviationMin;
00237 
00238                     minStd = minStd > std? std : minStd;
00239                     maxStd = maxStd < std? std : maxStd;
00240                     minMean = minMean > mean? mean : minMean;
00241                     maxMean = maxMean < mean? mean : maxMean;
00242 
00243                     // save them
00244                     m_polyMean[pos] = mean;
00245                     m_polyStd[pos] = std;
00246                     pos++;
00247                 }
00248 
00249         //cout<<"Min|Max mean: "<<minMean<<"|"<<maxMean<<"   Min|Max std: "<<minStd<<"|"<<maxStd<<endl<<endl;
00250 
00251 
              // The enumeration must yield exactly the count modelInit() computed.
00252         if ( pos != m_inputDim )
00253             assert ( false );
00254 
00255         // apply to trainset
              // Row-major design matrix: nSamples rows of m_inputDim normalised
              // features followed by one constant column.
00256         crossTrain = new REAL[nSamples* ( m_inputDim+1 ) ];
00257         for ( int i=0;i<nSamples;i++ )
00258         {
00259             REAL* inputPtr = input + i*m_nFeatures;
00260             REAL* trainPtr = crossTrain + i* ( m_inputDim + 1 );
00261 
00262             // cross-interactions
                  // Same enumeration as above, so `pos` addresses matching
                  // mean/std entries.
00263             pos = 0;
00264             for ( int ii=0;ii<m_polyOrder*m_nFeatures;ii++ )
00265                 for ( int jj=0;jj<m_polyOrder*m_nFeatures+1;jj++ )
00266                     if ( jj >= ii )
00267                     {
00268                         int index0 = ii%m_nFeatures;
00269                         int index1 = jj%m_nFeatures;
00270 
00271                         int order0 = ( ii%m_polyOrder ) + 1;
00272                         int order1 = ( jj%m_polyOrder ) + 1;
00273 
00274                         x = inputPtr[index0];
00275                         y = inputPtr[index1];
00276 
00277                         x = power ( x,order0 );
00278                         y = power ( y,order1 );
00279 
00280                         if ( jj < m_polyOrder*m_nFeatures )
00281                             x = x * y;
00282 
00283                         trainPtr[pos] = ( x - m_polyMean[pos] ) / m_polyStd[pos];
00284                         pos++;
00285                     }
                  // Constant input in the last column (bias term).
00286             trainPtr[pos] = 1.0;
00287 
00288             if ( pos != m_inputDim )
00289                 assert ( false );
00290         }
00291 
00292         // solve the linear system
              // Arguments: design matrix, targets, output weights, row count,
              // column count (features + constant), number of outputs, m_reg.
              // NOTE(review): meaning of the trailing `true` is not visible here.
00293         solver.RidgeRegressionMultisolutionSinglecall ( crossTrain, target, m_x[crossRun], nSamples, m_inputDim + 1, m_nClass*m_nDomain, m_reg, true );
00294     }
00295     else  // no feature interaction
00296     {
              // Columns are laid out order-major: column order*m_nFeatures + i
              // holds feature i raised to (order+1); the last column is constant.
00297         crossTrain = new REAL[nSamples* ( m_polyOrder*m_nFeatures+1 ) ];
00298 
00299         //cout<<endl<<"Calculate new mean/std for all poly orders of input features x^(1.."<<m_polyOrder<<")"<<endl;
00300         double minStd = 1e10, maxStd = -1e10, minMean = 1e10, maxMean = -1e10;
00301         for ( int order=0;order<m_polyOrder;order++ )
00302         {
00303             for ( int i=0;i<m_nFeatures;i++ )
00304             {
00305                 // mean
00306                 double mean = 0.0, val;
00307                 for ( int j=0;j<nSamples;j++ )
00308                 {
00309                     val = input[j*m_nFeatures + i];
00310                     mean += power ( val, order+1 );
00311                 }
00312                 mean /= ( double ) nSamples;
00313 
00314                 // standard deviation
00315                 double std = 0.0;
00316                 for ( int j=0;j<nSamples;j++ )
00317                 {
00318                     val = input[j*m_nFeatures + i];
00319                     val = power ( val, order+1 );
00320                     std += ( mean - val ) * ( mean - val );
00321                 }
                      // Same sample-std formula and lower clamp as the
                      // cross-interaction branch (same nSamples <= 1 caveat).
00322                 std = sqrt ( std/ ( double ) ( nSamples-1 ) );
00323                 if ( std < m_standardDeviationMin )
00324                     std = m_standardDeviationMin;
00325 
00326                 minStd = minStd > std? std : minStd;
00327                 maxStd = maxStd < std? std : maxStd;
00328                 minMean = minMean > mean? mean : minMean;
00329                 maxMean = maxMean < mean? mean : maxMean;
00330 
00331                 // save them
00332                 m_polyMean[order*m_nFeatures + i] = mean;
00333                 m_polyStd[order*m_nFeatures + i] = std;
00334             }
00335         }
00336         //cout<<"Min|Max mean: "<<minMean<<"|"<<maxMean<<"   Min|Max std: "<<minStd<<"|"<<maxStd<<endl<<endl;
00337 
00338         // copy train + add a constant input
00339         for ( int i=0;i<nSamples;i++ )
00340         {
00341             for ( int order=0;order<m_polyOrder;order++ )
00342             {
                      // Start of this sample's block of m_nFeatures columns for
                      // the current order.
00343                 int index = i* ( m_polyOrder*m_nFeatures + 1 ) + order * m_nFeatures;
00344                 REAL* inputPtr = input + i*m_nFeatures;
00345                 REAL* meanPtr = m_polyMean + order*m_nFeatures;
00346                 REAL* stdPtr = m_polyStd + order*m_nFeatures;
00347                 REAL* featurePtr = crossTrain + index;
00348                 for ( int k=0;k<m_nFeatures;k++ )
00349                 {
00350                     x = power ( inputPtr[k], order+1 );
00351                     x = ( x - meanPtr[k] ) / stdPtr[k];
00352                     featurePtr[k] = x;
00353                 }
00354             }
                  // Constant input in the last column of the row.
00355             crossTrain[i* ( m_polyOrder*m_nFeatures + 1 ) + m_polyOrder*m_nFeatures] = 1.0;
00356         }
00357 
00358         // solve the linear system
00359         solver.RidgeRegressionMultisolutionSinglecall ( crossTrain, target, m_x[crossRun], nSamples, m_polyOrder * m_nFeatures + 1, m_nClass*m_nDomain, m_reg, true );
00360     }
          // The design matrix is temporary; only m_x[crossRun] and the mean/std
          // buffers survive the call.
00361     if ( crossTrain )
00362         delete[] crossTrain;
00363     crossTrain = 0;
00364 }

REAL PolynomialRegression::power ( REAL  x,
int  e 
) [private]

Computes the integer power of input x

Parameters:
x Input
e Power, e>=1
Returns:
x^e

Definition at line 373 of file PolynomialRegression.cpp.

00374 {
          // Integer power by repeated multiplication: starts from x (x^1) and
          // multiplies e-1 more times. For e <= 1 the loop does not run and x is
          // returned unchanged, so e == 0 yields x rather than 1 - callers in
          // this class always pass e >= 1.
00375     REAL tmp = x;
00376     for ( int i=1;i<e;i++ )
00377         tmp *= x;
00378     return tmp;
00379 }

void PolynomialRegression::predictAllOutputs ( REAL *  rawInputs,
REAL *  outputs,
uint  nSamples,
uint  crossRun 
) [virtual]

Prediction for outside use, predicts outputs based on raw input values

Parameters:
rawInputs The input feature, without normalization (raw)
outputs The output value (prediction of target)
nSamples The input size (number of rows)
crossRun Number of cross validation run (in training)

Implements StandardAlgorithm.

Definition at line 104 of file PolynomialRegression.cpp.

00105 {
          // Predicts all m_nClass*m_nDomain outputs for each of the nSamples raw
          // input rows using the weights of run `crossRun`. Each raw row is
          // expanded and normalised with m_polyMean/m_polyStd exactly as in
          // modelUpdate(), then combined linearly with the weights.
          //
          // Weight layout in m_x[crossRun]: the weight of expanded feature f for
          // output j lives at index f*(m_nClass*m_nDomain) + j, and the bias for
          // output j sits in the final row at m_inputDim*(m_nClass*m_nDomain) + j.
00106     REAL x,y;
00107 
00108     for ( int i=0;i<nSamples;i++ )
00109     {
00110         for ( int j=0;j<m_nClass*m_nDomain;j++ )
00111         {
00112             if ( m_enableCrossInteractions )
00113             {
00114                 REAL sum = 0.0;
00115                 REAL* inputPtr = rawInputs + i*m_nFeatures;
                      // Offset by j once, so every access below only needs the
                      // row stride m_nClass*m_nDomain.
00116                 REAL* xPtr = m_x[crossRun] + j;
00117 
00118                 // cross-interactions
                      // Must enumerate (ii, jj) in the same order as modelUpdate()
                      // so `pos` selects matching mean/std/weight entries.
00119                 int pos = 0;
00120                 for ( int ii=0;ii<m_polyOrder*m_nFeatures;ii++ )
00121                     for ( int jj=0;jj<m_polyOrder*m_nFeatures+1;jj++ )
00122                         if ( jj >= ii )
00123                         {
00124                             int index0 = ii%m_nFeatures;
00125                             int index1 = jj%m_nFeatures;
00126 
00127                             int order0 = ( ii%m_polyOrder ) + 1;
00128                             int order1 = ( jj%m_polyOrder ) + 1;
00129 
00130                             x = inputPtr[index0];
00131                             y = inputPtr[index1];
00132 
00133                             x = power ( x,order0 );
00134                             y = power ( y,order1 );
00135 
                                  // The extra column (jj == D) uses the ii-term alone.
00136                             if ( jj < m_polyOrder*m_nFeatures )
00137                                 x = x * y;
00138 
00139                             x = ( x - m_polyMean[pos] ) / m_polyStd[pos];
00140                             sum += x * xPtr[pos*m_nClass*m_nDomain];
00141                             pos++;
00142                         }
00143 
                      // Fix: the bias is stored in row `pos` (== m_inputDim) of the
                      // weight matrix, so it needs the same row stride as the
                      // feature weights. The original read xPtr[pos], which is the
                      // wrong element whenever m_nClass*m_nDomain > 1 (the
                      // non-interaction branch below already indexes the bias with
                      // the stride).
00144                 sum += xPtr[pos*m_nClass*m_nDomain];
00145                 if ( pos != m_inputDim )
00146                     assert ( false );
00147 
00148                 outputs[i*m_nClass*m_nDomain + j] = sum;
00149             }
00150             else  // no feature interaction
00151             {
                      // Start from the bias weight (constant input 1.0), stored
                      // after the m_polyOrder*m_nFeatures feature rows.
00152                 REAL sum = 1.0 * m_x[crossRun][m_polyOrder*m_nFeatures*m_nClass*m_nDomain + j];
00153                 for ( int order=0;order<m_polyOrder;order++ )
00154                 {
00155                     REAL* inputPtr = rawInputs + i*m_nFeatures;
00156                     REAL* meanPtr = m_polyMean + order*m_nFeatures;
00157                     REAL* stdPtr = m_polyStd + order*m_nFeatures;
                          // First weight row of this order's block, offset to output j.
00158                     REAL* xPtr = m_x[crossRun] + order*m_nClass*m_nDomain*m_nFeatures + j;
00159                     for ( int k=0;k<m_nFeatures;k++ )
00160                     {
00161                         x = power ( inputPtr[k], order+1 );
00162                         x = ( x - meanPtr[k] ) / stdPtr[k];
00163                         sum += x * xPtr[k*m_nClass*m_nDomain];
00164                     }
00165                 }
00166                 outputs[i*m_nClass*m_nDomain + j] = sum;
00167             }
00168         }
00169     }
00170 }

void PolynomialRegression::readSpecificMaps (  )  [virtual]

Read the Algorithm specific values from the description file

Implements StandardAlgorithm.

Definition at line 51 of file PolynomialRegression.cpp.

00052 {
          // Copies the algorithm-specific settings out of the typed key/value
          // maps: polynomial order, initial regularisation constant, and the
          // cross-interaction switch.
          // NOTE(review): if these are std::map-like containers, operator[] on a
          // missing key inserts and returns a default (0 / false) - verify.
00053     m_polyOrder = m_intMap["polyOrder"];
00054     m_reg = m_doubleMap["initReg"];
00055     m_enableCrossInteractions = m_boolMap["enableCrossInteractions"];
00056 }

void PolynomialRegression::saveWeights ( int  cross  )  [virtual]

Save the weights and all other parameters needed to load the complete prediction model

Implements StandardAlgorithm.

Definition at line 385 of file PolynomialRegression.cpp.

00386 {
          // Writes the model of cross-validation run `cross` to
          // <m_datasetPath>/<m_tempPath>/<m_weightFile>.<cross as 2 digits>.
          // Field order (must stay in sync with loadWeights()): m_nTrain,
          // m_nFeatures, m_nClass, m_nDomain, m_inputDim, weights of this run,
          // m_polyMean, m_polyStd, m_maxSwing, m_reg.
00387     char buf[1024];
00388     sprintf ( buf,"%02d",cross );
00389     string name = m_datasetPath + "/" + m_tempPath + "/" + m_weightFile + "." + buf;
          // The file name is only logged while m_inRetraining is set.
00390     if ( m_inRetraining )
00391         cout<<"Save:"<<name<<endl;
          // NOTE(review): raw bytes go through a stream opened without
          // ios::binary, and unlike loadWeights() there is no is_open() check, so
          // a failed open is silently ignored. Change the open mode only together
          // with loadWeights().
00392     fstream f ( name.c_str(), ios::out );
00393     f.write ( ( char* ) &m_nTrain, sizeof ( int ) );
00394     f.write ( ( char* ) &m_nFeatures, sizeof ( int ) );
00395     f.write ( ( char* ) &m_nClass, sizeof ( int ) );
00396     f.write ( ( char* ) &m_nDomain, sizeof ( int ) );
00397     f.write ( ( char* ) &m_inputDim, sizeof ( int ) );
          // (m_inputDim+1) weights per output: expanded features plus the bias.
00398     f.write ( ( char* ) m_x[cross], sizeof ( REAL ) * ( m_inputDim+1 ) *m_nClass*m_nDomain );
00399     f.write ( ( char* ) m_polyMean, sizeof ( REAL ) *m_inputDim );
00400     f.write ( ( char* ) m_polyStd, sizeof ( REAL ) *m_inputDim );
00401     f.write ( ( char* ) &m_maxSwing, sizeof ( double ) );
00402     f.write ( ( char* ) &m_reg, sizeof ( double ) );
00403     f.close();
00404 }

string PolynomialRegression::templateGenerator ( int  id,
string  preEffect,
int  nameID,
bool  blendStop 
) [static]

Generates a template of the description file

Returns:
The template string

Definition at line 475 of file PolynomialRegression.cpp.

00476 {
          // Builds the text of a default description file for this algorithm and
          // returns it as a string. `id` fills ID=, `preEffect` fills
          // TRAIN_ON_FULLPREDICTOR=, `nameID` is embedded in the two file names,
          // and `blendStop` selects minimzeBlend (true) or minimzeProbe (false).
          //
          // NOTE(review): several keys are misspelled ("maxTuninigEpochs",
          // "minimze..."). They are runtime strings, presumably matched verbatim
          // by the description-file reader - do not correct them here without
          // checking the parser.
00477     stringstream s;
00478     s<<"ALGORITHM=PolynomialRegression"<<endl;
00479     s<<"ID="<<id<<endl;
00480     s<<"TRAIN_ON_FULLPREDICTOR="<<preEffect<<endl;
00481     s<<"DISABLE=0"<<endl;
00482     s<<endl;
00483     s<<"[int]"<<endl;
00484     s<<"polyOrder=2"<<endl;
00485     s<<"maxTuninigEpochs=20"<<endl;
00486     s<<endl;
00487     s<<"[double]"<<endl;
00488     s<<"initMaxSwing=1.0"<<endl;
00489     s<<"initReg=1e-3"<<endl;
00490     s<<endl;
00491     s<<"[bool]"<<endl;
00492     s<<"enableCrossInteractions=0"<<endl;
00493     s<<"enableClipping=1"<<endl;
00494     s<<"enableTuneSwing=0"<<endl;
00495     s<<endl;
          // Still within the [bool] section (no new section header was written):
          // exactly one of minimzeProbe / minimzeBlend is 1.
00496     s<<"minimzeProbe="<< ( !blendStop ) <<endl;
00497     s<<"minimzeProbeClassificationError=0"<<endl;
00498     s<<"minimzeBlend="<<blendStop<<endl;
00499     s<<"minimzeBlendClassificationError=0"<<endl;
00500     s<<endl;
00501     s<<"[string]"<<endl;
00502     s<<"weightFile=PolynomialRegression_"<<nameID<<"_weights.dat"<<endl;
00503     s<<"fullPrediction=PolynomialRegression_"<<nameID<<".dat"<<endl;
00504 
00505     return s.str();
00506 }


The documentation for this class was generated from the following files:

Generated on Tue Jan 26 09:21:16 2010 for ELF by  doxygen 1.5.8