#include <AUC.h>
Public Member Functions | |
AUC () | |
~AUC () | |
REAL | getAUC (REAL *prediction, int *labels, int nClass, int nDomain, int nLines) |
Works for binary classification only. Input: prediction scores and class labels, where each label is either 0 or 1.
Definition at line 26 of file AUC.h.
AUC::AUC | ( | ) |
AUC::~AUC | ( | ) |
REAL AUC::getAUC ( REAL * prediction, int * labels, int nClass, int nDomain, int nLines )
Direct port of a MATLAB script to C++ (provided by Isabelle Guyon from KDDCup2009)
prediction | real-valued predictions (REAL* pointer) | |
labels | correct class labels (targets) (int* pointer) | |
nClass | number of classes | |
nDomain | number of domains | |
nLines | number of samples |
Definition at line 31 of file AUC.cpp.
00032 { 00033 if ( nClass != 2 ) 00034 assert ( false ); 00035 00036 double avgArea = 0.0; 00037 00038 for ( int d=0;d<nDomain;d++ ) 00039 { 00040 // translate it to a score 00041 double* score = new double[nLines]; 00042 int* index = new int[nLines]; 00043 int neg = 0, pos = 0; 00044 for ( int i=0;i<nLines;i++ ) 00045 { 00046 index[i] = i; 00047 //score[i] = prediction[d*2 + i*nDomain*2 + 1] - prediction[d*2 + i*nDomain*2 + 0]; 00048 score[i] = prediction[i+d*2*nLines + nLines] - prediction[i+d*2*nLines]; 00049 score[i] = -score[i]; 00050 if ( labels[i*nDomain+d] == 1 ) 00051 pos++; 00052 if ( labels[i*nDomain+d] == 0 ) 00053 neg++; 00054 } 00055 00056 // get pos and neg target index 00057 int* negIndex = new int[neg]; 00058 int* posIndex = new int[pos]; 00059 neg = 0; 00060 pos = 0; 00061 for ( int i=0;i<nLines;i++ ) 00062 { 00063 if ( labels[i*nDomain+d] == 1 ) 00064 { 00065 posIndex[pos] = i; 00066 pos++; 00067 } 00068 if ( labels[i*nDomain+d] == 0 ) 00069 { 00070 negIndex[neg] = i; 00071 neg++; 00072 } 00073 } 00074 00075 // sort scores 00076 ippsSortIndexAscend_64f_I ( score, index, nLines ); 00077 //quickSort(score, index, 0, nLines-1); 00078 00079 // translated from a MATLAB script... 
00080 double oldval = score[0]; 00081 double newval = score[0]; 00082 double* R = new double[nLines+1]; 00083 for ( int i=1;i<=nLines;i++ ) 00084 R[i] = i; 00085 int k0 = 1; 00086 for ( int k=2;k<=nLines;k++ ) // for k=2:n 00087 { 00088 newval = score[k-1]; // newval=u(k); 00089 if ( newval == oldval ) //if newval==oldval 00090 { 00091 double v = R[k-1]* ( double ) ( k-k0 ) / ( double ) ( k-k0+1.0 ) +R[k]/ ( double ) ( k-k0+1.0 ); 00092 00093 for ( int j=k0;j<=k;j++ ) 00094 { 00095 R[j] = v; 00096 } 00097 } 00098 else 00099 k0 = k; 00100 oldval = newval; 00101 } 00102 double* S = new double[nLines]; 00103 for ( int i=0;i<nLines;i++ ) 00104 S[index[i]]=R[i+1]; 00105 00106 00107 //SS=sort(S(negidx)); 00108 double* SS = new double[neg]; 00109 for ( int i=0;i<neg;i++ ) 00110 SS[i] = S[negIndex[i]]; 00111 //quickSort(SS,index,0,neg-1); 00112 ippsSortIndexAscend_64f_I ( SS, index, neg ); 00113 00114 //RR=[1:neg]; 00115 double* RR = new double[neg+1]; 00116 for ( int i=1;i<=neg;i++ ) 00117 RR[i] = i; 00118 00119 // SEN=(SS-RR)/pos; 00120 //area(kk)=sum(SEN)/neg; 00121 double area = 0.0; 00122 for ( int i=1;i<=neg;i++ ) 00123 area += ( SS[i-1]-RR[i] ) /pos; 00124 area /= neg; 00125 00126 if ( area <= 0.0 ) 00127 assert ( false ); 00128 if ( area >= 1.0 ) 00129 assert ( false ); 00130 if ( area < 0.5 ) 00131 area = 1.0 - area; 00132 00133 avgArea += area; 00134 00135 if ( score ) 00136 delete[] score; 00137 score = 0; 00138 if ( index ) 00139 delete[] index; 00140 index = 0; 00141 if ( negIndex ) 00142 delete[] negIndex; 00143 negIndex = 0; 00144 if ( posIndex ) 00145 delete[] posIndex; 00146 posIndex = 0; 00147 if ( R ) 00148 delete[] R; 00149 R = 0; 00150 if ( S ) 00151 delete[] S; 00152 S = 0; 00153 if ( SS ) 00154 delete[] SS; 00155 SS = 0; 00156 if ( RR ) 00157 delete[] RR; 00158 RR = 0; 00159 } 00160 00161 avgArea /= ( double ) nDomain; 00162 00163 return -avgArea; // we want to maximize the area 00164 }