SNAP Library 2.0, Developer Reference
2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <agm.h>
Public Member Functions | |
TLogRegFit () | |
~TLogRegFit () | |
PLogRegPredict | CalcLogRegGradient (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false) |
PLogRegPredict | CalcLogRegNewton (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false) |
int | MLEGradient (const double &ChangeEps, const int &MaxStep, const TStr PlotNm) |
int | MLENewton (const double &ChangeEps, const int &MaxStep, const TStr PlotNm) |
double | GetStepSizeByLineSearch (const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta) |
double | Likelihood (const TFltV &NewTheta) |
double | Likelihood () |
void | Gradient (TFltV &GradV) |
void | Hessian (TFltVV &HVV) |
void | GetNewtonStep (TFltVV &HVV, const TFltV &GradV, TFltV &DeltaLV) |
Private Attributes | |
TVec< TFltV > | X |
TFltV | Y |
TFltV | Theta |
int | M |
TLogRegFit::TLogRegFit | ( | ) | [inline] |
TLogRegFit::~TLogRegFit | ( | ) | [inline] |
PLogRegPredict TLogRegFit::CalcLogRegGradient | ( | const TVec< TFltV > & | XPt, |
const TFltV & | yPt, | ||
const TStr & | PlotNm = TStr() , |
||
const double & | ChangeEps = 0.01 , |
||
const int & | MaxStep = 200 , |
||
const bool | InterceptPt = false |
||
) |
Definition at line 881 of file agm.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Gen(), IAssert, TVec< TVal, TSizeTy >::Len(), M, MLEGradient(), Theta, X, and Y.
{ X = XPt; Y = yPt; IAssert(X.Len() == Y.Len()); if (Intercept == false) { // if intercept is not included, add it for (int r = 0; r < X.Len(); r++) { X[r].Add(1); } } M = X[0].Len(); for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); } for (int r = 0; r < Y.Len(); r++) { if (Y[r] >= 0.99999) { Y[r] = 0.99999; } if (Y[r] <= 0.00001) { Y[r] = 0.00001; } } Theta.Gen(M); MLEGradient(ChangeEps, MaxStep, PlotNm); return new TLogRegPredict(Theta); };
PLogRegPredict TLogRegFit::CalcLogRegNewton | ( | const TVec< TFltV > & | XPt, |
const TFltV & | yPt, | ||
const TStr & | PlotNm = TStr() , |
||
const double & | ChangeEps = 0.01 , |
||
const int & | MaxStep = 200 , |
||
const bool | InterceptPt = false |
||
) |
Definition at line 862 of file agm.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Gen(), IAssert, TVec< TVal, TSizeTy >::Len(), M, MLENewton(), Theta, X, and Y.
Referenced by TAGMUtil::FindComsByAGM().
{ X = XPt; Y = yPt; IAssert(X.Len() == Y.Len()); if (Intercept == false) { // if intercept is not included, add it for (int r = 0; r < X.Len(); r++) { X[r].Add(1); } } M = X[0].Len(); for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); } for (int r = 0; r < Y.Len(); r++) { if (Y[r] >= 0.99999) { Y[r] = 0.99999; } if (Y[r] <= 0.00001) { Y[r] = 0.00001; } } Theta.Gen(M); MLENewton(ChangeEps, MaxStep, PlotNm); return new TLogRegPredict(Theta); };
void TLogRegFit::GetNewtonStep | ( | TFltVV & | HVV, |
const TFltV & | GradV, | ||
TFltV & | DeltaLV | ||
) |
Definition at line 698 of file agm.cpp.
References TVVec< TVal >::GetXDim(), TVec< TVal, TSizeTy >::Len(), TNumericalStuff::SolveSymetricSystem(), and Theta.
Referenced by MLENewton().
{ bool HSingular = false; for (int i = 0; i < HVV.GetXDim(); i++) { if (HVV(i,i) == 0.0) { HVV(i,i) = 0.001; HSingular = true; } DeltaLV[i] = GradV[i] / HVV(i, i); } if (! HSingular) { if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite for (int r = 0; r < Theta.Len(); r++) { for (int c = 0; c < Theta.Len(); c++) { HVV(r, c) = - HVV(r, c); } } TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV); } else { TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV); for (int i = 0; i < DeltaLV.Len(); i++) { DeltaLV[i] = - DeltaLV[i]; } } } }
double TLogRegFit::GetStepSizeByLineSearch | ( | const TFltV & | DeltaV, |
const TFltV & | GradV, | ||
const double & | Alpha, | ||
const double & | Beta | ||
) |
Definition at line 817 of file agm.cpp.
References TLinAlg::DotProduct(), IAssert, TVec< TVal, TSizeTy >::Len(), Likelihood(), and Theta.
Referenced by MLEGradient(), and MLENewton().
{ double StepSize = 1.0; double InitLikelihood = Likelihood(); IAssert(Theta.Len() == DeltaV.Len()); TFltV NewThetaV(Theta.Len()); double MinVal = -1e10, MaxVal = 1e10; for(int iter = 0; ; iter++) { for (int i = 0; i < Theta.Len(); i++){ NewThetaV[i] = Theta[i] + StepSize * DeltaV[i]; if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal; } if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; } } if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) { StepSize *= Beta; } else { break; } } return StepSize; }
void TLogRegFit::Gradient | ( | TFltV & | GradV | ) |
Definition at line 849 of file agm.cpp.
References TVec< TVal, TSizeTy >::Gen(), TLogRegPredict::GetCfy(), TVec< TVal, TSizeTy >::Len(), M, Theta, X, and Y.
Referenced by MLEGradient(), and MLENewton().
{ TFltV OutV; TLogRegPredict::GetCfy(X, OutV, Theta); GradV.Gen(M); for (int r = 0; r < X.Len(); r++) { //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val); for (int m = 0; m < M; m++) { GradV[m] += (Y[r] - OutV[r]) * X[r][m]; } } //for (int m = 0; m < M; m++) { printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); } }
void TLogRegFit::Hessian | ( | TFltVV & | HVV | ) |
Definition at line 726 of file agm.cpp.
References TVVec< TVal >::At(), TVVec< TVal >::Gen(), TLogRegPredict::GetCfy(), TVec< TVal, TSizeTy >::Len(), Theta, and X.
Referenced by MLENewton().
{ HVV.Gen(Theta.Len(), Theta.Len()); TFltV OutV; TLogRegPredict::GetCfy(X, OutV, Theta); for (int i = 0; i < X.Len(); i++) { for (int r = 0; r < Theta.Len(); r++) { HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]); for (int c = r + 1; c < Theta.Len(); c++) { HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]); HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]); } } } /* printf("\n"); for (int r = 0; r < Theta.Len(); r++) { for (int c = 0; c < Theta.Len(); c++) { printf("%f\t", HVV.At(r, c).Val); } printf("\n"); } */ }
double TLogRegFit::Likelihood | ( | const TFltV & | NewTheta | ) |
Definition at line 838 of file agm.cpp.
References TLogRegPredict::GetCfy(), TVec< TVal, TSizeTy >::Len(), X, and Y.
{ TFltV OutV; TLogRegPredict::GetCfy(X, OutV, NewTheta); double L = 0; for (int r = 0; r < OutV.Len(); r++) { L += Y[r] * log(OutV[r]); L += (1 - Y[r]) * log(1 - OutV[r]); } return L; }
double TLogRegFit::Likelihood | ( | ) | [inline] |
Definition at line 158 of file agm.h.
References Likelihood(), and Theta.
Referenced by GetStepSizeByLineSearch(), Likelihood(), and MLEGradient().
{ return Likelihood(Theta); }
int TLogRegFit::MLEGradient | ( | const double & | ChangeEps, |
const int & | MaxStep, | ||
const TStr | PlotNm | ||
) |
Definition at line 777 of file agm.cpp.
References TVec< TVal, TSizeTy >::Add(), TStr::Empty(), GetStepSizeByLineSearch(), TExeTm::GetTmStr(), Gradient(), TVec< TVal, TSizeTy >::Len(), Likelihood(), TLinAlg::Norm(), TGnuPlot::PlotValV(), and Theta.
Referenced by CalcLogRegGradient().
{ TExeTm ExeTm; TFltV GradV(Theta.Len()); int iter = 0; TIntFltPrV IterLV, IterGradNormV; double MinVal = -1e10, MaxVal = 1e10; double GradCutOff = 100000; for(iter = 0; iter < MaxStep; iter++) { Gradient(GradV); //if gradient is going out of the boundary, cut off for(int i = 0; i < Theta.Len(); i++) { if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; } if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; } if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; } if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; } } double Alpha = 0.15, Beta = 0.9; //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta); double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta); if (TLinAlg::Norm(GradV) < ChangeEps) { break; } for(int i = 0; i < Theta.Len(); i++) { double Change = LearnRate * GradV[i]; Theta[i] += Change; if(Theta[i] < MinVal) { Theta[i] = MinVal;} if(Theta[i] > MaxVal) { Theta[i] = MaxVal;} } if (! PlotNm.Empty()) { double L = Likelihood(); IterLV.Add(TIntFltPr(iter, L)); IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV))); } } if (! PlotNm.Empty()) { TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q"); printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr()); } return iter; }
int TLogRegFit::MLENewton | ( | const double & | ChangeEps, |
const int & | MaxStep, | ||
const TStr | PlotNm | ||
) |
Definition at line 750 of file agm.cpp.
References TLinAlg::DotProduct(), TStr::Empty(), GetNewtonStep(), GetStepSizeByLineSearch(), TExeTm::GetTmStr(), Gradient(), Hessian(), TVec< TVal, TSizeTy >::Len(), and Theta.
Referenced by CalcLogRegNewton().
{ TExeTm ExeTm; TFltV GradV(Theta.Len()), DeltaLV(Theta.Len()); TFltVV HVV(Theta.Len(), Theta.Len()); int iter = 0; double MinVal = -1e10, MaxVal = 1e10; for(iter = 0; iter < MaxStep; iter++) { Gradient(GradV); Hessian(HVV); GetNewtonStep(HVV, GradV, DeltaLV); double Increment = TLinAlg::DotProduct(GradV, DeltaLV); if (Increment <= ChangeEps) {break;} double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1); for(int i = 0; i < Theta.Len(); i++) { double Change = LearnRate * DeltaLV[i]; Theta[i] += Change; if(Theta[i] < MinVal) { Theta[i] = MinVal;} if(Theta[i] > MaxVal) { Theta[i] = MaxVal;} } } if (! PlotNm.Empty()) { printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr()); } return iter; }
int TLogRegFit::M [private] |
Definition at line 147 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), and Gradient().
TFltV TLogRegFit::Theta [private] |
Definition at line 146 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), GetNewtonStep(), GetStepSizeByLineSearch(), Gradient(), Hessian(), Likelihood(), MLEGradient(), and MLENewton().
TVec<TFltV> TLogRegFit::X [private] |
Definition at line 144 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), Gradient(), Hessian(), and Likelihood().
TFltV TLogRegFit::Y [private] |
Definition at line 145 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), Gradient(), and Likelihood().