|
SNAP Library 2.1, Developer Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <agm.h>

Public Member Functions | |
| TLogRegFit () | |
| ~TLogRegFit () | |
| PLogRegPredict | CalcLogRegGradient (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false) |
| PLogRegPredict | CalcLogRegNewton (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false) |
| int | MLEGradient (const double &ChangeEps, const int &MaxStep, const TStr PlotNm) |
| int | MLENewton (const double &ChangeEps, const int &MaxStep, const TStr PlotNm) |
| double | GetStepSizeByLineSearch (const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta) |
| double | Likelihood (const TFltV &NewTheta) |
| double | Likelihood () |
| void | Gradient (TFltV &GradV) |
| void | Hessian (TFltVV &HVV) |
| void | GetNewtonStep (TFltVV &HVV, const TFltV &GradV, TFltV &DeltaLV) |
Private Attributes | |
| TVec< TFltV > | X |
| TFltV | Y |
| TFltV | Theta |
| int | M |
| TLogRegFit::TLogRegFit | ( | ) | [inline] |
| TLogRegFit::~TLogRegFit | ( | ) | [inline] |
| PLogRegPredict TLogRegFit::CalcLogRegGradient | ( | const TVec< TFltV > & | XPt, |
| const TFltV & | yPt, | ||
| const TStr & | PlotNm = TStr(), |
||
| const double & | ChangeEps = 0.01, |
||
| const int & | MaxStep = 200, |
||
| const bool | InterceptPt = false |
||
| ) |
Definition at line 881 of file agm.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Gen(), IAssert, TVec< TVal, TSizeTy >::Len(), M, MLEGradient(), Theta, X, and Y.
{
X = XPt;
Y = yPt;
IAssert(X.Len() == Y.Len());
if (Intercept == false) { // if intercept is not included, add it
for (int r = 0; r < X.Len(); r++) { X[r].Add(1); }
}
M = X[0].Len();
for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); }
for (int r = 0; r < Y.Len(); r++) {
if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
}
Theta.Gen(M);
MLEGradient(ChangeEps, MaxStep, PlotNm);
return new TLogRegPredict(Theta);
};

| PLogRegPredict TLogRegFit::CalcLogRegNewton | ( | const TVec< TFltV > & | XPt, |
| const TFltV & | yPt, | ||
| const TStr & | PlotNm = TStr(), |
||
| const double & | ChangeEps = 0.01, |
||
| const int & | MaxStep = 200, |
||
| const bool | InterceptPt = false |
||
| ) |
Definition at line 862 of file agm.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Gen(), IAssert, TVec< TVal, TSizeTy >::Len(), M, MLENewton(), Theta, X, and Y.
Referenced by TAGMUtil::FindComsByAGM().
{
X = XPt;
Y = yPt;
IAssert(X.Len() == Y.Len());
if (Intercept == false) { // if intercept is not included, add it
for (int r = 0; r < X.Len(); r++) { X[r].Add(1); }
}
M = X[0].Len();
for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); }
for (int r = 0; r < Y.Len(); r++) {
if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
}
Theta.Gen(M);
MLENewton(ChangeEps, MaxStep, PlotNm);
return new TLogRegPredict(Theta);
};


| void TLogRegFit::GetNewtonStep | ( | TFltVV & | HVV, |
| const TFltV & | GradV, | ||
| TFltV & | DeltaLV | ||
| ) |
Definition at line 698 of file agm.cpp.
References TVVec< TVal >::GetXDim(), TVec< TVal, TSizeTy >::Len(), TNumericalStuff::SolveSymetricSystem(), and Theta.
Referenced by MLENewton().
{
bool HSingular = false;
for (int i = 0; i < HVV.GetXDim(); i++) {
if (HVV(i,i) == 0.0) {
HVV(i,i) = 0.001;
HSingular = true;
}
DeltaLV[i] = GradV[i] / HVV(i, i);
}
if (! HSingular) {
if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite
for (int r = 0; r < Theta.Len(); r++) {
for (int c = 0; c < Theta.Len(); c++) {
HVV(r, c) = - HVV(r, c);
}
}
TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
}
else {
TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
for (int i = 0; i < DeltaLV.Len(); i++) {
DeltaLV[i] = - DeltaLV[i];
}
}
}
}


| double TLogRegFit::GetStepSizeByLineSearch | ( | const TFltV & | DeltaV, |
| const TFltV & | GradV, | ||
| const double & | Alpha, | ||
| const double & | Beta | ||
| ) |
Definition at line 817 of file agm.cpp.
References TLinAlg::DotProduct(), IAssert, TVec< TVal, TSizeTy >::Len(), Likelihood(), and Theta.
Referenced by MLEGradient(), and MLENewton().
{
double StepSize = 1.0;
double InitLikelihood = Likelihood();
IAssert(Theta.Len() == DeltaV.Len());
TFltV NewThetaV(Theta.Len());
double MinVal = -1e10, MaxVal = 1e10;
for(int iter = 0; ; iter++) {
for (int i = 0; i < Theta.Len(); i++){
NewThetaV[i] = Theta[i] + StepSize * DeltaV[i];
if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal; }
if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; }
}
if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
StepSize *= Beta;
} else {
break;
}
}
return StepSize;
}


| void TLogRegFit::Gradient | ( | TFltV & | GradV | ) |
Definition at line 849 of file agm.cpp.
References TVec< TVal, TSizeTy >::Gen(), TLogRegPredict::GetCfy(), TVec< TVal, TSizeTy >::Len(), M, Theta, X, and Y.
Referenced by MLEGradient(), and MLENewton().
{
TFltV OutV;
TLogRegPredict::GetCfy(X, OutV, Theta);
GradV.Gen(M);
for (int r = 0; r < X.Len(); r++) {
//printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val);
for (int m = 0; m < M; m++) {
GradV[m] += (Y[r] - OutV[r]) * X[r][m];
}
}
//for (int m = 0; m < M; m++) { printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); }
}


| void TLogRegFit::Hessian | ( | TFltVV & | HVV | ) |
Definition at line 726 of file agm.cpp.
References TVVec< TVal >::At(), TVVec< TVal >::Gen(), TLogRegPredict::GetCfy(), TVec< TVal, TSizeTy >::Len(), Theta, and X.
Referenced by MLENewton().
{
HVV.Gen(Theta.Len(), Theta.Len());
TFltV OutV;
TLogRegPredict::GetCfy(X, OutV, Theta);
for (int i = 0; i < X.Len(); i++) {
for (int r = 0; r < Theta.Len(); r++) {
HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]);
for (int c = r + 1; c < Theta.Len(); c++) {
HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
}
}
}
/*
printf("\n");
for (int r = 0; r < Theta.Len(); r++) {
for (int c = 0; c < Theta.Len(); c++) {
printf("%f\t", HVV.At(r, c).Val);
}
printf("\n");
}
*/
}


| double TLogRegFit::Likelihood | ( | const TFltV & | NewTheta | ) |
Definition at line 838 of file agm.cpp.
References TLogRegPredict::GetCfy(), TVec< TVal, TSizeTy >::Len(), X, and Y.
{
TFltV OutV;
TLogRegPredict::GetCfy(X, OutV, NewTheta);
double L = 0;
for (int r = 0; r < OutV.Len(); r++) {
L += Y[r] * log(OutV[r]);
L += (1 - Y[r]) * log(1 - OutV[r]);
}
return L;
}

| double TLogRegFit::Likelihood | ( | ) | [inline] |
Definition at line 158 of file agm.h.
References Likelihood(), and Theta.
Referenced by GetStepSizeByLineSearch(), Likelihood(), and MLEGradient().
{ return Likelihood(Theta); }


| int TLogRegFit::MLEGradient | ( | const double & | ChangeEps, |
| const int & | MaxStep, | ||
| const TStr | PlotNm | ||
| ) |
Definition at line 777 of file agm.cpp.
References TVec< TVal, TSizeTy >::Add(), TStr::Empty(), GetStepSizeByLineSearch(), TExeTm::GetTmStr(), Gradient(), TVec< TVal, TSizeTy >::Len(), Likelihood(), TLinAlg::Norm(), TGnuPlot::PlotValV(), and Theta.
Referenced by CalcLogRegGradient().
{
TExeTm ExeTm;
TFltV GradV(Theta.Len());
int iter = 0;
TIntFltPrV IterLV, IterGradNormV;
double MinVal = -1e10, MaxVal = 1e10;
double GradCutOff = 100000;
for(iter = 0; iter < MaxStep; iter++) {
Gradient(GradV); //if gradient is going out of the boundary, cut off
for(int i = 0; i < Theta.Len(); i++) {
if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; }
if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; }
}
double Alpha = 0.15, Beta = 0.9;
//double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
if (TLinAlg::Norm(GradV) < ChangeEps) { break; }
for(int i = 0; i < Theta.Len(); i++) {
double Change = LearnRate * GradV[i];
Theta[i] += Change;
if(Theta[i] < MinVal) { Theta[i] = MinVal;}
if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
}
if (! PlotNm.Empty()) {
double L = Likelihood();
IterLV.Add(TIntFltPr(iter, L));
IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));
}
}
if (! PlotNm.Empty()) {
TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
}
return iter;
}


| int TLogRegFit::MLENewton | ( | const double & | ChangeEps, |
| const int & | MaxStep, | ||
| const TStr | PlotNm | ||
| ) |
Definition at line 750 of file agm.cpp.
References TLinAlg::DotProduct(), TStr::Empty(), GetNewtonStep(), GetStepSizeByLineSearch(), TExeTm::GetTmStr(), Gradient(), Hessian(), TVec< TVal, TSizeTy >::Len(), and Theta.
Referenced by CalcLogRegNewton().
{
TExeTm ExeTm;
TFltV GradV(Theta.Len()), DeltaLV(Theta.Len());
TFltVV HVV(Theta.Len(), Theta.Len());
int iter = 0;
double MinVal = -1e10, MaxVal = 1e10;
for(iter = 0; iter < MaxStep; iter++) {
Gradient(GradV);
Hessian(HVV);
GetNewtonStep(HVV, GradV, DeltaLV);
double Increment = TLinAlg::DotProduct(GradV, DeltaLV);
if (Increment <= ChangeEps) {break;}
double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1);
for(int i = 0; i < Theta.Len(); i++) {
double Change = LearnRate * DeltaLV[i];
Theta[i] += Change;
if(Theta[i] < MinVal) { Theta[i] = MinVal;}
if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
}
}
if (! PlotNm.Empty()) {
printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
}
return iter;
}


int TLogRegFit::M [private] |
Definition at line 147 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), and Gradient().
TFltV TLogRegFit::Theta [private] |
Definition at line 146 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), GetNewtonStep(), GetStepSizeByLineSearch(), Gradient(), Hessian(), Likelihood(), MLEGradient(), and MLENewton().
TVec<TFltV> TLogRegFit::X [private] |
Definition at line 144 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), Gradient(), Hessian(), and Likelihood().
TFltV TLogRegFit::Y [private] |
Definition at line 145 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), Gradient(), and Likelihood().