SNAP Library 2.1, User Reference  2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
TLogRegFit Class Reference

#include <agm.h>

List of all members.

Public Member Functions

 TLogRegFit ()
 ~TLogRegFit ()
PLogRegPredict CalcLogRegGradient (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false)
PLogRegPredict CalcLogRegNewton (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false)
int MLEGradient (const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
int MLENewton (const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
double GetStepSizeByLineSearch (const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
double Likelihood (const TFltV &NewTheta)
double Likelihood ()
void Gradient (TFltV &GradV)
void Hessian (TFltVV &HVV)
void GetNewtonStep (TFltVV &HVV, const TFltV &GradV, TFltV &DeltaLV)

Private Attributes

TVec< TFltVX
TFltV Y
TFltV Theta
int M

Detailed Description

Definition at line 142 of file agm.h.


Constructor & Destructor Documentation

TLogRegFit::TLogRegFit ( ) [inline]

Definition at line 149 of file agm.h.

{}

Definition at line 150 of file agm.h.

{}

Member Function Documentation

PLogRegPredict TLogRegFit::CalcLogRegGradient ( const TVec< TFltV > &  XPt,
const TFltV yPt,
const TStr PlotNm = TStr(),
const double &  ChangeEps = 0.01,
const int &  MaxStep = 200,
const bool  InterceptPt = false 
)

Definition at line 881 of file agm.cpp.

                                                                                                                                                                             {
  X = XPt;
  Y = yPt;
  IAssert(X.Len() == Y.Len());
  if (Intercept == false) { // if intercept is not included, add it
    for (int r = 0; r < X.Len(); r++) {  X[r].Add(1); }
  }
  M = X[0].Len();
  for (int r = 0; r < X.Len(); r++) {  IAssert(X[r].Len() == M); }
  for (int r = 0; r < Y.Len(); r++) {  
    if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
    if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
  }
  Theta.Gen(M);
  MLEGradient(ChangeEps, MaxStep, PlotNm);
  return new TLogRegPredict(Theta); 
};
PLogRegPredict TLogRegFit::CalcLogRegNewton ( const TVec< TFltV > &  XPt,
const TFltV yPt,
const TStr PlotNm = TStr(),
const double &  ChangeEps = 0.01,
const int &  MaxStep = 200,
const bool  InterceptPt = false 
)

Definition at line 862 of file agm.cpp.

                                                                                                                                                                           {

  X = XPt;
  Y = yPt;
  IAssert(X.Len() == Y.Len());
  if (Intercept == false) { // if intercept is not included, add it
    for (int r = 0; r < X.Len(); r++) {  X[r].Add(1); }
  }
  M = X[0].Len();
  for (int r = 0; r < X.Len(); r++) {  IAssert(X[r].Len() == M); }
  for (int r = 0; r < Y.Len(); r++) {  
    if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
    if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
  }
  Theta.Gen(M);
  MLENewton(ChangeEps, MaxStep, PlotNm);
  return new TLogRegPredict(Theta); 
};
void TLogRegFit::GetNewtonStep ( TFltVV HVV,
const TFltV GradV,
TFltV DeltaLV 
)

Definition at line 698 of file agm.cpp.

                                                                             {
  bool HSingular = false;
  for (int i = 0; i < HVV.GetXDim(); i++) {
    if (HVV(i,i) == 0.0) {
      HVV(i,i) = 0.001;
      HSingular = true;
    }
    DeltaLV[i] = GradV[i] / HVV(i, i);
  }
  if (! HSingular) {
    if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite
      for (int r = 0; r < Theta.Len(); r++) {
        for (int c = 0; c < Theta.Len(); c++) {
          HVV(r, c) = - HVV(r, c);
        }
      }
      TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
    }
    else {
      TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
      for (int i = 0; i < DeltaLV.Len(); i++) {
        DeltaLV[i] = - DeltaLV[i];
      }
    }

  }
}
double TLogRegFit::GetStepSizeByLineSearch ( const TFltV DeltaV,
const TFltV GradV,
const double &  Alpha,
const double &  Beta 
)

Definition at line 817 of file agm.cpp.

                                                                                                                           {
  double StepSize = 1.0;
  double InitLikelihood = Likelihood();
  IAssert(Theta.Len() == DeltaV.Len());
  TFltV NewThetaV(Theta.Len());
  double MinVal = -1e10, MaxVal = 1e10;
  for(int iter = 0; ; iter++) {
    for (int i = 0; i < Theta.Len(); i++){
      NewThetaV[i] = Theta[i] + StepSize * DeltaV[i];
      if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal;  }
      if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; }
    }
    if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
      StepSize *= Beta;
    } else {
      break;
    }
  }
  return StepSize;
}
void TLogRegFit::Gradient ( TFltV GradV)

Definition at line 849 of file agm.cpp.

                                      {
  TFltV OutV;
  TLogRegPredict::GetCfy(X, OutV, Theta);
  GradV.Gen(M);
  for (int r = 0; r < X.Len(); r++) {
    //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val);
    for (int m = 0; m < M; m++) {
      GradV[m] += (Y[r] - OutV[r]) * X[r][m];
    }
  }
  //for (int m = 0; m < M; m++) {  printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); }
}
void TLogRegFit::Hessian ( TFltVV HVV)

Definition at line 726 of file agm.cpp.

                                    {
  HVV.Gen(Theta.Len(), Theta.Len());
  TFltV OutV;
  TLogRegPredict::GetCfy(X, OutV, Theta);
  for (int i = 0; i < X.Len(); i++) {
    for (int r = 0; r < Theta.Len(); r++) {
      HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]);
      for (int c = r + 1; c < Theta.Len(); c++) {
        HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
        HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
      }
    }
  }
  /*
  printf("\n");
  for (int r = 0; r < Theta.Len(); r++) {
    for (int c = 0; c < Theta.Len(); c++) {
      printf("%f\t", HVV.At(r, c).Val);
    }
    printf("\n");
  }
  */
}
double TLogRegFit::Likelihood ( const TFltV NewTheta)

Definition at line 838 of file agm.cpp.

                                                   {
  TFltV OutV;
  TLogRegPredict::GetCfy(X, OutV, NewTheta);
  double L = 0;
  for (int r = 0; r < OutV.Len(); r++) {
    L += Y[r] * log(OutV[r]);
    L += (1 - Y[r]) * log(1 - OutV[r]);
  }
  return L;
}
double TLogRegFit::Likelihood ( ) [inline]

Definition at line 158 of file agm.h.

{ return Likelihood(Theta); }
int TLogRegFit::MLEGradient ( const double &  ChangeEps,
const int &  MaxStep,
const TStr  PlotNm 
)

Definition at line 777 of file agm.cpp.

                                                                                          {
  TExeTm ExeTm;
  TFltV GradV(Theta.Len());
  int iter = 0;
  TIntFltPrV IterLV, IterGradNormV;
  double MinVal = -1e10, MaxVal = 1e10;
  double GradCutOff = 100000;
  for(iter = 0; iter < MaxStep; iter++) {
    Gradient(GradV);    //if gradient is going out of the boundary, cut off
    for(int i = 0; i < Theta.Len(); i++) {
      if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
      if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
      if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; }
      if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; }
    }
    double Alpha = 0.15, Beta = 0.9;
    //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
    double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
    if (TLinAlg::Norm(GradV) < ChangeEps) { break; }
    for(int i = 0; i < Theta.Len(); i++) {
      double Change = LearnRate * GradV[i];
      Theta[i] += Change;
      if(Theta[i] < MinVal) { Theta[i] = MinVal;}
      if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
    }
    if (! PlotNm.Empty()) {
      double L = Likelihood();
      IterLV.Add(TIntFltPr(iter, L));
      IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));
    }
    
  }
  if (! PlotNm.Empty()) {
    TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
    TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
    printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
  }
  return iter;
}
int TLogRegFit::MLENewton ( const double &  ChangeEps,
const int &  MaxStep,
const TStr  PlotNm 
)

Definition at line 750 of file agm.cpp.

                                                                                        {
  TExeTm ExeTm;
  TFltV GradV(Theta.Len()), DeltaLV(Theta.Len());
  TFltVV HVV(Theta.Len(), Theta.Len());
  int iter = 0;
  double MinVal = -1e10, MaxVal = 1e10;
  for(iter = 0; iter < MaxStep; iter++) {
    Gradient(GradV);
    Hessian(HVV);
    GetNewtonStep(HVV, GradV, DeltaLV);
    double Increment = TLinAlg::DotProduct(GradV, DeltaLV);
    if (Increment <= ChangeEps) {break;}
    double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1);
    for(int i = 0; i < Theta.Len(); i++) {
      double Change = LearnRate * DeltaLV[i];
      Theta[i] += Change;
      if(Theta[i] < MinVal) { Theta[i] = MinVal;}
      if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
    }
  }
  if (! PlotNm.Empty()) {
    printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
  }

  return iter;
}

Member Data Documentation

int TLogRegFit::M [private]

Definition at line 147 of file agm.h.

Definition at line 146 of file agm.h.

TVec<TFltV> TLogRegFit::X [private]

Definition at line 144 of file agm.h.

TFltV TLogRegFit::Y [private]

Definition at line 145 of file agm.h.


The documentation for this class was generated from the following files: