| 
    SNAP Library 2.0, Developer Reference
    2013-05-13 16:33:57
    
   SNAP, a general purpose, high performance system for analysis and manipulation of large networks 
   | 
  
  
  
 
#include <agm.h>

Public Member Functions | |
| TLogRegFit () | |
| ~TLogRegFit () | |
| PLogRegPredict | CalcLogRegGradient (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false) | 
| PLogRegPredict | CalcLogRegNewton (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false) | 
| int | MLEGradient (const double &ChangeEps, const int &MaxStep, const TStr PlotNm) | 
| int | MLENewton (const double &ChangeEps, const int &MaxStep, const TStr PlotNm) | 
| double | GetStepSizeByLineSearch (const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta) | 
| double | Likelihood (const TFltV &NewTheta) | 
| double | Likelihood () | 
| void | Gradient (TFltV &GradV) | 
| void | Hessian (TFltVV &HVV) | 
| void | GetNewtonStep (TFltVV &HVV, const TFltV &GradV, TFltV &DeltaLV) | 
Private Attributes | |
| TVec< TFltV > | X | 
| TFltV | Y | 
| TFltV | Theta | 
| int | M | 
| TLogRegFit::TLogRegFit | ( | ) |  [inline] | 
        
| TLogRegFit::~TLogRegFit | ( | ) |  [inline] | 
        
| PLogRegPredict TLogRegFit::CalcLogRegGradient | ( | const TVec< TFltV > & | XPt, | 
| const TFltV & | yPt, | ||
| const TStr & | PlotNm = TStr(),  | 
        ||
| const double & | ChangeEps = 0.01,  | 
        ||
| const int & | MaxStep = 200,  | 
        ||
| const bool | InterceptPt = false  | 
        ||
| ) | 
Definition at line 881 of file agm.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Gen(), IAssert, TVec< TVal, TSizeTy >::Len(), M, MLEGradient(), Theta, X, and Y.
                                                                                                                                                                             {
  X = XPt;
  Y = yPt;
  IAssert(X.Len() == Y.Len());
  if (Intercept == false) { // if intercept is not included, add it
    for (int r = 0; r < X.Len(); r++) {  X[r].Add(1); }
  }
  M = X[0].Len();
  for (int r = 0; r < X.Len(); r++) {  IAssert(X[r].Len() == M); }
  for (int r = 0; r < Y.Len(); r++) {  
    if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
    if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
  }
  Theta.Gen(M);
  MLEGradient(ChangeEps, MaxStep, PlotNm);
  return new TLogRegPredict(Theta); 
};

| PLogRegPredict TLogRegFit::CalcLogRegNewton | ( | const TVec< TFltV > & | XPt, | 
| const TFltV & | yPt, | ||
| const TStr & | PlotNm = TStr(),  | 
        ||
| const double & | ChangeEps = 0.01,  | 
        ||
| const int & | MaxStep = 200,  | 
        ||
| const bool | InterceptPt = false  | 
        ||
| ) | 
Definition at line 862 of file agm.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Gen(), IAssert, TVec< TVal, TSizeTy >::Len(), M, MLENewton(), Theta, X, and Y.
Referenced by TAGMUtil::FindComsByAGM().
                                                                                                                                                                           {
  X = XPt;
  Y = yPt;
  IAssert(X.Len() == Y.Len());
  if (Intercept == false) { // if intercept is not included, add it
    for (int r = 0; r < X.Len(); r++) {  X[r].Add(1); }
  }
  M = X[0].Len();
  for (int r = 0; r < X.Len(); r++) {  IAssert(X[r].Len() == M); }
  for (int r = 0; r < Y.Len(); r++) {  
    if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
    if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
  }
  Theta.Gen(M);
  MLENewton(ChangeEps, MaxStep, PlotNm);
  return new TLogRegPredict(Theta); 
};


| void TLogRegFit::GetNewtonStep | ( | TFltVV & | HVV, | 
| const TFltV & | GradV, | ||
| TFltV & | DeltaLV | ||
| ) | 
Definition at line 698 of file agm.cpp.
References TVVec< TVal >::GetXDim(), TVec< TVal, TSizeTy >::Len(), TNumericalStuff::SolveSymetricSystem(), and Theta.
Referenced by MLENewton().
                                                                             {
  bool HSingular = false;
  for (int i = 0; i < HVV.GetXDim(); i++) {
    if (HVV(i,i) == 0.0) {
      HVV(i,i) = 0.001;
      HSingular = true;
    }
    DeltaLV[i] = GradV[i] / HVV(i, i);
  }
  if (! HSingular) {
    if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite
      for (int r = 0; r < Theta.Len(); r++) {
        for (int c = 0; c < Theta.Len(); c++) {
          HVV(r, c) = - HVV(r, c);
        }
      }
      TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
    }
    else {
      TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
      for (int i = 0; i < DeltaLV.Len(); i++) {
        DeltaLV[i] = - DeltaLV[i];
      }
    }
  }
}


| double TLogRegFit::GetStepSizeByLineSearch | ( | const TFltV & | DeltaV, | 
| const TFltV & | GradV, | ||
| const double & | Alpha, | ||
| const double & | Beta | ||
| ) | 
Definition at line 817 of file agm.cpp.
References TLinAlg::DotProduct(), IAssert, TVec< TVal, TSizeTy >::Len(), Likelihood(), and Theta.
Referenced by MLEGradient(), and MLENewton().
                                                                                                                           {
  double StepSize = 1.0;
  double InitLikelihood = Likelihood();
  IAssert(Theta.Len() == DeltaV.Len());
  TFltV NewThetaV(Theta.Len());
  double MinVal = -1e10, MaxVal = 1e10;
  for(int iter = 0; ; iter++) {
    for (int i = 0; i < Theta.Len(); i++){
      NewThetaV[i] = Theta[i] + StepSize * DeltaV[i];
      if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal;  }
      if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; }
    }
    if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
      StepSize *= Beta;
    } else {
      break;
    }
  }
  return StepSize;
}


| void TLogRegFit::Gradient | ( | TFltV & | GradV | ) | 
Definition at line 849 of file agm.cpp.
References TVec< TVal, TSizeTy >::Gen(), TLogRegPredict::GetCfy(), TVec< TVal, TSizeTy >::Len(), M, Theta, X, and Y.
Referenced by MLEGradient(), and MLENewton().
                                      {
  TFltV OutV;
  TLogRegPredict::GetCfy(X, OutV, Theta);
  GradV.Gen(M);
  for (int r = 0; r < X.Len(); r++) {
    //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val);
    for (int m = 0; m < M; m++) {
      GradV[m] += (Y[r] - OutV[r]) * X[r][m];
    }
  }
  //for (int m = 0; m < M; m++) {  printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); }
}


| void TLogRegFit::Hessian | ( | TFltVV & | HVV | ) | 
Definition at line 726 of file agm.cpp.
References TVVec< TVal >::At(), TVVec< TVal >::Gen(), TLogRegPredict::GetCfy(), TVec< TVal, TSizeTy >::Len(), Theta, and X.
Referenced by MLENewton().
                                    {
  HVV.Gen(Theta.Len(), Theta.Len());
  TFltV OutV;
  TLogRegPredict::GetCfy(X, OutV, Theta);
  for (int i = 0; i < X.Len(); i++) {
    for (int r = 0; r < Theta.Len(); r++) {
      HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]);
      for (int c = r + 1; c < Theta.Len(); c++) {
        HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
        HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
      }
    }
  }
  /*
  printf("\n");
  for (int r = 0; r < Theta.Len(); r++) {
    for (int c = 0; c < Theta.Len(); c++) {
      printf("%f\t", HVV.At(r, c).Val);
    }
    printf("\n");
  }
  */
}


| double TLogRegFit::Likelihood | ( | const TFltV & | NewTheta | ) | 
Definition at line 838 of file agm.cpp.
References TLogRegPredict::GetCfy(), TVec< TVal, TSizeTy >::Len(), X, and Y.
                                                   {
  TFltV OutV;
  TLogRegPredict::GetCfy(X, OutV, NewTheta);
  double L = 0;
  for (int r = 0; r < OutV.Len(); r++) {
    L += Y[r] * log(OutV[r]);
    L += (1 - Y[r]) * log(1 - OutV[r]);
  }
  return L;
}

| double TLogRegFit::Likelihood | ( | ) |  [inline] | 
        
Definition at line 158 of file agm.h.
References Likelihood(), and Theta.
Referenced by GetStepSizeByLineSearch(), Likelihood(), and MLEGradient().
{ return Likelihood(Theta); }


| int TLogRegFit::MLEGradient | ( | const double & | ChangeEps, | 
| const int & | MaxStep, | ||
| const TStr | PlotNm | ||
| ) | 
Definition at line 777 of file agm.cpp.
References TVec< TVal, TSizeTy >::Add(), TStr::Empty(), GetStepSizeByLineSearch(), TExeTm::GetTmStr(), Gradient(), TVec< TVal, TSizeTy >::Len(), Likelihood(), TLinAlg::Norm(), TGnuPlot::PlotValV(), and Theta.
Referenced by CalcLogRegGradient().
                                                                                          {
  TExeTm ExeTm;
  TFltV GradV(Theta.Len());
  int iter = 0;
  TIntFltPrV IterLV, IterGradNormV;
  double MinVal = -1e10, MaxVal = 1e10;
  double GradCutOff = 100000;
  for(iter = 0; iter < MaxStep; iter++) {
    Gradient(GradV);    //if gradient is going out of the boundary, cut off
    for(int i = 0; i < Theta.Len(); i++) {
      if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
      if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
      if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; }
      if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; }
    }
    double Alpha = 0.15, Beta = 0.9;
    //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
    double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
    if (TLinAlg::Norm(GradV) < ChangeEps) { break; }
    for(int i = 0; i < Theta.Len(); i++) {
      double Change = LearnRate * GradV[i];
      Theta[i] += Change;
      if(Theta[i] < MinVal) { Theta[i] = MinVal;}
      if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
    }
    if (! PlotNm.Empty()) {
      double L = Likelihood();
      IterLV.Add(TIntFltPr(iter, L));
      IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));
    }
    
  }
  if (! PlotNm.Empty()) {
    TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
    TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
    printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
  }
  return iter;
}


| int TLogRegFit::MLENewton | ( | const double & | ChangeEps, | 
| const int & | MaxStep, | ||
| const TStr | PlotNm | ||
| ) | 
Definition at line 750 of file agm.cpp.
References TLinAlg::DotProduct(), TStr::Empty(), GetNewtonStep(), GetStepSizeByLineSearch(), TExeTm::GetTmStr(), Gradient(), Hessian(), TVec< TVal, TSizeTy >::Len(), and Theta.
Referenced by CalcLogRegNewton().
                                                                                        {
  TExeTm ExeTm;
  TFltV GradV(Theta.Len()), DeltaLV(Theta.Len());
  TFltVV HVV(Theta.Len(), Theta.Len());
  int iter = 0;
  double MinVal = -1e10, MaxVal = 1e10;
  for(iter = 0; iter < MaxStep; iter++) {
    Gradient(GradV);
    Hessian(HVV);
    GetNewtonStep(HVV, GradV, DeltaLV);
    double Increment = TLinAlg::DotProduct(GradV, DeltaLV);
    if (Increment <= ChangeEps) {break;}
    double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1);
    for(int i = 0; i < Theta.Len(); i++) {
      double Change = LearnRate * DeltaLV[i];
      Theta[i] += Change;
      if(Theta[i] < MinVal) { Theta[i] = MinVal;}
      if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
    }
  }
  if (! PlotNm.Empty()) {
    printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
  }
  return iter;
}


int TLogRegFit::M [private] | 
        
Definition at line 147 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), and Gradient().
TFltV TLogRegFit::Theta [private] | 
        
Definition at line 146 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), GetNewtonStep(), GetStepSizeByLineSearch(), Gradient(), Hessian(), Likelihood(), MLEGradient(), and MLENewton().
TVec<TFltV> TLogRegFit::X [private] | 
        
Definition at line 144 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), Gradient(), Hessian(), and Likelihood().
TFltV TLogRegFit::Y [private] | 
        
Definition at line 145 of file agm.h.
Referenced by CalcLogRegGradient(), CalcLogRegNewton(), Gradient(), and Likelihood().