SNAP Library 3.0, Developer Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
TCoda Class Reference

#include <agmdirected.h>

Collaboration diagram for TCoda:

Public Member Functions

 TCoda (const PNGraph &GraphPt, const int &InitComs, const int RndSeed=0)
 
 TCoda ()
 
void SetGraph (const PNGraph &GraphPt)
 
PNGraph GetGraph ()
 
PNGraph GetGraphRawNID ()
 
void SetRegCoef (const double _RegCoef)
 
double GetRegCoef ()
 
void RandomInit (const int InitComs)
 
int GetNumComs ()
 
void NeighborComInit (const int InitComs)
 
void NeighborComInit (TFltIntPrV &NIdPhiV, const int InitComs)
 
void SetCmtyVV (const TVec< TIntV > &CmtyVVOut, const TVec< TIntV > &CmtyVVIn)
 
double Likelihood (const bool DoParallel=false)
 
double LikelihoodForNode (const bool IsRow, const int UID)
 
double LikelihoodForNode (const bool IsRow, const int UID, const TIntFltH &FU)
 
void GetNonEdgePairScores (TFltIntIntTrV &ScoreV)
 
void GetNIDValH (TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
 
void DumpMemberships (const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
 
void DumpMemberships (const TStr &OutFNm, const TStrHash< TInt > &NodeNameH, const double Thres)
 
void GetCmtyS (TIntSet &CmtySOut, TIntSet &CmtySIn, const int CID, const double Thres)
 
void DumpMemberships (const TStr &OutFNm, const double Thres)
 
void DumpMemberships (const TStr &OutFNm)
 
void GetCommunity (TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID)
 
void GetCommunity (TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID, const double Thres)
 extract community affiliation for given community ID More...
 
void GetTopCIDs (TIntV &CIdV, const int TopK, const int IsAverage=1, const int MinSz=1)
 
void GradientForNode (const bool IsRow, const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
 
void SetHoldOut (const double HOFrac)
 
void GetCmtyVV (TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)
 
void GetCmtyVV (TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const double ThresOut, const double ThresIn, const int MinSz=3)
 
void GetCmtyVV (const bool IsOut, TVec< TIntV > &CmtyVV)
 
void GetCmtyVV (const bool IsOut, TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
 extract community affiliation for outgoing edges from F_uc More...
 
void GetCmtyVVUnSorted (const bool IsOut, TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
 
void GetCmtyVVUnSorted (TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn)
 
int FindComsByCV (TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const int EdgesForCV=100, const double StepAlpha=0.3, const double StepBeta=0.1)
 
int FindComsByCV (const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const int EdgesForCV=100, const double StepAlpha=0.3, const double StepBeta=0.3)
 estimate number of communities using cross validation More...
 
double LikelihoodHoldOut (const bool DoParallel=false)
 
double GetStepSizeByLineSearch (const bool IsRow, const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
 
int MLEGradAscent (const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
 
int MLEGradAscentParallel (const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
 
int MLEGradAscentParallel (const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
 
void Save (TSOut &SOut)
 
void Load (TSIn &SIn, const int &RndSeed=0)
 
TFltGetSumVal (const bool IsOut, const int CID)
 
double GetCom (const bool IsOut, const int &NID, const int &CID)
 
double GetComOut (const int &NID, const int &CID)
 
double GetComIn (const int &NID, const int &CID)
 
void AddCom (const bool IsOut, const int &NID, const int &CID, const double &Val)
 
void AddComOut (const int &NID, const int &CID, const double &Val)
 
void AddComIn (const int &NID, const int &CID, const double &Val)
 
void DelCom (const bool IsOut, const int &NID, const int &CID)
 
void DelComOut (const int &NID, const int &CID)
 
void DelComIn (const int &NID, const int &CID)
 
double DotProduct (const TIntFltH &UV, const TIntFltH &VV)
 
double DotProductUtoV (const int &UID, const int &VID)
 
double Prediction (const TIntFltH &FU, const TIntFltH &HV)
 
double Prediction (const int &UID, const int &VID)
 
double Sum (const TIntFltH &UV)
 
double Norm2 (const TIntFltH &UV)
 

Public Attributes

TFlt MinVal
 
TFlt MaxVal
 
TFlt NegWgt
 
TFlt PNoCom
 
TBool DoParallel
 

Private Attributes

PNGraph G
 
TVec< TIntFltHF
 
TVec< TIntFltHH
 
TRnd Rnd
 
TIntV NIDV
 
TFlt RegCoef
 
TFltV SumFV
 
TFltV SumHV
 
TBool NodesOk
 
TInt NumComs
 
TVec< TIntSetHOVIDSV
 

Detailed Description

Definition at line 7 of file agmdirected.h.

Constructor & Destructor Documentation

TCoda::TCoda ( const PNGraph GraphPt,
const int &  InitComs,
const int  RndSeed = 0 
)
inline

Definition at line 27 of file agmdirected.h.

References RandomInit(), and SetGraph().

27  : Rnd(RndSeed), RegCoef(0),
28  NodesOk(true), MinVal(0.0), MaxVal(1000.0), NegWgt(1.0) { SetGraph(GraphPt); RandomInit(InitComs); }
TFlt NegWgt
Definition: agmdirected.h:23
TFlt MinVal
Definition: agmdirected.h:21
void SetGraph(const PNGraph &GraphPt)
TFlt RegCoef
Definition: agmdirected.h:14
TBool NodesOk
Definition: agmdirected.h:17
void RandomInit(const int InitComs)
Definition: agmdirected.cpp:43
TRnd Rnd
Definition: agmdirected.h:12
TFlt MaxVal
Definition: agmdirected.h:22

Here is the call graph for this function:

TCoda::TCoda ( )
inline

Definition at line 29 of file agmdirected.h.

References G, and TNGraph::New().

29 { G = TNGraph::New(); }
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:425
PNGraph G
Definition: agmdirected.h:9

Here is the call graph for this function:

Member Function Documentation

void TCoda::AddCom ( const bool  IsOut,
const int &  NID,
const int &  CID,
const double &  Val 
)
inline

Definition at line 114 of file agmdirected.h.

References AddComIn(), and AddComOut().

Referenced by MLEGradAscent().

114  {
115  if (IsOut) {
116  AddComOut(NID, CID, Val);
117  } else {
118  AddComIn(NID, CID, Val);
119  }
120  }
void AddComIn(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:128
void AddComOut(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:121

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::AddComIn ( const int &  NID,
const int &  CID,
const double &  Val 
)
inline

Definition at line 128 of file agmdirected.h.

References H, and SumHV.

Referenced by AddCom(), NeighborComInit(), RandomInit(), and SetCmtyVV().

128  {
129  if (H[NID].IsKey(CID)) {
130  SumHV[CID] -= H[NID].GetDat(CID);
131  }
132  H[NID].AddDat(CID) = Val;
133  SumHV[CID] += Val;
134  }
TVec< TIntFltH > H
Definition: agmdirected.h:11
TFltV SumHV
Definition: agmdirected.h:16

Here is the caller graph for this function:

void TCoda::AddComOut ( const int &  NID,
const int &  CID,
const double &  Val 
)
inline

Definition at line 121 of file agmdirected.h.

References F, and SumFV.

Referenced by AddCom(), NeighborComInit(), RandomInit(), and SetCmtyVV().

121  {
122  if (F[NID].IsKey(CID)) {
123  SumFV[CID] -= F[NID].GetDat(CID);
124  }
125  F[NID].AddDat(CID) = Val;
126  SumFV[CID] += Val;
127  }
TFltV SumFV
Definition: agmdirected.h:15
TVec< TIntFltH > F
Definition: agmdirected.h:10

Here is the caller graph for this function:

void TCoda::DelCom ( const bool  IsOut,
const int &  NID,
const int &  CID 
)
inline

Definition at line 135 of file agmdirected.h.

References DelComIn(), and DelComOut().

Referenced by MLEGradAscent().

135  {
136  if (IsOut) {
137  return DelComOut(NID, CID);
138  } else {
139  return DelComIn(NID, CID);
140  }
141  }
void DelComOut(const int &NID, const int &CID)
Definition: agmdirected.h:142
void DelComIn(const int &NID, const int &CID)
Definition: agmdirected.h:148

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::DelComIn ( const int &  NID,
const int &  CID 
)
inline

Definition at line 148 of file agmdirected.h.

References H, and SumHV.

Referenced by DelCom().

148  {
149  if (H[NID].IsKey(CID)) {
150  SumHV[CID] -= H[NID].GetDat(CID);
151  H[NID].DelKey(CID);
152  }
153  }
TVec< TIntFltH > H
Definition: agmdirected.h:11
TFltV SumHV
Definition: agmdirected.h:16

Here is the caller graph for this function:

void TCoda::DelComOut ( const int &  NID,
const int &  CID 
)
inline

Definition at line 142 of file agmdirected.h.

References F, and SumFV.

Referenced by DelCom().

142  {
143  if (F[NID].IsKey(CID)) {
144  SumFV[CID] -= F[NID].GetDat(CID);
145  F[NID].DelKey(CID);
146  }
147  }
TFltV SumFV
Definition: agmdirected.h:15
TVec< TIntFltH > F
Definition: agmdirected.h:10

Here is the caller graph for this function:

double TCoda::DotProduct ( const TIntFltH UV,
const TIntFltH VV 
)
inline

Definition at line 154 of file agmdirected.h.

References THash< TKey, TDat, THashFunc >::BegI(), THash< TKey, TDat, THashFunc >::EndI(), THash< TKey, TDat, THashFunc >::GetDat(), THash< TKey, TDat, THashFunc >::IsKey(), and THash< TKey, TDat, THashFunc >::Len().

Referenced by DotProductUtoV(), GetStepSizeByLineSearch(), LikelihoodForNode(), and Prediction().

154  {
155  double DP = 0;
156  if (UV.Len() > VV.Len()) {
157  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
158  if (VV.IsKey(HI.GetKey())) {
159  DP += VV.GetDat(HI.GetKey()) * HI.GetDat();
160  }
161  }
162  } else {
163  for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) {
164  if (UV.IsKey(HI.GetKey())) {
165  DP += UV.GetDat(HI.GetKey()) * HI.GetDat();
166  }
167  }
168  }
169  return DP;
170  }
TIter BegI() const
Definition: hash.h:171
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
TIter EndI() const
Definition: hash.h:176
bool IsKey(const TKey &Key) const
Definition: hash.h:216
int Len() const
Definition: hash.h:186

Here is the call graph for this function:

Here is the caller graph for this function:

double TCoda::DotProductUtoV ( const int &  UID,
const int &  VID 
)
inline

Definition at line 171 of file agmdirected.h.

References DotProduct(), F, and H.

171  {
172  return DotProduct(F[UID], H[VID]);
173  }
TVec< TIntFltH > H
Definition: agmdirected.h:11
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmdirected.h:154
TVec< TIntFltH > F
Definition: agmdirected.h:10

Here is the call graph for this function:

void TCoda::DumpMemberships ( const TStr OutFNm,
const TStrHash< TInt > &  NodeNameH 
)
inline

Definition at line 45 of file agmdirected.h.

References DumpMemberships(), and PNoCom.

Referenced by DumpMemberships().

45 { DumpMemberships(OutFNm, NodeNameH, sqrt(PNoCom)); }
TFlt PNoCom
Definition: agmdirected.h:24
void DumpMemberships(const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
Definition: agmdirected.h:45

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::DumpMemberships ( const TStr OutFNm,
const TStrHash< TInt > &  NodeNameH,
const double  Thres 
)

Definition at line 617 of file agmdirected.cpp.

References TStr::CStr(), G, THash< TKey, TDat, THashFunc >::GetKey(), TStrHash< TDat, TStringPool, THashFunc >::GetKey(), GetNIDValH(), TNGraph::GetNodes(), GetSumVal(), IAssert, THash< TKey, TDat, THashFunc >::Len(), TStrHash< TDat, TStringPool, THashFunc >::Len(), NumComs, and TFlt::Val.

617  {
618  if (NodeNameH.Len() > 0) { IAssert(NodeNameH.Len() == G->GetNodes()); }
619  FILE* FId = fopen(OutFNm.CStr(), "wt");
620  TIntFltH CIDSumFH(NumComs);
621  for (int c = 0; c < NumComs; c++) {
622  CIDSumFH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c));
623  }
624  CIDSumFH.SortByDat(false);
625  for (int c = 0; c < NumComs; c++) {
626  int CID = CIDSumFH.GetKey(c);
627  TIntFltH NIDOutFH, NIDInFH, NIDInOutFH;
628  GetNIDValH(NIDInOutFH, NIDOutFH, NIDInFH, CID, Thres);
629  if (NIDOutFH.Len() == 0 || NIDInFH.Len() == 0) { continue; }
630 
631  /*
632  if (GetSumVal(true, CID) < Thres && GetSumVal(false, CID) < Thres) { continue; }
633  for (int u = 0; u < NIDV.Len(); u++) {
634  if (GetCom(true, u, CID) >= Thres && GetCom(false, u, CID) >= Thres) {
635  NIDInOutFH.AddDat(u, GetCom(true, u, CID) + GetCom(false, u, CID));
636  } else if (GetCom(true, u, CID) >= Thres) {
637  NIDOutFH.AddDat(u, GetCom(true, u, CID));
638  } else if (GetCom(false, u, CID) >= Thres) {
639  NIDInFH.AddDat(u, GetCom(false, u, CID));
640  }
641  }
642  NIDInOutFH.SortByDat(false);
643  NIDInFH.SortByDat(false);
644  NIDOutFH.SortByDat(false);
645  */
646  fprintf(FId, "%d\t%d\t%d\t%f\t%f\t%f\t", NIDInOutFH.Len(), NIDInFH.Len() - NIDInOutFH.Len(), NIDOutFH.Len() - NIDInOutFH.Len(), CIDSumFH.GetDat(CID).Val, GetSumVal(false, CID).Val, GetSumVal(true, CID).Val);
647  fprintf(FId, "InOut:\t");
648  for (int u = 0; u < NIDInOutFH.Len(); u++) {
649  int NIdx = NIDInOutFH.GetKey(u);
650  fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDInOutFH[u].Val);
651  }
652  fprintf(FId, "In:\t");
653  for (int u = 0; u < NIDInFH.Len(); u++) {
654  int NIdx = NIDInFH.GetKey(u);
655  fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDInFH[u].Val);
656  }
657  fprintf(FId, "Out:\t");
658  for (int u = 0; u < NIDOutFH.Len(); u++) {
659  int NIdx = NIDOutFH.GetKey(u);
660  fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDOutFH[u].Val);
661  }
662  fprintf(FId, "\n");
663  }
664  fclose(FId);
665 }
#define IAssert(Cond)
Definition: bd.h:262
void GetNIDValH(TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
double Val
Definition: dt.h:1295
int Len() const
Definition: hash.h:770
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
const char * GetKey(const int &KeyId) const
Definition: hash.h:821
PNGraph G
Definition: agmdirected.h:9
TInt NumComs
Definition: agmdirected.h:18
char * CStr()
Definition: dt.h:476
int Len() const
Definition: hash.h:186
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210

Here is the call graph for this function:

void TCoda::DumpMemberships ( const TStr OutFNm,
const double  Thres 
)

Definition at line 668 of file agmdirected.cpp.

References TStrHash< TDat, TStringPool, THashFunc >::AddKey(), DumpMemberships(), TStr::Fmt(), G, TNGraph::GetNodes(), TVec< TVal, TSizeTy >::Len(), and NIDV.

668  {
669  TStrHash<TInt> NodeNameH(G->GetNodes(), false);
670  for (int u = 0; u < NIDV.Len(); u++) { NodeNameH.AddKey(TStr::Fmt("%d", NIDV[u].Val)); }
671  DumpMemberships(OutFNm, NodeNameH, Thres);
672 }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TIntV NIDV
Definition: agmdirected.h:13
PNGraph G
Definition: agmdirected.h:9
int AddKey(const char *Key)
Definition: hash.h:896
Definition: hash.h:729
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
void DumpMemberships(const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
Definition: agmdirected.h:45

Here is the call graph for this function:

void TCoda::DumpMemberships ( const TStr OutFNm)
inline

Definition at line 49 of file agmdirected.h.

References DumpMemberships(), and PNoCom.

Referenced by DumpMemberships().

49 { DumpMemberships(OutFNm, sqrt(PNoCom)); }
TFlt PNoCom
Definition: agmdirected.h:24
void DumpMemberships(const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
Definition: agmdirected.h:45

Here is the call graph for this function:

Here is the caller graph for this function:

int TCoda::FindComsByCV ( TIntV ComsV,
const double  HOFrac = 0.2,
const int  NumThreads = 20,
const TStr  PlotLFNm = TStr(),
const int  EdgesForCV = 100,
const double  StepAlpha = 0.3,
const double  StepBeta = 0.1 
)

Definition at line 746 of file agmdirected.cpp.

References TVec< TVal, TSizeTy >::Add(), TStr::Empty(), G, TAGMFastUtil::GenHoldOutPairs(), TNGraph::GetNodes(), HOVIDSV, TVec< TVal, TSizeTy >::Last(), TVec< TVal, TSizeTy >::Len(), Likelihood(), LikelihoodHoldOut(), MLEGradAscent(), MLEGradAscentParallel(), TFlt::Mn, NeighborComInit(), TGnuPlot::PlotValV(), RandomInit(), and Rnd.

Referenced by FindComsByCV().

746  {
747  if (ComsV.Len() == 0) {
748  int MaxComs = G->GetNodes() / 5;
749  ComsV.Add(2);
750  while(ComsV.Last() < MaxComs) { ComsV.Add(ComsV.Last() * 2); }
751  }
752  int MaxIterCV = 3;
753 
754  TVec<TVec<TIntSet> > HoldOutSets(MaxIterCV);
755  TFltIntPrV NIdPhiV;
756  TAGMFastUtil::GetNIdPhiV<PNGraph>(G, NIdPhiV);
757 
758  if (G->GetEdges() > EdgesForCV) { //if edges are many enough, use CV
759  printf("generating hold out set\n");
760  TIntV NIdV1, NIdV2;
761  G->GetNIdV(NIdV1);
762  G->GetNIdV(NIdV2);
763  for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
764  // generate holdout sets
765  TAGMFastUtil::GenHoldOutPairs(G, HoldOutSets[IterCV], HOFrac, Rnd);
766  /*
767  HoldOutSets[IterCV].Gen(G->GetNodes());
768  const int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
769  int HOCnt = 0;
770  int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
771  printf("holding out %d edges...\n", HOEdges);
772  for (int he = 0; he < (int) HOEdges; he++) {
773  HoldOutSets[IterCV][EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
774  HoldOutSets[IterCV][EdgeV[he].Val2].AddKey(EdgeV[he].Val1);
775  HOCnt++;
776  }
777  printf("%d Edges hold out\n", HOCnt);
778  while(HOCnt++ < HOTotal) {
779  int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
780  int DstNID = Rnd.GetUniDevInt(G->GetNodes());
781  HoldOutSets[IterCV][SrcNID].AddKey(DstNID);
782  HoldOutSets[IterCV][DstNID].AddKey(SrcNID);
783  }
784  */
785  }
786 
787  printf("hold out set generated\n");
788  }
789 
790  TFltV HOLV(ComsV.Len());
791  TIntFltPrV ComsLV;
792  for (int c = 0; c < ComsV.Len(); c++) {
793  const int Coms = ComsV[c];
794  printf("Try number of Coms:%d\n", Coms);
795 
796  if (G->GetEdges() > EdgesForCV) { //if edges are many enough, use CV
797  for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
798  HOVIDSV = HoldOutSets[IterCV];
799  NeighborComInit(NIdPhiV, Coms);
800  printf("Initialized\n");
801 
802  if (NumThreads == 1) {
803  printf("MLE without parallelization begins\n");
804  MLEGradAscent(0.05, 10 * G->GetNodes(), "", StepAlpha, StepBeta);
805  } else {
806  printf("MLE with parallelization begins\n");
807  MLEGradAscentParallel(0.05, 100, NumThreads, "", StepAlpha, StepBeta);
808  }
809  double HOL = LikelihoodHoldOut();
810  HOL = HOL < 0? HOL: TFlt::Mn;
811  HOLV[c] += HOL;
812  }
813  }
814  else {
815  HOVIDSV.Gen(G->GetNodes());
816  MLEGradAscent(0.0001, 100 * G->GetNodes(), "");
817  double BIC = 2 * Likelihood() - (double) G->GetNodes() * Coms * 2.0 * log ( (double) G->GetNodes());
818  HOLV[c] = BIC;
819  }
820  }
821  int EstComs = 2;
822  double MaxL = TFlt::Mn;
823  printf("\n");
824  for (int c = 0; c < ComsV.Len(); c++) {
825  ComsLV.Add(TIntFltPr(ComsV[c].Val, HOLV[c].Val));
826  printf("%d(%f)\t", ComsV[c].Val, HOLV[c].Val);
827  if (MaxL < HOLV[c]) {
828  MaxL = HOLV[c];
829  EstComs = ComsV[c];
830  }
831  }
832  printf("\n");
833  RandomInit(EstComs);
834  HOVIDSV.Gen(G->GetNodes());
835  if (! PlotLFNm.Empty()) {
836  TGnuPlot::PlotValV(ComsLV, PlotLFNm, "hold-out likelihood", "communities", "likelihood");
837  }
838  return EstComs;
839 }
double Likelihood(const bool DoParallel=false)
void NeighborComInit(const int InitComs)
Definition: agmdirected.cpp:83
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
double LikelihoodHoldOut(const bool DoParallel=false)
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
PNGraph G
Definition: agmdirected.h:9
void RandomInit(const int InitComs)
Definition: agmdirected.cpp:43
bool Empty() const
Definition: dt.h:488
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmfast.h:159
TRnd Rnd
Definition: agmdirected.h:12
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
static const double Mn
Definition: dt.h:1297
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429

Here is the call graph for this function:

Here is the caller graph for this function:

int TCoda::FindComsByCV ( const int  NumThreads,
const int  MaxComs,
const int  MinComs,
const int  DivComs,
const TStr  OutFNm,
const int  EdgesForCV = 100,
const double  StepAlpha = 0.3,
const double  StepBeta = 0.3 
)

estimate number of communities using cross validation

Definition at line 733 of file agmdirected.cpp.

References TVec< TVal, TSizeTy >::Add(), FindComsByCV(), TVec< TVal, TSizeTy >::Last(), TVec< TVal, TSizeTy >::Len(), TMath::Log(), and TInt::Val.

733  {
734  double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs);
735  TIntV ComsV;
736  ComsV.Add(MinComs);
737  while (ComsV.Len() < DivComs) {
738  int NewComs = int(ComsV.Last() * ComsGap);
739  if (NewComs == ComsV.Last().Val) { NewComs++; }
740  ComsV.Add(NewComs);
741  }
742  if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); }
743  return FindComsByCV(ComsV, 0.1, NumThreads, OutFNm, EdgesForCV, StepAlpha, StepBeta);
744 }
int Val
Definition: dt.h:1046
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const int EdgesForCV=100, const double StepAlpha=0.3, const double StepBeta=0.1)
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
static double Log(const double &Val)
Definition: xmath.h:14
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574

Here is the call graph for this function:

void TCoda::GetCmtyS ( TIntSet CmtySOut,
TIntSet CmtySIn,
const int  CID,
const double  Thres 
)

Definition at line 674 of file agmdirected.cpp.

References G, THashSet< TKey, THashFunc >::Gen(), GetCom(), TNGraph::GetNodes(), TVec< TVal, TSizeTy >::Len(), and NIDV.

674  {
675  CmtySOut.Gen(G->GetNodes() / 10);
676  CmtySIn.Gen(G->GetNodes() / 10);
677  for (int u = 0; u < NIDV.Len(); u++) {
678  if (GetCom(true, u, CID) > Thres) {
679  //CmtySOut.AddKey(
680  }
681  }
682 }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
void Gen(const int &ExpectVals)
Definition: shash.h:1115
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TIntV NIDV
Definition: agmdirected.h:13
PNGraph G
Definition: agmdirected.h:9
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93

Here is the call graph for this function:

void TCoda::GetCmtyVV ( TVec< TIntV > &  CmtyVVOut,
TVec< TIntV > &  CmtyVVIn,
const int  MinSz = 3 
)

Definition at line 717 of file agmdirected.cpp.

References G, and TNGraph::GetNodes().

Referenced by GetCmtyVV().

717  {
718  GetCmtyVV(false, CmtyVVIn, sqrt(1.0 / G->GetNodes()), MinSz);
719  GetCmtyVV(true, CmtyVVOut, sqrt(1.0 / G->GetNodes()), MinSz);
720 }
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
void GetCmtyVV(TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)
PNGraph G
Definition: agmdirected.h:9

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::GetCmtyVV ( TVec< TIntV > &  CmtyVVOut,
TVec< TIntV > &  CmtyVVIn,
const double  ThresOut,
const double  ThresIn,
const int  MinSz = 3 
)

Definition at line 727 of file agmdirected.cpp.

References GetCmtyVV().

727  {
728  GetCmtyVV(false, CmtyVVIn, ThresIn, MinSz);
729  GetCmtyVV(true, CmtyVVOut, ThresOut, MinSz);
730 }
void GetCmtyVV(TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)

Here is the call graph for this function:

void TCoda::GetCmtyVV ( const bool  IsOut,
TVec< TIntV > &  CmtyVV 
)

Definition at line 501 of file agmdirected.cpp.

References G, GetCmtyVV(), and TNGraph::GetNodes().

501  {
502  GetCmtyVV(IsOut, CmtyVV, sqrt(1.0 / G->GetNodes()), 3);
503 }
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
void GetCmtyVV(TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)
PNGraph G
Definition: agmdirected.h:9

Here is the call graph for this function:

void TCoda::GetCmtyVV ( const bool  IsOut,
TVec< TIntV > &  CmtyVV,
const double  Thres,
const int  MinSz = 3 
)

extract community affiliation for outgoing edges from F_uc

Definition at line 539 of file agmdirected.cpp.

References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::AddDat(), TVec< TVal, TSizeTy >::Gen(), THash< TKey, TDat, THashFunc >::GetKey(), THash< TKey, TDat, THashFunc >::GetKeyV(), GetNIDValH(), GetSumVal(), TVec< TVal, TSizeTy >::Len(), NumComs, and THash< TKey, TDat, THashFunc >::SortByDat().

539  {
540  CmtyVV.Gen(NumComs, 0);
541  TIntFltH CIDSumFH(NumComs);
542  for (int c = 0; c < NumComs; c++) {
543  CIDSumFH.AddDat(c, GetSumVal(IsOut, c));
544  }
545  CIDSumFH.SortByDat(false);
546  for (int c = 0; c < NumComs; c++) {
547  int CID = CIDSumFH.GetKey(c);
548  TIntFltH NIDFucH, NIDHucH, NIDInOutH;
549  TIntV CmtyV;
550  GetNIDValH(NIDInOutH, NIDFucH, NIDHucH, CID, Thres);
551  if (IsOut) {
552  NIDFucH.GetKeyV(CmtyV);
553  } else {
554  NIDHucH.GetKeyV(CmtyV);
555  }
556  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
557  }
558  if ( NumComs != CmtyVV.Len()) {
559  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
560  }
561 }
void GetNIDValH(TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
TInt NumComs
Definition: agmdirected.h:18
void GetKeyV(TVec< TKey > &KeyV) const
Definition: hash.h:442
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574

Here is the call graph for this function:

void TCoda::GetCmtyVVUnSorted ( const bool  IsOut,
TVec< TIntV > &  CmtyVV,
const double  Thres,
const int  MinSz = 3 
)

Definition at line 563 of file agmdirected.cpp.

References TVec< TVal, TSizeTy >::Add(), G, TVec< TVal, TSizeTy >::Gen(), GetCom(), TNGraph::GetNodes(), GetSumVal(), TVec< TVal, TSizeTy >::Len(), NIDV, and NumComs.

Referenced by GetCmtyVVUnSorted().

563  {
564  CmtyVV.Gen(NumComs, 0);
565  for (int c = 0; c < NumComs; c++) {
566  TIntV CmtyV((int) (GetSumVal(IsOut, c) * 10), 0);
567  for (int u = 0; u < G->GetNodes(); u++) {
568  if (GetCom(IsOut, u, c) > Thres) { CmtyV.Add(NIDV[u]); }
569  }
570  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
571  }
572  if ( NumComs != CmtyVV.Len()) {
573  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
574  }
575 }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
TIntV NIDV
Definition: agmdirected.h:13
PNGraph G
Definition: agmdirected.h:9
TInt NumComs
Definition: agmdirected.h:18
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::GetCmtyVVUnSorted ( TVec< TIntV > &  CmtyVVOut,
TVec< TIntV > &  CmtyVVIn 
)

Definition at line 722 of file agmdirected.cpp.

References G, GetCmtyVVUnSorted(), and TNGraph::GetNodes().

722  {
723  GetCmtyVVUnSorted(false, CmtyVVIn, sqrt(1.0 / G->GetNodes()));
724  GetCmtyVVUnSorted(true, CmtyVVOut, sqrt(1.0 / G->GetNodes()));
725 }
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
void GetCmtyVVUnSorted(const bool IsOut, TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
PNGraph G
Definition: agmdirected.h:9

Here is the call graph for this function:

double TCoda::GetCom ( const bool  IsOut,
const int &  NID,
const int &  CID 
)
inline

Definition at line 93 of file agmdirected.h.

References GetComIn(), and GetComOut().

Referenced by GetCmtyS(), GetCmtyVVUnSorted(), GetCommunity(), GetNIDValH(), GetStepSizeByLineSearch(), GradientForNode(), LikelihoodForNode(), and MLEGradAscent().

93  {
94  if (IsOut) {
95  return GetComOut(NID, CID);
96  } else {
97  return GetComIn(NID, CID);
98  }
99  }
double GetComOut(const int &NID, const int &CID)
Definition: agmdirected.h:100
double GetComIn(const int &NID, const int &CID)
Definition: agmdirected.h:107

Here is the call graph for this function:

Here is the caller graph for this function:

double TCoda::GetComIn ( const int &  NID,
const int &  CID 
)
inline

Definition at line 107 of file agmdirected.h.

References H.

Referenced by GetCom().

107  {
108  if (H[NID].IsKey(CID)) {
109  return H[NID].GetDat(CID);
110  } else {
111  return 0.0;
112  }
113  }
TVec< TIntFltH > H
Definition: agmdirected.h:11

Here is the caller graph for this function:

void TCoda::GetCommunity ( TIntV CmtyVIn,
TIntV CmtyVOut,
const int  CID 
)
inline

Definition at line 50 of file agmdirected.h.

References GetCommunity(), and PNoCom.

Referenced by GetCommunity(), and GetTopCIDs().

50 { GetCommunity(CmtyVIn, CmtyVOut, CID, sqrt(PNoCom)); }
TFlt PNoCom
Definition: agmdirected.h:24
void GetCommunity(TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID)
Definition: agmdirected.h:50

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::GetCommunity ( TIntV CmtyVIn,
TIntV CmtyVOut,
const int  CID,
const double  Thres 
)

extract community affiliation for given community ID

Definition at line 506 of file agmdirected.cpp.

References F, GetCom(), TVec< TVal, TSizeTy >::Len(), NIDV, and NodesOk.

506  {
507  TIntFltH NIDFucH(F.Len() / 10), NIDHucH(F.Len() / 10);
508  for (int u = 0; u < NIDV.Len(); u++) {
509  int NID = u;
510  if (! NodesOk) { NID = NIDV[u]; }
511  if (GetCom(true, u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(true, u, CID)); }
512  if (GetCom(false, u, CID) >= Thres) { NIDHucH.AddDat(NID, GetCom(false, u, CID)); }
513  }
514  NIDFucH.SortByDat(false);
515  NIDHucH.SortByDat(false);
516  NIDFucH.GetKeyV(CmtyVOut);
517  NIDHucH.GetKeyV(CmtyVIn);
518 }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TIntV NIDV
Definition: agmdirected.h:13
TVec< TIntFltH > F
Definition: agmdirected.h:10
TBool NodesOk
Definition: agmdirected.h:17
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93

Here is the call graph for this function:

double TCoda::GetComOut ( const int &  NID,
const int &  CID 
)
inline

Definition at line 100 of file agmdirected.h.

References F.

Referenced by GetCom().

100  {
101  if (F[NID].IsKey(CID)) {
102  return F[NID].GetDat(CID);
103  } else {
104  return 0.0;
105  }
106  }
TVec< TIntFltH > F
Definition: agmdirected.h:10

Here is the caller graph for this function:

PNGraph TCoda::GetGraph ( )
inline

Definition at line 31 of file agmdirected.h.

References G.

31 { return G; }
PNGraph G
Definition: agmdirected.h:9
PNGraph TCoda::GetGraphRawNID ( )

Definition at line 577 of file agmdirected.cpp.

References TNGraph::AddEdge(), TNGraph::AddNode(), TNGraph::BegNI(), TNGraph::EndNI(), G, TNGraph::GetEdges(), TNGraph::GetNodes(), IAssert, TNGraph::IsNode(), TNGraph::New(), and NIDV.

Referenced by TCodaAnalyzer::TCodaAnalyzer().

577  {
578  PNGraph NewG = TNGraph::New(G->GetNodes(), -1);
579  for (TNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
580  //add node
581  int NIdx = NI.GetId();
582  int NID = NIDV[NIdx];
583  if (! NewG->IsNode(NID)) { NewG->AddNode(NID); }
584  //add edge
585  for (int e = 0; e < NI.GetOutDeg(); e++) {
586  int OutNID = NIDV[NI.GetOutNId(e)];
587  if (! NewG->IsNode(OutNID)) { NewG->AddNode(OutNID); }
588  NewG->AddEdge(NID, OutNID);
589  }
590  }
591  IAssert(G->GetNodes() == NewG->GetNodes());
592  IAssert(G->GetEdges() == NewG->GetEdges());
593  return NewG;
594 }
#define IAssert(Cond)
Definition: bd.h:262
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:479
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:425
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:236
TIntV NIDV
Definition: agmdirected.h:13
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node IDs SrcNId to node DstNId to the graph.
Definition: graph.cpp:321
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:477
PNGraph G
Definition: agmdirected.h:9
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:481
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::GetNIDValH ( TIntFltH NIdValInOutH,
TIntFltH NIdValOutH,
TIntFltH NIdValInH,
const int  CID,
const double  Thres 
)

Definition at line 596 of file agmdirected.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), THash< TKey, TDat, THashFunc >::Gen(), GetCom(), GetSumVal(), TVec< TVal, TSizeTy >::Len(), NIDV, and THash< TKey, TDat, THashFunc >::SortByDat().

Referenced by DumpMemberships(), GetCmtyVV(), and TCodaAnalyzer::TCodaAnalyzer().

596  {
597  NIdValOutH.Gen((int) GetSumVal(true, CID) + 1);
598  NIdValInH.Gen((int) GetSumVal(false, CID) + 1);
599  NIdValInOutH.Gen((int) GetSumVal(false, CID) + 1);
600  if (GetSumVal(true, CID) < Thres && GetSumVal(false, CID) < Thres) { return; }
601  for (int u = 0; u < NIDV.Len(); u++) {
602  if (GetCom(true, u, CID) >= Thres && GetCom(false, u, CID) >= Thres) {
603  NIdValInOutH.AddDat(NIDV[u], GetCom(true, u, CID) + GetCom(false, u, CID));
604  }
605  if (GetCom(true, u, CID) >= Thres) {
606  NIdValOutH.AddDat(NIDV[u], GetCom(true, u, CID));
607  }
608  if (GetCom(false, u, CID) >= Thres) {
609  NIdValInH.AddDat(NIDV[u], GetCom(false, u, CID));
610  }
611  }
612  NIdValInH.SortByDat(false);
613  NIdValOutH.SortByDat(false);
614  NIdValInOutH.SortByDat(false);
615 }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
TIntV NIDV
Definition: agmdirected.h:13
void Gen(const int &ExpectVals)
Definition: hash.h:180
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
void SortByDat(const bool &Asc=true)
Definition: hash.h:250

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::GetNonEdgePairScores ( TFltIntIntTrV ScoreV)

Definition at line 153 of file agmdirected.cpp.

References TVec< TVal, TSizeTy >::Add(), G, TVec< TVal, TSizeTy >::Gen(), TNGraph::GetNIdV(), TNGraph::GetNodes(), TNGraph::IsEdge(), TVec< TVal, TSizeTy >::Len(), NIDV, and Prediction().

153  {
154  ScoreV.Gen(G->GetNodes() * G->GetNodes(), 0);
155  TIntV NIDV;
156  G->GetNIdV(NIDV);
157  TIntSet Cuv;
158  for (int u = 0; u < NIDV.Len(); u++) {
159  int UID = NIDV[u];
160  for (int v = 0; v < NIDV.Len(); v++) {
161  int VID = NIDV[v];
162  if (UID == VID) { continue; }
163  if (! G->IsEdge(UID, VID)) {
164  double Val = 1.0 - Prediction(UID, VID);
165  ScoreV.Add(TFltIntIntTr(Val, UID, VID));
166  }
167  }
168  }
169 }
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TIntV NIDV
Definition: agmdirected.h:13
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:363
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
Definition: agmdirected.h:174
PNGraph G
Definition: agmdirected.h:9
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:181

Here is the call graph for this function:

int TCoda::GetNumComs ( )
inline

Definition at line 36 of file agmdirected.h.

References NumComs, and TInt::Val.

Referenced by GetTopCIDs(), and TCodaAnalyzer::TCodaAnalyzer().

36 { return NumComs.Val; }
int Val
Definition: dt.h:1046
TInt NumComs
Definition: agmdirected.h:18

Here is the caller graph for this function:

double TCoda::GetRegCoef ( )
inline

Definition at line 34 of file agmdirected.h.

References RegCoef.

34 { return RegCoef; }
TFlt RegCoef
Definition: agmdirected.h:14
double TCoda::GetStepSizeByLineSearch ( const bool  IsRow,
const int  UID,
const TIntFltH DeltaV,
const TIntFltH GradV,
const double &  Alpha,
const double &  Beta,
const int  MaxIter = 10 
)

Definition at line 859 of file agmdirected.cpp.

References DotProduct(), GetCom(), THash< TKey, TDat, THashFunc >::GetDat(), THash< TKey, TDat, THashFunc >::GetKey(), THash< TKey, TDat, THashFunc >::Len(), LikelihoodForNode(), MaxVal, and MinVal.

Referenced by MLEGradAscent(), and MLEGradAscentParallel().

859  {
860  double StepSize = 1.0;
861  double InitLikelihood = LikelihoodForNode(IsRow, UID);
862  TIntFltH NewVarV(DeltaV.Len());
863  for(int iter = 0; iter < MaxIter; iter++) {
864  for (int i = 0; i < DeltaV.Len(); i++){
865  int CID = DeltaV.GetKey(i);
866  double NewVal;
867  NewVal = GetCom(IsRow, UID, CID) + StepSize * DeltaV.GetDat(CID);
868  if (NewVal < MinVal) { NewVal = MinVal; }
869  if (NewVal > MaxVal) { NewVal = MaxVal; }
870  NewVarV.AddDat(CID, NewVal);
871  }
872  if (LikelihoodForNode(IsRow, UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) {
873  StepSize *= Beta;
874  } else {
875  break;
876  }
877  if (iter == MaxIter - 1) {
878  StepSize = 0.0;
879  break;
880  }
881  }
882  return StepSize;
883 }
double LikelihoodForNode(const bool IsRow, const int UID)
TFlt MinVal
Definition: agmdirected.h:21
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmdirected.h:154
TFlt MaxVal
Definition: agmdirected.h:22
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93
int Len() const
Definition: hash.h:186
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210

Here is the call graph for this function:

Here is the caller graph for this function:

TFlt& TCoda::GetSumVal ( const bool  IsOut,
const int  CID 
)
inline

Definition at line 86 of file agmdirected.h.

References SumFV, and SumHV.

Referenced by DumpMemberships(), GetCmtyVV(), GetCmtyVVUnSorted(), GetNIDValH(), GetTopCIDs(), GradientForNode(), and LikelihoodForNode().

86  {
87  if (IsOut) {
88  return SumFV[CID];
89  } else {
90  return SumHV[CID];
91  }
92  }
TFltV SumFV
Definition: agmdirected.h:15
TFltV SumHV
Definition: agmdirected.h:16

Here is the caller graph for this function:

void TCoda::GetTopCIDs ( TIntV CIdV,
const int  TopK,
const int  IsAverage = 1,
const int  MinSz = 1 
)

Definition at line 520 of file agmdirected.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), GetCommunity(), THash< TKey, TDat, THashFunc >::GetKeyV(), GetNumComs(), GetSumVal(), THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), THash< TKey, TDat, THashFunc >::SortByDat(), and TVec< TVal, TSizeTy >::Trunc().

Referenced by TCodaAnalyzer::TCodaAnalyzer().

520  {
521  TIntFltH CIdFHH;
522  for (int c = 0; c < GetNumComs(); c++) {
523  if (IsAverage == 1) {
524  TIntV CmtyVIn, CmtyVOut;
525  GetCommunity(CmtyVIn, CmtyVOut, c);
526  if (CmtyVIn.Len() == 0 || CmtyVOut.Len() == 0) { continue; }
527  if (CmtyVIn.Len() < MinSz || CmtyVOut.Len() < MinSz) { continue; }
528  CIdFHH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c) / (double) CmtyVIn.Len() / (double) CmtyVOut.Len());
529  } else {
530  CIdFHH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c));
531  }
532  }
533  CIdFHH.SortByDat(false);
534  CIdFHH.GetKeyV(CIdV);
535  if (TopK < CIdFHH.Len()) { CIdV.Trunc(TopK); }
536 }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
int GetNumComs()
Definition: agmdirected.h:36
void GetKeyV(TVec< TKey > &KeyV) const
Definition: hash.h:442
void GetCommunity(TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID)
Definition: agmdirected.h:50
void Trunc(const TSizeTy &_Vals=-1)
Truncates the vector's length and capacity to _Vals elements.
Definition: ds.h:982
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
void SortByDat(const bool &Asc=true)
Definition: hash.h:250

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::GradientForNode ( const bool  IsRow,
const int  UID,
TIntFltH GradU,
const TIntSet CIDSet 
)

Definition at line 377 of file agmdirected.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), G, THash< TKey, TDat, THashFunc >::Gen(), TVec< TVal, TSizeTy >::Gen(), GetCom(), TNGraph::TNodeI::GetInDeg(), TNGraph::TNodeI::GetInNId(), THashSet< TKey, THashFunc >::GetKey(), TNGraph::GetNI(), TNGraph::TNodeI::GetOutDeg(), TNGraph::TNodeI::GetOutNId(), GetSumVal(), HOVIDSV, IAssert, THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), THashSet< TKey, THashFunc >::Len(), NegWgt, NumComs, Prediction(), RegCoef, and SumHV.

Referenced by MLEGradAscent(), and MLEGradAscentParallel().

377  {
378  GradU.Gen(CIDSet.Len());
379 
380  TFltV HOSumHV; //adjust for Hv of v hold out
381  if (HOVIDSV[UID].Len() > 0) {
382  HOSumHV.Gen(NumComs);
383  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
384  for (int c = 0; c < SumHV.Len(); c++) {
385  HOSumHV[c] += GetCom(! IsRow, HOVIDSV[UID][e], c);
386  }
387  }
388  }
389 
390  TNGraph::TNodeI NI = G->GetNI(UID);
391  int Deg = IsRow ? NI.GetOutDeg(): NI.GetInDeg();
392  TFltV PredV(Deg), GradV(CIDSet.Len());
393  TIntV CIDV(CIDSet.Len());
394  for (int e = 0; e < Deg; e++) {
395  int VID = IsRow? NI.GetOutNId(e): NI.GetInNId(e);
396  if (VID == UID) { continue; }
397  if (HOVIDSV[UID].IsKey(VID)) { continue; }
398  PredV[e] = IsRow? Prediction(UID, VID): Prediction(VID, UID);
399  }
400 
401  for (int c = 0; c < CIDSet.Len(); c++) {
402  int CID = CIDSet.GetKey(c);
403  double Val = 0.0;
404  for (int e = 0; e < Deg; e++) {
405  int VID = IsRow? NI.GetOutNId(e): NI.GetInNId(e);
406  if (VID == UID) { continue; }
407  if (HOVIDSV[UID].IsKey(VID)) { continue; }
408  Val += PredV[e] * GetCom(! IsRow, VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(! IsRow, VID, CID);
409  }
410  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
411  Val -= NegWgt * (GetSumVal(! IsRow, CID) - HOSum - GetCom(! IsRow, UID, CID));
412  CIDV[c] = CID;
413  GradV[c] = Val;
414  }
415  //add regularization
416  if (RegCoef > 0.0) { //L1
417  for (int c = 0; c < GradV.Len(); c++) {
418  GradV[c] -= RegCoef;
419  }
420  }
421  if (RegCoef < 0.0) { //L2
422  for (int c = 0; c < GradV.Len(); c++) {
423  GradV[c] += 2 * RegCoef * GetCom(IsRow, UID, CIDV[c]);
424  }
425  }
426  for (int c = 0; c < GradV.Len(); c++) {
427  if (GetCom(IsRow, UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
428  if (fabs(GradV[c]) < 0.0001) { continue; }
429  GradU.AddDat(CIDV[c], GradV[c]);
430  }
431  for (int c = 0; c < GradU.Len(); c++) {
432  if (GradU[c] >= 10) { GradU[c] = 10; }
433  if (GradU[c] <= -10) { GradU[c] = -10; }
434  IAssert(GradU[c] >= -10);
435  }
436 }
#define IAssert(Cond)
Definition: bd.h:262
TFlt NegWgt
Definition: agmdirected.h:23
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:483
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
TFlt RegCoef
Definition: agmdirected.h:14
const TKey & GetKey(const int &KeyId) const
Definition: shash.h:1141
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
void Gen(const int &ExpectVals)
Definition: hash.h:180
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
Definition: agmdirected.h:174
PNGraph G
Definition: agmdirected.h:9
TInt NumComs
Definition: agmdirected.h:18
int Len() const
Definition: shash.h:1121
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:362
TFltV SumHV
Definition: agmdirected.h:16
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:360
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:368
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:372

Here is the call graph for this function:

Here is the caller graph for this function:

double TCoda::Likelihood ( const bool  DoParallel = false)

Definition at line 231 of file agmdirected.cpp.

References F, and LikelihoodForNode().

Referenced by FindComsByCV(), MLEGradAscent(), and MLEGradAscentParallel().

231  {
232  TExeTm ExeTm;
233  double L = 0.0;
234  if (_DoParallel) {
235  #pragma omp parallel for
236  for (int u = 0; u < F.Len(); u++) {
237  double LU = LikelihoodForNode(true, u);
238  #pragma omp atomic
239  L += LU;
240  }
241  }
242  else {
243  for (int u = 0; u < F.Len(); u++) {
244  double LU = LikelihoodForNode(true, u);
245  L += LU;
246  }
247  }
248  return L;
249 }
double LikelihoodForNode(const bool IsRow, const int UID)
Definition: tm.h:355
TVec< TIntFltH > F
Definition: agmdirected.h:10

Here is the call graph for this function:

Here is the caller graph for this function:

double TCoda::LikelihoodForNode ( const bool  IsRow,
const int  UID 
)

Definition at line 251 of file agmdirected.cpp.

References F, and H.

Referenced by GetStepSizeByLineSearch(), and Likelihood().

251  {
252  if (IsRow) {
253  return LikelihoodForNode(IsRow, UID, F[UID]);
254  } else {
255  return LikelihoodForNode(IsRow, UID, H[UID]);
256  }
257 }
double LikelihoodForNode(const bool IsRow, const int UID)
TVec< TIntFltH > H
Definition: agmdirected.h:11
TVec< TIntFltH > F
Definition: agmdirected.h:10

Here is the caller graph for this function:

double TCoda::LikelihoodForNode ( const bool  IsRow,
const int  UID,
const TIntFltH FU 
)

Definition at line 259 of file agmdirected.cpp.

References THash< TKey, TDat, THashFunc >::BegI(), DotProduct(), THash< TKey, TDat, THashFunc >::EndI(), F, G, TVec< TVal, TSizeTy >::Gen(), GetCom(), TNGraph::TNodeI::GetInDeg(), TNGraph::TNodeI::GetInNId(), TNGraph::GetNI(), TNGraph::TNodeI::GetOutDeg(), TNGraph::TNodeI::GetOutNId(), GetSumVal(), H, HOVIDSV, TVec< TVal, TSizeTy >::Len(), NegWgt, Norm2(), NumComs, Prediction(), RegCoef, Sum(), and SumHV.

259  {
260  double L = 0.0;
261  TFltV HOSumHV; //adjust for Hv of v hold out
262  if (HOVIDSV[UID].Len() > 0) {
263  HOSumHV.Gen(NumComs);
264  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
265  for (int c = 0; c < SumHV.Len(); c++) {
266  HOSumHV[c] += GetCom(! IsRow, HOVIDSV[UID][e], c);
267  }
268  }
269  }
270  TNGraph::TNodeI NI = G->GetNI(UID);
271  const int Deg = IsRow ? NI.GetOutDeg(): NI.GetInDeg();
272  for (int e = 0; e < Deg; e++) {
273  const int v = IsRow ? NI.GetOutNId(e): NI.GetInNId(e);
274  if (v == UID) { continue; }
275  if (HOVIDSV[UID].IsKey(v)) { continue; }
276  if (IsRow) {
277  L += log (1.0 - Prediction(FU, H[v])) + NegWgt * DotProduct(FU, H[v]);
278  } else {
279  L += log (1.0 - Prediction(F[v], FU)) + NegWgt * DotProduct(F[v], FU);
280  }
281  }
282  for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
283  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
284  L -= NegWgt * (GetSumVal(! IsRow, HI.GetKey()) - HOSum - GetCom(! IsRow, UID, HI.GetKey())) * HI.GetDat();
285  }
286  //add regularization
287  if (RegCoef > 0.0) { //L1
288  L -= RegCoef * Sum(FU);
289  }
290  if (RegCoef < 0.0) { //L2
291  L += RegCoef * Norm2(FU);
292  }
293  return L;
294 }
TFlt NegWgt
Definition: agmdirected.h:23
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:483
TVec< TIntFltH > H
Definition: agmdirected.h:11
TIter BegI() const
Definition: hash.h:171
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
TIter EndI() const
Definition: hash.h:176
TFlt RegCoef
Definition: agmdirected.h:14
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmdirected.h:154
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
TVec< TIntFltH > F
Definition: agmdirected.h:10
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
Definition: agmdirected.h:174
PNGraph G
Definition: agmdirected.h:9
TInt NumComs
Definition: agmdirected.h:18
double Norm2(const TIntFltH &UV)
Definition: agmdirected.h:189
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:362
TFltV SumHV
Definition: agmdirected.h:16
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:360
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:368
double Sum(const TIntFltH &UV)
Definition: agmdirected.h:182
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:372

Here is the call graph for this function:

double TCoda::LikelihoodHoldOut ( const bool  DoParallel = false)

Definition at line 841 of file agmdirected.cpp.

References G, HOVIDSV, TNGraph::IsEdge(), NegWgt, and Prediction().

Referenced by FindComsByCV().

841  {
842  double L = 0.0;
843  for (int u = 0; u < HOVIDSV.Len(); u++) {
844  for (int e = 0; e < HOVIDSV[u].Len(); e++) {
845  int VID = HOVIDSV[u][e];
846  if (VID == u) { continue; }
847  double Pred = Prediction(u, VID);
848  if (G->IsEdge(u, VID)) {
849  L += log(1.0 - Pred);
850  }
851  else {
852  L += NegWgt * log(Pred);
853  }
854  }
855  }
856  return L;
857 }
TFlt NegWgt
Definition: agmdirected.h:23
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:363
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
Definition: agmdirected.h:174
PNGraph G
Definition: agmdirected.h:9

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::Load ( TSIn SIn,
const int &  RndSeed = 0 
)

Definition at line 25 of file agmdirected.cpp.

References F, G, H, HOVIDSV, TNGraph::Load(), TVec< TVal, TSizeTy >::Load(), TBool::Load(), TInt::Load(), TFlt::Load(), MaxVal, MinVal, NegWgt, NIDV, NodesOk, NumComs, PNoCom, TRnd::PutSeed(), RegCoef, Rnd, SumFV, and SumHV.

25  {
26  G->Load(SIn);
27  F.Load(SIn);
28  H.Load(SIn);
29  NIDV.Load(SIn);
30  RegCoef.Load(SIn);
31  SumFV.Load(SIn);
32  SumHV.Load(SIn);
33  NodesOk.Load(SIn);
34  MinVal.Load(SIn);
35  MaxVal.Load(SIn);
36  NegWgt.Load(SIn);
37  NumComs.Load(SIn);
38  HOVIDSV.Load(SIn);
39  PNoCom.Load(SIn);
40  Rnd.PutSeed(RndSeed);
41 }
TFlt NegWgt
Definition: agmdirected.h:23
TFlt MinVal
Definition: agmdirected.h:21
TVec< TIntFltH > H
Definition: agmdirected.h:11
void Load(TSIn &SIn)
Definition: dt.h:901
void Load(TSIn &SIn)
Definition: ds.h:895
TFlt RegCoef
Definition: agmdirected.h:14
TIntV NIDV
Definition: agmdirected.h:13
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
TFltV SumFV
Definition: agmdirected.h:15
static PNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
Definition: graph.h:431
void Load(TSIn &SIn)
Definition: dt.h:1059
TVec< TIntFltH > F
Definition: agmdirected.h:10
TFlt PNoCom
Definition: agmdirected.h:24
PNGraph G
Definition: agmdirected.h:9
TInt NumComs
Definition: agmdirected.h:18
TBool NodesOk
Definition: agmdirected.h:17
void PutSeed(const int &_Seed)
Definition: dt.cpp:18
TFltV SumHV
Definition: agmdirected.h:16
void Load(TSIn &SIn)
Definition: dt.h:1312
TRnd Rnd
Definition: agmdirected.h:12
TFlt MaxVal
Definition: agmdirected.h:22

Here is the call graph for this function:

int TCoda::MLEGradAscent ( const double &  Thres,
const int &  MaxIter,
const TStr  PlotNm,
const double  StepAlpha = 0.3,
const double  StepBeta = 0.1 
)

Definition at line 885 of file agmdirected.cpp.

References TVec< TVal, TSizeTy >::Add(), AddCom(), THashSet< TKey, THashFunc >::AddKey(), THash< TKey, TDat, THashFunc >::BegI(), DelCom(), TStr::Empty(), THashSet< TKey, THashFunc >::Empty(), THash< TKey, TDat, THashFunc >::EndI(), F, G, GetCom(), TNGraph::TNodeI::GetInDeg(), TNGraph::TNodeI::GetInNId(), TNGraph::GetNI(), TNGraph::GetNodes(), TNGraph::TNodeI::GetOutDeg(), TNGraph::TNodeI::GetOutNId(), GetStepSizeByLineSearch(), TExeTm::GetTmStr(), GradientForNode(), H, HOVIDSV, IAssert, THashSet< TKey, THashFunc >::IsKey(), Likelihood(), TFlt::Mn, Norm2(), NumComs, TGnuPlot::PlotValV(), and Rnd.

Referenced by FindComsByCV().

885  {
886  time_t InitTime = time(NULL);
887  TExeTm ExeTm, CheckTm;
888  int iter = 0, PrevIter = 0;
889  TIntFltPrV IterLV;
890  TNGraph::TNodeI UI;
891  double PrevL = TFlt::Mn, CurL = 0.0;
892  TIntV NIdxV(F.Len(), 0);
893  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
894  IAssert(NIdxV.Len() == F.Len());
895  TIntFltH GradV;
896  while(iter < MaxIter) {
897  NIdxV.Shuffle(Rnd);
898  for (int ui = 0; ui < F.Len(); ui++, iter++) {
899  const bool IsRow = (ui % 2 == 0);
900  int u = NIdxV[ui]; //
901  //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
902  UI = G->GetNI(u);
903  const int Deg = IsRow? UI.GetOutDeg(): UI.GetInDeg();
904  TIntSet CIDSet(5 * Deg);
905  for (int e = 0; e < Deg; e++) {
906  int VID = IsRow? UI.GetOutNId(e): UI.GetInNId(e);
907  if (HOVIDSV[u].IsKey(VID)) { continue; }
908  TIntFltH NbhCIDH = IsRow? H[VID]: F[VID];
909  for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
910  CIDSet.AddKey(CI.GetKey());
911  //printf("CI.GetKey:%d\n", CI.GetKey());
912  IAssert(CI.GetKey() <= NumComs);
913  }
914  }
915  TIntFltH& CurMem = IsRow? F[u]: H[u];
916  for (TIntFltH::TIter CI = CurMem.BegI(); CI < CurMem.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
917  if (! CIDSet.IsKey(CI.GetKey())) {
918  DelCom(IsRow, u, CI.GetKey());
919  }
920  }
921  if (CIDSet.Empty()) { continue; }
922  GradientForNode(IsRow, u, GradV, CIDSet);
923  if (Norm2(GradV) < 1e-4) { continue; }
924  double LearnRate = GetStepSizeByLineSearch(IsRow, u, GradV, GradV, StepAlpha, StepBeta);
925  if (LearnRate == 0.0) { continue; }
926  for (int ci = 0; ci < GradV.Len(); ci++) {
927  int CID = GradV.GetKey(ci);
928  double Change = LearnRate * GradV.GetDat(CID);
929  double NewFuc = GetCom(IsRow, u, CID) + Change;
930  if (NewFuc <= 0.0) {
931  DelCom(IsRow, u, CID);
932  } else {
933  AddCom(IsRow, u, CID, NewFuc);
934  }
935  }
936  if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) {
937  IterLV.Add(TIntFltPr(iter, Likelihood(false)));
938  }
939  }
940  printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
941  fflush(stdout);
942  if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
943  PrevIter = iter;
944  CurL = Likelihood();
945  if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
946  printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL);
947  }
948  fflush(stdout);
949  if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
950  else { PrevL = CurL; }
951  }
952 
953  }
954  printf("\n");
955  printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr());
956  if (! PlotNm.Empty()) {
957  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
958  }
959  return iter;
960 }
#define IAssert(Cond)
Definition: bd.h:262
Definition: tm.h:355
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:483
double Likelihood(const bool DoParallel=false)
TVec< TIntFltH > H
Definition: agmdirected.h:11
TIter BegI() const
Definition: hash.h:171
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TIter EndI() const
Definition: hash.h:176
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
const char * GetTmStr() const
Definition: tm.h:370
double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
TVec< TIntFltH > F
Definition: agmdirected.h:10
PNGraph G
Definition: agmdirected.h:9
TInt NumComs
Definition: agmdirected.h:18
void DelCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:135
double Norm2(const TIntFltH &UV)
Definition: agmdirected.h:189
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:362
bool Empty() const
Definition: dt.h:488
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339
void AddCom(const bool IsOut, const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:114
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:360
TRnd Rnd
Definition: agmdirected.h:12
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:368
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
void GradientForNode(const bool IsRow, const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:372
static const double Mn
Definition: dt.h:1297
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429

Here is the call graph for this function:

Here is the caller graph for this function:

int TCoda::MLEGradAscentParallel ( const double &  Thres,
const int &  MaxIter,
const int  ChunkNum,
const int  ChunkSize,
const TStr  PlotNm,
const double  StepAlpha = 0.3,
const double  StepBeta = 0.1 
)

Definition at line 962 of file agmdirected.cpp.

References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::AddDat(), THashSet< TKey, THashFunc >::AddKey(), THash< TKey, TDat, THashFunc >::BegI(), THash< TKey, TDat, THashFunc >::Clr(), THash< TKey, TDat, THashFunc >::Defrag(), THash< TKey, TDat, THashFunc >::DelIfKey(), TStr::Empty(), THashSet< TKey, THashFunc >::Empty(), THash< TKey, TDat, THashFunc >::EndI(), F, G, THash< TKey, TDat, THashFunc >::GetDat(), TVec< TVal, TSizeTy >::GetDat(), TNGraph::TNodeI::GetDeg(), TNGraph::TNodeI::GetInDeg(), TNGraph::TNodeI::GetInNId(), THash< TKey, TDat, THashFunc >::GetKey(), TNGraph::TNodeI::GetNbrNId(), TNGraph::GetNI(), TNGraph::GetNodes(), TNGraph::TNodeI::GetOutDeg(), TNGraph::TNodeI::GetOutNId(), GetStepSizeByLineSearch(), GradientForNode(), H, HOVIDSV, IAssert, THash< TKey, TDat, THashFunc >::IsKey(), THashSet< TKey, THashFunc >::IsKey(), THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), Likelihood(), Norm2(), TGnuPlot::PlotValV(), TVec< TVal, TSizeTy >::PutAll(), Rnd, SumFV, and SumHV.

Referenced by FindComsByCV(), and MLEGradAscentParallel().

962  {
963  //parallel
964  time_t InitTime = time(NULL);
965  //uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs();
966  TExeTm ExeTm, CheckTm;
967  double PrevL = Likelihood(true);
968  TIntFltPrV IterLV;
969  int PrevIter = 0;
970  int iter = 0;
971  TIntV NIdxV(F.Len(), 0);
972  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
973  TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization
974  NIDOPTV.PutAll(0);
975  TVec<TIntFltH> NewF(ChunkNum * ChunkSize);
976  TIntV NewNIDV(ChunkNum * ChunkSize);
977  TBoolV IsRowV(ChunkNum * ChunkSize);
978  for (iter = 0; iter < MaxIter; iter++) {
979  NIdxV.Clr(false);
980  for (int i = 0; i < F.Len(); i++) {
981  //if (NIDOPTV[i] == 0) { NIdxV.Add(i); }
982  NIdxV.Add(i);
983  }
984  IAssert (NIdxV.Len() <= F.Len());
985  NIdxV.Shuffle(Rnd);
986  // compute gradient for chunk of nodes
987 #pragma omp parallel for schedule(static, 1)
988  for (int TIdx = 0; TIdx < ChunkNum; TIdx++) {
989  TIntFltH GradV;
990  for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
991  const bool IsRow = (ui % 2 == 0);
992  NewNIDV[ui] = -1;
993  if (ui > NIdxV.Len()) { continue; }
994  const int u = NIdxV[ui]; //
995  //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
996  TNGraph::TNodeI UI = G->GetNI(u);
997  const int Deg = IsRow? UI.GetOutDeg(): UI.GetInDeg();
998  TIntSet CIDSet(5 * Deg);
999  TIntFltH CurFU = IsRow? F[u]: H[u];
1000  for (int e = 0; e < Deg; e++) {
1001  int VID = IsRow? UI.GetOutNId(e): UI.GetInNId(e);
1002  if (HOVIDSV[u].IsKey(VID)) { continue; }
1003  TIntFltH& NbhCIDH = IsRow? H[VID]: F[VID];
1004  for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
1005  CIDSet.AddKey(CI.GetKey());
1006  }
1007  }
1008  if (CIDSet.Empty()) {
1009  CurFU.Clr();
1010  }
1011  else {
1012  for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
1013  if (! CIDSet.IsKey(CI.GetKey())) {
1014  CurFU.DelIfKey(CI.GetKey());
1015  }
1016  }
1017  GradientForNode(IsRow, u, GradV, CIDSet);
1018  if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; }
1019  double LearnRate = GetStepSizeByLineSearch(IsRow, u, GradV, GradV, StepAlpha, StepBeta);
1020  if (LearnRate == 0.0) { NewNIDV[ui] = -2; continue; }
1021  for (int ci = 0; ci < GradV.Len(); ci++) {
1022  int CID = GradV.GetKey(ci);
1023  double Change = LearnRate * GradV.GetDat(CID);
1024  double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
1025  if (NewFuc <= 0.0) {
1026  CurFU.DelIfKey(CID);
1027  } else {
1028  CurFU.AddDat(CID) = NewFuc;
1029  }
1030  }
1031  CurFU.Defrag();
1032  }
1033  //store changes
1034  NewF[ui] = CurFU;
1035  NewNIDV[ui] = u;
1036  IsRowV[ui] = IsRow;
1037  }
1038  }
1039  int NumNoChangeGrad = 0;
1040  int NumNoChangeStepSize = 0;
1041  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1042  int NewNID = NewNIDV[ui];
1043  if (NewNID == -1) { NumNoChangeGrad++; continue; }
1044  if (NewNID == -2) { NumNoChangeStepSize++; continue; }
1045  if (IsRowV[ui]) {
1046  for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
1047  SumFV[CI.GetKey()] -= CI.GetDat();
1048  }
1049  } else {
1050  for (TIntFltH::TIter CI = H[NewNID].BegI(); CI < H[NewNID].EndI(); CI++) {
1051  SumHV[CI.GetKey()] -= CI.GetDat();
1052  }
1053  }
1054  }
1055 #pragma omp parallel for
1056  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1057  int NewNID = NewNIDV[ui];
1058  if (NewNID < 0) { continue; }
1059  if (IsRowV[ui]) {
1060  F[NewNID] = NewF[ui];
1061  } else {
1062  H[NewNID] = NewF[ui];
1063  }
1064  }
1065  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1066  int NewNID = NewNIDV[ui];
1067  if (NewNID < 0) { continue; }
1068  if (IsRowV[ui]) {
1069  for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
1070  SumFV[CI.GetKey()] += CI.GetDat();
1071  }
1072  } else {
1073  for (TIntFltH::TIter CI = H[NewNID].BegI(); CI < H[NewNID].EndI(); CI++) {
1074  SumHV[CI.GetKey()] += CI.GetDat();
1075  }
1076  }
1077  }
1078  // update the nodes who are optimal
1079  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1080  int NewNID = NewNIDV[ui];
1081  if (NewNID < 0) { continue; }
1082  TNGraph::TNodeI UI = G->GetNI(NewNID);
1083  NIDOPTV[NewNID] = 0;
1084  for (int e = 0; e < UI.GetDeg(); e++) {
1085  NIDOPTV[UI.GetNbrNId(e)] = 0;
1086  }
1087  }
1088  int OPTCnt = 0;
1089  for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } }
1090  /*
1091  if (! PlotNm.Empty()) {
1092  printf("\r%d iterations [%s] %lu secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), TSecTm::GetCurTm().GetAbsSecs() - StartTm);
1093  if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
1094  fflush(stdout);
1095  }
1096  */
1097  if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) {
1098  PrevIter = iter;
1099  double CurL = Likelihood(true);
1100  IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
1101  printf("\r%d iterations, Likelihood: %f, Diff: %f [%lu secs]", iter, CurL, CurL - PrevL, time(NULL) - InitTime);
1102  fflush(stdout);
1103  if (CurL - PrevL <= Thres * fabs(PrevL)) {
1104  break;
1105  }
1106  else {
1107  PrevL = CurL;
1108  }
1109  }
1110  }
1111  if (! PlotNm.Empty()) {
1112  printf("\nMLE completed with %d iterations(%lu secs)\n", iter, time(NULL) - InitTime);
1113  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
1114  } else {
1115  printf("\rMLE completed with %d iterations(%lu secs)\n", iter, time(NULL) - InitTime);
1116  fflush(stdout);
1117  }
1118  return iter;
1119 }
#define IAssert(Cond)
Definition: bd.h:262
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:376
Definition: tm.h:355
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:483
double Likelihood(const bool DoParallel=false)
TVec< TIntFltH > H
Definition: agmdirected.h:11
TIter BegI() const
Definition: hash.h:171
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
TIter EndI() const
Definition: hash.h:176
void Defrag()
Definition: hash.h:513
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
TFltV SumFV
Definition: agmdirected.h:15
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1166
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:807
TVec< TIntFltH > F
Definition: agmdirected.h:10
PNGraph G
Definition: agmdirected.h:9
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:358
double Norm2(const TIntFltH &UV)
Definition: agmdirected.h:189
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:362
bool Empty() const
Definition: dt.h:488
TFltV SumHV
Definition: agmdirected.h:16
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339
bool DelIfKey(const TKey &Key)
Definition: hash.h:201
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:319
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:360
TRnd Rnd
Definition: agmdirected.h:12
bool IsKey(const TKey &Key) const
Definition: hash.h:216
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:368
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
void GradientForNode(const bool IsRow, const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:372
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429

Here is the call graph for this function:

Here is the caller graph for this function:

int TCoda::MLEGradAscentParallel ( const double &  Thres,
const int &  MaxIter,
const int  ChunkNum,
const TStr  PlotNm = TStr(),
const double  StepAlpha = 0.3,
const double  StepBeta = 0.1 
)
inline

Definition at line 74 of file agmdirected.h.

References G, TNGraph::GetNodes(), and MLEGradAscentParallel().

74  {
75  int ChunkSize = G->GetNodes() / 10 / ChunkNum;
76  if (ChunkSize == 0) { ChunkSize = 1; }
77  return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta);
78  }
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
PNGraph G
Definition: agmdirected.h:9

Here is the call graph for this function:

void TCoda::NeighborComInit ( const int  InitComs)

Definition at line 83 of file agmdirected.cpp.

References F, and G.

Referenced by FindComsByCV().

83  {
84  //initialize with best neighborhood communities (Gleich et.al. KDD'12)
85  TExeTm RunTm;
86  TFltIntPrV NIdPhiV(F.Len(), 0);
87  TAGMFastUtil::GetNIdPhiV<PNGraph>(G, NIdPhiV);
88  NeighborComInit(NIdPhiV, InitComs);
89 }
Definition: tm.h:355
void NeighborComInit(const int InitComs)
Definition: agmdirected.cpp:83
TVec< TIntFltH > F
Definition: agmdirected.h:10
PNGraph G
Definition: agmdirected.h:9
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429

Here is the caller graph for this function:

void TCoda::NeighborComInit ( TFltIntPrV NIdPhiV,
const int  InitComs 
)

Definition at line 91 of file agmdirected.cpp.

References AddComIn(), AddComOut(), F, G, TVec< TVal, TSizeTy >::Gen(), TNGraph::TNodeI::GetDeg(), TNGraph::TNodeI::GetInDeg(), TNGraph::TNodeI::GetNbrNId(), TNGraph::GetNI(), TNGraph::GetNodes(), TNGraph::TNodeI::GetOutDeg(), TRnd::GetUniDev(), TRnd::GetUniDevInt(), H, TVec< TVal, TSizeTy >::Len(), NumComs, Rnd, TVec< TVal, TSizeTy >::Sort(), SumFV, and SumHV.

91  {
92  NIdPhiV.Sort(true);
93  F.Gen(G->GetNodes());
94  H.Gen(G->GetNodes());
95  SumFV.Gen(InitComs);
96  SumHV.Gen(InitComs);
97  NumComs = InitComs;
98  //TIntFltH NCPhiH(F.Len());
99  TIntSet InvalidNIDS(F.Len());
100  TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG
101  //choose nodes with local minimum in conductance
102  int CurCID = 0;
103  for (int ui = 0; ui < NIdPhiV.Len(); ui++) {
104  int UID = NIdPhiV[ui].Val2;
105  fflush(stdout);
106  if (InvalidNIDS.IsKey(UID)) { continue; }
107  ChosenNIDV.Add(UID); //FOR DEBUG
108  //add the node and its neighbors to the current community
109  TNGraph::TNodeI NI = G->GetNI(UID);
110  if (NI.GetOutDeg() > 0) { AddComOut(UID, CurCID, 1.0); }
111  if (NI.GetInDeg() > 0) { AddComIn(UID, CurCID, 1.0); }
112  fflush(stdout);
113  //add neighbors depending on whether they have incoming / outgoing edges from the center node (NI)
114  for (int e = 0; e < NI.GetDeg(); e++) {
115  int VID = NI.GetNbrNId(e);
116  TNGraph::TNodeI VI = G->GetNI(VID);
117  if (VI.GetOutDeg() > 0) { AddComOut(VID, CurCID, 1.0); }
118  if (VI.GetInDeg() > 0) { AddComIn(VID, CurCID, 1.0); }
119  }
120  //exclude its neighbors from the next considerations
121  for (int e = 0; e < NI.GetDeg(); e++) {
122  InvalidNIDS.AddKey(NI.GetNbrNId(e));
123  }
124  CurCID++;
125  fflush(stdout);
126  if (CurCID >= NumComs) { break; }
127  }
128  if (NumComs > CurCID) {
129  printf("%d communities needed to fill randomly\n", NumComs - CurCID);
130  }
131  //assign a member to zero-member community (if any)
132  for (int c = 0; c < SumFV.Len(); c++) {
133  if (SumFV[c] == 0.0) {
134  int ComSz = 10;
135  for (int u = 0; u < ComSz; u++) {
136  int UID = Rnd.GetUniDevInt(G->GetNodes());
137  AddComOut(UID, c, Rnd.GetUniDev());
138  }
139  }
140  }
141  //assign a member to zero-member community (if any)
142  for (int c = 0; c < SumHV.Len(); c++) {
143  if (SumHV[c] == 0.0) {
144  int ComSz = 10;
145  for (int u = 0; u < ComSz; u++) {
146  int UID = Rnd.GetUniDevInt(G->GetNodes());
147  AddComIn(UID, c, Rnd.GetUniDev());
148  }
149  }
150  }
151 }
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:376
void AddComIn(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:128
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:483
TVec< TIntFltH > H
Definition: agmdirected.h:11
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
void AddComOut(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:121
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1254
TFltV SumFV
Definition: agmdirected.h:15
TVec< TIntFltH > F
Definition: agmdirected.h:10
PNGraph G
Definition: agmdirected.h:9
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:358
TInt NumComs
Definition: agmdirected.h:18
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:362
TFltV SumHV
Definition: agmdirected.h:16
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339
double GetUniDev()
Definition: dt.h:30
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:360
TRnd Rnd
Definition: agmdirected.h:12

Here is the call graph for this function:

double TCoda::Norm2 ( const TIntFltH UV)
inline

Definition at line 189 of file agmdirected.h.

References THash< TKey, TDat, THashFunc >::BegI(), and THash< TKey, TDat, THashFunc >::EndI().

Referenced by LikelihoodForNode(), MLEGradAscent(), and MLEGradAscentParallel().

189  {
190  double N = 0.0;
191  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
192  N += HI.GetDat() * HI.GetDat();
193  }
194  return N;
195  }
TIter BegI() const
Definition: hash.h:171
TIter EndI() const
Definition: hash.h:176

Here is the call graph for this function:

Here is the caller graph for this function:

double TCoda::Prediction ( const TIntFltH FU,
const TIntFltH HV 
)
inline

Definition at line 174 of file agmdirected.h.

References DotProduct(), TStr::Fmt(), IAssertR, and PNoCom.

Referenced by GetNonEdgePairScores(), GradientForNode(), LikelihoodForNode(), LikelihoodHoldOut(), and Prediction().

174  {
175  double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, HV);
176  IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP));
177  return exp(- DP);
178  }
#define IAssertR(Cond, Reason)
Definition: bd.h:265
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmdirected.h:154
TFlt PNoCom
Definition: agmdirected.h:24
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599

Here is the call graph for this function:

Here is the caller graph for this function:

double TCoda::Prediction ( const int &  UID,
const int &  VID 
)
inline

Definition at line 179 of file agmdirected.h.

References F, H, and Prediction().

179  {
180  return Prediction(F[UID], H[VID]);
181  }
TVec< TIntFltH > H
Definition: agmdirected.h:11
TVec< TIntFltH > F
Definition: agmdirected.h:10
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
Definition: agmdirected.h:174

Here is the call graph for this function:

void TCoda::RandomInit ( const int  InitComs)

Definition at line 43 of file agmdirected.cpp.

References AddComIn(), AddComOut(), F, G, TVec< TVal, TSizeTy >::Gen(), TNGraph::TNodeI::GetInDeg(), TNGraph::GetNI(), TNGraph::GetNodes(), TNGraph::TNodeI::GetOutDeg(), TRnd::GetUniDev(), TRnd::GetUniDevInt(), H, TVec< TVal, TSizeTy >::Len(), NumComs, Rnd, SumFV, and SumHV.

Referenced by FindComsByCV(), and TCoda().

43  {
44  F.Gen(G->GetNodes());
45  H.Gen(G->GetNodes());
46  SumFV.Gen(InitComs);
47  SumHV.Gen(InitComs);
48  NumComs = InitComs;
49  for (int u = 0; u < F.Len(); u++) {
50  //assign to just one community
51  int Mem = G->GetNI(u).GetOutDeg();
52  if (Mem > 10) { Mem = 10; }
53  for (int c = 0; c < Mem; c++) {
54  int CID = Rnd.GetUniDevInt(InitComs);
55  AddComOut(u, CID, Rnd.GetUniDev());
56  }
57  }
58  for (int u = 0; u < H.Len(); u++) {
59  //assign to just one community
60  int Mem = G->GetNI(u).GetInDeg();
61  if (Mem > 10) { Mem = 10; }
62  for (int c = 0; c < Mem; c++) {
63  int CID = Rnd.GetUniDevInt(InitComs);
64  AddComIn(u, CID, Rnd.GetUniDev());
65  }
66  }
67  //assign a member to zero-member community (if any)
68  for (int c = 0; c < SumFV.Len(); c++) {
69  if (SumFV[c] == 0.0) {
70  int UID = Rnd.GetUniDevInt(G->GetNodes());
71  AddComOut(UID, c, Rnd.GetUniDev());
72  }
73  }
74  //assign a member to zero-member community (if any)
75  for (int c = 0; c < SumHV.Len(); c++) {
76  if (SumHV[c] == 0.0) {
77  int UID = Rnd.GetUniDevInt(G->GetNodes());
78  AddComIn(UID, c, Rnd.GetUniDev());
79  }
80  }
81 }
void AddComIn(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:128
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:483
TVec< TIntFltH > H
Definition: agmdirected.h:11
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
void AddComOut(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:121
TFltV SumFV
Definition: agmdirected.h:15
TVec< TIntFltH > F
Definition: agmdirected.h:10
PNGraph G
Definition: agmdirected.h:9
TInt NumComs
Definition: agmdirected.h:18
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:362
TFltV SumHV
Definition: agmdirected.h:16
double GetUniDev()
Definition: dt.h:30
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:360
TRnd Rnd
Definition: agmdirected.h:12

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::Save ( TSOut SOut)

Definition at line 8 of file agmdirected.cpp.

References F, G, H, HOVIDSV, MaxVal, MinVal, NegWgt, NIDV, NodesOk, NumComs, PNoCom, RegCoef, TNGraph::Save(), TVec< TVal, TSizeTy >::Save(), TBool::Save(), TInt::Save(), TFlt::Save(), SumFV, and SumHV.

8  {
9  G->Save(SOut);
10  F.Save(SOut);
11  H.Save(SOut);
12  NIDV.Save(SOut);
13  RegCoef.Save(SOut);
14  SumFV.Save(SOut);
15  SumHV.Save(SOut);
16  NodesOk.Save(SOut);
17  MinVal.Save(SOut);
18  MaxVal.Save(SOut);
19  NegWgt.Save(SOut);
20  NumComs.Save(SOut);
21  HOVIDSV.Save(SOut);
22  PNoCom.Save(SOut);
23 }
TFlt NegWgt
Definition: agmdirected.h:23
TFlt MinVal
Definition: agmdirected.h:21
void Save(TSOut &SOut) const
Definition: dt.h:1060
TVec< TIntFltH > H
Definition: agmdirected.h:11
void Save(TSOut &SOut) const
Definition: dt.h:902
TFlt RegCoef
Definition: agmdirected.h:14
void Save(TSOut &SOut) const
Definition: ds.h:903
TIntV NIDV
Definition: agmdirected.h:13
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
TFltV SumFV
Definition: agmdirected.h:15
TVec< TIntFltH > F
Definition: agmdirected.h:10
TFlt PNoCom
Definition: agmdirected.h:24
PNGraph G
Definition: agmdirected.h:9
TInt NumComs
Definition: agmdirected.h:18
TBool NodesOk
Definition: agmdirected.h:17
void Save(TSOut &SOut) const
Saves the graph to a (binary) stream SOut.
Definition: graph.h:423
TFltV SumHV
Definition: agmdirected.h:16
TFlt MaxVal
Definition: agmdirected.h:22
void Save(TSOut &SOut) const
Definition: dt.h:1309

Here is the call graph for this function:

void TCoda::SetCmtyVV ( const TVec< TIntV > &  CmtyVVOut,
const TVec< TIntV > &  CmtyVVIn 
)

Definition at line 171 of file agmdirected.cpp.

References AddComIn(), AddComOut(), THash< TKey, TDat, THashFunc >::AddDat(), F, G, TVec< TVal, TSizeTy >::Gen(), TVec< TVal, TSizeTy >::GetDat(), TNGraph::GetNodes(), H, IAssert, TNGraph::IsNode(), TVec< TVal, TSizeTy >::Len(), NIDV, NodesOk, NumComs, SumFV, and SumHV.

171  {
172  IAssert(CmtyVVOut.Len() == CmtyVVIn.Len());
173  F.Gen(G->GetNodes());
174  H.Gen(G->GetNodes());
175  SumFV.Gen(CmtyVVOut.Len());
176  SumHV.Gen(CmtyVVIn.Len());
177  NumComs = CmtyVVOut.Len();
178  TIntH NIDIdxH(NIDV.Len());
179  if (! NodesOk) {
180  for (int u = 0; u < NIDV.Len(); u++) {
181  NIDIdxH.AddDat(NIDV[u], u);
182  }
183  }
184  for (int c = 0; c < CmtyVVOut.Len(); c++) {
185  for (int u = 0; u < CmtyVVOut[c].Len(); u++) {
186  int UID = CmtyVVOut[c][u];
187  if (! NodesOk) { UID = NIDIdxH.GetDat(UID); }
188  if (G->IsNode(UID)) {
189  AddComOut(UID, c, 1.0);
190  }
191  }
192  }
193  for (int c = 0; c < CmtyVVIn.Len(); c++) {
194  for (int u = 0; u < CmtyVVIn[c].Len(); u++) {
195  int UID = CmtyVVIn[c][u];
196  if (! NodesOk) { UID = NIDIdxH.GetDat(UID); }
197  if (G->IsNode(UID)) {
198  AddComIn(UID, c, 1.0);
199  }
200  }
201  }
202 }
#define IAssert(Cond)
Definition: bd.h:262
void AddComIn(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:128
TVec< TIntFltH > H
Definition: agmdirected.h:11
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TIntV NIDV
Definition: agmdirected.h:13
void AddComOut(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:121
TFltV SumFV
Definition: agmdirected.h:15
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:807
TVec< TIntFltH > F
Definition: agmdirected.h:10
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:477
PNGraph G
Definition: agmdirected.h:9
TInt NumComs
Definition: agmdirected.h:18
TBool NodesOk
Definition: agmdirected.h:17
TFltV SumHV
Definition: agmdirected.h:16
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TDat & AddDat(const TKey &Key)
Definition: hash.h:196

Here is the call graph for this function:

void TCoda::SetGraph ( const PNGraph GraphPt)

Definition at line 204 of file agmdirected.cpp.

References TSnap::DelSelfEdges(), DoParallel, G, TNGraph::GetNIdV(), TNGraph::GetNodes(), TSnap::GetSubGraph(), HOVIDSV, IAssert, TNGraph::IsNode(), TFlt::Mx, NegWgt, NIDV, NodesOk, and PNoCom.

Referenced by TCoda().

204  {
205  G = GraphPt;
206  HOVIDSV.Gen(G->GetNodes());
207  NodesOk = true;
208  GraphPt->GetNIdV(NIDV);
209  // check that nodes IDs are {0,1,..,Nodes-1}
210  for (int nid = 0; nid < GraphPt->GetNodes(); nid++) {
211  if (! GraphPt->IsNode(nid)) {
212  NodesOk = false;
213  break;
214  }
215  }
216  if (! NodesOk) {
217  printf("rearrage nodes\n");
218  G = TSnap::GetSubGraph(GraphPt, NIDV, true);
219  for (int nid = 0; nid < G->GetNodes(); nid++) {
220  IAssert(G->IsNode(nid));
221  }
222  }
224 
225  PNoCom = 1.0 / (double) G->GetNodes();
226  DoParallel = false;
227  if (1.0 / PNoCom > sqrt(TFlt::Mx)) { PNoCom = 0.99 / sqrt(TFlt::Mx); } // to prevent overflow
228  NegWgt = 1.0;
229 }
#define IAssert(Cond)
Definition: bd.h:262
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
TFlt NegWgt
Definition: agmdirected.h:23
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TBool DoParallel
Definition: agmdirected.h:25
static const double Mx
Definition: dt.h:1298
TIntV NIDV
Definition: agmdirected.h:13
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:477
TFlt PNoCom
Definition: agmdirected.h:24
PNGraph G
Definition: agmdirected.h:9
TBool NodesOk
Definition: agmdirected.h:17
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph.
Definition: alg.h:419

Here is the call graph for this function:

Here is the caller graph for this function:

void TCoda::SetHoldOut ( const double  HOFrac)
inline

Definition at line 54 of file agmdirected.h.

References G, TAGMFastUtil::GenHoldOutPairs(), HOVIDSV, and Rnd.

54 { TVec<TIntSet> HoldOut; TAGMFastUtil::GenHoldOutPairs(G, HoldOut, HOFrac, Rnd); HOVIDSV = HoldOut; }
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
PNGraph G
Definition: agmdirected.h:9
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmfast.h:159
TRnd Rnd
Definition: agmdirected.h:12
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429

Here is the call graph for this function:

void TCoda::SetRegCoef ( const double  _RegCoef)
inline

Definition at line 33 of file agmdirected.h.

References RegCoef.

33 { RegCoef = _RegCoef; }
TFlt RegCoef
Definition: agmdirected.h:14
double TCoda::Sum ( const TIntFltH UV)
inline

Definition at line 182 of file agmdirected.h.

References THash< TKey, TDat, THashFunc >::BegI(), and THash< TKey, TDat, THashFunc >::EndI().

Referenced by LikelihoodForNode().

182  {
183  double N = 0.0;
184  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
185  N += HI.GetDat();
186  }
187  return N;
188  }
TIter BegI() const
Definition: hash.h:171
TIter EndI() const
Definition: hash.h:176

Here is the call graph for this function:

Here is the caller graph for this function:

Member Data Documentation

TBool TCoda::DoParallel

Definition at line 25 of file agmdirected.h.

Referenced by SetGraph().

TFlt TCoda::MaxVal

Definition at line 22 of file agmdirected.h.

Referenced by GetStepSizeByLineSearch(), Load(), and Save().

TFlt TCoda::MinVal

Definition at line 21 of file agmdirected.h.

Referenced by GetStepSizeByLineSearch(), Load(), and Save().

TFlt TCoda::NegWgt
TBool TCoda::NodesOk
private

Definition at line 17 of file agmdirected.h.

Referenced by GetCommunity(), Load(), Save(), SetCmtyVV(), and SetGraph().

TFlt TCoda::PNoCom
TFlt TCoda::RegCoef
private

Definition at line 14 of file agmdirected.h.

Referenced by GetRegCoef(), GradientForNode(), LikelihoodForNode(), Load(), Save(), and SetRegCoef().

TRnd TCoda::Rnd
private
TFltV TCoda::SumFV
private

The documentation for this class was generated from the following files: