SNAP Library 2.0, Developer Reference
2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #ifndef snap_agm_h 00002 #define snap_agm_h 00003 #include "Snap.h" 00004 00007 class TAGM { 00008 public: 00009 static void RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd); 00010 static PUNGraph GenAGM(const TIntV& NIdV , THash<TInt,TIntV >& CmtyVH, const TStr& AGMNm, const double& PiCoef, const double& ProbBase, TRnd& Rnd=TInt::Rnd); 00011 static PUNGraph GenAGM(TVec<TIntV >& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd=TInt::Rnd); 00012 static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd); 00013 static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom = -1.0); 00014 }; 00015 00018 class TAGMUtil { 00019 public: 00020 static void GenPLSeq(TIntV& SzSeq,const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max); 00021 static void ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd); 00022 static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd); 00023 static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd); 00024 static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd); 00025 static void RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd); 00026 static PUNGraph GenAGM(TVec<TIntV >& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd=TInt::Rnd); 00027 static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd); 00028 static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom = -1.0); 00029 static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV); 00030 static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV); 00031 static void GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH); 00032 static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntSet>& CmtyVV); 00033 static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const THash<TInt,TIntV >& CmtyVH); 00034 static void GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV >& CmtyVH); 00035 static void GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV >& CmtyVV); 00036 static void GetNodeMembership(TIntH& NIDComVH, const TVec<TIntV >& CmtyVV); 00037 static void LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV); 00038 static void DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV); 00039 static void DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH); 00040 static int TotalMemberships(const TVec<TIntV>& CmtyVV); 00041 static void RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd); 00042 static void RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd); 00043 static int Intersection(const TIntV& C1, const TIntV& C2); 00044 static void GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C); 00045 static int Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B); 00046 static double GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges); 00047 static void GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS); 00048 static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz) { 00049 THash<TInt, TStr> TmpH; 00050 SaveGephi(OutFNm, G, CmtyVVAtr, MaxSz, MinSz, TmpH); 00051 } 00052 static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz, const THash<TInt, TStr>& NIDNameH) { 00053 THash<TInt, TIntTr> TmpH; 00054 SaveGephi(OutFNm, G, CmtyVVAtr, MaxSz, MinSz, NIDNameH, TmpH); 00055 } 00056 static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz, const THash<TInt, TStr>& NIDNameH, const THash<TInt, TIntTr >& NIDColorH); 00057 static void SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr >& NIDColorH, const THash<TInt, TIntTr >& CIDColorH); 00058 static int FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom = 0.0, const TStr PltFPrx = TStr()); 00059 template <class PGraph> 00060 static PGraph LoadEdgeListStr(const TStr& InFNm, TIntStrH& NIDNameH, const int& SrcColId = 0, const int& DstColId = 1, const TSsFmt SsFmt = ssfTabSep) { 00061 TSsParser Ss(InFNm, SsFmt); 00062 PGraph Graph = PGraph::TObj::New(); 00063 TStrHash<TInt> StrSet(Mega(1), true); 00064 while (Ss.Next()) { 00065 const int SrcNId = StrSet.AddKey(Ss[SrcColId]); 00066 const int DstNId = StrSet.AddKey(Ss[DstColId]); 00067 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00068 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00069 Graph->AddEdge(SrcNId, DstNId); 00070 } 00071 NIDNameH.Gen(StrSet.Len()); 00072 for (int s = 0; s < StrSet.Len(); s++) { NIDNameH.AddDat(s, StrSet.GetKey(s)); } 00073 IAssert(NIDNameH.Len() == Graph->GetNodes()); 00074 00075 Graph->Defrag(); 00076 return Graph; 00077 } 00078 00079 template<class PGraph> 00080 static void GVizComGraph(const PGraph& Graph,const TVec<TIntV >& CmtyVV, const TStr& OutFNm, const TStr& Desc = TStr()){ 00081 TStrV Colors = TStrV::GetV("red","blue","green","pink","cyan"); 00082 TStrV Shapes = TStrV::GetV("ellipse","triangle","square","pentagon","hexagon"); 00083 THash<TInt,TIntV> NIDComVH; 00084 GetNodeMembership(NIDComVH,CmtyVV); 00085 00086 const TStr Ext = OutFNm.GetFExt(); 00087 const TStr GraphFNm = OutFNm.GetSubStr(0, OutFNm.Len() - Ext.Len()) + "dot"; 00088 const bool IsDir = HasGraphFlag(typename PGraph::TObj, gfDirected); 00089 FILE *F = fopen(GraphFNm.CStr(), "wt"); 00090 if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr()); 00091 if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); } 00092 fprintf(F, " graph [splines=false overlap=false]\n"); //size=\"12,10\" ratio=fill 00093 fprintf(F, " node [width=0.3, height=0.3]\n"); 00094 //nodes 00095 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00096 int NID = NI.GetId(); 00097 TIntV& CIDV = NIDComVH.GetDat(NID); 00098 IAssert(CIDV.Len()>0); 00099 TStr ShapeNm = Shapes[(CIDV.Len()-1) % Shapes.Len()]; 00100 TStr ColorNm = Colors[CIDV[0] % Colors.Len()]; 00101 TStr NodeComLabel = TStr::Fmt("%d(",NID); 00102 for(int i=0;i<CIDV.Len();i++) { 00103 TStr TmpStr = TStr::Fmt("%d",int(CIDV[i])); 00104 NodeComLabel += TmpStr; 00105 if(i<CIDV.Len()-1){NodeComLabel+=",";} 00106 } 00107 NodeComLabel += ")"; 00108 fprintf(F, " %d [style=filled, shape=\"%s\" fillcolor=\"%s\" label=\"%s\"];\n", NI.GetId(), ShapeNm.CStr(),ColorNm.CStr(), NodeComLabel.CStr()); 00109 } 00110 00111 // edges 00112 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00113 if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 ) { 00114 fprintf(F, "%d;\n", NI.GetId()); } 00115 else { 00116 for (int e = 0; e < NI.GetOutDeg(); e++) { 00117 if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; } 00118 fprintf(F, " %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e)); 00119 } 00120 } 00121 } 00122 if (! Desc.Empty()) { 00123 fprintf(F, " label = \"\\n%s\\n\";", Desc.CStr()); 00124 fprintf(F, " fontsize=24;\n"); 00125 } 00126 fprintf(F, "}\n"); 00127 fclose(F); 00128 TSnap::TSnapDetail::GVizDoLayout(GraphFNm, OutFNm, gvlNeato); 00129 } 00130 }; 00131 00132 //Light version of logistic regression (originally in Snap)-- Jaewon Yang, 04/07/12 00133 // forward 00134 class TLogRegPredict; 00135 typedef TPt<TLogRegPredict> PLogRegPredict; 00136 00138 // Logistic Regression using gradient descent 00139 // X: N * M matrix where N = number of examples and M = number of features. 00140 //J: JAEWON THIS HAS NOTHING TO DO WITH AGM. THIS NEEDS TO BE MOVED SOMEWHERE ELSE (glib/xmath.h) 00141 //J: BUT I THINK THERE IS ALREADY A LOGISTIC REGRESSION CLASS THERE!! 00142 class TLogRegFit { 00143 private: 00144 TVec<TFltV> X; 00145 TFltV Y; 00146 TFltV Theta; 00147 int M; // number of features 00148 public: 00149 TLogRegFit() {} 00150 ~TLogRegFit() {} 00151 // returns model for logistic regression 00152 PLogRegPredict CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm = TStr(), const double& ChangeEps = 0.01, const int& MaxStep = 200, const bool InterceptPt = false); 00153 PLogRegPredict CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm = TStr(), const double& ChangeEps = 0.01, const int& MaxStep = 200, const bool InterceptPt = false); 00154 int MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm); 00155 int MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm); 00156 double GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta); 00157 double Likelihood(const TFltV& NewTheta); 00158 double Likelihood() { return Likelihood(Theta); } 00159 void Gradient(TFltV& GradV); 00160 void Hessian(TFltVV& HVV); 00161 void GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV); 00162 }; 00163 00164 00166 // Logistic-Regression-Model 00167 //J: JAEWON THIS HAS NOTHING TO DO WITH AGM. THIS NEEDS TO BE MOVED SOMEWHERE ELSE (glib/xmath.h) 00168 //J: BUT I THINK THERE IS ALREADY A LOGISTIC REGRESSION CLASS THERE!! 00169 class TLogRegPredict { 00170 private: 00171 TCRef CRef; 00172 private: 00173 TFltV Theta; 00174 public: 00175 TLogRegPredict(const TFltV& _bb): Theta(_bb) { }; 00176 //static PLogRegMd New(PSparseTrainSet Set) { return TLogReg::CalcLogReg(Set); }; //TODO 00177 00178 TLogRegPredict(TSIn& SIn) { Theta.Load(SIn); }; 00179 static PLogRegPredict Load(TSIn& SIn) { return new TLogRegPredict(SIn); }; 00180 void Save(TSOut& SOut) const { Theta.Save(SOut); }; 00181 00182 UndefDefaultCopyAssign(TLogRegPredict); 00183 public: 00184 // classifies vector, returns probability that AttrV is positive 00185 static void GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta); 00186 static double GetCfy(const TFltV& AttrV, const TFltV& NewTheta); 00187 double GetCfy(const TFltV& AttrV) { return GetCfy(AttrV, Theta); } 00188 void GetTheta(TFltV& _Theta) { _Theta = Theta; } 00189 void GetCfy(const TVec<TFltV>& X, TFltV& OutV) { GetCfy(X, OutV, Theta); } 00190 void PrintTheta() { for (int t = 0; t < Theta.Len(); t++) { printf("Theta[%d] = %f\n", t, Theta[t].Val); } } 00191 friend class TPt<TLogRegPredict>; 00192 }; 00193 00194 #endif