SNAP Library 2.2, Developer Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #ifndef snap_network_community_profile_h 00002 #define snap_network_community_profile_h 00003 00004 #include "Snap.h" 00005 00006 class TLocClust; 00007 class TLocClustStat; 00008 class TNcpGraphsBase; 00009 00010 //#/////////////////////////////////////////////// 00012 00024 class TLocClust { 00025 public: 00026 static bool Verbose; 00027 private: 00028 PUNGraph Graph; 00029 int Nodes, Edges2; // Nodes, 2*edges in Graph 00030 TIntFltH ProbH, ResH; 00031 TIntQ NodeQ; 00032 double Alpha; // PageRank jump probability (smaller Alpha diffuses the mass farther away) 00033 int SeedNId; // Seed node 00034 // volume, cut size, node ids, conductances 00035 TIntV NIdV, VolV, CutV; // Vol=2*edges_inside+cut (vol = sum of the degrees) 00036 TFltV PhiV; // Conductance 00037 int BestCutIdx; // Index K to vectors where the conductance of the bounding cut (PhiV[K]) achieves its minimum 00038 public: 00039 TLocClust(const PUNGraph& GraphPt, const double& AlphaVal) : 00040 Graph(GraphPt), Nodes(GraphPt->GetNodes()), Edges2(2*GraphPt->GetEdges()), Alpha(AlphaVal) { } 00042 int Len() const { return GetRndWalkSup(); } 00044 int GetRndWalkSup() const { return VolV.Len(); } 00045 00047 int GetNId(const int& NodeN) const { return NIdV[NodeN]; } 00049 00051 int GetVol(const int& Nodes) const { return VolV[Nodes]; } 00053 00055 int GetCut(const int& Nodes) const { return CutV[Nodes]; } 00057 00059 double GetPhi(const int& ValId) const { return PhiV[ValId]; } 00060 00062 const TIntV& GetNIdV() const { return NIdV; } 00064 const TIntV& GetVolV() const { return VolV; } 00066 const TIntV& GetCutV() const { return CutV; } 00068 const TFltV& GetPhiV() const { return PhiV; } 00069 00071 00073 int BestCut() const { return BestCutIdx; } 00075 int BestCutNodes() const { return BestCutIdx+1; } 00077 int GetCutEdges() const { return GetCut(BestCut()); } 00079 int GetCutVol() const { return GetVol(BestCut()); } 00081 double GetCutPhi() const { return GetPhi(BestCut()); } 00082 00084 00086 int ApproxPageRank(const int& SeedNode, const double& Eps); 00088 void SupportSweep(); 00090 00093 void FindBestCut(const int& SeedNode, const int& ClustSz, const double& MinSizeFrac=0.2); 00094 00096 void PlotVolDistr(const TStr& OutFNm, TStr Desc=TStr()) const; 00098 void PlotCutDistr(const TStr& OutFNm, TStr Desc=TStr()) const; 00100 void PlotPhiDistr(const TStr& OutFNm, TStr Desc=TStr()) const; 00102 void SavePajek(const TStr& OutFNm) const; 00103 00105 static void DrawWhiskers(const PUNGraph& Graph, TStr FNmPref, const int& PlotN); 00107 static void GetCutStat(const PUNGraph& Graph, const TIntV& NIdV, int& Vol, int& Cut, double& Phi, int GraphEdges=-1); 00109 static void GetCutStat(const PUNGraph& Graph, const TIntSet& NIdSet, int& Vol, int& Cut, double& Phi, int GraphEdges=-1); 00112 00117 static void PlotNCP(const PUNGraph& Graph, const TStr& FNm, const TStr Desc="", const bool& BagOfWhiskers=true, const bool& RewireNet=false, 00118 const int& KMin=10, const int& KMax=Mega(100), const int& Coverage=10, const bool& SaveTxtStat=false, const bool& PlotBoltzman=false); 00119 00120 friend class TLocClustStat; 00121 }; 00122 00123 //#/////////////////////////////////////////////// 00125 class TLocClustStat { 00126 public: 00127 static double BinFactor; 00128 static int TakeValAt; 00129 public: 00130 class TNodeSweep { 00131 public: 00132 TInt SeedNId; 00133 TIntV SweepV; // nodes inside the cut: cut of size k has node ids CutV[0...k-1] 00134 TFltV PhiV; // conductance at the cut 00135 public: 00136 TNodeSweep() {} 00137 TNodeSweep(const int& SeedNode, const TIntV& SweepNIdV, const TFltV& PhiNIdV) : 00138 SeedNId(SeedNode), SweepV(SweepNIdV), PhiV(PhiNIdV) { IAssert(SweepV.Len()==PhiV.Len()); } 00139 TNodeSweep(const TNodeSweep& NS) : SeedNId(NS.SeedNId), SweepV(NS.SweepV), PhiV(NS.PhiV) { } 00140 int Len() const { return SweepV.Len(); } 00141 int GetSeed() const { return SeedNId; } 00142 int NId(const int i) const { return SweepV[i]; } 00143 double Phi(const int i) const { return PhiV[i]; } 00144 bool operator < (const TNodeSweep& CS) const { return Len() < CS.Len(); } 00145 }; 00146 00147 class TCutInfo { 00148 public: 00149 TInt Nodes, Edges, CutSz; // nodes inside, edges inside, edges cut 00150 TIntV CutNIdV; // node ids inside the cluster 00151 public: 00152 TCutInfo() : Nodes(0), Edges(0), CutSz(0), CutNIdV() { } 00153 TCutInfo(const int& ClustNodes, const int& EdgesInside, const int& CutSize) : Nodes(ClustNodes), Edges(EdgesInside), CutSz(CutSize) { } 00154 TCutInfo(const int& ClustNodes, const int& EdgesInside, const int& CutSize, const TIntV& NIdV) : 00155 Nodes(ClustNodes), Edges(EdgesInside), CutSz(CutSize), CutNIdV(NIdV) { } 00156 TCutInfo(const PUNGraph& G, const TIntV& ClustNIdV, bool TakeNIdV=false) : Nodes(ClustNIdV.Len()) { 00157 TSnap::GetEdgesInOut(G, ClustNIdV, Edges.Val, CutSz.Val); if(TakeNIdV){CutNIdV=ClustNIdV;} } 00158 TCutInfo(const TCutInfo& CS) : Nodes(CS.Nodes), Edges(CS.Edges), CutSz(CS.CutSz), CutNIdV(CS.CutNIdV) { } 00159 int GetNodes() const { return Nodes; } 00160 int GetEdges() const { return Edges; } 00161 int GetVol() const { return 2*Edges+CutSz; } 00162 int GetCutSz() const { return CutSz; } 00163 // community score measures (lower is better) 00164 double GetPhi() const { return double(CutSz)/double(2*Edges+CutSz); } // conductance 00165 double GetExpansion() const { return Nodes<2 ? 1.0 : double(CutSz)/double(Nodes); } // expansion 00166 double GetIntDens() const { return 1.0 - ((Nodes<2) ? 0 : 2.0*double(Edges)/double(Nodes*(Nodes-1))); } // internal density 00167 double GetCutRatio(const int& GNodes) const { return double(CutSz)/double(Nodes*(GNodes-Nodes)); } // cut ratio (external density) 00168 double GetNormCut(const int& GEdges) const { return GetPhi() + double(CutSz)/double(2*GEdges-GetVol()); } // normalized cut 00169 double GetFracDegOut(const PUNGraph& Graph, double& MxFrac, double& AvgFrac, double& MedianFrac, double& Pct9Frac, double& Flake) const; // fraction of node's edges pointing outside the cluster 00170 double GetModular(const int& GEdges) const { return (2.0*Edges - GetExpEdgesIn(GEdges)) / (2.0*GEdges); } // modularity 00171 double GetModRat(const int& GEdges) const { return (2.0*Edges) / GetExpEdgesIn(GEdges); } // modularity ratio 00172 double GetExpEdgesIn(const int& GEdges) const { return TMath::Sqr(2.0*Edges+CutSz)/(2.0*GEdges); } // expected edges inside (sum of degrees on nodes inside)^2/(2*E) 00173 bool operator < (const TCutInfo& CS) const { return GetPhi() < CS.GetPhi(); } 00174 }; 00175 00176 private: 00177 // parameters 00178 TFlt Alpha, SizeFrac, KFac; 00179 TInt KMin, KMax, Coverage; 00180 PUNGraph Graph; // set at ::Run() 00181 //private: 00182 public: 00183 TVec<TNodeSweep> SweepsV; // node ids and conductances for each run of local clustering 00184 THash<TInt, TFltV> SizePhiH; // all conductances at cluster size K 00185 THash<TInt, TCutInfo> BestCutH; // best cut (min conductance) at size K (edges inside, edges cut) 00186 TFltPrV BagOfWhiskerV; // (size, conductance) for bag of whiskers 00187 TFltPr BestWhisk; // best whisker (whisker with largest volume), (size, conductance) 00188 TCutInfo BestCut; // best over-all cut 00189 TIntSet SizeBucketSet; // for exponential bucketing (only indexes BestCutH at positions in BucketSet have CutNIdV filled) 00190 public: 00191 TLocClustStat(const double& _Alpha, const int& _KMin, const int& _KMax, const double& _KFac, const int& _Coverage, const double& _SizeFrac); 00192 void Save(TSOut& SOut) const; 00193 void Clr(); 00194 void SetGraph(const PUNGraph& GraphPt) { Graph=GraphPt; } 00195 void Run(const PUNGraph& Graph, const bool& SaveAllSweeps=false, const bool& SaveAllCond=false, const bool& SaveBestNodesAtK=false); 00196 void AddBagOfWhiskers(); 00197 void AddCut(const TIntV& NIdV); 00198 void AddCut(const int& ClustSz, const double& Phi); 00199 void AddToBestCutH(const PUNGraph& Graph, const TIntV& NIdV, const bool& AddBestCond=true); 00200 00201 double FindBestCut(const int& Nodes, const TIntSet& TabuNIdSet, int& BestCutId) const; 00202 const TCutInfo& FindBestCut(const int& Nodes=-1) const; 00203 double FindBestCut(const int& Nodes, TIntV& ClustNIdV) const; 00204 int FindBestCut(const int& Nodes, int& Vol, int& Cut, double& Phi) const; 00205 00206 const TCutInfo& GetBestCut() const { return BestCut; } // overall best cut 00207 int GetCuts() const { return BestCutH.Len(); } 00208 const TCutInfo& GetKNodeCut(const int& Nodes) const { return BestCutH.GetDat(Nodes); } 00209 const TCutInfo& GetCutN(const int& N) const { return BestCutH[N]; } 00210 00211 int BestWhiskNodes() const { return int(BestWhisk.Val1.Val); } 00212 int BestWhiskEdges() const { return (int)TMath::Round(1.0/BestWhisk.Val2.Val)/2-1; } 00213 double BestWhiskPhi() const { return BestWhisk.Val2; } 00214 TFltPr GetBestWhisk() const { return BestWhisk; } 00215 const TFltPrV& GetBagOfWhiskersV() const { return BagOfWhiskerV; } 00216 void GetCurveStat(TFltPrV& MeanV, TFltPrV& MedV, TFltPrV& Dec1V, TFltPrV& MinV, TFltPrV& MaxV) const; 00217 void GetBoltzmanCurveStat(const TFltV& TempV, TVec<TFltPrV>& NcpV) const; 00218 00219 TStr ParamStr() const; 00220 void PlotNCP(const TStr& OutFNm, TStr Desc=TStr()) const; // lower-envelope of conductance 00221 void PlotNCPModul(const TStr& OutFNm, TStr Desc=TStr()) const; // NCP but with modularity on Y-axis 00222 void PlotNcpTop10(const TStr& OutFNm, TStr Desc, const int& TakeMinN) const; 00223 void PlotPhiInOut(const TStr& OutFNm, TStr Desc=TStr()) const; // conductance on the boundary / counductance inside 00224 void PlotBestClustDens(TStr OutFNm, TStr Desc=TStr()) const; // plot edges inside, cut size, conductance 00225 void PlotNCPScatter(const TStr& OutFNm, TStr Desc=TStr()) const; // all different conducances of all sizes (not just min) 00226 void PlotPhiDistr(const int& CmtySz, const TStr& OutFNm, TStr Desc=TStr()) const; // histogram of conductances for a fixed CmtySz 00227 void PlotBoltzmanCurve(const TStr& OutFNm, TStr Desc=TStr()) const; 00228 00229 void ImposeNCP(const TLocClustStat& LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const; 00230 void ImposeNCP(const TLocClustStat& LcStat2, const TLocClustStat& LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const; 00231 void SaveTxtInfo(const TStr& OutFNmPref, const TStr& Desc, const bool& SetMaxAt1) const; 00232 00233 static void BagOfWhiskers(const PUNGraph& Graph, TFltPrV& SizePhiV, TFltPr& BestWhisk); 00234 static void BagOfWhiskers2(const PUNGraph& Graph, TFltPrV& SizePhiV); 00235 static void MakeExpBins(const TFltPrV& ValV, TFltPrV& NewV); 00236 static void MakeExpBins(const TFltV& ValV, TFltV& NewV); 00237 }; 00238 00239 //#/////////////////////////////////////////////// 00241 class TNcpGraphsBase { 00242 private: 00243 TStrV GNmV; 00244 TFltV ParamValV ; 00245 TIntPrV GSizeV; 00246 TFltPrV WhiskerV, RewWhiskerV; // (size, conductance) 00247 TVec<TFltPrV> NcpV; 00248 TVec<TFltPrV> RewNcpV; 00249 TVec<TFltPrV> WhiskNcpV; 00250 public: 00251 TNcpGraphsBase(const TStr& FNmWc); 00252 TNcpGraphsBase(TSIn& SIn); 00253 void Save(TSOut& SOut) const; 00254 void Impose(const TStr& OutFNm, const int& TopN, const bool& Smooth); 00255 double GetXAtMinY(const TFltPrV& Ncp, const int& NNodes); 00256 TFltPr GetXYAtMinY(const TFltPrV& Ncp, const int& NNodes); 00257 void PlotNcpMin(const TStr& OutFNm, const bool& VsGraphN=false); 00258 void SaveTxtNcpMin(const TStr& OutFNm, const bool& VsGraphN=false); 00259 void PlotRewNcpMin(const TStr& OutFNm, const bool& VsGraphN=false); 00260 void PlotBestWhisker(const TStr& OutFNm, const bool& VsGraphN=false); 00261 void PlotRewBestWhisker(const TStr& OutFNm, const bool& VsGraphN=false); 00262 void PlotAvgNcp(const TStr& OutFNm, const TVec<TFltPrV>& NcpVec, const int& MinSz, const double& MaxMinY); 00263 void SaveTxt(const TStr& OutFNm); 00264 static void PlotDataset(const TStr& InFNmWc, const TStr& OutFNm, const bool& ImposeNcp=false, const bool& VsGraphN=false); 00265 }; 00266 00267 #endif