SNAP Library 2.1, Developer Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #include "stdafx.h" 00002 #include "ncp.h" 00003 00005 // Local Spectral Clustering 00006 00007 bool TLocClust::Verbose = true; 00008 00009 int TLocClust::ApproxPageRank(const int& SeedNode, const double& Eps) { 00010 for (int i = 0; i < ProbH.Len(); i++) { ProbH[i]=0.0; } 00011 for (int i = 0; i < ResH.Len(); i++) { ResH[i]=0.0; } 00012 ProbH.Clr(false, -1, false); 00013 ResH.Clr(false, -1, false); 00014 ResH.AddDat(SeedNode, 1.0); 00015 int iter = 0; 00016 double OldRes = 0.0; 00017 NodeQ.Clr(false); 00018 NodeQ.Push(SeedNode); 00019 TExeTm ExeTm; 00020 while (! NodeQ.Empty()) { 00021 const int NId = NodeQ.Top(); NodeQ.Pop(); 00022 const TUNGraph::TNodeI& Node = Graph->GetNI(NId); 00023 const int NIdDeg = Node.GetOutDeg(); 00024 const double PushVal = ResH.GetDat(NId) - 0.5*Eps*NIdDeg; 00025 const double PutVal = (1.0-Alpha) * PushVal / double(NIdDeg); 00026 ProbH.AddDat(NId) += Alpha*PushVal; 00027 ResH.AddDat(NId) = 0.5 * Eps * NIdDeg; 00028 for (int e = 0; e < NIdDeg; e++) { 00029 const int DstNId = Node.GetOutNId(e); 00030 const int DstDeg = Graph->GetNI(DstNId).GetOutDeg(); 00031 double& ResVal = ResH.AddDat(DstNId).Val; 00032 OldRes = ResVal; 00033 ResVal += PutVal; 00034 if (ResVal >= Eps*DstDeg && OldRes < Eps*DstDeg) { 00035 NodeQ.Push(DstNId); } 00036 } 00037 iter++; 00038 if (iter % Mega(1) == 0) { 00039 printf(" %d[%s]", NodeQ.Len(), ExeTm.GetStr()); 00040 if (iter/1000 > Graph->GetNodes() || ExeTm.GetSecs() > 4*3600) { // more than 2 hours 00041 printf("Too many iterations! Stop to save time.\n"); 00042 return iter; } 00043 } 00044 } 00045 // check that the residuals are sufficiently small 00046 /*for (int i =0; i < ProbH.Len(); i++) { 00047 const int Deg = Graph->GetNI(ResH.GetKey(i)).GetOutDeg(); 00048 IAssert(ResH[i] < Eps*Deg); } //*/ 00049 return iter; 00050 } 00051 00052 void TLocClust::SupportSweep() { 00053 TExeTm ExeTm; 00054 VolV.Clr(false); CutV.Clr(false); PhiV.Clr(false); 00055 if (ProbH.Empty()) { return; } 00056 const int TopNId = ProbH.GetKey(0); 00057 const int TopNIdDeg = Graph->GetNI(TopNId).GetOutDeg(); 00058 int Vol = TopNIdDeg, Cut = TopNIdDeg; 00059 double Phi = Cut/double(Vol); 00060 VolV.Add(Vol); CutV.Add(Cut); PhiV.Add(1.0); 00061 for (int i = 1; i < ProbH.Len(); i++) { 00062 const int NId = ProbH.GetKey(i); 00063 const TUNGraph::TNodeI& Node = Graph->GetNI(NId); 00064 const int OutDeg = Node.GetOutDeg(); 00065 int CutSz = OutDeg; // edges outside 00066 for (int e = 0; e < OutDeg; e++) { 00067 const int Rank = ProbH.GetKeyId(Node.GetOutNId(e)); 00068 if ( Rank > -1 && Rank < i) { CutSz -= 2; } 00069 } 00070 Vol += OutDeg; Cut += CutSz; 00071 if (Vol < Edges2) { 00072 if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); } 00073 else { Phi = Cut / double(Vol); } 00074 } else { 00075 Phi = 1.0; 00076 } 00077 IAssert((Phi+1e-6) >= double(1.0)/double(i*(i+1)+1)); // conductance is worse than the best possible 00078 VolV.Add(Vol); CutV.Add(Cut); PhiV.Add(Phi); 00079 }} 00080 00081 void TLocClust::FindBestCut(const int& SeedNode, const int& ClustSz, const double& MinSizeFrac) { 00082 double MaxPhi = TFlt::Mx; 00083 BestCutIdx = -1; 00084 SeedNId = SeedNode; 00085 // calculate pagerank and cut sets 00086 ApproxPageRank(SeedNId, 1.0/double(ClustSz)); 00087 for (int i = 0; i < ProbH.Len(); i++) { 00088 ProbH[i] /= Graph->GetNI(ProbH.GetKey(i)).GetOutDeg(); } 00089 ProbH.SortByDat(false); 00090 SupportSweep(); 00091 // find best cut 00092 NIdV.Clr(false); 00093 for (int i = 0; i < PhiV.Len(); i++) { 00094 const double Phi = PhiV[i]; 00095 NIdV.Add(ProbH.GetKey(i)); 00096 if (Phi < MaxPhi) { MaxPhi = Phi; BestCutIdx = i; } 00097 } 00098 } 00099 00100 void TLocClust::PlotVolDistr(const TStr& OutFNm, TStr Desc) const { 00101 TFltPrV RankValV(VolV.Len(), 0); 00102 for (int i = 0; i < VolV.Len(); i++) { 00103 RankValV.Add(TFltPr(i+1, VolV[i].Val)); } 00104 if (Desc.Empty()) { Desc = OutFNm; } 00105 TFltPrV NewV; TLocClustStat::MakeExpBins(RankValV, NewV); 00106 TGnuPlot GP(OutFNm+"-VOL", TStr::Fmt("VOLUME. Seed node %d.", SeedNId, Desc.CStr())); 00107 GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5" 00108 GP.SetXYLabel("Rank", "Volume"); 00109 //GP.SetScale(gpsLog10X); 00110 GP.SavePng(); 00111 } 00112 00113 void TLocClust::PlotCutDistr(const TStr& OutFNm, TStr Desc) const { 00114 TFltPrV RankValV(CutV.Len(), 0); 00115 for (int i = 0; i < CutV.Len(); i++) { 00116 RankValV.Add(TFltPr(i+1, CutV[i].Val)); } 00117 if (Desc.Empty()) { Desc = OutFNm; } 00118 TFltPrV NewV; TLocClustStat::MakeExpBins(RankValV, NewV); 00119 TGnuPlot GP(OutFNm+"-CUT", TStr::Fmt("CUT SIZE. Seed node %d.", SeedNId, Desc.CStr())); 00120 GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5" 00121 GP.SetXYLabel("Rank", "Cut size"); 00122 //GP.SetScale(gpsLog10X); 00123 GP.SavePng(); 00124 } 00125 00126 void TLocClust::PlotPhiDistr(const TStr& OutFNm, TStr Desc) const { 00127 TFltPrV RankValV(PhiV.Len(), 0); 00128 for (int i = 0; i < PhiV.Len(); i++) { 00129 RankValV.Add(TFltPr(i+1, PhiV[i])); } 00130 if (Desc.Empty()) { Desc = OutFNm; } 00131 TFltPrV NewV; TLocClustStat::MakeExpBins(RankValV, NewV); 00132 TGnuPlot GP(OutFNm+"-PHI", TStr::Fmt("CONDUCTANCE (Cut size / Volume). Seed node %d.", SeedNId, Desc.CStr())); 00133 GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5" 00134 GP.SetXYLabel("Rank", "Conductance (Cut size / Volume)"); 00135 //GP.SetScale(gpsLog10X); 00136 GP.SavePng(); 00137 } 00138 00139 void TLocClust::SavePajek(const TStr& OutFNm) const { 00140 FILE *F = fopen(TStr::Fmt("%s.net", OutFNm.CStr()).CStr(), "wt"); 00141 TIntH NIdToIdH(Graph->GetNodes(), true); 00142 TIntH ClustSet(BestCut()+1); 00143 const int BucketSz = BestCutNodes()/ 4; 00144 TStrV ClrV = TStrV::GetV("Black", "Gray80", "Gray60", "Gray40", "Gray20", "RedViolet"); 00145 for (int a = 0; a < BestCutNodes(); a++) { 00146 ClustSet.AddDat(NIdV[a], (a+1)/BucketSz); 00147 } 00148 fprintf(F, "*Vertices %d\n", Graph->GetNodes()); 00149 int i = 0; 00150 for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00151 const int NId = NI.GetId(); 00152 if (NId == SeedNId) { 00153 fprintf(F, "%d \"%d\" diamond x_fact 2 y_fact 2 ic Green fos 10\n", i+1, NId); } 00154 else if (ClustSet.IsKey(NId)) { 00155 //fprintf(F, "%d \"%d\" box x_fact 1 y_fact 1 ic Red fos 10\n", i+1, NId); } 00156 fprintf(F, "%d \"%d\" box x_fact 2 y_fact 2 ic %s fos 10\n", i+1, NId, ClrV[ClustSet.GetDat(NId)].CStr()); } 00157 else { 00158 fprintf(F, "%d \"%d\" ellipse x_fact 2 y_fact 2 ic Blue fos 10\n", i+1, NId); } 00159 NIdToIdH.AddDat(NId, i+1); 00160 i++; 00161 } 00162 fprintf(F, "*Arcs %d\n", Graph->GetEdges()); // arcs are directed, edges are undirected 00163 for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00164 const int NId = NIdToIdH.GetDat(NI.GetId()); 00165 for (int e = 0; e < NI.GetOutDeg(); e++) { 00166 fprintf(F, "%d %d %g c Tan\n", NId, NIdToIdH.GetDat(NI.GetOutNId(e)).Val, 1.0); 00167 } 00168 } 00169 fclose(F); 00170 } 00171 00172 void TLocClust::DrawWhiskers(const PUNGraph& Graph, TStr FNmPref, const int& PlotN=10) { 00173 TCnComV CnComV; TSnap::Get1CnCom(Graph, CnComV); 00174 CnComV.Sort(false); 00175 // plot size distribution 00176 { TIntH SzCntH; 00177 for (int i = 0; i < CnComV.Len(); i++) { SzCntH.AddDat(CnComV[i].Len()) += 1; } 00178 TGnuPlot::PlotValCntH(SzCntH, "whiskSz."+FNmPref, TStr::Fmt("%s. G(%d, %d)", FNmPref.CStr(), Graph->GetNodes(), 00179 Graph->GetEdges()), "Whisker size (Maximal component connected with a bridge edge)", "Count", gpsLog10XY, false); } 00180 // draw them 00181 int BrNodeId = -1; 00182 for (int c = 0; c < TMath::Mn(CnComV.Len(), PlotN); c++) { 00183 const PUNGraph BrClust = TSnap::GetSubGraph(Graph, CnComV[c].NIdV); 00184 for (TUNGraph::TNodeI NI = BrClust->BegNI(); NI < BrClust->EndNI(); NI++) { 00185 if (NI.GetOutDeg() != Graph->GetNI(NI.GetId()).GetOutDeg()) { BrNodeId=NI.GetId(); break; } } 00186 TIntStrH ClrH; ClrH.AddDat(BrNodeId, "red"); 00187 TSnap::DrawGViz(BrClust, gvlNeato, TStr::Fmt("whisk-%s-%02d.gif", FNmPref.CStr(), c+1), 00188 TStr::Fmt("Bridge node id: %d, n=%d, e=%d", BrNodeId, BrClust->GetNodes(), BrClust->GetEdges()), false, ClrH); 00189 } 00190 } 00191 00192 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntV& NIdV, int& Vol, int& Cut, double& Phi, int GraphEdges) { 00193 TIntSet NIdSet(NIdV.Len()); 00194 for (int i = 0; i < NIdV.Len(); i++) { NIdSet.AddKey(NIdV[i]); } 00195 GetCutStat(Graph, NIdSet, Vol, Cut, Phi, GraphEdges); 00196 } 00197 00198 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntSet& NIdSet, int& Vol, int& Cut, double& Phi, int GraphEdges) { 00199 const int Edges2 = GraphEdges>=0 ? 2*GraphEdges : Graph->GetEdges(); 00200 Vol=0; Cut=0; Phi=0.0; 00201 for (int i = 0; i < NIdSet.Len(); i++) { 00202 if (! Graph->IsNode(NIdSet[i])) { continue; } 00203 TUNGraph::TNodeI NI = Graph->GetNI(NIdSet[i]); 00204 for (int e = 0; e < NI.GetOutDeg(); e++) { 00205 if (! NIdSet.IsKey(NI.GetOutNId(e))) { Cut += 1; } 00206 } 00207 Vol += NI.GetOutDeg(); 00208 } 00209 // get conductance 00210 if (Vol != Edges2) { 00211 if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); } 00212 else if (Vol == 0) { Phi = 0.0; } 00213 else { Phi = Cut / double(Vol); } 00214 } else { 00215 if (Vol == Edges2) { Phi = 1.0; } 00216 } 00217 } 00218 00219 void TLocClust::PlotNCP(const PUNGraph& Graph, const TStr& FNm, const TStr Desc, const bool& BagOfWhiskers, const bool& RewireNet, const int& KMin, const int& KMax, const int& Coverage, const bool& SaveTxtStat, const bool& PlotBoltzman) { 00220 const double Alpha = 0.001, KFac = 1.5, SizeFrac = 0.001; 00221 //const int KMin = 2, KMax = Mega(100), Coverage = 10; 00222 TLocClustStat ClusStat1(Alpha, KMin, KMax, KFac, Coverage, SizeFrac); 00223 ClusStat1.Run(Graph, false, PlotBoltzman, SaveTxtStat); 00224 if (BagOfWhiskers) { ClusStat1.AddBagOfWhiskers(); } 00225 TLocClustStat ClusStat2(Alpha, KMin, KMax, KFac, Coverage, SizeFrac); 00226 ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // plot before rewiring 00227 if (SaveTxtStat) { // for every piece size store modularity 00228 ClusStat1.SaveTxtInfo(FNm, Desc, false); 00229 } 00230 if (PlotBoltzman) { 00231 ClusStat1.PlotBoltzmanCurve(FNm, Desc); 00232 } 00233 if (RewireNet) { 00234 ClusStat2.Run(TSnap::GenRewire(Graph, 50, TInt::Rnd), false, false, false); 00235 if (BagOfWhiskers) { ClusStat2.AddBagOfWhiskers(); } 00236 ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // if rewire, plot again 00237 } 00238 } 00239 00241 // Local Clustering Statistics 00242 00243 double TLocClustStat::BinFactor = 1.01; 00244 00245 double TLocClustStat::TCutInfo::GetFracDegOut(const PUNGraph& Graph, double& MxFrac, double& AvgFrac, double& MedianFrac, double& Pct9Frac, double& Flake) const { 00246 if (CutNIdV.Empty()) { 00247 IAssert(Nodes<100 || ! CutNIdV.Empty()); 00248 MxFrac=1; AvgFrac=1; MedianFrac=1; Pct9Frac=1; Flake=1; 00249 return 1; 00250 } 00251 TMom FracDegMom; 00252 TIntSet InNIdSet(CutNIdV.Len()); 00253 int NHalfIn=0; 00254 for (int i = 0; i < CutNIdV.Len(); i++) { 00255 InNIdSet.AddKey(CutNIdV[i]); } 00256 for (int n = 0; n < CutNIdV.Len(); n++) { 00257 const TUNGraph::TNodeI NI = Graph->GetNI(CutNIdV[n]); 00258 int EdgesOut = 0; 00259 for (int i = 0; i < NI.GetDeg(); i++) { 00260 if (! InNIdSet.IsKey(NI.GetNbrNId(i))) { EdgesOut++; } 00261 } 00262 const double FracOut = EdgesOut/double(NI.GetDeg()); 00263 if (FracOut <= 0.5) { NHalfIn++; } 00264 FracDegMom.Add(FracOut); 00265 } 00266 FracDegMom.Def(); 00267 MxFrac = FracDegMom.GetMx(); 00268 AvgFrac = FracDegMom.GetMean(); 00269 MedianFrac = FracDegMom.GetMedian(); 00270 Pct9Frac = FracDegMom.GetDecile(9); 00271 Flake = 1.0 - double(NHalfIn)/double(CutNIdV.Len()); 00272 return MxFrac; 00273 } 00274 00275 TLocClustStat::TLocClustStat(const double& _Alpha, const int& _KMin, const int& _KMax, const double& _KFac, 00276 const int& _Coverage, const double& _SizeFrac) : Alpha(_Alpha), SizeFrac(_SizeFrac), KFac(_KFac), 00277 KMin(_KMin), KMax(_KMax), Coverage(_Coverage) { 00278 } 00279 00280 void TLocClustStat::Save(TSOut& SOut) const { 00281 // parameters 00282 Alpha.Save(SOut); 00283 SizeFrac.Save(SOut); 00284 KFac.Save(SOut); 00285 KMin.Save(SOut); 00286 KMax.Save(SOut); 00287 Coverage.Save(SOut); 00288 // cluster stat 00289 //SweepsV.Save(SOut); 00290 //SizePhiH.Save(SOut); 00291 } 00292 00293 void TLocClustStat::Clr() { 00294 SweepsV.Clr(false); // node ids and conductances for each run of local clustering 00295 SizePhiH.Clr(false); // conductances at cluster size K 00296 BestCutH.Clr(false); // best cut (min conductance) at size K 00297 BagOfWhiskerV.Clr(false); // (Size, Conductance) of bag of whiskers clusters 00298 } 00299 00300 void TLocClustStat::Run(const PUNGraph& _Graph, const bool& SaveAllSweeps, const bool& SaveAllCond, const bool& SaveBestNodesAtK) { 00301 Graph = TSnap::GetMxWcc(_Graph); 00302 const int Nodes = Graph->GetNodes(); 00303 const int Edges = Graph->GetEdges(); 00304 printf("\nLocal clustering: Graph(%d, %d)\n", Nodes, Edges); 00305 printf(" Alpha: %g\n", Alpha()); 00306 printf(" K: %d -- %g -- %dm\n", KMin(), KFac(), int(KMax/Mega(1))); 00307 printf(" Coverage: %d\n", Coverage()); 00308 printf(" SizeFrac: %g\n\n", SizeFrac()); 00309 TExeTm TotTm; 00310 Clr(); 00311 TLocClust Clust(Graph, Alpha); 00312 BestCut.CutNIdV.Clr(false); 00313 BestCut.CutSz=-1; BestCut.Edges=-1; 00314 double BestPhi = TFlt::Mx; 00315 int prevK=-1; 00316 bool NextDone=false; 00317 if (SaveBestNodesAtK) { // fill buckets (only store nodes in clusters for sizes in SizeBucketSet) 00318 SizeBucketSet.Clr(); 00319 double PrevBPos = 1, BPos = 1; 00320 while (BPos <= Mega(100)) { 00321 PrevBPos = (uint) floor(BPos); 00322 BPos *= BinFactor; 00323 if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; } 00324 SizeBucketSet.AddKey(int(floor(BPos) - 1)); 00325 } 00326 } 00327 for (int K = KMin, cnt=1; K < KMax; K = int(KFac * double(K))+1, cnt++) { 00328 if (K == prevK) { K++; } prevK = K; 00329 const int Runs = 2 + int(Coverage * floor(double(Graph->GetEdges()) / double(K))); 00330 printf("%d] K: %d, %d runs\n", cnt+1, K, Runs); 00331 if (NextDone) { break; } // done 00332 if (K+1 > 2*Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; } 00333 //if (K+1 > Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; } 00334 TExeTm ExeTm, IterTm; 00335 double MeanSz=0.0, MeanVol=0.0, Count=0.0; 00336 for (int run = 0; run < Runs; run++) { 00337 const int SeedNId = Graph->GetRndNId(); IterTm.Tick(); 00338 Clust.FindBestCut(SeedNId, K, SizeFrac); 00339 const int Sz = Clust.BestCutNodes(); 00340 const int Vol = Clust.GetCutVol(); 00341 const double Phi = TMath::Round(Clust.GetCutPhi(), 4); 00342 if (Sz == 0 || Vol == 0 || Phi == 0) { continue; } 00343 MeanSz+=Sz; MeanVol+=Vol; Count+= 1; 00344 if (SaveAllSweeps) { // save the full cut set and conductances for all trials 00345 SweepsV.Add(TNodeSweep(SeedNId, Clust.GetNIdV(), Clust.GetPhiV())); } 00346 int SAtBestPhi=-1; 00347 for (int s = 0; s < Clust.Len(); s++) { 00348 const int size = s+1; 00349 const int cut = Clust.GetCut(s); 00350 const int edges = (Clust.GetVol(s)-cut)/2; 00351 const double phi = Clust.GetPhi(s); 00352 if (( Clust.GetPhi(s) != double(cut)/double(2*edges+cut))) { continue; } // more than half of the edges 00353 IAssert((Clust.GetVol(s) - cut) % 2 == 0); 00354 IAssert(phi == double(cut)/double(2*edges+cut)); 00355 IAssert(phi >= 1.0/double((1+s)*s+1)); 00357 // TCutInfo CutInfo(size, edges, cut); Clust.GetNIdV().GetSubValV(0, size-1, CutInfo.CutNIdV); 00358 //double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0; 00359 //CutInfo.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake); 00360 //const double phi = MxFrac; 00361 if (BestPhi >= phi) { 00362 BestPhi = phi; 00363 BestCut = TCutInfo(size, edges, cut); 00364 SAtBestPhi = s; 00365 } 00367 //bool TAKE=false; if (! BestCutH.IsKey(size)) { TAKE=true; } 00368 //else { BestCutH.GetDat(size).GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake); if (MxFrac >= phi) { TAKE = true; } } 00369 // if (TAKE) { 00370 if (! BestCutH.IsKey(size) || BestCutH.GetDat(size).GetPhi() >= phi) { //new best cut (size, edges inside and nodes) 00371 BestCutH.AddDat(size, TCutInfo(size, edges, cut)); // for every size store best cut (NIds inside the cut) 00372 if (SaveBestNodesAtK) { // store node ids in best community for each size k 00373 if (! SizeBucketSet.Empty() && ! SizeBucketSet.IsKey(size)) { continue; } // only save best clusters at SizeBucketSet 00374 Clust.GetNIdV().GetSubValV(0, size-1, BestCutH.GetDat(size).CutNIdV); } 00375 } 00376 if (SaveAllCond) { // for every size store all conductances 00377 SizePhiH.AddDat(size).Add(phi); } 00378 } 00379 if (SAtBestPhi != -1) { // take nodes in best cluster 00380 const int size = SAtBestPhi+1; 00381 Clust.GetNIdV().GetSubValV(0, size-1, BestCut.CutNIdV); 00382 } 00383 if (TLocClust::Verbose) { 00384 printf("."); 00385 if (run % 50 == 0) { 00386 printf("\r %d / %d \r", run, Runs); } 00387 } 00388 } 00389 if (TLocClust::Verbose) { 00390 printf("\r %d / %d: %s \n", Runs, Runs, ExeTm.GetStr()); 00391 } 00392 MeanSz/=Count; MeanVol/=Count; 00393 printf(" Graph(%d, %d) ", Nodes, Edges); 00394 printf(" mean: sz: %.2f vol: %.2f [%s] %s\n", MeanSz, MeanVol, ExeTm.GetStr(), TExeTm::GetCurTm()); 00395 } 00396 SizePhiH.SortByKey(); 00397 for (int k = 0; k < SizePhiH.Len(); k++) { 00398 SizePhiH[k].Sort(); } 00399 SweepsV.Sort(); 00400 BestCutH.SortByKey(); 00401 printf("DONE. Graph(%d, %d): Total run time: %s\n\n", Nodes, Edges, TotTm.GetStr()); 00402 } 00403 00404 void TLocClustStat::AddBagOfWhiskers() { 00405 BagOfWhiskers(Graph, BagOfWhiskerV, BestWhisk); // slow but exact 00406 //BagOfWhiskers2(Graph, BagOfWhiskerV); 00407 } 00408 00409 // add a cut on NIdV nodes 00410 void TLocClustStat::AddCut(const TIntV& NIdV) { 00411 int Vol, Cut; double Phi; 00412 TLocClust::GetCutStat(Graph, NIdV, Vol, Cut, Phi, Graph->GetEdges()); 00413 SizePhiH.AddDat(NIdV.Len()).Add(Phi); 00414 } 00415 00416 // add a cut of particular conductance 00417 void TLocClustStat::AddCut(const int& ClustSz, const double& Phi) { 00418 SizePhiH.AddDat(ClustSz).Add(Phi); 00419 } 00420 00421 // add the cut on particular set of nodes (calculate conductance and modularity) 00422 void TLocClustStat::AddToBestCutH(const PUNGraph& Graph, const TIntV& NIdV, const bool& AddBestCond) { 00423 const int K = NIdV.Len(); 00424 TCutInfo CutInfo(Graph, NIdV, true); 00425 printf("new: %d\t%d\t%d\t%f\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), 00426 CutInfo.GetCutSz(), CutInfo.GetPhi(), CutInfo.GetModular(Graph->GetEdges())); 00427 if (! BestCutH.IsKey(K)) { BestCutH.AddDat(K, CutInfo); return; } 00428 TCutInfo& CurCut = BestCutH.GetDat(K); 00429 // keep the cluster with best conductance 00430 if (AddBestCond && CurCut.GetPhi() > CutInfo.GetPhi()) { CurCut=CutInfo; } 00431 // keep the cluster with best modularity 00432 //const int E = Graph->GetEdges(); 00433 //if (! AddBestCond && CurCut.GetModular(E) < CutInfo.GetModular(E)) { CurCut=CutInfo; } 00434 } 00435 00436 const TLocClustStat::TCutInfo& TLocClustStat::FindBestCut(const int& Nodes) const { 00437 double bestPhi = 1; 00438 int CutId = -1; 00439 if (Nodes > 0) { 00440 IAssert(BestCutH.IsKey(Nodes)); 00441 return BestCutH.GetDat(Nodes); 00442 } else { 00443 for (int n = 0; n < BestCutH.Len(); n++) { 00444 if (BestCutH[n].GetPhi() <= bestPhi) { 00445 bestPhi = BestCutH[n].GetPhi(); CutId = n; } 00446 } 00447 IAssert(CutId != -1); 00448 IAssert(! BestCutH[CutId].CutNIdV.Empty()); 00449 return BestCutH[CutId]; 00450 } 00451 } 00452 00453 // find the cut with the lowest Phi of particular size (if Nodes=-1 find absolute best cut) 00454 double TLocClustStat::FindBestCut(const int& Nodes, TIntV& ClustNIdV) const { 00455 const TCutInfo& Cut = FindBestCut(Nodes); 00456 ClustNIdV = Cut.CutNIdV; 00457 return Cut.GetPhi(); 00458 } 00459 00460 int TLocClustStat::FindBestCut(const int& Nodes, int& Vol, int& Cut, double& Phi) const { 00461 const TCutInfo& CutInfo = FindBestCut(Nodes); 00462 Vol = CutInfo.GetVol(); 00463 Cut = CutInfo.GetCutSz(); 00464 Phi = CutInfo.GetPhi(); 00465 return CutInfo.GetNodes(); 00466 } 00467 00468 // find best phi where the cut has N nodes, and the nodes in the cluster are not in the TabuSet 00469 double TLocClustStat::FindBestCut(const int& Nodes, const TIntSet& TabuNIdSet, int& BestCutId) const { 00470 double bestPhi = 1; 00471 BestCutId = -1; 00472 bool Tabu; 00473 IAssert(! SweepsV.Empty()); 00474 for (int c = 0; c < SweepsV.Len(); c++) { 00475 const TNodeSweep& Sweep = SweepsV[c]; 00476 if (Sweep.Len() < Nodes) { continue; } 00477 if (Sweep.Phi(Nodes-1) > bestPhi) { continue; } 00478 Tabu = false; 00479 for (int i = 0; i < Nodes; i++) { 00480 if (TabuNIdSet.IsKey(Sweep.NId(i))) { Tabu=true; break; } 00481 } 00482 if (Tabu) { continue; } 00483 bestPhi = Sweep.Phi(Nodes-1); 00484 BestCutId = c; 00485 } 00486 return bestPhi; 00487 } 00488 00489 // get statistics over all conductances at all cluster sizes 00490 void TLocClustStat::GetCurveStat(TFltPrV& MeanV, TFltPrV& MedV, TFltPrV& Dec1V, TFltPrV& MinV, TFltPrV& MaxV) const { 00491 TFltPrV BucketV; 00492 MeanV.Clr(false); MedV.Clr(false); Dec1V.Clr(false); MinV.Clr(false); MaxV.Clr(false); 00493 if (! SizePhiH.Empty()) { // stores conductances of all little clusters 00494 const THash<TInt, TFltV>& KvH = SizePhiH; 00495 for (int i = 0; i < KvH.Len(); i++) { 00496 const double X = KvH.GetKey(i).Val; IAssert(X >= 1.0); 00497 const TFltV& YVec = KvH[i]; 00498 TMom Mom; 00499 for (int j = 0; j < YVec.Len(); j++) { Mom.Add(YVec[j]); } 00500 Mom.Def(); 00501 /*IAssert(Mom.GetMean()>=0 && Mom.GetMean()<=1); 00502 IAssert(Mom.GetMedian()>=0 && Mom.GetMedian()<=1); 00503 IAssert(Mom.GetDecile(1)>=0 && Mom.GetDecile(1)<=1); 00504 IAssert(Mom.GetMn()>=0 && Mom.GetMn()<=1); 00505 IAssert(Mom.GetMx()>=0 && Mom.GetMx()<=1);*/ 00506 MeanV.Add(TFltPr(X, Mom.GetMean())); 00507 MedV.Add(TFltPr(X, Mom.GetMedian())); 00508 Dec1V.Add(TFltPr(X, Mom.GetDecile(1))); 00509 MinV.Add(TFltPr(X, Mom.GetMn())); 00510 MaxV.Add(TFltPr(X, Mom.GetMx())); 00511 } 00512 MeanV.Sort(); MedV.Sort(); Dec1V.Sort(); MinV.Sort(); MaxV.Sort(); 00513 // create log buckets (makes plots smaller and smoother) 00514 TLocClustStat::MakeExpBins(MeanV, BucketV); MeanV.Swap(BucketV); 00515 TLocClustStat::MakeExpBins(MedV, BucketV); MedV.Swap(BucketV); 00516 TLocClustStat::MakeExpBins(Dec1V, BucketV); Dec1V.Swap(BucketV); 00517 TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV); 00518 TLocClustStat::MakeExpBins(MaxV, BucketV); MaxV.Swap(BucketV); 00519 } else { 00520 //const int GEdges = Graph->GetEdges(); 00521 for (int i = 0; i < BestCutH.Len(); i++) { 00522 MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetPhi())); 00523 } 00524 MinV.Sort(); 00525 TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV); 00526 } 00527 } 00528 00529 void TLocClustStat::GetBoltzmanCurveStat(const TFltV& TempV, TVec<TFltPrV>& NcpV) const { 00530 IAssert(! SizePhiH.Empty()); // stores conductances of all little clusters 00531 NcpV.Gen(TempV.Len()); 00532 TFltPrV BucketV; 00533 for (int t = 0; t < TempV.Len(); t++) { 00534 const double T = TempV[t]; 00535 for (int i = 0; i < SizePhiH.Len(); i++) { 00536 const double X = SizePhiH.GetKey(i).Val; IAssert(X >= 1.0); 00537 const TFltV& PhiV = SizePhiH[i]; 00538 double V = 0.0, SumW = 0.0; 00539 for (int j = 0; j < PhiV.Len(); j++) { 00540 V += PhiV[j] * exp(-PhiV[j]/T); 00541 SumW += exp(-PhiV[j]/T); 00542 } 00543 //V /= PhiV.Len(); 00544 V /= SumW; 00545 NcpV[t].Add(TFltPr(X, V)); 00546 } 00547 TLocClustStat::MakeExpBins(NcpV[t], BucketV); NcpV[t].Swap(BucketV); 00548 // normalize to start at (1,1) 00549 //for (int i = 0; i < NcpV[t].Len(); i++) { 00550 // NcpV[t][i].Val2 /= NcpV[t][0].Val2; 00551 //} 00552 } 00553 } 00554 00555 TStr TLocClustStat::ParamStr() const { 00556 if (Graph.Empty()) { 00557 return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac()); } 00558 else { 00559 return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g G(%d, %d)", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac(), 00560 Graph->GetNodes(), Graph->GetEdges()); 00561 } 00562 } 00563 00564 void TLocClustStat::PlotNCP(const TStr& OutFNm, TStr Desc) const { 00565 if (Desc.Empty()) { Desc = OutFNm; } 00566 TFltPrV MeanV, MedV, Dec1V, MinV, MaxV; 00567 GetCurveStat(MeanV, MedV, Dec1V, MinV, MaxV); 00568 TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); 00569 if (! MaxV.Empty()) { GP.AddPlot((MaxV), gpwLines, "MAX"); } 00570 if (! MedV.Empty()) { GP.AddPlot((MedV), gpwLines, "MEDIAN"); } 00571 if (! MeanV.Empty()) { GP.AddPlot((MeanV), gpwLines, "MEAN"); } 00572 if (! Dec1V.Empty()) { GP.AddPlot((Dec1V), gpwLines, "1-st DECILE"); } 00573 if (! MinV.Empty()) { 00574 GP.AddPlot((MinV), gpwLines, "MIN"); 00575 //GP.AddPlot(MinV, gpwLines, "smooth MIN", "lw 2 smooth bezier"); 00576 } 00577 if (! BagOfWhiskerV.Empty()) { 00578 GP.AddPlot(BagOfWhiskerV, gpwLines, "Whiskers", "lw 1"); 00579 TFltPrV BestWhiskV; BestWhiskV.Add(TFltPr(BestWhisk)); 00580 GP.AddPlot(BestWhiskV, gpwPoints, "Best whisker", "pt 5 ps 2"); 00581 } 00582 GP.SetScale(gpsLog10XY); 00583 GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); 00584 GP.SetXRange(1, Graph->GetNodes()/2); 00585 GP.SavePng(); 00586 } 00587 00588 // NCP but with modularity on Y-axis 00589 void TLocClustStat::PlotNCPModul(const TStr& OutFNm, TStr Desc) const { 00590 if (Desc.Empty()) { Desc = OutFNm; } 00591 TFltPrV MinV, BucketV; 00592 const int GEdges = Graph->GetEdges(); 00593 for (int i = 0; i < BestCutH.Len(); i++) { 00594 MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetModular(GEdges))); } 00595 MinV.Sort(); 00596 TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV); 00597 TGnuPlot GP("ncpMod."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); 00598 if (! MinV.Empty()) { 00599 GP.AddPlot((MinV), gpwLines, "MIN"); } 00600 GP.SetScale(gpsLog10XY); 00601 GP.SetXYLabel("k (number of nodes in the cluster)", "Q (modularity)"); 00602 GP.SetXRange(1, Graph->GetNodes()/2); 00603 GP.SavePng(); 00604 } 00605 00606 // plot top 10 surface/volume curves (disjont clusters of particular size) 00607 void TLocClustStat::PlotNcpTop10(const TStr& OutFNm, TStr Desc, const int& TakeMinN) const { 00608 if (Desc.Empty()) { Desc = OutFNm; } 00609 const double BinFactor = 1.05; 00610 double BinPos=1; 00611 int PrevBPos=1, CurBPos=1, CutId; 00612 bool AddSome; 00613 TVec<TFltPrV> Curves(TakeMinN); 00614 while (true) { 00615 PrevBPos = CurBPos; 00616 BinPos *= BinFactor; // do exponential binning 00617 CurBPos = (int) floor(BinPos); 00618 if (CurBPos == PrevBPos) { CurBPos=PrevBPos+1; BinPos=CurBPos; } 00619 const int Nodes = CurBPos; 00620 TIntSet TabuNIdSet(Graph->GetNodes()); 00621 AddSome = false; 00622 for (int t = 0; t < TakeMinN; t++) { 00623 const double Phi = FindBestCut(Nodes, TabuNIdSet, CutId); 00624 if (CutId == -1) { break; } 00625 Curves[t].Add(TFltPr(Nodes, Phi)); 00626 for (int n = 0; n < Nodes; n++) { 00627 TabuNIdSet.AddKey(SweepsV[CutId].NId(n)); } 00628 AddSome = true; 00629 } 00630 if (! AddSome) { break; } 00631 } 00632 TGnuPlot GP("ncpTop."+OutFNm, TStr::Fmt("%s. Top disjoint clusters. Take:%d, %s", Desc.CStr(), TakeMinN, ParamStr().CStr())); 00633 for (int i = 0; i < Curves.Len(); i++) { 00634 GP.AddPlot(Curves[i], gpwLines, TStr::Fmt("MIN %d", i+1), "lw 1"); } 00635 //if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, " Whiskers", "lw 2"); } 00636 GP.SetScale(gpsLog10XY); 00637 GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); 00638 GP.SetXRange(1, Graph->GetNodes()/2); 00639 GP.SavePng(); 00640 } 00641 00642 // plot conductance on the boundary / counductance inside vs. cluster size 00643 void TLocClustStat::PlotPhiInOut(const TStr& OutFNm, TStr Desc) const { 00644 IAssert(! BestCutH.Empty() && ! Graph.Empty()); 00645 TFltPrV PhiInV, PhiBoundV, PhiRatV; 00646 FILE *F = fopen(TStr::Fmt("phiInOut.%s-all.tab", OutFNm.CStr()).CStr(), "wt"); 00647 fprintf(F, "#Nodes\tEdges\tVol\tCut\tPhi\tInNodes\tInEdges\tInVol\tInCut\tInPhi\n"); 00648 TLocClustStat ClustStat2(Alpha, 1, KMax, KFac, Coverage, SizeFrac); 00649 const double IdxFact = 1.05; 00650 int curIdx=1, prevIdx=1; 00651 while (curIdx <= BestCutH.Len()) { 00652 const TCutInfo& CutInfo = BestCutH[curIdx-1]; 00653 if (CutInfo.GetNodes() > 1) { 00654 PUNGraph ClustG = TSnap::GetSubGraph(Graph, CutInfo.CutNIdV); 00655 ClustStat2.Run(ClustG); 00656 const TCutInfo& InCut = ClustStat2.FindBestCut(-1); 00657 PhiInV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi())); 00658 PhiBoundV.Add(TFltPr(CutInfo.GetNodes(), CutInfo.GetPhi())); 00659 PhiRatV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()/CutInfo.GetPhi())); 00660 fprintf(F, "%d\t%d\t%d\t%d\t%f\t%d\t%d\t%d\t%d\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), CutInfo.GetVol(), 00661 CutInfo.GetCutSz(), CutInfo.GetPhi(), InCut.GetNodes(), InCut.GetEdges(), InCut.GetVol(), InCut.GetCutSz(), InCut.GetPhi()); 00662 fflush(F); 00663 } 00664 prevIdx = curIdx; 00665 curIdx = (int) TMath::Round(double(curIdx)*IdxFact); 00666 if (prevIdx == curIdx) { curIdx++; } 00667 } 00668 fclose(F); 00669 { TGnuPlot GP("phiInOut."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); 00670 GP.AddPlot(PhiBoundV, gpwLines, "CUT conductance", "lw 1"); 00671 GP.AddPlot(PhiInV, gpwLines, "INSIDE conductance", "lw 1"); 00672 //GP.AddPlot(PhiRatV, gpwLines, "RATIO (inside/boundary)", "lw 1"); 00673 GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY); 00674 GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); 00675 //GP.AddCmd("set logscale xyy2 10"); 00676 //GP.AddCmd("set y2label \"Conductance ratio (inside/boundary -- higher better)\""); 00677 GP.SavePng(); } 00678 //system(TStr(TStr("replace_all.py phiInOut.")+OutFNm+".plt \"title \\\"RATIO (inside/boundary)\" \"axis x1y2 title \\\"RATIO (inside/boundary)\"").CStr()); 00679 //GP.RunGnuPlot(); 00680 { TGnuPlot GP("phiInOutRat."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); 00681 GP.AddPlot(PhiRatV, gpwLines, "RATIO (Inside / Boundary)", "lw 1"); 00682 GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY); 00683 GP.SetXYLabel("Nodes", "Conductance ratio (inside/boundary) -- higher better"); 00684 GP.SavePng(); } 00685 } 00686 00687 // Plot edges inside, edges cut and conductance. 00688 // run: replace_all.py cutEdges.*.plt "title \"Conductance" "axis x1y2 title \"Conductance" 00689 void TLocClustStat::PlotBestClustDens(TStr OutFNm, TStr Desc) const { 00690 if (Desc.Empty()) { Desc = OutFNm; } 00691 const int len = BestCutH.Len(); 00692 TFltPrV CutV(len, 0), EdgesV(len, 0), PhiV(len,0); 00693 for (int i = 0; i < BestCutH.Len(); i++) { 00694 const double size = BestCutH.GetKey(i).Val; 00695 CutV.Add(TFltPr(size, BestCutH[i].GetCutSz())); 00696 EdgesV.Add(TFltPr(size, BestCutH[i].GetEdges())); 00697 PhiV.Add(TFltPr(size, BestCutH[i].GetPhi())); 00698 } 00699 TGnuPlot GP("cutEdges."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); 00700 TFltPrV NewV; TLocClustStat::MakeExpBins(EdgesV, NewV); 00701 GP.AddPlot(NewV, gpwLines, "Edges inside"); 00702 TLocClustStat::MakeExpBins(CutV, NewV); 00703 GP.AddPlot(NewV, gpwLines, "Cut edges"); 00704 TLocClustStat::MakeExpBins(PhiV, NewV); 00705 GP.AddPlot(NewV, gpwLines, "Conductance"); 00706 GP.SetXYLabel("Cluster size", "Edges"); GP.SetScale(gpsLog10XY); 00707 GP.AddCmd("set logscale xyy2 10"); 00708 GP.AddCmd("set y2label \"Conductance\""); 00709 GP.SavePng(); 00710 system(TStr(TStr("replace_all.py cutEdges.")+OutFNm+".plt \"title \\\"Conductance\" \"axis x1y2 title \\\"Conductance\"").CStr()); 00711 GP.RunGnuPlot(); 00712 } 00713 00714 // all different conducances of all sizes (not just lower envelope) 00715 void TLocClustStat::PlotNCPScatter(const TStr& OutFNm, TStr Desc) const { 00716 if (Desc.Empty()) { Desc = OutFNm; } 00717 THashSet<TFltPr> PhiSzH; 00718 IAssertR(! SizePhiH.Empty(), "Set SaveAllCond=true in TLocClustStat::Run()"); 00719 for (int k = 0; k < SizePhiH.Len(); k++) { 00720 const int K = SizePhiH.GetKey(k); 00721 const TFltV& PhiV = SizePhiH[k]; 00722 for (int p = 0; p < PhiV.Len(); p++) { 00723 PhiSzH.AddKey(TFltPr(K, TMath::Round(PhiV[p], 3))); } 00724 } 00725 TFltPrV PhiSzV; PhiSzH.GetKeyV(PhiSzV); 00726 TGnuPlot GP("ncpScatter."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); 00727 GP.AddPlot(PhiSzV, gpwPoints, "", "pt 5 ps 0.2"); 00728 GP.SetScale(gpsLog10XY); 00729 GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); 00730 GP.SetXRange(1, Graph->GetNodes()/2); 00731 GP.SavePng(); 00732 } 00733 00734 // histogram of conductances for a fixed CmtySz 00735 void TLocClustStat::PlotPhiDistr(const int& CmtySz, const TStr& OutFNm, TStr Desc) const { 00736 IAssert(! SizePhiH.Empty()); 00737 const TFltV& PhiV = SizePhiH.GetDat(CmtySz); 00738 THash<TFlt, TInt> PhiCntH; 00739 for (int i = 0; i < PhiV.Len(); i++) { 00740 const double Buck = TMath::Round(PhiV[i], 3); 00741 PhiCntH.AddDat(Buck) += 1; 00742 } 00743 TGnuPlot::PlotValCntH(PhiCntH, TStr::Fmt("phiDistr%03d.%s", CmtySz, OutFNm.CStr()), 00744 TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), "{/Symbol \\F} (conductance)", "Count"); 00745 } 00746 00747 void TLocClustStat::PlotBoltzmanCurve(const TStr& OutFNm, TStr Desc) const { 00748 TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1; 00749 GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1); 00750 TVec<TFltPrV> NcpV; 00751 //const TFltV TempV = TFltV::GetV(0.05, 0.01, 0.1, 10, 100, 1000); 00752 const TFltV TempV = TFltV::GetV(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1); 00753 GetBoltzmanCurveStat(TempV, NcpV); 00754 TGnuPlot GP("ncp."+OutFNm+"-B", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); 00755 GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); 00756 GP.AddPlot(MeanV1, gpwLines, "Avg", "lw 1"); 00757 GP.AddPlot(MedV1, gpwLines, "Median", "lw 1"); 00758 GP.AddPlot(Dec1V1, gpwLines, "Decile-1", "lw 1"); 00759 for (int t = 0; t < TempV.Len(); t++) { 00760 GP.AddPlot(NcpV[t], gpwLines, TStr::Fmt("Temp %g", TempV[t]()), "lw 1"); 00761 } 00762 GP.SetScale(gpsLog10XY); 00763 GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); 00764 GP.SavePng(); 00765 // 00766 TFltPrV SzNClustV; 00767 int kCnt=1; 00768 for (int i = 0; i < SizePhiH.Len(); i++) { 00769 const int K = SizePhiH.GetKey(i); 00770 const TFltV& PhiV = SizePhiH[i]; 00771 SzNClustV.Add(TFltPr(K, PhiV.Len())); 00772 if (K>2 && (pow(10.0, (int)log10((double)K))==K || (K >=10 && K<=100 && K%10==0) || (K >=100 && K<=1000 && K%100==0))) { 00773 THash<TFlt, TFlt> CntH; 00774 for (int p = 0; p < PhiV.Len(); p++) { 00775 CntH.AddDat(TMath::Round(log10(PhiV[p].Val),1)) += 1; 00776 } 00777 TGnuPlot::PlotValCntH(CntH, TStr::Fmt("ncp.%s-c%02d", OutFNm.CStr(), kCnt++), TStr::Fmt("%s. K=%d, NPieces=%d, %s", 00778 Desc.CStr(), K, PhiV.Len(), ParamStr().CStr()), "log_10 {/Symbol \\F} (conductance)", 00779 TStr::Fmt("Number of pieces of such conductance, K=%d, NPieces=%d)", K, PhiV.Len())); 00780 } 00781 } 00782 TGnuPlot::PlotValV(SzNClustV, "ncp."+OutFNm+"-ClSz", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), 00783 "k (cluster size)", "c(k) (number of extracted clusters)", gpsLog); 00784 } 00785 00786 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const { 00787 if (Desc.Empty()) { Desc = OutFNm; } 00788 TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1; 00789 TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2; 00790 GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1); 00791 LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2); 00792 // plot 00793 TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); 00794 if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc1.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); } 00795 if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc2.CStr(), LcStat2.Graph->GetNodes(), LcStat2.Graph->GetEdges()), "lw 1"); } 00796 if (! BagOfWhiskerV.Empty()) { 00797 GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); 00798 TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk); 00799 GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); } 00800 if (! LcStat2.BagOfWhiskerV.Empty()) { 00801 GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); 00802 TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk); 00803 GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); } 00804 GP.SetScale(gpsLog10XY); 00805 GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); 00806 //GP.SetXRange(1, Graph->GetNodes()/2); 00807 GP.SavePng(); 00808 } 00809 00810 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, const TLocClustStat& LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const { 00811 if (Desc.Empty()) { Desc = OutFNm; } 00812 TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1; 00813 TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2; 00814 TFltPrV MeanV3, MedV3, Dec1V3, MinV3, MaxV3; 00815 GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1); 00816 LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2); 00817 LcStat3.GetCurveStat(MeanV3, MedV3, Dec1V3, MinV3, MaxV3); 00818 // plot 00819 TGnuPlot GP("phiTR."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); 00820 if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, Desc1+" MIN", "lw 1"); } 00821 if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, Desc2+" MIN", "lw 1"); } 00822 if (! MinV3.Empty()) { GP.AddPlot(MinV3, gpwLines, Desc3+" MIN", "lw 1"); } 00823 if (! BagOfWhiskerV.Empty()) { 00824 GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); 00825 TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk); 00826 GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); } 00827 if (! LcStat2.BagOfWhiskerV.Empty()) { 00828 GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); 00829 TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk); 00830 GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); } 00831 if (! LcStat3.BagOfWhiskerV.Empty()) { 00832 GP.AddPlot(LcStat3.BagOfWhiskerV, gpwLines, Desc3+" whiskers", "lw 1"); 00833 TFltPrV BestWhiskV; BestWhiskV.Add(LcStat3.BestWhisk); 00834 GP.AddPlot(BestWhiskV, gpwPoints, Desc3+" Best whisker", "pt 5 ps 2"); } 00835 GP.SetScale(gpsLog10XY); 00836 GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); 00837 GP.SetXRange(1, Graph->GetNodes()/2); 00838 GP.SavePng(); 00839 } 00840 00841 void TLocClustStat::SaveTxtInfo(const TStr& OutFNmPref, const TStr& Desc, const bool& SetMaxAt1) const { 00842 printf("Save text info..."); 00843 TExeTm ExeTm; 00844 const int GNodes = Graph->GetNodes(); 00845 const int GEdges = Graph->GetEdges(); 00846 TVec<TFltV> ColV(17); 00847 double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0; 00848 // double PrevBPos = 1, BPos = 1; 00849 for (int i = 0; i < SizeBucketSet.Len(); i++) { 00850 if ( !BestCutH.IsKey(SizeBucketSet[i])) { continue; } 00851 const TLocClustStat::TCutInfo& C = GetKNodeCut(SizeBucketSet[i]); 00852 C.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake); 00853 ColV[0].Add(C.Nodes()); ColV[1].Add(C.Edges()); 00854 ColV[2].Add(C.CutSz()); ColV[3].Add(C.GetPhi()); 00855 ColV[4].Add(C.GetExpansion()); ColV[5].Add(C.GetIntDens()); 00856 ColV[6].Add(C.GetCutRatio(GNodes)); ColV[7].Add(C.GetNormCut(GEdges)); 00857 ColV[8].Add(MxFrac); ColV[9].Add(AvgFrac); 00858 ColV[10].Add(MedianFrac); ColV[11].Add(Pct9Frac); ColV[12].Add(Flake); 00859 ColV[13].Add(double(2.0*C.Edges)); ColV[14].Add(C.GetExpEdgesIn(GEdges)); 00860 ColV[15].Add(C.GetModular(GEdges)); ColV[16].Add(C.GetModRat(GEdges)); 00861 printf("."); 00862 } 00863 // normalize so that maximum value is at 1 00864 if (SetMaxAt1) { 00865 for (int c = 1; c < ColV.Len(); c++) { 00866 double MaxVal=1e-10; 00867 for (int r = 0; r < ColV[c].Len(); r++) { MaxVal = TMath::Mx(MaxVal, ColV[c][r]()); } 00868 for (int r = 0; r < ColV[c].Len(); r++) { ColV[c][r] /= MaxVal; } 00869 } 00870 } 00871 // save 00872 const TStr DatFNm = TStr::Fmt("ncp.%s.INFO.tab", OutFNmPref.CStr()); 00873 FILE *F = fopen(DatFNm.CStr(), "wt"); 00874 fprintf(F, "# %s %s\n", Desc.CStr(), ParamStr().CStr()); 00875 fprintf(F, "#N_inside\tE_inside\tE_across\tConductance\tExpansion\tIntDensity\tCutRatio\tNormCut\tMx_FracDegOut\tAvg_FDO\tMedian_FDO\t90Pct_FDO\tFlake_FDO\tVolume\tExpVolume\tModularity\tModRatio\n"); 00876 for (int r = 0; r < ColV[0].Len(); r++) { 00877 fprintf(F, "%g", ColV[0][r]()); 00878 for (int c = 1; c < ColV.Len(); c++) { 00879 fprintf(F, "\t%g", ColV[c][r]()); } 00880 fprintf(F, "\n"); 00881 } 00882 fclose(F); 00883 printf("[%s]\n", ExeTm.GetStr()); 00884 TGnuPlot GP(TStr::Fmt("ncp.%s.All", OutFNmPref.CStr()), TStr::Fmt("%s %s", 00885 Desc.CStr(), ParamStr().CStr())); 00886 GP.AddPlot(DatFNm, 1, 4, gpwLines, "Conductance", "lw 2"); 00887 GP.AddPlot(DatFNm, 1, 5, gpwPoints, "Expansion", "pt 3"); 00888 GP.AddPlot(DatFNm, 1, 6, gpwPoints, "Internal Density", "pt 5 ps 0.8"); 00889 GP.AddPlot(DatFNm, 1, 7, gpwPoints, "Cut Ratio", "pt 6"); 00890 GP.AddPlot(DatFNm, 1, 8, gpwPoints, "Normalized Cut", "pt 7"); 00891 GP.AddPlot(DatFNm, 1, 9, gpwPoints, "Maximum FDO", "pt 9"); 00892 GP.AddPlot(DatFNm, 1, 10, gpwPoints, "Avg FDO", "pt 11"); 00893 GP.AddPlot(DatFNm, 1, 13, gpwPoints, "Flake FDO", "pt 13"); 00894 GP.SetScale(gpsLog10XY); 00895 GP.SetXYLabel("k (number of nodes in the cluster)", "Normalized community score (lower is better)"); 00896 GP.SavePng(); 00897 } 00898 00899 // conductances if clusters are composed of disjoint pieces that can be separated 00900 // from the graph by a single edge 00901 void TLocClustStat::BagOfWhiskers(const PUNGraph& Graph, TFltPrV& SizePhiV, TFltPr& MaxWhisk) { 00902 TCnComV Cn1ComV; 00903 TSnap::Get1CnCom(Graph, Cn1ComV); 00904 TIntPrV SzVolV; 00905 int MxSize=0; 00906 if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; } 00907 // Graph->SaveEdgeList("g-vol.txt"); TGraphViz::Plot(Graph, gvlNeato, "g-vol.gif"); Fail; } IAssert(vol <= sz*(sz-1)); 00908 printf(" 1-connected components: %d\n", Cn1ComV.Len()); 00909 MaxWhisk = TFltPr(1,1); 00910 for (int c = 0; c < Cn1ComV.Len(); c++) { 00911 const TIntV& NIdV = Cn1ComV[c].NIdV; 00912 const int sz = NIdV.Len(); 00913 if (sz < 2) { continue; } 00914 int vol = 0; // volume is the size of degrees 00915 for (int n = 0; n < sz; n++) { 00916 vol += Graph->GetNI(NIdV[n]).GetOutDeg(); } 00917 SzVolV.Add(TIntPr(sz, vol)); 00918 MxSize += sz; 00919 if (1.0/double(vol) < MaxWhisk.Val2) { MaxWhisk=TFltPr(NIdV.Len(), 1.0/double(vol)); } 00920 } 00921 SzVolV.Sort(false); 00922 // compose clusters 00923 THash<TInt, TIntSet> ItemSetH(MxSize, true); 00924 THash<TInt, TInt> VolH(MxSize, true); 00925 THash<TInt, TFlt> CostH(MxSize, true); 00926 ItemSetH.AddKey(0); VolH.AddKey(0); 00927 TExeTm ExeTm; 00928 // exact up to size 1000 00929 for (int size = 2; size <= TMath::Mn(MxSize, 1000); size++) { 00930 for (int item = 0; item <SzVolV.Len(); item++) { 00931 const int smallSz = size-SzVolV[item].Val1; 00932 if (ItemSetH.IsKey(smallSz)) { 00933 const TIntSet& SmallSet = ItemSetH.GetDat(smallSz); 00934 if (SmallSet.IsKey(item)) { continue; } 00935 const int SmallVol = VolH.GetDat(smallSz); 00936 // combine smaller nad new solution 00937 const double curCost = CostH.IsKey(size) ? double(CostH.GetDat(size)) : double(10e10); 00938 const double newCost = double(SmallSet.Len()+1) / double(SmallVol+SzVolV[item].Val2); 00939 if (curCost < newCost) { continue; } 00940 VolH.AddDat(size, SmallVol+SzVolV[item].Val2); 00941 ItemSetH.AddDat(size, SmallSet); 00942 ItemSetH.GetDat(size).AddKey(item); 00943 CostH.AddDat(size, newCost); 00944 } 00945 } 00946 if (VolH.IsKey(size) && size%100==0) { 00947 printf("\rSize: %d/%d: vol: %d, items: %d/%d [%s]", size, MxSize, VolH.GetDat(size).Val, 00948 ItemSetH.GetDat(size).Len(), SzVolV.Len(), ExeTm.GetStr()); 00949 } 00950 } 00951 // add sizes larger than 1000 00952 printf("\nAdding sizes > 1000 nodes..."); 00953 int partSz=0; double partVol=0.0; 00954 for (int i = 0; i < SzVolV.Len(); i++) { 00955 partSz += SzVolV[i].Val1(); 00956 partVol += SzVolV[i].Val2(); 00957 if (partSz < 1000) { continue; } 00958 const double curCost = CostH.IsKey(partSz) ? double(CostH.GetDat(partSz)) : double(10e10); 00959 const double partPhi = double(i+1)/partVol; 00960 if (partPhi < curCost) { 00961 CostH.AddDat(partSz, partPhi); 00962 } 00963 } 00964 VolH.SortByKey(); 00965 CostH.SortByKey(); 00966 SizePhiV.Gen(CostH.Len(), 0); 00967 SizePhiV.Add(TFltPr(1, 1)); 00968 for (int s = 0; s < CostH.Len(); s++) { 00969 const int size = CostH.GetKey(s); 00970 const double cond = CostH[s]; //double(ItemSetH.GetDat(size).Len())/double(VolH[s]); 00971 SizePhiV.Add(TFltPr(size, cond)); 00972 } 00973 printf("done\n"); 00974 } 00975 00976 // faster greedy version 00977 void TLocClustStat::BagOfWhiskers2(const PUNGraph& Graph, TFltPrV& SizePhiV) { 00978 TCnComV Cn1ComV; 00979 TSnap::Get1CnCom(Graph, Cn1ComV); 00980 TIntPrV SzVolV; 00981 int MxSize=0, TotVol=0; 00982 if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; } 00983 printf(" 1-connected components: %d\n", Cn1ComV.Len()); 00984 for (int c = 0; c < Cn1ComV.Len(); c++) { 00985 const TIntV& NIdV = Cn1ComV[c].NIdV; 00986 const int sz = NIdV.Len(); 00987 if (sz < 2) { continue; } 00988 int vol = 0; // volume is the size of degrees 00989 for (int n = 0; n < sz; n++) { 00990 vol += Graph->GetNI(NIdV[n]).GetOutDeg(); } 00991 SzVolV.Add(TIntPr(sz, vol)); 00992 MxSize += sz; TotVol += vol; 00993 } 00994 printf(" Total size: %d\t Total vol: %d\n", MxSize, TotVol); 00995 SzVolV.Sort(false); 00996 // compose clusters 00997 THash<TFlt, TFlt> SizePhiH(MxSize, true); 00998 for (int i = 0; i < SzVolV.Len(); i++) { 00999 const int Sz = SzVolV[i].Val1(); 01000 const double Phi = 1.0/double(SzVolV[i].Val2()); 01001 if ((! SizePhiH.IsKey(Sz)) || SizePhiH.GetDat(Sz) > Phi) { 01002 SizePhiH.AddDat(Sz, Phi); } 01003 } 01004 double partSz=0.0, partVol=0.0; 01005 for (int i = 0; i < SzVolV.Len(); i++) { 01006 partSz += SzVolV[i].Val1(); 01007 partVol += SzVolV[i].Val2(); 01008 const double partPhi = double(i+1)/partVol; 01009 if ((! SizePhiH.IsKey(partSz)) || partPhi < SizePhiH.GetDat(partSz)) { 01010 SizePhiH.AddDat(partSz, partPhi); } 01011 } 01012 SizePhiV.Gen(SizePhiH.Len()+1, 0); 01013 SizePhiV.Add(TFltPr(1, 1)); 01014 SizePhiH.SortByKey(); 01015 for (int s = 0; s < SizePhiH.Len(); s++) { 01016 SizePhiV.Add(TFltPr(SizePhiH.GetKey(s), SizePhiH[s])); 01017 } 01018 } 01019 01020 void TLocClustStat::MakeExpBins(const TFltPrV& ValV, TFltPrV& NewV) { 01021 if (ValV.Empty()) { NewV.Clr(false); return; } 01022 NewV.Gen(1000, 0); 01023 double PrevBPos = 1, BPos = 1; 01024 int i = 0; 01025 while (BPos <= ValV.Last().Val1) { 01026 //if (TakeValAt == 1) { 01027 // while (i < ValV.Len() && ValV[i].Val1 <= BPos) { i++; } 01028 // NewV.Add(ValV[i-1]); } 01029 //else if (TakeValAt == 2) { 01030 int MinI=-1; double MinCnt=TFlt::Mx; 01031 while (i < ValV.Len() && ValV[i].Val1 <= BPos) { 01032 if (ValV[i].Val2 < MinCnt) { MinCnt=ValV[i].Val2; MinI=i; } i++; } 01033 if (MinI>=0 && MinI<ValV.Len()) { 01034 NewV.Add(ValV[MinI]); } 01035 PrevBPos = (uint) floor(BPos); 01036 BPos *= BinFactor; 01037 if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; } 01038 } 01039 NewV.Add(ValV.Last()); 01040 } 01041 01042 void TLocClustStat::MakeExpBins(const TFltV& ValV, TFltV& NewV) { 01043 if (ValV.Empty()) { NewV.Clr(false); return; } 01044 NewV.Gen(1000, 0); 01045 double PrevBPos = 1, BPos = 1; 01046 int i = 1; 01047 NewV.Add(ValV[0]); 01048 while (BPos <= ValV.Len()) { 01049 int MinI=-1; double MinCnt=TFlt::Mx; 01050 while (i < ValV.Len() && i <= BPos) { 01051 if (ValV[i] < MinCnt) { MinCnt=ValV[i]; MinI=i; } i++; } 01052 if (MinI>=0 && MinI<ValV.Len()) { 01053 NewV.Add(ValV[MinI]); } 01054 PrevBPos = (uint) floor(BPos); 01055 BPos *= BinFactor; 01056 if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; } 01057 } 01058 NewV.Add(ValV.Last()); 01059 } 01060 01061 01063 // Local clustering for a set of graphs (loads ncp-*.tab files) 01064 TNcpGraphsBase::TNcpGraphsBase(const TStr& FNmWc) { 01065 TStr FNm; 01066 for (TFFile FFile(FNmWc); FFile.Next(FNm); ) { 01067 TSsParser Ss(FNm, ssfTabSep, true, false); 01068 int TrueNcpId=-1, WhiskId=-1, RewBestWhiskId=-1, RewId=-1, BestWhiskId=-1; 01069 while (Ss.Next()) { 01070 for (int f = 0; f < Ss.GetFlds(); f++) { 01071 // load ForestFire parameter (fwd burn prob) 01072 if (strstr(Ss[f], "FWD:")) { 01073 TStr S(Ss[f]); const int x = S.SearchStr("FWD:"); 01074 ParamValV.Add(S.GetSubStr(x+4, S.SearchCh(' ', x+1)-1).GetFlt()); 01075 } 01076 // extract column names 01077 if (strstr(Ss[f], "ORIGINAL MIN")!=NULL) { 01078 GNmV.Add(TStr::Fmt("%s %s", FNm.GetSubStr(FNm.SearchCh('.')+1, FNm.SearchChBack('.')-1).CStr(), strchr(Ss[f], '('))); 01079 int Nodes=0,Edges=0; sscanf(strchr(Ss[f], '(')+1, "%d,%d)", &Nodes, &Edges); 01080 GSizeV.Add(TIntPr(Nodes, Edges)); 01081 printf("%s: %d %d\n", GNmV.Last().CStr(), Nodes, Edges); 01082 TrueNcpId=f; 01083 } 01084 if (strstr(Ss[f], "ORIGINAL whisker")!=NULL || strstr(Ss[f], "TRUE whisker")!=NULL) { WhiskId=f; } 01085 if (strstr(Ss[f], "ORIGINAL Best whisker")!=NULL || strstr(Ss[f], "TRUE Best whisker")!=NULL) { BestWhiskId=f; } 01086 if (strstr(Ss[f], "REWIRED MIN")!=NULL || strstr(Ss[f], "RAND MIN")!=NULL) { RewId=f; } 01087 if (strstr(Ss[f], "REWIRED Best whisker")!=NULL || strstr(Ss[f], "RAND Best whisker")!=NULL) { RewBestWhiskId=f; } 01088 } 01089 if (TrueNcpId!=-1 || WhiskId!=-1) { break; } 01090 } 01091 if (TrueNcpId < 0) { printf("%s\n", FNm.GetFMid().CStr()); break; } 01092 if (BestWhiskId < 0) { WhiskerV.Add(TFltPr(1,1)); } 01093 if (RewBestWhiskId < 0) { RewWhiskerV.Add(TFltPr(1,1)); } 01094 NcpV.Add(); WhiskNcpV.Add(); RewNcpV.Add(); 01095 TFltPrV& Ncp = NcpV.Last(); 01096 TFltPrV& WhiskNcp = WhiskNcpV.Last(); 01097 TFltPrV& RewNcp = RewNcpV.Last(); 01098 bool Once=false, Once2=false; 01099 while (Ss.Next()) { 01100 if (TrueNcpId < Ss.GetFlds()&& Ss.IsFlt(TrueNcpId)) { Ncp.Add(TFltPr(Ss.GetFlt(TrueNcpId-1), Ss.GetFlt(TrueNcpId))); } 01101 if (WhiskId>=0 && WhiskId < Ss.GetFlds() && Ss.IsFlt(WhiskId)) { WhiskNcp.Add(TFltPr(Ss.GetFlt(WhiskId-1), Ss.GetFlt(WhiskId))); } 01102 if (RewId >=0 && RewId < Ss.GetFlds()&& Ss.IsFlt(RewId)) { RewNcp.Add(TFltPr(Ss.GetFlt(RewId-1), Ss.GetFlt(RewId))); } 01103 if (BestWhiskId>=0 && BestWhiskId < Ss.GetFlds() && ! Once) { Once=true; 01104 int W2=BestWhiskId-1; while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; } 01105 WhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(BestWhiskId))); } 01106 if (RewBestWhiskId>=0 && RewBestWhiskId < Ss.GetFlds() && ! Once2) { Once2=true; 01107 int W2=RewBestWhiskId-1; while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; } 01108 RewWhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(RewBestWhiskId))); } 01109 } 01110 printf(" ncp:%d whisk:%d rew:%d\n", NcpV.Last().Len(), WhiskNcpV.Last().Len(), RewNcpV.Last().Len()); 01111 } 01112 IAssert(NcpV.Len() == GSizeV.Len()); 01113 } 01114 TNcpGraphsBase::TNcpGraphsBase(TSIn& SIn) : GNmV(SIn), GSizeV(SIn), WhiskerV(SIn), 01115 RewWhiskerV(SIn),NcpV(SIn), RewNcpV(SIn),WhiskNcpV(SIn) { 01116 } 01117 01118 void TNcpGraphsBase::Save(TSOut& SOut) const { 01119 GNmV.Save(SOut); GSizeV.Save(SOut); 01120 WhiskerV.Save(SOut); RewWhiskerV.Save(SOut); NcpV.Save(SOut); 01121 RewNcpV.Save(SOut); WhiskNcpV.Save(SOut); 01122 } 01123 01124 void TNcpGraphsBase::Impose(const TStr& OutFNm, const int& TopN, const bool& Smooth) { 01125 TGnuPlot GP(OutFNm); 01126 for (int i = 0; i < TMath::Mn(NcpV.Len(), TopN); i++) { 01127 GP.AddPlot(NcpV[i], gpwLines, GNmV[i], Smooth?"smooth csplines":""); 01128 } 01129 GP.SetScale(gpsLog10XY); 01130 GP.SavePng(); 01131 } 01132 01133 double TNcpGraphsBase::GetXAtMinY(const TFltPrV& Ncp, const int& NNodes) { 01134 double MinX1=1, MinY1=1; 01135 for (int k = 0; k < Ncp.Len(); k++) { 01136 if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } } 01137 return MinX1<1 ? 1 : MinX1; 01138 //if (NNodes < 1000) return MinX1; 01139 // smooth 01140 /*const int WndSize = 50; 01141 double MinX=1, MinY=1; 01142 TFltPrV Ncp2V; 01143 for (int k = 0; k < Ncp.Len(); k++) { 01144 int WndSz = k > WndSize ? WndSize : k; 01145 double SmoothVal=0.0, SmoothCnt=0; 01146 for (int i = -WndSz; i <= WndSz; i++) { 01147 if (k+i > -1 && k+i < Ncp.Len()) { SmoothCnt+=pow(1.1, -abs(i)); 01148 SmoothVal+=pow(1.1, -abs(i)) * Ncp[k+i].Val2; } 01149 } 01150 SmoothVal = SmoothVal/SmoothCnt; 01151 Ncp2V.Add(TFltPr(Ncp[k].Val1, SmoothVal)); 01152 if (SmoothVal<MinY) { MinX=Ncp[k].Val1; MinY=SmoothVal; } 01153 } 01154 static int cnt = 0; 01155 if (Ncp2V.Len() > 10 && cnt < 10) { 01156 TGnuPlot GP(TStr::Fmt("test-%03d", ++cnt)); 01157 GP.SetScale(gpsLog10XY); 01158 GP.AddPlot(Ncp, gpwLines, "true"); 01159 GP.AddPlot(Ncp2V, gpwLines, "smooth"); 01160 GP.SavePng(); 01161 } 01162 if (MinX < 1) { return 1; } else if (MinX > 1000) { return 1000; } 01163 return MinX;*/ 01164 } 01165 01166 TFltPr TNcpGraphsBase::GetXYAtMinY(const TFltPrV& Ncp, const int& NNodes) { 01167 double MinX1=1, MinY1=1; 01168 for (int k = 0; k < Ncp.Len(); k++) { 01169 if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } } 01170 return TFltPr(MinX1<1?1:MinX1, MinY1); 01171 } 01172 01173 void TNcpGraphsBase::PlotNcpMin(const TStr& OutFNm, const bool& VsGraphN) { 01174 TFltPrV GSzMinK, GSzMinY; 01175 for (int i = 0; i < NcpV.Len(); i++) { 01176 const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1); 01177 const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1(); 01178 GSzMinK.Add(TFltPr(X, XYAtMinY.Val1)); 01179 GSzMinY.Add(TFltPr(X, XYAtMinY.Val2)); 01180 } 01181 GSzMinK.Sort(); GSzMinY.Sort(); 01182 const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size"; 01183 TGnuPlot::PlotValV(GSzMinK, TStr("bestK-")+OutFNm, "Network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints); 01184 TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestK-")+OutFNm, "Network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints); 01185 } 01186 01187 void TNcpGraphsBase::SaveTxtNcpMin(const TStr& OutFNm, const bool& VsGraphN) { 01188 TVec<TQuad<TInt, TInt, TFlt, TStr> > GSzMinK; 01189 for (int i = 0; i < NcpV.Len(); i++) { 01190 const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1); 01191 const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1(); 01192 GSzMinK.Add(TQuad<TInt, TInt, TFlt, TStr>((int)X, (int)XYAtMinY.Val1(), XYAtMinY.Val2, GNmV[i])); 01193 } 01194 GSzMinK.Sort(); 01195 FILE *F = fopen(TStr::Fmt("bestK-%s.txt", OutFNm.CStr()).CStr(), "wt"); 01196 fprintf(F, "#nodes\tbestK\tcondAtBestK\tgraph name\n"); 01197 for (int i = 0; i < GSzMinK.Len(); i++) { 01198 fprintf(F, "%d\t%d\t%f\t%s\n", GSzMinK[i].Val1(), GSzMinK[i].Val2(), GSzMinK[i].Val3(), GSzMinK[i].Val4.CStr()); 01199 } 01200 fclose(F); 01201 } 01202 01203 void TNcpGraphsBase::PlotRewNcpMin(const TStr& OutFNm, const bool& VsGraphN) { 01204 TFltPrV GSzMinK, GSzMinY; 01205 for (int i = 0; i < NcpV.Len(); i++) { 01206 const TFltPr XYAtMinY = GetXYAtMinY(RewNcpV[i], GSizeV[i].Val1); 01207 const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1(); 01208 GSzMinK.Add(TFltPr(X, XYAtMinY.Val1)); 01209 GSzMinY.Add(TFltPr(X, XYAtMinY.Val2)); 01210 } 01211 GSzMinK.Sort(); GSzMinY.Sort(); 01212 const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size"; 01213 TGnuPlot::PlotValV(GSzMinK, TStr("bestR-")+OutFNm, "Rewired network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints); 01214 TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestR-")+OutFNm, "Rewired network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints); 01215 } 01216 01217 void TNcpGraphsBase::PlotBestWhisker(const TStr& OutFNm, const bool& VsGraphN) { 01218 TFltPrV GSzMinK, GSzMinY; 01219 for (int i = 0; i < GSizeV.Len(); i++) { 01220 if (WhiskerV[i].Val1()>0) { 01221 const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1(); 01222 GSzMinK.Add(TFltPr(X, WhiskerV[i].Val1())); 01223 GSzMinY.Add(TFltPr(X, WhiskerV[i].Val2())); 01224 } 01225 } 01226 GSzMinK.Sort(); GSzMinY.Sort(); 01227 const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size"; 01228 TGnuPlot::PlotValV(GSzMinK, TStr("bestW-")+OutFNm, "Network", XLabel, "size of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints); 01229 TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestW-")+OutFNm, "Network", XLabel, "conductance of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints); 01230 } 01231 01232 void TNcpGraphsBase::PlotRewBestWhisker(const TStr& OutFNm, const bool& VsGraphN) { 01233 TFltPrV GSzMinK, GSzMinY; 01234 for (int i = 0; i < GSizeV.Len(); i++) { 01235 if (WhiskerV[i].Val1()>0) { 01236 const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1(); 01237 GSzMinK.Add(TFltPr(X, RewWhiskerV[i].Val1())); 01238 GSzMinY.Add(TFltPr(X, RewWhiskerV[i].Val2())); 01239 } 01240 } 01241 GSzMinK.Sort(); GSzMinY.Sort(); 01242 const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size"; 01243 TGnuPlot::PlotValV(GSzMinK, TStr("bestWR-")+OutFNm, "Rewired network", XLabel, "size of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints); 01244 TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestWR-")+OutFNm, "Rewired network", XLabel, "conductance of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints); 01245 } 01246 01247 void TNcpGraphsBase::PlotAvgNcp(const TStr& OutFNm, const TVec<TFltPrV>& NcpVec, const int& MinSz, const double& MaxMinY) { 01248 THash<TFlt, TMom> MomH; 01249 int Cnt=0; 01250 for (int i = 0; i < NcpVec.Len(); i++) { 01251 if (GSizeV[i].Val1 < MinSz) { continue; } 01252 const TFltPrV& Ncp = NcpVec[i]; 01253 double MinX=1, MinY=1; 01254 for (int k = 0; k < Ncp.Len(); k++){ 01255 if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } } 01256 if (MinY > MaxMinY) { continue; } Cnt++; 01257 //const double Coef = (1-0.0001)/(1.0-MinY); 01258 for (int k = 0; k < Ncp.Len(); k++){ 01259 //MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 2)),2)).Add(0.0001+(Ncp[k].Val2-MinY)*Coef); 01260 MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 1)),0)).Add(Ncp[k].Val2); 01261 } 01262 } 01263 TGnuPlot::PlotValMomH(MomH, OutFNm, "", "size of the cluster, k", "phi(k)", gpsLog, gpwLines, true, true,true,true); 01264 printf(" minSz: %d, miny %g\t%d\n", MinSz, MaxMinY, Cnt); 01265 } 01266 01267 void TNcpGraphsBase::SaveTxt(const TStr& OutFNm) { 01268 FILE *F=fopen(OutFNm.CStr(), "wt"); 01269 fprintf(F, "#Nodes\tEdges\tBestK\tPhi(BestK)\tMaxWhiskN\tPhi(MaxWhisk)\tGraph\n"); 01270 for (int i = 0; i < NcpV.Len(); i++) { 01271 const TFltPrV& Ncp = NcpV[i]; 01272 double MinX=1, MinY=1; 01273 for (int k = 0; k < Ncp.Len(); k++){ 01274 if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } } 01275 fprintf(F, "%d\t%d\t%d\t%f\t%d\t%f\t%s\n", GSizeV[i].Val1(), GSizeV[i].Val2(), 01276 int(MinX), MinY, int(WhiskerV[i].Val1), WhiskerV[i].Val2(), GNmV[i].CStr()); 01277 } 01278 fclose(F); 01279 } 01280 01281 void TNcpGraphsBase::PlotDataset(const TStr& InFNmWc, const TStr& OutFNm, const bool& ImposeNcp, const bool& VsGraphN) { 01282 TNcpGraphsBase NcpBs(InFNmWc); 01283 //NcpBs.Save(TFOut(OutFNm+".NcpBase")); 01284 //TNcpGraphsBase NcpBs(TFIn(OutFNm+".NcpBase")); 01285 if (ImposeNcp) { 01286 NcpBs.Impose(OutFNm+"5R", 5, false); NcpBs.Impose(OutFNm+"5S", 5, true); 01287 NcpBs.Impose(OutFNm+"R", 10, false); NcpBs.Impose(OutFNm+"S", 10, true); 01288 } 01289 NcpBs.PlotNcpMin(OutFNm, VsGraphN); 01290 //NcpBs.SaveTxtNcpMin(OutFNm, VsGraphN); 01291 NcpBs.PlotRewNcpMin(OutFNm, VsGraphN); 01292 NcpBs.PlotBestWhisker(OutFNm, VsGraphN); 01293 NcpBs.PlotRewBestWhisker(OutFNm, VsGraphN); 01294 01295 //NcpBs.PlotAvgNcp(OutFNm+"AvgNcp", NcpBs.NcpV, 1, 1); 01296 //NcpBs.PlotAvgNcp(OutFNm+"AvgRewNcp", NcpBs.RewNcpV, 1, 1); 01297 /*NcpBs.PlotAvgNcp(OutFNm+"AvgNcp2", NcpBs.NcpV, 100, 0.1); 01298 NcpBs.PlotAvgNcp(OutFNm+"AvgNcp3", NcpBs.NcpV, 100, 0.01); 01299 NcpBs.PlotAvgNcp(OutFNm+"AvgNcp4", NcpBs.NcpV, 100, 0.001); 01300 NcpBs.PlotAvgNcp(OutFNm+"AvgNcp5", NcpBs.NcpV, 100, 0.0001); 01301 NcpBs.PlotAvgNcp(OutFNm+"RewNcp1", NcpBs.RewNcpV, 1000, 1); 01302 NcpBs.PlotAvgNcp(OutFNm+"RewNcp2", NcpBs.RewNcpV, 100, 0.1); 01303 NcpBs.PlotAvgNcp(OutFNm+"RewNcp3", NcpBs.RewNcpV, 100, 0.01); 01304 NcpBs.PlotAvgNcp(OutFNm+"RewNcp4", NcpBs.RewNcpV, 100, 0.001); 01305 NcpBs.PlotAvgNcp(OutFNm+"RewNcp5", NcpBs.RewNcpV, 100, 0.0001); 01306 NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp1", NcpBs.WhiskNcpV, 1000, 1); 01307 NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp2", NcpBs.WhiskNcpV, 100, 0.1); 01308 NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp3", NcpBs.WhiskNcpV, 100, 0.01); 01309 NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp4", NcpBs.WhiskNcpV, 100, 0.001); 01310 NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp5", NcpBs.WhiskNcpV, 100, 0.0001); 01311 NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp6", NcpBs.WhiskNcpV, 100, 0.00004); 01312 NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp7", NcpBs.WhiskNcpV, 100, 0.00005);*/ 01313 //NcpBs.SaveTxt(OutFNm+"bestK.txt"); 01314 }