SNAP Library , Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // Time Network 00003 TTimeNet& TTimeNet::operator = (const TTimeNet& TimeNet) { 00004 if (this != &TimeNet) { 00005 TNet::operator=(TimeNet); 00006 } 00007 return *this; 00008 } 00009 00010 PTimeNet TTimeNet::GetSubGraph(const TIntV& NIdV) const { 00011 PTimeNet NewNetPt = TTimeNet::New(); 00012 TTimeNet& NewNet = *NewNetPt; 00013 NewNet.Reserve(NIdV.Len(), -1); 00014 int node, edge; 00015 TNodeI NI; 00016 for (node = 0; node < NIdV.Len(); node++) { 00017 NewNet.AddNode(NIdV[node], GetNDat(NIdV[node])); // also copy the node data 00018 } 00019 for (node = 0; node < NIdV.Len(); node++) { 00020 NI = GetNI(NIdV[node]); 00021 const int SrcNId = NI.GetId(); 00022 for (edge = 0; edge < NI.GetOutDeg(); edge++) { 00023 const int OutNId = NI.GetOutNId(edge); 00024 if (NewNet.IsNode(OutNId)) { 00025 NewNet.AddEdge(SrcNId, OutNId); } 00026 } 00027 } 00028 NewNet.Defrag(); 00029 return NewNetPt; 00030 } 00031 00032 PTimeNENet TTimeNet::GetTimeNENet() const { 00033 TIntV NIdV; GetNIdByTm(NIdV); 00034 PTimeNENet OutNet = TTimeNENet::New(GetNodes(), GetEdges()); 00035 for (int i = 0; i < NIdV.Len(); i++) { 00036 const int Src = NIdV[i]; 00037 const TTimeNet::TNodeI NI = GetNI(Src); 00038 const TSecTm SrcTm = NI.GetDat(); 00039 if (! OutNet->IsNode(Src)) { OutNet->AddNode(Src, SrcTm); } 00040 for (int e = 0; e < NI.GetOutDeg(); e++) { 00041 if (! OutNet->IsNode(NI.GetOutNId(e))) { OutNet->AddNode(NI.GetOutNId(e), SrcTm); } 00042 OutNet->AddEdge(Src, NI.GetOutNId(e), -1, SrcTm); 00043 } 00044 } 00045 return OutNet; 00046 } 00047 00048 void TTimeNet::GetNIdByTm(TIntV& NIdV) const { 00049 TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0); 00050 for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) { 00051 TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); } 00052 TmToNIdV.Sort(); 00053 NIdV.Gen(GetNodes(), 0); 00054 for (int i = 0; i < TmToNIdV.Len(); i++) { 00055 NIdV.Add(TmToNIdV[i].Dat); } 00056 } 00057 00058 // put nodes into buckets by TmUnit 00059 void TTimeNet::GetTmBuckets(const TTmUnit& TmUnit, TTmBucketV& TmBucketV) const { 00060 THash<TInt, TIntV> TmIdToNIdVH; 00061 for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) { 00062 const int TmId = NodeI().Round(TmUnit); 00063 if (! TmIdToNIdVH.IsKey(TmId)) TmIdToNIdVH.AddKey(TmId); 00064 TmIdToNIdVH.GetDat(TmId).Add(NodeI.GetId()); 00065 } 00066 TVec<TPair<TInt, TIntV> > TmIdNIdVV; 00067 TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV); 00068 TmIdNIdVV.Sort(); 00069 TmBucketV.Gen(TmIdNIdVV.Len()); 00070 for (int i = 0; i < TmIdNIdVV.Len(); i++) { 00071 TTmBucket& Bucket = TmBucketV[i]; 00072 Bucket.BegTm = TmIdNIdVV[i].Val1; 00073 Bucket.NIdV = TmIdNIdVV[i].Val2; 00074 } 00075 } 00076 00077 void TTimeNet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const { 00078 TIntV NIdV; 00079 GetNIdByTm(NIdV); 00080 TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0); 00081 for (int i = 0; i < NIdV.Len(); i++) { 00082 const int b = i/NodesPerBucket; 00083 if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); } 00084 TmBucketV[b].NIdV.Add(NIdV[i]); 00085 } 00086 } 00087 00088 PGStatVec TTimeNet::TimeGrowth(const TTmUnit& TmUnit, const TFSet& TakeStat, const TSecTm& StartTm) const { 00089 PGStatVec GrowthStat = new TGStatVec(TmUnit, TakeStat); 00090 TTmBucketV TmBucketV; 00091 GetTmBuckets(TmUnit, TmBucketV); 00092 TIntV NodeIdV; 00093 TExeTm ExeTm; 00094 for (int t = 0; t < TmBucketV.Len(); t++) { 00095 NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T 00096 printf("\n=== %d/%d] %s (%d nodes)\n", t+1, TmBucketV.Len(), 00097 TmBucketV[t].BegTm.GetStr().CStr(), NodeIdV.Len()); ExeTm.Tick(); 00098 if (TmBucketV[t].BegTm < StartTm) continue; 00099 //PNGraph PreGraph = GetSubGraph(NodeIdV, true); // renumber nodes 00100 PNGraph PreGraph = TSnap::ConvertSubGraph<PNGraph>(PTimeNet((TTimeNet*)this), NodeIdV); // don't renumber nodes 00101 GrowthStat->Add(PreGraph, TmBucketV[t].BegTm); 00102 } 00103 return GrowthStat; 00104 } 00105 00106 void TTimeNet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit, 00107 const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc, const bool& AlsoRewire) const { 00108 const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr(); 00109 TTmBucketV TmBucketV; 00110 GetTmBuckets(TmUnit, TmBucketV); 00111 TIntV NodeIdV; 00112 TExeTm ExeTm, Run1Tm; 00113 TFltTrV TmDiamV, NdsDiamV; 00114 TFltTrV RwTmDiamV, RwNdsDiamV; 00115 for (int t = 0; t < TmBucketV.Len(); t++) { 00116 NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T 00117 printf("\n*** %d/%d] at %s (%d nodes)\n", t+1, TmBucketV.Len(), 00118 TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), NodeIdV.Len()); ExeTm.Tick(); 00119 if (TmBucketV[t].BegTm < StartTm) continue; 00120 //PUNGraph PreGraph = GetSubUNGraph(NodeIdV, true); 00121 PUNGraph PreGraph = TSnap::ConvertSubGraph<PUNGraph>(PTimeNet((TTimeNet*)this), NodeIdV); 00122 { TMom Mom; 00123 for (int r = 0; r < NDiamRuns; r++) { 00124 printf("%d...", r+1); Run1Tm.Tick(); 00125 const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph) : PreGraph); 00126 Mom.Add(EffDiam); printf("[%s]\r", Run1Tm.GetTmStr()); 00127 } 00128 Mom.Def(); 00129 TmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev())); 00130 NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev())); 00131 NdsDiamV.Sort(); 00132 printf(" [%s] \n", ExeTm.GetTmStr()); } 00133 if (AlsoRewire) { 00134 //PUNGraph RwGraph = TGGen::GenRndDegSeqS(PreGraph, 50, TInt::Rnd); // edge switching model 00135 TIntV DegSeqV(PreGraph->GetNodes(), 0); 00136 for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) { DegSeqV.Add(NI.GetDeg()); } 00137 PUNGraph RwGraph = TSnap::GenConfModel(DegSeqV, TInt::Rnd); 00138 printf("Configuration model: (%d, %d) --> (%d, %d)\n", PreGraph->GetNodes(), PreGraph->GetEdges(), RwGraph->GetNodes(), RwGraph->GetEdges()); 00139 TMom Mom; 00140 for (int r = 0; r < NDiamRuns; r++) { 00141 printf(" diam run %d...", r+1); Run1Tm.Tick(); 00142 const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph):PreGraph); 00143 Mom.Add(EffDiam); printf(" current run [%s]\n", Run1Tm.GetTmStr()); 00144 } 00145 Mom.Def(); 00146 RwTmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev())); 00147 RwNdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev())); 00148 RwNdsDiamV.Sort(); 00149 printf("done with diameter. Total time [%s] \n", ExeTm.GetTmStr()); 00150 } 00151 // plot 00152 { TGnuPlot GnuPlot("diamEff-T."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges())); 00153 GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), WccStr+"Effective Diameter"); 00154 GnuPlot.AddErrBar(TmDiamV, "True", ""); 00155 if (! RwTmDiamV.Empty()) { GnuPlot.AddErrBar(RwTmDiamV, "Rewired", "");} 00156 GnuPlot.SavePng(); } 00157 { TGnuPlot GnuPlot("diamEff-N."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges())); 00158 GnuPlot.SetXYLabel("NODES", WccStr+"Effective Diameter"); 00159 GnuPlot.AddErrBar(NdsDiamV, "True", ""); 00160 if (! RwNdsDiamV.Empty()) { GnuPlot.AddErrBar(RwNdsDiamV, "Rewired", "");} 00161 GnuPlot.SavePng(); } 00162 } 00163 } 00164 00165 // pretend we only have link data starting in PostTmDiam 00166 // compare the diameter of the nodes after PostTmDiam with diameter 00167 // of the nodes after PostTmDiam but *also* using nodes and edges 00168 // from before PostTmDiam 00169 void TTimeNet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit, 00170 const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam) const { 00171 printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n %s group by %s.\n", 00172 FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr()); 00173 printf(" Delete out-edges of pre time %s nodes.\n Take subgraph of post year %s subgraph.\n\n", 00174 DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr()); 00175 const int NDiamRuns = 10; 00176 const int NSamples = 100; 00177 //PUNGraph FullGraph = GetUNGraph(); 00178 PUNGraph FullGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this)); 00179 // delete past 00180 if (DelPreTmEdges.IsDef()) { 00181 int NDelNodes = 0, NDelEdges = 0; 00182 printf("Deleting pre-%s node out-links\n", DelPreTmEdges.GetStr().CStr()); 00183 for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) { 00184 if (NodeI() < DelPreTmEdges) { 00185 const int NodeId = NodeI.GetId(); 00186 for (int edge = 0; edge < NodeI.GetOutDeg(); edge++) { 00187 FullGraph->DelEdge(NodeId, NodeI.GetOutNId(edge)); } 00188 NDelEdges += NodeI.GetOutDeg(); NDelNodes++; 00189 } 00190 } 00191 printf(" Deleted %d nodes out-edges (%d edges total).\n", NDelNodes, NDelEdges); 00192 FullGraph->Defrag(true); 00193 } 00194 PGStatVec GrowthStat = TGStatVec::New(TmUnit); 00195 TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev; 00196 TIntV NodeIdV; 00197 TExeTm ExeTm; 00198 TTmBucketV TmBucketV; 00199 GetTmBuckets(TmUnit, TmBucketV); 00200 for (int t = 0; t < TmBucketV.Len(); t++) { 00201 printf("\nGraph: %s (%d / %d) [%s]\n", TmBucketV[t].BegTm.GetTmStr().CStr(), 00202 t+1, TmBucketV.Len(), TExeTm::GetCurTm()); 00203 // up-to-year subgraph 00204 NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T 00205 if (TmBucketV[t].BegTm < PostTmDiam) { continue; } 00206 const PUNGraph PreGraph = TSnap::GetSubGraph(FullGraph, NodeIdV, true); 00207 const PUNGraph WccGraph = TSnap::GetMxWcc(PreGraph); 00208 TIntV PostYearNIdV, WccPostYearNIdV; 00209 for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) { 00210 if (GetNDat(NI.GetId()) >= PostTmDiam) { 00211 PostYearNIdV.Add(NI.GetId()); 00212 if (WccGraph->IsNode(NI.GetId())) { WccPostYearNIdV.Add(NI.GetId()); } 00213 } 00214 } 00215 TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom; 00216 // diameter of PostYearDiam subgraph using whole graph (all edges) 00217 int FullDiam; double EffDiam; 00218 for (int r = 0; r < NDiamRuns; r++) { 00219 if (! PostYearNIdV.Empty()) { 00220 TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false, EffDiam, FullDiam); 00221 PreDiamMom.Add(FullDiam); PreEffDiamMom.Add(EffDiam); 00222 } 00223 if (! WccPostYearNIdV.Empty()) { 00224 TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false, EffDiam, FullDiam); 00225 WccDiamMom.Add(FullDiam); WccEffDiamMom.Add(EffDiam); 00226 } 00227 printf(" diam: %d [%s] \r", r+1, ExeTm.GetTmStr()); ExeTm.Tick(); 00228 } 00229 PreDiamMom.Def(); PreEffDiamMom.Def(); 00230 WccDiamMom.Def(); WccEffDiamMom.Def(); 00231 // save stat 00232 PGStat GraphStatPt = GrowthStat->Add(TmBucketV[t].BegTm); 00233 TGStat& GS = *GraphStatPt; 00234 GS.TakeBasicStat(PreGraph, false); 00235 GS.TakeBasicStat(WccGraph, true); 00236 GS.SetVal(gsvFullDiam, PreDiamMom.GetMean()); // mean 00237 GS.SetVal(gsvEffDiam, PreEffDiamMom.GetMean()); 00238 GS.SetVal(gsvFullWccDiam, WccDiamMom.GetMean()); 00239 GS.SetVal(gsvEffWccDiam, WccEffDiamMom.GetMean()); 00240 GS.SetVal(gsvFullDiamDev, PreDiamMom.GetSDev()); // variance 00241 GS.SetVal(gsvEffDiamDev, PreEffDiamMom.GetSDev()); 00242 GS.SetVal(gsvFullWccDiamDev, WccDiamMom.GetSDev()); 00243 GS.SetVal(gsvEffWccDiamDev, WccEffDiamMom.GetSDev()); 00244 { TFOut FOut("growth."+FNmPref+".gStatVec"); GrowthStat->Save(FOut); } 00245 GrowthStat->SaveTxt(FNmPref, TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%s\nPostYearDiam\t%s\n", 00246 Desc.CStr(), DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr())); 00247 } 00248 // diameter plots 00249 //GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.", 00250 // DelPreEdges, PostYearDiam));*/ 00251 } 00252 00253 void TTimeNet::PlotCCfOverTm(const TStr& FNmPref, TStr Desc, const TTmUnit& TmUnit, const int& NodesBucket) const { 00254 if (Desc.Empty()) { Desc = FNmPref; } 00255 TTimeNet::TTmBucketV TmBucketV; 00256 TStr XLbl; 00257 if (TmUnit == tmuNodes) { 00258 XLbl = "Number of nodes (time)"; 00259 IAssert(NodesBucket > 0); 00260 GetNodeBuckets(NodesBucket, TmBucketV); } 00261 else { 00262 XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr()); 00263 GetTmBuckets(TmUnit, TmBucketV); 00264 } 00265 TIntV NodeIdV; 00266 TFltPrV DegToCCfV, CcfV, OpClV, OpV; 00267 TVec<TTuple<TFlt, 4> > OpenClsV; 00268 TTuple<TFlt, 4> Tuple; 00269 TExeTm ExeTm; 00270 int XVal = 0; 00271 printf("Clustering coefficient over time:\n %d edges, %d edges per bucket, %d buckets \n", GetEdges(), 100000, TmBucketV.Len()); 00272 PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this)); 00273 for (int t = 0; t < TmBucketV.Len(); t++) { 00274 printf("\r %d/%d: ", t+1, TmBucketV.Len()); 00275 NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T 00276 int Open=0, Close=0; 00277 const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV); 00278 const double CCf = TSnap::GetClustCf(Graph, DegToCCfV, Open, Close); 00279 if (TmUnit == tmuNodes) { XVal = Graph->GetNodes(); } 00280 else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); } 00281 CcfV.Add(TFltPr(XVal, CCf)); 00282 OpClV.Add(TFltPr(XVal, Open+Close==0?0:Close/(Open+Close))); 00283 OpV.Add(TFltPr(XVal, Open==0?0:Close/Open)); 00284 Tuple[0]=Graph->GetNodes(); 00285 Tuple[1]=Graph->GetEdges(); 00286 Tuple[2]=Close; Tuple[3]=Open; 00287 OpenClsV.Add(Tuple); 00288 printf(" %s", ExeTm.GetStr()); 00289 TGnuPlot::PlotValV(DegToCCfV, TStr::Fmt("ccfAt%02dtm.%s", t+1, FNmPref.CStr()), 00290 TStr::Fmt("%s. At time %d. Clustering Coefficient. G(%d,%d)", Desc.CStr(), t+1, Graph->GetNodes(), Graph->GetEdges()), 00291 "Degree", "Clustering coefficient", gpsLog10XY, false); 00292 } 00293 TGnuPlot::PlotValV(CcfV, "ccfOverTm."+FNmPref, Desc+". Average Clustering Coefficient", XLbl, "Average clustering coefficient", gpsAuto, false); 00294 TGnuPlot::PlotValV(OpClV, "ClsOpnTr1."+FNmPref, Desc+". Close/(Open+Closed) triads", XLbl, "Close / (Open+Closed) triads", gpsAuto, false); 00295 TGnuPlot::PlotValV(OpV, "ClsOpnTr2."+FNmPref, Desc+". Close/Open triads", XLbl, "Close / Open triads", gpsAuto, false); 00296 TGnuPlot::SaveTs(OpenClsV, "ClsOpnTr."+FNmPref+".tab", TStr::Fmt("#%s\n#Nodes\tEdges\tClosed\tOpenTriads", Desc.CStr())); 00297 printf("\n"); 00298 } 00299 00300 void TTimeNet::PlotMedianDegOverTm(const TStr& FNmPref, const TTmUnit& TmUnit, const int& NodesPerBucket) const { 00301 TTimeNet::TTmBucketV TmBucketV; 00302 TStr XLbl; 00303 if (TmUnit == tmuNodes) { 00304 XLbl = "Number of nodes (time)"; IAssert(NodesPerBucket > 0); 00305 GetNodeBuckets(NodesPerBucket, TmBucketV); } 00306 else { 00307 XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr()); 00308 GetTmBuckets(TmUnit, TmBucketV); } 00309 printf("\n\n%s\nMedian degree over time:\n %d edges, %d edges per bucket, %d buckets \n", FNmPref.CStr(), GetEdges(), NodesPerBucket, TmBucketV.Len()); 00310 TFltPrV MedDegV, MedInDegV, MedOutDegV; 00311 TIntV NodeIdV; 00312 int XVal; 00313 PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this)); 00314 PNGraph NGraph = TSnap::ConvertGraph<PNGraph>(PTimeNet((TTimeNet*)this)); 00315 FILE *F = fopen(("gStat-"+FNmPref+".tab").CStr(), "wt"); 00316 fprintf(F, "UndirNodes\tUndirEdges\tUndirNonZNodes\tMedianDeg\tMeanDeg\tDirNodes\tDirEdges\tDirNonzNodes\tMedInDeg\tMedOutDeg\tMeanInDeg\tMeanOutDeg\n"); 00317 for (int t = 0; t < TmBucketV.Len(); t++) { 00318 printf("\r %d/%d: ", t+1, TmBucketV.Len()); 00319 NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T 00320 if (TmUnit == tmuNodes) { XVal = NodeIdV.Len(); } 00321 else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); } 00322 // un graph 00323 { const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV); TMom Mom; 00324 for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { if (NI.GetOutDeg()>0) { Mom.Add(NI.GetOutDeg());} } 00325 Mom.Def(); MedDegV.Add(TFltPr(XVal, Mom.GetMedian())); 00326 fprintf(F, "%d\t%d\t%d\t%f\t%f", Graph->GetNodes(), Graph->GetEdges(), TSnap::CntNonZNodes(Graph), (float)Mom.GetMedian(), (float)Mom.GetMean()); } 00327 // directed graph 00328 { const PNGraph Graph = TSnap::GetSubGraph<PNGraph>(NGraph, NodeIdV); TMom MomOut, MomIn; 00329 for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00330 if (NI.GetOutDeg()>0) { MomOut.Add(NI.GetOutDeg()); } 00331 if (NI.GetInDeg()>0) { MomIn.Add(NI.GetInDeg()); } } 00332 MomOut.Def(); MedOutDegV.Add(TFltPr(XVal, MomOut.GetMedian())); 00333 MomIn.Def(); MedInDegV.Add(TFltPr(XVal, MomIn.GetMedian())); 00334 fprintf(F, "\t%d\t%d\t%d\t%f\t%f\t%f\t%f\n", Graph->GetNodes(), Graph->GetEdges(), (int)TSnap::CntNonZNodes(Graph), (float)MomIn.GetMedian(), (float)MomOut.GetMedian(), (float)MomIn.GetMean(), (float)MomOut.GetMean()); } 00335 } 00336 fclose(F); 00337 TGnuPlot::PlotValV(MedDegV, "medDeg."+FNmPref, FNmPref+" Median degree", TTmInfo::GetTmUnitStr(TmUnit), "Median degree"); 00338 TGnuPlot::PlotValV(MedOutDegV, "medOutDeg."+FNmPref, FNmPref+" Median OUT degree", TTmInfo::GetTmUnitStr(TmUnit), "Median OUT degree"); 00339 TGnuPlot::PlotValV(MedInDegV, "medInDeg."+FNmPref, FNmPref+" Median IN degree", TTmInfo::GetTmUnitStr(TmUnit), "Median IN degree"); 00340 } 00341 00342 // load Arxiv affiliation network (papers to authors bipartite graph) 00343 // <set1_id> <time> <set2_id1> <set2_id2> ... (all ids have to be unique) 00344 PTimeNet TTimeNet::LoadBipartite(const TStr& InFNm) { 00345 PTimeNet TimeNetPt = TTimeNet::New(); 00346 TTimeNet& TimeNet = *TimeNetPt; 00347 PSs Ss = TSs::LoadTxt(ssfTabSep, InFNm.CStr()); 00348 TIntH Set1IdH; // paper ids 00349 TStrV StrTimeV; 00350 for (int y = 0; y < Ss->GetYLen(); y++) { 00351 if (Ss->At(0, y)[0] == '#') continue; // skip comments 00352 if (Ss->GetXLen(y) < 3) continue; // there must be at least one author 00353 const int& SrcId = Ss->At(0, y).GetInt(); 00354 IAssert(! Set1IdH.IsKey(SrcId)); 00355 IAssert(! TimeNet.IsNode(SrcId)); 00356 Set1IdH.AddKey(SrcId); 00357 Ss->At(1, y).SplitOnAllCh('-', StrTimeV); 00358 const int Year = StrTimeV[0].GetInt(); 00359 const int Month = StrTimeV[1].GetInt(); 00360 const int Day = StrTimeV[2].GetInt(); 00361 const TSecTm NodeTm(Year, Month, Day); 00362 TimeNet.AddNode(SrcId, NodeTm); 00363 for (int dst = 2; dst < Ss->GetXLen(y); dst++) { 00364 const int DstId = Ss->At(dst, y).GetInt(); 00365 IAssert(! Set1IdH.IsKey(DstId)); 00366 if (! TimeNet.IsNode(DstId)) { TimeNet.AddNode(DstId, NodeTm); } 00367 else { TimeNet.GetNDat(DstId) = TMath::Mn(NodeTm, TimeNet.GetNDat(DstId)); } 00368 if (! TimeNet.IsEdge(SrcId, DstId)) { TimeNet.AddEdge(SrcId, DstId); } 00369 } 00370 } 00371 TimeNet.Defrag(); 00372 printf("Bipartate graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges()); 00373 printf(" Bipartate sets: %d nodes --> %d nodes\n", TSnap::CntInDegNodes(TimeNetPt, 0), 00374 TSnap::CntOutDegNodes(TimeNetPt, 0)); 00375 return TimeNetPt; 00376 } 00377 00378 // load Arxiv paper citation network 00379 // PaperFNm: <paper> <time> 00380 // CiteFNm: <source> <destination> 00381 PTimeNet TTimeNet::LoadArxiv(const TStr& PaperFNm, const TStr& CiteFNm) { 00382 TExeTm ExeTm; 00383 PTimeNet TimeNetPt = TTimeNet::New(); 00384 TTimeNet& TimeNet = *TimeNetPt; 00385 printf("Arxiv citation graph (paper publication year)...\n"); 00386 // load time data (file hep-ph-slacdates) 00387 char Line [1024]; 00388 FILE *PprF = fopen(PaperFNm.CStr(), "rt"); 00389 TStr StrId, StrTime; 00390 TStrV StrV, StrTimeV; 00391 int N = 0, DuplicateNode = 0; 00392 while (! feof(PprF)) { 00393 Line[0] = 0; 00394 fgets(Line, 1024, PprF); 00395 if (strlen(Line) == 0 || Line[0] == '#') continue; 00396 Line[strlen(Line)-1] = 0; // delete trailing '\n' 00397 TStr(Line).SplitOnWs(StrV); IAssert(StrV.Len() == 2); 00398 StrId = StrV[0]; StrTime = StrV[1]; IAssert(!StrId.Empty() && !StrTime.Empty()); 00399 StrTime.SplitOnAllCh('-', StrTimeV); IAssert(StrTimeV.Len() == 3); 00400 const int NodeId = StrId.GetInt(); 00401 if (! TimeNet.IsNode(NodeId)) { 00402 const int Year = StrTimeV[0].GetInt(); 00403 const int Month = StrTimeV[1].GetInt(); 00404 const int Day = StrTimeV[2].GetInt(); 00405 TimeNet.AddNode(NodeId, TSecTm(Year, Month, Day)); 00406 } else { DuplicateNode++; } 00407 if (++N % 10000 == 0) printf("\r %dk", N/1000); 00408 } 00409 printf("\r %d nodes read. %d duplicate nodes. %s\n", N, DuplicateNode, ExeTm.GetTmStr()); 00410 fclose(PprF); 00411 // load citations (file hep-ph-citations) 00412 int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0; 00413 FILE *CiteF = fopen(CiteFNm.CStr(), "rt"); 00414 N = 0; ExeTm.Tick(); 00415 printf("Loading Arxiv citations...\n"); 00416 TIntPrV EdgeV; 00417 THash<TInt, TSecTm> NIdToTimeH; 00418 while (! feof(CiteF)) { 00419 Line[0] = 0; 00420 fgets(Line, 1024, CiteF); 00421 if (strlen(Line) == 0 || Line[0] == '#') continue; 00422 Line[strlen(Line)-1] = 0; // delete trailing '\n' 00423 TStr(Line).SplitOnWs(StrV); IAssert(StrV.Len() == 2); 00424 const int SrcNId = StrV[0].GetInt(); 00425 const int DstNId = StrV[1].GetInt(); 00426 EdgeV.Add(TIntPr(SrcNId, DstNId)); 00427 // infer time of destination node -- earliest paper that cites the node (paper) 00428 if (! TimeNet.IsNode(DstNId) && TimeNet.IsNode(SrcNId)) { 00429 const TSecTm& SrcTm = TimeNet.GetNDat(SrcNId); 00430 if (! NIdToTimeH.IsKey(DstNId)) { 00431 NIdToTimeH.AddDat(DstNId, SrcTm); 00432 NewDstIds++; 00433 } 00434 else if (NIdToTimeH.GetDat(DstNId) < SrcTm) { 00435 NIdToTimeH.GetDat(DstNId) = SrcTm; } 00436 } 00437 if (++N % 10000 == 0) printf("\r %dk", N/1000); 00438 } 00439 fclose(CiteF); 00440 // add infeered time nodes (nodes which are cited by papers with known time) 00441 for (int i = 0; i < NIdToTimeH.Len(); i++) { 00442 TimeNet.AddNode(NIdToTimeH.GetKey(i), NIdToTimeH[i]); 00443 } 00444 // add links 00445 for (int i = 0; i < EdgeV.Len(); i++) { 00446 const int SrcNId = EdgeV[i].Val1; 00447 const int DstNId = EdgeV[i].Val2; 00448 if (TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) { 00449 if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); } 00450 else { DupLinks++; } 00451 } else { 00452 if (! TimeNet.IsNode(SrcNId)) { 00453 NewSrcIds++; 00454 if (! TimeNet.IsNode(DstNId)) { NewCits++; } 00455 } 00456 } 00457 } 00458 printf("\r %d citations read. %s\n", N, ExeTm.GetTmStr()); 00459 printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges()); 00460 printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0)); 00461 TIntV RmNIdV; 00462 for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) { 00463 if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); } 00464 } 00465 for (int i = 0; i < RmNIdV.Len(); i++) { 00466 TimeNet.DelNode(RmNIdV[i]); 00467 } 00468 TimeNet.Defrag(true); 00469 printf("\nFinal graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges()); 00470 printf(" Duplicate citations : %d\n", DupLinks); 00471 printf(" Nodes without time which are cited : %d (add them to graph, use time of the earliest source node)\n", NewDstIds); 00472 printf(" Citations between unknown time nodes : %d\n", NewCits); 00473 printf(" Nodes without time which make citations: %d (do not add them into the graph)\n", NewSrcIds); 00474 return TimeNetPt; 00475 } 00476 00477 // Patent citation network. Time units are seconds even though they mean years 00478 PTimeNet TTimeNet::LoadPatents(const TStr& PatentFNm, const TStr& CiteFNm) { 00479 int N = 0; 00480 TExeTm ExeTm; 00481 PTimeNet TimeNetPt = TTimeNet::New(); 00482 TTimeNet& TimeNet = *TimeNetPt; 00483 TimeNet.Reserve(4000000, 160000000); 00484 printf("parsing patent data (patent grant year)...\n"); 00485 // load time data (file pat63_99.txt) 00486 const int& PatIdCol = 0; 00487 const int& GYearCol = 1; 00488 TStrV ColV; 00489 char Line [1024]; 00490 FILE *PatF = fopen(PatentFNm.CStr(), "rt"); 00491 fgets(Line, 1024, PatF); // skip 1st line 00492 while (! feof(PatF)) { 00493 Line[0] = 0; 00494 fgets(Line, 1024, PatF); 00495 if (strlen(Line) == 0) break; 00496 TStr(Line).SplitOnAllCh(',', ColV, false); 00497 IAssert(ColV.Len() == 23); 00498 const int PatentId = ColV[PatIdCol].GetInt(); 00499 const int GrantYear = ColV[GYearCol].GetInt(); 00500 IAssert(! TimeNet.IsNode(PatentId)); 00501 TimeNet.AddNode(PatentId, TSecTm(GrantYear)); // pretend year is a second 00502 if (++N % 100000 == 0) printf("\r %dk", N/1000); 00503 } 00504 printf("\r %d patents read. %s\n", N, ExeTm.GetTmStr()); 00505 fclose(PatF); 00506 // load citations (file cite75_99.txt) 00507 printf("\nLoading patent citations...\n"); 00508 int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0; 00509 N = 0; ExeTm.Tick(); 00510 TStr SrcId, DstId; 00511 FILE *CiteF = fopen(CiteFNm.CStr(), "rt"); 00512 fgets(Line, 1024, CiteF); // skip 1st line 00513 while (! feof(CiteF)) { 00514 Line[0] = 0; 00515 fgets(Line, 1024, CiteF); 00516 if (strlen(Line) == 0) break; 00517 Line[strlen(Line)-1] = 0; // delete trailing '\n' 00518 TStr(Line).SplitOnCh(SrcId, ',', DstId); 00519 const int SrcNId = SrcId.GetInt(); 00520 const int DstNId = DstId.GetInt(); 00521 if (! TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) { 00522 //TimeNet.AddNode(SrcNId, TSecTm(1, 1, 1)); NewSrcIds++; 00523 //TimeNet.AddNode(DstNId, TSecTm(1, 1, 1)); NewDstIds++; 00524 NewCits++; 00525 continue; 00526 } 00527 else if (TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) { 00528 TimeNet.AddNode(DstNId, TimeNet.GetNDat(SrcNId)); NewDstIds++; 00529 } 00530 else if (! TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) { 00531 TimeNet.AddNode(SrcNId, TimeNet.GetNDat(DstNId)); NewSrcIds++; 00532 } 00533 if (! TimeNet.IsEdge(SrcNId, DstNId)) { 00534 TimeNet.AddEdge(SrcNId, DstNId); 00535 } else { DupLinks++; } 00536 if (++N % 100000 == 0) printf("\r %dk", N/1000); 00537 } 00538 fclose(CiteF); 00539 printf("\r %d citations read. %s\n\n", N, ExeTm.GetTmStr()); 00540 printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges()); 00541 printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0)); 00542 TIntV RmNIdV; 00543 for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) { 00544 if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); } 00545 } 00546 for (int i = 0; i < RmNIdV.Len(); i++) { 00547 TimeNet.DelNode(RmNIdV[i]); 00548 } 00549 TimeNet.Defrag(true); 00550 printf("\nFinal graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges()); 00551 printf(" Duplicate citations : %d\n", DupLinks); 00552 printf(" Citations between unknown time nodes : %d\n", NewCits); 00553 printf(" Nodes without time which make citations: %d\n", NewSrcIds); 00554 printf(" Nodes without time which are cited : %d\n", NewDstIds); 00555 return TimeNetPt; 00556 } 00557 00558 // Amazon ShareTheLove: nodes are people, we add an edge when there is a first 00559 // recommendation between 2 people (we count each edge only once (regardless 00560 // of how many recommendations there were between 2 persons) 00561 PTimeNet TTimeNet::LoadAmazon(const TStr& StlFNm) { 00562 PTimeNet TimeNetPt = TTimeNet::New(); 00563 TTimeNet& TimeNet = *TimeNetPt; 00564 TimeNet.Reserve(3953993, -1); 00565 printf("Amazon Share-the-Love...\n"); 00566 char line [2024], MonthStr[4]; 00567 int NLines=0; 00568 TStrV ColV; 00569 FILE *F = fopen(StlFNm.CStr(), "rt"); 00570 while (! feof(F)) { 00571 memset(line, 0, 2024); 00572 fgets(line, 2024, F); 00573 if (strlen(line) == 0) break; 00574 TStr(line).SplitOnAllCh(',', ColV); 00575 const int SrcNId = ColV[0].GetInt(); 00576 const int DstNId = ColV[1].GetInt(); 00577 // time data 00578 TStr TmStr = ColV[2]; // time-format: 29JAN02:21:55:23 00579 int Year = TmStr.GetSubStr(5, 6).GetInt(); 00580 if (Year < 10) { Year += 2000; } else { Year += 1900; } 00581 MonthStr[0]=toupper(TmStr[2]); MonthStr[1]=tolower(TmStr[3]); 00582 MonthStr[2]=tolower(TmStr[4]); MonthStr[3]=0; 00583 const int Month = TTmInfo::GetMonthN(MonthStr, lUs); 00584 const int Day = TmStr.GetSubStr(0, 1).GetInt(); 00585 const int Hour = TmStr.GetSubStr(8, 9).GetInt(); 00586 const int Min = TmStr.GetSubStr(11, 12).GetInt(); 00587 const int Sec = TmStr.GetSubStr(14, 15).GetInt(); 00588 // add nodes and links 00589 if (! TimeNet.IsNode(SrcNId)) { TimeNet.AddNode(SrcNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); } 00590 if (! TimeNet.IsNode(DstNId)) { TimeNet.AddNode(DstNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); } 00591 if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); } 00592 if (++NLines % 100000 == 0) printf("\r %dk", NLines/1000); 00593 } 00594 fclose(F); 00595 printf("\r %d lines read\n", NLines); 00596 printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges()); 00597 TimeNet.Defrag(true); 00598 return TimeNetPt; 00599 } 00600 00602 // Time Node-Edge Network 00603 TTimeNENet& TTimeNENet::operator = (const TTimeNENet& TimeNet) { 00604 if (this != &TimeNet) { 00605 TNet::operator=(TimeNet); 00606 } 00607 return *this; 00608 } 00609 00610 PTimeNet TTimeNENet::GetTimeNet() const { 00611 PTimeNet NewNet = TTimeNet::New(); 00612 NewNet->Reserve(GetNodes(), -1); 00613 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00614 NewNet->AddNode(NI.GetId(), NI.GetDat()); 00615 } 00616 for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) { 00617 const int src = EI.GetSrcNId(); 00618 const int dst = EI.GetDstNId(); 00619 if (! NewNet->IsEdge(src, dst)) { 00620 NewNet->AddEdge(src, dst); } 00621 } 00622 NewNet->Defrag(); 00623 return NewNet; 00624 } 00625 00626 // only take earliest edge between the nodes 00627 PTimeNENet TTimeNENet::Get1stEdgeNet() const { 00628 PTimeNENet Net = TTimeNENet::New(); 00629 Net->Reserve(GetNodes(), -1); 00630 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00631 Net->AddNode(NI.GetId(), NI.GetDat()); } 00632 TIntV EIdV; GetEIdByTm(EIdV); 00633 TIntPrSet EdgeSet(GetEdges()); 00634 for (int edge = 0; edge < EIdV.Len(); edge++) { 00635 const TEdgeI EI = GetEI(EIdV[edge]); 00636 const int Src = EI.GetSrcNId(); 00637 const int Dst = EI.GetDstNId(); 00638 if (Src==Dst || EdgeSet.IsKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)))) { continue; } // take only 1st edge 00639 EdgeSet.AddKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst))); 00640 Net->AddEdge(EI); 00641 } 00642 return Net; 00643 } 00644 00645 PTimeNENet TTimeNENet::GetSubGraph(const TIntV& NIdV) const { 00646 PTimeNENet NewNetPt = TTimeNENet::New(); 00647 TTimeNENet& NewNet = *NewNetPt; 00648 NewNet.Reserve(NIdV.Len(), -1); 00649 int node, edge; 00650 TNodeI NI; 00651 for (node = 0; node < NIdV.Len(); node++) { 00652 NewNet.AddNode(NIdV[node], GetNDat(NIdV[node])); 00653 } 00654 for (node = 0; node < NIdV.Len(); node++) { 00655 NI = GetNI(NIdV[node]); 00656 for (edge = 0; edge < NI.GetOutDeg(); edge++) { 00657 const TEdgeI EI = GetEI(NI.GetOutEId(edge)); 00658 if (NewNet.IsNode(EI.GetDstNId())) { 00659 NewNet.AddEdge(EI); } 00660 } 00661 } 00662 NewNet.Defrag(); 00663 return NewNetPt; 00664 } 00665 00666 PTimeNENet TTimeNENet::GetESubGraph(const TIntV& EIdV) const { 00667 PTimeNENet NewNetPt = TTimeNENet::New(); 00668 TTimeNENet& NewNet = *NewNetPt; 00669 NewNet.Reserve(-1, EIdV.Len()); 00670 for (int edge = 0; edge < EIdV.Len(); edge++) { 00671 const TEdgeI Edge = GetEI(EIdV[edge]); 00672 if (! NewNet.IsNode(Edge.GetSrcNId())) 00673 NewNet.AddNode(GetNI(Edge.GetSrcNId())); 00674 if (! NewNet.IsNode(Edge.GetDstNId())) 00675 NewNet.AddNode(GetNI(Edge.GetDstNId())); 00676 NewNet.AddEdge(Edge); 00677 } 00678 NewNet.Defrag(); 00679 return NewNetPt; 00680 } 00681 00682 // assumes that the edges of the network are sorted in time 00683 PTimeNENet TTimeNENet::GetGraphUpToTm(const TSecTm& MaxEdgeTm) const { 00684 PTimeNENet NewNetPt = TTimeNENet::New(); 00685 TTimeNENet& NewNet = *NewNetPt; 00686 TSecTm PrevTm; 00687 for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) { 00688 if (EI() > MaxEdgeTm) { break; } 00689 if (! NewNet.IsNode(EI.GetSrcNId())) 00690 NewNet.AddNode(GetNI(EI.GetSrcNId())); 00691 if (! NewNet.IsNode(EI.GetDstNId())) 00692 NewNet.AddNode(GetNI(EI.GetDstNId())); 00693 NewNet.AddEdge(EI); 00694 IAssert(! PrevTm.IsDef() || PrevTm <= EI()); // edge times must be sorted 00695 PrevTm = EI(); 00696 } 00697 NewNet.Defrag(); 00698 return NewNetPt; 00699 } 00700 00701 void TTimeNENet::SortNodeEdgeTimes() { 00702 NodeH.SortByDat(true); 00703 EdgeH.SortByDat(true); 00704 } 00705 00706 // node time must be smaller than times of incoming or outgoing edges 00707 void TTimeNENet::UpdateNodeTimes() { 00708 int Cnt = 0; 00709 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00710 TSecTm& NodeTm = NI(); 00711 for (int edge = 0; edge < NI.GetOutDeg(); edge++) { 00712 const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge)); 00713 if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; } 00714 } 00715 for (int edge = 0; edge < NI.GetInDeg(); edge++) { 00716 const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge)); 00717 if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; } 00718 } 00719 } 00720 printf("Update node times: %d/%d updates\n", Cnt, GetNodes()); 00721 } 00722 00723 void TTimeNENet::SetNodeTmToFirstEdgeTm() { 00724 int Cnt = 0; 00725 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00726 if (NI.GetDeg() == 0) { continue; } 00727 TSecTm NodeTm; 00728 for (int edge = 0; edge < NI.GetOutDeg(); edge++) { 00729 const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge)); IAssert(EdgeTm.IsDef()); 00730 if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; } 00731 } 00732 for (int edge = 0; edge < NI.GetInDeg(); edge++) { 00733 const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge)); IAssert(EdgeTm.IsDef()); 00734 if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; } 00735 } 00736 GetNDat(NI.GetId()) = NodeTm; 00737 } 00738 printf("Node times set: %d/%d updates\n", Cnt, GetNodes()); 00739 } 00740 00741 // shuffle edge arrivals 00742 void TTimeNENet::SetRndEdgeTimes(const int& MinTmEdge) { 00743 printf("Shuffling last %d (%d%%) edge arrival times..\n", GetEdges()-MinTmEdge, int(100.0*(GetEdges()-MinTmEdge)/double(GetEdges()))); 00744 TIntV RndEIdV; GetEIdByTm(RndEIdV); 00745 TIntV TrueEIdV = RndEIdV; 00746 TSecTmV TrueTmV; 00747 const int SwapLen = RndEIdV.Len()-MinTmEdge; 00748 for (int R = 0; R < 10; R++) { 00749 for (int i = MinTmEdge; i < RndEIdV.Len(); i++) { 00750 RndEIdV.Swap(TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge, TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge); } 00751 } 00752 for (int e = 0; e < TrueEIdV.Len(); e++) { 00753 TrueTmV.Add(GetEDat(TrueEIdV[e])); } 00754 for (int e = 0; e < RndEIdV.Len(); e++) { 00755 GetEDat(RndEIdV[e]) = TrueTmV[e]; } 00756 UpdateNodeTimes(); 00757 } 00758 00759 void TTimeNENet::DumpTimeStat() const { 00760 TSecTm MnNodeTm, MxNodeTm; 00761 TSecTm MnEdgeTm, MxEdgeTm; 00762 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00763 const TSecTm NodeTm = NI(); 00764 if (! MnNodeTm.IsDef() || MnNodeTm>NodeTm) { MnNodeTm = NodeTm; } 00765 if (! MxNodeTm.IsDef() || MxNodeTm<NodeTm) { MxNodeTm = NodeTm; } 00766 } 00767 printf("Node times:\n %s\n %s\n", MnNodeTm.GetStr().CStr(), MxNodeTm.GetStr().CStr()); 00768 for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) { 00769 const TSecTm EdgeTm = EI(); 00770 if (! MnEdgeTm.IsDef() || MnEdgeTm>EdgeTm) { MnEdgeTm = EdgeTm; } 00771 if (! MxEdgeTm.IsDef() || MxEdgeTm<EdgeTm) { MxEdgeTm = EdgeTm; } 00772 } 00773 printf("Edge times:\n %s\n %s\n", MnEdgeTm.GetStr().CStr(), MxEdgeTm.GetStr().CStr()); 00774 } 00775 00776 void TTimeNENet::GetNIdByTm(TIntV& NIdV) const { 00777 TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0); 00778 for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) { 00779 TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); } 00780 TmToNIdV.Sort(); 00781 NIdV.Gen(GetNodes(), 0); 00782 for (int i = 0; i < TmToNIdV.Len(); i++) { 00783 NIdV.Add(TmToNIdV[i].Dat); } 00784 } 00785 00786 void TTimeNENet::GetEIdByTm(TIntV& EIdV) const { 00787 TVec<TKeyDat<TSecTm, TInt> > TmToEIdV(GetEdges(), 0); 00788 for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) { 00789 TmToEIdV.Add(TKeyDat<TSecTm, TInt>(EI.GetDat(), EI.GetId())); } 00790 TmToEIdV.Sort(); 00791 EIdV.Gen(GetEdges(), 0); 00792 for (int i = 0; i < TmToEIdV.Len(); i++) { 00793 EIdV.Add(TmToEIdV[i].Dat); } 00794 } 00795 00796 void TTimeNENet::GetTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const { 00797 THash<TInt, TIntV> TmIdToNIdVH; 00798 TIntV NIdV; GetNIdByTm(NIdV); 00799 for (int n = 0; n < NIdV.Len(); n++) { 00800 const int TmId = GetNDat(NIdV[n]).Round(TmUnit).GetAbsSecs(); 00801 if (! TmIdToNIdVH.IsKey(TmId)) { TmIdToNIdVH.AddKey(TmId); } 00802 TmIdToNIdVH.GetDat(TmId).Add(NIdV[n]); 00803 } 00804 TVec<TPair<TInt, TIntV> > TmIdNIdVV; 00805 TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV); 00806 TmIdNIdVV.Sort(); 00807 TmBucketV.Gen(TmIdNIdVV.Len()); 00808 for (int i = 0; i < TmIdNIdVV.Len(); i++) { 00809 TTimeNet::TTmBucket& Bucket = TmBucketV[i]; 00810 Bucket.BegTm = TmIdNIdVV[i].Val1; 00811 Bucket.NIdV = TmIdNIdVV[i].Val2; 00812 } 00813 } 00814 00815 void TTimeNENet::GetEdgeTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const { 00816 THash<TInt, TIntV> TmIdToEIdVH; 00817 TIntV EIdV; GetEIdByTm(EIdV); 00818 for (int e = 0; e < EIdV.Len(); e++) { 00819 const int TmId = GetEDat(EIdV[e]).Round(TmUnit).GetAbsSecs(); 00820 if (! TmIdToEIdVH.IsKey(TmId)) { TmIdToEIdVH.AddKey(TmId); } 00821 TmIdToEIdVH.GetDat(TmId).Add(EIdV[e]); 00822 } 00823 TVec<TPair<TInt, TIntV> > TmIdEIdVV; 00824 TmIdToEIdVH.GetKeyDatPrV(TmIdEIdVV); 00825 TmIdEIdVV.Sort(); 00826 TmBucketV.Gen(TmIdEIdVV.Len()); 00827 for (int i = 0; i < TmIdEIdVV.Len(); i++) { 00828 TTimeNet::TTmBucket& Bucket = TmBucketV[i]; 00829 Bucket.BegTm = TmIdEIdVV[i].Val1; 00830 Bucket.NIdV = TmIdEIdVV[i].Val2; 00831 } 00832 } 00833 00834 void TTimeNENet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const { 00835 TIntV NIdV; GetNIdByTm(NIdV); 00836 TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0); 00837 for (int i = 0; i < NIdV.Len(); i++) { 00838 const int b = i/NodesPerBucket; 00839 if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); } 00840 TmBucketV[b].NIdV.Add(NIdV[i]); 00841 } 00842 } 00843 00844 void TTimeNENet::GetEdgeBuckets(const int EdgesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const { 00845 TIntV EIdV; GetEIdByTm(EIdV); 00846 TmBucketV.Gen(EIdV.Len()/EdgesPerBucket + 1, 0); 00847 for (int i = 0; i < EIdV.Len(); i++) { 00848 const int b = i/EdgesPerBucket; 00849 if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); } 00850 TmBucketV[b].NIdV.Add(EIdV[i]); 00851 } 00852 } 00853 00854 // get edges that close triangles over time 00855 int TTimeNENet::GetTriadEdges(TIntV& TriadEIdV) const { 00856 PUNGraph Graph = TUNGraph::New(GetNodes(), GetEdges()); 00857 TIntV EIdV; GetEIdByTm(EIdV); 00858 TriadEIdV.Clr(); 00859 TExeTm ExeTm; 00860 for (int edge = 0; edge < EIdV.Len(); edge++) { 00861 const TEdgeI EI = GetEI(EIdV[edge]); 00862 const int Src = EI.GetSrcNId(); 00863 const int Dst = EI.GetDstNId(); 00864 if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge 00865 if (! Graph->IsNode(Src)) { Graph->AddNode(Src); } 00866 if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); } 00867 if (TSnap::GetCmnNbrs(Graph, Src, Dst) > 0) { TriadEIdV.Add(EIdV[edge]); } 00868 Graph->AddEdge(Src, Dst); 00869 if (edge % 10000 == 0) { 00870 printf("\redges %dk / %dk: triangle edges: %dk [total %s]", edge/1000, EIdV.Len()/1000, 00871 TriadEIdV.Len()/1000, ExeTm.GetStr()); } 00872 } 00873 return Graph->GetEdges(); 00874 } 00875 00876 PGStatVec TTimeNENet::TimeGrowth(const TTmUnit& TimeStep, const TFSet& TakeStat, const TSecTm& StartTm) const { 00877 TExeTm ExeTm; 00878 PGStatVec GStatVec = TGStatVec::New(TimeStep, TakeStat); 00879 TTimeNet::TTmBucketV TmBucketV; 00880 GetEdgeTmBuckets(TimeStep, TmBucketV); 00881 const PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this)); 00882 TIntV EdgeIdV; 00883 for (int t = 0; t < TmBucketV.Len(); t++) { 00884 EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T 00885 printf("\n***%d/%d: %s (%d edges) ", t+1, TmBucketV.Len(), TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len()); ExeTm.Tick(); 00886 if (TmBucketV[t].BegTm < StartTm) { continue; } 00887 const PNEGraph PreGraph = TSnap::GetESubGraph(FullGraph, EdgeIdV); 00888 GStatVec->Add(PreGraph, TmBucketV[t].BegTm); 00889 printf(" [%s]\n", ExeTm.GetTmStr()); 00890 //{ TFOut FOut("LinkedIn.GStatVec"); GStatVec->Save(FOut); } 00891 } 00892 return GStatVec; 00893 } 00894 00895 // take network from last TakeNTmUnits and measure properties 00896 PGStatVec TTimeNENet::TimeGrowth(const TStr& FNmPref, const TStr& Desc, const TFSet& TakeStat, const int& NDiamRuns, 00897 const TTmUnit& TmUnit, const int& TakeNTmUnits, const bool& LinkBWays) const { 00898 TGStat::NDiamRuns = NDiamRuns; 00899 PGStatVec GrowthStat = TGStatVec::New(TmUnit, TakeStat); 00900 TTimeNet::TTmBucketV TmBucketV; 00901 GetEdgeTmBuckets(TmUnit, TmBucketV); 00902 TIntV EdgeIdV; 00903 TExeTm ExeTm; 00904 for (int t = 0; t < TmBucketV.Len(); t++) { 00905 // take graph over last TakeNTmUnits time units 00906 if (TakeNTmUnits == -1) { 00907 EdgeIdV.AddV(TmBucketV[t].NIdV); } 00908 else { 00909 if (t < TakeNTmUnits) { continue; } 00910 EdgeIdV.Clr(false); 00911 for (int i = t-TakeNTmUnits; i < t; i++) { EdgeIdV.AddV(TmBucketV[i].NIdV); } 00912 } 00913 printf("*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len()); ExeTm.Tick(); 00914 PNEGraph PreGraph = TSnap::ConvertESubGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this), EdgeIdV); 00915 if (LinkBWays) { 00916 TIntV KeepEIdV; // keep only nodes that have in- and out-links (send and receive email) 00917 for (TNEGraph::TEdgeI EI = PreGraph->BegEI(); EI < PreGraph->EndEI(); EI++) { 00918 if (PreGraph->IsEdge(EI.GetDstNId(), EI.GetSrcNId(), true)) { KeepEIdV.Add(EI.GetId()); } 00919 } 00920 PreGraph = TSnap::GetESubGraph(PreGraph, KeepEIdV); 00921 } 00922 // take statistics 00923 GrowthStat->Add(PreGraph, TmBucketV[t].BegTm); 00924 { TFOut FOut(TStr::Fmt("growth.%s.gStatVec", FNmPref.CStr())); 00925 GrowthStat->Save(FOut); } 00926 GrowthStat->SaveTxt(FNmPref, Desc); 00927 printf(" [%s]\n", ExeTm.GetTmStr()); 00928 } 00929 return GrowthStat; 00930 } 00931 00932 void TTimeNENet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit, 00933 const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc) const { 00934 TTimeNet::TTmBucketV TmBucketV; 00935 GetEdgeTmBuckets(TmUnit, TmBucketV); 00936 PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this)); 00937 TIntV EdgeIdV; 00938 TExeTm ExeTm, Run1Tm; 00939 TFltTrV TmDiamV, NdsDiamV; 00940 for (int t = 0; t < TmBucketV.Len(); t++) { 00941 EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T 00942 printf("\n*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), EdgeIdV.Len()); ExeTm.Tick(); 00943 if (TmBucketV[t].BegTm < StartTm) continue; 00944 PNGraph PreGraph = TSnap::ConvertESubGraph<PNGraph>(FullGraph, EdgeIdV); 00945 TMom Mom; 00946 double EffDiam = 0.0; 00947 for (int r = 0; r < NDiamRuns; r++) { 00948 printf("%d...", r+1); Run1Tm.Tick(); 00949 if (OnlyWcc) { EffDiam = TSnap::GetAnfEffDiam(TSnap::GetMxWcc(PreGraph)); } 00950 else { EffDiam = TSnap::GetAnfEffDiam(PreGraph); } 00951 Mom.Add(EffDiam); 00952 printf("[%s]\r", Run1Tm.GetTmStr()); 00953 } 00954 Mom.Def(); 00955 TmDiamV.Add(TFltTr(TmBucketV[t].BegTm.Round(TmUnit).GetAbsSecs(), Mom.GetMean(), Mom.GetSDev())); 00956 NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev())); 00957 NdsDiamV.Sort(); 00958 printf(" [%s] \n", ExeTm.GetTmStr()); 00959 const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr(); 00960 { TGnuPlot GnuPlot("diamEff1."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns)); 00961 GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), "AVERAGE "+WccStr+"Effective Diameter"); 00962 GnuPlot.AddErrBar(TmDiamV, "", ""); 00963 GnuPlot.SavePng(); } 00964 { TGnuPlot GnuPlot("diamEff2."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns)); 00965 GnuPlot.SetXYLabel("NODES", "AVERAGE "+WccStr+"Effective Diameter"); 00966 GnuPlot.AddErrBar(NdsDiamV, "", ""); 00967 GnuPlot.SavePng(); } 00968 } 00969 } 00970 00971 // pretend we only have link data starting in PostTmDiam 00972 // compare the diameter of the nodes after PostTmDiam with diameter 00973 // of the nodes after PostTmDiam but *also* using nodes and edges 00974 // from before PostTmDiam 00975 void TTimeNENet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit, 00976 const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam, const bool& LinkBWays) { 00977 printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n %s group by %s.\n", 00978 FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr()); 00979 printf(" Delete out-edges of pre time %s nodes.\n Take subgraph of post year %s subgraph.\n\n", 00980 DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr()); 00981 // run diameter 00982 /*const int NSamples = 100; 00983 const int NDiamRuns = 10; 00984 // delete past 00985 if (DelPreEdges != -1) { 00986 TIntV EdgeIdV; 00987 printf("Deleting pre-year %d edges\n", DelPreEdges); 00988 for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) { 00989 if (EI().GetYear() < DelPreEdges) { EdgeIdV.Add(EI.GetId()); } } 00990 for (int e = 0; e < EdgeIdV.Len(); e++) { DelEdge(EdgeIdV[e]); } 00991 printf(" Deleted %d edges (out of %d edges total).\n", EdgeIdV.Len(), GetEdges()); 00992 } 00993 PNEGraph FullGraph = GetNEGraph(); 00994 TSnap::DelZeroDegNodes(FullGraph); 00995 00996 PGrowthStat GrowthStat = TGrowthStat::New(TmUnit, TGraphStat::AllStat()); 00997 TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev; 00998 TIntV EdgeIdV; 00999 TExeTm ExeTm; 01000 TTimeNet::TTmBucketV EdgeTmBucketV; 01001 GetEdgeTmBuckets(TmUnit, EdgeTmBucketV); 01002 for (int t = 0; t < EdgeTmBucketV.Len(); t++) { 01003 printf("\nGraph: %s (%d / %d) [%s]\n", EdgeTmBucketV[t].BegTm.GetTmStr().CStr(), 01004 t+1, EdgeTmBucketV.Len(), TExeTm::GetCurTm()); 01005 // up-to-year subgraph 01006 EdgeIdV.AddV(EdgeTmBucketV[t].NIdV); // nodes up to time T 01007 if (EdgeTmBucketV[t].BegTm.GetYear() < PostYearDiam) continue; 01008 01009 const PNEGraph PreNEGraph = EdgeBothWays ? FullGraph->GetEdgeSubGraph(EdgeIdV) : FullGraph; 01010 PNGraph PreGraph = PreNEGraph->GetBWayGraph(); 01011 PNGraph WccGraph = TSnap::GetMxWcc(PreGraph); 01012 01013 // find nodes that are not in missing past 01014 TIntV PostYearNIdV, WccPostYearNIdV; 01015 THash<TIntPr, TInt> PostYearEdgeH; 01016 for (TNEGraph::TEdgeI EI = PreNEGraph->BegEI(); EI < PreNEGraph->EndEI(); EI++) { 01017 if (GetEDat(EI.GetId()).GetYear() >= PostYearDiam) { 01018 PostYearEdgeH.AddKey(TIntPr(EI.GetSrcNId(), EI.GetDstNId())); } 01019 } 01020 TIntH PostYearNIdH; 01021 for (int i = 0; i < PostYearEdgeH.Len(); i++) { 01022 if ((! EdgeBothWays) || (EdgeBothWays && PostYearEdgeH.IsKey(TIntPr(PostYearEdgeH.GetKey(i).Val2, PostYearEdgeH.GetKey(i).Val1)))) { //reverse edge exists 01023 PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val1); 01024 PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val2); 01025 } 01026 } 01027 PostYearNIdH.GetKeyV(PostYearNIdV); 01028 for (int i = 0; i < PostYearNIdV.Len(); i++) { 01029 if (WccGraph->IsNode(PostYearNIdV[i])) { 01030 WccPostYearNIdV.Add(PostYearNIdV[i]); } 01031 } 01032 01033 // diameter of PostYearDiam subgraph using whole graph (all edges) 01034 TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom; 01035 for (int r = 0; r < NDiamRuns; r++) { 01036 if (! PostYearNIdV.Empty()) { 01037 PreDiamMom.Add(-1); //PreDiamMom.Add(TSnap::GetBfsFullDiam(PreGraph, NSamples, PostYearNIdV, false)); 01038 PreEffDiamMom.Add(TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false)); 01039 } 01040 if (! WccPostYearNIdV.Empty()) { 01041 //WccDiamMom.Add(TSnap::GetBfsFullDiam(WccGraph, NSamples, WccPostYearNIdV, false)); 01042 //WccEffDiamMom.Add(TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false)); 01043 WccDiamMom.Add(-1); WccEffDiamMom.Add(-1); 01044 } 01045 printf(" diam: %d [%s] %.2f\r", r+1, ExeTm.GetTmStr(), PreEffDiamMom.GetValV().Last().Val); ExeTm.Tick(); 01046 } 01047 PreDiamMom.Def(); PreEffDiamMom.Def(); 01048 WccDiamMom.Def(); WccEffDiamMom.Def(); 01049 // save stat 01050 PGraphStat GraphStatPt = GrowthStat->Add(EdgeTmBucketV[t].BegTm); 01051 TGraphStat& GraphStat = *GraphStatPt; 01052 GraphStat.Nodes = PreGraph->GetNodes(); 01053 GraphStat.NonZNodes = PreGraph->GetNodes() - TSnap::CntDegNodes<PNGraph>(PreGraph, 0); 01054 GraphStat.Srcs = GraphStat.Nodes - TSnap::CntOutDegNodes<PNGraph>(PreGraph, 0); 01055 GraphStat.Edges = PreGraph->GetEdges(); 01056 GraphStat.WccNodes= WccGraph->GetNodes(); 01057 GraphStat.WccEdges = WccGraph->GetEdges(); 01058 GraphStat.FullDiam = PreDiamMom.GetMean(); // mean 01059 GraphStat.EffDiam = PreEffDiamMom.GetMean(); 01060 GraphStat.FullWccDiam = WccDiamMom.GetMean(); 01061 GraphStat.EffWccDiam = WccEffDiamMom.GetMean(); 01062 GraphStat.FullDiamDev = PreDiamMom.GetSDev(); // variance 01063 GraphStat.EffDiamDev = PreEffDiamMom.GetSDev(); 01064 GraphStat.FullWccDiamDev = WccDiamMom.GetSDev(); 01065 GraphStat.EffWccDiamDev = WccEffDiamMom.GetSDev(); 01066 { TFOut FOut("growth."+FNmPref+".bin"); 01067 GrowthStat->Save(FOut); } 01068 const TStr BigDesc = TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%d\nPostYearDiam\t%d\n", 01069 Desc.CStr(), DelPreEdges, PostYearDiam); 01070 GrowthStat->SaveTxt(FNmPref, BigDesc); 01071 } 01072 // diameter plots 01073 GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.", 01074 DelPreEdges, PostYearDiam)); 01075 */ 01076 } 01077 01078 // plot time difference of the node ages when an edge arrives 01079 /*void TTimeNENet::PlotEdgeNodeTimeDiff(const TStr& FNmPref, TStr Desc) const { 01080 if (Desc.Empty()) { Desc = FNmPref; } 01081 printf("Edge node time differences:\n"); 01082 THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge 01083 THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst) 01084 PUNGraph Graph = TUNGraph::New(GetNodes(), -1); 01085 TIntV EIdV; GetEIdByTm(EIdV); 01086 TIntH DegCntH(10, true); 01087 TExeTm ExeTm; 01088 for (int edge = 0; edge < EIdV.Len(); edge++) { 01089 const TEdgeI EI = GetEI(EIdV[edge]); 01090 const int Src = EI.GetSrcNId(); 01091 const int Dst = EI.GetDstNId(); 01092 if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge 01093 if (! Graph->IsNode(Src)) { Graph->AddNode(Src); } 01094 if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); } 01095 const int SrcDeg = Graph->GetNode(Src).GetOutDeg(); 01096 const int DstDeg = Graph->GetNode(Dst).GetInDeg(); 01097 const int EdgeTm = EI.GetDat().GetAbsSecs(); 01098 const int SrcTm = EI.GetSrcNDat().GetAbsSecs(); 01099 const int DstTm = EI.GetDstNDat().GetAbsSecs(); 01100 NodeAgeH.AddDat((DstTm-SrcTm)/3600) += 1; 01101 if (SrcDeg == 0 || DstDeg == 0) { 01102 Node1AgeH.AddDat((DstTm-SrcTm)/3600) += 1; } 01103 SrcTmH.AddDat((EdgeTm-SrcTm)/3600) += 1; 01104 DstTmH.AddDat((EdgeTm-DstTm)/3600) += 1; 01105 // add edge 01106 Graph->AddEdge(Src, Dst); 01107 if (edge % 1000 == 0) { 01108 printf("\r%dk / %dk [total %s]", edge/1000, EIdV.Len()/1000, ExeTm.GetStr()); } 01109 } 01110 TGnuPlot::PlotValCntH(NodeAgeH, "tmDstSrc."+FNmPref, TStr::Fmt("%s, Node time difference.", Desc.CStr()), 01111 "Time(destination) - Time(source) [hours]", "Count"); 01112 TGnuPlot::PlotValCntH(Node1AgeH, "tmDstSrc1."+FNmPref, TStr::Fmt("%s. 1-ST EDGE of one of the nodes.", Desc.CStr()), 01113 "Time(destination) - Time(source) [hours]", "Count"); 01114 TGnuPlot::PlotValCntH(SrcTmH, "tmEdgeSrc."+FNmPref, TStr::Fmt("%s. Edge time - source node time.", Desc.CStr()), 01115 "Edge time - source node time [hours]", "Count"); 01116 TGnuPlot::PlotValCntH(DstTmH, "tmEdgeDst."+FNmPref, TStr::Fmt("%s. Edge time - destination node time.", Desc.CStr()), 01117 "Edge time - destination node time [hours]", "Count"); 01118 printf("\n"); 01119 } 01120 01121 // plot the distribution whether the edge to node that closes the triad was 01122 // chosen random or preferentially 01123 void TTimeNENet::PlotCloseTriad(const TStr& FNmPref, TStr Desc, const bool& OnlyBackEdges) const { 01124 if (Desc.Empty()) { Desc = FNmPref; } 01125 printf("Random vs. preferential triad closing: %s\n", OnlyBackEdges?"OnlyBackEdges":""); 01126 THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge 01127 THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst) 01128 PUNGraph Graph = TUNGraph::New(GetNodes(), -1); 01129 TIntV EIdV; GetEIdByTm(EIdV); 01130 TIntV CmnV, SrcNbrV, DstNbrV; 01131 TExeTm ExeTm; 01132 TFltV RndRdnV, PrefPrefV, PrefRndV, RndPrefV; 01133 TFltFltH RndRndH, PrefPrefH, PrefRndH, RndPrefH; 01134 int TriadEdges = 0; 01135 FILE *F = fopen(TStr::Fmt("%s-prob.tab", FNmPref.CStr()).CStr(), "wt"); 01136 fprintf(F, "#Probability of picking a node that closes the triangle (pick preferentially vs. uniformly at random)\n"); 01137 fprintf(F, "#i\tEId\tRndRnd\tRndPref\tPrefRnd\tPrefPref\n"); 01138 for (int edge = 0; edge < EIdV.Len(); edge++) { 01139 const TEdgeI EI = GetEI(EIdV[edge]); 01140 const int Src = EI.GetSrcNId(); 01141 const int Dst = EI.GetDstNId(); 01142 if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge 01143 if (! Graph->IsNode(Src)) { Graph->AddNode(Src); } 01144 if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); } 01145 if (OnlyBackEdges && EI.GetSrcNDat().GetAbsSecs()-EI.GetDstNDat().GetAbsSecs() < 0) { 01146 Graph->AddEdge(Src, Dst); continue; 01147 } 01148 // find common neighbors 01149 const TUNGraph::TNodeI SrcNI = Graph->GetNI(Src); 01150 const TUNGraph::TNodeI DstNI = Graph->GetNI(Dst); 01151 SrcNbrV.Clr(false); DstNbrV.Clr(false); 01152 for (int i = 0; i < SrcNI.GetOutDeg(); i++) { SrcNbrV.Add(SrcNI.GetOutNId(i)); } 01153 for (int i = 0; i < DstNI.GetOutDeg(); i++) { DstNbrV.Add(DstNI.GetOutNId(i)); } 01154 SrcNbrV.Sort(); DstNbrV.Sort(); 01155 SrcNbrV.Intrs(DstNbrV, CmnV); 01156 if (! CmnV.Empty()) { 01157 // triad completion 01158 const int SrcDeg = Graph->GetNode(Src).GetOutDeg(); 01159 const int DstDeg = Graph->GetNode(Dst).GetInDeg(); 01160 double RndRnd=0, RndPref=0, PrefRnd=0, PrefPref=0; 01161 int AllNbrDegSum=0;//, NbrDegSum = 0; 01162 //for (int i = 0; i < CmnV.Len(); i++) { 01163 // NbrDegSum += Graph->GetNode(CmnV[i]).GetOutDeg(); } 01164 for (int i = 0; i < SrcNI.GetOutDeg(); i++) { 01165 AllNbrDegSum += Graph->GetNode(SrcNI.GetOutNId(i)).GetOutDeg(); } 01166 // uniform-uniform node selection = 1/|cmn| sum_{i\in cmn} 1/(d_i-1) 01167 // preferential-uniform 01168 for (int i = 0; i < CmnV.Len(); i++) { 01169 const int Deg = Graph->GetNode(CmnV[i]).GetOutDeg(); 01170 RndRnd += (1.0/double(SrcDeg)) * (1.0/double(Deg-1)); 01171 PrefRnd += (Deg/double(AllNbrDegSum)) * (1.0/double(Deg-1)); 01172 } 01173 // uniform-preferential selection = 1/|cmn| sum_{i\in cmn} d_t / sum_{i--j} d_j 01174 // preferential-preferential 01175 for (int i = 0; i < CmnV.Len(); i++) { 01176 const TUNGraph::TNode N = Graph->GetNode(CmnV[i]); 01177 const int Deg = N.GetOutDeg(); 01178 int DegSum = 0; 01179 for (int j = 0; j < N.GetOutDeg(); j++) { 01180 if (N.GetOutNId(j) != Src) { 01181 DegSum += Graph->GetNode(N.GetOutNId(j)).GetOutDeg(); } 01182 } 01183 RndPref += 1.0/double(SrcDeg) * DstDeg/double(DegSum); 01184 PrefPref += (Deg/double(AllNbrDegSum)) * (DstDeg/double(DegSum)); 01185 } 01186 IAssert(0.0<RndRnd && RndRnd<=1.0); IAssert(0.0<RndPref && RndPref<=1.0); 01187 IAssert(0.0<PrefRnd && PrefRnd<=1.0); IAssert(0.0<PrefPref && PrefPref<=1.0); 01188 RndRndH.AddDat(TMath::Round(RndRnd, 2)) += 1.0; 01189 RndPrefH.AddDat(TMath::Round(RndPref, 2)) += 1.0; 01190 PrefRndH.AddDat(TMath::Round(PrefRnd, 2)) += 1.0; 01191 PrefPrefH.AddDat(TMath::Round(PrefPref, 2)) += 1.0; 01192 fprintf(F, "%d\t%d\t%g\t%g\t%g\t%g\n", edge, EI.GetId(), RndRnd, RndPref, PrefRnd, PrefPref); 01193 TriadEdges++; 01194 } 01195 Graph->AddEdge(Src, Dst); 01196 if (edge % 1000 == 0) { 01197 printf("\r%dk / %dk triad edges %dk [total %s]", edge/1000, EIdV.Len()/1000, TriadEdges/1000, ExeTm.GetStr()); } 01198 } 01199 fclose(F); 01200 printf("\nRandom vs. preferential triad closing\n"); 01201 printf("All edges: %d\n", Graph->GetEdges()); 01202 printf("Triad closing edges: %d\n", TriadEdges); 01203 // plot distribution 01204 TFltPrV RndRndPrV; RndRndH.GetKeyDatPrV(RndRndPrV); RndRndPrV.Sort(); 01205 TFltPrV RndPrefPrV; RndPrefH.GetKeyDatPrV(RndPrefPrV); RndPrefPrV.Sort(); 01206 TFltPrV PrefRndPrV; PrefRndH.GetKeyDatPrV(PrefRndPrV); PrefRndPrV.Sort(); 01207 TFltPrV PrefPrefPrV; PrefPrefH.GetKeyDatPrV(PrefPrefPrV); PrefPrefPrV.Sort(); 01208 { TGnuPlot GP(TStr::Fmt("probPdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges())); 01209 GP.AddPlot(RndRndPrV, gpwLinesPoints, "Random--Random"); 01210 GP.AddPlot(RndPrefPrV, gpwLinesPoints, "Random--Preferential"); 01211 GP.AddPlot(PrefRndPrV, gpwLinesPoints, "Preferential--Random"); 01212 GP.AddPlot(PrefPrefPrV, gpwLinesPoints, "Preferential--Preferential"); 01213 GP.SetXYLabel("Probability", "Count"); 01214 GP.SavePng(); } 01215 { TGnuPlot GP(TStr::Fmt("probCdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges())); 01216 GP.AddPlot(TGUtil::GetCdf(RndRndPrV), gpwLinesPoints, "Random--Random"); 01217 GP.AddPlot(TGUtil::GetCdf(RndPrefPrV), gpwLinesPoints, "Random--Preferential"); 01218 GP.AddPlot(TGUtil::GetCdf(PrefRndPrV), gpwLinesPoints, "Preferential--Random"); 01219 GP.AddPlot(TGUtil::GetCdf(PrefPrefPrV), gpwLinesPoints, "Preferential--Preferential"); 01220 GP.SetXYLabel("Cumulative probability", "Count"); 01221 GP.SavePng(); } 01222 }*/ 01223 01224 PTimeNENet TTimeNENet::GetGnmRndNet(const int& Nodes, const int& Edges) { 01225 printf("Generating G_nm(%d, %d)\n", Nodes, Edges); 01226 int Src, Dst; 01227 PTimeNENet Net = TTimeNENet::New(); 01228 Net->Reserve(Nodes, Edges); 01229 for (int e = 0; e < Edges; e++) { 01230 Src = TInt::Rnd.GetUniDevInt(Nodes); 01231 Dst = TInt::Rnd.GetUniDevInt(Nodes); 01232 while (Dst == Src || Net->IsEdge(Src, Dst)) { 01233 Dst = TInt::Rnd.GetUniDevInt(Nodes); } 01234 if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(e)); } 01235 if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(e)); } 01236 Net->AddEdge(Src, Dst, -1, TSecTm(e)); 01237 } 01238 return Net; 01239 } 01240 01241 // ACL, model B: Aiello, Chung, Lu: Random evolution in massive graphs 01242 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& Edges, const double& GammaIn, const double& GammaOut) { 01243 const double Alpha = Nodes/double(Edges); 01244 printf("Generating PA(%d, %d), with slope in:%.1f, out: %.1f\n", Nodes, Edges, 01245 2+GammaIn/(Alpha/(1-Alpha)), 2+GammaOut/(Alpha/(1-Alpha))); 01246 // init 01247 int nodes=0, edges=0, time=0, iter=0; 01248 TIntV OutW(Edges, 0), InW(Edges, 0); 01249 PTimeNENet Net = TTimeNENet::New(); 01250 Net->Reserve(Nodes, Edges); 01251 // 1st node 01252 Net->AddNode(0, TSecTm(time++)); nodes++; 01253 OutW.Add(0); InW.Add(0); 01254 while (edges < Edges) { 01255 int Src=-1, Dst=-1; iter++; 01256 if (TInt::Rnd.GetUniDev() < Alpha) { 01257 if (nodes < Nodes) { 01258 IAssert(Net->AddNode(nodes, TSecTm(time++))); 01259 nodes++; } 01260 } else { 01261 if (TInt::Rnd.GetUniDev() < nodes*GammaIn/double(edges+nodes*GammaIn)) { 01262 Src = TInt::Rnd.GetUniDevInt(nodes); } 01263 else { Src = OutW[TInt::Rnd.GetUniDevInt(OutW.Len())]; } 01264 if (TInt::Rnd.GetUniDev() < nodes*GammaOut/double(edges+nodes*GammaOut)) { 01265 Dst = TInt::Rnd.GetUniDevInt(nodes); } 01266 else { Dst = InW[TInt::Rnd.GetUniDevInt(InW.Len())]; } 01267 } 01268 if (Src == Dst || Net->IsEdge(Src, Dst)) { 01269 continue; 01270 } 01271 //printf("%d/%d %d %d\n", edges, iter, Src, Dst); 01272 if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(time++)); nodes++; } 01273 if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(time++)); nodes++; } 01274 Net->AddEdge(Src, Dst, -1, TSecTm(time++)); 01275 OutW.Add(Src); InW.Add(Dst); edges++; 01276 } 01277 for (int node = 0; node < Nodes; node++) { 01278 if (! Net->IsNode(node)) { 01279 Net->AddNode(node, TSecTm(time++)); } 01280 } 01281 return Net; 01282 } 01283 01284 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& OutDeg) { 01285 printf("Generating PA, nodes:%d, out-deg:%d\n", Nodes, OutDeg); 01286 // init 01287 int time=0; 01288 PTimeNENet Net = TTimeNENet::New(); 01289 Net->Reserve(Nodes, OutDeg*Nodes); 01290 Net->AddNode(0, TSecTm(++time)); Net->AddNode(1, TSecTm(++time)); 01291 Net->AddEdge(0, 1, -1, TSecTm(++time)); 01292 TIntV NIdV; NIdV.Add(0); NIdV.Add(1); 01293 TIntSet NodeSet; 01294 for (int node = 2; node <= Nodes; node++) { 01295 NodeSet.Clr(false); 01296 while (NodeSet.Len() < OutDeg && NodeSet.Len() < node) { 01297 NodeSet.AddKey(NIdV[TInt::Rnd.GetUniDevInt(NIdV.Len())]); 01298 } 01299 const int N = Net->AddNode(node, TSecTm(++time)); 01300 for (int i = 0; i < NodeSet.Len(); i++) { 01301 Net->AddEdge(node, NodeSet[i], -1, TSecTm(++time)); 01302 NIdV.Add(N); NIdV.Add(NodeSet[i]); 01303 } 01304 } 01305 return Net; 01306 } 01307 01308 void TTimeNENet::SaveEdgeTm(const TStr& EdgeFNm, const bool& RenumberNId, const bool& RelativeTm) const { 01309 TIntV EIdV; GetEIdByTm(EIdV); 01310 const int BegTm = RelativeTm ? GetEDat(EIdV[0]).GetAbsSecs() : 0; 01311 TIntSet NIdMap; 01312 if (RenumberNId) { NIdMap.Gen(GetNodes()); } 01313 FILE *F = fopen(EdgeFNm.CStr(), "wt"); 01314 //fprintf(F, "#Nodes\t%d\n#Edges\t%d\n", GetNodes(), GetEdges()); 01315 //fprintf(F, "#<src>\t<dst>\t<time>\n"); 01316 for (int e =0; e < EIdV.Len(); e++) { 01317 const TEdgeI EI = GetEI(EIdV[e]); 01318 if (RenumberNId) { 01319 const int src = EI.GetSrcNId(); 01320 const int dst = EI.GetDstNId(); 01321 NIdMap.AddKey(src); NIdMap.AddKey(dst); 01322 fprintf(F, "%d\t%d\t%d\n", NIdMap.GetKeyId(src), NIdMap.GetKeyId(dst), EI().GetAbsSecs()-BegTm); 01323 }else { 01324 fprintf(F, "%d\t%d\t%d\n", EI.GetSrcNId(), EI.GetDstNId(), EI().GetAbsSecs()-BegTm); } 01325 } 01326 fclose(F); 01327 } 01328 01329 PTimeNENet TTimeNENet::GetSmallNet() { 01330 PTimeNENet Net = TTimeNENet::New(); 01331 for (int i = 1; i <= 6; i++) { 01332 Net->AddNode(i, TSecTm(0)); } 01333 int tm = 1; 01334 Net->AddEdge(1, 2, -1, TSecTm(tm++)); 01335 Net->AddEdge(3, 4, -1, TSecTm(tm++)); 01336 Net->AddEdge(3, 1, -1, TSecTm(tm++)); 01337 Net->AddEdge(5, 6, -1, TSecTm(tm++)); 01338 Net->AddEdge(6, 4, -1, TSecTm(tm++)); 01339 Net->AddEdge(5, 3, -1, TSecTm(tm++)); 01340 Net->AddEdge(5, 4, -1, TSecTm(tm++)); 01341 Net->AddEdge(5, 2, -1, TSecTm(tm++)); 01342 return Net; 01343 } 01344 01345 PTimeNENet TTimeNENet::LoadFlickr(const TStr& NodeFNm, const TStr& EdgeFNm) { 01346 const int BegOfTm = 1047369600; // Tue Mar 11 01:00:00 2003 (begining of Flickr) 01347 PTimeNENet Net = TTimeNENet::New(); 01348 printf("Adding nodes..."); 01349 { TSsParser Ss(NodeFNm, ssfWhiteSep); 01350 while (Ss.Next()) { 01351 const int NId = Ss.GetInt(0); 01352 const int Tm = Ss.GetInt(1)+BegOfTm; 01353 if (TSecTm(Tm) < TSecTm(2002, 1, 1)) { 01354 printf(" skip node %g (time %d)\n", (double) Ss.GetLineNo(), Ss.GetInt(1)); continue; } 01355 Net->AddNode(NId, TSecTm(Tm)); 01356 } } 01357 printf(" %d nodes\n", Net->GetNodes()); 01358 printf("Adding edges..."); 01359 int SkipCnt=0; 01360 //TIntH SkipDiffCnt; 01361 { TSsParser Ss(EdgeFNm, ssfWhiteSep); 01362 while (Ss.Next()) { 01363 const int NId1 = Ss.GetInt(0); 01364 const int NId2 = Ss.GetInt(1); 01365 const TSecTm Tm = TSecTm(Ss.GetInt(2)+BegOfTm); 01366 if (! Net->IsNode(NId1) || ! Net->IsNode(NId2)) { printf("not node\n"); continue; } 01367 if (Tm < TSecTm(2002, 1, 1)) { SkipCnt++; 01368 printf(" skip edge %g (time %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr()); continue; } 01369 if (Tm+600 < Net->GetNDat(NId1)) { 01370 printf(" 1:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId1).GetStr().CStr()); 01371 //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId1).GetAbsSecs()) += 1; 01372 SkipCnt++; continue; } 01373 if (Tm+600 < Net->GetNDat(NId2)) { SkipCnt++; 01374 printf(" 2:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId2).GetStr().CStr()); 01375 //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId2).GetAbsSecs()) += 1; 01376 SkipCnt++; continue; } 01377 Net->AddEdge(NId1, NId2, -1, TSecTm(Tm)); 01378 } } 01379 //TGnuPlot::PlotValCntH(SkipDiffCnt, "flickr-edgeNodeDiff", "", "seconds", "count"); 01380 printf(" %d edges\n", Net->GetEdges()); 01381 printf(" %d edges skipped (edge time < node time)\n", SkipCnt); 01382 Net->UpdateNodeTimes(); 01383 return Net; 01384 } 01385 01386 // load network where fields are separated by Separator and each line contains (*Fld are column indexes) 01387 // .. <source> .. <destination> .. <time> 01388 PTimeNENet TTimeNENet::LoadEdgeTm(const TStr& EdgeFNm, const int& SrcFld, const int& DstFld, const int& TimeFld, const TSsFmt& Separator) { 01389 printf("Loading %s\n", EdgeFNm.CStr()); 01390 PTimeNENet Net = TTimeNENet::New(); 01391 TStrHash<TInt> StrToId(Mega(1), true); // node id to string 01392 int LineCnt=0; 01393 TExeTm ExeTm; 01394 TSsParser Ss(EdgeFNm, Separator); 01395 TSecTm MinTm=TSecTm::GetCurTm(), MaxTm=TSecTm(100); 01396 while (Ss.Next()) { 01397 if (Ss.IsCmt()) { continue; } 01398 IAssert(Ss.Len() > TimeFld); 01399 const char* Node1 = Ss.GetFld(SrcFld); 01400 const char* Node2 = Ss.GetFld(DstFld); 01401 const char* TmStr = Ss.GetFld(TimeFld); 01402 if (strcmp(TmStr,"NULL")==0) { continue; } 01403 //const TSecTm Tm = TSecTm::GetDtTmFromYmdHmsStr(TmStr); 01404 const TSecTm Tm(atoi(TmStr)); 01405 const int NId1 = StrToId.AddKey(Node1); 01406 const int NId2 = StrToId.AddKey(Node2); 01407 if (! Net->IsNode(NId1)) { Net->AddNode(NId1, TSecTm()); } 01408 if (! Net->IsNode(NId2)) { Net->AddNode(NId2, TSecTm()); } 01409 MinTm=TMath::Mn(MinTm, Tm); 01410 MaxTm=TMath::Mx(MaxTm, Tm); 01411 Net->AddEdge(NId1, NId2, -1, Tm); 01412 if (++LineCnt % 1000 == 0) { 01413 printf("\r %dk lines processed: %d %d [%s]", LineCnt/1000, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr()); } 01414 } 01415 printf("\r %d lines processed: %d %d [%s]\n", LineCnt, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr()); 01416 printf(" Data range %s -- %s\n", MinTm.GetStr().CStr(), MaxTm.GetStr().CStr()); 01417 //TSnap::PrintInfo(Net, "", "", false); 01418 Net->UpdateNodeTimes(); 01419 return Net; 01420 }