SNAP Library 2.4, Developer Reference  2015-05-11 19:40:56
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
timenet.cpp
Go to the documentation of this file.
1 // Time Network
4  if (this != &TimeNet) {
5  TNet::operator=(TimeNet);
6  }
7  return *this;
8 }
9 
10 PTimeNet TTimeNet::GetSubGraph(const TIntV& NIdV) const {
11  PTimeNet NewNetPt = TTimeNet::New();
12  TTimeNet& NewNet = *NewNetPt;
13  NewNet.Reserve(NIdV.Len(), -1);
14  int node, edge;
15  TNodeI NI;
16  for (node = 0; node < NIdV.Len(); node++) {
17  NewNet.AddNode(NIdV[node], GetNDat(NIdV[node])); // also copy the node data
18  }
19  for (node = 0; node < NIdV.Len(); node++) {
20  NI = GetNI(NIdV[node]);
21  const int SrcNId = NI.GetId();
22  for (edge = 0; edge < NI.GetOutDeg(); edge++) {
23  const int OutNId = NI.GetOutNId(edge);
24  if (NewNet.IsNode(OutNId)) {
25  NewNet.AddEdge(SrcNId, OutNId); }
26  }
27  }
28  NewNet.Defrag();
29  return NewNetPt;
30 }
31 
33  TIntV NIdV; GetNIdByTm(NIdV);
35  for (int i = 0; i < NIdV.Len(); i++) {
36  const int Src = NIdV[i];
37  const TTimeNet::TNodeI NI = GetNI(Src);
38  const TSecTm SrcTm = NI.GetDat();
39  if (! OutNet->IsNode(Src)) { OutNet->AddNode(Src, SrcTm); }
40  for (int e = 0; e < NI.GetOutDeg(); e++) {
41  if (! OutNet->IsNode(NI.GetOutNId(e))) { OutNet->AddNode(NI.GetOutNId(e), SrcTm); }
42  OutNet->AddEdge(Src, NI.GetOutNId(e), -1, SrcTm);
43  }
44  }
45  return OutNet;
46 }
47 
48 void TTimeNet::GetNIdByTm(TIntV& NIdV) const {
49  TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
50  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
51  TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
52  TmToNIdV.Sort();
53  NIdV.Gen(GetNodes(), 0);
54  for (int i = 0; i < TmToNIdV.Len(); i++) {
55  NIdV.Add(TmToNIdV[i].Dat); }
56 }
57 
58 // put nodes into buckets by TmUnit
59 void TTimeNet::GetTmBuckets(const TTmUnit& TmUnit, TTmBucketV& TmBucketV) const {
60  THash<TInt, TIntV> TmIdToNIdVH;
61  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
62  const int TmId = NodeI().Round(TmUnit);
63  if (! TmIdToNIdVH.IsKey(TmId)) TmIdToNIdVH.AddKey(TmId);
64  TmIdToNIdVH.GetDat(TmId).Add(NodeI.GetId());
65  }
66  TVec<TPair<TInt, TIntV> > TmIdNIdVV;
67  TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
68  TmIdNIdVV.Sort();
69  TmBucketV.Gen(TmIdNIdVV.Len());
70  for (int i = 0; i < TmIdNIdVV.Len(); i++) {
71  TTmBucket& Bucket = TmBucketV[i];
72  Bucket.BegTm = TmIdNIdVV[i].Val1;
73  Bucket.NIdV = TmIdNIdVV[i].Val2;
74  }
75 }
76 
77 void TTimeNet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
78  TIntV NIdV;
79  GetNIdByTm(NIdV);
80  TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
81  for (int i = 0; i < NIdV.Len(); i++) {
82  const int b = i/NodesPerBucket;
83  if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
84  TmBucketV[b].NIdV.Add(NIdV[i]);
85  }
86 }
87 
88 PGStatVec TTimeNet::TimeGrowth(const TTmUnit& TmUnit, const TFSet& TakeStat, const TSecTm& StartTm) const {
89  PGStatVec GrowthStat = new TGStatVec(TmUnit, TakeStat);
90  TTmBucketV TmBucketV;
91  GetTmBuckets(TmUnit, TmBucketV);
92  TIntV NodeIdV;
93  TExeTm ExeTm;
94  for (int t = 0; t < TmBucketV.Len(); t++) {
95  NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
96  printf("\n=== %d/%d] %s (%d nodes)\n", t+1, TmBucketV.Len(),
97  TmBucketV[t].BegTm.GetStr().CStr(), NodeIdV.Len()); ExeTm.Tick();
98  if (TmBucketV[t].BegTm < StartTm) continue;
99  //PNGraph PreGraph = GetSubGraph(NodeIdV, true); // renumber nodes
100  PNGraph PreGraph = TSnap::ConvertSubGraph<PNGraph>(PTimeNet((TTimeNet*)this), NodeIdV); // don't renumber nodes
101  GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
102  }
103  return GrowthStat;
104 }
105 
106 void TTimeNet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
107  const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc, const bool& AlsoRewire) const {
108  const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
109  TTmBucketV TmBucketV;
110  GetTmBuckets(TmUnit, TmBucketV);
111  TIntV NodeIdV;
112  TExeTm ExeTm, Run1Tm;
113  TFltTrV TmDiamV, NdsDiamV;
114  TFltTrV RwTmDiamV, RwNdsDiamV;
115  for (int t = 0; t < TmBucketV.Len(); t++) {
116  NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
117  printf("\n*** %d/%d] at %s (%d nodes)\n", t+1, TmBucketV.Len(),
118  TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), NodeIdV.Len()); ExeTm.Tick();
119  if (TmBucketV[t].BegTm < StartTm) continue;
120  //PUNGraph PreGraph = GetSubUNGraph(NodeIdV, true);
121  PUNGraph PreGraph = TSnap::ConvertSubGraph<PUNGraph>(PTimeNet((TTimeNet*)this), NodeIdV);
122  { TMom Mom;
123  for (int r = 0; r < NDiamRuns; r++) {
124  printf("%d...", r+1); Run1Tm.Tick();
125  const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph) : PreGraph);
126  Mom.Add(EffDiam); printf("[%s]\r", Run1Tm.GetTmStr());
127  }
128  Mom.Def();
129  TmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
130  NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
131  NdsDiamV.Sort();
132  printf(" [%s] \n", ExeTm.GetTmStr()); }
133  if (AlsoRewire) {
134  //PUNGraph RwGraph = TGGen::GenRndDegSeqS(PreGraph, 50, TInt::Rnd); // edge switching model
135  TIntV DegSeqV(PreGraph->GetNodes(), 0);
136  for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) { DegSeqV.Add(NI.GetDeg()); }
137  PUNGraph RwGraph = TSnap::GenConfModel(DegSeqV, TInt::Rnd);
138  printf("Configuration model: (%d, %d) --> (%d, %d)\n", PreGraph->GetNodes(), PreGraph->GetEdges(), RwGraph->GetNodes(), RwGraph->GetEdges());
139  TMom Mom;
140  for (int r = 0; r < NDiamRuns; r++) {
141  printf(" diam run %d...", r+1); Run1Tm.Tick();
142  const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph):PreGraph);
143  Mom.Add(EffDiam); printf(" current run [%s]\n", Run1Tm.GetTmStr());
144  }
145  Mom.Def();
146  RwTmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
147  RwNdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
148  RwNdsDiamV.Sort();
149  printf("done with diameter. Total time [%s] \n", ExeTm.GetTmStr());
150  }
151  // plot
152  { TGnuPlot GnuPlot("diamEff-T."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
153  GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), WccStr+"Effective Diameter");
154  GnuPlot.AddErrBar(TmDiamV, "True", "");
155  if (! RwTmDiamV.Empty()) { GnuPlot.AddErrBar(RwTmDiamV, "Rewired", "");}
156  GnuPlot.SavePng(); }
157  { TGnuPlot GnuPlot("diamEff-N."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
158  GnuPlot.SetXYLabel("NODES", WccStr+"Effective Diameter");
159  GnuPlot.AddErrBar(NdsDiamV, "True", "");
160  if (! RwNdsDiamV.Empty()) { GnuPlot.AddErrBar(RwNdsDiamV, "Rewired", "");}
161  GnuPlot.SavePng(); }
162  }
163 }
164 
165 // pretend we only have link data starting in PostTmDiam
166 // compare the diameter of the nodes after PostTmDiam with diameter
167 // of the nodes after PostTmDiam but *also* using nodes and edges
168 // from before PostTmDiam
169 void TTimeNet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
170  const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam) const {
171  printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n %s group by %s.\n",
172  FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
173  printf(" Delete out-edges of pre time %s nodes.\n Take subgraph of post year %s subgraph.\n\n",
174  DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
175  const int NDiamRuns = 10;
176  const int NSamples = 100;
177  //PUNGraph FullGraph = GetUNGraph();
178  PUNGraph FullGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
179  // delete past
180  if (DelPreTmEdges.IsDef()) {
181  int NDelNodes = 0, NDelEdges = 0;
182  printf("Deleting pre-%s node out-links\n", DelPreTmEdges.GetStr().CStr());
183  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
184  if (NodeI() < DelPreTmEdges) {
185  const int NodeId = NodeI.GetId();
186  for (int edge = 0; edge < NodeI.GetOutDeg(); edge++) {
187  FullGraph->DelEdge(NodeId, NodeI.GetOutNId(edge)); }
188  NDelEdges += NodeI.GetOutDeg(); NDelNodes++;
189  }
190  }
191  printf(" Deleted %d nodes out-edges (%d edges total).\n", NDelNodes, NDelEdges);
192  FullGraph->Defrag(true);
193  }
194  PGStatVec GrowthStat = TGStatVec::New(TmUnit);
195  TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
196  TIntV NodeIdV;
197  TExeTm ExeTm;
198  TTmBucketV TmBucketV;
199  GetTmBuckets(TmUnit, TmBucketV);
200  for (int t = 0; t < TmBucketV.Len(); t++) {
201  printf("\nGraph: %s (%d / %d) [%s]\n", TmBucketV[t].BegTm.GetTmStr().CStr(),
202  t+1, TmBucketV.Len(), TExeTm::GetCurTm());
203  // up-to-year subgraph
204  NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
205  if (TmBucketV[t].BegTm < PostTmDiam) { continue; }
206  const PUNGraph PreGraph = TSnap::GetSubGraph(FullGraph, NodeIdV, true);
207  const PUNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
208  TIntV PostYearNIdV, WccPostYearNIdV;
209  for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) {
210  if (GetNDat(NI.GetId()) >= PostTmDiam) {
211  PostYearNIdV.Add(NI.GetId());
212  if (WccGraph->IsNode(NI.GetId())) { WccPostYearNIdV.Add(NI.GetId()); }
213  }
214  }
215  TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
216  // diameter of PostYearDiam subgraph using whole graph (all edges)
217  int FullDiam; double EffDiam;
218  for (int r = 0; r < NDiamRuns; r++) {
219  if (! PostYearNIdV.Empty()) {
220  TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false, EffDiam, FullDiam);
221  PreDiamMom.Add(FullDiam); PreEffDiamMom.Add(EffDiam);
222  }
223  if (! WccPostYearNIdV.Empty()) {
224  TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false, EffDiam, FullDiam);
225  WccDiamMom.Add(FullDiam); WccEffDiamMom.Add(EffDiam);
226  }
227  printf(" diam: %d [%s] \r", r+1, ExeTm.GetTmStr()); ExeTm.Tick();
228  }
229  PreDiamMom.Def(); PreEffDiamMom.Def();
230  WccDiamMom.Def(); WccEffDiamMom.Def();
231  // save stat
232  PGStat GraphStatPt = GrowthStat->Add(TmBucketV[t].BegTm);
233  TGStat& GS = *GraphStatPt;
234  GS.TakeBasicStat(PreGraph, false);
235  GS.TakeBasicStat(WccGraph, true);
236  GS.SetVal(gsvFullDiam, PreDiamMom.GetMean()); // mean
237  GS.SetVal(gsvEffDiam, PreEffDiamMom.GetMean());
238  GS.SetVal(gsvFullWccDiam, WccDiamMom.GetMean());
239  GS.SetVal(gsvEffWccDiam, WccEffDiamMom.GetMean());
240  GS.SetVal(gsvFullDiamDev, PreDiamMom.GetSDev()); // variance
241  GS.SetVal(gsvEffDiamDev, PreEffDiamMom.GetSDev());
242  GS.SetVal(gsvFullWccDiamDev, WccDiamMom.GetSDev());
243  GS.SetVal(gsvEffWccDiamDev, WccEffDiamMom.GetSDev());
244  { TFOut FOut("growth."+FNmPref+".gStatVec"); GrowthStat->Save(FOut); }
245  GrowthStat->SaveTxt(FNmPref, TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%s\nPostYearDiam\t%s\n",
246  Desc.CStr(), DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr()));
247  }
248  // diameter plots
249  //GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
250  // DelPreEdges, PostYearDiam));*/
251 }
252 
253 void TTimeNet::PlotCCfOverTm(const TStr& FNmPref, TStr Desc, const TTmUnit& TmUnit, const int& NodesBucket) const {
254  if (Desc.Empty()) { Desc = FNmPref; }
255  TTimeNet::TTmBucketV TmBucketV;
256  TStr XLbl;
257  if (TmUnit == tmuNodes) {
258  XLbl = "Number of nodes (time)";
259  IAssert(NodesBucket > 0);
260  GetNodeBuckets(NodesBucket, TmBucketV); }
261  else {
262  XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
263  GetTmBuckets(TmUnit, TmBucketV);
264  }
265  TIntV NodeIdV;
266  TFltPrV DegToCCfV, CcfV, OpClV, OpV;
267  TVec<TTuple<TFlt, 4> > OpenClsV;
268  TTuple<TFlt, 4> Tuple;
269  TExeTm ExeTm;
270  int XVal = 0;
271  printf("Clustering coefficient over time:\n %d edges, %d edges per bucket, %d buckets \n", GetEdges(), 100000, TmBucketV.Len());
272  PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
273  for (int t = 0; t < TmBucketV.Len(); t++) {
274  printf("\r %d/%d: ", t+1, TmBucketV.Len());
275  NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
276  int64 Open=0, Close=0;
277  const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV);
278  const double CCf = TSnap::GetClustCf(Graph, DegToCCfV, Open, Close);
279  if (TmUnit == tmuNodes) { XVal = Graph->GetNodes(); }
280  else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
281  CcfV.Add(TFltPr(XVal, CCf));
282  OpClV.Add(TFltPr(XVal, Open+Close==0?0:(double)Close/(Open+Close)));
283  OpV.Add(TFltPr(XVal, Open==0?0:(double)Close/Open));
284  Tuple[0]=Graph->GetNodes();
285  Tuple[1]=Graph->GetEdges();
286  Tuple[2]=(double)Close; Tuple[3]=(double)Open;
287  OpenClsV.Add(Tuple);
288  printf(" %s", ExeTm.GetStr());
289  TGnuPlot::PlotValV(DegToCCfV, TStr::Fmt("ccfAt%02dtm.%s", t+1, FNmPref.CStr()),
290  TStr::Fmt("%s. At time %d. Clustering Coefficient. G(%d,%d)", Desc.CStr(), t+1, Graph->GetNodes(), Graph->GetEdges()),
291  "Degree", "Clustering coefficient", gpsLog10XY, false);
292  }
293  TGnuPlot::PlotValV(CcfV, "ccfOverTm."+FNmPref, Desc+". Average Clustering Coefficient", XLbl, "Average clustering coefficient", gpsAuto, false);
294  TGnuPlot::PlotValV(OpClV, "ClsOpnTr1."+FNmPref, Desc+". Close/(Open+Closed) triads", XLbl, "Close / (Open+Closed) triads", gpsAuto, false);
295  TGnuPlot::PlotValV(OpV, "ClsOpnTr2."+FNmPref, Desc+". Close/Open triads", XLbl, "Close / Open triads", gpsAuto, false);
296  TGnuPlot::SaveTs(OpenClsV, "ClsOpnTr."+FNmPref+".tab", TStr::Fmt("#%s\n#Nodes\tEdges\tClosed\tOpenTriads", Desc.CStr()));
297  printf("\n");
298 }
299 
300 void TTimeNet::PlotMedianDegOverTm(const TStr& FNmPref, const TTmUnit& TmUnit, const int& NodesPerBucket) const {
301  TTimeNet::TTmBucketV TmBucketV;
302  TStr XLbl;
303  if (TmUnit == tmuNodes) {
304  XLbl = "Number of nodes (time)"; IAssert(NodesPerBucket > 0);
305  GetNodeBuckets(NodesPerBucket, TmBucketV); }
306  else {
307  XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
308  GetTmBuckets(TmUnit, TmBucketV); }
309  printf("\n\n%s\nMedian degree over time:\n %d edges, %d edges per bucket, %d buckets \n", FNmPref.CStr(), GetEdges(), NodesPerBucket, TmBucketV.Len());
310  TFltPrV MedDegV, MedInDegV, MedOutDegV;
311  TIntV NodeIdV;
312  int XVal;
313  PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
314  PNGraph NGraph = TSnap::ConvertGraph<PNGraph>(PTimeNet((TTimeNet*)this));
315  FILE *F = fopen(("gStat-"+FNmPref+".tab").CStr(), "wt");
316  fprintf(F, "UndirNodes\tUndirEdges\tUndirNonZNodes\tMedianDeg\tMeanDeg\tDirNodes\tDirEdges\tDirNonzNodes\tMedInDeg\tMedOutDeg\tMeanInDeg\tMeanOutDeg\n");
317  for (int t = 0; t < TmBucketV.Len(); t++) {
318  printf("\r %d/%d: ", t+1, TmBucketV.Len());
319  NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
320  if (TmUnit == tmuNodes) { XVal = NodeIdV.Len(); }
321  else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
322  // un graph
323  { const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV); TMom Mom;
324  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { if (NI.GetOutDeg()>0) { Mom.Add(NI.GetOutDeg());} }
325  Mom.Def(); MedDegV.Add(TFltPr(XVal, Mom.GetMedian()));
326  fprintf(F, "%d\t%d\t%d\t%f\t%f", Graph->GetNodes(), Graph->GetEdges(), TSnap::CntNonZNodes(Graph), (float)Mom.GetMedian(), (float)Mom.GetMean()); }
327  // directed graph
328  { const PNGraph Graph = TSnap::GetSubGraph<PNGraph>(NGraph, NodeIdV); TMom MomOut, MomIn;
329  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
330  if (NI.GetOutDeg()>0) { MomOut.Add(NI.GetOutDeg()); }
331  if (NI.GetInDeg()>0) { MomIn.Add(NI.GetInDeg()); } }
332  MomOut.Def(); MedOutDegV.Add(TFltPr(XVal, MomOut.GetMedian()));
333  MomIn.Def(); MedInDegV.Add(TFltPr(XVal, MomIn.GetMedian()));
334  fprintf(F, "\t%d\t%d\t%d\t%f\t%f\t%f\t%f\n", Graph->GetNodes(), Graph->GetEdges(), (int)TSnap::CntNonZNodes(Graph), (float)MomIn.GetMedian(), (float)MomOut.GetMedian(), (float)MomIn.GetMean(), (float)MomOut.GetMean()); }
335  }
336  fclose(F);
337  TGnuPlot::PlotValV(MedDegV, "medDeg."+FNmPref, FNmPref+" Median degree", TTmInfo::GetTmUnitStr(TmUnit), "Median degree");
338  TGnuPlot::PlotValV(MedOutDegV, "medOutDeg."+FNmPref, FNmPref+" Median OUT degree", TTmInfo::GetTmUnitStr(TmUnit), "Median OUT degree");
339  TGnuPlot::PlotValV(MedInDegV, "medInDeg."+FNmPref, FNmPref+" Median IN degree", TTmInfo::GetTmUnitStr(TmUnit), "Median IN degree");
340 }
341 
342 // load Arxiv affiliation network (papers to authors bipartite graph)
343 // <set1_id> <time> <set2_id1> <set2_id2> ... (all ids have to be unique)
345  PTimeNet TimeNetPt = TTimeNet::New();
346  TTimeNet& TimeNet = *TimeNetPt;
347  PSs Ss = TSs::LoadTxt(ssfTabSep, InFNm.CStr());
348  TIntH Set1IdH; // paper ids
349  TStrV StrTimeV;
350  for (int y = 0; y < Ss->GetYLen(); y++) {
351  if (Ss->At(0, y)[0] == '#') continue; // skip comments
352  if (Ss->GetXLen(y) < 3) continue; // there must be at least one author
353  const int& SrcId = Ss->At(0, y).GetInt();
354  IAssert(! Set1IdH.IsKey(SrcId));
355  IAssert(! TimeNet.IsNode(SrcId));
356  Set1IdH.AddKey(SrcId);
357  Ss->At(1, y).SplitOnAllCh('-', StrTimeV);
358  const int Year = StrTimeV[0].GetInt();
359  const int Month = StrTimeV[1].GetInt();
360  const int Day = StrTimeV[2].GetInt();
361  const TSecTm NodeTm(Year, Month, Day);
362  TimeNet.AddNode(SrcId, NodeTm);
363  for (int dst = 2; dst < Ss->GetXLen(y); dst++) {
364  const int DstId = Ss->At(dst, y).GetInt();
365  IAssert(! Set1IdH.IsKey(DstId));
366  if (! TimeNet.IsNode(DstId)) { TimeNet.AddNode(DstId, NodeTm); }
367  else { TimeNet.GetNDat(DstId) = TMath::Mn(NodeTm, TimeNet.GetNDat(DstId)); }
368  if (! TimeNet.IsEdge(SrcId, DstId)) { TimeNet.AddEdge(SrcId, DstId); }
369  }
370  }
371  TimeNet.Defrag();
372  printf("Bipartate graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
373  printf(" Bipartate sets: %d nodes --> %d nodes\n", TSnap::CntInDegNodes(TimeNetPt, 0),
374  TSnap::CntOutDegNodes(TimeNetPt, 0));
375  return TimeNetPt;
376 }
377 
378 // load Arxiv paper citation network
379 // PaperFNm: <paper> <time>
380 // CiteFNm: <source> <destination>
381 PTimeNet TTimeNet::LoadArxiv(const TStr& PaperFNm, const TStr& CiteFNm) {
382  TExeTm ExeTm;
383  PTimeNet TimeNetPt = TTimeNet::New();
384  TTimeNet& TimeNet = *TimeNetPt;
385  printf("Arxiv citation graph (paper publication year)...\n");
386  // load time data (file hep-ph-slacdates)
387  char Line [1024];
388  FILE *PprF = fopen(PaperFNm.CStr(), "rt");
389  TStr StrId, StrTime;
390  TStrV StrV, StrTimeV;
391  int N = 0, DuplicateNode = 0;
392  while (! feof(PprF)) {
393  Line[0] = 0;
394  fgets(Line, 1024, PprF);
395  if (strlen(Line) == 0 || Line[0] == '#') continue;
396  Line[strlen(Line)-1] = 0; // delete trailing '\n'
397  TStr(Line).SplitOnWs(StrV); IAssert(StrV.Len() == 2);
398  StrId = StrV[0]; StrTime = StrV[1]; IAssert(!StrId.Empty() && !StrTime.Empty());
399  StrTime.SplitOnAllCh('-', StrTimeV); IAssert(StrTimeV.Len() == 3);
400  const int NodeId = StrId.GetInt();
401  if (! TimeNet.IsNode(NodeId)) {
402  const int Year = StrTimeV[0].GetInt();
403  const int Month = StrTimeV[1].GetInt();
404  const int Day = StrTimeV[2].GetInt();
405  TimeNet.AddNode(NodeId, TSecTm(Year, Month, Day));
406  } else { DuplicateNode++; }
407  if (++N % 10000 == 0) printf("\r %dk", N/1000);
408  }
409  printf("\r %d nodes read. %d duplicate nodes. %s\n", N, DuplicateNode, ExeTm.GetTmStr());
410  fclose(PprF);
411  // load citations (file hep-ph-citations)
412  int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
413  FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
414  N = 0; ExeTm.Tick();
415  printf("Loading Arxiv citations...\n");
416  TIntPrV EdgeV;
417  THash<TInt, TSecTm> NIdToTimeH;
418  while (! feof(CiteF)) {
419  Line[0] = 0;
420  fgets(Line, 1024, CiteF);
421  if (strlen(Line) == 0 || Line[0] == '#') continue;
422  Line[strlen(Line)-1] = 0; // delete trailing '\n'
423  TStr(Line).SplitOnWs(StrV); IAssert(StrV.Len() == 2);
424  const int SrcNId = StrV[0].GetInt();
425  const int DstNId = StrV[1].GetInt();
426  EdgeV.Add(TIntPr(SrcNId, DstNId));
427  // infer time of destination node -- earliest paper that cites the node (paper)
428  if (! TimeNet.IsNode(DstNId) && TimeNet.IsNode(SrcNId)) {
429  const TSecTm& SrcTm = TimeNet.GetNDat(SrcNId);
430  if (! NIdToTimeH.IsKey(DstNId)) {
431  NIdToTimeH.AddDat(DstNId, SrcTm);
432  NewDstIds++;
433  }
434  else if (NIdToTimeH.GetDat(DstNId) < SrcTm) {
435  NIdToTimeH.GetDat(DstNId) = SrcTm; }
436  }
437  if (++N % 10000 == 0) printf("\r %dk", N/1000);
438  }
439  fclose(CiteF);
440  // add infeered time nodes (nodes which are cited by papers with known time)
441  for (int i = 0; i < NIdToTimeH.Len(); i++) {
442  TimeNet.AddNode(NIdToTimeH.GetKey(i), NIdToTimeH[i]);
443  }
444  // add links
445  for (int i = 0; i < EdgeV.Len(); i++) {
446  const int SrcNId = EdgeV[i].Val1;
447  const int DstNId = EdgeV[i].Val2;
448  if (TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
449  if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
450  else { DupLinks++; }
451  } else {
452  if (! TimeNet.IsNode(SrcNId)) {
453  NewSrcIds++;
454  if (! TimeNet.IsNode(DstNId)) { NewCits++; }
455  }
456  }
457  }
458  printf("\r %d citations read. %s\n", N, ExeTm.GetTmStr());
459  printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
460  printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
461  TIntV RmNIdV;
462  for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
463  if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
464  }
465  for (int i = 0; i < RmNIdV.Len(); i++) {
466  TimeNet.DelNode(RmNIdV[i]);
467  }
468  TimeNet.Defrag(true);
469  printf("\nFinal graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
470  printf(" Duplicate citations : %d\n", DupLinks);
471  printf(" Nodes without time which are cited : %d (add them to graph, use time of the earliest source node)\n", NewDstIds);
472  printf(" Citations between unknown time nodes : %d\n", NewCits);
473  printf(" Nodes without time which make citations: %d (do not add them into the graph)\n", NewSrcIds);
474  return TimeNetPt;
475 }
476 
477 // Patent citation network. Time units are seconds even though they mean years
478 PTimeNet TTimeNet::LoadPatents(const TStr& PatentFNm, const TStr& CiteFNm) {
479  int N = 0;
480  TExeTm ExeTm;
481  PTimeNet TimeNetPt = TTimeNet::New();
482  TTimeNet& TimeNet = *TimeNetPt;
483  TimeNet.Reserve(4000000, 160000000);
484  printf("parsing patent data (patent grant year)...\n");
485  // load time data (file pat63_99.txt)
486  const int& PatIdCol = 0;
487  const int& GYearCol = 1;
488  TStrV ColV;
489  char Line [1024];
490  FILE *PatF = fopen(PatentFNm.CStr(), "rt");
491  fgets(Line, 1024, PatF); // skip 1st line
492  while (! feof(PatF)) {
493  Line[0] = 0;
494  fgets(Line, 1024, PatF);
495  if (strlen(Line) == 0) break;
496  TStr(Line).SplitOnAllCh(',', ColV, false);
497  IAssert(ColV.Len() == 23);
498  const int PatentId = ColV[PatIdCol].GetInt();
499  const int GrantYear = ColV[GYearCol].GetInt();
500  IAssert(! TimeNet.IsNode(PatentId));
501  TimeNet.AddNode(PatentId, TSecTm(GrantYear)); // pretend year is a second
502  if (++N % 100000 == 0) printf("\r %dk", N/1000);
503  }
504  printf("\r %d patents read. %s\n", N, ExeTm.GetTmStr());
505  fclose(PatF);
506  // load citations (file cite75_99.txt)
507  printf("\nLoading patent citations...\n");
508  int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
509  N = 0; ExeTm.Tick();
510  TStr SrcId, DstId;
511  FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
512  fgets(Line, 1024, CiteF); // skip 1st line
513  while (! feof(CiteF)) {
514  Line[0] = 0;
515  fgets(Line, 1024, CiteF);
516  if (strlen(Line) == 0) break;
517  Line[strlen(Line)-1] = 0; // delete trailing '\n'
518  TStr(Line).SplitOnCh(SrcId, ',', DstId);
519  const int SrcNId = SrcId.GetInt();
520  const int DstNId = DstId.GetInt();
521  if (! TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
522  //TimeNet.AddNode(SrcNId, TSecTm(1, 1, 1)); NewSrcIds++;
523  //TimeNet.AddNode(DstNId, TSecTm(1, 1, 1)); NewDstIds++;
524  NewCits++;
525  continue;
526  }
527  else if (TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
528  TimeNet.AddNode(DstNId, TimeNet.GetNDat(SrcNId)); NewDstIds++;
529  }
530  else if (! TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
531  TimeNet.AddNode(SrcNId, TimeNet.GetNDat(DstNId)); NewSrcIds++;
532  }
533  if (! TimeNet.IsEdge(SrcNId, DstNId)) {
534  TimeNet.AddEdge(SrcNId, DstNId);
535  } else { DupLinks++; }
536  if (++N % 100000 == 0) printf("\r %dk", N/1000);
537  }
538  fclose(CiteF);
539  printf("\r %d citations read. %s\n\n", N, ExeTm.GetTmStr());
540  printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
541  printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
542  TIntV RmNIdV;
543  for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
544  if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
545  }
546  for (int i = 0; i < RmNIdV.Len(); i++) {
547  TimeNet.DelNode(RmNIdV[i]);
548  }
549  TimeNet.Defrag(true);
550  printf("\nFinal graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
551  printf(" Duplicate citations : %d\n", DupLinks);
552  printf(" Citations between unknown time nodes : %d\n", NewCits);
553  printf(" Nodes without time which make citations: %d\n", NewSrcIds);
554  printf(" Nodes without time which are cited : %d\n", NewDstIds);
555  return TimeNetPt;
556 }
557 
558 // Amazon ShareTheLove: nodes are people, we add an edge when there is a first
559 // recommendation between 2 people (we count each edge only once (regardless
560 // of how many recommendations there were between 2 persons)
562  PTimeNet TimeNetPt = TTimeNet::New();
563  TTimeNet& TimeNet = *TimeNetPt;
564  TimeNet.Reserve(3953993, -1);
565  printf("Amazon Share-the-Love...\n");
566  char line [2024], MonthStr[4];
567  int NLines=0;
568  TStrV ColV;
569  FILE *F = fopen(StlFNm.CStr(), "rt");
570  while (! feof(F)) {
571  memset(line, 0, 2024);
572  fgets(line, 2024, F);
573  if (strlen(line) == 0) break;
574  TStr(line).SplitOnAllCh(',', ColV);
575  const int SrcNId = ColV[0].GetInt();
576  const int DstNId = ColV[1].GetInt();
577  // time data
578  TStr TmStr = ColV[2]; // time-format: 29JAN02:21:55:23
579  int Year = TmStr.GetSubStr(5, 6).GetInt();
580  if (Year < 10) { Year += 2000; } else { Year += 1900; }
581  MonthStr[0]=toupper(TmStr[2]); MonthStr[1]=tolower(TmStr[3]);
582  MonthStr[2]=tolower(TmStr[4]); MonthStr[3]=0;
583  const int Month = TTmInfo::GetMonthN(MonthStr, lUs);
584  const int Day = TmStr.GetSubStr(0, 1).GetInt();
585  const int Hour = TmStr.GetSubStr(8, 9).GetInt();
586  const int Min = TmStr.GetSubStr(11, 12).GetInt();
587  const int Sec = TmStr.GetSubStr(14, 15).GetInt();
588  // add nodes and links
589  if (! TimeNet.IsNode(SrcNId)) { TimeNet.AddNode(SrcNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
590  if (! TimeNet.IsNode(DstNId)) { TimeNet.AddNode(DstNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
591  if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
592  if (++NLines % 100000 == 0) printf("\r %dk", NLines/1000);
593  }
594  fclose(F);
595  printf("\r %d lines read\n", NLines);
596  printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
597  TimeNet.Defrag(true);
598  return TimeNetPt;
599 }
600 
602 // Time Node-Edge Network
604  if (this != &TimeNet) {
605  TNet::operator=(TimeNet);
606  }
607  return *this;
608 }
609 
611  PTimeNet NewNet = TTimeNet::New();
612  NewNet->Reserve(GetNodes(), -1);
613  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
614  NewNet->AddNode(NI.GetId(), NI.GetDat());
615  }
616  for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
617  const int src = EI.GetSrcNId();
618  const int dst = EI.GetDstNId();
619  if (! NewNet->IsEdge(src, dst)) {
620  NewNet->AddEdge(src, dst); }
621  }
622  NewNet->Defrag();
623  return NewNet;
624 }
625 
626 // only take earliest edge between the nodes
628  PTimeNENet Net = TTimeNENet::New();
629  Net->Reserve(GetNodes(), -1);
630  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
631  Net->AddNode(NI.GetId(), NI.GetDat()); }
632  TIntV EIdV; GetEIdByTm(EIdV);
633  TIntPrSet EdgeSet(GetEdges());
634  for (int edge = 0; edge < EIdV.Len(); edge++) {
635  const TEdgeI EI = GetEI(EIdV[edge]);
636  const int Src = EI.GetSrcNId();
637  const int Dst = EI.GetDstNId();
638  if (Src==Dst || EdgeSet.IsKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)))) { continue; } // take only 1st edge
639  EdgeSet.AddKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)));
640  Net->AddEdge(EI);
641  }
642  return Net;
643 }
644 
646  PTimeNENet NewNetPt = TTimeNENet::New();
647  TTimeNENet& NewNet = *NewNetPt;
648  NewNet.Reserve(NIdV.Len(), -1);
649  int node, edge;
650  TNodeI NI;
651  for (node = 0; node < NIdV.Len(); node++) {
652  NewNet.AddNode(NIdV[node], GetNDat(NIdV[node]));
653  }
654  for (node = 0; node < NIdV.Len(); node++) {
655  NI = GetNI(NIdV[node]);
656  for (edge = 0; edge < NI.GetOutDeg(); edge++) {
657  const TEdgeI EI = GetEI(NI.GetOutEId(edge));
658  if (NewNet.IsNode(EI.GetDstNId())) {
659  NewNet.AddEdge(EI); }
660  }
661  }
662  NewNet.Defrag();
663  return NewNetPt;
664 }
665 
667  PTimeNENet NewNetPt = TTimeNENet::New();
668  TTimeNENet& NewNet = *NewNetPt;
669  NewNet.Reserve(-1, EIdV.Len());
670  for (int edge = 0; edge < EIdV.Len(); edge++) {
671  const TEdgeI Edge = GetEI(EIdV[edge]);
672  if (! NewNet.IsNode(Edge.GetSrcNId()))
673  NewNet.AddNode(GetNI(Edge.GetSrcNId()));
674  if (! NewNet.IsNode(Edge.GetDstNId()))
675  NewNet.AddNode(GetNI(Edge.GetDstNId()));
676  NewNet.AddEdge(Edge);
677  }
678  NewNet.Defrag();
679  return NewNetPt;
680 }
681 
682 // assumes that the edges of the network are sorted in time
684  PTimeNENet NewNetPt = TTimeNENet::New();
685  TTimeNENet& NewNet = *NewNetPt;
686  TSecTm PrevTm;
687  for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
688  if (EI() > MaxEdgeTm) { break; }
689  if (! NewNet.IsNode(EI.GetSrcNId()))
690  NewNet.AddNode(GetNI(EI.GetSrcNId()));
691  if (! NewNet.IsNode(EI.GetDstNId()))
692  NewNet.AddNode(GetNI(EI.GetDstNId()));
693  NewNet.AddEdge(EI);
694  IAssert(! PrevTm.IsDef() || PrevTm <= EI()); // edge times must be sorted
695  PrevTm = EI();
696  }
697  NewNet.Defrag();
698  return NewNetPt;
699 }
700 
702  NodeH.SortByDat(true);
703  EdgeH.SortByDat(true);
704 }
705 
706 // node time must be smaller than times of incoming or outgoing edges
708  int Cnt = 0;
709  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
710  TSecTm& NodeTm = NI();
711  for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
712  const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge));
713  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
714  }
715  for (int edge = 0; edge < NI.GetInDeg(); edge++) {
716  const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge));
717  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
718  }
719  }
720  printf("Update node times: %d/%d updates\n", Cnt, GetNodes());
721 }
722 
724  int Cnt = 0;
725  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
726  if (NI.GetDeg() == 0) { continue; }
727  TSecTm NodeTm;
728  for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
729  const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge)); IAssert(EdgeTm.IsDef());
730  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
731  }
732  for (int edge = 0; edge < NI.GetInDeg(); edge++) {
733  const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge)); IAssert(EdgeTm.IsDef());
734  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
735  }
736  GetNDat(NI.GetId()) = NodeTm;
737  }
738  printf("Node times set: %d/%d updates\n", Cnt, GetNodes());
739 }
740 
741 // shuffle edge arrivals
742 void TTimeNENet::SetRndEdgeTimes(const int& MinTmEdge) {
743  printf("Shuffling last %d (%d%%) edge arrival times..\n", GetEdges()-MinTmEdge, int(100.0*(GetEdges()-MinTmEdge)/double(GetEdges())));
744  TIntV RndEIdV; GetEIdByTm(RndEIdV);
745  TIntV TrueEIdV = RndEIdV;
746  TSecTmV TrueTmV;
747  const int SwapLen = RndEIdV.Len()-MinTmEdge;
748  for (int R = 0; R < 10; R++) {
749  for (int i = MinTmEdge; i < RndEIdV.Len(); i++) {
750  RndEIdV.Swap(TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge, TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge); }
751  }
752  for (int e = 0; e < TrueEIdV.Len(); e++) {
753  TrueTmV.Add(GetEDat(TrueEIdV[e])); }
754  for (int e = 0; e < RndEIdV.Len(); e++) {
755  GetEDat(RndEIdV[e]) = TrueTmV[e]; }
756  UpdateNodeTimes();
757 }
758 
760  TSecTm MnNodeTm, MxNodeTm;
761  TSecTm MnEdgeTm, MxEdgeTm;
762  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
763  const TSecTm NodeTm = NI();
764  if (! MnNodeTm.IsDef() || MnNodeTm>NodeTm) { MnNodeTm = NodeTm; }
765  if (! MxNodeTm.IsDef() || MxNodeTm<NodeTm) { MxNodeTm = NodeTm; }
766  }
767  printf("Node times:\n %s\n %s\n", MnNodeTm.GetStr().CStr(), MxNodeTm.GetStr().CStr());
768  for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
769  const TSecTm EdgeTm = EI();
770  if (! MnEdgeTm.IsDef() || MnEdgeTm>EdgeTm) { MnEdgeTm = EdgeTm; }
771  if (! MxEdgeTm.IsDef() || MxEdgeTm<EdgeTm) { MxEdgeTm = EdgeTm; }
772  }
773  printf("Edge times:\n %s\n %s\n", MnEdgeTm.GetStr().CStr(), MxEdgeTm.GetStr().CStr());
774 }
775 
776 void TTimeNENet::GetNIdByTm(TIntV& NIdV) const {
777  TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
778  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
779  TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
780  TmToNIdV.Sort();
781  NIdV.Gen(GetNodes(), 0);
782  for (int i = 0; i < TmToNIdV.Len(); i++) {
783  NIdV.Add(TmToNIdV[i].Dat); }
784 }
785 
786 void TTimeNENet::GetEIdByTm(TIntV& EIdV) const {
787  TVec<TKeyDat<TSecTm, TInt> > TmToEIdV(GetEdges(), 0);
788  for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
789  TmToEIdV.Add(TKeyDat<TSecTm, TInt>(EI.GetDat(), EI.GetId())); }
790  TmToEIdV.Sort();
791  EIdV.Gen(GetEdges(), 0);
792  for (int i = 0; i < TmToEIdV.Len(); i++) {
793  EIdV.Add(TmToEIdV[i].Dat); }
794 }
795 
796 void TTimeNENet::GetTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
797  THash<TInt, TIntV> TmIdToNIdVH;
798  TIntV NIdV; GetNIdByTm(NIdV);
799  for (int n = 0; n < NIdV.Len(); n++) {
800  const int TmId = GetNDat(NIdV[n]).Round(TmUnit).GetAbsSecs();
801  if (! TmIdToNIdVH.IsKey(TmId)) { TmIdToNIdVH.AddKey(TmId); }
802  TmIdToNIdVH.GetDat(TmId).Add(NIdV[n]);
803  }
804  TVec<TPair<TInt, TIntV> > TmIdNIdVV;
805  TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
806  TmIdNIdVV.Sort();
807  TmBucketV.Gen(TmIdNIdVV.Len());
808  for (int i = 0; i < TmIdNIdVV.Len(); i++) {
809  TTimeNet::TTmBucket& Bucket = TmBucketV[i];
810  Bucket.BegTm = TmIdNIdVV[i].Val1;
811  Bucket.NIdV = TmIdNIdVV[i].Val2;
812  }
813 }
814 
815 void TTimeNENet::GetEdgeTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
816  THash<TInt, TIntV> TmIdToEIdVH;
817  TIntV EIdV; GetEIdByTm(EIdV);
818  for (int e = 0; e < EIdV.Len(); e++) {
819  const int TmId = GetEDat(EIdV[e]).Round(TmUnit).GetAbsSecs();
820  if (! TmIdToEIdVH.IsKey(TmId)) { TmIdToEIdVH.AddKey(TmId); }
821  TmIdToEIdVH.GetDat(TmId).Add(EIdV[e]);
822  }
823  TVec<TPair<TInt, TIntV> > TmIdEIdVV;
824  TmIdToEIdVH.GetKeyDatPrV(TmIdEIdVV);
825  TmIdEIdVV.Sort();
826  TmBucketV.Gen(TmIdEIdVV.Len());
827  for (int i = 0; i < TmIdEIdVV.Len(); i++) {
828  TTimeNet::TTmBucket& Bucket = TmBucketV[i];
829  Bucket.BegTm = TmIdEIdVV[i].Val1;
830  Bucket.NIdV = TmIdEIdVV[i].Val2;
831  }
832 }
833 
834 void TTimeNENet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
835  TIntV NIdV; GetNIdByTm(NIdV);
836  TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
837  for (int i = 0; i < NIdV.Len(); i++) {
838  const int b = i/NodesPerBucket;
839  if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
840  TmBucketV[b].NIdV.Add(NIdV[i]);
841  }
842 }
843 
844 void TTimeNENet::GetEdgeBuckets(const int EdgesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
845  TIntV EIdV; GetEIdByTm(EIdV);
846  TmBucketV.Gen(EIdV.Len()/EdgesPerBucket + 1, 0);
847  for (int i = 0; i < EIdV.Len(); i++) {
848  const int b = i/EdgesPerBucket;
849  if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
850  TmBucketV[b].NIdV.Add(EIdV[i]);
851  }
852 }
853 
854 // get edges that close triangles over time
855 int TTimeNENet::GetTriadEdges(TIntV& TriadEIdV) const {
856  PUNGraph Graph = TUNGraph::New(GetNodes(), GetEdges());
857  TIntV EIdV; GetEIdByTm(EIdV);
858  TriadEIdV.Clr();
859  TExeTm ExeTm;
860  for (int edge = 0; edge < EIdV.Len(); edge++) {
861  const TEdgeI EI = GetEI(EIdV[edge]);
862  const int Src = EI.GetSrcNId();
863  const int Dst = EI.GetDstNId();
864  if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
865  if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
866  if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
867  if (TSnap::GetCmnNbrs(Graph, Src, Dst) > 0) { TriadEIdV.Add(EIdV[edge]); }
868  Graph->AddEdge(Src, Dst);
869  if (edge % 10000 == 0) {
870  printf("\redges %dk / %dk: triangle edges: %dk [total %s]", edge/1000, EIdV.Len()/1000,
871  TriadEIdV.Len()/1000, ExeTm.GetStr()); }
872  }
873  return Graph->GetEdges();
874 }
875 
876 PGStatVec TTimeNENet::TimeGrowth(const TTmUnit& TimeStep, const TFSet& TakeStat, const TSecTm& StartTm) const {
877  TExeTm ExeTm;
878  PGStatVec GStatVec = TGStatVec::New(TimeStep, TakeStat);
879  TTimeNet::TTmBucketV TmBucketV;
880  GetEdgeTmBuckets(TimeStep, TmBucketV);
881  const PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
882  TIntV EdgeIdV;
883  for (int t = 0; t < TmBucketV.Len(); t++) {
884  EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
885  printf("\n***%d/%d: %s (%d edges) ", t+1, TmBucketV.Len(), TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len()); ExeTm.Tick();
886  if (TmBucketV[t].BegTm < StartTm) { continue; }
887  const PNEGraph PreGraph = TSnap::GetESubGraph(FullGraph, EdgeIdV);
888  GStatVec->Add(PreGraph, TmBucketV[t].BegTm);
889  printf(" [%s]\n", ExeTm.GetTmStr());
890  //{ TFOut FOut("LinkedIn.GStatVec"); GStatVec->Save(FOut); }
891  }
892  return GStatVec;
893 }
894 
895 // take network from last TakeNTmUnits and measure properties
896 PGStatVec TTimeNENet::TimeGrowth(const TStr& FNmPref, const TStr& Desc, const TFSet& TakeStat, const int& NDiamRuns,
897  const TTmUnit& TmUnit, const int& TakeNTmUnits, const bool& LinkBWays) const {
898  TGStat::NDiamRuns = NDiamRuns;
899  PGStatVec GrowthStat = TGStatVec::New(TmUnit, TakeStat);
900  TTimeNet::TTmBucketV TmBucketV;
901  GetEdgeTmBuckets(TmUnit, TmBucketV);
902  TIntV EdgeIdV;
903  TExeTm ExeTm;
904  for (int t = 0; t < TmBucketV.Len(); t++) {
905  // take graph over last TakeNTmUnits time units
906  if (TakeNTmUnits == -1) {
907  EdgeIdV.AddV(TmBucketV[t].NIdV); }
908  else {
909  if (t < TakeNTmUnits) { continue; }
910  EdgeIdV.Clr(false);
911  for (int i = t-TakeNTmUnits; i < t; i++) { EdgeIdV.AddV(TmBucketV[i].NIdV); }
912  }
913  printf("*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len()); ExeTm.Tick();
914  PNEGraph PreGraph = TSnap::ConvertESubGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this), EdgeIdV);
915  if (LinkBWays) {
916  TIntV KeepEIdV; // keep only nodes that have in- and out-links (send and receive email)
917  for (TNEGraph::TEdgeI EI = PreGraph->BegEI(); EI < PreGraph->EndEI(); EI++) {
918  if (PreGraph->IsEdge(EI.GetDstNId(), EI.GetSrcNId(), true)) { KeepEIdV.Add(EI.GetId()); }
919  }
920  PreGraph = TSnap::GetESubGraph(PreGraph, KeepEIdV);
921  }
922  // take statistics
923  GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
924  { TFOut FOut(TStr::Fmt("growth.%s.gStatVec", FNmPref.CStr()));
925  GrowthStat->Save(FOut); }
926  GrowthStat->SaveTxt(FNmPref, Desc);
927  printf(" [%s]\n", ExeTm.GetTmStr());
928  }
929  return GrowthStat;
930 }
931 
932 void TTimeNENet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
933  const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc) const {
934  TTimeNet::TTmBucketV TmBucketV;
935  GetEdgeTmBuckets(TmUnit, TmBucketV);
936  PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
937  TIntV EdgeIdV;
938  TExeTm ExeTm, Run1Tm;
939  TFltTrV TmDiamV, NdsDiamV;
940  for (int t = 0; t < TmBucketV.Len(); t++) {
941  EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
942  printf("\n*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), EdgeIdV.Len()); ExeTm.Tick();
943  if (TmBucketV[t].BegTm < StartTm) continue;
944  PNGraph PreGraph = TSnap::ConvertESubGraph<PNGraph>(FullGraph, EdgeIdV);
945  TMom Mom;
946  double EffDiam = 0.0;
947  for (int r = 0; r < NDiamRuns; r++) {
948  printf("%d...", r+1); Run1Tm.Tick();
949  if (OnlyWcc) { EffDiam = TSnap::GetAnfEffDiam(TSnap::GetMxWcc(PreGraph)); }
950  else { EffDiam = TSnap::GetAnfEffDiam(PreGraph); }
951  Mom.Add(EffDiam);
952  printf("[%s]\r", Run1Tm.GetTmStr());
953  }
954  Mom.Def();
955  TmDiamV.Add(TFltTr(TmBucketV[t].BegTm.Round(TmUnit).GetAbsSecs(), Mom.GetMean(), Mom.GetSDev()));
956  NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
957  NdsDiamV.Sort();
958  printf(" [%s] \n", ExeTm.GetTmStr());
959  const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
960  { TGnuPlot GnuPlot("diamEff1."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
961  GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), "AVERAGE "+WccStr+"Effective Diameter");
962  GnuPlot.AddErrBar(TmDiamV, "", "");
963  GnuPlot.SavePng(); }
964  { TGnuPlot GnuPlot("diamEff2."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
965  GnuPlot.SetXYLabel("NODES", "AVERAGE "+WccStr+"Effective Diameter");
966  GnuPlot.AddErrBar(NdsDiamV, "", "");
967  GnuPlot.SavePng(); }
968  }
969 }
970 
971 // pretend we only have link data starting in PostTmDiam
972 // compare the diameter of the nodes after PostTmDiam with diameter
973 // of the nodes after PostTmDiam but *also* using nodes and edges
974 // from before PostTmDiam
975 void TTimeNENet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
976  const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam, const bool& LinkBWays) {
977  printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n %s group by %s.\n",
978  FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
979  printf(" Delete out-edges of pre time %s nodes.\n Take subgraph of post year %s subgraph.\n\n",
980  DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
981  // run diameter
982  /*const int NSamples = 100;
983  const int NDiamRuns = 10;
984  // delete past
985  if (DelPreEdges != -1) {
986  TIntV EdgeIdV;
987  printf("Deleting pre-year %d edges\n", DelPreEdges);
988  for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
989  if (EI().GetYear() < DelPreEdges) { EdgeIdV.Add(EI.GetId()); } }
990  for (int e = 0; e < EdgeIdV.Len(); e++) { DelEdge(EdgeIdV[e]); }
991  printf(" Deleted %d edges (out of %d edges total).\n", EdgeIdV.Len(), GetEdges());
992  }
993  PNEGraph FullGraph = GetNEGraph();
994  TSnap::DelZeroDegNodes(FullGraph);
995 
996  PGrowthStat GrowthStat = TGrowthStat::New(TmUnit, TGraphStat::AllStat());
997  TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
998  TIntV EdgeIdV;
999  TExeTm ExeTm;
1000  TTimeNet::TTmBucketV EdgeTmBucketV;
1001  GetEdgeTmBuckets(TmUnit, EdgeTmBucketV);
1002  for (int t = 0; t < EdgeTmBucketV.Len(); t++) {
1003  printf("\nGraph: %s (%d / %d) [%s]\n", EdgeTmBucketV[t].BegTm.GetTmStr().CStr(),
1004  t+1, EdgeTmBucketV.Len(), TExeTm::GetCurTm());
1005  // up-to-year subgraph
1006  EdgeIdV.AddV(EdgeTmBucketV[t].NIdV); // nodes up to time T
1007  if (EdgeTmBucketV[t].BegTm.GetYear() < PostYearDiam) continue;
1008 
1009  const PNEGraph PreNEGraph = EdgeBothWays ? FullGraph->GetEdgeSubGraph(EdgeIdV) : FullGraph;
1010  PNGraph PreGraph = PreNEGraph->GetBWayGraph();
1011  PNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
1012 
1013  // find nodes that are not in missing past
1014  TIntV PostYearNIdV, WccPostYearNIdV;
1015  THash<TIntPr, TInt> PostYearEdgeH;
1016  for (TNEGraph::TEdgeI EI = PreNEGraph->BegEI(); EI < PreNEGraph->EndEI(); EI++) {
1017  if (GetEDat(EI.GetId()).GetYear() >= PostYearDiam) {
1018  PostYearEdgeH.AddKey(TIntPr(EI.GetSrcNId(), EI.GetDstNId())); }
1019  }
1020  TIntH PostYearNIdH;
1021  for (int i = 0; i < PostYearEdgeH.Len(); i++) {
1022  if ((! EdgeBothWays) || (EdgeBothWays && PostYearEdgeH.IsKey(TIntPr(PostYearEdgeH.GetKey(i).Val2, PostYearEdgeH.GetKey(i).Val1)))) { //reverse edge exists
1023  PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val1);
1024  PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val2);
1025  }
1026  }
1027  PostYearNIdH.GetKeyV(PostYearNIdV);
1028  for (int i = 0; i < PostYearNIdV.Len(); i++) {
1029  if (WccGraph->IsNode(PostYearNIdV[i])) {
1030  WccPostYearNIdV.Add(PostYearNIdV[i]); }
1031  }
1032 
1033  // diameter of PostYearDiam subgraph using whole graph (all edges)
1034  TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
1035  for (int r = 0; r < NDiamRuns; r++) {
1036  if (! PostYearNIdV.Empty()) {
1037  PreDiamMom.Add(-1); //PreDiamMom.Add(TSnap::GetBfsFullDiam(PreGraph, NSamples, PostYearNIdV, false));
1038  PreEffDiamMom.Add(TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false));
1039  }
1040  if (! WccPostYearNIdV.Empty()) {
1041  //WccDiamMom.Add(TSnap::GetBfsFullDiam(WccGraph, NSamples, WccPostYearNIdV, false));
1042  //WccEffDiamMom.Add(TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false));
1043  WccDiamMom.Add(-1); WccEffDiamMom.Add(-1);
1044  }
1045  printf(" diam: %d [%s] %.2f\r", r+1, ExeTm.GetTmStr(), PreEffDiamMom.GetValV().Last().Val); ExeTm.Tick();
1046  }
1047  PreDiamMom.Def(); PreEffDiamMom.Def();
1048  WccDiamMom.Def(); WccEffDiamMom.Def();
1049  // save stat
1050  PGraphStat GraphStatPt = GrowthStat->Add(EdgeTmBucketV[t].BegTm);
1051  TGraphStat& GraphStat = *GraphStatPt;
1052  GraphStat.Nodes = PreGraph->GetNodes();
1053  GraphStat.NonZNodes = PreGraph->GetNodes() - TSnap::CntDegNodes<PNGraph>(PreGraph, 0);
1054  GraphStat.Srcs = GraphStat.Nodes - TSnap::CntOutDegNodes<PNGraph>(PreGraph, 0);
1055  GraphStat.Edges = PreGraph->GetEdges();
1056  GraphStat.WccNodes= WccGraph->GetNodes();
1057  GraphStat.WccEdges = WccGraph->GetEdges();
1058  GraphStat.FullDiam = PreDiamMom.GetMean(); // mean
1059  GraphStat.EffDiam = PreEffDiamMom.GetMean();
1060  GraphStat.FullWccDiam = WccDiamMom.GetMean();
1061  GraphStat.EffWccDiam = WccEffDiamMom.GetMean();
1062  GraphStat.FullDiamDev = PreDiamMom.GetSDev(); // variance
1063  GraphStat.EffDiamDev = PreEffDiamMom.GetSDev();
1064  GraphStat.FullWccDiamDev = WccDiamMom.GetSDev();
1065  GraphStat.EffWccDiamDev = WccEffDiamMom.GetSDev();
1066  { TFOut FOut("growth."+FNmPref+".bin");
1067  GrowthStat->Save(FOut); }
1068  const TStr BigDesc = TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%d\nPostYearDiam\t%d\n",
1069  Desc.CStr(), DelPreEdges, PostYearDiam);
1070  GrowthStat->SaveTxt(FNmPref, BigDesc);
1071  }
1072  // diameter plots
1073  GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
1074  DelPreEdges, PostYearDiam));
1075  */
1076 }
1077 
1078 // plot time difference of the node ages when an edge arrives
1079 /*void TTimeNENet::PlotEdgeNodeTimeDiff(const TStr& FNmPref, TStr Desc) const {
1080  if (Desc.Empty()) { Desc = FNmPref; }
1081  printf("Edge node time differences:\n");
1082  THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
1083  THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
1084  PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
1085  TIntV EIdV; GetEIdByTm(EIdV);
1086  TIntH DegCntH(10, true);
1087  TExeTm ExeTm;
1088  for (int edge = 0; edge < EIdV.Len(); edge++) {
1089  const TEdgeI EI = GetEI(EIdV[edge]);
1090  const int Src = EI.GetSrcNId();
1091  const int Dst = EI.GetDstNId();
1092  if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
1093  if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
1094  if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
1095  const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
1096  const int DstDeg = Graph->GetNode(Dst).GetInDeg();
1097  const int EdgeTm = EI.GetDat().GetAbsSecs();
1098  const int SrcTm = EI.GetSrcNDat().GetAbsSecs();
1099  const int DstTm = EI.GetDstNDat().GetAbsSecs();
1100  NodeAgeH.AddDat((DstTm-SrcTm)/3600) += 1;
1101  if (SrcDeg == 0 || DstDeg == 0) {
1102  Node1AgeH.AddDat((DstTm-SrcTm)/3600) += 1; }
1103  SrcTmH.AddDat((EdgeTm-SrcTm)/3600) += 1;
1104  DstTmH.AddDat((EdgeTm-DstTm)/3600) += 1;
1105  // add edge
1106  Graph->AddEdge(Src, Dst);
1107  if (edge % 1000 == 0) {
1108  printf("\r%dk / %dk [total %s]", edge/1000, EIdV.Len()/1000, ExeTm.GetStr()); }
1109  }
1110  TGnuPlot::PlotValCntH(NodeAgeH, "tmDstSrc."+FNmPref, TStr::Fmt("%s, Node time difference.", Desc.CStr()),
1111  "Time(destination) - Time(source) [hours]", "Count");
1112  TGnuPlot::PlotValCntH(Node1AgeH, "tmDstSrc1."+FNmPref, TStr::Fmt("%s. 1-ST EDGE of one of the nodes.", Desc.CStr()),
1113  "Time(destination) - Time(source) [hours]", "Count");
1114  TGnuPlot::PlotValCntH(SrcTmH, "tmEdgeSrc."+FNmPref, TStr::Fmt("%s. Edge time - source node time.", Desc.CStr()),
1115  "Edge time - source node time [hours]", "Count");
1116  TGnuPlot::PlotValCntH(DstTmH, "tmEdgeDst."+FNmPref, TStr::Fmt("%s. Edge time - destination node time.", Desc.CStr()),
1117  "Edge time - destination node time [hours]", "Count");
1118  printf("\n");
1119 }
1120 
1121 // plot the distribution whether the edge to node that closes the triad was
1122 // chosen random or preferentially
1123 void TTimeNENet::PlotCloseTriad(const TStr& FNmPref, TStr Desc, const bool& OnlyBackEdges) const {
1124  if (Desc.Empty()) { Desc = FNmPref; }
1125  printf("Random vs. preferential triad closing: %s\n", OnlyBackEdges?"OnlyBackEdges":"");
1126  THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
1127  THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
1128  PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
1129  TIntV EIdV; GetEIdByTm(EIdV);
1130  TIntV CmnV, SrcNbrV, DstNbrV;
1131  TExeTm ExeTm;
1132  TFltV RndRdnV, PrefPrefV, PrefRndV, RndPrefV;
1133  TFltFltH RndRndH, PrefPrefH, PrefRndH, RndPrefH;
1134  int TriadEdges = 0;
1135  FILE *F = fopen(TStr::Fmt("%s-prob.tab", FNmPref.CStr()).CStr(), "wt");
1136  fprintf(F, "#Probability of picking a node that closes the triangle (pick preferentially vs. uniformly at random)\n");
1137  fprintf(F, "#i\tEId\tRndRnd\tRndPref\tPrefRnd\tPrefPref\n");
1138  for (int edge = 0; edge < EIdV.Len(); edge++) {
1139  const TEdgeI EI = GetEI(EIdV[edge]);
1140  const int Src = EI.GetSrcNId();
1141  const int Dst = EI.GetDstNId();
1142  if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
1143  if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
1144  if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
1145  if (OnlyBackEdges && EI.GetSrcNDat().GetAbsSecs()-EI.GetDstNDat().GetAbsSecs() < 0) {
1146  Graph->AddEdge(Src, Dst); continue;
1147  }
1148  // find common neighbors
1149  const TUNGraph::TNodeI SrcNI = Graph->GetNI(Src);
1150  const TUNGraph::TNodeI DstNI = Graph->GetNI(Dst);
1151  SrcNbrV.Clr(false); DstNbrV.Clr(false);
1152  for (int i = 0; i < SrcNI.GetOutDeg(); i++) { SrcNbrV.Add(SrcNI.GetOutNId(i)); }
1153  for (int i = 0; i < DstNI.GetOutDeg(); i++) { DstNbrV.Add(DstNI.GetOutNId(i)); }
1154  SrcNbrV.Sort(); DstNbrV.Sort();
1155  SrcNbrV.Intrs(DstNbrV, CmnV);
1156  if (! CmnV.Empty()) {
1157  // triad completion
1158  const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
1159  const int DstDeg = Graph->GetNode(Dst).GetInDeg();
1160  double RndRnd=0, RndPref=0, PrefRnd=0, PrefPref=0;
1161  int AllNbrDegSum=0;//, NbrDegSum = 0;
1162  //for (int i = 0; i < CmnV.Len(); i++) {
1163  // NbrDegSum += Graph->GetNode(CmnV[i]).GetOutDeg(); }
1164  for (int i = 0; i < SrcNI.GetOutDeg(); i++) {
1165  AllNbrDegSum += Graph->GetNode(SrcNI.GetOutNId(i)).GetOutDeg(); }
1166  // uniform-uniform node selection = 1/|cmn| sum_{i\in cmn} 1/(d_i-1)
1167  // preferential-uniform
1168  for (int i = 0; i < CmnV.Len(); i++) {
1169  const int Deg = Graph->GetNode(CmnV[i]).GetOutDeg();
1170  RndRnd += (1.0/double(SrcDeg)) * (1.0/double(Deg-1));
1171  PrefRnd += (Deg/double(AllNbrDegSum)) * (1.0/double(Deg-1));
1172  }
1173  // uniform-preferential selection = 1/|cmn| sum_{i\in cmn} d_t / sum_{i--j} d_j
1174  // preferential-preferential
1175  for (int i = 0; i < CmnV.Len(); i++) {
1176  const TUNGraph::TNode N = Graph->GetNode(CmnV[i]);
1177  const int Deg = N.GetOutDeg();
1178  int DegSum = 0;
1179  for (int j = 0; j < N.GetOutDeg(); j++) {
1180  if (N.GetOutNId(j) != Src) {
1181  DegSum += Graph->GetNode(N.GetOutNId(j)).GetOutDeg(); }
1182  }
1183  RndPref += 1.0/double(SrcDeg) * DstDeg/double(DegSum);
1184  PrefPref += (Deg/double(AllNbrDegSum)) * (DstDeg/double(DegSum));
1185  }
1186  IAssert(0.0<RndRnd && RndRnd<=1.0); IAssert(0.0<RndPref && RndPref<=1.0);
1187  IAssert(0.0<PrefRnd && PrefRnd<=1.0); IAssert(0.0<PrefPref && PrefPref<=1.0);
1188  RndRndH.AddDat(TMath::Round(RndRnd, 2)) += 1.0;
1189  RndPrefH.AddDat(TMath::Round(RndPref, 2)) += 1.0;
1190  PrefRndH.AddDat(TMath::Round(PrefRnd, 2)) += 1.0;
1191  PrefPrefH.AddDat(TMath::Round(PrefPref, 2)) += 1.0;
1192  fprintf(F, "%d\t%d\t%g\t%g\t%g\t%g\n", edge, EI.GetId(), RndRnd, RndPref, PrefRnd, PrefPref);
1193  TriadEdges++;
1194  }
1195  Graph->AddEdge(Src, Dst);
1196  if (edge % 1000 == 0) {
1197  printf("\r%dk / %dk triad edges %dk [total %s]", edge/1000, EIdV.Len()/1000, TriadEdges/1000, ExeTm.GetStr()); }
1198  }
1199  fclose(F);
1200  printf("\nRandom vs. preferential triad closing\n");
1201  printf("All edges: %d\n", Graph->GetEdges());
1202  printf("Triad closing edges: %d\n", TriadEdges);
1203  // plot distribution
1204  TFltPrV RndRndPrV; RndRndH.GetKeyDatPrV(RndRndPrV); RndRndPrV.Sort();
1205  TFltPrV RndPrefPrV; RndPrefH.GetKeyDatPrV(RndPrefPrV); RndPrefPrV.Sort();
1206  TFltPrV PrefRndPrV; PrefRndH.GetKeyDatPrV(PrefRndPrV); PrefRndPrV.Sort();
1207  TFltPrV PrefPrefPrV; PrefPrefH.GetKeyDatPrV(PrefPrefPrV); PrefPrefPrV.Sort();
1208  { TGnuPlot GP(TStr::Fmt("probPdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
1209  GP.AddPlot(RndRndPrV, gpwLinesPoints, "Random--Random");
1210  GP.AddPlot(RndPrefPrV, gpwLinesPoints, "Random--Preferential");
1211  GP.AddPlot(PrefRndPrV, gpwLinesPoints, "Preferential--Random");
1212  GP.AddPlot(PrefPrefPrV, gpwLinesPoints, "Preferential--Preferential");
1213  GP.SetXYLabel("Probability", "Count");
1214  GP.SavePng(); }
1215  { TGnuPlot GP(TStr::Fmt("probCdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
1216  GP.AddPlot(TGUtil::GetCdf(RndRndPrV), gpwLinesPoints, "Random--Random");
1217  GP.AddPlot(TGUtil::GetCdf(RndPrefPrV), gpwLinesPoints, "Random--Preferential");
1218  GP.AddPlot(TGUtil::GetCdf(PrefRndPrV), gpwLinesPoints, "Preferential--Random");
1219  GP.AddPlot(TGUtil::GetCdf(PrefPrefPrV), gpwLinesPoints, "Preferential--Preferential");
1220  GP.SetXYLabel("Cumulative probability", "Count");
1221  GP.SavePng(); }
1222 }*/
1223 
1224 PTimeNENet TTimeNENet::GetGnmRndNet(const int& Nodes, const int& Edges) {
1225  printf("Generating G_nm(%d, %d)\n", Nodes, Edges);
1226  int Src, Dst;
1227  PTimeNENet Net = TTimeNENet::New();
1228  Net->Reserve(Nodes, Edges);
1229  for (int e = 0; e < Edges; e++) {
1230  Src = TInt::Rnd.GetUniDevInt(Nodes);
1231  Dst = TInt::Rnd.GetUniDevInt(Nodes);
1232  while (Dst == Src || Net->IsEdge(Src, Dst)) {
1233  Dst = TInt::Rnd.GetUniDevInt(Nodes); }
1234  if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(e)); }
1235  if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(e)); }
1236  Net->AddEdge(Src, Dst, -1, TSecTm(e));
1237  }
1238  return Net;
1239 }
1240 
1241 // ACL, model B: Aiello, Chung, Lu: Random evolution in massive graphs
1242 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& Edges, const double& GammaIn, const double& GammaOut) {
1243  const double Alpha = Nodes/double(Edges);
1244  printf("Generating PA(%d, %d), with slope in:%.1f, out: %.1f\n", Nodes, Edges,
1245  2+GammaIn/(Alpha/(1-Alpha)), 2+GammaOut/(Alpha/(1-Alpha)));
1246  // init
1247  int nodes=0, edges=0, time=0, iter=0;
1248  TIntV OutW(Edges, 0), InW(Edges, 0);
1249  PTimeNENet Net = TTimeNENet::New();
1250  Net->Reserve(Nodes, Edges);
1251  // 1st node
1252  Net->AddNode(0, TSecTm(time++)); nodes++;
1253  OutW.Add(0); InW.Add(0);
1254  while (edges < Edges) {
1255  int Src=-1, Dst=-1; iter++;
1256  if (TInt::Rnd.GetUniDev() < Alpha) {
1257  if (nodes < Nodes) {
1258  IAssert(Net->AddNode(nodes, TSecTm(time++)));
1259  nodes++; }
1260  } else {
1261  if (TInt::Rnd.GetUniDev() < nodes*GammaIn/double(edges+nodes*GammaIn)) {
1262  Src = TInt::Rnd.GetUniDevInt(nodes); }
1263  else { Src = OutW[TInt::Rnd.GetUniDevInt(OutW.Len())]; }
1264  if (TInt::Rnd.GetUniDev() < nodes*GammaOut/double(edges+nodes*GammaOut)) {
1265  Dst = TInt::Rnd.GetUniDevInt(nodes); }
1266  else { Dst = InW[TInt::Rnd.GetUniDevInt(InW.Len())]; }
1267  }
1268  if (Src == Dst || Net->IsEdge(Src, Dst)) {
1269  continue;
1270  }
1271  //printf("%d/%d %d %d\n", edges, iter, Src, Dst);
1272  if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(time++)); nodes++; }
1273  if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(time++)); nodes++; }
1274  Net->AddEdge(Src, Dst, -1, TSecTm(time++));
1275  OutW.Add(Src); InW.Add(Dst); edges++;
1276  }
1277  for (int node = 0; node < Nodes; node++) {
1278  if (! Net->IsNode(node)) {
1279  Net->AddNode(node, TSecTm(time++)); }
1280  }
1281  return Net;
1282 }
1283 
1284 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& OutDeg) {
1285  printf("Generating PA, nodes:%d, out-deg:%d\n", Nodes, OutDeg);
1286  // init
1287  int time=0;
1288  PTimeNENet Net = TTimeNENet::New();
1289  Net->Reserve(Nodes, OutDeg*Nodes);
1290  Net->AddNode(0, TSecTm(++time)); Net->AddNode(1, TSecTm(++time));
1291  Net->AddEdge(0, 1, -1, TSecTm(++time));
1292  TIntV NIdV; NIdV.Add(0); NIdV.Add(1);
1293  TIntSet NodeSet;
1294  for (int node = 2; node <= Nodes; node++) {
1295  NodeSet.Clr(false);
1296  while (NodeSet.Len() < OutDeg && NodeSet.Len() < node) {
1297  NodeSet.AddKey(NIdV[TInt::Rnd.GetUniDevInt(NIdV.Len())]);
1298  }
1299  const int N = Net->AddNode(node, TSecTm(++time));
1300  for (int i = 0; i < NodeSet.Len(); i++) {
1301  Net->AddEdge(node, NodeSet[i], -1, TSecTm(++time));
1302  NIdV.Add(N); NIdV.Add(NodeSet[i]);
1303  }
1304  }
1305  return Net;
1306 }
1307 
1308 void TTimeNENet::SaveEdgeTm(const TStr& EdgeFNm, const bool& RenumberNId, const bool& RelativeTm) const {
1309  TIntV EIdV; GetEIdByTm(EIdV);
1310  const int BegTm = RelativeTm ? GetEDat(EIdV[0]).GetAbsSecs() : 0;
1311  TIntSet NIdMap;
1312  if (RenumberNId) { NIdMap.Gen(GetNodes()); }
1313  FILE *F = fopen(EdgeFNm.CStr(), "wt");
1314  //fprintf(F, "#Nodes\t%d\n#Edges\t%d\n", GetNodes(), GetEdges());
1315  //fprintf(F, "#<src>\t<dst>\t<time>\n");
1316  for (int e =0; e < EIdV.Len(); e++) {
1317  const TEdgeI EI = GetEI(EIdV[e]);
1318  if (RenumberNId) {
1319  const int src = EI.GetSrcNId();
1320  const int dst = EI.GetDstNId();
1321  NIdMap.AddKey(src); NIdMap.AddKey(dst);
1322  fprintf(F, "%d\t%d\t%d\n", NIdMap.GetKeyId(src), NIdMap.GetKeyId(dst), EI().GetAbsSecs()-BegTm);
1323  }else {
1324  fprintf(F, "%d\t%d\t%d\n", EI.GetSrcNId(), EI.GetDstNId(), EI().GetAbsSecs()-BegTm); }
1325  }
1326  fclose(F);
1327 }
1328 
1330  PTimeNENet Net = TTimeNENet::New();
1331  for (int i = 1; i <= 6; i++) {
1332  Net->AddNode(i, TSecTm(0)); }
1333  int tm = 1;
1334  Net->AddEdge(1, 2, -1, TSecTm(tm++));
1335  Net->AddEdge(3, 4, -1, TSecTm(tm++));
1336  Net->AddEdge(3, 1, -1, TSecTm(tm++));
1337  Net->AddEdge(5, 6, -1, TSecTm(tm++));
1338  Net->AddEdge(6, 4, -1, TSecTm(tm++));
1339  Net->AddEdge(5, 3, -1, TSecTm(tm++));
1340  Net->AddEdge(5, 4, -1, TSecTm(tm++));
1341  Net->AddEdge(5, 2, -1, TSecTm(tm++));
1342  return Net;
1343 }
1344 
1345 PTimeNENet TTimeNENet::LoadFlickr(const TStr& NodeFNm, const TStr& EdgeFNm) {
1346  const int BegOfTm = 1047369600; // Tue Mar 11 01:00:00 2003 (begining of Flickr)
1347  PTimeNENet Net = TTimeNENet::New();
1348  printf("Adding nodes...");
1349  { TSsParser Ss(NodeFNm, ssfWhiteSep);
1350  while (Ss.Next()) {
1351  const int NId = Ss.GetInt(0);
1352  const int Tm = Ss.GetInt(1)+BegOfTm;
1353  if (TSecTm(Tm) < TSecTm(2002, 1, 1)) {
1354  printf(" skip node %g (time %d)\n", (double) Ss.GetLineNo(), Ss.GetInt(1)); continue; }
1355  Net->AddNode(NId, TSecTm(Tm));
1356  } }
1357  printf(" %d nodes\n", Net->GetNodes());
1358  printf("Adding edges...");
1359  int SkipCnt=0;
1360  //TIntH SkipDiffCnt;
1361  { TSsParser Ss(EdgeFNm, ssfWhiteSep);
1362  while (Ss.Next()) {
1363  const int NId1 = Ss.GetInt(0);
1364  const int NId2 = Ss.GetInt(1);
1365  const TSecTm Tm = TSecTm(Ss.GetInt(2)+BegOfTm);
1366  if (! Net->IsNode(NId1) || ! Net->IsNode(NId2)) { printf("not node\n"); continue; }
1367  if (Tm < TSecTm(2002, 1, 1)) { SkipCnt++;
1368  printf(" skip edge %g (time %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr()); continue; }
1369  if (Tm+600 < Net->GetNDat(NId1)) {
1370  printf(" 1:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId1).GetStr().CStr());
1371  //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId1).GetAbsSecs()) += 1;
1372  SkipCnt++; continue; }
1373  if (Tm+600 < Net->GetNDat(NId2)) { SkipCnt++;
1374  printf(" 2:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId2).GetStr().CStr());
1375  //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId2).GetAbsSecs()) += 1;
1376  SkipCnt++; continue; }
1377  Net->AddEdge(NId1, NId2, -1, TSecTm(Tm));
1378  } }
1379  //TGnuPlot::PlotValCntH(SkipDiffCnt, "flickr-edgeNodeDiff", "", "seconds", "count");
1380  printf(" %d edges\n", Net->GetEdges());
1381  printf(" %d edges skipped (edge time < node time)\n", SkipCnt);
1382  Net->UpdateNodeTimes();
1383  return Net;
1384 }
1385 
1386 // load network where fields are separated by Separator and each line contains (*Fld are column indexes)
1387 // .. <source> .. <destination> .. <time>
1388 PTimeNENet TTimeNENet::LoadEdgeTm(const TStr& EdgeFNm, const int& SrcFld, const int& DstFld, const int& TimeFld, const TSsFmt& Separator) {
1389  printf("Loading %s\n", EdgeFNm.CStr());
1390  PTimeNENet Net = TTimeNENet::New();
1391  TStrHash<TInt> StrToId(Mega(1), true); // node id to string
1392  int LineCnt=0;
1393  TExeTm ExeTm;
1394  TSsParser Ss(EdgeFNm, Separator);
1395  TSecTm MinTm=TSecTm::GetCurTm(), MaxTm=TSecTm(100);
1396  while (Ss.Next()) {
1397  if (Ss.IsCmt()) { continue; }
1398  IAssert(Ss.Len() > TimeFld);
1399  const char* Node1 = Ss.GetFld(SrcFld);
1400  const char* Node2 = Ss.GetFld(DstFld);
1401  const char* TmStr = Ss.GetFld(TimeFld);
1402  if (strcmp(TmStr,"NULL")==0) { continue; }
1403  //const TSecTm Tm = TSecTm::GetDtTmFromYmdHmsStr(TmStr);
1404  const TSecTm Tm(atoi(TmStr));
1405  const int NId1 = StrToId.AddKey(Node1);
1406  const int NId2 = StrToId.AddKey(Node2);
1407  if (! Net->IsNode(NId1)) { Net->AddNode(NId1, TSecTm()); }
1408  if (! Net->IsNode(NId2)) { Net->AddNode(NId2, TSecTm()); }
1409  MinTm=TMath::Mn(MinTm, Tm);
1410  MaxTm=TMath::Mx(MaxTm, Tm);
1411  Net->AddEdge(NId1, NId2, -1, Tm);
1412  if (++LineCnt % 1000 == 0) {
1413  printf("\r %dk lines processed: %d %d [%s]", LineCnt/1000, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr()); }
1414  }
1415  printf("\r %d lines processed: %d %d [%s]\n", LineCnt, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr());
1416  printf(" Data range %s -- %s\n", MinTm.GetStr().CStr(), MaxTm.GetStr().CStr());
1417  //TSnap::PrintInfo(Net, "", "", false);
1418  Net->UpdateNodeTimes();
1419  return Net;
1420 }
Definition: ds.h:336
void Clr(const bool &DoDel=true, const int &NoDelLim=-1)
Definition: shash.h:1243
PTimeNENet GetTimeNENet() const
Definition: timenet.cpp:32
#define IAssert(Cond)
Definition: bd.h:262
int GetInt() const
Definition: dt.h:579
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
void PlotEffDiam(const TStr &FNmPref, const TStr &Desc, const TTmUnit &GroupBy, const TSecTm &StartTm, const int &NDiamRuns=10, const bool &OnlyWcc=false) const
Definition: timenet.cpp:932
static char * GetCurTm()
Definition: tm.h:373
static PTimeNet LoadArxiv(const TStr &PaperFNm, const TStr &CiteFNm)
Definition: timenet.cpp:381
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:463
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the network.
Definition: network.h:1247
Definition: tm.h:354
double GetMedian() const
Definition: xmath.h:244
static PSs LoadTxt(const TSsFmt &SsFmt, const TStr &FNm, const PNotify &Notify=NULL, const bool &IsExcelEoln=true, const int &MxY=-1, const TIntV &AllowedColNV=TIntV(), const bool &IsQStr=true)
Definition: ss.cpp:100
void Reserve(const int &Nodes, const int &Edges)
Reserves memory for a network of Nodes nodes and Edges edges.
Definition: network.h:1281
Definition: ds.h:262
static PTimeNENet GetSmallNet()
Definition: timenet.cpp:1329
static PTimeNet New()
Definition: timenet.h:39
bool IsDef() const
Definition: tm.h:123
double GetBfsEffDiam(const PGraph &Graph, const int &NTestNodes, const bool &IsDir=false)
Returns the (approximation of the) Effective Diameter (90-th percentile of the distribution of shorte...
Definition: bfsdfs.h:278
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
Definition: gnuplot.h:120
PGraph GetMxWcc(const PGraph &Graph)
Returns a graph representing the largest weakly connected component on an input Graph.
Definition: cncom.h:452
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
TNodeI BegNI() const
Returns an iterator referring to the first node in the network.
Definition: network.h:1196
PTimeNENet Get1stEdgeNet() const
Definition: timenet.cpp:627
int GetKeyId(const TKey &Key) const
Definition: shash.h:1328
void PlotMissingPast(const TStr &FNmPref, const TStr &Desc, const TTmUnit &GroupBy, const TSecTm &DelPreTmEdges, const TSecTm &PostTmDiam) const
Definition: timenet.cpp:169
void GetNIdByTm(TIntV &NIdV) const
Definition: timenet.cpp:776
void PlotMissingPast(const TStr &FNmPref, const TStr &Desc, const TTmUnit &TmUnit, const TSecTm &DelPreTmEdges, const TSecTm &PostTmDiam, const bool &LinkBWays)
Definition: timenet.cpp:975
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:278
void GetEIdByTm(TIntV &EIdV) const
Definition: timenet.cpp:786
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:8
Definition: fl.h:319
Definition: bits.h:119
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:74
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:535
void PlotCCfOverTm(const TStr &FNmPref, TStr Desc, const TTmUnit &TmUnit, const int &NodesBucket=-1) const
Definition: timenet.cpp:253
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:63
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: network.h:1194
int GetNodes() const
Returns the number of nodes in the network.
Definition: network.h:1172
double GetAnfEffDiam(const PGraph &Graph, const bool &IsDir, const double &Percentile, const int &NApprox)
Definition: anf.h:217
Definition: ss.h:72
void Gen(const int &ExpectVals)
Definition: shash.h:1115
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the network.
Definition: network.h:183
int GetEdges() const
Returns the number of edges in the network.
PGStatVec TimeGrowth(const TTmUnit &TimeStep, const TFSet &TakeStat, const TSecTm &StartTm=TSecTm(1)) const
Definition: timenet.cpp:876
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:424
TStr GetSubStr(const int &BChN, const int &EChN) const
Definition: dt.cpp:811
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
void GetEdgeBuckets(const int EdgesPerBucket, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:844
void GetTmBuckets(const TTmUnit &GroupBy, TTmBucketV &TmBucketV) const
Definition: timenet.cpp:59
int AddNode(int NId=-1)
Adds a node of ID NId to the network.
Definition: network.h:1341
static PTimeNet LoadPatents(const TStr &PatentFNm, const TStr &CiteFNm)
Definition: timenet.cpp:478
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:443
Definition: xmath.h:129
TSecTm & GetNDat(const int &NId)
Returns node data for the node of ID NId in the network.
Definition: network.h:1204
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:664
TSecTm & GetNDat(const int &NId)
Returns node data for the node of ID NId in the network.
Definition: network.h:191
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the network.
Definition: network.h:1496
int GetNodes() const
Returns the number of nodes in the network.
Definition: network.h:157
Definition: gnuplot.h:7
TEdgeI GetEI(const int &EId) const
Not supported/implemented!
Definition: network.h:1251
static TRnd Rnd
Definition: dt.h:1051
TNodeI BegNI() const
Returns an iterator referring to the first node in the network.
Definition: network.h:181
double GetSDev() const
Definition: xmath.h:242
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
PTimeNENet GetGraphUpToTm(const TSecTm &MaxEdgeTm) const
Definition: timenet.cpp:683
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:165
const char * GetFld(const int &FldN) const
Returns the contents of the field at index FldN.
Definition: ss.h:129
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: network.h:179
TSsFmt
Spread-Sheet Separator Format.
Definition: ss.h:5
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the network.
Definition: network.h:185
int AddEdge(const int &SrcNId, const int &DstNId, int EId=-1)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: network.h:1403
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:530
void GetEdgeTmBuckets(const TTmUnit &GroupBy, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:815
static PTimeNENet LoadEdgeTm(const TStr &EdgeFNm, const int &SrcFld=0, const int &DstFld=1, const int &TimeFld=2, const TSsFmt &Separator=ssfTabSep)
Definition: timenet.cpp:1388
Definition: bd.h:63
void SaveEdgeTm(const TStr &EdgeFNm, const bool &RenumberNId=false, const bool &RelativeTm=false) const
Definition: timenet.cpp:1308
Graph Statistics Sequence.
Definition: gstat.h:155
void SplitOnCh(TStr &LStr, const char &SplitCh, TStr &RStr) const
Definition: dt.cpp:901
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1011
static void SaveTs(const TIntKdV &KdV, const TStr &FNm, const TStr &HeadLn=TStr())
Definition: gnuplot.cpp:713
const char * GetTmStr() const
Definition: tm.h:369
void DelNode(const int &NId)
Deletes node of ID NId from the network.
Definition: network.h:297
void PlotEffDiam(const TStr &FNmPref, const TStr &Desc, const TTmUnit &GroupBy, const TSecTm &StartTm, const int &NDiamRuns=10, const bool &OnlyWcc=false, const bool &AlsoRewire=false) const
Definition: timenet.cpp:106
void GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:834
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:953
static PTimeNENet GetPrefAttach(const int &Nodes, const int &Edges, const double &GammaIn, const double &GammaOut)
Definition: timenet.cpp:1242
void Add(const TFlt &Val, const TFlt &Wgt=1)
Definition: xmath.h:217
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1218
Statistics of a Graph Snapshot.
Definition: gstat.h:36
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node IDs SrcNId to node DstNId to the network.
Definition: network.h:331
static PTimeNENet New()
Definition: timenet.h:86
void SetNodeTmToFirstEdgeTm()
Definition: timenet.cpp:723
int CntNonZNodes(const PGraph &Graph)
Returns the number of nodes with degree greater than 0.
Definition: alg.h:114
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
Whitespace (space or tab) separated.
Definition: ss.h:11
uint GetAbsSecs() const
Definition: tm.h:149
#define Mega(n)
Definition: gbase.h:4
THash< TInt, TNode > NodeH
Definition: network.h:1150
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:152
int AddKey(const TKey &Key)
Definition: shash.h:1254
void DumpTimeStat() const
Definition: timenet.cpp:759
Tab separated.
Definition: ss.h:6
void SetRndEdgeTimes(const int &MinTmEdge=0)
Definition: timenet.cpp:742
void DelEdge(const int &SrcNId, const int &DstNId)
Deletes an edge between node IDs SrcNId and DstNId from the graph.
Definition: graph.cpp:95
int Len() const
Returns the number of fields in the current line.
Definition: ss.h:114
TTriple< TFlt, TFlt, TFlt > TFltTr
Definition: ds.h:175
Definition: tm.h:14
PUNGraph GenConfModel(const TIntV &DegSeqV, TRnd &Rnd)
Generates a random undirect graph with a given degree sequence.
Definition: ggen.cpp:119
TNodeEdgeNet & operator=(const TNodeEdgeNet &Net)
Definition: network.h:1168
static TStr GetNullStr()
Definition: dt.cpp:1626
void Tick()
Definition: tm.h:363
PTimeNENet GetSubGraph(const TIntV &NIdV) const
Definition: timenet.cpp:645
THash< TInt, TEdge > EdgeH
Definition: network.h:1151
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
void PlotMedianDegOverTm(const TStr &FNmPref, const TTmUnit &TmUnit, const int &NodesPerBucket=-1) const
Definition: timenet.cpp:300
void SetVal(const TGStatVal &StatVal, const double &Val)
Definition: gstat.cpp:88
static PTimeNet LoadBipartite(const TStr &InFNm)
Definition: timenet.cpp:344
int CntInDegNodes(const PGraph &Graph, const int &NodeInDeg)
Returns the number of nodes with in-degree NodeInDeg.
Definition: alg.h:87
static int NDiamRuns
Definition: gstat.h:38
TSecTm & GetEDat(const int &EId)
Returns edge data for the edge with ID EId.
Definition: network.h:1257
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: graph.cpp:84
int AddKey(const char *Key)
Definition: hash.h:870
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the network.
Definition: network.h:351
static PTimeNENet LoadFlickr(const TStr &NodeFNm, const TStr &EdgeFNm)
Definition: timenet.cpp:1345
Definition: hash.h:716
PTimeNENet GetESubGraph(const TIntV &EIdV) const
Definition: timenet.cpp:666
int AddNode(int NId=-1)
Adds a node of ID NId to the network.
Definition: network.h:273
int Len() const
Definition: shash.h:1121
Definition: tm.h:81
void GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:77
Definition: ds.h:32
static PTimeNENet GetGnmRndNet(const int &Nodes, const int &Edges)
Definition: timenet.cpp:1224
int AddKey(const TKey &Key)
Definition: hash.h:327
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:465
TTimeNENet & operator=(const TTimeNENet &TimeNet)
Definition: timenet.cpp:603
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the network.
Definition: network.h:1200
void GetNIdByTm(TIntV &NIdV) const
Definition: timenet.cpp:48
static TStr GetTmUnitStr(const TTmUnit &TmUnit)
Definition: tm.cpp:108
long long int64
Definition: bd.h:27
double GetMean() const
Definition: xmath.h:240
void Save(TSOut &SOut) const
Definition: xmlser.h:16
Definition: dt.h:412
TStr GetStr(const TLoc &Loc=lUs) const
Definition: tm.cpp:457
bool Empty() const
Definition: dt.h:488
TNodeNet & operator=(const TNodeNet &NodeNet)
Definition: network.h:153
static void PlotValV(const TVec< TPair< TVal1, TVal2 > > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:363
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
uint64 GetLineNo() const
Returns the line number of the current line.
Definition: ss.h:118
int GetTriadEdges(TIntV &TriadEIdV) const
Definition: timenet.cpp:855
void SplitOnAllCh(const char &SplitCh, TStrV &StrV, const bool &SkipEmpty=true) const
Definition: dt.cpp:926
int GetEdges() const
Returns the number of edges in the network.
Definition: network.h:1212
static int GetMonthN(const TStr &MonthNm, const TLoc &Loc=lUs)
Definition: tm.cpp:39
void UpdateNodeTimes()
Definition: timenet.cpp:707
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:327
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the graph.
Definition: graph.cpp:132
void GetKeyDatPrV(TVec< TPair< TKey, TDat > > &KeyDatPrV) const
Definition: hash.h:454
void GetTmBuckets(const TTmUnit &GroupBy, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:796
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:206
static TSecTm GetCurTm()
Definition: tm.cpp:692
double GetClustCf(const PGraph &Graph, int SampleNodes=-1)
Computes the average clustering coefficient as defined in Watts and Strogatz, Collective dynamics of ...
Definition: triad.h:100
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:202
bool Next()
Loads next line from the input file.
Definition: ss.cpp:410
Definition: bd.h:196
void SortNodeEdgeTimes()
Definition: timenet.cpp:701
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the network.
Definition: network.h:373
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:486
TPt< TTimeNet > PTimeNet
Definition: timenet.h:8
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
PTimeNet GetTimeNet() const
Definition: timenet.cpp:610
PGraph GetESubGraph(const PGraph &Graph, const TIntV &EIdV)
Returns a subgraph of graph Graph with EIdV edges.
Definition: subgraph.h:206
void SplitOnWs(TStrV &StrV) const
Definition: dt.cpp:972
TTimeNet & operator=(const TTimeNet &TimeNet)
Definition: timenet.cpp:3
void Reserve(const int &Nodes, const int &Edges)
Reserves memory for a network of Nodes nodes and Edges edges.
Definition: network.h:240
int AddErrBar(const TFltTrV &XYDValV, const TStr &Label=TStr())
Definition: gnuplot.cpp:261
const char * GetStr() const
Definition: tm.h:367
char * CStr()
Definition: dt.h:476
TTmUnit
Definition: tm.h:11
bool IsEdge(const int &SrcNId, const int &DstNId) const
Tests whether an edge between node IDs SrcNId and DstNId exists in the graph.
Definition: graph.cpp:108
int CntDegNodes(const PGraph &Graph, const int &NodeDeg)
Returns the number of nodes with degree NodeDeg.
Definition: alg.h:105
bool IsKey(const TKey &Key) const
Definition: hash.h:216
static PGStatVec New(const TTmUnit &_TmUnit=tmu1Sec)
Definition: gstat.cpp:426
static PTimeNet LoadAmazon(const TStr &StlFNm)
Definition: timenet.cpp:561
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:559
PTimeNet GetSubGraph(const TIntV &NIdV) const
Definition: timenet.cpp:10
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:204
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the network.
Definition: network.h:1249
PGStatVec TimeGrowth(const TTmUnit &TmUnit, const TFSet &TakeStat, const TSecTm &StartTm) const
Definition: timenet.cpp:88
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the network.
Definition: network.h:1198
int GetCmnNbrs(const PGraph &Graph, const int &NId1, const int &NId2)
Returns a number of shared neighbors between a pair of nodes NId1 and NId2.
Definition: triad.h:383
void Def()
Definition: xmath.cpp:339
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
bool IsCmt() const
Checks whether the current line is a comment (starts with '#').
Definition: ss.h:120
int CntOutDegNodes(const PGraph &Graph, const int &NodeOutDeg)
Returns the number of nodes with out-degree NodeOutDeg.
Definition: alg.h:96
TSecTm Round(const TTmUnit &TmUnit) const
Definition: tm.cpp:620
TPt< TTimeNENet > PTimeNENet
Definition: timenet.h:11
TSizeTy AddV(const TVec< TVal, TSizeTy > &ValV)
Adds the elements of the vector ValV to the to end of the vector.
Definition: ds.h:1020
void TakeBasicStat(const PGraph &Graph, const bool &IsMxWcc=false)
Definition: gstat.h:257
void SortByDat(const bool &Asc=true)
Definition: hash.h:246