SNAP Library 4.1, User Reference  2018-07-26 16:30:42
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
agmdirected.h
Go to the documentation of this file.
1 #ifndef yanglib_agmdirected_h
2 #define yanglib_agmdirected_h
3 #include "Snap.h"
4 #include "agm.h"
5 #include "agmfast.h"
6 
7 class TCoda { //sparse AGM-fast with coordinate ascent for directed affiliation
8 private:
9  PNGraph G; //graph to fit
10  TVec<TIntFltH> F; // outdegree membership for each user (Size: Nodes * Coms)
11  TVec<TIntFltH> H; // in-degree membership for each user (Size: Nodes * Coms) A ~ F * H'
12  TRnd Rnd; // random number generator
13  TIntV NIDV; // original node ID vector
14  TFlt RegCoef; //Regularization coefficient when we fit for P_c +: L1, -: L2
15  TFltV SumFV; // sum_u F_uc for each community c. Needed for efficient calculation
16  TFltV SumHV; // sum_u H_uc for each community c. Needed for efficient calculation
17  TBool NodesOk; // Node ID is from 0 ~ N-1
18  TInt NumComs; // number of communities
19  TVec<TIntSet> HOVIDSV; //NID pairs to hold out for cross validation
20 public:
21  TFlt MinVal; // minimum value of F (0)
22  TFlt MaxVal; // maximum value of F (for numerical reason)
23  TFlt NegWgt; // weight of negative example (a pair of nodes without an edge)
24  TFlt PNoCom; // base probability \varepsilon (edge probability between a pair of nodes sharing no community
25  TBool DoParallel; // whether to use parallelism for computation
26 
27  TCoda(const PNGraph& GraphPt, const int& InitComs, const int RndSeed = 0): Rnd(RndSeed), RegCoef(0),
28  NodesOk(true), MinVal(0.0), MaxVal(1000.0), NegWgt(1.0) { SetGraph(GraphPt); RandomInit(InitComs); }
29  TCoda() { G = TNGraph::New(); }
30  void SetGraph(const PNGraph& GraphPt);
31  PNGraph GetGraph() { return G; }
33  void SetRegCoef(const double _RegCoef) { RegCoef = _RegCoef; }
34  double GetRegCoef() { return RegCoef; }
35  void RandomInit(const int InitComs);
36  int GetNumComs() { return NumComs.Val; }
37  void NeighborComInit(const int InitComs);
38  void NeighborComInit(TFltIntPrV& NIdPhiV, const int InitComs);
39  void SetCmtyVV(const TVec<TIntV>& CmtyVVOut, const TVec<TIntV>& CmtyVVIn);
40  double Likelihood(const bool DoParallel = false);
41  double LikelihoodForNode(const bool IsRow, const int UID);
42  double LikelihoodForNode(const bool IsRow, const int UID, const TIntFltH& FU);
43  void GetNonEdgePairScores(TFltIntIntTrV& ScoreV);
44  void GetNIDValH(TIntFltH& NIdValInOutH, TIntFltH& NIdValOutH, TIntFltH& NIdValInH, const int CID, const double Thres);
45  void DumpMemberships(const TStr& OutFNm, const TStrHash<TInt>& NodeNameH) { DumpMemberships(OutFNm, NodeNameH, sqrt(PNoCom)); }
46  void DumpMemberships(const TStr& OutFNm, const TStrHash<TInt>& NodeNameH, const double Thres);
47  void GetCmtyS(TIntSet& CmtySOut, TIntSet& CmtySIn, const int CID, const double Thres);
48  void DumpMemberships(const TStr& OutFNm, const double Thres);
49  void DumpMemberships(const TStr& OutFNm) { DumpMemberships(OutFNm, sqrt(PNoCom)); }
50  void GetCommunity(TIntV& CmtyVIn, TIntV& CmtyVOut, const int CID) { GetCommunity(CmtyVIn, CmtyVOut, CID, sqrt(PNoCom)); }
51  void GetCommunity(TIntV& CmtyVIn, TIntV& CmtyVOut, const int CID, const double Thres);
52  void GetTopCIDs(TIntV& CIdV, const int TopK, const int IsAverage = 1, const int MinSz = 1);
53  void GradientForNode(const bool IsRow, const int UID, TIntFltH& GradU, const TIntSet& CIDSet);
54  void SetHoldOut(const double HOFrac) { TVec<TIntSet> HoldOut; TAGMFastUtil::GenHoldOutPairs(G, HoldOut, HOFrac, Rnd); HOVIDSV = HoldOut; }
55  //double LikelihoodForRow(const int UID);
56  //double LikelihoodForRow(const int UID, const TIntFltH& FU);
57  //double LikelihoodForCol(const int VID);
58  //double LikelihoodForCol(const int VID, const TIntFltH& HV);
59  //void GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet);
60  void GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const int MinSz = 3);
61  void GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const double ThresOut, const double ThresIn, const int MinSz = 3);
62  void GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV);
63  void GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
64  void GetCmtyVVUnSorted(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
65  void GetCmtyVVUnSorted(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn);
66  //void GetCmtyVVIn(TVec<TIntV>& CmtyVV);
67  //void GetCmtyVVIn(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
68  int FindComsByCV(TIntV& ComsV, const double HOFrac = 0.2, const int NumThreads = 20, const TStr PlotLFNm = TStr(), const int EdgesForCV = 100, const double StepAlpha = 0.3, const double StepBeta = 0.1);
69  int FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const int EdgesForCV = 100, const double StepAlpha = 0.3, const double StepBeta = 0.3);
70  double LikelihoodHoldOut(const bool DoParallel = false);
71  double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter = 10);
72  int MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
73  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
74  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const TStr PlotNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1) {
75  int ChunkSize = G->GetNodes() / 10 / ChunkNum;
76  if (ChunkSize == 0) { ChunkSize = 1; }
77  return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta);
78  }
79  //int MLENewton(const double& Thres, const int& MaxIter, const TStr PlotNm = TStr());
80  //double GradientForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
81  //double HessianForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
82  //double LikelihoodForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
83  //double FindOptimalThres(const TVec<TIntV>& TrueCmtyVV, TVec<TIntV>& CmtyVV);
84  void Save(TSOut& SOut);
85  void Load(TSIn& SIn, const int& RndSeed = 0);
86  TFlt& GetSumVal(const bool IsOut, const int CID) {
87  if (IsOut) {
88  return SumFV[CID];
89  } else {
90  return SumHV[CID];
91  }
92  }
93  double inline GetCom(const bool IsOut, const int& NID, const int& CID) {
94  if (IsOut) {
95  return GetComOut(NID, CID);
96  } else {
97  return GetComIn(NID, CID);
98  }
99  }
100  double inline GetComOut(const int& NID, const int& CID) {
101  if (F[NID].IsKey(CID)) {
102  return F[NID].GetDat(CID);
103  } else {
104  return 0.0;
105  }
106  }
107  double inline GetComIn(const int& NID, const int& CID) {
108  if (H[NID].IsKey(CID)) {
109  return H[NID].GetDat(CID);
110  } else {
111  return 0.0;
112  }
113  }
114  void inline AddCom(const bool IsOut, const int& NID, const int& CID, const double& Val) {
115  if (IsOut) {
116  AddComOut(NID, CID, Val);
117  } else {
118  AddComIn(NID, CID, Val);
119  }
120  }
121  void inline AddComOut(const int& NID, const int& CID, const double& Val) {
122  if (F[NID].IsKey(CID)) {
123  SumFV[CID] -= F[NID].GetDat(CID);
124  }
125  F[NID].AddDat(CID) = Val;
126  SumFV[CID] += Val;
127  }
128  void inline AddComIn(const int& NID, const int& CID, const double& Val) {
129  if (H[NID].IsKey(CID)) {
130  SumHV[CID] -= H[NID].GetDat(CID);
131  }
132  H[NID].AddDat(CID) = Val;
133  SumHV[CID] += Val;
134  }
135  void inline DelCom(const bool IsOut, const int& NID, const int& CID) {
136  if (IsOut) {
137  return DelComOut(NID, CID);
138  } else {
139  return DelComIn(NID, CID);
140  }
141  }
142  void inline DelComOut(const int& NID, const int& CID) {
143  if (F[NID].IsKey(CID)) {
144  SumFV[CID] -= F[NID].GetDat(CID);
145  F[NID].DelKey(CID);
146  }
147  }
148  void inline DelComIn(const int& NID, const int& CID) {
149  if (H[NID].IsKey(CID)) {
150  SumHV[CID] -= H[NID].GetDat(CID);
151  H[NID].DelKey(CID);
152  }
153  }
154  double inline DotProduct(const TIntFltH& UV, const TIntFltH& VV) {
155  double DP = 0;
156  if (UV.Len() > VV.Len()) {
157  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
158  if (VV.IsKey(HI.GetKey())) {
159  DP += VV.GetDat(HI.GetKey()) * HI.GetDat();
160  }
161  }
162  } else {
163  for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) {
164  if (UV.IsKey(HI.GetKey())) {
165  DP += UV.GetDat(HI.GetKey()) * HI.GetDat();
166  }
167  }
168  }
169  return DP;
170  }
171  double inline DotProductUtoV(const int& UID, const int& VID) {
172  return DotProduct(F[UID], H[VID]);
173  }
174  double inline Prediction(const TIntFltH& FU, const TIntFltH& HV) {
175  double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, HV);
176  IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP));
177  return exp(- DP);
178  }
179  double inline Prediction(const int& UID, const int& VID) {
180  return Prediction(F[UID], H[VID]);
181  }
182  double inline Sum(const TIntFltH& UV) {
183  double N = 0.0;
184  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
185  N += HI.GetDat();
186  }
187  return N;
188  }
189  double inline Norm2(const TIntFltH& UV) {
190  double N = 0.0;
191  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
192  N += HI.GetDat() * HI.GetDat();
193  }
194  return N;
195  }
196 };
197 
199 public:
205  TCodaAnalyzer(TCoda& Coda, const double MemThres = -1.0) {
206  G = Coda.GetGraphRawNID();
207  printf("graph copied (%d nodes %d edges)\n", G->GetNodes(), G->GetEdges());
208  TIntV CIdV;
209  Coda.GetTopCIDs(CIdV, Coda.GetNumComs());
210  double Delta = MemThres == -1.0 ? sqrt(Coda.PNoCom): MemThres;
211  for (int c = 0; c < CIdV.Len(); c++) {
212  int CID = CIdV[c];
213  TIntFltH InMemH, OutMemH, InOutMemH;
214  Coda.GetNIDValH(InOutMemH, OutMemH, InMemH, CID, Delta);
215  InCmtyValHV.Add(InMemH);
216  OutCmtyValHV.Add(OutMemH);
217  InOutCmtyValHV.Add(InOutMemH);
218  }
219  printf("Communities copied (%d communities)\n", InCmtyValHV.Len());
220  }
221  void GetAllCmtyVV(TVec<TIntV>& CmtyVV, const int MinSz) {
222  for (int c = 0; c < InCmtyValHV.Len(); c++) {
223  TIntV CmtyVIn, CmtyVOut, CmtyVInOut;
224  if (InCmtyValHV[c].Len() < MinSz || OutCmtyValHV[c].Len() < MinSz) { continue; }
225  InOutCmtyValHV[c].GetKeyV(CmtyVInOut);
226  InCmtyValHV[c].GetKeyV(CmtyVIn);
227  OutCmtyValHV[c].GetKeyV(CmtyVOut);
228  CmtyVV.Add(CmtyVInOut);
229  CmtyVV.Add(CmtyVOut);
230  CmtyVV.Add(CmtyVIn);
231  }
232  }
233 
234  double GetFrac2Mode(const double Thres2Mode = 0.2, const int MinSzEach = 2) {
235  int Cnt2Mode = 0;
236  int CntAll = 0;
237  for (int c = 0; c < InCmtyValHV.Len(); c++) {
238  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
239  if (InCmtyValHV[c].Len() < MinSzEach || OutCmtyValHV[c].Len() < MinSzEach) { continue; }
240  if (Jacc <= Thres2Mode) { Cnt2Mode++; }
241  CntAll++;
242  }
243  return (double) Cnt2Mode / (double) CntAll;
244  }
245 
246  void Summary(const int TopK = 10, const double Thres2Mode = 0.2) {
247  int Cnt2Mode = 0;
248  double SumJacc = 0.0;
249  for (int c = 0; c < InCmtyValHV.Len(); c++) {
250  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
251  if (Jacc <= Thres2Mode) { Cnt2Mode++; }
252  SumJacc += Jacc;
253  if (c < TopK) {
254  printf("Cmty %d: InOut: %d, In:%d, Out:%d, Jacc;%.3f\n", c, InCmtyValHV[c].Len(), InCmtyValHV[c].Len(), OutCmtyValHV[c].Len(), Jacc);
255  }
256  }
257  double AvgJacc = SumJacc / (double) InCmtyValHV.Len();
258  printf("Average jaccard similarity = %.3f. (%d / %d communities are 2-mode)\n", AvgJacc, Cnt2Mode, InCmtyValHV.Len());
259  }
260  int GetNumComs() { return InCmtyValHV.Len(); }
262 
263  void GetCmtyVAll(TIntV& CmtyVAll, const int CID) {
264  TIntV CmtyVIn, CmtyVOut;
265  InCmtyValHV[CID].GetKeyV(CmtyVIn);
266  OutCmtyValHV[CID].GetKeyV(CmtyVOut);
267  CmtyVIn.Sort();
268  CmtyVOut.Sort();
269  CmtyVAll.Gen(CmtyVIn.Len() + CmtyVOut.Len(), 0);
270  CmtyVIn.Union(CmtyVOut, CmtyVAll);
271  }
272 
273  PNGraph Net2ModeCommunities(const double MaxJac, const double JacEdge, const bool GetWcc = true) {
274  //if In(A) is similar to Out(B), create an edge A->B between 2 communities A, B
275  int Coms = InCmtyValHV.Len();
276  PNGraph ComG = TNGraph::New(Coms, -1);
277  for (int c = 0; c < InCmtyValHV.Len(); c++) {
278  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
279  if (Jacc > MaxJac) { continue; }
280  ComG->AddNode(c);
281  }
282  TVec<TIntSet> CmtySVIn, CmtySVOut;
283  for (int c = 0; c < Coms; c++) {
284  TIntV CmtyVIn, CmtyVOut;
285  InCmtyValHV[c].GetKeyV(CmtyVIn);
286  OutCmtyValHV[c].GetKeyV(CmtyVOut);
287  TIntSet CmtySIn(CmtyVIn), CmtySOut(CmtyVOut);
288  CmtySVIn.Add(CmtySIn);
289  CmtySVOut.Add(CmtySOut);
290  }
291  for (int c1 = 0; c1 < Coms; c1++) {
292  if (! ComG->IsNode(c1)) { continue; }
293  for (int c2 = 0; c2 < Coms; c2++) {
294  if (! ComG->IsNode(c2)) { continue; }
295  int IntC1C2 = TAGMUtil::Intersection(CmtySVIn[c1], CmtySVOut[c2]);
296  double Jac = (double) IntC1C2 / (CmtySVIn[c1].Len() + CmtySVOut[c2].Len() - IntC1C2);
297  if (Jac >= JacEdge) {
298  ComG->AddEdge(c1, c2);
299  }
300  }
301  }
302  //PNGraph Wcc = TSnap::GetMxWcc(ComG);
303  TIntV NIDV;
304  ComG->GetNIdV(NIDV);
305  for (int u = 0; u < NIDV.Len(); u++) {
306  int NID = NIDV[u];
307  TNGraph::TNodeI NI = ComG->GetNI(NID);
308  if (NI.GetDeg() == 0) { ComG->DelNode(NID); }
309  if (NI.GetInDeg() == 1 && NI.GetOutDeg() == 1 && NI.GetOutNId(0) == NID) { ComG->DelNode(NID); }
310  }
311  printf("Community graph made (Jaccard similarity for edges: %f, %d nodes, %d edges)\n", JacEdge, ComG->GetNodes(), ComG->GetEdges());
312  return ComG;
313  }
314 
315  // RS:2014/03/11 default parameter values do not compile on OS X with g++-4.2
316  //void Dump2ModeCommunities(const TStr& OutFNm, const double MaxJac, const TIntStrH& NIDNameH = THash<TInt, TStr>()) {
317  void Dump2ModeCommunities(const TStr& OutFNm, const double MaxJac, const TIntStrH& NIDNameH) {
318  FILE* F = fopen(OutFNm.CStr(), "wt");
319  for (int c = 0; c < InCmtyValHV.Len(); c++) {
320  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
321  if (Jacc > MaxJac) { continue; }
322  TIntV CmtyVIn, CmtyVOut, CmtyVAll;
323  InCmtyValHV[c].GetKeyV(CmtyVIn);
324  OutCmtyValHV[c].GetKeyV(CmtyVOut);
325  GetCmtyVAll(CmtyVAll, c);
326  //adjust for the nodes who belong to both cmtyvin and cmtyvout
327  for (int u = 0; u < InOutCmtyValHV[c].Len(); u++) {
328  int UID = InOutCmtyValHV[c].GetKey(u);
329  if (CmtyVIn.Len() >= CmtyVOut.Len()) {
330  CmtyVIn.DelIfIn(UID);
331  } else {
332  CmtyVOut.DelIfIn(UID);
333  }
334  }
335  if (CmtyVAll.Len() == 0) { continue; }
336  fprintf(F, "Com %d\n", c);
337  for (int u = 0; u < CmtyVOut.Len(); u++) {
338  int NID = CmtyVOut[u];
339  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): TStr::Fmt("Concept %d", NID);
340  fprintf(F, "%s:%f\n", Label.CStr(), OutCmtyValHV[c].GetDat(NID).Val);
341  }
342  fprintf(F, "||==>||\n");
343  for (int u = 0; u < CmtyVIn.Len(); u++) {
344  int NID = CmtyVIn[u];
345  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): TStr::Fmt("Concept %d", NID);
346  fprintf(F, "%s:%f\n", Label.CStr(), InCmtyValHV[c].GetDat(NID).Val);
347  }
348  fprintf(F, "\n");
349  }
350  fclose(F);
351  }
352 
353  // RS:2014/03/11 default parameter values do not compile on OS X with g++-4.2
354  //void Draw2ModeCommunity(const int CID, const TStr& OutFNm, const TIntStrH& NIDNameH = THash<TInt, TStr>(), const THash<TInt, TIntTr>& NIDColorH = THash<TInt, TIntTr>() ) {
355  void Draw2ModeCommunity(const int CID, const TStr& OutFNm, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH) {
356  TIntV CmtyVIn, CmtyVOut, CmtyVAll;
357  InCmtyValHV[CID].GetKeyV(CmtyVIn);
358  OutCmtyValHV[CID].GetKeyV(CmtyVOut);
359  GetCmtyVAll(CmtyVAll, CID);
360 
361  //adjust for the nodes who belong to both cmtyvin and cmtyvout
362  for (int u = 0; u < InOutCmtyValHV[CID].Len(); u++) {
363  int UID = InOutCmtyValHV[CID].GetKey(u);
364  if (CmtyVIn.Len() >= CmtyVOut.Len()) {
365  CmtyVIn.DelIfIn(UID);
366  } else {
367  CmtyVOut.DelIfIn(UID);
368  }
369  }
370 
371  PNGraph SG = TSnap::GetSubGraph(G, CmtyVAll);
373  if (CmtyVAll.Len() == 0) { return; }
374  double OXMin = 0.1, YMin = 0.1, OXMax = 2500.00, YMax = 1000.0, IXMin = 0.1, IXMax = 2500.00;
375  double OStep = (OXMax - OXMin) / (double) CmtyVOut.Len(), IStep = (IXMax - IXMin) / (double) CmtyVIn.Len();
376 
377  FILE* F = fopen(OutFNm.CStr(), "wt");
378  fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
379  fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
380  fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
381  fprintf(F, "\t\t<nodes>\n");
382  for (int c = 0; c < CmtyVOut.Len(); c++) {
383  int NID = CmtyVOut[c];
384  double XPos = c * OStep + OXMin;
385  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
386  Label.ChangeChAll('<', ' ');
387  Label.ChangeChAll('>', ' ');
388  Label.ChangeChAll('&', ' ');
389  Label.ChangeChAll('\'', ' ');
390  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
391  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
392  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
393  fprintf(F, "\t\t\t\t<viz:size value='4.0'/>\n");
394  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
395  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax);
396  fprintf(F, "\t\t\t</node>\n");
397  }
398 
399  for (int u = 0; u < CmtyVIn.Len(); u++) {
400  int NID = CmtyVIn[u];
401  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
402  Label.ChangeChAll('<', ' ');
403  Label.ChangeChAll('>', ' ');
404  Label.ChangeChAll('&', ' ');
405  Label.ChangeChAll('\'', ' ');
406  double XPos = IXMin + u * IStep;
407  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
408  double Alpha = 1.0;
409  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
410  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
411  fprintf(F, "\t\t\t\t<viz:size value='4.0'/>\n");
412  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
413  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin);
414  fprintf(F, "\t\t\t</node>\n");
415  }
416  fprintf(F, "\t\t</nodes>\n");
417  //plot edges
418  int EID = 0;
419  fprintf(F, "\t\t<edges>\n");
420  for (TNGraph::TNodeI NI = SG->BegNI(); NI < SG->EndNI(); NI++) {
421  if (NI.GetOutDeg() == 0 && NI.GetInDeg() == 0 ) { continue; }
422  for (int e = 0; e < NI.GetOutDeg(); e++) {
423  fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, NI.GetId(), NI.GetOutNId(e));
424  }
425  }
426  fprintf(F, "\t\t</edges>\n");
427  fprintf(F, "\t</graph>\n");
428  fprintf(F, "</gexf>\n");
429  fclose(F);
430  }
431 
432 };
433 
434 #endif
PNGraph GetGraphRawNID()
bool DelIfIn(const TVal &Val)
Removes the first occurrence of element Val.
Definition: ds.h:1212
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
#define IAssertR(Cond, Reason)
Definition: bd.h:265
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:544
double LikelihoodForNode(const bool IsRow, const int UID)
void AddComIn(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:128
TFlt NegWgt
Definition: agmdirected.h:23
void DelComOut(const int &NID, const int &CID)
Definition: agmdirected.h:142
void Dump2ModeCommunities(const TStr &OutFNm, const double MaxJac, const TIntStrH &NIDNameH)
Definition: agmdirected.h:317
double GetRegCoef()
Definition: agmdirected.h:34
Definition: dt.h:11
Definition: ds.h:130
void DumpMemberships(const TStr &OutFNm)
Definition: agmdirected.h:49
int Val
Definition: dt.h:1136
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:477
TFlt MinVal
Definition: agmdirected.h:21
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:548
void SetGraph(const PNGraph &GraphPt)
void GetNIDValH(TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
void GetAllCmtyVV(TVec< TIntV > &CmtyVV, const int MinSz)
Definition: agmdirected.h:221
double Likelihood(const bool DoParallel=false)
void NeighborComInit(const int InitComs)
Definition: agmdirected.cpp:83
TVec< TIntFltH > H
Definition: agmdirected.h:11
TIter BegI() const
Definition: hash.h:213
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void DelComIn(const int &NID, const int &CID)
Definition: agmdirected.h:148
static int Intersection(const TIntV &C1, const TIntV &C2)
Definition: agm.cpp:399
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
double GetComOut(const int &NID, const int &CID)
Definition: agmdirected.h:100
void SetCmtyVV(const TVec< TIntV > &CmtyVVOut, const TVec< TIntV > &CmtyVVIn)
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:499
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:236
void GetCmtyS(TIntSet &CmtySOut, TIntSet &CmtySIn, const int CID, const double Thres)
TVal1 Val1
Definition: ds.h:132
int GetNumComs()
Definition: agmdirected.h:260
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const int EdgesForCV=100, const double StepAlpha=0.3, const double StepBeta=0.1)
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
TIter EndI() const
Definition: hash.h:218
void GetTopCIDs(TIntV &CIdV, const int TopK, const int IsAverage=1, const int MinSz=1)
void SetRegCoef(const double _RegCoef)
Definition: agmdirected.h:33
TBool DoParallel
Definition: agmdirected.h:25
TFlt RegCoef
Definition: agmdirected.h:14
int ChangeChAll(const char &SrcCh, const char &DstCh)
Definition: dt.cpp:1113
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmdirected.h:154
Definition: dt.h:1383
Definition: fl.h:58
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmdirected.h:74
void GetCmtyVVUnSorted(const bool IsOut, TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
double GetFrac2Mode(const double Thres2Mode=0.2, const int MinSzEach=2)
Definition: agmdirected.h:234
void DelNode(const int &NId)
Deletes node of ID NId from the graph.
Definition: graph.cpp:294
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
TIntV NIDV
Definition: agmdirected.h:13
double Prediction(const int &UID, const int &VID)
Definition: agmdirected.h:179
double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
void AddComOut(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:121
TVal2 Val2
Definition: ds.h:133
void GetCmtyVV(TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)
PNGraph GetGraph()
Definition: agmdirected.h:31
int GetNumComs()
Definition: agmdirected.h:36
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
double LikelihoodHoldOut(const bool DoParallel=false)
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
TFltV SumFV
Definition: agmdirected.h:15
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node SrcNId to node DstNId to the graph.
Definition: graph.cpp:321
TCoda(const PNGraph &GraphPt, const int &InitComs, const int RndSeed=0)
Definition: agmdirected.h:27
void Summary(const int TopK=10, const double Thres2Mode=0.2)
Definition: agmdirected.h:246
void Save(TSOut &SOut)
Definition: agmdirected.cpp:8
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
TVec< TIntFltH > F
Definition: agmdirected.h:10
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:542
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmdirected.cpp:25
TCodaAnalyzer(TCoda &Coda, const double MemThres=-1.0)
Definition: agmdirected.h:205
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
Definition: agmdirected.h:174
TFlt PNoCom
Definition: agmdirected.h:24
PNGraph G
Definition: agmdirected.h:9
double GetComIn(const int &NID, const int &CID)
Definition: agmdirected.h:107
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:398
Definition: fl.h:128
TInt NumComs
Definition: agmdirected.h:18
TBool NodesOk
Definition: agmdirected.h:17
Definition: dt.h:1134
void DelCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:135
Definition: hash.h:781
void GetNonEdgePairScores(TFltIntIntTrV &ScoreV)
void RandomInit(const int InitComs)
Definition: agmdirected.cpp:43
PNGraph Net2ModeCommunities(const double MaxJac, const double JacEdge, const bool GetWcc=true)
Definition: agmdirected.h:273
TCoda()
Definition: agmdirected.h:29
double Norm2(const TIntFltH &UV)
Definition: agmdirected.h:189
void SetHoldOut(const double HOFrac)
Definition: agmdirected.h:54
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:546
double DotProductUtoV(const int &UID, const int &VID)
Definition: agmdirected.h:171
void Union(const TVec< TVal, TSizeTy > &ValV)
Sets this vector to its union with ValV. Assumes the vectors are sorted!
Definition: ds.h:1418
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:402
Definition: dt.h:412
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TFltV SumHV
Definition: agmdirected.h:16
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:379
void DumpMemberships(const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
Definition: agmdirected.h:45
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmfast.h:159
void AddCom(const bool IsOut, const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:114
TTriple< TInt, TInt, TInt > TIntTr
Definition: ds.h:171
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
void GetCommunity(TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID)
Definition: agmdirected.h:50
void GetCmtyVAll(TIntV &CmtyVAll, const int CID)
save bipartite community affiliation into gexf file
Definition: agmdirected.h:263
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:400
TRnd Rnd
Definition: agmdirected.h:12
char * CStr()
Definition: dt.h:476
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TFlt MaxVal
Definition: agmdirected.h:22
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Definition: dt.h:971
double Sum(const TIntFltH &UV)
Definition: agmdirected.h:182
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93
int Len() const
Definition: hash.h:228
TVec< TIntFltH > InOutCmtyValHV
Definition: agmdirected.h:203
void GradientForNode(const bool IsRow, const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:412
TVal3 Val3
Definition: ds.h:134
void Draw2ModeCommunity(const int CID, const TStr &OutFNm, const TIntStrH &NIDNameH, const THash< TInt, TIntTr > &NIDColorH)
Definition: agmdirected.h:355
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430