SNAP Library 2.4, User Reference  2015-05-11 19:40:56
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
mag.cpp
Go to the documentation of this file.
1 #include "stdafx.h"
2 #include "mag.h"
3 
7 
8 
10 // MAG affinity matrix
11 
12 const double TMAGAffMtx::NInf = -DBL_MAX;
13 
14 TMAGAffMtx::TMAGAffMtx(const TFltV& SeedMatrix) : SeedMtx(SeedMatrix) {
15  MtxDim = (int) sqrt((double)SeedMatrix.Len());
17 }
18 
20  if (this != &Kronecker){
21  MtxDim=Kronecker.MtxDim;
22  SeedMtx=Kronecker.SeedMtx;
23  }
24  return *this;
25 }
26 
27 bool TMAGAffMtx::IsProbMtx() const {
28  for (int i = 0; i < Len(); i++) {
29  if (At(i) < 0.0 || At(i) > 1.0) return false;
30  }
31  return true;
32 }
33 
34 void TMAGAffMtx::SetRndMtx(TRnd& Rnd, const int& PrmMtxDim, const double& MinProb) {
35  MtxDim = PrmMtxDim;
37  for (int p = 0; p < SeedMtx.Len(); p++) {
38  do {
39  SeedMtx[p] = Rnd.GetUniDev();
40  } while (SeedMtx[p] < MinProb);
41  }
42 }
43 
44 void TMAGAffMtx::SetEpsMtx(const double& Eps1, const double& Eps0, const int& Eps1Val, const int& Eps0Val) {
45  for (int i = 0; i < Len(); i++) {
46  double& Val = At(i);
47  if (Val == Eps1Val) Val = double(Eps1);
48  else if (Val == Eps0Val) Val = double(Eps0);
49  }
50 }
51 
52 void TMAGAffMtx::AddRndNoise(TRnd& Rnd, const double& SDev) {
53  Dump("before");
54  double NewVal;
55  int c =0;
56  for (int i = 0; i < Len(); i++) {
57  for(c = 0; ((NewVal = At(i)*Rnd.GetNrmDev(1, SDev, 0.8, 1.2)) < 0.01 || NewVal>0.99) && c <1000; c++) { }
58  if (c < 999) { At(i) = NewVal; } else { printf("XXXXX\n"); }
59  }
60  Dump("after");
61 }
62 
64  TChA ChA("[");
65  for (int i = 0; i < Len(); i++) {
66  ChA += TStr::Fmt("%g", At(i));
67  if ((i+1)%GetDim()==0 && (i+1<Len())) { ChA += "; "; }
68  else if (i+1<Len()) { ChA += " "; }
69  }
70  ChA += "]";
71  return TStr(ChA);
72 }
73 
75  LLMtx.GenMtx(MtxDim);
76  for (int i = 0; i < Len(); i++) {
77  if (At(i) != 0.0) { LLMtx.At(i) = log(At(i)); }
78  else { LLMtx.At(i) = NInf; }
79  }
80 }
81 
83  ProbMtx.GenMtx(MtxDim);
84  for (int i = 0; i < Len(); i++) {
85  if (At(i) != NInf) { ProbMtx.At(i) = exp(At(i)); }
86  else { ProbMtx.At(i) = 0.0; }
87  }
88 }
89 
91  ::Swap(MtxDim, Mtx.MtxDim);
92  SeedMtx.Swap(Mtx.SeedMtx);
93 }
94 
95 double TMAGAffMtx::GetMtxSum() const {
96  double Sum = 0;
97  for (int i = 0; i < Len(); i++) {
98  Sum += At(i); }
99  return Sum;
100 }
101 
102 double TMAGAffMtx::GetRowSum(const int& RowId) const {
103  double Sum = 0;
104  for (int c = 0; c < GetDim(); c++) {
105  Sum += At(RowId, c); }
106  return Sum;
107 }
108 
109 double TMAGAffMtx::GetColSum(const int& ColId) const {
110  double Sum = 0;
111  for (int r = 0; r < GetDim(); r++) {
112  Sum += At(r, ColId); }
113  return Sum;
114 }
115 
117  double Sum = GetMtxSum();
118  if(Sum == 0) {
119  return 0;
120  }
121 
122  for(int i = 0; i < Len(); i++) {
123  At(i) = At(i) / Sum;
124  }
125  return Sum;
126 }
127 
128 void TMAGAffMtx::Dump(const TStr& MtxNm, const bool& Sort) const {
129  /*printf("%s: %d x %d\n", MtxNm.Empty()?"Mtx":MtxNm.CStr(), GetDim(), GetDim());
130  for (int r = 0; r < GetDim(); r++) {
131  for (int c = 0; c < GetDim(); c++) { printf(" %8.2g", At(r, c)); }
132  printf("\n");
133  }*/
134  if (! MtxNm.Empty()) printf("%s\n", MtxNm.CStr());
135  double Sum=0.0;
136  TFltV ValV = SeedMtx;
137  if (Sort) { ValV.Sort(false); }
138  for (int i = 0; i < ValV.Len(); i++) {
139  printf(" %10.4g", ValV[i]());
140  Sum += ValV[i];
141  if ((i+1) % GetDim() == 0) { printf("\n"); }
142  }
143  printf(" (sum:%.4f)\n", Sum);
144 }
145 
146 // average difference in the parameters
147 double TMAGAffMtx::GetAvgAbsErr(const TMAGAffMtx& Mtx1, const TMAGAffMtx& Mtx2) {
148  TFltV P1 = Mtx1.GetMtx();
149  TFltV P2 = Mtx2.GetMtx();
150  IAssert(P1.Len() == P2.Len());
151  P1.Sort(); P2.Sort();
152  double delta = 0.0;
153  for (int i = 0; i < P1.Len(); i++) {
154  delta += fabs(P1[i] - P2[i]);
155  }
156  return delta/P1.Len();
157 }
158 
159 // average L2 difference in the parameters
160 double TMAGAffMtx::GetAvgFroErr(const TMAGAffMtx& Mtx1, const TMAGAffMtx& Mtx2) {
161  TFltV P1 = Mtx1.GetMtx();
162  TFltV P2 = Mtx2.GetMtx();
163  IAssert(P1.Len() == P2.Len());
164  P1.Sort(); P2.Sort();
165  double delta = 0.0;
166  for (int i = 0; i < P1.Len(); i++) {
167  delta += pow(P1[i] - P2[i], 2);
168  }
169  return sqrt(delta/P1.Len());
170 }
171 
172 // get matrix from matlab matrix notation
174  TStrV RowStrV, ColStrV;
175  MatlabMtxStr.ChangeChAll(',', ' ');
176  MatlabMtxStr.SplitOnAllCh(';', RowStrV); IAssert(! RowStrV.Empty());
177  RowStrV[0].SplitOnWs(ColStrV); IAssert(! ColStrV.Empty());
178  const int Rows = RowStrV.Len();
179  const int Cols = ColStrV.Len();
180  IAssert(Rows == Cols);
181  TMAGAffMtx Mtx(Rows);
182  for (int r = 0; r < Rows; r++) {
183  RowStrV[r].SplitOnWs(ColStrV);
184  IAssert(ColStrV.Len() == Cols);
185  for (int c = 0; c < Cols; c++) {
186  Mtx.At(r, c) = (double) ColStrV[c].GetFlt(); }
187  }
188  return Mtx;
189 }
190 
191 TMAGAffMtx TMAGAffMtx::GetRndMtx(TRnd& Rnd, const int& Dim, const double& MinProb) {
192  TMAGAffMtx Mtx;
193  Mtx.SetRndMtx(Rnd, Dim, MinProb);
194  return Mtx;
195 }
196 
197 
199 // Simple MAG node attributes (Same Bernoulli for every attribute)
200 
201 void TMAGNodeSimple::AttrGen(TIntVV& AttrVV, const int& NNodes) {
202  IAssert(Dim > 0);
203  AttrVV.Gen(NNodes, Dim);
204  AttrVV.PutAll(0);
205 
206  for(int i = 0; i < NNodes; i++) {
207  for(int l = 0; l < Dim; l++) {
208  if((TMAGNodeSimple::Rnd).GetUniDev() > Mu) {
209  AttrVV.At(i, l) = 1;
210  }
211  }
212  }
213 }
214 
215 void TMAGNodeSimple::LoadTxt(const TStr& InFNm) {
216  FILE *fp = fopen(InFNm.CStr(), "r");
217  IAssertR(fp != NULL, "File does not exist: " + InFNm);
218 
219  char buf[128];
220  char *token;
221  TStr TokenStr;
222  TFlt Val;
223 
224  token = strtok(buf, "&");
225  token = strtok(token, " \t");
226  TokenStr = TStr(token);
227  Mu = TokenStr.GetFlt(Val);
228 
229  fclose(fp);
230 }
231 
232 void TMAGNodeSimple::SaveTxt(TStrV& OutStrV) const {
233  OutStrV.Gen(Dim, 0);
234 
235  for(int i = 0; i < Dim; i++) {
236  OutStrV.Add(TStr::Fmt("%f", double(Mu)));
237  }
238 }
239 
240 
242 // MAG node attributes with (different) Bernoulli
243 
245  MuV = Dist.MuV;
246  Dim = Dist.Dim;
247  return (*this);
248 }
249 
250 void TMAGNodeBern::AttrGen(TIntVV& AttrVV, const int& NNodes) {
251  IAssert(Dim > 0);
252  AttrVV.Gen(NNodes, Dim);
253  AttrVV.PutAll(0);
254 
255  for(int i = 0; i < NNodes; i++) {
256  for(int l = 0; l < Dim; l++) {
257  if((TMAGNodeBern::Rnd).GetUniDev() > MuV[l]) {
258  AttrVV.At(i, l) = 1;
259  }
260  }
261  }
262 }
263 
264 void TMAGNodeBern::LoadTxt(const TStr& InFNm) {
265  FILE *fp = fopen(InFNm.CStr(), "r");
266  IAssertR(fp != NULL, "File does not exist: " + InFNm);
267 
268  Dim = 0;
269  MuV.Gen(10, 0);
270 
271  char buf[128];
272  char *token;
273  TStr TokenStr;
274  TFlt Val;
275 
276  while(fgets(buf, sizeof(buf), fp) != NULL) {
277  token = strtok(buf, "&");
278  token = strtok(token, " \t");
279  TokenStr = TStr(token);
280  MuV.Add(TokenStr.GetFlt(Val));
281  }
282  Dim = MuV.Len();
283 
284  fclose(fp);
285 }
286 
287 void TMAGNodeBern::SaveTxt(TStrV& OutStrV) const {
288  OutStrV.Gen(Dim, 0);
289 
290  for(int i = 0; i < Dim; i++) {
291  OutStrV.Add(TStr::Fmt("%f", double(MuV[i])));
292  }
293 }
294 
295 
297 // MAG node attributes with Beta + Bernoulli family
298 
300  AlphaV = Dist.AlphaV;
301  BetaV = Dist.BetaV;
302  Dim = Dist.Dim;
303  MuV = Dist.MuV;
304  Dirty = Dist.Dirty;
305  return (*this);
306 }
307 
308 void TMAGNodeBeta::SetBeta(const int& Attr, const double& Alpha, const double& Beta) {
309  IAssert(Attr < Dim);
310  AlphaV[Attr] = Alpha;
311  BetaV[Attr] = Beta;
312  Dirty = true;
313 }
314 
315 void TMAGNodeBeta::SetBetaV(const TFltV& _AlphaV, const TFltV& _BetaV) {
316  IAssert(_AlphaV.Len() == _BetaV.Len());
317  AlphaV = _AlphaV;
318  BetaV = _BetaV;
319  Dim = _AlphaV.Len();
320  Dirty = true;
321 }
322 
323 void TMAGNodeBeta::AttrGen(TIntVV& AttrVV, const int& NNodes) {
324  IAssert(Dim > 0);
325  AttrVV.Gen(NNodes, Dim);
326  AttrVV.PutAll(0);
327 
328 // printf("AlphaV = %d, BetaV = %d, MuV = %d\n", AlphaV.Len(), BetaV.Len(), MuV.Len());
329 
330  for(int i = 0; i < NNodes; i++) {
331  for(int l = 0; l < Dim; l++) {
332  double x = TMAGNodeBeta::Rnd.GetGammaDev((int)AlphaV[l]);
333  double y = TMAGNodeBeta::Rnd.GetGammaDev((int)BetaV[l]);
334  MuV[l] = x / (x + y);
335  if((TMAGNodeBeta::Rnd).GetUniDev() > MuV[l]) {
336  AttrVV.At(i, l) = 1;
337  }
338  }
339  }
340  Dirty = false;
341 }
342 
343 void TMAGNodeBeta::LoadTxt(const TStr& InFNm) {
344  FILE *fp = fopen(InFNm.CStr(), "r");
345  IAssertR(fp != NULL, "File does not exist: " + InFNm);
346 
347  Dim = 0;
348  AlphaV.Gen(10, 0);
349  BetaV.Gen(10, 0);
350 
351  char buf[128];
352  char *token;
353  TStr TokenStr;
354  TFlt Val;
355 
356  while(fgets(buf, sizeof(buf), fp) != NULL) {
357  token = strtok(buf, "&");
358 
359  token = strtok(token, " \t");
360  TokenStr = TStr(token);
361  AlphaV.Add(TokenStr.GetFlt(Val));
362 
363  token = strtok(NULL, " \t");
364  TokenStr = TStr(token);
365  BetaV.Add(TokenStr.GetFlt(Val));
366 
367  Dim++;
368  }
369 
370  fclose(fp);
371 }
372 
373 void TMAGNodeBeta::SaveTxt(TStrV& OutStrV) const {
374  OutStrV.Gen(Dim, 0);
375 
376  for(int i = 0; i < Dim; i++) {
377  OutStrV.Add(TStr::Fmt("%f %f", double(AlphaV[i]), double(BetaV[i])));
378  }
379 }
380 
381 
383 // MAGFit with Bernoulli node attributes
384 
385 void TMAGFitBern::SetGraph(const PNGraph& GraphPt) {
386  Graph = GraphPt;
387  bool NodesOk = true;
388  // check that nodes IDs are {0,1,..,Nodes-1}
389  for (int nid = 0; nid < Graph->GetNodes(); nid++) {
390  if (! Graph->IsNode(nid)) { NodesOk=false; break; } }
391  if (! NodesOk) {
392  TIntV NIdV; GraphPt->GetNIdV(NIdV);
393  Graph = TSnap::GetSubGraph(GraphPt, NIdV, true);
394  for (int nid = 0; nid < Graph->GetNodes(); nid++) {
395  IAssert(Graph->IsNode(nid)); }
396  }
397 }
398 
399 void TMAGFitBern::SetPhiVV(const TIntVV& AttrVV, const int KnownIds) {
400  const int NNodes = Param.GetNodes();
401  const int NAttrs = Param.GetAttrs();
402 
403  PhiVV.Gen(NNodes, NAttrs);
404  KnownVV.Gen(NNodes, NAttrs);
405 
406  for(int l = 0; l < NAttrs; l++) {
407  for(int i = 0; i < NNodes; i++) {
408  if(int(AttrVV(i, l)) == 0) {
409  PhiVV(i, l) = 0.9999;
410  } else {
411  PhiVV(i, l) = 0.0001;
412  }
413  }
414 
415  if(l < KnownIds) {
416  KnownVV.PutY(l, true);
417  } else {
418  KnownVV.PutY(l, false);
419  }
420  }
421 }
422 
423 void TMAGFitBern::SaveTxt(const TStr& FNm) {
424  const int NNodes = Param.GetNodes();
425  const int NAttrs = Param.GetAttrs();
426  const TFltV MuV = GetMuV();
427  TMAGAffMtxV MtxV;
428  Param.GetMtxV(MtxV);
429 
430  FILE *fp = fopen(FNm.GetCStr(), "w");
431  for(int l = 0; l < NAttrs; l++) {
432  fprintf(fp, "%.4f\t", double(MuV[l]));
433  for(int row = 0; row < 2; row++) {
434  for(int col = 0; col < 2; col++) {
435  fprintf(fp, " %.4f", double(MtxV[l].At(row, col)));
436  }
437  fprintf(fp, (row == 0) ? ";" : "\n");
438  }
439  }
440  fclose(fp);
441 
442  fp = fopen((FNm + "f").CStr(), "w");
443  for(int i = 0; i < NNodes; i++) {
444  for(int l = 0; l < NAttrs; l++) {
445  fprintf(fp, "%f ", double(PhiVV(i, l)));
446  }
447  fprintf(fp, "\n");
448  }
449  fclose(fp);
450 }
451 
452 void TMAGFitBern::Init(const TFltV& MuV, const TMAGAffMtxV& AffMtxV) {
453  TMAGNodeBern DistParam(MuV);
454  Param.SetNodeAttr(DistParam);
455  Param.SetMtxV(AffMtxV);
456 
457  const int NNodes = Param.GetNodes();
458  const int NAttrs = Param.GetAttrs();
459 
460  PhiVV.Gen(NNodes, NAttrs);
461  KnownVV.Gen(NNodes, NAttrs);
462  KnownVV.PutAll(false);
463 }
464 
465 #if 0
466 void TMAGFitBern::PerturbInit(const TFltV& MuV, const TMAGAffMtxV& AffMtxV, const double& PerturbRate) {
467  IAssert(PerturbRate < 1.0);
468 
469  TFltV InitMuV = MuV; //InitMuV.PutAll(0.5);
470  TMAGNodeBern DistParam(InitMuV);
471  Param.SetMtxV(AffMtxV);
472  TRnd& Rnd = TMAGNodeBern::Rnd;
473  TMAGAffMtxV PerturbMtxV = AffMtxV;
474 
475  const int NNodes = Param.GetNodes();
476  const int NAttrs = Param.GetAttrs();
477 
478  for(int l = 0; l < NAttrs; l++) {
479  double Mu = MuV[l] + PerturbRate * (Rnd.GetUniDev() - 0.5) * 2;
480 // double Mu = 0.5;
481  if(Mu < 0.01) { Mu = 0.01; }
482  if(Mu > 0.99) { Mu = 0.99; }
483  DistParam.SetMu(l, Mu);
484 // PhiVV.PutY(l, MuV[l] + Perturb);
485  TMAGAffMtx AffMtx(AffMtxV[l]);
486  for(int p = 0; p < 4; p++) {
487  AffMtx.At(p) += PerturbRate * (Rnd.GetUniDev() - 0.5) * 2;
488  if(AffMtx.At(p) < 0.05) { AffMtx.At(p) = 0.05; }
489  if(AffMtx.At(p) > 0.95) { AffMtx.At(p) = 0.95; }
490  }
491  AffMtx.At(0, 1) = AffMtx.At(1, 0);
492  PerturbMtxV[l] = AffMtx;
493  }
494 // NormalizeAffMtxV(PerturbMtxV);
495 
496  printf("\n");
497  for(int l = 0; l < NAttrs; l++) {
498  printf("Mu = %.3f ", DistParam.GetMu(l));
499  printf("AffMtx = %s\n", PerturbMtxV[l].GetMtxStr().GetCStr());
500  }
501  Param.SetMtxV(PerturbMtxV);
502  Param.SetNodeAttr(DistParam);
503 
504  PhiVV.Gen(NNodes, NAttrs);
505  KnownVV.Gen(NNodes, NAttrs);
506  KnownVV.PutAll(false);
507 }
508 #endif // 0
509 
510 void TMAGFitBern::RandomInit(const TFltV& MuV, const TMAGAffMtxV& AffMtxV, const int& Seed) {
511  TRnd& Rnd = TMAGNodeBern::Rnd;
512  Rnd.PutSeed(Seed);
513 
514  TFltV InitMuV = MuV; InitMuV.PutAll(0.5);
515  TMAGNodeBern DistParam(InitMuV);
516  Param.SetMtxV(AffMtxV);
517 
518  const int NNodes = Param.GetNodes();
519  const int NAttrs = Param.GetAttrs();
520 
521  PhiVV.Gen(NNodes, NAttrs);
522  KnownVV.Gen(NNodes, NAttrs);
523  KnownVV.PutAll(false);
524 
525  for(int i = 0; i < NNodes; i++) {
526  for(int l = 0; l < NAttrs; l++) {
527  PhiVV.At(i, l) = Rnd.GetUniDev();
528 // PhiVV.At(i, l) = 0.5;
529  }
530  }
531 
532  TMAGAffMtxV RndMtxV = AffMtxV;
533  for(int l = 0; l < NAttrs; l++) {
534  for(int p = 0; p < 4; p++) {
535  RndMtxV[l].At(p) = TMAGNodeBern::Rnd.GetUniDev();
536  if(RndMtxV[l].At(p) < 0.1) { RndMtxV[l].At(p) = 0.1; }
537  if(RndMtxV[l].At(p) > 0.9) { RndMtxV[l].At(p) = 0.9; }
538  }
539  RndMtxV[l].At(0, 1) = RndMtxV[l].At(1, 0);
540  }
541 
542  printf("\n");
543  for(int l = 0; l < NAttrs; l++) {
544  printf("AffMtx = %s\n", RndMtxV[l].GetMtxStr().GetCStr());
545  }
546  Param.SetMtxV(RndMtxV);
547  Param.SetNodeAttr(DistParam);
548 }
549 
550 const double TMAGFitBern::GetInCoeff(const int& i, const int& j, const int& l, const int& A, const TMAGAffMtx& Theta) const {
551  return (PhiVV.At(i, l) * Theta.At(0, A) + (1.0 - PhiVV.At(i, l)) * Theta.At(1, A));
552 }
553 
554 const double TMAGFitBern::GetOutCoeff(const int& i, const int& j, const int& l, const int& A, const TMAGAffMtx& Theta) const {
555  return (PhiVV.At(j, l) * Theta.At(A, 0) + (1.0 - PhiVV.At(j, l)) * Theta.At(A, 1));
556 }
557 
558 const double TMAGFitBern::GetAvgInCoeff(const int& i, const int& AId, const int& A, const TMAGAffMtx& Theta) const {
559  const int NNodes = Param.GetNodes();
560  const double Mu_l = AvgPhiV[AId] / double(NNodes);
561  return (Mu_l * Theta.At(0, A) + (1.0 - Mu_l) * Theta.At(1, A));
562 }
563 
564 const double TMAGFitBern::GetAvgOutCoeff(const int& i, const int& AId, const int& A, const TMAGAffMtx& Theta) const {
565  const int NNodes = Param.GetNodes();
566  const double Mu_l = AvgPhiV[AId] / double(NNodes);
567  return (Mu_l * Theta.At(A, 0) + (1.0 - Mu_l) * Theta.At(A, 1));
568 }
569 
570 const double TMAGFitBern::GetProbPhi(const int& NId1, const int& NId2, const int& AId, const int& Attr1, const int& Attr2) const {
571  double Prob1 = (Attr1 == 0) ? double(PhiVV.At(NId1, AId)) : (1.0 - PhiVV.At(NId1, AId));
572  double Prob2 = (Attr2 == 0) ? double(PhiVV.At(NId2, AId)) : (1.0 - PhiVV.At(NId2, AId));
573  return (Prob1 * Prob2);
574 }
575 
576 const double TMAGFitBern::GetProbMu(const int& NId1, const int& NId2, const int& AId, const int& Attr1, const int& Attr2, const bool Left, const bool Right) const {
577  TMAGNodeBern DistParam = Param.GetNodeAttr();
578 // double Mu = DistParam.GetMu(AId);
579  double Mu = AvgPhiV[AId] / double(Param.GetNodes());
580  double Prob1 = (Left) ? double(PhiVV.At(NId1, AId)) : double(Mu);
581  double Prob2 = (Right)? double(PhiVV.At(NId2, AId)) : double(Mu);
582  Prob1 = (Attr1 == 0) ? Prob1 : 1.0 - Prob1;
583  Prob2 = (Attr2 == 0) ? Prob2 : 1.0 - Prob2;
584  return (Prob1 * Prob2);
585 }
586 
587 const double TMAGFitBern::GetThetaLL(const int& NId1, const int& NId2, const int& AId) const {
588  double LL = 0.0;
589  const TMAGAffMtx& Mtx = Param.GetMtx(AId);
590  for(int A1 = 0; A1 < 2; A1++) {
591  for(int A2 = 0; A2 < 2; A2++) {
592  LL += GetProbPhi(NId1, NId2, AId, A1, A2) * Mtx.At(A1, A2);
593  }
594  }
595  return log(LL);
596 }
597 
598 const double TMAGFitBern::GetAvgThetaLL(const int& NId1, const int& NId2, const int& AId, const bool Left, const bool Right) const {
599  double LL = 0.0;
600  const TMAGAffMtx& Mtx = Param.GetMtx(AId);
601  for(int A1 = 0; A1 < 2; A1++) {
602  for(int A2 = 0; A2 < 2; A2++) {
603  LL += GetProbMu(NId1, NId2, AId, A1, A2, Left, Right) * Mtx.At(A1, A2);
604  }
605  }
606  return log(LL);
607 }
608 
609 const double TMAGFitBern::GetSqThetaLL(const int& NId1, const int& NId2, const int& AId) const {
610  double LL = 0.0;
611  const TMAGAffMtx& Mtx = Param.GetMtx(AId);
612  for(int A1 = 0; A1 < 2; A1++) {
613  for(int A2 = 0; A2 < 2; A2++) {
614  LL += GetProbPhi(NId1, NId2, AId, A1, A2) * Mtx.At(A1, A2) * Mtx.At(A1, A2);
615  }
616  }
617  return log(LL);
618 }
619 
620 const double TMAGFitBern::GetAvgSqThetaLL(const int& NId1, const int& NId2, const int& AId, const bool Left, const bool Right) const {
621  double LL = 0.0;
622  const TMAGAffMtx& Mtx = Param.GetMtx(AId);
623  for(int A1 = 0; A1 < 2; A1++) {
624  for(int A2 = 0; A2 < 2; A2++) {
625  LL += GetProbMu(NId1, NId2, AId, A1, A2, Left, Right) * Mtx.At(A1, A2) * Mtx.At(A1, A2);
626  }
627  }
628  return log(LL);
629 }
630 
631 const double TMAGFitBern::GetProdLinWeight(const int& NId1, const int& NId2) const {
632  const int NAttrs = Param.GetAttrs();
633  double LL = 0.0;
634 
635  for(int l = 0; l < NAttrs; l++) {
636  LL += GetThetaLL(NId1, NId2, l);
637  }
638 // return LL;
639  return LL + log(NormConst);
640 }
641 
642 const double TMAGFitBern::GetAvgProdLinWeight(const int& NId1, const int& NId2, const bool Left, const bool Right) const {
643  const int NAttrs = Param.GetAttrs();
644  double LL = 0.0;
645 
646  for(int l = 0; l < NAttrs; l++) {
647  LL += GetAvgThetaLL(NId1, NId2, l, Left, Right);
648  }
649 // return LL;
650  return LL + log(NormConst);
651 }
652 
653 const double TMAGFitBern::GetProdSqWeight(const int& NId1, const int& NId2) const {
654  const int NAttrs = Param.GetAttrs();
655  double LL = 0.0;
656 
657  for(int l = 0; l < NAttrs; l++) {
658  LL += GetSqThetaLL(NId1, NId2, l);
659  }
660 // return LL;
661  return LL + 2 * log(NormConst);
662 }
663 
664 const double TMAGFitBern::GetAvgProdSqWeight(const int& NId1, const int& NId2, const bool Left, const bool Right) const {
665  const int NAttrs = Param.GetAttrs();
666  double LL = 0.0;
667 
668  for(int l = 0; l < NAttrs; l++) {
669  LL += GetAvgSqThetaLL(NId1, NId2, l, Left, Right);
670  }
671 // return LL;
672  return LL + 2 * log(NormConst);
673 }
674 
675 const double LogSumExp(const double LogVal1, const double LogVal2) {
676  double MaxExp = (LogVal1 > LogVal2) ? LogVal1 : LogVal2;
677  double Sum = exp(LogVal1 - MaxExp) + exp(LogVal2 - MaxExp);
678  return (log(Sum) + MaxExp);
679 }
680 
681 const double LogSumExp(const TFltV& LogValV) {
682  const int Len = LogValV.Len();
683  double MaxExp = -DBL_MAX;
684 
685  for(int i = 0; i < Len; i++) {
686  if(MaxExp < LogValV[i]) { MaxExp = LogValV[i]; }
687  }
688 
689  double Sum = 0.0;
690  for(int i = 0; i < Len; i++) {
691  Sum += exp(LogValV[i] - MaxExp);
692  }
693 
694  return (log(Sum) + MaxExp);
695 }
696 
697 const double LogSumExp(const double *LogValArray, const int Len) {
698  TFltV TmpV(Len);
699  for(int i = 0; i < Len; i++) { TmpV[i] = LogValArray[i]; }
700  return LogSumExp(TmpV);
701 }
702 
703 const double TMAGFitBern::GradPhiMI(const double& x, const int& NId, const int& AId, const double& Lambda, const double& DeltaQ, const TFltVV& CntVV) {
704  const int NAttrs = CntVV.GetYDim();
705  double Grad = DeltaQ - log(x) + log(1.0-x);
706 
707  for(int l = 0; l < NAttrs; l++) {
708  if(l == AId) { continue; }
709  const double C0 = PhiVV(NId, l);
710  const double C1 = 1.0 - C0;
711  Grad -= Lambda * C0 * log(CntVV(0, l) + C0 * x);
712  Grad -= Lambda * C1 * log(CntVV(1, l) + C1 * x);
713  Grad += Lambda * C0 * log(CntVV(2, l) + C0 * (1-x));
714  Grad += Lambda * C1 * log(CntVV(3, l) + C1 * (1-x));
715  Grad -= Lambda * log(CntVV(0, l) + CntVV(1, l) + x);
716  Grad += Lambda * log(CntVV(2, l) + CntVV(3, l) + (1-x));
717  }
718 
719  return Grad;
720 }
721 
722 const double TMAGFitBern::ObjPhiMI(const double& x, const int& NId, const int& AId, const double& Lambda, const double& Q0, const double& Q1, const TFltVV& CntVV) {
723  const int NAttrs = CntVV.GetYDim();
724  double Val = x*(Q0 - log(x)) + (1-x)*(Q1 - log(1.0-x));
725 
726  for(int l = 0; l < NAttrs; l++) {
727  if(l == AId) { continue; }
728  const double C0 = PhiVV(NId, l);
729  const double C1 = 1.0 - C0;
730  Val -= Lambda * (CntVV(0, l) + C0 * x) * log(CntVV(0, l) + C0 * x);
731  Val -= Lambda * (CntVV(1, l) + C1 * x) * log(CntVV(1, l) + C1 * x);
732  Val -= Lambda * (CntVV(2, l) + C0 * (1-x)) * log(CntVV(2, l) + C0 * (1-x));
733  Val -= Lambda * (CntVV(3, l) + C1 * (1-x)) * log(CntVV(3, l) + C1 * (1-x));
734  Val += Lambda * (CntVV(0, l) + CntVV(1, l) + x) * log(CntVV(0, l) + CntVV(1, l) + x);
735  Val += Lambda * (CntVV(2, l) + CntVV(3, l) + 1 - x) * log(CntVV(2, l) + CntVV(3, l) + (1-x));
736 
737  if(!(CntVV(0, l) > 0)) printf("CntVV(0, %d) = %.2f\n", l, double(CntVV(0, l)));
738  if(!(CntVV(1, l) > 0)) printf("CntVV(1, %d) = %.2f\n", l, double(CntVV(1, l)));
739  if(!(CntVV(2, l) > 0)) printf("CntVV(2, %d) = %.2f\n", l, double(CntVV(2, l)));
740  if(!(CntVV(3, l) > 0)) printf("CntVV(3, %d) = %.2f\n", l, double(CntVV(3, l)));
741  }
742 
743  return Val;
744 }
745 
746 const double TMAGFitBern::GetEstNoEdgeLL(const int& NId, const int& AId) const {
747  // const int NNodes = Param.GetNodes();
748  // const int NAttrs = Param.GetAttrs();
749 
750  TMAGNodeBern DistParam = Param.GetNodeAttr();
751  double LL = 0.0;
752 
753  return LL;
754 }
755 
756 const double TMAGFitBern::UpdatePhi(const int& NId, const int& AId, double& Phi) {
757  TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId);
758  TMAGAffMtx SqTheta(Theta);
759  const int NNodes = Param.GetNodes();
760  // const int NAttrs = Param.GetAttrs();
761  Theta.GetLLMtx(LLTheta);
762  TMAGNodeBern DistParam = Param.GetNodeAttr();
763  const double Mu = DistParam.GetMu(AId);
764 
765  for(int i = 0; i < Theta.Len(); i++) {
766  SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i);
767  }
768 
769  // Using log-sum-exp trick
770  double EdgeQ[2], NonEdgeQ[2], MaxExp[2];
771  TFltV NonEdgeLLV[2];
772  for(int i = 0; i < 2; i++) {
773  EdgeQ[i] = 0.0;
774  NonEdgeQ[i] = 0.0;
775  MaxExp[i] = -DBL_MAX;
776  NonEdgeLLV[i].Gen(4 * NNodes, 0);
777  }
778 
779  for(int j = 0; j < NNodes; j++) {
780  if(j == NId) { continue; }
781 
782  if(Graph->IsEdge(NId, j)) {
783  EdgeQ[0] += GetOutCoeff(NId, j, AId, 0, LLTheta);
784  EdgeQ[1] += GetOutCoeff(NId, j, AId, 1, LLTheta);
785  } else {
786  double LinW = GetProdLinWeight(NId, j) - GetThetaLL(NId, j, AId);
787  double SqW = GetProdSqWeight(NId, j) - GetSqThetaLL(NId, j, AId);
788 
789  for(int i = 0; i < 2; i++) {
790  NonEdgeLLV[i].Add(LinW + log(GetOutCoeff(NId, j, AId, i, Theta)));
791  NonEdgeLLV[i].Add(SqW + log(GetOutCoeff(NId, j, AId, i, SqTheta)) + log(0.5));
792  }
793  }
794 
795  if(Graph->IsEdge(j, NId)) {
796  EdgeQ[0] += GetInCoeff(j, NId, AId, 0, LLTheta);
797  EdgeQ[1] += GetInCoeff(j, NId, AId, 1, LLTheta);
798  } else {
799  double LinW = GetProdLinWeight(j, NId) - GetThetaLL(j, NId, AId);
800  double SqW = GetProdSqWeight(j, NId) - GetSqThetaLL(j, NId, AId);
801 
802  for(int i = 0; i < 2; i++) {
803  NonEdgeLLV[i].Add(LinW + log(GetInCoeff(j, NId, AId, i, Theta)));
804  NonEdgeLLV[i].Add(SqW + log(GetInCoeff(j, NId, AId, i, SqTheta)) + log(0.5));
805  }
806  }
807  }
808 
809  NonEdgeQ[0] = LogSumExp(NonEdgeLLV[0]);
810  NonEdgeQ[1] = LogSumExp(NonEdgeLLV[1]);
811 
812  double Q[2];
813  Q[0] = log(Mu) + EdgeQ[0] - exp(NonEdgeQ[0]);
814  Q[1] = log(1.0 - Mu) + EdgeQ[1] - exp(NonEdgeQ[1]);
815 // double Q = Q1 - Q0;
816 // printf(" [Phi_{%d}{%d}] :: Q0 = %f, Q1 = %f\n", NId, AId, Q0, Q1);
817 
818 // Phi = 1.0 / (1.0 + exp(Q));
819  Phi = Q[0] - LogSumExp(Q, 2);
820  Phi = exp(Phi);
821 
822  return Phi - PhiVV.At(NId, AId);
823 }
824 
825 
826 const double TMAGFitBern::UpdatePhiMI(const double& Lambda, const int& NId, const int& AId, double& Phi) {
827  TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId);
828  TMAGAffMtx SqTheta(Theta);
829  const int NNodes = Param.GetNodes();
830  const int NAttrs = Param.GetAttrs();
831  Theta.GetLLMtx(LLTheta);
832  TMAGNodeBern DistParam = Param.GetNodeAttr();
833  const double Mu = DistParam.GetMu(AId);
834 
835  for(int i = 0; i < Theta.Len(); i++) {
836  SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i);
837  }
838 
839  // Using log-sum-exp trick
840  double EdgeQ[2], NonEdgeQ[2], MaxExp[2];
841  TFltV NonEdgeLLV[2];
842  TFltVV CntVV(4, NAttrs); CntVV.PutAll(0.0);
843  for(int i = 0; i < 2; i++) {
844  EdgeQ[i] = 0.0;
845  NonEdgeQ[i] = 0.0;
846  MaxExp[i] = -DBL_MAX;
847  NonEdgeLLV[i].Gen(4 * NNodes, 0);
848  }
849 
850  for(int j = 0; j < NNodes; j++) {
851  if(j == NId) { continue; }
852 
853  for(int l = 0; l < NAttrs; l++) {
854  if(l == AId) { continue; }
855  CntVV(0, l) = CntVV(0, l) + PhiVV(j, AId) * PhiVV(j, l);
856  CntVV(1, l) = CntVV(1, l) + PhiVV(j, AId) * (1.0-PhiVV(j, l));
857  CntVV(2, l) = CntVV(2, l) + (1.0-PhiVV(j, AId)) * PhiVV(j, l);
858  CntVV(3, l) = CntVV(3, l) + (1.0-PhiVV(j, AId)) * (1.0-PhiVV(j, l));
859  }
860 
861  if(Graph->IsEdge(NId, j)) {
862  EdgeQ[0] += GetOutCoeff(NId, j, AId, 0, LLTheta);
863  EdgeQ[1] += GetOutCoeff(NId, j, AId, 1, LLTheta);
864  } else {
865  double LinW = GetProdLinWeight(NId, j) - GetThetaLL(NId, j, AId);
866  double SqW = GetProdSqWeight(NId, j) - GetSqThetaLL(NId, j, AId);
867 
868  for(int i = 0; i < 2; i++) {
869  NonEdgeLLV[i].Add(LinW + log(GetOutCoeff(NId, j, AId, i, Theta)));
870  NonEdgeLLV[i].Add(SqW + log(GetOutCoeff(NId, j, AId, i, SqTheta)) + log(0.5));
871  }
872  }
873 
874  if(Graph->IsEdge(j, NId)) {
875  EdgeQ[0] += GetInCoeff(j, NId, AId, 0, LLTheta);
876  EdgeQ[1] += GetInCoeff(j, NId, AId, 1, LLTheta);
877  } else {
878  double LinW = GetProdLinWeight(j, NId) - GetThetaLL(j, NId, AId);
879  double SqW = GetProdSqWeight(j, NId) - GetSqThetaLL(j, NId, AId);
880 
881  for(int i = 0; i < 2; i++) {
882  NonEdgeLLV[i].Add(LinW + log(GetInCoeff(j, NId, AId, i, Theta)));
883  NonEdgeLLV[i].Add(SqW + log(GetInCoeff(j, NId, AId, i, SqTheta)) + log(0.5));
884  }
885  }
886  }
887 
888  NonEdgeQ[0] = LogSumExp(NonEdgeLLV[0]);
889  NonEdgeQ[1] = LogSumExp(NonEdgeLLV[1]);
890 
891  double Q[2];
892  Q[0] = log(Mu) + EdgeQ[0] - exp(NonEdgeQ[0]);
893  Q[1] = log(1.0 - Mu) + EdgeQ[1] - exp(NonEdgeQ[1]);
894  double DeltaQ = Q[0] - Q[1];
895 
896 // double x[] = {0.1, 0.3, 0.5, 0.7, 0.9};
897  double x[] = {PhiVV(NId, AId)};
898 // for(int n = 0; n < 5; n++) {
899  for(int n = 0; n < 1; n++) {
900 // double LrnRate = 0.0002;
901  double LrnRate = 0.001;
902  for(int step = 0; step < 200; step++) {
903  double Grad = GradPhiMI(x[n], NId, AId, Lambda, DeltaQ, CntVV);
904  if(Grad > 0.0) { x[n] += LrnRate; }
905  else { x[n] -= LrnRate; }
906  if(x[n] > 0.9999) { x[n] = 0.9999; }
907  if(x[n] < 0.0001) { x[n] = 0.0001; }
908  LrnRate *= 0.995;
909  }
910  }
911 
912  double MaxVal = -DBL_MAX;
913  int MaxX = -1;
914 // for(int n = 0; n < 5; n++) {
915  for(int n = 0; n < 1; n++) {
916  double Val = ObjPhiMI(x[n], NId, AId, Lambda, Q[0], Q[1], CntVV);
917  if(Val > MaxVal) {
918  MaxVal = Val;
919  MaxX = n;
920  }
921  }
922  IAssert(MaxX >= 0);
923 
924  Phi = x[MaxX];
925 
926  return Phi - PhiVV.At(NId, AId);
927 }
928 
929 
930 const double TMAGFitBern::UpdateApxPhiMI(const double& Lambda, const int& NId, const int& AId, double& Phi, TFltVV& ProdVV) {
931  TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId);
932  const int NNodes = Param.GetNodes();
933  const int NAttrs = Param.GetAttrs();
934  Theta.GetLLMtx(LLTheta);
935  TMAGNodeBern DistParam = Param.GetNodeAttr();
936  const double Mu = DistParam.GetMu(AId);
937 
938  TMAGAffMtx SqTheta(Theta);
939  for(int i = 0; i < Theta.Len(); i++) {
940  SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i);
941  }
942 
943  TFltV ProdV; ProdVV.GetRow(NId, ProdV);
944  ProdV[0] -= GetAvgThetaLL(NId, NId, AId, true, false);
945  ProdV[1] -= GetAvgThetaLL(NId, NId, AId, false, true);
946  ProdV[2] -= log(2.0) + GetAvgSqThetaLL(NId, NId, AId, true, false);
947  ProdV[3] -= log(2.0) + GetAvgSqThetaLL(NId, NId, AId, false, true);
948 
949  // Using log-sum-exp trick
950  double EdgeQ[2], MaxExp[2];
951  TFltV NonEdgeLLV[2];
952  TFltVV CntVV(4, NAttrs); CntVV.PutAll(0.0);
953  for(int i = 0; i < 2; i++) {
954  EdgeQ[i] = 0.0;
955  MaxExp[i] = -DBL_MAX;
956  NonEdgeLLV[i].Gen(4 * NNodes, 0);
957  }
958 
959  for(int F = 0; F < 2; F++) {
960  NonEdgeLLV[F].Add(ProdV[0] + log(GetAvgOutCoeff(NId, AId, F, Theta)));
961  NonEdgeLLV[F].Add(ProdV[1] + log(GetAvgInCoeff(NId, AId, F, Theta)));
962  NonEdgeLLV[F].Add(ProdV[2] + log(GetAvgOutCoeff(NId, AId, F, SqTheta)));
963  NonEdgeLLV[F].Add(ProdV[3] + log(GetAvgInCoeff(NId, AId, F, SqTheta)));
964  }
965  EdgeQ[0] = -(NNodes - 1) * exp(LogSumExp(NonEdgeLLV[0]));
966  EdgeQ[1] = -(NNodes - 1) * exp(LogSumExp(NonEdgeLLV[1]));
967 
968 
969  for(int l = 0; l < NAttrs; l++) {
970  if(l == AId) { continue; }
971  int BgId = (AId > l) ? AId : l;
972  int SmId = (AId + l) - BgId;
973  int SmL = (l < AId) ? 1 : 0;
974  BgId *= 4;
975  CntVV(0, l) = AvgPhiPairVV(SmId, BgId) - PhiVV(NId, AId) * PhiVV(NId, l);
976  CntVV(1+SmL, l) = AvgPhiPairVV(SmId, BgId+1+SmL) - PhiVV(NId, AId) * (1.0-PhiVV(NId, l));
977  CntVV(2-SmL, l) = AvgPhiPairVV(SmId, BgId+2-SmL) - (1.0-PhiVV(NId, AId)) * PhiVV(NId, l);
978  CntVV(3, l) = AvgPhiPairVV(SmId, BgId+3) - (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, l));
979  }
980 
981  TNGraph::TNodeI NI = Graph->GetNI(NId);
982  for(int d = 0; d < NI.GetOutDeg(); d++) {
983  int Out = NI.GetOutNId(d);
984  if(NId == Out) { continue; }
985  double LinW = GetProdLinWeight(NId, Out) - GetThetaLL(NId, Out, AId);
986  double SqW = GetProdSqWeight(NId, Out) - GetSqThetaLL(NId, Out, AId);
987 
988  for(int F = 0; F < 2; F++) {
989  EdgeQ[F] += GetOutCoeff(NId, Out, AId, F, LLTheta);
990  EdgeQ[F] += exp(LinW + log(GetOutCoeff(NId, Out, AId, F, Theta)));
991  EdgeQ[F] += 0.5 * exp(SqW + log(GetOutCoeff(NId, Out, AId, F, SqTheta)));
992  }
993  }
994  for(int d = 0; d < NI.GetInDeg(); d++) {
995  int In = NI.GetInNId(d);
996  if(NId == In) { continue; }
997  double LinW = GetProdLinWeight(In, NId) - GetThetaLL(In, NId, AId);
998  double SqW = GetProdSqWeight(In, NId) - GetSqThetaLL(In, NId, AId);
999 
1000  for(int F = 0; F < 2; F++) {
1001  EdgeQ[F] += GetInCoeff(In, NId, AId, F, LLTheta);
1002  EdgeQ[F] += exp(LinW + log(GetInCoeff(In, NId, AId, F, Theta)));
1003  EdgeQ[F] += 0.5 * exp(SqW + log(GetInCoeff(In, NId, AId, F, SqTheta)));
1004  }
1005  }
1006 
1007  EdgeQ[0] += log(Mu);
1008  EdgeQ[1] += log(1.0 - Mu);
1009  double DeltaQ = EdgeQ[0] - EdgeQ[1];
1010 // printf("(%d, %d) :: Q[0] = %f, Q[1] = %f\n", NId, AId, EdgeQ[0], EdgeQ[1]);
1011 
1012 // double x[] = {0.1, 0.3, 0.5, 0.7, 0.9};
1013  double x[] = {PhiVV(NId, AId)};
1014  TFltV ObjValV; ObjValV.Gen(60, 0);
1015 // for(int n = 0; n < 5; n++) {
1016  for(int n = 0; n < 1; n++) {
1017 // double LrnRate = 0.0002;
1018  double LrnRate = 0.001;
1019  for(int step = 0; step < 50; step++) {
1020 // for(int step = 0; step < 10; step++) {
1021  double Grad = GradPhiMI(x[n], NId, AId, Lambda, DeltaQ, CntVV);
1022  if(Grad > 0.0) { x[n] += LrnRate; }
1023  else { x[n] -= LrnRate; }
1024  if(x[n] > 0.9999) { x[n] = 0.9999; }
1025  if(x[n] < 0.0001) { x[n] = 0.0001; }
1026  if(x[n] == 0.9999 || x[n] == 0.0001) {
1027  break;
1028  }
1029  LrnRate *= 0.995;
1030  }
1031  ObjValV.Add(x[n]);
1032 // ObjValV.Add(PhiVV(NId, AId));
1033  }
1034 
1035  double MaxVal = -DBL_MAX;
1036  int MaxX = -1;
1037 // for(int n = 0; n < 5; n++) {
1038  for(int n = 0; n < ObjValV.Len(); n++) {
1039  double Val = ObjPhiMI(ObjValV[n], NId, AId, Lambda, EdgeQ[0], EdgeQ[1], CntVV);
1040  if(Val > MaxVal) {
1041  MaxVal = Val;
1042  MaxX = n;
1043  } else if(MaxX < 0) {
1044  printf("(%d, %d) : %f Q[0] = %f Q[1] = %f Val = %f\n", NId, AId, double(x[n]), double(EdgeQ[0]), double(EdgeQ[1]), Val);
1045  }
1046  }
1047  IAssert(MaxX >= 0);
1048 
1049  Phi = ObjValV[MaxX];
1050 
1051  return Phi - PhiVV.At(NId, AId);
1052 }
1053 
1054 
1055 
1056 double TMAGFitBern::DoEStepOneIter(const TFltV& TrueMuV, TFltVV& NewPhiVV, const double& Lambda) {
1057  const int NNodes = Param.GetNodes();
1058  const int NAttrs = Param.GetAttrs();
1059  double MaxDelta = 0, L1 = 0;
1060  double Val;
1061  TFltIntIntTrV NewVal;
1062  int RndCount = 0;
1063  // double OldMI = 0.0, NewMI = 0.0;
1064  TFltV MuV(NAttrs); MuV.PutAll(0.0);
1065  TIntV NIndV(NNodes), AIndV(NAttrs);
1066 
1067  // Update Phi
1068  /*
1069  for(int i = 0; i < NNodes; i++) { NIndV[i] = i; }
1070  for(int l = 0; l < NAttrs; l++) { AIndV[l] = l; }
1071  if(Randomized) {
1072  NIndV.Shuffle(TMAGNodeBern::Rnd);
1073  AIndV.Shuffle(TMAGNodeBern::Rnd);
1074  }
1075  */
1076 
1077  NewVal.Gen(NAttrs * 2);
1078  for(int i = 0; i < NNodes; i++) {
1079 // const int NId = NIndV[i]%NNodes;
1080  for(int l = 0; l < NAttrs * 2; l++) {
1081  const int NId = TMAGNodeBern::Rnd.GetUniDevInt(NNodes);
1082  const int AId = TMAGNodeBern::Rnd.GetUniDevInt(NAttrs);
1083 // const int AId = AIndV[l]%NAttrs;
1084 // double Delta = UpdatePhi(NId, AId, Val);
1085  double Delta = 0.0;
1086  if(KnownVV(NId, AId)) {
1087  Val = PhiVV.At(NId, AId);
1088  } else {
1089  Delta = UpdatePhiMI(Lambda, NId, AId, Val);
1090  }
1091 
1092 // PhiVV.At(NId, AId) = Val;
1093  NewVal[l] = TFltIntIntTr(Val, NId, AId);
1094 
1095 // MuV[AId] = MuV[AId] + Val;
1096  if(fabs(Delta) > MaxDelta) {
1097  MaxDelta = fabs(Delta);
1098  }
1099  if(Val > 0.3 && Val < 0.7) { RndCount++; }
1100  }
1101 
1102  for(int l = 0; l < NAttrs * 2; l++) {
1103  const int NId = NewVal[l].Val2;
1104  const int AId = NewVal[l].Val3;
1105  PhiVV.At(NId, AId) = NewVal[l].Val1;
1106  }
1107  }
1108  for(int i = 0; i < NNodes; i++) {
1109  for(int l = 0; l < NAttrs; l++) {
1110  MuV[l] = MuV[l] + PhiVV.At(i, l);
1111  }
1112  }
1113  for(int l = 0; l < NAttrs; l++) {
1114  MuV[l] = MuV[l] / double(NNodes);
1115  }
1116 
1117  TFltV SortMuV = MuV;
1118  double Avg = 0.0;
1119  SortMuV.Sort(false);
1120  for(int l = 0; l < NAttrs; l++) {
1121  printf(" F[%d] = %.3f", l, double(MuV[l]));
1122  Avg += SortMuV[l];
1123  L1 += fabs(TrueMuV[l] - SortMuV[l]);
1124  }
1125  printf("\n");
1126  printf(" Rnd = %d(%.3f)", RndCount, double(RndCount) / double(NNodes * NAttrs));
1127  printf(" Avg = %.3f\n", Avg / double(NAttrs));
1128  L1 /= double(NAttrs);
1129 
1130  return L1;
1131 }
1132 
1133 
1134 double TMAGFitBern::DoEStepApxOneIter(const TFltV& TrueMuV, TFltVV& NewPhiVV, const double& Lambda) {
1135  const int NNodes = Param.GetNodes();
1136  const int NAttrs = Param.GetAttrs();
1137  double MaxDelta = 0, L1 = 0;
1138  double Val;
1139  TFltIntIntTrV NewVal;
1140  int RndCount = 0;
1141  // double OldMI = 0.0, NewMI = 0.0;
1142  TFltV MuV(NAttrs); MuV.PutAll(0.0);
1143  TFltVV ProdVV(NNodes, 4); ProdVV.PutAll(0.0);
1144  TIntV NIndV(NNodes), AIndV(NAttrs);
1145 
1146  // Update Phi
1147  /*
1148  for(int i = 0; i < NNodes; i++) { NIndV[i] = i; }
1149  for(int l = 0; l < NAttrs; l++) { AIndV[l] = l; }
1150  if(Randomized) {
1151  NIndV.Shuffle(TMAGNodeBern::Rnd);
1152  AIndV.Shuffle(TMAGNodeBern::Rnd);
1153  }
1154  */
1155 
1156  AvgPhiV.Gen(NAttrs); AvgPhiV.PutAll(0.0);
1157  AvgPhiPairVV.Gen(NAttrs, 4*NAttrs); AvgPhiPairVV.PutAll(0.0);
1158  for(int i = 0; i < NNodes; i++) {
1159  for(int l = 0; l < NAttrs; l++) {
1160  for(int p = l+1; p < NAttrs; p++) {
1161  int index = 4 * p;
1162  AvgPhiPairVV(l, index) += PhiVV(i, l) * PhiVV(i, p);
1163  AvgPhiPairVV(l, index+1) += PhiVV(i, l) * (1.0-PhiVV(i, p));
1164  AvgPhiPairVV(l, index+2) += (1.0-PhiVV(i, l)) * PhiVV(i, p);
1165  AvgPhiPairVV(l, index+3) += (1.0-PhiVV(i, l)) * (1.0-PhiVV(i, p));
1166  }
1167  AvgPhiV[l] += PhiVV(i, l);
1168  }
1169  }
1170  for(int i = 0; i < NNodes; i++) {
1171  ProdVV(i, 0) = GetAvgProdLinWeight(i, i, true, false);
1172  ProdVV(i, 1) = GetAvgProdLinWeight(i, i, false, true);
1173  ProdVV(i, 2) = GetAvgProdSqWeight(i, i, true, false);
1174  ProdVV(i, 3) = GetAvgProdSqWeight(i, i, false, true);
1175  }
1176 
1177  const int Iter = 3;
1178 
1179  NewVal.Gen(NAttrs * Iter);
1180  for(int i = 0; i < NNodes * Iter; i++) {
1181  for(int l = 0; l < NAttrs; l++) {
1182  const int NId = TMAGNodeBern::Rnd.GetUniDevInt(NNodes);
1183  const int AId = TMAGNodeBern::Rnd.GetUniDevInt(NAttrs);
1184  double Delta = 0.0;
1185  if(KnownVV(NId, AId)) {
1186  Val = PhiVV.At(NId, AId);
1187  } else {
1188  Delta = UpdateApxPhiMI(Lambda, NId, AId, Val, ProdVV);
1189  }
1190 
1191 // PhiVV.At(NId, AId) = Val;
1192  NewVal[l] = TFltIntIntTr(Val, NId, AId);
1193 
1194 // MuV[AId] = MuV[AId] + Val;
1195  if(fabs(Delta) > MaxDelta) {
1196  MaxDelta = fabs(Delta);
1197  }
1198  if(Val > 0.3 && Val < 0.7) { RndCount++; }
1199  }
1200 
1201  for(int l = 0; l < NAttrs; l++) {
1202  const int NId = NewVal[l].Val2;
1203  const int AId = NewVal[l].Val3;
1204 
1205  ProdVV(NId, 0) -= GetAvgThetaLL(NId, NId, AId, true, false);
1206  ProdVV(NId, 1) -= GetAvgThetaLL(NId, NId, AId, false, true);
1207  ProdVV(NId, 2) -= GetAvgSqThetaLL(NId, NId, AId, true, false);
1208  ProdVV(NId, 3) -= GetAvgSqThetaLL(NId, NId, AId, false, true);
1209  for(int p = 0; p < NAttrs; p++) {
1210  if(p > AId) {
1211  int index = 4 * p;
1212  AvgPhiPairVV(AId, index) -= PhiVV(NId, AId) * PhiVV(NId, p);
1213  AvgPhiPairVV(AId, index+1) -= PhiVV(NId, AId) * (1.0-PhiVV(NId, p));
1214  AvgPhiPairVV(AId, index+2) -= (1.0-PhiVV(NId, AId)) * PhiVV(NId, p);
1215  AvgPhiPairVV(AId, index+3) -= (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, p));
1216  } else if (p < AId) {
1217  int index = 4 * AId;
1218  AvgPhiPairVV(p, index) -= PhiVV(NId, p) * PhiVV(NId, AId);
1219  AvgPhiPairVV(p, index+1) -= PhiVV(NId, p) * (1.0-PhiVV(NId, AId));
1220  AvgPhiPairVV(p, index+2) -= (1.0-PhiVV(NId, p)) * PhiVV(NId, AId);
1221  AvgPhiPairVV(p, index+3) -= (1.0-PhiVV(NId, p)) * (1.0-PhiVV(NId, AId));
1222  }
1223  }
1224  AvgPhiV[AId] -= PhiVV(NId, AId);
1225 
1226  PhiVV.At(NId, AId) = NewVal[l].Val1;
1227 
1228  ProdVV(NId, 0) += GetAvgThetaLL(NId, NId, AId, true, false);
1229  ProdVV(NId, 1) += GetAvgThetaLL(NId, NId, AId, false, true);
1230  ProdVV(NId, 2) += GetAvgSqThetaLL(NId, NId, AId, true, false);
1231  ProdVV(NId, 3) += GetAvgSqThetaLL(NId, NId, AId, false, true);
1232  for(int p = 0; p < NAttrs; p++) {
1233  if(p > AId) {
1234  int index = 4 * p;
1235  AvgPhiPairVV(AId, index) += PhiVV(NId, AId) * PhiVV(NId, p);
1236  AvgPhiPairVV(AId, index+1) += PhiVV(NId, AId) * (1.0-PhiVV(NId, p));
1237  AvgPhiPairVV(AId, index+2) += (1.0-PhiVV(NId, AId)) * PhiVV(NId, p);
1238  AvgPhiPairVV(AId, index+3) += (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, p));
1239  } else if (p < AId) {
1240  int index = 4 * AId;
1241  AvgPhiPairVV(p, index) += PhiVV(NId, p) * PhiVV(NId, AId);
1242  AvgPhiPairVV(p, index+1) += PhiVV(NId, p) * (1.0-PhiVV(NId, AId));
1243  AvgPhiPairVV(p, index+2) += (1.0-PhiVV(NId, p)) * PhiVV(NId, AId);
1244  AvgPhiPairVV(p, index+3) += (1.0-PhiVV(NId, p)) * (1.0-PhiVV(NId, AId));
1245  }
1246  }
1247  AvgPhiV[AId] += PhiVV(NId, AId);
1248  }
1249  }
1250 
1251  for(int l = 0; l < NAttrs; l++) {
1252  MuV[l] = AvgPhiV[l] / double(NNodes);
1253  }
1254 
1255  TFltV SortMuV = MuV;
1256  double Avg = 0.0;
1257 // SortMuV.Sort(false);
1258  for(int l = 0; l < NAttrs; l++) {
1259  printf(" F[%d] = %.3f", l, double(MuV[l]));
1260  Avg += SortMuV[l];
1261 // L1 += fabs(TrueMuV[l] - SortMuV[l]);
1262  }
1263  printf("\n");
1264  printf(" Rnd = %d(%.3f)", RndCount, double(RndCount) / double(NNodes * NAttrs));
1265  printf(" Avg = %.3f\n", Avg / double(NAttrs));
1266 // printf(" Linf = %f\n", MaxDelta);
1267 // L1 /= double(NAttrs);
1268 
1269  return L1;
1270 }
1271 
1272 double TMAGFitBern::DoEStep(const TFltV& TrueMuV, const int& NIter, double& LL, const double& Lambda) {
1273  const int NNodes = Param.GetNodes();
1274  const int NAttrs = Param.GetAttrs();
1275  TFltVV NewPhiVV(NNodes, NAttrs);
1276  // double MI;
1277 
1278  TFltV Delta(NIter);
1279 
1280  for(int i = 0; i < NIter; i++) {
1281  TExeTm IterTm;
1282  printf("EStep iteration : %d\n", (i+1));
1283  if(ESpeedUp) {
1284  Delta[i] = DoEStepApxOneIter(TrueMuV, NewPhiVV, Lambda);
1285  } else {
1286  Delta[i] = DoEStepOneIter(TrueMuV, NewPhiVV, Lambda);
1287  }
1288 // PhiVV = NewPhiVV;
1289 
1290  printf(" (Time = %s)\n", IterTm.GetTmStr());
1291  }
1292  printf("\n");
1293 
1294  NewPhiVV.Clr();
1295 
1296  return Delta.Last();
1297 }
1298 
1299 const double TMAGFitBern::UpdateMu(const int& AId) {
1300  const int NNodes = Param.GetNodes();
1301  TMAGNodeBern DistParam = Param.GetNodeAttr();
1302  const double OldMu = DistParam.GetMu(AId);
1303  double NewMu = 0.0;
1304 
1305  for(int i = 0; i < NNodes; i++) {
1306  NewMu += PhiVV.At(i, AId);
1307  }
1308  AvgPhiV[AId] = NewMu;
1309  NewMu /= double(NNodes);
1310 
1311  printf(" [Posterior Mu] = %.4f\n", NewMu);
1312 
1313  double Delta = fabs(NewMu - OldMu);
1314  DistParam.SetMu(AId, NewMu);
1315  Param.SetNodeAttr(DistParam);
1316 
1317  return Delta;
1318 }
1319 
1320 const void TMAGFitBern::GradAffMtx(const int& AId, const TFltVV& ProdVV, const TFltVV& SqVV, const TMAGAffMtx& CurMtx, TFltV& GradV) {
1321  const int NNodes = Param.GetNodes();
1322  // const int NAttrs = Param.GetAttrs();
1323  GradV.PutAll(0.0);
1324 
1325  for(int i = 0; i < NNodes; i++) {
1326  for(int j = 0; j < NNodes; j++) {
1327  double Prod = ProdVV(i, j) - GetThetaLL(i, j, AId);
1328  double Sq = SqVV(i, j) - GetSqThetaLL(i, j, AId);
1329 
1330  for(int p = 0; p < 4; p++) {
1331  int Ai = p / 2;
1332  int Aj = p % 2;
1333  double Prob = GetProbPhi(i, j, AId, Ai, Aj);
1334  if(Graph->IsEdge(i, j)) {
1335  GradV[p] += Prob / CurMtx.At(p);
1336  } else {
1337  GradV[p] -= Prob * exp(Prod);
1338  GradV[p] -= Prob * exp(Sq) * CurMtx.At(p);
1339  }
1340  }
1341  }
1342  }
1343 }
1344 
1345 
1346 const void TMAGFitBern::GradApxAffMtx(const int& AId, const TFltVV& ProdVV, const TFltVV& SqVV, const TMAGAffMtx& CurMtx, TFltV& GradV) {
1347  const int NNodes = Param.GetNodes();
1348  // const int NAttrs = Param.GetAttrs();
1349  // const int NSq = NNodes * (NNodes - 1);
1350  GradV.PutAll(0.0);
1351 
1352  TFltV LogSumV;
1353  for(int p = 0; p < 4; p++) {
1354  int Ai = p / 2;
1355  int Aj = p % 2;
1356  LogSumV.Gen(NNodes * 4, 0);
1357 
1358  for(int i = 0; i < NNodes; i++) {
1359  const double LProd = ProdVV(i, 0) - GetAvgThetaLL(i, i, AId, true, false);
1360  const double LSq = SqVV(i, 0) - GetAvgSqThetaLL(i, i, AId, true, false);
1361  const double RProd = ProdVV(i, 1) - GetAvgThetaLL(i, i, AId, false, true);
1362  const double RSq = SqVV(i, 1) - GetAvgSqThetaLL(i, i, AId, false, true);
1363 
1364  LogSumV.Add(LProd + log(GetProbMu(i, i, AId, Ai, Aj, true, false)));
1365  LogSumV.Add(LSq + log(GetProbMu(i, i, AId, Ai, Aj, true, false)) + log(CurMtx.At(p)));
1366  LogSumV.Add(RProd + log(GetProbMu(i, i, AId, Ai, Aj, false, true)));
1367  LogSumV.Add(RSq + log(GetProbMu(i, i, AId, Ai, Aj, false, true)) + log(CurMtx.At(p)));
1368  }
1369  double LogSum = LogSumExp(LogSumV);
1370  GradV[p] -= (NNodes - 1) * 0.5 * exp(LogSum);
1371  }
1372 
1373  for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
1374  const int NId1 = EI.GetSrcNId();
1375  const int NId2 = EI.GetDstNId();
1376  const double ProdOne = GetProdLinWeight(NId1, NId2) - GetThetaLL(NId1, NId2, AId);
1377  const double SqOne = GetProdSqWeight(NId1, NId2) - GetSqThetaLL(NId1, NId2, AId);
1378 
1379  for(int p = 0; p < 4; p++) {
1380  int Ai = p / 2;
1381  int Aj = p % 2;
1382  double Prob = GetProbPhi(NId1, NId2, AId, Ai, Aj);
1383  GradV[p] += Prob / CurMtx.At(p);
1384  GradV[p] += Prob * exp(ProdOne);
1385  GradV[p] += Prob * exp(SqOne) * CurMtx.At(p);
1386  }
1387  }
1388 
1389 #if 0
1390  const double Prod = ProdVV(0, 0) - GetAvgThetaLL(0, 0, AId, false, false);
1391  const double Sq = SqVV(0, 0) - GetAvgSqThetaLL(0, 0, AId, false, false);
1392  for(int p = 0; p < 4; p++) {
1393  int Ai = p / 2;
1394  int Aj = p % 2;
1395  GradV[p] -= NSq * exp(Prod) * GetProbMu(0, 0, AId, Ai, Aj, false, false);
1396  GradV[p] -= NSq * exp(Sq) * GetProbMu(0, 0, AId, Ai, Aj, false, false) * CurMtx.At(p);
1397  }
1398 
1399  for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
1400  const int NId1 = EI.GetSrcNId();
1401  const int NId2 = EI.GetDstNId();
1402  const double ProdOne = GetProdLinWeight(NId1, NId2) - GetThetaLL(NId1, NId2, AId);
1403  const double SqOne = GetProdSqWeight(NId1, NId2) - GetSqThetaLL(NId1, NId2, AId);
1404 
1405  for(int p = 0; p < 4; p++) {
1406  int Ai = p / 2;
1407  int Aj = p % 2;
1408  double Prob = GetProbPhi(NId1, NId2, AId, Ai, Aj);
1409 // GradV[p] += Prob / CurMtx.At(p);
1410 // GradV[p] += Prob * exp(ProdOne);
1411 // GradV[p] += Prob * exp(SqOne) * CurMtx.At(p);
1412  }
1413  }
1414 #endif
1415 }
1416 
1417 
1418 const double TMAGFitBern::UpdateAffMtx(const int& AId, const double& LrnRate, const double& MaxGrad, const double& Lambda, TFltVV& ProdVV, TFltVV& SqVV, TMAGAffMtx& NewMtx) {
1419  double Delta = 0.0;
1420  // const int NNodes = Param.GetNodes();
1421  // const int NAttrs = Param.GetAttrs();
1422  TMAGAffMtx AffMtx = Param.GetMtx(AId);
1423 
1424  TFltV GradV(4);
1425  TFltV HessV(4);
1426  if(MSpeedUp) {
1427  GradApxAffMtx(AId, ProdVV, SqVV, NewMtx, GradV);
1428  } else {
1429  GradAffMtx(AId, ProdVV, SqVV, NewMtx, GradV);
1430  }
1431 
1432  double Ratio = 1.0;
1433  for(int p = 0; p < 4; p++) {
1434  if(fabs(Ratio * LrnRate * GradV[p]) > MaxGrad) {
1435  Ratio = MaxGrad / fabs(LrnRate * GradV[p]);
1436  }
1437  }
1438 
1439  for(int p = 0; p < 4; p++) {
1440  GradV[p] *= (Ratio * LrnRate);
1441  NewMtx.At(p) = AffMtx.At(p) + GradV[p];
1442 // if(NewMtx.At(p) > 0.9999) { NewMtx.At(p) = 0.9999; }
1443  if(NewMtx.At(p) < 0.0001) { NewMtx.At(p) = 0.0001; }
1444  }
1445 
1446  printf(" [Attr = %d]\n", AId);
1447  printf(" %s + [%f, %f; %f %f] -----> %s\n", (AffMtx.GetMtxStr()).GetCStr(), double(GradV[0]), double(GradV[1]), double(GradV[2]), double(GradV[3]), (NewMtx.GetMtxStr()).GetCStr());
1448 
1449 // Param.SetMtx(AId, NewMtx);
1450  return Delta;
1451 }
1452 
1453 
1454 void TMAGFitBern::NormalizeAffMtxV(TMAGAffMtxV& MtxV, const bool UseMu) {
1455  const int NNodes = Param.GetNodes();
1456  const int NAttrs = MtxV.Len();
1457  TFltV MuV = GetMuV();
1458  double Product = 1.0, ExpEdge = NNodes * (NNodes - 1);
1459 
1460  TFltV SumV(NAttrs), EdgeSumV(NAttrs);
1461  SumV.PutAll(0.0); EdgeSumV.PutAll(0.0);
1462  for(int l = 0; l < NAttrs; l++) {
1463  double Mu = (UseMu) ? double(MuV[l]) : (AvgPhiV[l] / double(NNodes));
1464  EdgeSumV[l] += Mu * Mu * MtxV[l].At(0, 0);
1465  EdgeSumV[l] += Mu * (1.0-Mu) * MtxV[l].At(0, 1);
1466  EdgeSumV[l] += Mu * (1.0-Mu) * MtxV[l].At(1, 0);
1467  EdgeSumV[l] += (1.0-Mu) * (1.0-Mu) * MtxV[l].At(1, 1);
1468  SumV[l] = SumV[l] + MtxV[l].At(0, 0);
1469  SumV[l] = SumV[l] + MtxV[l].At(0, 1);
1470  SumV[l] = SumV[l] + MtxV[l].At(1, 0);
1471  SumV[l] = SumV[l] + MtxV[l].At(1, 1);
1472  Product *= SumV[l];
1473  ExpEdge *= EdgeSumV[l];
1474  }
1475  ExpEdge = Graph->GetEdges() / ExpEdge;
1476  NormConst *= Product;
1477 // NormConst = ExpEdge;
1478  Product = 1.0;
1479 // Product = pow(Product * ExpEdge, 1.0 / double(NAttrs));
1480 
1481  for(int l = 0; l < NAttrs; l++) {
1482  for(int p = 0; p < 4; p++) {
1483  MtxV[l].At(p) = MtxV[l].At(p) * Product / SumV[l];
1484 // MtxV[l].At(p) = MtxV[l].At(p) * Product / MtxV[l].At(0, 0);
1485 // MtxV[l].At(p) = MtxV[l].At(p) * Product;
1486 // if(MtxV[l].At(p) > 0.9999) { MtxV[l].At(p) = 0.9999; }
1487 // if(MtxV[l].At(p) < 0.0001) { MtxV[l].At(p) = 0.0001; }
1488  }
1489  }
1490 }
1491 
1492 void TMAGFitBern::UnNormalizeAffMtxV(TMAGAffMtxV& MtxV, const bool UseMu) {
1493  const int NNodes = Param.GetNodes();
1494  const int NAttrs = MtxV.Len();
1495  TFltIntPrV MaxEntV(NAttrs);
1496  TFltV MuV = GetMuV();
1497  NormalizeAffMtxV(MtxV, UseMu);
1498 
1499  double ExpEdge = NNodes * (NNodes - 1);
1500  for(int l = 0; l < NAttrs; l++) {
1501  double Mu = MuV[l];
1502  double EdgeSum = Mu * Mu * MtxV[l].At(0, 0);
1503  EdgeSum += Mu * (1.0-Mu) * MtxV[l].At(0, 1);
1504  EdgeSum += Mu * (1.0-Mu) * MtxV[l].At(1, 0);
1505  EdgeSum += (1.0-Mu) * (1.0-Mu) * MtxV[l].At(1, 1);
1506  ExpEdge *= EdgeSum;
1507  }
1508  NormConst = double(Graph->GetEdges()) / ExpEdge;
1509 // NormConst *= ExpEdge;
1510 
1511  for(int l = 0; l < NAttrs; l++) {
1512  MaxEntV[l] = TFltIntPr(-1, l);
1513  for(int p = 0; p < 4; p++) {
1514  if(MaxEntV[l].Val1 < MtxV[l].At(p)) { MaxEntV[l].Val1 = MtxV[l].At(p); }
1515  }
1516  }
1517  MaxEntV.Sort(false);
1518 
1519  for(int l = 0; l < NAttrs; l++) {
1520  int CurId = MaxEntV[l].Val2;
1521  double Factor = pow(NormConst, 1.0 / double(NAttrs - l));
1522  double MaxFactor = 0.9999 / MaxEntV[l].Val1;
1523  Factor = (Factor > MaxFactor) ? MaxFactor : Factor;
1524  NormConst = NormConst / Factor;
1525 
1526  for(int p = 0; p < 4; p++) {
1527  MtxV[CurId].At(p) = MtxV[CurId].At(p) * Factor;
1528  }
1529  }
1530 }
1531 
1532 const void TMAGFitBern::PrepareUpdateAffMtx(TFltVV& ProdVV, TFltVV& SqVV) {
1533  const int NNodes = Param.GetNodes();
1534  ProdVV.Gen(NNodes, NNodes);
1535  SqVV.Gen(NNodes, NNodes);
1536 
1537  for(int i = 0; i < NNodes; i++) {
1538  for(int j = 0; j < NNodes; j++) {
1539  ProdVV(i, j) = GetProdLinWeight(i, j);
1540  SqVV(i, j) = GetProdSqWeight(i, j);
1541  }
1542  }
1543 }
1544 
1546  const int NNodes = Param.GetNodes();
1547  ProdVV.Gen(NNodes, 2);
1548  SqVV.Gen(NNodes, 2);
1549 
1550  for(int i = 0; i < NNodes; i++) {
1551  ProdVV(i, 0) = GetAvgProdLinWeight(i, i, true, false);
1552  ProdVV(i, 1) = GetAvgProdLinWeight(i, i, false, true);
1553  SqVV(i, 0) = GetAvgProdSqWeight(i, i, true, false);
1554  SqVV(i, 1) = GetAvgProdSqWeight(i, i, false, true);
1555  }
1556 }
1557 
1558 const double TMAGFitBern::UpdateAffMtxV(const int& GradIter, const double& LrnRate, const double& MaxGrad, const double& Lambda, const int& NReal) {
1559  const int NNodes = Param.GetNodes();
1560  const int NAttrs = Param.GetAttrs();
1561  const TMAGNodeBern DistParam = Param.GetNodeAttr();
1562  const TFltV MuV = DistParam.GetMuV();
1563  double Delta = 0.0;
1564  double DecLrnRate = LrnRate, DecMaxGrad = MaxGrad;
1565 
1566  TFltVV ProdVV(NNodes, NNodes), SqVV(NNodes, NNodes);
1567  TMAGAffMtxV NewMtxV, OldMtxV;
1568  Param.GetMtxV(OldMtxV);
1569  Param.GetMtxV(NewMtxV);
1570 
1571  for(int g = 0; g < GradIter; g++) {
1572  if(MSpeedUp) {
1573  PrepareUpdateApxAffMtx(ProdVV, SqVV);
1574  } else {
1575  PrepareUpdateAffMtx(ProdVV, SqVV);
1576  }
1577 
1578  printf(" [Grad step = %d]\n", (g+1));
1579 // for(int l = 0; l < NAttrs; l++) {
1580  for(int l = NReal; l < NAttrs; l++) {
1581  UpdateAffMtx(l, DecLrnRate, DecMaxGrad, Lambda, ProdVV, SqVV, NewMtxV[l]);
1582  Param.SetMtxV(NewMtxV);
1583  }
1584  DecLrnRate *= 0.97;
1585  DecMaxGrad *= 0.97;
1586 
1587  printf("\n");
1588  NormalizeAffMtxV(NewMtxV, true);
1589  Param.SetMtxV(NewMtxV);
1590  }
1591  NormalizeAffMtxV(NewMtxV, true);
1592 
1593  printf( "\nFinal\n");
1594  for(int l = 0; l < NAttrs; l++) {
1595  printf(" [");
1596  for(int p = 0; p < 4; p++) {
1597 // NewMtxV[l].At(p) = NewMtxV[l].At(p) * Product / SumV[l];
1598  Delta += fabs(OldMtxV[l].At(p) - NewMtxV[l].At(p));
1599  printf(" %.4f ", double(NewMtxV[l].At(p)));
1600  }
1601  printf("]\n");
1602  }
1603  Param.SetMtxV(NewMtxV);
1604  ProdVV.Clr(); SqVV.Clr();
1605  return Delta;
1606 }
1607 
1608 void TMAGFitBern::DoMStep(const int& GradIter, const double& LrnRate, const double& MaxGrad, const double& Lambda, const int& NReal) {
1609  // const int NNodes = Param.GetNodes();
1610  const int NAttrs = Param.GetAttrs();
1611  double MuDelta = 0.0, AffMtxDelta = 0.0;
1612 
1613  TExeTm ExeTm;
1614 
1615  printf("\n");
1616  AvgPhiV.Gen(NAttrs); AvgPhiV.PutAll(0.0);
1617  for(int l = 0; l < NAttrs; l++) {
1618 // printf(" [Attr = %d]\n", l);
1619  MuDelta += UpdateMu(l);
1620  }
1621  printf("\n");
1622 
1623  printf(" == Update Theta\n");
1624  AffMtxDelta += UpdateAffMtxV(GradIter, LrnRate, MaxGrad, Lambda, NReal);
1625  printf("\n");
1626  printf("Elpased time = %s\n", ExeTm.GetTmStr());
1627  printf("\n");
1628 }
1629 
1630 void TMAGFitBern::DoEMAlg(const int& NStep, const int& NEstep, const int& NMstep, const double& LrnRate, const double& MaxGrad, const double& Lambda, const double& ReInit, const int& NReal) {
1631  const int NNodes = Param.GetNodes();
1632  const int NAttrs = Param.GetAttrs();
1633  TIntV IndexV;
1634  double LL;
1635  // double MuDist, MtxDist;
1636 
1637  MuHisV.Gen(NStep + 1, 0);
1638  MtxHisV.Gen(NStep + 1, 0);
1639  LLHisV.Gen(NStep + 1, 0);
1640 
1641  printf("--------------------------------------------\n");
1642  printf("Before EM Iteration\n");
1643  printf("--------------------------------------------\n");
1644 
1645  TMAGAffMtxV InitMtxV;
1646  TMAGNodeBern NodeAttr = Param.GetNodeAttr();
1647  Param.GetMtxV(InitMtxV);
1648  TFltV InitMuV = NodeAttr.GetMuV();
1649 
1650  for(int i = 0; i < NNodes; i++) {
1651  for(int l = 0; l < NAttrs; l++) {
1652  if(! KnownVV(i, l)) {
1653  PhiVV.At(i, l) = TMAGNodeBern::Rnd.GetUniDev();
1654  }
1655  }
1656  }
1657 
1658  if(Debug) {
1659  double LL = ComputeApxLL();
1660  MuHisV.Add(InitMuV);
1661  MtxHisV.Add(InitMtxV);
1662  LLHisV.Add(LL);
1663  }
1664 
1665  NormalizeAffMtxV(InitMtxV, true);
1666  Param.SetMtxV(InitMtxV);
1667 
1668  for(int n = 0; n < NStep; n++) {
1669  printf("--------------------------------------------\n");
1670  printf("EM Iteration : %d\n", (n+1));
1671  printf("--------------------------------------------\n");
1672 
1673  NodeAttr = Param.GetNodeAttr();
1674  for(int i = 0; i < NNodes; i++) {
1675  for(int l = 0; l < NAttrs; l++) {
1676  if(!KnownVV(i, l) && TMAGNodeBern::Rnd.GetUniDev() < ReInit) {
1677  PhiVV.At(i, l) = TMAGNodeBern::Rnd.GetUniDev();
1678  }
1679  }
1680  }
1681  DoEStep(InitMuV, NEstep, LL, Lambda);
1682  Param.GetMtxV(InitMtxV);
1683 // NormalizeAffMtxV(InitMtxV);
1684  Param.SetMtxV(InitMtxV);
1685  DoMStep(NMstep, LrnRate, MaxGrad, Lambda, NReal);
1686 
1687  printf("\n");
1688 
1689  if(Debug) {
1690  double LL = ComputeApxLL();
1691  MuHisV.Add(InitMuV);
1692  MtxHisV.Add(InitMtxV);
1693  LLHisV.Add(LL);
1694  printf(" ApxLL = %.2f (Const = %f)\n", LL, double(NormConst));
1695  }
1696 
1697  }
1698  Param.GetMtxV(InitMtxV);
1699  UnNormalizeAffMtxV(InitMtxV, true);
1700  Param.SetMtxV(InitMtxV);
1701 }
1702 
1703 void TMAGFitBern::MakeCCDF(const TFltPrV& RawV, TFltPrV& CcdfV) {
1704  double Total = 0.0;
1705  CcdfV.Gen(RawV.Len(), 0);
1706 
1707  for(int i = 0; i < RawV.Len(); i++) {
1708  if(RawV[i].Val2 <= 0) { continue; }
1709  Total += RawV[i].Val2;
1710  CcdfV.Add(RawV[i]);
1711  IAssert(RawV[i].Val2 > 0);
1712  }
1713  for(int i = 1; i < CcdfV.Len(); i++) {
1714  CcdfV[i].Val2 += CcdfV[i-1].Val2;
1715  }
1716 
1717  for(int i = CcdfV.Len() - 1; i > 0; i--) {
1718  CcdfV[i].Val2 = (Total - CcdfV[i-1].Val2) ;
1719  if(CcdfV[i].Val2 <= 0) { printf("CCDF = %f\n", double(CcdfV[i].Val2));}
1720  IAssert(CcdfV[i].Val2 > 0);
1721  }
1722  CcdfV[0].Val2 = Total;
1723 // CcdfV[0].Val2 = 1.0;
1724 }
1725 
1726 
1728  const int NNodes = Param.GetNodes();
1729  const int NAttrs = Param.GetAttrs();
1730  TMAGParam<TMAGNodeBern> MAGGen(NNodes, NAttrs);
1731  TMAGNodeBern MAGNode = Param.GetNodeAttr();
1732  MAGGen.SetNodeAttr(MAGNode);
1733  TMAGAffMtxV MtxV; Param.GetMtxV(MtxV);
1734  MAGGen.SetMtxV(MtxV);
1735 
1736  PNGraph TrG = new TNGraph;
1737  *TrG = *Graph;
1738 
1739  TIntVV AttrVV(NNodes, NAttrs);
1740  for(int i = 0; i < NNodes; i++) {
1741  for(int j = 0; j < NAttrs; j++) {
1742  if(PhiVV(i, j) > TMAGNodeBern::Rnd.GetUniDev()) AttrVV(i, j) = 0;
1743  else AttrVV(i, j) = 1;
1744  }
1745  }
1746  PNGraph MAG = MAGGen.GenMAG(AttrVV, true, 10000);
1747 // PNGraph MAG = MAGGen.GenAttrMAG(AttrVV, true, 10000);
1748  printf("%d edges created for MAG...\n", MAG->GetEdges());
1749 
1752 
1754 
1755  TGnuPlot InDegP(FNm + "-InDeg"), OutDegP(FNm + "-OutDeg"), SvalP(FNm + "-Sval"), SvecP(FNm + "-Svec"), WccP(FNm + "-Wcc"), HopP(FNm + "-Hop"), TriadP(FNm + "-Triad"), CcfP(FNm + "-Ccf");;
1756 
1757  InDegP.SetXYLabel("Degree", "# of nodes");
1758  OutDegP.SetXYLabel("Degree", "# of nodes");
1759  SvalP.SetXYLabel("Rank", "Singular value");
1760  SvecP.SetXYLabel("Rank", "Primary SngVec component");
1761  WccP.SetXYLabel("Size of component", "# of components");
1762  CcfP.SetXYLabel("Degree", "Clustering coefficient");
1763  HopP.SetXYLabel("Hops", "# of node pairs");
1764  TriadP.SetXYLabel("# of triads", "# of participating nodes");
1765 
1766  InDegP.SetScale(gpsLog10XY); InDegP.AddCmd("set key top right");
1767  OutDegP.SetScale(gpsLog10XY); OutDegP.AddCmd("set key top right");
1768  SvalP.SetScale(gpsLog10XY); SvalP.AddCmd("set key top right");
1769  SvecP.SetScale(gpsLog10XY); SvecP.AddCmd("set key top right");
1770  CcfP.SetScale(gpsLog10XY); CcfP.AddCmd("set key top right");
1771  HopP.SetScale(gpsLog10XY); HopP.AddCmd("set key top right");
1772  TriadP.SetScale(gpsLog10XY); TriadP.AddCmd("set key top right");
1773  InDegP.ShowGrid(false);
1774  OutDegP.ShowGrid(false);
1775  SvalP.ShowGrid(false);
1776  SvecP.ShowGrid(false);
1777  CcfP.ShowGrid(false);
1778  HopP.ShowGrid(false);
1779  TriadP.ShowGrid(false);
1780 
1781  const TStr Style[2] = {"lt 1 lw 3 lc rgb 'black'", "lt 2 lw 3 lc rgb 'red'"};
1782  const TStr Name[2] = {"Real", "MAG"};
1783  GS.Add(Graph, TSecTm(1), "Real Graph");
1784  GS.Add(MAG, TSecTm(2), "MAG");
1785 
1786  TFltPrV InDegV, OutDegV, SvalV, SvecV, HopV, WccV, CcfV, TriadV;
1787  for(int i = 0; i < GS.Len(); i++) {
1788  MakeCCDF(GS.At(i)->GetDistr(gsdInDeg), InDegV);
1789  MakeCCDF(GS.At(i)->GetDistr(gsdOutDeg), OutDegV);
1790  SvalV = GS.At(i)->GetDistr(gsdSngVal);
1791  SvecV = GS.At(i)->GetDistr(gsdSngVec);
1792  MakeCCDF(GS.At(i)->GetDistr(gsdClustCf), CcfV);
1793  HopV = GS.At(i)->GetDistr(gsdHops);
1794  MakeCCDF(GS.At(i)->GetDistr(gsdTriadPart), TriadV);
1795 
1796  InDegP.AddPlot(InDegV, gpwLines, Name[i], Style[i]);
1797  OutDegP.AddPlot(OutDegV, gpwLines, Name[i], Style[i]);
1798  SvalP.AddPlot(SvalV, gpwLines, Name[i], Style[i]);
1799  SvecP.AddPlot(SvecV, gpwLines, Name[i], Style[i]);
1800  CcfP.AddPlot(CcfV, gpwLines, Name[i], Style[i]);
1801  HopP.AddPlot(HopV, gpwLines, Name[i], Style[i]);
1802  TriadP.AddPlot(TriadV, gpwLines, Name[i], Style[i]);
1803  }
1804 
1805  InDegP.SaveEps(30);
1806  OutDegP.SaveEps(30);
1807  SvalP.SaveEps(30);
1808  SvecP.SaveEps(30);
1809  CcfP.SaveEps(30);
1810  HopP.SaveEps(30);
1811  TriadP.SaveEps(30);
1812 }
1813 
1814 void TMAGFitBern::CountAttr(TFltV& EstMuV) const {
1815  const int NNodes = PhiVV.GetXDim();
1816  const int NAttrs = PhiVV.GetYDim();
1817  EstMuV.Gen(NAttrs);
1818  EstMuV.PutAll(0.0);
1819 
1820  for(int l = 0; l < NAttrs; l++) {
1821  for(int i = 0; i < NNodes; i++) {
1822  EstMuV[l] = EstMuV[l] + PhiVV(i, l);
1823  }
1824  EstMuV[l] = EstMuV[l] / double(NNodes);
1825  }
1826 }
1827 
1828 void TMAGFitBern::SortAttrOrdering(const TFltV& TrueMuV, TIntV& IndexV) const {
1829  const int NAttrs = TrueMuV.Len();
1830  // const int NNodes = PhiVV.GetXDim();
1831  TFltV EstMuV, SortedTrueMuV, SortedEstMuV, TrueIdxV, EstIdxV;
1832  IndexV.Gen(NAttrs);
1833  TrueIdxV.Gen(NAttrs);
1834  EstIdxV.Gen(NAttrs);
1835 
1836  for(int l = 0; l < NAttrs; l++) {
1837  TrueIdxV[l] = l;
1838  EstIdxV[l] = l;
1839  }
1840 
1841  CountAttr(EstMuV);
1842  SortedTrueMuV = TrueMuV;
1843  SortedEstMuV = EstMuV;
1844  for(int i = 0; i < NAttrs; i++) {
1845  if(SortedTrueMuV[i] > 0.5) { SortedTrueMuV[i] = 1.0 - SortedTrueMuV[i]; }
1846  if(SortedEstMuV[i] > 0.5) { SortedEstMuV[i] = 1.0 - SortedEstMuV[i]; }
1847  }
1848 
1849  for(int i = 0; i < NAttrs; i++) {
1850  for(int j = i+1; j < NAttrs; j++) {
1851  if(SortedTrueMuV[i] < SortedTrueMuV[j]) {
1852  SortedTrueMuV.Swap(i, j);
1853  TrueIdxV.Swap(i, j);
1854  }
1855  if(SortedEstMuV[i] < SortedEstMuV[j]) {
1856  EstIdxV.Swap((int)SortedEstMuV[i], (int)SortedEstMuV[j]);
1857  SortedEstMuV.Swap(i, j);
1858  }
1859  }
1860  }
1861 
1862  for(int l = 0; l < NAttrs; l++) {
1863  IndexV[l] = (int)TrueIdxV[(int)EstIdxV[l]];
1864  }
1865 }
1866 
1867 const bool TMAGFitBern::NextPermutation(TIntV& IndexV) const {
1868  const int NAttrs = IndexV.Len();
1869  int Pos = NAttrs - 1;
1870  while(Pos > 0) {
1871  if(IndexV[Pos-1] < IndexV[Pos]) {
1872  break;
1873  }
1874  Pos--;
1875  }
1876  if(Pos == 0) {
1877  return false;
1878  }
1879 
1880  int Val = NAttrs, NewPos = -1;
1881  for(int i = Pos; i < NAttrs; i++) {
1882  if(IndexV[i] > IndexV[Pos - 1] && IndexV[i] < Val) {
1883  NewPos = i;
1884  Val = IndexV[i];
1885  }
1886  }
1887  IndexV[NewPos] = IndexV[Pos - 1];
1888  IndexV[Pos - 1] = Val;
1889 
1890  TIntV SubIndexV;
1891  IndexV.GetSubValV(Pos, NAttrs - 1, SubIndexV);
1892  SubIndexV.Sort(true);
1893  for(int i = Pos; i < NAttrs; i++) {
1894  IndexV[i] = SubIndexV[i - Pos];
1895  }
1896 
1897  return true;
1898 }
1899 
1900 const double TMAGFitBern::ComputeJointOneLL(const TIntVV& AttrVV) const {
1901  double LL = 0.0;
1902  const int NNodes = Param.GetNodes();
1903  const int NAttrs = Param.GetAttrs();
1904  TMAGAffMtxV MtxV(NAttrs); Param.GetMtxV(MtxV);
1905  const TMAGNodeBern NodeAttr = Param.GetNodeAttr();
1906  const TFltV MuV = NodeAttr.GetMuV();
1907 
1908  for(int l = 0; l < NAttrs; l++) {
1909  for(int i = 0; i < MtxV[l].Len(); i++) {
1910  MtxV[l].At(i) = log(MtxV[l].At(i));
1911  }
1912  }
1913 
1914  for(int i = 0; i < NNodes; i++) {
1915  for(int l = 0; l < NAttrs; l++) {
1916  if(AttrVV.At(i, l) == 0) {
1917  LL += log(MuV[l]);
1918  } else {
1919  LL += log(1.0 - MuV[l]);
1920  }
1921  }
1922 
1923  for(int j = 0; j < NNodes; j++) {
1924  if(i == j) { continue; }
1925 
1926  double ProbLL = 0.0;
1927  for(int l = 0; l < NAttrs; l++) {
1928  ProbLL += MtxV[l].At(AttrVV.At(i, l), AttrVV.At(j, l));
1929  }
1930 
1931  if(Graph->IsEdge(i, j)) {
1932  LL += ProbLL;
1933  } else {
1934  LL += log(1-exp(ProbLL));
1935  }
1936  }
1937  }
1938 
1939  return LL;
1940 }
1941 
1942 const double TMAGFitBern::ComputeJointAdjLL(const TIntVV& AttrVV) const {
1943  double LL = 0.0;
1944  const int NNodes = Param.GetNodes();
1945  const int NAttrs = Param.GetAttrs();
1946  TMAGAffMtxV MtxV(NAttrs); Param.GetMtxV(MtxV);
1947  const TMAGNodeBern NodeAttr = Param.GetNodeAttr();
1948  const TFltV MuV = NodeAttr.GetMuV();
1949 
1950  for(int l = 0; l < NAttrs; l++) {
1951  for(int i = 0; i < MtxV[l].Len(); i++) {
1952  MtxV[l].At(i) = log(MtxV[l].At(i));
1953  }
1954  }
1955 
1956  for(int i = 0; i < NNodes; i++) {
1957  for(int j = 0; j < NNodes; j++) {
1958  if(i == j) { continue; }
1959 
1960  double ProbLL = 0.0;
1961  for(int l = 0; l < NAttrs; l++) {
1962  ProbLL += MtxV[l].At(AttrVV.At(i, l), AttrVV.At(j, l));
1963  }
1964 
1965  if(Graph->IsEdge(i, j)) {
1966  LL += ProbLL;
1967  } else {
1968  LL += log(1-exp(ProbLL));
1969  }
1970  }
1971  }
1972 
1973  return LL;
1974 }
1975 
1976 const double TMAGFitBern::ComputeJointLL(int NSample) const {
1977  double LL = 0.0;
1978  const int NNodes = Param.GetNodes();
1979  const int NAttrs = Param.GetAttrs();
1980 
1981  TRnd Rnd(2000);
1982  TIntVV AttrVV(NNodes, NAttrs);
1983  int count = 0;
1984  for(int s = 0; s < NSample; s++) {
1985  for(int i = 0; i < NNodes; i++) {
1986  for(int l = 0; l < NAttrs; l++) {
1987 
1988  if(Rnd.GetUniDev() <= PhiVV(i, l)) {
1989  AttrVV.At(i, l) = 0;
1990  } else {
1991  AttrVV.At(i, l) = 1;
1992  }
1993 
1994  if(PhiVV(i, l) > 0.05 && PhiVV(i, l) < 0.95) count++;
1995  }
1996  }
1997 
1998  LL += ComputeJointOneLL(AttrVV);
1999  }
2000  AttrVV.Clr();
2001 
2002  return LL / double(NSample);
2003 }
2004 
2005 const double TMAGFitBern::ComputeApxLL() const {
2006  double LL = 0.0;
2007  // double LLSelf = 0.0;
2008  const int NNodes = Param.GetNodes();
2009  const int NAttrs = Param.GetAttrs();
2010  TMAGNodeBern NodeAttr = Param.GetNodeAttr();
2011  TFltV MuV = NodeAttr.GetMuV();
2012  TMAGAffMtxV LLMtxV(NAttrs);
2013 
2014  for(int l = 0; l < NAttrs; l++) {
2015  for(int i = 0; i < NNodes; i++) {
2016  LL += PhiVV(i, l) * log(MuV[l]);
2017  LL += (1.0 - PhiVV(i, l)) * log(1.0 - MuV[l]);
2018  LL -= PhiVV(i, l) * log(PhiVV(i, l));
2019  LL -= (1.0 - PhiVV(i, l)) * log(1.0 - PhiVV(i, l));
2020  }
2021  TMAGAffMtx Theta = Param.GetMtx(l);
2022  Theta.GetLLMtx(LLMtxV[l]);
2023  }
2024 
2025  for(int i = 0; i < NNodes; i++) {
2026  for(int j = 0; j < NNodes; j++) {
2027  if(i == j) { continue; }
2028 
2029  if(Graph->IsEdge(i, j)) {
2030  for(int l = 0; l < NAttrs; l++) {
2031  LL += GetProbPhi(i, j, l, 0, 0) * LLMtxV[l].At(0, 0);
2032  LL += GetProbPhi(i, j, l, 0, 1) * LLMtxV[l].At(0, 1);
2033  LL += GetProbPhi(i, j, l, 1, 0) * LLMtxV[l].At(1, 0);
2034  LL += GetProbPhi(i, j, l, 1, 1) * LLMtxV[l].At(1, 1);
2035  }
2036  LL += log(NormConst);
2037  } else {
2038  LL += log(1-exp(GetProdLinWeight(i, j)));
2039  }
2040  }
2041  }
2042 
2043  return LL;
2044 }
2045 
2046 const double TMAGFitBern::ComputeApxAdjLL() const {
2047  double LL = 0.0;
2048  // double LLSelf = 0.0;
2049  const int NNodes = Param.GetNodes();
2050  const int NAttrs = Param.GetAttrs();
2051  TMAGNodeBern NodeAttr = Param.GetNodeAttr();
2052  TFltV MuV = NodeAttr.GetMuV();
2053  MuV.PutAll(0.0);
2054  TMAGAffMtxV LLMtxV(NAttrs);
2055  double TotalEdge = 0.0;
2056  for(int l = 0; l < NAttrs; l++) {
2057  TMAGAffMtx Theta = Param.GetMtx(l);
2058  Theta.GetLLMtx(LLMtxV[l]);
2059  }
2060 
2061  for(int i = 0; i < NNodes; i++) {
2062  for(int j = 0; j < NNodes; j++) {
2063  if(i == j) { continue; }
2064 
2065  if(Graph->IsEdge(i, j)) {
2066  for(int l = 0; l < NAttrs; l++) {
2067  LL += GetProbPhi(i, j, l, 0, 0) * LLMtxV[l].At(0, 0);
2068  LL += GetProbPhi(i, j, l, 0, 1) * LLMtxV[l].At(0, 1);
2069  LL += GetProbPhi(i, j, l, 1, 0) * LLMtxV[l].At(1, 0);
2070  LL += GetProbPhi(i, j, l, 1, 1) * LLMtxV[l].At(1, 1);
2071  }
2072  } else {
2073  LL += log(1-exp(GetProdLinWeight(i, j)));
2074  }
2075 
2076  double TempLL = 1.0;
2077  for(int l = 0; l < NAttrs; l++) {
2078  int Ai = (double(PhiVV(i, l)) > 0.5) ? 0 : 1;
2079  int Aj = (double(PhiVV(j, l)) > 0.5) ? 0 : 1;
2080  TempLL *= Param.GetMtx(l).At(Ai, Aj);
2081  }
2082  if(TMAGNodeBern::Rnd.GetUniDev() < TempLL) {
2083  TotalEdge += 1.0;
2084  }
2085  }
2086  }
2087 
2088  return LL;
2089 }
2090 
2091 const double TMAGFitBern::ComputeMI(const TIntVV& AttrV, const int AId1, const int AId2) {
2092  const int NNodes = AttrV.GetXDim();
2093  double MI = 0.0;
2094  double Cor = 0.0;
2095 
2096  TFltVV Pxy(2,2);
2097  TFltV Px(2), Py(2);
2098  Pxy.PutAll(0.0);
2099  Px.PutAll(0.0);
2100  Py.PutAll(0.0);
2101 
2102  for(int i = 0; i < NNodes; i++) {
2103  int X = AttrV(i, AId1);
2104  int Y = AttrV(i, AId2);
2105  Pxy(X, Y) = Pxy(X, Y) + 1;
2106  Px[X] = Px[X] + 1;
2107  Py[Y] = Py[Y] + 1;
2108  Cor += double(X * Y);
2109  }
2110 
2111  for(int x = 0; x < 2; x++) {
2112  for(int y = 0; y < 2; y++) {
2113  MI += Pxy(x, y) / double(NNodes) * (log(Pxy(x, y).Val) - log(Px[x].Val) - log(Py[y].Val) + log((double)NNodes));
2114  }
2115  }
2116 
2117  return MI;
2118 }
2119 
2120 const double TMAGFitBern::ComputeMI(const TFltVV& AttrV, const int AId1, const int AId2) {
2121  const int NNodes = AttrV.GetXDim();
2122  double MI = 0.0;
2123  double Cor = 0.0;
2124 
2125  TFltVV Pxy(2,2);
2126  TFltV Px(2), Py(2);
2127  Pxy.PutAll(0.0);
2128  Px.PutAll(0.0);
2129  Py.PutAll(0.0);
2130 
2131  for(int i = 0; i < NNodes; i++) {
2132  double X = AttrV(i, AId1);
2133  double Y = AttrV(i, AId2);
2134  Pxy(0, 0) = Pxy(0, 0) + X * Y;
2135  Pxy(0, 1) = Pxy(0, 1) + X * (1 - Y);
2136  Pxy(1, 0) = Pxy(1, 0) + (1 - X) * Y;
2137  Pxy(1, 1) = (i+1) - Pxy(0, 0) - Pxy(0, 1) - Pxy(1, 0);
2138  Px[0] = Px[0] + X;
2139  Py[0] = Py[0] + Y;
2140  Cor += double((1-X) * (1-Y));
2141  }
2142  Px[1] = NNodes - Px[0];
2143  Py[1] = NNodes - Py[0];
2144 
2145  for(int x = 0; x < 2; x++) {
2146  for(int y = 0; y < 2; y++) {
2147  MI += Pxy(x, y) / double(NNodes) * (log(Pxy(x, y)) - log(Px[x]) - log(Py[y]) + log(double(NNodes)));
2148  }
2149  }
2150 
2151  return MI;
2152 }
2153 
2154 const double TMAGFitBern::ComputeMI(const TIntVV& AttrV) {
2155  // const int NNodes = AttrV.GetXDim();
2156  const int NAttrs = AttrV.GetYDim();
2157  double MI = 0.0;
2158 
2159  for(int l = 0; l < NAttrs; l++) {
2160  for(int k = l+1; k < NAttrs; k++) {
2161  MI += ComputeMI(AttrV, l, k);
2162  }
2163  }
2164 
2165  return MI;
2166 }
2167 
2168 const double TMAGFitBern::ComputeMI(const TFltVV& AttrV) {
2169  // const int NNodes = AttrV.GetXDim();
2170  const int NAttrs = AttrV.GetYDim();
2171  double MI = 0.0;
2172 
2173  for(int l = 0; l < NAttrs; l++) {
2174  for(int k = l+1; k < NAttrs; k++) {
2175  MI += ComputeMI(AttrV, l, k);
2176  }
2177  }
2178 
2179  return MI;
2180 }
const double ComputeJointAdjLL(const TIntVV &AttrVV) const
Definition: mag.cpp:1942
#define IAssert(Cond)
Definition: bd.h:262
char * GetCStr() const
Definition: dt.h:686
void SetNodeAttr(const TNodeAttr &Dist)
Definition: mag.h:176
TMAGNodeBern & operator=(const TMAGNodeBern &Dist)
Definition: mag.cpp:244
const double GetAvgSqThetaLL(const int &NId1, const int &NId2, const int &AId, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:620
void DoEMAlg(const int &NStep, const int &NEstep, const int &NMstep, const double &LrnRate, const double &MaxGrad, const double &Lambda, const double &ReInit, const int &NReal=0)
Definition: mag.cpp:1630
TFlt Mu
Definition: mag.h:71
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:326
PGStat Add()
Definition: gstat.cpp:449
#define IAssertR(Cond, Reason)
Definition: bd.h:265
const bool NextPermutation(TIntV &IndexV) const
Definition: mag.cpp:1867
TPair< TFlt, TInt > TFltIntPr
Definition: ds.h:97
bool Debug
Definition: mag.h:347
TVec< TFltV > MuHisV
Definition: mag.h:352
Definition: tm.h:354
TFltV LLHisV
Definition: mag.h:354
const int GetAttrs() const
Definition: mag.h:174
TMAGParam< TMAGNodeBern > Param
Definition: mag.h:346
Definition: dt.h:11
const double GetAvgInCoeff(const int &i, const int &AId, const int &A, const TMAGAffMtx &Theta) const
Definition: mag.cpp:558
TMAGAffMtx & operator=(const TMAGAffMtx &Kronecker)
Definition: mag.cpp:19
const double GetEstNoEdgeLL(const int &NId, const int &AId) const
Definition: mag.cpp:746
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:467
void Swap(TMAGAffMtx &Mtx)
Definition: mag.cpp:90
TFltV BetaV
Definition: mag.h:120
TInt Dim
Definition: mag.h:122
TFltV SeedMtx
Definition: mag.h:14
void Dump(const TStr &MtxNm=TStr(), const bool &Sort=false) const
Definition: mag.cpp:128
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the graph.
Definition: graph.h:496
TFltV MuV
Definition: mag.h:121
static double GetAvgAbsErr(const TMAGAffMtx &Mtx1, const TMAGAffMtx &Mtx2)
Definition: mag.cpp:147
void NormalizeAffMtxV(TMAGAffMtxV &MtxV, const bool UseMu=false)
Definition: mag.cpp:1454
const double GetOutCoeff(const int &i, const int &j, const int &l, const int &A, const TMAGAffMtx &Theta) const
Definition: mag.cpp:554
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:278
TInt Dim
Definition: mag.h:93
Definition: bits.h:119
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:535
void MakeCCDF(const TFltPrV &RawV, TFltPrV &CcdfV)
Definition: mag.cpp:1703
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the graph.
Definition: graph.h:494
static const double NInf
Definition: mag.h:11
void CountAttr(TFltV &EstMuV) const
Definition: mag.cpp:1814
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:424
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
void SaveTxt(TStrV &OutStrV) const
Definition: mag.cpp:232
double GetRowSum(const int &RowId) const
Definition: mag.cpp:102
int GetXDim() const
Definition: ds.h:2136
TMAGNodeBeta & operator=(const TMAGNodeBeta &Dist)
Definition: mag.cpp:299
const double UpdatePhiMI(const double &Lambda, const int &NId, const int &AId, double &Phi)
Definition: mag.cpp:826
const double GetSqThetaLL(const int &NId1, const int &NId2, const int &AId) const
Definition: mag.cpp:609
double Normalize()
Definition: mag.cpp:116
const double GetProdSqWeight(const int &NId1, const int &NId2) const
Definition: mag.cpp:653
TVec< TMAGAffMtxV > MtxHisV
Definition: mag.h:353
int ChangeChAll(const char &SrcCh, const char &DstCh)
Definition: dt.cpp:1113
void LoadTxt(const TStr &InFNm)
Definition: mag.cpp:343
const void GradApxAffMtx(const int &AId, const TFltVV &ProdVV, const TFltVV &SqVV, const TMAGAffMtx &CurMtx, TFltV &GradV)
Definition: mag.cpp:1346
int Len() const
Definition: gstat.h:183
static TRnd Rnd
Definition: mag.h:117
Definition: dt.h:1291
TBool Dirty
Definition: mag.h:123
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:530
double GetGammaDev(const int &Order)
Definition: dt.cpp:95
double GetFlt() const
Definition: dt.h:629
void UnNormalizeAffMtxV(TMAGAffMtxV &MtxV, const bool UseMu=false)
Definition: mag.cpp:1492
const double LogSumExp(const double LogVal1, const double LogVal2)
Definition: mag.cpp:675
void AttrGen(TIntVV &AttrVV, const int &NNodes)
Definition: mag.cpp:201
void SetPhiVV(const TIntVV &AttrVV, const int KnownIds=0)
Definition: mag.cpp:399
Graph Statistics Sequence.
Definition: gstat.h:155
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1011
void RandomInit(const TFltV &MuV, const TMAGAffMtxV &AffMtxV, const int &Seed)
Definition: mag.cpp:510
const char * GetTmStr() const
Definition: tm.h:369
static double GetAvgFroErr(const TMAGAffMtx &Mtx1, const TMAGAffMtx &Mtx2)
Definition: mag.cpp:160
void Clr()
Definition: ds.h:2132
void GetMtxV(TMAGAffMtxV &MtxV) const
Definition: mag.h:188
void LoadTxt(const TStr &InFNm)
Definition: mag.cpp:215
void DelZeroDegNodes(PGraph &Graph)
Removes all the zero-degree nodes, that isolated nodes, from the graph.
Definition: alg.h:432
void SetGraph(const PNGraph &GraphPt)
Definition: mag.cpp:385
bool IsProbMtx() const
Definition: mag.cpp:27
const double GetAvgProdSqWeight(const int &NId1, const int &NId2, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:664
const double GetInCoeff(const int &i, const int &j, const int &l, const int &A, const TMAGAffMtx &Theta) const
Definition: mag.cpp:550
const void GradAffMtx(const int &AId, const TFltVV &ProdVV, const TFltVV &SqVV, const TMAGAffMtx &CurMtx, TFltV &GradV)
Definition: mag.cpp:1320
bool ESpeedUp
Definition: mag.h:347
void PlotProperties(const TStr &FNm)
Definition: mag.cpp:1727
void AddCmd(const TStr &Cmd)
Definition: gnuplot.h:81
const TFltV & GetMuV() const
Definition: mag.h:379
int Len() const
Definition: mag.h:27
void GetProbMtx(TMAGAffMtx &ProbMtx)
Definition: mag.cpp:82
const double GetAvgThetaLL(const int &NId1, const int &NId2, const int &AId, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:598
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1218
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:372
const double UpdateAffMtx(const int &AId, const double &LrnRate, const double &MaxGrad, const double &Lambda, TFltVV &ProdVV, TFltVV &SqVV, TMAGAffMtx &NewMtx)
Definition: mag.cpp:1418
Definition: ds.h:2109
TInt MtxDim
Definition: mag.h:13
TBoolVV KnownVV
Definition: mag.h:344
const void PrepareUpdateApxAffMtx(TFltVV &ProdVV, TFltVV &SqVV)
Definition: mag.cpp:1545
const double GetProdLinWeight(const int &NId1, const int &NId2) const
Definition: mag.cpp:631
const double UpdateMu(const int &AId)
Definition: mag.cpp:1299
TFltV MuV
Definition: mag.h:92
const double ComputeJointOneLL(const TIntVV &AttrVV) const
Definition: mag.cpp:1900
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1130
void SetBetaV(const TFltV &_AlphaV, const TFltV &_BetaV)
Definition: mag.cpp:315
const TFltV & GetMuV() const
Definition: mag.h:103
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:313
const double GetThetaLL(const int &NId1, const int &NId2, const int &AId) const
Definition: mag.cpp:587
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
const double UpdatePhi(const int &NId, const int &AId, double &Phi)
Definition: mag.cpp:756
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:461
double GetMu(const int &Attr) const
Definition: mag.h:105
double GetColSum(const int &ColId) const
Definition: mag.cpp:109
double DoEStepOneIter(const TFltV &TrueMuV, TFltVV &NewPhi, const double &Lambda)
Definition: mag.cpp:1056
TMAGAffMtx()
Definition: mag.h:16
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:539
void AttrGen(TIntVV &AttrVV, const int &NNodes)
Definition: mag.cpp:250
void SetBeta(const int &Attr, const double &Alpha, const double &Beta)
Definition: mag.cpp:308
TFlt NormConst
Definition: mag.h:350
const double GetProbPhi(const int &NId1, const int &NId2, const int &AId, const int &Attr1, const int &Attr2) const
Definition: mag.cpp:570
double DoEStepApxOneIter(const TFltV &TrueMuV, TFltVV &NewPhi, const double &Lambda)
Definition: mag.cpp:1134
Definition: tm.h:14
void Gen(const int &_XDim, const int &_YDim)
Definition: ds.h:2133
Definition: mag.h:10
void SaveEps(const int &FontSz=30, const TStr &Comment=TStr())
Definition: gnuplot.h:123
void GetRow(const int &RowN, TVec< TVal > &Vec) const
Definition: ds.h:2268
void AttrGen(TIntVV &AttrVV, const int &NNodes)
Definition: mag.cpp:323
Definition: gstat.h:28
void SetMtxV(const TMAGAffMtxV &MtxV)
Definition: mag.h:184
static TMAGAffMtx GetRndMtx(TRnd &Rnd, const int &Dim=2, const double &MinProb=0.0)
Definition: mag.cpp:191
TFltV AlphaV
Definition: mag.h:119
const double ComputeJointLL(int NSample) const
Definition: mag.cpp:1976
TFltVV PhiVV
Definition: mag.h:343
const TMAGAffMtx & GetMtx(const int &Attr) const
Definition: mag.h:187
const double GetAvgProdLinWeight(const int &NId1, const int &NId2, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:642
void SortAttrOrdering(const TFltV &TrueMuV, TIntV &IndexV) const
Definition: mag.cpp:1828
Definition: dt.h:201
TStr GetMtxStr() const
Definition: mag.cpp:63
Directed graph.
Definition: graph.h:296
TFltV AvgPhiV
Definition: mag.h:348
PNGraph GenMAG(TIntVV &AttrVV, const bool &IsDir=false, const int &Seed=1)
Definition: mag.h:295
double GetNrmDev()
Definition: dt.cpp:63
Definition: tm.h:81
void PutSeed(const int &_Seed)
Definition: dt.cpp:18
PGStat At(const int &ValN) const
Definition: gstat.h:186
const double ComputeApxAdjLL() const
Definition: mag.cpp:2046
void SaveTxt(TStrV &OutStrV) const
Definition: mag.cpp:287
void SetScale(const TGpScaleTy &GpScaleTy)
Definition: gnuplot.h:78
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:350
void GetLLMtx(TMAGAffMtx &LLMtx)
Definition: mag.cpp:74
static TRnd Rnd
Definition: mag.h:69
Definition: dt.h:412
const void PrepareUpdateAffMtx(TFltVV &ProdVV, TFltVV &SqVV)
Definition: mag.cpp:1532
bool Empty() const
Definition: dt.h:488
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
double DoEStep(const TFltV &TrueMuV, const int &NIter, double &LL, const double &Lambda)
Definition: mag.cpp:1272
const double GetAvgOutCoeff(const int &i, const int &AId, const int &A, const TMAGAffMtx &Theta) const
Definition: mag.cpp:564
const TNodeAttr & GetNodeAttr() const
Definition: mag.h:177
void SplitOnAllCh(const char &SplitCh, TStrV &StrV, const bool &SkipEmpty=true) const
Definition: dt.cpp:926
TInt Dim
Definition: mag.h:72
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:327
static TRnd Rnd
Definition: mag.h:90
double GetUniDev()
Definition: dt.h:30
void SetMu(const int &Attr, const double &Prob)
Definition: mag.h:104
void SetEpsMtx(const double &Eps1, const double &Eps0, const int &Eps1Val=1, const int &Eps0Val=0)
Definition: mag.cpp:44
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
Definition: gnuplot.cpp:182
double GetMtxSum() const
Definition: mag.cpp:95
void SetRndMtx(TRnd &Rnd, const int &PrmMtxDim=2, const double &MinProb=0.0)
Definition: mag.cpp:34
int GetYDim() const
Definition: ds.h:2137
void SaveTxt(const TStr &FNm)
Definition: mag.cpp:423
const int GetNodes() const
Definition: mag.h:171
const double UpdateApxPhiMI(const double &Lambda, const int &NId, const int &AId, double &Phi, TFltVV &ProdVV)
Definition: mag.cpp:930
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:486
void DoMStep(const int &GradIter, const double &LrnRate, const double &MaxGrad, const double &Lambda, const int &NReal=0)
Definition: mag.cpp:1608
const double & At(const int &Row, const int &Col) const
Definition: mag.h:41
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
void ShowGrid(const bool &Show)
Definition: gnuplot.h:76
TFltV & GetMtx()
Definition: mag.h:31
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:348
void SaveTxt(TStrV &OutStrV) const
Definition: mag.cpp:373
char * CStr()
Definition: dt.h:476
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:354
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:559
static const double ComputeMI(const TIntVV &AttrV, const int AId1, const int AId2)
Definition: mag.cpp:2091
Definition: gstat.h:28
void AddRndNoise(TRnd &Rnd, const double &SDev)
Definition: mag.cpp:52
PNGraph Graph
Definition: mag.h:345
const double UpdateAffMtxV(const int &GradIter, const double &LrnRate, const double &MaxGrad, const double &Lambda, const int &NReal=0)
Definition: mag.cpp:1558
bool MSpeedUp
Definition: mag.h:347
void Init(const TFltV &MuV, const TMAGAffMtxV &AffMtxV)
Definition: mag.cpp:452
Definition: gstat.h:29
const TVal & At(const int &X, const int &Y) const
Definition: ds.h:2142
int GetDim() const
Definition: mag.h:26
void PutY(const int &Y, const TVal &Val)
Definition: ds.h:2157
void PutAll(const TVal &Val)
Definition: ds.h:2154
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:358
TFltVV AvgPhiPairVV
Definition: mag.h:349
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:176
const double ComputeApxLL() const
Definition: mag.cpp:2005
const double ObjPhiMI(const double &x, const int &NId, const int &AId, const double &Lambda, const double &Q0, const double &Q1, const TFltVV &CntVV)
Definition: mag.cpp:722
void LoadTxt(const TStr &InFNm)
Definition: mag.cpp:264
void GenMtx(const int &Dim)
Definition: mag.h:36
const double GetProbMu(const int &NId1, const int &NId2, const int &AId, const int &Attr1, const int &Attr2, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:576
const double GradPhiMI(const double &x, const int &NId, const int &AId, const double &Lambda, const double &DeltaQ, const TFltVV &CntVV)
Definition: mag.cpp:703
void GetSubValV(const TSizeTy &BValN, const TSizeTy &EValN, TVec< TVal, TSizeTy > &ValV) const
Returns a vector on elements at positions BValN...EValN.
Definition: ds.h:1076