SNAP Library 2.1, Developer Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #include "stdafx.h" 00002 #include "mag.h" 00003 00004 TRnd TMAGNodeSimple::Rnd = TRnd(0); 00005 TRnd TMAGNodeBern::Rnd = TRnd(0); 00006 TRnd TMAGNodeBeta::Rnd = TRnd(0); 00007 00008 00010 // MAG affinity matrix 00011 00012 const double TMAGAffMtx::NInf = -DBL_MAX; 00013 00014 TMAGAffMtx::TMAGAffMtx(const TFltV& SeedMatrix) : SeedMtx(SeedMatrix) { 00015 MtxDim = (int) sqrt((double)SeedMatrix.Len()); 00016 IAssert(MtxDim*MtxDim == SeedMtx.Len()); 00017 } 00018 00019 TMAGAffMtx& TMAGAffMtx::operator = (const TMAGAffMtx& Kronecker) { 00020 if (this != &Kronecker){ 00021 MtxDim=Kronecker.MtxDim; 00022 SeedMtx=Kronecker.SeedMtx; 00023 } 00024 return *this; 00025 } 00026 00027 bool TMAGAffMtx::IsProbMtx() const { 00028 for (int i = 0; i < Len(); i++) { 00029 if (At(i) < 0.0 || At(i) > 1.0) return false; 00030 } 00031 return true; 00032 } 00033 00034 void TMAGAffMtx::SetRndMtx(TRnd& Rnd, const int& PrmMtxDim, const double& MinProb) { 00035 MtxDim = PrmMtxDim; 00036 SeedMtx.Gen(MtxDim*MtxDim); 00037 for (int p = 0; p < SeedMtx.Len(); p++) { 00038 do { 00039 SeedMtx[p] = Rnd.GetUniDev(); 00040 } while (SeedMtx[p] < MinProb); 00041 } 00042 } 00043 00044 void TMAGAffMtx::SetEpsMtx(const double& Eps1, const double& Eps0, const int& Eps1Val, const int& Eps0Val) { 00045 for (int i = 0; i < Len(); i++) { 00046 double& Val = At(i); 00047 if (Val == Eps1Val) Val = double(Eps1); 00048 else if (Val == Eps0Val) Val = double(Eps0); 00049 } 00050 } 00051 00052 void TMAGAffMtx::AddRndNoise(TRnd& Rnd, const double& SDev) { 00053 Dump("before"); 00054 double NewVal; 00055 int c =0; 00056 for (int i = 0; i < Len(); i++) { 00057 for(c = 0; ((NewVal = At(i)*Rnd.GetNrmDev(1, SDev, 0.8, 1.2)) < 0.01 || NewVal>0.99) && c <1000; c++) { } 00058 if (c < 999) { At(i) = NewVal; } else { printf("XXXXX\n"); } 00059 } 00060 Dump("after"); 00061 } 00062 00063 TStr TMAGAffMtx::GetMtxStr() const { 00064 TChA ChA("["); 00065 for (int i = 0; i < Len(); i++) { 00066 ChA += TStr::Fmt("%g", At(i)); 00067 if ((i+1)%GetDim()==0 && (i+1<Len())) { ChA += "; "; } 00068 else if (i+1<Len()) { ChA += " "; } 00069 } 00070 ChA += "]"; 00071 return TStr(ChA); 00072 } 00073 00074 void TMAGAffMtx::GetLLMtx(TMAGAffMtx& LLMtx) { 00075 LLMtx.GenMtx(MtxDim); 00076 for (int i = 0; i < Len(); i++) { 00077 if (At(i) != 0.0) { LLMtx.At(i) = log(At(i)); } 00078 else { LLMtx.At(i) = NInf; } 00079 } 00080 } 00081 00082 void TMAGAffMtx::GetProbMtx(TMAGAffMtx& ProbMtx) { 00083 ProbMtx.GenMtx(MtxDim); 00084 for (int i = 0; i < Len(); i++) { 00085 if (At(i) != NInf) { ProbMtx.At(i) = exp(At(i)); } 00086 else { ProbMtx.At(i) = 0.0; } 00087 } 00088 } 00089 00090 void TMAGAffMtx::Swap(TMAGAffMtx& Mtx) { 00091 ::Swap(MtxDim, Mtx.MtxDim); 00092 SeedMtx.Swap(Mtx.SeedMtx); 00093 } 00094 00095 double TMAGAffMtx::GetMtxSum() const { 00096 double Sum = 0; 00097 for (int i = 0; i < Len(); i++) { 00098 Sum += At(i); } 00099 return Sum; 00100 } 00101 00102 double TMAGAffMtx::GetRowSum(const int& RowId) const { 00103 double Sum = 0; 00104 for (int c = 0; c < GetDim(); c++) { 00105 Sum += At(RowId, c); } 00106 return Sum; 00107 } 00108 00109 double TMAGAffMtx::GetColSum(const int& ColId) const { 00110 double Sum = 0; 00111 for (int r = 0; r < GetDim(); r++) { 00112 Sum += At(r, ColId); } 00113 return Sum; 00114 } 00115 00116 double TMAGAffMtx::Normalize() { 00117 double Sum = GetMtxSum(); 00118 if(Sum == 0) { 00119 return 0; 00120 } 00121 00122 for(int i = 0; i < Len(); i++) { 00123 At(i) = At(i) / Sum; 00124 } 00125 return Sum; 00126 } 00127 00128 void TMAGAffMtx::Dump(const TStr& MtxNm, const bool& Sort) const { 00129 /*printf("%s: %d x %d\n", MtxNm.Empty()?"Mtx":MtxNm.CStr(), GetDim(), GetDim()); 00130 for (int r = 0; r < GetDim(); r++) { 00131 for (int c = 0; c < GetDim(); c++) { printf(" %8.2g", At(r, c)); } 00132 printf("\n"); 00133 }*/ 00134 if (! MtxNm.Empty()) printf("%s\n", MtxNm.CStr()); 00135 double Sum=0.0; 00136 TFltV ValV = SeedMtx; 00137 if (Sort) { ValV.Sort(false); } 00138 for (int i = 0; i < ValV.Len(); i++) { 00139 printf(" %10.4g", ValV[i]()); 00140 Sum += ValV[i]; 00141 if ((i+1) % GetDim() == 0) { printf("\n"); } 00142 } 00143 printf(" (sum:%.4f)\n", Sum); 00144 } 00145 00146 // average difference in the parameters 00147 double TMAGAffMtx::GetAvgAbsErr(const TMAGAffMtx& Mtx1, const TMAGAffMtx& Mtx2) { 00148 TFltV P1 = Mtx1.GetMtx(); 00149 TFltV P2 = Mtx2.GetMtx(); 00150 IAssert(P1.Len() == P2.Len()); 00151 P1.Sort(); P2.Sort(); 00152 double delta = 0.0; 00153 for (int i = 0; i < P1.Len(); i++) { 00154 delta += fabs(P1[i] - P2[i]); 00155 } 00156 return delta/P1.Len(); 00157 } 00158 00159 // average L2 difference in the parameters 00160 double TMAGAffMtx::GetAvgFroErr(const TMAGAffMtx& Mtx1, const TMAGAffMtx& Mtx2) { 00161 TFltV P1 = Mtx1.GetMtx(); 00162 TFltV P2 = Mtx2.GetMtx(); 00163 IAssert(P1.Len() == P2.Len()); 00164 P1.Sort(); P2.Sort(); 00165 double delta = 0.0; 00166 for (int i = 0; i < P1.Len(); i++) { 00167 delta += pow(P1[i] - P2[i], 2); 00168 } 00169 return sqrt(delta/P1.Len()); 00170 } 00171 00172 // get matrix from matlab matrix notation 00173 TMAGAffMtx TMAGAffMtx::GetMtx(TStr MatlabMtxStr) { 00174 TStrV RowStrV, ColStrV; 00175 MatlabMtxStr.ChangeChAll(',', ' '); 00176 MatlabMtxStr.SplitOnAllCh(';', RowStrV); IAssert(! RowStrV.Empty()); 00177 RowStrV[0].SplitOnWs(ColStrV); IAssert(! ColStrV.Empty()); 00178 const int Rows = RowStrV.Len(); 00179 const int Cols = ColStrV.Len(); 00180 IAssert(Rows == Cols); 00181 TMAGAffMtx Mtx(Rows); 00182 for (int r = 0; r < Rows; r++) { 00183 RowStrV[r].SplitOnWs(ColStrV); 00184 IAssert(ColStrV.Len() == Cols); 00185 for (int c = 0; c < Cols; c++) { 00186 Mtx.At(r, c) = (double) ColStrV[c].GetFlt(); } 00187 } 00188 return Mtx; 00189 } 00190 00191 TMAGAffMtx TMAGAffMtx::GetRndMtx(TRnd& Rnd, const int& Dim, const double& MinProb) { 00192 TMAGAffMtx Mtx; 00193 Mtx.SetRndMtx(Rnd, Dim, MinProb); 00194 return Mtx; 00195 } 00196 00197 00199 // Simple MAG node attributes (Same Bernoulli for every attribute) 00200 00201 void TMAGNodeSimple::AttrGen(TIntVV& AttrVV, const int& NNodes) { 00202 IAssert(Dim > 0); 00203 AttrVV.Gen(NNodes, Dim); 00204 AttrVV.PutAll(0); 00205 00206 for(int i = 0; i < NNodes; i++) { 00207 for(int l = 0; l < Dim; l++) { 00208 if((TMAGNodeSimple::Rnd).GetUniDev() > Mu) { 00209 AttrVV.At(i, l) = 1; 00210 } 00211 } 00212 } 00213 } 00214 00215 void TMAGNodeSimple::LoadTxt(const TStr& InFNm) { 00216 FILE *fp = fopen(InFNm.CStr(), "r"); 00217 IAssertR(fp != NULL, "File does not exist: " + InFNm); 00218 00219 char buf[128]; 00220 char *token; 00221 TStr TokenStr; 00222 TFlt Val; 00223 00224 token = strtok(buf, "&"); 00225 token = strtok(token, " \t"); 00226 TokenStr = TStr(token); 00227 Mu = TokenStr.GetFlt(Val); 00228 00229 fclose(fp); 00230 } 00231 00232 void TMAGNodeSimple::SaveTxt(TStrV& OutStrV) const { 00233 OutStrV.Gen(Dim, 0); 00234 00235 for(int i = 0; i < Dim; i++) { 00236 OutStrV.Add(TStr::Fmt("%f", double(Mu))); 00237 } 00238 } 00239 00240 00242 // MAG node attributes with (different) Bernoulli 00243 00244 TMAGNodeBern& TMAGNodeBern::operator=(const TMAGNodeBern& Dist) { 00245 MuV = Dist.MuV; 00246 Dim = Dist.Dim; 00247 return (*this); 00248 } 00249 00250 void TMAGNodeBern::AttrGen(TIntVV& AttrVV, const int& NNodes) { 00251 IAssert(Dim > 0); 00252 AttrVV.Gen(NNodes, Dim); 00253 AttrVV.PutAll(0); 00254 00255 for(int i = 0; i < NNodes; i++) { 00256 for(int l = 0; l < Dim; l++) { 00257 if((TMAGNodeBern::Rnd).GetUniDev() > MuV[l]) { 00258 AttrVV.At(i, l) = 1; 00259 } 00260 } 00261 } 00262 } 00263 00264 void TMAGNodeBern::LoadTxt(const TStr& InFNm) { 00265 FILE *fp = fopen(InFNm.CStr(), "r"); 00266 IAssertR(fp != NULL, "File does not exist: " + InFNm); 00267 00268 Dim = 0; 00269 MuV.Gen(10, 0); 00270 00271 char buf[128]; 00272 char *token; 00273 TStr TokenStr; 00274 TFlt Val; 00275 00276 while(fgets(buf, sizeof(buf), fp) != NULL) { 00277 token = strtok(buf, "&"); 00278 token = strtok(token, " \t"); 00279 TokenStr = TStr(token); 00280 MuV.Add(TokenStr.GetFlt(Val)); 00281 } 00282 Dim = MuV.Len(); 00283 00284 fclose(fp); 00285 } 00286 00287 void TMAGNodeBern::SaveTxt(TStrV& OutStrV) const { 00288 OutStrV.Gen(Dim, 0); 00289 00290 for(int i = 0; i < Dim; i++) { 00291 OutStrV.Add(TStr::Fmt("%f", double(MuV[i]))); 00292 } 00293 } 00294 00295 00297 // MAG node attributes with Beta + Bernoulli family 00298 00299 TMAGNodeBeta& TMAGNodeBeta::operator=(const TMAGNodeBeta& Dist) { 00300 AlphaV = Dist.AlphaV; 00301 BetaV = Dist.BetaV; 00302 Dim = Dist.Dim; 00303 MuV = Dist.MuV; 00304 Dirty = Dist.Dirty; 00305 return (*this); 00306 } 00307 00308 void TMAGNodeBeta::SetBeta(const int& Attr, const double& Alpha, const double& Beta) { 00309 IAssert(Attr < Dim); 00310 AlphaV[Attr] = Alpha; 00311 BetaV[Attr] = Beta; 00312 Dirty = true; 00313 } 00314 00315 void TMAGNodeBeta::SetBetaV(const TFltV& _AlphaV, const TFltV& _BetaV) { 00316 IAssert(_AlphaV.Len() == _BetaV.Len()); 00317 AlphaV = _AlphaV; 00318 BetaV = _BetaV; 00319 Dim = _AlphaV.Len(); 00320 Dirty = true; 00321 } 00322 00323 void TMAGNodeBeta::AttrGen(TIntVV& AttrVV, const int& NNodes) { 00324 IAssert(Dim > 0); 00325 AttrVV.Gen(NNodes, Dim); 00326 AttrVV.PutAll(0); 00327 00328 // printf("AlphaV = %d, BetaV = %d, MuV = %d\n", AlphaV.Len(), BetaV.Len(), MuV.Len()); 00329 00330 for(int i = 0; i < NNodes; i++) { 00331 for(int l = 0; l < Dim; l++) { 00332 double x = TMAGNodeBeta::Rnd.GetGammaDev((int)AlphaV[l]); 00333 double y = TMAGNodeBeta::Rnd.GetGammaDev((int)BetaV[l]); 00334 MuV[l] = x / (x + y); 00335 if((TMAGNodeBeta::Rnd).GetUniDev() > MuV[l]) { 00336 AttrVV.At(i, l) = 1; 00337 } 00338 } 00339 } 00340 Dirty = false; 00341 } 00342 00343 void TMAGNodeBeta::LoadTxt(const TStr& InFNm) { 00344 FILE *fp = fopen(InFNm.CStr(), "r"); 00345 IAssertR(fp != NULL, "File does not exist: " + InFNm); 00346 00347 Dim = 0; 00348 AlphaV.Gen(10, 0); 00349 BetaV.Gen(10, 0); 00350 00351 char buf[128]; 00352 char *token; 00353 TStr TokenStr; 00354 TFlt Val; 00355 00356 while(fgets(buf, sizeof(buf), fp) != NULL) { 00357 token = strtok(buf, "&"); 00358 00359 token = strtok(token, " \t"); 00360 TokenStr = TStr(token); 00361 AlphaV.Add(TokenStr.GetFlt(Val)); 00362 00363 token = strtok(NULL, " \t"); 00364 TokenStr = TStr(token); 00365 BetaV.Add(TokenStr.GetFlt(Val)); 00366 00367 Dim++; 00368 } 00369 00370 fclose(fp); 00371 } 00372 00373 void TMAGNodeBeta::SaveTxt(TStrV& OutStrV) const { 00374 OutStrV.Gen(Dim, 0); 00375 00376 for(int i = 0; i < Dim; i++) { 00377 OutStrV.Add(TStr::Fmt("%f %f", double(AlphaV[i]), double(BetaV[i]))); 00378 } 00379 } 00380 00381 00383 // MAGFit with Bernoulli node attributes 00384 00385 void TMAGFitBern::SetGraph(const PNGraph& GraphPt) { 00386 Graph = GraphPt; 00387 bool NodesOk = true; 00388 // check that nodes IDs are {0,1,..,Nodes-1} 00389 for (int nid = 0; nid < Graph->GetNodes(); nid++) { 00390 if (! Graph->IsNode(nid)) { NodesOk=false; break; } } 00391 if (! NodesOk) { 00392 TIntV NIdV; GraphPt->GetNIdV(NIdV); 00393 Graph = TSnap::GetSubGraph(GraphPt, NIdV, true); 00394 for (int nid = 0; nid < Graph->GetNodes(); nid++) { 00395 IAssert(Graph->IsNode(nid)); } 00396 } 00397 } 00398 00399 void TMAGFitBern::SetPhiVV(const TIntVV& AttrVV, const int KnownIds) { 00400 const int NNodes = Param.GetNodes(); 00401 const int NAttrs = Param.GetAttrs(); 00402 00403 PhiVV.Gen(NNodes, NAttrs); 00404 KnownVV.Gen(NNodes, NAttrs); 00405 00406 for(int l = 0; l < NAttrs; l++) { 00407 for(int i = 0; i < NNodes; i++) { 00408 if(int(AttrVV(i, l)) == 0) { 00409 PhiVV(i, l) = 0.9999; 00410 } else { 00411 PhiVV(i, l) = 0.0001; 00412 } 00413 } 00414 00415 if(l < KnownIds) { 00416 KnownVV.PutY(l, true); 00417 } else { 00418 KnownVV.PutY(l, false); 00419 } 00420 } 00421 } 00422 00423 void TMAGFitBern::SaveTxt(const TStr& FNm) { 00424 const int NNodes = Param.GetNodes(); 00425 const int NAttrs = Param.GetAttrs(); 00426 const TFltV MuV = GetMuV(); 00427 TMAGAffMtxV MtxV; 00428 Param.GetMtxV(MtxV); 00429 00430 FILE *fp = fopen(FNm.GetCStr(), "w"); 00431 for(int l = 0; l < NAttrs; l++) { 00432 fprintf(fp, "%.4f\t", double(MuV[l])); 00433 for(int row = 0; row < 2; row++) { 00434 for(int col = 0; col < 2; col++) { 00435 fprintf(fp, " %.4f", double(MtxV[l].At(row, col))); 00436 } 00437 fprintf(fp, (row == 0) ? ";" : "\n"); 00438 } 00439 } 00440 fclose(fp); 00441 00442 fp = fopen((FNm + "f").CStr(), "w"); 00443 for(int i = 0; i < NNodes; i++) { 00444 for(int l = 0; l < NAttrs; l++) { 00445 fprintf(fp, "%f ", double(PhiVV(i, l))); 00446 } 00447 fprintf(fp, "\n"); 00448 } 00449 fclose(fp); 00450 } 00451 00452 void TMAGFitBern::Init(const TFltV& MuV, const TMAGAffMtxV& AffMtxV) { 00453 TMAGNodeBern DistParam(MuV); 00454 Param.SetNodeAttr(DistParam); 00455 Param.SetMtxV(AffMtxV); 00456 00457 const int NNodes = Param.GetNodes(); 00458 const int NAttrs = Param.GetAttrs(); 00459 00460 PhiVV.Gen(NNodes, NAttrs); 00461 KnownVV.Gen(NNodes, NAttrs); 00462 KnownVV.PutAll(false); 00463 } 00464 00465 #if 0 00466 void TMAGFitBern::PerturbInit(const TFltV& MuV, const TMAGAffMtxV& AffMtxV, const double& PerturbRate) { 00467 IAssert(PerturbRate < 1.0); 00468 00469 TFltV InitMuV = MuV; //InitMuV.PutAll(0.5); 00470 TMAGNodeBern DistParam(InitMuV); 00471 Param.SetMtxV(AffMtxV); 00472 TRnd& Rnd = TMAGNodeBern::Rnd; 00473 TMAGAffMtxV PerturbMtxV = AffMtxV; 00474 00475 const int NNodes = Param.GetNodes(); 00476 const int NAttrs = Param.GetAttrs(); 00477 00478 for(int l = 0; l < NAttrs; l++) { 00479 double Mu = MuV[l] + PerturbRate * (Rnd.GetUniDev() - 0.5) * 2; 00480 // double Mu = 0.5; 00481 if(Mu < 0.01) { Mu = 0.01; } 00482 if(Mu > 0.99) { Mu = 0.99; } 00483 DistParam.SetMu(l, Mu); 00484 // PhiVV.PutY(l, MuV[l] + Perturb); 00485 TMAGAffMtx AffMtx(AffMtxV[l]); 00486 for(int p = 0; p < 4; p++) { 00487 AffMtx.At(p) += PerturbRate * (Rnd.GetUniDev() - 0.5) * 2; 00488 if(AffMtx.At(p) < 0.05) { AffMtx.At(p) = 0.05; } 00489 if(AffMtx.At(p) > 0.95) { AffMtx.At(p) = 0.95; } 00490 } 00491 AffMtx.At(0, 1) = AffMtx.At(1, 0); 00492 PerturbMtxV[l] = AffMtx; 00493 } 00494 // NormalizeAffMtxV(PerturbMtxV); 00495 00496 printf("\n"); 00497 for(int l = 0; l < NAttrs; l++) { 00498 printf("Mu = %.3f ", DistParam.GetMu(l)); 00499 printf("AffMtx = %s\n", PerturbMtxV[l].GetMtxStr().GetCStr()); 00500 } 00501 Param.SetMtxV(PerturbMtxV); 00502 Param.SetNodeAttr(DistParam); 00503 00504 PhiVV.Gen(NNodes, NAttrs); 00505 KnownVV.Gen(NNodes, NAttrs); 00506 KnownVV.PutAll(false); 00507 } 00508 #endif // 0 00509 00510 void TMAGFitBern::RandomInit(const TFltV& MuV, const TMAGAffMtxV& AffMtxV, const int& Seed) { 00511 TRnd& Rnd = TMAGNodeBern::Rnd; 00512 Rnd.PutSeed(Seed); 00513 00514 TFltV InitMuV = MuV; InitMuV.PutAll(0.5); 00515 TMAGNodeBern DistParam(InitMuV); 00516 Param.SetMtxV(AffMtxV); 00517 00518 const int NNodes = Param.GetNodes(); 00519 const int NAttrs = Param.GetAttrs(); 00520 00521 PhiVV.Gen(NNodes, NAttrs); 00522 KnownVV.Gen(NNodes, NAttrs); 00523 KnownVV.PutAll(false); 00524 00525 for(int i = 0; i < NNodes; i++) { 00526 for(int l = 0; l < NAttrs; l++) { 00527 PhiVV.At(i, l) = Rnd.GetUniDev(); 00528 // PhiVV.At(i, l) = 0.5; 00529 } 00530 } 00531 00532 TMAGAffMtxV RndMtxV = AffMtxV; 00533 for(int l = 0; l < NAttrs; l++) { 00534 for(int p = 0; p < 4; p++) { 00535 RndMtxV[l].At(p) = TMAGNodeBern::Rnd.GetUniDev(); 00536 if(RndMtxV[l].At(p) < 0.1) { RndMtxV[l].At(p) = 0.1; } 00537 if(RndMtxV[l].At(p) > 0.9) { RndMtxV[l].At(p) = 0.9; } 00538 } 00539 RndMtxV[l].At(0, 1) = RndMtxV[l].At(1, 0); 00540 } 00541 00542 printf("\n"); 00543 for(int l = 0; l < NAttrs; l++) { 00544 printf("AffMtx = %s\n", RndMtxV[l].GetMtxStr().GetCStr()); 00545 } 00546 Param.SetMtxV(RndMtxV); 00547 Param.SetNodeAttr(DistParam); 00548 } 00549 00550 const double TMAGFitBern::GetInCoeff(const int& i, const int& j, const int& l, const int& A, const TMAGAffMtx& Theta) const { 00551 return (PhiVV.At(i, l) * Theta.At(0, A) + (1.0 - PhiVV.At(i, l)) * Theta.At(1, A)); 00552 } 00553 00554 const double TMAGFitBern::GetOutCoeff(const int& i, const int& j, const int& l, const int& A, const TMAGAffMtx& Theta) const { 00555 return (PhiVV.At(j, l) * Theta.At(A, 0) + (1.0 - PhiVV.At(j, l)) * Theta.At(A, 1)); 00556 } 00557 00558 const double TMAGFitBern::GetAvgInCoeff(const int& i, const int& AId, const int& A, const TMAGAffMtx& Theta) const { 00559 const int NNodes = Param.GetNodes(); 00560 const double Mu_l = AvgPhiV[AId] / double(NNodes); 00561 return (Mu_l * Theta.At(0, A) + (1.0 - Mu_l) * Theta.At(1, A)); 00562 } 00563 00564 const double TMAGFitBern::GetAvgOutCoeff(const int& i, const int& AId, const int& A, const TMAGAffMtx& Theta) const { 00565 const int NNodes = Param.GetNodes(); 00566 const double Mu_l = AvgPhiV[AId] / double(NNodes); 00567 return (Mu_l * Theta.At(A, 0) + (1.0 - Mu_l) * Theta.At(A, 1)); 00568 } 00569 00570 const double TMAGFitBern::GetProbPhi(const int& NId1, const int& NId2, const int& AId, const int& Attr1, const int& Attr2) const { 00571 double Prob1 = (Attr1 == 0) ? double(PhiVV.At(NId1, AId)) : (1.0 - PhiVV.At(NId1, AId)); 00572 double Prob2 = (Attr2 == 0) ? double(PhiVV.At(NId2, AId)) : (1.0 - PhiVV.At(NId2, AId)); 00573 return (Prob1 * Prob2); 00574 } 00575 00576 const double TMAGFitBern::GetProbMu(const int& NId1, const int& NId2, const int& AId, const int& Attr1, const int& Attr2, const bool Left, const bool Right) const { 00577 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00578 // double Mu = DistParam.GetMu(AId); 00579 double Mu = AvgPhiV[AId] / double(Param.GetNodes()); 00580 double Prob1 = (Left) ? double(PhiVV.At(NId1, AId)) : double(Mu); 00581 double Prob2 = (Right)? double(PhiVV.At(NId2, AId)) : double(Mu); 00582 Prob1 = (Attr1 == 0) ? Prob1 : 1.0 - Prob1; 00583 Prob2 = (Attr2 == 0) ? Prob2 : 1.0 - Prob2; 00584 return (Prob1 * Prob2); 00585 } 00586 00587 const double TMAGFitBern::GetThetaLL(const int& NId1, const int& NId2, const int& AId) const { 00588 double LL = 0.0; 00589 const TMAGAffMtx& Mtx = Param.GetMtx(AId); 00590 for(int A1 = 0; A1 < 2; A1++) { 00591 for(int A2 = 0; A2 < 2; A2++) { 00592 LL += GetProbPhi(NId1, NId2, AId, A1, A2) * Mtx.At(A1, A2); 00593 } 00594 } 00595 return log(LL); 00596 } 00597 00598 const double TMAGFitBern::GetAvgThetaLL(const int& NId1, const int& NId2, const int& AId, const bool Left, const bool Right) const { 00599 double LL = 0.0; 00600 const TMAGAffMtx& Mtx = Param.GetMtx(AId); 00601 for(int A1 = 0; A1 < 2; A1++) { 00602 for(int A2 = 0; A2 < 2; A2++) { 00603 LL += GetProbMu(NId1, NId2, AId, A1, A2, Left, Right) * Mtx.At(A1, A2); 00604 } 00605 } 00606 return log(LL); 00607 } 00608 00609 const double TMAGFitBern::GetSqThetaLL(const int& NId1, const int& NId2, const int& AId) const { 00610 double LL = 0.0; 00611 const TMAGAffMtx& Mtx = Param.GetMtx(AId); 00612 for(int A1 = 0; A1 < 2; A1++) { 00613 for(int A2 = 0; A2 < 2; A2++) { 00614 LL += GetProbPhi(NId1, NId2, AId, A1, A2) * Mtx.At(A1, A2) * Mtx.At(A1, A2); 00615 } 00616 } 00617 return log(LL); 00618 } 00619 00620 const double TMAGFitBern::GetAvgSqThetaLL(const int& NId1, const int& NId2, const int& AId, const bool Left, const bool Right) const { 00621 double LL = 0.0; 00622 const TMAGAffMtx& Mtx = Param.GetMtx(AId); 00623 for(int A1 = 0; A1 < 2; A1++) { 00624 for(int A2 = 0; A2 < 2; A2++) { 00625 LL += GetProbMu(NId1, NId2, AId, A1, A2, Left, Right) * Mtx.At(A1, A2) * Mtx.At(A1, A2); 00626 } 00627 } 00628 return log(LL); 00629 } 00630 00631 const double TMAGFitBern::GetProdLinWeight(const int& NId1, const int& NId2) const { 00632 const int NAttrs = Param.GetAttrs(); 00633 double LL = 0.0; 00634 00635 for(int l = 0; l < NAttrs; l++) { 00636 LL += GetThetaLL(NId1, NId2, l); 00637 } 00638 // return LL; 00639 return LL + log(NormConst); 00640 } 00641 00642 const double TMAGFitBern::GetAvgProdLinWeight(const int& NId1, const int& NId2, const bool Left, const bool Right) const { 00643 const int NAttrs = Param.GetAttrs(); 00644 double LL = 0.0; 00645 00646 for(int l = 0; l < NAttrs; l++) { 00647 LL += GetAvgThetaLL(NId1, NId2, l, Left, Right); 00648 } 00649 // return LL; 00650 return LL + log(NormConst); 00651 } 00652 00653 const double TMAGFitBern::GetProdSqWeight(const int& NId1, const int& NId2) const { 00654 const int NAttrs = Param.GetAttrs(); 00655 double LL = 0.0; 00656 00657 for(int l = 0; l < NAttrs; l++) { 00658 LL += GetSqThetaLL(NId1, NId2, l); 00659 } 00660 // return LL; 00661 return LL + 2 * log(NormConst); 00662 } 00663 00664 const double TMAGFitBern::GetAvgProdSqWeight(const int& NId1, const int& NId2, const bool Left, const bool Right) const { 00665 const int NAttrs = Param.GetAttrs(); 00666 double LL = 0.0; 00667 00668 for(int l = 0; l < NAttrs; l++) { 00669 LL += GetAvgSqThetaLL(NId1, NId2, l, Left, Right); 00670 } 00671 // return LL; 00672 return LL + 2 * log(NormConst); 00673 } 00674 00675 const double LogSumExp(const double LogVal1, const double LogVal2) { 00676 double MaxExp = (LogVal1 > LogVal2) ? LogVal1 : LogVal2; 00677 double Sum = exp(LogVal1 - MaxExp) + exp(LogVal2 - MaxExp); 00678 return (log(Sum) + MaxExp); 00679 } 00680 00681 const double LogSumExp(const TFltV& LogValV) { 00682 const int Len = LogValV.Len(); 00683 double MaxExp = -DBL_MAX; 00684 00685 for(int i = 0; i < Len; i++) { 00686 if(MaxExp < LogValV[i]) { MaxExp = LogValV[i]; } 00687 } 00688 00689 double Sum = 0.0; 00690 for(int i = 0; i < Len; i++) { 00691 Sum += exp(LogValV[i] - MaxExp); 00692 } 00693 00694 return (log(Sum) + MaxExp); 00695 } 00696 00697 const double LogSumExp(const double *LogValArray, const int Len) { 00698 TFltV TmpV(Len); 00699 for(int i = 0; i < Len; i++) { TmpV[i] = LogValArray[i]; } 00700 return LogSumExp(TmpV); 00701 } 00702 00703 const double TMAGFitBern::GradPhiMI(const double& x, const int& NId, const int& AId, const double& Lambda, const double& DeltaQ, const TFltVV& CntVV) { 00704 const int NAttrs = CntVV.GetYDim(); 00705 double Grad = DeltaQ - log(x) + log(1.0-x); 00706 00707 for(int l = 0; l < NAttrs; l++) { 00708 if(l == AId) { continue; } 00709 const double C0 = PhiVV(NId, l); 00710 const double C1 = 1.0 - C0; 00711 Grad -= Lambda * C0 * log(CntVV(0, l) + C0 * x); 00712 Grad -= Lambda * C1 * log(CntVV(1, l) + C1 * x); 00713 Grad += Lambda * C0 * log(CntVV(2, l) + C0 * (1-x)); 00714 Grad += Lambda * C1 * log(CntVV(3, l) + C1 * (1-x)); 00715 Grad -= Lambda * log(CntVV(0, l) + CntVV(1, l) + x); 00716 Grad += Lambda * log(CntVV(2, l) + CntVV(3, l) + (1-x)); 00717 } 00718 00719 return Grad; 00720 } 00721 00722 const double TMAGFitBern::ObjPhiMI(const double& x, const int& NId, const int& AId, const double& Lambda, const double& Q0, const double& Q1, const TFltVV& CntVV) { 00723 const int NAttrs = CntVV.GetYDim(); 00724 double Val = x*(Q0 - log(x)) + (1-x)*(Q1 - log(1.0-x)); 00725 00726 for(int l = 0; l < NAttrs; l++) { 00727 if(l == AId) { continue; } 00728 const double C0 = PhiVV(NId, l); 00729 const double C1 = 1.0 - C0; 00730 Val -= Lambda * (CntVV(0, l) + C0 * x) * log(CntVV(0, l) + C0 * x); 00731 Val -= Lambda * (CntVV(1, l) + C1 * x) * log(CntVV(1, l) + C1 * x); 00732 Val -= Lambda * (CntVV(2, l) + C0 * (1-x)) * log(CntVV(2, l) + C0 * (1-x)); 00733 Val -= Lambda * (CntVV(3, l) + C1 * (1-x)) * log(CntVV(3, l) + C1 * (1-x)); 00734 Val += Lambda * (CntVV(0, l) + CntVV(1, l) + x) * log(CntVV(0, l) + CntVV(1, l) + x); 00735 Val += Lambda * (CntVV(2, l) + CntVV(3, l) + 1 - x) * log(CntVV(2, l) + CntVV(3, l) + (1-x)); 00736 00737 if(!(CntVV(0, l) > 0)) printf("CntVV(0, %d) = %.2f\n", l, double(CntVV(0, l))); 00738 if(!(CntVV(1, l) > 0)) printf("CntVV(1, %d) = %.2f\n", l, double(CntVV(1, l))); 00739 if(!(CntVV(2, l) > 0)) printf("CntVV(2, %d) = %.2f\n", l, double(CntVV(2, l))); 00740 if(!(CntVV(3, l) > 0)) printf("CntVV(3, %d) = %.2f\n", l, double(CntVV(3, l))); 00741 } 00742 00743 return Val; 00744 } 00745 00746 const double TMAGFitBern::GetEstNoEdgeLL(const int& NId, const int& AId) const { 00747 // const int NNodes = Param.GetNodes(); 00748 // const int NAttrs = Param.GetAttrs(); 00749 00750 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00751 double LL = 0.0; 00752 00753 return LL; 00754 } 00755 00756 const double TMAGFitBern::UpdatePhi(const int& NId, const int& AId, double& Phi) { 00757 TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId); 00758 TMAGAffMtx SqTheta(Theta); 00759 const int NNodes = Param.GetNodes(); 00760 // const int NAttrs = Param.GetAttrs(); 00761 Theta.GetLLMtx(LLTheta); 00762 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00763 const double Mu = DistParam.GetMu(AId); 00764 00765 for(int i = 0; i < Theta.Len(); i++) { 00766 SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i); 00767 } 00768 00769 // Using log-sum-exp trick 00770 double EdgeQ[2], NonEdgeQ[2]; 00771 double MaxExp[2]; 00772 TFltV NonEdgeLLV[2]; 00773 for(int i = 0; i < 2; i++) { 00774 EdgeQ[i] = 0.0; 00775 NonEdgeQ[i] = 0.0; 00776 MaxExp[i] = -DBL_MAX; 00777 NonEdgeLLV[i].Gen(4 * NNodes, 0); 00778 } 00779 00780 for(int j = 0; j < NNodes; j++) { 00781 if(j == NId) { continue; } 00782 00783 if(Graph->IsEdge(NId, j)) { 00784 EdgeQ[0] += GetOutCoeff(NId, j, AId, 0, LLTheta); 00785 EdgeQ[1] += GetOutCoeff(NId, j, AId, 1, LLTheta); 00786 } else { 00787 double LinW = GetProdLinWeight(NId, j) - GetThetaLL(NId, j, AId); 00788 double SqW = GetProdSqWeight(NId, j) - GetSqThetaLL(NId, j, AId); 00789 00790 for(int i = 0; i < 2; i++) { 00791 NonEdgeLLV[i].Add(LinW + log(GetOutCoeff(NId, j, AId, i, Theta))); 00792 NonEdgeLLV[i].Add(SqW + log(GetOutCoeff(NId, j, AId, i, SqTheta)) + log(0.5)); 00793 } 00794 } 00795 00796 if(Graph->IsEdge(j, NId)) { 00797 EdgeQ[0] += GetInCoeff(j, NId, AId, 0, LLTheta); 00798 EdgeQ[1] += GetInCoeff(j, NId, AId, 1, LLTheta); 00799 } else { 00800 double LinW = GetProdLinWeight(j, NId) - GetThetaLL(j, NId, AId); 00801 double SqW = GetProdSqWeight(j, NId) - GetSqThetaLL(j, NId, AId); 00802 00803 for(int i = 0; i < 2; i++) { 00804 NonEdgeLLV[i].Add(LinW + log(GetInCoeff(j, NId, AId, i, Theta))); 00805 NonEdgeLLV[i].Add(SqW + log(GetInCoeff(j, NId, AId, i, SqTheta)) + log(0.5)); 00806 } 00807 } 00808 } 00809 00810 NonEdgeQ[0] = LogSumExp(NonEdgeLLV[0]); 00811 NonEdgeQ[1] = LogSumExp(NonEdgeLLV[1]); 00812 00813 double Q[2]; 00814 Q[0] = log(Mu) + EdgeQ[0] - exp(NonEdgeQ[0]); 00815 Q[1] = log(1.0 - Mu) + EdgeQ[1] - exp(NonEdgeQ[1]); 00816 // double Q = Q1 - Q0; 00817 // printf(" [Phi_{%d}{%d}] :: Q0 = %f, Q1 = %f\n", NId, AId, Q0, Q1); 00818 00819 // Phi = 1.0 / (1.0 + exp(Q)); 00820 Phi = Q[0] - LogSumExp(Q, 2); 00821 Phi = exp(Phi); 00822 00823 return Phi - PhiVV.At(NId, AId); 00824 } 00825 00826 00827 const double TMAGFitBern::UpdatePhiMI(const double& Lambda, const int& NId, const int& AId, double& Phi) { 00828 TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId); 00829 TMAGAffMtx SqTheta(Theta); 00830 const int NNodes = Param.GetNodes(); 00831 const int NAttrs = Param.GetAttrs(); 00832 Theta.GetLLMtx(LLTheta); 00833 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00834 const double Mu = DistParam.GetMu(AId); 00835 00836 for(int i = 0; i < Theta.Len(); i++) { 00837 SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i); 00838 } 00839 00840 // Using log-sum-exp trick 00841 double EdgeQ[2], NonEdgeQ[2]; 00842 double MaxExp[2]; 00843 TFltV NonEdgeLLV[2]; 00844 TFltVV CntVV(4, NAttrs); CntVV.PutAll(0.0); 00845 for(int i = 0; i < 2; i++) { 00846 EdgeQ[i] = 0.0; 00847 NonEdgeQ[i] = 0.0; 00848 MaxExp[i] = -DBL_MAX; 00849 NonEdgeLLV[i].Gen(4 * NNodes, 0); 00850 } 00851 00852 for(int j = 0; j < NNodes; j++) { 00853 if(j == NId) { continue; } 00854 00855 for(int l = 0; l < NAttrs; l++) { 00856 if(l == AId) { continue; } 00857 CntVV(0, l) = CntVV(0, l) + PhiVV(j, AId) * PhiVV(j, l); 00858 CntVV(1, l) = CntVV(1, l) + PhiVV(j, AId) * (1.0-PhiVV(j, l)); 00859 CntVV(2, l) = CntVV(2, l) + (1.0-PhiVV(j, AId)) * PhiVV(j, l); 00860 CntVV(3, l) = CntVV(3, l) + (1.0-PhiVV(j, AId)) * (1.0-PhiVV(j, l)); 00861 } 00862 00863 if(Graph->IsEdge(NId, j)) { 00864 EdgeQ[0] += GetOutCoeff(NId, j, AId, 0, LLTheta); 00865 EdgeQ[1] += GetOutCoeff(NId, j, AId, 1, LLTheta); 00866 } else { 00867 double LinW = GetProdLinWeight(NId, j) - GetThetaLL(NId, j, AId); 00868 double SqW = GetProdSqWeight(NId, j) - GetSqThetaLL(NId, j, AId); 00869 00870 for(int i = 0; i < 2; i++) { 00871 NonEdgeLLV[i].Add(LinW + log(GetOutCoeff(NId, j, AId, i, Theta))); 00872 NonEdgeLLV[i].Add(SqW + log(GetOutCoeff(NId, j, AId, i, SqTheta)) + log(0.5)); 00873 } 00874 } 00875 00876 if(Graph->IsEdge(j, NId)) { 00877 EdgeQ[0] += GetInCoeff(j, NId, AId, 0, LLTheta); 00878 EdgeQ[1] += GetInCoeff(j, NId, AId, 1, LLTheta); 00879 } else { 00880 double LinW = GetProdLinWeight(j, NId) - GetThetaLL(j, NId, AId); 00881 double SqW = GetProdSqWeight(j, NId) - GetSqThetaLL(j, NId, AId); 00882 00883 for(int i = 0; i < 2; i++) { 00884 NonEdgeLLV[i].Add(LinW + log(GetInCoeff(j, NId, AId, i, Theta))); 00885 NonEdgeLLV[i].Add(SqW + log(GetInCoeff(j, NId, AId, i, SqTheta)) + log(0.5)); 00886 } 00887 } 00888 } 00889 00890 NonEdgeQ[0] = LogSumExp(NonEdgeLLV[0]); 00891 NonEdgeQ[1] = LogSumExp(NonEdgeLLV[1]); 00892 00893 double Q[2]; 00894 Q[0] = log(Mu) + EdgeQ[0] - exp(NonEdgeQ[0]); 00895 Q[1] = log(1.0 - Mu) + EdgeQ[1] - exp(NonEdgeQ[1]); 00896 double DeltaQ = Q[0] - Q[1]; 00897 00898 // double x[] = {0.1, 0.3, 0.5, 0.7, 0.9}; 00899 double x[] = {PhiVV(NId, AId)}; 00900 // for(int n = 0; n < 5; n++) { 00901 for(int n = 0; n < 1; n++) { 00902 // double LrnRate = 0.0002; 00903 double LrnRate = 0.001; 00904 for(int step = 0; step < 200; step++) { 00905 double Grad = GradPhiMI(x[n], NId, AId, Lambda, DeltaQ, CntVV); 00906 if(Grad > 0.0) { x[n] += LrnRate; } 00907 else { x[n] -= LrnRate; } 00908 if(x[n] > 0.9999) { x[n] = 0.9999; } 00909 if(x[n] < 0.0001) { x[n] = 0.0001; } 00910 LrnRate *= 0.995; 00911 } 00912 } 00913 00914 double MaxVal = -DBL_MAX; 00915 int MaxX = -1; 00916 // for(int n = 0; n < 5; n++) { 00917 for(int n = 0; n < 1; n++) { 00918 double Val = ObjPhiMI(x[n], NId, AId, Lambda, Q[0], Q[1], CntVV); 00919 if(Val > MaxVal) { 00920 MaxVal = Val; 00921 MaxX = n; 00922 } 00923 } 00924 IAssert(MaxX >= 0); 00925 00926 Phi = x[MaxX]; 00927 00928 return Phi - PhiVV.At(NId, AId); 00929 } 00930 00931 00932 const double TMAGFitBern::UpdateApxPhiMI(const double& Lambda, const int& NId, const int& AId, double& Phi, TFltVV& ProdVV) { 00933 TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId); 00934 const int NNodes = Param.GetNodes(); 00935 const int NAttrs = Param.GetAttrs(); 00936 Theta.GetLLMtx(LLTheta); 00937 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00938 const double Mu = DistParam.GetMu(AId); 00939 00940 TMAGAffMtx SqTheta(Theta); 00941 for(int i = 0; i < Theta.Len(); i++) { 00942 SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i); 00943 } 00944 00945 TFltV ProdV; ProdVV.GetRow(NId, ProdV); 00946 ProdV[0] -= GetAvgThetaLL(NId, NId, AId, true, false); 00947 ProdV[1] -= GetAvgThetaLL(NId, NId, AId, false, true); 00948 ProdV[2] -= log(2.0) + GetAvgSqThetaLL(NId, NId, AId, true, false); 00949 ProdV[3] -= log(2.0) + GetAvgSqThetaLL(NId, NId, AId, false, true); 00950 00951 // Using log-sum-exp trick 00952 double EdgeQ[2]; 00953 double MaxExp[2]; 00954 TFltV NonEdgeLLV[2]; 00955 TFltVV CntVV(4, NAttrs); CntVV.PutAll(0.0); 00956 for(int i = 0; i < 2; i++) { 00957 EdgeQ[i] = 0.0; 00958 MaxExp[i] = -DBL_MAX; 00959 NonEdgeLLV[i].Gen(4 * NNodes, 0); 00960 } 00961 00962 for(int F = 0; F < 2; F++) { 00963 NonEdgeLLV[F].Add(ProdV[0] + log(GetAvgOutCoeff(NId, AId, F, Theta))); 00964 NonEdgeLLV[F].Add(ProdV[1] + log(GetAvgInCoeff(NId, AId, F, Theta))); 00965 NonEdgeLLV[F].Add(ProdV[2] + log(GetAvgOutCoeff(NId, AId, F, SqTheta))); 00966 NonEdgeLLV[F].Add(ProdV[3] + log(GetAvgInCoeff(NId, AId, F, SqTheta))); 00967 } 00968 EdgeQ[0] = -(NNodes - 1) * exp(LogSumExp(NonEdgeLLV[0])); 00969 EdgeQ[1] = -(NNodes - 1) * exp(LogSumExp(NonEdgeLLV[1])); 00970 00971 00972 for(int l = 0; l < NAttrs; l++) { 00973 if(l == AId) { continue; } 00974 int BgId = (AId > l) ? AId : l; 00975 int SmId = (AId + l) - BgId; 00976 int SmL = (l < AId) ? 1 : 0; 00977 BgId *= 4; 00978 CntVV(0, l) = AvgPhiPairVV(SmId, BgId) - PhiVV(NId, AId) * PhiVV(NId, l); 00979 CntVV(1+SmL, l) = AvgPhiPairVV(SmId, BgId+1+SmL) - PhiVV(NId, AId) * (1.0-PhiVV(NId, l)); 00980 CntVV(2-SmL, l) = AvgPhiPairVV(SmId, BgId+2-SmL) - (1.0-PhiVV(NId, AId)) * PhiVV(NId, l); 00981 CntVV(3, l) = AvgPhiPairVV(SmId, BgId+3) - (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, l)); 00982 } 00983 00984 TNGraph::TNodeI NI = Graph->GetNI(NId); 00985 for(int d = 0; d < NI.GetOutDeg(); d++) { 00986 int Out = NI.GetOutNId(d); 00987 if(NId == Out) { continue; } 00988 double LinW = GetProdLinWeight(NId, Out) - GetThetaLL(NId, Out, AId); 00989 double SqW = GetProdSqWeight(NId, Out) - GetSqThetaLL(NId, Out, AId); 00990 00991 for(int F = 0; F < 2; F++) { 00992 EdgeQ[F] += GetOutCoeff(NId, Out, AId, F, LLTheta); 00993 EdgeQ[F] += exp(LinW + log(GetOutCoeff(NId, Out, AId, F, Theta))); 00994 EdgeQ[F] += 0.5 * exp(SqW + log(GetOutCoeff(NId, Out, AId, F, SqTheta))); 00995 } 00996 } 00997 for(int d = 0; d < NI.GetInDeg(); d++) { 00998 int In = NI.GetInNId(d); 00999 if(NId == In) { continue; } 01000 double LinW = GetProdLinWeight(In, NId) - GetThetaLL(In, NId, AId); 01001 double SqW = GetProdSqWeight(In, NId) - GetSqThetaLL(In, NId, AId); 01002 01003 for(int F = 0; F < 2; F++) { 01004 EdgeQ[F] += GetInCoeff(In, NId, AId, F, LLTheta); 01005 EdgeQ[F] += exp(LinW + log(GetInCoeff(In, NId, AId, F, Theta))); 01006 EdgeQ[F] += 0.5 * exp(SqW + log(GetInCoeff(In, NId, AId, F, SqTheta))); 01007 } 01008 } 01009 01010 EdgeQ[0] += log(Mu); 01011 EdgeQ[1] += log(1.0 - Mu); 01012 double DeltaQ = EdgeQ[0] - EdgeQ[1]; 01013 // printf("(%d, %d) :: Q[0] = %f, Q[1] = %f\n", NId, AId, EdgeQ[0], EdgeQ[1]); 01014 01015 // double x[] = {0.1, 0.3, 0.5, 0.7, 0.9}; 01016 double x[] = {PhiVV(NId, AId)}; 01017 TFltV ObjValV; ObjValV.Gen(60, 0); 01018 // for(int n = 0; n < 5; n++) { 01019 for(int n = 0; n < 1; n++) { 01020 // double LrnRate = 0.0002; 01021 double LrnRate = 0.001; 01022 for(int step = 0; step < 50; step++) { 01023 // for(int step = 0; step < 10; step++) { 01024 double Grad = GradPhiMI(x[n], NId, AId, Lambda, DeltaQ, CntVV); 01025 if(Grad > 0.0) { x[n] += LrnRate; } 01026 else { x[n] -= LrnRate; } 01027 if(x[n] > 0.9999) { x[n] = 0.9999; } 01028 if(x[n] < 0.0001) { x[n] = 0.0001; } 01029 if(x[n] == 0.9999 || x[n] == 0.0001) { 01030 break; 01031 } 01032 LrnRate *= 0.995; 01033 } 01034 ObjValV.Add(x[n]); 01035 // ObjValV.Add(PhiVV(NId, AId)); 01036 } 01037 01038 double MaxVal = -DBL_MAX; 01039 int MaxX = -1; 01040 // for(int n = 0; n < 5; n++) { 01041 for(int n = 0; n < ObjValV.Len(); n++) { 01042 double Val = ObjPhiMI(ObjValV[n], NId, AId, Lambda, EdgeQ[0], EdgeQ[1], CntVV); 01043 if(Val > MaxVal) { 01044 MaxVal = Val; 01045 MaxX = n; 01046 } else if(MaxX < 0) { 01047 printf("(%d, %d) : %f Q[0] = %f Q[1] = %f Val = %f\n", NId, AId, double(x[n]), double(EdgeQ[0]), double(EdgeQ[1]), Val); 01048 } 01049 } 01050 IAssert(MaxX >= 0); 01051 01052 Phi = ObjValV[MaxX]; 01053 01054 return Phi - PhiVV.At(NId, AId); 01055 } 01056 01057 01058 01059 double TMAGFitBern::DoEStepOneIter(const TFltV& TrueMuV, TFltVV& NewPhiVV, const double& Lambda) { 01060 const int NNodes = Param.GetNodes(); 01061 const int NAttrs = Param.GetAttrs(); 01062 double MaxDelta = 0, L1 = 0; 01063 double Val; 01064 TFltIntIntTrV NewVal; 01065 int RndCount = 0; 01066 // double OldMI = 0.0, NewMI = 0.0; 01067 TFltV MuV(NAttrs); MuV.PutAll(0.0); 01068 TIntV NIndV(NNodes), AIndV(NAttrs); 01069 01070 // Update Phi 01071 /* 01072 for(int i = 0; i < NNodes; i++) { NIndV[i] = i; } 01073 for(int l = 0; l < NAttrs; l++) { AIndV[l] = l; } 01074 if(Randomized) { 01075 NIndV.Shuffle(TMAGNodeBern::Rnd); 01076 AIndV.Shuffle(TMAGNodeBern::Rnd); 01077 } 01078 */ 01079 01080 NewVal.Gen(NAttrs * 2); 01081 for(int i = 0; i < NNodes; i++) { 01082 // const int NId = NIndV[i]%NNodes; 01083 for(int l = 0; l < NAttrs * 2; l++) { 01084 const int NId = TMAGNodeBern::Rnd.GetUniDevInt(NNodes); 01085 const int AId = TMAGNodeBern::Rnd.GetUniDevInt(NAttrs); 01086 // const int AId = AIndV[l]%NAttrs; 01087 // double Delta = UpdatePhi(NId, AId, Val); 01088 double Delta = 0.0; 01089 if(KnownVV(NId, AId)) { 01090 Val = PhiVV.At(NId, AId); 01091 } else { 01092 Delta = UpdatePhiMI(Lambda, NId, AId, Val); 01093 } 01094 01095 // PhiVV.At(NId, AId) = Val; 01096 NewVal[l] = TFltIntIntTr(Val, NId, AId); 01097 01098 // MuV[AId] = MuV[AId] + Val; 01099 if(fabs(Delta) > MaxDelta) { 01100 MaxDelta = fabs(Delta); 01101 } 01102 if(Val > 0.3 && Val < 0.7) { RndCount++; } 01103 } 01104 01105 for(int l = 0; l < NAttrs * 2; l++) { 01106 const int NId = NewVal[l].Val2; 01107 const int AId = NewVal[l].Val3; 01108 PhiVV.At(NId, AId) = NewVal[l].Val1; 01109 } 01110 } 01111 for(int i = 0; i < NNodes; i++) { 01112 for(int l = 0; l < NAttrs; l++) { 01113 MuV[l] = MuV[l] + PhiVV.At(i, l); 01114 } 01115 } 01116 for(int l = 0; l < NAttrs; l++) { 01117 MuV[l] = MuV[l] / double(NNodes); 01118 } 01119 01120 TFltV SortMuV = MuV; 01121 double Avg = 0.0; 01122 SortMuV.Sort(false); 01123 for(int l = 0; l < NAttrs; l++) { 01124 printf(" F[%d] = %.3f", l, double(MuV[l])); 01125 Avg += SortMuV[l]; 01126 L1 += fabs(TrueMuV[l] - SortMuV[l]); 01127 } 01128 printf("\n"); 01129 printf(" Rnd = %d(%.3f)", RndCount, double(RndCount) / double(NNodes * NAttrs)); 01130 printf(" Avg = %.3f\n", Avg / double(NAttrs)); 01131 L1 /= double(NAttrs); 01132 01133 return L1; 01134 } 01135 01136 01137 double TMAGFitBern::DoEStepApxOneIter(const TFltV& TrueMuV, TFltVV& NewPhiVV, const double& Lambda) { 01138 const int NNodes = Param.GetNodes(); 01139 const int NAttrs = Param.GetAttrs(); 01140 double MaxDelta = 0, L1 = 0; 01141 double Val; 01142 TFltIntIntTrV NewVal; 01143 int RndCount = 0; 01144 // double OldMI = 0.0, NewMI = 0.0; 01145 TFltV MuV(NAttrs); MuV.PutAll(0.0); 01146 TFltVV ProdVV(NNodes, 4); ProdVV.PutAll(0.0); 01147 TIntV NIndV(NNodes), AIndV(NAttrs); 01148 01149 // Update Phi 01150 /* 01151 for(int i = 0; i < NNodes; i++) { NIndV[i] = i; } 01152 for(int l = 0; l < NAttrs; l++) { AIndV[l] = l; } 01153 if(Randomized) { 01154 NIndV.Shuffle(TMAGNodeBern::Rnd); 01155 AIndV.Shuffle(TMAGNodeBern::Rnd); 01156 } 01157 */ 01158 01159 AvgPhiV.Gen(NAttrs); AvgPhiV.PutAll(0.0); 01160 AvgPhiPairVV.Gen(NAttrs, 4*NAttrs); AvgPhiPairVV.PutAll(0.0); 01161 for(int i = 0; i < NNodes; i++) { 01162 for(int l = 0; l < NAttrs; l++) { 01163 for(int p = l+1; p < NAttrs; p++) { 01164 int index = 4 * p; 01165 AvgPhiPairVV(l, index) += PhiVV(i, l) * PhiVV(i, p); 01166 AvgPhiPairVV(l, index+1) += PhiVV(i, l) * (1.0-PhiVV(i, p)); 01167 AvgPhiPairVV(l, index+2) += (1.0-PhiVV(i, l)) * PhiVV(i, p); 01168 AvgPhiPairVV(l, index+3) += (1.0-PhiVV(i, l)) * (1.0-PhiVV(i, p)); 01169 } 01170 AvgPhiV[l] += PhiVV(i, l); 01171 } 01172 } 01173 for(int i = 0; i < NNodes; i++) { 01174 ProdVV(i, 0) = GetAvgProdLinWeight(i, i, true, false); 01175 ProdVV(i, 1) = GetAvgProdLinWeight(i, i, false, true); 01176 ProdVV(i, 2) = GetAvgProdSqWeight(i, i, true, false); 01177 ProdVV(i, 3) = GetAvgProdSqWeight(i, i, false, true); 01178 } 01179 01180 const int Iter = 3; 01181 int NId; 01182 01183 NewVal.Gen(NAttrs * Iter); 01184 for(int i = 0; i < NNodes * Iter; i++) { 01185 for(int l = 0; l < NAttrs; l++) { 01186 const int AId = TMAGNodeBern::Rnd.GetUniDevInt(NAttrs); 01187 double Delta = 0.0; 01188 if(KnownVV(NId, AId)) { 01189 Val = PhiVV.At(NId, AId); 01190 } else { 01191 Delta = UpdateApxPhiMI(Lambda, NId, AId, Val, ProdVV); 01192 } 01193 01194 // PhiVV.At(NId, AId) = Val; 01195 NewVal[l] = TFltIntIntTr(Val, NId, AId); 01196 01197 // MuV[AId] = MuV[AId] + Val; 01198 if(fabs(Delta) > MaxDelta) { 01199 MaxDelta = fabs(Delta); 01200 } 01201 if(Val > 0.3 && Val < 0.7) { RndCount++; } 01202 } 01203 01204 for(int l = 0; l < NAttrs; l++) { 01205 const int NId = NewVal[l].Val2; 01206 const int AId = NewVal[l].Val3; 01207 01208 ProdVV(NId, 0) -= GetAvgThetaLL(NId, NId, AId, true, false); 01209 ProdVV(NId, 1) -= GetAvgThetaLL(NId, NId, AId, false, true); 01210 ProdVV(NId, 2) -= GetAvgSqThetaLL(NId, NId, AId, true, false); 01211 ProdVV(NId, 3) -= GetAvgSqThetaLL(NId, NId, AId, false, true); 01212 for(int p = 0; p < NAttrs; p++) { 01213 if(p > AId) { 01214 int index = 4 * p; 01215 AvgPhiPairVV(AId, index) -= PhiVV(NId, AId) * PhiVV(NId, p); 01216 AvgPhiPairVV(AId, index+1) -= PhiVV(NId, AId) * (1.0-PhiVV(NId, p)); 01217 AvgPhiPairVV(AId, index+2) -= (1.0-PhiVV(NId, AId)) * PhiVV(NId, p); 01218 AvgPhiPairVV(AId, index+3) -= (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, p)); 01219 } else if (p < AId) { 01220 int index = 4 * AId; 01221 AvgPhiPairVV(p, index) -= PhiVV(NId, p) * PhiVV(NId, AId); 01222 AvgPhiPairVV(p, index+1) -= PhiVV(NId, p) * (1.0-PhiVV(NId, AId)); 01223 AvgPhiPairVV(p, index+2) -= (1.0-PhiVV(NId, p)) * PhiVV(NId, AId); 01224 AvgPhiPairVV(p, index+3) -= (1.0-PhiVV(NId, p)) * (1.0-PhiVV(NId, AId)); 01225 } 01226 } 01227 AvgPhiV[AId] -= PhiVV(NId, AId); 01228 01229 PhiVV.At(NId, AId) = NewVal[l].Val1; 01230 01231 ProdVV(NId, 0) += GetAvgThetaLL(NId, NId, AId, true, false); 01232 ProdVV(NId, 1) += GetAvgThetaLL(NId, NId, AId, false, true); 01233 ProdVV(NId, 2) += GetAvgSqThetaLL(NId, NId, AId, true, false); 01234 ProdVV(NId, 3) += GetAvgSqThetaLL(NId, NId, AId, false, true); 01235 for(int p = 0; p < NAttrs; p++) { 01236 if(p > AId) { 01237 int index = 4 * p; 01238 AvgPhiPairVV(AId, index) += PhiVV(NId, AId) * PhiVV(NId, p); 01239 AvgPhiPairVV(AId, index+1) += PhiVV(NId, AId) * (1.0-PhiVV(NId, p)); 01240 AvgPhiPairVV(AId, index+2) += (1.0-PhiVV(NId, AId)) * PhiVV(NId, p); 01241 AvgPhiPairVV(AId, index+3) += (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, p)); 01242 } else if (p < AId) { 01243 int index = 4 * AId; 01244 AvgPhiPairVV(p, index) += PhiVV(NId, p) * PhiVV(NId, AId); 01245 AvgPhiPairVV(p, index+1) += PhiVV(NId, p) * (1.0-PhiVV(NId, AId)); 01246 AvgPhiPairVV(p, index+2) += (1.0-PhiVV(NId, p)) * PhiVV(NId, AId); 01247 AvgPhiPairVV(p, index+3) += (1.0-PhiVV(NId, p)) * (1.0-PhiVV(NId, AId)); 01248 } 01249 } 01250 AvgPhiV[AId] += PhiVV(NId, AId); 01251 } 01252 } 01253 01254 for(int l = 0; l < NAttrs; l++) { 01255 MuV[l] = AvgPhiV[l] / double(NNodes); 01256 } 01257 01258 TFltV SortMuV = MuV; 01259 double Avg = 0.0; 01260 // SortMuV.Sort(false); 01261 for(int l = 0; l < NAttrs; l++) { 01262 printf(" F[%d] = %.3f", l, double(MuV[l])); 01263 Avg += SortMuV[l]; 01264 // L1 += fabs(TrueMuV[l] - SortMuV[l]); 01265 } 01266 printf("\n"); 01267 printf(" Rnd = %d(%.3f)", RndCount, double(RndCount) / double(NNodes * NAttrs)); 01268 printf(" Avg = %.3f\n", Avg / double(NAttrs)); 01269 // printf(" Linf = %f\n", MaxDelta); 01270 // L1 /= double(NAttrs); 01271 01272 return L1; 01273 } 01274 01275 double TMAGFitBern::DoEStep(const TFltV& TrueMuV, const int& NIter, double& LL, const double& Lambda) { 01276 const int NNodes = Param.GetNodes(); 01277 const int NAttrs = Param.GetAttrs(); 01278 TFltVV NewPhiVV(NNodes, NAttrs); 01279 // double MI; 01280 01281 TFltV Delta(NIter); 01282 01283 for(int i = 0; i < NIter; i++) { 01284 TExeTm IterTm; 01285 printf("EStep iteration : %d\n", (i+1)); 01286 if(ESpeedUp) { 01287 Delta[i] = DoEStepApxOneIter(TrueMuV, NewPhiVV, Lambda); 01288 } else { 01289 Delta[i] = DoEStepOneIter(TrueMuV, NewPhiVV, Lambda); 01290 } 01291 // PhiVV = NewPhiVV; 01292 01293 printf(" (Time = %s)\n", IterTm.GetTmStr()); 01294 } 01295 printf("\n"); 01296 01297 NewPhiVV.Clr(); 01298 01299 return Delta.Last(); 01300 } 01301 01302 const double TMAGFitBern::UpdateMu(const int& AId) { 01303 const int NNodes = Param.GetNodes(); 01304 TMAGNodeBern DistParam = Param.GetNodeAttr(); 01305 const double OldMu = DistParam.GetMu(AId); 01306 double NewMu = 0.0; 01307 01308 for(int i = 0; i < NNodes; i++) { 01309 NewMu += PhiVV.At(i, AId); 01310 } 01311 AvgPhiV[AId] = NewMu; 01312 NewMu /= double(NNodes); 01313 01314 printf(" [Posterior Mu] = %.4f\n", NewMu); 01315 01316 double Delta = fabs(NewMu - OldMu); 01317 DistParam.SetMu(AId, NewMu); 01318 Param.SetNodeAttr(DistParam); 01319 01320 return Delta; 01321 } 01322 01323 const void TMAGFitBern::GradAffMtx(const int& AId, const TFltVV& ProdVV, const TFltVV& SqVV, const TMAGAffMtx& CurMtx, TFltV& GradV) { 01324 const int NNodes = Param.GetNodes(); 01325 // const int NAttrs = Param.GetAttrs(); 01326 GradV.PutAll(0.0); 01327 01328 for(int i = 0; i < NNodes; i++) { 01329 for(int j = 0; j < NNodes; j++) { 01330 double Prod = ProdVV(i, j) - GetThetaLL(i, j, AId); 01331 double Sq = SqVV(i, j) - GetSqThetaLL(i, j, AId); 01332 01333 for(int p = 0; p < 4; p++) { 01334 int Ai = p / 2; 01335 int Aj = p % 2; 01336 double Prob = GetProbPhi(i, j, AId, Ai, Aj); 01337 if(Graph->IsEdge(i, j)) { 01338 GradV[p] += Prob / CurMtx.At(p); 01339 } else { 01340 GradV[p] -= Prob * exp(Prod); 01341 GradV[p] -= Prob * exp(Sq) * CurMtx.At(p); 01342 } 01343 } 01344 } 01345 } 01346 } 01347 01348 01349 const void TMAGFitBern::GradApxAffMtx(const int& AId, const TFltVV& ProdVV, const TFltVV& SqVV, const TMAGAffMtx& CurMtx, TFltV& GradV) { 01350 const int NNodes = Param.GetNodes(); 01351 // const int NAttrs = Param.GetAttrs(); 01352 // const int NSq = NNodes * (NNodes - 1); 01353 GradV.PutAll(0.0); 01354 01355 TFltV LogSumV; 01356 for(int p = 0; p < 4; p++) { 01357 int Ai = p / 2; 01358 int Aj = p % 2; 01359 LogSumV.Gen(NNodes * 4, 0); 01360 01361 for(int i = 0; i < NNodes; i++) { 01362 const double LProd = ProdVV(i, 0) - GetAvgThetaLL(i, i, AId, true, false); 01363 const double LSq = SqVV(i, 0) - GetAvgSqThetaLL(i, i, AId, true, false); 01364 const double RProd = ProdVV(i, 1) - GetAvgThetaLL(i, i, AId, false, true); 01365 const double RSq = SqVV(i, 1) - GetAvgSqThetaLL(i, i, AId, false, true); 01366 01367 LogSumV.Add(LProd + log(GetProbMu(i, i, AId, Ai, Aj, true, false))); 01368 LogSumV.Add(LSq + log(GetProbMu(i, i, AId, Ai, Aj, true, false)) + log(CurMtx.At(p))); 01369 LogSumV.Add(RProd + log(GetProbMu(i, i, AId, Ai, Aj, false, true))); 01370 LogSumV.Add(RSq + log(GetProbMu(i, i, AId, Ai, Aj, false, true)) + log(CurMtx.At(p))); 01371 } 01372 double LogSum = LogSumExp(LogSumV); 01373 GradV[p] -= (NNodes - 1) * 0.5 * exp(LogSum); 01374 } 01375 01376 for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 01377 const int NId1 = EI.GetSrcNId(); 01378 const int NId2 = EI.GetDstNId(); 01379 const double ProdOne = GetProdLinWeight(NId1, NId2) - GetThetaLL(NId1, NId2, AId); 01380 const double SqOne = GetProdSqWeight(NId1, NId2) - GetSqThetaLL(NId1, NId2, AId); 01381 01382 for(int p = 0; p < 4; p++) { 01383 int Ai = p / 2; 01384 int Aj = p % 2; 01385 double Prob = GetProbPhi(NId1, NId2, AId, Ai, Aj); 01386 GradV[p] += Prob / CurMtx.At(p); 01387 GradV[p] += Prob * exp(ProdOne); 01388 GradV[p] += Prob * exp(SqOne) * CurMtx.At(p); 01389 } 01390 } 01391 01392 #if 0 01393 const double Prod = ProdVV(0, 0) - GetAvgThetaLL(0, 0, AId, false, false); 01394 const double Sq = SqVV(0, 0) - GetAvgSqThetaLL(0, 0, AId, false, false); 01395 for(int p = 0; p < 4; p++) { 01396 int Ai = p / 2; 01397 int Aj = p % 2; 01398 GradV[p] -= NSq * exp(Prod) * GetProbMu(0, 0, AId, Ai, Aj, false, false); 01399 GradV[p] -= NSq * exp(Sq) * GetProbMu(0, 0, AId, Ai, Aj, false, false) * CurMtx.At(p); 01400 } 01401 01402 for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 01403 const int NId1 = EI.GetSrcNId(); 01404 const int NId2 = EI.GetDstNId(); 01405 const double ProdOne = GetProdLinWeight(NId1, NId2) - GetThetaLL(NId1, NId2, AId); 01406 const double SqOne = GetProdSqWeight(NId1, NId2) - GetSqThetaLL(NId1, NId2, AId); 01407 01408 for(int p = 0; p < 4; p++) { 01409 int Ai = p / 2; 01410 int Aj = p % 2; 01411 double Prob = GetProbPhi(NId1, NId2, AId, Ai, Aj); 01412 // GradV[p] += Prob / CurMtx.At(p); 01413 // GradV[p] += Prob * exp(ProdOne); 01414 // GradV[p] += Prob * exp(SqOne) * CurMtx.At(p); 01415 } 01416 } 01417 #endif 01418 } 01419 01420 01421 const double TMAGFitBern::UpdateAffMtx(const int& AId, const double& LrnRate, const double& MaxGrad, const double& Lambda, TFltVV& ProdVV, TFltVV& SqVV, TMAGAffMtx& NewMtx) { 01422 double Delta = 0.0; 01423 // const int NNodes = Param.GetNodes(); 01424 // const int NAttrs = Param.GetAttrs(); 01425 TMAGAffMtx AffMtx = Param.GetMtx(AId); 01426 01427 TFltV GradV(4); 01428 TFltV HessV(4); 01429 if(MSpeedUp) { 01430 GradApxAffMtx(AId, ProdVV, SqVV, NewMtx, GradV); 01431 } else { 01432 GradAffMtx(AId, ProdVV, SqVV, NewMtx, GradV); 01433 } 01434 01435 double Ratio = 1.0; 01436 for(int p = 0; p < 4; p++) { 01437 if(fabs(Ratio * LrnRate * GradV[p]) > MaxGrad) { 01438 Ratio = MaxGrad / fabs(LrnRate * GradV[p]); 01439 } 01440 } 01441 01442 for(int p = 0; p < 4; p++) { 01443 GradV[p] *= (Ratio * LrnRate); 01444 NewMtx.At(p) = AffMtx.At(p) + GradV[p]; 01445 // if(NewMtx.At(p) > 0.9999) { NewMtx.At(p) = 0.9999; } 01446 if(NewMtx.At(p) < 0.0001) { NewMtx.At(p) = 0.0001; } 01447 } 01448 01449 printf(" [Attr = %d]\n", AId); 01450 printf(" %s + [%f, %f; %f %f] -----> %s\n", (AffMtx.GetMtxStr()).GetCStr(), double(GradV[0]), double(GradV[1]), double(GradV[2]), double(GradV[3]), (NewMtx.GetMtxStr()).GetCStr()); 01451 01452 // Param.SetMtx(AId, NewMtx); 01453 return Delta; 01454 } 01455 01456 01457 void TMAGFitBern::NormalizeAffMtxV(TMAGAffMtxV& MtxV, const bool UseMu) { 01458 const int NNodes = Param.GetNodes(); 01459 const int NAttrs = MtxV.Len(); 01460 TFltV MuV = GetMuV(); 01461 double Product = 1.0, ExpEdge = NNodes * (NNodes - 1); 01462 01463 TFltV SumV(NAttrs), EdgeSumV(NAttrs); 01464 SumV.PutAll(0.0); EdgeSumV.PutAll(0.0); 01465 for(int l = 0; l < NAttrs; l++) { 01466 double Mu = (UseMu) ? double(MuV[l]) : (AvgPhiV[l] / double(NNodes)); 01467 EdgeSumV[l] += Mu * Mu * MtxV[l].At(0, 0); 01468 EdgeSumV[l] += Mu * (1.0-Mu) * MtxV[l].At(0, 1); 01469 EdgeSumV[l] += Mu * (1.0-Mu) * MtxV[l].At(1, 0); 01470 EdgeSumV[l] += (1.0-Mu) * (1.0-Mu) * MtxV[l].At(1, 1); 01471 SumV[l] = SumV[l] + MtxV[l].At(0, 0); 01472 SumV[l] = SumV[l] + MtxV[l].At(0, 1); 01473 SumV[l] = SumV[l] + MtxV[l].At(1, 0); 01474 SumV[l] = SumV[l] + MtxV[l].At(1, 1); 01475 Product *= SumV[l]; 01476 ExpEdge *= EdgeSumV[l]; 01477 } 01478 ExpEdge = Graph->GetEdges() / ExpEdge; 01479 NormConst *= Product; 01480 // NormConst = ExpEdge; 01481 Product = 1.0; 01482 // Product = pow(Product * ExpEdge, 1.0 / double(NAttrs)); 01483 01484 for(int l = 0; l < NAttrs; l++) { 01485 for(int p = 0; p < 4; p++) { 01486 MtxV[l].At(p) = MtxV[l].At(p) * Product / SumV[l]; 01487 // MtxV[l].At(p) = MtxV[l].At(p) * Product / MtxV[l].At(0, 0); 01488 // MtxV[l].At(p) = MtxV[l].At(p) * Product; 01489 // if(MtxV[l].At(p) > 0.9999) { MtxV[l].At(p) = 0.9999; } 01490 // if(MtxV[l].At(p) < 0.0001) { MtxV[l].At(p) = 0.0001; } 01491 } 01492 } 01493 } 01494 01495 void TMAGFitBern::UnNormalizeAffMtxV(TMAGAffMtxV& MtxV, const bool UseMu) { 01496 const int NNodes = Param.GetNodes(); 01497 const int NAttrs = MtxV.Len(); 01498 TFltIntPrV MaxEntV(NAttrs); 01499 TFltV MuV = GetMuV(); 01500 NormalizeAffMtxV(MtxV, UseMu); 01501 01502 double ExpEdge = NNodes * (NNodes - 1); 01503 for(int l = 0; l < NAttrs; l++) { 01504 double Mu = MuV[l]; 01505 double EdgeSum = Mu * Mu * MtxV[l].At(0, 0); 01506 EdgeSum += Mu * (1.0-Mu) * MtxV[l].At(0, 1); 01507 EdgeSum += Mu * (1.0-Mu) * MtxV[l].At(1, 0); 01508 EdgeSum += (1.0-Mu) * (1.0-Mu) * MtxV[l].At(1, 1); 01509 ExpEdge *= EdgeSum; 01510 } 01511 NormConst = double(Graph->GetEdges()) / ExpEdge; 01512 // NormConst *= ExpEdge; 01513 01514 for(int l = 0; l < NAttrs; l++) { 01515 MaxEntV[l] = TFltIntPr(-1, l); 01516 for(int p = 0; p < 4; p++) { 01517 if(MaxEntV[l].Val1 < MtxV[l].At(p)) { MaxEntV[l].Val1 = MtxV[l].At(p); } 01518 } 01519 } 01520 MaxEntV.Sort(false); 01521 01522 for(int l = 0; l < NAttrs; l++) { 01523 int CurId = MaxEntV[l].Val2; 01524 double Factor = pow(NormConst, 1.0 / double(NAttrs - l)); 01525 double MaxFactor = 0.9999 / MaxEntV[l].Val1; 01526 Factor = (Factor > MaxFactor) ? MaxFactor : Factor; 01527 NormConst = NormConst / Factor; 01528 01529 for(int p = 0; p < 4; p++) { 01530 MtxV[CurId].At(p) = MtxV[CurId].At(p) * Factor; 01531 } 01532 } 01533 } 01534 01535 const void TMAGFitBern::PrepareUpdateAffMtx(TFltVV& ProdVV, TFltVV& SqVV) { 01536 const int NNodes = Param.GetNodes(); 01537 ProdVV.Gen(NNodes, NNodes); 01538 SqVV.Gen(NNodes, NNodes); 01539 01540 for(int i = 0; i < NNodes; i++) { 01541 for(int j = 0; j < NNodes; j++) { 01542 ProdVV(i, j) = GetProdLinWeight(i, j); 01543 SqVV(i, j) = GetProdSqWeight(i, j); 01544 } 01545 } 01546 } 01547 01548 const void TMAGFitBern::PrepareUpdateApxAffMtx(TFltVV& ProdVV, TFltVV& SqVV) { 01549 const int NNodes = Param.GetNodes(); 01550 ProdVV.Gen(NNodes, 2); 01551 SqVV.Gen(NNodes, 2); 01552 01553 for(int i = 0; i < NNodes; i++) { 01554 ProdVV(i, 0) = GetAvgProdLinWeight(i, i, true, false); 01555 ProdVV(i, 1) = GetAvgProdLinWeight(i, i, false, true); 01556 SqVV(i, 0) = GetAvgProdSqWeight(i, i, true, false); 01557 SqVV(i, 1) = GetAvgProdSqWeight(i, i, false, true); 01558 } 01559 } 01560 01561 const double TMAGFitBern::UpdateAffMtxV(const int& GradIter, const double& LrnRate, const double& MaxGrad, const double& Lambda, const int& NReal) { 01562 const int NNodes = Param.GetNodes(); 01563 const int NAttrs = Param.GetAttrs(); 01564 const TMAGNodeBern DistParam = Param.GetNodeAttr(); 01565 const TFltV MuV = DistParam.GetMuV(); 01566 double Delta = 0.0; 01567 double DecLrnRate = LrnRate, DecMaxGrad = MaxGrad; 01568 01569 TFltVV ProdVV(NNodes, NNodes), SqVV(NNodes, NNodes); 01570 TMAGAffMtxV NewMtxV, OldMtxV; 01571 Param.GetMtxV(OldMtxV); 01572 Param.GetMtxV(NewMtxV); 01573 01574 for(int g = 0; g < GradIter; g++) { 01575 if(MSpeedUp) { 01576 PrepareUpdateApxAffMtx(ProdVV, SqVV); 01577 } else { 01578 PrepareUpdateAffMtx(ProdVV, SqVV); 01579 } 01580 01581 printf(" [Grad step = %d]\n", (g+1)); 01582 // for(int l = 0; l < NAttrs; l++) { 01583 for(int l = NReal; l < NAttrs; l++) { 01584 UpdateAffMtx(l, DecLrnRate, DecMaxGrad, Lambda, ProdVV, SqVV, NewMtxV[l]); 01585 Param.SetMtxV(NewMtxV); 01586 } 01587 DecLrnRate *= 0.97; 01588 DecMaxGrad *= 0.97; 01589 01590 printf("\n"); 01591 NormalizeAffMtxV(NewMtxV, true); 01592 Param.SetMtxV(NewMtxV); 01593 } 01594 NormalizeAffMtxV(NewMtxV, true); 01595 01596 printf( "\nFinal\n"); 01597 for(int l = 0; l < NAttrs; l++) { 01598 printf(" ["); 01599 for(int p = 0; p < 4; p++) { 01600 // NewMtxV[l].At(p) = NewMtxV[l].At(p) * Product / SumV[l]; 01601 Delta += fabs(OldMtxV[l].At(p) - NewMtxV[l].At(p)); 01602 printf(" %.4f ", double(NewMtxV[l].At(p))); 01603 } 01604 printf("]\n"); 01605 } 01606 Param.SetMtxV(NewMtxV); 01607 ProdVV.Clr(); SqVV.Clr(); 01608 return Delta; 01609 } 01610 01611 void TMAGFitBern::DoMStep(const int& GradIter, const double& LrnRate, const double& MaxGrad, const double& Lambda, const int& NReal) { 01612 // const int NNodes = Param.GetNodes(); 01613 const int NAttrs = Param.GetAttrs(); 01614 double MuDelta = 0.0, AffMtxDelta = 0.0; 01615 01616 TExeTm ExeTm; 01617 01618 printf("\n"); 01619 AvgPhiV.Gen(NAttrs); AvgPhiV.PutAll(0.0); 01620 for(int l = 0; l < NAttrs; l++) { 01621 // printf(" [Attr = %d]\n", l); 01622 MuDelta += UpdateMu(l); 01623 } 01624 printf("\n"); 01625 01626 printf(" == Update Theta\n"); 01627 AffMtxDelta += UpdateAffMtxV(GradIter, LrnRate, MaxGrad, Lambda, NReal); 01628 printf("\n"); 01629 printf("Elpased time = %s\n", ExeTm.GetTmStr()); 01630 printf("\n"); 01631 } 01632 01633 void TMAGFitBern::DoEMAlg(const int& NStep, const int& NEstep, const int& NMstep, const double& LrnRate, const double& MaxGrad, const double& Lambda, const double& ReInit, const int& NReal) { 01634 const int NNodes = Param.GetNodes(); 01635 const int NAttrs = Param.GetAttrs(); 01636 TIntV IndexV; 01637 double LL; 01638 // double MuDist, MtxDist; 01639 01640 MuHisV.Gen(NStep + 1, 0); 01641 MtxHisV.Gen(NStep + 1, 0); 01642 LLHisV.Gen(NStep + 1, 0); 01643 01644 printf("--------------------------------------------\n"); 01645 printf("Before EM Iteration\n"); 01646 printf("--------------------------------------------\n"); 01647 01648 TMAGAffMtxV InitMtxV; 01649 TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 01650 Param.GetMtxV(InitMtxV); 01651 TFltV InitMuV = NodeAttr.GetMuV(); 01652 01653 for(int i = 0; i < NNodes; i++) { 01654 for(int l = 0; l < NAttrs; l++) { 01655 if(! KnownVV(i, l)) { 01656 PhiVV.At(i, l) = TMAGNodeBern::Rnd.GetUniDev(); 01657 } 01658 } 01659 } 01660 01661 if(Debug) { 01662 double LL = ComputeApxLL(); 01663 MuHisV.Add(InitMuV); 01664 MtxHisV.Add(InitMtxV); 01665 LLHisV.Add(LL); 01666 } 01667 01668 NormalizeAffMtxV(InitMtxV, true); 01669 Param.SetMtxV(InitMtxV); 01670 01671 for(int n = 0; n < NStep; n++) { 01672 printf("--------------------------------------------\n"); 01673 printf("EM Iteration : %d\n", (n+1)); 01674 printf("--------------------------------------------\n"); 01675 01676 NodeAttr = Param.GetNodeAttr(); 01677 for(int i = 0; i < NNodes; i++) { 01678 for(int l = 0; l < NAttrs; l++) { 01679 if(!KnownVV(i, l) && TMAGNodeBern::Rnd.GetUniDev() < ReInit) { 01680 PhiVV.At(i, l) = TMAGNodeBern::Rnd.GetUniDev(); 01681 } 01682 } 01683 } 01684 DoEStep(InitMuV, NEstep, LL, Lambda); 01685 Param.GetMtxV(InitMtxV); 01686 // NormalizeAffMtxV(InitMtxV); 01687 Param.SetMtxV(InitMtxV); 01688 DoMStep(NMstep, LrnRate, MaxGrad, Lambda, NReal); 01689 01690 printf("\n"); 01691 01692 if(Debug) { 01693 double LL = ComputeApxLL(); 01694 MuHisV.Add(InitMuV); 01695 MtxHisV.Add(InitMtxV); 01696 LLHisV.Add(LL); 01697 printf(" ApxLL = %.2f (Const = %f)\n", LL, double(NormConst)); 01698 } 01699 01700 } 01701 Param.GetMtxV(InitMtxV); 01702 UnNormalizeAffMtxV(InitMtxV, true); 01703 Param.SetMtxV(InitMtxV); 01704 } 01705 01706 void TMAGFitBern::MakeCCDF(const TFltPrV& RawV, TFltPrV& CcdfV) { 01707 double Total = 0.0; 01708 CcdfV.Gen(RawV.Len(), 0); 01709 01710 for(int i = 0; i < RawV.Len(); i++) { 01711 if(RawV[i].Val2 <= 0) { continue; } 01712 Total += RawV[i].Val2; 01713 CcdfV.Add(RawV[i]); 01714 IAssert(RawV[i].Val2 > 0); 01715 } 01716 for(int i = 1; i < CcdfV.Len(); i++) { 01717 CcdfV[i].Val2 += CcdfV[i-1].Val2; 01718 } 01719 01720 for(int i = CcdfV.Len() - 1; i > 0; i--) { 01721 CcdfV[i].Val2 = (Total - CcdfV[i-1].Val2) ; 01722 if(CcdfV[i].Val2 <= 0) { printf("CCDF = %f\n", double(CcdfV[i].Val2));} 01723 IAssert(CcdfV[i].Val2 > 0); 01724 } 01725 CcdfV[0].Val2 = Total; 01726 // CcdfV[0].Val2 = 1.0; 01727 } 01728 01729 01730 void TMAGFitBern::PlotProperties(const TStr& FNm) { 01731 const int NNodes = Param.GetNodes(); 01732 const int NAttrs = Param.GetAttrs(); 01733 TMAGParam<TMAGNodeBern> MAGGen(NNodes, NAttrs); 01734 TMAGNodeBern MAGNode = Param.GetNodeAttr(); 01735 MAGGen.SetNodeAttr(MAGNode); 01736 TMAGAffMtxV MtxV; Param.GetMtxV(MtxV); 01737 MAGGen.SetMtxV(MtxV); 01738 01739 PNGraph TrG = new TNGraph; 01740 *TrG = *Graph; 01741 01742 TIntVV AttrVV(NNodes, NAttrs); 01743 for(int i = 0; i < NNodes; i++) { 01744 for(int j = 0; j < NAttrs; j++) { 01745 if(PhiVV(i, j) > TMAGNodeBern::Rnd.GetUniDev()) AttrVV(i, j) = 0; 01746 else AttrVV(i, j) = 1; 01747 } 01748 } 01749 PNGraph MAG = MAGGen.GenMAG(AttrVV, true, 10000); 01750 // PNGraph MAG = MAGGen.GenAttrMAG(AttrVV, true, 10000); 01751 printf("%d edges created for MAG...\n", MAG->GetEdges()); 01752 01753 TSnap::DelZeroDegNodes(TrG); 01754 TSnap::DelZeroDegNodes(MAG); 01755 01756 TGStatVec GS(tmuNodes, TFSet() | gsdInDeg | gsdOutDeg | gsdWcc | gsdHops | gsdClustCf | gsdSngVec | gsdSngVal | gsdTriadPart); 01757 01758 TGnuPlot InDegP(FNm + "-InDeg"), OutDegP(FNm + "-OutDeg"), SvalP(FNm + "-Sval"), SvecP(FNm + "-Svec"), WccP(FNm + "-Wcc"), HopP(FNm + "-Hop"), TriadP(FNm + "-Triad"), CcfP(FNm + "-Ccf");; 01759 01760 InDegP.SetXYLabel("Degree", "# of nodes"); 01761 OutDegP.SetXYLabel("Degree", "# of nodes"); 01762 SvalP.SetXYLabel("Rank", "Singular value"); 01763 SvecP.SetXYLabel("Rank", "Primary SngVec component"); 01764 WccP.SetXYLabel("Size of component", "# of components"); 01765 CcfP.SetXYLabel("Degree", "Clustering coefficient"); 01766 HopP.SetXYLabel("Hops", "# of node pairs"); 01767 TriadP.SetXYLabel("# of triads", "# of participating nodes"); 01768 01769 InDegP.SetScale(gpsLog10XY); InDegP.AddCmd("set key top right"); 01770 OutDegP.SetScale(gpsLog10XY); OutDegP.AddCmd("set key top right"); 01771 SvalP.SetScale(gpsLog10XY); SvalP.AddCmd("set key top right"); 01772 SvecP.SetScale(gpsLog10XY); SvecP.AddCmd("set key top right"); 01773 CcfP.SetScale(gpsLog10XY); CcfP.AddCmd("set key top right"); 01774 HopP.SetScale(gpsLog10XY); HopP.AddCmd("set key top right"); 01775 TriadP.SetScale(gpsLog10XY); TriadP.AddCmd("set key top right"); 01776 InDegP.ShowGrid(false); 01777 OutDegP.ShowGrid(false); 01778 SvalP.ShowGrid(false); 01779 SvecP.ShowGrid(false); 01780 CcfP.ShowGrid(false); 01781 HopP.ShowGrid(false); 01782 TriadP.ShowGrid(false); 01783 01784 const TStr Style[2] = {"lt 1 lw 3 lc rgb 'black'", "lt 2 lw 3 lc rgb 'red'"}; 01785 const TStr Name[2] = {"Real", "MAG"}; 01786 GS.Add(Graph, TSecTm(1), "Real Graph"); 01787 GS.Add(MAG, TSecTm(2), "MAG"); 01788 01789 TFltPrV InDegV, OutDegV, SvalV, SvecV, HopV, WccV, CcfV, TriadV; 01790 for(int i = 0; i < GS.Len(); i++) { 01791 MakeCCDF(GS.At(i)->GetDistr(gsdInDeg), InDegV); 01792 MakeCCDF(GS.At(i)->GetDistr(gsdOutDeg), OutDegV); 01793 SvalV = GS.At(i)->GetDistr(gsdSngVal); 01794 SvecV = GS.At(i)->GetDistr(gsdSngVec); 01795 MakeCCDF(GS.At(i)->GetDistr(gsdClustCf), CcfV); 01796 HopV = GS.At(i)->GetDistr(gsdHops); 01797 MakeCCDF(GS.At(i)->GetDistr(gsdTriadPart), TriadV); 01798 01799 InDegP.AddPlot(InDegV, gpwLines, Name[i], Style[i]); 01800 OutDegP.AddPlot(OutDegV, gpwLines, Name[i], Style[i]); 01801 SvalP.AddPlot(SvalV, gpwLines, Name[i], Style[i]); 01802 SvecP.AddPlot(SvecV, gpwLines, Name[i], Style[i]); 01803 CcfP.AddPlot(CcfV, gpwLines, Name[i], Style[i]); 01804 HopP.AddPlot(HopV, gpwLines, Name[i], Style[i]); 01805 TriadP.AddPlot(TriadV, gpwLines, Name[i], Style[i]); 01806 } 01807 01808 InDegP.SaveEps(30); 01809 OutDegP.SaveEps(30); 01810 SvalP.SaveEps(30); 01811 SvecP.SaveEps(30); 01812 CcfP.SaveEps(30); 01813 HopP.SaveEps(30); 01814 TriadP.SaveEps(30); 01815 } 01816 01817 void TMAGFitBern::CountAttr(TFltV& EstMuV) const { 01818 const int NNodes = PhiVV.GetXDim(); 01819 const int NAttrs = PhiVV.GetYDim(); 01820 EstMuV.Gen(NAttrs); 01821 EstMuV.PutAll(0.0); 01822 01823 for(int l = 0; l < NAttrs; l++) { 01824 for(int i = 0; i < NNodes; i++) { 01825 EstMuV[l] = EstMuV[l] + PhiVV(i, l); 01826 } 01827 EstMuV[l] = EstMuV[l] / double(NNodes); 01828 } 01829 } 01830 01831 void TMAGFitBern::SortAttrOrdering(const TFltV& TrueMuV, TIntV& IndexV) const { 01832 const int NAttrs = TrueMuV.Len(); 01833 // const int NNodes = PhiVV.GetXDim(); 01834 TFltV EstMuV, SortedTrueMuV, SortedEstMuV, TrueIdxV, EstIdxV; 01835 IndexV.Gen(NAttrs); 01836 TrueIdxV.Gen(NAttrs); 01837 EstIdxV.Gen(NAttrs); 01838 01839 for(int l = 0; l < NAttrs; l++) { 01840 TrueIdxV[l] = l; 01841 EstIdxV[l] = l; 01842 } 01843 01844 CountAttr(EstMuV); 01845 SortedTrueMuV = TrueMuV; 01846 SortedEstMuV = EstMuV; 01847 for(int i = 0; i < NAttrs; i++) { 01848 if(SortedTrueMuV[i] > 0.5) { SortedTrueMuV[i] = 1.0 - SortedTrueMuV[i]; } 01849 if(SortedEstMuV[i] > 0.5) { SortedEstMuV[i] = 1.0 - SortedEstMuV[i]; } 01850 } 01851 01852 for(int i = 0; i < NAttrs; i++) { 01853 for(int j = i+1; j < NAttrs; j++) { 01854 if(SortedTrueMuV[i] < SortedTrueMuV[j]) { 01855 SortedTrueMuV.Swap(i, j); 01856 TrueIdxV.Swap(i, j); 01857 } 01858 if(SortedEstMuV[i] < SortedEstMuV[j]) { 01859 EstIdxV.Swap((int)SortedEstMuV[i], (int)SortedEstMuV[j]); 01860 SortedEstMuV.Swap(i, j); 01861 } 01862 } 01863 } 01864 01865 for(int l = 0; l < NAttrs; l++) { 01866 IndexV[l] = (int)TrueIdxV[(int)EstIdxV[l]]; 01867 } 01868 } 01869 01870 const bool TMAGFitBern::NextPermutation(TIntV& IndexV) const { 01871 const int NAttrs = IndexV.Len(); 01872 int Pos = NAttrs - 1; 01873 while(Pos > 0) { 01874 if(IndexV[Pos-1] < IndexV[Pos]) { 01875 break; 01876 } 01877 Pos--; 01878 } 01879 if(Pos == 0) { 01880 return false; 01881 } 01882 01883 int Val = NAttrs, NewPos = -1; 01884 for(int i = Pos; i < NAttrs; i++) { 01885 if(IndexV[i] > IndexV[Pos - 1] && IndexV[i] < Val) { 01886 NewPos = i; 01887 Val = IndexV[i]; 01888 } 01889 } 01890 IndexV[NewPos] = IndexV[Pos - 1]; 01891 IndexV[Pos - 1] = Val; 01892 01893 TIntV SubIndexV; 01894 IndexV.GetSubValV(Pos, NAttrs - 1, SubIndexV); 01895 SubIndexV.Sort(true); 01896 for(int i = Pos; i < NAttrs; i++) { 01897 IndexV[i] = SubIndexV[i - Pos]; 01898 } 01899 01900 return true; 01901 } 01902 01903 const double TMAGFitBern::ComputeJointOneLL(const TIntVV& AttrVV) const { 01904 double LL = 0.0; 01905 const int NNodes = Param.GetNodes(); 01906 const int NAttrs = Param.GetAttrs(); 01907 TMAGAffMtxV MtxV(NAttrs); Param.GetMtxV(MtxV); 01908 const TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 01909 const TFltV MuV = NodeAttr.GetMuV(); 01910 01911 for(int l = 0; l < NAttrs; l++) { 01912 for(int i = 0; i < MtxV[l].Len(); i++) { 01913 MtxV[l].At(i) = log(MtxV[l].At(i)); 01914 } 01915 } 01916 01917 for(int i = 0; i < NNodes; i++) { 01918 for(int l = 0; l < NAttrs; l++) { 01919 if(AttrVV.At(i, l) == 0) { 01920 LL += log(MuV[l]); 01921 } else { 01922 LL += log(1.0 - MuV[l]); 01923 } 01924 } 01925 01926 for(int j = 0; j < NNodes; j++) { 01927 if(i == j) { continue; } 01928 01929 double ProbLL = 0.0; 01930 for(int l = 0; l < NAttrs; l++) { 01931 ProbLL += MtxV[l].At(AttrVV.At(i, l), AttrVV.At(j, l)); 01932 } 01933 01934 if(Graph->IsEdge(i, j)) { 01935 LL += ProbLL; 01936 } else { 01937 LL += log(1-exp(ProbLL)); 01938 } 01939 } 01940 } 01941 01942 return LL; 01943 } 01944 01945 const double TMAGFitBern::ComputeJointAdjLL(const TIntVV& AttrVV) const { 01946 double LL = 0.0; 01947 const int NNodes = Param.GetNodes(); 01948 const int NAttrs = Param.GetAttrs(); 01949 TMAGAffMtxV MtxV(NAttrs); Param.GetMtxV(MtxV); 01950 const TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 01951 const TFltV MuV = NodeAttr.GetMuV(); 01952 01953 for(int l = 0; l < NAttrs; l++) { 01954 for(int i = 0; i < MtxV[l].Len(); i++) { 01955 MtxV[l].At(i) = log(MtxV[l].At(i)); 01956 } 01957 } 01958 01959 for(int i = 0; i < NNodes; i++) { 01960 for(int j = 0; j < NNodes; j++) { 01961 if(i == j) { continue; } 01962 01963 double ProbLL = 0.0; 01964 for(int l = 0; l < NAttrs; l++) { 01965 ProbLL += MtxV[l].At(AttrVV.At(i, l), AttrVV.At(j, l)); 01966 } 01967 01968 if(Graph->IsEdge(i, j)) { 01969 LL += ProbLL; 01970 } else { 01971 LL += log(1-exp(ProbLL)); 01972 } 01973 } 01974 } 01975 01976 return LL; 01977 } 01978 01979 const double TMAGFitBern::ComputeJointLL(int NSample) const { 01980 double LL = 0.0; 01981 const int NNodes = Param.GetNodes(); 01982 const int NAttrs = Param.GetAttrs(); 01983 01984 TRnd Rnd(2000); 01985 TIntVV AttrVV(NNodes, NAttrs); 01986 int count = 0; 01987 for(int s = 0; s < NSample; s++) { 01988 for(int i = 0; i < NNodes; i++) { 01989 for(int l = 0; l < NAttrs; l++) { 01990 01991 if(Rnd.GetUniDev() <= PhiVV(i, l)) { 01992 AttrVV.At(i, l) = 0; 01993 } else { 01994 AttrVV.At(i, l) = 1; 01995 } 01996 01997 if(PhiVV(i, l) > 0.05 && PhiVV(i, l) < 0.95) count++; 01998 } 01999 } 02000 02001 LL += ComputeJointOneLL(AttrVV); 02002 } 02003 AttrVV.Clr(); 02004 02005 return LL / double(NSample); 02006 } 02007 02008 const double TMAGFitBern::ComputeApxLL() const { 02009 double LL = 0.0; 02010 // double LLSelf = 0.0; 02011 const int NNodes = Param.GetNodes(); 02012 const int NAttrs = Param.GetAttrs(); 02013 TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 02014 TFltV MuV = NodeAttr.GetMuV(); 02015 TMAGAffMtxV LLMtxV(NAttrs); 02016 02017 for(int l = 0; l < NAttrs; l++) { 02018 for(int i = 0; i < NNodes; i++) { 02019 LL += PhiVV(i, l) * log(MuV[l]); 02020 LL += (1.0 - PhiVV(i, l)) * log(1.0 - MuV[l]); 02021 LL -= PhiVV(i, l) * log(PhiVV(i, l)); 02022 LL -= (1.0 - PhiVV(i, l)) * log(1.0 - PhiVV(i, l)); 02023 } 02024 TMAGAffMtx Theta = Param.GetMtx(l); 02025 Theta.GetLLMtx(LLMtxV[l]); 02026 } 02027 02028 for(int i = 0; i < NNodes; i++) { 02029 for(int j = 0; j < NNodes; j++) { 02030 if(i == j) { continue; } 02031 02032 if(Graph->IsEdge(i, j)) { 02033 for(int l = 0; l < NAttrs; l++) { 02034 LL += GetProbPhi(i, j, l, 0, 0) * LLMtxV[l].At(0, 0); 02035 LL += GetProbPhi(i, j, l, 0, 1) * LLMtxV[l].At(0, 1); 02036 LL += GetProbPhi(i, j, l, 1, 0) * LLMtxV[l].At(1, 0); 02037 LL += GetProbPhi(i, j, l, 1, 1) * LLMtxV[l].At(1, 1); 02038 } 02039 LL += log(NormConst); 02040 } else { 02041 LL += log(1-exp(GetProdLinWeight(i, j))); 02042 } 02043 } 02044 } 02045 02046 return LL; 02047 } 02048 02049 const double TMAGFitBern::ComputeApxAdjLL() const { 02050 double LL = 0.0; 02051 // double LLSelf = 0.0; 02052 const int NNodes = Param.GetNodes(); 02053 const int NAttrs = Param.GetAttrs(); 02054 TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 02055 TFltV MuV = NodeAttr.GetMuV(); 02056 MuV.PutAll(0.0); 02057 TMAGAffMtxV LLMtxV(NAttrs); 02058 double TotalEdge = 0.0; 02059 for(int l = 0; l < NAttrs; l++) { 02060 TMAGAffMtx Theta = Param.GetMtx(l); 02061 Theta.GetLLMtx(LLMtxV[l]); 02062 } 02063 02064 for(int i = 0; i < NNodes; i++) { 02065 for(int j = 0; j < NNodes; j++) { 02066 if(i == j) { continue; } 02067 02068 if(Graph->IsEdge(i, j)) { 02069 for(int l = 0; l < NAttrs; l++) { 02070 LL += GetProbPhi(i, j, l, 0, 0) * LLMtxV[l].At(0, 0); 02071 LL += GetProbPhi(i, j, l, 0, 1) * LLMtxV[l].At(0, 1); 02072 LL += GetProbPhi(i, j, l, 1, 0) * LLMtxV[l].At(1, 0); 02073 LL += GetProbPhi(i, j, l, 1, 1) * LLMtxV[l].At(1, 1); 02074 } 02075 } else { 02076 LL += log(1-exp(GetProdLinWeight(i, j))); 02077 } 02078 02079 double TempLL = 1.0; 02080 for(int l = 0; l < NAttrs; l++) { 02081 int Ai = (double(PhiVV(i, l)) > 0.5) ? 0 : 1; 02082 int Aj = (double(PhiVV(j, l)) > 0.5) ? 0 : 1; 02083 TempLL *= Param.GetMtx(l).At(Ai, Aj); 02084 } 02085 if(TMAGNodeBern::Rnd.GetUniDev() < TempLL) { 02086 TotalEdge += 1.0; 02087 } 02088 } 02089 } 02090 02091 return LL; 02092 } 02093 02094 const double TMAGFitBern::ComputeMI(const TIntVV& AttrV, const int AId1, const int AId2) { 02095 const int NNodes = AttrV.GetXDim(); 02096 double MI = 0.0; 02097 double Cor = 0.0; 02098 02099 TFltVV Pxy(2,2); 02100 TFltV Px(2), Py(2); 02101 Pxy.PutAll(0.0); 02102 Px.PutAll(0.0); 02103 Py.PutAll(0.0); 02104 02105 for(int i = 0; i < NNodes; i++) { 02106 int X = AttrV(i, AId1); 02107 int Y = AttrV(i, AId2); 02108 Pxy(X, Y) = Pxy(X, Y) + 1; 02109 Px[X] = Px[X] + 1; 02110 Py[Y] = Py[Y] + 1; 02111 Cor += double(X * Y); 02112 } 02113 02114 for(int x = 0; x < 2; x++) { 02115 for(int y = 0; y < 2; y++) { 02116 MI += Pxy(x, y) / double(NNodes) * (log(Pxy(x, y).Val) - log(Px[x].Val) - log(Py[y].Val) + log((double)NNodes)); 02117 } 02118 } 02119 02120 return MI; 02121 } 02122 02123 const double TMAGFitBern::ComputeMI(const TFltVV& AttrV, const int AId1, const int AId2) { 02124 const int NNodes = AttrV.GetXDim(); 02125 double MI = 0.0; 02126 double Cor = 0.0; 02127 02128 TFltVV Pxy(2,2); 02129 TFltV Px(2), Py(2); 02130 Pxy.PutAll(0.0); 02131 Px.PutAll(0.0); 02132 Py.PutAll(0.0); 02133 02134 for(int i = 0; i < NNodes; i++) { 02135 double X = AttrV(i, AId1); 02136 double Y = AttrV(i, AId2); 02137 Pxy(0, 0) = Pxy(0, 0) + X * Y; 02138 Pxy(0, 1) = Pxy(0, 1) + X * (1 - Y); 02139 Pxy(1, 0) = Pxy(1, 0) + (1 - X) * Y; 02140 Pxy(1, 1) = (i+1) - Pxy(0, 0) - Pxy(0, 1) - Pxy(1, 0); 02141 Px[0] = Px[0] + X; 02142 Py[0] = Py[0] + Y; 02143 Cor += double((1-X) * (1-Y)); 02144 } 02145 Px[1] = NNodes - Px[0]; 02146 Py[1] = NNodes - Py[0]; 02147 02148 for(int x = 0; x < 2; x++) { 02149 for(int y = 0; y < 2; y++) { 02150 MI += Pxy(x, y) / double(NNodes) * (log(Pxy(x, y)) - log(Px[x]) - log(Py[y]) + log(double(NNodes))); 02151 } 02152 } 02153 02154 return MI; 02155 } 02156 02157 const double TMAGFitBern::ComputeMI(const TIntVV& AttrV) { 02158 // const int NNodes = AttrV.GetXDim(); 02159 const int NAttrs = AttrV.GetYDim(); 02160 double MI = 0.0; 02161 02162 for(int l = 0; l < NAttrs; l++) { 02163 for(int k = l+1; k < NAttrs; k++) { 02164 MI += ComputeMI(AttrV, l, k); 02165 } 02166 } 02167 02168 return MI; 02169 } 02170 02171 const double TMAGFitBern::ComputeMI(const TFltVV& AttrV) { 02172 // const int NNodes = AttrV.GetXDim(); 02173 const int NAttrs = AttrV.GetYDim(); 02174 double MI = 0.0; 02175 02176 for(int l = 0; l < NAttrs; l++) { 02177 for(int k = l+1; k < NAttrs; k++) { 02178 MI += ComputeMI(AttrV, l, k); 02179 } 02180 } 02181 02182 return MI; 02183 }