SNAP Library, Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #include "stdafx.h" 00002 #include "kronecker.h" 00003 00005 // Kronecker Graphs 00006 const double TKronMtx::NInf = -DBL_MAX; 00007 TRnd TKronMtx::Rnd = TRnd(0); 00008 00009 TKronMtx::TKronMtx(const TFltV& SeedMatrix) : SeedMtx(SeedMatrix) { 00010 MtxDim = (int) sqrt((double)SeedMatrix.Len()); 00011 IAssert(MtxDim*MtxDim == SeedMtx.Len()); 00012 } 00013 00014 void TKronMtx::SaveTxt(const TStr& OutFNm) const { 00015 FILE *F = fopen(OutFNm.CStr(), "wt"); 00016 for (int i = 0; i < GetDim(); i++) { 00017 for (int j = 0; j < GetDim(); j++) { 00018 if (j > 0) fprintf(F, "\t"); 00019 fprintf(F, "%f", At(i,j)); } 00020 fprintf(F, "\n"); 00021 } 00022 fclose(F); 00023 } 00024 00025 TKronMtx& TKronMtx::operator = (const TKronMtx& Kronecker) { 00026 if (this != &Kronecker){ 00027 MtxDim=Kronecker.MtxDim; 00028 SeedMtx=Kronecker.SeedMtx; 00029 } 00030 return *this; 00031 } 00032 00033 bool TKronMtx::IsProbMtx() const { 00034 for (int i = 0; i < Len(); i++) { 00035 if (At(i) < 0.0 || At(i) > 1.0) return false; 00036 } 00037 return true; 00038 } 00039 00040 void TKronMtx::SetRndMtx(const int& PrmMtxDim, const double& MinProb) { 00041 MtxDim = PrmMtxDim; 00042 SeedMtx.Gen(MtxDim*MtxDim); 00043 for (int p = 0; p < SeedMtx.Len(); p++) { 00044 do { 00045 SeedMtx[p] = TKronMtx::Rnd.GetUniDev(); 00046 } while (SeedMtx[p] < MinProb); 00047 } 00048 } 00049 00050 void TKronMtx::SetEpsMtx(const double& Eps1, const double& Eps0, const int& Eps1Val, const int& Eps0Val) { 00051 for (int i = 0; i < Len(); i++) { 00052 double& Val = At(i); 00053 if (Val == Eps1Val) Val = double(Eps1); 00054 else if (Val == Eps0Val) Val = double(Eps0); 00055 } 00056 } 00057 00058 // scales parameter values to allow Edges 00059 void TKronMtx::SetForEdges(const int& Nodes, const int& Edges) { 00060 const int KronIter = GetKronIter(Nodes); 00061 const double EZero = pow((double) Edges, 1.0/double(KronIter)); 00062 const double Factor = EZero / GetMtxSum(); 00063 for (int i = 0; i < Len(); i++) { 00064 At(i) 
*= Factor; 00065 if (At(i) > 1) { At(i) = 1; } 00066 } 00067 } 00068 00069 void TKronMtx::AddRndNoise(const double& SDev) { 00070 Dump("before"); 00071 double NewVal; 00072 int c =0; 00073 for (int i = 0; i < Len(); i++) { 00074 for(c = 0; ((NewVal = At(i)*Rnd.GetNrmDev(1, SDev, 0.8, 1.2)) < 0.01 || NewVal>0.99) && c <1000; c++) { } 00075 if (c < 999) { At(i) = NewVal; } else { printf("XXXXX\n"); } 00076 } 00077 Dump("after"); 00078 } 00079 00080 TStr TKronMtx::GetMtxStr() const { 00081 TChA ChA("["); 00082 for (int i = 0; i < Len(); i++) { 00083 ChA += TStr::Fmt("%g", At(i)); 00084 if ((i+1)%GetDim()==0 && (i+1<Len())) { ChA += "; "; } 00085 else if (i+1<Len()) { ChA += ", "; } 00086 } 00087 ChA += "]"; 00088 return TStr(ChA); 00089 } 00090 00091 void TKronMtx::ToOneMinusMtx() { 00092 for (int i = 0; i < Len(); i++) { 00093 IAssert(At(i) >= 0.0 && At(i) <= 1.0); 00094 At(i) = 1.0 - At(i); 00095 } 00096 } 00097 00098 void TKronMtx::GetLLMtx(TKronMtx& LLMtx) { 00099 LLMtx.GenMtx(MtxDim); 00100 for (int i = 0; i < Len(); i++) { 00101 if (At(i) != 0.0) { LLMtx.At(i) = log(At(i)); } 00102 else { LLMtx.At(i) = NInf; } 00103 } 00104 } 00105 00106 void TKronMtx::GetProbMtx(TKronMtx& ProbMtx) { 00107 ProbMtx.GenMtx(MtxDim); 00108 for (int i = 0; i < Len(); i++) { 00109 if (At(i) != NInf) { ProbMtx.At(i) = exp(At(i)); } 00110 else { ProbMtx.At(i) = 0.0; } 00111 } 00112 } 00113 00114 void TKronMtx::Swap(TKronMtx& KronMtx) { 00115 ::Swap(MtxDim, KronMtx.MtxDim); 00116 SeedMtx.Swap(KronMtx.SeedMtx); 00117 } 00118 00119 int TKronMtx::GetNodes(const int& NIter) const { 00120 return (int) pow(double(GetDim()), double(NIter)); 00121 } 00122 00123 int TKronMtx::GetEdges(const int& NIter) const { 00124 return (int) pow(double(GetMtxSum()), double(NIter)); 00125 } 00126 00127 int TKronMtx::GetKronIter(const int& Nodes) const { 00128 return (int) ceil(log(double(Nodes)) / log(double(GetDim()))); // upper bound 00129 //return (int) TMath::Round(log(double(Nodes)) / 
log(double(GetDim()))); // round to nearest power 00130 } 00131 00132 int TKronMtx::GetNZeroK(const PNGraph& Graph) const { 00133 return GetNodes(GetKronIter(Graph->GetNodes())); 00134 } 00135 00136 double TKronMtx::GetEZero(const int& Edges, const int& KronIters) const { 00137 return pow((double) Edges, 1.0/double(KronIters)); 00138 } 00139 00140 double TKronMtx::GetMtxSum() const { 00141 double Sum = 0; 00142 for (int i = 0; i < Len(); i++) { 00143 Sum += At(i); } 00144 return Sum; 00145 } 00146 00147 double TKronMtx::GetRowSum(const int& RowId) const { 00148 double Sum = 0; 00149 for (int c = 0; c < GetDim(); c++) { 00150 Sum += At(RowId, c); } 00151 return Sum; 00152 } 00153 00154 double TKronMtx::GetColSum(const int& ColId) const { 00155 double Sum = 0; 00156 for (int r = 0; r < GetDim(); r++) { 00157 Sum += At(r, ColId); } 00158 return Sum; 00159 } 00160 00161 double TKronMtx::GetEdgeProb(int NId1, int NId2, const int& NKronIters) const { 00162 double Prob = 1.0; 00163 for (int level = 0; level < NKronIters; level++) { 00164 Prob *= At(NId1 % MtxDim, NId2 % MtxDim); 00165 if (Prob == 0.0) { return 0.0; } 00166 NId1 /= MtxDim; NId2 /= MtxDim; 00167 } 00168 return Prob; 00169 } 00170 00171 double TKronMtx::GetNoEdgeProb(int NId1, int NId2, const int& NKronIters) const { 00172 return 1.0 - GetEdgeProb(NId1, NId2, NKronIters); 00173 } 00174 00175 double TKronMtx::GetEdgeLL(int NId1, int NId2, const int& NKronIters) const { 00176 double LL = 0.0; 00177 for (int level = 0; level < NKronIters; level++) { 00178 const double& LLVal = At(NId1 % MtxDim, NId2 % MtxDim); 00179 if (LLVal == NInf) return NInf; 00180 LL += LLVal; 00181 NId1 /= MtxDim; NId2 /= MtxDim; 00182 } 00183 return LL; 00184 } 00185 00186 double TKronMtx::GetNoEdgeLL(int NId1, int NId2, const int& NKronIters) const { 00187 return log(1.0 - exp(GetEdgeLL(NId1, NId2, NKronIters))); 00188 } 00189 00190 // 2nd order Taylor approximation log(1-x) ~ -x - 0.5x^2 00191 double TKronMtx::GetApxNoEdgeLL(int NId1, 
int NId2, const int& NKronIters) const { 00192 const double EdgeLL = GetEdgeLL(NId1, NId2, NKronIters); 00193 return -exp(EdgeLL) - 0.5*exp(2*EdgeLL); 00194 } 00195 00196 bool TKronMtx::IsEdgePlace(int NId1, int NId2, const int& NKronIters, const double& ProbTresh) const { 00197 double Prob = 1.0; 00198 for (int level = 0; level < NKronIters; level++) { 00199 Prob *= At(NId1 % MtxDim, NId2 % MtxDim); 00200 if (ProbTresh > Prob) { return false; } 00201 NId1 /= MtxDim; NId2 /= MtxDim; 00202 } 00203 return true; 00204 } 00205 00206 // deriv a*log(x) = a/x 00207 double TKronMtx::GetEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const { 00208 const int ThetaX = ParamId % GetDim(); 00209 const int ThetaY = ParamId / GetDim(); 00210 int ThetaCnt = 0; 00211 for (int level = 0; level < NKronIters; level++) { 00212 if ((NId1 % MtxDim) == ThetaX && (NId2 % MtxDim) == ThetaY) { 00213 ThetaCnt++; } 00214 NId1 /= MtxDim; NId2 /= MtxDim; 00215 } 00216 return double(ThetaCnt) / exp(At(ParamId)); 00217 } 00218 00219 // deriv log(1-x^a*y^b..) = -x'/(1-x) = (-a*x^(a-1)*y^b..) / (1-x^a*y^b..) 
00220 double TKronMtx::GetNoEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const { 00221 const int& ThetaX = ParamId % GetDim(); 00222 const int& ThetaY = ParamId / GetDim(); 00223 int ThetaCnt = 0; 00224 double DLL = 0, LL = 0; 00225 for (int level = 0; level < NKronIters; level++) { 00226 const int X = NId1 % MtxDim; 00227 const int Y = NId2 % MtxDim; 00228 const double LVal = At(X, Y); 00229 if (X == ThetaX && Y == ThetaY) { 00230 if (ThetaCnt != 0) { DLL += LVal; } 00231 ThetaCnt++; 00232 } else { DLL += LVal; } 00233 LL += LVal; 00234 NId1 /= MtxDim; NId2 /= MtxDim; 00235 } 00236 return -ThetaCnt*exp(DLL) / (1.0 - exp(LL)); 00237 } 00238 00239 // 2nd order Taylor approximation log(1-x) ~ -x - 0.5x^2 00240 double TKronMtx::GetApxNoEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const { 00241 const int& ThetaX = ParamId % GetDim(); 00242 const int& ThetaY = ParamId / GetDim(); 00243 int ThetaCnt = 0; 00244 double DLL = 0;//, LL = 0; 00245 for (int level = 0; level < NKronIters; level++) { 00246 const int X = NId1 % MtxDim; 00247 const int Y = NId2 % MtxDim; 00248 const double LVal = At(X, Y); IAssert(LVal > NInf); 00249 if (X == ThetaX && Y == ThetaY) { 00250 if (ThetaCnt != 0) { DLL += LVal; } 00251 ThetaCnt++; 00252 } else { DLL += LVal; } 00253 //LL += LVal; 00254 NId1 /= MtxDim; NId2 /= MtxDim; 00255 } 00256 //return -ThetaCnt*exp(DLL)*(1.0 + exp(LL)); // -x'/(1+x) WRONG! 00257 // deriv = -(ax^(a-1)*y^b..) - a*x^(2a-1)*y^2b.. 00258 // = - (ax^(a-1)*y^b..) 
- a*x*(x^(a-1)*y^b..)^2 00259 return -ThetaCnt*exp(DLL) - ThetaCnt*exp(At(ThetaX, ThetaY)+2*DLL); 00260 } 00261 00262 uint TKronMtx::GetNodeSig(const double& OneProb) { 00263 uint Sig = 0; 00264 for (int i = 0; i < (int)(8*sizeof(uint)); i++) { 00265 if (TKronMtx::Rnd.GetUniDev() < OneProb) { 00266 Sig |= (1u<<i); } 00267 } 00268 return Sig; 00269 } 00270 00271 double TKronMtx::GetEdgeProb(const uint& NId1Sig, const uint& NId2Sig, const int& NIter) const { 00272 Assert(GetDim() == 2); 00273 double Prob = 1.0; 00274 for (int i = 0; i < NIter; i++) { 00275 const uint Mask = (1u<<i); 00276 const uint Bit1 = NId1Sig & Mask; 00277 const uint Bit2 = NId2Sig & Mask; 00278 Prob *= At(int(Bit1!=0), int(Bit2!=0)); 00279 } 00280 return Prob; 00281 } 00282 00283 PNGraph TKronMtx::GenThreshGraph(const double& Thresh) const { 00284 PNGraph Graph = TNGraph::New(); 00285 for (int i = 0; i < GetDim(); i++) { 00286 Graph->AddNode(i); } 00287 for (int r = 0; r < GetDim(); r++) { 00288 for (int c = 0; c < GetDim(); c++) { 00289 if (At(r, c) >= Thresh) { Graph->AddEdge(r, c); } 00290 } 00291 } 00292 return Graph; 00293 } 00294 00295 PNGraph TKronMtx::GenRndGraph(const double& RndFact) const { 00296 PNGraph Graph = TNGraph::New(); 00297 for (int i = 0; i < GetDim(); i++) { 00298 Graph->AddNode(i); } 00299 for (int r = 0; r < GetDim(); r++) { 00300 for (int c = 0; c < GetDim(); c++) { 00301 if (RndFact * At(r, c) >= TKronMtx::Rnd.GetUniDev()) { Graph->AddEdge(r, c); } 00302 } 00303 } 00304 return Graph; 00305 } 00306 00307 int TKronMtx::GetKronIter(const int& GNodes, const int& SeedMtxSz) { 00308 return (int) ceil(log(double(GNodes)) / log(double(SeedMtxSz))); 00309 } 00310 00311 // slow but exaxt procedure (we flip all O(N^2) edges) 00312 PNGraph TKronMtx::GenKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir, const int& Seed) { 00313 const TKronMtx& SeedGraph = SeedMtx; 00314 const int NNodes = SeedGraph.GetNodes(NIter); 00315 printf(" Kronecker: %d nodes, %s...\n", 
NNodes, IsDir ? "Directed":"UnDirected"); 00316 PNGraph Graph = TNGraph::New(NNodes, -1); 00317 TExeTm ExeTm; 00318 TRnd Rnd(Seed); 00319 int edges = 0; 00320 for (int node1 = 0; node1 < NNodes; node1++) { 00321 Graph->AddNode(node1); } 00322 if (IsDir) { 00323 for (int node1 = 0; node1 < NNodes; node1++) { 00324 for (int node2 = 0; node2 < NNodes; node2++) { 00325 if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) { 00326 Graph->AddEdge(node1, node2); 00327 edges++; 00328 } 00329 } 00330 if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000); 00331 } 00332 } else { 00333 for (int node1 = 0; node1 < NNodes; node1++) { 00334 for (int node2 = node1; node2 < NNodes; node2++) { 00335 if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) { 00336 Graph->AddEdge(node1, node2); 00337 Graph->AddEdge(node2, node1); 00338 edges++; 00339 } 00340 } 00341 if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000); 00342 } 00343 } 00344 printf("\r %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr()); 00345 return Graph; 00346 } 00347 00348 // use RMat like recursive descent to quickly generate a Kronecker graph 00349 PNGraph TKronMtx::GenFastKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir, const int& Seed) { 00350 const TKronMtx& SeedGraph = SeedMtx; 00351 const int MtxDim = SeedGraph.GetDim(); 00352 const double MtxSum = SeedGraph.GetMtxSum(); 00353 const int NNodes = SeedGraph.GetNodes(NIter); 00354 const int NEdges = SeedGraph.GetEdges(NIter); 00355 //const double DiagEdges = NNodes * pow(SeedGraph.At(0,0), double(NIter)); 00356 //const int NEdges = (int) TMath::Round(((pow(double(SeedGraph.GetMtxSum()), double(NIter)) - DiagEdges) /2.0)); 00357 printf(" FastKronecker: %d nodes, %d edges, %s...\n", NNodes, NEdges, IsDir ? 
"Directed":"UnDirected"); 00358 PNGraph Graph = TNGraph::New(NNodes, -1); 00359 TRnd Rnd(Seed); 00360 TExeTm ExeTm; 00361 // prepare cell probability vector 00362 TVec<TFltIntIntTr> ProbToRCPosV; // row, col position 00363 double CumProb = 0.0; 00364 for (int r = 0; r < MtxDim; r++) { 00365 for (int c = 0; c < MtxDim; c++) { 00366 const double Prob = SeedGraph.At(r, c); 00367 if (Prob > 0.0) { 00368 CumProb += Prob; 00369 ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c)); 00370 } 00371 } 00372 } 00373 // add nodes 00374 for (int i = 0; i < NNodes; i++) { 00375 Graph->AddNode(i); } 00376 // add edges 00377 int Rng, Row, Col, Collision=0, n = 0; 00378 for (int edges = 0; edges < NEdges; ) { 00379 Rng=NNodes; Row=0; Col=0; 00380 for (int iter = 0; iter < NIter; iter++) { 00381 const double& Prob = Rnd.GetUniDev(); 00382 n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; } 00383 const int MtxRow = ProbToRCPosV[n].Val2; 00384 const int MtxCol = ProbToRCPosV[n].Val3; 00385 Rng /= MtxDim; 00386 Row += MtxRow * Rng; 00387 Col += MtxCol * Rng; 00388 } 00389 if (! Graph->IsEdge(Row, Col)) { // allow self-loops 00390 Graph->AddEdge(Row, Col); edges++; 00391 if (! 
IsDir) { 00392 if (Row != Col) Graph->AddEdge(Col, Row); 00393 edges++; 00394 } 00395 } else { Collision++; } 00396 //if (edges % 1000 == 0) printf("\r...%dk", edges/1000); 00397 } 00398 //printf(" %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr()); 00399 printf(" collisions: %d (%.4f)\n", Collision, Collision/(double)Graph->GetEdges()); 00400 return Graph; 00401 } 00402 00403 // use RMat like recursive descent to quickly generate a Kronecker graph 00404 PNGraph TKronMtx::GenFastKronecker(const TKronMtx& SeedMtx, const int& NIter, const int& Edges, const bool& IsDir, const int& Seed) { 00405 const TKronMtx& SeedGraph = SeedMtx; 00406 const int MtxDim = SeedGraph.GetDim(); 00407 const double MtxSum = SeedGraph.GetMtxSum(); 00408 const int NNodes = SeedGraph.GetNodes(NIter); 00409 const int NEdges = Edges; 00410 //const double DiagEdges = NNodes * pow(SeedGraph.At(0,0), double(NIter)); 00411 //const int NEdges = (int) TMath::Round(((pow(double(SeedGraph.GetMtxSum()), double(NIter)) - DiagEdges) /2.0)); 00412 printf(" RMat Kronecker: %d nodes, %d edges, %s...\n", NNodes, NEdges, IsDir ? 
"Directed":"UnDirected"); 00413 PNGraph Graph = TNGraph::New(NNodes, -1); 00414 TRnd Rnd(Seed); 00415 TExeTm ExeTm; 00416 // prepare cell probability vector 00417 TVec<TFltIntIntTr> ProbToRCPosV; // row, col position 00418 double CumProb = 0.0; 00419 for (int r = 0; r < MtxDim; r++) { 00420 for (int c = 0; c < MtxDim; c++) { 00421 const double Prob = SeedGraph.At(r, c); 00422 if (Prob > 0.0) { 00423 CumProb += Prob; 00424 ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c)); 00425 } 00426 } 00427 } 00428 // add nodes 00429 for (int i = 0; i < NNodes; i++) { 00430 Graph->AddNode(i); } 00431 // add edges 00432 int Rng, Row, Col, Collision=0, n = 0; 00433 for (int edges = 0; edges < NEdges; ) { 00434 Rng=NNodes; Row=0; Col=0; 00435 for (int iter = 0; iter < NIter; iter++) { 00436 const double& Prob = Rnd.GetUniDev(); 00437 n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; } 00438 const int MtxRow = ProbToRCPosV[n].Val2; 00439 const int MtxCol = ProbToRCPosV[n].Val3; 00440 Rng /= MtxDim; 00441 Row += MtxRow * Rng; 00442 Col += MtxCol * Rng; 00443 } 00444 if (! Graph->IsEdge(Row, Col)) { // allow self-loops 00445 Graph->AddEdge(Row, Col); edges++; 00446 if (! IsDir) { 00447 if (Row != Col) Graph->AddEdge(Col, Row); 00448 edges++; 00449 } 00450 } else { Collision++; } 00451 //if (edges % 1000 == 0) printf("\r...%dk", edges/1000); 00452 } 00453 //printf(" %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr()); 00454 printf(" collisions: %d (%.4f)\n", Collision, Collision/(double)Graph->GetEdges()); 00455 return Graph; 00456 } 00457 00458 PNGraph TKronMtx::GenDetKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir) { 00459 const TKronMtx& SeedGraph = SeedMtx; 00460 const int NNodes = SeedGraph.GetNodes(NIter); 00461 printf(" Deterministic Kronecker: %d nodes, %s...\n", NNodes, IsDir ? 
"Directed":"UnDirected"); 00462 PNGraph Graph = TNGraph::New(NNodes, -1); 00463 TExeTm ExeTm; 00464 int edges = 0; 00465 for (int node1 = 0; node1 < NNodes; node1++) { Graph->AddNode(node1); } 00466 00467 for (int node1 = 0; node1 < NNodes; node1++) { 00468 for (int node2 = 0; node2 < NNodes; node2++) { 00469 if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) { 00470 Graph->AddEdge(node1, node2); 00471 edges++; 00472 } 00473 } 00474 if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000); 00475 } 00476 return Graph; 00477 } 00478 00479 void TKronMtx::PlotCmpGraphs(const TKronMtx& SeedMtx, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) { 00480 const int KronIters = SeedMtx.GetKronIter(Graph->GetNodes()); 00481 PNGraph KronG, WccG; 00482 const bool FastGen = true; 00483 if (FastGen) { KronG = TKronMtx::GenFastKronecker(SeedMtx, KronIters, true, 0); } 00484 else { KronG = TKronMtx::GenKronecker(SeedMtx, KronIters, true, 0); } 00485 TSnap::DelZeroDegNodes(KronG); 00486 WccG = TSnap::GetMxWcc(KronG); 00487 const TStr Desc1 = TStr::Fmt("%s", Desc.CStr()); 00488 TGStatVec GS(tmuNodes, TFSet() | gsdInDeg | gsdOutDeg | gsdWcc | gsdHops | gsdScc | gsdClustCf | gsdSngVec | gsdSngVal); 00489 //gsdHops 00490 //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf 00491 GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH G(%d, %d)", Graph->GetNodes(), Graph->GetEdges())); 00492 GS.Add(KronG, TSecTm(2), TStr::Fmt("KRONECKER K(%d, %d)", KronG->GetNodes(), KronG->GetEdges())); 00493 GS.Add(WccG, TSecTm(3), TStr::Fmt("KRONECKER wccK(%d, %d)", WccG->GetNodes(), WccG->GetEdges())); 00494 const TStr Style = "linewidth 1 pointtype 6 pointsize 1"; 00495 GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00496 GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00497 GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00498 GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, 
false, gpwLinesPoints, Style); 00499 GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00500 GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00501 GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00502 GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00503 GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00504 GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00505 GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00506 GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00507 GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00508 GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00509 GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00510 // typedef enum { distrUndef, distrInDeg, distrOutDeg, distrWcc, distrScc, 00511 // distrHops, distrWccHops, distrSngVal, distrSngVec, distrClustCf, distrMx } TGraphDistr;*/ 00512 } 00513 00514 void TKronMtx::PlotCmpGraphs(const TKronMtx& SeedMtx1, const TKronMtx& SeedMtx2, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) { 00515 const int KronIters1 = SeedMtx1.GetKronIter(Graph->GetNodes()); 00516 const int KronIters2 = SeedMtx2.GetKronIter(Graph->GetNodes()); 00517 PNGraph KronG1, KronG2; 00518 const bool FastGen = true; 00519 if (FastGen) { 00520 KronG1 = TKronMtx::GenFastKronecker(SeedMtx1, KronIters1, true, 0); 00521 KronG2 = TKronMtx::GenFastKronecker(SeedMtx2, KronIters2, false, 0); } 00522 else { 00523 KronG1 = TKronMtx::GenKronecker(SeedMtx1, KronIters1, true, 0); 00524 KronG2 = TKronMtx::GenKronecker(SeedMtx2, KronIters2, true, 0); } 00525 TSnap::DelZeroDegNodes(KronG1); 00526 TSnap::DelZeroDegNodes(KronG2); 00527 const TStr Desc1 = TStr::Fmt("%s", Desc.CStr()); 00528 
TGStatVec GS(tmuNodes, TFSet() | gsdInDeg | gsdOutDeg | gsdWcc | gsdScc | gsdHops | gsdClustCf | gsdSngVec | gsdSngVal | gsdTriadPart); 00529 //gsdHops 00530 //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf 00531 GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH G(%d, %d)", Graph->GetNodes(), Graph->GetEdges())); 00532 GS.Add(KronG1, TSecTm(2), TStr::Fmt("KRONECKER1 K(%d, %d) %s", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtx1.GetMtxStr().CStr())); 00533 GS.Add(KronG2, TSecTm(3), TStr::Fmt("KRONECKER2 K(%d, %d) %s", KronG2->GetNodes(), KronG2->GetEdges(), SeedMtx2.GetMtxStr().CStr())); 00534 const TStr Style = "linewidth 1 pointtype 6 pointsize 1"; 00535 // raw data 00536 GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00537 GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00538 GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00539 GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00540 GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00541 GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00542 GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00543 GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00544 GS.ImposeDistr(gsdTriadPart, FNmPref, Desc1, false, false, gpwLinesPoints, Style); 00545 // smooth 00546 GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00547 GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00548 GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00549 GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00550 GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00551 GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00552 
GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00553 GS.ImposeDistr(gsdTriadPart, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style); 00554 } 00555 00556 void TKronMtx::PlotCmpGraphs(const TVec<TKronMtx>& SeedMtxV, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) { 00557 const TStr Desc1 = TStr::Fmt("%s", Desc.CStr()); 00558 TGStatVec GS(tmuNodes, TFSet() | gsdInDeg | gsdOutDeg | gsdWcc | gsdScc | gsdHops | gsdClustCf | gsdSngVec | gsdSngVal); 00559 GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH G(%d, %d)", Graph->GetNodes(), Graph->GetEdges())); 00560 //gsdHops 00561 //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf 00562 for (int m = 0; m < SeedMtxV.Len(); m++) { 00563 const int KronIters = SeedMtxV[m].GetKronIter(Graph->GetNodes()); 00564 PNGraph KronG1 = TKronMtx::GenFastKronecker(SeedMtxV[m], KronIters, true, 0); 00565 printf("*** K(%d, %d) n0=%d\n", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtxV[m].GetDim()); 00566 TSnap::DelZeroDegNodes(KronG1); 00567 printf(" del zero deg K(%d, %d) n0=%d\n", KronG1->GetNodes(), KronG1->GetEdges(), m); 00568 GS.Add(KronG1, TSecTm(m+2), TStr::Fmt("K(%d, %d) n0^k=%d n0=%d", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtxV[m].GetNZeroK(Graph), SeedMtxV[m].GetDim())); 00569 // plot after each Kronecker is done 00570 const TStr Style = "linewidth 1 pointtype 6 pointsize 1"; 00571 GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLines, Style); 00572 GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLines, Style); 00573 GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLines, Style); 00574 GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLines, Style); 00575 GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLines, Style); 00576 GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLines, Style); 00577 GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLines, Style); 00578 GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, 
false, false, gpwLines, Style); 00579 GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLines, Style); 00580 GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLines, Style); 00581 GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLines, Style); 00582 GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLines, Style); 00583 GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLines, Style); 00584 GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLines, Style); 00585 GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLines, Style); 00586 } 00587 // typedef enum { distrUndef, distrInDeg, distrOutDeg, distrWcc, distrScc, 00588 // distrHops, distrWccHops, distrSngVal, distrSngVec, distrClustCf, distrMx } TGraphDistr;*/ 00589 } 00590 00591 void TKronMtx::KronMul(const TKronMtx& Left, const TKronMtx& Right, TKronMtx& Result) { 00592 const int LDim = Left.GetDim(); 00593 const int RDim = Right.GetDim(); 00594 Result.GenMtx(LDim * RDim); 00595 for (int r1 = 0; r1 < LDim; r1++) { 00596 for (int c1 = 0; c1 < LDim; c1++) { 00597 const double& Val = Left.At(r1, c1); 00598 for (int r2 = 0; r2 < RDim; r2++) { 00599 for (int c2 = 0; c2 < RDim; c2++) { 00600 Result.At(r1*RDim+r2, c1*RDim+c2) = Val * Right.At(r2, c2); 00601 } 00602 } 00603 } 00604 } 00605 } 00606 00607 void TKronMtx::KronSum(const TKronMtx& Left, const TKronMtx& Right, TKronMtx& Result) { 00608 const int LDim = Left.GetDim(); 00609 const int RDim = Right.GetDim(); 00610 Result.GenMtx(LDim * RDim); 00611 for (int r1 = 0; r1 < LDim; r1++) { 00612 for (int c1 = 0; c1 < LDim; c1++) { 00613 const double& Val = Left.At(r1, c1); 00614 for (int r2 = 0; r2 < RDim; r2++) { 00615 for (int c2 = 0; c2 < RDim; c2++) { 00616 if (Val == NInf || Right.At(r2, c2) == NInf) { 00617 Result.At(r1*RDim+r2, c1*RDim+c2) = NInf; } 00618 else { 00619 Result.At(r1*RDim+r2, c1*RDim+c2) = Val + Right.At(r2, c2); } 00620 } 00621 } 00622 } 00623 } 00624 } 00625 00626 void 
TKronMtx::KronPwr(const TKronMtx& KronMtx, const int& NIter, TKronMtx& OutMtx) { 00627 OutMtx = KronMtx; 00628 TKronMtx NewOutMtx; 00629 for (int iter = 0; iter < NIter; iter++) { 00630 KronMul(OutMtx, KronMtx, NewOutMtx); 00631 NewOutMtx.Swap(OutMtx); 00632 } 00633 00634 } 00635 00636 void TKronMtx::Dump(const TStr& MtxNm, const bool& Sort) const { 00637 /*printf("%s: %d x %d\n", MtxNm.Empty()?"Mtx":MtxNm.CStr(), GetDim(), GetDim()); 00638 for (int r = 0; r < GetDim(); r++) { 00639 for (int c = 0; c < GetDim(); c++) { printf(" %8.2g", At(r, c)); } 00640 printf("\n"); 00641 }*/ 00642 if (! MtxNm.Empty()) printf("%s\n", MtxNm.CStr()); 00643 double Sum=0.0; 00644 TFltV ValV = SeedMtx; 00645 if (Sort) { ValV.Sort(false); } 00646 for (int i = 0; i < ValV.Len(); i++) { 00647 printf(" %10.4g", ValV[i]()); 00648 Sum += ValV[i]; 00649 if ((i+1) % GetDim() == 0) { printf("\n"); } 00650 } 00651 printf(" (sum:%.4f)\n", Sum); 00652 } 00653 00654 // average difference in the parameters 00655 double TKronMtx::GetAvgAbsErr(const TKronMtx& Kron1, const TKronMtx& Kron2) { 00656 TFltV P1 = Kron1.GetMtx(); 00657 TFltV P2 = Kron2.GetMtx(); 00658 IAssert(P1.Len() == P2.Len()); 00659 P1.Sort(); P2.Sort(); 00660 double delta = 0.0; 00661 for (int i = 0; i < P1.Len(); i++) { 00662 delta += fabs(P1[i] - P2[i]); 00663 } 00664 return delta/P1.Len(); 00665 } 00666 00667 // average L2 difference in the parameters 00668 double TKronMtx::GetAvgFroErr(const TKronMtx& Kron1, const TKronMtx& Kron2) { 00669 TFltV P1 = Kron1.GetMtx(); 00670 TFltV P2 = Kron2.GetMtx(); 00671 IAssert(P1.Len() == P2.Len()); 00672 P1.Sort(); P2.Sort(); 00673 double delta = 0.0; 00674 for (int i = 0; i < P1.Len(); i++) { 00675 delta += pow(P1[i] - P2[i], 2); 00676 } 00677 return sqrt(delta/P1.Len()); 00678 } 00679 00680 // get matrix from matlab matrix notation 00681 TKronMtx TKronMtx::GetMtx(TStr MatlabMtxStr) { 00682 TStrV RowStrV, ColStrV; 00683 MatlabMtxStr.ChangeChAll(',', ' '); 00684 MatlabMtxStr.SplitOnAllCh(';', 
RowStrV); IAssert(! RowStrV.Empty()); 00685 RowStrV[0].SplitOnWs(ColStrV); IAssert(! ColStrV.Empty()); 00686 const int Rows = RowStrV.Len(); 00687 const int Cols = ColStrV.Len(); 00688 IAssert(Rows == Cols); 00689 TKronMtx Mtx(Rows); 00690 for (int r = 0; r < Rows; r++) { 00691 RowStrV[r].SplitOnWs(ColStrV); 00692 IAssert(ColStrV.Len() == Cols); 00693 for (int c = 0; c < Cols; c++) { 00694 Mtx.At(r, c) = (double) ColStrV[c].GetFlt(); } 00695 } 00696 return Mtx; 00697 } 00698 00699 TKronMtx TKronMtx::GetRndMtx(const int& Dim, const double& MinProb) { 00700 TKronMtx Mtx; 00701 Mtx.SetRndMtx(Dim, MinProb); 00702 return Mtx; 00703 } 00704 00705 TKronMtx TKronMtx::GetInitMtx(const int& Dim, const int& Nodes, const int& Edges) { 00706 const double MxParam = 0.8+TKronMtx::Rnd.GetUniDev()/5.0; 00707 const double MnParam = 0.2-TKronMtx::Rnd.GetUniDev()/5.0; 00708 const double Step = (MxParam-MnParam) / (Dim*Dim-1); 00709 TFltV ParamV(Dim*Dim); 00710 if (Dim == 1) { ParamV.PutAll(0.5); } // random graph 00711 else { 00712 for (int p = 0; p < ParamV.Len(); p++) { 00713 ParamV[p] = MxParam - p*Step; } 00714 } 00715 //IAssert(ParamV[0]==MxParam && ParamV.Last()==MnParam); 00716 TKronMtx Mtx(ParamV); 00717 Mtx.SetForEdges(Nodes, Edges); 00718 return Mtx; 00719 } 00720 00721 TKronMtx TKronMtx::GetInitMtx(const TStr& MtxStr, const int& Dim, const int& Nodes, const int& Edges) { 00722 TKronMtx Mtx(Dim); 00723 if (TCh::IsNum(MtxStr[0])) { Mtx = TKronMtx::GetMtx(MtxStr); } 00724 else if (MtxStr[0] == 'r') { Mtx = TKronMtx::GetRndMtx(Dim, 0.1); } 00725 else if (MtxStr[0] == 'a') { 00726 const double Prob = TKronMtx::Rnd.GetUniDev(); 00727 if (Prob < 0.4) { 00728 Mtx = TKronMtx::GetInitMtx(Dim, Nodes, Edges); } 00729 else { // interpolate so that there are in the corners 0.9, 0.5, 0.1, 0.5 00730 const double Max = 0.9+TKronMtx::Rnd.GetUniDev()/10.0; 00731 const double Min = 0.1-TKronMtx::Rnd.GetUniDev()/10.0; 00732 const double Med = (Max-Min)/2.0; 00733 Mtx.At(0,0) = Max; 
Mtx.At(0,Dim-1) = Med; 00734 Mtx.At(Dim-1, 0) = Med; Mtx.At(Dim-1, Dim-1) = Min; 00735 for (int i = 1; i < Dim-1; i++) { 00736 Mtx.At(i,i) = Max - double(i)*(Max-Min)/double(Dim-1); 00737 Mtx.At(i, 0) = Mtx.At(0, i) = Max - double(i)*(Max-Med)/double(Dim-1); 00738 Mtx.At(i, Dim-1) = Mtx.At(Dim-1, i) = Med - double(i)*(Med-Min)/double(Dim-1); 00739 } 00740 for (int i = 1; i < Dim-1; i++) { 00741 for (int j = 1; j < Dim-1; j++) { 00742 if (i >= j) { continue; } 00743 Mtx.At(i,j) = Mtx.At(j,i) = Mtx.At(i,i) - (j-i)*(Mtx.At(i,i)-Mtx.At(i,Dim-1))/(Dim-i-1); 00744 } 00745 } 00746 Mtx.AddRndNoise(0.1); 00747 } 00748 } else { FailR("Wrong mtx: matlab str, or random (r), or all (a)"); } 00749 Mtx.SetForEdges(Nodes, Edges); 00750 return Mtx; 00751 } 00752 00753 TKronMtx TKronMtx::GetMtxFromNm(const TStr& MtxNm) { 00754 if (MtxNm == "3chain") return TKronMtx::GetMtx("1 1 0; 1 1 1; 0 1 1"); 00755 else if (MtxNm == "4star") return TKronMtx::GetMtx("1 1 1 1; 1 1 0 0 ; 1 0 1 0; 1 0 0 1"); 00756 else if (MtxNm == "4chain") return TKronMtx::GetMtx("1 1 0 0; 1 1 1 0 ; 0 1 1 1; 0 0 1 1"); 00757 else if (MtxNm == "4square") return TKronMtx::GetMtx("1 1 0 1; 1 1 1 0 ; 0 1 1 1; 1 0 1 1"); 00758 else if (MtxNm == "5star") return TKronMtx::GetMtx("1 1 1 1 1; 1 1 0 0 0; 1 0 1 0 0; 1 0 0 1 0; 1 0 0 0 1"); 00759 else if (MtxNm == "6star") return TKronMtx::GetMtx("1 1 1 1 1 1; 1 1 0 0 0 0; 1 0 1 0 0 0; 1 0 0 1 0 0; 1 0 0 0 1 0; 1 0 0 0 0 1"); 00760 else if (MtxNm == "7star") return TKronMtx::GetMtx("1 1 1 1 1 1 1; 1 1 0 0 0 0 0; 1 0 1 0 0 0 0; 1 0 0 1 0 0 0; 1 0 0 0 1 0 0; 1 0 0 0 0 1 0; 1 0 0 0 0 0 1"); 00761 else if (MtxNm == "5burst") return TKronMtx::GetMtx("1 1 1 1 0; 1 1 0 0 0; 1 0 1 0 0; 1 0 0 1 1; 0 0 0 1 1"); 00762 else if (MtxNm == "7burst") return TKronMtx::GetMtx("1 0 0 1 0 0 0; 0 1 0 1 0 0 0; 0 0 1 1 0 0 0; 1 1 1 1 1 0 0; 0 0 0 1 1 1 1; 0 0 0 0 1 1 0; 0 0 0 0 1 0 1"); 00763 else if (MtxNm == "7cross") return TKronMtx::GetMtx("1 0 0 1 0 0 0; 0 1 0 1 0 0 0; 0 0 1 1 0 0 0; 1 1 1 1 1 
0 0; 0 0 0 1 1 1 0; 0 0 0 0 1 1 1; 0 0 0 0 0 1 1"); 00764 FailR(TStr::Fmt("Unknow matrix: '%s'", MtxNm.CStr()).CStr()); 00765 return TKronMtx(); 00766 } 00767 00768 TKronMtx TKronMtx::LoadTxt(const TStr& MtxFNm) { 00769 PSs Ss = TSs::LoadTxt(ssfTabSep, MtxFNm); 00770 IAssertR(Ss->GetXLen() == Ss->GetYLen(), "Not a square matrix"); 00771 IAssert(Ss->GetYLen() == Ss->GetXLen()); 00772 TKronMtx Mtx(Ss->GetYLen()); 00773 for (int r = 0; r < Ss->GetYLen(); r++) { 00774 for (int c = 0; c < Ss->GetXLen(); c++) { 00775 Mtx.At(r, c) = (double) Ss->At(c, r).GetFlt(); } 00776 } 00777 return Mtx; 00778 } 00779 00780 00782 // Kronecker Log Likelihood 00783 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TFltV& ParamV, const double& PermPSwapNd): PermSwapNodeProb(PermPSwapNd) { 00784 InitLL(GraphPt, TKronMtx(ParamV)); 00785 } 00786 00787 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const double& PermPSwapNd) : PermSwapNodeProb(PermPSwapNd) { 00788 InitLL(GraphPt, ParamMtx); 00789 } 00790 00791 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const TIntV& NodeIdPermV, const double& PermPSwapNd) : PermSwapNodeProb(PermPSwapNd) { 00792 InitLL(GraphPt, ParamMtx); 00793 NodePerm = NodeIdPermV; 00794 SetIPerm(NodePerm); 00795 } 00796 00797 PKroneckerLL TKroneckerLL::New(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const double& PermPSwapNd) { 00798 return new TKroneckerLL(GraphPt, ParamMtx, PermPSwapNd); 00799 } 00800 00801 PKroneckerLL TKroneckerLL::New(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const TIntV& NodeIdPermV, const double& PermPSwapNd) { 00802 return new TKroneckerLL(GraphPt, ParamMtx, NodeIdPermV, PermPSwapNd); 00803 } 00804 00805 void TKroneckerLL::SetPerm(const char& PermId) { 00806 if (PermId == 'o') { SetOrderPerm(); } 00807 else if (PermId == 'd') { SetDegPerm(); } 00808 else if (PermId == 'r') { SetRndPerm(); } 00809 else if (PermId == 'b') { SetBestDegPerm(); } 00810 else 
FailR("Unknown permutation type (o,d,r)"); 00811 } 00812 00813 void TKroneckerLL::SetOrderPerm() { 00814 NodePerm.Gen(Nodes, 0); 00815 for (int i = 0; i < Graph->GetNodes(); i++) { 00816 NodePerm.Add(i); } 00817 SetIPerm(NodePerm); 00818 } 00819 00820 void TKroneckerLL::SetRndPerm() { 00821 NodePerm.Gen(Nodes, 0); 00822 for (int i = 0; i < Graph->GetNodes(); i++) { 00823 NodePerm.Add(i); } 00824 NodePerm.Shuffle(TKronMtx::Rnd); 00825 SetIPerm(NodePerm); 00826 } 00827 00828 void TKroneckerLL::SetDegPerm() { 00829 TIntPrV DegNIdV; 00830 for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00831 DegNIdV.Add(TIntPr(NI.GetDeg(), NI.GetId())); 00832 } 00833 DegNIdV.Sort(false); 00834 NodePerm.Gen(DegNIdV.Len(), 0); 00835 for (int i = 0; i < DegNIdV.Len(); i++) { 00836 NodePerm.Add(DegNIdV[i].Val2); 00837 } 00838 SetIPerm(NodePerm); 00839 } 00840 00842 void TKroneckerLL::SetBestDegPerm() { 00843 NodePerm.Gen(Nodes); 00844 const int NZero = ProbMtx.GetDim(); 00845 TFltIntPrV DegV(Nodes), CDegV(Nodes); 00846 TFltV Row(NZero); 00847 TFltV Col(NZero); 00848 for(int i = 0; i < NZero; i++) { 00849 for(int j = 0; j < NZero; j++) { 00850 Row[i] += ProbMtx.At(i, j); 00851 Col[i] += ProbMtx.At(j, i); 00852 } 00853 } 00854 00855 for(int i = 0; i < Nodes; i++) { 00856 TNGraph::TNodeI NodeI = Graph->GetNI(i); 00857 int NId = i; 00858 double RowP = 1.0, ColP = 1.0; 00859 for(int j = 0; j < KronIters; j++) { 00860 int Bit = NId % NZero; 00861 RowP *= Row[Bit]; ColP *= Col[Bit]; 00862 NId /= NZero; 00863 } 00864 CDegV[i] = TFltIntPr(RowP + ColP, i); 00865 DegV[i] = TFltIntPr(NodeI.GetDeg(), i); 00866 } 00867 DegV.Sort(false); CDegV.Sort(false); 00868 for(int i = 0; i < Nodes; i++) { 00869 NodePerm[DegV[i].Val2] = CDegV[i].Val2; 00870 } 00871 SetIPerm(NodePerm); 00872 } 00873 00875 void TKroneckerLL::SetIPerm(const TIntV& Perm) { 00876 InvertPerm.Gen(Perm.Len()); 00877 for (int i = 0; i < Perm.Len(); i++) { 00878 InvertPerm[Perm[i]] = i; 00879 } 00880 } 00881 00882 void 
TKroneckerLL::SetGraph(const PNGraph& GraphPt) { 00883 Graph = GraphPt; 00884 bool NodesOk = true; 00885 // check that nodes IDs are {0,1,..,Nodes-1} 00886 for (int nid = 0; nid < Graph->GetNodes(); nid++) { 00887 if (! Graph->IsNode(nid)) { NodesOk=false; break; } } 00888 if (! NodesOk) { 00889 TIntV NIdV; GraphPt->GetNIdV(NIdV); 00890 Graph = TSnap::GetSubGraph(GraphPt, NIdV, true); 00891 for (int nid = 0; nid < Graph->GetNodes(); nid++) { 00892 IAssert(Graph->IsNode(nid)); } 00893 } 00894 Nodes = Graph->GetNodes(); 00895 IAssert(LLMtx.GetDim() > 1 && LLMtx.Len() == ProbMtx.Len()); 00896 KronIters = (int) ceil(log(double(Nodes)) / log(double(ProbMtx.GetDim()))); 00897 // edge vector (for swap-edge permutation proposal) 00898 // if (PermSwapNodeProb < 1.0) { /// !!!!! MYUNGHWAN, CHECK! WHY IS THIS COMMENTED OUT 00899 GEdgeV.Gen(Graph->GetEdges(), 0); 00900 for (TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00901 if (EI.GetSrcNId() != EI.GetDstNId()) { 00902 GEdgeV.Add(TIntTr(EI.GetSrcNId(), EI.GetDstNId(), -1)); 00903 } 00904 } 00905 // } 00906 00907 RealNodes = Nodes; 00908 RealEdges = Graph->GetEdges(); 00909 LEdgeV = TIntTrV(); 00910 LSelfEdge = 0; 00911 } 00912 00913 00914 void TKroneckerLL::AppendIsoNodes() { 00915 Nodes = (int) pow((double)ProbMtx.GetDim(), KronIters); 00916 // add nodes until filling the Kronecker graph model 00917 for (int nid = Graph->GetNodes(); nid < Nodes; nid++) { 00918 Graph->AddNode(nid); 00919 } 00920 } 00921 00923 void TKroneckerLL::RestoreGraph(const bool RestoreNodes) { 00924 // remove from Graph 00925 int NId1, NId2; 00926 for (int e = 0; e < LEdgeV.Len(); e++) { 00927 NId1 = LEdgeV[e].Val1; NId2 = LEdgeV[e].Val2; 00928 Graph->DelEdge(NId1, NId2); 00929 // GEdgeV.DelIfIn(LEdgeV[e]); 00930 } 00931 if(LEdgeV.Len() - LSelfEdge) 00932 GEdgeV.Del(GEdgeV.Len() - LEdgeV.Len() + LSelfEdge, GEdgeV.Len() - 1); 00933 LEdgeV.Clr(); 00934 LSelfEdge = 0; 00935 00936 if(RestoreNodes) { 00937 for(int i = 
Graph->GetNodes()-1; i >= RealNodes; i--) { 00938 Graph->DelNode(i); 00939 } 00940 } 00941 } 00942 00943 double TKroneckerLL::GetFullGraphLL() const { 00944 // the number of times a seed matrix element appears in 00945 // the full kronecker adjacency matrix after KronIter 00946 // kronecker multiplications 00947 double ElemCnt = 1; 00948 const double dim = LLMtx.GetDim(); 00949 // count number of times x appears in the full kronecker matrix 00950 for (int i = 1; i < KronIters; i++) { 00951 ElemCnt = dim*dim*ElemCnt + TMath::Power(dim, 2*i); 00952 } 00953 return ElemCnt * LLMtx.GetMtxSum(); 00954 } 00955 00956 double TKroneckerLL::GetFullRowLL(int RowId) const { 00957 double RowLL = 0.0; 00958 const int MtxDim = LLMtx.GetDim(); 00959 for (int level = 0; level < KronIters; level++) { 00960 RowLL += LLMtx.GetRowSum(RowId % MtxDim); 00961 RowId /= MtxDim; 00962 } 00963 return RowLL; 00964 } 00965 00966 double TKroneckerLL::GetFullColLL(int ColId) const { 00967 double ColLL = 0.0; 00968 const int MtxDim = LLMtx.GetDim(); 00969 for (int level = 0; level < KronIters; level++) { 00970 ColLL += LLMtx.GetColSum(ColId % MtxDim); 00971 ColId /= MtxDim; 00972 } 00973 return ColLL; 00974 } 00975 00976 double TKroneckerLL::GetEmptyGraphLL() const { 00977 double LL = 0; 00978 for (int NId1 = 0; NId1 < LLMtx.GetNodes(KronIters); NId1++) { 00979 for (int NId2 = 0; NId2 < LLMtx.GetNodes(KronIters); NId2++) { 00980 LL = LL + LLMtx.GetNoEdgeLL(NId1, NId2, KronIters); 00981 } 00982 } 00983 return LL; 00984 } 00985 00986 // 2nd prder Taylor approximation, log(1-x) ~ -x - 0.5x^2 00987 double TKroneckerLL::GetApxEmptyGraphLL() const { 00988 double Sum=0.0, SumSq=0.0; 00989 for (int i = 0; i < ProbMtx.Len(); i++) { 00990 Sum += ProbMtx.At(i); 00991 SumSq += TMath::Sqr(ProbMtx.At(i)); 00992 } 00993 return -pow(Sum, KronIters) - 0.5*pow(SumSq, KronIters); 00994 } 00995 00996 void TKroneckerLL::InitLL(const TFltV& ParamV) { 00997 InitLL(TKronMtx(ParamV)); 00998 } 00999 01000 void 
TKroneckerLL::InitLL(const TKronMtx& ParamMtx) { 01001 IAssert(ParamMtx.IsProbMtx()); 01002 ProbMtx = ParamMtx; 01003 ProbMtx.GetLLMtx(LLMtx); 01004 LogLike = TKronMtx::NInf; 01005 if (GradV.Len() != ProbMtx.Len()) { 01006 GradV.Gen(ProbMtx.Len()); } 01007 GradV.PutAll(0.0); 01008 } 01009 01010 void TKroneckerLL::InitLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx) { 01011 IAssert(ParamMtx.IsProbMtx()); 01012 ProbMtx = ParamMtx; 01013 ProbMtx.GetLLMtx(LLMtx); 01014 SetGraph(GraphPt); 01015 LogLike = TKronMtx::NInf; 01016 if (GradV.Len() != ProbMtx.Len()) { 01017 GradV.Gen(ProbMtx.Len()); } 01018 GradV.PutAll(0.0); 01019 } 01020 01021 // exact graph log-likelihood, takes O(N^2 + E) 01022 double TKroneckerLL::CalcGraphLL() { 01023 LogLike = GetEmptyGraphLL(); // takes O(N^2) 01024 for (int nid = 0; nid < Nodes; nid++) { 01025 const TNGraph::TNodeI Node = Graph->GetNI(nid); 01026 const int SrcNId = NodePerm[nid]; 01027 for (int e = 0; e < Node.GetOutDeg(); e++) { 01028 const int DstNId = NodePerm[Node.GetOutNId(e)]; 01029 LogLike = LogLike - LLMtx.GetNoEdgeLL(SrcNId, DstNId, KronIters) 01030 + LLMtx.GetEdgeLL(SrcNId, DstNId, KronIters); 01031 } 01032 } 01033 return LogLike; 01034 } 01035 01036 // approximate graph log-likelihood, takes O(E + N_0) 01037 double TKroneckerLL::CalcApxGraphLL() { 01038 LogLike = GetApxEmptyGraphLL(); // O(N_0) 01039 for (int nid = 0; nid < Nodes; nid++) { 01040 const TNGraph::TNodeI Node = Graph->GetNI(nid); 01041 const int SrcNId = NodePerm[nid]; 01042 for (int e = 0; e < Node.GetOutDeg(); e++) { 01043 const int DstNId = NodePerm[Node.GetOutNId(e)]; 01044 LogLike = LogLike - LLMtx.GetApxNoEdgeLL(SrcNId, DstNId, KronIters) 01045 + LLMtx.GetEdgeLL(SrcNId, DstNId, KronIters); 01046 } 01047 } 01048 return LogLike; 01049 } 01050 01051 // Used in TKroneckerLL::SwapNodesLL: DeltaLL if we 01052 // add the node to the matrix (node gets/creates all 01053 // of its in- and out-edges). 
01054 // Zero is for the empty row/column (isolated node) 01055 double TKroneckerLL::NodeLLDelta(const int& NId) const { 01056 if (! Graph->IsNode(NId)) { return 0.0; } // zero degree node 01057 double Delta = 0.0; 01058 const TNGraph::TNodeI Node = Graph->GetNI(NId); 01059 // out-edges 01060 const int SrcRow = NodePerm[NId]; 01061 for (int e = 0; e < Node.GetOutDeg(); e++) { 01062 const int DstCol = NodePerm[Node.GetOutNId(e)]; 01063 Delta += - LLMtx.GetApxNoEdgeLL(SrcRow, DstCol, KronIters) 01064 + LLMtx.GetEdgeLL(SrcRow, DstCol, KronIters); 01065 } 01066 //in-edges 01067 const int SrcCol = NodePerm[NId]; 01068 for (int e = 0; e < Node.GetInDeg(); e++) { 01069 const int DstRow = NodePerm[Node.GetInNId(e)]; 01070 Delta += - LLMtx.GetApxNoEdgeLL(DstRow, SrcCol, KronIters) 01071 + LLMtx.GetEdgeLL(DstRow, SrcCol, KronIters); 01072 } 01073 // double counted self-edge 01074 if (Graph->IsEdge(NId, NId)) { 01075 Delta += + LLMtx.GetApxNoEdgeLL(SrcRow, SrcCol, KronIters) 01076 - LLMtx.GetEdgeLL(SrcRow, SrcCol, KronIters); 01077 IAssert(SrcRow == SrcCol); 01078 } 01079 return Delta; 01080 } 01081 01082 // swapping two nodes, only need to go over two rows and columns 01083 double TKroneckerLL::SwapNodesLL(const int& NId1, const int& NId2) { 01084 // subtract old LL (remove nodes) 01085 LogLike = LogLike - NodeLLDelta(NId1) - NodeLLDelta(NId2); 01086 const int PrevId1 = NodePerm[NId1], PrevId2 = NodePerm[NId2]; 01087 // double-counted edges 01088 if (Graph->IsEdge(NId1, NId2)) { 01089 LogLike += - LLMtx.GetApxNoEdgeLL(PrevId1, PrevId2, KronIters) 01090 + LLMtx.GetEdgeLL(PrevId1, PrevId2, KronIters); } 01091 if (Graph->IsEdge(NId2, NId1)) { 01092 LogLike += - LLMtx.GetApxNoEdgeLL(PrevId2, PrevId1, KronIters) 01093 + LLMtx.GetEdgeLL(PrevId2, PrevId1, KronIters); } 01094 // swap 01095 NodePerm.Swap(NId1, NId2); 01096 InvertPerm.Swap(NodePerm[NId1], NodePerm[NId2]); 01097 // add new LL (add nodes) 01098 LogLike = LogLike + NodeLLDelta(NId1) + NodeLLDelta(NId2); 01099 const int 
NewId1 = NodePerm[NId1], NewId2 = NodePerm[NId2]; 01100 // correct for double-counted edges 01101 if (Graph->IsEdge(NId1, NId2)) { 01102 LogLike += + LLMtx.GetApxNoEdgeLL(NewId1, NewId2, KronIters) 01103 - LLMtx.GetEdgeLL(NewId1, NewId2, KronIters); } 01104 if (Graph->IsEdge(NId2, NId1)) { 01105 LogLike += + LLMtx.GetApxNoEdgeLL(NewId2, NewId1, KronIters) 01106 - LLMtx.GetEdgeLL(NewId2, NewId1, KronIters); } 01107 return LogLike; 01108 } 01109 01110 // metropolis sampling from P(permutation|graph) 01111 bool TKroneckerLL::SampleNextPerm(int& NId1, int& NId2) { 01112 // pick 2 uniform nodes and swap 01113 if (TKronMtx::Rnd.GetUniDev() < PermSwapNodeProb) { 01114 NId1 = TKronMtx::Rnd.GetUniDevInt(Nodes); 01115 NId2 = TKronMtx::Rnd.GetUniDevInt(Nodes); 01116 while (NId2 == NId1) { NId2 = TKronMtx::Rnd.GetUniDevInt(Nodes); } 01117 } else { 01118 // pick uniform edge and swap endpoints (slow as it moves around high degree nodes) 01119 const int e = TKronMtx::Rnd.GetUniDevInt(GEdgeV.Len()); 01120 NId1 = GEdgeV[e].Val1; NId2 = GEdgeV[e].Val2; 01121 } 01122 const double U = TKronMtx::Rnd.GetUniDev(); 01123 const double OldLL = LogLike; 01124 const double NewLL = SwapNodesLL(NId1, NId2); 01125 const double LogU = log(U); 01126 if (LogU > NewLL - OldLL) { // reject 01127 LogLike = OldLL; 01128 NodePerm.Swap(NId2, NId1); //swap back 01129 InvertPerm.Swap(NodePerm[NId2], NodePerm[NId1]); // swap back 01130 return false; 01131 } 01132 return true; // accept new sample 01133 } 01134 01135 // exact gradient of an empty graph, O(N^2) 01136 double TKroneckerLL::GetEmptyGraphDLL(const int& ParamId) const { 01137 double DLL = 0.0; 01138 for (int NId1 = 0; NId1 < Nodes; NId1++) { 01139 for (int NId2 = 0; NId2 < Nodes; NId2++) { 01140 DLL += LLMtx.GetNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters); 01141 } 01142 } 01143 return DLL; 01144 } 01145 01146 // approx gradient, using 2nd order Taylor approximation, O(N_0^2) 01147 double TKroneckerLL::GetApxEmptyGraphDLL(const 
int& ParamId) const { 01148 double Sum=0.0, SumSq=0.0; 01149 for (int i = 0; i < ProbMtx.Len(); i++) { 01150 Sum += ProbMtx.At(i); 01151 SumSq += TMath::Sqr(ProbMtx.At(i)); 01152 } 01153 // d/dx -sum(x_i) - 0.5sum(x_i^2) = d/dx sum(theta)^k - 0.5 sum(theta^2)^k 01154 return -KronIters*pow(Sum, KronIters-1) - KronIters*pow(SumSq, KronIters-1)*ProbMtx.At(ParamId); 01155 } 01156 01157 // exact graph gradient, runs O(N^2) 01158 const TFltV& TKroneckerLL::CalcGraphDLL() { 01159 for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) { 01160 double DLL = 0.0; 01161 for (int NId1 = 0; NId1 < Nodes; NId1++) { 01162 for (int NId2 = 0; NId2 < Nodes; NId2++) { 01163 if (Graph->IsEdge(NId1, NId2)) { 01164 DLL += LLMtx.GetEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters); 01165 } else { 01166 DLL += LLMtx.GetNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters); 01167 } 01168 } 01169 } 01170 GradV[ParamId] = DLL; 01171 } 01172 return GradV; 01173 } 01174 01175 // slow (but correct) approximate gradient, runs O(N^2) 01176 const TFltV& TKroneckerLL::CalcFullApxGraphDLL() { 01177 for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) { 01178 double DLL = 0.0; 01179 for (int NId1 = 0; NId1 < Nodes; NId1++) { 01180 for (int NId2 = 0; NId2 < Nodes; NId2++) { 01181 if (Graph->IsEdge(NId1, NId2)) { 01182 DLL += LLMtx.GetEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters); 01183 } else { 01184 DLL += LLMtx.GetApxNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters); 01185 } 01186 } 01187 } 01188 GradV[ParamId] = DLL; 01189 } 01190 return GradV; 01191 } 01192 01193 // fast approximate gradient, runs O(E) 01194 const TFltV& TKroneckerLL::CalcApxGraphDLL() { 01195 for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) { 01196 double DLL = GetApxEmptyGraphDLL(ParamId); 01197 for (int nid = 0; nid < Nodes; nid++) { 01198 const TNGraph::TNodeI Node = Graph->GetNI(nid); 01199 const int SrcNId = NodePerm[nid]; 01200 for (int e = 0; e < Node.GetOutDeg(); e++) 
{ 01201 const int DstNId = NodePerm[Node.GetOutNId(e)]; 01202 DLL = DLL - LLMtx.GetApxNoEdgeDLL(ParamId, SrcNId, DstNId, KronIters) 01203 + LLMtx.GetEdgeDLL(ParamId, SrcNId, DstNId, KronIters); 01204 } 01205 } 01206 GradV[ParamId] = DLL; 01207 } 01208 return GradV; 01209 } 01210 01211 // Used in TKroneckerLL::UpdateGraphDLL: DeltaDLL if we 01212 // add the node to the empty matrix/graph (node 01213 // gets/creates all of its in- and out-edges). 01214 double TKroneckerLL::NodeDLLDelta(const int ParamId, const int& NId) const { 01215 if (! Graph->IsNode(NId)) { return 0.0; } // zero degree node 01216 double Delta = 0.0; 01217 const TNGraph::TNodeI Node = Graph->GetNI(NId); 01218 const int SrcRow = NodePerm[NId]; 01219 for (int e = 0; e < Node.GetOutDeg(); e++) { 01220 const int DstCol = NodePerm[Node.GetOutNId(e)]; 01221 Delta += - LLMtx.GetApxNoEdgeDLL(ParamId, SrcRow, DstCol, KronIters) 01222 + LLMtx.GetEdgeDLL(ParamId, SrcRow, DstCol, KronIters); 01223 } 01224 const int SrcCol = NodePerm[NId]; 01225 for (int e = 0; e < Node.GetInDeg(); e++) { 01226 const int DstRow = NodePerm[Node.GetInNId(e)]; 01227 Delta += - LLMtx.GetApxNoEdgeDLL(ParamId, DstRow, SrcCol, KronIters) 01228 + LLMtx.GetEdgeDLL(ParamId, DstRow, SrcCol, KronIters); 01229 } 01230 // double counter self-edge 01231 if (Graph->IsEdge(NId, NId)) { 01232 Delta += + LLMtx.GetApxNoEdgeDLL(ParamId, SrcRow, SrcCol, KronIters) 01233 - LLMtx.GetEdgeDLL(ParamId, SrcRow, SrcCol, KronIters); 01234 IAssert(SrcRow == SrcCol); 01235 } 01236 return Delta; 01237 } 01238 01239 // given old DLL and new permutation, efficiently updates the DLL 01240 // permutation is new, but DLL is old 01241 void TKroneckerLL::UpdateGraphDLL(const int& SwapNId1, const int& SwapNId2) { 01242 for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) { 01243 // permutation before the swap (swap back to previous position) 01244 NodePerm.Swap(SwapNId1, SwapNId2); 01245 // subtract old DLL 01246 TFlt& DLL = GradV[ParamId]; 01247 DLL = DLL - 
NodeDLLDelta(ParamId, SwapNId1) - NodeDLLDelta(ParamId, SwapNId2); 01248 // double-counted edges 01249 const int PrevId1 = NodePerm[SwapNId1], PrevId2 = NodePerm[SwapNId2]; 01250 if (Graph->IsEdge(SwapNId1, SwapNId2)) { 01251 DLL += - LLMtx.GetApxNoEdgeDLL(ParamId, PrevId1, PrevId2, KronIters) 01252 + LLMtx.GetEdgeDLL(ParamId, PrevId1, PrevId2, KronIters); } 01253 if (Graph->IsEdge(SwapNId2, SwapNId1)) { 01254 DLL += - LLMtx.GetApxNoEdgeDLL(ParamId, PrevId2, PrevId1, KronIters) 01255 + LLMtx.GetEdgeDLL(ParamId, PrevId2, PrevId1, KronIters); } 01256 // permutation after the swap (restore the swap) 01257 NodePerm.Swap(SwapNId1, SwapNId2); 01258 // add new DLL 01259 DLL = DLL + NodeDLLDelta(ParamId, SwapNId1) + NodeDLLDelta(ParamId, SwapNId2); 01260 const int NewId1 = NodePerm[SwapNId1], NewId2 = NodePerm[SwapNId2]; 01261 // double-counted edges 01262 if (Graph->IsEdge(SwapNId1, SwapNId2)) { 01263 DLL += + LLMtx.GetApxNoEdgeDLL(ParamId, NewId1, NewId2, KronIters) 01264 - LLMtx.GetEdgeDLL(ParamId, NewId1, NewId2, KronIters); } 01265 if (Graph->IsEdge(SwapNId2, SwapNId1)) { 01266 DLL += + LLMtx.GetApxNoEdgeDLL(ParamId, NewId2, NewId1, KronIters) 01267 - LLMtx.GetEdgeDLL(ParamId, NewId2, NewId1, KronIters); } 01268 } 01269 } 01270 01271 void TKroneckerLL::SampleGradient(const int& WarmUp, const int& NSamples, double& AvgLL, TFltV& AvgGradV) { 01272 printf("SampleGradient: %s (%s warm-up):", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr()); 01273 int NId1=0, NId2=0, NAccept=0; 01274 TExeTm ExeTm1; 01275 if (WarmUp > 0) { 01276 CalcApxGraphLL(); 01277 for (int s = 0; s < WarmUp; s++) { SampleNextPerm(NId1, NId2); } 01278 printf(" warm-up:%s,", ExeTm1.GetTmStr()); ExeTm1.Tick(); 01279 } 01280 CalcApxGraphLL(); // re-calculate LL (due to numerical errors) 01281 CalcApxGraphDLL(); 01282 AvgLL = 0; 01283 AvgGradV.Gen(LLMtx.Len()); AvgGradV.PutAll(0.0); 01284 printf(" sampl"); 01285 for (int s = 0; s < NSamples; s++) { 01286 if (SampleNextPerm(NId1, NId2)) { 
// new permutation 01287 UpdateGraphDLL(NId1, NId2); NAccept++; } 01288 for (int m = 0; m < LLMtx.Len(); m++) { AvgGradV[m] += GradV[m]; } 01289 AvgLL += GetLL(); 01290 } 01291 printf("ing"); 01292 AvgLL = AvgLL / double(NSamples); 01293 for (int m = 0; m < LLMtx.Len(); m++) { 01294 AvgGradV[m] = AvgGradV[m] / double(NSamples); } 01295 printf(":%s (%.0f/s), accept %.1f%%\n", ExeTm1.GetTmStr(), double(NSamples)/ExeTm1.GetSecs(), 01296 double(100*NAccept)/double(NSamples)); 01297 } 01298 01299 double TKroneckerLL::GradDescent(const int& NIter, const double& LrnRate, double MnStep, double MxStep, const int& WarmUp, const int& NSamples) { 01300 printf("\n----------------------------------------------------------------------\n"); 01301 printf("Fitting graph on %d nodes, %d edges\n", Graph->GetNodes(), Graph->GetEdges()); 01302 printf("Kron iters: %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters())); 01303 TExeTm IterTm, TotalTm; 01304 double OldLL=-1e10, CurLL=0; 01305 const double EZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters)); 01306 TFltV CurGradV, LearnRateV(GetParams()), LastStep(GetParams()); 01307 LearnRateV.PutAll(LrnRate); 01308 TKronMtx NewProbMtx = ProbMtx; 01309 01310 if(DebugMode) { 01311 LLV.Gen(NIter, 0); 01312 MtxV.Gen(NIter, 0); 01313 } 01314 01315 for (int Iter = 0; Iter < NIter; Iter++) { 01316 printf("%03d] ", Iter); 01317 SampleGradient(WarmUp, NSamples, CurLL, CurGradV); 01318 for (int p = 0; p < GetParams(); p++) { 01319 LearnRateV[p] *= 0.95; 01320 if (Iter < 1) { 01321 while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; } 01322 while (fabs(LearnRateV[p]*CurGradV[p]) < 0.02) { LearnRateV[p] *= (1.0/0.95); } // move more 01323 } else { 01324 // set learn rate so that move for each parameter is inside the [MnStep, MxStep] 01325 while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");} 01326 while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); 
printf("*");} 01327 if (MxStep > 3*MnStep) { MxStep *= 0.95; } 01328 } 01329 NewProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*CurGradV[p]; 01330 if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; } 01331 if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; } 01332 } 01333 printf(" trueE0: %.2f (%d), estE0: %.2f (%d), ERR: %f\n", EZero, Graph->GetEdges(), 01334 ProbMtx.GetMtxSum(), ProbMtx.GetEdges(KronIters), fabs(EZero-ProbMtx.GetMtxSum())); 01335 printf(" currLL: %.4f, deltaLL: %.4f\n", CurLL, CurLL-OldLL); // positive is good 01336 for (int p = 0; p < GetParams(); p++) { 01337 printf(" %d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p), 01338 ProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); 01339 } 01340 if (Iter+1 < NIter) { // skip last update 01341 ProbMtx = NewProbMtx; ProbMtx.GetLLMtx(LLMtx); } 01342 OldLL=CurLL; 01343 printf("\n"); fflush(stdout); 01344 01345 if(DebugMode) { 01346 LLV.Add(CurLL); 01347 MtxV.Add(NewProbMtx); 01348 } 01349 } 01350 printf("TotalExeTm: %s %g\n", TotalTm.GetStr(), TotalTm.GetSecs()); 01351 ProbMtx.Dump("FITTED PARAMS", false); 01352 return CurLL; 01353 } 01354 01355 double TKroneckerLL::GradDescent2(const int& NIter, const double& LrnRate, double MnStep, double MxStep, const int& WarmUp, const int& NSamples) { 01356 printf("\n----------------------------------------------------------------------\n"); 01357 printf("GradDescent2\n"); 01358 printf("Fitting graph on %d nodes, %d edges\n", Graph->GetNodes(), Graph->GetEdges()); 01359 printf("Skip moves that make likelihood smaller\n"); 01360 printf("Kron iters: %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters())); 01361 TExeTm IterTm, TotalTm; 01362 double CurLL=0, NewLL=0; 01363 const double EZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters)); 01364 TFltV CurGradV, NewGradV, LearnRateV(GetParams()), LastStep(GetParams()); 01365 LearnRateV.PutAll(LrnRate); 01366 TKronMtx NewProbMtx=ProbMtx, 
CurProbMtx=ProbMtx; 01367 bool GoodMove = false; 01368 // Start 01369 for (int Iter = 0; Iter < NIter; Iter++) { 01370 printf("%03d] ", Iter); 01371 if (! GoodMove) { SampleGradient(WarmUp, NSamples, CurLL, CurGradV); } 01372 CurProbMtx = ProbMtx; 01373 // update parameters 01374 for (int p = 0; p < GetParams(); p++) { 01375 while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");} 01376 while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");} 01377 NewProbMtx.At(p) = CurProbMtx.At(p) + LearnRateV[p]*CurGradV[p]; 01378 if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; } 01379 if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; } 01380 LearnRateV[p] *= 0.95; 01381 } 01382 printf(" "); 01383 ProbMtx=NewProbMtx; ProbMtx.GetLLMtx(LLMtx); 01384 SampleGradient(WarmUp, NSamples, NewLL, NewGradV); 01385 if (NewLL > CurLL) { // accept the move 01386 printf("== Good move:\n"); 01387 printf(" trueE0: %.2f (%d), estE0: %.2f (%d), ERR: %f\n", EZero, Graph->GetEdges(), 01388 ProbMtx.GetMtxSum(), ProbMtx.GetEdges(KronIters), fabs(EZero-ProbMtx.GetMtxSum())); 01389 printf(" currLL: %.4f deltaLL: %.4f\n", CurLL, NewLL-CurLL); // positive is good 01390 for (int p = 0; p < GetParams(); p++) { 01391 printf(" %d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p), 01392 CurProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); } 01393 CurLL = NewLL; 01394 CurGradV = NewGradV; 01395 GoodMove = true; 01396 } else { 01397 printf("** BAD move:\n"); 01398 printf(" *trueE0: %.2f (%d), estE0: %.2f (%d), ERR: %f\n", EZero, Graph->GetEdges(), 01399 ProbMtx.GetMtxSum(), ProbMtx.GetEdges(KronIters), fabs(EZero-ProbMtx.GetMtxSum())); 01400 printf(" *curLL: %.4f deltaLL: %.4f\n", CurLL, NewLL-CurLL); // positive is good 01401 for (int p = 0; p < GetParams(); p++) { 01402 printf(" b%d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p), 01403 CurProbMtx.At(p), 
(double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); } 01404 // move to old position 01405 ProbMtx = CurProbMtx; ProbMtx.GetLLMtx(LLMtx); 01406 GoodMove = false; 01407 } 01408 printf("\n"); fflush(stdout); 01409 } 01410 printf("TotalExeTm: %s %g\n", TotalTm.GetStr(), TotalTm.GetSecs()); 01411 ProbMtx.Dump("FITTED PARAMS\n", false); 01412 return CurLL; 01413 } 01414 01416 // filling in random edges for KronEM 01417 void TKroneckerLL::SetRandomEdges(const int& NEdges, const bool isDir) { 01418 int count = 0, added = 0, collision = 0; 01419 const int MtxDim = ProbMtx.GetDim(); 01420 const double MtxSum = ProbMtx.GetMtxSum(); 01421 TVec<TFltIntIntTr> ProbToRCPosV; // row, col position 01422 double CumProb = 0.0; 01423 01424 for(int r = 0; r < MtxDim; r++) { 01425 for(int c = 0; c < MtxDim; c++) { 01426 const double Prob = ProbMtx.At(r, c); 01427 if (Prob > 0.0) { 01428 CumProb += Prob; 01429 ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c)); 01430 } 01431 } 01432 } 01433 01434 int Rng, Row, Col, n, NId1, NId2; 01435 while(added < NEdges) { 01436 Rng = Nodes; Row = 0; Col = 0; 01437 for (int iter = 0; iter < KronIters; iter++) { 01438 const double& Prob = TKronMtx::Rnd.GetUniDev(); 01439 n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; } 01440 const int MtxRow = ProbToRCPosV[n].Val2; 01441 const int MtxCol = ProbToRCPosV[n].Val3; 01442 Rng /= MtxDim; 01443 Row += MtxRow * Rng; 01444 Col += MtxCol * Rng; 01445 } 01446 01447 count++; 01448 01449 NId1 = InvertPerm[Row]; NId2 = InvertPerm[Col]; 01450 01451 // Check conflicts 01452 if(EMType != kronEdgeMiss && IsObsEdge(NId1, NId2)) { 01453 continue; 01454 } 01455 01456 if (! Graph->IsEdge(NId1, NId2)) { 01457 Graph->AddEdge(NId1, NId2); 01458 if(NId1 != NId2) { GEdgeV.Add(TIntTr(NId1, NId2, LEdgeV.Len())); } 01459 else { LSelfEdge++; } 01460 LEdgeV.Add(TIntTr(NId1, NId2, GEdgeV.Len()-1)); 01461 added++; 01462 if (! 
isDir) { 01463 if (NId1 != NId2) { 01464 Graph->AddEdge(NId2, NId1); 01465 GEdgeV.Add(TIntTr(NId2, NId1, LEdgeV.Len())); 01466 LEdgeV.Add(TIntTr(NId2, NId1, GEdgeV.Len()-1)); 01467 added++; 01468 } 01469 } 01470 } else { collision ++; } 01471 } 01472 // printf("total = %d / added = %d / collision = %d\n", count, added, collision); 01473 } 01474 01475 // sampling setup for KronEM 01476 void TKroneckerLL::MetroGibbsSampleSetup(const int& WarmUp) { 01477 double alpha = log(ProbMtx.GetMtxSum()) / log(double(ProbMtx.GetDim())); 01478 int NId1 = 0, NId2 = 0; 01479 int NMissing; 01480 01481 RestoreGraph(false); 01482 if(EMType == kronEdgeMiss) { 01483 CalcApxGraphLL(); 01484 for (int s = 0; s < WarmUp; s++) SampleNextPerm(NId1, NId2); 01485 } 01486 01487 if(EMType == kronFutureLink) { 01488 NMissing = (int) (pow(ProbMtx.GetMtxSum(), KronIters) - pow(double(RealNodes), alpha)); 01489 } else if(EMType == kronEdgeMiss) { 01490 NMissing = MissEdges; 01491 } else { 01492 NMissing = (int) (pow(ProbMtx.GetMtxSum(), KronIters) * (1.0 - pow(double(RealNodes) / double(Nodes), 2))); 01493 } 01494 NMissing = (NMissing < 1) ? 1 : NMissing; 01495 01496 SetRandomEdges(NMissing, true); 01497 01498 CalcApxGraphLL(); 01499 for (int s = 0; s < WarmUp; s++) SampleNextPerm(NId1, NId2); 01500 } 01501 01502 // Metropolis-Hastings steps for KronEM 01503 void TKroneckerLL::MetroGibbsSampleNext(const int& WarmUp, const bool DLLUpdate) { 01504 int NId1 = 0, NId2 = 0, hit = 0, GId = 0; 01505 TIntTr EdgeToRemove, NewEdge; 01506 double RndAccept; 01507 01508 if(LEdgeV.Len()) { 01509 for(int i = 0; i < WarmUp; i++) { 01510 hit = TKronMtx::Rnd.GetUniDevInt(LEdgeV.Len()); 01511 01512 NId1 = LEdgeV[hit].Val1; NId2 = LEdgeV[hit].Val2; 01513 GId = LEdgeV[hit].Val3; 01514 SetRandomEdges(1, true); 01515 NewEdge = LEdgeV.Last(); 01516 01517 RndAccept = (1.0 - exp(LLMtx.GetEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters))) / (1.0 - exp(LLMtx.GetEdgeLL(NId1, NId2, KronIters))); 01518 RndAccept = (RndAccept > 1.0) ? 
1.0 : RndAccept; 01519 01520 if(TKronMtx::Rnd.GetUniDev() > RndAccept) { // reject 01521 Graph->DelEdge(NewEdge.Val1, NewEdge.Val2); 01522 if(NewEdge.Val1 != NewEdge.Val2) { GEdgeV.DelLast(); } 01523 else { LSelfEdge--; } 01524 LEdgeV.DelLast(); 01525 } else { // accept 01526 Graph->DelEdge(NId1, NId2); 01527 LEdgeV[hit] = LEdgeV.Last(); 01528 LEdgeV.DelLast(); 01529 if(NId1 == NId2) { 01530 LSelfEdge--; 01531 if(NewEdge.Val1 != NewEdge.Val2) { 01532 GEdgeV[GEdgeV.Len()-1].Val3 = hit; 01533 } 01534 } else { 01535 IAssertR(GEdgeV.Last().Val3 >= 0, "Invalid indexing"); 01536 01537 GEdgeV[GId] = GEdgeV.Last(); 01538 if(NewEdge.Val1 != NewEdge.Val2) { 01539 GEdgeV[GId].Val3 = hit; 01540 } 01541 LEdgeV[GEdgeV[GId].Val3].Val3 = GId; 01542 GEdgeV.DelLast(); 01543 } 01544 01545 LogLike += LLMtx.GetApxNoEdgeLL(EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters) - LLMtx.GetEdgeLL(EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters); 01546 LogLike += -LLMtx.GetApxNoEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters) + LLMtx.GetEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters); 01547 01548 if(DLLUpdate) { 01549 for (int p = 0; p < LLMtx.Len(); p++) { 01550 GradV[p] += LLMtx.GetApxNoEdgeDLL(p, EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters) - LLMtx.GetEdgeDLL(p, EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters); 01551 GradV[p] += -LLMtx.GetApxNoEdgeDLL(p, NewEdge.Val1, NewEdge.Val2, KronIters) + LLMtx.GetEdgeDLL(p, NewEdge.Val1, NewEdge.Val2, KronIters); 01552 } 01553 } 01554 } 01555 } 01556 } 01557 01558 // CalcApxGraphLL(); 01559 for (int s = 0; s < WarmUp; s++) { 01560 if(SampleNextPerm(NId1, NId2)) { 01561 if(DLLUpdate) UpdateGraphDLL(NId1, NId2); 01562 } 01563 } 01564 } 01565 01566 // E-step in KronEM 01567 void TKroneckerLL::RunEStep(const int& GibbsWarmUp, const int& WarmUp, const int& NSamples, TFltV& LLV, TVec<TFltV>& DLLV) { 01568 TExeTm ExeTm, TotalTm; 01569 LLV.Gen(NSamples, 0); 01570 DLLV.Gen(NSamples, 0); 01571 01572 ExeTm.Tick(); 01573 for(int i = 0; i < 2; i++) 
MetroGibbsSampleSetup(WarmUp); 01574 printf(" Warm-Up [%u] : %s\n", WarmUp, ExeTm.GetTmStr()); 01575 CalcApxGraphLL(); 01576 for(int i = 0; i < GibbsWarmUp; i++) MetroGibbsSampleNext(10, false); 01577 printf(" Gibbs Warm-Up [%u] : %s\n", GibbsWarmUp, ExeTm.GetTmStr()); 01578 01579 ExeTm.Tick(); 01580 CalcApxGraphLL(); 01581 CalcApxGraphDLL(); 01582 for(int i = 0; i < NSamples; i++) { 01583 MetroGibbsSampleNext(50, false); 01584 01585 LLV.Add(LogLike); 01586 DLLV.Add(GradV); 01587 01588 int OnePercent = (i+1) % (NSamples / 10); 01589 if(OnePercent == 0) { 01590 int TenPercent = ((i+1) / (NSamples / 10)) * 10; 01591 printf(" %3u%% done : %s\n", TenPercent, ExeTm.GetTmStr()); 01592 } 01593 } 01594 } 01595 01596 // M-step in KronEM 01597 double TKroneckerLL::RunMStep(const TFltV& LLV, const TVec<TFltV>& DLLV, const int& GradIter, const double& LrnRate, double MnStep, double MxStep) { 01598 TExeTm IterTm, TotalTm; 01599 double OldLL=LogLike, CurLL=0; 01600 const double alpha = log(double(RealEdges)) / log(double(RealNodes)); 01601 const double EZero = pow(double(ProbMtx.GetDim()), alpha); 01602 01603 TFltV CurGradV(GetParams()), LearnRateV(GetParams()), LastStep(GetParams()); 01604 LearnRateV.PutAll(LrnRate); 01605 01606 TKronMtx NewProbMtx = ProbMtx; 01607 const int NSamples = LLV.Len(); 01608 const int ReCalcLen = NSamples / 10; 01609 01610 for (int s = 0; s < LLV.Len(); s++) { 01611 CurLL += LLV[s]; 01612 for(int p = 0; p < GetParams(); p++) { CurGradV[p] += DLLV[s][p]; } 01613 } 01614 CurLL /= NSamples; 01615 for(int p = 0; p < GetParams(); p++) { CurGradV[p] /= NSamples; } 01616 01617 double MaxLL = CurLL; 01618 TKronMtx MaxProbMtx = ProbMtx; 01619 TKronMtx OldProbMtx = ProbMtx; 01620 01621 for (int Iter = 0; Iter < GradIter; Iter++) { 01622 printf(" %03d] ", Iter+1); 01623 IterTm.Tick(); 01624 01625 for (int p = 0; p < GetParams(); p++) { 01626 if (Iter < 1) { 01627 while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; } 01628 while 
(fabs(LearnRateV[p]*CurGradV[p]) < 5 * MnStep) { LearnRateV[p] *= (1.0/0.95); } // move more 01629 } else { 01630 // set learn rate so that move for each parameter is inside the [MnStep, MxStep] 01631 while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");} 01632 while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");} 01633 if (MxStep > 3*MnStep) { MxStep *= 0.95; } 01634 } 01635 NewProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*CurGradV[p]; 01636 if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; } 01637 if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; } 01638 LearnRateV[p] *= 0.95; 01639 } 01640 printf(" trueE0: %.2f (%u from %u), estE0: %.2f (%u from %u), ERR: %f\n", EZero, RealEdges(), RealNodes(), ProbMtx.GetMtxSum(), Graph->GetEdges(), Graph->GetNodes(), fabs(EZero-ProbMtx.GetMtxSum())); 01641 printf(" currLL: %.4f, deltaLL: %.4f\n", CurLL, CurLL-OldLL); // positive is good 01642 for (int p = 0; p < GetParams(); p++) { 01643 printf(" %d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p), 01644 ProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); 01645 } 01646 01647 OldLL=CurLL; 01648 if(Iter == GradIter - 1) { 01649 break; 01650 } 01651 01652 CurLL = 0; 01653 CurGradV.PutAll(0.0); 01654 TFltV OneDLL; 01655 01656 CalcApxGraphLL(); 01657 CalcApxGraphDLL(); 01658 01659 for(int s = 0; s < NSamples; s++) { 01660 ProbMtx = OldProbMtx; ProbMtx.GetLLMtx(LLMtx); 01661 MetroGibbsSampleNext(10, true); 01662 ProbMtx = NewProbMtx; ProbMtx.GetLLMtx(LLMtx); 01663 if(s % ReCalcLen == ReCalcLen/2) { 01664 CurLL += CalcApxGraphLL(); 01665 OneDLL = CalcApxGraphDLL(); 01666 } else { 01667 CurLL += LogLike; 01668 OneDLL = GradV; 01669 } 01670 for(int p = 0; p < GetParams(); p++) { 01671 CurGradV[p] += OneDLL[p]; 01672 } 01673 } 01674 CurLL /= NSamples; 01675 01676 if(MaxLL < CurLL) { 01677 MaxLL = CurLL; MaxProbMtx = ProbMtx; 01678 } 01679 01680 printf(" 
Time: %s\n", IterTm.GetTmStr()); 01681 printf("\n"); fflush(stdout); 01682 } 01683 ProbMtx = MaxProbMtx; ProbMtx.GetLLMtx(LLMtx); 01684 01685 printf(" FinalLL : %f, TotalExeTm: %s\n", MaxLL, TotalTm.GetTmStr()); 01686 ProbMtx.Dump(" FITTED PARAMS", false); 01687 01688 return MaxLL; 01689 } 01690 01691 // KronEM 01692 void TKroneckerLL::RunKronEM(const int& EMIter, const int& GradIter, double LrnRate, double MnStep, double MxStep, const int& GibbsWarmUp, const int& WarmUp, const int& NSamples, const TKronEMType& Type, const int& NMissing) { 01693 printf("\n----------------------------------------------------------------------\n"); 01694 printf("Fitting graph on %d nodes, %d edges\n", int(RealNodes), int(RealEdges)); 01695 printf("Kron iters: %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters())); 01696 01697 TFltV LLV(NSamples); 01698 TVec<TFltV> DLLV(NSamples); 01699 //int count = 0; 01700 01701 EMType = Type; 01702 MissEdges = NMissing; 01703 AppendIsoNodes(); 01704 SetRndPerm(); 01705 01706 if(DebugMode) { 01707 LLV.Gen(EMIter, 0); 01708 MtxV.Gen(EMIter, 0); 01709 } 01710 01711 for(int i = 0; i < EMIter; i++) { 01712 printf("\n----------------------------------------------------------------------\n"); 01713 printf("%03d EM-iter] E-Step\n", i+1); 01714 RunEStep(GibbsWarmUp, WarmUp, NSamples, LLV, DLLV); 01715 printf("\n\n"); 01716 01717 printf("%03d EM-iter] M-Step\n", i+1); 01718 double CurLL = RunMStep(LLV, DLLV, GradIter, LrnRate, MnStep, MxStep); 01719 printf("\n\n"); 01720 01721 if(DebugMode) { 01722 LLV.Add(CurLL); 01723 MtxV.Add(ProbMtx); 01724 } 01725 } 01726 01727 RestoreGraph(); 01728 } 01729 01730 01731 01732 void GetMinMax(const TFltPrV& XYValV, double& Min, double& Max, const bool& ResetMinMax) { 01733 if (ResetMinMax) { Min = TFlt::Mx; Max = TFlt::Mn; } 01734 for (int i = 0; i < XYValV.Len(); i++) { 01735 Min = TMath::Mn(Min, XYValV[i].Val2.Val); 01736 Max = TMath::Mx(Max, XYValV[i].Val2.Val); 01737 } 01738 } 01739 01740 void 
PlotGrad(const TFltPrV& EstLLV, const TFltPrV& TrueLLV, const TVec<TFltPrV>& GradVV, const TFltPrV& AcceptV, const TStr& OutFNm, const TStr& Desc) { 01741 double Min, Max, Min1, Max1; 01742 // plot log-likelihood 01743 { TGnuPlot GP("sLL-"+OutFNm, TStr::Fmt("Log-likelihood (avg 1k samples). %s", Desc.CStr()), true); 01744 GP.AddPlot(EstLLV, gpwLines, "Esimated LL", "linewidth 1"); 01745 if (! TrueLLV.Empty()) { GP.AddPlot(TrueLLV, gpwLines, "TRUE LL", "linewidth 1"); } 01746 //GetMinMax(EstLLV, Min, Max, true); GetMinMax(TrueLLV, Min, Max, false); 01747 //GP.SetYRange((int)floor(Min-1), (int)ceil(Max+1)); 01748 GP.SetXYLabel("Sample Index (time)", "Log-likelihood"); 01749 GP.SavePng(); } 01750 // plot accept 01751 { TGnuPlot GP("sAcc-"+OutFNm, TStr::Fmt("Pct. accepted rnd moves (over 1k samples). %s", Desc.CStr()), true); 01752 GP.AddPlot(AcceptV, gpwLines, "Pct accepted swaps", "linewidth 1"); 01753 GP.SetXYLabel("Sample Index (time)", "Pct accept permutation swaps"); 01754 GP.SavePng(); } 01755 // plot grads 01756 TGnuPlot GPAll("sGradAll-"+OutFNm, TStr::Fmt("Gradient (avg 1k samples). %s", Desc.CStr()), true); 01757 GetMinMax(GradVV[0], Min1, Max1, true); 01758 for (int g = 0; g < GradVV.Len(); g++) { 01759 GPAll.AddPlot(GradVV[g], gpwLines, TStr::Fmt("param %d", g+1), "linewidth 1"); 01760 GetMinMax(GradVV[g], Min1, Max1, false); 01761 TGnuPlot GP(TStr::Fmt("sGrad%02d-", g+1)+OutFNm, TStr::Fmt("Gradient (avg 1k samples). 
%s", Desc.CStr()), true); 01762 GP.AddPlot(GradVV[g], gpwLines, TStr::Fmt("param id %d", g+1), "linewidth 1"); 01763 GetMinMax(GradVV[g], Min, Max, true); 01764 GP.SetYRange((int)floor(Min-1), (int)ceil(Max+1)); 01765 GP.SetXYLabel("Sample Index (time)", "Gradient"); 01766 GP.SavePng(); 01767 } 01768 GPAll.SetYRange((int)floor(Min1-1), (int)ceil(Max1+1)); 01769 GPAll.SetXYLabel("Sample Index (time)", "Gradient"); 01770 GPAll.SavePng(); 01771 } 01772 01773 void PlotAutoCorrelation(const TFltV& ValV, const int& MaxK, const TStr& OutFNm, const TStr& Desc) { 01774 double Avg=0.0, Var=0.0; 01775 for (int i = 0; i < ValV.Len(); i++) { Avg += ValV[i]; } 01776 Avg /= (double) ValV.Len(); 01777 for (int i = 0; i < ValV.Len(); i++) { Var += TMath::Sqr(ValV[i]-Avg); } 01778 TFltPrV ACorrV; 01779 for (int k = 0; k < TMath::Mn(ValV.Len(), MaxK); k++) { 01780 double corr = 0.0; 01781 for (int i = 0; i < ValV.Len() - k; i++) { 01782 corr += (ValV[i]-Avg)*(ValV[i+k]-Avg); 01783 } 01784 ACorrV.Add(TFltPr(k, corr/Var)); 01785 } 01786 // plot grads 01787 TGnuPlot GP("sAutoCorr-"+OutFNm, TStr::Fmt("AutoCorrelation (%d samples). 
%s", ValV.Len(), Desc.CStr()), true); 01788 GP.AddPlot(ACorrV, gpwLines, "", "linewidth 1"); 01789 GP.SetXYLabel("Lag, k", "Autocorrelation, r_k"); 01790 GP.SavePng(); 01791 } 01792 01793 // sample permutations and plot the current gradient and log-likelihood as the function 01794 // of the number of samples 01795 TFltV TKroneckerLL::TestSamplePerm(const TStr& OutFNm, const int& WarmUp, const int& NSamples, const TKronMtx& TrueMtx, const bool& DoPlot) { 01796 printf("Sample permutations: %s (warm-up: %s)\n", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr()); 01797 int NId1=0, NId2=0, NAccept=0; 01798 TExeTm ExeTm; 01799 const int PlotLen = NSamples/1000+1; 01800 double TrueLL=-1, AvgLL=0.0; 01801 TFltV AvgGradV(GetParams()); 01802 TFltPrV TrueLLV(PlotLen, 0); // true log-likelihood (under the correct permutation) 01803 TFltPrV EstLLV(PlotLen, 0); // estiamted log-likelihood (averaged over last 1k permutation) 01804 TFltPrV AcceptV; // sample acceptance ratio 01805 TFltV SampleLLV(NSamples, 0); 01806 TVec<TFltPrV> GradVV(GetParams()); 01807 for (int g = 0; g < GetParams(); g++) { 01808 GradVV[g].Gen(PlotLen, 0); } 01809 if (! 
TrueMtx.Empty()) { 01810 TIntV PermV=NodePerm; TKronMtx CurMtx=ProbMtx; ProbMtx.Dump(); 01811 InitLL(TrueMtx); SetOrderPerm(); CalcApxGraphLL(); printf("TrueLL: %f\n", LogLike()); 01812 TrueLL=LogLike; InitLL(CurMtx); NodePerm=PermV; 01813 } 01814 CalcApxGraphLL(); 01815 printf("LogLike at start: %f\n", LogLike()); 01816 if (WarmUp > 0) { 01817 EstLLV.Add(TFltPr(0, LogLike)); 01818 if (TrueLL != -1) { TrueLLV.Add(TFltPr(0, TrueLL)); } 01819 for (int s = 0; s < WarmUp; s++) { SampleNextPerm(NId1, NId2); } 01820 printf(" warm-up:%s,", ExeTm.GetTmStr()); ExeTm.Tick(); 01821 } 01822 printf("LogLike afterm warm-up: %f\n", LogLike()); 01823 CalcApxGraphLL(); // re-calculate LL (due to numerical errors) 01824 CalcApxGraphDLL(); 01825 EstLLV.Add(TFltPr(WarmUp, LogLike)); 01826 if (TrueLL != -1) { TrueLLV.Add(TFltPr(WarmUp, TrueLL)); } 01827 printf(" recalculated: %f\n", LogLike()); 01828 // start sampling 01829 printf(" sampling (average per 1000 samples)\n"); 01830 TVec<TFltV> SamplVV(5); 01831 for (int s = 0; s < NSamples; s++) { 01832 if (SampleNextPerm(NId1, NId2)) { // new permutation 01833 UpdateGraphDLL(NId1, NId2); NAccept++; } 01834 for (int m = 0; m < AvgGradV.Len(); m++) { AvgGradV[m] += GradV[m]; } 01835 AvgLL += GetLL(); 01836 SampleLLV.Add(GetLL()); 01837 /*SamplVV[0].Add(GetLL()); // gives worse autocoreelation than the avg below 01838 SamplVV[1].Add(GradV[0]); 01839 SamplVV[2].Add(GradV[1]); 01840 SamplVV[3].Add(GradV[2]); 01841 SamplVV[4].Add(GradV[3]);*/ 01842 if (s > 0 && s % 1000 == 0) { 01843 printf("."); 01844 for (int g = 0; g < AvgGradV.Len(); g++) { 01845 GradVV[g].Add(TFltPr(WarmUp+s, AvgGradV[g] / 1000.0)); } 01846 EstLLV.Add(TFltPr(WarmUp+s, AvgLL / 1000.0)); 01847 if (TrueLL != -1) { TrueLLV.Add(TFltPr(WarmUp+s, TrueLL)); } 01848 AcceptV.Add(TFltPr(WarmUp+s, NAccept/1000.0)); 01849 // better (faster decaying) autocorrelation when one takes avg. 
of 1000 consecutive samples 01850 /*SamplVV[0].Add(AvgLL); 01851 SamplVV[1].Add(AvgGradV[0]); 01852 SamplVV[2].Add(AvgGradV[1]); 01853 SamplVV[3].Add(AvgGradV[2]); 01854 SamplVV[4].Add(AvgGradV[3]); //*/ 01855 if (s % 100000 == 0 && DoPlot) { 01856 const TStr Desc = TStr::Fmt("P(NodeSwap)=%g. Nodes: %d, Edges: %d, Params: %d, WarmUp: %s, Samples: %s", PermSwapNodeProb(), 01857 Graph->GetNodes(), Graph->GetEdges(), GetParams(), TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr()); 01858 PlotGrad(EstLLV, TrueLLV, GradVV, AcceptV, OutFNm, Desc); 01859 for (int n = 0; n < SamplVV.Len(); n++) { 01860 PlotAutoCorrelation(SamplVV[n], 1000, TStr::Fmt("%s-n%d", OutFNm.CStr(), n), Desc); } 01861 printf(" samples: %d, time: %s, samples/s: %.1f\n", s, ExeTm.GetTmStr(), double(s+1)/ExeTm.GetSecs()); 01862 } 01863 AvgLL = 0; AvgGradV.PutAll(0); NAccept=0; 01864 } 01865 } 01866 if (DoPlot) { 01867 const TStr Desc = TStr::Fmt("P(NodeSwap)=%g. Nodes: %d, Edges: %d, Params: %d, WarmUp: %s, Samples: %s", PermSwapNodeProb(), 01868 Graph->GetNodes(), Graph->GetEdges(), GetParams(), TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr()); 01869 PlotGrad(EstLLV, TrueLLV, GradVV, AcceptV, OutFNm, Desc); 01870 for (int n = 0; n < SamplVV.Len(); n++) { 01871 PlotAutoCorrelation(SamplVV[n], 1000, TStr::Fmt("%s-n%d", OutFNm.CStr(), n), Desc); } 01872 } 01873 return SampleLLV; // seems to work better for potential scale reduction plot 01874 } 01875 01876 void McMcGetAvgAvg(const TFltV& AvgJV, double& AvgAvg) { 01877 AvgAvg = 0.0; 01878 for (int j = 0; j < AvgJV.Len(); j++) { 01879 AvgAvg += AvgJV[j]; } 01880 AvgAvg /= AvgJV.Len(); 01881 } 01882 01883 void McMcGetAvgJ(const TVec<TFltV>& ChainLLV, TFltV& AvgJV) { 01884 for (int j = 0; j < ChainLLV.Len(); j++) { 01885 const TFltV& ChainV = ChainLLV[j]; 01886 double Avg = 0; 01887 for (int i = 0; i < ChainV.Len(); i++) { 01888 Avg += ChainV[i]; 01889 } 01890 AvgJV.Add(Avg/ChainV.Len()); 01891 } 01892 } 01893 01894 // 
calculates the chain potential scale reduction (see Gelman Bayesian Statistics book) 01895 double TKroneckerLL::CalcChainR2(const TVec<TFltV>& ChainLLV) { 01896 const double J = ChainLLV.Len(); 01897 const double K = ChainLLV[0].Len(); 01898 TFltV AvgJV; McMcGetAvgJ(ChainLLV, AvgJV); 01899 double AvgAvg; McMcGetAvgAvg(AvgJV, AvgAvg); 01900 IAssert(AvgJV.Len() == ChainLLV.Len()); 01901 double InChainVar=0, OutChainVar=0; 01902 // between chain var 01903 for (int j = 0; j < AvgJV.Len(); j++) { 01904 OutChainVar += TMath::Sqr(AvgJV[j] - AvgAvg); } 01905 OutChainVar = OutChainVar * (K/double(J-1)); 01906 printf("*** %g chains of len %g\n", J, K); 01907 printf(" ** between chain var: %f\n", OutChainVar); 01908 //within chain variance 01909 for (int j = 0; j < AvgJV.Len(); j++) { 01910 const TFltV& ChainV = ChainLLV[j]; 01911 for (int k = 0; k < ChainV.Len(); k++) { 01912 InChainVar += TMath::Sqr(ChainV[k] - AvgJV[j]); } 01913 } 01914 InChainVar = InChainVar * 1.0/double(J*(K-1)); 01915 printf(" ** within chain var: %f\n", InChainVar); 01916 const double PostVar = (K-1)/K * InChainVar + 1.0/K * OutChainVar; 01917 printf(" ** posterior var: %f\n", PostVar); 01918 const double ScaleRed = sqrt(PostVar/InChainVar); 01919 printf(" ** scale reduction (< 1.2): %f\n\n", ScaleRed); 01920 return ScaleRed; 01921 } 01922 01923 // Gelman-Rubin-Brooks plot: how does potential scale reduction chainge with chain length 01924 void TKroneckerLL::ChainGelmapRubinPlot(const TVec<TFltV>& ChainLLV, const TStr& OutFNm, const TStr& Desc) { 01925 TFltPrV LenR2V; // how does potential scale reduction chainge with chain length 01926 TVec<TFltV> SmallLLV(ChainLLV.Len()); 01927 const int K = ChainLLV[0].Len(); 01928 const int Buckets=1000; 01929 const int BucketSz = K/Buckets; 01930 for (int b = 1; b < Buckets; b++) { 01931 const int End = TMath::Mn(BucketSz*b, K-1); 01932 for (int c = 0; c < ChainLLV.Len(); c++) { 01933 ChainLLV[c].GetSubValV(0, End, SmallLLV[c]); } 01934 LenR2V.Add(TFltPr(End, 
TKroneckerLL::CalcChainR2(SmallLLV))); 01935 } 01936 LenR2V.Add(TFltPr(K, TKroneckerLL::CalcChainR2(ChainLLV))); 01937 TGnuPlot::PlotValV(LenR2V, TStr::Fmt("gelman-%s", OutFNm.CStr()), TStr::Fmt("%s. %d chains of len %d. BucketSz: %d.", 01938 Desc.CStr(), ChainLLV.Len(), ChainLLV[0].Len(), BucketSz), "Chain length", "Potential scale reduction"); 01939 } 01940 01941 // given a Kronecker graph generate from TrueParam, try to recover the parameters 01942 TFltQu TKroneckerLL::TestKronDescent(const bool& DoExact, const bool& TruePerm, double LearnRate, const int& WarmUp, const int& NSamples, const TKronMtx& TrueParam) { 01943 printf("Test gradient descent on a synthetic kronecker graphs:\n"); 01944 if (DoExact) { printf(" -- Exact gradient calculations\n"); } 01945 else { printf(" -- Approximate gradient calculations\n"); } 01946 if (TruePerm) { printf(" -- No permutation sampling (use true permutation)\n"); } 01947 else { printf(" -- Sample permutations (start with degree permutation)\n"); } 01948 TExeTm IterTm; 01949 int Iter; 01950 double OldLL=0, MyLL=0, AvgAbsErr, AbsSumErr; 01951 TFltV MyGradV, SDevV; 01952 TFltV LearnRateV(GetParams()); LearnRateV.PutAll(LearnRate); 01953 if (TruePerm) { 01954 SetOrderPerm(); 01955 } 01956 else { 01957 /*printf("SET EIGEN VECTOR PERMUTATIONS\n"); 01958 TFltV LeftSV, RightSV; 01959 TGSvd::GetSngVec(Graph, LeftSV, RightSV); 01960 TFltIntPrV V; 01961 for (int v=0; v<LeftSV.Len();v++) { V.Add(TFltIntPr(LeftSV[v], v)); } 01962 V.Sort(false); 01963 NodePerm.Gen(Nodes, 0); 01964 for (int v=0; v < V.Len();v++) { NodePerm.Add(V[v].Val2); } //*/ 01965 //printf("RANDOM PERMUTATION\n"); SetRndPerm(); 01966 printf("DEGREE PERMUTATION\n"); SetDegPerm(); 01967 } 01968 for (Iter = 0; Iter < 100; Iter++) { 01969 if (TruePerm) { 01970 // don't sample over permutations 01971 if (DoExact) { CalcGraphDLL(); CalcGraphLL(); } // slow, O(N^2) 01972 else { CalcApxGraphDLL(); CalcApxGraphLL(); } // fast 01973 MyLL = LogLike; MyGradV = GradV; 01974 } else 
{ 01975 printf("."); 01976 // sample over permutations (approximate calculations) 01977 SampleGradient(WarmUp, NSamples, MyLL, MyGradV); 01978 } 01979 printf("%d] LL: %g, ", Iter, MyLL); 01980 AvgAbsErr = TKronMtx::GetAvgAbsErr(ProbMtx, TrueParam); 01981 AbsSumErr = fabs(ProbMtx.GetMtxSum() - TrueParam.GetMtxSum()); 01982 printf(" avgAbsErr: %.4f, absSumErr: %.4f, newLL: %.2f, deltaLL: %.2f\n", AvgAbsErr, AbsSumErr, MyLL, OldLL-MyLL); 01983 for (int p = 0; p < GetParams(); p++) { 01984 // set learn rate so that move for each parameter is inside the [0.01, 0.1] 01985 LearnRateV[p] *= 0.9; 01986 //printf("%d: rate: %f delta:%f\n", p, LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p])); 01987 while (fabs(LearnRateV[p]*MyGradV[p]) > 0.1) { LearnRateV[p] *= 0.9; } 01988 //printf(" rate: %f delta:%f\n", LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p])); 01989 while (fabs(LearnRateV[p]*MyGradV[p]) < 0.001) { LearnRateV[p] *= (1.0/0.9); } 01990 //printf(" rate: %f delta:%f\n", LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p])); 01991 printf(" %d] %f <-- %f + %f lrnRate:%g\n", p, ProbMtx.At(p) + LearnRateV[p]*MyGradV[p], 01992 ProbMtx.At(p), (double)(LearnRateV[p]*MyGradV[p]), LearnRateV[p]()); 01993 ProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*MyGradV[p]; 01994 // box constraints 01995 if (ProbMtx.At(p) > 0.99) { ProbMtx.At(p)=0.99; } 01996 if (ProbMtx.At(p) < 0.01) { ProbMtx.At(p)=0.01; } 01997 } 01998 ProbMtx.GetLLMtx(LLMtx); OldLL = MyLL; 01999 if (AvgAbsErr < 0.01) { printf("***CONVERGED!\n"); break; } 02000 printf("\n"); fflush(stdout); 02001 } 02002 TrueParam.Dump("True Thetas", true); 02003 ProbMtx.Dump("Final Thetas", true); 02004 printf(" AvgAbsErr: %f\n AbsSumErr: %f\n Iterations: %d\n", AvgAbsErr, AbsSumErr, Iter); 02005 printf("Iteration run time: %s, sec: %g\n\n", IterTm.GetTmStr(), IterTm.GetSecs()); 02006 return TFltQu(AvgAbsErr, AbsSumErr, Iter, IterTm.GetSecs()); 02007 } 02008 02009 void PlotTrueAndEst(const TStr& OutFNm, const TStr& Desc, const TStr& YLabel, const 
TFltPrV& EstV, const TFltPrV& TrueV) { 02010 TGnuPlot GP(OutFNm, Desc.CStr(), true); 02011 GP.AddPlot(EstV, gpwLinesPoints, YLabel, "linewidth 1 pointtype 6 pointsize 1"); 02012 if (! TrueV.Empty()) { GP.AddPlot(TrueV, gpwLines, "TRUE"); } 02013 GP.SetXYLabel("Gradient descent iterations", YLabel); 02014 GP.SavePng(); 02015 } 02016 02017 void TKroneckerLL::GradDescentConvergence(const TStr& OutFNm, const TStr& Desc1, const bool& SamplePerm, const int& NIters, 02018 double LearnRate, const int& WarmUp, const int& NSamples, const int& AvgKGraphs, const TKronMtx& TrueParam) { 02019 TExeTm IterTm; 02020 int Iter; 02021 double OldLL=0, MyLL=0, AvgAbsErr=0, AbsSumErr=0; 02022 TFltV MyGradV, SDevV; 02023 TFltV LearnRateV(GetParams()); LearnRateV.PutAll(LearnRate); 02024 TFltPrV EZeroV, DiamV, Lambda1V, Lambda2V, AvgAbsErrV, AvgLLV; 02025 TFltPrV TrueEZeroV, TrueDiamV, TrueLambda1V, TrueLambda2V, TrueLLV; 02026 TFltV SngValV; TSnap::GetSngVals(Graph, 2, SngValV); SngValV.Sort(false); 02027 const double TrueEZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters)); 02028 const double TrueEffDiam = TSnap::GetAnfEffDiam(Graph, false, 10); 02029 const double TrueLambda1 = SngValV[0]; 02030 const double TrueLambda2 = SngValV[1]; 02031 if (! TrueParam.Empty()) { 02032 const TKronMtx CurParam = ProbMtx; ProbMtx.Dump(); 02033 InitLL(TrueParam); SetOrderPerm(); CalcApxGraphLL(); printf("TrueLL: %f\n", LogLike()); 02034 OldLL = LogLike; InitLL(CurParam); 02035 } 02036 const double TrueLL = OldLL; 02037 if (! SamplePerm) { SetOrderPerm(); } else { SetDegPerm(); } 02038 for (Iter = 0; Iter < NIters; Iter++) { 02039 if (! 
SamplePerm) { 02040 // don't sample over permutations 02041 CalcApxGraphDLL(); CalcApxGraphLL(); // fast 02042 MyLL = LogLike; MyGradV = GradV; 02043 } else { 02044 // sample over permutations (approximate calculations) 02045 SampleGradient(WarmUp, NSamples, MyLL, MyGradV); 02046 } 02047 double SumDiam=0, SumSngVal1=0, SumSngVal2=0; 02048 for (int trial = 0; trial < AvgKGraphs; trial++) { 02049 // generate kronecker graph 02050 PNGraph KronGraph = TKronMtx::GenFastKronecker(ProbMtx, KronIters, true, 0); // approx 02051 //PNGraph KronGraph = TKronMtx::GenKronecker(ProbMtx, KronIters, true, 0); // true 02052 SngValV.Clr(true); TSnap::GetSngVals(KronGraph, 2, SngValV); SngValV.Sort(false); 02053 SumDiam += TSnap::GetAnfEffDiam(KronGraph, false, 10); 02054 SumSngVal1 += SngValV[0]; SumSngVal2 += SngValV[1]; 02055 } 02056 // how good is the current fit 02057 AvgLLV.Add(TFltPr(Iter, MyLL)); 02058 EZeroV.Add(TFltPr(Iter, ProbMtx.GetMtxSum())); 02059 DiamV.Add(TFltPr(Iter, SumDiam/double(AvgKGraphs))); 02060 Lambda1V.Add(TFltPr(Iter, SumSngVal1/double(AvgKGraphs))); 02061 Lambda2V.Add(TFltPr(Iter, SumSngVal2/double(AvgKGraphs))); 02062 TrueLLV.Add(TFltPr(Iter, TrueLL)); 02063 TrueEZeroV.Add(TFltPr(Iter, TrueEZero)); 02064 TrueDiamV.Add(TFltPr(Iter, TrueEffDiam)); 02065 TrueLambda1V.Add(TFltPr(Iter, TrueLambda1)); 02066 TrueLambda2V.Add(TFltPr(Iter, TrueLambda2)); 02067 if (Iter % 10 == 0) { 02068 const TStr Desc = TStr::Fmt("%s. 
Iter: %d, G(%d, %d) K(%d, %d)", Desc1.Empty()?OutFNm.CStr():Desc1.CStr(), 02069 Iter, Graph->GetNodes(), Graph->GetEdges(), ProbMtx.GetNodes(KronIters), ProbMtx.GetEdges(KronIters)); 02070 PlotTrueAndEst("LL."+OutFNm, Desc, "Average LL", AvgLLV, TrueLLV); 02071 PlotTrueAndEst("E0."+OutFNm, Desc, "E0 (expected number of edges)", EZeroV, TrueEZeroV); 02072 PlotTrueAndEst("Diam."+OutFNm+"-Diam", Desc, "Effective diameter", DiamV, TrueDiamV); 02073 PlotTrueAndEst("Lambda1."+OutFNm, Desc, "Lambda 1", Lambda1V, TrueLambda1V); 02074 PlotTrueAndEst("Lambda2."+OutFNm, Desc, "Lambda 2", Lambda2V, TrueLambda2V); 02075 if (! TrueParam.Empty()) { 02076 PlotTrueAndEst("AbsErr."+OutFNm, Desc, "Average Absolute Error", AvgAbsErrV, TFltPrV()); } 02077 } 02078 if (! TrueParam.Empty()) { 02079 AvgAbsErr = TKronMtx::GetAvgAbsErr(ProbMtx, TrueParam); 02080 AvgAbsErrV.Add(TFltPr(Iter, AvgAbsErr)); 02081 } else { AvgAbsErr = 1.0; } 02082 // update parameters 02083 AbsSumErr = fabs(ProbMtx.GetMtxSum() - TrueEZero); 02084 // update parameters 02085 for (int p = 0; p < GetParams(); p++) { 02086 LearnRateV[p] *= 0.99; 02087 while (fabs(LearnRateV[p]*MyGradV[p]) > 0.1) { LearnRateV[p] *= 0.99; printf(".");} 02088 while (fabs(LearnRateV[p]*MyGradV[p]) < 0.002) { LearnRateV[p] *= (1.0/0.95); printf("*");} 02089 printf(" %d] %f <-- %f + %9f Grad: %9.1f, Rate:%g\n", p, ProbMtx.At(p) + LearnRateV[p]*MyGradV[p], 02090 ProbMtx.At(p), (double)(LearnRateV[p]*MyGradV[p]), MyGradV[p](), LearnRateV[p]()); 02091 ProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*MyGradV[p]; 02092 // box constraints 02093 if (ProbMtx.At(p) > 1.0) { ProbMtx.At(p)=1.0; } 02094 if (ProbMtx.At(p) < 0.001) { ProbMtx.At(p)=0.001; } 02095 } 02096 printf("%d] LL: %g, ", Iter, MyLL); 02097 printf(" avgAbsErr: %.4f, absSumErr: %.4f, newLL: %.2f, deltaLL: %.2f\n", AvgAbsErr, AbsSumErr, MyLL, OldLL-MyLL); 02098 if (AvgAbsErr < 0.001) { printf("***CONVERGED!\n"); break; } 02099 printf("\n"); fflush(stdout); 02100 ProbMtx.GetLLMtx(LLMtx); 
OldLL = MyLL; 02101 } 02102 TrueParam.Dump("True Thetas", true); 02103 ProbMtx.Dump("Final Thetas", true); 02104 printf(" AvgAbsErr: %f\n AbsSumErr: %f\n Iterations: %d\n", AvgAbsErr, AbsSumErr, Iter); 02105 printf("Iteration run time: %s, sec: %g\n\n", IterTm.GetTmStr(), IterTm.GetSecs()); 02106 } 02107 02108 // given true N0, fit the parameters, get likelihood and calculate BIC (MDL), plot n0 vs. BIC 02109 void TKroneckerLL::TestBicCriterion(const TStr& OutFNm, const TStr& Desc1, const PNGraph& G, const int& GradIters, 02110 double LearnRate, const int& WarmUp, const int& NSamples, const int& TrueN0) { 02111 TFltPrV BicV, MdlV, LLV; 02112 const double rndGP = G->GetEdges()/TMath::Sqr(double(G->GetNodes())); 02113 const double RndGLL = G->GetEdges()*log(rndGP )+ (TMath::Sqr(double(G->GetNodes()))-G->GetEdges())*log(1-rndGP); 02114 LLV.Add(TFltPr(1, RndGLL)); 02115 BicV.Add(TFltPr(1, -RndGLL + 0.5*TMath::Sqr(1)*log(TMath::Sqr(G->GetNodes())))); 02116 MdlV.Add(TFltPr(1, -RndGLL + 32*TMath::Sqr(1)+2*(log((double)1)+log((double)G->GetNodes())))); 02117 for (int NZero = 2; NZero < 10; NZero++) { 02118 const TKronMtx InitKronMtx = TKronMtx::GetInitMtx(NZero, G->GetNodes(), G->GetEdges()); 02119 InitKronMtx.Dump("INIT PARAM", true); 02120 TKroneckerLL KronLL(G, InitKronMtx); 02121 KronLL.SetPerm('d'); // degree perm 02122 const double LastLL = KronLL.GradDescent(GradIters, LearnRate, 0.001, 0.01, WarmUp, NSamples); 02123 LLV.Add(TFltPr(NZero, LastLL)); 02124 BicV.Add(TFltPr(NZero, -LastLL + 0.5*TMath::Sqr(NZero)*log(TMath::Sqr(G->GetNodes())))); 02125 MdlV.Add(TFltPr(NZero, -LastLL + 32*TMath::Sqr(NZero)+2*(log((double)NZero)+log((double)KronLL.GetKronIters())))); 02126 { TGnuPlot GP("LL-"+OutFNm, Desc1); 02127 GP.AddPlot(LLV, gpwLinesPoints, "Log-likelihood", "linewidth 1 pointtype 6 pointsize 2"); 02128 GP.SetXYLabel("NZero", "Log-Likelihood"); GP.SavePng(); } 02129 { TGnuPlot GP("BIC-"+OutFNm, Desc1); 02130 GP.AddPlot(BicV, gpwLinesPoints, "BIC", "linewidth 1 
pointtype 6 pointsize 2"); 02131 GP.SetXYLabel("NZero", "BIC"); GP.SavePng(); } 02132 { TGnuPlot GP("MDL-"+OutFNm, Desc1); 02133 GP.AddPlot(MdlV, gpwLinesPoints, "MDL", "linewidth 1 pointtype 6 pointsize 2"); 02134 GP.SetXYLabel("NZero", "MDL"); GP.SavePng(); } 02135 } 02136 } 02137 02138 void TKroneckerLL::TestGradDescent(const int& KronIters, const int& KiloSamples, const TStr& Permutation) { 02139 const TStr OutFNm = TStr::Fmt("grad-%s%d-%dk", Permutation.CStr(), KronIters, KiloSamples); 02140 TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.6; 0.6 0.4"); 02141 PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, KronIters, true, 0); 02142 TKroneckerLL KronLL(Graph, KronParam); 02143 TVec<TFltV> GradVV(4), SDevVV(4); TFltV XValV; 02144 int NId1 = 0, NId2 = 0, NAccept = 0; 02145 TVec<TMom> GradMomV(4); 02146 TExeTm ExeTm; 02147 if (Permutation == "r") KronLL.SetRndPerm(); 02148 else if (Permutation == "d") KronLL.SetDegPerm(); 02149 else if (Permutation == "o") KronLL.SetOrderPerm(); 02150 else FailR("Unknown permutation (r,d,o)"); 02151 KronLL.CalcApxGraphLL(); 02152 KronLL.CalcApxGraphDLL(); 02153 for (int s = 0; s < 1000*KiloSamples; s++) { 02154 if (KronLL.SampleNextPerm(NId1, NId2)) { // new permutation 02155 KronLL.UpdateGraphDLL(NId1, NId2); NAccept++; } 02156 if (s > 50000) { //warm up period 02157 for (int m = 0; m < 4; m++) { GradVV[m].Add(KronLL.GradV[m]); } 02158 if ((s+1) % 1000 == 0) { 02159 printf("."); 02160 for (int m = 0; m < 4; m++) { GradVV[m].Add(KronLL.GradV[m]); } 02161 XValV.Add((s+1)); 02162 if ((s+1) % 100000 == 0) { 02163 TGnuPlot GP(OutFNm, TStr::Fmt("Gradient vs. samples. 
%d nodes, %d edges", Graph->GetNodes(), Graph->GetEdges()), true); 02164 for (int g = 0; g < GradVV.Len(); g++) { 02165 GP.AddPlot(XValV, GradVV[g], gpwLines, TStr::Fmt("grad %d", g)); } 02166 GP.SetXYLabel("sample index","log Gradient"); 02167 GP.SavePng(); 02168 } 02169 } 02170 } 02171 } 02172 printf("\n"); 02173 for (int m = 0; m < 4; m++) { 02174 GradMomV[m].Def(); 02175 printf("grad %d: mean: %12f sDev: %12f median: %12f\n", m, 02176 GradMomV[m].GetMean(), GradMomV[m].GetSDev(), GradMomV[m].GetMedian()); 02177 } 02178 } 02179 /* 02180 // sample over permutations 02181 void TKroneckerLL::GradDescent(const double& LearnRate, const int& WarmUp, const int& NSamples, const int& NIter) { 02182 TFltV GradV, SDevV; 02183 double AvgLL; 02184 for (int Iter = 0; Iter < 100; Iter++) { 02185 //SampleGradient(WarmUp, NSamples, AvgLL, GradV, SDevV, true); 02186 SampleGradient(WarmUp, NSamples, AvgLL, GradV); 02187 for (int theta = 0; theta < GetParams(); theta++) { 02188 printf("%d] %f <-- %f + %f\n", theta, ProbMtx.At(theta) + LearnRate*GradV[theta], ProbMtx.At(theta), LearnRate*GradV[theta]); 02189 ProbMtx.At(theta) = ProbMtx.At(theta) + LearnRate*GradV[theta]; 02190 // box constraints 02191 if (ProbMtx.At(theta) > 0.99) ProbMtx.At(theta)=0.99; 02192 if (ProbMtx.At(theta) < 0.01) ProbMtx.At(theta)=0.01; 02193 } 02194 ProbMtx.GetLLMtx(LLMtx); 02195 } 02196 ProbMtx.Dump("Final Thetas"); 02197 } 02198 */ 02199 02200 02202 // Add Noise to Graph 02204 int TKronNoise::RemoveNodeNoise(PNGraph& Graph, const int& NNodes, const bool Random) { 02205 IAssert(NNodes > 0 && NNodes < (Graph->GetNodes() / 2)); 02206 02207 int i = 0; 02208 TIntV ShufflePerm; 02209 Graph->GetNIdV(ShufflePerm); 02210 if(Random) { 02211 ShufflePerm.Shuffle(TKronMtx::Rnd); 02212 for(i = 0; i < NNodes; i++) { 02213 Graph->DelNode(int(ShufflePerm[i])); 02214 } 02215 } else { 02216 for(i = 0; i < NNodes; i++) { 02217 Graph->DelNode(int(ShufflePerm[ShufflePerm.Len() - 1 - i])); 02218 } 02219 } 02220 02221 return 
Graph->GetNodes(); 02222 } 02223 02224 int TKronNoise::RemoveNodeNoise(PNGraph& Graph, const double& Rate, const bool Random) { 02225 IAssert(Rate > 0 && Rate < 0.5); 02226 return TKronNoise::RemoveNodeNoise(Graph, (int) floor(Rate * double(Graph->GetNodes())), Random); 02227 } 02228 02229 int TKronNoise::FlipEdgeNoise(PNGraph& Graph, const int& NEdges, const bool Random) { 02230 IAssert(NEdges > 0 && NEdges < Graph->GetEdges()); 02231 02232 const int Nodes = Graph->GetNodes(); 02233 const int Edges = Graph->GetEdges(); 02234 int Src, Dst; 02235 02236 TIntV NIdV, TempV; 02237 TIntPrV ToAdd, ToDel; 02238 Graph->GetNIdV(NIdV); 02239 02240 ToAdd.Gen(NEdges / 2, 0); 02241 for(int i = 0; i < NEdges / 2; i++) { 02242 Src = NIdV[TKronMtx::Rnd.GetUniDevInt(Nodes)]; 02243 Dst = NIdV[TKronMtx::Rnd.GetUniDevInt(Nodes)]; 02244 if(Graph->IsEdge(Src, Dst)) { i--; continue; } 02245 02246 ToAdd.Add(TIntPr(Src, Dst)); 02247 } 02248 02249 ToDel.Gen(Edges, 0); 02250 for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 02251 ToDel.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId())); 02252 } 02253 ToDel.Shuffle(TKronMtx::Rnd); 02254 02255 for(int i = 0; i < NEdges / 2; i++) { 02256 Graph->DelEdge(ToDel[i].Val1, ToDel[i].Val2); 02257 Graph->AddEdge(ToAdd[i].Val1, ToAdd[i].Val2); 02258 } 02259 02260 return Graph->GetEdges(); 02261 } 02262 02263 int TKronNoise::FlipEdgeNoise(PNGraph& Graph, const double& Rate, const bool Random) { 02264 IAssert(Rate > 0 && Rate < 0.5); 02265 return TKronNoise::FlipEdgeNoise(Graph, (int) floor(Rate * double(Graph->GetEdges())), Random); 02266 } 02267 02268 int TKronNoise::RemoveEdgeNoise(PNGraph& Graph, const int& NEdges) { 02269 IAssert(NEdges > 0 && NEdges < Graph->GetEdges()); 02270 02271 TIntPrV ToDel; 02272 02273 ToDel.Gen(Graph->GetEdges(), 0); 02274 for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 02275 if(EI.GetSrcNId() != EI.GetDstNId()) { 02276 ToDel.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId())); 02277 } 02278 } 02279 
ToDel.Shuffle(TKronMtx::Rnd); 02280 02281 for(int i = 0; i < NEdges; i++) { 02282 Graph->DelEdge(ToDel[i].Val1, ToDel[i].Val2); 02283 } 02284 02285 return Graph->GetEdges(); 02286 } 02287 02288 int TKronNoise::RemoveEdgeNoise(PNGraph& Graph, const double& Rate) { 02289 IAssert(Rate > 0 && Rate < 0.5); 02290 return TKronNoise::RemoveEdgeNoise(Graph, (int) floor(Rate * double(Graph->GetEdges()))); 02291 } 02292 02293 02294 02296 // Kronecker Log Likelihood Maximization 02297 void TKronMaxLL::SetPerm(const char& PermId) { 02298 if (PermId == 'o') KronLL.SetOrderPerm(); 02299 else if (PermId == 'd') KronLL.SetDegPerm(); 02300 else if (PermId == 'r') KronLL.SetRndPerm(); 02301 else FailR("Unknown permutation type (o,d,r)"); 02302 } 02303 02304 /* void TKronMaxLL::EvalNewEdgeP(const TKronMtx& ProbMtx) { 02305 ProbMtx.Dump("\n***EVAL:"); 02306 for (int i = 0; i < ProbMtx.Len(); i++) { 02307 // parameters are out of bounds 02308 if (ProbMtx.At(i) < 0.05 || ProbMtx.At(i) > 0.95) { 02309 TFltV MxDerivV(ProbMtx.Len()); MxDerivV.PutAll(1e5); 02310 FEvalH.AddDat(ProbMtx, TFEval(-1e10, MxDerivV)); 02311 return; 02312 } 02313 } 02314 double AvgLL; 02315 TFltV GradV, SDevV; 02316 KronLL.InitLL(ProbMtx); // set current point 02317 //KronLL.SampleGradient(WarmUp, NSamples, AvgLL, GradV, SDevV, true); //sample the gradient 02318 KronLL.SampleGradient(WarmUp, NSamples, AvgLL, GradV); 02319 FEvalH.AddDat(ProbMtx, TFEval(AvgLL, GradV)); 02320 } 02321 02322 double TKronMaxLL::GetLL(const TFltV& ThetaV) { 02323 TKronMtx ProbMtx; RoundTheta(ThetaV, ProbMtx); 02324 if (! FEvalH.IsKey(ProbMtx)) { 02325 EvalNewEdgeP(ProbMtx); 02326 } 02327 return FEvalH.GetDat(ProbMtx).LogLike; 02328 } 02329 02330 void TKronMaxLL::GetDLL(const TFltV& ThetaV, TFltV& GradV) { 02331 TKronMtx ProbMtx; RoundTheta(ThetaV, ProbMtx); 02332 if (! 
FEvalH.IsKey(ProbMtx)) { 02333 EvalNewEdgeP(ProbMtx); 02334 } 02335 GradV = FEvalH.GetDat(ProbMtx).GradV; 02336 } 02337 02338 double TKronMaxLL::GetDLL(const TFltV& ThetaV, const int& ParamId) { 02339 TKronMtx ProbMtx; RoundTheta(ThetaV, ProbMtx); 02340 if (! FEvalH.IsKey(ProbMtx)) { 02341 EvalNewEdgeP(ProbMtx); 02342 } 02343 return FEvalH.GetDat(ProbMtx).GradV[ParamId]; 02344 } 02345 void TKronMaxLL::MaximizeLL(const int& NWarmUp, const int& Samples) { 02346 WarmUp = NWarmUp; 02347 NSamples = Samples; 02348 TConjGrad<TFunc> ConjGrad(KronLL.GetProbMtx().GetMtx(), TFunc(this)); 02349 //TConjGrad<TLogBarFunc> ConjGrad(KronLL.GetEdgeP().GetV(), TLogBarFunc(this, 0.1)); 02350 ConjGrad.ConjGradMin(0.1); 02351 }*/ 02352 02353 // round to 3 decimal places 02354 void TKronMaxLL::RoundTheta(const TFltV& ThetaV, TFltV& NewThetaV) { 02355 NewThetaV.Gen(ThetaV.Len()); 02356 for (int i = 0; i < ThetaV.Len(); i++) { 02357 NewThetaV[i] = TMath::Round(ThetaV[i], 3); } 02358 } 02359 02360 // round to 3 decimal places 02361 void TKronMaxLL::RoundTheta(const TFltV& ThetaV, TKronMtx& Kronecker) { 02362 Kronecker.GenMtx((int)sqrt((double)ThetaV.Len())); 02363 for (int i = 0; i < ThetaV.Len(); i++) { 02364 Kronecker.At(i) = TMath::Round(ThetaV[i], 3); } 02365 } 02366 02367 void TKronMaxLL::Test() { 02368 TKronMtx::PutRndSeed(1); 02369 TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.7; 0.6 0.5"); 02370 PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 8, true, 1); 02371 02372 TKronMaxLL KronMaxLL(Graph, TFltV::GetV(0.9, 0.7, 0.5, 0.3)); 02373 KronMaxLL.SetPerm('d'); 02374 //KronMaxLL.MaximizeLL(10000, 50000); 02375 /*TKroneckerLL KronLL(Graph, *TKronMtx::GetMtx("0.9 0.7; 0.5 0.3")); 02376 KronLL.SetDegPerm(); 02377 KronLL.GradDescent(0.005/double(Graph->GetNodes()));*/ 02378 } 02379 02381 // Kronecker Phase Plot 02382 /* 02383 void TKronPhasePlot::SaveMatlab(const TStr& OutFNm) const { 02384 FILE *F = fopen(OutFNm.CStr(), "wt"); 02385 fprintf(F, "#Take last graph stats\n"); 02386 
fprintf(F, "#i\tAlpha\tBeta\tNodes\tNonZNodes\tEdges\tWccNodes\tWccEdges\tDiam\tEffDiam\tWccDiam\tWccEffDiam\t1StEigVal\tMxEigVal\n"); 02387 for (int i = 0 ; i < PhaseV.Len(); i++) { 02388 const TPhasePoint& Point = PhaseV[i]; 02389 const TGrowthStat& GrowthStat = Point.GrowthStat; 02390 const PGraphStat& FirstGrowth = GrowthStat[0]; 02391 const PGraphStat& LastGrowth = GrowthStat.Last(); 02392 fprintf(F, "%d\t%g\t%g\t", i, Point.Alpha, Point.Beta); 02393 fprintf(F, "%d\t%d\t%d\t", LastGrowth->Nodes, LastGrowth->Edges, LastGrowth->NonZNodes); 02394 fprintf(F, "%d\t%d\t", LastGrowth->WccNodes, LastGrowth->WccEdges); 02395 fprintf(F, "%f\t%f\t%f\t%f\t", LastGrowth->FullDiam, LastGrowth->EffDiam, LastGrowth->FullWccDiam, LastGrowth->EffWccDiam); 02396 //fprintf(F, "%f\t%f", FirstGrowth.MxEigVal, LastGrowth.MxEigVal); 02397 fprintf(F, "\n"); 02398 } 02399 fclose(F); 02400 } 02401 02402 void TKronPhasePlot::KroneckerPhase(const TStr& MtxId, const int& MxIter, 02403 const double& MnAlpha, const double& MxAlpha, const double& AlphaStep, 02404 const double& MnBeta, const double& MxBeta, const double& BetaStep, 02405 const TStr& FNmPref) { 02406 TKronPhasePlot PhasePlot; 02407 TExeTm KronTm; 02408 int AlphaCnt=0, BetaCnt=0; 02409 for (double Alpha = MnAlpha; (Alpha-1e-6) <= MxAlpha; Alpha += AlphaStep) { 02410 AlphaCnt++; BetaCnt = 0; 02411 printf("\n\n****A:%g***********************************************************************", Alpha); 02412 for (double Beta = MnBeta; (Beta-1e-6) <= MxBeta; Beta += BetaStep) { 02413 printf("\n\n==A[%d]:%g====B[%d]:%g=====================================================\n", AlphaCnt, Alpha, BetaCnt, Beta); 02414 BetaCnt++; 02415 TGrowthStat GrowthStat; 02416 PNGraph Graph; 02417 // run Kronecker 02418 TFullRectMtx SeedMtx = TFullRectMtx::GetMtxFromNm(MtxId); 02419 SeedMtx.Epsilonize(Alpha, Beta); 02420 for (int iter = 1; iter < MxIter + 1; iter++) { 02421 printf("%2d] at %s\n", iter, TExeTm::GetCurTm().CStr()); 02422 Graph = PNGraph(); 
KronTm.Tick(); 02423 Graph = SeedMtx.GenRMatKronecker(iter, false, 0); 02424 GrowthStat.Add(Graph, TNodeTm(iter)); 02425 if (KronTm.GetSecs() > 30 * 60) { 02426 printf("*** TIME LIMIT [%s]\n", KronTm.GetTmStr().CStr()); break; } 02427 } 02428 const TStr Desc = TStr::Fmt("%s. Alpha:%g. Beta:%g", MtxId.CStr(), Alpha, Beta); 02429 const TStr FNmPref1 = TStr::Fmt("%s.a%02d.b%02d", FNmPref.CStr(), AlphaCnt, BetaCnt); 02430 TGPlot::PlotDegDist(Graph, FNmPref1, Desc, false, true, true); 02431 TGPlot::PlotWccDist(Graph, FNmPref1, Desc); 02432 TGPlot::PlotSccDist(Graph, FNmPref1, Desc); 02433 GrowthStat.PlotAll(FNmPref1, Desc); 02434 GrowthStat.SaveTxt(FNmPref1, Desc); 02435 PhasePlot.PhaseV.Add(TPhasePoint(Alpha, Beta, GrowthStat)); 02436 } 02437 {TFOut FOut(TStr::Fmt("phase.%s.bin", FNmPref.CStr())); 02438 PhasePlot.Save(FOut); } 02439 } 02440 } 02441 */ 02442 /*void TKroneckerLL::SetRndThetas() { 02443 ProbMtx.Dump("TRUE parameters"); 02444 TFltV RndV; 02445 double SumRnd = 0.0; 02446 for (int i = 0; i < ProbMtx.Len(); i++) { 02447 RndV.Add(0.1+TKronMtx::Rnd.GetUniDev()); 02448 SumRnd += RndV.Last(); 02449 } 02450 RndV.Sort(false); 02451 for (int i = 0; i < ProbMtx.Len(); i++) { ProbMtx.At(i) = RndV[i]; } 02452 ProbMtx.Dump("Random parameters"); 02453 const double EdgePSum = pow(Graph->GetEdges(), 1.0/KronIters); 02454 bool Repeat = true; 02455 while (Repeat) { 02456 const double Scale = EdgePSum / SumRnd; 02457 Repeat=false; SumRnd = 0.0; 02458 for (int i = 0; i < ProbMtx.Len(); i++) { 02459 ProbMtx.At(i) = ProbMtx.At(i)*Scale; 02460 if (ProbMtx.At(i) > 0.95) { ProbMtx.At(i)=0.95; Repeat=true; } 02461 SumRnd += ProbMtx.At(i); 02462 } 02463 } 02464 ProbMtx.Dump("INIT parameters"); 02465 ProbMtx.GetLLMtx(LLMtx); 02466 }*/ 02467 02468 /* 02469 void TKroneckerLL::TestLL() { 02470 TExeTm ExeTm; 02471 // approximation to empty graph log-likelihood 02472 */ 02473 /*{ PNGraph Graph = TNGraph::New(); 02474 for (uint i = 0; i < TMath::Pow2(4); i++) { Graph->AddNode(i); } //8k 
nodes 02475 PKronecker KronParam = TKronMtx::GetMtx("0.8 0.6; 0.7 0.3"); 02476 TKroneckerLL KronLL(Graph, KronParam); 02477 printf("\nNodes: %d\n", KronLL.GetNodes()); 02478 printf("Full Graph LL: %f\n", KronLL.GetFullGraphLL()); 02479 printf("Empty Graph Exact LL: %f\n", KronLL.GetEmptyGraphLL()); 02480 printf("Empty Approx x=log(1-x) LL: %f\n", KronLL.GetApxEmptyGraphLL()); 02481 printf("Empty Sample LL (100/node): %f\n", KronLL.GetSampleEmptyGraphLL(Graph->GetNodes() * 100)); 02482 KronLL.SetOrderPerm(); 02483 printf("\nEdge prob: %f, LL: %f\n", KronParam->GetEdgeProb(0,0,8), log(KronParam->GetEdgeProb(0,0,8))); 02484 printf("No Edge prob: %f, LL: %f\n", KronParam->GetNoEdgeProb(0,0,8), log(KronParam->GetNoEdgeProb(0,0,8))); 02485 printf("Empty Graph LL: %f\n", KronLL.CalcGraphLL()); 02486 printf("Apx Empty Graph LL: %f\n", KronLL.CalcApxGraphLL()); 02487 Graph->AddEdge(0, 0); 02488 printf("add 1 edge. LL: %f\n", KronLL.CalcGraphLL()); 02489 printf("Apx add 1 edge. LL: %f\n", KronLL.CalcApxGraphLL()); } 02490 */ 02491 02492 // log-likelihood versus different Kronecker parameters 02493 /*{ PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2"); 02494 PNGraph Graph = TKronMtx::GenKronecker(KronParam, 10, true, 10); 02495 TVec<PKronecker> ParamV; 02496 ParamV.Add(KronParam); 02497 ParamV.Add(TKronMtx::GetMtx("0.6 0.6; 0.6 0.5")); // sum = 2.3 02498 //ParamV.Add(TKronMtx::GetMtx("0.9 0.9; 0.4 0.1")); // sum = 2.3 02499 //ParamV.Add(TKronMtx::GetMtx("0.8 0.7; 0.6 0.2")); // sum = 2.3 02500 ParamV.Add(TKronMtx::GetMtx("0.9 0.9; 0.6 0.2")); // sum = 2.6 02501 for (int i = 0; i < ParamV.Len(); i++) { 02502 ParamV[i]->Dump(); 02503 TKroneckerLL KronLL(Graph, ParamV[i]); 02504 for (int k = 0; k < 3; k++) { 02505 if (k==0) { KronLL.SetOrderPerm(); printf("Order permutation:\n"); } 02506 if (k==1) { KronLL.SetDegPerm(); printf("Degree permutation:\n"); } 02507 if (k==2) { KronLL.SetRndPerm(); printf("Random permutation:\n"); } 02508 const double LL = 
KronLL.CalcGraphLL(), aLL = KronLL.CalcApxGraphLL(); 02509 printf(" Exact Graph LL: %f\n", LL); 02510 printf(" Approx Graph LL: %f\n", aLL); 02511 printf(" error : %.12f\n", -fabs(LL-aLL)/LL); 02512 } 02513 } } 02514 */ 02515 // exact vs. approximate log-likelihood 02516 /*{ PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2"); 02517 PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 16, true, 0); 02518 TKroneckerLL KronLL(Graph, KronParam); 02519 TMom ExactLL, ApxLL; 02520 printf("Random permutation:\n"); 02521 for (int i = 0; i < 100; i++) { 02522 KronLL.SetRndPerm(); 02523 //ExactLL.Add(KronLL.CalcGraphLL()); 02524 ApxLL.Add(KronLL.CalcApxGraphLL()); 02525 //printf(" Exact Graph LL: %f\n", ExactLL.GetVal(ExactLL.GetVals()-1)); 02526 printf(" Approx Graph LL: %f\n", ApxLL.GetVal(ApxLL.GetVals()-1)); 02527 } 02528 ExactLL.Def(); ApxLL.Def(); 02529 //printf("EXACT: %f (%f)\n", ExactLL.GetMean(), ExactLL.GetSDev()); 02530 printf("APPROX: %f (%f)\n", ApxLL.GetMean(), ApxLL.GetSDev()); 02531 KronLL.SetOrderPerm(); 02532 printf("Order permutation:\n"); 02533 printf(" Exact Graph LL: %f\n", KronLL.CalcGraphLL()); 02534 printf(" Approx Graph LL: %f\n", KronLL.CalcApxGraphLL()); 02535 } 02536 */ 02537 02538 // start from random permultation and sort it using bubble sort 02539 // compare the end result with ordered permutation 02540 //PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2"); 02541 //PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 10, true, 1); 02542 /*PKronecker KronParam = TKronMtx::GetMtx("0.9 0.7; 0.9 0.5"); 02543 PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 6, true, 2); 02544 TGAlg::SaveFullMtx(Graph, "kron32.tab"); 02545 02546 TKroneckerLL KronLL(Graph, KronParam); 02547 KronLL.SetOrderPerm(); 02548 KronLL.LogLike = KronLL.CalcApxGraphLL(); 02549 printf(" Approx Graph LL: %f\n", KronLL.CalcApxGraphLL()); 02550 printf(" swap 1-20: %f\n", KronLL.SwapNodesLL(1, 20)); 02551 printf(" swap 20-30: %f\n", KronLL.SwapNodesLL(20, 
30)); 02552 printf(" swap 30-1: %f\n", KronLL.SwapNodesLL(1, 30)); 02553 printf(" swap 20-30: %f\n", KronLL.SwapNodesLL(30, 20)); 02554 IAssert(KronLL.GetPerm().IsSorted()); 02555 KronLL.SetRndPerm(); 02556 KronLL.LogLike = KronLL.CalcApxGraphLL(); 02557 for (int i = 0; i < 1000000; i++) { 02558 const int nid1 = TInt::Rnd.GetUniDevInt(KronLL.Nodes); 02559 const int nid2 = TInt::Rnd.GetUniDevInt(KronLL.Nodes); 02560 printf("%3d] swap LL: %f\n", i, KronLL.SwapNodesLL(nid1, nid2)); 02561 } 02562 printf("*** approx LL: %f\n", KronLL.CalcApxGraphLL()); 02563 printf("*** exact LL: %f\n", KronLL.CalcGraphLL()); 02564 */ 02565 /*ExeTm.Tick(); 02566 // bubble sort 02567 for (int i = 0; i < Graph->GetNodes()-1; i++) { 02568 for (int j = 1; j < Graph->GetNodes(); j++) { 02569 if (KronLL.GetPerm()[j-1] > KronLL.GetPerm()[j]) { 02570 const double oldLL = KronLL.GetLL(); 02571 const double newLL = KronLL.SwapNodesLL(j-1, j); 02572 //const double trueLL = KronLL.CalcApxGraphLL(); 02573 //printf("swap %3d - %3d: old: %f new: %f true:%f\n", 02574 // KronLL.GetPerm()[j-1], KronLL.GetPerm()[j], oldLL, newLL, trueLL); 02575 } 02576 } 02577 } 02578 //for (int i = 0; i < 100000; i++) { 02579 // KronLL.SwapNodesLL(TInt::Rnd.GetUniDevInt(TMath::Pow2(16)), TInt::Rnd.GetUniDevInt(TMath::Pow2(16))); } 02580 printf("\nPermutation is %s\n", KronLL.GetPerm().IsSorted()? "SORTED" : "NOT SORTED"); 02581 printf(" Swap Graph LL: %f\n", KronLL.GetLL()); 02582 printf(" Approx Graph LL: %f\n", KronLL.CalcApxGraphLL()); 02583 KronLL.SetOrderPerm(); 02584 printf(" Order Graph LL: %f\n\n", KronLL.CalcApxGraphLL()); 02585 printf("Permutation is %s\n", KronLL.GetPerm().IsSorted()? 
"SORTED" : "NOT SORTED"); 02586 printf("time: %f\n", ExeTm.GetSecs()); 02587 */ 02588 // evaluate the derivatives 02589 /*{ PNGraph Graph = TNGraph::New(); 02590 TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.4; 0.4 0.2"); 02591 //for (uint i = 0; i < TMath::Pow2(4); i++) { Graph->AddNode(i); } //8k nodes 02592 Graph = TKronMtx::GenFastKronecker(KronParam, 8, true, 2); //TGAlg::SaveFullMtx(Graph, "kron16.txt"); 02593 TKroneckerLL KronLL(Graph, KronParam); 02594 KronLL.SetOrderPerm(); 02595 printf("\nNodes: %d\n", KronLL.GetNodes()); 02596 printf("Full Graph Exact LL: %f\n", KronLL.GetFullGraphLL()); 02597 printf("Empty Graph Exact LL: %f\n", KronLL.GetEmptyGraphLL()); 02598 printf("Empty Approx LL: %f\n", KronLL.GetApxEmptyGraphLL()); 02599 printf("Exact Graph LL: %f\n", KronLL.CalcGraphLL()); 02600 printf("Apx Graph LL: %f\n\n", KronLL.CalcApxGraphLL()); 02601 // derivatives 02602 printf("Empty graph Exact DLL: %f\n", KronLL.GetEmptyGraphDLL(0)); 02603 printf("Empty graph Apx DLL: %f\n", KronLL.GetApxEmptyGraphDLL(0)); 02604 printf("Theta0 edge(0,1) DLL: %f\n", KronLL.LLMtx.GetEdgeDLL(0, 0, 1, 4)); 02605 printf("Theta0 NO edge(0,1) DLL: %f\n", KronLL.LLMtx.GetNoEdgeDLL(0, 0, 1, 4)); 02606 printf("Theta0 NO edge(0,1) DLL: %f\n", KronLL.LLMtx.GetApxNoEdgeDLL(0, 0, 1, 4)); 02607 KronLL.CalcGraphDLL(); printf("Exact Theta0 DLL:"); KronLL.GetDLL(0); 02608 KronLL.CalcApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0); 02609 KronLL.CalcFullApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0); 02610 // swap 02611 */ 02612 /*for (int i = 0; i < 100; i++) { 02613 const int A = TInt::Rnd.GetUniDevInt(KronLL.Nodes); 02614 KronLL.NodePerm.Swap(i, A); 02615 //KronLL.UpdateGraphDLL(i, A); printf("Fast Theta0 DLL:"); KronLL.GetDLL(0); 02616 KronLL.CalcApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0); 02617 //KronLL.CalcFullApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0); 02618 //KronLL.CalcGraphDLL(); printf("Exact Theta0 DLL:"); KronLL.GetDLL(0); 
02619 printf("\n"); 02620 } */ 02621 //} 02622 //} 02623 02624 /*void TKroneckerLL::SampleGradient(const int& WarmUp, const int& NSamples, double& AvgLL, TFltV& AvgGradV, TFltV& SDevV, const bool& Plot) { 02625 printf("Samples: %s (warm-up: %s)\n", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr()); 02626 int NId1 = 0, NId2 = 0; 02627 TExeTm ExeTm; 02628 CalcApxGraphLL(); 02629 for (int s = 0; s < WarmUp; s++) { 02630 SampleNextPerm(NId1, NId2); } 02631 printf(" warm-up:%s", ExeTm.GetTmStr()); ExeTm.Tick(); 02632 CalcApxGraphLL(); 02633 CalcApxGraphDLL(); 02634 AvgLL = 0; 02635 TVec<TMom> DLLMomV(LLMtx.Len()); 02636 for (int s = 0; s < NSamples; s++) { 02637 if (SampleNextPerm(NId1, NId2)) { // new permutation 02638 UpdateGraphDLL(NId1, NId2); 02639 } 02640 AvgLL += GetLL(); 02641 for (int m = 0; m < LLMtx.Len(); m++) { DLLMomV[m].Add(GradV[m]); } 02642 } 02643 AvgLL = AvgLL / (NSamples*Nodes); 02644 // plot gradients over sampling time 02645 if (Plot) { 02646 TVec<TFltV> FltVV(LLMtx.Len()+1); 02647 for (int s = 0; s < DLLMomV[0].GetVals(); s += 1000) { 02648 for (int m = 0; m < LLMtx.Len(); m++) { FltVV[m].Add(DLLMomV[m].GetVal(s)); } 02649 FltVV.Last().Add(s); } 02650 const TStr FNm = TFile::GetUniqueFNm(TStr::Fmt("grad%dW%sS%s-#.png", KronIters, TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr())); 02651 TGnuPlot GP(FNm.GetFMid(), TStr::Fmt("Gradient vs. Sample Index. 
Nodes: %d, WarmUp: %s, Samples: %s, Avg LL: %f", Nodes, 02652 TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr(), AvgLL), true); 02653 for (int m = 0; m < LLMtx.Len(); m++) { 02654 GP.AddPlot(FltVV.Last(), FltVV[m], gpwLines, TStr::Fmt("Grad %d", m+1), "linewidth 5"); } 02655 GP.SetXYLabel("Sample Index (time)", "Log-likelihood gradient"); 02656 GP.SavePng(); 02657 } 02658 // average gradients 02659 printf(" sampling:%s\n", ExeTm.GetTmStr()); 02660 printf(" AverageLL: %f\n", AvgLL); 02661 printf("Gradients:\n"); 02662 AvgGradV.Gen(LLMtx.Len()); 02663 SDevV.Gen(LLMtx.Len()); 02664 for (int m = 0; m < LLMtx.Len(); m++) { 02665 DLLMomV[m].Def(); 02666 AvgGradV[m] = DLLMomV[m].GetMean() / (Nodes*Nodes); 02667 SDevV[m] = DLLMomV[m].GetSDev() / (Nodes*Nodes); 02668 printf(" %d] mean: %16f sDev: %16f\n", m, AvgGradV[m], SDevV[m]); 02669 } 02670 } 02671 02672 void TKronMaxLL::TFunc::FDeriv(const TFltV& Point, TFltV& GradV) { 02673 CallBack->GetDLL(Point, GradV); 02674 for (int i = 0; i < GradV.Len(); i++) { GradV[i] = -GradV[i]; } 02675 } 02676 02677 double TKronMaxLL::TLogBarFunc::FVal(const TFltV& Point) { 02678 // log-likelihood 02679 const double LogLL = CallBack->GetLL(Point); 02680 // log-barrier 02681 const double MinBarrier = 0.05; 02682 const double MaxBarrier = 0.95; 02683 const double T1 = 1.0/T; 02684 double Barrier = 0.0; 02685 for (int i = 0; i < Point.Len(); i++) { 02686 if(Point[i].Val > MinBarrier && Point[i].Val < MaxBarrier) { 02687 Barrier += - T1 * (log(Point[i]-MinBarrier) + log(MaxBarrier-Point[i])); //log-barrier 02688 } else { Barrier = 1e5; } 02689 } 02690 IAssert(Barrier > 0.0); 02691 printf("barrrier: %f\n", Barrier); 02692 return -LogLL + Barrier; // minus LL since we want to maximize it 02693 } 02694 02695 void TKronMaxLL::TLogBarFunc::FDeriv(const TFltV& Point, TFltV& DerivV) { 02696 // derivative of log-likelihood 02697 CallBack->GetDLL(Point, DerivV); 02698 // derivative of log barrier 02699 const double MinBarrier = 0.05; 
02700 const double MaxBarrier = 0.95; 02701 const double T1 = 1.0/T; 02702 for (int i = 0; i < Point.Len(); i++) { 02703 DerivV[i] = - DerivV[i] + (- T1*(1.0/(Point[i]-MinBarrier) - 1.0/(MaxBarrier-Point[i]))); 02704 } 02705 } 02706 */