SNAP Library 2.1, User Reference  2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
agm.cpp
Go to the documentation of this file.
00001 #include "stdafx.h"
00002 #include "Snap.h"
00003 #include "agm.h"
00004 #include "agmfit.h"
00005 
00007 // AGM graph generation.
00008 
00010 void TAGM::RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd){
00011   int CNodes = CmtyV.Len(), CEdges;
00012   if (CNodes < 20) {
00013     CEdges = (int) Rnd.GetBinomialDev(Prob, CNodes * (CNodes-1) / 2);
00014   } else {
00015     CEdges = (int) (Prob * CNodes * (CNodes - 1) / 2);
00016   }
00017   THashSet<TIntPr> NewEdgeSet(CEdges);
00018   for (int edge = 0; edge < CEdges; ) {
00019     int SrcNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
00020     int DstNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
00021     if (SrcNId > DstNId) { Swap(SrcNId,DstNId); }
00022     if (SrcNId != DstNId && ! NewEdgeSet.IsKey(TIntPr(SrcNId, DstNId))) { // is new edge
00023       NewEdgeSet.AddKey(TIntPr(SrcNId, DstNId));
00024       Graph->AddEdge(SrcNId, DstNId);
00025       edge++; 
00026     } 
00027   }
00028 }
00029 
00030 
00031 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd){
00032   PUNGraph TryG = TAGM::GenAGM(CmtyVV, DensityCoef, 1.0, Rnd);
00033   const double ScaleCoef = (double) TargetEdges / (double) TryG->GetEdges();
00034   return TAGM::GenAGM(CmtyVV, DensityCoef, ScaleCoef, Rnd);
00035 }
00036 
00037 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd){
00038   TFltV CProbV;
00039   double Prob;
00040   for (int i = 0; i < CmtyVV.Len(); i++) {
00041     Prob = ScaleCoef*pow( double( CmtyVV[i].Len()), - DensityCoef);
00042     if (Prob > 1.0) { Prob = 1; }
00043     CProbV.Add(Prob);
00044   }
00045   return TAGM::GenAGM(CmtyVV, CProbV, Rnd);
00046 }
00047 
00049 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom){
00050   PUNGraph G = TUNGraph::New(100 * CmtyVV.Len(), -1);
00051   printf("AGM begins\n");
00052   for (int i = 0; i < CmtyVV.Len(); i++) {
00053     TIntV& CmtyV = CmtyVV[i];
00054     for (int u = 0; u < CmtyV.Len(); u++) {
00055       if ( G->IsNode(CmtyV[u])) { continue; }
00056       G->AddNode(CmtyV[u]);
00057     }
00058     double Prob = CProbV[i];
00059     RndConnectInsideCommunity(G, CmtyV, Prob, Rnd);
00060   }
00061   if (PNoCom > 0.0) { //if we want to connect nodes that do not share any community
00062     TIntSet NIDS;
00063     for (int c = 0; c < CmtyVV.Len(); c++) {
00064       for (int u = 0; u < CmtyVV[c].Len(); u++) {
00065         NIDS.AddKey(CmtyVV[c][u]);
00066       }
00067     }
00068     TIntV NIDV;
00069     NIDS.GetKeyV(NIDV);
00070     RndConnectInsideCommunity(G,NIDV,PNoCom,Rnd);
00071   }
00072   printf("AGM completed (%d nodes %d edges)\n",G->GetNodes(),G->GetEdges());
00073   G->Defrag();
00074   return G;
00075 }
00076 
00079 
00081 void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) {
00082   SzSeq.Gen(SeqLen, 0);
00083   while (SzSeq.Len() < SeqLen) {
00084     int Sz = (int) TMath::Round(Rnd.GetPowerDev(Alpha));
00085     if (Sz >= Min && Sz <= Max) {
00086       SzSeq.Add(Sz);
00087     }
00088   }
00089 }
00090 
00092 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
00093   TIntV NIDV(Nodes, 0);
00094   for (int i = 0; i < Nodes; i++) {
00095     NIDV.Add(i);
00096   }
00097   GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd);
00098 }
00099 
00101 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
00102   if (Coms == 0 || Nodes == 0) {
00103     CmtyVV.Clr();
00104     return;
00105   }
00106   TIntV NIDV;
00107   Graph->GetNIdV(NIDV);
00108   GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd);
00109 }
00110 
00112 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
00113   if (Coms == 0 || Nodes == 0) {
00114     CmtyVV.Clr();
00115     return;
00116   }
00117   TIntV ComSzSeq, MemSeq;
00118   TAGMUtil::GenPLSeq(ComSzSeq,Coms,ComSzAlpha,Rnd,MinSz,MaxSz);
00119   TAGMUtil::GenPLSeq(MemSeq,Nodes,MemAlpha,Rnd,MinK,MaxK);
00120   TIntPrV CIDSzPrV, NIDMemPrV;
00121   for (int i = 0; i < ComSzSeq.Len(); i++) {
00122     CIDSzPrV.Add(TIntPr(i, ComSzSeq[i]));
00123   }
00124   for (int i = 0; i < MemSeq.Len(); i++) {
00125     NIDMemPrV.Add(TIntPr(NIDV[i], MemSeq[i]));
00126   }
00127   TAGMUtil::ConnectCmtyVV(CmtyVV, CIDSzPrV, NIDMemPrV, Rnd);
00128 }
00129 
00131 void TAGMUtil::ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd) {
00132   const int Nodes = NIDMemPrV.Len(), Coms = CIDSzPrV.Len();
00133   TIntV NDegV,CDegV;
00134   TIntPrSet CNIDSet;
00135   TIntSet HitNodes(Nodes);
00136   THash<TInt,TIntV> CmtyVH;
00137   for (int i = 0;i < CIDSzPrV.Len(); i++) {
00138     for (int j = 0; j < CIDSzPrV[i].Val2; j++) {
00139       CDegV.Add(CIDSzPrV[i].Val1);
00140     }
00141   }
00142   for (int i = 0; i < NIDMemPrV.Len(); i++) {
00143     for (int j = 0; j < NIDMemPrV[i].Val2; j++) {
00144       NDegV.Add(NIDMemPrV[i].Val1);
00145     }
00146   }
00147   while (CDegV.Len() < (int) (1.2 * Nodes)) {
00148     CDegV.Add(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1);
00149   }
00150   while (NDegV.Len() < CDegV.Len()) {
00151     NDegV.Add(NIDMemPrV[Rnd.GetUniDevInt(Nodes)].Val1);
00152   }
00153   printf("Total Mem: %d, Total Sz: %d\n",NDegV.Len(), CDegV.Len());
00154   int c=0;
00155   while (c++ < 15 && CDegV.Len() > 1) {
00156     for (int i = 0; i < CDegV.Len(); i++) {
00157       int u = Rnd.GetUniDevInt(CDegV.Len());
00158       int v = Rnd.GetUniDevInt(NDegV.Len());
00159       if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; }
00160       CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
00161       HitNodes.AddKey(NDegV[v]);
00162       if (u == CDegV.Len() - 1) { CDegV.DelLast(); }
00163       else { 
00164         CDegV[u] = CDegV.Last(); 
00165         CDegV.DelLast();
00166       }
00167       if (v == NDegV.Len() - 1) { NDegV.DelLast(); }
00168       else { 
00169         NDegV[v] = NDegV.Last();
00170         NDegV.DelLast();
00171       }
00172     }
00173   }
00174   //make sure that every node belongs to at least one community
00175   for (int i = 0; i < Nodes; i++) {
00176     int NID = NIDMemPrV[i].Val1;
00177     if (! HitNodes.IsKey(NID)) {
00178       CNIDSet.AddKey(TIntPr(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1, NID));
00179       HitNodes.AddKey(NID);
00180     }
00181   }
00182   IAssert(HitNodes.Len() == Nodes);
00183   for (int i = 0; i < CNIDSet.Len(); i++) {
00184     TIntPr CNIDPr = CNIDSet[i];
00185     CmtyVH.AddDat(CNIDPr.Val1);
00186     CmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
00187   }
00188   CmtyVH.GetDatV(CmtyVV);
00189 }
00190 
00192 void TAGMUtil::RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd){
00193   THash<TInt,TIntV> CmtyVH;
00194   for (int i = 0; i < CmtyVVIn.Len(); i++) {
00195     CmtyVH.AddDat(i, CmtyVVIn[i]);
00196   }
00197   TAGMUtil::RewireCmtyNID(CmtyVH, Rnd);
00198   CmtyVH.GetDatV(CmtyVVOut);
00199 }
00200 
00202 void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) {
00203   THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len());
00204   TIntV NDegV;
00205   TIntV CDegV;
00206   for (int i = 0; i < CmtyVH.Len(); i++) {
00207     int CID = CmtyVH.GetKey(i);
00208     for (int j = 0; j < CmtyVH[i].Len(); j++) {
00209       int NID = CmtyVH[i][j];
00210       NDegV.Add(NID);
00211       CDegV.Add(CID);
00212     }
00213   }
00214   TIntPrSet CNIDSet(CDegV.Len());
00215   int c=0;
00216   while (c++ < 15 && CDegV.Len() > 1){
00217     for (int i = 0; i < CDegV.Len(); i++) {
00218       int u = Rnd.GetUniDevInt(CDegV.Len());
00219       int v = Rnd.GetUniDevInt(NDegV.Len());
00220       if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; }
00221       CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
00222       if (u == CDegV.Len() - 1) { 
00223         CDegV.DelLast(); 
00224       }  else {
00225         CDegV[u] = CDegV.Last();
00226         CDegV.DelLast();
00227       }
00228       if ( v == NDegV.Len() - 1) {
00229         NDegV.DelLast();
00230       }  else{
00231         NDegV[v] = NDegV.Last();
00232         NDegV.DelLast();
00233       }
00234     }
00235   }
00236   for (int i = 0; i < CNIDSet.Len(); i++) {
00237     TIntPr CNIDPr = CNIDSet[i];
00238     IAssert(CmtyVH.IsKey(CNIDPr.Val1));
00239     NewCmtyVH.AddDat(CNIDPr.Val1);
00240     NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
00241   }
00242   CmtyVH = NewCmtyVH;
00243 }
00244 
00246 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV) {
00247   CmtyVV.Gen(Kilo(100), 0);
00248   TSsParser Ss(InFNm, ssfWhiteSep);
00249   while (Ss.Next()) {
00250     if(Ss.GetFlds() > 0) {
00251       TIntV CmtyV;
00252       for (int i = 0; i < Ss.GetFlds(); i++) {
00253         if (Ss.IsInt(i)) {
00254           CmtyV.Add(Ss.GetInt(i));
00255         }
00256       }
00257       CmtyVV.Add(CmtyV);
00258     }
00259   }
00260   CmtyVV.Pack();
00261   printf("community loading completed (%d communities)\n",CmtyVV.Len());
00262 
00263 }
00264 
00266 void TAGMUtil::DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV) {
00267   FILE* F = fopen(OutFNm.CStr(),"wt");
00268   for (int i = 0; i < CmtyVV.Len(); i++) {
00269     for (int j = 0; j < CmtyVV[i].Len(); j++) {
00270       fprintf(F,"%d\t", (int) CmtyVV[i][j]);
00271     }
00272     fprintf(F,"\n");
00273   }
00274   fclose(F);
00275 }
00276 
00278 void TAGMUtil::DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH) {
00279   FILE* F = fopen(OutFNm.CStr(), "wt");
00280   for (int c = 0; c < CmtyVV.Len(); c++) {
00281     for (int u = 0; u < CmtyVV[c].Len(); u++) {
00282       if (NIDNmH.IsKey(CmtyVV[c][u])){
00283         fprintf(F, "%s\t", NIDNmH.GetDat(CmtyVV[c][u]).CStr());
00284       }
00285       else {
00286         fprintf(F, "%d\t", (int) CmtyVV[c][u]);
00287       }
00288     }
00289     fprintf(F, "\n");
00290   }
00291   fclose(F);
00292 }
00293 
00295 int TAGMUtil::TotalMemberships(const TVec<TIntV>& CmtyVV){
00296   int M = 0;
00297   for (int i = 0; i < CmtyVV.Len(); i++) {
00298     M += CmtyVV[i].Len();
00299   }
00300   return M;
00301 }
00302 
00304 void TAGMUtil::GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH) {
00305   NIDComVH.Clr();
00306   for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){
00307     for (int j = 0;j < HI.GetDat().Len(); j++) {
00308       int NID = HI.GetDat()[j];
00309       NIDComVH.AddDat(NID)++;
00310     }
00311   }
00312 }
00313 
00315 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV) {
00316   NIDComVH.Clr();
00317   for (int i = 0; i < CmtyVV.Len(); i++){
00318     int CID = i;
00319     for (int j = 0; j < CmtyVV[i].Len(); j++) {
00320       int NID = CmtyVV[i][j];
00321       NIDComVH.AddDat(NID).AddKey(CID);
00322     }
00323   }
00324 }
00325 
00327 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV) {
00328   NIDComVH.Clr();
00329   for (int u = 0; u < NIDV.Len(); u++) {
00330     NIDComVH.AddDat(NIDV[u]);
00331   }
00332   for (int i = 0; i < CmtyVV.Len(); i++){
00333     int CID = i;
00334     for (int j = 0; j < CmtyVV[i].Len(); j++) {
00335       int NID = CmtyVV[i][j];
00336       NIDComVH.AddDat(NID).AddKey(CID);
00337     }
00338   }
00339 }
00340 
00341 
00342 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntSet>& CmtyVV) {
00343   for (int i = 0; i < CmtyVV.Len(); i++){
00344     int CID = i;
00345     for (TIntSet::TIter SI = CmtyVV[i].BegI(); SI < CmtyVV[i].EndI(); SI++) {
00346       int NID = SI.GetKey();
00347       NIDComVH.AddDat(NID).AddKey(CID);
00348     }
00349   }
00350 }
00351 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) {
00352   for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){
00353     int CID = HI.GetKey();
00354     for (int j = 0; j < HI.GetDat().Len(); j++) {
00355       int NID = HI.GetDat()[j];
00356       NIDComVH.AddDat(NID).AddKey(CID);
00357     }
00358   }
00359 }
00360 
00361 void TAGMUtil::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) {
00362   for (int i = 0; i < CmtyVH.Len(); i++){
00363     int CID = CmtyVH.GetKey(i);
00364     for (int j = 0; j < CmtyVH[i].Len(); j++) {
00365       int NID = CmtyVH[i][j];
00366       NIDComVH.AddDat(NID).Add(CID);
00367     }
00368   }
00369 }
00370 
00371 void TAGMUtil::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV>& CmtyVV) {
00372   THash<TInt,TIntV> CmtyVH;
00373   for (int i = 0; i < CmtyVV.Len(); i++) {
00374     CmtyVH.AddDat(i, CmtyVV[i]);
00375   }
00376   GetNodeMembership(NIDComVH, CmtyVH);
00377 }
00378 
00379 int TAGMUtil::Intersection(const TIntV& C1, const TIntV& C2) {
00380   TIntSet S1(C1), S2(C2);
00381   return TAGMUtil::Intersection(S1, S2);
00382 }
00383 
00384 void TAGMUtil::GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C) {
00385   C.Gen(A.Len());
00386   if (A.Len() < B.Len()) {
00387     for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++) 
00388       if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
00389   } else {
00390     for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++) 
00391       if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
00392   }
00393 }
00394 
00395 int TAGMUtil::Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B) {
00396   int n = 0;
00397   if (A.Len() < B.Len()) {
00398     for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++) 
00399       if (B.IsKey(it.GetKey())) n++;
00400   } else {
00401     for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++) 
00402       if (A.IsKey(it.GetKey())) n++;
00403   }
00404   return n;
00405 }
00406 
00408 void TAGMUtil::SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV>& CmtyVVAtr, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH ) {
00409   THash<TInt,TIntV> NIDComVHAtr;
00410   TAGMUtil::GetNodeMembership(NIDComVHAtr, CmtyVVAtr);
00411 
00412   FILE* F = fopen(OutFNm.CStr(), "wt");
00413   fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
00414   fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
00415   fprintf(F, "\t<graph mode='static' defaultedgetype='undirected'>\n");
00416   if (CmtyVVAtr.Len() > 0) {
00417     fprintf(F, "\t<attributes class='node'>\n");
00418     for (int c = 0; c < CmtyVVAtr.Len(); c++) {
00419       fprintf(F, "\t\t<attribute id='%d' title='c%d' type='boolean'>", c, c);
00420       fprintf(F, "\t\t<default>false</default>\n");
00421       fprintf(F, "\t\t</attribute>\n");
00422     }
00423     fprintf(F, "\t</attributes>\n");
00424   }
00425   fprintf(F, "\t\t<nodes>\n");
00426   for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
00427     int NID = NI.GetId();
00428     TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
00429     TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
00430 
00431     double Size = MinSz;
00432     double SizeStep = (MaxSz - MinSz) / (double) CmtyVVAtr.Len();
00433     if (NIDComVHAtr.IsKey(NID)) {
00434       Size = MinSz +  SizeStep *  (double) NIDComVHAtr.GetDat(NID).Len();
00435     }
00436     double Alpha = 1.0;
00437     fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
00438     fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
00439     fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
00440     //specify attributes
00441     if (NIDComVHAtr.IsKey(NID)) {
00442       fprintf(F, "\t\t\t\t<attvalues>\n");
00443       for (int c = 0; c < NIDComVHAtr.GetDat(NID).Len(); c++) {
00444         int CID = NIDComVHAtr.GetDat(NID)[c];
00445         fprintf(F, "\t\t\t\t\t<attvalue for='%d' value='true'/>\n", CID);
00446       }
00447       fprintf(F, "\t\t\t\t</attvalues>\n");
00448     }
00449 
00450     fprintf(F, "\t\t\t</node>\n");
00451   }
00452   fprintf(F, "\t\t</nodes>\n");
00453   //plot edges
00454   int EID = 0;
00455   fprintf(F, "\t\t<edges>\n");
00456   for (TUNGraph::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
00457     fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, EI.GetSrcNId(), EI.GetDstNId());
00458   }
00459   fprintf(F, "\t\t</edges>\n");
00460   fprintf(F, "\t</graph>\n");
00461   fprintf(F, "</gexf>\n");
00462   fclose(F);
00463 }
00464 
00466 void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) {
00468   if (CmtyVV.Len() == 0) { return; }
00469   double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0;
00470   double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax;
00471   double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len();
00472   THash<TInt,TIntV> NIDComVH;
00473   TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV);
00474 
00475   FILE* F = fopen(OutFNm.CStr(), "wt");
00476   fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
00477   fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
00478   fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
00479   fprintf(F, "\t\t<nodes>\n");
00480   for (int c = 0; c < CmtyVV.Len(); c++) {
00481     int CID = c;
00482     double XPos = c * CStep + CXMin;
00483     TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120);
00484     fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID);
00485     fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
00486     fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz);
00487     fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
00488     fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax); 
00489     fprintf(F, "\t\t\t</node>\n");
00490   }
00491 
00492   for (int u = 0;u < NIDV.Len(); u++) {
00493     int NID = NIDV[u];
00494     TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
00495     double Size = MinSz;
00496     double XPos = NXMin + u * NStep;
00497     TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
00498     double Alpha = 1.0;
00499     fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
00500     fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
00501     fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
00502     fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
00503     fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin); 
00504     fprintf(F, "\t\t\t</node>\n");
00505   }
00506   fprintf(F, "\t\t</nodes>\n");
00507   fprintf(F, "\t\t<edges>\n");
00508   int EID = 0;
00509   for (int u = 0;u < NIDV.Len(); u++) {
00510     int NID = NIDV[u];
00511     if (NIDComVH.IsKey(NID)) {
00512       for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) {
00513         int CID = NIDComVH.GetDat(NID)[c];
00514         fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID);
00515       }
00516     }
00517   }
00518   fprintf(F, "\t\t</edges>\n");
00519   fprintf(F, "\t</graph>\n");
00520   fprintf(F, "</gexf>\n");
00521 }
00522 
00524 int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) {
00525   TRnd Rnd(RndSeed);
00526   int LambdaIter = 100;
00527   if (Graph->GetNodes() < 200) { LambdaIter = 1; } 
00528   if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) { LambdaIter = 100; } 
00529 
00530   //Find coms with large C
00531   TAGMFit AGMFitM(Graph, InitComs, RndSeed);
00532   if (PNoCom > 0.0) { AGMFitM.SetPNoCom(PNoCom); }
00533   AGMFitM.RunMCMC(MaxIter, LambdaIter, "");
00534 
00535   int TE = Graph->GetEdges();
00536   TFltV RegV; 
00537   RegV.Add(0.3 * TE);
00538   for (int r = 0; r < 25; r++) {
00539     RegV.Add(RegV.Last() * RegGap);
00540   }
00541   TFltPrV RegComsV, RegLV, RegBICV;
00542   TFltV LV, BICV;
00543   //record likelihood and number of communities with nonzero P_c
00544   for (int r = 0; r < RegV.Len(); r++) {
00545     double RegCoef = RegV[r];
00546     AGMFitM.SetRegCoef(RegCoef);
00547     AGMFitM.MLEGradAscentGivenCAG(0.01, 1000);
00548     AGMFitM.SetRegCoef(0.0);
00549         
00550     TVec<TIntV> EstCmtyVV;
00551     AGMFitM.GetCmtyVV(EstCmtyVV, 0.99);
00552     int NumLowQ = EstCmtyVV.Len();
00553     RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ));
00554 
00555     if (EstCmtyVV.Len() > 0) {
00556       TAGMFit AFTemp(Graph, EstCmtyVV, Rnd);
00557       AFTemp.MLEGradAscentGivenCAG(0.001, 1000);
00558       double CurL = AFTemp.Likelihood();
00559       LV.Add(CurL);
00560       BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0));
00561     }
00562     else {
00563       break;
00564     }
00565   }
00566   // if likelihood does not exist or does not change at all, report the smallest number of communities or 2
00567   if (LV.Len() == 0) { return 2; }
00568   else if (LV[0] == LV.Last()) { return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2); }
00569 
00570 
00571   //normalize likelihood and BIC to 0~100
00572   int MaxL = 100;
00573   {
00574     TFltV& ValueV = LV;
00575     TFltPrV& RegValueV = RegLV;
00576     double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
00577     for (int l = 0; l < ValueV.Len(); l++) {
00578       if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
00579       if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
00580     }
00581     while (ValueV.Len() < RegV.Len()) { ValueV.Add(MinValue); }
00582     double RangeVal = MaxValue - MinValue;
00583     for (int l = 0; l < ValueV.Len(); l++) {
00584       RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
00585     }
00586     
00587   }
00588   {
00589     TFltV& ValueV = BICV;
00590     TFltPrV& RegValueV = RegBICV;
00591     double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
00592     for (int l = 0; l < ValueV.Len(); l++) {
00593       if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
00594       if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
00595     }
00596     while (ValueV.Len() < RegV.Len()) { ValueV.Add(MaxValue); }
00597     double RangeVal = MaxValue - MinValue;
00598     for (int l = 0; l < ValueV.Len(); l++) {
00599       RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
00600     }
00601   }
00602 
00603   //fit logistic regression to normalized likelihood.
00604   TVec<TFltV> XV(RegLV.Len());
00605   TFltV YV (RegLV.Len());
00606   for (int l = 0; l < RegLV.Len(); l++) {
00607     XV[l] = TFltV::GetV(log(RegLV[l].Val1));
00608     YV[l] = RegLV[l].Val2 / (double) MaxL;
00609   }
00610   TFltPrV LRVScaled, LRV;
00611   TLogRegFit LRFit;
00612   PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx);
00613   for (int l = 0; l < RegLV.Len(); l++) {
00614     LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l])));
00615     LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2));
00616   }
00617 
00618   //estimate # communities from fitted logistic regression
00619   int NumComs = 0, IdxRegDrop = 0;
00620   double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25
00621   double LeftReg = 0.0, RightReg = 0.0;
00622   TFltV Theta;
00623   LRMd->GetTheta(Theta);
00624   RegDrop = (- Theta[1] - LRThres) / Theta[0];
00625   if (RegDrop <= XV[0][0]) { NumComs = (int) RegComsV[0].Val2; }
00626   else if (RegDrop >= XV.Last()[0]) { NumComs = (int) RegComsV.Last().Val2; }
00627   else {  //interpolate for RegDrop
00628     for (int i = 0; i < XV.Len(); i++) {
00629       if (XV[i][0] > RegDrop) { IdxRegDrop = i; break; }
00630     }
00631     
00632     if (IdxRegDrop == 0) {
00633       printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val);
00634       for (int l = 0; l < RegLV.Len(); l++) {
00635         printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val);
00636       }
00637     }
00638     IAssert(IdxRegDrop > 0);
00639     LeftReg = RegDrop - XV[IdxRegDrop - 1][0];
00640     RightReg = XV[IdxRegDrop][0] - RegDrop;
00641     NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg));
00642 
00643   }
00644   //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2);
00645   printf("Num Coms:%d\n", NumComs);
00646   if (NumComs < 2) { NumComs = 2; }
00647 
00648   if (PltFPrx.Len() > 0) {
00649     TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE);
00650     TGnuPlot GPC(PltFPrx + ".l");
00651     GPC.AddPlot(RegComsV, gpwLinesPoints, "C");
00652     GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood");
00653     GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC");
00654     GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)");
00655     GPC.SetScale(gpsLog10X);
00656     GPC.SetTitle(PlotTitle);
00657     GPC.SavePng(PltFPrx + ".l.png");
00658   }
00659   
00660   return NumComs;
00661 }
00662 
00663 double TAGMUtil::GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges) {
00664   const int Edges2 = Edges >= 0 ? 2*Edges : Graph->GetEdges();
00665   int Vol = 0,  Cut = 0; 
00666   double Phi = 0.0;
00667   for (int i = 0; i < CmtyS.Len(); i++) {
00668     if (! Graph->IsNode(CmtyS[i])) { continue; }
00669     TUNGraph::TNodeI NI = Graph->GetNI(CmtyS[i]);
00670     for (int e = 0; e < NI.GetOutDeg(); e++) {
00671       if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; }
00672     }
00673     Vol += NI.GetOutDeg();
00674   }
00675   // get conductance
00676   if (Vol != Edges2) {
00677     if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
00678     else if (Vol == 0) { Phi = 0.0; }
00679     else { Phi = Cut / double(Vol); }
00680   } else {
00681     if (Vol == Edges2) { Phi = 1.0; }
00682   }
00683   return Phi;
00684 }
00685 
00686 void TAGMUtil::GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS) {
00687   TUNGraph::TNodeI NI = Graph->GetNI(NID);
00688   NBCmtyS.Gen(NI.GetDeg());
00689   NBCmtyS.AddKey(NID);
00690   for (int e = 0; e < NI.GetDeg(); e++) {
00691     NBCmtyS.AddKey(NI.GetNbrNId(e));
00692   }
00693 }
00694 
00696 // Logistic regression by gradient ascent
00697 
00698 void TLogRegFit::GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV){
00699   bool HSingular = false;
00700   for (int i = 0; i < HVV.GetXDim(); i++) {
00701     if (HVV(i,i) == 0.0) {
00702       HVV(i,i) = 0.001;
00703       HSingular = true;
00704     }
00705     DeltaLV[i] = GradV[i] / HVV(i, i);
00706   }
00707   if (! HSingular) {
00708     if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite
00709       for (int r = 0; r < Theta.Len(); r++) {
00710         for (int c = 0; c < Theta.Len(); c++) {
00711           HVV(r, c) = - HVV(r, c);
00712         }
00713       }
00714       TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
00715     }
00716     else {
00717       TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
00718       for (int i = 0; i < DeltaLV.Len(); i++) {
00719         DeltaLV[i] = - DeltaLV[i];
00720       }
00721     }
00722 
00723   }
00724 }
00725 
00726 void TLogRegFit::Hessian(TFltVV& HVV) {
00727   HVV.Gen(Theta.Len(), Theta.Len());
00728   TFltV OutV;
00729   TLogRegPredict::GetCfy(X, OutV, Theta);
00730   for (int i = 0; i < X.Len(); i++) {
00731     for (int r = 0; r < Theta.Len(); r++) {
00732       HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]);
00733       for (int c = r + 1; c < Theta.Len(); c++) {
00734         HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
00735         HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
00736       }
00737     }
00738   }
00739   /*
00740   printf("\n");
00741   for (int r = 0; r < Theta.Len(); r++) {
00742     for (int c = 0; c < Theta.Len(); c++) {
00743       printf("%f\t", HVV.At(r, c).Val);
00744     }
00745     printf("\n");
00746   }
00747   */
00748 }
00749 
00750 int TLogRegFit::MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
00751   TExeTm ExeTm;
00752   TFltV GradV(Theta.Len()), DeltaLV(Theta.Len());
00753   TFltVV HVV(Theta.Len(), Theta.Len());
00754   int iter = 0;
00755   double MinVal = -1e10, MaxVal = 1e10;
00756   for(iter = 0; iter < MaxStep; iter++) {
00757     Gradient(GradV);
00758     Hessian(HVV);
00759     GetNewtonStep(HVV, GradV, DeltaLV);
00760     double Increment = TLinAlg::DotProduct(GradV, DeltaLV);
00761     if (Increment <= ChangeEps) {break;}
00762     double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1);
00763     for(int i = 0; i < Theta.Len(); i++) {
00764       double Change = LearnRate * DeltaLV[i];
00765       Theta[i] += Change;
00766       if(Theta[i] < MinVal) { Theta[i] = MinVal;}
00767       if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
00768     }
00769   }
00770   if (! PlotNm.Empty()) {
00771     printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
00772   }
00773 
00774   return iter;
00775 }
00776 
00777 int TLogRegFit::MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
00778   TExeTm ExeTm;
00779   TFltV GradV(Theta.Len());
00780   int iter = 0;
00781   TIntFltPrV IterLV, IterGradNormV;
00782   double MinVal = -1e10, MaxVal = 1e10;
00783   double GradCutOff = 100000;
00784   for(iter = 0; iter < MaxStep; iter++) {
00785     Gradient(GradV);    //if gradient is going out of the boundary, cut off
00786     for(int i = 0; i < Theta.Len(); i++) {
00787       if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
00788       if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
00789       if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; }
00790       if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; }
00791     }
00792     double Alpha = 0.15, Beta = 0.9;
00793     //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
00794     double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
00795     if (TLinAlg::Norm(GradV) < ChangeEps) { break; }
00796     for(int i = 0; i < Theta.Len(); i++) {
00797       double Change = LearnRate * GradV[i];
00798       Theta[i] += Change;
00799       if(Theta[i] < MinVal) { Theta[i] = MinVal;}
00800       if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
00801     }
00802     if (! PlotNm.Empty()) {
00803       double L = Likelihood();
00804       IterLV.Add(TIntFltPr(iter, L));
00805       IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));
00806     }
00807     
00808   }
00809   if (! PlotNm.Empty()) {
00810     TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
00811     TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
00812     printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
00813   }
00814   return iter;
00815 }
00816 
00817 double TLogRegFit::GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta) {
00818   double StepSize = 1.0;
00819   double InitLikelihood = Likelihood();
00820   IAssert(Theta.Len() == DeltaV.Len());
00821   TFltV NewThetaV(Theta.Len());
00822   double MinVal = -1e10, MaxVal = 1e10;
00823   for(int iter = 0; ; iter++) {
00824     for (int i = 0; i < Theta.Len(); i++){
00825       NewThetaV[i] = Theta[i] + StepSize * DeltaV[i];
00826       if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal;  }
00827       if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; }
00828     }
00829     if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
00830       StepSize *= Beta;
00831     } else {
00832       break;
00833     }
00834   }
00835   return StepSize;
00836 }
00837 
00838 double TLogRegFit::Likelihood(const TFltV& NewTheta) {
00839   TFltV OutV;
00840   TLogRegPredict::GetCfy(X, OutV, NewTheta);
00841   double L = 0;
00842   for (int r = 0; r < OutV.Len(); r++) {
00843     L += Y[r] * log(OutV[r]);
00844     L += (1 - Y[r]) * log(1 - OutV[r]);
00845   }
00846   return L;
00847 }
00848 
00849 void TLogRegFit::Gradient(TFltV& GradV) {
00850   TFltV OutV;
00851   TLogRegPredict::GetCfy(X, OutV, Theta);
00852   GradV.Gen(M);
00853   for (int r = 0; r < X.Len(); r++) {
00854     //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val);
00855     for (int m = 0; m < M; m++) {
00856       GradV[m] += (Y[r] - OutV[r]) * X[r][m];
00857     }
00858   }
00859   //for (int m = 0; m < M; m++) {  printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); }
00860 }
00861 
00862 PLogRegPredict TLogRegFit::CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) {
00863 
00864   X = XPt;
00865   Y = yPt;
00866   IAssert(X.Len() == Y.Len());
00867   if (Intercept == false) { // if intercept is not included, add it
00868     for (int r = 0; r < X.Len(); r++) {  X[r].Add(1); }
00869   }
00870   M = X[0].Len();
00871   for (int r = 0; r < X.Len(); r++) {  IAssert(X[r].Len() == M); }
00872   for (int r = 0; r < Y.Len(); r++) {  
00873     if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
00874     if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
00875   }
00876   Theta.Gen(M);
00877   MLENewton(ChangeEps, MaxStep, PlotNm);
00878   return new TLogRegPredict(Theta); 
00879 };
00880 
00881 PLogRegPredict TLogRegFit::CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) {
00882   X = XPt;
00883   Y = yPt;
00884   IAssert(X.Len() == Y.Len());
00885   if (Intercept == false) { // if intercept is not included, add it
00886     for (int r = 0; r < X.Len(); r++) {  X[r].Add(1); }
00887   }
00888   M = X[0].Len();
00889   for (int r = 0; r < X.Len(); r++) {  IAssert(X[r].Len() == M); }
00890   for (int r = 0; r < Y.Len(); r++) {  
00891     if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
00892     if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
00893   }
00894   Theta.Gen(M);
00895   MLEGradient(ChangeEps, MaxStep, PlotNm);
00896   return new TLogRegPredict(Theta); 
00897 };
00898 
00900 // Logistic-Regression-Model
00901 
00902 double TLogRegPredict::GetCfy(const TFltV& AttrV, const TFltV& NewTheta) {
00903     int len = AttrV.Len();
00904     double res = 0;
00905     if (len < NewTheta.Len()) { res = NewTheta.Last(); } //if feature vector is shorter, add an intercept
00906     for (int i = 0; i < len; i++) {
00907       if (i < NewTheta.Len()) { res += AttrV[i] * NewTheta[i]; }
00908     }
00909     double mu = 1 / (1 + exp(-res));
00910     return mu;
00911 }
00912 
00913 void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) {
00914   OutV.Gen(X.Len());
00915   for (int r = 0; r < X.Len(); r++) {
00916     OutV[r] = GetCfy(X[r], NewTheta);
00917   }
00918 }