SNAP Library 2.1, User Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #include "stdafx.h" 00002 #include "Snap.h" 00003 #include "agm.h" 00004 #include "agmfit.h" 00005 00007 // AGM graph generation. 00008 00010 void TAGM::RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd){ 00011 int CNodes = CmtyV.Len(), CEdges; 00012 if (CNodes < 20) { 00013 CEdges = (int) Rnd.GetBinomialDev(Prob, CNodes * (CNodes-1) / 2); 00014 } else { 00015 CEdges = (int) (Prob * CNodes * (CNodes - 1) / 2); 00016 } 00017 THashSet<TIntPr> NewEdgeSet(CEdges); 00018 for (int edge = 0; edge < CEdges; ) { 00019 int SrcNId = CmtyV[Rnd.GetUniDevInt(CNodes)]; 00020 int DstNId = CmtyV[Rnd.GetUniDevInt(CNodes)]; 00021 if (SrcNId > DstNId) { Swap(SrcNId,DstNId); } 00022 if (SrcNId != DstNId && ! NewEdgeSet.IsKey(TIntPr(SrcNId, DstNId))) { // is new edge 00023 NewEdgeSet.AddKey(TIntPr(SrcNId, DstNId)); 00024 Graph->AddEdge(SrcNId, DstNId); 00025 edge++; 00026 } 00027 } 00028 } 00029 00030 00031 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd){ 00032 PUNGraph TryG = TAGM::GenAGM(CmtyVV, DensityCoef, 1.0, Rnd); 00033 const double ScaleCoef = (double) TargetEdges / (double) TryG->GetEdges(); 00034 return TAGM::GenAGM(CmtyVV, DensityCoef, ScaleCoef, Rnd); 00035 } 00036 00037 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd){ 00038 TFltV CProbV; 00039 double Prob; 00040 for (int i = 0; i < CmtyVV.Len(); i++) { 00041 Prob = ScaleCoef*pow( double( CmtyVV[i].Len()), - DensityCoef); 00042 if (Prob > 1.0) { Prob = 1; } 00043 CProbV.Add(Prob); 00044 } 00045 return TAGM::GenAGM(CmtyVV, CProbV, Rnd); 00046 } 00047 00049 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom){ 00050 PUNGraph G = TUNGraph::New(100 * CmtyVV.Len(), -1); 00051 printf("AGM begins\n"); 00052 for (int i = 0; i < CmtyVV.Len(); i++) { 00053 TIntV& CmtyV = CmtyVV[i]; 00054 for (int u = 0; u < CmtyV.Len(); u++) { 00055 if ( G->IsNode(CmtyV[u])) { continue; } 00056 G->AddNode(CmtyV[u]); 00057 } 00058 double Prob = CProbV[i]; 00059 RndConnectInsideCommunity(G, CmtyV, Prob, Rnd); 00060 } 00061 if (PNoCom > 0.0) { //if we want to connect nodes that do not share any community 00062 TIntSet NIDS; 00063 for (int c = 0; c < CmtyVV.Len(); c++) { 00064 for (int u = 0; u < CmtyVV[c].Len(); u++) { 00065 NIDS.AddKey(CmtyVV[c][u]); 00066 } 00067 } 00068 TIntV NIDV; 00069 NIDS.GetKeyV(NIDV); 00070 RndConnectInsideCommunity(G,NIDV,PNoCom,Rnd); 00071 } 00072 printf("AGM completed (%d nodes %d edges)\n",G->GetNodes(),G->GetEdges()); 00073 G->Defrag(); 00074 return G; 00075 } 00076 00079 00081 void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) { 00082 SzSeq.Gen(SeqLen, 0); 00083 while (SzSeq.Len() < SeqLen) { 00084 int Sz = (int) TMath::Round(Rnd.GetPowerDev(Alpha)); 00085 if (Sz >= Min && Sz <= Max) { 00086 SzSeq.Add(Sz); 00087 } 00088 } 00089 } 00090 00092 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){ 00093 TIntV NIDV(Nodes, 0); 00094 for (int i = 0; i < Nodes; i++) { 00095 NIDV.Add(i); 00096 } 00097 GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd); 00098 } 00099 00101 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){ 00102 if (Coms == 0 || Nodes == 0) { 00103 CmtyVV.Clr(); 00104 return; 00105 } 00106 TIntV NIDV; 00107 Graph->GetNIdV(NIDV); 00108 GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd); 00109 } 00110 00112 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){ 00113 if (Coms == 0 || Nodes == 0) { 00114 CmtyVV.Clr(); 00115 return; 00116 } 00117 TIntV ComSzSeq, MemSeq; 00118 TAGMUtil::GenPLSeq(ComSzSeq,Coms,ComSzAlpha,Rnd,MinSz,MaxSz); 00119 TAGMUtil::GenPLSeq(MemSeq,Nodes,MemAlpha,Rnd,MinK,MaxK); 00120 TIntPrV CIDSzPrV, NIDMemPrV; 00121 for (int i = 0; i < ComSzSeq.Len(); i++) { 00122 CIDSzPrV.Add(TIntPr(i, ComSzSeq[i])); 00123 } 00124 for (int i = 0; i < MemSeq.Len(); i++) { 00125 NIDMemPrV.Add(TIntPr(NIDV[i], MemSeq[i])); 00126 } 00127 TAGMUtil::ConnectCmtyVV(CmtyVV, CIDSzPrV, NIDMemPrV, Rnd); 00128 } 00129 00131 void TAGMUtil::ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd) { 00132 const int Nodes = NIDMemPrV.Len(), Coms = CIDSzPrV.Len(); 00133 TIntV NDegV,CDegV; 00134 TIntPrSet CNIDSet; 00135 TIntSet HitNodes(Nodes); 00136 THash<TInt,TIntV> CmtyVH; 00137 for (int i = 0;i < CIDSzPrV.Len(); i++) { 00138 for (int j = 0; j < CIDSzPrV[i].Val2; j++) { 00139 CDegV.Add(CIDSzPrV[i].Val1); 00140 } 00141 } 00142 for (int i = 0; i < NIDMemPrV.Len(); i++) { 00143 for (int j = 0; j < NIDMemPrV[i].Val2; j++) { 00144 NDegV.Add(NIDMemPrV[i].Val1); 00145 } 00146 } 00147 while (CDegV.Len() < (int) (1.2 * Nodes)) { 00148 CDegV.Add(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1); 00149 } 00150 while (NDegV.Len() < CDegV.Len()) { 00151 NDegV.Add(NIDMemPrV[Rnd.GetUniDevInt(Nodes)].Val1); 00152 } 00153 printf("Total Mem: %d, Total Sz: %d\n",NDegV.Len(), CDegV.Len()); 00154 int c=0; 00155 while (c++ < 15 && CDegV.Len() > 1) { 00156 for (int i = 0; i < CDegV.Len(); i++) { 00157 int u = Rnd.GetUniDevInt(CDegV.Len()); 00158 int v = Rnd.GetUniDevInt(NDegV.Len()); 00159 if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; } 00160 CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v])); 00161 HitNodes.AddKey(NDegV[v]); 00162 if (u == CDegV.Len() - 1) { CDegV.DelLast(); } 00163 else { 00164 CDegV[u] = CDegV.Last(); 00165 CDegV.DelLast(); 00166 } 00167 if (v == NDegV.Len() - 1) { NDegV.DelLast(); } 00168 else { 00169 NDegV[v] = NDegV.Last(); 00170 NDegV.DelLast(); 00171 } 00172 } 00173 } 00174 //make sure that every node belongs to at least one community 00175 for (int i = 0; i < Nodes; i++) { 00176 int NID = NIDMemPrV[i].Val1; 00177 if (! HitNodes.IsKey(NID)) { 00178 CNIDSet.AddKey(TIntPr(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1, NID)); 00179 HitNodes.AddKey(NID); 00180 } 00181 } 00182 IAssert(HitNodes.Len() == Nodes); 00183 for (int i = 0; i < CNIDSet.Len(); i++) { 00184 TIntPr CNIDPr = CNIDSet[i]; 00185 CmtyVH.AddDat(CNIDPr.Val1); 00186 CmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2); 00187 } 00188 CmtyVH.GetDatV(CmtyVV); 00189 } 00190 00192 void TAGMUtil::RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd){ 00193 THash<TInt,TIntV> CmtyVH; 00194 for (int i = 0; i < CmtyVVIn.Len(); i++) { 00195 CmtyVH.AddDat(i, CmtyVVIn[i]); 00196 } 00197 TAGMUtil::RewireCmtyNID(CmtyVH, Rnd); 00198 CmtyVH.GetDatV(CmtyVVOut); 00199 } 00200 00202 void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) { 00203 THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len()); 00204 TIntV NDegV; 00205 TIntV CDegV; 00206 for (int i = 0; i < CmtyVH.Len(); i++) { 00207 int CID = CmtyVH.GetKey(i); 00208 for (int j = 0; j < CmtyVH[i].Len(); j++) { 00209 int NID = CmtyVH[i][j]; 00210 NDegV.Add(NID); 00211 CDegV.Add(CID); 00212 } 00213 } 00214 TIntPrSet CNIDSet(CDegV.Len()); 00215 int c=0; 00216 while (c++ < 15 && CDegV.Len() > 1){ 00217 for (int i = 0; i < CDegV.Len(); i++) { 00218 int u = Rnd.GetUniDevInt(CDegV.Len()); 00219 int v = Rnd.GetUniDevInt(NDegV.Len()); 00220 if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; } 00221 CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v])); 00222 if (u == CDegV.Len() - 1) { 00223 CDegV.DelLast(); 00224 } else { 00225 CDegV[u] = CDegV.Last(); 00226 CDegV.DelLast(); 00227 } 00228 if ( v == NDegV.Len() - 1) { 00229 NDegV.DelLast(); 00230 } else{ 00231 NDegV[v] = NDegV.Last(); 00232 NDegV.DelLast(); 00233 } 00234 } 00235 } 00236 for (int i = 0; i < CNIDSet.Len(); i++) { 00237 TIntPr CNIDPr = CNIDSet[i]; 00238 IAssert(CmtyVH.IsKey(CNIDPr.Val1)); 00239 NewCmtyVH.AddDat(CNIDPr.Val1); 00240 NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2); 00241 } 00242 CmtyVH = NewCmtyVH; 00243 } 00244 00246 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV) { 00247 CmtyVV.Gen(Kilo(100), 0); 00248 TSsParser Ss(InFNm, ssfWhiteSep); 00249 while (Ss.Next()) { 00250 if(Ss.GetFlds() > 0) { 00251 TIntV CmtyV; 00252 for (int i = 0; i < Ss.GetFlds(); i++) { 00253 if (Ss.IsInt(i)) { 00254 CmtyV.Add(Ss.GetInt(i)); 00255 } 00256 } 00257 CmtyVV.Add(CmtyV); 00258 } 00259 } 00260 CmtyVV.Pack(); 00261 printf("community loading completed (%d communities)\n",CmtyVV.Len()); 00262 00263 } 00264 00266 void TAGMUtil::DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV) { 00267 FILE* F = fopen(OutFNm.CStr(),"wt"); 00268 for (int i = 0; i < CmtyVV.Len(); i++) { 00269 for (int j = 0; j < CmtyVV[i].Len(); j++) { 00270 fprintf(F,"%d\t", (int) CmtyVV[i][j]); 00271 } 00272 fprintf(F,"\n"); 00273 } 00274 fclose(F); 00275 } 00276 00278 void TAGMUtil::DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH) { 00279 FILE* F = fopen(OutFNm.CStr(), "wt"); 00280 for (int c = 0; c < CmtyVV.Len(); c++) { 00281 for (int u = 0; u < CmtyVV[c].Len(); u++) { 00282 if (NIDNmH.IsKey(CmtyVV[c][u])){ 00283 fprintf(F, "%s\t", NIDNmH.GetDat(CmtyVV[c][u]).CStr()); 00284 } 00285 else { 00286 fprintf(F, "%d\t", (int) CmtyVV[c][u]); 00287 } 00288 } 00289 fprintf(F, "\n"); 00290 } 00291 fclose(F); 00292 } 00293 00295 int TAGMUtil::TotalMemberships(const TVec<TIntV>& CmtyVV){ 00296 int M = 0; 00297 for (int i = 0; i < CmtyVV.Len(); i++) { 00298 M += CmtyVV[i].Len(); 00299 } 00300 return M; 00301 } 00302 00304 void TAGMUtil::GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH) { 00305 NIDComVH.Clr(); 00306 for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){ 00307 for (int j = 0;j < HI.GetDat().Len(); j++) { 00308 int NID = HI.GetDat()[j]; 00309 NIDComVH.AddDat(NID)++; 00310 } 00311 } 00312 } 00313 00315 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV) { 00316 NIDComVH.Clr(); 00317 for (int i = 0; i < CmtyVV.Len(); i++){ 00318 int CID = i; 00319 for (int j = 0; j < CmtyVV[i].Len(); j++) { 00320 int NID = CmtyVV[i][j]; 00321 NIDComVH.AddDat(NID).AddKey(CID); 00322 } 00323 } 00324 } 00325 00327 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV) { 00328 NIDComVH.Clr(); 00329 for (int u = 0; u < NIDV.Len(); u++) { 00330 NIDComVH.AddDat(NIDV[u]); 00331 } 00332 for (int i = 0; i < CmtyVV.Len(); i++){ 00333 int CID = i; 00334 for (int j = 0; j < CmtyVV[i].Len(); j++) { 00335 int NID = CmtyVV[i][j]; 00336 NIDComVH.AddDat(NID).AddKey(CID); 00337 } 00338 } 00339 } 00340 00341 00342 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntSet>& CmtyVV) { 00343 for (int i = 0; i < CmtyVV.Len(); i++){ 00344 int CID = i; 00345 for (TIntSet::TIter SI = CmtyVV[i].BegI(); SI < CmtyVV[i].EndI(); SI++) { 00346 int NID = SI.GetKey(); 00347 NIDComVH.AddDat(NID).AddKey(CID); 00348 } 00349 } 00350 } 00351 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) { 00352 for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){ 00353 int CID = HI.GetKey(); 00354 for (int j = 0; j < HI.GetDat().Len(); j++) { 00355 int NID = HI.GetDat()[j]; 00356 NIDComVH.AddDat(NID).AddKey(CID); 00357 } 00358 } 00359 } 00360 00361 void TAGMUtil::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) { 00362 for (int i = 0; i < CmtyVH.Len(); i++){ 00363 int CID = CmtyVH.GetKey(i); 00364 for (int j = 0; j < CmtyVH[i].Len(); j++) { 00365 int NID = CmtyVH[i][j]; 00366 NIDComVH.AddDat(NID).Add(CID); 00367 } 00368 } 00369 } 00370 00371 void TAGMUtil::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV>& CmtyVV) { 00372 THash<TInt,TIntV> CmtyVH; 00373 for (int i = 0; i < CmtyVV.Len(); i++) { 00374 CmtyVH.AddDat(i, CmtyVV[i]); 00375 } 00376 GetNodeMembership(NIDComVH, CmtyVH); 00377 } 00378 00379 int TAGMUtil::Intersection(const TIntV& C1, const TIntV& C2) { 00380 TIntSet S1(C1), S2(C2); 00381 return TAGMUtil::Intersection(S1, S2); 00382 } 00383 00384 void TAGMUtil::GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C) { 00385 C.Gen(A.Len()); 00386 if (A.Len() < B.Len()) { 00387 for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++) 00388 if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey()); 00389 } else { 00390 for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++) 00391 if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey()); 00392 } 00393 } 00394 00395 int TAGMUtil::Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B) { 00396 int n = 0; 00397 if (A.Len() < B.Len()) { 00398 for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++) 00399 if (B.IsKey(it.GetKey())) n++; 00400 } else { 00401 for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++) 00402 if (A.IsKey(it.GetKey())) n++; 00403 } 00404 return n; 00405 } 00406 00408 void TAGMUtil::SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV>& CmtyVVAtr, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH ) { 00409 THash<TInt,TIntV> NIDComVHAtr; 00410 TAGMUtil::GetNodeMembership(NIDComVHAtr, CmtyVVAtr); 00411 00412 FILE* F = fopen(OutFNm.CStr(), "wt"); 00413 fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n"); 00414 fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n"); 00415 fprintf(F, "\t<graph mode='static' defaultedgetype='undirected'>\n"); 00416 if (CmtyVVAtr.Len() > 0) { 00417 fprintf(F, "\t<attributes class='node'>\n"); 00418 for (int c = 0; c < CmtyVVAtr.Len(); c++) { 00419 fprintf(F, "\t\t<attribute id='%d' title='c%d' type='boolean'>", c, c); 00420 fprintf(F, "\t\t<default>false</default>\n"); 00421 fprintf(F, "\t\t</attribute>\n"); 00422 } 00423 fprintf(F, "\t</attributes>\n"); 00424 } 00425 fprintf(F, "\t\t<nodes>\n"); 00426 for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) { 00427 int NID = NI.GetId(); 00428 TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): ""; 00429 TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120); 00430 00431 double Size = MinSz; 00432 double SizeStep = (MaxSz - MinSz) / (double) CmtyVVAtr.Len(); 00433 if (NIDComVHAtr.IsKey(NID)) { 00434 Size = MinSz + SizeStep * (double) NIDComVHAtr.GetDat(NID).Len(); 00435 } 00436 double Alpha = 1.0; 00437 fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr()); 00438 fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha); 00439 fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size); 00440 //specify attributes 00441 if (NIDComVHAtr.IsKey(NID)) { 00442 fprintf(F, "\t\t\t\t<attvalues>\n"); 00443 for (int c = 0; c < NIDComVHAtr.GetDat(NID).Len(); c++) { 00444 int CID = NIDComVHAtr.GetDat(NID)[c]; 00445 fprintf(F, "\t\t\t\t\t<attvalue for='%d' value='true'/>\n", CID); 00446 } 00447 fprintf(F, "\t\t\t\t</attvalues>\n"); 00448 } 00449 00450 fprintf(F, "\t\t\t</node>\n"); 00451 } 00452 fprintf(F, "\t\t</nodes>\n"); 00453 //plot edges 00454 int EID = 0; 00455 fprintf(F, "\t\t<edges>\n"); 00456 for (TUNGraph::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) { 00457 fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, EI.GetSrcNId(), EI.GetDstNId()); 00458 } 00459 fprintf(F, "\t\t</edges>\n"); 00460 fprintf(F, "\t</graph>\n"); 00461 fprintf(F, "</gexf>\n"); 00462 fclose(F); 00463 } 00464 00466 void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) { 00468 if (CmtyVV.Len() == 0) { return; } 00469 double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0; 00470 double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax; 00471 double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len(); 00472 THash<TInt,TIntV> NIDComVH; 00473 TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV); 00474 00475 FILE* F = fopen(OutFNm.CStr(), "wt"); 00476 fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n"); 00477 fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n"); 00478 fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n"); 00479 fprintf(F, "\t\t<nodes>\n"); 00480 for (int c = 0; c < CmtyVV.Len(); c++) { 00481 int CID = c; 00482 double XPos = c * CStep + CXMin; 00483 TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120); 00484 fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID); 00485 fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val); 00486 fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz); 00487 fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); 00488 fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax); 00489 fprintf(F, "\t\t\t</node>\n"); 00490 } 00491 00492 for (int u = 0;u < NIDV.Len(); u++) { 00493 int NID = NIDV[u]; 00494 TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): ""; 00495 double Size = MinSz; 00496 double XPos = NXMin + u * NStep; 00497 TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120); 00498 double Alpha = 1.0; 00499 fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr()); 00500 fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha); 00501 fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size); 00502 fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); 00503 fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin); 00504 fprintf(F, "\t\t\t</node>\n"); 00505 } 00506 fprintf(F, "\t\t</nodes>\n"); 00507 fprintf(F, "\t\t<edges>\n"); 00508 int EID = 0; 00509 for (int u = 0;u < NIDV.Len(); u++) { 00510 int NID = NIDV[u]; 00511 if (NIDComVH.IsKey(NID)) { 00512 for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) { 00513 int CID = NIDComVH.GetDat(NID)[c]; 00514 fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID); 00515 } 00516 } 00517 } 00518 fprintf(F, "\t\t</edges>\n"); 00519 fprintf(F, "\t</graph>\n"); 00520 fprintf(F, "</gexf>\n"); 00521 } 00522 00524 int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) { 00525 TRnd Rnd(RndSeed); 00526 int LambdaIter = 100; 00527 if (Graph->GetNodes() < 200) { LambdaIter = 1; } 00528 if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) { LambdaIter = 100; } 00529 00530 //Find coms with large C 00531 TAGMFit AGMFitM(Graph, InitComs, RndSeed); 00532 if (PNoCom > 0.0) { AGMFitM.SetPNoCom(PNoCom); } 00533 AGMFitM.RunMCMC(MaxIter, LambdaIter, ""); 00534 00535 int TE = Graph->GetEdges(); 00536 TFltV RegV; 00537 RegV.Add(0.3 * TE); 00538 for (int r = 0; r < 25; r++) { 00539 RegV.Add(RegV.Last() * RegGap); 00540 } 00541 TFltPrV RegComsV, RegLV, RegBICV; 00542 TFltV LV, BICV; 00543 //record likelihood and number of communities with nonzero P_c 00544 for (int r = 0; r < RegV.Len(); r++) { 00545 double RegCoef = RegV[r]; 00546 AGMFitM.SetRegCoef(RegCoef); 00547 AGMFitM.MLEGradAscentGivenCAG(0.01, 1000); 00548 AGMFitM.SetRegCoef(0.0); 00549 00550 TVec<TIntV> EstCmtyVV; 00551 AGMFitM.GetCmtyVV(EstCmtyVV, 0.99); 00552 int NumLowQ = EstCmtyVV.Len(); 00553 RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ)); 00554 00555 if (EstCmtyVV.Len() > 0) { 00556 TAGMFit AFTemp(Graph, EstCmtyVV, Rnd); 00557 AFTemp.MLEGradAscentGivenCAG(0.001, 1000); 00558 double CurL = AFTemp.Likelihood(); 00559 LV.Add(CurL); 00560 BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0)); 00561 } 00562 else { 00563 break; 00564 } 00565 } 00566 // if likelihood does not exist or does not change at all, report the smallest number of communities or 2 00567 if (LV.Len() == 0) { return 2; } 00568 else if (LV[0] == LV.Last()) { return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2); } 00569 00570 00571 //normalize likelihood and BIC to 0~100 00572 int MaxL = 100; 00573 { 00574 TFltV& ValueV = LV; 00575 TFltPrV& RegValueV = RegLV; 00576 double MinValue = TFlt::Mx, MaxValue = TFlt::Mn; 00577 for (int l = 0; l < ValueV.Len(); l++) { 00578 if (ValueV[l] < MinValue) { MinValue = ValueV[l]; } 00579 if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; } 00580 } 00581 while (ValueV.Len() < RegV.Len()) { ValueV.Add(MinValue); } 00582 double RangeVal = MaxValue - MinValue; 00583 for (int l = 0; l < ValueV.Len(); l++) { 00584 RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal)); 00585 } 00586 00587 } 00588 { 00589 TFltV& ValueV = BICV; 00590 TFltPrV& RegValueV = RegBICV; 00591 double MinValue = TFlt::Mx, MaxValue = TFlt::Mn; 00592 for (int l = 0; l < ValueV.Len(); l++) { 00593 if (ValueV[l] < MinValue) { MinValue = ValueV[l]; } 00594 if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; } 00595 } 00596 while (ValueV.Len() < RegV.Len()) { ValueV.Add(MaxValue); } 00597 double RangeVal = MaxValue - MinValue; 00598 for (int l = 0; l < ValueV.Len(); l++) { 00599 RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal)); 00600 } 00601 } 00602 00603 //fit logistic regression to normalized likelihood. 00604 TVec<TFltV> XV(RegLV.Len()); 00605 TFltV YV (RegLV.Len()); 00606 for (int l = 0; l < RegLV.Len(); l++) { 00607 XV[l] = TFltV::GetV(log(RegLV[l].Val1)); 00608 YV[l] = RegLV[l].Val2 / (double) MaxL; 00609 } 00610 TFltPrV LRVScaled, LRV; 00611 TLogRegFit LRFit; 00612 PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx); 00613 for (int l = 0; l < RegLV.Len(); l++) { 00614 LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l]))); 00615 LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2)); 00616 } 00617 00618 //estimate # communities from fitted logistic regression 00619 int NumComs = 0, IdxRegDrop = 0; 00620 double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25 00621 double LeftReg = 0.0, RightReg = 0.0; 00622 TFltV Theta; 00623 LRMd->GetTheta(Theta); 00624 RegDrop = (- Theta[1] - LRThres) / Theta[0]; 00625 if (RegDrop <= XV[0][0]) { NumComs = (int) RegComsV[0].Val2; } 00626 else if (RegDrop >= XV.Last()[0]) { NumComs = (int) RegComsV.Last().Val2; } 00627 else { //interpolate for RegDrop 00628 for (int i = 0; i < XV.Len(); i++) { 00629 if (XV[i][0] > RegDrop) { IdxRegDrop = i; break; } 00630 } 00631 00632 if (IdxRegDrop == 0) { 00633 printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val); 00634 for (int l = 0; l < RegLV.Len(); l++) { 00635 printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val); 00636 } 00637 } 00638 IAssert(IdxRegDrop > 0); 00639 LeftReg = RegDrop - XV[IdxRegDrop - 1][0]; 00640 RightReg = XV[IdxRegDrop][0] - RegDrop; 00641 NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg)); 00642 00643 } 00644 //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2); 00645 printf("Num Coms:%d\n", NumComs); 00646 if (NumComs < 2) { NumComs = 2; } 00647 00648 if (PltFPrx.Len() > 0) { 00649 TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE); 00650 TGnuPlot GPC(PltFPrx + ".l"); 00651 GPC.AddPlot(RegComsV, gpwLinesPoints, "C"); 00652 GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood"); 00653 GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC"); 00654 GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)"); 00655 GPC.SetScale(gpsLog10X); 00656 GPC.SetTitle(PlotTitle); 00657 GPC.SavePng(PltFPrx + ".l.png"); 00658 } 00659 00660 return NumComs; 00661 } 00662 00663 double TAGMUtil::GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges) { 00664 const int Edges2 = Edges >= 0 ? 2*Edges : Graph->GetEdges(); 00665 int Vol = 0, Cut = 0; 00666 double Phi = 0.0; 00667 for (int i = 0; i < CmtyS.Len(); i++) { 00668 if (! Graph->IsNode(CmtyS[i])) { continue; } 00669 TUNGraph::TNodeI NI = Graph->GetNI(CmtyS[i]); 00670 for (int e = 0; e < NI.GetOutDeg(); e++) { 00671 if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; } 00672 } 00673 Vol += NI.GetOutDeg(); 00674 } 00675 // get conductance 00676 if (Vol != Edges2) { 00677 if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); } 00678 else if (Vol == 0) { Phi = 0.0; } 00679 else { Phi = Cut / double(Vol); } 00680 } else { 00681 if (Vol == Edges2) { Phi = 1.0; } 00682 } 00683 return Phi; 00684 } 00685 00686 void TAGMUtil::GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS) { 00687 TUNGraph::TNodeI NI = Graph->GetNI(NID); 00688 NBCmtyS.Gen(NI.GetDeg()); 00689 NBCmtyS.AddKey(NID); 00690 for (int e = 0; e < NI.GetDeg(); e++) { 00691 NBCmtyS.AddKey(NI.GetNbrNId(e)); 00692 } 00693 } 00694 00696 // Logistic regression by gradient ascent 00697 00698 void TLogRegFit::GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV){ 00699 bool HSingular = false; 00700 for (int i = 0; i < HVV.GetXDim(); i++) { 00701 if (HVV(i,i) == 0.0) { 00702 HVV(i,i) = 0.001; 00703 HSingular = true; 00704 } 00705 DeltaLV[i] = GradV[i] / HVV(i, i); 00706 } 00707 if (! HSingular) { 00708 if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite 00709 for (int r = 0; r < Theta.Len(); r++) { 00710 for (int c = 0; c < Theta.Len(); c++) { 00711 HVV(r, c) = - HVV(r, c); 00712 } 00713 } 00714 TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV); 00715 } 00716 else { 00717 TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV); 00718 for (int i = 0; i < DeltaLV.Len(); i++) { 00719 DeltaLV[i] = - DeltaLV[i]; 00720 } 00721 } 00722 00723 } 00724 } 00725 00726 void TLogRegFit::Hessian(TFltVV& HVV) { 00727 HVV.Gen(Theta.Len(), Theta.Len()); 00728 TFltV OutV; 00729 TLogRegPredict::GetCfy(X, OutV, Theta); 00730 for (int i = 0; i < X.Len(); i++) { 00731 for (int r = 0; r < Theta.Len(); r++) { 00732 HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]); 00733 for (int c = r + 1; c < Theta.Len(); c++) { 00734 HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]); 00735 HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]); 00736 } 00737 } 00738 } 00739 /* 00740 printf("\n"); 00741 for (int r = 0; r < Theta.Len(); r++) { 00742 for (int c = 0; c < Theta.Len(); c++) { 00743 printf("%f\t", HVV.At(r, c).Val); 00744 } 00745 printf("\n"); 00746 } 00747 */ 00748 } 00749 00750 int TLogRegFit::MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) { 00751 TExeTm ExeTm; 00752 TFltV GradV(Theta.Len()), DeltaLV(Theta.Len()); 00753 TFltVV HVV(Theta.Len(), Theta.Len()); 00754 int iter = 0; 00755 double MinVal = -1e10, MaxVal = 1e10; 00756 for(iter = 0; iter < MaxStep; iter++) { 00757 Gradient(GradV); 00758 Hessian(HVV); 00759 GetNewtonStep(HVV, GradV, DeltaLV); 00760 double Increment = TLinAlg::DotProduct(GradV, DeltaLV); 00761 if (Increment <= ChangeEps) {break;} 00762 double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1); 00763 for(int i = 0; i < Theta.Len(); i++) { 00764 double Change = LearnRate * DeltaLV[i]; 00765 Theta[i] += Change; 00766 if(Theta[i] < MinVal) { Theta[i] = MinVal;} 00767 if(Theta[i] > MaxVal) { Theta[i] = MaxVal;} 00768 } 00769 } 00770 if (! PlotNm.Empty()) { 00771 printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr()); 00772 } 00773 00774 return iter; 00775 } 00776 00777 int TLogRegFit::MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) { 00778 TExeTm ExeTm; 00779 TFltV GradV(Theta.Len()); 00780 int iter = 0; 00781 TIntFltPrV IterLV, IterGradNormV; 00782 double MinVal = -1e10, MaxVal = 1e10; 00783 double GradCutOff = 100000; 00784 for(iter = 0; iter < MaxStep; iter++) { 00785 Gradient(GradV); //if gradient is going out of the boundary, cut off 00786 for(int i = 0; i < Theta.Len(); i++) { 00787 if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; } 00788 if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; } 00789 if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; } 00790 if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; } 00791 } 00792 double Alpha = 0.15, Beta = 0.9; 00793 //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta); 00794 double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta); 00795 if (TLinAlg::Norm(GradV) < ChangeEps) { break; } 00796 for(int i = 0; i < Theta.Len(); i++) { 00797 double Change = LearnRate * GradV[i]; 00798 Theta[i] += Change; 00799 if(Theta[i] < MinVal) { Theta[i] = MinVal;} 00800 if(Theta[i] > MaxVal) { Theta[i] = MaxVal;} 00801 } 00802 if (! PlotNm.Empty()) { 00803 double L = Likelihood(); 00804 IterLV.Add(TIntFltPr(iter, L)); 00805 IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV))); 00806 } 00807 00808 } 00809 if (! PlotNm.Empty()) { 00810 TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); 00811 TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q"); 00812 printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr()); 00813 } 00814 return iter; 00815 } 00816 00817 double TLogRegFit::GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta) { 00818 double StepSize = 1.0; 00819 double InitLikelihood = Likelihood(); 00820 IAssert(Theta.Len() == DeltaV.Len()); 00821 TFltV NewThetaV(Theta.Len()); 00822 double MinVal = -1e10, MaxVal = 1e10; 00823 for(int iter = 0; ; iter++) { 00824 for (int i = 0; i < Theta.Len(); i++){ 00825 NewThetaV[i] = Theta[i] + StepSize * DeltaV[i]; 00826 if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal; } 00827 if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; } 00828 } 00829 if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) { 00830 StepSize *= Beta; 00831 } else { 00832 break; 00833 } 00834 } 00835 return StepSize; 00836 } 00837 00838 double TLogRegFit::Likelihood(const TFltV& NewTheta) { 00839 TFltV OutV; 00840 TLogRegPredict::GetCfy(X, OutV, NewTheta); 00841 double L = 0; 00842 for (int r = 0; r < OutV.Len(); r++) { 00843 L += Y[r] * log(OutV[r]); 00844 L += (1 - Y[r]) * log(1 - OutV[r]); 00845 } 00846 return L; 00847 } 00848 00849 void TLogRegFit::Gradient(TFltV& GradV) { 00850 TFltV OutV; 00851 TLogRegPredict::GetCfy(X, OutV, Theta); 00852 GradV.Gen(M); 00853 for (int r = 0; r < X.Len(); r++) { 00854 //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val); 00855 for (int m = 0; m < M; m++) { 00856 GradV[m] += (Y[r] - OutV[r]) * X[r][m]; 00857 } 00858 } 00859 //for (int m = 0; m < M; m++) { printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); } 00860 } 00861 00862 PLogRegPredict TLogRegFit::CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) { 00863 00864 X = XPt; 00865 Y = yPt; 00866 IAssert(X.Len() == Y.Len()); 00867 if (Intercept == false) { // if intercept is not included, add it 00868 for (int r = 0; r < X.Len(); r++) { X[r].Add(1); } 00869 } 00870 M = X[0].Len(); 00871 for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); } 00872 for (int r = 0; r < Y.Len(); r++) { 00873 if (Y[r] >= 0.99999) { Y[r] = 0.99999; } 00874 if (Y[r] <= 0.00001) { Y[r] = 0.00001; } 00875 } 00876 Theta.Gen(M); 00877 MLENewton(ChangeEps, MaxStep, PlotNm); 00878 return new TLogRegPredict(Theta); 00879 }; 00880 00881 PLogRegPredict TLogRegFit::CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) { 00882 X = XPt; 00883 Y = yPt; 00884 IAssert(X.Len() == Y.Len()); 00885 if (Intercept == false) { // if intercept is not included, add it 00886 for (int r = 0; r < X.Len(); r++) { X[r].Add(1); } 00887 } 00888 M = X[0].Len(); 00889 for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); } 00890 for (int r = 0; r < Y.Len(); r++) { 00891 if (Y[r] >= 0.99999) { Y[r] = 0.99999; } 00892 if (Y[r] <= 0.00001) { Y[r] = 0.00001; } 00893 } 00894 Theta.Gen(M); 00895 MLEGradient(ChangeEps, MaxStep, PlotNm); 00896 return new TLogRegPredict(Theta); 00897 }; 00898 00900 // Logistic-Regression-Model 00901 00902 double TLogRegPredict::GetCfy(const TFltV& AttrV, const TFltV& NewTheta) { 00903 int len = AttrV.Len(); 00904 double res = 0; 00905 if (len < NewTheta.Len()) { res = NewTheta.Last(); } //if feature vector is shorter, add an intercept 00906 for (int i = 0; i < len; i++) { 00907 if (i < NewTheta.Len()) { res += AttrV[i] * NewTheta[i]; } 00908 } 00909 double mu = 1 / (1 + exp(-res)); 00910 return mu; 00911 } 00912 00913 void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) { 00914 OutV.Gen(X.Len()); 00915 for (int r = 0; r < X.Len(); r++) { 00916 OutV[r] = GetCfy(X[r], NewTheta); 00917 } 00918 }