SNAP Library 2.2, Developer Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // Directed Graph Matrix -- sparse {0,1} row matrix 00003 bool TNGraphMtx::CheckNodeIds() { 00004 for (int NId = 0; NId < Graph->GetNodes(); NId++) { 00005 if (! Graph->IsNode(NId)) { return false; } 00006 } 00007 return true; 00008 } 00009 00010 TNGraphMtx::TNGraphMtx(const PNGraph& GraphPt) : Graph() { 00011 Graph = GraphPt; 00012 if (! CheckNodeIds()) { 00013 printf(" Renumbering nodes.\n"); 00014 Graph = TSnap::ConvertGraph<PNGraph>(GraphPt, true); 00015 } 00016 } 00017 00018 // Result = A * B(:,ColId) 00019 void TNGraphMtx::PMultiply(const TFltVV& B, int ColId, TFltV& Result) const { 00020 const int RowN = GetRows(); 00021 Assert(B.GetRows() >= RowN && Result.Len() >= RowN); 00022 const THash<TInt, TNGraph::TNode>& NodeH = Graph->NodeH; 00023 for (int j = 0; j < RowN; j++) { 00024 const TIntV& RowV = NodeH[j].OutNIdV; 00025 Result[j] = 0.0; 00026 for (int i = 0; i < RowV.Len(); i++) { 00027 Result[j] += B(RowV[i], ColId); 00028 } 00029 } 00030 } 00031 00032 // Result = A * Vec 00033 void TNGraphMtx::PMultiply(const TFltV& Vec, TFltV& Result) const { 00034 const int RowN = GetRows(); 00035 Assert(Vec.Len() >= RowN && Result.Len() >= RowN); 00036 const THash<TInt, TNGraph::TNode>& NodeH = Graph->NodeH; 00037 for (int j = 0; j < RowN; j++) { 00038 const TIntV& RowV = NodeH[j].OutNIdV; 00039 Result[j] = 0.0; 00040 for (int i = 0; i < RowV.Len(); i++) { 00041 Result[j] += Vec[RowV[i]]; 00042 } 00043 } 00044 } 00045 00046 // Result = A' * B(:,ColId) 00047 void TNGraphMtx::PMultiplyT(const TFltVV& B, int ColId, TFltV& Result) const { 00048 const int ColN = GetCols(); 00049 Assert(B.GetRows() >= ColN && Result.Len() >= ColN); 00050 const THash<TInt, TNGraph::TNode>& NodeH = Graph->NodeH; 00051 for (int i = 0; i < ColN; i++) Result[i] = 0.0; 00052 for (int j = 0; j < ColN; j++) { 00053 const TIntV& RowV = NodeH[j].OutNIdV; 00054 for (int i = 0; i < RowV.Len(); i++) { 00055 Result[RowV[i]] += B(j, ColId); 00056 } 00057 } 00058 } 00059 00060 // Result = A' * Vec 00061 void TNGraphMtx::PMultiplyT(const TFltV& Vec, TFltV& Result) const { 00062 const int RowN = GetRows(); 00063 Assert(Vec.Len() >= RowN && Result.Len() >= RowN); 00064 const THash<TInt, TNGraph::TNode>& NodeH = Graph->NodeH; 00065 for (int i = 0; i < RowN; i++) Result[i] = 0.0; 00066 for (int j = 0; j < RowN; j++) { 00067 const TIntV& RowV = NodeH[j].OutNIdV; 00068 for (int i = 0; i < RowV.Len(); i++) { 00069 Result[RowV[i]] += Vec[j]; 00070 } 00071 } 00072 } 00073 00075 // Undirected Graph Matrix -- sparse {0,1} row matrix 00076 bool TUNGraphMtx::CheckNodeIds() { 00077 for (int NId = 0; NId < Graph->GetNodes(); NId++) { 00078 if (! Graph->IsNode(NId)) { return false; } 00079 } 00080 return true; 00081 } 00082 00083 TUNGraphMtx::TUNGraphMtx(const PUNGraph& GraphPt) : Graph() { 00084 Graph = GraphPt; 00085 if (! CheckNodeIds()) { 00086 printf(" Renumbering %d nodes....", GraphPt->GetNodes()); 00087 TExeTm ExeTm; 00088 Graph = TSnap::ConvertGraph<PUNGraph>(GraphPt, true); 00089 /*TIntSet NIdSet; 00090 for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00091 NIdSet.AddKey(NI.GetId()); 00092 } 00093 Graph = TUNGraph::New(); *Graph = *GraphPt; */ 00094 printf("done [%s]\n", ExeTm.GetStr()); 00095 } 00096 } 00097 00098 // Result = A * B(:,ColId) 00099 void TUNGraphMtx::PMultiply(const TFltVV& B, int ColId, TFltV& Result) const { 00100 const int RowN = GetRows(); 00101 Assert(B.GetRows() >= RowN && Result.Len() >= RowN); 00102 const THash<TInt, TUNGraph::TNode>& NodeH = Graph->NodeH; 00103 for (int j = 0; j < RowN; j++) { 00104 const TIntV& RowV = NodeH[j].NIdV; 00105 Result[j] = 0.0; 00106 for (int i = 0; i < RowV.Len(); i++) { 00107 Result[j] += B(RowV[i], ColId); 00108 } 00109 } 00110 } 00111 00112 // Result = A * Vec 00113 void TUNGraphMtx::PMultiply(const TFltV& Vec, TFltV& Result) const { 00114 const int RowN = GetRows(); 00115 Assert(Vec.Len() >= RowN && Result.Len() >= RowN); 00116 const THash<TInt, TUNGraph::TNode>& NodeH = Graph->NodeH; 00117 for (int j = 0; j < RowN; j++) { 00118 const TIntV& RowV = NodeH[j].NIdV; 00119 Result[j] = 0.0; 00120 for (int i = 0; i < RowV.Len(); i++) { 00121 Result[j] += Vec[RowV[i]]; 00122 } 00123 } 00124 } 00125 00126 // Result = A' * B(:,ColId) 00127 void TUNGraphMtx::PMultiplyT(const TFltVV& B, int ColId, TFltV& Result) const { 00128 const int ColN = GetCols(); 00129 Assert(B.GetRows() >= ColN && Result.Len() >= ColN); 00130 const THash<TInt, TUNGraph::TNode>& NodeH = Graph->NodeH; 00131 for (int i = 0; i < ColN; i++) Result[i] = 0.0; 00132 for (int j = 0; j < ColN; j++) { 00133 const TIntV& RowV = NodeH[j].NIdV; 00134 for (int i = 0; i < RowV.Len(); i++) { 00135 Result[RowV[i]] += B(j, ColId); 00136 } 00137 } 00138 } 00139 00140 // Result = A' * Vec 00141 void TUNGraphMtx::PMultiplyT(const TFltV& Vec, TFltV& Result) const { 00142 const int RowN = GetRows(); 00143 Assert(Vec.Len() >= RowN && Result.Len() >= RowN); 00144 const THash<TInt, TUNGraph::TNode>& NodeH = Graph->NodeH; 00145 for (int i = 0; i < RowN; i++) Result[i] = 0.0; 00146 for (int j = 0; j < RowN; j++) { 00147 const TIntV& RowV = NodeH[j].NIdV; 00148 for (int i = 0; i < RowV.Len(); i++) { 00149 Result[RowV[i]] += Vec[j]; 00150 } 00151 } 00152 } 00153 00155 // Graphs Singular Value Decomposition 00156 namespace TSnap { 00157 00158 void SetAllInvertSign(TFltV& ValV, const double& Val) { 00159 for (int i = 0; i < ValV.Len(); i++) { 00160 ValV[i] = -ValV[i]; 00161 } 00162 } 00163 bool IsAllValVNeg(TFltV& ValV, const bool& InvertSign) { 00164 bool IsAllNeg=true; 00165 for (int i = 0; i < ValV.Len(); i++) { 00166 if (ValV[i]>0.0) { IsAllNeg=false; break; } 00167 } 00168 if (IsAllNeg && InvertSign) { 00169 for (int i = 0; i < ValV.Len(); i++) { 00170 ValV[i] = -ValV[i]; } 00171 } 00172 return IsAllNeg; 00173 } 00174 00175 void GetSngVals(const PNGraph& Graph, const int& SngVals, TFltV& SngValV) { 00176 const int Nodes = Graph->GetNodes(); 00177 IAssert(SngVals > 0); 00178 if (Nodes < 100) { 00179 // perform full SVD 00180 TFltVV AdjMtx(Nodes+1, Nodes+1); 00181 TFltVV LSingV, RSingV; 00182 TIntH NodeIdH; 00183 // create adjecency matrix 00184 for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { 00185 NodeIdH.AddKey(NodeI.GetId()); } 00186 for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { 00187 const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1; 00188 for (int e = 0; e < NodeI.GetOutDeg(); e++) { 00189 const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1; // no self edges 00190 if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; 00191 } 00192 } 00193 try { // can fail to converge but results seem to be good 00194 TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); } 00195 catch(...) { 00196 printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); } 00197 } else { 00198 // Lanczos 00199 TNGraphMtx GraphMtx(Graph); 00200 int CalcVals = int(2*SngVals); 00201 //if (CalcVals > Nodes) { CalcVals = int(2*Nodes); } 00202 //if (CalcVals > Nodes) { CalcVals = Nodes; } 00203 //while (SngValV.Len() < SngVals && CalcVals < 10*SngVals) { 00204 try { 00205 if (SngVals > 4) { 00206 TSparseSVD::SimpleLanczosSVD(GraphMtx, 2*SngVals, SngValV, false); } 00207 else { TFltVV LSingV, RSingV; // this is much more precise, but also much slower 00208 TSparseSVD::LanczosSVD(GraphMtx, SngVals, 3*SngVals, ssotFull, SngValV, LSingV, RSingV); } 00209 } 00210 catch(...) { 00211 printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", 2*SngVals, SngValV.Len()); } 00212 if (SngValV.Len() < SngVals) { 00213 printf(" ***TRIED %d GOT %d values** \n", CalcVals, SngValV.Len()); } 00214 // CalcVals += SngVals; 00215 //} 00216 } 00217 SngValV.Sort(false); 00218 //if (SngValV.Len() > SngVals) { 00219 // SngValV.Del(SngVals, SngValV.Len()-1); } 00220 //else { 00221 // while (SngValV.Len() < SngVals) SngValV.Add(1e-6); } 00222 //IAssert(SngValV.Len() == SngVals); 00223 } 00224 00225 void GetSngVec(const PNGraph& Graph, TFltV& LeftSV, TFltV& RightSV) { 00226 const int Nodes = Graph->GetNodes(); 00227 TFltVV LSingV, RSingV; 00228 TFltV SngValV; 00229 if (Nodes < 500) { 00230 // perform full SVD 00231 TFltVV AdjMtx(Nodes+1, Nodes+1); 00232 TIntH NodeIdH; 00233 // create adjecency matrix 00234 for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { 00235 NodeIdH.AddKey(NodeI.GetId()); } 00236 for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { 00237 const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1; 00238 for (int e = 0; e < NodeI.GetOutDeg(); e++) { 00239 const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1; // no self edges 00240 if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; 00241 } 00242 } 00243 try { // can fail to converge but results seem to be good 00244 TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); } 00245 catch(...) { 00246 printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); } 00247 } else { // Lanczos 00248 TNGraphMtx GraphMtx(Graph); 00249 TSparseSVD::LanczosSVD(GraphMtx, 1, 8, ssotFull, SngValV, LSingV, RSingV); 00250 } 00251 TFlt MxSngVal = TFlt::Mn; 00252 int ValN = 0; 00253 for (int i = 0; i < SngValV.Len(); i++) { 00254 if (MxSngVal < SngValV[i]) { MxSngVal = SngValV[i]; ValN = i; } } 00255 LSingV.GetCol(ValN, LeftSV); 00256 RSingV.GetCol(ValN, RightSV); 00257 IsAllValVNeg(LeftSV, true); 00258 IsAllValVNeg(RightSV, true); 00259 } 00260 00261 void GetSngVec(const PNGraph& Graph, const int& SngVecs, TFltV& SngValV, TVec<TFltV>& LeftSV, TVec<TFltV>& RightSV) { 00262 const int Nodes = Graph->GetNodes(); 00263 SngValV.Clr(); 00264 LeftSV.Clr(); 00265 RightSV.Clr(); 00266 TFltVV LSingV, RSingV; 00267 if (Nodes < 100) { 00268 // perform full SVD 00269 TFltVV AdjMtx(Nodes+1, Nodes+1); 00270 TIntH NodeIdH; 00271 // create adjecency matrix (1-based) 00272 for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { 00273 NodeIdH.AddKey(NodeI.GetId()); } 00274 for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { 00275 const int NodeId = NodeIdH.GetKeyId(NodeI.GetId())+1; 00276 for (int e = 0; e < NodeI.GetOutDeg(); e++) { 00277 const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e))+1; // no self edges 00278 if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; 00279 } 00280 } 00281 try { // can fail to converge but results seem to be good 00282 TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); 00283 } catch(...) { 00284 printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); 00285 } 00286 } else { // Lanczos 00287 TNGraphMtx GraphMtx(Graph); 00288 TSparseSVD::LanczosSVD(GraphMtx, SngVecs, 2*SngVecs, ssotFull, SngValV, LSingV, RSingV); 00289 //TGAlg::SaveFullMtx(Graph, "adj_mtx.txt"); 00290 //TLAMisc::DumpTFltVVMjrSubMtrx(LSingV, LSingV.GetRows(), LSingV.GetCols(), "LSingV2.txt"); // save MTX 00291 } 00292 TFltIntPrV SngValIdV; 00293 for (int i = 0; i < SngValV.Len(); i++) { 00294 SngValIdV.Add(TFltIntPr(SngValV[i], i)); 00295 } 00296 SngValIdV.Sort(false); 00297 SngValV.Sort(false); 00298 for (int v = 0; v < SngValIdV.Len(); v++) { 00299 LeftSV.Add(); 00300 LSingV.GetCol(SngValIdV[v].Val2, LeftSV.Last()); 00301 RightSV.Add(); 00302 RSingV.GetCol(SngValIdV[v].Val2, RightSV.Last()); 00303 } 00304 IsAllValVNeg(LeftSV[0], true); 00305 IsAllValVNeg(RightSV[0], true); 00306 } 00307 00308 void GetEigVals(const PUNGraph& Graph, const int& EigVals, TFltV& EigValV) { 00309 // Lanczos 00310 TUNGraphMtx GraphMtx(Graph); 00311 //const int Nodes = Graph->GetNodes(); 00312 //int CalcVals = int(2*EigVals); 00313 //if (CalcVals > Nodes) { CalcVals = Nodes; } 00314 //while (EigValV.Len() < EigVals && CalcVals < 3*EigVals) { 00315 try { 00316 if (EigVals > 4) { 00317 TSparseSVD::SimpleLanczos(GraphMtx, 2*EigVals, EigValV, false); } 00318 else { TFltVV EigVecVV; // this is much more precise, but also much slower 00319 TSparseSVD::Lanczos(GraphMtx, EigVals, 3*EigVals, ssotFull, EigValV, EigVecVV, false); } 00320 } 00321 catch(...) { 00322 printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", 2*EigVals, EigValV.Len()); } 00323 if (EigValV.Len() < EigVals) { 00324 printf(" ***TRIED %d GOT %d values** \n", 2*EigVals, EigValV.Len()); } 00325 // CalcVals += EigVals; 00326 //} 00327 EigValV.Sort(false); 00328 /*if (EigValV.Len() > EigVals) { 00329 EigValV.Del(EigVals, EigValV.Len()-1); } 00330 else { 00331 while (EigValV.Len() < EigVals) EigValV.Add(1e-6); 00332 } 00333 IAssert(EigValV.Len() == EigVals);*/ 00334 } 00335 00336 void GetEigVec(const PUNGraph& Graph, TFltV& EigVecV) { 00337 TUNGraphMtx GraphMtx(Graph); 00338 TFltV EigValV; 00339 TFltVV EigVecVV; 00340 TSparseSVD::Lanczos(GraphMtx, 1, 8, ssotFull, EigValV, EigVecVV, false); 00341 EigVecVV.GetCol(0, EigVecV); // vector components are not sorted!!! 00342 IsAllValVNeg(EigVecV, true); 00343 } 00344 00345 // to get first few eigenvectors 00346 void GetEigVec(const PUNGraph& Graph, const int& EigVecs, TFltV& EigValV, TVec<TFltV>& EigVecV) { 00347 const int Nodes = Graph->GetNodes(); 00348 // Lanczos 00349 TUNGraphMtx GraphMtx(Graph); 00350 int CalcVals = int(2*EigVecs); 00351 if (CalcVals > Nodes) { CalcVals = Nodes; } 00352 TFltVV EigVecVV; 00353 //while (EigValV.Len() < EigVecs && CalcVals < 10*EigVecs) { 00354 try { 00355 TSparseSVD::Lanczos(GraphMtx, EigVecs, 2*EigVecs, ssotFull, EigValV, EigVecVV, false); } 00356 catch(...) { 00357 printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); } 00358 if (EigValV.Len() < EigVecs) { 00359 printf(" ***TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); } 00360 // CalcVals += EigVecs; 00361 //} 00362 TFltIntPrV EigValIdV; 00363 for (int i = 0; i < EigValV.Len(); i++) { 00364 EigValIdV.Add(TFltIntPr(EigValV[i], i)); 00365 } 00366 EigValIdV.Sort(false); 00367 EigValV.Sort(false); 00368 for (int v = 0; v < EigValIdV.Len(); v++) { // vector components are not sorted!!! 00369 EigVecV.Add(); 00370 EigVecVV.GetCol(EigValIdV[v].Val2, EigVecV.Last()); 00371 } 00372 IsAllValVNeg(EigVecV[0], true); 00373 } 00374 00375 // Inverse participation ratio: normalize EigVec to have L2=1 and then I=sum_k EigVec[i]^4 00376 // see Spectra of "real-world" graphs: Beyond the semicircle law by Farkas, Derenyi, Barabasi and Vicsek 00377 void GetInvParticipRat(const PUNGraph& Graph, int MaxEigVecs, int TimeLimit, TFltPrV& EigValIprV) { 00378 TUNGraphMtx GraphMtx(Graph); 00379 TFltVV EigVecVV; 00380 TFltV EigValV; 00381 TExeTm ExeTm; 00382 if (MaxEigVecs<=1) { MaxEigVecs=1000; } 00383 int EigVecs = TMath::Mn(Graph->GetNodes(), MaxEigVecs); 00384 printf("start %d vecs...", EigVecs); 00385 try { 00386 TSparseSVD::Lanczos2(GraphMtx, EigVecs, TimeLimit, ssotFull, EigValV, EigVecVV, false); 00387 } catch(...) { 00388 printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", EigVecs, EigValV.Len()); } 00389 printf(" ***TRIED %d GOT %d values in %s\n", EigVecs, EigValV.Len(), ExeTm.GetStr()); 00390 TFltV EigVec; 00391 EigValIprV.Clr(); 00392 if (EigValV.Empty()) { return; } 00393 for (int v = 0; v < EigVecVV.GetCols(); v++) { 00394 EigVecVV.GetCol(v, EigVec); 00395 EigValIprV.Add(TFltPr(EigValV[v], TSnapDetail::GetInvParticipRatEig(EigVec))); 00396 } 00397 EigValIprV.Sort(); 00398 } 00399 00400 namespace TSnapDetail { 00401 double GetInvParticipRatEig(const TFltV& EigVec) { 00402 double Sum2=0.0, Sum4=0.0; 00403 for (int i = 0; i < EigVec.Len(); i++) { 00404 Sum2 += EigVec[i]*EigVec[i]; 00405 Sum4 += pow(EigVec[i].Val, 4.0); 00406 } 00407 return Sum4/(Sum2*Sum2); 00408 } 00409 } // namespace TSnapDetail 00410 00411 }; // namespace TSnap