SNAP Library 2.0, User Reference
2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // Loading and saving graphs from/to various file formats. 00003 namespace TSnap { 00004 00006 template <class PGraph> PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId=0, const int& DstColId=1); 00008 template <class PGraph> PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId, const char& Separator); 00010 template <class PGraph> PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId=0, const int& DstColId=1); 00012 template <class PGraph> PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId, TStrHash<TInt>& StrToNIdH); 00014 template <class PGraph> PGraph LoadConnList(const TStr& InFNm); 00016 template <class PGraph> PGraph LoadConnListStr(const TStr& InFNm, TStrHash<TInt>& StrToNIdH); 00017 00019 00021 template <class PGraph> PGraph LoadPajek(const TStr& InFNm); 00023 PNGraph LoadDyNet(const TStr& FNm); 00025 TVec<PNGraph> LoadDyNetGraphV(const TStr& FNm); 00026 00027 //TODO: Sparse/Dense adjacency matrix which values we threshold at Thresh to obtain an adjacency matrix. 00028 //template <class PGraph> PGraph LoadAdjMtx(const TStr& FNm, const int Thresh); 00029 //TODO: Load from a GML file format (http://en.wikipedia.org/wiki/Graph_Modelling_Language) 00030 //template <class PGraph> PGraph LoadGml(const TStr& FNm, const int Thresh); 00031 00032 00034 template <class PGraph> void SaveEdgeList(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc=TStr()); 00036 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm); 00038 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH); 00040 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH); 00042 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH, const TIntStrH& EIdColorH); 00044 template <class PGraph> void SaveMatlabSparseMtx(const PGraph& Graph, const TStr& OutFNm); 00046 00049 template<class PGraph> void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc=TStr(), const bool& NodeLabels=false, const TIntStrH& NIdColorH=TIntStrH()); 00051 00054 template<class PGraph> void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const TIntStrH& NIdLabelH); 00055 00056 //TODO: Save to a GML file format (http://en.wikipedia.org/wiki/Graph_Modelling_Language) 00057 //template <class PGraph> SaveGml(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc); 00058 00060 // Implementation 00061 00063 00067 template <class PGraph> 00068 PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId) { 00069 TSsParser Ss(InFNm, ssfWhiteSep, true, true, true); 00070 PGraph Graph = PGraph::TObj::New(); 00071 int SrcNId, DstNId; 00072 while (Ss.Next()) { 00073 if (! Ss.GetInt(SrcColId, SrcNId) || ! Ss.GetInt(DstColId, DstNId)) { continue; } 00074 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00075 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00076 Graph->AddEdge(SrcNId, DstNId); 00077 } 00078 Graph->Defrag(); 00079 return Graph; 00080 } 00081 00083 00087 template <class PGraph> 00088 PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId, const char& Separator) { 00089 TSsParser Ss(InFNm, Separator); 00090 PGraph Graph = PGraph::TObj::New(); 00091 int SrcNId, DstNId; 00092 while (Ss.Next()) { 00093 if (! Ss.GetInt(SrcColId, SrcNId) || ! Ss.GetInt(DstColId, DstNId)) { continue; } 00094 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00095 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00096 Graph->AddEdge(SrcNId, DstNId); 00097 } 00098 Graph->Defrag(); 00099 return Graph; 00100 } 00101 00103 00108 template <class PGraph> 00109 PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId) { 00110 TSsParser Ss(InFNm, ssfWhiteSep); 00111 PGraph Graph = PGraph::TObj::New(); 00112 TStrHash<TInt> StrToNIdH(Mega(1), true); // hash-table mapping strings to integer node ids 00113 while (Ss.Next()) { 00114 const int SrcNId = StrToNIdH.AddKey(Ss[SrcColId]); 00115 const int DstNId = StrToNIdH.AddKey(Ss[DstColId]); 00116 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00117 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00118 Graph->AddEdge(SrcNId, DstNId); 00119 } 00120 Graph->Defrag(); 00121 return Graph; 00122 } 00123 00125 00131 template <class PGraph> 00132 PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId, TStrHash<TInt>& StrToNIdH) { 00133 TSsParser Ss(InFNm, ssfWhiteSep); 00134 PGraph Graph = PGraph::TObj::New(); 00135 while (Ss.Next()) { 00136 const int SrcNId = StrToNIdH.AddKey(Ss[SrcColId]); 00137 const int DstNId = StrToNIdH.AddKey(Ss[DstColId]); 00138 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00139 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00140 Graph->AddEdge(SrcNId, DstNId); 00141 } 00142 Graph->Defrag(); 00143 return Graph; 00144 } 00145 00147 00151 template <class PGraph> 00152 PGraph LoadConnList(const TStr& InFNm) { 00153 TSsParser Ss(InFNm, ssfWhiteSep, true, true, true); 00154 PGraph Graph = PGraph::TObj::New(); 00155 while (Ss.Next()) { 00156 if (! Ss.IsInt(0)) { continue; } 00157 const int SrcNId = Ss.GetInt(0); 00158 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00159 for (int dst = 1; dst < Ss.Len(); dst++) { 00160 const int DstNId = Ss.GetInt(dst); 00161 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00162 Graph->AddEdge(SrcNId, DstNId); 00163 } 00164 } 00165 Graph->Defrag(); 00166 return Graph; 00167 } 00168 00170 00175 template <class PGraph> 00176 PGraph LoadConnListStr(const TStr& InFNm, TStrHash<TInt>& StrToNIdH) { 00177 TSsParser Ss(InFNm, ssfWhiteSep, true, true, true); 00178 PGraph Graph = PGraph::TObj::New(); 00179 while (Ss.Next()) { 00180 const int SrcNId = StrToNIdH.AddDatId(Ss[0]); 00181 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00182 for (int dst = 1; dst < Ss.Len(); dst++) { 00183 const int DstNId = StrToNIdH.AddDatId(Ss[dst]); 00184 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00185 Graph->AddEdge(SrcNId, DstNId); 00186 } 00187 } 00188 Graph->Defrag(); 00189 return Graph; 00190 } 00191 00192 template <class PGraph> 00193 PGraph LoadPajek(const TStr& InFNm) { 00194 PGraph Graph = PGraph::TObj::New(); 00195 TSsParser Ss(InFNm, ssfSpaceSep, true, true, true); 00196 while ((Ss.Len()==0 || strstr(Ss[0], "*vertices") == NULL) && ! Ss.Eof()) { 00197 Ss.Next(); Ss.ToLc(); } 00198 // nodes 00199 bool EdgeList = true; 00200 EAssert(strstr(Ss[0], "*vertices") != NULL); 00201 while (Ss.Next()) { 00202 Ss.ToLc(); 00203 if (Ss.Len()>0 && Ss[0][0] == '%') { continue; } // comment 00204 if (strstr(Ss[0], "*arcslist")!=NULL || strstr(Ss[0],"*edgeslist")!=NULL) { EdgeList=false; break; } 00205 if (strstr(Ss[0], "*arcs")!=NULL || strstr(Ss[0],"*edges")!=NULL) { break; } // arcs are directed, edges are undirected 00206 Graph->AddNode(Ss.GetInt(0)); 00207 } 00208 // edges 00209 while (Ss.Next()) { 00210 if (Ss.Len()>0 && Ss[0][0] == '%') { continue; } // comment 00211 if (Ss.Len()>0 && Ss[0][0] == '*') { break; } 00212 if (EdgeList) { 00213 // <source> <destination> <weight> 00214 if (Ss.Len() >= 3 && Ss.IsInt(0) && Ss.IsInt(1)) { 00215 Graph->AddEdge(Ss.GetInt(0), Ss.GetInt(1)); } 00216 } else { 00217 // <source> <destination1> <destination2> <destination3> ... 00218 const int SrcNId = Ss.GetInt(0); 00219 for (int i = 1; i < Ss.Len(); i++) { 00220 Graph->AddEdge(SrcNId, Ss.GetInt(i)); } 00221 } 00222 } 00223 return Graph; 00224 } 00225 00226 template <class PGraph> 00227 void SaveEdgeList(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc) { 00228 FILE *F = fopen(OutFNm.CStr(), "wt"); 00229 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { fprintf(F, "# Directed graph: %s \n", OutFNm.CStr()); } 00230 else { fprintf(F, "# Undirected graph (each unordered pair of nodes is saved once): %s\n", OutFNm.CStr()); } 00231 if (! Desc.Empty()) { fprintf(F, "# %s\n", Desc.CStr()); } 00232 fprintf(F, "# Nodes: %d Edges: %d\n", Graph->GetNodes(), Graph->GetEdges()); 00233 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { fprintf(F, "# FromNodeId\tToNodeId\n"); } 00234 else { fprintf(F, "# NodeId\tNodeId\n"); } 00235 for (typename PGraph::TObj::TEdgeI ei = Graph->BegEI(); ei < Graph->EndEI(); ei++) { 00236 fprintf(F, "%d\t%d\n", ei.GetSrcNId(), ei.GetDstNId()); 00237 } 00238 fclose(F); 00239 } 00240 00241 template <class PGraph> 00242 void SavePajek(const PGraph& Graph, const TStr& OutFNm) { 00243 TIntH NIdToIdH(Graph->GetNodes(), true); 00244 FILE *F = fopen(OutFNm.CStr(), "wt"); 00245 fprintf(F, "*Vertices %d\n", Graph->GetNodes()); 00246 int i = 0; 00247 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) { 00248 fprintf(F, "%d \"%d\" ic Red fos 10\n", i+1, NI.GetId()); // ic: internal color, fos: font size 00249 NIdToIdH.AddDat(NI.GetId(), i+1); 00250 } 00251 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { 00252 fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected 00253 else { 00254 fprintf(F, "*Edges %d\n", Graph->GetEdges()); 00255 } 00256 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00257 const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId()); 00258 const int DstNId = NIdToIdH.GetDat(EI.GetDstNId()); 00259 fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1); // width=1 00260 } 00261 fclose(F); 00262 } 00263 00266 template <class PGraph> 00267 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH) { 00268 TIntH NIdToIdH(Graph->GetNodes(), true); 00269 FILE *F = fopen(OutFNm.CStr(), "wt"); 00270 fprintf(F, "*Vertices %d\n", Graph->GetNodes()); 00271 int i = 0; 00272 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) { 00273 fprintf(F, "%d \"%d\" ic %s fos 10\n", i+1, NI.GetId(), 00274 NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red"); 00275 NIdToIdH.AddDat(NI.GetId(), i+1); 00276 } 00277 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { 00278 fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected 00279 else { 00280 fprintf(F, "*Edges %d\n", Graph->GetEdges()); 00281 } 00282 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00283 const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId()); 00284 const int DstNId = NIdToIdH.GetDat(EI.GetDstNId()); 00285 fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1); 00286 } 00287 fclose(F); 00288 } 00289 00293 template <class PGraph> 00294 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH) { 00295 TIntH NIdToIdH(Graph->GetNodes(), true); 00296 FILE *F = fopen(OutFNm.CStr(), "wt"); 00297 fprintf(F, "*Vertices %d\n", Graph->GetNodes()); 00298 int i = 0; 00299 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) { 00300 fprintf(F, "%d \"%s\" ic %s fos 10\n", i+1, 00301 NIdLabelH.IsKey(NI.GetId()) ? NIdLabelH.GetDat(NI.GetId()).CStr() : TStr::Fmt("%d", NI.GetId()).CStr(), 00302 NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red"); 00303 NIdToIdH.AddDat(NI.GetId(), i+1); 00304 } 00305 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { 00306 fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected 00307 else { 00308 fprintf(F, "*Edges %d\n", Graph->GetEdges()); 00309 } 00310 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00311 const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId()); 00312 const int DstNId = NIdToIdH.GetDat(EI.GetDstNId()); 00313 fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1); 00314 } 00315 fclose(F); 00316 } 00317 00322 template <class PGraph> 00323 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH, const TIntStrH& EIdColorH) { 00324 CAssert(HasGraphFlag(typename PGraph::TObj, gfMultiGraph)); // network needs to have edge ids 00325 TIntH NIdToIdH(Graph->GetNodes(), true); 00326 FILE *F = fopen(OutFNm.CStr(), "wt"); 00327 fprintf(F, "*Vertices %d\n", Graph->GetNodes()); 00328 int i = 0; 00329 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) { 00330 fprintf(F, "%d \"%s\" ic %s fos 10\n", i+1, 00331 NIdLabelH.IsKey(NI.GetId()) ? NIdLabelH.GetDat(NI.GetId()).CStr() : TStr::Fmt("%d", NI.GetId()).CStr(), 00332 NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red"); 00333 NIdToIdH.AddDat(NI.GetId(), i+1); 00334 } 00335 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { 00336 fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected 00337 else { 00338 fprintf(F, "*Edges %d\n", Graph->GetEdges()); 00339 } 00340 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00341 const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId()); 00342 const int DstNId = NIdToIdH.GetDat(EI.GetDstNId()); 00343 fprintf(F, "%d %d 1 c %s\n", SrcNId, DstNId, 1, 00344 EIdColorH.IsKey(EI.GetId()) ? EIdColorH.GetDat(EI.GetId()).CStr() : "Black"); 00345 } 00346 fclose(F); 00347 } 00348 00350 template <class PGraph> 00351 void SaveMatlabSparseMtx(const PGraph& Graph, const TStr& OutFNm) { 00352 FILE *F = fopen(OutFNm.CStr(), "wt"); 00353 TIntSet NIdSet(Graph->GetNodes()); // so that 00354 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00355 NIdSet.AddKey(NI.GetId()); 00356 } 00357 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00358 const int Src = NIdSet.GetKeyId(EI.GetSrcNId())+1; 00359 const int Dst = NIdSet.GetKeyId(EI.GetDstNId())+1; 00360 fprintf(F, "%d\t%d\t1\n", Src, Dst); 00361 if (! HasGraphFlag(typename PGraph::TObj, gfDirected) && Src!=Dst) { 00362 fprintf(F, "%d\t%d\t1\n", Dst, Src); 00363 } 00364 } 00365 fclose(F); 00366 } 00367 00368 template<class PGraph> 00369 void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const bool& NodeLabels, const TIntStrH& NIdColorH) { 00370 const bool IsDir = HasGraphFlag(typename PGraph::TObj, gfDirected); 00371 FILE *F = fopen(OutFNm.CStr(), "wt"); 00372 if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr()); 00373 if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); } 00374 fprintf(F, " graph [splines=false overlap=false]\n"); //size=\"12,10\" ratio=fill 00375 // node [width=0.3, height=0.3, label=\"\", style=filled, color=black] 00376 // node [shape=box, width=0.3, height=0.3, label=\"\", style=filled, fillcolor=red] 00377 fprintf(F, " node [shape=ellipse, width=0.3, height=0.3%s]\n", NodeLabels?"":", label=\"\""); 00378 // node colors 00379 //for (int i = 0; i < NIdColorH.Len(); i++) { 00380 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00381 if (NIdColorH.IsKey(NI.GetId())) { 00382 fprintf(F, " %d [style=filled, fillcolor=\"%s\"];\n", NI.GetId(), NIdColorH.GetDat(NI.GetId()).CStr()); } 00383 else { 00384 fprintf(F, " %d ;\n", NI.GetId()); 00385 } 00386 } 00387 // edges 00388 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00389 if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 && !NIdColorH.IsKey(NI.GetId())) { 00390 fprintf(F, "%d;\n", NI.GetId()); } 00391 else { 00392 for (int e = 0; e < NI.GetOutDeg(); e++) { 00393 if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; } 00394 fprintf(F, " %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e)); 00395 } 00396 } 00397 } 00398 if (! Desc.Empty()) { 00399 fprintf(F, " label = \"\\n%s\\n\";", Desc.CStr()); 00400 fprintf(F, " fontsize=24;\n"); 00401 } 00402 fprintf(F, "}\n"); 00403 fclose(F); 00404 } 00405 00406 template<class PGraph> 00407 void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const TIntStrH& NIdLabelH) { 00408 const bool IsDir = Graph->HasFlag(gfDirected); 00409 FILE *F = fopen(OutFNm.CStr(), "wt"); 00410 if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr()); 00411 if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); } 00412 fprintf(F, " graph [splines=true overlap=false]\n"); //size=\"12,10\" ratio=fill 00413 fprintf(F, " node [shape=ellipse, width=0.3, height=0.3]\n"); 00414 // node colors 00415 //for (int i = 0; i < NodeLabelH.Len(); i++) { 00416 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00417 fprintf(F, " %d [label=\"%s\"];\n", NI.GetId(), NIdLabelH.GetDat(NI.GetId()).CStr()); 00418 } 00419 // edges 00420 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00421 if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 && ! NIdLabelH.IsKey(NI.GetId())) { 00422 fprintf(F, "%d;\n", NI.GetId()); } 00423 else { 00424 for (int e = 0; e < NI.GetOutDeg(); e++) { 00425 if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; } 00426 fprintf(F, " %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e)); 00427 } 00428 } 00429 } 00430 if (! Desc.Empty()) { 00431 fprintf(F, " label = \"\\n%s\\n\";", Desc.CStr()); 00432 fprintf(F, " fontsize=24;\n"); 00433 } 00434 fprintf(F, "}\n"); 00435 fclose(F); 00436 } 00437 00438 } // namespace TSnap