SNAP Library 2.1, Developer Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 //#////////////////////////////////////////////// 00003 00010 template <class TNodeData, bool IsDir> 00011 class TBigNet { 00012 public: 00013 typedef TNodeData TNodeDat; 00014 typedef TBigNet<TNodeData, IsDir> TNet; 00015 typedef TPt<TNet> PNet; 00016 public: 00017 class TNode; 00018 typedef THash<TInt, TNode> TNodeH; // use SaveToDisk to convert between the two hash table types 00019 typedef TPt<TBigNet<TNodeData, IsDir> > PBigNet; 00020 typedef TVecPool<TInt> TVPool; 00021 typedef TPt<TVPool> PVPool; 00022 00024 00026 class TNode { 00027 public: 00029 TInt InVId; 00031 00033 TInt OutVId; 00035 TNodeDat Dat; 00036 public: 00037 TNode() : InVId(-1), OutVId(-1), Dat() { } 00038 TNode(const int& InVecId, const int& OutVecId) : InVId(InVecId), OutVId(OutVecId), Dat() { } 00039 TNode(const int& InVecId, const int& OutVecId, const TNodeDat& NodeDat) : 00040 InVId(InVecId), OutVId(OutVecId), Dat(NodeDat) { } 00041 TNode(const TNode& Node) : InVId(Node.InVId), OutVId(Node.OutVId), Dat(Node.Dat) { } 00042 TNode(TSIn& SIn) : InVId(SIn), OutVId(SIn), Dat(SIn) { } 00043 void Save(TSOut& SOut) const { SOut.Save(InVId); SOut.Save(OutVId); Dat.Save(SOut); } 00045 bool IsUnused() const { return InVId==-1 && OutVId==-1; } 00046 }; 00048 00050 class TNodeI { 00051 protected: 00052 typedef typename TNodeH::TIter THashIter; 00053 THashIter NodeHI; 00054 TVPool *Pool; 00055 int InDeg, OutDeg, *InNIdV, *OutNIdV; // if undirected, InNIdV==OutNIdV 00056 public: 00057 inline void GetInOutNIdV(); 00058 int GetInVId() const { return NodeHI->Dat.InVId; } 00059 int GetOutVId() const { return NodeHI->Dat.OutVId; } 00060 public: 00061 TNodeI() : NodeHI(), Pool(NULL), InDeg(0), OutDeg(0), InNIdV(NULL), OutNIdV(NULL) { } 00062 TNodeI(const THashIter& NodeHIter, TVPool *PoolPt) : NodeHI(NodeHIter), Pool(PoolPt) { GetInOutNIdV(); } 00063 TNodeI(const TNodeI& NodeI) : NodeHI(NodeI.NodeHI), Pool(NodeI.Pool) { GetInOutNIdV(); } 00064 TNodeI& operator = (const TNodeI& NI) { NodeHI=NI.NodeHI; Pool=NI.Pool; GetInOutNIdV(); return *this; } 00066 TNodeI& operator++ (int) { NodeHI++; GetInOutNIdV(); return *this; } 00067 bool operator < (const TNodeI& NI) const { return NodeHI < NI.NodeHI; } 00068 bool operator == (const TNodeI& NI) const { return NodeHI == NI.NodeHI; } 00069 int GetId() const { return NodeHI->Key(); } 00070 int GetDeg() const { return GetInDeg()+(InNIdV!=OutNIdV?GetOutDeg():0); } 00071 int GetInDeg() const { return InDeg; } 00072 int GetOutDeg() const { return OutDeg; } 00073 int GetInNId(const int& NodeN) const { return InNIdV[NodeN]; } 00074 int GetOutNId(const int& NodeN) const { return OutNIdV[NodeN]; } 00075 int GetOutNbrId(const int& NodeN) const { return NodeN<OutDeg ? OutNIdV[NodeN]:InNIdV[NodeN-OutDeg]; } 00076 bool IsInNId(const int& NId) const { return BinSearch(InNIdV, InNIdV+InDeg, NId)!=NULL; } 00077 bool IsOutNId(const int& NId) const { return BinSearch(OutNIdV, OutNIdV+OutDeg, NId)!=NULL; } 00078 bool IsNbrNId(const int& NId) const { return IsOutNId(NId) || IsInNId(NId); } 00079 const TNodeData& operator () () const { return GetDat(); } 00080 TNodeData& operator () () { return GetDat(); } 00081 const TNodeData& GetDat() const { return NodeHI->Dat.Dat; } 00082 TNodeData& GetDat() { return NodeHI->Dat.Dat; } 00083 // requires pointer back to the network (see as in TNodeNet) 00084 //const TNodeData& GetInNDat(const int& NodeN) const { return Net->GetNDat(GetInNId(NodeN)); } 00085 //TNodeData& GetInNDat(const int& NodeN) { return Net->GetNDat(GetInNId(NodeN)); } 00086 //const TNodeData& GetOutNDat(const int& NodeN) const { return Net->GetNDat(GetOutNId(NodeN)); } 00087 //TNodeData& GetOutNDat(const int& NodeN) { return Net->GetNDat(GetOutNId(NodeN)); } 00088 //const TNodeData& GetNbrNDat(const int& NodeN) const { return Net->GetNDat(GetNbrNId(NodeN)); } 00089 //TNodeData& GetNbrNDat(const int& NodeN) { return Net->GetNDat(GetNbrNId(NodeN)); } 00090 void Dump() const; 00091 friend class TBigNet<TNodeData, IsDir>; 00092 }; 00093 00095 00097 class TEdgeI { 00098 private: 00099 TNodeI CurNode, EndNode; 00100 int CurEdge; 00101 public: 00102 TEdgeI() : CurNode(), EndNode(), CurEdge(0) { } 00103 TEdgeI(const TNodeI& NodeI, const TNodeI& EndNodeI, const int& EdgeN=0) : CurNode(NodeI), EndNode(EndNodeI), CurEdge(0) { } 00104 TEdgeI(const TEdgeI& EdgeI) : CurNode(EdgeI.CurNode), EndNode(EdgeI.EndNode), CurEdge(EdgeI.CurEdge) { } 00105 TEdgeI& operator = (const TEdgeI& EdgeI) { if (this!=&EdgeI) { CurNode=EdgeI.CurNode; EndNode=EdgeI.EndNode; CurEdge=EdgeI.CurEdge; } return *this; } 00106 TEdgeI& operator++ (int) { CurEdge++; if (CurEdge >= CurNode.GetOutDeg()) { CurEdge=0; CurNode++; 00107 while (CurNode < EndNode && CurNode.GetOutDeg()==0) { CurNode++; } } return *this; } 00108 bool operator < (const TEdgeI& EdgeI) const { return CurNode<EdgeI.CurNode || (CurNode==EdgeI.CurNode && CurEdge<EdgeI.CurEdge); } 00109 bool operator == (const TEdgeI& EdgeI) const { return CurNode == EdgeI.CurNode && CurEdge == EdgeI.CurEdge; } 00110 int GetId() const { return -1; } 00111 int GetSrcNId() const { return CurNode.GetId(); } 00112 int GetDstNId() const { return CurNode.GetOutNId(CurEdge); } 00113 const TNodeData& GetSrcNDat() const { return CurNode.GetNDat(); } 00114 TNodeData& GetDstNDat() { return CurNode.GetOutNDat(CurEdge); } 00115 friend class TNodeNet<TNodeData>; 00116 }; 00117 protected: 00118 bool IsNode(const int& NId, TNode& Node) const { return NodeH.IsKeyGetDat(NId, Node); } 00119 int* GetInNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).InVId); } 00120 int* GetOutNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).OutVId); } 00121 static void AddSorted(int* Beg, int* End, const int& Val); 00122 static const int* BinSearch(const int* Beg, const int* End, const int& Val); 00123 static const int* BinSearch2(const int* Beg, const int* End, const int& Val); 00124 const TNode& GetNode(const int& NId) const { return NodeH.GetDat(NId); } 00125 TNode& GetNode(const int& NId) { return NodeH.GetDat(NId); } 00126 public: 00127 enum { DelNId = INT_MAX }; // id of a deleted node 00128 protected: 00129 TCRef CRef; 00130 TInt MxNId; 00131 TB32Set Flags; 00132 TVPool Pool; 00133 TNodeH NodeH; 00134 public: 00135 TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources=false); 00136 TBigNet(TSIn& SIn) : MxNId(SIn), Flags(SIn), Pool(SIn), NodeH(SIn) { TBool Dir(SIn); IAssert(Dir()==IsDir); } 00137 virtual ~TBigNet() { } 00138 virtual void Save(TSOut& SOut) const; 00139 static PBigNet New(const int& Nodes, const TSize& Edges, const bool& Sources=false) { 00140 return PBigNet(new TBigNet(Nodes, Edges, Sources)); } 00141 static PBigNet Load(TSIn& SIn) { return PBigNet(new TBigNet(SIn)); } 00142 TBigNet& operator = (const TBigNet& Net) { if (this!=&Net) { 00143 MxNId=Net.MxNId; Flags=Net.Flags; Pool=Net.Pool; NodeH=Net.NodeH; } return *this; } 00144 00145 bool OnlySources() const { return Flags.In(gfSources); } 00146 bool HasFlag(const TGraphFlag& Flag) const { 00147 return HasGraphFlag(typename TBigNet, Flag) || (Flag==gfSources && OnlySources()); } 00148 void DumpFlags() const; 00149 00150 // vertices 00151 int GetNodes() const { return NodeH.Len(); } 00152 int GetMxNId() const { return MxNId; } 00153 int AddNode(int NId, const int& InDeg, const int& OutDeg); 00154 int AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeDat& NodeDat); 00155 int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV); 00156 int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeDat& NodeDat); 00157 int AddUndirNode(int NId, const int& Deg); 00158 int AddUndirNode(int NId, const int& Deg, const TNodeDat& NodeDat); 00159 int AddUndirNode(int NId, const TIntV& EdgeNIdV); 00160 int AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeDat& NodeDat); 00161 void SetInNIdV(int NId, const TIntV& InNIdV); 00162 void SetOutNIdV(int NId, const TIntV& OutNIdV); 00163 void GetInNIdV(int NId, TIntV& OutNIdV) const; 00164 void GetOutNIdV(int NId, TIntV& OutNIdV) const; 00165 bool IsNode(const int& NId) const { return NodeH.IsKey(NId); } 00166 TNodeI BegNI() const { return TNodeI(NodeH.BegI(), (TVPool *)&Pool); } 00167 TNodeI EndNI() const { return TNodeI(NodeH.EndI(), (TVPool *)&Pool); } 00168 TNodeI GetNI(const int& NId) const { return TNodeI(NodeH.GetI(NId), (TVPool *)&Pool); } 00169 TNodeDat& GetNDat(const int& NId) { return NodeH.GetDat(NId).Dat; } 00170 const TNodeDat& GetNDat(const int& NId) const { return NodeH.GetDat(NId).Dat; } 00171 // edges 00172 TEdgeI BegEI() const { TNodeI NI=BegNI(); while(NI<EndNI() && NI.GetOutDeg()==0) NI++; return TEdgeI(NI, EndNI()); } 00173 TEdgeI EndEI() const { return TEdgeI(EndNI(), EndNI()); } 00174 TEdgeI GetEI(const int& EId) const; // not supported 00175 00176 // delete nodes 00177 int IsolateNode(int NId); // isolate the node by setting edge endpoints to point to node id DelNId, IsNode(DelNId)==false) 00178 int DelNode(int NId); // set neighbors to point to DelNId, delete node from the node table 00179 bool IsIsoNode(const int& NId) const; 00180 uint GetDelEdges(); // the number deleted edges 00181 void CompactEdgePool(); // after nodes are isolated and deleted, we compact the empty space 00182 00183 // edges 00184 ::TSize GetEdges() const { return Pool.GetVals(); } 00185 int AddEdge(const int& SrcNId, const int& DstNId); 00186 bool IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir=true) const; 00187 00188 void SortEdgeV(); 00189 void InvertFromSources(uint ExpectNodes=0); // add missing nodes and in-links 00190 void Rewire(TRnd& Rnd = TInt::Rnd); // create a random network with same degree distribution (configuration model) 00191 00192 // manipulation 00193 PNGraph GetNGraph(const bool& RenumberNodes=false) const; 00194 PNGraph GetSubNGraph(const TIntV& NIdV) const; 00195 PBigNet GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes=false) const; 00196 void GetSubGraph(const TIntV& NIdV, TBigNet* NewNet, const bool& RenumberNodes=false) const; 00197 00198 int GetRndNId(TRnd& Rnd=TInt::Rnd) const { return NodeH.GetKey(NodeH.GetRndKeyId(Rnd)); } 00199 TNodeI GetRndNI(TRnd& Rnd=TInt::Rnd) const { return GetNI(GetRndNId(Rnd)); } 00200 void GetNIdV(TIntV& NIdV) const; 00201 00202 bool Empty() const { return GetNodes()==0; } 00203 void Clr(const bool& DoDel = true) { MxNId = 0; NodeH.Clr(DoDel); Pool.Clr(DoDel); } 00204 void Reserve(const int& Nodes, const TSize& Edges); 00205 void Defrag(const bool& OnlyNodeLinks = false) { } 00206 bool IsOk() const; 00207 void Dump(const TStr& Desc = TStr()) const; 00208 void SaveForDisk(const TStr& OutFNm) const; 00209 00210 static void LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH); 00211 static void SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash); 00212 friend class TPt<TBigNet>; 00213 }; 00214 00215 // set flags 00216 namespace TSnap { 00217 template <class TNodeData, bool IsDir> struct IsDirected<TBigNet<TNodeData, IsDir> > { enum { Val = 0 }; }; 00218 template <class TNodeData> struct IsDirected<TBigNet<TNodeData, true> > { enum { Val = 1 }; }; 00219 template <class TNodeData, bool IsDir> struct IsNodeDat<TBigNet<TNodeData, IsDir> > { enum { Val = 1 }; }; 00220 } 00221 00222 template <class TNodeData, bool IsDir> 00223 inline void TBigNet<TNodeData, IsDir>::TNodeI::GetInOutNIdV() { 00224 if (NodeHI.IsEnd()) return; 00225 const TNode& N = NodeHI->Dat; 00226 if (! Pool->IsVId(N.InVId)) { 00227 InDeg=0; OutDeg=0; } 00228 else { 00229 InDeg=Pool->GetVLen(N.InVId); 00230 OutDeg=Pool->GetVLen(N.OutVId); 00231 InNIdV=(int *)Pool->GetValVPt(N.InVId); 00232 OutNIdV=(int *)Pool->GetValVPt(N.OutVId); 00233 } 00234 } 00235 00236 template <class TNodeData, bool IsDir> 00237 void TBigNet<TNodeData, IsDir>::TNodeI::Dump() const { 00238 printf("NodeId: %d\n", GetId()); 00239 printf(" I:%4d]", GetInDeg()); 00240 for (int i = 0; i < GetInDeg(); i++) { printf(" %d", GetInNId(i)); } 00241 printf("\n O:%4d]", GetOutDeg()); 00242 for (int i = 0; i < GetOutDeg(); i++) { printf(" %d", GetOutNId(i)); } 00243 printf("\n"); 00244 } 00245 00246 template <class TNodeData, bool IsDir> 00247 void TBigNet<TNodeData, IsDir>::AddSorted(int* Beg, int* End, const int& Val) { 00248 // there is at least one free slot available for the Val 00249 IAssertR(*(End-1)==TInt::Mx, "Edge can not be added: no free space"); 00250 // find position to insert 00251 int Half, Len = int(End-Beg); 00252 while (Len > 0) { 00253 Half = Len/2; 00254 int* Mid=Beg+Half; 00255 if (*Mid < Val) { Beg=Mid+1; Len=Len-Half-1; } else { Len=Half; } } 00256 IAssertR(*Beg != Val, "Value already existis"); 00257 // insert 00258 memmove(Beg+1, Beg, sizeof(int)*(End-Beg-1)); 00259 *Beg = Val; 00260 } 00261 00262 template <class TNodeData, bool IsDir> 00263 const int* TBigNet<TNodeData, IsDir>::BinSearch(const int* Beg, const int* End, const int& Val) { 00264 End--; const int *Mid; 00265 while (Beg <= End) { Mid = Beg+(End-Beg)/2; 00266 if (*Mid == Val) { return Mid; } 00267 else if (Val < *Mid) { End=Mid-1; } else { Beg=Mid+1; } 00268 } 00269 return NULL; 00270 } 00271 00272 template <class TNodeData, bool IsDir> 00273 const int* TBigNet<TNodeData, IsDir>::BinSearch2(const int* Beg, const int* End, const int& Val) { 00274 const int* First = Beg; 00275 const int* Last = End; 00276 const int* Mid; 00277 TSize len = End-Beg, half; 00278 while (len > 0) { 00279 half = len>>1; Mid=First+half; 00280 if (*Mid < Val) { First = Mid; First++; len=len-half-1; } 00281 else { len=half; } 00282 } 00283 return First==Last ? Last-1 : First; 00284 } 00285 00286 template <class TNodeData, bool IsDir> 00287 TBigNet<TNodeData, IsDir>::TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources) : 00288 CRef(), MxNId(0), Flags(), Pool(IsDir ? 2*Edges:Edges, 10000000, true, TInt::Mx), NodeH(Nodes) { 00289 //Flags.Incl(gfNodeGraph); 00290 //Flags.Incl(gfNetwork); 00291 //if (IsDir) { Flags.Incl(gfDirected); } 00292 if (Sources) { 00293 Flags.Incl(gfSources); 00294 IAssertR(! IsDir, "Jure: not clear what happens is graph is Undirected and has only sources."); 00295 } 00296 //DumpFlags(); 00297 } 00298 00299 template <class TNodeData, bool IsDir> 00300 void TBigNet<TNodeData, IsDir>::Save(TSOut& SOut) const { 00301 MxNId.Save(SOut); 00302 Flags.Save(SOut); 00303 Pool.Save(SOut); 00304 NodeH.Save(SOut); 00305 TBool(IsDir).Save(SOut); 00306 } 00307 00308 template <class TNodeData, bool IsDir> 00309 void TBigNet<TNodeData, IsDir>::DumpFlags() const { 00310 for (int i = 1; i <int(gfMx); i++) { 00311 if (HasFlag(TGraphFlag(i))) { printf(" +"); } 00312 else { printf(" -"); } 00313 printf("%s", TSnap::GetFlagStr(TGraphFlag(i)).CStr()); 00314 } 00315 printf("\n"); 00316 } 00317 00318 template <class TNodeData, bool IsDir> 00319 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg) { 00320 CAssert(IsDir); 00321 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00322 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00323 TNode& Node = NodeH.AddDat(NId); 00324 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00325 Node.InVId = Pool.AddEmptyV(InDeg); 00326 Node.OutVId = Pool.AddEmptyV(OutDeg); 00327 return NId; 00328 } 00329 00330 template <class TNodeData, bool IsDir> 00331 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeData& NodeDat) { 00332 CAssert(IsDir); 00333 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00334 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00335 TNode& Node = NodeH.AddDat(NId); 00336 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00337 Node.InVId = Pool.AddEmptyV(InDeg); 00338 Node.OutVId = Pool.AddEmptyV(OutDeg); 00339 Node.Dat = NodeDat; 00340 return NId; 00341 } 00342 00343 template <class TNodeData, bool IsDir> 00344 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg) { 00345 CAssert(! IsDir); 00346 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00347 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00348 TNode& Node = NodeH.AddDat(NId); 00349 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00350 Node.InVId = Pool.AddEmptyV(Deg); 00351 Node.OutVId = Node.InVId; // same vector 00352 return NId; 00353 } 00354 00355 template <class TNodeData, bool IsDir> 00356 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg, const TNodeData& NodeDat) { 00357 CAssert(! IsDir); 00358 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00359 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00360 TNode& Node = NodeH.AddDat(NId); 00361 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00362 Node.InVId = Pool.AddEmptyV(Deg); 00363 Node.OutVId = Node.InVId; // same vector 00364 Node.Dat = NodeDat; 00365 return NId; 00366 } 00367 00368 template <class TNodeData, bool IsDir> 00369 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV) { 00370 CAssert(IsDir); 00371 IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted()); 00372 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00373 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00374 TNode& Node = NodeH.AddDat(NId); 00375 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00376 Node.InVId = Pool.AddV(InNIdV); 00377 Node.OutVId = Pool.AddV(OutNIdV); 00378 return NId; 00379 } 00380 00381 template <class TNodeData, bool IsDir> 00382 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeData& NodeDat) { 00383 CAssert(IsDir); 00384 IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted()); 00385 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00386 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00387 TNode& Node = NodeH.AddDat(NId); 00388 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00389 Node.InVId = Pool.AddV(InNIdV); 00390 Node.OutVId = Pool.AddV(OutNIdV); 00391 Node.Dat = NodeDat; 00392 return NId; 00393 } 00394 00395 template <class TNodeData, bool IsDir> 00396 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV) { 00397 CAssert(! IsDir); 00398 IAssert(EdgeNIdV.IsSorted()); 00399 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00400 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00401 TNode& Node = NodeH.AddDat(NId); 00402 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00403 Node.InVId = Pool.AddV(EdgeNIdV); 00404 Node.OutVId = Node.InVId; 00405 return NId; 00406 } 00407 00408 template <class TNodeData, bool IsDir> 00409 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeData& NodeDat) { 00410 CAssert(! IsDir); 00411 IAssert(EdgeNIdV.IsSorted()); 00412 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00413 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00414 TNode& Node = NodeH.AddDat(NId); 00415 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00416 Node.InVId = Pool.AddV(EdgeNIdV); 00417 Node.OutVId = Node.InVId; 00418 Node.Dat = NodeDat; 00419 return NId; 00420 } 00421 00422 template <class TNodeData, bool IsDir> 00423 void TBigNet<TNodeData, IsDir>::SetInNIdV(int NId, const TIntV& InNIdV) { 00424 TNode Node; EAssert(IsNode(NId, Node)); 00425 TIntV InNodesV; Pool.GetV(Node.InVId, InNodesV); 00426 EAssert(InNIdV.Len() == InNodesV.Len()); 00427 memcpy(InNodesV.BegI(), InNIdV.BegI(), sizeof(TInt)*InNIdV.Len()); 00428 } 00429 00430 template <class TNodeData, bool IsDir> 00431 void TBigNet<TNodeData, IsDir>::SetOutNIdV(int NId, const TIntV& OutNIdV) { 00432 TNode Node; EAssert(IsNode(NId, Node)); 00433 TIntV OutNodesV; Pool.GetV(Node.OutVId, OutNodesV); 00434 EAssert(OutNIdV.Len() == OutNodesV.Len()); 00435 memcpy(OutNodesV.BegI(), OutNIdV.BegI(), sizeof(TInt)*OutNIdV.Len()); 00436 } 00437 00438 template <class TNodeData, bool IsDir> 00439 void TBigNet<TNodeData, IsDir>::GetInNIdV(int NId, TIntV& InNIdV) const { 00440 TNode Node; EAssertR(IsNode(NId, Node), TStr::Fmt("Not node: NId=%d\n", NId).CStr()); 00441 Pool.GetV(Node.InVId, InNIdV); 00442 } 00443 00444 template <class TNodeData, bool IsDir> 00445 void TBigNet<TNodeData, IsDir>::GetOutNIdV(int NId, TIntV& OutNIdV) const { 00446 TNode Node; EAssert(IsNode(NId, Node)); 00447 Pool.GetV(Node.OutVId, OutNIdV); 00448 } 00449 00450 // Node is deleted by setting edge endpoints to point to node id -1 (DelNId) 00451 // No memory is freed 00452 template <class TNodeData, bool IsDir> 00453 int TBigNet<TNodeData, IsDir>::IsolateNode(int NId) { 00454 TIntV OutV; 00455 int NDel = 0; 00456 // out-edges 00457 GetOutNIdV(NId, OutV); 00458 for (int i = 0; i < OutV.Len(); i++) { 00459 if (OutV[i] == DelNId) { break; } // because they are sorted 00460 // fast implementation 00461 const TNode& N = NodeH.GetDat(OutV[i]); 00462 int *InNIdV = (int *) Pool.GetValVPt(N.InVId); 00463 const int Deg = Pool.GetVLen(N.InVId); 00464 int* Val = (int *) BinSearch(InNIdV, InNIdV+Deg, NId); 00465 if (Val == NULL) { 00466 printf("BAD: Can't find: OUT: NId: %d -- OutNbrId: %d\n", NId, OutV[i]()); 00467 continue; 00468 } 00469 IAssert(Val != 0); 00470 memcpy(Val, Val+1, sizeof(int)*int(InNIdV+Deg-Val)); 00471 *(InNIdV+Deg-1) = DelNId; NDel++; 00472 } 00473 OutV.PutAll(DelNId); 00474 // in-edges 00475 if (IsDir) { 00476 TIntV InV; 00477 GetInNIdV(NId, InV); 00478 for (int i = 0; i < InV.Len(); i++) { 00479 if (InV[i] == DelNId) { break; } 00480 // fast implementation 00481 const TNode& N = NodeH.GetDat(InV[i]); 00482 int *OutNIdV = (int *) Pool.GetValVPt(N.OutVId); 00483 const int Deg = Pool.GetVLen(N.OutVId); 00484 int* Val = (int *) BinSearch(OutNIdV, OutNIdV+Deg, NId); 00485 if (Val == NULL) { 00486 printf("IN: NId: %d -- InNbrId: %d\n", NId, OutV[i]()); 00487 continue; 00488 } 00489 IAssert(Val != 0); 00490 memcpy(Val, Val+1, sizeof(int)*int(OutNIdV+Deg-Val)); 00491 *(OutNIdV+Deg-1) = DelNId; NDel++; 00492 } 00493 InV.PutAll(DelNId); 00494 } 00495 return NDel; 00496 } 00497 00498 // set neigbors to point to DelNId, delete node from the node table 00499 template <class TNodeData, bool IsDir> 00500 int TBigNet<TNodeData, IsDir>::DelNode(int NId) { 00501 const int DelEdges = IsolateNode(NId); 00502 NodeH.DelKey(NId); 00503 return DelEdges; 00504 } 00505 00506 template <class TNodeData, bool IsDir> 00507 bool TBigNet<TNodeData, IsDir>::IsIsoNode(const int& NId) const { 00508 if (NId == DelNId) { return true; } 00509 TIntV OutV; 00510 GetOutNIdV(NId, OutV); 00511 if (OutV.Empty() || OutV[0] == DelNId) { return true; } 00512 return false; 00513 } 00514 00515 // the number deleted edges 00516 template <class TNodeData, bool IsDir> 00517 uint TBigNet<TNodeData, IsDir>::GetDelEdges() { 00518 uint DelEdges = 0; 00519 TIntV OutV; 00520 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00521 const int NId = NI.GetId(); 00522 GetOutNIdV(NId, OutV); 00523 for (int i = 0; i < OutV.Len(); i++) { 00524 if (OutV[i] == DelNId) { DelEdges++; } 00525 } 00526 } 00527 return DelEdges; 00528 } 00529 00530 template <class TNodeData, bool IsDir> 00531 void TBigNet<TNodeData, IsDir>::CompactEdgePool() { 00532 Pool.CompactPool(DelNId); 00533 } 00534 00535 template <class TNodeData, bool IsDir> 00536 int TBigNet<TNodeData, IsDir>::AddEdge(const int& SrcNId, const int& DstNId) { 00537 TNode Src; IAssert(IsNode(SrcNId, Src)); 00538 int* OutV = (int *)Pool.GetValVPt(Src.OutVId); 00539 const int OutVLen = Pool.GetVLen(Src.OutVId); 00540 Assert(BinSearch(OutV, OutV+OutVLen, DstNId)==NULL); // no edge yet 00541 AddSorted(OutV, OutV+OutVLen, DstNId); 00542 if (! OnlySources()) { 00543 TNode Dst; IAssert(IsNode(DstNId, Dst)); 00544 int* InV = (int *)Pool.GetValVPt(Dst.InVId); 00545 const int InVLen = Pool.GetVLen(Dst.InVId); 00546 AddSorted(InV, InV+InVLen, SrcNId); 00547 } 00548 return -1; // edge id 00549 } 00550 00551 template <class TNodeData, bool IsDir> 00552 bool TBigNet<TNodeData, IsDir>::IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir) const { 00553 TNode Src, Dst; 00554 if (! IsNode(SrcNId, Src)) { return false; } // no source node 00555 int* OutV1 = (int *)Pool.GetValVPt(Src.OutVId); 00556 const bool IsEdge = BinSearch(OutV1, OutV1+Pool.GetVLen(Src.OutVId), DstNId) != NULL; 00557 if (IsEdge && OnlySources()) { return true; } 00558 const bool IsDstNode = IsNode(DstNId, Dst); 00559 if (! IsDstNode) { return false; } // no destination node 00560 else if (IsEdge) { return true; } // destination and link found 00561 else if (! Dir) { // check for the undirected version of the edge 00562 int* OutV2 = (int *)Pool.GetValVPt(Dst.OutVId); 00563 return BinSearch(OutV2, OutV2+Pool.GetVLen(Dst.OutVId), SrcNId) != NULL; } 00564 else { return false; } 00565 } 00566 00567 template <class TNodeData, bool IsDir> 00568 void TBigNet<TNodeData, IsDir>::GetNIdV(TIntV& NIdV) const { 00569 NIdV.Reserve(GetNodes(), 0); 00570 for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) { 00571 NIdV.Add(I->Key); } 00572 } 00573 00574 template <class TNodeData, bool IsDir> 00575 void TBigNet<TNodeData, IsDir>::SortEdgeV() { 00576 printf("Sorting Edges... "); 00577 TExeTm ExeTm; 00578 TIntV OutV, InV; 00579 int cnt = 0; 00580 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00581 const int NId = NI.GetId(); 00582 GetOutNIdV(NId, OutV); 00583 OutV.Sort(); 00584 if (IsDir) { 00585 GetInNIdV(NId, InV); 00586 InV.Sort(); 00587 } 00588 if (++cnt % Mega(1) == 0) { printf("\r sort:%dm %s", cnt/Mega(1), ExeTm.GetStr()); } 00589 } 00590 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00591 const int NId = NI.GetId(); 00592 GetOutNIdV(NId, OutV); 00593 IAssert(OutV.IsSorted()); 00594 GetInNIdV(NId, InV); 00595 IAssert(InV.IsSorted()); 00596 if (++cnt % Mega(1) == 0) { printf("\r check sorted:%dm %s", cnt/Mega(1), ExeTm.GetStr()); } 00597 } 00598 printf("done. [%s]\n", ExeTm.GetStr()); 00599 } 00600 00601 // add missing nodes and in-links from a network of sources 00602 template <class TNodeData, bool IsDir> 00603 void TBigNet<TNodeData, IsDir>::InvertFromSources(uint ExpectNodes) { 00604 typedef THash<TInt, TInt> TDegHash; 00605 typedef int* TIntPt; 00606 if (ExpectNodes == 0) ExpectNodes=4*GetNodes(); 00607 printf("\nInverting BigNet: reserved for %s nodes\n", TInt::GetMegaStr(ExpectNodes).CStr()); 00608 CAssert(IsDir); 00609 IAssert(OnlySources()); 00610 TDegHash InDegH(ExpectNodes); 00611 TSize NDest=0; 00612 // get node in-degree 00613 uint c=0; TExeTm ExeTm; 00614 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) { 00615 for (int e = 0; e < NI.GetOutDeg(); e++) { 00616 InDegH.AddDat(NI.GetOutNId(e)) += 1; } 00617 if (c%100000==0) printf("\r%s h:%d [%g] ", TInt::GetMegaStr(c).CStr(), InDegH.Len(), ExeTm.GetSecs()); 00618 } 00619 printf("\n Resizing NodePool: %lld -> %lld\n", uint64(Pool.Reserved()), uint64(GetEdges())); 00620 if (2*GetEdges() > Pool.Reserved()) { 00621 Pool.Reserve(2*GetEdges()); } 00622 // add nodes 00623 printf("NodeH: %s nodes, InDeg: %s nodes, Reserve: %s\n", TInt::GetMegaStr(NodeH.Len()).CStr(), 00624 TInt::GetMegaStr(InDegH.Len()).CStr(), TInt::GetMegaStr(ExpectNodes).CStr()); 00625 NodeH.Reserve(ExpectNodes); 00626 for (TDegHash::TIter I = InDegH.BegI(); I < InDegH.EndI(); I++) { 00627 const int NId = I->Key, InDeg = I->Dat; 00628 if (! IsNode(NId)) { 00629 AddNode(NId, InDeg, 0); NDest++; } // new destination node, no out-links 00630 else { 00631 TNode& Node = GetNode(NId); 00632 IAssert(Node.InVId == 0); // no in-links 00633 Node.InVId = Pool.AddEmptyV(InDeg); } 00634 } 00635 InDegH.Clr(true); 00636 printf("Added: %lld destination nodes\n", uint64(NDest)); 00637 printf("Graph nodes: %lld nodes\n", uint64(GetNodes())); 00638 // pointer to in-links vector 00639 THash<TInt, int*> NIdToPtH(GetNodes()); 00640 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) 00641 NIdToPtH.AddDat(NI.GetId(), (int *)Pool.GetValVPt(NI.GetInVId())); 00642 // add in-edges 00643 printf("Adding edges...\n"); 00644 c = 0; 00645 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) { 00646 const int SrcNId = NI.GetId(); 00647 for (int e = 0; e < NI.GetOutDeg(); e++) { 00648 TIntPt& InV = NIdToPtH.GetDat(NI.GetOutNId(e)); 00649 IAssert(*InV == TInt::Mx); 00650 *InV = SrcNId; InV++; 00651 } 00652 if (c%100000==0) printf("\r%s [%g] ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs()); 00653 } 00654 // sort in-links 00655 printf("\nSorting in-links...\n"); 00656 TIntV InNIdV; c = 0; 00657 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) { 00658 Pool.GetV(NI.GetInVId(), InNIdV); 00659 InNIdV.Sort(); 00660 if (c%100000==0) printf("\r%s [%g] ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs()); 00661 } 00662 printf("\r...done [%g]\n", ExeTm.GetSecs()); 00663 Flags.Excl(gfSources); 00664 } 00665 00666 template <class TNodeData, bool IsDir> 00667 void TBigNet<TNodeData, IsDir>::Rewire(TRnd& Rnd) { 00668 uint64 NDup=0, NResolve=0, NAddit=0, cnt = 0; 00669 TExeTm ExeTm; 00670 IAssertR(! IsDir, "Only undirected networks are supported"); 00671 printf("Rewiring the network... 1"); 00672 Pool.ShuffleAll(Rnd); printf("[%s]\n", ExeTm.GetStr()); 00673 //Pool.ShuffleAll(Rnd); printf(" done [%s]\n", ExeTm.GetStr()); 00674 printf(" sorting edges...\n"); 00675 SortEdgeV(); // sort individual edge vectors 00676 printf(" done [%s]\n", ExeTm.GetStr()); 00677 // remove duplicates and self edges 00678 printf(" removing duplicates...\n"); 00679 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) { 00680 const int VId = NI.GetOutVId(); 00681 int Len = Pool.GetVLen(VId); 00682 int* V = (int *)Pool.GetValVPt(VId); 00683 for (int i = 0; i < Len-1 && V[i]!=DelNId; i++) { 00684 if (V[i] == V[i+1] || V[i]==NI.GetId()) { 00685 memcpy(V+i, V+i+1, sizeof(int)*(Len-i-1)); i--; 00686 V[Len-1] = DelNId; NDup++; 00687 } 00688 } 00689 if (Len > 0 && V[Len-1]==NI.GetId()) { V[Len-1] = DelNId; NDup++; } 00690 if (cnt % Mega(10) == 0) { printf("."); fflush(stdout); } 00691 } 00692 printf("\n %llu duplicate edges removed [%s]\n", NDup, ExeTm.GetStr()); 00693 if (OnlySources()) { return; } 00694 // resolve one way edges 00695 printf(" resolving one way edges...\n"); cnt=0; fflush(stdout); 00696 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) { 00697 for (int e = 0; e < NI.GetOutDeg(); e++) { 00698 if (NI.GetOutNId(e) == DelNId) { continue; } // skip deleted nodes 00699 const int InVId = GetNode(NI.GetOutNId(e)).InVId; 00700 const int InVLen = Pool.GetVLen(InVId); IAssert(InVLen>=0 && InVLen < Mega(3)); 00701 int* InV = (int *) Pool.GetValVPt(InVId); 00702 int* Val = (int *) BinSearch2(InV, InV+InVLen, NI.GetId()); 00703 if (*Val == NI.GetId()) { continue; } // points back, everything is ok 00704 if (InVLen>0 && InV[InVLen-1] == DelNId) { // there is space at the end, insert 00705 IAssert((InVLen-(Val-InV)-1) >= 0); 00706 memmove(Val+1, Val, sizeof(int)*(InVLen-(Val-InV)-1)); 00707 *Val = NI.GetId(); 00708 } else { // the other end could point at us but it doesn't 00709 memmove(NI.OutNIdV+e, NI.OutNIdV+e+1, sizeof(int)*(NI.GetOutDeg()-e-1)); 00710 NI.OutNIdV[NI.GetOutDeg()-1]=DelNId; e--; 00711 } 00712 NResolve++; 00713 } 00714 if (cnt % Mega(10) == 0) { printf("."); fflush(stdout); } 00715 } 00716 printf("\n %llu resolved edges [%s]\n", NResolve, ExeTm.GetStr()); 00717 // check if there are two nodes that still have space and are not yet connected and connect them 00718 printf(" filling empty edge slots...\n"); 00719 TIntPrV SlotNIdV; 00720 THash<TInt, TInt> SlotNIdH; 00721 int CumSlots=0; 00722 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00723 if (NI.GetOutNId(NI.GetOutDeg()-1) == DelNId) { 00724 int NSlots = 0; 00725 for (int s = NI.GetOutDeg()-1; s >= 0; s--) { 00726 if (NI.GetOutNId(s) == DelNId) { NSlots++; } 00727 else { break; } 00728 } 00729 SlotNIdV.Add(TIntPr(NI.GetId(), NSlots)); 00730 SlotNIdH.AddDat(NI.GetId(), NSlots); 00731 CumSlots+=NSlots; 00732 } 00733 } 00734 printf(" %d nodes, with %d spokes available, %d edges\n", SlotNIdH.Len(), CumSlots, CumSlots/2); 00735 TIntV NIdV; SlotNIdH.GetKeyV(NIdV); 00736 for (int ni1 = 0; ni1 < NIdV.Len(); ni1++) { 00737 const int nid = NIdV[ni1]; 00738 if (! SlotNIdH.IsKey(nid) || SlotNIdH.GetDat(nid) == 0) { continue; } 00739 const int NI1Slots = SlotNIdH.GetDat(nid); 00740 if ((SlotNIdH.GetMxKeyIds() - SlotNIdH.Len())/double(SlotNIdH.GetMxKeyIds()) > 0.5) { SlotNIdH.Defrag(); } 00741 TNodeI NI = GetNI(nid); 00742 for (int NTries = 0; NTries < 4*NI1Slots && NI.GetOutNId(NI.GetOutDeg()-1) == DelNId; NTries++) { 00743 const int nid2 = SlotNIdH.GetKey(SlotNIdH.GetRndKeyId(Rnd)); 00744 if (nid == nid2) { continue; } 00745 TNodeI NI2 = GetNI(nid2); 00746 // insert the edge 00747 if (NI2.GetOutNId(NI2.GetOutDeg()-1)==DelNId && ! NI2.IsInNId(NI.GetId())) { 00748 int *V1 = (int *) BinSearch2(NI.OutNIdV, NI.OutNIdV+NI.GetOutDeg(), NI2.GetId()); 00749 int *V2 = (int *) BinSearch2(NI2.InNIdV, NI2.InNIdV+NI2.GetInDeg(), NI.GetId()); 00750 if (*V1 != DelNId) { memmove(V1+1, V1, sizeof(int)*(NI.GetOutDeg()-(V1-NI.OutNIdV)-1)); } 00751 if (*V2 != DelNId) { memmove(V2+1, V2, sizeof(int)*(NI2.GetInDeg()-(V2-NI2.InNIdV)-1)); } 00752 *V1 = NI2.GetId(); *V2 = NI.GetId(); 00753 NAddit++; 00754 SlotNIdH.GetDat(nid)--; SlotNIdH.GetDat(nid2)--; 00755 } 00756 if (SlotNIdH.GetDat(nid2) == 0) { SlotNIdH.DelKey(nid2); continue; } 00757 if (SlotNIdH.GetDat(nid) == 0) { SlotNIdH.DelKey(nid); break; } 00758 } 00759 if (ni1 % Mega(1) == 0) { printf("."); fflush(stdout); } 00760 } 00761 printf(" %llu edges added.\nDONE. TOTAL REWIRE TIME [%s]\n\n", NAddit, ExeTm.GetStr()); 00762 } 00763 00764 template <class TNodeData, bool IsDir> 00765 PNGraph TBigNet<TNodeData, IsDir>::GetNGraph(const bool& RenumberNodes) const { 00766 IAssert(RenumberNodes == false); 00767 PNGraph Graph = TNGraph::New(); 00768 Graph->Reserve(GetNodes(), 0); 00769 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00770 Graph->AddNode(NI.GetId(), Pool, NI.GetInVId(), NI.GetOutVId()); 00771 } 00772 return Graph; 00773 } 00774 00775 template <class TNodeData, bool IsDir> 00776 PNGraph TBigNet<TNodeData, IsDir>::GetSubNGraph(const TIntV& NIdV) const { 00777 PNGraph Graph = TNGraph::New(NIdV.Len(), 0); 00778 // add nodes 00779 for (int i = 0; i < NIdV.Len(); i++) { 00780 Graph->AddNode(NIdV[i]); } 00781 // reserve node in- and out-degree 00782 for (int i = 0; i < NIdV.Len(); i++) { 00783 const int SrcNId = NIdV[i]; 00784 const TNodeI NI = GetNI(SrcNId); 00785 int InDeg=0, OutDeg=0; 00786 for (int e = 0; e < NI.GetInDeg(); e++) { if (Graph->IsNode(NI.GetInNId(e))) InDeg++; } 00787 for (int e = 0; e < NI.GetOutDeg(); e++) { if (Graph->IsNode(NI.GetOutNId(e))) OutDeg++; } 00788 Graph->ReserveNIdInDeg(SrcNId, InDeg); 00789 Graph->ReserveNIdOutDeg(SrcNId, OutDeg); 00790 } 00791 // add edges 00792 for (int i = 0; i < NIdV.Len(); i++) { 00793 const int SrcNId = NIdV[i]; 00794 const TNodeI NI = GetNI(SrcNId); 00795 for (int e = 0; e < NI.GetOutDeg(); e++) { 00796 const int DstNId = NI.GetOutNId(e); 00797 if (Graph->IsNode(DstNId)) { 00798 Graph->AddEdge(SrcNId, DstNId); } 00799 } 00800 } 00801 return Graph; 00802 } 00803 00804 template <class TNodeData, bool IsDir> 00805 TPt<TBigNet<TNodeData, IsDir> > TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes) const { 00806 const bool isDir = IsDir, onlySources = HasFlag(gfSources); 00807 TSize Edges=0; 00808 // find in- and out-degree 00809 TSparseHash<TInt, TIntPr> NIdDegH(NIdV.Len()); 00810 for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); } 00811 for (int i = 0; i < NIdV.Len(); i++) { 00812 const TNodeI NI = GetNI(NIdV[i]); 00813 int InDeg=0, OutDeg=0; 00814 for (int n = 0; n < NI.GetOutDeg(); n++) { 00815 if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; } 00816 if (! onlySources && isDir) { 00817 for (int n = 0; n < NI.GetInDeg(); n++) { 00818 if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; } 00819 } 00820 Edges += OutDeg; 00821 NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg); 00822 } 00823 // make network 00824 typedef TBigNet<TNodeData, IsDir> TBNet; 00825 TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected)); 00826 TBNet& NewNet = *NewNetPt; 00827 NewNet.Flags = Flags; 00828 // add nodes 00829 if (isDir || onlySources) { 00830 for (int i = 0; i < NIdV.Len(); i++) { 00831 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00832 NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00833 } else { 00834 for (int i = 0; i < NIdV.Len(); i++) { 00835 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00836 NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00837 } 00838 // add edges 00839 for (int i = 0; i < NIdV.Len(); i++) { 00840 int NId = NIdV[i]; 00841 const TNodeI NI = GetNI(NId); 00842 int *NIdVPt = (int *) NewNet.GetOutNIdVPt(NId); 00843 int deg = 0; 00844 for (int e = 0; e < NI.GetOutDeg(); e++) { 00845 const int Dst = NI.GetOutNId(e); 00846 if (NewNet.IsNode(Dst)) { 00847 *NIdVPt = Dst; NIdVPt++; deg++; } 00848 } 00849 EAssert(deg == NIdDegH.GetDat(NId).Val2); 00850 if (isDir && ! onlySources) { 00851 EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0) 00852 || (NI.GetInVId() != NI.GetOutVId())); 00853 int * NIdVPt = (int *) NewNet.GetInNIdVPt(NId); 00854 int deg = 0; 00855 for (int e = 0; e < NI.GetInDeg(); e++) { 00856 const int Dst = NI.GetInNId(e); 00857 if (NewNet.IsNode(Dst)) { 00858 *NIdVPt = Dst; NIdVPt++; deg++; } 00859 } 00860 EAssert(deg == NIdDegH.GetDat(NId).Val1); 00861 } 00862 } 00863 return NewNetPt; 00864 } 00865 00866 template <class TNodeData, bool IsDir> 00867 void TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, TBigNet* NewNetPt, const bool& RenumberNodes) const { 00868 printf("Set subgraph on %d nodes\n", NIdV.Len()); TExeTm ExeTm; 00869 const bool isDir = IsDir, onlySources = HasFlag(gfSources); 00870 TSize Edges=0; 00871 // find in- and out-degree 00872 THash<TInt, TIntPr> NIdDegH(NIdV.Len()); 00873 for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); } 00874 for (int i = 0; i < NIdV.Len(); i++) { 00875 const TNodeI NI = GetNI(NIdV[i]); 00876 int InDeg=0, OutDeg=0; 00877 for (int n = 0; n < NI.GetOutDeg(); n++) { 00878 if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; } 00879 if (! onlySources && isDir) { 00880 for (int n = 0; n < NI.GetInDeg(); n++) { 00881 if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; } 00882 } 00883 Edges += OutDeg; 00884 NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg); 00885 } 00886 // make network 00887 //typedef TBigNet<TNodeData, IsDir> TBNet; 00888 //TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected)); 00889 NewNetPt->Reserve(NIdV.Len(), Edges); 00890 TBigNet<TNodeData, IsDir>& NewNet = *NewNetPt; 00891 NewNet.Flags = Flags; 00892 TIntSet NIdMap; 00893 // add nodes 00894 if (! RenumberNodes) { 00895 if (isDir || onlySources) { 00896 for (int i = 0; i < NIdV.Len(); i++) { 00897 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00898 NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00899 } else { 00900 for (int i = 0; i < NIdV.Len(); i++) { 00901 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00902 NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00903 } 00904 } else { // renumber nodes 00905 NIdMap.Gen(NIdV.Len()); 00906 for (int i = 0; i < NIdV.Len(); i++) { NIdMap.AddKey(NIdV[i]); } 00907 if (isDir || onlySources) { 00908 for (int i = 0; i < NIdV.Len(); i++) { 00909 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00910 NewNet.AddNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00911 } else { 00912 for (int i = 0; i < NIdV.Len(); i++) { 00913 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00914 NewNet.AddUndirNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00915 } 00916 } 00917 // add edges 00918 for (int i = 0; i < NIdV.Len(); i++) { 00919 int NId = NIdV[i]; 00920 const TNodeI NI = GetNI(NId); 00921 int *NIdVPt = (int *) NewNet.GetOutNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId); 00922 int deg = 0; 00923 for (int e = 0; e < NI.GetOutDeg(); e++) { 00924 const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetOutNId(e)):NI.GetOutNId(e); 00925 if (NewNet.IsNode(Dst)) { 00926 *NIdVPt = Dst; NIdVPt++; deg++; } 00927 } 00928 EAssert(deg == NIdDegH.GetDat(NId).Val2); 00929 if (isDir && ! onlySources) { 00930 EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0) 00931 || (NI.GetInVId() != NI.GetOutVId())); 00932 int * NIdVPt = (int *) NewNet.GetInNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId); 00933 int deg = 0; 00934 for (int e = 0; e < NI.GetInDeg(); e++) { 00935 const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetInNId(e)):NI.GetInNId(e); 00936 if (NewNet.IsNode(Dst)) { 00937 *NIdVPt = Dst; NIdVPt++; deg++; } 00938 } 00939 EAssert(deg == NIdDegH.GetDat(NId).Val1); 00940 } 00941 } 00942 printf(" %u edges [%s]\n", uint(Edges), ExeTm.GetStr()); 00943 } 00944 00945 template <class TNodeData, bool IsDir> 00946 void TBigNet<TNodeData, IsDir>::Reserve(const int& Nodes, const TSize& Edges) { 00947 NodeH.Gen(TMath::Mx(int(1.1*Nodes), 100)); 00948 Pool = TVPool(TMath::Mx(Edges, (TSize) Mega(1)), Mega(100), true); 00949 } 00950 00951 // check that in- and out-edges matchs, the table is sorted and so on 00952 template <class TNodeData, bool IsDir> 00953 bool TBigNet<TNodeData, IsDir>::IsOk() const { 00954 // check the node pool 00955 TIntV ValV; TExeTm ExeTm; 00956 printf("Is ok network:\n Check Vec..."); 00957 for (uint id = 1; id < Pool.GetVecs(); id++) { 00958 Pool.GetV(id, ValV); 00959 if (! ValV.Empty()) { 00960 EAssert((0<=ValV[0] && ValV[0]<MxNId) || ValV[0]==DelNId); 00961 try { 00962 for (int i = 1; i < ValV.Len(); i++) { 00963 //if (ValV[i]==DelNId) { continue; } 00964 // sorted & no duplicates & no empty values (can have deleted nodes) 00965 EAssertR((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId), 00966 TStr::Fmt("NId1: %d NId2:%d", ValV[i-1](),ValV[i]())); 00967 EAssertR((0<=ValV[i] && ValV[i]<MxNId) || ValV[i]==DelNId, 00968 TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId)); 00969 if (! OnlySources()) { 00970 EAssertR(IsNode(ValV[i]) || ValV[i]==DelNId, 00971 TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId)); } // every link is a node 00972 } 00973 } catch (PExcept Except){ 00974 printf(" %s\n", Except->GetStr().CStr()); 00975 printf(" vec id: %d, lenght: %d\n", id, ValV.Len()); 00976 for (int i = 1; i < ValV.Len(); i++) { 00977 printf(" %d", ValV[i]()); 00978 if (!((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId))) { printf("*"); } 00979 } 00980 printf("\n"); 00981 return false; 00982 } 00983 } 00984 if (id % 10000 == 0) { 00985 printf("\r %dk / %dk [%s]", id/1000, Pool.GetVecs()/1000, ExeTm.GetStr()); } 00986 } 00987 printf("[%s]\n Check Edges...\n", ExeTm.GetStr()); 00988 // check edges 00989 int ErrCnt = 0; 00990 if (! OnlySources()) { 00991 int Cnt=0; 00992 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, Cnt++) { 00993 // nodes that point to NI, have it on out-list 00994 for (int e = 0; e < NI.GetInDeg(); e++) { 00995 if (NI.GetInNId(e) == DelNId) { continue; } // skip deleted nodes 00996 if (! IsEdge(NI.GetInNId(e), NI.GetId())) { 00997 printf("\nno edge: %d --> %d", NI.GetInNId(e), NI.GetId()); 00998 printf("NId: %d deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()); 00999 for (int i = 0; i < NI.GetInDeg(); i++) { printf(" %d", NI.GetInNId(i)); } 01000 TNodeI NI2 = GetNI(NI.GetInNId(e)); 01001 printf("\nNId2: %d deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg()); 01002 for (int j = 0; j < NI2.GetOutDeg(); j++) { printf(" %d", NI2.GetOutNId(j)); } 01003 printf("\n"); 01004 ErrCnt++; 01005 } 01006 //EAssertR(IsEdge(NI.GetInNId(e), NI.GetId()), 01007 // TStr::Fmt("no edge: %d --> %d", NI.GetInNId(e), NI.GetId())); 01008 } 01009 // nodes NI points to, have it on in-list 01010 for (int e = 0; e < NI.GetOutDeg(); e++) { 01011 if (NI.GetOutNId(e) == DelNId) { continue; } 01012 const int InVId = GetNode(NI.GetOutNId(e)).InVId; 01013 int* DstInV = (int *)Pool.GetValVPt(InVId); 01014 if (BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) == NULL) { 01015 printf("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e)); 01016 printf("NId: %d deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()); 01017 for (int i = 0; i < NI.GetOutDeg(); i++) { printf(" %d", NI.GetOutNId(i)); } 01018 TNodeI NI2 = GetNI(NI.GetOutNId(e)); 01019 printf("\nNId2: %d deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg()); 01020 for (int j = 0; j < NI2.GetInDeg(); j++) { printf(" %d", NI2.GetInNId(j)); } 01021 printf("\n"); ErrCnt++; 01022 } 01023 //EAssertR(BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) != NULL, 01024 // TStr::Fmt("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e))); 01025 } 01026 if (ErrCnt > 5) { FailR("error count too large!"); } 01027 if (Cnt % 100000 == 0) { 01028 printf("\r%s [%s]", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr()); } 01029 } 01030 printf("\r%s [%s]\n", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr()); 01031 } 01032 return true; 01033 } 01034 01035 template <class TNodeData, bool IsDir> 01036 void TBigNet<TNodeData, IsDir>::Dump(const TStr& Desc) const { 01037 if (! Desc.Empty()) { printf("\n%s (%d, %u):\n", Desc.CStr(), GetNodes(), GetEdges()); } 01038 else { printf("\nNodes: %d, Edges: %u\n", GetNodes(), GetEdges()); } 01039 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 01040 printf("%d]\n IN %d]", NI.GetId(), NI.GetInDeg()); 01041 for (int i=0; i<NI.GetInDeg(); i++) { printf(" %d", NI.GetInNId(i)); } 01042 if (IsDir) { 01043 printf("\n OUT %d]", NI.GetOutDeg()); 01044 for (int i=0; i<NI.GetOutDeg(); i++) { printf(" %d", NI.GetOutNId(i)); } 01045 } 01046 printf("\n"); 01047 } 01048 } 01049 01050 // header: <Nodes, Edges, IsDir> 01051 // format: undirected <NId, Dat, OutDeg, OutNodeV> 01052 // format: directed <NId, Dat, OutDeg, OutNodeV, InDeg, InNodeV> 01053 template <class TNodeData, bool IsDir> 01054 void TBigNet<TNodeData, IsDir>::SaveForDisk(const TStr& OutFNm) const { 01055 const bool IsDirected = IsDir; 01056 TFOut FOut(OutFNm); 01057 FOut.Save(GetNodes()); 01058 FOut.Save(GetEdges()); 01059 FOut.Save(IsDirected); 01060 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 01061 FOut.Save(NI.GetId()); 01062 NI.GetDat().Save(FOut); 01063 FOut.Save(NI.GetOutDeg()); 01064 for (int i = 0; i < NI.GetOutDeg(); i++) { 01065 FOut.Save(NI.GetOutNId(i)); } 01066 if (IsDirected) { 01067 FOut.Save(NI.GetInDeg()); 01068 for (int i = 0; i < NI.GetInDeg(); i++) { 01069 FOut.Save(NI.GetInNId(i)); } 01070 } 01071 } 01072 } 01073 01074 // skip the edge pool and only load the node data hash table 01075 template <class TNodeData, bool IsDir> 01076 void TBigNet<TNodeData, IsDir>::LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH) { 01077 TFIn SIn(InFNm); 01078 TInt MxNId(SIn); 01079 TB32Set Flags(SIn); 01080 printf("skipping Pool...\n"); 01081 TBool FastCopy(SIn); 01082 uint64 _GrowBy, _MxVals, _Vals; 01083 SIn.Load(_GrowBy); SIn.Load(_MxVals); SIn.Load(_Vals); 01084 TInt EmptyVal(SIn); 01085 int Tmp; 01086 for (TSize ValN = 0; ValN < _Vals; ValN++) { SIn.Load(Tmp); } 01087 TInt MxVals(SIn), Vals(SIn); 01088 uint64 Offset; 01089 for (int ValN = 0; ValN < Vals; ValN++) { SIn.Load(Offset); } 01090 printf("loading Hode hash table...\n"); 01091 NodeH.Load(SIn); 01092 } 01093 01094 // Save the network without loading it. Save the node hash table as THash or TSparseHash 01095 template <class TNodeData, bool IsDir> 01096 void TBigNet<TNodeData, IsDir>::SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash) { 01097 TFIn FIn(InFNm); 01098 TFOut FOut(OutFNm); 01099 { TInt MxNId(FIn); MxNId.Save(FOut); 01100 TB32Set Flags(FIn); Flags.Save(FOut); 01101 TVPool Pool(FIn); Pool.Save(FOut); } 01102 { TNodeH NodeH(FIn); 01103 if (! SaveSparseHash) { 01104 THash<TInt, TNode> NewH(NodeH.Len(), true); 01105 for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) { 01106 NewH.AddDat(I->Key, I->Dat); 01107 } 01108 NewH.Save(FOut); 01109 } else { 01110 FailR("Not implemented"); 01111 } } 01112 }