SNAP Library , Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // Big Network 00003 template <class TNodeData, bool IsDir> 00004 class TBigNet { 00005 public: 00006 typedef TNodeData TNodeDat; 00007 typedef TBigNet<TNodeData, IsDir> TNet; 00008 typedef TPt<TNet> PNet; 00009 public: 00010 class TNode; 00011 //typedef TSparseHash<TInt, TNode> TNodeH; 00012 typedef THash<TInt, TNode> TNodeH; // use SaveToDisk to convert between the two hash table types 00013 typedef TPt<TBigNet<TNodeData, IsDir> > PBigNet; 00014 typedef TVecPool<TInt> TPool; 00015 typedef TPt<TPool> PPool; 00016 00017 class TNode { 00018 public: 00019 TInt InVId, OutVId; // if undirected, InNIdV == OutNIdV 00020 TNodeDat Dat; 00021 public: 00022 TNode() : InVId(-1), OutVId(-1), Dat() { } 00023 TNode(const int& InVecId, const int& OutVecId) : InVId(InVecId), OutVId(OutVecId), Dat() { } 00024 TNode(const int& InVecId, const int& OutVecId, const TNodeDat& NodeDat) : 00025 InVId(InVecId), OutVId(OutVecId), Dat(NodeDat) { } 00026 TNode(const TNode& Node) : InVId(Node.InVId), OutVId(Node.OutVId), Dat(Node.Dat) { } 00027 TNode(TSIn& SIn) : InVId(SIn), OutVId(SIn), Dat(SIn) { } 00028 void Save(TSOut& SOut) const { SOut.Save(InVId); SOut.Save(OutVId); Dat.Save(SOut); } 00029 bool IsUnused() const { return InVId==-1 && OutVId==-1; } 00030 }; 00031 00032 class TNodeI { 00033 protected: 00034 typedef typename TNodeH::TIter THashIter; 00035 THashIter NodeHI; 00036 TPool *Pool; 00037 int InDeg, OutDeg, *InNIdV, *OutNIdV; // if undirected, InNIdV==OutNIdV 00038 public: 00039 inline void GetInOutNIdV(); 00040 int GetInVId() const { return NodeHI->Dat.InVId; } 00041 int GetOutVId() const { return NodeHI->Dat.OutVId; } 00042 public: 00043 TNodeI() : NodeHI(), Pool(NULL), InDeg(0), OutDeg(0), InNIdV(NULL), OutNIdV(NULL) { } 00044 TNodeI(const THashIter& NodeHIter, TPool *PoolPt) : NodeHI(NodeHIter), Pool(PoolPt) { GetInOutNIdV(); } 00045 TNodeI(const TNodeI& NodeI) : NodeHI(NodeI.NodeHI), Pool(NodeI.Pool) { GetInOutNIdV(); } 00046 TNodeI& operator = (const TNodeI& NI) { NodeHI=NI.NodeHI; Pool=NI.Pool; GetInOutNIdV(); return *this; } 00047 TNodeI& operator++ (int) { NodeHI++; GetInOutNIdV(); return *this; } 00048 bool operator < (const TNodeI& NI) const { return NodeHI < NI.NodeHI; } 00049 bool operator == (const TNodeI& NI) const { return NodeHI == NI.NodeHI; } 00050 int GetId() const { return NodeHI->Key(); } 00051 int GetDeg() const { return GetInDeg()+(InNIdV!=OutNIdV?GetOutDeg():0); } 00052 int GetInDeg() const { return InDeg; } 00053 int GetOutDeg() const { return OutDeg; } 00054 int GetInNId(const int& NodeN) const { return InNIdV[NodeN]; } 00055 int GetOutNId(const int& NodeN) const { return OutNIdV[NodeN]; } 00056 int GetOutNbrId(const int& NodeN) const { return NodeN<OutDeg ? OutNIdV[NodeN]:InNIdV[NodeN-OutDeg]; } 00057 bool IsInNId(const int& NId) const { return BinSearch(InNIdV, InNIdV+InDeg, NId)!=NULL; } 00058 bool IsOutNId(const int& NId) const { return BinSearch(OutNIdV, OutNIdV+OutDeg, NId)!=NULL; } 00059 bool IsNbrNId(const int& NId) const { return IsOutNId(NId) || IsInNId(NId); } 00060 const TNodeData& operator () () const { return GetDat(); } 00061 TNodeData& operator () () { return GetDat(); } 00062 const TNodeData& GetDat() const { return NodeHI->Dat.Dat; } 00063 TNodeData& GetDat() { return NodeHI->Dat.Dat; } 00064 // requires pointer back to the network (see as in TNodeNet) 00065 //const TNodeData& GetInNDat(const int& NodeN) const { return Net->GetNDat(GetInNId(NodeN)); } 00066 //TNodeData& GetInNDat(const int& NodeN) { return Net->GetNDat(GetInNId(NodeN)); } 00067 //const TNodeData& GetOutNDat(const int& NodeN) const { return Net->GetNDat(GetOutNId(NodeN)); } 00068 //TNodeData& GetOutNDat(const int& NodeN) { return Net->GetNDat(GetOutNId(NodeN)); } 00069 //const TNodeData& GetNbrNDat(const int& NodeN) const { return Net->GetNDat(GetNbrNId(NodeN)); } 00070 //TNodeData& GetNbrNDat(const int& NodeN) { return Net->GetNDat(GetNbrNId(NodeN)); } 00071 void Dump() const; 00072 friend class TBigNet<TNodeData, IsDir>; 00073 }; 00074 00075 class TEdgeI { 00076 private: 00077 TNodeI CurNode, EndNode; 00078 int CurEdge; 00079 public: 00080 TEdgeI() : CurNode(), EndNode(), CurEdge(0) { } 00081 TEdgeI(const TNodeI& NodeI, const TNodeI& EndNodeI, const int& EdgeN=0) : CurNode(NodeI), EndNode(EndNodeI), CurEdge(0) { } 00082 TEdgeI(const TEdgeI& EdgeI) : CurNode(EdgeI.CurNode), EndNode(EdgeI.EndNode), CurEdge(EdgeI.CurEdge) { } 00083 TEdgeI& operator = (const TEdgeI& EdgeI) { if (this!=&EdgeI) { CurNode=EdgeI.CurNode; EndNode=EdgeI.EndNode; CurEdge=EdgeI.CurEdge; } return *this; } 00084 TEdgeI& operator++ (int) { CurEdge++; if (CurEdge >= CurNode.GetOutDeg()) { CurEdge=0; CurNode++; 00085 while (CurNode < EndNode && CurNode.GetOutDeg()==0) { CurNode++; } } return *this; } 00086 bool operator < (const TEdgeI& EdgeI) const { return CurNode<EdgeI.CurNode || (CurNode==EdgeI.CurNode && CurEdge<EdgeI.CurEdge); } 00087 bool operator == (const TEdgeI& EdgeI) const { return CurNode == EdgeI.CurNode && CurEdge == EdgeI.CurEdge; } 00088 int GetId() const { return -1; } 00089 int GetSrcNId() const { return CurNode.GetId(); } 00090 int GetDstNId() const { return CurNode.GetOutNId(CurEdge); } 00091 const TNodeData& GetSrcNDat() const { return CurNode.GetNDat(); } 00092 TNodeData& GetDstNDat() { return CurNode.GetOutNDat(CurEdge); } 00093 friend class TNodeNet<TNodeData>; 00094 }; 00095 protected: 00096 bool IsNode(const int& NId, TNode& Node) const { return NodeH.IsKeyGetDat(NId, Node); } 00097 int* GetInNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).InVId); } 00098 int* GetOutNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).OutVId); } 00099 static void AddSorted(int* Beg, int* End, const int& Val); 00100 static const int* BinSearch(const int* Beg, const int* End, const int& Val); 00101 static const int* BinSearch2(const int* Beg, const int* End, const int& Val); 00102 const TNode& GetNode(const int& NId) const { return NodeH.GetDat(NId); } 00103 TNode& GetNode(const int& NId) { return NodeH.GetDat(NId); } 00104 public: 00105 enum { DelNId = INT_MAX }; // id of a deleted node 00106 protected: 00107 TCRef CRef; 00108 TInt MxNId; 00109 TB32Set Flags; 00110 TPool Pool; 00111 TNodeH NodeH; 00112 public: 00113 TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources=false); 00114 TBigNet(TSIn& SIn) : MxNId(SIn), Flags(SIn), Pool(SIn), NodeH(SIn) { TBool Dir(SIn); IAssert(Dir()==IsDir); } 00115 virtual ~TBigNet() { } 00116 virtual void Save(TSOut& SOut) const; 00117 static PBigNet New(const int& Nodes, const TSize& Edges, const bool& Sources=false) { 00118 return PBigNet(new TBigNet(Nodes, Edges, Sources)); } 00119 static PBigNet Load(TSIn& SIn) { return PBigNet(new TBigNet(SIn)); } 00120 TBigNet& operator = (const TBigNet& Net) { if (this!=&Net) { 00121 MxNId=Net.MxNId; Flags=Net.Flags; Pool=Net.Pool; NodeH=Net.NodeH; } return *this; } 00122 00123 bool OnlySources() const { return Flags.In(gfSources); } 00124 bool HasFlag(const TGraphFlag& Flag) const { 00125 return HasGraphFlag(typename TBigNet, Flag) || (Flag==gfSources && OnlySources()); } 00126 void DumpFlags() const; 00127 00128 // vertices 00129 int GetNodes() const { return NodeH.Len(); } 00130 int GetMxNId() const { return MxNId; } 00131 int AddNode(int NId, const int& InDeg, const int& OutDeg); 00132 int AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeDat& NodeDat); 00133 int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV); 00134 int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeDat& NodeDat); 00135 int AddUndirNode(int NId, const int& Deg); 00136 int AddUndirNode(int NId, const int& Deg, const TNodeDat& NodeDat); 00137 int AddUndirNode(int NId, const TIntV& EdgeNIdV); 00138 int AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeDat& NodeDat); 00139 void SetInNIdV(int NId, const TIntV& InNIdV); 00140 void SetOutNIdV(int NId, const TIntV& OutNIdV); 00141 void GetInNIdV(int NId, TIntV& OutNIdV) const; 00142 void GetOutNIdV(int NId, TIntV& OutNIdV) const; 00143 bool IsNode(const int& NId) const { return NodeH.IsKey(NId); } 00144 TNodeI BegNI() const { return TNodeI(NodeH.BegI(), (TPool *)&Pool); } 00145 TNodeI EndNI() const { return TNodeI(NodeH.EndI(), (TPool *)&Pool); } 00146 TNodeI GetNI(const int& NId) const { return TNodeI(NodeH.GetI(NId), (TPool *)&Pool); } 00147 TNodeDat& GetNDat(const int& NId) { return NodeH.GetDat(NId).Dat; } 00148 const TNodeDat& GetNDat(const int& NId) const { return NodeH.GetDat(NId).Dat; } 00149 // edges 00150 TEdgeI BegEI() const { TNodeI NI=BegNI(); while(NI<EndNI() && NI.GetOutDeg()==0) NI++; return TEdgeI(NI, EndNI()); } 00151 TEdgeI EndEI() const { return TEdgeI(EndNI(), EndNI()); } 00152 TEdgeI GetEI(const int& EId) const; // not supported 00153 00154 // delete nodes 00155 int IsolateNode(int NId); // isolate the node by setting edge endpoints to point to node id DelNId, IsNode(DelNId)==false) 00156 int DelNode(int NId); // set neigbors to point to DelNId, delete node from the node table 00157 bool IsIsoNode(const int& NId) const; 00158 uint GetDelEdges(); // the number deleted edges 00159 void CompactEdgePool(); // after nodes are isolated and deleted, we compact the empty space 00160 00161 // edges 00162 ::TSize GetEdges() const { return Pool.GetVals(); } 00163 int AddEdge(const int& SrcNId, const int& DstNId); 00164 bool IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir=true) const; 00165 00166 void SortEdgeV(); 00167 void InvertFromSources(uint ExpectNodes=0); // add missing nodes and in-links 00168 void Rewire(TRnd& Rnd = TInt::Rnd); // create a random network with same degree distribution (configuration model) 00169 00170 // manipulation 00171 PNGraph GetNGraph(const bool& RenumberNodes=false) const; 00172 PNGraph GetSubNGraph(const TIntV& NIdV) const; 00173 PBigNet GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes=false) const; 00174 void GetSubGraph(const TIntV& NIdV, TBigNet* NewNet, const bool& RenumberNodes=false) const; 00175 00176 int GetRndNId(TRnd& Rnd=TInt::Rnd) const { return NodeH.GetKey(NodeH.GetRndKeyId(Rnd)); } 00177 TNodeI GetRndNI(TRnd& Rnd=TInt::Rnd) const { return GetNI(GetRndNId(Rnd)); } 00178 void GetNIdV(TIntV& NIdV) const; 00179 00180 bool Empty() const { return GetNodes()==0; } 00181 void Clr(const bool& DoDel = true) { MxNId = 0; NodeH.Clr(DoDel); Pool.Clr(DoDel); } 00182 void Reserve(const int& Nodes, const TSize& Edges); 00183 void Defrag(const bool& OnlyNodeLinks = false) { } 00184 bool IsOk() const; 00185 void Dump(const TStr& Desc = TStr()) const; 00186 void SaveForDisk(const TStr& OutFNm) const; 00187 00188 static void LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH); 00189 static void SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash); 00190 friend class TPt<TBigNet>; 00191 }; 00192 00193 // set flags 00194 namespace TSnap { 00195 template <class TNodeData, bool IsDir> struct IsDirected<TBigNet<TNodeData, IsDir> > { enum { Val = 0 }; }; 00196 template <class TNodeData> struct IsDirected<TBigNet<TNodeData, true> > { enum { Val = 1 }; }; 00197 template <class TNodeData, bool IsDir> struct IsNodeDat<TBigNet<TNodeData, IsDir> > { enum { Val = 1 }; }; 00198 } 00199 00200 template <class TNodeData, bool IsDir> 00201 inline void TBigNet<TNodeData, IsDir>::TNodeI::GetInOutNIdV() { 00202 if (NodeHI.IsEnd()) return; 00203 const TNode& N = NodeHI->Dat; 00204 if (! Pool->IsVId(N.InVId)) { 00205 InDeg=0; OutDeg=0; } 00206 else { 00207 InDeg=Pool->GetVLen(N.InVId); 00208 OutDeg=Pool->GetVLen(N.OutVId); 00209 InNIdV=(int *)Pool->GetValVPt(N.InVId); 00210 OutNIdV=(int *)Pool->GetValVPt(N.OutVId); 00211 } 00212 } 00213 00214 template <class TNodeData, bool IsDir> 00215 void TBigNet<TNodeData, IsDir>::TNodeI::Dump() const { 00216 printf("NodeId: %d\n", GetId()); 00217 printf(" I:%4d]", GetInDeg()); 00218 for (int i = 0; i < GetInDeg(); i++) { printf(" %d", GetInNId(i)); } 00219 printf("\n O:%4d]", GetOutDeg()); 00220 for (int i = 0; i < GetOutDeg(); i++) { printf(" %d", GetOutNId(i)); } 00221 printf("\n"); 00222 } 00223 00224 template <class TNodeData, bool IsDir> 00225 void TBigNet<TNodeData, IsDir>::AddSorted(int* Beg, int* End, const int& Val) { 00226 // there is at least one free slot available for the Val 00227 IAssertR(*(End-1)==TInt::Mx, "Edge can not be added: no free space"); 00228 // find position to insert 00229 int Half, Len = int(End-Beg); 00230 while (Len > 0) { 00231 Half = Len/2; 00232 int* Mid=Beg+Half; 00233 if (*Mid < Val) { Beg=Mid+1; Len=Len-Half-1; } else { Len=Half; } } 00234 IAssertR(*Beg != Val, "Value already existis"); 00235 // insert 00236 memmove(Beg+1, Beg, sizeof(int)*(End-Beg-1)); 00237 *Beg = Val; 00238 } 00239 00240 template <class TNodeData, bool IsDir> 00241 const int* TBigNet<TNodeData, IsDir>::BinSearch(const int* Beg, const int* End, const int& Val) { 00242 End--; const int *Mid; 00243 while (Beg <= End) { Mid = Beg+(End-Beg)/2; 00244 if (*Mid == Val) { return Mid; } 00245 else if (Val < *Mid) { End=Mid-1; } else { Beg=Mid+1; } 00246 } 00247 return NULL; 00248 } 00249 00250 template <class TNodeData, bool IsDir> 00251 const int* TBigNet<TNodeData, IsDir>::BinSearch2(const int* Beg, const int* End, const int& Val) { 00252 const int* First = Beg; 00253 const int* Last = End; 00254 const int* Mid; 00255 TSize len = End-Beg, half; 00256 while (len > 0) { 00257 half = len>>1; Mid=First+half; 00258 if (*Mid < Val) { First = Mid; First++; len=len-half-1; } 00259 else { len=half; } 00260 } 00261 return First==Last ? Last-1 : First; 00262 } 00263 00264 template <class TNodeData, bool IsDir> 00265 TBigNet<TNodeData, IsDir>::TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources) : 00266 CRef(), MxNId(0), Flags(), Pool(IsDir ? 2*Edges:Edges, 10000000, true, TInt::Mx), NodeH(Nodes) { 00267 //Flags.Incl(gfNodeGraph); 00268 //Flags.Incl(gfNetwork); 00269 //if (IsDir) { Flags.Incl(gfDirected); } 00270 if (Sources) { 00271 Flags.Incl(gfSources); 00272 IAssertR(! IsDir, "Jure: not clear what happens is graph is Undirected and has only sources."); 00273 } 00274 //DumpFlags(); 00275 } 00276 00277 template <class TNodeData, bool IsDir> 00278 void TBigNet<TNodeData, IsDir>::Save(TSOut& SOut) const { 00279 MxNId.Save(SOut); 00280 Flags.Save(SOut); 00281 Pool.Save(SOut); 00282 NodeH.Save(SOut); 00283 TBool(IsDir).Save(SOut); 00284 } 00285 00286 template <class TNodeData, bool IsDir> 00287 void TBigNet<TNodeData, IsDir>::DumpFlags() const { 00288 for (int i = 1; i <int(gfMx); i++) { 00289 if (HasFlag(TGraphFlag(i))) { printf(" +"); } 00290 else { printf(" -"); } 00291 printf("%s", TSnap::GetFlagStr(TGraphFlag(i)).CStr()); 00292 } 00293 printf("\n"); 00294 } 00295 00296 template <class TNodeData, bool IsDir> 00297 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg) { 00298 CAssert(IsDir); 00299 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00300 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00301 TNode& Node = NodeH.AddDat(NId); 00302 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00303 Node.InVId = Pool.AddEmptyV(InDeg); 00304 Node.OutVId = Pool.AddEmptyV(OutDeg); 00305 return NId; 00306 } 00307 00308 template <class TNodeData, bool IsDir> 00309 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeData& NodeDat) { 00310 CAssert(IsDir); 00311 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00312 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00313 TNode& Node = NodeH.AddDat(NId); 00314 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00315 Node.InVId = Pool.AddEmptyV(InDeg); 00316 Node.OutVId = Pool.AddEmptyV(OutDeg); 00317 Node.Dat = NodeDat; 00318 return NId; 00319 } 00320 00321 template <class TNodeData, bool IsDir> 00322 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg) { 00323 CAssert(! IsDir); 00324 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00325 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00326 TNode& Node = NodeH.AddDat(NId); 00327 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00328 Node.InVId = Pool.AddEmptyV(Deg); 00329 Node.OutVId = Node.InVId; // same vector 00330 return NId; 00331 } 00332 00333 template <class TNodeData, bool IsDir> 00334 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg, const TNodeData& NodeDat) { 00335 CAssert(! IsDir); 00336 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00337 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00338 TNode& Node = NodeH.AddDat(NId); 00339 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00340 Node.InVId = Pool.AddEmptyV(Deg); 00341 Node.OutVId = Node.InVId; // same vector 00342 Node.Dat = NodeDat; 00343 return NId; 00344 } 00345 00346 template <class TNodeData, bool IsDir> 00347 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV) { 00348 CAssert(IsDir); 00349 IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted()); 00350 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00351 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00352 TNode& Node = NodeH.AddDat(NId); 00353 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00354 Node.InVId = Pool.AddV(InNIdV); 00355 Node.OutVId = Pool.AddV(OutNIdV); 00356 return NId; 00357 } 00358 00359 template <class TNodeData, bool IsDir> 00360 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeData& NodeDat) { 00361 CAssert(IsDir); 00362 IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted()); 00363 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00364 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00365 TNode& Node = NodeH.AddDat(NId); 00366 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00367 Node.InVId = Pool.AddV(InNIdV); 00368 Node.OutVId = Pool.AddV(OutNIdV); 00369 Node.Dat = NodeDat; 00370 return NId; 00371 } 00372 00373 template <class TNodeData, bool IsDir> 00374 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV) { 00375 CAssert(! IsDir); 00376 IAssert(EdgeNIdV.IsSorted()); 00377 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00378 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00379 TNode& Node = NodeH.AddDat(NId); 00380 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00381 Node.InVId = Pool.AddV(EdgeNIdV); 00382 Node.OutVId = Node.InVId; 00383 return NId; 00384 } 00385 00386 template <class TNodeData, bool IsDir> 00387 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeData& NodeDat) { 00388 CAssert(! IsDir); 00389 IAssert(EdgeNIdV.IsSorted()); 00390 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00391 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00392 TNode& Node = NodeH.AddDat(NId); 00393 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00394 Node.InVId = Pool.AddV(EdgeNIdV); 00395 Node.OutVId = Node.InVId; 00396 Node.Dat = NodeDat; 00397 return NId; 00398 } 00399 00400 template <class TNodeData, bool IsDir> 00401 void TBigNet<TNodeData, IsDir>::SetInNIdV(int NId, const TIntV& InNIdV) { 00402 TNode Node; EAssert(IsNode(NId, Node)); 00403 TIntV InNodesV; Pool.GetV(Node.InVId, InNodesV); 00404 EAssert(InNIdV.Len() == InNodesV.Len()); 00405 memcpy(InNodesV.BegI(), InNIdV.BegI(), sizeof(TInt)*InNIdV.Len()); 00406 } 00407 00408 template <class TNodeData, bool IsDir> 00409 void TBigNet<TNodeData, IsDir>::SetOutNIdV(int NId, const TIntV& OutNIdV) { 00410 TNode Node; EAssert(IsNode(NId, Node)); 00411 TIntV OutNodesV; Pool.GetV(Node.OutVId, OutNodesV); 00412 EAssert(OutNIdV.Len() == OutNodesV.Len()); 00413 memcpy(OutNodesV.BegI(), OutNIdV.BegI(), sizeof(TInt)*OutNIdV.Len()); 00414 } 00415 00416 template <class TNodeData, bool IsDir> 00417 void TBigNet<TNodeData, IsDir>::GetInNIdV(int NId, TIntV& InNIdV) const { 00418 TNode Node; EAssertR(IsNode(NId, Node), TStr::Fmt("Not node: NId=%d\n", NId).CStr()); 00419 Pool.GetV(Node.InVId, InNIdV); 00420 } 00421 00422 template <class TNodeData, bool IsDir> 00423 void TBigNet<TNodeData, IsDir>::GetOutNIdV(int NId, TIntV& OutNIdV) const { 00424 TNode Node; EAssert(IsNode(NId, Node)); 00425 Pool.GetV(Node.OutVId, OutNIdV); 00426 } 00427 00428 // Node is deleted by setting edge endpoints to point to node id -1 (DelNId) 00429 // No memory is freed 00430 template <class TNodeData, bool IsDir> 00431 int TBigNet<TNodeData, IsDir>::IsolateNode(int NId) { 00432 TIntV OutV; 00433 int NDel = 0; 00434 // out-edges 00435 GetOutNIdV(NId, OutV); 00436 for (int i = 0; i < OutV.Len(); i++) { 00437 if (OutV[i] == DelNId) { break; } // because they are sorted 00438 // fast implementation 00439 const TNode& N = NodeH.GetDat(OutV[i]); 00440 int *InNIdV = (int *) Pool.GetValVPt(N.InVId); 00441 const int Deg = Pool.GetVLen(N.InVId); 00442 int* Val = (int *) BinSearch(InNIdV, InNIdV+Deg, NId); 00443 if (Val == NULL) { 00444 printf("BAD: Can't find: OUT: NId: %d -- OutNbrId: %d\n", NId, OutV[i]()); 00445 continue; 00446 } 00447 IAssert(Val != 0); 00448 memcpy(Val, Val+1, sizeof(int)*int(InNIdV+Deg-Val)); 00449 *(InNIdV+Deg-1) = DelNId; NDel++; 00450 } 00451 OutV.PutAll(DelNId); 00452 // in-edges 00453 if (IsDir) { 00454 TIntV InV; 00455 GetInNIdV(NId, InV); 00456 for (int i = 0; i < InV.Len(); i++) { 00457 if (InV[i] == DelNId) { break; } 00458 // fast implementation 00459 const TNode& N = NodeH.GetDat(InV[i]); 00460 int *OutNIdV = (int *) Pool.GetValVPt(N.OutVId); 00461 const int Deg = Pool.GetVLen(N.OutVId); 00462 int* Val = (int *) BinSearch(OutNIdV, OutNIdV+Deg, NId); 00463 if (Val == NULL) { 00464 printf("IN: NId: %d -- InNbrId: %d\n", NId, OutV[i]()); 00465 continue; 00466 } 00467 IAssert(Val != 0); 00468 memcpy(Val, Val+1, sizeof(int)*int(OutNIdV+Deg-Val)); 00469 *(OutNIdV+Deg-1) = DelNId; NDel++; 00470 } 00471 InV.PutAll(DelNId); 00472 } 00473 return NDel; 00474 } 00475 00476 // set neigbors to point to DelNId, delete node from the node table 00477 template <class TNodeData, bool IsDir> 00478 int TBigNet<TNodeData, IsDir>::DelNode(int NId) { 00479 const int DelEdges = IsolateNode(NId); 00480 NodeH.DelKey(NId); 00481 return DelEdges; 00482 } 00483 00484 template <class TNodeData, bool IsDir> 00485 bool TBigNet<TNodeData, IsDir>::IsIsoNode(const int& NId) const { 00486 if (NId == DelNId) { return true; } 00487 TIntV OutV; 00488 GetOutNIdV(NId, OutV); 00489 if (OutV.Empty() || OutV[0] == DelNId) { return true; } 00490 return false; 00491 } 00492 00493 // the number deleted edges 00494 template <class TNodeData, bool IsDir> 00495 uint TBigNet<TNodeData, IsDir>::GetDelEdges() { 00496 uint DelEdges = 0; 00497 TIntV OutV; 00498 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00499 const int NId = NI.GetId(); 00500 GetOutNIdV(NId, OutV); 00501 for (int i = 0; i < OutV.Len(); i++) { 00502 if (OutV[i] == DelNId) { DelEdges++; } 00503 } 00504 } 00505 return DelEdges; 00506 } 00507 00508 template <class TNodeData, bool IsDir> 00509 void TBigNet<TNodeData, IsDir>::CompactEdgePool() { 00510 Pool.CompactPool(DelNId); 00511 } 00512 00513 template <class TNodeData, bool IsDir> 00514 int TBigNet<TNodeData, IsDir>::AddEdge(const int& SrcNId, const int& DstNId) { 00515 TNode Src; IAssert(IsNode(SrcNId, Src)); 00516 int* OutV = (int *)Pool.GetValVPt(Src.OutVId); 00517 const int OutVLen = Pool.GetVLen(Src.OutVId); 00518 Assert(BinSearch(OutV, OutV+OutVLen, DstNId)==NULL); // no edge yet 00519 AddSorted(OutV, OutV+OutVLen, DstNId); 00520 if (! OnlySources()) { 00521 TNode Dst; IAssert(IsNode(DstNId, Dst)); 00522 int* InV = (int *)Pool.GetValVPt(Dst.InVId); 00523 const int InVLen = Pool.GetVLen(Dst.InVId); 00524 AddSorted(InV, InV+InVLen, SrcNId); 00525 } 00526 return -1; // edge id 00527 } 00528 00529 template <class TNodeData, bool IsDir> 00530 bool TBigNet<TNodeData, IsDir>::IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir) const { 00531 TNode Src, Dst; 00532 if (! IsNode(SrcNId, Src)) { return false; } // no source node 00533 int* OutV1 = (int *)Pool.GetValVPt(Src.OutVId); 00534 const bool IsEdge = BinSearch(OutV1, OutV1+Pool.GetVLen(Src.OutVId), DstNId) != NULL; 00535 if (IsEdge && OnlySources()) { return true; } 00536 const bool IsDstNode = IsNode(DstNId, Dst); 00537 if (! IsDstNode) { return false; } // no destination node 00538 else if (IsEdge) { return true; } // destination and link found 00539 else if (! Dir) { // check for the undirected version of the edge 00540 int* OutV2 = (int *)Pool.GetValVPt(Dst.OutVId); 00541 return BinSearch(OutV2, OutV2+Pool.GetVLen(Dst.OutVId), SrcNId) != NULL; } 00542 else { return false; } 00543 } 00544 00545 template <class TNodeData, bool IsDir> 00546 void TBigNet<TNodeData, IsDir>::GetNIdV(TIntV& NIdV) const { 00547 NIdV.Reserve(GetNodes(), 0); 00548 for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) { 00549 NIdV.Add(I->Key); } 00550 } 00551 00552 template <class TNodeData, bool IsDir> 00553 void TBigNet<TNodeData, IsDir>::SortEdgeV() { 00554 printf("Sorting Edges... "); 00555 TExeTm ExeTm; 00556 TIntV OutV, InV; 00557 int cnt = 0; 00558 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00559 const int NId = NI.GetId(); 00560 GetOutNIdV(NId, OutV); 00561 OutV.Sort(); 00562 if (IsDir) { 00563 GetInNIdV(NId, InV); 00564 InV.Sort(); 00565 } 00566 if (++cnt % Mega(1) == 0) { printf("\r sort:%dm %s", cnt/Mega(1), ExeTm.GetStr()); } 00567 } 00568 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00569 const int NId = NI.GetId(); 00570 GetOutNIdV(NId, OutV); 00571 IAssert(OutV.IsSorted()); 00572 GetInNIdV(NId, InV); 00573 IAssert(InV.IsSorted()); 00574 if (++cnt % Mega(1) == 0) { printf("\r check sorted:%dm %s", cnt/Mega(1), ExeTm.GetStr()); } 00575 } 00576 printf("done. [%s]\n", ExeTm.GetStr()); 00577 } 00578 00579 // add missing nodes and in-links from a network of sources 00580 template <class TNodeData, bool IsDir> 00581 void TBigNet<TNodeData, IsDir>::InvertFromSources(uint ExpectNodes) { 00582 typedef THash<TInt, TInt> TDegHash; 00583 typedef int* TIntPt; 00584 if (ExpectNodes == 0) ExpectNodes=4*GetNodes(); 00585 printf("\nInverting BigNet: reserved for %s nodes\n", TInt::GetMegaStr(ExpectNodes).CStr()); 00586 CAssert(IsDir); 00587 IAssert(OnlySources()); 00588 TDegHash InDegH(ExpectNodes); 00589 TSize NDest=0; 00590 // get node in-degree 00591 uint c=0; TExeTm ExeTm; 00592 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) { 00593 for (int e = 0; e < NI.GetOutDeg(); e++) { 00594 InDegH.AddDat(NI.GetOutNId(e)) += 1; } 00595 if (c%100000==0) printf("\r%s h:%d [%g] ", TInt::GetMegaStr(c).CStr(), InDegH.Len(), ExeTm.GetSecs()); 00596 } 00597 printf("\n Resizing NodePool: %lld -> %lld\n", uint64(Pool.Reserved()), uint64(GetEdges())); 00598 if (2*GetEdges() > Pool.Reserved()) { 00599 Pool.Reserve(2*GetEdges()); } 00600 // add nodes 00601 printf("NodeH: %s nodes, InDeg: %s nodes, Reserve: %s\n", TInt::GetMegaStr(NodeH.Len()).CStr(), 00602 TInt::GetMegaStr(InDegH.Len()).CStr(), TInt::GetMegaStr(ExpectNodes).CStr()); 00603 NodeH.Reserve(ExpectNodes); 00604 for (TDegHash::TIter I = InDegH.BegI(); I < InDegH.EndI(); I++) { 00605 const int NId = I->Key, InDeg = I->Dat; 00606 if (! IsNode(NId)) { 00607 AddNode(NId, InDeg, 0); NDest++; } // new destination node, no out-links 00608 else { 00609 TNode& Node = GetNode(NId); 00610 IAssert(Node.InVId == 0); // no in-links 00611 Node.InVId = Pool.AddEmptyV(InDeg); } 00612 } 00613 InDegH.Clr(true); 00614 printf("Added: %lld destination nodes\n", uint64(NDest)); 00615 printf("Graph nodes: %lld nodes\n", uint64(GetNodes())); 00616 // pointer to in-links vector 00617 THash<TInt, int*> NIdToPtH(GetNodes()); 00618 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) 00619 NIdToPtH.AddDat(NI.GetId(), (int *)Pool.GetValVPt(NI.GetInVId())); 00620 // add in-edges 00621 printf("Adding edges...\n"); 00622 c = 0; 00623 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) { 00624 const int SrcNId = NI.GetId(); 00625 for (int e = 0; e < NI.GetOutDeg(); e++) { 00626 TIntPt& InV = NIdToPtH.GetDat(NI.GetOutNId(e)); 00627 IAssert(*InV == TInt::Mx); 00628 *InV = SrcNId; InV++; 00629 } 00630 if (c%100000==0) printf("\r%s [%g] ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs()); 00631 } 00632 // sort in-links 00633 printf("\nSorting in-links...\n"); 00634 TIntV InNIdV; c = 0; 00635 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) { 00636 Pool.GetV(NI.GetInVId(), InNIdV); 00637 InNIdV.Sort(); 00638 if (c%100000==0) printf("\r%s [%g] ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs()); 00639 } 00640 printf("\r...done [%g]\n", ExeTm.GetSecs()); 00641 Flags.Excl(gfSources); 00642 } 00643 00644 template <class TNodeData, bool IsDir> 00645 void TBigNet<TNodeData, IsDir>::Rewire(TRnd& Rnd) { 00646 uint64 NDup=0, NResolve=0, NAddit=0, cnt = 0; 00647 TExeTm ExeTm; 00648 IAssertR(! IsDir, "Only undirected networks are supported"); 00649 printf("Rewiring the network... 1"); 00650 Pool.ShuffleAll(Rnd); printf("[%s]\n", ExeTm.GetStr()); 00651 //Pool.ShuffleAll(Rnd); printf(" done [%s]\n", ExeTm.GetStr()); 00652 printf(" sorting edges...\n"); 00653 SortEdgeV(); // sort individual edge vectors 00654 printf(" done [%s]\n", ExeTm.GetStr()); 00655 // remove duplicates and self edges 00656 printf(" removing duplicates...\n"); 00657 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) { 00658 const int VId = NI.GetOutVId(); 00659 int Len = Pool.GetVLen(VId); 00660 int* V = (int *)Pool.GetValVPt(VId); 00661 for (int i = 0; i < Len-1 && V[i]!=DelNId; i++) { 00662 if (V[i] == V[i+1] || V[i]==NI.GetId()) { 00663 memcpy(V+i, V+i+1, sizeof(int)*(Len-i-1)); i--; 00664 V[Len-1] = DelNId; NDup++; 00665 } 00666 } 00667 if (Len > 0 && V[Len-1]==NI.GetId()) { V[Len-1] = DelNId; NDup++; } 00668 if (cnt % Mega(10) == 0) { printf("."); fflush(stdout); } 00669 } 00670 printf("\n %llu duplicate edges removed [%s]\n", NDup, ExeTm.GetStr()); 00671 if (OnlySources()) { return; } 00672 // resolve one way edges 00673 printf(" resolving one way edges...\n"); cnt=0; fflush(stdout); 00674 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) { 00675 for (int e = 0; e < NI.GetOutDeg(); e++) { 00676 if (NI.GetOutNId(e) == DelNId) { continue; } // skip deleted nodes 00677 const int InVId = GetNode(NI.GetOutNId(e)).InVId; 00678 const int InVLen = Pool.GetVLen(InVId); IAssert(InVLen>=0 && InVLen < Mega(3)); 00679 int* InV = (int *) Pool.GetValVPt(InVId); 00680 int* Val = (int *) BinSearch2(InV, InV+InVLen, NI.GetId()); 00681 if (*Val == NI.GetId()) { continue; } // points back, everything is ok 00682 if (InVLen>0 && InV[InVLen-1] == DelNId) { // there is space at the end, insert 00683 IAssert((InVLen-(Val-InV)-1) >= 0); 00684 memmove(Val+1, Val, sizeof(int)*(InVLen-(Val-InV)-1)); 00685 *Val = NI.GetId(); 00686 } else { // the other end could point at us but it doesn't 00687 memmove(NI.OutNIdV+e, NI.OutNIdV+e+1, sizeof(int)*(NI.GetOutDeg()-e-1)); 00688 NI.OutNIdV[NI.GetOutDeg()-1]=DelNId; e--; 00689 } 00690 NResolve++; 00691 } 00692 if (cnt % Mega(10) == 0) { printf("."); fflush(stdout); } 00693 } 00694 printf("\n %llu resolved edges [%s]\n", NResolve, ExeTm.GetStr()); 00695 // check if there are two nodes that still have space and are not yet connected and connect them 00696 printf(" filling empty edge slots...\n"); 00697 TIntPrV SlotNIdV; 00698 THash<TInt, TInt> SlotNIdH; 00699 int CumSlots=0; 00700 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00701 if (NI.GetOutNId(NI.GetOutDeg()-1) == DelNId) { 00702 int NSlots = 0; 00703 for (int s = NI.GetOutDeg()-1; s >= 0; s--) { 00704 if (NI.GetOutNId(s) == DelNId) { NSlots++; } 00705 else { break; } 00706 } 00707 SlotNIdV.Add(TIntPr(NI.GetId(), NSlots)); 00708 SlotNIdH.AddDat(NI.GetId(), NSlots); 00709 CumSlots+=NSlots; 00710 } 00711 } 00712 printf(" %d nodes, with %d spokes available, %d edges\n", SlotNIdH.Len(), CumSlots, CumSlots/2); 00713 TIntV NIdV; SlotNIdH.GetKeyV(NIdV); 00714 for (int ni1 = 0; ni1 < NIdV.Len(); ni1++) { 00715 const int nid = NIdV[ni1]; 00716 if (! SlotNIdH.IsKey(nid) || SlotNIdH.GetDat(nid) == 0) { continue; } 00717 const int NI1Slots = SlotNIdH.GetDat(nid); 00718 if ((SlotNIdH.GetMxKeyIds() - SlotNIdH.Len())/double(SlotNIdH.GetMxKeyIds()) > 0.5) { SlotNIdH.Defrag(); } 00719 TNodeI NI = GetNI(nid); 00720 for (int NTries = 0; NTries < 4*NI1Slots && NI.GetOutNId(NI.GetOutDeg()-1) == DelNId; NTries++) { 00721 const int nid2 = SlotNIdH.GetKey(SlotNIdH.GetRndKeyId(Rnd)); 00722 if (nid == nid2) { continue; } 00723 TNodeI NI2 = GetNI(nid2); 00724 // insert the edge 00725 if (NI2.GetOutNId(NI2.GetOutDeg()-1)==DelNId && ! NI2.IsInNId(NI.GetId())) { 00726 int *V1 = (int *) BinSearch2(NI.OutNIdV, NI.OutNIdV+NI.GetOutDeg(), NI2.GetId()); 00727 int *V2 = (int *) BinSearch2(NI2.InNIdV, NI2.InNIdV+NI2.GetInDeg(), NI.GetId()); 00728 if (*V1 != DelNId) { memmove(V1+1, V1, sizeof(int)*(NI.GetOutDeg()-(V1-NI.OutNIdV)-1)); } 00729 if (*V2 != DelNId) { memmove(V2+1, V2, sizeof(int)*(NI2.GetInDeg()-(V2-NI2.InNIdV)-1)); } 00730 *V1 = NI2.GetId(); *V2 = NI.GetId(); 00731 NAddit++; 00732 SlotNIdH.GetDat(nid)--; SlotNIdH.GetDat(nid2)--; 00733 } 00734 if (SlotNIdH.GetDat(nid2) == 0) { SlotNIdH.DelKey(nid2); continue; } 00735 if (SlotNIdH.GetDat(nid) == 0) { SlotNIdH.DelKey(nid); break; } 00736 } 00737 if (ni1 % Mega(1) == 0) { printf("."); fflush(stdout); } 00738 } 00739 printf(" %llu edges added.\nDONE. TOTAL REWIRE TIME [%s]\n\n", NAddit, ExeTm.GetStr()); 00740 } 00741 00742 template <class TNodeData, bool IsDir> 00743 PNGraph TBigNet<TNodeData, IsDir>::GetNGraph(const bool& RenumberNodes) const { 00744 IAssert(RenumberNodes == false); 00745 PNGraph Graph = TNGraph::New(); 00746 Graph->Reserve(GetNodes(), 0); 00747 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00748 Graph->AddNode(NI.GetId(), Pool, NI.GetInVId(), NI.GetOutVId()); 00749 } 00750 return Graph; 00751 } 00752 00753 template <class TNodeData, bool IsDir> 00754 PNGraph TBigNet<TNodeData, IsDir>::GetSubNGraph(const TIntV& NIdV) const { 00755 PNGraph Graph = TNGraph::New(NIdV.Len(), 0); 00756 // add nodes 00757 for (int i = 0; i < NIdV.Len(); i++) { 00758 Graph->AddNode(NIdV[i]); } 00759 // reserve node in- and out-degree 00760 for (int i = 0; i < NIdV.Len(); i++) { 00761 const int SrcNId = NIdV[i]; 00762 const TNodeI NI = GetNI(SrcNId); 00763 int InDeg=0, OutDeg=0; 00764 for (int e = 0; e < NI.GetInDeg(); e++) { if (Graph->IsNode(NI.GetInNId(e))) InDeg++; } 00765 for (int e = 0; e < NI.GetOutDeg(); e++) { if (Graph->IsNode(NI.GetOutNId(e))) OutDeg++; } 00766 Graph->ReserveNIdInDeg(SrcNId, InDeg); 00767 Graph->ReserveNIdOutDeg(SrcNId, OutDeg); 00768 } 00769 // add edges 00770 for (int i = 0; i < NIdV.Len(); i++) { 00771 const int SrcNId = NIdV[i]; 00772 const TNodeI NI = GetNI(SrcNId); 00773 for (int e = 0; e < NI.GetOutDeg(); e++) { 00774 const int DstNId = NI.GetOutNId(e); 00775 if (Graph->IsNode(DstNId)) { 00776 Graph->AddEdge(SrcNId, DstNId); } 00777 } 00778 } 00779 return Graph; 00780 } 00781 00782 template <class TNodeData, bool IsDir> 00783 TPt<TBigNet<TNodeData, IsDir> > TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes) const { 00784 const bool isDir = IsDir, onlySources = HasFlag(gfSources); 00785 TSize Edges=0; 00786 // find in- and out-degree 00787 TSparseHash<TInt, TIntPr> NIdDegH(NIdV.Len()); 00788 for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); } 00789 for (int i = 0; i < NIdV.Len(); i++) { 00790 const TNodeI NI = GetNI(NIdV[i]); 00791 int InDeg=0, OutDeg=0; 00792 for (int n = 0; n < NI.GetOutDeg(); n++) { 00793 if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; } 00794 if (! onlySources && isDir) { 00795 for (int n = 0; n < NI.GetInDeg(); n++) { 00796 if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; } 00797 } 00798 Edges += OutDeg; 00799 NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg); 00800 } 00801 // make network 00802 typedef TBigNet<TNodeData, IsDir> TBNet; 00803 TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected)); 00804 TBNet& NewNet = *NewNetPt; 00805 NewNet.Flags = Flags; 00806 // add nodes 00807 if (isDir || onlySources) { 00808 for (int i = 0; i < NIdV.Len(); i++) { 00809 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00810 NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00811 } else { 00812 for (int i = 0; i < NIdV.Len(); i++) { 00813 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00814 NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00815 } 00816 // add edges 00817 for (int i = 0; i < NIdV.Len(); i++) { 00818 int NId = NIdV[i]; 00819 const TNodeI NI = GetNI(NId); 00820 int *NIdVPt = (int *) NewNet.GetOutNIdVPt(NId); 00821 int deg = 0; 00822 for (int e = 0; e < NI.GetOutDeg(); e++) { 00823 const int Dst = NI.GetOutNId(e); 00824 if (NewNet.IsNode(Dst)) { 00825 *NIdVPt = Dst; NIdVPt++; deg++; } 00826 } 00827 EAssert(deg == NIdDegH.GetDat(NId).Val2); 00828 if (isDir && ! onlySources) { 00829 EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0) 00830 || (NI.GetInVId() != NI.GetOutVId())); 00831 int * NIdVPt = (int *) NewNet.GetInNIdVPt(NId); 00832 int deg = 0; 00833 for (int e = 0; e < NI.GetInDeg(); e++) { 00834 const int Dst = NI.GetInNId(e); 00835 if (NewNet.IsNode(Dst)) { 00836 *NIdVPt = Dst; NIdVPt++; deg++; } 00837 } 00838 EAssert(deg == NIdDegH.GetDat(NId).Val1); 00839 } 00840 } 00841 return NewNetPt; 00842 } 00843 00844 template <class TNodeData, bool IsDir> 00845 void TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, TBigNet* NewNetPt, const bool& RenumberNodes) const { 00846 printf("Set subgraph on %d nodes\n", NIdV.Len()); TExeTm ExeTm; 00847 const bool isDir = IsDir, onlySources = HasFlag(gfSources); 00848 TSize Edges=0; 00849 // find in- and out-degree 00850 THash<TInt, TIntPr> NIdDegH(NIdV.Len()); 00851 for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); } 00852 for (int i = 0; i < NIdV.Len(); i++) { 00853 const TNodeI NI = GetNI(NIdV[i]); 00854 int InDeg=0, OutDeg=0; 00855 for (int n = 0; n < NI.GetOutDeg(); n++) { 00856 if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; } 00857 if (! onlySources && isDir) { 00858 for (int n = 0; n < NI.GetInDeg(); n++) { 00859 if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; } 00860 } 00861 Edges += OutDeg; 00862 NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg); 00863 } 00864 // make network 00865 //typedef TBigNet<TNodeData, IsDir> TBNet; 00866 //TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected)); 00867 NewNetPt->Reserve(NIdV.Len(), Edges); 00868 TBigNet<TNodeData, IsDir>& NewNet = *NewNetPt; 00869 NewNet.Flags = Flags; 00870 TIntSet NIdMap; 00871 // add nodes 00872 if (! RenumberNodes) { 00873 if (isDir || onlySources) { 00874 for (int i = 0; i < NIdV.Len(); i++) { 00875 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00876 NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00877 } else { 00878 for (int i = 0; i < NIdV.Len(); i++) { 00879 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00880 NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00881 } 00882 } else { // renumber nodes 00883 NIdMap.Gen(NIdV.Len()); 00884 for (int i = 0; i < NIdV.Len(); i++) { NIdMap.AddKey(NIdV[i]); } 00885 if (isDir || onlySources) { 00886 for (int i = 0; i < NIdV.Len(); i++) { 00887 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00888 NewNet.AddNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00889 } else { 00890 for (int i = 0; i < NIdV.Len(); i++) { 00891 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00892 NewNet.AddUndirNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00893 } 00894 } 00895 // add edges 00896 for (int i = 0; i < NIdV.Len(); i++) { 00897 int NId = NIdV[i]; 00898 const TNodeI NI = GetNI(NId); 00899 int *NIdVPt = (int *) NewNet.GetOutNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId); 00900 int deg = 0; 00901 for (int e = 0; e < NI.GetOutDeg(); e++) { 00902 const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetOutNId(e)):NI.GetOutNId(e); 00903 if (NewNet.IsNode(Dst)) { 00904 *NIdVPt = Dst; NIdVPt++; deg++; } 00905 } 00906 EAssert(deg == NIdDegH.GetDat(NId).Val2); 00907 if (isDir && ! onlySources) { 00908 EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0) 00909 || (NI.GetInVId() != NI.GetOutVId())); 00910 int * NIdVPt = (int *) NewNet.GetInNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId); 00911 int deg = 0; 00912 for (int e = 0; e < NI.GetInDeg(); e++) { 00913 const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetInNId(e)):NI.GetInNId(e); 00914 if (NewNet.IsNode(Dst)) { 00915 *NIdVPt = Dst; NIdVPt++; deg++; } 00916 } 00917 EAssert(deg == NIdDegH.GetDat(NId).Val1); 00918 } 00919 } 00920 printf(" %u edges [%s]\n", uint(Edges), ExeTm.GetStr()); 00921 } 00922 00923 template <class TNodeData, bool IsDir> 00924 void TBigNet<TNodeData, IsDir>::Reserve(const int& Nodes, const TSize& Edges) { 00925 NodeH.Gen(TMath::Mx(int(1.1*Nodes), 100)); 00926 Pool = TPool(TMath::Mx(Edges, (TSize) Mega(1)), Mega(100), true); 00927 } 00928 00929 // check that in- and out-edges matchs, the table is sorted and so on 00930 template <class TNodeData, bool IsDir> 00931 bool TBigNet<TNodeData, IsDir>::IsOk() const { 00932 // check the node pool 00933 TIntV ValV; TExeTm ExeTm; 00934 printf("Is ok network:\n Check Vec..."); 00935 for (uint id = 1; id < Pool.GetVecs(); id++) { 00936 Pool.GetV(id, ValV); 00937 if (! ValV.Empty()) { 00938 EAssert((0<=ValV[0] && ValV[0]<MxNId) || ValV[0]==DelNId); 00939 try { 00940 for (int i = 1; i < ValV.Len(); i++) { 00941 //if (ValV[i]==DelNId) { continue; } 00942 // sorted & no duplicates & no empty values (can have deleted nodes) 00943 EAssertR((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId), 00944 TStr::Fmt("NId1: %d NId2:%d", ValV[i-1](),ValV[i]())); 00945 EAssertR((0<=ValV[i] && ValV[i]<MxNId) || ValV[i]==DelNId, 00946 TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId)); 00947 if (! OnlySources()) { 00948 EAssertR(IsNode(ValV[i]) || ValV[i]==DelNId, 00949 TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId)); } // every link is a node 00950 } 00951 } catch (PExcept Except){ 00952 printf(" %s\n", Except->GetStr().CStr()); 00953 printf(" vec id: %d, lenght: %d\n", id, ValV.Len()); 00954 for (int i = 1; i < ValV.Len(); i++) { 00955 printf(" %d", ValV[i]()); 00956 if (!((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId))) { printf("*"); } 00957 } 00958 printf("\n"); 00959 return false; 00960 } 00961 } 00962 if (id % 10000 == 0) { 00963 printf("\r %dk / %dk [%s]", id/1000, Pool.GetVecs()/1000, ExeTm.GetStr()); } 00964 } 00965 printf("[%s]\n Check Edges...\n", ExeTm.GetStr()); 00966 // check edges 00967 int ErrCnt = 0; 00968 if (! OnlySources()) { 00969 int Cnt=0; 00970 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, Cnt++) { 00971 // nodes that point to NI, have it on out-list 00972 for (int e = 0; e < NI.GetInDeg(); e++) { 00973 if (NI.GetInNId(e) == DelNId) { continue; } // skip deleted nodes 00974 if (! IsEdge(NI.GetInNId(e), NI.GetId())) { 00975 printf("\nno edge: %d --> %d", NI.GetInNId(e), NI.GetId()); 00976 printf("NId: %d deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()); 00977 for (int i = 0; i < NI.GetInDeg(); i++) { printf(" %d", NI.GetInNId(i)); } 00978 TNodeI NI2 = GetNI(NI.GetInNId(e)); 00979 printf("\nNId2: %d deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg()); 00980 for (int j = 0; j < NI2.GetOutDeg(); j++) { printf(" %d", NI2.GetOutNId(j)); } 00981 printf("\n"); 00982 ErrCnt++; 00983 } 00984 //EAssertR(IsEdge(NI.GetInNId(e), NI.GetId()), 00985 // TStr::Fmt("no edge: %d --> %d", NI.GetInNId(e), NI.GetId())); 00986 } 00987 // nodes NI points to, have it on in-list 00988 for (int e = 0; e < NI.GetOutDeg(); e++) { 00989 if (NI.GetOutNId(e) == DelNId) { continue; } 00990 const int InVId = GetNode(NI.GetOutNId(e)).InVId; 00991 int* DstInV = (int *)Pool.GetValVPt(InVId); 00992 if (BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) == NULL) { 00993 printf("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e)); 00994 printf("NId: %d deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()); 00995 for (int i = 0; i < NI.GetOutDeg(); i++) { printf(" %d", NI.GetOutNId(i)); } 00996 TNodeI NI2 = GetNI(NI.GetOutNId(e)); 00997 printf("\nNId2: %d deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg()); 00998 for (int j = 0; j < NI2.GetInDeg(); j++) { printf(" %d", NI2.GetInNId(j)); } 00999 printf("\n"); ErrCnt++; 01000 } 01001 //EAssertR(BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) != NULL, 01002 // TStr::Fmt("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e))); 01003 } 01004 if (ErrCnt > 5) { FailR("error count too large!"); } 01005 if (Cnt % 100000 == 0) { 01006 printf("\r%s [%s]", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr()); } 01007 } 01008 printf("\r%s [%s]\n", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr()); 01009 } 01010 return true; 01011 } 01012 01013 template <class TNodeData, bool IsDir> 01014 void TBigNet<TNodeData, IsDir>::Dump(const TStr& Desc) const { 01015 if (! Desc.Empty()) { printf("\n%s (%d, %u):\n", Desc.CStr(), GetNodes(), GetEdges()); } 01016 else { printf("\nNodes: %d, Edges: %u\n", GetNodes(), GetEdges()); } 01017 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 01018 printf("%d]\n IN %d]", NI.GetId(), NI.GetInDeg()); 01019 for (int i=0; i<NI.GetInDeg(); i++) { printf(" %d", NI.GetInNId(i)); } 01020 if (IsDir) { 01021 printf("\n OUT %d]", NI.GetOutDeg()); 01022 for (int i=0; i<NI.GetOutDeg(); i++) { printf(" %d", NI.GetOutNId(i)); } 01023 } 01024 printf("\n"); 01025 } 01026 } 01027 01028 // header: <Nodes, Edges, IsDir> 01029 // format: undirected <NId, Dat, OutDeg, OutNodeV> 01030 // format: directed <NId, Dat, OutDeg, OutNodeV, InDeg, InNodeV> 01031 template <class TNodeData, bool IsDir> 01032 void TBigNet<TNodeData, IsDir>::SaveForDisk(const TStr& OutFNm) const { 01033 const bool IsDirected = IsDir; 01034 TFOut FOut(OutFNm); 01035 FOut.Save(GetNodes()); 01036 FOut.Save(GetEdges()); 01037 FOut.Save(IsDirected); 01038 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 01039 FOut.Save(NI.GetId()); 01040 NI.GetDat().Save(FOut); 01041 FOut.Save(NI.GetOutDeg()); 01042 for (int i = 0; i < NI.GetOutDeg(); i++) { 01043 FOut.Save(NI.GetOutNId(i)); } 01044 if (IsDirected) { 01045 FOut.Save(NI.GetInDeg()); 01046 for (int i = 0; i < NI.GetInDeg(); i++) { 01047 FOut.Save(NI.GetInNId(i)); } 01048 } 01049 } 01050 } 01051 01052 // skip the edge pool and only load the node data hash table 01053 template <class TNodeData, bool IsDir> 01054 void TBigNet<TNodeData, IsDir>::LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH) { 01055 TFIn SIn(InFNm); 01056 TInt MxNId(SIn); 01057 TB32Set Flags(SIn); 01058 printf("skipping Pool...\n"); 01059 TBool FastCopy(SIn); 01060 uint64 _GrowBy, _MxVals, _Vals; 01061 SIn.Load(_GrowBy); SIn.Load(_MxVals); SIn.Load(_Vals); 01062 TInt EmptyVal(SIn); 01063 int Tmp; 01064 for (TSize ValN = 0; ValN < _Vals; ValN++) { SIn.Load(Tmp); } 01065 TInt MxVals(SIn), Vals(SIn); 01066 uint64 Offset; 01067 for (int ValN = 0; ValN < Vals; ValN++) { SIn.Load(Offset); } 01068 printf("loading Hode hash table...\n"); 01069 NodeH.Load(SIn); 01070 } 01071 01072 // Save the network without loading it. Save the node hash table as THash or TSparseHash 01073 template <class TNodeData, bool IsDir> 01074 void TBigNet<TNodeData, IsDir>::SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash) { 01075 TFIn FIn(InFNm); 01076 TFOut FOut(OutFNm); 01077 { TInt MxNId(FIn); MxNId.Save(FOut); 01078 TB32Set Flags(FIn); Flags.Save(FOut); 01079 TPool Pool(FIn); Pool.Save(FOut); } 01080 { TNodeH NodeH(FIn); 01081 if (! SaveSparseHash) { 01082 THash<TInt, TNode> NewH(NodeH.Len(), true); 01083 for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) { 01084 NewH.AddDat(I->Key, I->Dat); 01085 } 01086 NewH.Save(FOut); 01087 } else { 01088 FailR("Not implemented"); 01089 } } 01090 }