SNAP Library 2.1, Developer Reference  2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
bignet.h
Go to the documentation of this file.
00001 //#//////////////////////////////////////////////
00003 
00010 template <class TNodeData, bool IsDir>
00011 class TBigNet {
00012 public:
00013   typedef TNodeData TNodeDat;
00014   typedef TBigNet<TNodeData, IsDir> TNet;
00015   typedef TPt<TNet> PNet;
00016 public:
00017   class TNode;
00018   typedef THash<TInt, TNode> TNodeH; // use SaveToDisk to convert between the two hash table types
00019   typedef TPt<TBigNet<TNodeData, IsDir> > PBigNet;
00020   typedef TVecPool<TInt> TVPool;
00021   typedef TPt<TVPool> PVPool;
00022 
00024 
00026   class TNode {
00027   public:
00029     TInt InVId;
00031 
00033     TInt OutVId; 
00035     TNodeDat Dat;
00036   public:
00037     TNode() : InVId(-1), OutVId(-1), Dat() { }
00038     TNode(const int& InVecId, const int& OutVecId) : InVId(InVecId), OutVId(OutVecId), Dat() { }
00039     TNode(const int& InVecId, const int& OutVecId, const TNodeDat& NodeDat) :
00040       InVId(InVecId), OutVId(OutVecId), Dat(NodeDat) { }
00041     TNode(const TNode& Node) : InVId(Node.InVId), OutVId(Node.OutVId), Dat(Node.Dat) { }
00042     TNode(TSIn& SIn) : InVId(SIn), OutVId(SIn), Dat(SIn) { }
00043     void Save(TSOut& SOut) const { SOut.Save(InVId); SOut.Save(OutVId); Dat.Save(SOut); }
00045     bool IsUnused() const { return InVId==-1 && OutVId==-1; }
00046   };
00048 
00050   class TNodeI {
00051   protected:
00052     typedef typename TNodeH::TIter THashIter;
00053     THashIter NodeHI;
00054     TVPool *Pool;
00055     int InDeg, OutDeg, *InNIdV, *OutNIdV; // if undirected, InNIdV==OutNIdV
00056   public:
00057     inline void GetInOutNIdV();
00058     int GetInVId() const { return NodeHI->Dat.InVId; }
00059     int GetOutVId() const { return NodeHI->Dat.OutVId; }
00060   public:
00061     TNodeI() : NodeHI(), Pool(NULL), InDeg(0), OutDeg(0), InNIdV(NULL), OutNIdV(NULL) { }
00062     TNodeI(const THashIter& NodeHIter, TVPool *PoolPt) : NodeHI(NodeHIter), Pool(PoolPt) { GetInOutNIdV(); }
00063     TNodeI(const TNodeI& NodeI) : NodeHI(NodeI.NodeHI), Pool(NodeI.Pool) { GetInOutNIdV(); }
00064     TNodeI& operator = (const TNodeI& NI) { NodeHI=NI.NodeHI; Pool=NI.Pool; GetInOutNIdV(); return *this; }
00066     TNodeI& operator++ (int) { NodeHI++; GetInOutNIdV(); return *this; }
00067     bool operator < (const TNodeI& NI) const { return NodeHI < NI.NodeHI; }
00068     bool operator == (const TNodeI& NI) const { return NodeHI == NI.NodeHI; }
00069     int GetId() const { return NodeHI->Key(); }
00070     int GetDeg() const { return GetInDeg()+(InNIdV!=OutNIdV?GetOutDeg():0); }
00071     int GetInDeg() const { return InDeg; }
00072     int GetOutDeg() const { return OutDeg; }
00073     int GetInNId(const int& NodeN) const { return InNIdV[NodeN]; }
00074     int GetOutNId(const int& NodeN) const { return OutNIdV[NodeN]; }
00075     int GetOutNbrId(const int& NodeN) const { return NodeN<OutDeg ? OutNIdV[NodeN]:InNIdV[NodeN-OutDeg]; }
00076     bool IsInNId(const int& NId) const { return BinSearch(InNIdV, InNIdV+InDeg, NId)!=NULL; }
00077     bool IsOutNId(const int& NId) const { return BinSearch(OutNIdV, OutNIdV+OutDeg, NId)!=NULL; }
00078     bool IsNbrNId(const int& NId) const { return IsOutNId(NId) || IsInNId(NId); }
00079     const TNodeData& operator () () const { return GetDat(); }
00080     TNodeData& operator () () { return GetDat(); }
00081     const TNodeData& GetDat() const { return NodeHI->Dat.Dat; }
00082     TNodeData& GetDat() { return NodeHI->Dat.Dat; }
00083     // requires pointer back to the network (see as in TNodeNet)
00084     //const TNodeData& GetInNDat(const int& NodeN) const { return Net->GetNDat(GetInNId(NodeN)); }
00085     //TNodeData& GetInNDat(const int& NodeN) { return Net->GetNDat(GetInNId(NodeN)); }
00086     //const TNodeData& GetOutNDat(const int& NodeN) const { return Net->GetNDat(GetOutNId(NodeN)); }
00087     //TNodeData& GetOutNDat(const int& NodeN) { return Net->GetNDat(GetOutNId(NodeN)); }
00088     //const TNodeData& GetNbrNDat(const int& NodeN) const { return Net->GetNDat(GetNbrNId(NodeN)); }
00089     //TNodeData& GetNbrNDat(const int& NodeN) { return Net->GetNDat(GetNbrNId(NodeN)); }
00090     void Dump() const;
00091     friend class TBigNet<TNodeData, IsDir>;
00092   };
00093 
00095 
00097   class TEdgeI {
00098   private:
00099     TNodeI CurNode, EndNode;
00100     int CurEdge;
00101   public:
00102     TEdgeI() : CurNode(), EndNode(), CurEdge(0) { }
00103     TEdgeI(const TNodeI& NodeI, const TNodeI& EndNodeI, const int& EdgeN=0) : CurNode(NodeI), EndNode(EndNodeI), CurEdge(0) { }
00104     TEdgeI(const TEdgeI& EdgeI) : CurNode(EdgeI.CurNode), EndNode(EdgeI.EndNode), CurEdge(EdgeI.CurEdge) { }
00105     TEdgeI& operator = (const TEdgeI& EdgeI) { if (this!=&EdgeI) { CurNode=EdgeI.CurNode;  EndNode=EdgeI.EndNode;  CurEdge=EdgeI.CurEdge; }  return *this; }
00106     TEdgeI& operator++ (int) { CurEdge++; if (CurEdge >= CurNode.GetOutDeg()) { CurEdge=0;  CurNode++;
00107       while (CurNode < EndNode && CurNode.GetOutDeg()==0) { CurNode++; } }  return *this; }
00108     bool operator < (const TEdgeI& EdgeI) const { return CurNode<EdgeI.CurNode || (CurNode==EdgeI.CurNode && CurEdge<EdgeI.CurEdge); }
00109     bool operator == (const TEdgeI& EdgeI) const { return CurNode == EdgeI.CurNode && CurEdge == EdgeI.CurEdge; }
00110     int GetId() const { return -1; }
00111     int GetSrcNId() const { return CurNode.GetId(); }
00112     int GetDstNId() const { return CurNode.GetOutNId(CurEdge); }
00113     const TNodeData& GetSrcNDat() const { return CurNode.GetNDat(); }
00114     TNodeData& GetDstNDat() { return CurNode.GetOutNDat(CurEdge); }
00115     friend class TNodeNet<TNodeData>;
00116   };
00117 protected:
00118   bool IsNode(const int& NId, TNode& Node) const { return NodeH.IsKeyGetDat(NId, Node); }
00119   int* GetInNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).InVId); }
00120   int* GetOutNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).OutVId); }
00121   static void AddSorted(int* Beg, int* End, const int& Val);
00122   static const int* BinSearch(const int* Beg, const int* End, const int& Val);
00123   static const int* BinSearch2(const int* Beg, const int* End, const int& Val);
00124   const TNode& GetNode(const int& NId) const { return NodeH.GetDat(NId); }
00125   TNode& GetNode(const int& NId) { return NodeH.GetDat(NId); }
00126 public:
00127   enum { DelNId = INT_MAX }; // id of a deleted node
00128 protected:
00129   TCRef CRef;
00130   TInt MxNId;
00131   TB32Set Flags;
00132   TVPool Pool;
00133   TNodeH NodeH;
00134 public:
00135   TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources=false);
00136   TBigNet(TSIn& SIn) : MxNId(SIn), Flags(SIn), Pool(SIn), NodeH(SIn) { TBool Dir(SIn); IAssert(Dir()==IsDir); }
00137   virtual ~TBigNet() { }
00138   virtual void Save(TSOut& SOut) const;
00139   static PBigNet New(const int& Nodes, const TSize& Edges, const bool& Sources=false) {
00140     return PBigNet(new TBigNet(Nodes, Edges, Sources)); }
00141   static PBigNet Load(TSIn& SIn) { return PBigNet(new TBigNet(SIn)); }
00142   TBigNet& operator = (const TBigNet& Net) { if (this!=&Net) {
00143     MxNId=Net.MxNId; Flags=Net.Flags; Pool=Net.Pool; NodeH=Net.NodeH; }  return *this; }
00144 
00145   bool OnlySources() const { return Flags.In(gfSources); }
00146   bool HasFlag(const TGraphFlag& Flag) const {
00147     return HasGraphFlag(typename TBigNet, Flag) || (Flag==gfSources && OnlySources()); }
00148   void DumpFlags() const;
00149 
00150   // vertices
00151   int GetNodes() const { return NodeH.Len(); }
00152   int GetMxNId() const { return MxNId; }
00153   int AddNode(int NId, const int& InDeg, const int& OutDeg);
00154   int AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeDat& NodeDat);
00155   int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV);
00156   int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeDat& NodeDat);
00157   int AddUndirNode(int NId, const int& Deg);
00158   int AddUndirNode(int NId, const int& Deg, const TNodeDat& NodeDat);
00159   int AddUndirNode(int NId, const TIntV& EdgeNIdV);
00160   int AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeDat& NodeDat);
00161   void SetInNIdV(int NId, const TIntV& InNIdV);
00162   void SetOutNIdV(int NId, const TIntV& OutNIdV);
00163   void GetInNIdV(int NId, TIntV& OutNIdV) const;
00164   void GetOutNIdV(int NId, TIntV& OutNIdV) const;
00165   bool IsNode(const int& NId) const { return NodeH.IsKey(NId); }
00166   TNodeI BegNI() const { return TNodeI(NodeH.BegI(), (TVPool *)&Pool); }
00167   TNodeI EndNI() const { return TNodeI(NodeH.EndI(), (TVPool *)&Pool); }
00168   TNodeI GetNI(const int& NId) const { return TNodeI(NodeH.GetI(NId), (TVPool *)&Pool); }
00169   TNodeDat& GetNDat(const int& NId) { return NodeH.GetDat(NId).Dat; }
00170   const TNodeDat& GetNDat(const int& NId) const { return NodeH.GetDat(NId).Dat; }
00171   // edges
00172   TEdgeI BegEI() const { TNodeI NI=BegNI();  while(NI<EndNI() && NI.GetOutDeg()==0) NI++;  return TEdgeI(NI, EndNI()); }
00173   TEdgeI EndEI() const { return TEdgeI(EndNI(), EndNI()); }
00174   TEdgeI GetEI(const int& EId) const; // not supported
00175 
00176   // delete nodes
00177   int IsolateNode(int NId); // isolate the node by setting edge endpoints to point to node id DelNId, IsNode(DelNId)==false)
00178   int DelNode(int NId); // set neighbors to point to DelNId, delete node from the node table
00179   bool IsIsoNode(const int& NId) const;
00180   uint GetDelEdges(); // the number deleted edges
00181   void CompactEdgePool(); // after nodes are isolated and deleted, we compact the empty space
00182 
00183   // edges
00184   ::TSize GetEdges() const { return Pool.GetVals(); }
00185   int AddEdge(const int& SrcNId, const int& DstNId);
00186   bool IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir=true) const;
00187 
00188   void SortEdgeV();
00189   void InvertFromSources(uint ExpectNodes=0); // add missing nodes and in-links
00190   void Rewire(TRnd& Rnd = TInt::Rnd); // create a random network with same degree distribution (configuration model)
00191 
00192   // manipulation
00193   PNGraph GetNGraph(const bool& RenumberNodes=false) const;
00194   PNGraph GetSubNGraph(const TIntV& NIdV) const;
00195   PBigNet GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes=false) const;
00196   void GetSubGraph(const TIntV& NIdV, TBigNet* NewNet, const bool& RenumberNodes=false) const;
00197 
00198   int GetRndNId(TRnd& Rnd=TInt::Rnd) const { return NodeH.GetKey(NodeH.GetRndKeyId(Rnd)); }
00199   TNodeI GetRndNI(TRnd& Rnd=TInt::Rnd) const { return GetNI(GetRndNId(Rnd)); }
00200   void GetNIdV(TIntV& NIdV) const;
00201 
00202   bool Empty() const { return GetNodes()==0; }
00203   void Clr(const bool& DoDel = true) { MxNId = 0;  NodeH.Clr(DoDel); Pool.Clr(DoDel); }
00204   void Reserve(const int& Nodes, const TSize& Edges);
00205   void Defrag(const bool& OnlyNodeLinks = false) { }
00206   bool IsOk() const;
00207   void Dump(const TStr& Desc = TStr()) const;
00208   void SaveForDisk(const TStr& OutFNm) const;
00209 
00210   static void LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH);
00211   static void SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash);
00212   friend class TPt<TBigNet>;
00213 };
00214 
00215 // set flags
00216 namespace TSnap {
00217 template <class TNodeData, bool IsDir> struct IsDirected<TBigNet<TNodeData, IsDir> > { enum { Val = 0 }; };
00218 template <class TNodeData> struct IsDirected<TBigNet<TNodeData, true> > { enum { Val = 1 }; };
00219 template <class TNodeData, bool IsDir> struct IsNodeDat<TBigNet<TNodeData, IsDir> > { enum { Val = 1 }; };
00220 }
00221 
00222 template <class TNodeData, bool IsDir>
00223 inline void TBigNet<TNodeData, IsDir>::TNodeI::GetInOutNIdV() {
00224   if (NodeHI.IsEnd()) return;
00225   const TNode& N = NodeHI->Dat;
00226   if (! Pool->IsVId(N.InVId)) {
00227     InDeg=0; OutDeg=0; }
00228   else {
00229     InDeg=Pool->GetVLen(N.InVId);
00230     OutDeg=Pool->GetVLen(N.OutVId);
00231     InNIdV=(int *)Pool->GetValVPt(N.InVId);
00232     OutNIdV=(int *)Pool->GetValVPt(N.OutVId);
00233   }
00234 }
00235 
00236 template <class TNodeData, bool IsDir>
00237 void TBigNet<TNodeData, IsDir>::TNodeI::Dump() const {
00238   printf("NodeId: %d\n", GetId());
00239   printf("  I:%4d]", GetInDeg());
00240   for (int i = 0; i < GetInDeg(); i++) { printf("  %d", GetInNId(i)); }
00241   printf("\n  O:%4d]", GetOutDeg());
00242   for (int i = 0; i < GetOutDeg(); i++) { printf("  %d", GetOutNId(i)); }
00243   printf("\n");
00244 }
00245 
00246 template <class TNodeData, bool IsDir>
00247 void TBigNet<TNodeData, IsDir>::AddSorted(int* Beg, int* End, const int& Val) {
00248   // there is at least one free slot available for the Val
00249   IAssertR(*(End-1)==TInt::Mx, "Edge can not be added: no free space");
00250   // find position to insert
00251   int Half, Len = int(End-Beg);
00252   while (Len > 0) {
00253     Half = Len/2;
00254     int* Mid=Beg+Half;
00255     if (*Mid < Val) { Beg=Mid+1; Len=Len-Half-1; } else { Len=Half; } }
00256   IAssertR(*Beg != Val, "Value already existis");
00257   // insert
00258   memmove(Beg+1, Beg, sizeof(int)*(End-Beg-1));
00259   *Beg = Val;
00260 }
00261 
00262 template <class TNodeData, bool IsDir>
00263 const int* TBigNet<TNodeData, IsDir>::BinSearch(const int* Beg, const int* End, const int& Val) {
00264   End--;  const int *Mid;
00265   while (Beg <= End) { Mid = Beg+(End-Beg)/2;
00266     if (*Mid == Val) { return Mid; }
00267     else if (Val < *Mid) { End=Mid-1; } else { Beg=Mid+1; }
00268   }
00269   return NULL;
00270 }
00271 
00272 template <class TNodeData, bool IsDir>
00273 const int* TBigNet<TNodeData, IsDir>::BinSearch2(const int* Beg, const int* End, const int& Val) {
00274   const int* First = Beg;
00275   const int* Last = End;
00276   const int* Mid;
00277   TSize len = End-Beg, half;
00278   while (len > 0) {
00279     half = len>>1;  Mid=First+half;
00280     if (*Mid < Val) { First = Mid; First++; len=len-half-1; }
00281     else { len=half; }
00282   }
00283   return First==Last ? Last-1 : First;
00284 }
00285 
00286 template <class TNodeData, bool IsDir>
00287 TBigNet<TNodeData, IsDir>::TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources) :
00288 CRef(), MxNId(0), Flags(), Pool(IsDir ? 2*Edges:Edges, 10000000, true, TInt::Mx), NodeH(Nodes) {
00289   //Flags.Incl(gfNodeGraph);
00290   //Flags.Incl(gfNetwork);
00291   //if (IsDir) { Flags.Incl(gfDirected); }
00292   if (Sources) {
00293     Flags.Incl(gfSources);
00294     IAssertR(! IsDir, "Jure: not clear what happens is graph is Undirected and has only sources.");
00295   }
00296   //DumpFlags();
00297 }
00298 
00299 template <class TNodeData, bool IsDir>
00300 void TBigNet<TNodeData, IsDir>::Save(TSOut& SOut) const {
00301   MxNId.Save(SOut);
00302   Flags.Save(SOut);
00303   Pool.Save(SOut);
00304   NodeH.Save(SOut);
00305   TBool(IsDir).Save(SOut);
00306 }
00307 
00308 template <class TNodeData, bool IsDir>
00309 void TBigNet<TNodeData, IsDir>::DumpFlags() const {
00310   for (int i = 1; i <int(gfMx); i++) {
00311     if (HasFlag(TGraphFlag(i))) { printf("  +"); }
00312     else { printf("  -"); }
00313     printf("%s", TSnap::GetFlagStr(TGraphFlag(i)).CStr());
00314   }
00315   printf("\n");
00316 }
00317 
00318 template <class TNodeData, bool IsDir>
00319 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg) {
00320   CAssert(IsDir);
00321   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00322   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00323   TNode& Node = NodeH.AddDat(NId);
00324   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00325   Node.InVId = Pool.AddEmptyV(InDeg);
00326   Node.OutVId = Pool.AddEmptyV(OutDeg);
00327   return NId;
00328 }
00329 
00330 template <class TNodeData, bool IsDir>
00331 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeData& NodeDat) {
00332   CAssert(IsDir);
00333   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00334   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00335   TNode& Node = NodeH.AddDat(NId);
00336   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00337   Node.InVId = Pool.AddEmptyV(InDeg);
00338   Node.OutVId = Pool.AddEmptyV(OutDeg);
00339   Node.Dat = NodeDat;
00340   return NId;
00341 }
00342 
00343 template <class TNodeData, bool IsDir>
00344 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg) {
00345   CAssert(! IsDir);
00346   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00347   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00348   TNode& Node = NodeH.AddDat(NId);
00349   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00350   Node.InVId = Pool.AddEmptyV(Deg);
00351   Node.OutVId = Node.InVId; // same vector
00352   return NId;
00353 }
00354 
00355 template <class TNodeData, bool IsDir>
00356 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg, const TNodeData& NodeDat) {
00357   CAssert(! IsDir);
00358   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00359   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00360   TNode& Node = NodeH.AddDat(NId);
00361   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00362   Node.InVId = Pool.AddEmptyV(Deg);
00363   Node.OutVId = Node.InVId; // same vector
00364   Node.Dat = NodeDat;
00365   return NId;
00366 }
00367 
00368 template <class TNodeData, bool IsDir>
00369 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV) {
00370   CAssert(IsDir);
00371   IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted());
00372   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00373   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00374   TNode& Node = NodeH.AddDat(NId);
00375   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00376   Node.InVId = Pool.AddV(InNIdV);
00377   Node.OutVId = Pool.AddV(OutNIdV);
00378   return NId;
00379 }
00380 
00381 template <class TNodeData, bool IsDir>
00382 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeData& NodeDat) {
00383   CAssert(IsDir);
00384   IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted());
00385   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00386   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00387   TNode& Node = NodeH.AddDat(NId);
00388   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00389   Node.InVId = Pool.AddV(InNIdV);
00390   Node.OutVId = Pool.AddV(OutNIdV);
00391   Node.Dat = NodeDat;
00392   return NId;
00393 }
00394 
00395 template <class TNodeData, bool IsDir>
00396 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV) {
00397   CAssert(! IsDir);
00398   IAssert(EdgeNIdV.IsSorted());
00399   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00400   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00401   TNode& Node = NodeH.AddDat(NId);
00402   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00403   Node.InVId = Pool.AddV(EdgeNIdV);
00404   Node.OutVId = Node.InVId;
00405   return NId;
00406 }
00407 
00408 template <class TNodeData, bool IsDir>
00409 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeData& NodeDat) {
00410   CAssert(! IsDir);
00411   IAssert(EdgeNIdV.IsSorted());
00412   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00413   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00414   TNode& Node = NodeH.AddDat(NId);
00415   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00416   Node.InVId = Pool.AddV(EdgeNIdV);
00417   Node.OutVId = Node.InVId;
00418   Node.Dat = NodeDat;
00419   return NId;
00420 }
00421 
00422 template <class TNodeData, bool IsDir>
00423 void TBigNet<TNodeData, IsDir>::SetInNIdV(int NId, const TIntV& InNIdV) {
00424   TNode Node;  EAssert(IsNode(NId, Node));
00425   TIntV InNodesV;  Pool.GetV(Node.InVId, InNodesV);
00426   EAssert(InNIdV.Len() == InNodesV.Len());
00427   memcpy(InNodesV.BegI(), InNIdV.BegI(), sizeof(TInt)*InNIdV.Len());
00428 }
00429 
00430 template <class TNodeData, bool IsDir>
00431 void TBigNet<TNodeData, IsDir>::SetOutNIdV(int NId, const TIntV& OutNIdV) {
00432   TNode Node;  EAssert(IsNode(NId, Node));
00433   TIntV OutNodesV;  Pool.GetV(Node.OutVId, OutNodesV);
00434   EAssert(OutNIdV.Len() == OutNodesV.Len());
00435   memcpy(OutNodesV.BegI(), OutNIdV.BegI(), sizeof(TInt)*OutNIdV.Len());
00436 }
00437 
00438 template <class TNodeData, bool IsDir>
00439 void TBigNet<TNodeData, IsDir>::GetInNIdV(int NId, TIntV& InNIdV) const {
00440   TNode Node;  EAssertR(IsNode(NId, Node), TStr::Fmt("Not node: NId=%d\n", NId).CStr());
00441   Pool.GetV(Node.InVId, InNIdV);
00442 }
00443 
00444 template <class TNodeData, bool IsDir>
00445 void TBigNet<TNodeData, IsDir>::GetOutNIdV(int NId, TIntV& OutNIdV) const {
00446   TNode Node;  EAssert(IsNode(NId, Node));
00447   Pool.GetV(Node.OutVId, OutNIdV);
00448 }
00449 
00450 // Node is deleted by setting edge endpoints to point to node id -1 (DelNId)
00451 // No memory is freed
00452 template <class TNodeData, bool IsDir>
00453 int  TBigNet<TNodeData, IsDir>::IsolateNode(int NId) {
00454   TIntV OutV;
00455   int NDel = 0;
00456   // out-edges
00457   GetOutNIdV(NId, OutV);
00458   for (int i = 0; i < OutV.Len(); i++) {
00459     if (OutV[i] == DelNId) { break; } // because they are sorted
00460     // fast implementation
00461     const TNode& N = NodeH.GetDat(OutV[i]);
00462     int *InNIdV = (int *) Pool.GetValVPt(N.InVId);
00463     const int Deg = Pool.GetVLen(N.InVId);
00464     int* Val = (int *) BinSearch(InNIdV, InNIdV+Deg, NId);
00465     if (Val == NULL) {
00466       printf("BAD: Can't find: OUT: NId: %d -- OutNbrId: %d\n", NId, OutV[i]());
00467       continue;
00468     }
00469     IAssert(Val != 0);
00470     memcpy(Val, Val+1, sizeof(int)*int(InNIdV+Deg-Val));
00471     *(InNIdV+Deg-1) = DelNId;  NDel++;
00472   }
00473   OutV.PutAll(DelNId);
00474   // in-edges
00475   if (IsDir) {
00476     TIntV InV;
00477     GetInNIdV(NId, InV);
00478     for (int i = 0; i < InV.Len(); i++) {
00479       if (InV[i] == DelNId) { break; }
00480       // fast implementation
00481       const TNode& N = NodeH.GetDat(InV[i]);
00482       int *OutNIdV = (int *) Pool.GetValVPt(N.OutVId);
00483       const int Deg = Pool.GetVLen(N.OutVId);
00484       int* Val = (int *) BinSearch(OutNIdV, OutNIdV+Deg, NId);
00485       if (Val == NULL) {
00486         printf("IN: NId: %d -- InNbrId: %d\n", NId, OutV[i]());
00487         continue;
00488       }
00489       IAssert(Val != 0);
00490       memcpy(Val, Val+1, sizeof(int)*int(OutNIdV+Deg-Val));
00491       *(OutNIdV+Deg-1) = DelNId;  NDel++;
00492     }
00493     InV.PutAll(DelNId);
00494   }
00495   return NDel;
00496 }
00497 
00498 // set neigbors to point to DelNId, delete node from the node table
00499 template <class TNodeData, bool IsDir>
00500 int  TBigNet<TNodeData, IsDir>::DelNode(int NId) {
00501   const int DelEdges = IsolateNode(NId);
00502   NodeH.DelKey(NId);
00503   return DelEdges;
00504 }
00505 
00506 template <class TNodeData, bool IsDir>
00507 bool TBigNet<TNodeData, IsDir>::IsIsoNode(const int& NId) const {
00508   if (NId == DelNId) { return true; }
00509   TIntV OutV;
00510   GetOutNIdV(NId, OutV);
00511   if (OutV.Empty() || OutV[0] == DelNId) { return true; }
00512   return false;
00513 }
00514 
00515 // the number deleted edges
00516 template <class TNodeData, bool IsDir>
00517 uint TBigNet<TNodeData, IsDir>::GetDelEdges() {
00518   uint DelEdges = 0;
00519   TIntV OutV;
00520   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00521     const int NId = NI.GetId();
00522     GetOutNIdV(NId, OutV);
00523     for (int i = 0; i < OutV.Len(); i++) {
00524       if (OutV[i] == DelNId) { DelEdges++; }
00525     }
00526   }
00527   return DelEdges;
00528 }
00529 
00530 template <class TNodeData, bool IsDir>
00531 void TBigNet<TNodeData, IsDir>::CompactEdgePool() {
00532   Pool.CompactPool(DelNId);
00533 }
00534 
00535 template <class TNodeData, bool IsDir>
00536 int TBigNet<TNodeData, IsDir>::AddEdge(const int& SrcNId, const int& DstNId) {
00537   TNode Src;  IAssert(IsNode(SrcNId, Src));
00538   int* OutV = (int *)Pool.GetValVPt(Src.OutVId);
00539   const int OutVLen = Pool.GetVLen(Src.OutVId);
00540   Assert(BinSearch(OutV, OutV+OutVLen, DstNId)==NULL); // no edge yet
00541   AddSorted(OutV, OutV+OutVLen, DstNId);
00542   if (! OnlySources()) {
00543     TNode Dst;  IAssert(IsNode(DstNId, Dst));
00544     int* InV = (int *)Pool.GetValVPt(Dst.InVId);
00545     const int InVLen = Pool.GetVLen(Dst.InVId);
00546     AddSorted(InV, InV+InVLen, SrcNId);
00547   }
00548   return -1; // edge id
00549 }
00550 
00551 template <class TNodeData, bool IsDir>
00552 bool TBigNet<TNodeData, IsDir>::IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir) const {
00553   TNode Src, Dst;
00554   if (! IsNode(SrcNId, Src)) { return false; } // no source node
00555   int* OutV1 = (int *)Pool.GetValVPt(Src.OutVId);
00556   const bool IsEdge = BinSearch(OutV1, OutV1+Pool.GetVLen(Src.OutVId), DstNId) != NULL;
00557   if (IsEdge && OnlySources()) { return true; }
00558   const bool IsDstNode = IsNode(DstNId, Dst);
00559   if (! IsDstNode) { return false; } // no destination node
00560   else if (IsEdge) { return true; } // destination and link found
00561   else if (! Dir) { // check for the undirected version of the edge
00562     int* OutV2 = (int *)Pool.GetValVPt(Dst.OutVId);
00563     return BinSearch(OutV2, OutV2+Pool.GetVLen(Dst.OutVId), SrcNId) != NULL; }
00564   else { return false; }
00565 }
00566 
00567 template <class TNodeData, bool IsDir>
00568 void TBigNet<TNodeData, IsDir>::GetNIdV(TIntV& NIdV) const {
00569   NIdV.Reserve(GetNodes(), 0);
00570   for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) {
00571     NIdV.Add(I->Key); }
00572 }
00573 
00574 template <class TNodeData, bool IsDir>
00575 void TBigNet<TNodeData, IsDir>::SortEdgeV() {
00576   printf("Sorting Edges... ");
00577   TExeTm ExeTm;
00578   TIntV OutV, InV;
00579   int cnt = 0;
00580   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00581     const int NId = NI.GetId();
00582     GetOutNIdV(NId, OutV);
00583     OutV.Sort();
00584     if (IsDir) {
00585       GetInNIdV(NId, InV);
00586       InV.Sort();
00587     }
00588     if (++cnt % Mega(1) == 0) { printf("\r  sort:%dm  %s", cnt/Mega(1), ExeTm.GetStr()); }
00589   }
00590   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00591     const int NId = NI.GetId();
00592     GetOutNIdV(NId, OutV);
00593     IAssert(OutV.IsSorted());
00594     GetInNIdV(NId, InV);
00595     IAssert(InV.IsSorted());
00596     if (++cnt % Mega(1) == 0) { printf("\r  check sorted:%dm  %s", cnt/Mega(1), ExeTm.GetStr()); }
00597   }
00598   printf("done. [%s]\n", ExeTm.GetStr());
00599 }
00600 
00601 // add missing nodes and in-links from a network of sources
00602 template <class TNodeData, bool IsDir>
00603 void TBigNet<TNodeData, IsDir>::InvertFromSources(uint ExpectNodes) {
00604   typedef THash<TInt, TInt> TDegHash;
00605   typedef int* TIntPt;
00606   if (ExpectNodes == 0) ExpectNodes=4*GetNodes();
00607   printf("\nInverting BigNet: reserved for %s nodes\n", TInt::GetMegaStr(ExpectNodes).CStr());
00608   CAssert(IsDir);
00609   IAssert(OnlySources());
00610   TDegHash InDegH(ExpectNodes);
00611   TSize NDest=0;
00612   // get node in-degree
00613   uint c=0;  TExeTm ExeTm;
00614   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) {
00615     for (int e = 0; e < NI.GetOutDeg(); e++) {
00616       InDegH.AddDat(NI.GetOutNId(e)) += 1; }
00617     if (c%100000==0) printf("\r%s h:%d [%g]    ", TInt::GetMegaStr(c).CStr(), InDegH.Len(), ExeTm.GetSecs());
00618   }
00619   printf("\n Resizing NodePool: %lld -> %lld\n", uint64(Pool.Reserved()), uint64(GetEdges()));
00620   if (2*GetEdges() > Pool.Reserved()) {
00621     Pool.Reserve(2*GetEdges()); }
00622   // add nodes
00623   printf("NodeH: %s nodes, InDeg: %s nodes, Reserve: %s\n", TInt::GetMegaStr(NodeH.Len()).CStr(),
00624     TInt::GetMegaStr(InDegH.Len()).CStr(), TInt::GetMegaStr(ExpectNodes).CStr());
00625   NodeH.Reserve(ExpectNodes);
00626   for (TDegHash::TIter I = InDegH.BegI(); I < InDegH.EndI(); I++) {
00627     const int NId = I->Key, InDeg = I->Dat;
00628     if (! IsNode(NId)) {
00629       AddNode(NId, InDeg, 0); NDest++; } // new destination node, no out-links
00630     else {
00631       TNode& Node = GetNode(NId);
00632       IAssert(Node.InVId == 0); // no in-links
00633       Node.InVId = Pool.AddEmptyV(InDeg); }
00634   }
00635   InDegH.Clr(true);
00636   printf("Added: %lld destination nodes\n", uint64(NDest));
00637   printf("Graph nodes: %lld nodes\n", uint64(GetNodes()));
00638   // pointer to in-links vector
00639   THash<TInt, int*> NIdToPtH(GetNodes());
00640   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++)
00641     NIdToPtH.AddDat(NI.GetId(), (int *)Pool.GetValVPt(NI.GetInVId()));
00642   // add in-edges
00643   printf("Adding edges...\n");
00644   c = 0;
00645   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) {
00646     const int SrcNId = NI.GetId();
00647     for (int e = 0; e < NI.GetOutDeg(); e++) {
00648       TIntPt& InV = NIdToPtH.GetDat(NI.GetOutNId(e));
00649       IAssert(*InV == TInt::Mx);
00650       *InV = SrcNId;  InV++;
00651     }
00652     if (c%100000==0) printf("\r%s [%g]   ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs());
00653   }
00654   // sort in-links
00655   printf("\nSorting in-links...\n");
00656   TIntV InNIdV;  c = 0;
00657   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) {
00658     Pool.GetV(NI.GetInVId(), InNIdV);
00659     InNIdV.Sort();
00660     if (c%100000==0) printf("\r%s [%g]    ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs());
00661   }
00662   printf("\r...done [%g]\n", ExeTm.GetSecs());
00663   Flags.Excl(gfSources);
00664 }
00665 
00666 template <class TNodeData, bool IsDir>
00667 void TBigNet<TNodeData, IsDir>::Rewire(TRnd& Rnd) {
00668   uint64 NDup=0, NResolve=0, NAddit=0, cnt = 0;
00669   TExeTm ExeTm;
00670   IAssertR(! IsDir, "Only undirected networks are supported");
00671   printf("Rewiring the network... 1");
00672   Pool.ShuffleAll(Rnd);  printf("[%s]\n", ExeTm.GetStr());
00673   //Pool.ShuffleAll(Rnd);  printf(" done [%s]\n", ExeTm.GetStr());
00674   printf("  sorting edges...\n");
00675   SortEdgeV(); // sort individual edge vectors
00676   printf(" done [%s]\n", ExeTm.GetStr());
00677   // remove duplicates and self edges
00678   printf("  removing duplicates...\n");
00679   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) {
00680     const int VId = NI.GetOutVId();
00681     int Len = Pool.GetVLen(VId);
00682     int* V = (int *)Pool.GetValVPt(VId);
00683     for (int i = 0; i < Len-1 && V[i]!=DelNId; i++) {
00684       if (V[i] == V[i+1] || V[i]==NI.GetId()) {
00685         memcpy(V+i, V+i+1, sizeof(int)*(Len-i-1)); i--;
00686         V[Len-1] = DelNId;  NDup++;
00687       }
00688     }
00689     if (Len > 0 && V[Len-1]==NI.GetId()) { V[Len-1] = DelNId;  NDup++; }
00690     if (cnt % Mega(10) == 0) { printf(".");  fflush(stdout); }
00691   }
00692   printf("\n    %llu duplicate edges removed [%s]\n", NDup, ExeTm.GetStr());
00693   if (OnlySources()) { return; }
00694   // resolve one way edges
00695   printf("  resolving one way edges...\n"); cnt=0; fflush(stdout);
00696   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) {
00697     for (int e = 0; e < NI.GetOutDeg(); e++) {
00698       if (NI.GetOutNId(e) == DelNId) { continue; } // skip deleted nodes
00699       const int InVId = GetNode(NI.GetOutNId(e)).InVId;
00700       const int InVLen = Pool.GetVLen(InVId); IAssert(InVLen>=0 && InVLen < Mega(3));
00701       int* InV = (int *) Pool.GetValVPt(InVId);
00702       int* Val = (int *) BinSearch2(InV, InV+InVLen, NI.GetId());
00703       if (*Val == NI.GetId()) { continue; } // points back, everything is ok
00704       if (InVLen>0 && InV[InVLen-1] == DelNId) { // there is space at the end, insert
00705         IAssert((InVLen-(Val-InV)-1) >= 0);
00706         memmove(Val+1, Val, sizeof(int)*(InVLen-(Val-InV)-1));
00707         *Val = NI.GetId();
00708       } else { // the other end could point at us but it doesn't
00709         memmove(NI.OutNIdV+e, NI.OutNIdV+e+1, sizeof(int)*(NI.GetOutDeg()-e-1));
00710         NI.OutNIdV[NI.GetOutDeg()-1]=DelNId;  e--;
00711       }
00712       NResolve++;
00713     }
00714     if (cnt % Mega(10) == 0) { printf(".");  fflush(stdout); }
00715   }
00716   printf("\n    %llu resolved edges [%s]\n", NResolve, ExeTm.GetStr());
00717   // check if there are two nodes that still have space and are not yet connected and connect them
00718   printf("  filling empty edge slots...\n");
00719   TIntPrV SlotNIdV;
00720   THash<TInt, TInt> SlotNIdH;
00721   int CumSlots=0;
00722   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00723     if (NI.GetOutNId(NI.GetOutDeg()-1) == DelNId) {
00724       int NSlots = 0;
00725       for (int s = NI.GetOutDeg()-1; s >= 0; s--) {
00726         if (NI.GetOutNId(s) == DelNId) { NSlots++; }
00727         else { break; }
00728       }
00729       SlotNIdV.Add(TIntPr(NI.GetId(), NSlots));
00730       SlotNIdH.AddDat(NI.GetId(), NSlots);
00731       CumSlots+=NSlots;
00732     }
00733   }
00734   printf("    %d nodes, with %d spokes available, %d edges\n", SlotNIdH.Len(), CumSlots, CumSlots/2);
00735   TIntV NIdV;  SlotNIdH.GetKeyV(NIdV);
00736   for (int ni1 = 0; ni1 < NIdV.Len(); ni1++) {
00737     const int nid = NIdV[ni1];
00738     if (! SlotNIdH.IsKey(nid) || SlotNIdH.GetDat(nid) == 0) { continue; }
00739     const int NI1Slots = SlotNIdH.GetDat(nid);
00740     if ((SlotNIdH.GetMxKeyIds() - SlotNIdH.Len())/double(SlotNIdH.GetMxKeyIds()) > 0.5) { SlotNIdH.Defrag(); }
00741     TNodeI NI  = GetNI(nid);
00742     for (int NTries = 0; NTries < 4*NI1Slots && NI.GetOutNId(NI.GetOutDeg()-1) == DelNId; NTries++) {
00743       const int nid2 = SlotNIdH.GetKey(SlotNIdH.GetRndKeyId(Rnd));
00744       if (nid == nid2) { continue; }
00745       TNodeI NI2  = GetNI(nid2);
00746       // insert the edge
00747       if (NI2.GetOutNId(NI2.GetOutDeg()-1)==DelNId && ! NI2.IsInNId(NI.GetId())) {
00748         int *V1 = (int *) BinSearch2(NI.OutNIdV, NI.OutNIdV+NI.GetOutDeg(), NI2.GetId());
00749         int *V2 = (int *) BinSearch2(NI2.InNIdV, NI2.InNIdV+NI2.GetInDeg(), NI.GetId());
00750         if (*V1 != DelNId) { memmove(V1+1, V1, sizeof(int)*(NI.GetOutDeg()-(V1-NI.OutNIdV)-1)); }
00751         if (*V2 != DelNId) { memmove(V2+1, V2, sizeof(int)*(NI2.GetInDeg()-(V2-NI2.InNIdV)-1)); }
00752         *V1 = NI2.GetId();  *V2 = NI.GetId();
00753         NAddit++;
00754         SlotNIdH.GetDat(nid)--;  SlotNIdH.GetDat(nid2)--;
00755       }
00756       if (SlotNIdH.GetDat(nid2) == 0) { SlotNIdH.DelKey(nid2); continue; }
00757       if (SlotNIdH.GetDat(nid) == 0) { SlotNIdH.DelKey(nid); break; }
00758     }
00759     if (ni1 % Mega(1) == 0) { printf(".");  fflush(stdout); }
00760   }
00761   printf("    %llu edges added.\nDONE. TOTAL REWIRE TIME [%s]\n\n", NAddit, ExeTm.GetStr());
00762 }
00763 
00764 template <class TNodeData, bool IsDir>
00765 PNGraph TBigNet<TNodeData, IsDir>::GetNGraph(const bool& RenumberNodes) const {
00766   IAssert(RenumberNodes == false);
00767   PNGraph Graph = TNGraph::New();
00768   Graph->Reserve(GetNodes(), 0);
00769   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00770     Graph->AddNode(NI.GetId(), Pool, NI.GetInVId(), NI.GetOutVId());
00771   }
00772   return Graph;
00773 }
00774 
00775 template <class TNodeData, bool IsDir>
00776 PNGraph TBigNet<TNodeData, IsDir>::GetSubNGraph(const TIntV& NIdV) const {
00777   PNGraph Graph = TNGraph::New(NIdV.Len(), 0);
00778   // add nodes
00779   for (int i = 0; i < NIdV.Len(); i++) {
00780     Graph->AddNode(NIdV[i]); }
00781   // reserve node in- and out-degree
00782   for (int i = 0; i < NIdV.Len(); i++) {
00783     const int SrcNId = NIdV[i];
00784     const TNodeI NI = GetNI(SrcNId);
00785     int InDeg=0, OutDeg=0;
00786     for (int e = 0; e < NI.GetInDeg(); e++) { if (Graph->IsNode(NI.GetInNId(e))) InDeg++; }
00787     for (int e = 0; e < NI.GetOutDeg(); e++) { if (Graph->IsNode(NI.GetOutNId(e))) OutDeg++; }
00788     Graph->ReserveNIdInDeg(SrcNId, InDeg);
00789     Graph->ReserveNIdOutDeg(SrcNId, OutDeg);
00790   }
00791   // add edges
00792   for (int i = 0; i < NIdV.Len(); i++) {
00793     const int SrcNId = NIdV[i];
00794     const TNodeI NI = GetNI(SrcNId);
00795     for (int e = 0; e < NI.GetOutDeg(); e++) {
00796       const int DstNId = NI.GetOutNId(e);
00797       if (Graph->IsNode(DstNId)) {
00798         Graph->AddEdge(SrcNId, DstNId); }
00799     }
00800   }
00801   return Graph;
00802 }
00803 
00804 template <class TNodeData, bool IsDir>
00805 TPt<TBigNet<TNodeData, IsDir> > TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes) const {
00806   const bool isDir = IsDir, onlySources = HasFlag(gfSources);
00807   TSize Edges=0;
00808   // find in- and out-degree
00809   TSparseHash<TInt, TIntPr> NIdDegH(NIdV.Len());
00810   for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); }
00811   for (int i = 0; i < NIdV.Len(); i++) {
00812     const TNodeI NI = GetNI(NIdV[i]);
00813     int InDeg=0, OutDeg=0;
00814     for (int n = 0; n < NI.GetOutDeg(); n++) {
00815       if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; }
00816     if (! onlySources && isDir) {
00817       for (int n = 0; n < NI.GetInDeg(); n++) {
00818         if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; }
00819     }
00820     Edges += OutDeg;
00821     NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg);
00822   }
00823   // make network
00824   typedef TBigNet<TNodeData, IsDir> TBNet;
00825   TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected));
00826   TBNet& NewNet = *NewNetPt;
00827   NewNet.Flags = Flags;
00828   // add nodes
00829   if (isDir || onlySources) {
00830     for (int i = 0; i < NIdV.Len(); i++) {
00831       const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00832       NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00833   } else {
00834     for (int i = 0; i < NIdV.Len(); i++) {
00835       const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00836       NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00837   }
00838   // add edges
00839   for (int i = 0; i < NIdV.Len(); i++) {
00840     int NId = NIdV[i];
00841     const TNodeI NI = GetNI(NId);
00842     int *NIdVPt = (int *) NewNet.GetOutNIdVPt(NId);
00843     int deg = 0;
00844     for (int e = 0; e < NI.GetOutDeg(); e++) {
00845       const int Dst = NI.GetOutNId(e);
00846       if (NewNet.IsNode(Dst)) {
00847         *NIdVPt = Dst;  NIdVPt++;  deg++; }
00848     }
00849     EAssert(deg == NIdDegH.GetDat(NId).Val2);
00850     if (isDir && ! onlySources) {
00851       EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0)
00852         || (NI.GetInVId() != NI.GetOutVId()));
00853       int * NIdVPt = (int *) NewNet.GetInNIdVPt(NId);
00854       int deg = 0;
00855       for (int e = 0; e < NI.GetInDeg(); e++) {
00856         const int Dst = NI.GetInNId(e);
00857         if (NewNet.IsNode(Dst)) {
00858           *NIdVPt = Dst;  NIdVPt++;  deg++; }
00859       }
00860       EAssert(deg == NIdDegH.GetDat(NId).Val1);
00861     }
00862   }
00863   return NewNetPt;
00864 }
00865 
00866 template <class TNodeData, bool IsDir>
00867 void TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, TBigNet* NewNetPt, const bool& RenumberNodes) const {
00868   printf("Set subgraph on %d nodes\n", NIdV.Len()); TExeTm ExeTm;
00869   const bool isDir = IsDir, onlySources = HasFlag(gfSources);
00870   TSize Edges=0;
00871   // find in- and out-degree
00872   THash<TInt, TIntPr> NIdDegH(NIdV.Len());
00873   for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); }
00874   for (int i = 0; i < NIdV.Len(); i++) {
00875     const TNodeI NI = GetNI(NIdV[i]);
00876     int InDeg=0, OutDeg=0;
00877     for (int n = 0; n < NI.GetOutDeg(); n++) {
00878       if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; }
00879     if (! onlySources && isDir) {
00880       for (int n = 0; n < NI.GetInDeg(); n++) {
00881         if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; }
00882     }
00883     Edges += OutDeg;
00884     NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg);
00885   }
00886   // make network
00887   //typedef TBigNet<TNodeData, IsDir> TBNet;
00888   //TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected));
00889   NewNetPt->Reserve(NIdV.Len(), Edges);
00890   TBigNet<TNodeData, IsDir>& NewNet = *NewNetPt;
00891   NewNet.Flags = Flags;
00892   TIntSet NIdMap;
00893   // add nodes
00894   if (! RenumberNodes) {
00895     if (isDir || onlySources) {
00896       for (int i = 0; i < NIdV.Len(); i++) {
00897         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00898         NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00899     } else {
00900       for (int i = 0; i < NIdV.Len(); i++) {
00901         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00902         NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00903     }
00904   } else { // renumber nodes
00905     NIdMap.Gen(NIdV.Len());
00906     for (int i = 0; i < NIdV.Len(); i++) { NIdMap.AddKey(NIdV[i]);  }
00907     if (isDir || onlySources) {
00908       for (int i = 0; i < NIdV.Len(); i++) {
00909         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00910         NewNet.AddNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00911     } else {
00912       for (int i = 0; i < NIdV.Len(); i++) {
00913         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00914         NewNet.AddUndirNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00915     }
00916   }
00917   // add edges
00918   for (int i = 0; i < NIdV.Len(); i++) {
00919     int NId = NIdV[i];
00920     const TNodeI NI = GetNI(NId);
00921     int *NIdVPt = (int *) NewNet.GetOutNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId);
00922     int deg = 0;
00923     for (int e = 0; e < NI.GetOutDeg(); e++) {
00924       const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetOutNId(e)):NI.GetOutNId(e);
00925       if (NewNet.IsNode(Dst)) {
00926         *NIdVPt = Dst;  NIdVPt++;  deg++; }
00927     }
00928     EAssert(deg == NIdDegH.GetDat(NId).Val2);
00929     if (isDir && ! onlySources) {
00930       EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0)
00931         || (NI.GetInVId() != NI.GetOutVId()));
00932       int * NIdVPt = (int *) NewNet.GetInNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId);
00933       int deg = 0;
00934       for (int e = 0; e < NI.GetInDeg(); e++) {
00935         const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetInNId(e)):NI.GetInNId(e);
00936         if (NewNet.IsNode(Dst)) {
00937           *NIdVPt = Dst;  NIdVPt++;  deg++; }
00938       }
00939       EAssert(deg == NIdDegH.GetDat(NId).Val1);
00940     }
00941   }
00942   printf("  %u edges [%s]\n", uint(Edges), ExeTm.GetStr());
00943 }
00944 
00945 template <class TNodeData, bool IsDir>
00946 void TBigNet<TNodeData, IsDir>::Reserve(const int& Nodes, const TSize& Edges) {
00947   NodeH.Gen(TMath::Mx(int(1.1*Nodes), 100));
00948   Pool = TVPool(TMath::Mx(Edges, (TSize) Mega(1)), Mega(100), true);
00949 }
00950 
00951 // check that in- and out-edges matchs, the table is sorted and so on
00952 template <class TNodeData, bool IsDir>
00953 bool TBigNet<TNodeData, IsDir>::IsOk() const {
00954   // check the node pool
00955   TIntV ValV; TExeTm ExeTm;
00956   printf("Is ok network:\n  Check Vec...");
00957   for (uint id = 1; id < Pool.GetVecs(); id++) {
00958     Pool.GetV(id, ValV);
00959     if (! ValV.Empty()) {
00960       EAssert((0<=ValV[0] && ValV[0]<MxNId) || ValV[0]==DelNId);
00961       try {
00962       for (int i = 1; i < ValV.Len(); i++) {
00963         //if (ValV[i]==DelNId) { continue; }
00964         // sorted & no duplicates & no empty values (can have deleted nodes)
00965         EAssertR((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId),
00966           TStr::Fmt("NId1: %d NId2:%d", ValV[i-1](),ValV[i]()));
00967         EAssertR((0<=ValV[i] && ValV[i]<MxNId) || ValV[i]==DelNId,
00968           TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId));
00969         if (! OnlySources()) {
00970           EAssertR(IsNode(ValV[i]) || ValV[i]==DelNId,
00971             TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId)); } // every link is a node
00972       }
00973       } catch (PExcept Except){
00974         printf("  %s\n", Except->GetStr().CStr());
00975         printf("  vec id: %d, lenght: %d\n", id, ValV.Len());
00976         for (int i = 1; i < ValV.Len(); i++) {
00977           printf("  %d", ValV[i]());
00978           if (!((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId))) { printf("*"); }
00979         }
00980         printf("\n");
00981         return false;
00982       }
00983     }
00984     if (id % 10000 == 0) {
00985       printf("\r  %dk / %dk [%s]", id/1000, Pool.GetVecs()/1000, ExeTm.GetStr()); }
00986   }
00987   printf("[%s]\n  Check Edges...\n", ExeTm.GetStr());
00988   // check edges
00989   int ErrCnt = 0;
00990   if (! OnlySources()) {
00991     int Cnt=0;
00992     for (TNodeI NI = BegNI(); NI < EndNI(); NI++, Cnt++) {
00993       // nodes that point to NI, have it on out-list
00994       for (int e = 0; e < NI.GetInDeg(); e++) {
00995         if (NI.GetInNId(e) == DelNId) { continue; } // skip deleted nodes
00996     if (! IsEdge(NI.GetInNId(e), NI.GetId())) {
00997       printf("\nno edge: %d --> %d", NI.GetInNId(e), NI.GetId());
00998       printf("NId: %d   deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg());
00999       for (int i = 0; i < NI.GetInDeg(); i++) { printf("  %d", NI.GetInNId(i)); }
01000       TNodeI NI2 = GetNI(NI.GetInNId(e));
01001       printf("\nNId2: %d   deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg());
01002       for (int j = 0; j < NI2.GetOutDeg(); j++) { printf("  %d", NI2.GetOutNId(j)); }
01003       printf("\n");
01004       ErrCnt++;
01005     }
01006         //EAssertR(IsEdge(NI.GetInNId(e), NI.GetId()),
01007         //  TStr::Fmt("no edge: %d --> %d", NI.GetInNId(e), NI.GetId()));
01008     }
01009       // nodes NI points to, have it on in-list
01010       for (int e = 0; e < NI.GetOutDeg(); e++) {
01011         if (NI.GetOutNId(e) == DelNId) { continue; }
01012         const int InVId = GetNode(NI.GetOutNId(e)).InVId;
01013         int* DstInV = (int *)Pool.GetValVPt(InVId);
01014     if (BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) == NULL) {
01015       printf("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e));
01016       printf("NId: %d   deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg());
01017       for (int i = 0; i < NI.GetOutDeg(); i++) { printf("  %d", NI.GetOutNId(i)); }
01018       TNodeI NI2 = GetNI(NI.GetOutNId(e));
01019       printf("\nNId2: %d   deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg());
01020       for (int j = 0; j < NI2.GetInDeg(); j++) { printf("  %d", NI2.GetInNId(j)); }
01021       printf("\n");  ErrCnt++;
01022     }
01023         //EAssertR(BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) != NULL,
01024         //  TStr::Fmt("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e)));
01025     }
01026     if (ErrCnt > 5) { FailR("error count too large!"); }
01027       if (Cnt % 100000 == 0) {
01028         printf("\r%s [%s]", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr()); }
01029     }
01030     printf("\r%s [%s]\n", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr());
01031   }
01032   return true;
01033 }
01034 
01035 template <class TNodeData, bool IsDir>
01036 void TBigNet<TNodeData, IsDir>::Dump(const TStr& Desc) const {
01037   if (! Desc.Empty()) { printf("\n%s (%d, %u):\n", Desc.CStr(), GetNodes(), GetEdges()); }
01038   else { printf("\nNodes: %d,  Edges: %u\n", GetNodes(), GetEdges()); }
01039   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
01040     printf("%d]\n  IN %d]", NI.GetId(), NI.GetInDeg());
01041     for (int i=0; i<NI.GetInDeg(); i++) { printf(" %d", NI.GetInNId(i)); }
01042     if (IsDir) {
01043       printf("\n  OUT %d]", NI.GetOutDeg());
01044       for (int i=0; i<NI.GetOutDeg(); i++) { printf(" %d", NI.GetOutNId(i)); }
01045     }
01046     printf("\n");
01047   }
01048 }
01049 
01050 // header: <Nodes, Edges, IsDir>
01051 // format: undirected <NId, Dat, OutDeg, OutNodeV>
01052 // format: directed <NId, Dat, OutDeg, OutNodeV, InDeg, InNodeV>
01053 template <class TNodeData, bool IsDir>
01054 void TBigNet<TNodeData, IsDir>::SaveForDisk(const TStr& OutFNm) const {
01055   const bool IsDirected = IsDir;
01056   TFOut FOut(OutFNm);
01057   FOut.Save(GetNodes());
01058   FOut.Save(GetEdges());
01059   FOut.Save(IsDirected);
01060   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
01061     FOut.Save(NI.GetId());
01062     NI.GetDat().Save(FOut);
01063     FOut.Save(NI.GetOutDeg());
01064     for (int i = 0; i < NI.GetOutDeg(); i++) {
01065       FOut.Save(NI.GetOutNId(i)); }
01066     if (IsDirected) {
01067       FOut.Save(NI.GetInDeg());
01068       for (int i = 0; i < NI.GetInDeg(); i++) {
01069         FOut.Save(NI.GetInNId(i)); }
01070     }
01071   }
01072 }
01073 
01074 // skip the edge pool and only load the node data hash table
01075 template <class TNodeData, bool IsDir>
01076 void TBigNet<TNodeData, IsDir>::LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH) {
01077   TFIn SIn(InFNm);
01078   TInt MxNId(SIn);
01079   TB32Set Flags(SIn);
01080   printf("skipping Pool...\n");
01081   TBool FastCopy(SIn);
01082   uint64 _GrowBy, _MxVals, _Vals;
01083   SIn.Load(_GrowBy); SIn.Load(_MxVals);  SIn.Load(_Vals);
01084   TInt EmptyVal(SIn);
01085   int Tmp;
01086   for (TSize ValN = 0; ValN < _Vals; ValN++) { SIn.Load(Tmp); }
01087   TInt MxVals(SIn), Vals(SIn);
01088   uint64 Offset;
01089   for (int ValN = 0; ValN < Vals; ValN++) { SIn.Load(Offset); }
01090   printf("loading Hode hash table...\n");
01091   NodeH.Load(SIn);
01092 }
01093 
01094 // Save the network without loading it. Save the node hash table as THash or TSparseHash
01095 template <class TNodeData, bool IsDir>
01096 void TBigNet<TNodeData, IsDir>::SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash) {
01097   TFIn FIn(InFNm);
01098   TFOut FOut(OutFNm);
01099   { TInt MxNId(FIn);  MxNId.Save(FOut);
01100   TB32Set Flags(FIn);  Flags.Save(FOut);
01101   TVPool Pool(FIn);  Pool.Save(FOut); }
01102   { TNodeH NodeH(FIn);
01103   if (! SaveSparseHash) {
01104     THash<TInt, TNode> NewH(NodeH.Len(), true);
01105     for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) {
01106       NewH.AddDat(I->Key, I->Dat);
01107     }
01108     NewH.Save(FOut);
01109   } else {
01110     FailR("Not implemented");
01111   } }
01112 }