SNAP Library , Developer Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
bignet.h
Go to the documentation of this file.
00001 
00002 // Big Network
00003 template <class TNodeData, bool IsDir>
00004 class TBigNet {
00005 public:
00006   typedef TNodeData TNodeDat;
00007   typedef TBigNet<TNodeData, IsDir> TNet;
00008   typedef TPt<TNet> PNet;
00009 public:
00010   class TNode;
00011   //typedef TSparseHash<TInt, TNode> TNodeH;
00012   typedef THash<TInt, TNode> TNodeH; // use SaveToDisk to convert between the two hash table types
00013   typedef TPt<TBigNet<TNodeData, IsDir> > PBigNet;
00014   typedef TVecPool<TInt> TPool;
00015   typedef TPt<TPool> PPool;
00016 
00017   class TNode {
00018   public:
00019     TInt InVId, OutVId; // if undirected, InNIdV == OutNIdV
00020     TNodeDat Dat;
00021   public:
00022     TNode() : InVId(-1), OutVId(-1), Dat() { }
00023     TNode(const int& InVecId, const int& OutVecId) : InVId(InVecId), OutVId(OutVecId), Dat() { }
00024     TNode(const int& InVecId, const int& OutVecId, const TNodeDat& NodeDat) :
00025       InVId(InVecId), OutVId(OutVecId), Dat(NodeDat) { }
00026     TNode(const TNode& Node) : InVId(Node.InVId), OutVId(Node.OutVId), Dat(Node.Dat) { }
00027     TNode(TSIn& SIn) : InVId(SIn), OutVId(SIn), Dat(SIn) { }
00028     void Save(TSOut& SOut) const { SOut.Save(InVId); SOut.Save(OutVId); Dat.Save(SOut); }
00029     bool IsUnused() const { return InVId==-1 && OutVId==-1; }
00030   };
00031 
00032   class TNodeI {
00033   protected:
00034     typedef typename TNodeH::TIter THashIter;
00035     THashIter NodeHI;
00036     TPool *Pool;
00037     int InDeg, OutDeg, *InNIdV, *OutNIdV; // if undirected, InNIdV==OutNIdV
00038   public:
00039     inline void GetInOutNIdV();
00040     int GetInVId() const { return NodeHI->Dat.InVId; }
00041     int GetOutVId() const { return NodeHI->Dat.OutVId; }
00042   public:
00043     TNodeI() : NodeHI(), Pool(NULL), InDeg(0), OutDeg(0), InNIdV(NULL), OutNIdV(NULL) { }
00044     TNodeI(const THashIter& NodeHIter, TPool *PoolPt) : NodeHI(NodeHIter), Pool(PoolPt) { GetInOutNIdV(); }
00045     TNodeI(const TNodeI& NodeI) : NodeHI(NodeI.NodeHI), Pool(NodeI.Pool) { GetInOutNIdV(); }
00046     TNodeI& operator = (const TNodeI& NI) { NodeHI=NI.NodeHI; Pool=NI.Pool; GetInOutNIdV(); return *this; }
00047     TNodeI& operator++ (int) { NodeHI++; GetInOutNIdV(); return *this; }
00048     bool operator < (const TNodeI& NI) const { return NodeHI < NI.NodeHI; }
00049     bool operator == (const TNodeI& NI) const { return NodeHI == NI.NodeHI; }
00050     int GetId() const { return NodeHI->Key(); }
00051     int GetDeg() const { return GetInDeg()+(InNIdV!=OutNIdV?GetOutDeg():0); }
00052     int GetInDeg() const { return InDeg; }
00053     int GetOutDeg() const { return OutDeg; }
00054     int GetInNId(const int& NodeN) const { return InNIdV[NodeN]; }
00055     int GetOutNId(const int& NodeN) const { return OutNIdV[NodeN]; }
00056     int GetOutNbrId(const int& NodeN) const { return NodeN<OutDeg ? OutNIdV[NodeN]:InNIdV[NodeN-OutDeg]; }
00057     bool IsInNId(const int& NId) const { return BinSearch(InNIdV, InNIdV+InDeg, NId)!=NULL; }
00058     bool IsOutNId(const int& NId) const { return BinSearch(OutNIdV, OutNIdV+OutDeg, NId)!=NULL; }
00059     bool IsNbrNId(const int& NId) const { return IsOutNId(NId) || IsInNId(NId); }
00060     const TNodeData& operator () () const { return GetDat(); }
00061     TNodeData& operator () () { return GetDat(); }
00062     const TNodeData& GetDat() const { return NodeHI->Dat.Dat; }
00063     TNodeData& GetDat() { return NodeHI->Dat.Dat; }
00064     // requires pointer back to the network (see as in TNodeNet)
00065     //const TNodeData& GetInNDat(const int& NodeN) const { return Net->GetNDat(GetInNId(NodeN)); }
00066     //TNodeData& GetInNDat(const int& NodeN) { return Net->GetNDat(GetInNId(NodeN)); }
00067     //const TNodeData& GetOutNDat(const int& NodeN) const { return Net->GetNDat(GetOutNId(NodeN)); }
00068     //TNodeData& GetOutNDat(const int& NodeN) { return Net->GetNDat(GetOutNId(NodeN)); }
00069     //const TNodeData& GetNbrNDat(const int& NodeN) const { return Net->GetNDat(GetNbrNId(NodeN)); }
00070     //TNodeData& GetNbrNDat(const int& NodeN) { return Net->GetNDat(GetNbrNId(NodeN)); }
00071     void Dump() const;
00072     friend class TBigNet<TNodeData, IsDir>;
00073   };
00074 
00075   class TEdgeI {
00076   private:
00077     TNodeI CurNode, EndNode;
00078     int CurEdge;
00079   public:
00080     TEdgeI() : CurNode(), EndNode(), CurEdge(0) { }
00081     TEdgeI(const TNodeI& NodeI, const TNodeI& EndNodeI, const int& EdgeN=0) : CurNode(NodeI), EndNode(EndNodeI), CurEdge(0) { }
00082     TEdgeI(const TEdgeI& EdgeI) : CurNode(EdgeI.CurNode), EndNode(EdgeI.EndNode), CurEdge(EdgeI.CurEdge) { }
00083     TEdgeI& operator = (const TEdgeI& EdgeI) { if (this!=&EdgeI) { CurNode=EdgeI.CurNode;  EndNode=EdgeI.EndNode;  CurEdge=EdgeI.CurEdge; }  return *this; }
00084     TEdgeI& operator++ (int) { CurEdge++; if (CurEdge >= CurNode.GetOutDeg()) { CurEdge=0;  CurNode++;
00085       while (CurNode < EndNode && CurNode.GetOutDeg()==0) { CurNode++; } }  return *this; }
00086     bool operator < (const TEdgeI& EdgeI) const { return CurNode<EdgeI.CurNode || (CurNode==EdgeI.CurNode && CurEdge<EdgeI.CurEdge); }
00087     bool operator == (const TEdgeI& EdgeI) const { return CurNode == EdgeI.CurNode && CurEdge == EdgeI.CurEdge; }
00088     int GetId() const { return -1; }
00089     int GetSrcNId() const { return CurNode.GetId(); }
00090     int GetDstNId() const { return CurNode.GetOutNId(CurEdge); }
00091     const TNodeData& GetSrcNDat() const { return CurNode.GetNDat(); }
00092     TNodeData& GetDstNDat() { return CurNode.GetOutNDat(CurEdge); }
00093     friend class TNodeNet<TNodeData>;
00094   };
00095 protected:
00096   bool IsNode(const int& NId, TNode& Node) const { return NodeH.IsKeyGetDat(NId, Node); }
00097   int* GetInNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).InVId); }
00098   int* GetOutNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).OutVId); }
00099   static void AddSorted(int* Beg, int* End, const int& Val);
00100   static const int* BinSearch(const int* Beg, const int* End, const int& Val);
00101   static const int* BinSearch2(const int* Beg, const int* End, const int& Val);
00102   const TNode& GetNode(const int& NId) const { return NodeH.GetDat(NId); }
00103   TNode& GetNode(const int& NId) { return NodeH.GetDat(NId); }
00104 public:
00105   enum { DelNId = INT_MAX }; // id of a deleted node
00106 protected:
00107   TCRef CRef;
00108   TInt MxNId;
00109   TB32Set Flags;
00110   TPool Pool;
00111   TNodeH NodeH;
00112 public:
00113   TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources=false);
00114   TBigNet(TSIn& SIn) : MxNId(SIn), Flags(SIn), Pool(SIn), NodeH(SIn) { TBool Dir(SIn); IAssert(Dir()==IsDir); }
00115   virtual ~TBigNet() { }
00116   virtual void Save(TSOut& SOut) const;
00117   static PBigNet New(const int& Nodes, const TSize& Edges, const bool& Sources=false) {
00118     return PBigNet(new TBigNet(Nodes, Edges, Sources)); }
00119   static PBigNet Load(TSIn& SIn) { return PBigNet(new TBigNet(SIn)); }
00120   TBigNet& operator = (const TBigNet& Net) { if (this!=&Net) {
00121     MxNId=Net.MxNId; Flags=Net.Flags; Pool=Net.Pool; NodeH=Net.NodeH; }  return *this; }
00122 
00123   bool OnlySources() const { return Flags.In(gfSources); }
00124   bool HasFlag(const TGraphFlag& Flag) const {
00125     return HasGraphFlag(typename TBigNet, Flag) || (Flag==gfSources && OnlySources()); }
00126   void DumpFlags() const;
00127 
00128   // vertices
00129   int GetNodes() const { return NodeH.Len(); }
00130   int GetMxNId() const { return MxNId; }
00131   int AddNode(int NId, const int& InDeg, const int& OutDeg);
00132   int AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeDat& NodeDat);
00133   int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV);
00134   int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeDat& NodeDat);
00135   int AddUndirNode(int NId, const int& Deg);
00136   int AddUndirNode(int NId, const int& Deg, const TNodeDat& NodeDat);
00137   int AddUndirNode(int NId, const TIntV& EdgeNIdV);
00138   int AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeDat& NodeDat);
00139   void SetInNIdV(int NId, const TIntV& InNIdV);
00140   void SetOutNIdV(int NId, const TIntV& OutNIdV);
00141   void GetInNIdV(int NId, TIntV& OutNIdV) const;
00142   void GetOutNIdV(int NId, TIntV& OutNIdV) const;
00143   bool IsNode(const int& NId) const { return NodeH.IsKey(NId); }
00144   TNodeI BegNI() const { return TNodeI(NodeH.BegI(), (TPool *)&Pool); }
00145   TNodeI EndNI() const { return TNodeI(NodeH.EndI(), (TPool *)&Pool); }
00146   TNodeI GetNI(const int& NId) const { return TNodeI(NodeH.GetI(NId), (TPool *)&Pool); }
00147   TNodeDat& GetNDat(const int& NId) { return NodeH.GetDat(NId).Dat; }
00148   const TNodeDat& GetNDat(const int& NId) const { return NodeH.GetDat(NId).Dat; }
00149   // edges
00150   TEdgeI BegEI() const { TNodeI NI=BegNI();  while(NI<EndNI() && NI.GetOutDeg()==0) NI++;  return TEdgeI(NI, EndNI()); }
00151   TEdgeI EndEI() const { return TEdgeI(EndNI(), EndNI()); }
00152   TEdgeI GetEI(const int& EId) const; // not supported
00153 
00154   // delete nodes
00155   int IsolateNode(int NId); // isolate the node by setting edge endpoints to point to node id DelNId, IsNode(DelNId)==false)
00156   int DelNode(int NId); // set neigbors to point to DelNId, delete node from the node table
00157   bool IsIsoNode(const int& NId) const;
00158   uint GetDelEdges(); // the number deleted edges
00159   void CompactEdgePool(); // after nodes are isolated and deleted, we compact the empty space
00160 
00161   // edges
00162   ::TSize GetEdges() const { return Pool.GetVals(); }
00163   int AddEdge(const int& SrcNId, const int& DstNId);
00164   bool IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir=true) const;
00165 
00166   void SortEdgeV();
00167   void InvertFromSources(uint ExpectNodes=0); // add missing nodes and in-links
00168   void Rewire(TRnd& Rnd = TInt::Rnd); // create a random network with same degree distribution (configuration model)
00169 
00170   // manipulation
00171   PNGraph GetNGraph(const bool& RenumberNodes=false) const;
00172   PNGraph GetSubNGraph(const TIntV& NIdV) const;
00173   PBigNet GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes=false) const;
00174   void GetSubGraph(const TIntV& NIdV, TBigNet* NewNet, const bool& RenumberNodes=false) const;
00175 
00176   int GetRndNId(TRnd& Rnd=TInt::Rnd) const { return NodeH.GetKey(NodeH.GetRndKeyId(Rnd)); }
00177   TNodeI GetRndNI(TRnd& Rnd=TInt::Rnd) const { return GetNI(GetRndNId(Rnd)); }
00178   void GetNIdV(TIntV& NIdV) const;
00179 
00180   bool Empty() const { return GetNodes()==0; }
00181   void Clr(const bool& DoDel = true) { MxNId = 0;  NodeH.Clr(DoDel); Pool.Clr(DoDel); }
00182   void Reserve(const int& Nodes, const TSize& Edges);
00183   void Defrag(const bool& OnlyNodeLinks = false) { }
00184   bool IsOk() const;
00185   void Dump(const TStr& Desc = TStr()) const;
00186   void SaveForDisk(const TStr& OutFNm) const;
00187 
00188   static void LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH);
00189   static void SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash);
00190   friend class TPt<TBigNet>;
00191 };
00192 
00193 // set flags
00194 namespace TSnap {
00195 template <class TNodeData, bool IsDir> struct IsDirected<TBigNet<TNodeData, IsDir> > { enum { Val = 0 }; };
00196 template <class TNodeData> struct IsDirected<TBigNet<TNodeData, true> > { enum { Val = 1 }; };
00197 template <class TNodeData, bool IsDir> struct IsNodeDat<TBigNet<TNodeData, IsDir> > { enum { Val = 1 }; };
00198 }
00199 
00200 template <class TNodeData, bool IsDir>
00201 inline void TBigNet<TNodeData, IsDir>::TNodeI::GetInOutNIdV() {
00202   if (NodeHI.IsEnd()) return;
00203   const TNode& N = NodeHI->Dat;
00204   if (! Pool->IsVId(N.InVId)) {
00205     InDeg=0; OutDeg=0; }
00206   else {
00207     InDeg=Pool->GetVLen(N.InVId);
00208     OutDeg=Pool->GetVLen(N.OutVId);
00209     InNIdV=(int *)Pool->GetValVPt(N.InVId);
00210     OutNIdV=(int *)Pool->GetValVPt(N.OutVId);
00211   }
00212 }
00213 
00214 template <class TNodeData, bool IsDir>
00215 void TBigNet<TNodeData, IsDir>::TNodeI::Dump() const {
00216   printf("NodeId: %d\n", GetId());
00217   printf("  I:%4d]", GetInDeg());
00218   for (int i = 0; i < GetInDeg(); i++) { printf("  %d", GetInNId(i)); }
00219   printf("\n  O:%4d]", GetOutDeg());
00220   for (int i = 0; i < GetOutDeg(); i++) { printf("  %d", GetOutNId(i)); }
00221   printf("\n");
00222 }
00223 
00224 template <class TNodeData, bool IsDir>
00225 void TBigNet<TNodeData, IsDir>::AddSorted(int* Beg, int* End, const int& Val) {
00226   // there is at least one free slot available for the Val
00227   IAssertR(*(End-1)==TInt::Mx, "Edge can not be added: no free space");
00228   // find position to insert
00229   int Half, Len = int(End-Beg);
00230   while (Len > 0) {
00231     Half = Len/2;
00232     int* Mid=Beg+Half;
00233     if (*Mid < Val) { Beg=Mid+1; Len=Len-Half-1; } else { Len=Half; } }
00234   IAssertR(*Beg != Val, "Value already existis");
00235   // insert
00236   memmove(Beg+1, Beg, sizeof(int)*(End-Beg-1));
00237   *Beg = Val;
00238 }
00239 
00240 template <class TNodeData, bool IsDir>
00241 const int* TBigNet<TNodeData, IsDir>::BinSearch(const int* Beg, const int* End, const int& Val) {
00242   End--;  const int *Mid;
00243   while (Beg <= End) { Mid = Beg+(End-Beg)/2;
00244     if (*Mid == Val) { return Mid; }
00245     else if (Val < *Mid) { End=Mid-1; } else { Beg=Mid+1; }
00246   }
00247   return NULL;
00248 }
00249 
00250 template <class TNodeData, bool IsDir>
00251 const int* TBigNet<TNodeData, IsDir>::BinSearch2(const int* Beg, const int* End, const int& Val) {
00252   const int* First = Beg;
00253   const int* Last = End;
00254   const int* Mid;
00255   TSize len = End-Beg, half;
00256   while (len > 0) {
00257     half = len>>1;  Mid=First+half;
00258     if (*Mid < Val) { First = Mid; First++; len=len-half-1; }
00259     else { len=half; }
00260   }
00261   return First==Last ? Last-1 : First;
00262 }
00263 
00264 template <class TNodeData, bool IsDir>
00265 TBigNet<TNodeData, IsDir>::TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources) :
00266 CRef(), MxNId(0), Flags(), Pool(IsDir ? 2*Edges:Edges, 10000000, true, TInt::Mx), NodeH(Nodes) {
00267   //Flags.Incl(gfNodeGraph);
00268   //Flags.Incl(gfNetwork);
00269   //if (IsDir) { Flags.Incl(gfDirected); }
00270   if (Sources) {
00271     Flags.Incl(gfSources);
00272     IAssertR(! IsDir, "Jure: not clear what happens is graph is Undirected and has only sources.");
00273   }
00274   //DumpFlags();
00275 }
00276 
00277 template <class TNodeData, bool IsDir>
00278 void TBigNet<TNodeData, IsDir>::Save(TSOut& SOut) const {
00279   MxNId.Save(SOut);
00280   Flags.Save(SOut);
00281   Pool.Save(SOut);
00282   NodeH.Save(SOut);
00283   TBool(IsDir).Save(SOut);
00284 }
00285 
00286 template <class TNodeData, bool IsDir>
00287 void TBigNet<TNodeData, IsDir>::DumpFlags() const {
00288   for (int i = 1; i <int(gfMx); i++) {
00289     if (HasFlag(TGraphFlag(i))) { printf("  +"); }
00290     else { printf("  -"); }
00291     printf("%s", TSnap::GetFlagStr(TGraphFlag(i)).CStr());
00292   }
00293   printf("\n");
00294 }
00295 
00296 template <class TNodeData, bool IsDir>
00297 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg) {
00298   CAssert(IsDir);
00299   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00300   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00301   TNode& Node = NodeH.AddDat(NId);
00302   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00303   Node.InVId = Pool.AddEmptyV(InDeg);
00304   Node.OutVId = Pool.AddEmptyV(OutDeg);
00305   return NId;
00306 }
00307 
00308 template <class TNodeData, bool IsDir>
00309 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeData& NodeDat) {
00310   CAssert(IsDir);
00311   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00312   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00313   TNode& Node = NodeH.AddDat(NId);
00314   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00315   Node.InVId = Pool.AddEmptyV(InDeg);
00316   Node.OutVId = Pool.AddEmptyV(OutDeg);
00317   Node.Dat = NodeDat;
00318   return NId;
00319 }
00320 
00321 template <class TNodeData, bool IsDir>
00322 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg) {
00323   CAssert(! IsDir);
00324   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00325   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00326   TNode& Node = NodeH.AddDat(NId);
00327   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00328   Node.InVId = Pool.AddEmptyV(Deg);
00329   Node.OutVId = Node.InVId; // same vector
00330   return NId;
00331 }
00332 
00333 template <class TNodeData, bool IsDir>
00334 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg, const TNodeData& NodeDat) {
00335   CAssert(! IsDir);
00336   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00337   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00338   TNode& Node = NodeH.AddDat(NId);
00339   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00340   Node.InVId = Pool.AddEmptyV(Deg);
00341   Node.OutVId = Node.InVId; // same vector
00342   Node.Dat = NodeDat;
00343   return NId;
00344 }
00345 
00346 template <class TNodeData, bool IsDir>
00347 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV) {
00348   CAssert(IsDir);
00349   IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted());
00350   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00351   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00352   TNode& Node = NodeH.AddDat(NId);
00353   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00354   Node.InVId = Pool.AddV(InNIdV);
00355   Node.OutVId = Pool.AddV(OutNIdV);
00356   return NId;
00357 }
00358 
00359 template <class TNodeData, bool IsDir>
00360 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeData& NodeDat) {
00361   CAssert(IsDir);
00362   IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted());
00363   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00364   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00365   TNode& Node = NodeH.AddDat(NId);
00366   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00367   Node.InVId = Pool.AddV(InNIdV);
00368   Node.OutVId = Pool.AddV(OutNIdV);
00369   Node.Dat = NodeDat;
00370   return NId;
00371 }
00372 
00373 template <class TNodeData, bool IsDir>
00374 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV) {
00375   CAssert(! IsDir);
00376   IAssert(EdgeNIdV.IsSorted());
00377   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00378   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00379   TNode& Node = NodeH.AddDat(NId);
00380   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00381   Node.InVId = Pool.AddV(EdgeNIdV);
00382   Node.OutVId = Node.InVId;
00383   return NId;
00384 }
00385 
00386 template <class TNodeData, bool IsDir>
00387 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeData& NodeDat) {
00388   CAssert(! IsDir);
00389   IAssert(EdgeNIdV.IsSorted());
00390   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00391   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00392   TNode& Node = NodeH.AddDat(NId);
00393   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00394   Node.InVId = Pool.AddV(EdgeNIdV);
00395   Node.OutVId = Node.InVId;
00396   Node.Dat = NodeDat;
00397   return NId;
00398 }
00399 
00400 template <class TNodeData, bool IsDir>
00401 void TBigNet<TNodeData, IsDir>::SetInNIdV(int NId, const TIntV& InNIdV) {
00402   TNode Node;  EAssert(IsNode(NId, Node));
00403   TIntV InNodesV;  Pool.GetV(Node.InVId, InNodesV);
00404   EAssert(InNIdV.Len() == InNodesV.Len());
00405   memcpy(InNodesV.BegI(), InNIdV.BegI(), sizeof(TInt)*InNIdV.Len());
00406 }
00407 
00408 template <class TNodeData, bool IsDir>
00409 void TBigNet<TNodeData, IsDir>::SetOutNIdV(int NId, const TIntV& OutNIdV) {
00410   TNode Node;  EAssert(IsNode(NId, Node));
00411   TIntV OutNodesV;  Pool.GetV(Node.OutVId, OutNodesV);
00412   EAssert(OutNIdV.Len() == OutNodesV.Len());
00413   memcpy(OutNodesV.BegI(), OutNIdV.BegI(), sizeof(TInt)*OutNIdV.Len());
00414 }
00415 
00416 template <class TNodeData, bool IsDir>
00417 void TBigNet<TNodeData, IsDir>::GetInNIdV(int NId, TIntV& InNIdV) const {
00418   TNode Node;  EAssertR(IsNode(NId, Node), TStr::Fmt("Not node: NId=%d\n", NId).CStr());
00419   Pool.GetV(Node.InVId, InNIdV);
00420 }
00421 
00422 template <class TNodeData, bool IsDir>
00423 void TBigNet<TNodeData, IsDir>::GetOutNIdV(int NId, TIntV& OutNIdV) const {
00424   TNode Node;  EAssert(IsNode(NId, Node));
00425   Pool.GetV(Node.OutVId, OutNIdV);
00426 }
00427 
00428 // Node is deleted by setting edge endpoints to point to node id -1 (DelNId)
00429 // No memory is freed
00430 template <class TNodeData, bool IsDir>
00431 int  TBigNet<TNodeData, IsDir>::IsolateNode(int NId) {
00432   TIntV OutV;
00433   int NDel = 0;
00434   // out-edges
00435   GetOutNIdV(NId, OutV);
00436   for (int i = 0; i < OutV.Len(); i++) {
00437     if (OutV[i] == DelNId) { break; } // because they are sorted
00438     // fast implementation
00439     const TNode& N = NodeH.GetDat(OutV[i]);
00440     int *InNIdV = (int *) Pool.GetValVPt(N.InVId);
00441     const int Deg = Pool.GetVLen(N.InVId);
00442     int* Val = (int *) BinSearch(InNIdV, InNIdV+Deg, NId);
00443     if (Val == NULL) {
00444       printf("BAD: Can't find: OUT: NId: %d -- OutNbrId: %d\n", NId, OutV[i]());
00445       continue;
00446     }
00447     IAssert(Val != 0);
00448     memcpy(Val, Val+1, sizeof(int)*int(InNIdV+Deg-Val));
00449     *(InNIdV+Deg-1) = DelNId;  NDel++;
00450   }
00451   OutV.PutAll(DelNId);
00452   // in-edges
00453   if (IsDir) {
00454     TIntV InV;
00455     GetInNIdV(NId, InV);
00456     for (int i = 0; i < InV.Len(); i++) {
00457       if (InV[i] == DelNId) { break; }
00458       // fast implementation
00459       const TNode& N = NodeH.GetDat(InV[i]);
00460       int *OutNIdV = (int *) Pool.GetValVPt(N.OutVId);
00461       const int Deg = Pool.GetVLen(N.OutVId);
00462       int* Val = (int *) BinSearch(OutNIdV, OutNIdV+Deg, NId);
00463       if (Val == NULL) {
00464         printf("IN: NId: %d -- InNbrId: %d\n", NId, OutV[i]());
00465         continue;
00466       }
00467       IAssert(Val != 0);
00468       memcpy(Val, Val+1, sizeof(int)*int(OutNIdV+Deg-Val));
00469       *(OutNIdV+Deg-1) = DelNId;  NDel++;
00470     }
00471     InV.PutAll(DelNId);
00472   }
00473   return NDel;
00474 }
00475 
00476 // set neigbors to point to DelNId, delete node from the node table
00477 template <class TNodeData, bool IsDir>
00478 int  TBigNet<TNodeData, IsDir>::DelNode(int NId) {
00479   const int DelEdges = IsolateNode(NId);
00480   NodeH.DelKey(NId);
00481   return DelEdges;
00482 }
00483 
00484 template <class TNodeData, bool IsDir>
00485 bool TBigNet<TNodeData, IsDir>::IsIsoNode(const int& NId) const {
00486   if (NId == DelNId) { return true; }
00487   TIntV OutV;
00488   GetOutNIdV(NId, OutV);
00489   if (OutV.Empty() || OutV[0] == DelNId) { return true; }
00490   return false;
00491 }
00492 
00493 // the number deleted edges
00494 template <class TNodeData, bool IsDir>
00495 uint TBigNet<TNodeData, IsDir>::GetDelEdges() {
00496   uint DelEdges = 0;
00497   TIntV OutV;
00498   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00499     const int NId = NI.GetId();
00500     GetOutNIdV(NId, OutV);
00501     for (int i = 0; i < OutV.Len(); i++) {
00502       if (OutV[i] == DelNId) { DelEdges++; }
00503     }
00504   }
00505   return DelEdges;
00506 }
00507 
00508 template <class TNodeData, bool IsDir>
00509 void TBigNet<TNodeData, IsDir>::CompactEdgePool() {
00510   Pool.CompactPool(DelNId);
00511 }
00512 
00513 template <class TNodeData, bool IsDir>
00514 int TBigNet<TNodeData, IsDir>::AddEdge(const int& SrcNId, const int& DstNId) {
00515   TNode Src;  IAssert(IsNode(SrcNId, Src));
00516   int* OutV = (int *)Pool.GetValVPt(Src.OutVId);
00517   const int OutVLen = Pool.GetVLen(Src.OutVId);
00518   Assert(BinSearch(OutV, OutV+OutVLen, DstNId)==NULL); // no edge yet
00519   AddSorted(OutV, OutV+OutVLen, DstNId);
00520   if (! OnlySources()) {
00521     TNode Dst;  IAssert(IsNode(DstNId, Dst));
00522     int* InV = (int *)Pool.GetValVPt(Dst.InVId);
00523     const int InVLen = Pool.GetVLen(Dst.InVId);
00524     AddSorted(InV, InV+InVLen, SrcNId);
00525   }
00526   return -1; // edge id
00527 }
00528 
00529 template <class TNodeData, bool IsDir>
00530 bool TBigNet<TNodeData, IsDir>::IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir) const {
00531   TNode Src, Dst;
00532   if (! IsNode(SrcNId, Src)) { return false; } // no source node
00533   int* OutV1 = (int *)Pool.GetValVPt(Src.OutVId);
00534   const bool IsEdge = BinSearch(OutV1, OutV1+Pool.GetVLen(Src.OutVId), DstNId) != NULL;
00535   if (IsEdge && OnlySources()) { return true; }
00536   const bool IsDstNode = IsNode(DstNId, Dst);
00537   if (! IsDstNode) { return false; } // no destination node
00538   else if (IsEdge) { return true; } // destination and link found
00539   else if (! Dir) { // check for the undirected version of the edge
00540     int* OutV2 = (int *)Pool.GetValVPt(Dst.OutVId);
00541     return BinSearch(OutV2, OutV2+Pool.GetVLen(Dst.OutVId), SrcNId) != NULL; }
00542   else { return false; }
00543 }
00544 
00545 template <class TNodeData, bool IsDir>
00546 void TBigNet<TNodeData, IsDir>::GetNIdV(TIntV& NIdV) const {
00547   NIdV.Reserve(GetNodes(), 0);
00548   for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) {
00549     NIdV.Add(I->Key); }
00550 }
00551 
00552 template <class TNodeData, bool IsDir>
00553 void TBigNet<TNodeData, IsDir>::SortEdgeV() {
00554   printf("Sorting Edges... ");
00555   TExeTm ExeTm;
00556   TIntV OutV, InV;
00557   int cnt = 0;
00558   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00559     const int NId = NI.GetId();
00560     GetOutNIdV(NId, OutV);
00561     OutV.Sort();
00562     if (IsDir) {
00563       GetInNIdV(NId, InV);
00564       InV.Sort();
00565     }
00566     if (++cnt % Mega(1) == 0) { printf("\r  sort:%dm  %s", cnt/Mega(1), ExeTm.GetStr()); }
00567   }
00568   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00569     const int NId = NI.GetId();
00570     GetOutNIdV(NId, OutV);
00571     IAssert(OutV.IsSorted());
00572     GetInNIdV(NId, InV);
00573     IAssert(InV.IsSorted());
00574     if (++cnt % Mega(1) == 0) { printf("\r  check sorted:%dm  %s", cnt/Mega(1), ExeTm.GetStr()); }
00575   }
00576   printf("done. [%s]\n", ExeTm.GetStr());
00577 }
00578 
00579 // add missing nodes and in-links from a network of sources
00580 template <class TNodeData, bool IsDir>
00581 void TBigNet<TNodeData, IsDir>::InvertFromSources(uint ExpectNodes) {
00582   typedef THash<TInt, TInt> TDegHash;
00583   typedef int* TIntPt;
00584   if (ExpectNodes == 0) ExpectNodes=4*GetNodes();
00585   printf("\nInverting BigNet: reserved for %s nodes\n", TInt::GetMegaStr(ExpectNodes).CStr());
00586   CAssert(IsDir);
00587   IAssert(OnlySources());
00588   TDegHash InDegH(ExpectNodes);
00589   TSize NDest=0;
00590   // get node in-degree
00591   uint c=0;  TExeTm ExeTm;
00592   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) {
00593     for (int e = 0; e < NI.GetOutDeg(); e++) {
00594       InDegH.AddDat(NI.GetOutNId(e)) += 1; }
00595     if (c%100000==0) printf("\r%s h:%d [%g]    ", TInt::GetMegaStr(c).CStr(), InDegH.Len(), ExeTm.GetSecs());
00596   }
00597   printf("\n Resizing NodePool: %lld -> %lld\n", uint64(Pool.Reserved()), uint64(GetEdges()));
00598   if (2*GetEdges() > Pool.Reserved()) {
00599     Pool.Reserve(2*GetEdges()); }
00600   // add nodes
00601   printf("NodeH: %s nodes, InDeg: %s nodes, Reserve: %s\n", TInt::GetMegaStr(NodeH.Len()).CStr(),
00602     TInt::GetMegaStr(InDegH.Len()).CStr(), TInt::GetMegaStr(ExpectNodes).CStr());
00603   NodeH.Reserve(ExpectNodes);
00604   for (TDegHash::TIter I = InDegH.BegI(); I < InDegH.EndI(); I++) {
00605     const int NId = I->Key, InDeg = I->Dat;
00606     if (! IsNode(NId)) {
00607       AddNode(NId, InDeg, 0); NDest++; } // new destination node, no out-links
00608     else {
00609       TNode& Node = GetNode(NId);
00610       IAssert(Node.InVId == 0); // no in-links
00611       Node.InVId = Pool.AddEmptyV(InDeg); }
00612   }
00613   InDegH.Clr(true);
00614   printf("Added: %lld destination nodes\n", uint64(NDest));
00615   printf("Graph nodes: %lld nodes\n", uint64(GetNodes()));
00616   // pointer to in-links vector
00617   THash<TInt, int*> NIdToPtH(GetNodes());
00618   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++)
00619     NIdToPtH.AddDat(NI.GetId(), (int *)Pool.GetValVPt(NI.GetInVId()));
00620   // add in-edges
00621   printf("Adding edges...\n");
00622   c = 0;
00623   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) {
00624     const int SrcNId = NI.GetId();
00625     for (int e = 0; e < NI.GetOutDeg(); e++) {
00626       TIntPt& InV = NIdToPtH.GetDat(NI.GetOutNId(e));
00627       IAssert(*InV == TInt::Mx);
00628       *InV = SrcNId;  InV++;
00629     }
00630     if (c%100000==0) printf("\r%s [%g]   ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs());
00631   }
00632   // sort in-links
00633   printf("\nSorting in-links...\n");
00634   TIntV InNIdV;  c = 0;
00635   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) {
00636     Pool.GetV(NI.GetInVId(), InNIdV);
00637     InNIdV.Sort();
00638     if (c%100000==0) printf("\r%s [%g]    ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs());
00639   }
00640   printf("\r...done [%g]\n", ExeTm.GetSecs());
00641   Flags.Excl(gfSources);
00642 }
00643 
00644 template <class TNodeData, bool IsDir>
00645 void TBigNet<TNodeData, IsDir>::Rewire(TRnd& Rnd) {
00646   uint64 NDup=0, NResolve=0, NAddit=0, cnt = 0;
00647   TExeTm ExeTm;
00648   IAssertR(! IsDir, "Only undirected networks are supported");
00649   printf("Rewiring the network... 1");
00650   Pool.ShuffleAll(Rnd);  printf("[%s]\n", ExeTm.GetStr());
00651   //Pool.ShuffleAll(Rnd);  printf(" done [%s]\n", ExeTm.GetStr());
00652   printf("  sorting edges...\n");
00653   SortEdgeV(); // sort individual edge vectors
00654   printf(" done [%s]\n", ExeTm.GetStr());
00655   // remove duplicates and self edges
00656   printf("  removing duplicates...\n");
00657   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) {
00658     const int VId = NI.GetOutVId();
00659     int Len = Pool.GetVLen(VId);
00660     int* V = (int *)Pool.GetValVPt(VId);
00661     for (int i = 0; i < Len-1 && V[i]!=DelNId; i++) {
00662       if (V[i] == V[i+1] || V[i]==NI.GetId()) {
00663         memcpy(V+i, V+i+1, sizeof(int)*(Len-i-1)); i--;
00664         V[Len-1] = DelNId;  NDup++;
00665       }
00666     }
00667     if (Len > 0 && V[Len-1]==NI.GetId()) { V[Len-1] = DelNId;  NDup++; }
00668     if (cnt % Mega(10) == 0) { printf(".");  fflush(stdout); }
00669   }
00670   printf("\n    %llu duplicate edges removed [%s]\n", NDup, ExeTm.GetStr());
00671   if (OnlySources()) { return; }
00672   // resolve one way edges
00673   printf("  resolving one way edges...\n"); cnt=0; fflush(stdout);
00674   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) {
00675     for (int e = 0; e < NI.GetOutDeg(); e++) {
00676       if (NI.GetOutNId(e) == DelNId) { continue; } // skip deleted nodes
00677       const int InVId = GetNode(NI.GetOutNId(e)).InVId;
00678       const int InVLen = Pool.GetVLen(InVId); IAssert(InVLen>=0 && InVLen < Mega(3));
00679       int* InV = (int *) Pool.GetValVPt(InVId);
00680       int* Val = (int *) BinSearch2(InV, InV+InVLen, NI.GetId());
00681       if (*Val == NI.GetId()) { continue; } // points back, everything is ok
00682       if (InVLen>0 && InV[InVLen-1] == DelNId) { // there is space at the end, insert
00683         IAssert((InVLen-(Val-InV)-1) >= 0);
00684         memmove(Val+1, Val, sizeof(int)*(InVLen-(Val-InV)-1));
00685         *Val = NI.GetId();
00686       } else { // the other end could point at us but it doesn't
00687         memmove(NI.OutNIdV+e, NI.OutNIdV+e+1, sizeof(int)*(NI.GetOutDeg()-e-1));
00688         NI.OutNIdV[NI.GetOutDeg()-1]=DelNId;  e--;
00689       }
00690       NResolve++;
00691     }
00692     if (cnt % Mega(10) == 0) { printf(".");  fflush(stdout); }
00693   }
00694   printf("\n    %llu resolved edges [%s]\n", NResolve, ExeTm.GetStr());
00695   // check if there are two nodes that still have space and are not yet connected and connect them
00696   printf("  filling empty edge slots...\n");
00697   TIntPrV SlotNIdV;
00698   THash<TInt, TInt> SlotNIdH;
00699   int CumSlots=0;
00700   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00701     if (NI.GetOutNId(NI.GetOutDeg()-1) == DelNId) {
00702       int NSlots = 0;
00703       for (int s = NI.GetOutDeg()-1; s >= 0; s--) {
00704         if (NI.GetOutNId(s) == DelNId) { NSlots++; }
00705         else { break; }
00706       }
00707       SlotNIdV.Add(TIntPr(NI.GetId(), NSlots));
00708       SlotNIdH.AddDat(NI.GetId(), NSlots);
00709       CumSlots+=NSlots;
00710     }
00711   }
00712   printf("    %d nodes, with %d spokes available, %d edges\n", SlotNIdH.Len(), CumSlots, CumSlots/2);
00713   TIntV NIdV;  SlotNIdH.GetKeyV(NIdV);
00714   for (int ni1 = 0; ni1 < NIdV.Len(); ni1++) {
00715     const int nid = NIdV[ni1];
00716     if (! SlotNIdH.IsKey(nid) || SlotNIdH.GetDat(nid) == 0) { continue; }
00717     const int NI1Slots = SlotNIdH.GetDat(nid);
00718     if ((SlotNIdH.GetMxKeyIds() - SlotNIdH.Len())/double(SlotNIdH.GetMxKeyIds()) > 0.5) { SlotNIdH.Defrag(); }
00719     TNodeI NI  = GetNI(nid);
00720     for (int NTries = 0; NTries < 4*NI1Slots && NI.GetOutNId(NI.GetOutDeg()-1) == DelNId; NTries++) {
00721       const int nid2 = SlotNIdH.GetKey(SlotNIdH.GetRndKeyId(Rnd));
00722       if (nid == nid2) { continue; }
00723       TNodeI NI2  = GetNI(nid2);
00724       // insert the edge
00725       if (NI2.GetOutNId(NI2.GetOutDeg()-1)==DelNId && ! NI2.IsInNId(NI.GetId())) {
00726         int *V1 = (int *) BinSearch2(NI.OutNIdV, NI.OutNIdV+NI.GetOutDeg(), NI2.GetId());
00727         int *V2 = (int *) BinSearch2(NI2.InNIdV, NI2.InNIdV+NI2.GetInDeg(), NI.GetId());
00728         if (*V1 != DelNId) { memmove(V1+1, V1, sizeof(int)*(NI.GetOutDeg()-(V1-NI.OutNIdV)-1)); }
00729         if (*V2 != DelNId) { memmove(V2+1, V2, sizeof(int)*(NI2.GetInDeg()-(V2-NI2.InNIdV)-1)); }
00730         *V1 = NI2.GetId();  *V2 = NI.GetId();
00731         NAddit++;
00732         SlotNIdH.GetDat(nid)--;  SlotNIdH.GetDat(nid2)--;
00733       }
00734       if (SlotNIdH.GetDat(nid2) == 0) { SlotNIdH.DelKey(nid2); continue; }
00735       if (SlotNIdH.GetDat(nid) == 0) { SlotNIdH.DelKey(nid); break; }
00736     }
00737     if (ni1 % Mega(1) == 0) { printf(".");  fflush(stdout); }
00738   }
00739   printf("    %llu edges added.\nDONE. TOTAL REWIRE TIME [%s]\n\n", NAddit, ExeTm.GetStr());
00740 }
00741 
00742 template <class TNodeData, bool IsDir>
00743 PNGraph TBigNet<TNodeData, IsDir>::GetNGraph(const bool& RenumberNodes) const {
00744   IAssert(RenumberNodes == false);
00745   PNGraph Graph = TNGraph::New();
00746   Graph->Reserve(GetNodes(), 0);
00747   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00748     Graph->AddNode(NI.GetId(), Pool, NI.GetInVId(), NI.GetOutVId());
00749   }
00750   return Graph;
00751 }
00752 
00753 template <class TNodeData, bool IsDir>
00754 PNGraph TBigNet<TNodeData, IsDir>::GetSubNGraph(const TIntV& NIdV) const {
00755   PNGraph Graph = TNGraph::New(NIdV.Len(), 0);
00756   // add nodes
00757   for (int i = 0; i < NIdV.Len(); i++) {
00758     Graph->AddNode(NIdV[i]); }
00759   // reserve node in- and out-degree
00760   for (int i = 0; i < NIdV.Len(); i++) {
00761     const int SrcNId = NIdV[i];
00762     const TNodeI NI = GetNI(SrcNId);
00763     int InDeg=0, OutDeg=0;
00764     for (int e = 0; e < NI.GetInDeg(); e++) { if (Graph->IsNode(NI.GetInNId(e))) InDeg++; }
00765     for (int e = 0; e < NI.GetOutDeg(); e++) { if (Graph->IsNode(NI.GetOutNId(e))) OutDeg++; }
00766     Graph->ReserveNIdInDeg(SrcNId, InDeg);
00767     Graph->ReserveNIdOutDeg(SrcNId, OutDeg);
00768   }
00769   // add edges
00770   for (int i = 0; i < NIdV.Len(); i++) {
00771     const int SrcNId = NIdV[i];
00772     const TNodeI NI = GetNI(SrcNId);
00773     for (int e = 0; e < NI.GetOutDeg(); e++) {
00774       const int DstNId = NI.GetOutNId(e);
00775       if (Graph->IsNode(DstNId)) {
00776         Graph->AddEdge(SrcNId, DstNId); }
00777     }
00778   }
00779   return Graph;
00780 }
00781 
00782 template <class TNodeData, bool IsDir>
00783 TPt<TBigNet<TNodeData, IsDir> > TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes) const {
00784   const bool isDir = IsDir, onlySources = HasFlag(gfSources);
00785   TSize Edges=0;
00786   // find in- and out-degree
00787   TSparseHash<TInt, TIntPr> NIdDegH(NIdV.Len());
00788   for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); }
00789   for (int i = 0; i < NIdV.Len(); i++) {
00790     const TNodeI NI = GetNI(NIdV[i]);
00791     int InDeg=0, OutDeg=0;
00792     for (int n = 0; n < NI.GetOutDeg(); n++) {
00793       if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; }
00794     if (! onlySources && isDir) {
00795       for (int n = 0; n < NI.GetInDeg(); n++) {
00796         if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; }
00797     }
00798     Edges += OutDeg;
00799     NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg);
00800   }
00801   // make network
00802   typedef TBigNet<TNodeData, IsDir> TBNet;
00803   TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected));
00804   TBNet& NewNet = *NewNetPt;
00805   NewNet.Flags = Flags;
00806   // add nodes
00807   if (isDir || onlySources) {
00808     for (int i = 0; i < NIdV.Len(); i++) {
00809       const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00810       NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00811   } else {
00812     for (int i = 0; i < NIdV.Len(); i++) {
00813       const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00814       NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00815   }
00816   // add edges
00817   for (int i = 0; i < NIdV.Len(); i++) {
00818     int NId = NIdV[i];
00819     const TNodeI NI = GetNI(NId);
00820     int *NIdVPt = (int *) NewNet.GetOutNIdVPt(NId);
00821     int deg = 0;
00822     for (int e = 0; e < NI.GetOutDeg(); e++) {
00823       const int Dst = NI.GetOutNId(e);
00824       if (NewNet.IsNode(Dst)) {
00825         *NIdVPt = Dst;  NIdVPt++;  deg++; }
00826     }
00827     EAssert(deg == NIdDegH.GetDat(NId).Val2);
00828     if (isDir && ! onlySources) {
00829       EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0)
00830         || (NI.GetInVId() != NI.GetOutVId()));
00831       int * NIdVPt = (int *) NewNet.GetInNIdVPt(NId);
00832       int deg = 0;
00833       for (int e = 0; e < NI.GetInDeg(); e++) {
00834         const int Dst = NI.GetInNId(e);
00835         if (NewNet.IsNode(Dst)) {
00836           *NIdVPt = Dst;  NIdVPt++;  deg++; }
00837       }
00838       EAssert(deg == NIdDegH.GetDat(NId).Val1);
00839     }
00840   }
00841   return NewNetPt;
00842 }
00843 
00844 template <class TNodeData, bool IsDir>
00845 void TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, TBigNet* NewNetPt, const bool& RenumberNodes) const {
00846   printf("Set subgraph on %d nodes\n", NIdV.Len()); TExeTm ExeTm;
00847   const bool isDir = IsDir, onlySources = HasFlag(gfSources);
00848   TSize Edges=0;
00849   // find in- and out-degree
00850   THash<TInt, TIntPr> NIdDegH(NIdV.Len());
00851   for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); }
00852   for (int i = 0; i < NIdV.Len(); i++) {
00853     const TNodeI NI = GetNI(NIdV[i]);
00854     int InDeg=0, OutDeg=0;
00855     for (int n = 0; n < NI.GetOutDeg(); n++) {
00856       if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; }
00857     if (! onlySources && isDir) {
00858       for (int n = 0; n < NI.GetInDeg(); n++) {
00859         if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; }
00860     }
00861     Edges += OutDeg;
00862     NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg);
00863   }
00864   // make network
00865   //typedef TBigNet<TNodeData, IsDir> TBNet;
00866   //TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected));
00867   NewNetPt->Reserve(NIdV.Len(), Edges);
00868   TBigNet<TNodeData, IsDir>& NewNet = *NewNetPt;
00869   NewNet.Flags = Flags;
00870   TIntSet NIdMap;
00871   // add nodes
00872   if (! RenumberNodes) {
00873     if (isDir || onlySources) {
00874       for (int i = 0; i < NIdV.Len(); i++) {
00875         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00876         NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00877     } else {
00878       for (int i = 0; i < NIdV.Len(); i++) {
00879         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00880         NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00881     }
00882   } else { // renumber nodes
00883     NIdMap.Gen(NIdV.Len());
00884     for (int i = 0; i < NIdV.Len(); i++) { NIdMap.AddKey(NIdV[i]);  }
00885     if (isDir || onlySources) {
00886       for (int i = 0; i < NIdV.Len(); i++) {
00887         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00888         NewNet.AddNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00889     } else {
00890       for (int i = 0; i < NIdV.Len(); i++) {
00891         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00892         NewNet.AddUndirNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00893     }
00894   }
00895   // add edges
00896   for (int i = 0; i < NIdV.Len(); i++) {
00897     int NId = NIdV[i];
00898     const TNodeI NI = GetNI(NId);
00899     int *NIdVPt = (int *) NewNet.GetOutNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId);
00900     int deg = 0;
00901     for (int e = 0; e < NI.GetOutDeg(); e++) {
00902       const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetOutNId(e)):NI.GetOutNId(e);
00903       if (NewNet.IsNode(Dst)) {
00904         *NIdVPt = Dst;  NIdVPt++;  deg++; }
00905     }
00906     EAssert(deg == NIdDegH.GetDat(NId).Val2);
00907     if (isDir && ! onlySources) {
00908       EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0)
00909         || (NI.GetInVId() != NI.GetOutVId()));
00910       int * NIdVPt = (int *) NewNet.GetInNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId);
00911       int deg = 0;
00912       for (int e = 0; e < NI.GetInDeg(); e++) {
00913         const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetInNId(e)):NI.GetInNId(e);
00914         if (NewNet.IsNode(Dst)) {
00915           *NIdVPt = Dst;  NIdVPt++;  deg++; }
00916       }
00917       EAssert(deg == NIdDegH.GetDat(NId).Val1);
00918     }
00919   }
00920   printf("  %u edges [%s]\n", uint(Edges), ExeTm.GetStr());
00921 }
00922 
00923 template <class TNodeData, bool IsDir>
00924 void TBigNet<TNodeData, IsDir>::Reserve(const int& Nodes, const TSize& Edges) {
00925   NodeH.Gen(TMath::Mx(int(1.1*Nodes), 100));
00926   Pool = TPool(TMath::Mx(Edges, (TSize) Mega(1)), Mega(100), true);
00927 }
00928 
00929 // check that in- and out-edges matchs, the table is sorted and so on
00930 template <class TNodeData, bool IsDir>
00931 bool TBigNet<TNodeData, IsDir>::IsOk() const {
00932   // check the node pool
00933   TIntV ValV; TExeTm ExeTm;
00934   printf("Is ok network:\n  Check Vec...");
00935   for (uint id = 1; id < Pool.GetVecs(); id++) {
00936     Pool.GetV(id, ValV);
00937     if (! ValV.Empty()) {
00938       EAssert((0<=ValV[0] && ValV[0]<MxNId) || ValV[0]==DelNId);
00939       try {
00940       for (int i = 1; i < ValV.Len(); i++) {
00941         //if (ValV[i]==DelNId) { continue; }
00942         // sorted & no duplicates & no empty values (can have deleted nodes)
00943         EAssertR((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId),
00944           TStr::Fmt("NId1: %d NId2:%d", ValV[i-1](),ValV[i]()));
00945         EAssertR((0<=ValV[i] && ValV[i]<MxNId) || ValV[i]==DelNId,
00946           TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId));
00947         if (! OnlySources()) {
00948           EAssertR(IsNode(ValV[i]) || ValV[i]==DelNId,
00949             TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId)); } // every link is a node
00950       }
00951       } catch (PExcept Except){
00952         printf("  %s\n", Except->GetStr().CStr());
00953         printf("  vec id: %d, lenght: %d\n", id, ValV.Len());
00954         for (int i = 1; i < ValV.Len(); i++) {
00955           printf("  %d", ValV[i]());
00956           if (!((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId))) { printf("*"); }
00957         }
00958         printf("\n");
00959         return false;
00960       }
00961     }
00962     if (id % 10000 == 0) {
00963       printf("\r  %dk / %dk [%s]", id/1000, Pool.GetVecs()/1000, ExeTm.GetStr()); }
00964   }
00965   printf("[%s]\n  Check Edges...\n", ExeTm.GetStr());
00966   // check edges
00967   int ErrCnt = 0;
00968   if (! OnlySources()) {
00969     int Cnt=0;
00970     for (TNodeI NI = BegNI(); NI < EndNI(); NI++, Cnt++) {
00971       // nodes that point to NI, have it on out-list
00972       for (int e = 0; e < NI.GetInDeg(); e++) {
00973         if (NI.GetInNId(e) == DelNId) { continue; } // skip deleted nodes
00974     if (! IsEdge(NI.GetInNId(e), NI.GetId())) {
00975       printf("\nno edge: %d --> %d", NI.GetInNId(e), NI.GetId());
00976       printf("NId: %d   deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg());
00977       for (int i = 0; i < NI.GetInDeg(); i++) { printf("  %d", NI.GetInNId(i)); }
00978       TNodeI NI2 = GetNI(NI.GetInNId(e));
00979       printf("\nNId2: %d   deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg());
00980       for (int j = 0; j < NI2.GetOutDeg(); j++) { printf("  %d", NI2.GetOutNId(j)); }
00981       printf("\n");
00982       ErrCnt++;
00983     }
00984         //EAssertR(IsEdge(NI.GetInNId(e), NI.GetId()),
00985         //  TStr::Fmt("no edge: %d --> %d", NI.GetInNId(e), NI.GetId()));
00986     }
00987       // nodes NI points to, have it on in-list
00988       for (int e = 0; e < NI.GetOutDeg(); e++) {
00989         if (NI.GetOutNId(e) == DelNId) { continue; }
00990         const int InVId = GetNode(NI.GetOutNId(e)).InVId;
00991         int* DstInV = (int *)Pool.GetValVPt(InVId);
00992     if (BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) == NULL) {
00993       printf("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e));
00994       printf("NId: %d   deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg());
00995       for (int i = 0; i < NI.GetOutDeg(); i++) { printf("  %d", NI.GetOutNId(i)); }
00996       TNodeI NI2 = GetNI(NI.GetOutNId(e));
00997       printf("\nNId2: %d   deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg());
00998       for (int j = 0; j < NI2.GetInDeg(); j++) { printf("  %d", NI2.GetInNId(j)); }
00999       printf("\n");  ErrCnt++;
01000     }
01001         //EAssertR(BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) != NULL,
01002         //  TStr::Fmt("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e)));
01003     }
01004     if (ErrCnt > 5) { FailR("error count too large!"); }
01005       if (Cnt % 100000 == 0) {
01006         printf("\r%s [%s]", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr()); }
01007     }
01008     printf("\r%s [%s]\n", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr());
01009   }
01010   return true;
01011 }
01012 
01013 template <class TNodeData, bool IsDir>
01014 void TBigNet<TNodeData, IsDir>::Dump(const TStr& Desc) const {
01015   if (! Desc.Empty()) { printf("\n%s (%d, %u):\n", Desc.CStr(), GetNodes(), GetEdges()); }
01016   else { printf("\nNodes: %d,  Edges: %u\n", GetNodes(), GetEdges()); }
01017   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
01018     printf("%d]\n  IN %d]", NI.GetId(), NI.GetInDeg());
01019     for (int i=0; i<NI.GetInDeg(); i++) { printf(" %d", NI.GetInNId(i)); }
01020     if (IsDir) {
01021       printf("\n  OUT %d]", NI.GetOutDeg());
01022       for (int i=0; i<NI.GetOutDeg(); i++) { printf(" %d", NI.GetOutNId(i)); }
01023     }
01024     printf("\n");
01025   }
01026 }
01027 
01028 // header: <Nodes, Edges, IsDir>
01029 // format: undirected <NId, Dat, OutDeg, OutNodeV>
01030 // format: directed <NId, Dat, OutDeg, OutNodeV, InDeg, InNodeV>
01031 template <class TNodeData, bool IsDir>
01032 void TBigNet<TNodeData, IsDir>::SaveForDisk(const TStr& OutFNm) const {
01033   const bool IsDirected = IsDir;
01034   TFOut FOut(OutFNm);
01035   FOut.Save(GetNodes());
01036   FOut.Save(GetEdges());
01037   FOut.Save(IsDirected);
01038   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
01039     FOut.Save(NI.GetId());
01040     NI.GetDat().Save(FOut);
01041     FOut.Save(NI.GetOutDeg());
01042     for (int i = 0; i < NI.GetOutDeg(); i++) {
01043       FOut.Save(NI.GetOutNId(i)); }
01044     if (IsDirected) {
01045       FOut.Save(NI.GetInDeg());
01046       for (int i = 0; i < NI.GetInDeg(); i++) {
01047         FOut.Save(NI.GetInNId(i)); }
01048     }
01049   }
01050 }
01051 
01052 // skip the edge pool and only load the node data hash table
01053 template <class TNodeData, bool IsDir>
01054 void TBigNet<TNodeData, IsDir>::LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH) {
01055   TFIn SIn(InFNm);
01056   TInt MxNId(SIn);
01057   TB32Set Flags(SIn);
01058   printf("skipping Pool...\n");
01059   TBool FastCopy(SIn);
01060   uint64 _GrowBy, _MxVals, _Vals;
01061   SIn.Load(_GrowBy); SIn.Load(_MxVals);  SIn.Load(_Vals);
01062   TInt EmptyVal(SIn);
01063   int Tmp;
01064   for (TSize ValN = 0; ValN < _Vals; ValN++) { SIn.Load(Tmp); }
01065   TInt MxVals(SIn), Vals(SIn);
01066   uint64 Offset;
01067   for (int ValN = 0; ValN < Vals; ValN++) { SIn.Load(Offset); }
01068   printf("loading Hode hash table...\n");
01069   NodeH.Load(SIn);
01070 }
01071 
01072 // Save the network without loading it. Save the node hash table as THash or TSparseHash
01073 template <class TNodeData, bool IsDir>
01074 void TBigNet<TNodeData, IsDir>::SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash) {
01075   TFIn FIn(InFNm);
01076   TFOut FOut(OutFNm);
01077   { TInt MxNId(FIn);  MxNId.Save(FOut);
01078   TB32Set Flags(FIn);  Flags.Save(FOut);
01079   TPool Pool(FIn);  Pool.Save(FOut); }
01080   { TNodeH NodeH(FIn);
01081   if (! SaveSparseHash) {
01082     THash<TInt, TNode> NewH(NodeH.Len(), true);
01083     for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) {
01084       NewH.AddDat(I->Key, I->Dat);
01085     }
01086     NewH.Save(FOut);
01087   } else {
01088     FailR("Not implemented");
01089   } }
01090 }