SNAP Library , Developer Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
anf.h
Go to the documentation of this file.
00001 
00002 // Approximate Neighborhood Function.
00003 namespace TSnap {
00010 template <class PGraph> void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32); 
00017 template <class PGraph> void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32);
00020 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox);
00023 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const int NRuns=1, int NApprox=-1);
00024 } // namespace TSnap
00025 
00032 template <class PGraph>
00033 class TGraphAnf {
00034 private:
00035   typedef TVec<uint64> TAnfBitV;
00036   THash<TInt, uint64> NIdToBitPosH;  // NId to byte(!) offset in BitV
00037   TInt NApprox;                      // maintain N parallel approximations (multiple of 8)
00038   TInt NBits, MoreBits, ApproxBytes; // NBits=logNodes+MoreBits;  MoreBits: additional R bits;  ApproxBytes: Approx/8;
00039   PGraph Graph;
00040   TRnd Rnd;
00041 private:
00042   UndefDefaultCopyAssign(TGraphAnf);
00043 public:
00044   TGraphAnf(const PGraph& GraphPt, const int& Approx=32, const int& moreBits=5, const int& RndSeed=0) :
00045     NApprox(Approx), MoreBits(moreBits), Graph(GraphPt), Rnd(RndSeed) { IAssert(NApprox%8==0);  IAssert(sizeof(uint)==4); }
00046   uint64 GetNIdOffset(const int& NId) const { return NIdToBitPosH.GetDat(NId); }
00047   void InitAnfBits(TAnfBitV& BitV);
00048   void Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const;
00049   double AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const;
00050   double GetCount(const TAnfBitV& BitV, const uint64& NIdOffset) const {
00051     return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; }
00057   void GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
00063   void GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
00064 };
00065 
00066 template <class PGraph>
00067 void TGraphAnf<PGraph>::InitAnfBits(TAnfBitV& BitV) {
00068   const int NNodes = Graph->GetNodes();
00069   const int LogNodes = int(ceil(TMath::Log2(NNodes)));
00070   ApproxBytes = NApprox / 8;
00071   NBits = LogNodes + MoreBits; // bits per node
00072   const int BytesPerNd = ApproxBytes * NBits; // total bytes per node
00073   int64 VSize = ((static_cast<int64>(NNodes) * static_cast<int64>(BytesPerNd))/sizeof(uint)) + 1;
00074   IAssertR(VSize <= TInt::Mx,
00075     TStr::Fmt("Your graph is too large for Approximate Neighborhood Function, %s is larger than %d",
00076     TUInt64::GetStr(VSize).CStr(),TInt::Mx));
00077   printf("size %d\n", static_cast<int>(VSize));
00078   BitV.Gen(VSize);  IAssert(BitV.BegI() != NULL);
00079   BitV.PutAll(0);
00080   int SetBit = 0;
00081   uint64 NodeOff = 0;
00082   uchar* BitVPt = (uchar *) BitV.BegI();
00083   // for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ...
00084   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) {
00085     NIdToBitPosH.AddDat(NI.GetId(), NodeOff);
00086     // init vertex bits
00087     for (int approx = 0; approx < NApprox; approx++) {
00088       const int RndNum = Rnd.GetUniDevInt();
00089       for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { }
00090       if (SetBit >= NBits) SetBit = NBits-1;
00091       const int BitPos = ApproxBytes * SetBit + approx / 8;
00092       *(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c)
00093     }
00094     NodeOff += BytesPerNd;
00095   }
00096 }
00097 
00098 template <class PGraph>
00099 void TGraphAnf<PGraph>::Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const {
00100   uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset;
00101   uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset;
00102   for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; }
00103 }
00104 
00105 // Average least zero bit position (least significant zero)
00106 template <class PGraph>
00107 double TGraphAnf<PGraph>::AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const { //average least zero bit position (least significant zero)
00108   int approx, bit, AvgBitPos = 0;
00109   uchar* BitVPt = (uchar *) BitV.BegI();
00110   for (approx = 0; approx < NApprox; approx++) {
00111     for (bit = 0; bit < NBits; bit++) {
00112       if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero
00113     if (bit > NBits) bit = NBits;
00114     AvgBitPos += bit;
00115   }
00116   return AvgBitPos/double(NApprox) ;
00117 }
00118 
00119 template <class PGraph>
00120 void TGraphAnf<PGraph>::GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
00121   //const int NNodes = Graph->GetNodes();
00122   TAnfBitV CurBitsV, LastBitsV;
00123   InitAnfBits(CurBitsV);          IAssert(CurBitsV.BegI() != NULL);
00124   LastBitsV.Gen(CurBitsV.Len());  IAssert(LastBitsV.BegI() != NULL);
00125   DistNbrsV.Clr();
00126   DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg()));
00127   for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
00128     memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
00129     for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00130       const uint64 NIdOffset = GetNIdOffset(NI.GetId());
00131       for (int e = 0; e < NI.GetOutDeg(); e++) {
00132         const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
00133         Union(CurBitsV, NIdOffset,  LastBitsV, NId2Offset);
00134       }
00135       if (! IsDir) {
00136         for (int e = 0; e < NI.GetInDeg(); e++) {
00137           const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
00138           Union(CurBitsV, NIdOffset,  LastBitsV, NId2Offset);
00139         }
00140       }
00141     }
00142     DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId))));
00143     if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1%  change
00144   }
00145 }
00146 
00147 template <class PGraph>
00148 void TGraphAnf<PGraph>::GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
00149   TAnfBitV CurBitsV, LastBitsV;
00150   InitAnfBits(CurBitsV);          IAssert(CurBitsV.BegI() != NULL);
00151   LastBitsV.Gen(CurBitsV.Len());  IAssert(LastBitsV.BegI() != NULL);
00152   int v, e;
00153   double NPairs = 0.0;
00154   DistNbrsV.Clr();
00155   DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes()));
00156   DistNbrsV.Add(TIntFltKd(1, Graph->GetEdges()));
00157   //TExeTm ExeTm;
00158   for (int dist = 2; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
00159     //printf("ANF dist %d...", dist);  ExeTm.Tick();
00160     memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
00161     for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00162       const uint64 NIdOffset = GetNIdOffset(NI.GetId());
00163       for (e = 0; e < NI.GetOutDeg(); e++) {
00164         const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
00165         Union(CurBitsV, NIdOffset,  LastBitsV, NId2Offset);
00166       }
00167       if (! IsDir) {
00168         for (e = 0; e < NI.GetInDeg(); e++) {
00169           const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
00170           Union(CurBitsV, NIdOffset,  LastBitsV, NId2Offset);
00171         }
00172       }
00173     }
00174     NPairs = 0.0;
00175     for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) {
00176       NPairs += GetCount(CurBitsV, NIdToBitPosH[v]);
00177     }
00178     DistNbrsV.Add(TIntFltKd(dist, NPairs));
00179     //printf("pairs: %g  %s\n", NPairs, ExeTm.GetTmStr());
00180     if (NPairs == 0) { break; }
00181     if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1%  change
00182     //TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS");
00183   }
00184 }
00186 // Approximate Neighborhood Function
00187 namespace TSnap {
00188 
00189 namespace TSnapDetail {
00191 double CalcEffDiam(const TIntFltKdV& DistNbrsCdfV, const double& Percentile=0.9);
00193 double CalcEffDiam(const TFltPrV& DistNbrsCdfV, const double& Percentile=0.9);
00195 double CalcEffDiamPdf(const TIntFltKdV& DistNbrsPdfV, const double& Percentile=0.9);
00197 double CalcEffDiamPdf(const TFltPrV& DistNbrsPdfV, const double& Percentile=0.9);
00199 double CalcAvgDiamPdf(const TIntFltKdV& DistNbrsPdfV);
00201 double CalcAvgDiamPdf(const TFltPrV& DistNbrsPdfV);
00202 } // TSnapDetail
00203 
00204 template <class PGraph>
00205 void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
00206   TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
00207   Anf.GetNodeAnf(SrcNId, DistNbrsV, MxDist, IsDir);
00208 }
00209 
00210 template <class PGraph>
00211 void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
00212   TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
00213   Anf.GetGraphAnf(DistNbrsV, MxDist, IsDir);
00214 }
00215 
00216 template <class PGraph>
00217 double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox) {
00218   TIntFltKdV DistNbrsV;
00219   TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
00220   Anf.GetGraphAnf(DistNbrsV, -1, IsDir);
00221   return TSnap::TSnapDetail::CalcEffDiam(DistNbrsV, Percentile);
00222 }
00223 
00224 template<class PGraph>
00225 double GetAnfEffDiam(const PGraph& Graph, const int NRuns, int NApprox) {
00226   //return TSnap::GetEffDiam(Graph, IsDir, 0.9, 32);
00227   TMom Mom;
00228   if (NApprox == -1) {
00229     if (Graph->GetNodes() < 100000) { NApprox = 64; }
00230     else if (Graph->GetNodes() < 1000000) { NApprox = 32; }
00231     else { NApprox = 16; }
00232   }
00233   const bool IsDir = false;
00234   for (int r = 0; r < NRuns; r++) {
00235     Mom.Add(TSnap::GetAnfEffDiam(Graph, IsDir, 0.9, NApprox));
00236   }
00237   Mom.Def();
00238   return Mom.GetMean();
00239 }
00240 
00241 template <class PGraph> void TestAnf() {
00242   PGraph Graph = PGraph::TObj::New();
00243   //Graph:
00244   //  0    2 ----> 3
00245   //       ^       |
00246   //       |       |
00247   //       |       ^
00248   //  1    5 <---- 4
00249   for (int v = 0; v < 6; v++) { Graph->AddNode(v); }
00250   Graph->AddEdge(2, 3);
00251   Graph->AddEdge(3, 4);
00252   Graph->AddEdge(4, 5);
00253   Graph->AddEdge(5, 2);
00254   TFltV AnfV;
00255   for (int t = 0; t < 10; t++) {
00256     TGraphAnf<PGraph> Anf(Graph, 128, 5, t+1);
00257     TIntFltKdV DistToNbrsV;
00258     Anf.GetGraphAnf(DistToNbrsV, 5, true);
00259     printf("\n--seed: %d---------------------\n", t+1);
00260     for (int i = 0; i < DistToNbrsV.Len(); i++) {
00261       printf("dist: %d\t hops:%f\n", DistToNbrsV[i].Key(), DistToNbrsV[i].Dat());
00262     }
00263     AnfV.Add(DistToNbrsV.Last().Dat);
00264   }
00265   TMom Mom(AnfV);
00266   printf("-----------\nAvgAnf: %f  StDev:  %f\n", Mom.GetMean(), Mom.GetSDev());//*/
00267   // const int NApprox = 32;
00268   /*printf("\nANF vs. SAMPLE diam test (10 runs of ANF, NApprox=%d):\n", NApprox);
00269   //Graph = TGGen<PGraph>::GenGrid(20, 20);
00270   Graph = TGAlg::GetMxWcc(TGGen<PGraph>::GenRnd(1000, 10000));
00271   TFltV FullAnf, EffAnf;
00272   for (int tryn = 0; tryn < 10; tryn++) {
00273     FullAnf.Add(GetEffDiam(Graph, false, 1.0, NApprox));
00274     EffAnf.Add(GetEffDiam(Graph, false, 0.9, NApprox));
00275   }
00276   TMom FullMom(FullAnf);
00277   TMom AnfMom(EffAnf);
00278   printf("  Sample FullDiam:      %d\n", TGAlg::GetBfsFullDiam(Graph, 100, false));
00279   printf("  Anf    FullDiam:      %f  [%f]\n", FullMom.GetMean(), FullMom.GetSDev());
00280   printf("  Sample EffDiam [90%%]: %f\n", TGAlg::GetBfsEffDiam(Graph, 100, false));
00281   printf("  Anf    EffDiam [90%%]: %f  [%f]\n", AnfMom.GetMean(), AnfMom.GetSDev());
00282   // epinions
00283   printf("\nEpinions graph:\n");
00284   { typedef PNGraph PGraph;
00285   PGraph G = TGGen<PGraph>::GenEpinions();
00286   TIntFltKdV DistToPairsV;
00287   GetAnf(G, DistToPairsV, 50, true);
00288   for(int i = 0; i < DistToPairsV.Len(); i++) {
00289     printf("\t%d\t%f\n", DistToPairsV[i].Key, DistToPairsV[i].Dat); }
00290   printf("\nUndir\n");
00291   TAnf<PGraph>::GetAnf(G, DistToPairsV, 50, false);
00292   for(int j = 0; j < DistToPairsV.Len(); j++) {
00293     printf("\t%d\t%f\n", DistToPairsV[j].Key, DistToPairsV[j].Dat); }
00294   }//*/
00295 }
00296 
00297 } // namespace TSnap