SNAP Library , Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // Approximate Neighborhood Function. 00003 namespace TSnap { 00010 template <class PGraph> void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32); 00017 template <class PGraph> void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32); 00020 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox); 00023 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const int NRuns=1, int NApprox=-1); 00024 } // namespace TSnap 00025 00032 template <class PGraph> 00033 class TGraphAnf { 00034 private: 00035 typedef TVec<uint64> TAnfBitV; 00036 THash<TInt, uint64> NIdToBitPosH; // NId to byte(!) offset in BitV 00037 TInt NApprox; // maintain N parallel approximations (multiple of 8) 00038 TInt NBits, MoreBits, ApproxBytes; // NBits=logNodes+MoreBits; MoreBits: additional R bits; ApproxBytes: Approx/8; 00039 PGraph Graph; 00040 TRnd Rnd; 00041 private: 00042 UndefDefaultCopyAssign(TGraphAnf); 00043 public: 00044 TGraphAnf(const PGraph& GraphPt, const int& Approx=32, const int& moreBits=5, const int& RndSeed=0) : 00045 NApprox(Approx), MoreBits(moreBits), Graph(GraphPt), Rnd(RndSeed) { IAssert(NApprox%8==0); IAssert(sizeof(uint)==4); } 00046 uint64 GetNIdOffset(const int& NId) const { return NIdToBitPosH.GetDat(NId); } 00047 void InitAnfBits(TAnfBitV& BitV); 00048 void Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const; 00049 double AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const; 00050 double GetCount(const TAnfBitV& BitV, const uint64& NIdOffset) const { 00051 return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; } 00057 void GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir); 00063 void GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir); 00064 }; 00065 00066 template <class PGraph> 00067 void TGraphAnf<PGraph>::InitAnfBits(TAnfBitV& BitV) { 00068 const int NNodes = Graph->GetNodes(); 00069 const int LogNodes = int(ceil(TMath::Log2(NNodes))); 00070 ApproxBytes = NApprox / 8; 00071 NBits = LogNodes + MoreBits; // bits per node 00072 const int BytesPerNd = ApproxBytes * NBits; // total bytes per node 00073 int64 VSize = ((static_cast<int64>(NNodes) * static_cast<int64>(BytesPerNd))/sizeof(uint)) + 1; 00074 IAssertR(VSize <= TInt::Mx, 00075 TStr::Fmt("Your graph is too large for Approximate Neighborhood Function, %s is larger than %d", 00076 TUInt64::GetStr(VSize).CStr(),TInt::Mx)); 00077 printf("size %d\n", static_cast<int>(VSize)); 00078 BitV.Gen(VSize); IAssert(BitV.BegI() != NULL); 00079 BitV.PutAll(0); 00080 int SetBit = 0; 00081 uint64 NodeOff = 0; 00082 uchar* BitVPt = (uchar *) BitV.BegI(); 00083 // for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ... 00084 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) { 00085 NIdToBitPosH.AddDat(NI.GetId(), NodeOff); 00086 // init vertex bits 00087 for (int approx = 0; approx < NApprox; approx++) { 00088 const int RndNum = Rnd.GetUniDevInt(); 00089 for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { } 00090 if (SetBit >= NBits) SetBit = NBits-1; 00091 const int BitPos = ApproxBytes * SetBit + approx / 8; 00092 *(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c) 00093 } 00094 NodeOff += BytesPerNd; 00095 } 00096 } 00097 00098 template <class PGraph> 00099 void TGraphAnf<PGraph>::Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const { 00100 uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset; 00101 uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset; 00102 for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; } 00103 } 00104 00105 // Average least zero bit position (least significant zero) 00106 template <class PGraph> 00107 double TGraphAnf<PGraph>::AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const { //average least zero bit position (least significant zero) 00108 int approx, bit, AvgBitPos = 0; 00109 uchar* BitVPt = (uchar *) BitV.BegI(); 00110 for (approx = 0; approx < NApprox; approx++) { 00111 for (bit = 0; bit < NBits; bit++) { 00112 if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero 00113 if (bit > NBits) bit = NBits; 00114 AvgBitPos += bit; 00115 } 00116 return AvgBitPos/double(NApprox) ; 00117 } 00118 00119 template <class PGraph> 00120 void TGraphAnf<PGraph>::GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) { 00121 //const int NNodes = Graph->GetNodes(); 00122 TAnfBitV CurBitsV, LastBitsV; 00123 InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL); 00124 LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL); 00125 DistNbrsV.Clr(); 00126 DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg())); 00127 for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) { 00128 memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV; 00129 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00130 const uint64 NIdOffset = GetNIdOffset(NI.GetId()); 00131 for (int e = 0; e < NI.GetOutDeg(); e++) { 00132 const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e)); 00133 Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); 00134 } 00135 if (! IsDir) { 00136 for (int e = 0; e < NI.GetInDeg(); e++) { 00137 const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e)); 00138 Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); 00139 } 00140 } 00141 } 00142 DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId)))); 00143 if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1% change 00144 } 00145 } 00146 00147 template <class PGraph> 00148 void TGraphAnf<PGraph>::GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) { 00149 TAnfBitV CurBitsV, LastBitsV; 00150 InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL); 00151 LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL); 00152 int v, e; 00153 double NPairs = 0.0; 00154 DistNbrsV.Clr(); 00155 DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes())); 00156 DistNbrsV.Add(TIntFltKd(1, Graph->GetEdges())); 00157 //TExeTm ExeTm; 00158 for (int dist = 2; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) { 00159 //printf("ANF dist %d...", dist); ExeTm.Tick(); 00160 memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV; 00161 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00162 const uint64 NIdOffset = GetNIdOffset(NI.GetId()); 00163 for (e = 0; e < NI.GetOutDeg(); e++) { 00164 const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e)); 00165 Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); 00166 } 00167 if (! IsDir) { 00168 for (e = 0; e < NI.GetInDeg(); e++) { 00169 const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e)); 00170 Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); 00171 } 00172 } 00173 } 00174 NPairs = 0.0; 00175 for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) { 00176 NPairs += GetCount(CurBitsV, NIdToBitPosH[v]); 00177 } 00178 DistNbrsV.Add(TIntFltKd(dist, NPairs)); 00179 //printf("pairs: %g %s\n", NPairs, ExeTm.GetTmStr()); 00180 if (NPairs == 0) { break; } 00181 if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1% change 00182 //TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS"); 00183 } 00184 } 00186 // Approximate Neighborhood Function 00187 namespace TSnap { 00188 00189 namespace TSnapDetail { 00191 double CalcEffDiam(const TIntFltKdV& DistNbrsCdfV, const double& Percentile=0.9); 00193 double CalcEffDiam(const TFltPrV& DistNbrsCdfV, const double& Percentile=0.9); 00195 double CalcEffDiamPdf(const TIntFltKdV& DistNbrsPdfV, const double& Percentile=0.9); 00197 double CalcEffDiamPdf(const TFltPrV& DistNbrsPdfV, const double& Percentile=0.9); 00199 double CalcAvgDiamPdf(const TIntFltKdV& DistNbrsPdfV); 00201 double CalcAvgDiamPdf(const TFltPrV& DistNbrsPdfV); 00202 } // TSnapDetail 00203 00204 template <class PGraph> 00205 void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) { 00206 TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0); 00207 Anf.GetNodeAnf(SrcNId, DistNbrsV, MxDist, IsDir); 00208 } 00209 00210 template <class PGraph> 00211 void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) { 00212 TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0); 00213 Anf.GetGraphAnf(DistNbrsV, MxDist, IsDir); 00214 } 00215 00216 template <class PGraph> 00217 double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox) { 00218 TIntFltKdV DistNbrsV; 00219 TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0); 00220 Anf.GetGraphAnf(DistNbrsV, -1, IsDir); 00221 return TSnap::TSnapDetail::CalcEffDiam(DistNbrsV, Percentile); 00222 } 00223 00224 template<class PGraph> 00225 double GetAnfEffDiam(const PGraph& Graph, const int NRuns, int NApprox) { 00226 //return TSnap::GetEffDiam(Graph, IsDir, 0.9, 32); 00227 TMom Mom; 00228 if (NApprox == -1) { 00229 if (Graph->GetNodes() < 100000) { NApprox = 64; } 00230 else if (Graph->GetNodes() < 1000000) { NApprox = 32; } 00231 else { NApprox = 16; } 00232 } 00233 const bool IsDir = false; 00234 for (int r = 0; r < NRuns; r++) { 00235 Mom.Add(TSnap::GetAnfEffDiam(Graph, IsDir, 0.9, NApprox)); 00236 } 00237 Mom.Def(); 00238 return Mom.GetMean(); 00239 } 00240 00241 template <class PGraph> void TestAnf() { 00242 PGraph Graph = PGraph::TObj::New(); 00243 //Graph: 00244 // 0 2 ----> 3 00245 // ^ | 00246 // | | 00247 // | ^ 00248 // 1 5 <---- 4 00249 for (int v = 0; v < 6; v++) { Graph->AddNode(v); } 00250 Graph->AddEdge(2, 3); 00251 Graph->AddEdge(3, 4); 00252 Graph->AddEdge(4, 5); 00253 Graph->AddEdge(5, 2); 00254 TFltV AnfV; 00255 for (int t = 0; t < 10; t++) { 00256 TGraphAnf<PGraph> Anf(Graph, 128, 5, t+1); 00257 TIntFltKdV DistToNbrsV; 00258 Anf.GetGraphAnf(DistToNbrsV, 5, true); 00259 printf("\n--seed: %d---------------------\n", t+1); 00260 for (int i = 0; i < DistToNbrsV.Len(); i++) { 00261 printf("dist: %d\t hops:%f\n", DistToNbrsV[i].Key(), DistToNbrsV[i].Dat()); 00262 } 00263 AnfV.Add(DistToNbrsV.Last().Dat); 00264 } 00265 TMom Mom(AnfV); 00266 printf("-----------\nAvgAnf: %f StDev: %f\n", Mom.GetMean(), Mom.GetSDev());//*/ 00267 // const int NApprox = 32; 00268 /*printf("\nANF vs. SAMPLE diam test (10 runs of ANF, NApprox=%d):\n", NApprox); 00269 //Graph = TGGen<PGraph>::GenGrid(20, 20); 00270 Graph = TGAlg::GetMxWcc(TGGen<PGraph>::GenRnd(1000, 10000)); 00271 TFltV FullAnf, EffAnf; 00272 for (int tryn = 0; tryn < 10; tryn++) { 00273 FullAnf.Add(GetEffDiam(Graph, false, 1.0, NApprox)); 00274 EffAnf.Add(GetEffDiam(Graph, false, 0.9, NApprox)); 00275 } 00276 TMom FullMom(FullAnf); 00277 TMom AnfMom(EffAnf); 00278 printf(" Sample FullDiam: %d\n", TGAlg::GetBfsFullDiam(Graph, 100, false)); 00279 printf(" Anf FullDiam: %f [%f]\n", FullMom.GetMean(), FullMom.GetSDev()); 00280 printf(" Sample EffDiam [90%%]: %f\n", TGAlg::GetBfsEffDiam(Graph, 100, false)); 00281 printf(" Anf EffDiam [90%%]: %f [%f]\n", AnfMom.GetMean(), AnfMom.GetSDev()); 00282 // epinions 00283 printf("\nEpinions graph:\n"); 00284 { typedef PNGraph PGraph; 00285 PGraph G = TGGen<PGraph>::GenEpinions(); 00286 TIntFltKdV DistToPairsV; 00287 GetAnf(G, DistToPairsV, 50, true); 00288 for(int i = 0; i < DistToPairsV.Len(); i++) { 00289 printf("\t%d\t%f\n", DistToPairsV[i].Key, DistToPairsV[i].Dat); } 00290 printf("\nUndir\n"); 00291 TAnf<PGraph>::GetAnf(G, DistToPairsV, 50, false); 00292 for(int j = 0; j < DistToPairsV.Len(); j++) { 00293 printf("\t%d\t%f\n", DistToPairsV[j].Key, DistToPairsV[j].Dat); } 00294 }//*/ 00295 } 00296 00297 } // namespace TSnap