SNAP Library 2.0, Developer Reference
2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <anf.h>
Public Member Functions | |
TGraphAnf (const PGraph &GraphPt, const int &Approx=32, const int &moreBits=5, const int &RndSeed=0) | |
uint64 | GetNIdOffset (const int &NId) const |
void | InitAnfBits (TAnfBitV &BitV) |
void | Union (TAnfBitV &BitV1, const uint64 &NId1Offset, TAnfBitV &BitV2, const uint64 &NId2Offset) const |
double | AvgLstZero (const TAnfBitV &BitV, const uint64 &NIdOffset) const |
double | GetCount (const TAnfBitV &BitV, const uint64 &NIdOffset) const |
void | GetNodeAnf (const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir) |
void | GetGraphAnf (TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir) |
Private Types | |
typedef TVec< uint64 > | TAnfBitV |
Private Member Functions | |
UndefDefaultCopyAssign (TGraphAnf) | |
Private Attributes | |
THash< TInt, uint64 > | NIdToBitPosH |
TInt | NApprox |
TInt | NBits |
TInt | MoreBits |
TInt | ApproxBytes |
PGraph | Graph |
TRnd | Rnd |
Approximate Neighborhood Function. Implements the algorithm for computing the diameter of a given graph. The method is based on the approximate counting argument by Flajolet-Martin. For more details see C. R. Palmer, P. B. Gibbons and C. Faloutsos, ANF: A Fast and Scalable Tool for Data Mining in Massive Graphs, KDD 2002 (http://www.cs.cmu.edu/~christos/PUBLICATIONS/kdd02-anf.ps.gz) See TSnap::TestAnf() for example of how to use the class.
double TGraphAnf< PGraph >::AvgLstZero | ( | const TAnfBitV & | BitV, |
const uint64 & | NIdOffset | ||
) | const |
Definition at line 107 of file anf.h.
References TVec< TVal, TSizeTy >::BegI().
Referenced by TGraphAnf< PGraph >::GetCount().
{ //average least zero bit position (least significant zero) int approx, bit, AvgBitPos = 0; uchar* BitVPt = (uchar *) BitV.BegI(); for (approx = 0; approx < NApprox; approx++) { for (bit = 0; bit < NBits; bit++) { if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero if (bit > NBits) bit = NBits; AvgBitPos += bit; } return AvgBitPos/double(NApprox) ; }
double TGraphAnf< PGraph >::GetCount | ( | const TAnfBitV & | BitV, |
const uint64 & | NIdOffset | ||
) | const [inline] |
Definition at line 50 of file anf.h.
References TGraphAnf< PGraph >::AvgLstZero().
{ return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; }
void TGraphAnf< PGraph >::GetGraphAnf | ( | TIntFltKdV & | DistNbrsV, |
const int & | MxDist, | ||
const bool & | IsDir | ||
) |
Returns the number of pairs of nodes reachable in less than H hops. For example, DistNbrsV.GetDat(0) is the number of nodes in the graph, DistNbrsV.GetDat(1) is the number of nodes+edges and so on.
DistNbrsV | Maps between the distance H (in hops) and the number of nodes reachable in <=H hops. |
MxDist | Maximum number of hops the algorithm spreads from SrcNId. |
IsDir | false: consider links as undirected (drop link directions). |
Definition at line 148 of file anf.h.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::BegI(), TVec< TVal, TSizeTy >::Clr(), TVec< TVal, TSizeTy >::Gen(), IAssert, TVec< TVal, TSizeTy >::LastLast(), TVec< TVal, TSizeTy >::Len(), and TInt::Mx.
Referenced by TSnap::GetAnf(), TSnap::GetAnfEffDiam(), and TSnap::TestAnf().
{ TAnfBitV CurBitsV, LastBitsV; InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL); LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL); int v, e; double NPairs = 0.0; DistNbrsV.Clr(); DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes())); DistNbrsV.Add(TIntFltKd(1, Graph->GetEdges())); //TExeTm ExeTm; for (int dist = 2; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) { //printf("ANF dist %d...", dist); ExeTm.Tick(); memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV; for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { const uint64 NIdOffset = GetNIdOffset(NI.GetId()); for (e = 0; e < NI.GetOutDeg(); e++) { const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e)); Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); } if (! IsDir) { for (e = 0; e < NI.GetInDeg(); e++) { const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e)); Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); } } } NPairs = 0.0; for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) { NPairs += GetCount(CurBitsV, NIdToBitPosH[v]); } DistNbrsV.Add(TIntFltKd(dist, NPairs)); //printf("pairs: %g %s\n", NPairs, ExeTm.GetTmStr()); if (NPairs == 0) { break; } if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1% change //TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS"); } }
uint64 TGraphAnf< PGraph >::GetNIdOffset | ( | const int & | NId | ) | const [inline] |
Definition at line 46 of file anf.h.
References THash< TKey, TDat, THashFunc >::GetDat(), and TGraphAnf< PGraph >::NIdToBitPosH.
{ return NIdToBitPosH.GetDat(NId); }
void TGraphAnf< PGraph >::GetNodeAnf | ( | const int & | SrcNId, |
TIntFltKdV & | DistNbrsV, | ||
const int & | MxDist, | ||
const bool & | IsDir | ||
) |
Returns the number of nodes reachable from SrcNId in less than H hops.
SrcNId | Starting node. |
DistNbrsV | Maps between the distance H (in hops) and the number of nodes reachable in <=H hops. |
MxDist | Maximum number of hops the algorithm spreads from SrcNId. |
IsDir | false: consider links as undirected (drop link directions). |
Definition at line 120 of file anf.h.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::BegI(), TVec< TVal, TSizeTy >::Clr(), TVec< TVal, TSizeTy >::Gen(), IAssert, TVec< TVal, TSizeTy >::Last(), TVec< TVal, TSizeTy >::Len(), and TInt::Mx.
Referenced by TSnap::GetAnf().
{ //const int NNodes = Graph->GetNodes(); TAnfBitV CurBitsV, LastBitsV; InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL); LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL); DistNbrsV.Clr(); DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg())); for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) { memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV; for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { const uint64 NIdOffset = GetNIdOffset(NI.GetId()); for (int e = 0; e < NI.GetOutDeg(); e++) { const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e)); Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); } if (! IsDir) { for (int e = 0; e < NI.GetInDeg(); e++) { const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e)); Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); } } } DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId)))); if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1% change } }
void TGraphAnf< PGraph >::InitAnfBits | ( | TAnfBitV & | BitV | ) |
Definition at line 67 of file anf.h.
References TVec< TVal, TSizeTy >::BegI(), TStr::Fmt(), TVec< TVal, TSizeTy >::Gen(), TUInt64::GetStr(), IAssert, IAssertR, TMath::Log2(), TInt::Mx, and TVec< TVal, TSizeTy >::PutAll().
{ const int NNodes = Graph->GetNodes(); const int LogNodes = int(ceil(TMath::Log2(NNodes))); ApproxBytes = NApprox / 8; NBits = LogNodes + MoreBits; // bits per node const int BytesPerNd = ApproxBytes * NBits; // total bytes per node int64 VSize = ((static_cast<int64>(NNodes) * static_cast<int64>(BytesPerNd))/sizeof(uint)) + 1; IAssertR(VSize <= TInt::Mx, TStr::Fmt("Your graph is too large for Approximate Neighborhood Function, %s is larger than %d", TUInt64::GetStr(VSize).CStr(),TInt::Mx)); printf("size %d\n", static_cast<int>(VSize)); BitV.Gen((const int)VSize); IAssert(BitV.BegI() != NULL); BitV.PutAll(0); int SetBit = 0; uint64 NodeOff = 0; uchar* BitVPt = (uchar *) BitV.BegI(); // for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ... for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) { NIdToBitPosH.AddDat(NI.GetId(), NodeOff); // init vertex bits for (int approx = 0; approx < NApprox; approx++) { const int RndNum = Rnd.GetUniDevInt(); for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { } if (SetBit >= NBits) SetBit = NBits-1; const int BitPos = ApproxBytes * SetBit + approx / 8; *(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c) } NodeOff += BytesPerNd; } }
TGraphAnf< PGraph >::UndefDefaultCopyAssign | ( | TGraphAnf< PGraph > | ) | [private] |
void TGraphAnf< PGraph >::Union | ( | TAnfBitV & | BitV1, |
const uint64 & | NId1Offset, | ||
TAnfBitV & | BitV2, | ||
const uint64 & | NId2Offset | ||
) | const |
Definition at line 99 of file anf.h.
References TVec< TVal, TSizeTy >::BegI().
{ uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset; uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset; for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; } }
TInt TGraphAnf< PGraph >::ApproxBytes [private] |
Definition at line 37 of file anf.h.
Referenced by TGraphAnf< PGraph >::TGraphAnf().
THash<TInt, uint64> TGraphAnf< PGraph >::NIdToBitPosH [private] |
Definition at line 36 of file anf.h.
Referenced by TGraphAnf< PGraph >::GetNIdOffset().