|
SNAP Library , Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <anf.h>

Public Member Functions | |
| TGraphAnf (const PGraph &GraphPt, const int &Approx=32, const int &moreBits=5, const int &RndSeed=0) | |
| uint64 | GetNIdOffset (const int &NId) const |
| void | InitAnfBits (TAnfBitV &BitV) |
| void | Union (TAnfBitV &BitV1, const uint64 &NId1Offset, TAnfBitV &BitV2, const uint64 &NId2Offset) const |
| double | AvgLstZero (const TAnfBitV &BitV, const uint64 &NIdOffset) const |
| double | GetCount (const TAnfBitV &BitV, const uint64 &NIdOffset) const |
| void | GetNodeAnf (const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir) |
| void | GetGraphAnf (TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir) |
Private Types | |
| typedef TVec< uint64 > | TAnfBitV |
Private Member Functions | |
| UndefDefaultCopyAssign (TGraphAnf) | |
Private Attributes | |
| THash< TInt, uint64 > | NIdToBitPosH |
| TInt | NApprox |
| TInt | NBits |
| TInt | MoreBits |
| TInt | ApproxBytes |
| PGraph | Graph |
| TRnd | Rnd |
Approximate Neighborhood Function. Implements the algorithm for computing the diameter of a given graph. The method is based on the approximate counting argument by Flajolet-Martin. For more details see C. R. Palmer, P. B. Gibbons and C. Faloutsos, ANF: A Fast and Scalable Tool for Data Mining in Massive Graphs, KDD 2002 (http://www.cs.cmu.edu/~christos/PUBLICATIONS/kdd02-anf.ps.gz) See TSnap::TestAnf() for example of how to use the class.
| double TGraphAnf< PGraph >::AvgLstZero | ( | const TAnfBitV & | BitV, |
| const uint64 & | NIdOffset | ||
| ) | const |
Definition at line 107 of file anf.h.
References TVec< TVal >::BegI().
Referenced by TGraphAnf< PGraph >::GetCount().
{ //average least zero bit position (least significant zero)
int approx, bit, AvgBitPos = 0;
uchar* BitVPt = (uchar *) BitV.BegI();
for (approx = 0; approx < NApprox; approx++) {
for (bit = 0; bit < NBits; bit++) {
if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero
if (bit > NBits) bit = NBits;
AvgBitPos += bit;
}
return AvgBitPos/double(NApprox) ;
}


| double TGraphAnf< PGraph >::GetCount | ( | const TAnfBitV & | BitV, |
| const uint64 & | NIdOffset | ||
| ) | const [inline] |
Definition at line 50 of file anf.h.
References TGraphAnf< PGraph >::AvgLstZero().
{
return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; }

| void TGraphAnf< PGraph >::GetGraphAnf | ( | TIntFltKdV & | DistNbrsV, |
| const int & | MxDist, | ||
| const bool & | IsDir | ||
| ) |
Returns the number of pairs of nodes reachable in less than H hops. For example, DistNbrsV.GetDat(0) is the number of nodes in the graph, DistNbrsV.GetDat(1) is the number of nodes+edges and so on.
| DistNbrsV | Maps between the distance H (in hops) and the number of nodes reachable in <=H hops. |
| MxDist | Maximum number of hops the algorithm spreads from SrcNId. |
| IsDir | false: consider links as undirected (drop link directions). |
Definition at line 148 of file anf.h.
References TVec< TVal >::Add(), TVec< TVal >::BegI(), TVec< TVal >::Clr(), TVec< TVal >::Gen(), IAssert, TVec< TVal >::LastLast(), TVec< TVal >::Len(), and TInt::Mx.
Referenced by TSnap::GetAnf(), TSnap::GetAnfEffDiam(), and TSnap::TestAnf().
{
TAnfBitV CurBitsV, LastBitsV;
InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
int v, e;
double NPairs = 0.0;
DistNbrsV.Clr();
DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes()));
DistNbrsV.Add(TIntFltKd(1, Graph->GetEdges()));
//TExeTm ExeTm;
for (int dist = 2; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
//printf("ANF dist %d...", dist); ExeTm.Tick();
memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
const uint64 NIdOffset = GetNIdOffset(NI.GetId());
for (e = 0; e < NI.GetOutDeg(); e++) {
const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
}
if (! IsDir) {
for (e = 0; e < NI.GetInDeg(); e++) {
const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
}
}
}
NPairs = 0.0;
for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) {
NPairs += GetCount(CurBitsV, NIdToBitPosH[v]);
}
DistNbrsV.Add(TIntFltKd(dist, NPairs));
//printf("pairs: %g %s\n", NPairs, ExeTm.GetTmStr());
if (NPairs == 0) { break; }
if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1% change
//TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS");
}
}


| uint64 TGraphAnf< PGraph >::GetNIdOffset | ( | const int & | NId | ) | const [inline] |
Definition at line 46 of file anf.h.
References THash< TKey, TDat, THashFunc >::GetDat(), and TGraphAnf< PGraph >::NIdToBitPosH.
{ return NIdToBitPosH.GetDat(NId); }

| void TGraphAnf< PGraph >::GetNodeAnf | ( | const int & | SrcNId, |
| TIntFltKdV & | DistNbrsV, | ||
| const int & | MxDist, | ||
| const bool & | IsDir | ||
| ) |
Returns the number of nodes reachable from SrcNId in less than H hops.
| SrcNId | Starting node. |
| DistNbrsV | Maps between the distance H (in hops) and the number of nodes reachable in <=H hops. |
| MxDist | Maximum number of hops the algorithm spreads from SrcNId. |
| IsDir | false: consider links as undirected (drop link directions). |
Definition at line 120 of file anf.h.
References TVec< TVal >::Add(), TVec< TVal >::BegI(), TVec< TVal >::Clr(), TVec< TVal >::Gen(), IAssert, TVec< TVal >::Last(), TVec< TVal >::Len(), and TInt::Mx.
Referenced by TSnap::GetAnf().
{
//const int NNodes = Graph->GetNodes();
TAnfBitV CurBitsV, LastBitsV;
InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
DistNbrsV.Clr();
DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg()));
for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
const uint64 NIdOffset = GetNIdOffset(NI.GetId());
for (int e = 0; e < NI.GetOutDeg(); e++) {
const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
}
if (! IsDir) {
for (int e = 0; e < NI.GetInDeg(); e++) {
const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
}
}
}
DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId))));
if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1% change
}
}


| void TGraphAnf< PGraph >::InitAnfBits | ( | TAnfBitV & | BitV | ) |
Definition at line 67 of file anf.h.
References TVec< TVal >::BegI(), TStr::Fmt(), TVec< TVal >::Gen(), TUInt64::GetStr(), IAssert, IAssertR, TMath::Log2(), TInt::Mx, and TVec< TVal >::PutAll().
{
const int NNodes = Graph->GetNodes();
const int LogNodes = int(ceil(TMath::Log2(NNodes)));
ApproxBytes = NApprox / 8;
NBits = LogNodes + MoreBits; // bits per node
const int BytesPerNd = ApproxBytes * NBits; // total bytes per node
int64 VSize = ((static_cast<int64>(NNodes) * static_cast<int64>(BytesPerNd))/sizeof(uint)) + 1;
IAssertR(VSize <= TInt::Mx,
TStr::Fmt("Your graph is too large for Approximate Neighborhood Function, %s is larger than %d",
TUInt64::GetStr(VSize).CStr(),TInt::Mx));
printf("size %d\n", static_cast<int>(VSize));
BitV.Gen(VSize); IAssert(BitV.BegI() != NULL);
BitV.PutAll(0);
int SetBit = 0;
uint64 NodeOff = 0;
uchar* BitVPt = (uchar *) BitV.BegI();
// for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ...
for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) {
NIdToBitPosH.AddDat(NI.GetId(), NodeOff);
// init vertex bits
for (int approx = 0; approx < NApprox; approx++) {
const int RndNum = Rnd.GetUniDevInt();
for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { }
if (SetBit >= NBits) SetBit = NBits-1;
const int BitPos = ApproxBytes * SetBit + approx / 8;
*(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c)
}
NodeOff += BytesPerNd;
}
}

| TGraphAnf< PGraph >::UndefDefaultCopyAssign | ( | TGraphAnf< PGraph > | ) | [private] |
| void TGraphAnf< PGraph >::Union | ( | TAnfBitV & | BitV1, |
| const uint64 & | NId1Offset, | ||
| TAnfBitV & | BitV2, | ||
| const uint64 & | NId2Offset | ||
| ) | const |
Definition at line 99 of file anf.h.
References TVec< TVal >::BegI().
{
uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset;
uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset;
for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; }
}

TInt TGraphAnf< PGraph >::ApproxBytes [private] |
Definition at line 37 of file anf.h.
Referenced by TGraphAnf< PGraph >::TGraphAnf().
THash<TInt, uint64> TGraphAnf< PGraph >::NIdToBitPosH [private] |
Definition at line 36 of file anf.h.
Referenced by TGraphAnf< PGraph >::GetNIdOffset().