SNAP Library , Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // Community detection algorithms 00003 namespace TSnap { 00004 00005 00006 namespace TSnapDetail { 00007 // GIRVAN-NEWMAN algorithm 00008 // 1. The betweenness of all existing edges in the network is calculated first. 00009 // 2. The edge with the highest betweenness is removed. 00010 // 3. The betweenness of all edges affected by the removal is recalculated. 00011 // 4. Steps 2 and 3 are repeated until no edges remain. 00012 // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002) 00013 // Keep removing edges from Graph until one of the connected components of Graph splits into two. 00014 void CmtyGirvanNewmanStep(PUNGraph& Graph, TIntV& Cmty1, TIntV& Cmty2) { 00015 TIntPrFltH BtwEH; 00016 TBreathFS<PUNGraph> BFS(Graph); 00017 Cmty1.Clr(false); Cmty2.Clr(false); 00018 while (true) { 00019 TSnap::GetBetweennessCentr(Graph, BtwEH); 00020 BtwEH.SortByDat(false); 00021 if (BtwEH.Empty()) { return; } 00022 const int NId1 = BtwEH.GetKey(0).Val1; 00023 const int NId2 = BtwEH.GetKey(0).Val2; 00024 Graph->DelEdge(NId1, NId2); 00025 BFS.DoBfs(NId1, true, false, NId2, TInt::Mx); 00026 if (BFS.GetHops(NId1, NId2) == -1) { // two components 00027 TSnap::GetNodeWcc(Graph, NId1, Cmty1); 00028 TSnap::GetNodeWcc(Graph, NId2, Cmty2); 00029 return; 00030 } 00031 } 00032 } 00033 00034 // Connected components of a graph define clusters 00035 // OutDegH and OrigEdges stores node degrees and number of edges in the original graph 00036 double _GirvanNewmanGetModularity(const PUNGraph& G, const TIntH& OutDegH, const int& OrigEdges, TCnComV& CnComV) { 00037 TSnap::GetWccs(G, CnComV); // get communities 00038 double Mod = 0; 00039 for (int c = 0; c < CnComV.Len(); c++) { 00040 const TIntV& NIdV = CnComV[c](); 00041 double EIn=0, EEIn=0; 00042 for (int i = 0; i < NIdV.Len(); i++) { 00043 TUNGraph::TNodeI NI = G->GetNI(NIdV[i]); 00044 EIn += NI.GetOutDeg(); 00045 EEIn += OutDegH.GetDat(NIdV[i]); 00046 } 00047 Mod += (EIn-EEIn*EEIn/(2.0*OrigEdges)); 00048 } 00049 if (Mod == 0) { return 0; } 00050 else { return Mod/(2.0*OrigEdges); } 00051 } 00052 00053 } // namespace TSnapDetail 00054 00055 // Maximum modularity clustering by Girvan-Newman algorithm (slow) 00056 // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002) 00057 double CommunityGirvanNewman(PUNGraph& Graph, TCnComV& CmtyV) { 00058 TIntH OutDegH; 00059 const int NEdges = Graph->GetEdges(); 00060 for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00061 OutDegH.AddDat(NI.GetId(), NI.GetOutDeg()); 00062 } 00063 double BestQ = -1; // modularity 00064 TCnComV CurCmtyV; 00065 CmtyV.Clr(); 00066 TIntV Cmty1, Cmty2; 00067 while (true) { 00068 TSnapDetail::CmtyGirvanNewmanStep(Graph, Cmty1, Cmty2); 00069 const double Q = TSnapDetail::_GirvanNewmanGetModularity(Graph, OutDegH, NEdges, CurCmtyV); 00070 //printf("current modularity: %f\n", Q); 00071 if (Q > BestQ) { 00072 BestQ = Q; 00073 CmtyV.Swap(CurCmtyV); 00074 } 00075 if (Cmty1.Len()==0 || Cmty2.Len() == 0) { break; } 00076 } 00077 return BestQ; 00078 } 00079 00080 namespace TSnapDetail { 00084 class TCNMQMatrix { 00085 private: 00086 struct TCmtyDat { 00087 double DegFrac; 00088 TIntFltH NIdQH; 00089 int MxQId; 00090 TCmtyDat() : MxQId(-1) { } 00091 TCmtyDat(const double& NodeDegFrac, const int& OutDeg) : 00092 DegFrac(NodeDegFrac), NIdQH(OutDeg), MxQId(-1) { } 00093 void AddQ(const int& NId, const double& Q) { NIdQH.AddDat(NId, Q); 00094 if (MxQId==-1 || NIdQH[MxQId]<Q) { MxQId=NIdQH.GetKeyId(NId); } } 00095 void UpdateMaxQ() { MxQId=-1; 00096 for (int i = -1; NIdQH.FNextKeyId(i); ) { 00097 if (MxQId==-1 || NIdQH[MxQId]< NIdQH[i]) { MxQId=i; } } } 00098 void DelLink(const int& K) { const int NId=GetMxQNId(); 00099 NIdQH.DelKey(K); if (NId==K) { UpdateMaxQ(); } } 00100 int GetMxQNId() const { return NIdQH.GetKey(MxQId); } 00101 double GetMxQ() const { return NIdQH[MxQId]; } 00102 }; 00103 private: 00104 THash<TInt, TCmtyDat> CmtyQH; 00105 TVec<TFltIntIntTr> MxQHeap; 00106 TUnionFind CmtyIdUF; 00107 double Q; 00108 public: 00109 TCNMQMatrix(const PUNGraph& Graph) : CmtyQH(Graph->GetNodes()), 00110 MxQHeap(Graph->GetNodes(), 0), CmtyIdUF(Graph->GetNodes()) { Init(Graph); } 00111 void Init(const PUNGraph& Graph) { 00112 const double M = 0.5/Graph->GetEdges(); // 1/2m 00113 Q = 0.0; 00114 for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00115 CmtyIdUF.Add(NI.GetId()); 00116 const int OutDeg = NI.GetOutDeg(); 00117 if (OutDeg == 0) { continue; } 00118 TCmtyDat& Dat = CmtyQH.AddDat(NI.GetId(), TCmtyDat(M * OutDeg, OutDeg)); 00119 for (int e = 0; e < NI.GetOutDeg(); e++) { 00120 const int DstNId = NI.GetOutNId(e); 00121 const double DstMod = 2 * M * (1.0 - OutDeg * Graph->GetNI(DstNId).GetOutDeg() * M); 00122 Dat.AddQ(DstNId, DstMod); 00123 } 00124 Q += -1.0*TMath::Sqr(OutDeg*M); 00125 if (NI.GetId() < Dat.GetMxQNId()) { 00126 MxQHeap.Add(TFltIntIntTr(Dat.GetMxQ(), NI.GetId(), Dat.GetMxQNId())); } 00127 } 00128 MxQHeap.MakeHeap(); 00129 } 00130 TFltIntIntTr FindMxQEdge() { 00131 while (true) { 00132 if (MxQHeap.Empty()) { break; } 00133 const TFltIntIntTr TopQ = MxQHeap.PopHeap(); 00134 if (! CmtyQH.IsKey(TopQ.Val2) || ! CmtyQH.IsKey(TopQ.Val3)) { continue; } 00135 if (TopQ.Val1!=CmtyQH.GetDat(TopQ.Val2).GetMxQ() && TopQ.Val1!=CmtyQH.GetDat(TopQ.Val3).GetMxQ()) { continue; } 00136 return TopQ; 00137 } 00138 return TFltIntIntTr(-1, -1, -1); 00139 } 00140 bool MergeBestQ() { 00141 const TFltIntIntTr TopQ = FindMxQEdge(); 00142 if (TopQ.Val1 <= 0.0) { return false; } 00143 // joint communities 00144 const int I = TopQ.Val3; 00145 const int J = TopQ.Val2; 00146 CmtyIdUF.Union(I, J); // join 00147 Q += TopQ.Val1; 00148 TCmtyDat& DatJ = CmtyQH.GetDat(J); 00149 { TCmtyDat& DatI = CmtyQH.GetDat(I); 00150 DatI.DelLink(J); DatJ.DelLink(I); 00151 for (int i = -1; DatJ.NIdQH.FNextKeyId(i); ) { 00152 const int K = DatJ.NIdQH.GetKey(i); 00153 TCmtyDat& DatK = CmtyQH.GetDat(K); 00154 double NewQ = DatJ.NIdQH[i]; 00155 if (DatI.NIdQH.IsKey(K)) { NewQ = NewQ+DatI.NIdQH.GetDat(K); DatK.DelLink(I); } // K connected to I and J 00156 else { NewQ = NewQ-2*DatI.DegFrac*DatK.DegFrac; } // K connected to J not I 00157 DatJ.AddQ(K, NewQ); 00158 DatK.AddQ(J, NewQ); 00159 MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K))); 00160 } 00161 for (int i = -1; DatI.NIdQH.FNextKeyId(i); ) { 00162 const int K = DatI.NIdQH.GetKey(i); 00163 if (! DatJ.NIdQH.IsKey(K)) { // K connected to I not J 00164 TCmtyDat& DatK = CmtyQH.GetDat(K); 00165 const double NewQ = DatI.NIdQH[i]-2*DatJ.DegFrac*DatK.DegFrac; 00166 DatJ.AddQ(K, NewQ); 00167 DatK.DelLink(I); 00168 DatK.AddQ(J, NewQ); 00169 MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K))); 00170 } 00171 } 00172 DatJ.DegFrac += DatI.DegFrac; } 00173 if (DatJ.NIdQH.Empty()) { CmtyQH.DelKey(J); } // isolated community (done) 00174 CmtyQH.DelKey(I); 00175 return true; 00176 } 00177 static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) { 00178 TCNMQMatrix QMatrix(Graph); 00179 // maximize modularity 00180 while (QMatrix.MergeBestQ()) { } 00181 // reconstruct communities 00182 THash<TInt, TIntV> IdCmtyH; 00183 for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00184 IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId()); 00185 } 00186 CmtyV.Gen(IdCmtyH.Len()); 00187 for (int j = 0; j < IdCmtyH.Len(); j++) { 00188 CmtyV[j].NIdV.Swap(IdCmtyH[j]); 00189 } 00190 return QMatrix.Q; 00191 } 00192 }; 00193 00194 } // namespace TSnapDetail 00195 00196 double CommunityCNM(const PUNGraph& Graph, TCnComV& CmtyV) { 00197 return TSnapDetail::TCNMQMatrix::CmtyCMN(Graph, CmtyV); 00198 } 00199 00200 }; //namespace TSnap