SNAP Library 2.4, User Reference  2015-05-11 19:40:56
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
cliques.cpp
Go to the documentation of this file.
1 #include "stdafx.h"
2 #include "cliques.h"
3 
5 // TCommunity implementation
7  for (THashSet<TInt>::TIter it=A.BegI(); it<A.EndI(); it++) {
8  const int nId = it.GetKey();
9  if (!B.IsKey(nId)) Complement.AddKey(nId);
10  }
11 }
12 
14  if (A.Len() < B.Len()) {
15  for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++)
16  if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
17  } else {
18  for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++)
19  if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
20  }
21 }
22 
24  int n = 0;
25  if (A.Len() < B.Len()) {
26  for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++)
27  if (B.IsKey(it.GetKey())) n++;
28  } else {
29  for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++)
30  if (A.IsKey(it.GetKey())) n++;
31  }
32  return n;
33 }
34 
35 void TCliqueOverlap::GetNbrs(int NId, THashSet<TInt>& Nbrs) const{
36  TUNGraph::TNodeI node = m_G->GetNI(NId);
37  int deg = node.GetDeg();
38  for (int i=0; i<deg; i++) Nbrs.AddKey(node.GetNbrNId(i));
39 }
40 
42  int id = -1;
43  int maxDeg = -1;
44  //
45  for (THashSetKeyI<TInt> it=Set.BegI(); it<Set.EndI(); it++) {
46  int nId = it.GetKey();
47  int deg = m_G->GetNI(nId).GetDeg();
48  if (maxDeg < deg) { maxDeg=deg; id=nId; }
49  }
50  return id;
51 }
52 
54  int id = -1;
55  int maxIntersection = -1;
56  //
57  for (THashSetKeyI<TInt> it=SUBG.BegI(); it<SUBG.EndI(); it++) {
58  int nId = it.GetKey();
59  TUNGraph::TNodeI nIt = m_G->GetNI(nId);
60  int deg = nIt.GetDeg();
61  //
62  int curIntersection = 0;
63  for (int i=0; i<deg; i++) {
64  int nbrId = nIt.GetNbrNId(i);
65  if (CAND.IsKey(nbrId)) curIntersection++;
66  }
67  //
68  if (maxIntersection < curIntersection) { maxIntersection=curIntersection; id=nId; }
69  }
70  return id;
71 }
72 
74  if (SUBG.Len()==0) { if (m_Q.Len() >= m_minMaxCliqueSize) { m_Q.Pack(); m_maxCliques->Add(m_Q); } return; }
75  if (CAND.Len()==0) return;
76  //Get u that maximaze CAND intersection with neighbours of vertex u
77  int u = MaxNbrsInCANDNodeId(SUBG, CAND);
78  //Get neighbours of node u
79  THashSet<TInt> nbrsU;
80  GetNbrs(u, nbrsU);
81  //Get relative complement of nbrsU in CAND
82  THashSet<TInt> EXT;
83  GetRelativeComplement(CAND, nbrsU, EXT);
84  while(EXT.Len() != 0) {
85  int q = GetNodeIdWithMaxDeg(EXT);
86  //
87  m_Q.Add(q);
88  //
89  THashSet<TInt> nbrsQ;
90  GetNbrs(q, nbrsQ);
91  //
92  THashSet<TInt> SUBGq;
93  GetIntersection(SUBG, nbrsQ, SUBGq);
94  //
95  THashSet<TInt> CANDq;
96  GetIntersection(CAND, nbrsQ, CANDq);
97  //
98  Expand(SUBGq, CANDq);
99  //
100  CAND.DelKey(q);
101  m_Q.DelLast();
102  //
103  EXT.Clr();
104  GetRelativeComplement(CAND, nbrsU, EXT);
105  }
106 }
107 
108 void TCliqueOverlap::GetMaximalCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
109  if (G->GetNodes() == 0) return;
110  //
111  m_G = G;
112  m_minMaxCliqueSize = MinMaxCliqueSize;
113  m_maxCliques =& MaxCliques;
114  m_Q.Clr();
115  //
116  THashSet<TInt> SUBG;
117  THashSet<TInt> CAND;
118  for (TUNGraph::TNodeI NI=m_G->BegNI(); NI<m_G->EndNI(); NI++) {
119  TInt nId = NI.GetId();
120  SUBG.AddKey(nId);
121  CAND.AddKey(nId);
122  }
123  //
124  Expand(SUBG, CAND);
125 }
126 
127 void TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap, TVec<TIntV>& OverlapMtx) {
128  OverlapMtx.Clr();
129  //
130  int n = MaxCliques.Len();
131  //Convert max cliques to HashSets
132  TVec<THashSet<TInt> > cliques;
133  for (int i=0; i<n; i++) {
134  const int len = MaxCliques[i].Len();
135  cliques.Add();
136  if (len < MinNodeOverlap) { continue; }
137  THashSet<TInt>& set = cliques.Last(); set.Gen(len);
138  for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); }
139  }
140  //Init clique clique overlap matrix
141  n = cliques.Len();
142  OverlapMtx.Gen(n);
143  for (int i=0; i<n; i++) OverlapMtx[i].Gen(n);
144  //Calculate clique clique overlap matrix
145  for (int i=0; i<n; i++) {
146  OverlapMtx[i][i] = cliques[i].Len();
147  for (int j=i+1; j<n; j++) {
148  OverlapMtx[i][j] = Intersection(cliques[i], cliques[j]); }
149  }
150 }
151 
152 PUNGraph TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap) {
153  const int n = MaxCliques.Len();
154  //Convert max cliques to HashSets
155  TVec<THashSet<TInt> > cliques;
156  for (int i=0; i<n; i++) {
157  const int len = MaxCliques[i].Len();
158  cliques.Add();
159  if (len < MinNodeOverlap) { continue; }
160  THashSet<TInt>& set = cliques.Last(); set.Gen(len);
161  for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); }
162  }
163  //Init clique clique overlap matrix
164  PUNGraph OverlapMtx = TUNGraph::New();
165  for (int i=0; i < n; i++) {
166  OverlapMtx->AddNode(i); }
167  //Calculate clique clique overlap matrix
168  for (int i=0; i<n; i++) {
169  for (int j=i+1; j<n; j++) {
170  if (Intersection(cliques[i], cliques[j]) >= MinNodeOverlap) {
171  OverlapMtx->AddEdge(i,j); }
172  }
173  }
174  return OverlapMtx;
175 }
176 
177 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, int MinNodeOverlap, TVec<TIntV>& CliqueIdVV) {
178  int n = OverlapMtx.Len();
179  for (int i=0; i<n; i++) {
180  bool isCommunity = false;
181  for (int j=i+1; j<n; j++) {
182  if (OverlapMtx[i][j] >= MinNodeOverlap) {
183  if (! isCommunity) {
184  TIntV v; v.Add(i);
185  CliqueIdVV.Add(v);
186  isCommunity=true;
187  }
188  CliqueIdVV.Last().Add(j);
189  }
190  }
191  }
192 }
193 
194 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, const TVec<TIntV>& MaxCliques, double MinOverlapFrac, TVec<TIntV>& CliqueIdVV) {
195  int n = OverlapMtx.Len();
196  for(int i=0; i<n; i++){
197  bool isCommunity = false;
198  int size1 = MaxCliques[i].Len();
199  for(int j=i+1; j<n; j++){
200  int size2 = MaxCliques[j].Len();
201  double ratio = OverlapMtx[i][j];
202  if(size1 < size2) ratio /= size1;
203  else ratio /= size2;
204  if(ratio >= MinOverlapFrac){
205  if(!isCommunity){
206  TIntV v; v.Add(i);
207  CliqueIdVV.Add(v);
208  isCommunity=true;
209  }
210  CliqueIdVV.Last().Add(j);
211  }
212  }
213  }
214 }
215 
217 void TCliqueOverlap::GetMaxCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
218  TCliqueOverlap CO;
219  MaxCliques.Clr(false);
220  CO.GetMaximalCliques(G, MinMaxCliqueSize, MaxCliques);
221 }
222 
224 void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) {
225  printf("Clique Percolation Method\n");
226  TExeTm ExeTm;
227  TVec<TIntV> MaxCliques;
228  TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques);
229  // op RS 2012/05/15, commented out next line, a parameter is missing,
230  // creating a warning on OS X
231  // printf("...%d cliques found\n");
232  // get clique overlap matrix (graph)
233  PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1);
234  printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges());
235  // connected components are communities
236  TCnComV CnComV;
237  TSnap::GetWccs(OverlapGraph, CnComV);
238  NIdCmtyVV.Clr(false);
239  TIntSet CmtySet;
240  for (int c = 0; c < CnComV.Len(); c++) {
241  CmtySet.Clr(false);
242  for (int i = 0; i <CnComV[c].Len(); i++) {
243  const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]];
244  CmtySet.AddKeyV(CliqueNIdV);
245  }
246  NIdCmtyVV.Add();
247  CmtySet.GetKeyV(NIdCmtyVV.Last());
248  NIdCmtyVV.Last().Sort();
249  }
250  printf("done [%s].\n", ExeTm.GetStr());
251 }
void Clr(const bool &DoDel=true, const int &NoDelLim=-1)
Definition: shash.h:1243
static void GetRelativeComplement(const THashSet< TInt > &A, const THashSet< TInt > &B, THashSet< TInt > &Complement)
Definition: cliques.cpp:6
void GetNbrs(int NId, THashSet< TInt > &Nbrs) const
Definition: cliques.cpp:35
Definition: tm.h:354
void AddKeyV(const TVec< TKey > &KeyV)
Definition: shash.h:1284
static void GetCPMCommunities(const PUNGraph &G, int MinMaxCliqueSize, TVec< TIntV > &Communities)
Clique Percolation method communities.
Definition: cliques.cpp:224
TIter BegI() const
Definition: shash.h:1105
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:8
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:74
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:535
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:63
void GetKeyV(TVec< TKey > &KeyV) const
Definition: shash.h:1347
void Gen(const int &ExpectVals)
Definition: shash.h:1115
static void GetMaxCliques(const PUNGraph &G, int MinMaxCliqueSize, TVec< TIntV > &MaxCliques)
Enumerate maximal cliques of the network on more than MinMaxCliqueSize nodes.
Definition: cliques.cpp:217
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:165
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:85
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:953
static void GetOverlapCliques(const TVec< TIntV > &OverlapMtx, int MinNodeOverlap, TVec< TIntV > &CliqueIdVV)
Definition: cliques.cpp:177
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1218
void DelKey(const TKey &Key)
Definition: shash.h:1289
int MaxNbrsInCANDNodeId(const THashSet< TInt > &SUBG, const THashSet< TInt > &CAND) const
Definition: cliques.cpp:53
static void GetIntersection(const THashSet< TInt > &A, const THashSet< TInt > &B, THashSet< TInt > &C)
Definition: cliques.cpp:13
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:208
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:152
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:539
void Expand(const THashSet< TInt > &SUBG, THashSet< TInt > &CAND)
Definition: cliques.cpp:73
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: graph.cpp:84
Definition: dt.h:1042
int m_minMaxCliqueSize
Definition: cliques.h:13
TIter EndI() const
Definition: shash.h:1112
int Len() const
Definition: shash.h:1121
TVec< TIntV > * m_maxCliques
Definition: cliques.h:12
PUNGraph m_G
Definition: cliques.h:10
void Pack()
The vector reduces its capacity (frees memory) to match its size.
Definition: ds.h:987
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:206
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:104
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:486
const char * GetStr() const
Definition: tm.h:367
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:559
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:204
void DelLast()
Removes the last element of the vector.
Definition: ds.h:609
static int Intersection(const THashSet< TInt > &A, const THashSet< TInt > &B)
Definition: cliques.cpp:23
static void CalculateOverlapMtx(const TVec< TIntV > &MaxCliques, int MinNodeOverlap, TVec< TIntV > &OverlapMtx)
Definition: cliques.cpp:127
TIntV m_Q
Definition: cliques.h:11
void GetWccs(const PGraph &Graph, TCnComV &CnComV)
Returns all weakly connected components in a Graph.
Definition: cncom.h:376
void GetMaximalCliques(const PUNGraph &G, int MinMaxCliqueSize, TVec< TIntV > &MaxCliques)
Definition: cliques.cpp:108
int GetNodeIdWithMaxDeg(const THashSet< TInt > &Set) const
Definition: cliques.cpp:41
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:420