42 void TAGMFit::RandomInitCmtyVV(
const int InitComs, 
const double ComSzAlpha, 
const double MemAlpha, 
const int MinComSz, 
const int MaxComSz, 
const int MinMem, 
const int MaxMem) {
 
   54     int SrcNID = SrcNI.GetId();
 
   55     for (
int v = 0; v < SrcNI.GetDeg(); v++) {
 
   56       int DstNID = SrcNI.GetNbrNId(v);
 
   57       if (SrcNID >= DstNID) { 
continue; }
 
   63       for (
int k = 0; k < JointCom.
Len(); k++) {
 
   79   LEdges = 0.0; LNoEdges = 0.0;
 
   80   for (
int e = 0; e < 
EdgeComVH.Len(); e++) {
 
   83     double Puv = 1 - exp(- LambdaSum);
 
   84     if (JointCom.
Len() == 0) {  Puv = 
PNoCom;  }
 
   88   for (
int k = 0; k < NewLambdaV.
Len(); k++) {
 
   91     if(NotEdgesInCom > 0) {
 
   92       if (LNoEdges >= 
TFlt::Mn + 
double(NotEdgesInCom) * NewLambdaV[k]) { 
 
   93         LNoEdges -= double(NotEdgesInCom) * NewLambdaV[k];
 
  101   return LEdges + LNoEdges + LReg;
 
  110   double StepSize = 1.0;
 
  114   for (
int iter = 0; ; iter++) {
 
  116       NewLambdaV[i] = 
LambdaV[i] + StepSize * DeltaV[i];
 
  136   double GradCutOff = 1000;
 
  137   for (iter = 0; iter < MaxIter; iter++) {
 
  140       if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
 
  141       if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
 
  145     double Alpha = 0.15, Beta = 0.2;
 
  146     if (Edges > 
Kilo(100)) { Alpha = 0.00015; Beta = 0.3;}
 
  150       double Change = LearnRate * GradV[i];
 
  155     if (! PlotNm.
Empty()) {
 
  161   if (! PlotNm.
Empty()) {
 
  164     printf(
"MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.
GetTmStr());
 
  171   for (
int c = 0; c < MaxK; c++) {
 
  176     for (
int v = 0; v < NC; v++) {
 
  191   TIntV ChosenNIDV(InitComs, 0); 
 
  196   for (
int u = 0; u < NIdV.
Len(); u++) {
 
  209   printf(
"conductance computation completed [%s]\n", RunTm.
GetTmStr());
 
  213   for (
int ui = 0; ui < NIdPhiV.Len(); ui++) {
 
  214     int UID = NIdPhiV[ui].Val2;
 
  216     if (InvalidNIDS.IsKey(UID)) { 
continue; }
 
  222     for (
int e = 0; e < NI.
GetDeg(); e++) {
 
  226     for (
int e = 0; e < NI.
GetDeg(); e++) {
 
  231     if (CurCID >= InitComs) { 
break;  }
 
  233   if (InitComs > CurCID) {
 
  234     printf(
"%d communities needed to fill randomly\n", InitComs - CurCID);
 
  237   for (
int c = 0; c < 
CIDNSetV.Len(); c++) {
 
  240       for (
int u = 0; u < ComSz; u++) {
 
  254   TIntV TmpV = CmtyVV[0];
 
  273   for (
int c = 0; c < 
CIDNSetV.Len(); c++) {
 
  285   for (
int c = 0; c < 
CIDNSetV.Len(); c++) {
 
  295   for (
int e = 0; e < NI.
GetDeg(); e++) {
 
  297     if (
NIDComVH.GetDat(VID).IsKey(CID)) {
 
  299       EdgeComVH.GetDat(SrcDstNIDPr).DelKey(CID);
 
  311   for (
int e = 0; e < NI.
GetDeg(); e++) {
 
  313     if (
NIDComVH.GetDat(VID).IsKey(JoinCID)) {
 
  315       EdgeComVH.GetDat(SrcDstNIDPr).AddKey(JoinCID);
 
  320   NIDComVH.GetDat(NID).AddKey(JoinCID);
 
  336     if (TryCnt < MaxTryCnt) { 
 
  340   else if (Option == 1) {
 
  344       LeaveCID = NIDCIDPr.
Val2;
 
  345     } 
while (TryCnt++ < MaxTryCnt && LeaveCID == 
BaseCID);
 
  346     if (TryCnt < MaxTryCnt) {
 
  354       LeaveCID = NIDCIDPr.
Val2;
 
  359     if (TryCnt < MaxTryCnt) {
 
  369   double BestL = PrevL;
 
  370   printf(
"initial likelihood = %f\n",PrevL);
 
  371   TIntFltPrV IterTrueLV, IterJoinV, IterLeaveV, IterAcceptV, IterSwitchV, IterLBV;
 
  376   int SwitchCnt = 0, LeaveCnt = 0, JoinCnt = 0, AcceptCnt = 0, ProbBinSz;
 
  382   for (
int iter = 0; iter < MaxIter; iter++) {
 
  385     int JoinCID = -1, LeaveCID = -1;
 
  391       if (JoinCID > -1 && JoinCID != 
BaseCID) { 
JoinCom(NID, JoinCID); }
 
  392       if (LeaveCID > -1 && JoinCID > -1 && JoinCID != 
BaseCID && LeaveCID != 
BaseCID) { SwitchCnt++; }
 
  393       else if (LeaveCID > -1  && LeaveCID != 
BaseCID) { LeaveCnt++;}
 
  394       else if (JoinCID > -1 && JoinCID != 
BaseCID) { JoinCnt++;}
 
  396       if ((iter + 1) % EvalLambdaIter == 0) {
 
  402         OptL = PrevL + DeltaL;
 
  404       if (BestL <= OptL && 
CIDNSetV.Len() > 0) {
 
  410     if (iter > 0 && (iter % ProbBinSz == 0) && PlotFPrx.
Len() > 0) { 
 
  412       IterSwitchV.
Add(
TIntFltPr(iter, (
double) SwitchCnt / (
double) AcceptCnt));
 
  413       IterLeaveV.
Add(
TIntFltPr(iter, (
double) LeaveCnt / (
double) AcceptCnt));
 
  414       IterJoinV.
Add(
TIntFltPr(iter, (
double) JoinCnt / (
double) AcceptCnt));
 
  415       IterAcceptV.
Add(
TIntFltPr(iter, (
double) AcceptCnt / (
double) ProbBinSz));
 
  416       SwitchCnt = JoinCnt = LeaveCnt = AcceptCnt = 0;
 
  419     if ((iter + 1) % 10000 == 0) {
 
  420       printf(
"\r%d iterations completed [%.2f]", iter, (
double) iter / (
double) MaxIter);
 
  425   if (PlotFPrx.
Len() > 0) {
 
  428     GP1.
SetDataPlotFNm(PlotFPrx + 
".likelihood.tab", PlotFPrx + 
".likelihood.plt");
 
  430     GP1.
SetTitle(PlotFPrx + 
".likelihood" + TitleStr);
 
  431     GP1.
SavePng(PlotFPrx + 
".likelihood.png");
 
  438     GP2.
SetTitle(PlotFPrx + 
".transition");
 
  439     GP2.
SetDataPlotFNm(PlotFPrx + 
"transition_prob.tab", PlotFPrx + 
"transition_prob.plt");
 
  440     GP2.
SavePng(PlotFPrx + 
"transition_prob.png");
 
  447   printf(
"\nMCMC completed (best likelihood: %.2f) [%s]\n", BestL, TotalTm.
GetTmStr());
 
  461   for (
int c = 0; c < 
CIDNSetV.Len(); c++) {
 
  481   for (
int e = 0; e < NI.
GetDeg(); e++) {
 
  483     if (! 
NIDComVH.GetDat(VID).IsKey(CID)) { 
continue; }
 
  487     CurPuv = 1 - exp(- LambdaSum);
 
  488     NewPuv = 1 - exp(- LambdaSum + 
LambdaV[CID]);
 
  490     if (JointCom.
Len() == 1) {
 
  493     Delta += (log(NewPuv) - log(CurPuv));
 
  507   for (
int e = 0; e < NI.
GetDeg(); e++) {
 
  509     if (! 
NIDComVH.GetDat(VID).IsKey(CID)) { 
continue; }
 
  513     CurPuv = 1 - exp(- LambdaSum);
 
  514     if (JointCom.
Len() == 0) { CurPuv = 
PNoCom; }
 
  515     NewPuv = 1 - exp(- LambdaSum - 
LambdaV[CID]);
 
  516     Delta += (log(NewPuv) - log(CurPuv));
 
  531   for (
int e = 0; e < NI.
GetDeg(); e++) {
 
  533     if (! 
NIDComVH.GetDat(VID).IsKey(CurCID) || ! 
NIDComVH.GetDat(VID).IsKey(NewCID)) {
continue;}
 
  536     double CurPuv, NewPuvAfterJoin, NewPuvAfterLeave, NewPuvAfterSwitch, LambdaSum = 
SelectLambdaSum(JointCom);
 
  537     CurPuv = 1 - exp(- LambdaSum);
 
  538     NewPuvAfterLeave = 1 - exp(- LambdaSum + 
LambdaV[CurCID]);
 
  539     NewPuvAfterJoin = 1 - exp(- LambdaSum - 
LambdaV[NewCID]);
 
  540     NewPuvAfterSwitch = 1 - exp(- LambdaSum - 
LambdaV[NewCID] + 
LambdaV[CurCID]);
 
  541     if (JointCom.
Len() == 1 || NewPuvAfterLeave == 0.0) {
 
  542       NewPuvAfterLeave = 
PNoCom;
 
  544     Delta += (log(NewPuvAfterSwitch) + log(CurPuv) - log(NewPuvAfterLeave) - log(NewPuvAfterJoin));
 
  546       printf(
"NS:%f C:%f NL:%f NJ:%f PNoCom:%f", NewPuvAfterSwitch, CurPuv, NewPuvAfterLeave, NewPuvAfterJoin, 
PNoCom.
Val);
 
  563   for (
int c = 0; c < 
CIDNSetV.Len(); c++) {
 
  566   CIDLambdaH.SortByDat(
false);
 
  567   for (
int c = 0; c < 
CIDNSetV.Len(); c++) {
 
  568     int CID = CIDLambdaH.GetKey(c);
 
  570     double Q = exp( - (
double) 
LambdaV[CID]);
 
  571     if (Q > QMax) { 
continue; }
 
  574     if (CmtyV.
Len() == 0) { 
continue; }
 
  586   for (
int c = 0; c < 
CIDNSetV.Len(); c++) {
 
  598   for (
int e = 0; e < 
EdgeComVH.Len(); e++) {
 
  601     double Puv = 1 - exp(- LambdaSum);
 
  602     if (JointCom.
Len() == 0) {  Puv = 
PNoCom;  }
 
  604       SumEdgeProbsV[SI.GetKey()] += (1 - Puv) / Puv;
 
  609     int NotEdgesInCom = MaxEk - 
ComEdgesV[k];
 
  610     GradV[k] = SumEdgeProbsV[k] - (double) NotEdgesInCom;
 
  625     IAssert(NewLambdaV[SI.GetKey()] >= 0);
 
  626     Result += NewLambdaV[SI.GetKey()];
 
  635   uint64 PairNoCom = 0, EdgesNoCom = 0;
 
  636   for (
int u = 0; u < NIdV.
Len(); u++) {
 
  637     for (
int v = u + 1; v < NIdV.
Len(); v++) {
 
  638       int SrcNID = NIdV[u], DstNID = NIdV[v];
 
  641       if(JointCom.
Len() == 0) {
 
  643         if (
G->
IsEdge(SrcNID, DstNID)) { EdgesNoCom++; }
 
  644         if (SamplePairs > 0 && PairNoCom >= (
uint64) SamplePairs) { 
break; }
 
  647     if (SamplePairs > 0 && PairNoCom >= (
uint64) SamplePairs) { 
break; }
 
  650   if (EdgesNoCom > 0) {
 
  651     PNoCom = (double) EdgesNoCom / (
double) PairNoCom;
 
  661   for (
int c = 0; c < 
CIDNSetV.Len(); c++) {
 
  664   CIDLambdaH.SortByDat(
false);
 
  667     int CID = CIDLambdaH.GetKey(i);
 
  668     if (
LambdaV[CID] <= 0.0001) { 
continue; }
 
  669     printf(
"P_c : %.3f Com Sz: %d, Total Edges inside: %d \n", 1.0 - exp(- 
LambdaV[CID]), 
CIDNSetV[CID].Len(), (
int) 
ComEdgesV[CID]);
 
static const T & Mn(const T &LVal, const T &RVal)
 
void Load(TSIn &SIn, const int &RndSeed=0)
 
TPair< TInt, TInt > TIntPr
 
static void GetNbhCom(const PUNGraph &Graph, const int NID, TIntSet &NBCmtyS)
 
static double Norm(const TFltV &x)
 
TPair< TFlt, TInt > TFltIntPr
 
double SeekSwitch(const int &UID, const int &CurCID, const int &NewCID)
 
static double SumVec(const TFltV &x)
 
static void GetNodeMembership(THash< TInt, TIntSet > &NIDComVH, const TVec< TIntV > &CmtyVV)
get hash table of  
 
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
 
static const T & Mx(const T &LVal, const T &RVal)
 
void SetDefaultPNoCom()
Set Epsilon (the probability that two nodes sharing no communities connect) to the default value...
 
void Save(TSOut &SOut) const 
 
THash< TIntPr, TInt > NIDCIDPrS
 pairs (for sampling MCMC moves). 
 
void GetNIdV(TIntV &NIdV) const 
Gets a vector IDs of all nodes in the graph. 
 
void SetTitle(const TStr &PlotTitle)
 
double GetStepSizeByLineSearchForLambda(const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
Step size search for updating P_c (which is parametarized by regularization parameter lambda)...
 
double Likelihood()
COMMENT. 
 
int GetEdges() const 
Returns the number of edges in the graph. 
 
TSizeTy Len() const 
Returns the number of elements in the vector. 
 
Node iterator. Only forward iteration (operator++) is supported. 
 
TFltV LambdaV
Parametrization of P_c (edge probability in community c), P_c = 1 - exp(-lambda). ...
 
void Save(TSOut &SOut) const 
 
void PrintSummary()
COMMENT. 
 
TInt BaseCID
ID of the Epsilon-community (in case we fit P_c of the epsilon community). We do not fit for the Epsi...
 
static void GetIntersection(const THashSet< TInt > &A, const THashSet< TInt > &B, THashSet< TInt > &C)
 
void Save(TSOut &SOut) const 
Saves the graph to a (binary) stream SOut. 
 
void SetDataPlotFNm(const TStr &DatFNm, const TStr &PltFNm)
 
THash< TIntPr, TIntSet > EdgeComVH
Edge -> Shared Community ID Set. 
 
static double GetConductance(const PUNGraph &Graph, const TIntSet &CmtyS, const int Edges)
 
void GetEdgeJointCom()
For each (u, v) in edges, precompute C_uv (the set of communities nodes u and v share). 
 
int GetNodes() const 
Returns the number of nodes in the graph. 
 
void Save(TSOut &SOut) const 
 
int GetRndNId(TRnd &Rnd=TInt::Rnd)
Returns an ID of a random node in the graph. 
 
TPair< TInt, TFlt > TIntFltPr
 
int GetDeg() const 
Returns degree of the current node. 
 
const char * GetTmStr() const 
 
void GradLogLForLambda(TFltV &GradV)
 
void DelKey(const TKey &Key)
 
double CalcPNoComByCmtyVV(const int &SamplePairs=-1)
Compute the empirical edge probability between a pair of nodes who share no community (epsilon)...
 
void LeaveCom(const int &NID, const int &CID)
After MCMC, NID leaves community CID. 
 
TFlt MinLambda
Minimum value of regularization parameter lambda (default = 1e-5). 
 
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double QMax=2.0)
Get communities whose  p_c is higher than 1 - QMax. 
 
int RemoveEmptyCom()
Remove all communities with no members. 
 
int MLEGradAscentGivenCAG(const double &Thres=0.001, const int &MaxIter=10000, const TStr PlotNm=TStr())
Gradient descent for p_c while keeping the community affiliation graph (CAG) fixed. 
 
void Gen(const int &ExpectVals)
 
THash< TInt, TIntSet > NIDComVH
Node ID -> Communitie IDs the node belongs to. 
 
unsigned long long uint64
 
double SelectLambdaSum(const TIntSet &ComK)
Compute sum of lambda_c (which is log (1 - p_c)) over C_uv (ComK). The function is used to compute ed...
 
static void GenCmtyVVFromPL(TVec< TIntV > &CmtyVV, const PUNGraph &Graph, const int &Nodes, const int &Coms, const double &ComSzAlpha, const double &MemAlpha, const int &MinSz, const int &MaxSz, const int &MinK, const int &MaxK, TRnd &Rnd)
Generate bipartite community affiliation from given power law coefficients for membership distributio...
 
TFlt RegCoef
Regularization parameter when we fit for P_c (for finding # communities). 
 
TNodeI GetNI(const int &NId) const 
Returns an iterator referring to the node of ID NId in the graph. 
 
const TVal & Last() const 
Returns a reference to the last element of the vector. 
 
void NeighborComInit(const int InitComs)
Initialize node community memberships using best neighborhood communities (see D. Gleich et al...
 
void GetQV(TFltV &OutV)
Returns QV, a vector of (1 - p_c) for each community c. 
 
TNodeI GetRndNI(TRnd &Rnd=TInt::Rnd)
Returns an interator referring to a random node in the graph. 
 
static PUNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
 
void PutSeed(const int &_Seed)
 
int AddKey(const TKey &Key)
 
double SeekJoin(const int &UID, const int &CID)
Compute the change in likelihood (Delta) if node UID joins community CID. 
 
void RunMCMC(const int &MaxIter, const int &EvalLambdaIter, const TStr &PlotFPrx=TStr())
Main procedure for fitting the AGM to a given graph using MCMC. 
 
void InitNodeData()
COMMENT. 
 
static TStr Fmt(const char *FmtStr,...)
 
double SeekLeave(const int &UID, const int &CID)
Compute the change in likelihood (Delta) if node UID leaves community CID. 
 
void JoinCom(const int &NID, const int &JoinCID)
 
void RandomInitCmtyVV(const int InitComs, const double ComSzAlpha=1.3, const double MemAlpha=1.8, const int MinComSz=8, const int MaxComSz=200, const int MinMem=1, const int MaxMem=10)
Randomly initialize bipartite community affiliation graph. 
 
TFlt MaxLambda
Maximum value of regularization parameter lambda (default = 10). 
 
TVec< TIntSet > CIDNSetV
Community ID -> Member Node ID Sets. 
 
TNodeI EndNI() const 
Returns an iterator referring to the past-the-end node in the graph. 
 
void AddBaseCmty()
Add Epsilon community (base community which includes all nodes) into community affiliation graph...
 
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
COMMENT. 
 
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
 
int GetNbrNId(const int &NodeN) const 
Returns ID of NodeN-th neighboring node. 
 
bool IsNode(const int &NId) const 
Tests whether ID NId is a node. 
 
static double DotProduct(const TFltV &x, const TFltV &y)
 
int GetRndKeyId(TRnd &Rnd) const 
Get an index of a random element. If the hash table has many deleted keys, this may take a long time...
 
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements. 
 
int GetUniDevInt(const int &Range=0)
 
int GetId() const 
Returns ID of the current node. 
 
bool IsEdge(const int &SrcNId, const int &DstNId) const 
Tests whether an edge between node IDs SrcNId and DstNId exists in the graph. 
 
bool IsKey(const TKey &Key) const 
 
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element. 
 
TNodeI BegNI() const 
Returns an iterator referring to the first node in the graph. 
 
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph. 
 
void DelLast()
Removes the last element of the vector. 
 
TDat & AddDat(const TKey &Key)
 
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
 
TIntV ComEdgesV
The number of edges in each community. 
 
void SampleTransition(int &NID, int &JoinCID, int &LeaveCID, double &DeltaL)
Sample MMCM transitions: Choose among (join, leave, switch), and then sample (NID, CID). 
 
void Save(TSOut &SOut) const 
 
const TKey & GetKey(const int &KeyId) const 
 
TFlt PNoCom
Probability of edge when two nodes share no community (epsilon in the paper). 
 
void RandomInit(const int &MaxK)
COMMENT. 
 
THash< TIntPr, TFlt > NIDCIDPrH
 pairs (for sampling MCMC moves).