1 #ifndef yanglib_agmdirected_h 
    2 #define yanglib_agmdirected_h 
   27   TCoda(
const PNGraph& GraphPt, 
const int& InitComs, 
const int RndSeed = 0): Rnd(RndSeed), RegCoef(0), 
 
   28     NodesOk(true), MinVal(0.0), MaxVal(1000.0), NegWgt(1.0) { 
SetGraph(GraphPt); 
RandomInit(InitComs); }
 
   33   void SetRegCoef(
const double _RegCoef) { RegCoef = _RegCoef; }
 
   40   double Likelihood(
const bool DoParallel = 
false);
 
   52   void GetTopCIDs(
TIntV& CIdV, 
const int TopK, 
const int IsAverage = 1, 
const int MinSz = 1);
 
   68   int FindComsByCV(
TIntV& ComsV, 
const double HOFrac = 0.2, 
const int NumThreads = 20, 
const TStr PlotLFNm = 
TStr(), 
const int EdgesForCV = 100, 
const double StepAlpha = 0.3, 
const double StepBeta = 0.1);
 
   69   int FindComsByCV(
const int NumThreads, 
const int MaxComs, 
const int MinComs, 
const int DivComs, 
const TStr OutFNm, 
const int EdgesForCV = 100, 
const double StepAlpha = 0.3, 
const double StepBeta = 0.3);
 
   72   int MLEGradAscent(
const double& Thres, 
const int& MaxIter, 
const TStr PlotNm, 
const double StepAlpha = 0.3, 
const double StepBeta = 0.1);
 
   73   int MLEGradAscentParallel(
const double& Thres, 
const int& MaxIter, 
const int ChunkNum, 
const int ChunkSize, 
const TStr PlotNm, 
const double StepAlpha = 0.3, 
const double StepBeta = 0.1);
 
   74   int MLEGradAscentParallel(
const double& Thres, 
const int& MaxIter, 
const int ChunkNum, 
const TStr PlotNm = 
TStr(), 
const double StepAlpha = 0.3, 
const double StepBeta = 0.1) {
 
   75     int ChunkSize = G->
GetNodes() / 10 / ChunkNum;
 
   76     if (ChunkSize == 0) { ChunkSize = 1; }
 
   85   void Load(
TSIn& SIn, 
const int& RndSeed = 0);
 
   93   double inline GetCom(
const bool IsOut, 
const int& NID, 
const int& CID) {
 
  100   double inline GetComOut(
const int& NID, 
const int& CID) {
 
  101     if (F[NID].IsKey(CID)) {
 
  102       return F[NID].
GetDat(CID);
 
  107   double inline GetComIn(
const int& NID, 
const int& CID) {
 
  108     if (H[NID].IsKey(CID)) {
 
  109       return H[NID].
GetDat(CID);
 
  114   void inline AddCom(
const bool IsOut, 
const int& NID, 
const int& CID, 
const double& Val) {
 
  121   void inline AddComOut(
const int& NID, 
const int& CID, 
const double& Val) {
 
  122     if (F[NID].IsKey(CID)) {
 
  123       SumFV[CID] -= F[NID].
GetDat(CID);
 
  125     F[NID].AddDat(CID) = Val;
 
  128   void inline AddComIn(
const int& NID, 
const int& CID, 
const double& Val) {
 
  129     if (H[NID].IsKey(CID)) {
 
  130       SumHV[CID] -= H[NID].
GetDat(CID);
 
  132     H[NID].AddDat(CID) = Val;
 
  135   void inline DelCom(
const bool IsOut, 
const int& NID, 
const int& CID) {
 
  143     if (F[NID].IsKey(CID)) {
 
  144       SumFV[CID] -= F[NID].
GetDat(CID);
 
  148   void inline DelComIn(
const int& NID, 
const int& CID) {
 
  149     if (H[NID].IsKey(CID)) {
 
  150       SumHV[CID] -= H[NID].
GetDat(CID);
 
  156     if (UV.
Len() > VV.
Len()) {
 
  158         if (VV.
IsKey(HI.GetKey())) { 
 
  159           DP += VV.
GetDat(HI.GetKey()) * HI.GetDat(); 
 
  164         if (UV.
IsKey(HI.GetKey())) { 
 
  165           DP += UV.
GetDat(HI.GetKey()) * HI.GetDat(); 
 
  175     double DP = log (1.0 / (1.0 - PNoCom)) + 
DotProduct(FU, HV);
 
  192       N += HI.GetDat() * HI.GetDat();
 
  210     double Delta = MemThres == -1.0 ? sqrt(Coda.
PNoCom): MemThres;
 
  211     for (
int c = 0; c < CIdV.Len(); c++) {
 
  213       TIntFltH InMemH, OutMemH, InOutMemH;
 
  214       Coda.
GetNIDValH(InOutMemH, OutMemH, InMemH, CID, Delta);
 
  215       InCmtyValHV.
Add(InMemH);
 
  216       OutCmtyValHV.
Add(OutMemH);
 
  217       InOutCmtyValHV.
Add(InOutMemH);
 
  219     printf(
"Communities copied (%d communities)\n", InCmtyValHV.
Len());
 
  222     for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
 
  223       TIntV CmtyVIn, CmtyVOut, CmtyVInOut;
 
  224       if (InCmtyValHV[c].Len() < MinSz || OutCmtyValHV[c].
Len() < MinSz) { 
continue; }
 
  225       InOutCmtyValHV[c].GetKeyV(CmtyVInOut);
 
  226       InCmtyValHV[c].GetKeyV(CmtyVIn);
 
  227       OutCmtyValHV[c].GetKeyV(CmtyVOut);
 
  228       CmtyVV.
Add(CmtyVInOut);
 
  229       CmtyVV.
Add(CmtyVOut);
 
  234   double GetFrac2Mode(
const double Thres2Mode = 0.2, 
const int MinSzEach = 2) {
 
  237     for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
 
  238       double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].
Len() - InOutCmtyValHV[c].
Len());
 
  239       if (InCmtyValHV[c].Len() < MinSzEach || OutCmtyValHV[c].
Len() < MinSzEach) { 
continue; }
 
  240       if (Jacc <= Thres2Mode) { Cnt2Mode++; }
 
  243     return (
double) Cnt2Mode / (double) CntAll;
 
  246   void Summary(
const int TopK = 10, 
const double Thres2Mode = 0.2) {
 
  248     double SumJacc = 0.0;
 
  249     for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
 
  250       double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].
Len() - InOutCmtyValHV[c].
Len());
 
  251       if (Jacc <= Thres2Mode) { Cnt2Mode++; }
 
  254         printf(
"Cmty %d: InOut: %d, In:%d, Out:%d, Jacc;%.3f\n", c, InCmtyValHV[c].Len(), InCmtyValHV[c].Len(), OutCmtyValHV[c].Len(), Jacc);
 
  257     double AvgJacc = SumJacc / (double) InCmtyValHV.
Len();
 
  258     printf(
"Average jaccard similarity = %.3f. (%d / %d communities are 2-mode)\n", AvgJacc, Cnt2Mode, InCmtyValHV.
Len());
 
  264     TIntV CmtyVIn, CmtyVOut;
 
  265     InCmtyValHV[CID].GetKeyV(CmtyVIn);
 
  266     OutCmtyValHV[CID].GetKeyV(CmtyVOut);
 
  269     CmtyVAll.
Gen(CmtyVIn.
Len() + CmtyVOut.
Len(), 0);
 
  270     CmtyVIn.
Union(CmtyVOut, CmtyVAll);
 
  275     int Coms = InCmtyValHV.
Len();
 
  277     for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
 
  278       double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].
Len() - InOutCmtyValHV[c].
Len());
 
  279       if (Jacc > MaxJac) { 
continue; }
 
  283     for (
int c = 0; c < Coms; c++) {
 
  284       TIntV CmtyVIn, CmtyVOut;
 
  285       InCmtyValHV[c].GetKeyV(CmtyVIn);
 
  286       OutCmtyValHV[c].GetKeyV(CmtyVOut);
 
  287       TIntSet CmtySIn(CmtyVIn), CmtySOut(CmtyVOut);
 
  288       CmtySVIn.
Add(CmtySIn);
 
  289       CmtySVOut.
Add(CmtySOut);
 
  291     for (
int c1 = 0; c1 < Coms; c1++) {
 
  292       if (! ComG->
IsNode(c1)) { 
continue; }
 
  293       for (
int c2 = 0; c2 < Coms; c2++) {
 
  294         if (! ComG->
IsNode(c2)) { 
continue; }
 
  296         double Jac = (double) IntC1C2 / (CmtySVIn[c1].Len() + CmtySVOut[c2].
Len() - IntC1C2);
 
  297         if (Jac >= JacEdge) {
 
  305     for (
int u = 0; u < NIDV.
Len(); u++) {
 
  311     printf(
"Community graph made (Jaccard similarity for edges: %f, %d nodes, %d edges)\n", JacEdge, ComG->
GetNodes(), ComG->
GetEdges());
 
  318     FILE* F = fopen(OutFNm.
CStr(), 
"wt");
 
  319     for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
 
  320       double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].
Len() - InOutCmtyValHV[c].
Len());
 
  321       if (Jacc > MaxJac) { 
continue; }
 
  322       TIntV CmtyVIn, CmtyVOut, CmtyVAll;
 
  323       InCmtyValHV[c].GetKeyV(CmtyVIn);
 
  324       OutCmtyValHV[c].GetKeyV(CmtyVOut);
 
  327       for (
int u = 0; u < InOutCmtyValHV[c].
Len(); u++) {
 
  328         int UID = InOutCmtyValHV[c].GetKey(u);
 
  329         if (CmtyVIn.
Len() >= CmtyVOut.
Len()) {
 
  335       if (CmtyVAll.
Len() == 0) { 
continue; }
 
  336       fprintf(F, 
"Com %d\n", c);
 
  337       for (
int u = 0; u < CmtyVOut.
Len(); u++) {
 
  338         int NID = CmtyVOut[u];
 
  340         fprintf(F, 
"%s:%f\n", Label.
CStr(), OutCmtyValHV[c].
GetDat(NID).Val);
 
  342       fprintf(F, 
"||==>||\n");
 
  343       for (
int u = 0; u < CmtyVIn.
Len(); u++) {
 
  344         int NID = CmtyVIn[u];
 
  346         fprintf(F, 
"%s:%f\n", Label.
CStr(), InCmtyValHV[c].
GetDat(NID).Val);
 
  356     TIntV CmtyVIn, CmtyVOut, CmtyVAll;
 
  357     InCmtyValHV[CID].GetKeyV(CmtyVIn);
 
  358     OutCmtyValHV[CID].GetKeyV(CmtyVOut);
 
  362     for (
int u = 0; u < InOutCmtyValHV[CID].
Len(); u++) {
 
  363       int UID = InOutCmtyValHV[CID].GetKey(u);
 
  364       if (CmtyVIn.
Len() >= CmtyVOut.
Len()) {
 
  373     if (CmtyVAll.
Len() == 0) { 
return; }
 
  374     double OXMin = 0.1, YMin = 0.1, OXMax = 2500.00, YMax = 1000.0, IXMin = 0.1, IXMax = 2500.00;
 
  375     double OStep = (OXMax - OXMin) / (
double) CmtyVOut.
Len(), IStep = (IXMax - IXMin) / (
double) CmtyVIn.
Len();
 
  377     FILE* F = fopen(OutFNm.
CStr(), 
"wt");
 
  378     fprintf(F, 
"<?xml version='1.0' encoding='UTF-8'?>\n");
 
  379     fprintf(F, 
"<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
 
  380     fprintf(F, 
"\t<graph mode='static' defaultedgetype='directed'>\n");
 
  381     fprintf(F, 
"\t\t<nodes>\n");
 
  382     for (
int c = 0; c < CmtyVOut.
Len(); c++) {
 
  383       int NID = CmtyVOut[c];
 
  384       double XPos = c * OStep + OXMin;
 
  391       fprintf(F, 
"\t\t\t<node id='%d' label='%s'>\n", NID, Label.
CStr());
 
  392       fprintf(F, 
"\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.
Val1.Val, Color.
Val2.Val, Color.
Val3.Val);
 
  393       fprintf(F, 
"\t\t\t\t<viz:size value='4.0'/>\n");
 
  394       fprintf(F, 
"\t\t\t\t<viz:shape value='square'/>\n");
 
  395       fprintf(F, 
"\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax); 
 
  396       fprintf(F, 
"\t\t\t</node>\n");
 
  399     for (
int u = 0; u < CmtyVIn.
Len(); u++) {
 
  400       int NID = CmtyVIn[u];
 
  406       double XPos = IXMin + u * IStep;
 
  409       fprintf(F, 
"\t\t\t<node id='%d' label='%s'>\n", NID, Label.
CStr());
 
  410       fprintf(F, 
"\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.
Val1.Val, Color.
Val2.Val, Color.
Val3.Val, Alpha);
 
  411       fprintf(F, 
"\t\t\t\t<viz:size value='4.0'/>\n");
 
  412       fprintf(F, 
"\t\t\t\t<viz:shape value='square'/>\n");
 
  413       fprintf(F, 
"\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin); 
 
  414       fprintf(F, 
"\t\t\t</node>\n");
 
  416     fprintf(F, 
"\t\t</nodes>\n");
 
  419     fprintf(F, 
"\t\t<edges>\n");
 
  421       if (NI.GetOutDeg() == 0 && NI.GetInDeg() == 0  ) { 
continue; }
 
  422       for (
int e = 0; e < NI.GetOutDeg(); e++) {
 
  423         fprintf(F, 
"\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, NI.GetId(), NI.GetOutNId(e));
 
  426     fprintf(F, 
"\t\t</edges>\n");
 
  427     fprintf(F, 
"\t</graph>\n");
 
  428     fprintf(F, 
"</gexf>\n");
 
bool DelIfIn(const TVal &Val)
Removes the first occurrence of element Val. 
 
void GetNIdV(TIntV &NIdV) const 
Gets a vector IDs of all nodes in the graph. 
 
#define IAssertR(Cond, Reason)
 
TNodeI BegNI() const 
Returns an iterator referring to the first node in the graph. 
 
double LikelihoodForNode(const bool IsRow, const int UID)
 
void AddComIn(const int &NID, const int &CID, const double &Val)
 
void DelComOut(const int &NID, const int &CID)
 
void Dump2ModeCommunities(const TStr &OutFNm, const double MaxJac, const TIntStrH &NIDNameH)
 
void DumpMemberships(const TStr &OutFNm)
 
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New(). 
 
TNodeI GetNI(const int &NId) const 
Returns an iterator referring to the node of ID NId in the graph. 
 
void SetGraph(const PNGraph &GraphPt)
 
void GetNIDValH(TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
 
void GetAllCmtyVV(TVec< TIntV > &CmtyVV, const int MinSz)
 
double Likelihood(const bool DoParallel=false)
 
void NeighborComInit(const int InitComs)
 
int GetEdges() const 
Returns the number of edges in the graph. 
 
TSizeTy Len() const 
Returns the number of elements in the vector. 
 
void DelComIn(const int &NID, const int &CID)
 
static int Intersection(const TIntV &C1, const TIntV &C2)
 
TVec< TIntFltH > OutCmtyValHV
 
double GetComOut(const int &NID, const int &CID)
 
void SetCmtyVV(const TVec< TIntV > &CmtyVVOut, const TVec< TIntV > &CmtyVVIn)
 
int GetNodes() const 
Returns the number of nodes in the graph. 
 
TFlt & GetSumVal(const bool IsOut, const int CID)
 
int AddNode(int NId=-1)
Adds a node of ID NId to the graph. 
 
void GetCmtyS(TIntSet &CmtySOut, TIntSet &CmtySIn, const int CID, const double Thres)
 
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const int EdgesForCV=100, const double StepAlpha=0.3, const double StepBeta=0.1)
 
const TDat & GetDat(const TKey &Key) const 
 
TVec< TIntFltH > InCmtyValHV
 
void GetTopCIDs(TIntV &CIdV, const int TopK, const int IsAverage=1, const int MinSz=1)
 
void SetRegCoef(const double _RegCoef)
 
int ChangeChAll(const char &SrcCh, const char &DstCh)
 
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
 
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
 
void GetCmtyVVUnSorted(const bool IsOut, TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
 
double GetFrac2Mode(const double Thres2Mode=0.2, const int MinSzEach=2)
 
void DelNode(const int &NId)
Deletes node of ID NId from the graph. 
 
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
 
double Prediction(const int &UID, const int &VID)
 
double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
 
void AddComOut(const int &NID, const int &CID, const double &Val)
 
void GetCmtyVV(TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)
 
double LikelihoodHoldOut(const bool DoParallel=false)
 
void Sort(const bool &Asc=true)
Sorts the elements of the vector. 
 
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node SrcNId to node DstNId to the graph. 
 
TCoda(const PNGraph &GraphPt, const int &InitComs, const int RndSeed=0)
 
void Summary(const int TopK=10, const double Thres2Mode=0.2)
 
const TVal & GetDat(const TVal &Val) const 
Returns reference to the first occurrence of element Val. 
 
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
 
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
 
bool IsNode(const int &NId) const 
Tests whether ID NId is a node. 
 
void Load(TSIn &SIn, const int &RndSeed=0)
 
TCodaAnalyzer(TCoda &Coda, const double MemThres=-1.0)
 
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
 
double GetComIn(const int &NID, const int &CID)
 
int GetDeg() const 
Returns degree of the current node, the sum of in-degree and out-degree. 
 
void DelCom(const bool IsOut, const int &NID, const int &CID)
 
void GetNonEdgePairScores(TFltIntIntTrV &ScoreV)
 
void RandomInit(const int InitComs)
 
PNGraph Net2ModeCommunities(const double MaxJac, const double JacEdge, const bool GetWcc=true)
 
double Norm2(const TIntFltH &UV)
 
void SetHoldOut(const double HOFrac)
 
TNodeI EndNI() const 
Returns an iterator referring to the past-the-end node in the graph. 
 
double DotProductUtoV(const int &UID, const int &VID)
 
void Union(const TVec< TVal, TSizeTy > &ValV)
Sets this vector to its union with ValV. Assumes the vectors are sorted! 
 
int GetOutDeg() const 
Returns out-degree of the current node. 
 
static TStr Fmt(const char *FmtStr,...)
 
Node iterator. Only forward iteration (operator++) is supported. 
 
void DumpMemberships(const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
 
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
 
void AddCom(const bool IsOut, const int &NID, const int &CID, const double &Val)
 
TTriple< TInt, TInt, TInt > TIntTr
 
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements. 
 
void GetCommunity(TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID)
 
void GetCmtyVAll(TIntV &CmtyVAll, const int CID)
save bipartite community affiliation into gexf file 
 
int GetInDeg() const 
Returns in-degree of the current node. 
 
bool IsKey(const TKey &Key) const 
 
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element. 
 
double Sum(const TIntFltH &UV)
 
double GetCom(const bool IsOut, const int &NID, const int &CID)
 
TVec< TIntFltH > InOutCmtyValHV
 
void GradientForNode(const bool IsRow, const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
 
int GetOutNId(const int &NodeN) const 
Returns ID of NodeN-th out-node (the node the current node points to). 
 
void Draw2ModeCommunity(const int CID, const TStr &OutFNm, const TIntStrH &NIDNameH, const THash< TInt, TIntTr > &NIDColorH)
 
Vector is a sequence TVal objects representing an array that can change in size.