1 #ifndef yanglib_agmattr1_h 
    2 #define yanglib_agmattr1_h 
   13   if (GraphType) { Edges2 = Edges >= 0 ? Edges : Graph->GetEdges(); }
 
   14   else { Edges2 = Edges >= 0 ? 2 * Edges : Graph->GetEdges(); }
 
   17   for (
int i = 0; i < CmtyS.
Len(); i++) {
 
   18     if (! Graph->IsNode(CmtyS[i])) { 
continue; }
 
   19     typename PGraph::TObj::TNodeI  NI = Graph->GetNI(CmtyS[i]);
 
   20     for (
int e = 0; e < NI.GetOutDeg(); e++) {
 
   21       if (! CmtyS.
IsKey(NI.GetOutNId(e))) { Cut += 1; }
 
   23     Vol += NI.GetOutDeg();
 
   27     if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
 
   28     else if (Vol == 0) { Phi = 0.0; }
 
   29     else { Phi = Cut / double(Vol); }
 
   31     if (Vol == Edges2) { Phi = 1.0; }
 
   37 template<
class PGraph>
 
   39     TIntPrV EdgeV(G->GetEdges(), 0);
 
   40     for (
typename PGraph::TObj::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
 
   41       EdgeV.
Add(
TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
 
   46     HoldOutSet.
Gen(G->GetNodes());
 
   47     int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
 
   48     if (GraphType) { HOTotal *= 2;}
 
   50     int HOEdges = (int) 
TMath::Round(HOFrac * G->GetEdges());
 
   51     printf(
"holding out %d edges...\n", HOEdges);
 
   52     for (
int he = 0; he < (int) HOEdges; he++) {
 
   53       HoldOutSet[EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
 
   54       if (! GraphType) { HoldOutSet[EdgeV[he].Val2].AddKey(EdgeV[he].Val1); }
 
   57     printf(
"%d Edges hold out\n", HOCnt);
 
   58     while(HOCnt++ < HOTotal) {
 
   61       if (SrcNID == DstNID) { 
continue; }
 
   62       HoldOutSet[SrcNID].AddKey(DstNID);
 
   63       if (! GraphType) { HoldOutSet[DstNID].AddKey(SrcNID); }
 
   67 template<
class PGraph>
 
   69     typename PGraph::TObj::TNodeI NI = Graph->GetNI(NID);
 
   70     NBCmtyS.
Gen(NI.GetDeg());
 
   72     for (
int e = 0; e < NI.GetDeg(); e++) {
 
   73       NBCmtyS.
AddKey(NI.GetNbrNId(e));
 
   76 template<
class PGraph>
 
   78     NIdPhiV.
Gen(G->GetNodes(), 0);
 
   79     const int Edges = G->GetEdges();
 
   82     for (
typename PGraph::TObj::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
 
   83       TIntSet NBCmty(NI.GetDeg() + 1);
 
   85       if (NI.GetDeg() < 5) { 
 
   88         TCesnaUtil::GetNbhCom<PGraph>(G, NI.GetId(), NBCmty);
 
   96     printf(
"conductance computation completed [%s]\n", RunTm.
GetTmStr());
 
  103     printf(
"nodes in the graph:%d\n", NIDV.
Len());
 
  104     for (
int u = 0; u < NIDV.
Len(); u++) { NIDAttrH.
AddDat(NIDV[u]).
Gen(0, 0); }
 
  109       if (NodeNameH.
Len() > 0 && ! NodeNameH.
IsKey(NodeName)) { 
continue; }
 
  110       if (NodeNameH.
Len() > 0) { 
 
  114       if (! NIDAttrH.
IsKey(NID)) { 
 
  128     FILE* F = fopen(FNm.
CStr(), 
"wt");
 
  129     for (
int u = 0; u < NIDAttrH.
Len(); u++) {
 
  130       int NID = NIDAttrH.
GetKey(u);
 
  132       for (
int k = 0; k < NIDAttrH[u].
Len(); k++) {
 
  133         int KID = NIDAttrH[u][k];
 
  135         fprintf(F,
"%s\t%s\n", NodeName.
CStr(), FeatName.
CStr());
 
  149     FILE* F = fopen(FNm.
CStr(), 
"wt");
 
  150     for (
int u = 0; u < NIDAttrH.
Len(); u++) {
 
  151       int NID = NIDAttrH.
GetKey(u);
 
  153       for (
int k = 0; k < NIDAttrH[u].
Len(); k++) {
 
  154         int KID = NIDAttrH[u][k];
 
  156         fprintf(F,
"%s\t%s\n", NodeName.
CStr(), FeatName.
CStr());
 
  171     for (
int u = 0; u < NIDAttrH.
Len(); u++) {
 
  172       for (
int k = 0; k < NIDAttrH[u].
Len(); k++) {
 
  173         if (NIDAttrH[u][k] >= Attrs) { Attrs = NIDAttrH[u][k] + 1; }
 
  181     for (
int u = 1; u < NIDV.
Len(); u++) {
 
  182       if (! NIDAttrH.
IsKey(NIDV[u])) { 
continue; }
 
  183       AttrCnt += NIDAttrH.
GetDat(NIDV[u]).
Len();
 
  186     FILE* F = fopen(FNm.
CStr(), 
"wt");
 
  187     fprintf(F, 
"%d %d\n", NIDV.
Len() - 1, AttrCnt);
 
  189     for (
int u = 1; u < NIDV.
Len(); u++) {
 
  190       if (NIDAttrH.
IsKey(NIDV[u])) {  
 
  191         for (
int k = 0; k < NIDAttrH.
GetDat(NIDV[u]).
Len(); k++) {
 
  192           if (k > 0) { fprintf(F, 
" "); }
 
  193           fprintf(F, 
"%d", NIDAttrH.
GetDat(NIDV[u])[k].Val + 1);
 
  205     for (
int u = 0; u < OldNIDAttrH.
Len(); u++) {
 
  206       for (
int k = 0; k < OldNIDAttrH[u].
Len(); k++) {
 
  207         KIDCntH.
AddDat(OldNIDAttrH[u][k])++;
 
  213     for (
int c = 0; c < KIDCntH.
Len(); c++) {
 
  214       double Frac = (double) KIDCntH[c].Val / (
double) OldNIDAttrH.
Len();
 
  215       if (KIDCntH[c].Val < MinCnt) { 
continue; }
 
  216       if (Frac > MaxFrac || Frac < MinFrac) { 
continue; }
 
  217       SelectedK.AddKey(KIDCntH.
GetKey(c));
 
  219     printf(
"%d attributes selected from %d\n", SelectedK.Len(), KIDCntH.
Len());
 
  220     NewNIDAttrH.
Gen(OldNIDAttrH.
Len());
 
  221     for (
int u = 0; u < OldNIDAttrH.
Len(); u++) {
 
  222       int NID = OldNIDAttrH.
GetKey(u);
 
  224       for (
int k = 0; k < OldNIDAttrH[u].
Len(); k++) {
 
  225         if (! SelectedK.IsKey(OldNIDAttrH[u][k])) { 
continue; }
 
  226         AttrV.
Add(SelectedK.GetKeyId(OldNIDAttrH[u][k]));
 
  230     if (! OldNameH.
Empty()) {
 
  231       NewNameH.
Gen(SelectedK.Len());
 
  232       for (
int k = 0; k < SelectedK.Len(); k++) {
 
  233         int OldKID = SelectedK.GetKey(k);
 
  234         if (OldNameH.
IsKey(OldKID)) {
 
  238       printf(
"%d attributes names copied\n", NewNameH.
Len());
 
  243     FilterLowEntropy(OldNIDAttrH, NewNIDAttrH, TmpH1, TmpH2, MinFrac, MaxFrac, MinCnt);
 
  273     MinVal(0.0), MaxVal(10.0), MinValW(-10.0), MaxValW(10.0), NegWgt(1.0), LassoCoef(1.0), WeightAttr(1.0) { 
SetGraph(GraphPt, NIDAttrH); 
NeighborComInit(InitComs); }
 
  282     LassoCoef.
Save(SOut);
 
  316   void SetRegCoef(
const double _RegCoef) { RegCoef = _RegCoef; }
 
  318   void SetWeightAttr(
const double _WeightAttr) { 
IAssert (_WeightAttr <= 1.0 && _WeightAttr >= 0.0); WeightAttr = _WeightAttr; }
 
  324     if (F[NIdx].IsKey(CID)) {
 
  325       return F[NIdx].
GetDat(CID);
 
  333     for (
int k = 0; k < 
Attrs; k++) {
 
  334       W[k].
Gen(NumComs + 1);
 
  339     HOKIDSV[NIdx].AddKey(KID);
 
  342     for (
int k = 0; k < 
Attrs; k++) {
 
  353   double Likelihood(
const bool DoParallel = 
false);
 
  361     for (
int u = 0; u < F.
Len(); u++) {
 
  362       if (HOKIDSV[u].IsKey(K)) { 
continue; }
 
  365     for (
int c = 0; c < WK.
Len() - 1; c++) {
 
  366       L -= LassoCoef * fabs(WK[c]);
 
  373     for (
int k = 0; k < 
Attrs; k++) {
 
  374       for (
int u = 0; u < F.
Len(); u++) {
 
  375         if (HOKIDSV[u].IsKey(k)) { 
continue; }
 
  385       for (
int u = 0; u < F.
Len(); u++) {
 
  386         L += RegCoef * 
Sum(F[u]);
 
  390       for (
int u = 0; u < F.
Len(); u++) {
 
  391         L -= RegCoef * 
Norm2(F[u]);
 
  402     for (
int h = 0; h < 10 * HoldOutCnt; h++) {
 
  407         HOSetV[UID].AddKey(KID);
 
  410       if (Cnt >= HoldOutCnt) { 
break; }
 
  412     printf(
"%d hold out pairs generated for attributes\n", Cnt);
 
  422     GradV.
Gen(NumComs + 1);
 
  423     for (
int u = 0; u < F.
Len(); u++) {
 
  424       if (HOKIDSV[u].IsKey(K)) { 
continue; }
 
  427         GradV[CI.GetKey()] += (
GetAttr(u, K) - Pred) * 
GetCom(u, CI.GetKey());
 
  432     for (
int c = 0; c < GradV.
Len() - 1; c++) {
 
  468   int FindComs(
TIntV& ComsV, 
const bool UseBIC = 
false, 
const double HOFrac = 0.2, 
const int NumThreads = 20, 
const TStr PlotLFNm = 
TStr(), 
const double StepAlpha = 0.3, 
const double StepBeta = 0.1);
 
  469   int FindComs(
const int NumThreads, 
const int MaxComs, 
const int MinComs, 
const int DivComs, 
const TStr OutFNm, 
const bool UseBIC = 
false, 
const double HOFrac = 0.1, 
const double StepAlpha = 0.3, 
const double StepBeta = 0.3);
 
  471     for (
int u = 0; u < X.
Len(); u++) {
 
  472       if (NodeNameH.
Len() > 0) {
 
  473         printf(
"NID: %s\t Attrs: ", NodeNameH.
GetKey(NIDToIdx[u]));
 
  475         printf(
"NID: %d\t Attrs: ", NIDToIdx[u].Val);
 
  477       for (
int k = 0; k < X[u].
Len(); k++) {
 
  478         printf(
"%d, ", X[u][k].Val);
 
  481       if (u >= TopK) { 
break; }
 
  487     double StepSize = 1.0;
 
  491     for(
int iter = 0; iter < MaxIter; iter++) {
 
  492       for (
int c = 0; c < DeltaV.
Len(); c++){
 
  493         double NewVal = W[K][c] + StepSize * DeltaV[c];
 
  494         if (NewVal < MinValW) { NewVal = 
MinValW; }
 
  495         if (NewVal > MaxValW) { NewVal = 
MaxValW; }
 
  503       if (iter == MaxIter - 1) { 
 
  512     for (
int c = 0; c < 
NumComs; c++) {
 
  513       for (
int k = 0; k < 
Attrs; k++) {
 
  514         if (
GetW(c, k) > 0.0) { PosCnt++; }
 
  519   int MLEGradAscent(
const double& Thres, 
const int& MaxIter, 
const TStr PlotNm, 
const double StepAlpha = 0.3, 
const double StepBeta = 0.1);
 
  520   int MLEGradAscentParallel(
const double& Thres, 
const int& MaxIter, 
const int ChunkNum, 
const int ChunkSize, 
const TStr PlotNm, 
const double StepAlpha = 0.3, 
const double StepBeta = 0.1);
 
  521   int MLEGradAscentParallel(
const double& Thres, 
const int& MaxIter, 
const int ChunkNum, 
const TStr PlotNm = 
TStr(), 
const double StepAlpha = 0.3, 
const double StepBeta = 0.1) {
 
  522     int ChunkSize = G->
GetNodes() / 10 / ChunkNum;
 
  523     if (ChunkSize == 0) { ChunkSize = 1; }
 
  527   double inline GetCom(
const int& NID, 
const int& CID) {
 
  528     if (F[NID].IsKey(CID)) {
 
  529       return F[NID].
GetDat(CID);
 
  534   double inline GetAttr(
const int& NID, 
const int& K) {
 
  535     if (X[NID].IsKey(K)) {
 
  541   void inline AddCom(
const int& NID, 
const int& CID, 
const double& Val) {
 
  542     if (F[NID].IsKey(CID)) {
 
  543       SumFV[CID] -= F[NID].
GetDat(CID);
 
  545     F[NID].AddDat(CID) = Val;
 
  549   void inline DelCom(
const int& NID, 
const int& CID) {
 
  550     if (F[NID].IsKey(CID)) {
 
  551       SumFV[CID] -= F[NID].
GetDat(CID);
 
  566     if (UV.
Len() > VV.
Len()) {
 
  568         if (VV.
IsKey(HI.GetKey())) { 
 
  569           DP += VV.
GetDat(HI.GetKey()) * HI.GetDat(); 
 
  574         if (UV.
IsKey(HI.GetKey())) { 
 
  575           DP += UV.
GetDat(HI.GetKey()) * HI.GetDat(); 
 
  585     double DP = log (1.0 / (1.0 - PNoCom)) + 
DotProduct(FU, FV);
 
  592       DP += FI.GetDat() * WK[FI.GetKey()];
 
  603   double inline GetW(
const int CID, 
const int K) {
 
  619       N += HI.GetDat() * HI.GetDat();
 
  633     return 1.0 / ( 1.0 + exp(-X));
 
double Sigmoid(const double X)
 
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
 
void GenHoldOutAttr(const double HOFrac, TVec< TIntSet > &HOSetV)
 
TPair< TInt, TInt > TIntPr
 
double LikelihoodForWK(const int K)
 
TIter EndI() const 
Returns an iterator referring to the past-the-end element in the vector. 
 
#define IAssertR(Cond, Reason)
 
static void GetNIdPhiV(const PGraph &G, TFltIntPrV &NIdPhiV)
 
TPair< TFlt, TInt > TFltIntPr
 
void SetWeightAttr(const double _WeightAttr)
 
double PredictAttrK(const int UID, const int K)
 
double Sum(const TIntFltH &UV)
 
double GetCom(const int &NID, const int &CID)
 
static void DumpNIDAttrHToMetis(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TIntV &NIDV)
 
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
 
void Save(TSOut &SOut) const 
 
bool IsIn(const TVal &Val) const 
Checks whether element Val is a member of the vector. 
 
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
 
int GetKeyId(const TKey &Key) const 
 
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
 
void SetRegCoef(const double _RegCoef)
 
static void GetNbhCom(const PGraph &Graph, const int NID, TIntSet &NBCmtyS)
 
int GetEdges() const 
Returns the number of edges in the graph. 
 
TSizeTy Len() const 
Returns the number of elements in the vector. 
 
void Gen(const int &ExpectVals)
 
double LikelihoodForWK(const int K, const TFltV &WK)
 
double GetAttr(const int &NID, const int &K)
 
bool GetInt(const int &FldN, int &Val) const 
If the field FldN is an integer its value is returned in Val and the function returns true...
 
const TDat & GetDat(const TKey &Key) const 
 
void Load(TSIn &SIn, const int &RndSeed=0)
 
void Save(TSOut &SOut) const 
Saves the graph to a (binary) stream SOut. 
 
void GetW(TVec< TFltV > &_W)
 
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
 
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
 
bool IsKey(const TKey &Key) const 
 
double Likelihood(const bool DoParallel=false)
 
int GetNodes() const 
Returns the number of nodes in the graph. 
 
const char * GetFld(const int &FldN) const 
Returns the contents of the field at index FldN. 
 
double LikelihoodAttrKForRow(const int UID, const int K)
 
static void FilterLowEntropy(const THash< TInt, TIntV > &OldNIDAttrH, THash< TInt, TIntV > &NewNIDAttrH, const TIntStrH &OldNameH, TIntStrH &NewNameH, const double MinFrac=0.00001, const double MaxFrac=0.95, const int MinCnt=3)
 
static void LoadNIDAttrHFromNIDKH(const TIntV &NIDV, const TStr &InFNm, THash< TInt, TIntV > &NIDAttrH)
 
void Save(TSOut &SOut) const 
 
TSsFmt
Spread-Sheet Separator Format. 
 
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH, const TStrHash< TInt > &FeatNameH, const TStrHash< TInt > &NodeNameH)
 
void DisplayAttrs(const int TopK, const TStrHash< TInt > &NodeNameH)
 
const char * GetTmStr() const 
 
void SetW(TVec< TFltV > &_W)
 
void Gen(const int &ExpectVals)
 
bool IsKey(const char *Key) const 
 
#define HasGraphFlag(TGraph, Flag)
For quick testing of the properties of the graph/network object (see TGraphFlag). ...
 
double GetComFromNID(const int &NID, const int &CID)
 
void RandomInit(const int InitComs)
 
void SetAttrHoldOut(const int NID, const int KID)
 
double LikelihoodHoldOut()
 
const TVal & GetDat(const TVal &Val) const 
Returns reference to the first occurrence of element Val. 
 
void GetCmtyVV(TVec< TIntV > &CmtyVV, TVec< TFltV > &Wck)
 
const char * GetKey(const int &KeyId) const 
 
static double Round(const double &Val)
 
double Norm2(const TIntFltH &UV)
 
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
 
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New(). 
 
int AddKey(const TKey &Key)
 
const TVal & Last() const 
Returns a reference to the last element of the vector. 
 
void AddCom(const int &NID, const int &CID, const double &Val)
 
void GetCmtyVVUnSorted(TVec< TIntV > &CmtyVV)
 
double PredictAttrK(const TIntFltH &FU, const int K)
 
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
 
TCesna(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH, const int &InitComs, const int RndSeed=0)
 
void GetCmtyVV(TVec< TIntV > &CmtyVV)
 
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH)
 
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
 
static PUNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
 
directed graph (TNGraph, TNEGraph), else graph is undirected TUNGraph 
 
static void LoadNIDAttrHFromNIDKH(const TIntV &NIDV, const TStr &InFNm, THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &NodeNameH, const TSsFmt Sep=ssfTabSep)
 
void NeighborComInit(const int InitComs)
 
double DotProduct(const int &UID, const int &VID)
 
static double GetConductance(const PGraph &Graph, const TIntSet &CmtyS, const int Edges)
 
double GetStepSizeByLineSearchForWK(const int K, const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
 
double LikelihoodForRow(const int UID)
 
static TStr Fmt(const char *FmtStr,...)
 
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &FeatNameH)
 
uint64 GetLineNo() const 
Returns the line number of the current line. 
 
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &FeatNameH, const TStrHash< TInt > &NodeNameH)
 
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH, const TStrHash< TInt > &FeatNameH)
 
static int Sign(const T &Val)
 
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
 
void SetLassoCoef(const double _LassoCoef)
 
void SetGraph(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH)
 
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
 
bool Next()
Loads next line from the input file. 
 
void GradientForWK(TFltV &GradV, const int K)
 
static double DotProduct(const TFltV &x, const TFltV &y)
 
void Save(TSOut &SOut) const 
 
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
 
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements. 
 
double Prediction(const int &UID, const int &VID)
 
int GetUniDevInt(const int &Range=0)
 
static int GetAttrs(const THash< TInt, TIntV > &NIDAttrH)
 
int FindComs(TIntV &ComsV, const bool UseBIC=false, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
 
double LikelihoodAttrKForRow(const int UID, const int K, const TIntFltH &FU)
 
bool IsKey(const TKey &Key) const 
 
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element. 
 
void SetHoldOut(const double HOFrac)
 
TDat & AddDat(const TKey &Key)
 
void DelCom(const int &NID, const int &CID)
 
void SetAttrHoldOutForOneNode(const int NID)
 
double GetW(const int CID, const int K)
 
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH)
 
void Save(TSOut &SOut) const 
 
const TKey & GetKey(const int &KeyId) const 
 
static void FilterLowEntropy(const THash< TInt, TIntV > &OldNIDAttrH, THash< TInt, TIntV > &NewNIDAttrH, const double MinFrac=0.00001, const double MaxFrac=0.95, const int MinCnt=3)
 
int GetKeyId(const char *Key) const 
 
Vector is a sequence TVal objects representing an array that can change in size. 
 
bool IsKeyId(const int &KeyId) const 
 
void SortByDat(const bool &Asc=true)