|
SNAP Library 2.2, User Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <circles.h>
Public Member Functions | |
| TCluster (PGraphAttributes GraphAttributes, TInt K, TFlt Lambda) | |
| ~TCluster () | |
| void | Train (TInt OuterReps, TInt GradientReps, TInt MCMCReps) |
| Train the model to predict K Clusters. | |
| TVec< TIntSet > | GetCircles (void) |
Public Attributes | |
| TCRef | CRef |
Private Member Functions | |
| TFlt | LogLikelihood () |
| Compute the log-likelihood of Parameters and cluster assignments. | |
| TIntSet | MCMC (TInt k, TInt MCMCReps) |
| Optimize the cluster assignments for the k'th cluster. | |
| void | Gradient () |
| Update partial derivatives of log-likelihood. | |
Private Attributes | |
| TFlt * | Theta |
| TFlt * | Derivative |
| TVec< TIntSet > | CHat |
| PGraphAttributes | GraphAttributes |
| TInt | K |
| TFlt | Lambda |
| TCluster::TCluster | ( | PGraphAttributes | GraphAttributes, |
| TInt | K, | ||
| TFlt | Lambda | ||
| ) | [inline] |
| GraphAttributes | attributed graph object with attributes |
| K | number of communities to detect |
| Lambda | regularization parameter |
Definition at line 35 of file circles.h.
:
GraphAttributes(GraphAttributes), K(K), Lambda(Lambda) {
Theta = new TFlt[K * GraphAttributes->NFeatures];
Derivative = new TFlt[K * GraphAttributes->NFeatures];
for (int k = 0; k < K; k++) {
for (int f = 0; f < GraphAttributes->NFeatures; f++) {
Theta[k * GraphAttributes->NFeatures + f] = 0;
Derivative[k * GraphAttributes->NFeatures + f] = 0;
}
// Clusters are initially empty.
CHat.Add(TIntSet());
}
}
| TCluster::~TCluster | ( | ) | [inline] |
Definition at line 49 of file circles.h.
{
delete[] Theta;
delete[] Derivative;
}
| TVec<TIntSet> TCluster::GetCircles | ( | void | ) | [inline] |
| void TCluster::Gradient | ( | void | ) | [private] |
Update partial derivatives of log-likelihood.
Definition at line 456 of file circles.h.
{
for (int i = 0; i < K * GraphAttributes->NFeatures; i++) {
if (Theta[i] > 0) {
Derivative[i] = -Lambda * Theta[i];
} else {
Derivative[i] = Lambda * Theta[i];
}
}
for (THashKeyDatI<TIntPr, TIntIntH> it = GraphAttributes->EdgeFeatures.BegI();
not it.IsEnd(); it++) {
TFlt InnerProduct = 0;
TIntPr Edge = it.GetKey();
TInt Src = Edge.Val1;
TInt Dst = Edge.Val2;
TBool Exists = GraphAttributes->G->IsEdge(Src, Dst);
for (int k = 0; k < K; k++) {
TFlt d = CHat[k].IsKey(Src) and CHat[k].IsKey(Dst) ? 1 : -1;
InnerProduct += d * Inner(it.GetDat(), Theta + k * GraphAttributes->NFeatures);
}
TFlt expinp = exp(InnerProduct);
TFlt q = expinp / (1 + expinp);
if (q != q) {
q = 1; // Test for nan in case of overflow.
}
for (int k = 0; k < K; k++) {
TBool d_ = CHat[k].IsKey(Src) and CHat[k].IsKey(Dst);
TFlt d = d_ ? 1 : -1;
for (THashKeyDatI<TInt, TInt> itf = it.GetDat().BegI();
not itf.IsEnd(); itf++) {
TInt i = itf.GetKey();
TInt f = itf.GetDat();
if (Exists) {
Derivative[k * GraphAttributes->NFeatures + i] += d * f;
}
Derivative[k * GraphAttributes->NFeatures + i] += -d * f * q;
}
}
}
}
| TFlt TCluster::LogLikelihood | ( | void | ) | [private] |
Compute the log-likelihood of Parameters and cluster assignments.
Definition at line 499 of file circles.h.
{
TFlt ll = 0;
for (THashKeyDatI<TIntPr, TIntIntH> it = GraphAttributes->EdgeFeatures.BegI();
not it.IsEnd(); it++) {
TFlt InnerProduct = 0;
TIntPr Edge = it.GetKey();
TInt Src = Edge.Val1;
TInt Dst = Edge.Val2;
TBool Exists = GraphAttributes->G->IsEdge(Src, Dst);
for (int k = 0; k < K; k++) {
TFlt d = CHat[k].IsKey(Src) and CHat[k].IsKey(Dst) ? 1 : -1;
InnerProduct += d * Inner(it.GetDat(), Theta + k * GraphAttributes->NFeatures);
}
if (Exists) {
ll += InnerProduct;
}
TFlt ll_ = log(1 + exp(InnerProduct));
ll += -ll_;
}
if (ll != ll) {
printf("ll isnan\n");
exit(1);
}
return ll;
}
| TIntSet TCluster::MCMC | ( | TInt | k, |
| TInt | MCMCReps | ||
| ) | [private] |
Optimize the cluster assignments for the k'th cluster.
| k | community index on which to run MCMC |
| MCMCReps | number of iterations of MCMC |
Definition at line 358 of file circles.h.
{
TRnd t;
THash<TInt, TFlt> CostNotIncludeHash;
THash<TInt, TFlt> CostIncludeHash;
TVec<TInt> NewLabel;
int csize = 0;
for (int i = 0; i < GraphAttributes->NodeIDs.Len(); i++) {
if (CHat[k].IsKey(GraphAttributes->NodeIDs[i])) {
NewLabel.Add(0);
} else {
NewLabel.Add(1);
}
if (CHat[k].IsKey(GraphAttributes->NodeIDs[i])) {
csize++;
}
}
// Compute edge log-probabilities.
for (THashKeyDatI<TIntPr, TIntIntH> it = GraphAttributes->EdgeFeatures.BegI();
not it.IsEnd(); it++) {
TIntPr e = it.GetKey();
TInt kv = GraphAttributes->EdgeFeatures.GetKeyId(e);
TInt Src = e.Val1;
TInt Dst = e.Val2;
TBool Exists = GraphAttributes->G->IsEdge(Src, Dst);
TFlt InnerProduct = Inner(it.GetDat(), Theta + k * GraphAttributes->NFeatures);
TFlt Other = 0;
for (int l = 0; l < K; l++) {
if (l == k) {
continue;
}
TFlt d = (CHat[l].IsKey(Src) and CHat[l].IsKey(Dst)) ? 1 : -1;
Other += d * Inner(it.GetDat(), Theta + l * GraphAttributes->NFeatures);
}
TFlt CostNotInclude;
TFlt CostInclude;
if (Exists) {
CostNotInclude = -Other + InnerProduct + log(1 + exp(Other - InnerProduct));
CostInclude = -Other - InnerProduct + log(1 + exp(Other + InnerProduct));
} else {
CostNotInclude = log(1 + exp(Other - InnerProduct));
CostInclude = log(1 + exp(Other + InnerProduct));
}
CostNotIncludeHash.AddDat(kv) = -CostNotInclude;
CostIncludeHash.AddDat(kv) = -CostInclude;
}
// Run MCMC using precomputed probabilities
TFlt InitialTemperature = 1.0; // Initial temperature
for (int r = 2; r < MCMCReps + 2; r++) {
TFlt Temperature = InitialTemperature / log(r);
for (int n = 0; n < GraphAttributes->NodeIDs.Len(); n++) {
TFlt l0 = 0;
TFlt l1 = 0;
for (int np = 0; np < GraphAttributes->NodeIDs.Len(); np++) {
if (n == np) {
continue;
}
TIntPr ed(GraphAttributes->NodeIDs[n], GraphAttributes->NodeIDs[np]);
if (ed.Val1 > ed.Val2) {
ed = TIntPr(ed.Val2, ed.Val1);
}
TInt kv = GraphAttributes->EdgeFeatures.GetKeyId(ed);
TFlt m0 = CostNotIncludeHash.GetDat(kv);
if (NewLabel[np] == 0) {
l0 += m0;
l1 += m0;
} else {
l0 += m0;
l1 += CostIncludeHash.GetDat(kv);
}
}
TFlt LogLikelihoodDiff = exp(l1 - l0);
TFlt AcceptProb = pow(LogLikelihoodDiff, 1.0 / Temperature);
if (t.GetUniDev() < AcceptProb) {
NewLabel[n] = 1;
} else {
NewLabel[n] = 0;
}
}
}
TIntSet Result;
for (int i = 0; i < GraphAttributes->NodeIDs.Len(); i++) {
if (NewLabel[i]) {
Result.AddKey(GraphAttributes->NodeIDs[i]);
}
}
return Result;
}
| void TCluster::Train | ( | TInt | OuterReps, |
| TInt | GradientReps, | ||
| TInt | MCMCReps | ||
| ) |
Train the model to predict K Clusters.
| OuterReps | number of coordinate ascent iterations |
| GradientReps | number of iterations of gradient ascent |
| MCMCReps | number of iterations of MCMC |
Definition at line 292 of file circles.h.
{
// Learning rate
TFlt Increment = 1.0 / (1.0 * GraphAttributes->NodeIDs.Len() * GraphAttributes->NodeIDs.Len());
TRnd t;
for (int OuterRep = 0; OuterRep < OuterReps; OuterRep++) {
// If it's the first iteration or the solution is degenerate,
// randomly initialize the weights and Clusters
for (int k = 0; k < K; k++) {
if (OuterRep == 0 or CHat[k].Empty() or CHat[k].Len()
== GraphAttributes->NodeIDs.Len()) {
CHat[k].Clr();
for (int i = 0; i < GraphAttributes->NodeIDs.Len(); i++) {
if (t.GetUniDevInt(2) == 0) {
CHat[k].AddKey(GraphAttributes->NodeIDs[i]);
}
}
for (int i = 0; i < GraphAttributes->NFeatures; i++) {
Theta[k * GraphAttributes->NFeatures + i] = 0;
}
// Just set a single Feature to 1 as a random initialization.
Theta[k * GraphAttributes->NFeatures + t.GetUniDevInt(GraphAttributes->NFeatures)] = 1.0;
Theta[k * GraphAttributes->NFeatures] = 1;
}
}
for (int k = 0; k < K; k++) {
CHat[k] = MCMC(k, MCMCReps);
}
TFlt llPrevious = LogLikelihood();
// Perform gradient ascent
TFlt ll = 0;
for (int gradientRep = 0; gradientRep < GradientReps; gradientRep++) {
Gradient();
for (int i = 0; i < K * GraphAttributes->NFeatures; i++) {
Theta[i] += Increment * Derivative[i];
}
printf(".");
fflush( stdout);
ll = LogLikelihood();
// If we reduced the objective, undo the update and stop.
if (ll < llPrevious) {
for (int i = 0; i < K * GraphAttributes->NFeatures; i++) {
Theta[i] -= Increment * Derivative[i];
}
ll = llPrevious;
break;
}
llPrevious = ll;
}
printf("\nIteration %d, ll = %f\n", OuterRep + 1, (double) ll);
}
}
TVec<TIntSet> TCluster::CHat [private] |
TFlt* TCluster::Derivative [private] |
PGraphAttributes TCluster::GraphAttributes [private] |
TInt TCluster::K [private] |
TFlt TCluster::Lambda [private] |
TFlt* TCluster::Theta [private] |