SNAP Library , Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #include "bd.h" 00002 00004 // Mathematics-Utilities 00005 class TMath{ 00006 public: 00007 static double E; 00008 static double Pi; 00009 static double LogOf2; 00010 00011 static double Inv(const double& x){IAssert(x!=0.0); return (1.0/x);} 00012 static double Sqr(const double& x){return x*x;} 00013 static double Sqrt(const double& x){IAssert(!(x<0.0)); return sqrt(x);} 00014 static double Log(const double& Val){return log(Val);} 00015 static double Log2(const double& Val){return log(Val)/LogOf2;} 00016 static double Round(const double& Val){ 00017 return (Val>0)?floor(Val+0.5):ceil(Val-0.5);} 00018 static double Round(const double & Val, int Decs){ 00019 const double pwr=pow(10.0, Decs); return Round(Val * pwr) / pwr;} 00020 static int Fac(const int& Val){ 00021 if (Val<=1){return 1;} else {return Val*Fac(Val-1);}} 00022 static int Choose(const int& N, const int& K){ // binomial coefficient 00023 return Fac(N)/(Fac(K)*Fac(N-K)); } 00024 static uint Pow2(const int& pow){return uint(1u<<pow);} 00025 static double Power(const double& Base, const double& Exponent){ 00026 return exp(log(Base)*Exponent);} 00027 00028 template <typename T> 00029 static int Sign(const T& Val){return Val<0?-1:(Val>0?1:0);} 00030 00031 template <class T> 00032 static const T& Mx(const T& LVal, const T& RVal) { 00033 return LVal > RVal ? LVal : RVal;} 00034 00035 template <class T> 00036 static const T& Mn(const T& LVal, const T& RVal){ 00037 return LVal < RVal ? LVal : RVal;} 00038 00039 template <class T> 00040 static const T& Mx(const T& Val1, const T& Val2, const T& Val3) { 00041 if (Val1 > Val2) { 00042 if (Val1 > Val3) return Val1; 00043 else return Val3; 00044 } else { 00045 if (Val2 > Val3) return Val2; 00046 else return Val3; 00047 } 00048 } 00049 00050 template <class T> 00051 static const T& Mn(const T& Val1, const T& Val2, const T& Val3) { 00052 if(Val1 < Val2) { 00053 if (Val1 < Val3) return Val1; 00054 else return Val3; 00055 } else { 00056 if (Val2 < Val3) return Val2; 00057 else return Val3; 00058 } 00059 } 00060 00061 template <class T> 00062 static const T& Median(const T& Val1, const T& Val2, const T& Val3) { 00063 if (Val1 < Val2) { 00064 if (Val2 < Val3) return Val2; 00065 else if (Val3 < Val1) return Val1; 00066 else return Val3; 00067 } else { 00068 if (Val1 < Val3) return Val1; 00069 else if (Val3 < Val2) return Val2; 00070 else return Val3; 00071 } 00072 } 00073 00074 template <class T> 00075 static const T& InRange(const T& Val, const T& Mn, const T& Mx) { 00076 IAssert(Mn <= Mx); return Val < Mn ? Mn : (Val > Mx ? Mx : Val);} 00077 00078 template <class T> 00079 static bool IsInRange(const T& Val, const T& Mn, const T& Mx) { 00080 IAssert(Mn <= Mx); return Val >= Mn && Val <= Mx;} 00081 00082 template <class T> 00083 static bool IsInEps(const T& Val, const T& Eps) { 00084 return Val >= -Eps && Val <= Eps;} 00085 }; 00086 00088 // Special-Functions 00089 class TSpecFunc{ 00090 public: 00091 static void GammaPSeries/*gser*/( 00092 double& gamser, const double& a, const double& x, double& gln); 00093 static void GammaQContFrac/*gcf*/( 00094 double& gammcf, const double& a, const double& x, double& gln); 00095 static double GammaQ/*gammq*/(const double& a, const double& x); 00096 static double LnGamma/*gammln*/(const double& xx); 00097 static double BetaCf/*betacf*/( 00098 const double& a, const double& b, const double& x); 00099 static double BetaI(const double& a, const double& b, const double& x); 00100 00101 static void LinearFit( // Y = A + B*X 00102 const TVec<TFltPr>& XY, double& A, double& B, 00103 double& SigA, double& SigB, double& Chi2, double& R2); 00104 static void PowerFit( // Y = A * X^B 00105 const TVec<TFltPr>& XY, double& A, double& B, 00106 double& SigA, double& SigB, double& Chi2, double& R2); 00107 static void LogFit( // Y = A + B*log(X) 00108 const TVec<TFltPr>& XY, double& A, double& B, 00109 double& SigA, double& SigB, double& Chi2, double& R2); 00110 static void ExpFit( // Y = A * exp(B*X) 00111 const TVec<TFltPr>& XY, double& A, double& B, 00112 double& SigA, double& SigB, double& Chi2, double& R2); 00113 public: 00114 static double LnComb(const int& n, const int& k); 00115 public: 00116 static double Entropy(const TIntV& ValV); 00117 static double Entropy(const TFltV& ValV); 00118 static void EntropyFracDim(const TIntV& ValV, TFltV& EntropyV); 00119 static void EntropyFracDim(const TFltV& ValV, TFltV& EntropyV); 00120 public: 00121 static double EntropyBias(const double& B); // solves for p: B = p*log2(p)+(1-p)log2(1-p) 00122 //MLE of the power-law coefficient 00123 static double GetPowerCoef(const TFltV& XValV, double MinX=-1.0); // values (sampled from the distribution) 00124 static double GetPowerCoef(const TFltPrV& XValCntV, double MinX=-1.0); // (value, count) pairs 00125 }; 00126 00128 // Statistical-Moments 00129 ClassTPV(TMom, PMom, TMomV)//{ 00130 private: 00131 TBool DefP; 00132 TFltPrV ValWgtV; 00133 TFlt SumW, ValSumW; 00134 TInt Vals; 00135 TBool UsableP; 00136 TFlt UnusableVal; 00137 TFlt Mn, Mx; 00138 TFlt Mean, Vari, SDev, SErr; 00139 TFlt Median, Quart1, Quart3; 00140 TFltV DecileV; // 0=min 1=1.decile, ..., 9=9.decile, 10=max 00141 TFltV PercentileV; // 0=min 1=1.percentile, ..., 9=9.percentile, 10=max 00142 public: 00143 TMom(): 00144 DefP(false), ValWgtV(), 00145 SumW(), ValSumW(), Vals(), 00146 UsableP(false), UnusableVal(-1), 00147 Mn(), Mx(), 00148 Mean(), Vari(), SDev(), SErr(), 00149 Median(), Quart1(), Quart3(), 00150 DecileV(), PercentileV(){} 00151 TMom(const TMom& Mom): 00152 DefP(Mom.DefP), ValWgtV(Mom.ValWgtV), 00153 SumW(Mom.SumW), ValSumW(Mom.ValSumW), Vals(Mom.Vals), 00154 UsableP(Mom.UsableP), UnusableVal(Mom.UnusableVal), 00155 Mn(Mom.Mn), Mx(Mom.Mx), 00156 Mean(Mom.Mean), Vari(Mom.Vari), SDev(Mom.SDev), SErr(Mom.SErr), 00157 Median(Mom.Median), Quart1(Mom.Quart1), Quart3(Mom.Quart3), 00158 DecileV(Mom.DecileV), PercentileV(Mom.PercentileV){} 00159 static PMom New(){return PMom(new TMom());} 00160 static void NewV(TMomV& MomV, const int& Moms){ 00161 MomV.Gen(Moms); for (int MomN=0; MomN<Moms; MomN++){MomV[MomN]=New();}} 00162 static void NewVV(TVVec<PMom>& MomVV, const int& XMoms, const int& YMoms){ 00163 MomVV.Gen(XMoms, YMoms); 00164 for (int XMomN=0; XMomN<XMoms; XMomN++){ 00165 for (int YMomN=0; YMomN<YMoms; YMomN++){ 00166 MomVV.At(XMomN, YMomN)=New();}}} 00167 TMom(const TFltV& _ValV); 00168 static PMom New(const TFltV& ValV){ 00169 return PMom(new TMom(ValV));} 00170 TMom(TSIn& SIn): 00171 DefP(SIn), 00172 ValWgtV(SIn), 00173 SumW(SIn), ValSumW(SIn), Vals(SIn), 00174 UsableP(SIn), UnusableVal(SIn), 00175 Mn(SIn), Mx(SIn), 00176 Mean(SIn), Vari(SIn), SDev(SIn), SErr(SIn), 00177 Median(SIn), Quart1(SIn), Quart3(SIn), 00178 DecileV(SIn), PercentileV(SIn){} 00179 static PMom Load(TSIn& SIn){return new TMom(SIn);} 00180 void Save(TSOut& SOut) const { 00181 DefP.Save(SOut); 00182 ValWgtV.Save(SOut); 00183 SumW.Save(SOut); ValSumW.Save(SOut); Vals.Save(SOut); 00184 UsableP.Save(SOut); UnusableVal.Save(SOut); 00185 Mn.Save(SOut); Mx.Save(SOut); 00186 Mean.Save(SOut); Vari.Save(SOut); SDev.Save(SOut); SErr.Save(SOut); 00187 Median.Save(SOut); Quart1.Save(SOut); Quart3.Save(SOut); 00188 DecileV.Save(SOut); PercentileV.Save(SOut);} 00189 00190 TMom& operator=(const TMom& Mom){ 00191 Assert(!DefP); DefP=Mom.DefP; 00192 ValWgtV=Mom.ValWgtV; 00193 SumW=Mom.SumW; ValSumW=Mom.ValSumW; Vals=Mom.Vals; 00194 UsableP=Mom.UsableP; UnusableVal=Mom.UnusableVal; 00195 Mn=Mom.Mn; Mx=Mom.Mx; 00196 Mean=Mom.Mean; Vari=Mom.Vari; SDev=Mom.SDev; SErr=Mom.SErr; 00197 Median=Mom.Median; Quart1=Mom.Quart1; Quart3=Mom.Quart3; 00198 DecileV=Mom.DecileV; PercentileV=Mom.PercentileV; 00199 return *this;} 00200 bool operator==(const TMom& Mom) const { 00201 return Vals==Mom.Vals;} 00202 bool operator<(const TMom& Mom) const { 00203 return Vals<Mom.Vals;} 00204 00205 // define 00206 void Def(); 00207 static void DefV(TMomV& MomV){ 00208 for (int MomN=0; MomN<MomV.Len(); MomN++){MomV[MomN]->Def();}} 00209 static void DefVV(TVVec<PMom>& MomVV){ 00210 for (int XMomN=0; XMomN<MomVV.GetXDim(); XMomN++){ 00211 for (int YMomN=0; YMomN<MomVV.GetYDim(); YMomN++){ 00212 MomVV.At(XMomN, YMomN)->Def();}}} 00213 bool IsDef() const {return DefP;} 00214 00215 // values 00216 void Add(const TFlt& Val, const TFlt& Wgt=1){Assert(!DefP); 00217 ValWgtV.Add(TFltPr(Val, Wgt)); SumW+=Wgt; ValSumW+=Wgt*Val; Vals++;} 00218 double GetWgt() const {return SumW;} 00219 int GetVals() const {return Vals;} 00220 TFlt GetVal(const int& ValN) const {IAssert(!IsDef()); return ValWgtV[ValN].Val1;} 00221 //const TFltV& GetValV() const {IAssert(!IsDef()); return ValV;} //J: 00222 00223 // usability 00224 bool IsUsable() const {Assert(DefP); return UsableP;} 00225 static bool IsUsableV(const TMomV& MomV){ 00226 for (int MomN=0; MomN<MomV.Len(); MomN++){ 00227 if (!MomV[MomN]->IsUsable()){return false;}} 00228 return true;} 00229 static bool IsUsableVV(const TVVec<PMom>& MomVV){ 00230 for (int XMomN=0; XMomN<MomVV.GetXDim(); XMomN++){ 00231 for (int YMomN=0; YMomN<MomVV.GetYDim(); YMomN++){ 00232 if (!MomVV.At(XMomN, YMomN)->IsUsable()){return false;}}} 00233 return true;} 00234 00235 // moments 00236 double GetMn() const {Assert(DefP&&UsableP); return Mn;} 00237 double GetMx() const {Assert(DefP&&UsableP); return Mx;} 00238 double GetExtent() const {Assert(DefP&&UsableP); return Mx-Mn;} 00239 double GetMean() const {Assert(DefP&&UsableP); return Mean;} 00240 double GetVari() const {Assert(DefP&&UsableP); return Vari;} 00241 double GetSDev() const {Assert(DefP&&UsableP); return SDev;} 00242 double GetSErr() const {Assert(DefP&&UsableP); return SErr;} 00243 double GetMedian() const {Assert(DefP&&UsableP); return Median;} 00244 double GetQuart1() const {Assert(DefP&&UsableP); return Quart1;} 00245 double GetQuart3() const {Assert(DefP&&UsableP); return Quart3;} 00246 double GetDecile(const int& DecileN) const { 00247 Assert(DefP&&UsableP); return DecileV[DecileN];} 00248 double GetPercentile(const int& PercentileN) const { 00249 Assert(DefP&&UsableP); return PercentileV[PercentileN];} 00250 double GetByNm(const TStr& MomNm) const; 00251 TStr GetStrByNm(const TStr& MomNm, char* FmtStr=NULL) const; 00252 00253 // strings 00254 TStr GetStr(const char& SepCh=' ', const char& DelimCh=':', 00255 const bool& DecileP=true, const bool& PercentileP=true, const TStr& FmtStr="%g") const; 00256 static TStr GetNmVStr(const TStr& VarPfx, 00257 const char& SepCh='\t', const bool& DecileP=true, const bool& PercentileP=true); 00258 TStr GetValVStr(const char& SepCh='\t', const bool& DecileP=true, const bool& PercentileP=true) const; 00259 }; 00260 typedef TVVec<PMom> TMomVV; 00261 typedef THash<TInt, PMom> TIntMomH; 00262 typedef THash<TInt, TMomV> TIntMomVH; 00263 typedef THash<TInt, TMomVV> TIntMomVVH; 00264 00266 // Correlation 00267 ClassTP(TCorr, PCorr)//{ 00268 private: 00269 int ValVLen; 00270 double CorrCf; 00271 double CorrCfPrb; 00272 double FisherZ; 00273 public: 00274 TCorr(){} 00275 TCorr(const TFltV& ValV1, const TFltV& ValV2); 00276 static PCorr New(const TFltV& ValV1, const TFltV& ValV2){ 00277 return PCorr(new TCorr(ValV1, ValV2));} 00278 TCorr(TSIn&){Fail;} 00279 static PCorr Load(TSIn& SIn){return new TCorr(SIn);} 00280 void Save(TSOut&){Fail;} 00281 00282 TCorr& operator=(const TCorr&){Fail; return *this;} 00283 00284 double GetCorrCf() const {return CorrCf;} 00285 double GetCorrCfPrb() const {return CorrCfPrb;} 00286 00287 TStr GetStr() const; 00288 }; 00289 00291 // Statistical Tests 00292 class TStatTest { 00293 private: 00294 static void AveVar(const TFltV& ValV, double& Ave, double& Var); 00295 static double KsProb(const double& Alam); 00296 public: 00297 static void ChiSquareOne( 00298 const TFltV& ObservedBinV, const TFltV& ExpectedBinV, 00299 double& ChiSquareVal, double& SignificancePrb); 00300 static void ChiSquareTwo( 00301 const TFltV& ObservedBin1V, const TFltV& ObservedBin2V, 00302 double& ChiSquareVal, double& SignificancePrb); 00303 00304 static void TTest( 00305 const TFltV& ValV1, const TFltV& ValV2, double& TTestVal, double& TTestPrb); 00306 00307 // Kolmogorov-Smirnov (are two distributions the same) 00308 static void KsTest(const TFltV& ValV1, const TFltV& ValV2, double& DStat, double& PVal); 00309 static void KsTest(const TFltPrV& ValCntV1, const TFltPrV& ValCntV2, double& DStat, double& PVal); 00310 }; 00311 00313 // Combinations 00314 ClassTP(TComb, PComb)//{ 00315 public: 00316 int Items; 00317 int Order; 00318 int CombN; 00319 TIntV ItemV; 00320 public: 00321 TComb(): Items(-1), Order(-1), CombN(-1), ItemV(){} 00322 TComb(const int& _Items, const int& _Order): 00323 Items(_Items), Order(_Order), CombN(0), ItemV(){ 00324 IAssert((Order>0)&&(Order<=Items));} 00325 static PComb New(const int& Items, const int& Order){ 00326 return PComb(new TComb(Items, Order));} 00327 ~TComb(){} 00328 TComb(TSIn&){Fail;} 00329 static PComb Load(TSIn& SIn){return new TComb(SIn);} 00330 void Save(TSOut&){Fail;} 00331 00332 TComb& operator=(const TComb&){Fail; return *this;} 00333 00334 bool GetNext(); 00335 TIntV& GetItemV(){return ItemV;} 00336 int GetCombN() const {return CombN;} 00337 int GetCombs() const; 00338 void Wr(); 00339 }; 00340 00342 // Linear-Regression 00343 ClassTP(TLinReg, PLinReg)//{ 00344 public: 00345 TFltVV XVV; 00346 TFltV YV; 00347 TFltV SigV; 00348 int Recs, Vars; 00349 TFltVV CovarVV; // 1 based 00350 TFltV CfV; // 1 based 00351 double ChiSq; 00352 void GetXV(const int RecN, TFltV& VarV) const { 00353 VarV.Gen(Vars+1); 00354 for (int VarN=0; VarN<Vars; VarN++){VarV[VarN+1]=XVV.At(RecN-1, VarN);} 00355 } 00356 double GetY(const int RecN) const {return YV[RecN-1];} 00357 double GetSig(const int RecN) const {return SigV[RecN-1];} 00358 void NR_covsrt(TFltVV& CovarVV, const int& Vars, const TIntV& ia, const int& mfit); 00359 void NR_gaussj(TFltVV& a, const int& n, TFltVV& b, const int& m); 00360 void NR_lfit(); 00361 public: 00362 TLinReg(){} 00363 static PLinReg New( 00364 const TFltVV& XVV, const TFltV& YV, const TFltV& SigV=TFltV()); 00365 ~TLinReg(){} 00366 TLinReg(TSIn&){Fail;} 00367 static PLinReg Load(TSIn& SIn){return new TLinReg(SIn);} 00368 void Save(TSOut&){Fail;} 00369 00370 TLinReg& operator=(const TLinReg&){Fail; return *this;} 00371 00372 int GetRecs() const {return Recs;} 00373 int GetVars() const {return Vars;} 00374 00375 double GetCf(const int& VarN) const {return CfV[VarN+1];} 00376 double GetCfUncer(const int& VarN) const { 00377 return sqrt(double(CovarVV.At(VarN+1, VarN+1)));} 00378 double GetCovar(const int& VarN1, const int& VarN2) const { 00379 return CovarVV.At(VarN1+1, VarN2+1);} 00380 00381 double GetChiSq() const {return ChiSq;} 00382 00383 static double LinInterp(const double& x1, const double& y1, 00384 const double& x2, const double& y2, const double& AtX) _CMPWARN{ 00385 if (x1 == x2) return (y1+y2)/2.0; 00386 const double k = (y2 - y1) / (x2 - x1); 00387 return k*(AtX - x1) + y1; 00388 } 00389 00390 void Wr() const; 00391 }; 00392 00394 // Singular-Value-Decomposition 00395 ClassTP(TSvd, PSvd)//{ 00396 public: 00397 TFltVV XVV; 00398 TFltV YV; 00399 TFltV SigV; 00400 int Recs, Vars; 00401 TFltVV CovarVV; // 1 based 00402 TFltV CfV; // 1 based 00403 double ChiSq; 00404 void GetXV(const int RecN, TFltV& VarV) const { 00405 VarV.Gen(Vars+1); 00406 for (int VarN=0; VarN<Vars; VarN++){VarV[VarN+1]=XVV.At(RecN-1, VarN);} 00407 } 00408 double GetY(const int RecN) const {return YV[RecN-1];} 00409 double GetSig(const int RecN) const {return SigV[RecN-1];} 00410 static double NR_SIGN(double a, double b){return b >= 0.0 ? fabs(a) : -fabs(a);} 00411 static double NR_FMAX(double maxarg1, double maxarg2){ 00412 return maxarg1 > maxarg2 ? maxarg1 : maxarg2;} 00413 static int NR_IMIN(int iminarg1, int iminarg2){ 00414 return iminarg1 < iminarg2 ? iminarg1 : iminarg2;} 00415 static double NR_pythag(double a, double b); 00416 static void NR_svdcmp(TFltVV& a, int m, int n, TFltV& w, TFltVV& v); 00417 void NR_svbksb( 00418 TFltVV& u, TFltV& w, TFltVV& v, int m, int n, TFltV& b, TFltV& x); 00419 void NR_svdvar(TFltVV& v, int ma, TFltV& w, TFltVV& cvm); 00420 void NR_svdfit(); 00421 public: 00422 TSvd(){} 00423 static PSvd New( 00424 const TFltVV& XVV, const TFltV& YV, const TFltV& SigV=TFltV()); 00425 ~TSvd(){} 00426 TSvd(TSIn&){Fail;} 00427 static PSvd Load(TSIn& SIn){return new TSvd(SIn);} 00428 void Save(TSOut&){Fail;} 00429 00430 TSvd& operator=(const TSvd&){Fail; return *this;} 00431 00432 int GetRecs() const {return Recs;} 00433 int GetVars() const {return Vars;} 00434 00435 double GetCf(const int& VarN) const {return CfV[VarN+1];} 00436 double GetCfUncer(const int& VarN) const { 00437 return sqrt(double(CovarVV.At(VarN+1, VarN+1)));} 00438 double GetCovar(const int& VarN1, const int& VarN2) const { 00439 return CovarVV.At(VarN1+1, VarN2+1);} 00440 00441 double GetChiSq() const {return ChiSq;} 00442 00443 void GetCfV(TFltV& _CfV); 00444 void GetCfUncerV(TFltV& CfUncerV); 00445 00446 static void Svd(const TFltVV& InMtx, TFltVV& LSingV, TFltV& SingValV, TFltVV& RSingV); 00447 static void Svd1Based(const TFltVV& InMtx1, TFltVV& LSingV, TFltV& SingValV, TFltVV& RSingV); 00448 00449 void Wr() const; 00450 }; 00451 00453 // Histogram 00454 class THist { 00455 private: 00456 TFlt MnVal; 00457 TFlt MxVal; 00458 TIntV BucketV; 00459 TFlt BucketSize; 00460 TInt Vals; 00461 public: 00462 THist() { } 00463 THist(const double& _MnVal, const double& _MxVal, const int& Buckets): 00464 MnVal(_MnVal), MxVal(_MxVal), BucketV(Buckets), 00465 BucketSize(1.01 * double(MxVal - MnVal) / double(Buckets)) { } 00466 00467 void Add(const double& Val, const bool& OnlyInP); 00468 00469 int GetVals() const { return Vals; } 00470 int GetBuckets() const { return BucketV.Len(); } 00471 double GetBucketMn(const int& BucketN) const { return MnVal + BucketN * BucketSize; } 00472 double GetBucketMx(const int& BucketN) const { return MnVal + (BucketN + 1) * BucketSize; } 00473 int GetBucketVal(const int& BucketN) const { return BucketV[BucketN]; } 00474 double GetBucketValPerc(const int& BucketN) const { 00475 return (Vals > 0) ? (double(BucketV[BucketN]) / double(Vals)) : 0.0; } 00476 00477 void SaveStat(const TStr& ValNm, TSOut& FOut) const; 00478 void SaveTxt(const TStr& ValNm, const TStr& FNm) const { 00479 TFOut FOut(FNm); SaveStat(ValNm, FOut); } 00480 };