SNAP Library, Developer Reference
2013-01-07 14:03:36
SNAP, a general-purpose, high-performance system for analysis and manipulation of large networks
|
00001 #include "bd.h" 00002 00004 // Url 00005 typedef enum {usUndef, usHttp, usOther} TUrlScheme; 00006 00007 ClassTPV(TUrl, PUrl, TUrlV)//{ 00008 private: 00009 static const TStr UrlHttpPrefixStr; 00010 static const TStr UrlHttpAbsPrefixStr; 00011 TUrlScheme Scheme; 00012 TStr UrlStr, RelUrlStr, BaseUrlStr; 00013 TStr SchemeNm, HostNm; 00014 TStr PortStr, PathStr, SearchStr, FragIdStr; 00015 int PortN; 00016 TStrV PathSegV; 00017 TStr IpNum; 00018 TStr FinalUrlStr, FinalHostNm; 00019 TStr HttpRqStr; 00020 void GetAbs(const TStr& AbsUrlStr); 00021 void GetAbsFromBase(const TStr& RelUrlStr, const TStr& BaseUrlStr); 00022 UndefDefaultCopyAssign(TUrl); 00023 public: 00024 TUrl(const TStr& _RelUrlStr, const TStr& _BaseUrlStr=TStr()); 00025 static PUrl New(const TStr& RelUrlStr, const TStr& BaseUrlStr=TStr()){ 00026 return PUrl(new TUrl(RelUrlStr, BaseUrlStr));} 00027 ~TUrl(){} 00028 TUrl(TSIn&){Fail;} 00029 static PUrl Load(TSIn&){Fail; return NULL;} 00030 void Save(TSOut&){Fail;} 00031 00032 bool IsOk(const TUrlScheme _Scheme=usUndef) const { 00033 if (_Scheme==usUndef){return Scheme!=usUndef;} 00034 else {return Scheme==_Scheme;}} 00035 TUrlScheme GetScheme(){return Scheme;} 00036 TStr GetUrlStr() const {return UrlStr;} 00037 TStr GetRelUrlStr() const {return RelUrlStr;} 00038 bool IsBaseUrl(){return !BaseUrlStr.Empty();} 00039 TStr GetBaseUrlStr() const {return BaseUrlStr;} 00040 TStr GetSchemeNm() const {EAssert(IsOk()); return SchemeNm;} 00041 TStr GetHostNm() const {EAssert(IsOk()); return HostNm;} 00042 TStr GetDmNm(const int& MxDmSegs=-1) const; 00043 bool IsPortOk() const { EAssert(IsOk()); return (PortN > 0); } 00044 TStr GetPortStr() const {EAssert(IsOk()); return PortStr;} 00045 int GetPortN() const {EAssert(IsOk()&&(PortN!=-1)); return PortN;} 00046 TStr GetPathStr() const {EAssert(IsOk()); return PathStr;} 00047 int GetPathSegs() const {return PathSegV.Len();} 00048 TStr GetPathSeg(const int& PathSegN) const {return PathSegV[PathSegN];} 00049 TStr 
GetSearchStr() const {EAssert(IsOk()); return SearchStr;} 00050 TStr GetFragIdStr() const {EAssert(IsOk()); return FragIdStr;} 00051 00052 bool IsIpNum() const {return !IpNum.Empty();} 00053 void PutIpNum(const TStr& _IpNum){IpNum=_IpNum;} 00054 TStr GetIpNum() const {EAssert(IsIpNum()); return IpNum;} 00055 TStr GetIpNumOrHostNm() const {return IsIpNum() ? GetIpNum() : GetHostNm();} 00056 00057 bool IsDefFinalUrl() const { 00058 EAssert(IsOk(usHttp)); return !FinalUrlStr.Empty();} 00059 TStr GetFinalUrlStr() const { 00060 EAssert(IsDefFinalUrl()); return FinalUrlStr;} 00061 TStr GetAsFinalUrlStr() const { 00062 if (IsDefFinalUrl()){return FinalUrlStr;} else {return UrlStr;}} 00063 TStr GetFinalHostNm() const { 00064 EAssert(IsDefFinalUrl()); return FinalHostNm;} 00065 TStr GetAsFinalHostNm() const { 00066 if (IsDefFinalUrl()){return FinalHostNm;} else {return HostNm;}} 00067 void DefUrlAsFinal(){ 00068 EAssert(IsOk(usHttp)); EAssert(!IsDefFinalUrl()); 00069 FinalUrlStr=UrlStr; FinalHostNm=HostNm;} 00070 void DefFinalUrl(const TStr& _FinalHostNm); 00071 00072 void PutHttpRqStr(const TStr& _HttpRqStr){HttpRqStr=_HttpRqStr;} 00073 TStr GetHttpRqStr() const {return HttpRqStr;} 00074 bool IsHttpRqStr() const {return !HttpRqStr.Empty();} 00075 void ChangeHttpRqStr(const TStr& SrcStr, const TStr& DstStr){ 00076 HttpRqStr.ChangeStr(SrcStr, DstStr);} 00077 00078 bool IsInHost(const TStr& _HostNm) const { 00079 EAssert(IsOk()); return HostNm.GetUc().IsSuffix(_HostNm.GetUc());} 00080 bool IsInPath(const TStr& _PathStr) const { 00081 EAssert(IsOk()); return PathStr.GetUc().IsPrefix(_PathStr.GetUc());} 00082 void ToLcPath(); 00083 00084 static bool IsAbs(const TStr& UrlStr); 00085 static bool IsScript(const TStr& UrlStr); 00086 static bool IsSite(const TStr& UrlStr); 00087 00088 static PUrl GetUrlFromShortcut(const TStr& ShortcutUrlStr, 00089 const TStr& DfHostNmPrefix, const TStr& DfHostNmSufix); 00090 static TStr GetUrlSearchStr(const TStr& Str); 00091 static TStr 
DecodeUrlStr(const TStr& UrlStr); 00092 static TStr GetDocStrFromUrlStr(const TStr& UrlStr, const int& Copies=1); 00093 static TStr GetTopDownDocNm( 00094 const TStr& UrlStr, const int& MxLen=-1, const bool& HostOnlyP=false); 00095 }; 00096 typedef TPair<TInt, PUrl> TIdUrlPr; 00097 typedef TQQueue<TIdUrlPr> TIdUrlPrQ; 00098 typedef THash<TInt, PUrl> TIdToUrlH; 00099 00101 // Url-Environment 00102 ClassTP(TUrlEnv, PUrlEnv)//{ 00103 private: 00104 TStr BaseUrlStr; 00105 TStrV KeyNmV; 00106 TStrStrVH KeyNmToValH; 00107 public: 00108 TUrlEnv(): 00109 KeyNmV(), KeyNmToValH(10){} 00110 TUrlEnv(const TUrlEnv& UrlEnv): 00111 KeyNmV(UrlEnv.KeyNmV), KeyNmToValH(UrlEnv.KeyNmToValH){} 00112 static PUrlEnv New(){return new TUrlEnv();} 00113 static PUrlEnv New(const TStr& BaseUrlStr, 00114 const TStr& KeyNm1=TStr(), const TStr& ValStr1=TStr(), 00115 const TStr& KeyNm2=TStr(), const TStr& ValStr2=TStr(), 00116 const TStr& KeyNm3=TStr(), const TStr& ValStr3=TStr(), 00117 const TStr& KeyNm4=TStr(), const TStr& ValStr4=TStr()){ 00118 PUrlEnv UrlEnv=New(); 00119 UrlEnv->PutBaseUrlStr(BaseUrlStr); 00120 if (!KeyNm1.Empty()){UrlEnv->AddKeyVal(KeyNm1, ValStr1);} 00121 if (!KeyNm2.Empty()){UrlEnv->AddKeyVal(KeyNm2, ValStr2);} 00122 if (!KeyNm3.Empty()){UrlEnv->AddKeyVal(KeyNm3, ValStr3);} 00123 if (!KeyNm4.Empty()){UrlEnv->AddKeyVal(KeyNm4, ValStr4);} 00124 return UrlEnv;} 00125 ~TUrlEnv(){} 00126 TUrlEnv(TSIn& SIn): KeyNmV(SIn), KeyNmToValH(SIn){} 00127 static PUrlEnv Load(TSIn& SIn){return new TUrlEnv(SIn);} 00128 void Save(TSOut& SOut){KeyNmV.Save(SOut); KeyNmToValH.Save(SOut);} 00129 00130 TUrlEnv& operator=(const TUrlEnv& Env){ 00131 if (this!=&Env){KeyNmV=Env.KeyNmV; KeyNmToValH=Env.KeyNmToValH;} 00132 return *this;} 00133 00134 // base url 00135 void PutBaseUrlStr(const TStr& _BaseUrlStr){BaseUrlStr=_BaseUrlStr;} 00136 TStr GetBaseUrlStr() const {return BaseUrlStr;} 00137 00138 // adding key-value 00139 void AddKeyVal(const TStr& KeyNm, const TStr& ValStr){ 00140 if 
(!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);} 00141 KeyNmToValH.GetDat(KeyNm).Clr(); 00142 KeyNmToValH.GetDat(KeyNm).Add(ValStr);} 00143 void AddToKeyVal(const TStr& KeyNm, const TStr& ValStr){ 00144 if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);} 00145 KeyNmToValH.GetDat(KeyNm).Add(ValStr);} 00146 00147 // key retrieval 00148 bool Empty() const {return KeyNmV.Empty();} 00149 int GetKeys() const {return KeyNmV.Len();} 00150 bool IsKey(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm)!=-1;} 00151 int GetKeyN(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm);} 00152 TStr GetKeyNm(const int& KeyN) const {return KeyNmV[KeyN];} 00153 00154 // value retrieval 00155 int GetVals(const int& KeyN) const { 00156 return KeyNmToValH.GetDat(KeyNmV[KeyN]).Len();} 00157 int GetVals(const TStr& KeyNm) const { 00158 return KeyNmToValH.GetDat(KeyNm).Len();} 00159 TStr GetVal(const int& KeyN, const int& ValN=0) const { 00160 return KeyNmToValH.GetDat(KeyNmV[KeyN])[ValN];} 00161 TStr GetVal(const TStr& KeyNm, const int& ValN=0, const TStr& DfVal="") const { 00162 if (KeyNmToValH.IsKey(KeyNm)){ 00163 return KeyNmToValH.GetDat(KeyNm)[ValN];} 00164 else {return DfVal;}} 00165 00166 // full-url-string 00167 TStr GetFullUrlStr() const; 00168 00169 static PUrlEnv MkClone(const PUrlEnv& UrlEnv); 00170 }; 00171