SNAP Library 2.1, Developer Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <html.h>
Public Member Functions | |
THtmlLxChDef () | |
THtmlLxChDef (TSIn &SIn) | |
void | Save (TSOut &SOut) |
THtmlLxChDef & | operator= (const THtmlLxChDef &) |
int | GetChTy (const char &Ch) const |
bool | IsEoln (const char &Ch) const |
bool | IsWs (const char &Ch) const |
bool | IsSpace (const char &Ch) const |
bool | IsAlpha (const char &Ch) const |
bool | IsNum (const char &Ch) const |
bool | IsAlNum (const char &Ch) const |
bool | IsSym (const char &Ch) const |
bool | IsUrl (const char &Ch) const |
bool | IsUc (const char &Ch) const |
bool | IsLc (const char &Ch) const |
char | GetUc (const char &Ch) const |
char | GetLc (const char &Ch) const |
void | GetUcChA (TChA &ChA) const |
void | GetLcChA (TChA &ChA) const |
TStr | GetUcStr (const TStr &Str) const |
TStr | GetLcStr (const TStr &Str) const |
TStr | GetEscStr (const TStr &Str) const |
Static Public Member Functions | |
static PHtmlLxChDef | Load (TSIn &SIn) |
static PHtmlLxChDef | GetChDef () |
static THtmlLxChDef & | GetChDefRef () |
static TStr | GetCSZFromYuascii (const TChA &ChA) |
static TStr | GetCSZFromWin1250 (const TChA &ChA) |
static TStr | GetWin1250FromYuascii (const TChA &ChA) |
static TStr | GetIsoCeFromYuascii (const TChA &ChA) |
Static Public Attributes | |
static PHtmlLxChDef | ChDef = PHtmlLxChDef(new THtmlLxChDef()) |
Private Member Functions | |
void | SetUcCh (const char &UcCh, const char &LcCh) |
void | SetUcCh (const TStr &Str) |
void | SetChTy (const THtmlLxChTy &ChTy, const TStr &Str) |
void | SetEscStr (const TStr &SrcStr, const TStr &DstStr) |
Private Attributes | |
TCRef | CRef |
TIntV | ChTyV |
TChV | UcChV |
TChV | LcChV |
TStrStrH | EscStrH |
Friends | |
class | TPt< THtmlLxChDef > |
Definition at line 48 of file html.cpp.
References ChTyV, TCh::EofCh, hlctAlpha, hlctEof, hlctLTag, hlctNum, hlctRTag, hlctSpace, hlctSym, TCh::Mn, TCh::Mx, TVec< TVal, TSizeTy >::PutAll(), SetChTy(), SetEscStr(), and SetUcCh().
: ChTyV(TCh::Vals), UcChV(TCh::Vals), LcChV(TCh::Vals), EscStrH(100){
  // Default constructor: fills the character-type table, the upper/lower-case
  // tables and the escape-sequence table with the built-in definitions.
  // The statement order below matters: SetUcCh() asserts on the current
  // table contents, so the identity initialisation must precede the pairs.

  // Character-Types
  // every character starts out as a space; specific classes are set afterwards
  ChTyV.PutAll(TInt(hlctSpace));
  SetChTy(hlctAlpha, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  SetChTy(hlctAlpha, "abcdefghijklmnopqrstuvwxyz");
  SetChTy(hlctAlpha, "@_");
  SetChTy(hlctNum, "0123456789");
  SetChTy(hlctSym, "`~!#$%^&*()-=+[{]}\\|;:'\",<.>/?");
  // '<' and '>' are re-typed after the symbol set, so the tag types win
  SetChTy(hlctLTag, "<");
  SetChTy(hlctRTag, ">");
  SetChTy(hlctEof, TStr(TCh::EofCh));
  // every character code outside 0..127 is treated as a letter
  for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    if ((Ch<0)||(127<Ch)){SetChTy(hlctAlpha, TStr(TCh(char(Ch))));}
  }
  //SetChTy(hlctSpace, TStr(TCh(char(160))));

  // Upper-Case
  // start from the identity mapping: each character is its own upper case
  {for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    SetUcCh(char(Ch), char(Ch));}}
  // each string is "<upper-case><lower-case>..." (see SetUcCh(const TStr&))
  SetUcCh("Aa"); SetUcCh("\xc0\xe0"); SetUcCh("\xc1\xe1"); SetUcCh("\xc2\xe2");
  SetUcCh("\xc3\xe3"); SetUcCh("\xc4\xe4"); SetUcCh("\xc5\xe5"); SetUcCh("\xc6\xe6");
  SetUcCh("Bb");
  SetUcCh("Cc"); SetUcCh("\xc7\xe7");
  SetUcCh("Dd"); SetUcCh("\xd0\xf0");
  SetUcCh("Ee"); SetUcCh("\xc8\xe8"); SetUcCh("\xc9\xe9"); SetUcCh("\xca\xea");
  SetUcCh("\xcb\xeb");
  SetUcCh("Ff");
  SetUcCh("Gg");
  SetUcCh("Hh");
  SetUcCh("Ii"); SetUcCh("\xcc\xec"); SetUcCh("\xcd\xed"); SetUcCh("\xce\xee");
  SetUcCh("\xcf\xef");
  SetUcCh("Jj");
  SetUcCh("Kk");
  SetUcCh("Ll");
  SetUcCh("Mm");
  SetUcCh("Nn"); SetUcCh("\xd1\xf1");
  SetUcCh("Oo"); SetUcCh("\xd2\xf2"); SetUcCh("\xd3\xf3"); SetUcCh("\xd4\xf4");
  SetUcCh("\xd5\xf5"); SetUcCh("\xd6\xf6"); SetUcCh("\xd8\xf8");
  SetUcCh("Pp");
  SetUcCh("Qq");
  SetUcCh("Rr");
  SetUcCh("Ss"); SetUcCh("\x8a\x9a");
  SetUcCh("Tt");
  SetUcCh("Uu"); SetUcCh("\xd9\xf9"); SetUcCh("\xda\xfa"); SetUcCh("\xdb\xfb");
  SetUcCh("\xdc\xfc");
  SetUcCh("Vv");
  SetUcCh("Ww");
  SetUcCh("Xx");
  SetUcCh("Yy\xff"); SetUcCh("\xdd\xfd");
  SetUcCh("Zz"); SetUcCh("\x8e\x9e");
  // ISO-CE
  //SetUcCh(uchar(169), uchar(185)); /*Sh - \xa9\xb9*/
  //SetUcCh(uchar(174), uchar(190)); /*Zh - \xae\xbe*/
  //SetUcCh(uchar(200), uchar(232)); /*Ch - \xc8\xe8*/
  //SetUcCh(uchar(198), uchar(230)); /*Cs - \xc6\xe6*/
  //SetUcCh(uchar(208), uchar(240)); /*Dz - \xd0\xf0*/

  // Annoying Unicode-characters
  //SetChTy(hlctSpace, "\xc2\xef");

  // Escape-Sequences
  // Keys are the entity name with the leading '&' and no trailing ';',
  // matching the form of the "&Aelig" key.
  // NOTE(review): confirm against the lexer that the ';' is stripped before
  // the lookup in GetEscStr().
  SetEscStr("&quot", "\""); SetEscStr("&amp", "&");
  SetEscStr("&lt", "<"); SetEscStr("&gt", ">");
  SetEscStr("&nbsp", " ");

  SetEscStr("&auml", "\xe4"); SetEscStr("&Auml", "\xc4");
  SetEscStr("&ouml", "\xf6"); SetEscStr("&Ouml", "\xd6");
  SetEscStr("&uuml", "\xfc"); SetEscStr("&Uuml", "\xdc");
  SetEscStr("&aring", "\xe5"); SetEscStr("&Aring", "\xc5");
  SetEscStr("&oslash", "\xf8"); SetEscStr("&Oslash", "\xd8");
  // "&Aelig" is kept for backward compatibility; the standard HTML spelling
  // is "&AElig", so that key is registered as well.
  SetEscStr("&Aelig", "\xc6"); SetEscStr("&AElig", "\xc6");
  SetEscStr("&aelig", "\xe6");

  // accented e/a are deliberately folded to their plain ASCII letters
  SetEscStr("&eacute", "e"); SetEscStr("&Eacute", "E");
  SetEscStr("&egrave", "e"); SetEscStr("&Egrave", "E");
  SetEscStr("&agrave", "a"); SetEscStr("&Agrave", "A");
}
THtmlLxChDef::THtmlLxChDef | ( | TSIn & | SIn | ) | [inline] |
static PHtmlLxChDef THtmlLxChDef::GetChDef | ( | ) | [inline, static] |
static THtmlLxChDef& THtmlLxChDef::GetChDefRef | ( | ) | [inline, static] |
int THtmlLxChDef::GetChTy | ( | const char & | Ch | ) | const [inline] |
TStr THtmlLxChDef::GetCSZFromWin1250 | ( | const TChA & | ChA | ) | [static] |
Definition at line 132 of file html.cpp.
References TChA::Len().
{
  // Copies ChA, replacing the byte values 232/200, 154/138 and 158/142 with
  // the plain letters 'c'/'C', 's'/'S' and 'z'/'Z'; all other bytes are kept.
  // NOTE(review): presumably these are the Windows-1250 codes of the caron
  // letters, as the function name suggests - confirm against the code page.
  TChA AsciiChA;
  const int SrcLen=ChA.Len();
  for (int SrcChN=0; SrcChN<SrcLen; SrcChN++){
    // read as unsigned so the comparisons use the 0..255 byte value
    const uchar SrcCh=ChA[SrcChN];
    if (SrcCh==232){AsciiChA+='c';}
    else if (SrcCh==200){AsciiChA+='C';}
    else if (SrcCh==154){AsciiChA+='s';}
    else if (SrcCh==138){AsciiChA+='S';}
    else if (SrcCh==158){AsciiChA+='z';}
    else if (SrcCh==142){AsciiChA+='Z';}
    else {AsciiChA+=SrcCh;}
  }
  return AsciiChA;
}
TStr THtmlLxChDef::GetCSZFromYuascii | ( | const TChA & | ChA | ) | [static] |
Definition at line 111 of file html.cpp.
References TChA::Len().
{
  // Copies ChA, replacing the punctuation characters listed below with plain
  // ASCII letters; every other character is passed through unchanged.
  TChA AsciiChA;
  const int SrcLen=ChA.Len();
  for (int SrcChN=0; SrcChN<SrcLen; SrcChN++){
    const char SrcCh=ChA[SrcChN];
    char OutCh=SrcCh; // default: keep the character as it is
    switch (SrcCh){
      case '~': case '}': OutCh='c'; break;
      case '^': case ']': OutCh='C'; break;
      case '|': OutCh='d'; break;
      case '\\': OutCh='D'; break;
      case '{': OutCh='s'; break;
      case '[': OutCh='S'; break;
      case '`': OutCh='z'; break;
      case '@': OutCh='Z'; break;
      default: break;
    }
    AsciiChA+=OutCh;
  }
  return AsciiChA;
}
TStr THtmlLxChDef::GetEscStr | ( | const TStr & | Str | ) | const |
Definition at line 33 of file html.cpp.
References EscStrH, THash< TKey, TDat, THashFunc >::GetKeyId(), and TStr::Len().
Referenced by THtmlLx::GetEscCh().
{
  // Translates an escape sequence into its replacement string.
  //   - a key registered in EscStrH returns the stored replacement;
  //   - "&#<decimal digits>" returns the single character with that code,
  //     narrowed to char;
  //   - anything else, including a malformed numeric reference, returns " ".
  const int EscStrId=EscStrH.GetKeyId(Str);
  if (EscStrId!=-1){
    return EscStrH[EscStrId];
  }
  if ((Str.Len()>=2)&&(Str[0]=='&')&&(Str[1]=='#')){
    // "&#" with no digits is malformed: use the unknown-escape fallback
    if (Str.Len()==2){return TStr(' ');}
    int ChCd=0;
    for (int ChN=2; ChN<Str.Len(); ChN++){
      const char DigitCh=Str[ChN];
      // a non-digit would otherwise be folded into the code as garbage
      if ((DigitCh<'0')||('9'<DigitCh)){return TStr(' ');}
      // stop accumulating once the code exceeds 0xFFFF to avoid int overflow
      if (ChCd<=0xFFFF){ChCd=ChCd*10+(DigitCh-'0');}
    }
    // the table works on single-byte characters, so the code is narrowed
    return TStr(static_cast<char>(ChCd));
  }
  return TStr(' ');
}
TStr THtmlLxChDef::GetIsoCeFromYuascii | ( | const TChA & | ChA | ) | [static] |
Definition at line 170 of file html.cpp.
References TChA::Len().
{
  // Copies ChA, replacing the punctuation characters listed below with the
  // given 8-bit character codes; every other character is kept unchanged.
  // NOTE(review): presumably the codes are the ISO-CE encodings of the
  // corresponding accented letters, per the function name - confirm.
  TChA IsoCeChA;
  const int SrcLen=ChA.Len();
  for (int SrcChN=0; SrcChN<SrcLen; SrcChN++){
    const char SrcCh=ChA[SrcChN];
    if (SrcCh=='~'){IsoCeChA+=uchar(232);}
    else if (SrcCh=='^'){IsoCeChA+=uchar(200);}
    else if (SrcCh=='}'){IsoCeChA+=uchar(230);}
    else if (SrcCh==']'){IsoCeChA+=uchar(198);}
    else if (SrcCh=='|'){IsoCeChA+=uchar(240);}
    else if (SrcCh=='\\'){IsoCeChA+=uchar(208);}
    else if (SrcCh=='{'){IsoCeChA+=uchar(185);}
    else if (SrcCh=='['){IsoCeChA+=uchar(169);}
    else if (SrcCh=='`'){IsoCeChA+=uchar(190);}
    else if (SrcCh=='@'){IsoCeChA+=uchar(174);}
    else {IsoCeChA+=SrcCh;}
  }
  return IsoCeChA;
}
char THtmlLxChDef::GetLc | ( | const char & | Ch | ) | const [inline] |
void THtmlLxChDef::GetLcChA | ( | TChA & | ChA | ) | const [inline] |
Definition at line 56 of file html.h.
References TChA::Len(), and TChA::PutCh().
TStr THtmlLxChDef::GetLcStr | ( | const TStr & | Str | ) | const [inline] |
char THtmlLxChDef::GetUc | ( | const char & | Ch | ) | const [inline] |
Definition at line 52 of file html.h.
References TCh::Mn.
Referenced by THtmlLx::GetSym(), and THtmlLx::GetTag().
void THtmlLxChDef::GetUcChA | ( | TChA & | ChA | ) | const [inline] |
Definition at line 54 of file html.h.
References TChA::Len(), and TChA::PutCh().
TStr THtmlLxChDef::GetUcStr | ( | const TStr & | Str | ) | const [inline] |
TStr THtmlLxChDef::GetWin1250FromYuascii | ( | const TChA & | ChA | ) | [static] |
Definition at line 149 of file html.cpp.
References TChA::Len().
{
  // Copies ChA, replacing the punctuation characters listed below; every
  // other character is kept unchanged.
  // '~' '^' '{' '[' '`' '@' become 8-bit character codes, while
  // '}' ']' '|' '\\' become the plain ASCII letters c, C, d, D.
  TChA Win1250ChA;
  const int SrcLen=ChA.Len();
  int SrcChN=0;
  while (SrcChN<SrcLen){
    const char SrcCh=ChA[SrcChN];
    if (SrcCh=='~'){Win1250ChA+=uchar(232);}
    else if (SrcCh=='^'){Win1250ChA+=uchar(200);}
    else if (SrcCh=='}'){Win1250ChA+='c';}
    else if (SrcCh==']'){Win1250ChA+='C';}
    else if (SrcCh=='|'){Win1250ChA+='d';}
    else if (SrcCh=='\\'){Win1250ChA+='D';}
    else if (SrcCh=='{'){Win1250ChA+=uchar(154);}
    else if (SrcCh=='['){Win1250ChA+=uchar(138);}
    else if (SrcCh=='`'){Win1250ChA+=uchar(158);}
    else if (SrcCh=='@'){Win1250ChA+=uchar(142);}
    else {Win1250ChA+=SrcCh;}
    SrcChN++;
  }
  return Win1250ChA;
}
bool THtmlLxChDef::IsAlNum | ( | const char & | Ch | ) | const [inline] |
bool THtmlLxChDef::IsAlpha | ( | const char & | Ch | ) | const [inline] |
Definition at line 39 of file html.h.
References hlctAlpha, and TCh::Mn.
Referenced by THtmlLx::GetSym(), and THtmlLx::GetTag().
bool THtmlLxChDef::IsEoln | ( | const char & | Ch | ) | const [inline] |
bool THtmlLxChDef::IsLc | ( | const char & | Ch | ) | const [inline] |
bool THtmlLxChDef::IsNum | ( | const char & | Ch | ) | const [inline] |
bool THtmlLxChDef::IsSpace | ( | const char & | Ch | ) | const [inline] |
bool THtmlLxChDef::IsSym | ( | const char & | Ch | ) | const [inline] |
bool THtmlLxChDef::IsUc | ( | const char & | Ch | ) | const [inline] |
bool THtmlLxChDef::IsUrl | ( | const char & | Ch | ) | const [inline] |
bool THtmlLxChDef::IsWs | ( | const char & | Ch | ) | const [inline] |
Definition at line 36 of file html.h.
References TCh::CrCh, TCh::LfCh, and TCh::TabCh.
Referenced by THtmlLx::GetTag().
{
  // True exactly for space, tab, carriage return and line feed.
  if (Ch==' '){return true;}
  if (Ch==TCh::TabCh){return true;}
  if (Ch==TCh::CrCh){return true;}
  return Ch==TCh::LfCh;
}
static PHtmlLxChDef THtmlLxChDef::Load | ( | TSIn & | SIn | ) | [inline, static] |
Definition at line 27 of file html.h.
{
  // Builds a new definition from the input stream and hands it to a smart
  // pointer, the same way the static ChDef member is initialised.
  return PHtmlLxChDef(new THtmlLxChDef(SIn));
}
THtmlLxChDef& THtmlLxChDef::operator= | ( | const THtmlLxChDef & | ) | [inline] |
void THtmlLxChDef::Save | ( | TSOut & | SOut | ) | [inline] |
void THtmlLxChDef::SetChTy | ( | const THtmlLxChTy & | ChTy, |
const TStr & | Str | ||
) | [private] |
void THtmlLxChDef::SetEscStr | ( | const TStr & | SrcStr, |
const TStr & | DstStr | ||
) | [private] |
Definition at line 29 of file html.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), and EscStrH.
Referenced by THtmlLxChDef().
void THtmlLxChDef::SetUcCh | ( | const char & | UcCh, |
const char & | LcCh | ||
) | [private] |
Definition at line 3 of file html.cpp.
References IAssert, LcChV, TCh::Mn, and UcChV.
Referenced by SetUcCh(), and THtmlLxChDef().
{
  // Registers UcCh as the upper-case form of LcCh.
  // Both tables are indexed by the character code offset from TCh::Mn.

  // Upper-case table: several lower-case characters may share one upper-case
  // character, but a given lower-case slot may only be set while it is still
  // empty or holds the identity mapping.
  IAssert( (UcChV[LcCh-TCh::Mn]==TCh(0))|| (UcChV[LcCh-TCh::Mn]==TCh(LcCh)));
  const int LcSlot=LcCh-TCh::Mn;
  UcChV[LcSlot]=TCh(UcCh);

  // Lower-case table: an upper-case character has only one lower-case form,
  // so the first real mapping wins and later ones are ignored.
  const int UcSlot=UcCh-TCh::Mn;
  const bool IsLcUnset=
   (LcChV[UcSlot]==TCh(0))||(LcChV[UcSlot]==TCh(UcCh));
  if (IsLcUnset){
    LcChV[UcSlot]=TCh(LcCh);
  }
}
void THtmlLxChDef::SetUcCh | ( | const TStr & | Str | ) | [private] |
Definition at line 15 of file html.cpp.
References hlctAlpha, TStr::Len(), SetChTy(), and SetUcCh().
{
  // Str has the form "<upper-case><lower-case>...".
  // All of its characters are first typed as letters.
  SetChTy(hlctAlpha, Str);
  // Then every character after the first is registered as a lower-case form
  // of Str[0]. Str[0] is only read inside the loop, so an empty string is
  // never indexed.
  int LcChN=1;
  while (LcChN<Str.Len()){
    SetUcCh(Str[0], Str[LcChN]);
    LcChN++;
  }
}
friend class TPt< THtmlLxChDef > [friend] |
PHtmlLxChDef THtmlLxChDef::ChDef = PHtmlLxChDef(new THtmlLxChDef()) [static] |
TIntV THtmlLxChDef::ChTyV [private] |
Definition at line 16 of file html.h.
Referenced by SetChTy(), and THtmlLxChDef().
TCRef THtmlLxChDef::CRef [private] |
TStrStrH THtmlLxChDef::EscStrH [private] |
Definition at line 19 of file html.h.
Referenced by GetEscStr(), and SetEscStr().
TChV THtmlLxChDef::LcChV [private] |
TChV THtmlLxChDef::UcChV [private] |