|
SNAP Library, Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <html.h>

Public Member Functions | |
| THtmlLxChDef () | |
| THtmlLxChDef (TSIn &SIn) | |
| void | Save (TSOut &SOut) |
| THtmlLxChDef & | operator= (const THtmlLxChDef &) |
| int | GetChTy (const char &Ch) const |
| bool | IsEoln (const char &Ch) const |
| bool | IsWs (const char &Ch) const |
| bool | IsSpace (const char &Ch) const |
| bool | IsAlpha (const char &Ch) const |
| bool | IsNum (const char &Ch) const |
| bool | IsAlNum (const char &Ch) const |
| bool | IsSym (const char &Ch) const |
| bool | IsUrl (const char &Ch) const |
| bool | IsUc (const char &Ch) const |
| bool | IsLc (const char &Ch) const |
| char | GetUc (const char &Ch) const |
| char | GetLc (const char &Ch) const |
| void | GetUcChA (TChA &ChA) const |
| void | GetLcChA (TChA &ChA) const |
| TStr | GetUcStr (const TStr &Str) const |
| TStr | GetLcStr (const TStr &Str) const |
| TStr | GetEscStr (const TStr &Str) const |
Static Public Member Functions | |
| static PHtmlLxChDef | Load (TSIn &SIn) |
| static PHtmlLxChDef | GetChDef () |
| static THtmlLxChDef & | GetChDefRef () |
| static TStr | GetCSZFromYuascii (const TChA &ChA) |
| static TStr | GetCSZFromWin1250 (const TChA &ChA) |
| static TStr | GetWin1250FromYuascii (const TChA &ChA) |
| static TStr | GetIsoCeFromYuascii (const TChA &ChA) |
Static Public Attributes | |
| static PHtmlLxChDef | ChDef = PHtmlLxChDef(new THtmlLxChDef()) |
Private Member Functions | |
| void | SetUcCh (const char &UcCh, const char &LcCh) |
| void | SetUcCh (const TStr &Str) |
| void | SetChTy (const THtmlLxChTy &ChTy, const TStr &Str) |
| void | SetEscStr (const TStr &SrcStr, const TStr &DstStr) |
Private Attributes | |
| TCRef | CRef |
| TIntV | ChTyV |
| TChV | UcChV |
| TChV | LcChV |
| TStrStrH | EscStrH |
Friends | |
| class | TPt< THtmlLxChDef > |
Definition at line 48 of file html.cpp.
References ChTyV, TCh::EofCh, hlctAlpha, hlctEof, hlctLTag, hlctNum, hlctRTag, hlctSpace, hlctSym, TCh::Mn, TCh::Mx, TVec< TVal >::PutAll(), SetChTy(), SetEscStr(), and SetUcCh().
: ChTyV(TCh::Vals), UcChV(TCh::Vals), LcChV(TCh::Vals), EscStrH(100){
  // Builds the default character definition: the character-type table,
  // the upper/lower-case tables and the table of named escape sequences.

  // Character-Types
  // every character starts out as a space; the calls below override that
  ChTyV.PutAll(TInt(hlctSpace));
  SetChTy(hlctAlpha, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  SetChTy(hlctAlpha, "abcdefghijklmnopqrstuvwxyz");
  SetChTy(hlctAlpha, "@_");
  SetChTy(hlctNum, "0123456789");
  SetChTy(hlctSym, "`~!#$%^&*()-=+[{]}\\|;:'\",<.>/?");
  // '<' and '>' were just marked as symbols; these two calls come later
  // and re-mark them as tag delimiters
  SetChTy(hlctLTag, "<"); SetChTy(hlctRTag, ">");
  SetChTy(hlctEof, TStr(TCh::EofCh));
  // every character code outside 0..127 is treated as a letter
  for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    if ((Ch<0)||(127<Ch)){SetChTy(hlctAlpha, TStr(TCh(char(Ch))));}}
  //SetChTy(hlctSpace, TStr(TCh(char(160))));

  // Upper-Case
  // start with the identity mapping, then register the real case pairs
  {for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    SetUcCh(char(Ch), char(Ch));}}
  SetUcCh("Aa"); SetUcCh("\xc0\xe0"); SetUcCh("\xc1\xe1"); SetUcCh("\xc2\xe2");
  SetUcCh("\xc3\xe3"); SetUcCh("\xc4\xe4"); SetUcCh("\xc5\xe5"); SetUcCh("\xc6\xe6");
  SetUcCh("Bb"); SetUcCh("Cc"); SetUcCh("\xc7\xe7"); SetUcCh("Dd");
  SetUcCh("\xd0\xf0"); SetUcCh("Ee"); SetUcCh("\xc8\xe8"); SetUcCh("\xc9\xe9");
  SetUcCh("\xca\xea"); SetUcCh("\xcb\xeb"); SetUcCh("Ff"); SetUcCh("Gg");
  SetUcCh("Hh"); SetUcCh("Ii"); SetUcCh("\xcc\xec"); SetUcCh("\xcd\xed");
  SetUcCh("\xce\xee"); SetUcCh("\xcf\xef"); SetUcCh("Jj"); SetUcCh("Kk");
  SetUcCh("Ll"); SetUcCh("Mm"); SetUcCh("Nn"); SetUcCh("\xd1\xf1");
  SetUcCh("Oo"); SetUcCh("\xd2\xf2"); SetUcCh("\xd3\xf3"); SetUcCh("\xd4\xf4");
  SetUcCh("\xd5\xf5"); SetUcCh("\xd6\xf6"); SetUcCh("\xd8\xf8"); SetUcCh("Pp");
  SetUcCh("Qq"); SetUcCh("Rr"); SetUcCh("Ss"); SetUcCh("\x8a\x9a");
  SetUcCh("Tt"); SetUcCh("Uu"); SetUcCh("\xd9\xf9"); SetUcCh("\xda\xfa");
  SetUcCh("\xdb\xfb"); SetUcCh("\xdc\xfc"); SetUcCh("Vv"); SetUcCh("Ww");
  SetUcCh("Xx"); SetUcCh("Yy\xff"); SetUcCh("\xdd\xfd"); SetUcCh("Zz");
  SetUcCh("\x8e\x9e");
  // ISO-CE
  //SetUcCh(uchar(169), uchar(185)); /*Sh - \xa9\xb9*/
  //SetUcCh(uchar(174), uchar(190)); /*Zh - \xae\xbe*/
  //SetUcCh(uchar(200), uchar(232)); /*Ch - \xc8\xe8*/
  //SetUcCh(uchar(198), uchar(230)); /*Cs - \xc6\xe6*/
  //SetUcCh(uchar(208), uchar(240)); /*Dz - \xd0\xf0*/

  // Annoying Unicode-characters
  //SetChTy(hlctSpace, "\xc2\xef");

  // Escape-Sequences
  // NOTE(review): keys are written without a trailing ';', matching the
  // "&Aelig" key -- confirm against html.cpp and THtmlLx::GetEscCh().
  SetEscStr("&quot", "\""); SetEscStr("&amp", "&");
  SetEscStr("&lt", "<"); SetEscStr("&gt", ">");
  SetEscStr("&nbsp", " ");

  SetEscStr("&auml", "\xe4"); SetEscStr("&Auml", "\xc4");
  SetEscStr("&ouml", "\xf6"); SetEscStr("&Ouml", "\xd6");
  SetEscStr("&uuml", "\xfc"); SetEscStr("&Uuml", "\xdc");
  SetEscStr("&aring", "\xe5"); SetEscStr("&Aring", "\xc5");
  SetEscStr("&oslash", "\xf8"); SetEscStr("&Oslash", "\xd8");
  // "&Aelig" is the historical (mis-cased) key and is kept for backward
  // compatibility; the HTML entity name is "AElig", so register that too.
  SetEscStr("&Aelig", "\xc6"); SetEscStr("&AElig", "\xc6");
  SetEscStr("&aelig", "\xe6");

  // accented e/a are deliberately reduced to their plain ASCII base letter
  SetEscStr("&eacute", "e"); SetEscStr("&Eacute", "E");
  SetEscStr("&egrave", "e"); SetEscStr("&Egrave", "E");
  SetEscStr("&agrave", "a"); SetEscStr("&Agrave", "A");
}

| THtmlLxChDef::THtmlLxChDef | ( | TSIn & | SIn | ) | [inline] |
| static PHtmlLxChDef THtmlLxChDef::GetChDef | ( | ) | [inline, static] |
| static THtmlLxChDef& THtmlLxChDef::GetChDefRef | ( | ) | [inline, static] |
| int THtmlLxChDef::GetChTy | ( | const char & | Ch | ) | const [inline] |
| TStr THtmlLxChDef::GetCSZFromWin1250 | ( | const TChA & | ChA | ) | [static] |
Definition at line 132 of file html.cpp.
References TChA::Len().
{
  // Replaces the Windows-1250 bytes 232/200, 154/138 and 158/142 with the
  // plain ASCII letters c/C, s/S and z/Z; all other bytes are copied as-is.
  TChA AsciiChA;
  const int Chs=ChA.Len();
  for (int ChIx=0; ChIx<Chs; ChIx++){
    // compare as unsigned so that codes above 127 match their decimal value
    const uchar SrcCh=ChA[ChIx];
    if (SrcCh==232){AsciiChA+='c';}
    else if (SrcCh==200){AsciiChA+='C';}
    else if (SrcCh==154){AsciiChA+='s';}
    else if (SrcCh==138){AsciiChA+='S';}
    else if (SrcCh==158){AsciiChA+='z';}
    else if (SrcCh==142){AsciiChA+='Z';}
    else {AsciiChA+=SrcCh;}
  }
  return AsciiChA;
}

| TStr THtmlLxChDef::GetCSZFromYuascii | ( | const TChA & | ChA | ) | [static] |
Definition at line 111 of file html.cpp.
References TChA::Len().
{
  // Replaces the ten YUASCII stand-in characters with plain ASCII letters
  // (c/C, d/D, s/S, z/Z); every other character is copied unchanged.
  TChA AsciiChA;
  const int Chs=ChA.Len();
  for (int ChIx=0; ChIx<Chs; ChIx++){
    char OutCh=ChA[ChIx];
    switch (OutCh){
      // two different YUASCII characters collapse onto each of c and C
      case '~': case '}': OutCh='c'; break;
      case '^': case ']': OutCh='C'; break;
      case '|': OutCh='d'; break;
      case '\\': OutCh='D'; break;
      case '{': OutCh='s'; break;
      case '[': OutCh='S'; break;
      case '`': OutCh='z'; break;
      case '@': OutCh='Z'; break;
      default: break; // not a YUASCII stand-in: keep the character
    }
    AsciiChA+=OutCh;
  }
  return AsciiChA;
}

| TStr THtmlLxChDef::GetEscStr | ( | const TStr & | Str | ) | const |
Definition at line 33 of file html.cpp.
References EscStrH, THash< TKey, TDat, THashFunc >::GetKeyId(), and TStr::Len().
Referenced by THtmlLx::GetEscCh().
{
  // Translates an escape sequence into its replacement string.
  //   Str: the escape sequence, starting with '&'.
  //   Returns the registered replacement for a known sequence, the decoded
  //   character for a numeric reference "&#<decimal digits>", and a single
  //   blank for anything else.

  // sequences registered through SetEscStr() take precedence
  const int EscStrId=EscStrH.GetKeyId(Str);
  if (EscStrId!=-1){
    return EscStrH[EscStrId];
  }
  if ((Str.Len()>=2)&&(Str[0]=='&')&&(Str[1]=='#')){
    int ChCd=0;
    for (int ChN=2; ChN<Str.Len(); ChN++){
      const char DigitCh=Str[ChN];
      // stop at the first non-digit (e.g. a trailing ';'); folding it into
      // the code as if it were a digit would yield a garbage character
      if ((DigitCh<'0')||('9'<DigitCh)){break;}
      // once the code exceeds 0xFFFF further digits are ignored, which
      // keeps the accumulator from overflowing on over-long digit runs
      if (ChCd<=0xFFFF){ChCd=ChCd*10+(DigitCh-'0');}
    }
    // the result is a single char, so codes above the char range are truncated
    return TStr(static_cast<char>(ChCd));
  }
  // unknown escape sequence
  return TStr(' ');
}


| TStr THtmlLxChDef::GetIsoCeFromYuascii | ( | const TChA & | ChA | ) | [static] |
Definition at line 170 of file html.cpp.
References TChA::Len().
{
  // Replaces each of the ten YUASCII stand-in characters with a single
  // byte above 127 (the numeric codes listed below); every other
  // character is copied unchanged.
  TChA IsoCeChA;
  const int Chs=ChA.Len();
  for (int ChIx=0; ChIx<Chs; ChIx++){
    const char SrcCh=ChA[ChIx];
    if (SrcCh=='~'){IsoCeChA+=uchar(232);}
    else if (SrcCh=='^'){IsoCeChA+=uchar(200);}
    else if (SrcCh=='}'){IsoCeChA+=uchar(230);}
    else if (SrcCh==']'){IsoCeChA+=uchar(198);}
    else if (SrcCh=='|'){IsoCeChA+=uchar(240);}
    else if (SrcCh=='\\'){IsoCeChA+=uchar(208);}
    else if (SrcCh=='{'){IsoCeChA+=uchar(185);}
    else if (SrcCh=='['){IsoCeChA+=uchar(169);}
    else if (SrcCh=='`'){IsoCeChA+=uchar(190);}
    else if (SrcCh=='@'){IsoCeChA+=uchar(174);}
    else {IsoCeChA+=SrcCh;}
  }
  return IsoCeChA;
}

| char THtmlLxChDef::GetLc | ( | const char & | Ch | ) | const [inline] |
| void THtmlLxChDef::GetLcChA | ( | TChA & | ChA | ) | const [inline] |
Definition at line 56 of file html.h.
References TChA::Len(), and TChA::PutCh().

| TStr THtmlLxChDef::GetLcStr | ( | const TStr & | Str | ) | const [inline] |
| char THtmlLxChDef::GetUc | ( | const char & | Ch | ) | const [inline] |
Definition at line 52 of file html.h.
References TCh::Mn.
Referenced by THtmlLx::GetSym(), and THtmlLx::GetTag().

| void THtmlLxChDef::GetUcChA | ( | TChA & | ChA | ) | const [inline] |
Definition at line 54 of file html.h.
References TChA::Len(), and TChA::PutCh().

| TStr THtmlLxChDef::GetUcStr | ( | const TStr & | Str | ) | const [inline] |
| TStr THtmlLxChDef::GetWin1250FromYuascii | ( | const TChA & | ChA | ) | [static] |
Definition at line 149 of file html.cpp.
References TChA::Len().
{
  // Replaces the YUASCII stand-in characters: six of them become single
  // bytes above 127, the other four become the plain ASCII letters
  // c/C and d/D. Every other character is copied unchanged.
  TChA Win1250ChA;
  const int Chs=ChA.Len();
  for (int ChIx=0; ChIx<Chs; ChIx++){
    const char SrcCh=ChA[ChIx];
    if (SrcCh=='~'){Win1250ChA+=uchar(232);}
    else if (SrcCh=='^'){Win1250ChA+=uchar(200);}
    else if (SrcCh=='}'){Win1250ChA+='c';}
    else if (SrcCh==']'){Win1250ChA+='C';}
    else if (SrcCh=='|'){Win1250ChA+='d';}
    else if (SrcCh=='\\'){Win1250ChA+='D';}
    else if (SrcCh=='{'){Win1250ChA+=uchar(154);}
    else if (SrcCh=='['){Win1250ChA+=uchar(138);}
    else if (SrcCh=='`'){Win1250ChA+=uchar(158);}
    else if (SrcCh=='@'){Win1250ChA+=uchar(142);}
    else {Win1250ChA+=SrcCh;}
  }
  return Win1250ChA;
}

| bool THtmlLxChDef::IsAlNum | ( | const char & | Ch | ) | const [inline] |
| bool THtmlLxChDef::IsAlpha | ( | const char & | Ch | ) | const [inline] |
Definition at line 39 of file html.h.
References hlctAlpha, and TCh::Mn.
Referenced by THtmlLx::GetSym(), and THtmlLx::GetTag().

| bool THtmlLxChDef::IsEoln | ( | const char & | Ch | ) | const [inline] |
| bool THtmlLxChDef::IsLc | ( | const char & | Ch | ) | const [inline] |
| bool THtmlLxChDef::IsNum | ( | const char & | Ch | ) | const [inline] |
| bool THtmlLxChDef::IsSpace | ( | const char & | Ch | ) | const [inline] |
| bool THtmlLxChDef::IsSym | ( | const char & | Ch | ) | const [inline] |
| bool THtmlLxChDef::IsUc | ( | const char & | Ch | ) | const [inline] |
| bool THtmlLxChDef::IsUrl | ( | const char & | Ch | ) | const [inline] |
| bool THtmlLxChDef::IsWs | ( | const char & | Ch | ) | const [inline] |
Definition at line 36 of file html.h.
References TCh::CrCh, TCh::LfCh, and TCh::TabCh.
Referenced by THtmlLx::GetTag().
{
  // White space is a blank, a tab, a carriage return or a line feed.
  if (Ch==' '){return true;}
  if (Ch==TCh::TabCh){return true;}
  if (Ch==TCh::CrCh){return true;}
  return Ch==TCh::LfCh;}

| static PHtmlLxChDef THtmlLxChDef::Load | ( | TSIn & | SIn | ) | [inline, static] |
Definition at line 27 of file html.h.
{
  // Constructs a new definition from the input stream and returns it
  // wrapped in the PHtmlLxChDef pointer type.
  return PHtmlLxChDef(new THtmlLxChDef(SIn));}
| THtmlLxChDef& THtmlLxChDef::operator= | ( | const THtmlLxChDef & | ) | [inline] |
| void THtmlLxChDef::Save | ( | TSOut & | SOut | ) | [inline] |
| void THtmlLxChDef::SetChTy | ( | const THtmlLxChTy & | ChTy, |
| const TStr & | Str | ||
| ) | [private] |
| void THtmlLxChDef::SetEscStr | ( | const TStr & | SrcStr, |
| const TStr & | DstStr | ||
| ) | [private] |
Definition at line 29 of file html.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), and EscStrH.
Referenced by THtmlLxChDef().


| void THtmlLxChDef::SetUcCh | ( | const char & | UcCh, |
| const char & | LcCh | ||
| ) | [private] |
Definition at line 3 of file html.cpp.
References IAssert, LcChV, TCh::Mn, and UcChV.
Referenced by SetUcCh(), and THtmlLxChDef().
{
  // Registers UcCh as the upper case of LcCh and, where still free, LcCh as
  // the lower case of UcCh. Both tables are indexed by the character code
  // shifted by TCh::Mn.
  const int LcIdx=LcCh-TCh::Mn;
  const int UcIdx=UcCh-TCh::Mn;

  // upper-case table: several lower cases may share one upper case, but a
  // lower case may be assigned only while its entry is unset (0) or identity
  const TCh PrevUcCh=UcChV[LcIdx];
  IAssert((PrevUcCh==TCh(0))||(PrevUcCh==TCh(LcCh)));
  UcChV[LcIdx]=TCh(UcCh);

  // lower-case table: an upper case has exactly one lower case, so only an
  // unset (0) or identity entry is overwritten
  const TCh PrevLcCh=LcChV[UcIdx];
  if ((PrevLcCh==TCh(0))||(PrevLcCh==TCh(UcCh))){
    LcChV[UcIdx]=TCh(LcCh);
  }
}

| void THtmlLxChDef::SetUcCh | ( | const TStr & | Str | ) | [private] |
Definition at line 15 of file html.cpp.
References hlctAlpha, TStr::Len(), SetChTy(), and SetUcCh().
{
  // Registers a case group given as one string: Str[0] is the upper-case
  // form and each following character is one of its lower-case forms.

  // all characters of the group are letters
  SetChTy(hlctAlpha, Str);
  // pair the upper case with each lower case, in string order
  const int Chs=Str.Len();
  int LcChN=1;
  while (LcChN<Chs){
    SetUcCh(Str[0], Str[LcChN]);
    LcChN++;
  }
}

friend class TPt< THtmlLxChDef > [friend] |
PHtmlLxChDef THtmlLxChDef::ChDef = PHtmlLxChDef(new THtmlLxChDef()) [static] |
TIntV THtmlLxChDef::ChTyV [private] |
Definition at line 16 of file html.h.
Referenced by SetChTy(), and THtmlLxChDef().
TCRef THtmlLxChDef::CRef [private] |
TStrStrH THtmlLxChDef::EscStrH [private] |
Definition at line 19 of file html.h.
Referenced by GetEscStr(), and SetEscStr().
TChV THtmlLxChDef::LcChV [private] |
TChV THtmlLxChDef::UcChV [private] |