|
SNAP Library 2.1, User Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <html.h>
Public Types | |
| typedef TStrKdV | TArgNmValV |
Public Member Functions | |
| THtmlLx (const PSIn &_SIn, const bool &_DoParseArg=true) | |
| THtmlLx & | operator= (const THtmlLx &) |
| void | PutCh (const char &_Ch) |
| void | PutStr (const TStr &Str) |
| THtmlLxSym | GetSym () |
| PHtmlTok | GetTok (const bool &DoUc=true) |
| TStr | GetPreSpaceStr () const |
| int | GetArgs () const |
| TStr | GetArgNm (const int &ArgN) const |
| TStr | GetArgVal (const int &ArgN) const |
| bool | IsArg (const TStr &ArgNm) const |
| TStr | GetArg (const TStr &ArgNm, const TStr &DfArgVal=TStr()) const |
| void | PutArg (const TStr &ArgNm, const TStr &ArgVal) |
| TStr | GetFullBTagStr () const |
| void | MoveToStrOrEof (const TStr &Str) |
| void | MoveToBTagOrEof (const TStr &TagNm) |
| void | MoveToBTag2OrEof (const TStr &TagNm1, const TStr &TagNm2) |
| void | MoveToBTag3OrEof (const TStr &TagNm1, const TStr &TagNm2, const TStr &TagNm3) |
| void | MoveToBTagOrETagOrEof (const TStr &BTagNm, const TStr &ETagNm) |
| void | MoveToBTagArgOrEof (const TStr &TagNm, const TStr &ArgNm, const TStr &ArgVal) |
| void | MoveToBTagArg2OrEof (const TStr &TagNm, const TStr &ArgNm1, const TStr &ArgVal1, const TStr &ArgNm2, const TStr &ArgVal2, const bool &AndOpP=true) |
| void | MoveToBTagOrEof (const TStr &TagNm1, const TStr &ArgNm1, const TStr &ArgVal1, const TStr &TagNm2, const TStr &ArgNm2, const TStr &ArgVal2) |
| void | MoveToETagOrEof (const TStr &TagNm) |
| TStr | GetTextOnlyStrToEof () |
| TStr | GetStrToBTag (const TStr &TagNm, const bool &TxtOnlyP=false) |
| TStr | GetStrToBTag (const TStr &TagNm, const TStr &ArgNm, const TStr &ArgVal, const bool &TxtOnlyP=false) |
| TStr | GetStrToETag (const TStr &TagNm, const bool &TxtOnlyP=false) |
| TStr | GetStrToETag2 (const TStr &TagNm1, const TStr &TagNm2, const bool &TxtOnlyP=false) |
| TStr | GetStrInTag (const TStr &TagNm, const bool &TxtOnlyP=false) |
| TStr | GetHRefBeforeStr (const TStr &Str) |
| bool | IsGetBTag (const TStr &TagNm) |
| bool | IsGetETag (const TStr &TagNm) |
Static Public Member Functions | |
| static TStr | GetSymStr (const THtmlLxSym &Sym) |
| static TStr | GetEscapedStr (const TChA &ChA) |
| static TStr | GetAsciiStr (const TChA &ChA, const char &GenericCh='_') |
| static void | GetTokStrV (const TStr &Str, TStrV &TokStrV) |
| static TStr | GetNoTag (const TStr &Str) |
Public Attributes | |
| THtmlLxSym | Sym |
| int | SymBChX |
| int | SymEChX |
| TChA | ChA |
| TChA | UcChA |
| TChA | SymChA |
| int | PreSpaces |
| TChA | PreSpaceChA |
| TArgNmValV | ArgNmValV |
Private Member Functions | |
| void | GetCh () |
| void | GetEscCh () |
| void | GetMetaTag () |
| void | GetTag () |
Private Attributes | |
| PSIn | SIn |
| TSIn & | RSIn |
| bool | DoParseArg |
| TChA | ChStack |
| char | Ch |
| int | ChX |
| bool | EscCh |
| TChA | EscChA |
| TChA | ArgNm |
| TChA | ArgVal |
Static Private Attributes | |
| static THtmlLxChDef | ChDef |
| typedef TStrKdV THtmlLx::TArgNmValV |
| THtmlLx::THtmlLx | ( | const PSIn & | _SIn, |
| const bool & | _DoParseArg = true |
||
| ) | [inline] |
| TStr THtmlLx::GetArg | ( | const TStr & | ArgNm, |
| const TStr & | DfArgVal = TStr() |
||
| ) | const [inline] |
| TStr THtmlLx::GetArgNm | ( | const int & | ArgN | ) | const [inline] |
| int THtmlLx::GetArgs | ( | ) | const [inline] |
| TStr THtmlLx::GetArgVal | ( | const int & | ArgN | ) | const [inline] |
| TStr THtmlLx::GetAsciiStr | ( | const TChA & | ChA, |
| const char & | GenericCh = '_' |
||
| ) | [static] |
| void THtmlLx::GetCh | ( | ) | [inline, private] |
| TStr THtmlLx::GetEscapedStr | ( | const TChA & | ChA | ) | [static] |
Definition at line 568 of file html.cpp.
{
  // Returns a copy of ChA in which the five markup-significant characters
  // (" & ' < >) are replaced by their entity references; every other
  // character is copied through unchanged.
  // The entity literals below had been entity-decoded by the documentation
  // extraction (leaving e.g. an unterminated """ literal and a no-op "&"
  // replacement); they are restored here.
  TChA EscapedChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '"': EscapedChA+="&quot;"; break;
      case '&': EscapedChA+="&amp;"; break;
      case '\'': EscapedChA+="&apos;"; break;
      case '<': EscapedChA+="&lt;"; break;
      case '>': EscapedChA+="&gt;"; break;
      default: EscapedChA+=Ch;
    }
  }
  return EscapedChA;
}
| void THtmlLx::GetEscCh | ( | ) | [private] |
Definition at line 195 of file html.cpp.
{
  // Reads the next character and, if it opens an '&' escape sequence,
  // collects the sequence into EscChA and pushes a replacement back onto
  // the input. EscCh records whether the character read was '&'.
  GetCh();
  EscCh=(Ch=='&');
  if (!EscCh){return;}

  // start collecting the raw sequence with the leading '&'
  EscChA.Clr();
  EscChA.AddCh(Ch);
  GetCh();

  if (Ch=='#'){
    // numeric form: "&#" followed by decimal digits
    EscChA.AddCh(Ch);
    GetCh();
    if ((Ch<'0')||('9'<Ch)){
      // no digit after "&#": hand the two consumed characters back
      PutCh('#'); PutCh('&');
      return;
    }
    do {
      EscChA.AddCh(Ch);
      GetCh();
    } while ((Ch>='0')&&(Ch<='9'));
    // the terminating ';' is optional for the numeric form
    if (Ch==';'){GetCh();}
    PutStr(ChDef.GetEscStr(EscChA));
    return;
  }

  const bool LetterP=((Ch>='a')&&(Ch<='z'))||((Ch>='A')&&(Ch<='Z'));
  if (!LetterP){
    // a bare '&' that is not followed by '#' or a letter
    PutCh('&');
    return;
  }

  // named form: a letter followed by letters or digits
  do {
    EscChA.AddCh(Ch);
    GetCh();
  } while (((Ch>='A')&&(Ch<='Z'))||((Ch>='a')&&(Ch<='z'))||((Ch>='0')&&(Ch<='9')));
  if (Ch!=';'){
    // not terminated by ';': push the collected text back as it was read
    PutStr(EscChA);
    return;
  }
  GetCh();
  PutStr(ChDef.GetEscStr(EscChA));
}
| TStr THtmlLx::GetFullBTagStr | ( | ) | const |
| TStr THtmlLx::GetHRefBeforeStr | ( | const TStr & | Str | ) |
| void THtmlLx::GetMetaTag | ( | ) | [private] |
| TStr THtmlLx::GetNoTag | ( | const TStr & | Str | ) | [static] |
| TStr THtmlLx::GetPreSpaceStr | ( | ) | const [inline] |
Definition at line 132 of file html.h.
{
  // Builds a string from the PreSpaces counter via TStr::GetSpaceStr
  // (presumably that many blanks -- confirm against TStr).
  TStr SpaceStr=TStr::GetSpaceStr(PreSpaces);
  return SpaceStr;
}
| TStr THtmlLx::GetStrInTag | ( | const TStr & | TagNm, |
| const bool & | TxtOnlyP = false |
||
| ) |
Definition at line 525 of file html.cpp.
{
  // First advance to the begin-tag TagNm (or end of input), then gather
  // everything up to the matching end-tag; TxtOnlyP is forwarded as given.
  MoveToBTagOrEof(TagNm);
  TStr InTagStr=GetStrToETag(TagNm, TxtOnlyP);
  return InTagStr;
}
| TStr THtmlLx::GetStrToBTag | ( | const TStr & | TagNm, |
| const bool & | TxtOnlyP = false |
||
| ) |
| TStr THtmlLx::GetStrToBTag | ( | const TStr & | TagNm, |
| const TStr & | ArgNm, | ||
| const TStr & | ArgVal, | ||
| const bool & | TxtOnlyP = false |
||
| ) |
| TStr THtmlLx::GetStrToETag | ( | const TStr & | TagNm, |
| const bool & | TxtOnlyP = false |
||
| ) |
| TStr THtmlLx::GetStrToETag2 | ( | const TStr & | TagNm1, |
| const TStr & | TagNm2, | ||
| const bool & | TxtOnlyP = false |
||
| ) |
| THtmlLxSym THtmlLx::GetSym | ( | ) |
Definition at line 277 of file html.cpp.
{
// Scans the next lexical symbol starting at the current character Ch.
// Resets the per-symbol state (ChA, UcChA, PreSpaces, PreSpaceChA, ArgNmValV,
// SymChA), classifies the symbol via ChDef.GetChTy(Ch), stores its kind in
// Sym, records its start/end positions in SymBChX/SymEChX, and returns Sym.
// NOTE(review): this body returns Sym and matches the GetSym() declaration;
// the signature row directly above it in this listing may be mislabeled.
// prepare symbol descriptions
ChA.Clr(); UcChA.Clr();
PreSpaces=0; PreSpaceChA.Clr();
ArgNmValV.Clr();
// skip white-space
// (the skipped characters are recorded in PreSpaceChA/PreSpaces only when
// ChX>0; GetEscCh() is called on every iteration regardless)
while (ChDef.IsSpace(Ch)){
if (ChX>0){PreSpaceChA+=Ch; PreSpaces++;} GetEscCh();}
// parse symbol
SymChA.Clr(); SymChA+=Ch; SymBChX=ChX;
switch (ChDef.GetChTy(Ch)){
case hlctAlpha:
// word: a run of alphanumerics; ChA keeps the characters as read, UcChA
// keeps ChDef.GetUc(Ch) of each (presumably the upper-cased form)
Sym=hsyStr;
forever{
do {
ChA.AddCh(Ch); UcChA.AddCh(ChDef.GetUc(Ch)); GetEscCh();
} while (ChDef.IsAlNum(Ch));
if (Ch=='.'){
// a '.' stays inside the word only if an alphanumeric follows it;
// otherwise the look-ahead character is handed to PutCh (presumably
// pushed back onto the input) and Ch is reset to '.'
GetCh();
if (ChDef.IsAlNum(Ch)){ChA.AddCh('.'); UcChA.AddCh('.');}
else {PutCh(Ch); Ch='.'; break;}
} else {break;}
}
break;
case hlctNum:
// number: a run of digits, with the same embedded-'.' rule as above
Sym=hsyNum;
forever{
do {
ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
} while (ChDef.IsNum(Ch));
if (Ch=='.'){
GetCh();
if (ChDef.IsAlNum(Ch)){ChA.AddCh('.'); UcChA.AddCh('.');}
else {PutCh(Ch); Ch='.'; break;}
} else if (ChDef.IsAlpha(Ch)){
// a letter directly after the digits reclassifies the symbol as a
// string; scanning continues with the next loop iteration
Sym=hsyStr;
} else {
break;
}
}
break;
case hlctSym:
// single symbol character; a leading '.' immediately followed by an
// alphanumeric is instead read as a string starting with '.'
Sym=hsySSym; ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
if ((ChA.LastCh()=='.')&&(ChDef.IsAlNum(Ch))){
Sym=hsyStr;
do {
ChA.AddCh(Ch); UcChA.AddCh(ChDef.GetUc(Ch)); GetEscCh();
} while (ChDef.IsAlNum(Ch));
}
break;
case hlctLTag:
// when EscCh is set the character is treated as a plain symbol rather
// than the start of a tag (presumably because it was produced by an
// escape sequence -- confirm against GetEscCh/PutStr)
if (EscCh){
Sym=hsySSym; ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
} else {
// real tag: '!' right after the opener selects GetMetaTag(), anything
// else is handled by GetTag()
GetCh();
if (Ch=='!'){GetCh(); GetMetaTag();} else {GetTag();}
}
break;
case hlctRTag:
// NOTE(review): both branches below are identical, so EscCh has no
// effect for this character type
if (EscCh){
Sym=hsySSym; ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
} else {
Sym=hsySSym; ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
}
break;
case hlctEof: Sym=hsyEof; break;
default: Sym=hsyUndef; GetEscCh();
}
// set symbol last-character-position
SymEChX=ChX-1;
// delete last character
// (presumably the look-ahead character that is not part of this symbol --
// SymChA is filled outside this block; confirm against GetCh)
if (!SymChA.Empty()){SymChA.Pop();}
// return symbol
return Sym;
}
| TStr THtmlLx::GetSymStr | ( | const THtmlLxSym & | Sym | ) | [static] |
Definition at line 553 of file html.cpp.
{
  // Translates a lexer symbol kind into its short printable name.
  // Any value outside the known kinds hits Fail and then yields an
  // empty TStr.
  if (Sym==hsyUndef){return "Undef";}
  if (Sym==hsyStr){return "Str";}
  if (Sym==hsyNum){return "Num";}
  if (Sym==hsySSym){return "SSym";}
  if (Sym==hsyUrl){return "Url";}
  if (Sym==hsyBTag){return "BTag";}
  if (Sym==hsyETag){return "ETag";}
  if (Sym==hsyMTag){return "MTag";}
  if (Sym==hsyEof){return "Eof";}
  Fail;
  return TStr();
}
| void THtmlLx::GetTag | ( | ) | [private] |
Definition at line 236 of file html.cpp.
{
// Parses a tag whose opening character has already been consumed by the
// caller. Sets Sym to hsyETag when the tag starts with '/', otherwise to
// hsyBTag; stores the tag name wrapped in '<' and '>' in both UcChA and ChA;
// and, when DoParseArg is set, appends each name=value attribute to
// ArgNmValV.
if (Ch=='/'){Sym=hsyETag; GetCh();} else {Sym=hsyBTag;}
// tag name: alphanumerics and ':' passed through ChDef.GetUc (presumably
// upper-casing); the '/' of an end tag is not part of the stored name
UcChA.AddCh('<');
while (ChDef.IsAlNum(Ch)||(Ch==':')){
UcChA.AddCh(ChDef.GetUc(Ch)); GetCh();}
UcChA.AddCh('>');
// ChA receives the same text as UcChA, not the characters as originally read
ChA=UcChA;
if (DoParseArg){
// attribute loop: runs until the closing '>' or end of input
while ((Ch!='>')&&(Ch!=TCh::EofCh)){
// skip everything up to the next letter (start of an attribute name)
while ((!ChDef.IsAlpha(Ch))&&(Ch!='>')&&(Ch!=TCh::EofCh)){GetCh();}
if (ChDef.IsAlpha(Ch)){
ArgNm.Clr(); ArgVal.Clr();
// attribute name: alphanumerics and '-', stored via ChDef.GetUc
while (ChDef.IsAlNum(Ch)||(Ch=='-')){ArgNm.AddCh(ChDef.GetUc(Ch)); GetCh();}
while (ChDef.IsWs(Ch)){GetCh();}
// only attributes followed by '=' are recorded; a name without '=' is
// read and then dropped
if (Ch=='='){
GetCh(); while (ChDef.IsWs(Ch)){GetCh();}
if (Ch=='"'){
// double-quoted value: ends at the closing quote, at '>' or at end of
// input; end-of-line characters inside the value are skipped
GetCh();
while ((Ch!=TCh::EofCh)&&(Ch!='"')&&(Ch!='>')){
if (!ChDef.IsEoln(Ch)){ArgVal.AddCh(Ch);} GetCh();}
if (Ch=='"'){GetCh();}
} else if (Ch=='\''){
// single-quoted value: same rules as the double-quoted form
GetCh();
while ((Ch!=TCh::EofCh)&&(Ch!='\'')&&(Ch!='>')){
if (!ChDef.IsEoln(Ch)){ArgVal.AddCh(Ch);} GetCh();}
if (Ch=='\''){GetCh();}
} else {
// unquoted value: runs up to the next whitespace, '>' or end of input
while ((!ChDef.IsWs(Ch))&&(Ch!='>')&&(Ch!=TCh::EofCh)){
ArgVal.AddCh(Ch); GetCh();}
}
ArgNmValV.Add(TStrKd(ArgNm, ArgVal));
}
}
}
} else {
// attributes not wanted: skip straight to the closing '>' or end of input
while ((Ch!='>')&&(Ch!=TCh::EofCh)){GetCh();}
}
// unless input is exhausted, read the character after the tag through
// GetEscCh()
if (Ch!=TCh::EofCh){GetEscCh();}
}
| PHtmlTok THtmlLx::GetTok | ( | const bool & | DoUc = true | ) |
| void THtmlLx::GetTokStrV | ( | const TStr & | Str, |
| TStrV & | TokStrV | ||
| ) | [static] |
| bool THtmlLx::IsArg | ( | const TStr & | ArgNm | ) | const [inline] |
| bool THtmlLx::IsGetBTag | ( | const TStr & | TagNm | ) |
| bool THtmlLx::IsGetETag | ( | const TStr & | TagNm | ) |
| void THtmlLx::MoveToBTag2OrEof | ( | const TStr & | TagNm1, |
| const TStr & | TagNm2 | ||
| ) |
| void THtmlLx::MoveToBTag3OrEof | ( | const TStr & | TagNm1, |
| const TStr & | TagNm2, | ||
| const TStr & | TagNm3 | ||
| ) |
| void THtmlLx::MoveToBTagArg2OrEof | ( | const TStr & | TagNm, |
| const TStr & | ArgNm1, | ||
| const TStr & | ArgVal1, | ||
| const TStr & | ArgNm2, | ||
| const TStr & | ArgVal2, | ||
| const bool & | AndOpP = true |
||
| ) |
Definition at line 410 of file html.cpp.
{
  // Consumes symbols until a begin-tag named TagNm with matching attributes
  // is reached, or the input ends. With AndOpP set both (ArgNm1, ArgVal1)
  // and (ArgNm2, ArgVal2) must match; otherwise one of them is enough.
  for (GetSym(); Sym!=hsyEof; GetSym()){
    // only begin-tags with the requested name are candidates
    if (Sym!=hsyBTag){continue;}
    if (!(UcChA==TagNm)){continue;}
    bool MatchP=(IsArg(ArgNm1))&&(GetArg(ArgNm1)==ArgVal1);
    // The second pair decides the outcome exactly when the first has not
    // already settled it: AND with a first match, or OR with a first miss.
    if (AndOpP==MatchP){
      MatchP=(IsArg(ArgNm2))&&(GetArg(ArgNm2)==ArgVal2);
    }
    if (MatchP){break;}
  }
}
| void THtmlLx::MoveToBTagArgOrEof | ( | const TStr & | TagNm, |
| const TStr & | ArgNm, | ||
| const TStr & | ArgVal | ||
| ) |
| void THtmlLx::MoveToBTagOrEof | ( | const TStr & | TagNm | ) |
| void THtmlLx::MoveToBTagOrETagOrEof | ( | const TStr & | BTagNm, |
| const TStr & | ETagNm | ||
| ) |
| void THtmlLx::MoveToETagOrEof | ( | const TStr & | TagNm | ) |
| void THtmlLx::MoveToStrOrEof | ( | const TStr & | Str | ) |
| void THtmlLx::PutArg | ( | const TStr & | ArgNm, |
| const TStr & | ArgVal | ||
| ) | [inline] |
| void THtmlLx::PutCh | ( | const char & | _Ch | ) | [inline] |
| void THtmlLx::PutStr | ( | const TStr & | Str | ) | [inline] |
TChA THtmlLx::ArgNm [private] |
TChA THtmlLx::ArgVal [private] |
char THtmlLx::Ch [private] |
THtmlLxChDef THtmlLx::ChDef [static, private] |
TChA THtmlLx::ChStack [private] |
int THtmlLx::ChX [private] |
bool THtmlLx::DoParseArg [private] |
bool THtmlLx::EscCh [private] |
TChA THtmlLx::EscChA [private] |
TSIn& THtmlLx::RSIn [private] |
PSIn THtmlLx::SIn [private] |
| int THtmlLx::SymBChX |
| int THtmlLx::SymEChX |