SNAP Library, Developer Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
xml.cpp
Go to the documentation of this file.
00001 
00002 // Xml-Object-Saving
// Definition of the static hash used by TXmlObjSer::GetTagNm to remember,
// for each type name, the XML tag name derived from it.
00003 TStrStrH TXmlObjSer::TypeNmToTagNmH;
00004 
00005 TStr TXmlObjSer::GetTagNm(const TStr& TypeNm){
00006   TStr& XmlTagNm=TypeNmToTagNmH.AddDat(TypeNm);
00007   if (XmlTagNm.Empty()){
00008     TChA XmlTagChA=TypeNm;
00009     for (int ChN=0; ChN<XmlTagChA.Len(); ChN++){
00010       char Ch=XmlTagChA[ChN];
00011       if (!((('A'<=Ch)&&(Ch<='Z'))||(('a'<=Ch)&&(Ch<='z'))||(('0'<=Ch)&&(Ch<='9')))){
00012         XmlTagChA.PutCh(ChN, '_');
00013       }
00014     }
00015     while ((XmlTagChA.Len()>0)&&(XmlTagChA.LastCh()=='_')){
00016       XmlTagChA.Pop();}
00017     XmlTagNm=XmlTagChA;
00018   }
00019   return XmlTagNm;
00020 }
00021 
00022 void TXmlObjSer::AssertXmlHd(
00023  const PXmlTok& XmlTok, const TStr& Nm, const TStr& TypeNm){
00024   // check if the token is full
00025   EAssertR(!XmlTok.Empty(), "Xml-Token Empty");
00026   // if name is empty then tag=type else tag=name
00027   if (!Nm.Empty()){
00028     // check if the token is tag
00029     if (!XmlTok->IsTag()){
00030       TStr ArgStr1="Expected: Tag";
00031       TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr();
00032       TExcept::Throw("Invalid Xml-Token", ArgStr1, ArgStr2);
00033     }
00034     if (Nm!="-"){
00035       // check if the tag is correct
00036       if (!XmlTok->IsTag(Nm)){
00037         TStr ArgStr1=TStr("Expected: ")+Nm;
00038         TStr ArgStr2=TStr("Found: ")+XmlTok->GetStr();
00039         TExcept::Throw("Invalid Xml-Tag", ArgStr1, ArgStr2);
00040       }
00041       // check if the type is correct
00042       TStr TypeArgVal=XmlTok->GetStrArgVal("Type");
00043       if (TypeArgVal!=TypeNm){
00044         TStr ArgStr1=TStr("Expected: ")+TypeNm;
00045         TStr ArgStr2=TStr("Found: ")+TypeArgVal;
00046         TExcept::Throw("Invalid Xml-Type", ArgStr1, ArgStr2);
00047       }
00048     }
00049   } else {
00050     // check if the tag is correct
00051     if (!XmlTok->IsTag(TypeNm)){
00052       TStr ArgStr1=TStr("Expected: ")+TypeNm;
00053       TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr();
00054       TExcept::Throw("Invalid Xml-Type-Tag", ArgStr1, ArgStr2);
00055     }
00056   }
00057 }
00058 
00059 bool TXmlObjSer::GetBoolArg(const PXmlTok& XmlTok, const TStr& Nm){
00060   TStr ValStr;
00061   if (XmlTok->IsArg(Nm, ValStr)){
00062     bool Val;
00063     if (ValStr.IsBool(Val)){
00064       return Val;
00065     } else {
00066       TExcept::Throw("Invalid Xml-Argument Boolean-Value", Nm, ValStr);
00067     }
00068   } else {
00069     TExcept::Throw("Xml-Argument Missing", Nm);
00070   }
00071   Fail; return 0;
00072 }
00073 
00074 int TXmlObjSer::GetIntArg(const PXmlTok& XmlTok, const TStr& Nm){
00075   TStr ValStr;
00076   if (XmlTok->IsArg(Nm, ValStr)){
00077     int Val;
00078     if (ValStr.IsInt(Val)){
00079       return Val;
00080     } else {
00081       TExcept::Throw("Invalid Xml-Argument Integer-Value", Nm, ValStr);
00082     }
00083   } else {
00084     TExcept::Throw("Xml-Argument Missing", Nm);
00085   }
00086   Fail; return 0;
00087 }
00088 
00089 int64 TXmlObjSer::GetInt64Arg(const PXmlTok& XmlTok, const TStr& Nm){
00090   TStr ValStr;
00091   if (XmlTok->IsArg(Nm, ValStr)){
00092     int64 Val;
00093     if (ValStr.IsInt64(Val)){
00094       return Val;
00095     } else {
00096       TExcept::Throw("Invalid Xml-Argument Integer64-Value", Nm, ValStr);
00097     }
00098   } else {
00099     TExcept::Throw("Xml-Argument Missing", Nm);
00100   }
00101   Fail; return 0;
00102 }
00103 
00104 double TXmlObjSer::GetFltArg(const PXmlTok& XmlTok, const TStr& Nm){
00105   TStr ValStr;
00106   if (XmlTok->IsArg(Nm, ValStr)){
00107     double Val;
00108     if (ValStr.IsFlt(Val)){
00109       return Val;
00110     } else {
00111       TExcept::Throw("Invalid Xml-Argument Double-Value", Nm, ValStr);
00112     }
00113   } else {
00114     TExcept::Throw("Xml-Argument Missing", Nm);
00115   }
00116   Fail; return 0;
00117 }
00118 
00120 // Xml-Object-Serialization-Tag-Name
00121 TXmlObjSerTagNm::TXmlObjSerTagNm(
00122  TSOut& _SOut, const bool& ETagP,
00123  const TStr& Nm, const TStr& TypeNm,
00124  const TStr& ArgNm, const TStr& ArgVal):
00125   TagNm(), SOut(&_SOut){
00126   if (Nm!="-"){
00127     SOut->PutCh('<');
00128     if (Nm.Empty()){
00129       SOut->PutStr(TagNm=TypeNm);
00130     } else {
00131       SOut->PutStr(TagNm=Nm);
00132       SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"');
00133     }
00134     if (!ArgNm.Empty()){
00135       SOut->PutCh(' '); SOut->PutStr(ArgNm); SOut->PutCh('=');
00136       SOut->PutCh('"'); SOut->PutStr(ArgVal); SOut->PutCh('"');
00137     }
00138     if (ETagP){
00139       SOut->PutCh('/'); TagNm="";}
00140     SOut->PutCh('>');
00141   }
00142 }
00143 
00144 TXmlObjSerTagNm::TXmlObjSerTagNm(
00145  TSOut& _SOut, const bool& ETagP,
00146  const TStr& Nm, const TStr& TypeNm,
00147  const TStr& ArgNm1, const TStr& ArgVal1,
00148  const TStr& ArgNm2, const TStr& ArgVal2,
00149  const TStr& ArgNm3, const TStr& ArgVal3,
00150  const TStr& ArgNm4, const TStr& ArgVal4):
00151   TagNm(), SOut(&_SOut){
00152   if (Nm!="-"){
00153     SOut->PutCh('<');
00154     if (Nm.Empty()){
00155       SOut->PutStr(TagNm=TypeNm);
00156     } else {
00157       SOut->PutStr(TagNm=Nm);
00158       SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"');
00159     }
00160     if (!ArgNm1.Empty()){
00161       SOut->PutCh(' '); SOut->PutStr(ArgNm1); SOut->PutCh('=');
00162       SOut->PutCh('"'); SOut->PutStr(ArgVal1); SOut->PutCh('"');
00163     }
00164     if (!ArgNm2.Empty()){
00165       SOut->PutCh(' '); SOut->PutStr(ArgNm2); SOut->PutCh('=');
00166       SOut->PutCh('"'); SOut->PutStr(ArgVal2); SOut->PutCh('"');
00167     }
00168     if (!ArgNm3.Empty()){
00169       SOut->PutCh(' '); SOut->PutStr(ArgNm3); SOut->PutCh('=');
00170       SOut->PutCh('"'); SOut->PutStr(ArgVal3); SOut->PutCh('"');
00171     }
00172     if (!ArgNm4.Empty()){
00173       SOut->PutCh(' '); SOut->PutStr(ArgNm4); SOut->PutCh('=');
00174       SOut->PutCh('"'); SOut->PutStr(ArgVal4); SOut->PutCh('"');
00175     }
00176     if (ETagP){
00177       SOut->PutCh('/'); TagNm="";}
00178     SOut->PutCh('>');
00179   }
00180 }
00181 
00182 TXmlObjSerTagNm::~TXmlObjSerTagNm(){
00183   if (!TagNm.Empty()){
00184     SOut->PutCh('<'); SOut->PutCh('/'); SOut->PutStr(TagNm); SOut->PutCh('>');
00185   }
00186 }
00187 
00189 // Xml-Chars
00190 void TXmlChDef::SetChTy(TBSet& ChSet, const int& MnCh, const int& MxCh){
00191   IAssert((0<=MnCh)&&((MxCh==-1)||((MnCh<=MxCh)&&(MxCh<Chs))));
00192   ChSet.Incl(MnCh);
00193   for (int Ch=MnCh+1; Ch<=MxCh; Ch++){
00194     ChSet.Incl(Ch);}
00195 }
00196 
00197 void TXmlChDef::SetChTy(TBSet& ChSet, const TStr& Str){
00198   for (int ChN=0; ChN<Str.Len(); ChN++){
00199     uchar Ch=Str[ChN];
00200     ChSet.Incl(Ch);
00201   }
00202 }
00203 
00204 void TXmlChDef::SetEntityVal(const TStr& Nm, const TStr& Val){
00205   EntityNmToValH.AddDat(Nm, Val);
00206 }
00207 
00208 TXmlChDef::TXmlChDef():
00209   Chs(TUCh::Vals),
00210   CharChSet(), CombChSet(), ExtChSet(),
00211   LetterChSet(), DigitChSet(), NameChSet(), PubidChSet(),
00212   EntityNmToValH(100){
00213 
00214   // Character-Sets
00215   // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | ...
00216   CharChSet.Gen(Chs);
00217   // ... because of DMoz (temporary patch)
00218   SetChTy(CharChSet, 0x1); SetChTy(CharChSet, 0x3); SetChTy(CharChSet, 0x6);
00219   SetChTy(CharChSet, 11); SetChTy(CharChSet, 24); SetChTy(CharChSet, 27);
00220   // regular characters
00221   SetChTy(CharChSet, 0x9); SetChTy(CharChSet, 0xA); SetChTy(CharChSet, 0xD);
00222   SetChTy(CharChSet, 0x20, TUCh::Mx);
00223   // BaseChar ::=  [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] |
00224   //  [#x00D8-#x00F6] | [#x00F8-#x00FF] | ...
00225   TBSet BaseChSet(Chs);
00226   SetChTy(BaseChSet, 0x41, 0x5A); SetChTy(BaseChSet, 0x61, 0x7A);
00227   SetChTy(BaseChSet, 0xC0, 0xD6); SetChTy(BaseChSet, 0xD8, 0xF6);
00228   SetChTy(BaseChSet, 0xF8, 0xFF);
00229   // Ideographic ::= ...
00230   TBSet IdeoChSet(Chs);
00231   // CombiningChar ::= ...
00232   CombChSet.Gen(Chs);
00233   // Extender ::=  #x00B7 | ...
00234   ExtChSet.Gen(Chs);
00235   SetChTy(ExtChSet, 0xB7);
00236   // Letter ::=  BaseChar | Ideographic
00237   LetterChSet=BaseChSet|IdeoChSet;
00238   // Digit ::=  [#x0030-#x0039] | ...
00239   DigitChSet.Gen(Chs);
00240   SetChTy(DigitChSet, 0x30, 0x39);
00241   // NameChar ::=  Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar
00242   NameChSet=LetterChSet|DigitChSet|
00243    uchar('.')|uchar('-')|uchar('_')|uchar(':')|CombChSet;
00244   // PubidChar ::=  #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
00245   PubidChSet.Gen(Chs);
00246   SetChTy(PubidChSet, 0x20); SetChTy(PubidChSet, 0xD); SetChTy(PubidChSet, 0xA);
00247   SetChTy(PubidChSet, 'a', 'z'); SetChTy(PubidChSet, 'A', 'Z');
00248   SetChTy(PubidChSet, '0', '9'); SetChTy(PubidChSet, "-'()+,./:=?;!*#@$_%");
00249 
00250   // Standard-Entity-Sequences
00251   SetEntityVal("amp", "&");
00252   SetEntityVal("lt", "<"); SetEntityVal("gt", ">");
00253   SetEntityVal("apos", "'"); SetEntityVal("quot", "\"");
00254 }
00255 
00257 // Xml-Lexical
// Definition of the static character-definition object that the TXmlLx
// methods query (ChDef.IsWs, ChDef.IsChar, ChDef.IsName, ...).
00258 TXmlChDef TXmlLx::ChDef;
00259 
00260 uchar TXmlLx::GetCh(){
00261   EAssert(Ch!=TCh::EofCh);
00262   PrevCh=Ch;
00263   if (ChStack.Empty()){Ch=(RSIn.Eof()) ? TCh::EofCh : RSIn.GetCh();}
00264   else {Ch=ChStack.Pop();}
00265   ChN++; if (Ch==TCh::LfCh){LnN++; LnChN=0;} else {LnChN++;}
00266   //putchar(Ch);
00267   return Ch;
00268 }
00269 
00270 void TXmlLx::ToNrSpacing(){
00271   if (Spacing==xspIntact){
00272   } else
00273   if (Spacing==xspPreserve){
00274     int SrcChN=0; int DstChN=0;
00275     while (SrcChN<TxtChA.Len()){
00276       if (TxtChA[SrcChN]==TCh::CrCh){
00277         TxtChA.PutCh(DstChN, TCh::LfCh); SrcChN++; DstChN++;
00278         if ((SrcChN<TxtChA.Len())&&(TxtChA[SrcChN]==TCh::LfCh)){SrcChN++;}
00279       } else {
00280         if (SrcChN!=DstChN){
00281           TxtChA.PutCh(DstChN, TxtChA[SrcChN]);}
00282         SrcChN++; DstChN++;
00283       }
00284     }
00285     TxtChA.Trunc(DstChN);
00286   } else
00287   if (Spacing==xspSeparate){
00288     // squeeze series of white-spaces to single space
00289     int SrcChN=0; int DstChN=0;
00290     while (SrcChN<TxtChA.Len()){
00291       if (ChDef.IsWs(TxtChA[SrcChN])){
00292         if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){
00293           SrcChN++;
00294         } else {
00295           TxtChA.PutCh(DstChN, ' ');
00296           SrcChN++; DstChN++;
00297         }
00298       } else {
00299         TxtChA.PutCh(DstChN, TxtChA[SrcChN]);
00300         SrcChN++; DstChN++;
00301       }
00302     }
00303     TxtChA.Trunc(DstChN);
00304   } else
00305   if (Spacing==xspTruncate){
00306     // cut leading and trailing white-spaces and
00307     // squeeze series of white-spaces to single space
00308     int SrcChN=0; int DstChN=0;
00309     while (SrcChN<TxtChA.Len()){
00310       if (ChDef.IsWs(TxtChA[SrcChN])){
00311         if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){
00312           SrcChN++;
00313         } else {
00314           TxtChA.PutCh(DstChN, ' ');
00315           SrcChN++; DstChN++;
00316         }
00317       } else {
00318         TxtChA.PutCh(DstChN, TxtChA[SrcChN]);
00319         SrcChN++; DstChN++;
00320       }
00321     }
00322     TxtChA.Trunc(DstChN);
00323     // delete trailing white-spaces
00324     while ((TxtChA.Len()>0)&&(ChDef.IsWs(TxtChA.LastCh()))){
00325       TxtChA.Pop();}
00326   } else {
00327     Fail;
00328   }
00329 }
00330 
00331 void TXmlLx::GetWs(const bool& IsRq){
00332   // [3] S ::=  (#x20 | #x9 | #xD | #xA)+
00333   int WSpaces=0; TxtChA.Clr();
00334   while (ChDef.IsWs(Ch)){
00335     WSpaces++; TxtChA+=Ch; GetCh();}
00336   if (IsRq&&(WSpaces==0)){
00337     EThrow("White-space required.");}
00338 }
00339 
00340 TStr TXmlLx::GetReference(){
00341   // [67] Reference ::=  EntityRef | CharRef
00342   if (Ch=='#'){
00343     // [66]  CharRef ::=  '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
00344     TChA RefChA; int RefCd=0;
00345     if (GetCh()=='x'){
00346       // hex-decimal character code
00347       forever {
00348         GetCh();
00349         if (TCh::IsHex(Ch)){
00350           RefChA+=Ch;
00351           RefCd=RefCd*16+TCh::GetHex(Ch);
00352         } else {
00353           break;
00354         }
00355       }
00356     } else {
00357       // decimal character code
00358       forever {
00359         if (TCh::IsNum(Ch)){
00360           RefChA+=Ch;
00361           RefCd=RefCd*10+TCh::GetNum(Ch);
00362         } else {
00363           break;
00364         }
00365         GetCh();
00366       }
00367     }
00368     if ((!RefChA.Empty())&&(Ch==';')){
00369       GetCh();
00370           if (RefCd < 0x100) {
00371                   // 8-bit char
00372               uchar RefCh=uchar(RefCd);
00373                   return TStr(RefCh);
00374           } else {
00375                   TStr ResStr = TUStr::EncodeUtf8(RefCd);
00376                   return ResStr;
00377           }
00378     } else {
00379       EThrow("Invalid Char-Reference."); Fail; return TStr();
00380     }
00381   } else {
00382     // [68]  EntityRef ::=  '&' Name ';'
00383     TStr EntityNm=GetName();
00384     if ((!EntityNm.Empty())&&(Ch==';')){
00385       GetCh();
00386       TStr EntityVal;
00387       if (IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/}
00388       else if (ChDef.IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/}
00389       else {EThrow(TStr("Entity-Reference (")+EntityNm+") does not exist.");}
00390       return EntityVal;
00391     } else {
00392       EThrow("Invalid Entity-Reference."); Fail; return TStr();
00393     }
00394   }
00395 }
00396 
00397 TStr TXmlLx::GetPEReference(){
00398   // [69]  PEReference ::=  '%' Name ';'
00399   TStr EntityNm=GetName();
00400   if ((EntityNm.Empty())||(Ch!=';')){EThrow("Invalid PEntity-Reference.");}
00401   GetCh();
00402   TStr EntityVal;
00403   if (IsPEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/}
00404   else {EThrow(TStr("PEntity-Reference (")+EntityNm+") does not exist.");}
00405   return EntityVal;
00406 }
00407 
00408 void TXmlLx::GetEq(){
00409   // [25] Eq ::=  S? '=' S?
00410   GetWs(false);
00411   if (Ch=='='){GetCh();}
00412   else {EThrow("Equality ('=') character expected.");}
00413   GetWs(false);
00414 }
00415 
00416 TStr TXmlLx::GetName(){
00417   // [5] Name ::=  (Letter | '_' | ':') (NameChar)*
00418   TChA NmChA;
00419   if (ChDef.IsFirstNameCh(Ch)){
00420     do {NmChA+=Ch;} while (ChDef.IsName(GetCh()));
00421   } else {
00422     EThrow("Invalid first name character.");
00423     // EThrow(TStr::Fmt("Invalid first name character [%u:'%c%c%c%c%c'].",
00424     //  uint(Ch), Ch, RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh()));
00425   }
00426   return NmChA;
00427 }
00428 
00429 TStr TXmlLx::GetName(const TStr& RqNm){
00430   TStr Nm=GetName();
00431   // test if the name is equal to the required name
00432   if (Nm==RqNm){return RqNm;}
00433   else {EThrow(TStr("Name '")+RqNm+"' expected."); Fail; return TStr();}
00434 }
00435 
00436 void TXmlLx::GetComment(){
00437   // [15] Comment ::=  {{'<!-}}-' ((Char - '-') | ('-' (Char - '-')))* '-->'
00438   if (GetCh()!='-'){EThrow("Invalid comment start.");}
00439   TxtChA.Clr();
00440   forever {
00441     GetCh();
00442     if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");}
00443     if (Ch=='-'){
00444       if (GetCh()=='-'){
00445         if (GetCh()=='>'){GetCh(); break;} // final bracket
00446         else {EThrow("Invalid comment end.");}
00447       } else {
00448         if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");}
00449         TxtChA+='-'; TxtChA+=Ch; // special case if single '-'
00450       }
00451     } else {
00452       TxtChA+=Ch; // usual char
00453     }
00454   }
00455 }
00456 
00457 TStr TXmlLx::GetAttValue(){
00458   // [10]  AttValue ::=  '"' ([^<&"] | Reference)* '"'
00459   //  |  "'" ([^<&'] | Reference)* "'"
00460   uchar QCh=Ch;
00461   if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid attribute-value start.");}
00462   TChA ValChA; GetCh();
00463   forever {
00464     if ((Ch=='<')||(!ChDef.IsChar(Ch))){
00465       EThrow("Invalid attribute-value character.");}
00466     if (Ch==QCh){GetCh(); break;} // final quote
00467     else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference
00468     else {ValChA+=Ch; GetCh();} // usual char
00469   }
00470   return ValChA;
00471 }
00472 
00473 TStr TXmlLx::GetVersionNum(){
00474   // [24] VersionInfo ::=  {{S 'version' Eq}} (' VersionNum ' | " VersionNum ")
00475   // [26] VersionNum ::=  ([a-zA-Z0-9_.:] | '-')+
00476   char QCh=Ch;
00477   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00478   TChA VerNumChA;
00479   GetCh();
00480   do {
00481     if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))||
00482      (('0'<=Ch)&&(Ch<='9'))||(Ch=='_')||(Ch=='.')||(Ch==':')||(Ch=='-')){
00483       VerNumChA+=Ch;
00484     } else {
00485       EThrow("Invalid version-number character.");
00486     }
00487     GetCh();
00488   } while (Ch!=QCh);
00489   GetCh();
00490   return VerNumChA;
00491 }
00492 
00493 TStr TXmlLx::GetEncName(){
00494   // [80] EncodingDecl ::=  {{S 'encoding' Eq}} ('"' EncName '"' |  "'" EncName "'" )
00495   // [81] EncName ::=  [A-Za-z] ([A-Za-z0-9._] | '-')*
00496   char QCh=Ch;
00497   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00498   TChA EncNmChA;
00499   GetCh();
00500   if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))){EncNmChA+=Ch;}
00501   else {EThrow("Invalid encoding-name character.");}
00502   GetCh();
00503   while (Ch!=QCh){
00504     if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))||
00505      (('0'<=Ch)&&(Ch<='9'))||(Ch=='.')||(Ch=='_')||(Ch=='-')){EncNmChA+=Ch;}
00506     else {EThrow("Invalid version-number character.");}
00507     GetCh();
00508   }
00509   GetCh();
00510   return EncNmChA;
00511 }
00512 
00513 TStr TXmlLx::GetStalVal(){
00514   // [32] SDDecl ::=  {{S 'standalone' Eq}}
00515   //  (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
00516   char QCh=Ch;
00517   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00518   TChA StalChA;
00519   GetCh();
00520   while (Ch!=QCh){
00521     if (('a'<=Ch)&&(Ch<='z')){StalChA+=Ch;}
00522     else {EThrow("Invalid standalone-value character.");}
00523     GetCh();
00524   }
00525   GetCh();
00526   TStr StalVal=StalChA;
00527   if ((StalVal=="yes")||(StalVal=="no")){return StalVal;}
00528   else {EThrow("Invalid standalone-value."); Fail; return TStr();}
00529 }
00530 
00531 void TXmlLx::GetXmlDecl(){
00532   // [23] XMLDecl ::=  {{'<?xml'}}... VersionInfo EncodingDecl? SDDecl? S? '?>'
00533   // [24] VersionInfo ::=  S 'version' Eq (' VersionNum ' | " VersionNum ")
00534   GetWs(true);
00535   TStr VerNm=GetName("version"); GetEq(); TStr VerVal=GetVersionNum();
00536   if (VerVal!="1.0"){EThrow("Invalid XML version.");}
00537   AddArg(VerNm, VerVal);
00538   GetWs(false);
00539   if (Ch!='?'){
00540     // EncodingDecl ::=  {{S}} 'encoding' Eq
00541     //  ('"' EncName '"' |  "'" EncName "'" )
00542     TStr EncNm=GetName("encoding"); GetEq(); TStr EncVal=GetEncName();
00543     AddArg(EncNm, EncVal);
00544   }
00545   GetWs(false);
00546   if (Ch!='?'){
00547     // SDDecl ::=  {{S}} 'standalone' Eq
00548     //  (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
00549     TStr StalNm=GetName("standalone"); GetEq(); TStr StalVal=GetStalVal();
00550     AddArg(StalNm, StalVal);
00551   }
00552   GetWs(false);
00553   if (Ch=='?'){
00554     GetCh();
00555     if (Ch=='>'){GetCh();}
00556     else {EThrow("Invalid end-of-tag in XML-declaration.");}
00557   } else {
00558     EThrow("Invalid end-of-tag in XML-declaration.");
00559   }
00560 }
00561 
00562 void TXmlLx::GetPI(){
00563   // [16]  PI ::=  {{'<?' PITarget}} (S (Char* - (Char* '?>' Char*)))? '?>'
00564   // [17]  PITarget ::=  Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
00565   GetWs(false);
00566   TxtChA.Clr();
00567   forever {
00568     if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");}
00569     if (Ch=='?'){
00570       if (GetCh()=='>'){
00571         GetCh(); break;
00572       } else {
00573         if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");}
00574         TxtChA+='?'; TxtChA+=Ch; // special case if single '?'
00575       }
00576     } else {
00577       TxtChA+=Ch; // usual char
00578     }
00579     GetCh();
00580   }
00581 }
00582 
00583 TStr TXmlLx::GetSystemLiteral(){
00584   // [11]  SystemLiteral ::=  ('"' [^"]* '"') | ("'" [^']* "'")
00585   char QCh=Ch;
00586   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00587   TChA LitChA; GetCh();
00588   while (Ch!=QCh){
00589     if (!ChDef.IsChar(Ch)){EThrow("Invalid System-Literal character.");}
00590     LitChA+=Ch; GetCh();
00591   }
00592   GetCh();
00593   return LitChA;
00594 }
00595 
00596 TStr TXmlLx::GetPubidLiteral(){
00597   // [12]  PubidLiteral ::=  '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
00598   char QCh=Ch;
00599   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00600   TChA LitChA; GetCh();
00601   while (Ch!=QCh){
00602     if (!ChDef.IsPubid(Ch)){EThrow("Invalid Public-Id-Literal character.");}
00603     LitChA+=Ch; GetCh();
00604   }
00605   GetCh();
00606   return LitChA;
00607 }
00608 
00609 void TXmlLx::GetExternalId(){
00610   // ExternalID ::=  'SYSTEM' S SystemLiteral
00611   //  | 'PUBLIC' S PubidLiteral S SystemLiteral
00612   TStr ExtIdNm=GetName();
00613   if (ExtIdNm=="SYSTEM"){
00614     GetWs(true); GetSystemLiteral();
00615   } else if (ExtIdNm=="PUBLIC"){
00616     GetWs(true); GetPubidLiteral(); GetWs(true); GetSystemLiteral();
00617   } else {
00618     EThrow("Invalid external-id ('SYSTEM' or 'PUBLIC' expected).");
00619   }
00620 }
00621 
00622 void TXmlLx::GetNData(){
00623   // [76]  NDataDecl ::=  S 'NDATA' S Name
00624   GetName("NDATA"); GetWs(true); GetName();
00625 }
00626 
00627 void TXmlLx::GetDocTypeDecl(){
00628   // [28] doctypedecl ::=  {{'<!DOCTYPE'}} S Name (S ExternalID)? S?
00629   //  ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
00630   GetWs(true);
00631   TStr DocTypeDeclNm=GetName();
00632   GetWs(false);
00633   if (Ch=='>'){GetCh(); return;}
00634   if (Ch!='['){GetExternalId();}
00635   GetWs(false);
00636   if (Ch=='['){
00637     GetCh();
00638     // [28] (markupdecl | PEReference | S)*
00639     GetWs(false);
00640     while (Ch!=']'){
00641       if (ChDef.IsWs(Ch)){GetWs(true);}
00642       else if (Ch=='%'){GetPEReference();}
00643       else {
00644         GetSym();
00645       }
00646     }
00647     GetCh();
00648   }
00649   GetWs(false);
00650   // '>'
00651   if (Ch=='>'){GetCh();}
00652   else {EThrow("Invalid end-of-tag in document-type-declaration.");}
00653   TagNm=DocTypeDeclNm;
00654 }
00655 
00656 void TXmlLx::GetElement(){
00657   TxtChA.Clr();
00658   while (Ch!='>'){
00659     if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");}
00660     TxtChA+=Ch; GetCh();
00661   }
00662   GetCh();
00663 }
00664 
00665 void TXmlLx::GetAttList(){
00666   TxtChA.Clr();
00667   while (Ch!='>'){
00668     if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");}
00669     TxtChA+=Ch; GetCh();
00670   }
00671   GetCh();
00672 }
00673 
00674 TStr TXmlLx::GetEntityValue(){
00675   // [9]  EntityValue ::=  '"' ([^%&"] | PEReference | Reference)* '"'
00676   //  | "'" ([^%&'] | PEReference | Reference)* "'"
00677   uchar QCh=Ch;
00678   if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid entity-value start.");}
00679   TChA ValChA; GetCh();
00680   forever {
00681     if (!ChDef.IsChar(Ch)){EThrow("Invalid entity-value character.");}
00682     if (Ch==QCh){GetCh(); break;} // final quote
00683     else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference
00684     else if (Ch=='%'){GetCh(); ValChA+=GetPEReference();} // pereference
00685     else {ValChA+=Ch; GetCh();} // usual char
00686   }
00687   return ValChA;
00688 }
00689 
00690 void TXmlLx::GetEntity(){
00691   // [70] EntityDecl ::=  GEDecl | PEDecl
00692   // [71] GEDecl ::=  '<!ENTITY' S Name S EntityDef S? '>'
00693   // [72] PEDecl ::=  '<!ENTITY' S '%' S Name S PEDef S? '>'
00694   GetWs(true); TStr EntityNm;
00695   if (Ch=='%'){
00696     GetCh(); GetWs(true); EntityNm=GetName(); GetWs(true);
00697     // [74] PEDef ::=  EntityValue | ExternalID
00698     if ((Ch=='\"')||(Ch=='\'')){
00699       TStr EntityVal=GetEntityValue();
00700       PutPEntityVal(EntityNm, EntityVal);
00701     } else {
00702       GetExternalId();
00703       GetWs(false);
00704       if (Ch!='>'){GetNData();}
00705     }
00706   } else {
00707     EntityNm=GetName(); GetWs(true);
00708     // [73] EntityDef ::=  EntityValue | (ExternalID NDataDecl?)
00709     if ((Ch=='\"')||(Ch=='\'')){
00710       TStr EntityVal=GetEntityValue();
00711       PutEntityVal(EntityNm, EntityVal);
00712     } else {
00713       GetExternalId();
00714     }
00715   }
00716   GetWs(false);
00717   if (Ch=='>'){GetCh();}
00718   else {EThrow("Invalid end-of-tag in entity-declaration.");}
00719   TagNm=EntityNm;
00720 }
00721 
00722 void TXmlLx::GetNotation(){
00723   // [82] NotationDecl ::=  '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
00724   // [83]  PublicID ::=  'PUBLIC' S PubidLiteral
00725   TxtChA.Clr();
00726   while (Ch!='>'){
00727     if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");}
00728     TxtChA+=Ch; GetCh();
00729   }
00730   GetCh();
00731 }
00732 
00733 void TXmlLx::GetCDSect(){
00734   // [18]  CDSect ::=  CDStart CData CDEnd
00735   // [19]  CDStart ::=  '<![CDATA{{['}}
00736   // [20]  CData ::=  (Char* - (Char* ']]>' Char*))
00737   // [21]  CDEnd ::=  ']]>'
00738   if (Ch=='['){GetCh();}
00739   else {EThrow("Invalid start of CDATA section.");}
00740   TxtChA.Clr();
00741   forever {
00742     if (!ChDef.IsChar(Ch)){EThrow("Invalid CDATA character.");}
00743     if ((Ch=='>')&&(TxtChA.Len()>=2)&&
00744      (TxtChA.LastLastCh()==']') && (TxtChA.LastCh()==']')){
00745       GetCh(); TxtChA.Pop(); TxtChA.Pop(); break;
00746     } else {
00747       TxtChA+=Ch; GetCh();
00748     }
00749   }
00750 }
00751 
00752 void TXmlLx::SkipWs(){
00753   // [3] S ::=  (#x20 | #x9 | #xD | #xA)+
00754   while (ChDef.IsWs(Ch)){GetCh();}
00755 }
00756 
// Reads the next lexical symbol starting at the current character Ch,
// stores its kind in Sym and returns it.
// Depending on the symbol, TagNm (tag / target / declared name), the
// argument list (filled through AddArg) and TxtChA (text content) are set by
// this function or by the helper it dispatches to.
// Dispatch is on the current character:
//   '<'         markup: "<?" (XML declaration or PI), "<!" (CDATA, comment,
//               DOCTYPE/ELEMENT/ATTLIST/ENTITY/NOTATION), "</" (end tag),
//               otherwise a start tag or empty-element tag with attributes;
//   white-space a white-space symbol (skipped when Spacing==xspTruncate);
//   EofCh       end of input;
//   otherwise   character data up to the next '<', with references expanded.
// Errors are raised through EThrow.
00757 TXmlLxSym TXmlLx::GetSym(){
00758   if (Ch=='<'){
00759     GetCh(); ClrArgV();
00760     if (Ch=='?'){
      // "<?xml ...?>" (name compared case-insensitively) or any other PI
00761       GetCh(); TagNm=GetName();
00762       if (TagNm.GetLc()=="xml"){Sym=xsyXmlDecl; GetXmlDecl();}
00763       else {Sym=xsyPI; GetPI();}
00764     } else
00765     if (Ch=='!'){
00766       GetCh();
00767       if (Ch=='['){
        // "<![CDATA[ ... ]]>" is returned as a quoted string
00768         GetCh(); TagNm=GetName();
00769         if (TagNm=="CDATA"){Sym=xsyQStr; GetCDSect();}
00770         else {EThrow(TStr("Invalid tag after '<![' (")+TagNm+").");}
00771       } else
00772       if (Ch=='-'){
00773         Sym=xsyComment; GetComment();
00774       } else {
        // declarations; Sym is assigned after the helper returns because
        // GetDocTypeDecl may itself call GetSym and overwrite it
00775         TagNm=GetName();
00776         if (TagNm=="DOCTYPE"){GetDocTypeDecl(); Sym=xsyDocTypeDecl;}
00777         else if (TagNm=="ELEMENT"){GetElement(); Sym=xsyElement;}
00778         else if (TagNm=="ATTLIST"){GetAttList(); Sym=xsyAttList;}
00779         else if (TagNm=="ENTITY"){GetEntity(); Sym=xsyEntity;}
00780         else if (TagNm=="NOTATION"){GetNotation(); Sym=xsyNotation;}
00781         else {EThrow(TStr("Invalid tag (")+TagNm+").");}
00782       }
00783     } else
00784     if (Ch=='/'){
00785       // xsyETag
00786       GetCh(); Sym=xsyETag; TagNm=GetName(); GetWs(false);
00787       if (Ch=='>'){GetCh();}
00788       else {EThrow("Invalid End-Tag.");}
00789     } else {
00790       // xsySTag or xsySETag
00791       TagNm=GetName(); GetWs(false);
      // attributes: Name Eq AttValue, repeated until '>' or '/'
00792       while ((Ch!='>')&&(Ch!='/')){
00793         TStr AttrNm=GetName();
00794         GetEq();
00795         TStr AttrVal=GetAttValue();
00796         GetWs(false);
00797         AddArg(AttrNm, AttrVal);
00798       }
00799       if (Ch=='/'){
00800         if (GetCh()=='>'){Sym=xsySETag; GetCh();}
00801         else {EThrow("Invalid Empty-Element-Tag.");}
00802       } else {
00803         Sym=xsySTag; GetCh();
00804       }
00805     }
    // in truncate mode white-space following any markup is dropped
00806     if (Spacing==xspTruncate){SkipWs();}
00807   } else
00808   if (ChDef.IsWs(Ch)){
00809     Sym=xsyWs; GetWs(true); ToNrSpacing();
    // in truncate mode a white-space symbol is never reported: the next
    // symbol is read instead (the recursive call sets Sym)
00810     if (Spacing==xspTruncate){GetSym();}
00811   } else
00812   if (Ch==TCh::EofCh){
    // end of input: nothing is consumed
00813     Sym=xsyEof;
00814   } else {
00815     Sym=xsyStr; TxtChA.Clr();
00816     // [14]  CharData ::=  [^<&]* - ([^<&]* ']]>' [^<&]*)
00817     forever {
00818       if (!ChDef.IsChar(Ch)){
00819         EThrow(TUInt::GetStr(Ch, "Invalid character (%d)."));}
00820                 // GetCh();  continue; // skip invalid characters
00821       if (Ch=='<'){break;} // tag
00822       if (Ch=='&'){GetCh(); TxtChA+=GetReference();} // reference
00823       else {
00824         if ((Ch=='>')&&(TxtChA.Len()>=2)&&
00825          (TxtChA.LastLastCh()==']')&&(TxtChA.LastCh()==']')){
00826           EThrow("Forbidden substring ']]>' in character data.");}
00827         TxtChA+=Ch; GetCh(); // usual char
00828       }
00829     }
00830     ToNrSpacing();
00831   }
00832   return Sym;
00833 }
00834 
00835 TStr TXmlLx::GetSymStr() const {
00836   TChA SymChA;
00837   switch (Sym){
00838     case xsyUndef:
00839       SymChA="{Undef}"; break;
00840     case xsyWs:
00841       SymChA+="{Space:'"; SymChA+=TStr(TxtChA).GetHex(); SymChA+="'}"; break;
00842     case xsyComment:
00843       SymChA+="<!--"; SymChA+=TxtChA; SymChA+="-->"; break;
00844     case xsyXmlDecl:{
00845       SymChA+="<?"; SymChA+=TagNm;
00846       for (int ArgN=0; ArgN<GetArgs(); ArgN++){
00847         TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal);
00848         char ArgValQCh=GetArgValQCh(ArgVal);
00849         SymChA+=' '; SymChA+=ArgNm; SymChA+='=';
00850         SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh;
00851       }
00852       SymChA+="?>"; break;}
00853     case xsyPI:
00854       SymChA+="<?"; SymChA+=TagNm;
00855       if (!TxtChA.Empty()){SymChA+=' '; SymChA+=TxtChA;}
00856       SymChA+="?>"; break;
00857     case xsyDocTypeDecl:
00858       SymChA+="<!DOCTYPE "; SymChA+=TagNm; SymChA+=">"; break;
00859     case xsySTag:
00860     case xsySETag:{
00861       SymChA+="<"; SymChA+=TagNm;
00862       for (int ArgN=0; ArgN<GetArgs(); ArgN++){
00863         TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal);
00864         char ArgValQCh=GetArgValQCh(ArgVal);
00865         SymChA+=' '; SymChA+=ArgNm; SymChA+='=';
00866         SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh;
00867       }
00868       if (Sym==xsySTag){SymChA+=">";}
00869       else if (Sym==xsySETag){SymChA+="/>";}
00870       else {Fail;}
00871       break;}
00872     case xsyETag:
00873       SymChA+="</"; SymChA+=TagNm; SymChA+=">"; break;
00874     case xsyStr:
00875       SymChA="{String:'"; SymChA+=TxtChA; SymChA+="'}"; break;
00876     case xsyQStr:
00877       SymChA="{QString:'"; SymChA+=TxtChA; SymChA+="'}"; break;
00878     case xsyEof:
00879       SymChA="{Eof}"; break;
00880     default: Fail;
00881   }
00882   return SymChA;
00883 }
00884 
00885 void TXmlLx::EThrow(const TStr& MsgStr) const {
00886   TChA FPosChA;
00887   FPosChA+=" [File:"; FPosChA+=SIn->GetSNm();
00888   FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN);
00889   FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN);
00890   FPosChA+="]";
00891   TStr FullMsgStr=MsgStr+FPosChA;
00892   TExcept::Throw(FullMsgStr);
00893 }
00894 
00895 TStr TXmlLx::GetFPosStr() const {
00896   TChA FPosChA;
00897   FPosChA+=" [File:"; FPosChA+=SIn->GetSNm();
00898   FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN);
00899   FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN);
00900   FPosChA+="]";
00901   return FPosChA;
00902 }
00903 
00904 TStr TXmlLx::GetXmlLxSymStr(const TXmlLxSym& XmlLxSym){
00905   switch (XmlLxSym){
00906     case xsyUndef: return "Undef";
00907     case xsyWs: return "White-Space";
00908     case xsyComment: return "Comment";
00909     case xsyXmlDecl: return "Declaration";
00910     case xsyPI: return "PI";
00911     case xsyDocTypeDecl: return "Document-Type";
00912     case xsyElement: return "Element";
00913     case xsyAttList: return "Attribute-List";
00914     case xsyEntity: return "Entity";
00915     case xsyNotation: return "Notation";
00916     case xsyTag: return "Tag";
00917     case xsySTag: return "Start-Tag";
00918     case xsyETag: return "End-Tag";
00919     case xsySETag: return "Start-End-Tag";
00920     case xsyStr: return "String";
00921     case xsyQStr: return "Quoted-String";
00922     case xsyEof: return "Eon-Of-File";
00923     default: return "Undef";
00924   }
00925 }
00926 
00927 bool TXmlLx::IsTagNm(const TStr& Str){
00928   TChA ChA=Str;
00929   if (ChA.Len()>0){
00930     if (TXmlLx::ChDef.IsFirstNameCh(ChA[0])){
00931       for (int ChN=1; ChN<ChA.Len(); ChN++){
00932         if (!TXmlLx::ChDef.IsName(ChA[ChN])){
00933           return false;
00934         }
00935       }
00936       return true;
00937     } else {
00938       return false;
00939     }
00940   } else {
00941     return false;
00942   }
00943 }
00944 
00945 TStr TXmlLx::GetXmlStrFromPlainMem(const TMem& PlainMem){
00946   TChA XmlChA;
00947   for (int ChN=0; ChN<PlainMem.Len(); ChN++){
00948     uchar Ch=PlainMem[ChN];
00949     if ((' '<=Ch)&&(Ch<='~')){
00950       switch (Ch){
00951         case '"': XmlChA+="&quot;"; break;
00952         case '&': XmlChA+="&amp;"; break;
00953         case '\'': XmlChA+="&apos;"; break;
00954         case '<': XmlChA+="&lt;"; break;
00955         case '>': XmlChA+="&gt;"; break;
00956         default: XmlChA+=Ch;
00957       }
00958     } else
00959     if ((Ch=='\r')||(Ch=='\n')){
00960       XmlChA+=Ch;
00961     } else {
00962       XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';';
00963     }
00964   }
00965   return XmlChA;
00966 }
00967 
00968 TStr TXmlLx::GetXmlStrFromPlainStr(const TChA& PlainChA){
00969   TChA XmlChA;
00970   for (int ChN=0; ChN<PlainChA.Len(); ChN++){
00971     uchar Ch=PlainChA[ChN];
00972     if ((' '<=Ch)&&(Ch<='~')){
00973       switch (Ch){
00974         case '"': XmlChA+="&quot;"; break;
00975         case '&': XmlChA+="&amp;"; break;
00976         case '\'': XmlChA+="&apos;"; break;
00977         case '<': XmlChA+="&lt;"; break;
00978         case '>': XmlChA+="&gt;"; break;
00979         default: XmlChA+=Ch;
00980       }
00981     } else
00982     if ((Ch=='\r')||(Ch=='\n')){
00983       XmlChA+=Ch;
00984     } else {
00985       XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';';
00986     }
00987   }
00988   return XmlChA;
00989 }
00990 
00991 TStr TXmlLx::GetPlainStrFromXmlStr(const TStr& XmlStr){
00992   TChA PlainChA;
00993   TChRet Ch(TStrIn::New(XmlStr));
00994   Ch.GetCh();
00995   while (!Ch.Eof()){
00996     if (Ch()!='&'){
00997       PlainChA+=Ch(); Ch.GetCh();
00998     } else {
00999       // [67] Reference ::=  EntityRef | CharRef
01000       if (Ch.GetCh()=='#'){
01001         // [66]  CharRef ::=  '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
01002         TChA RefChA; int RefCd=0;
01003         if (Ch.GetCh()=='x'){
01004           // hex-decimal character code
01005           forever {
01006             Ch.GetCh();
01007             if (TCh::IsHex(Ch())){
01008               RefChA+=Ch();
01009               RefCd=RefCd*16+TCh::GetHex(Ch());
01010             } else {
01011               break;
01012             }
01013           }
01014         } else {
01015           // decimal character code
01016           forever {
01017             if (TCh::IsNum(Ch())){
01018               RefChA+=Ch();
01019               RefCd=RefCd*10+TCh::GetNum(Ch());
01020             } else {
01021               break;
01022             }
01023             Ch.GetCh();
01024           }
01025         }
01026         if ((!RefChA.Empty())&&(Ch()==';')){
01027           Ch.GetCh();
01028           uchar RefCh=uchar(RefCd);
01029           PlainChA+=RefCh;
01030         }
01031       } else {
01032         // [68]  EntityRef ::=  '&' Name ';'
01033         TChA EntityNm;
01034         while ((!Ch.Eof())&&(Ch()!=';')){
01035           EntityNm+=Ch(); Ch.GetCh();}
01036         if ((!EntityNm.Empty())&&(Ch()==';')){
01037           Ch.GetCh();
01038           if (EntityNm=="quot"){PlainChA+='"';}
01039           else if (EntityNm=="amp"){PlainChA+='&';}
01040           else if (EntityNm=="apos"){PlainChA+='\'';}
01041           else if (EntityNm=="lt"){PlainChA+='<';}
01042           else if (EntityNm=="gt"){PlainChA+='>';}
01043         }
01044       }
01045     }
01046   }
01047   return PlainChA;
01048 }
01049 
01050 TStr TXmlLx::GetUsAsciiStrFromXmlStr(const TStr& XmlStr){
01051   TStr UsAsciiStr=XmlStr;
01052   UsAsciiStr.ChangeStrAll("&#232;", "c");
01053   UsAsciiStr.ChangeStrAll("&#200;", "C");
01054   UsAsciiStr.ChangeStrAll("&#154;", "s");
01055   UsAsciiStr.ChangeStrAll("&#138;", "S");
01056   UsAsciiStr.ChangeStrAll("&#158;", "z");
01057   UsAsciiStr.ChangeStrAll("&#142;", "Z");
01058   TChA UsAsciiChA=TXmlLx::GetPlainStrFromXmlStr(UsAsciiStr);
01059   for (int ChN=0; ChN<UsAsciiChA.Len(); ChN++){
01060     char Ch=UsAsciiChA[ChN];
01061     if ((Ch<' ')||('~'<Ch)){UsAsciiChA.PutCh(ChN, 'x');}
01062   }
01063   return UsAsciiChA;
01064 }
01065 
01066 TStr TXmlLx::GetChRefFromYuEntRef(const TStr& YuEntRefStr){
01067   TStr ChRefStr=YuEntRefStr;
01068   ChRefStr.ChangeStrAll("&ch;", "&#232;");
01069   ChRefStr.ChangeStrAll("&Ch;", "&#200;");
01070   ChRefStr.ChangeStrAll("&sh;", "&#154;");
01071   ChRefStr.ChangeStrAll("&Sh;", "&#138;");
01072   ChRefStr.ChangeStrAll("&zh;", "&#158;");
01073   ChRefStr.ChangeStrAll("&Zh;", "&#142;");
01074   ChRefStr.ChangeStrAll("&cs", "c");
01075   ChRefStr.ChangeStrAll("&Cs;", "C");
01076   ChRefStr.ChangeStrAll("&dz;", "dz");
01077   ChRefStr.ChangeStrAll("&Dz;", "Dz");
01078   return ChRefStr;
01079 }
01080 
01082 // Xml-Token
01083 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm, const bool& DfVal) const {
01084   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01085   return (ArgN==-1) ? DfVal : (ArgNmValV[ArgN].Dat==TBool::TrueStr);
01086 }
01087 
01088 bool TXmlTok::GetBoolArgVal(
01089  const TStr& ArgNm, const TStr& TrueVal, const bool& DfVal) const {
01090   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01091   return (ArgN==-1) ? DfVal : (ArgNmValV[ArgN].Dat==TrueVal);
01092 }
01093 
01094 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm,
01095  const TStr& TrueVal, const TStr& FalseVal, const bool& DfVal) const {
01096   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01097   if (ArgN==-1){return DfVal;}
01098   TStr ArgVal=ArgNmValV[ArgN].Dat;
01099   if (ArgVal==TrueVal){return true;}
01100   IAssert(ArgVal == FalseVal); return false;
01101 }
01102 
01103 int TXmlTok::GetIntArgVal(const TStr& ArgNm, const int& DfVal) const {
01104   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01105   if (ArgN==-1){
01106     return DfVal;
01107   } else {
01108     int Val;
01109     if (ArgNmValV[ArgN].Dat.IsInt(Val)){return Val;} else {return DfVal;}
01110   }
01111 }
01112 
01113 double TXmlTok::GetFltArgVal(const TStr& ArgNm, const double& DfVal) const {
01114   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01115   if (ArgN==-1){
01116     return DfVal;
01117   } else {
01118     double Val;
01119     if (ArgNmValV[ArgN].Dat.IsFlt(Val)){return Val;} else {return DfVal;}
01120   }
01121 }
01122 
01123 TStr TXmlTok::GetStrArgVal(const TStr& ArgNm, const TStr& DfVal) const {
01124   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01125   return (ArgN==-1) ? DfVal : ArgNmValV[ArgN].Dat;
01126 }
01127 
01128 void TXmlTok::PutSubTok(const PXmlTok& Tok, const int& SubTokN){
01129   if (SubTokN==-1){
01130     ClrSubTok(); AddSubTok(Tok);
01131   } else {
01132     SubTokV[SubTokN]=Tok;
01133   }
01134 }
01135 
01136 PXmlTok TXmlTok::GetTagTok(const TStr& TagPath) const {
01137   if (TagPath.Empty()){
01138     return (TXmlTok*)this;
01139   } else {
01140     TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath);
01141     PXmlTok SubTok;
01142     for (int SubTokN=0; SubTokN<SubTokV.Len(); SubTokN++){
01143       SubTok=SubTokV[SubTokN];
01144       if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){break;}
01145       else {SubTok=NULL;}
01146     }
01147     if ((SubTok.Empty())||(RestTagPath.Empty())){return SubTok;}
01148     else {return SubTok->GetTagTok(RestTagPath);}
01149   }
01150 }
01151 
01152 void TXmlTok::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const {
01153   XmlTokV.Clr();
01154   TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm);
01155   PXmlTok Tok=GetTagTok(PreTagPath);
01156   if (!Tok.Empty()){
01157     for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){
01158       PXmlTok SubTok=Tok->GetSubTok(SubTokN);
01159       if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){
01160         XmlTokV.Add(SubTok);}
01161     }
01162   }
01163 }
01164 
01165 void TXmlTok::GetTagValV(const TStr& TagNm, const bool& XmlP, TStrV& ValV) const {
01166   if ((Sym==xsyTag)&&(Str==TagNm)){
01167     ValV.Add(GetTokStr(XmlP));
01168   } else {
01169     for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){
01170       GetSubTok(SubTokN)->GetTagValV(TagNm, XmlP, ValV);}
01171   }
01172 }
01173 
01174 TStr TXmlTok::GetTagVal(const TStr& TagNm, const bool& XmlP) const {
01175   TStrV ValV; GetTagValV(TagNm, XmlP, ValV);
01176   if (ValV.Len()>0){return ValV[0];} else {return "";}
01177 }
01178 
01179 void TXmlTok::AddTokToChA(const bool& XmlP, TChA& ChA) const {
01180   switch (Sym){
01181     case xsyWs:
01182       ChA+=Str; break;
01183     case xsyStr:
01184       if (XmlP){ChA+=TXmlLx::GetXmlStrFromPlainStr(Str);} else {ChA+=Str;} break;
01185     case xsyQStr:
01186       if (XmlP){ChA+="<![CDATA[";}
01187       ChA+=Str;
01188       if (XmlP){ChA+="]]>";} break;
01189     case xsyTag:
01190       if (XmlP){
01191         ChA+='<'; ChA+=Str;
01192         for (int ArgN=0; ArgN<GetArgs(); ArgN++){
01193           TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal);
01194           if (XmlP){ArgVal=TXmlLx::GetXmlStrFromPlainStr(ArgVal);}
01195           char ArgValQCh=TXmlLx::GetArgValQCh(ArgVal);
01196           ChA+=' '; ChA+=ArgNm; ChA+='=';
01197           ChA+=ArgValQCh; ChA+=ArgVal; ChA+=ArgValQCh;
01198         }
01199       }
01200       if (GetSubToks()==0){
01201         if (XmlP){ChA+="/>";}
01202       } else {
01203         if (XmlP){ChA+=">";}
01204         for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){
01205           GetSubTok(SubTokN)->AddTokToChA(XmlP, ChA);}
01206         if (XmlP){ChA+="</"; ChA+=Str; ChA+='>';}
01207       }
01208       break;
01209     default: Fail;
01210   }
01211 }
01212 
01213 TStr TXmlTok::GetTokVStr(const TXmlTokV& TokV, const bool& XmlP){
01214   TChA TokVChA;
01215   for (int TokN=0; TokN<TokV.Len(); TokN++){
01216     if (TokN>0){TokVChA+=' ';}
01217     TokVChA+=TokV[TokN]->GetTokStr(XmlP);
01218   }
01219   return TokVChA;
01220 }
01221 
01222 PXmlTok TXmlTok::GetTok(TXmlLx& Lx){
01223   switch (Lx.Sym){
01224     case xsyWs:
01225     case xsyStr:
01226     case xsyQStr:
01227       return TXmlTok::New(Lx.Sym, Lx.TxtChA);
01228     case xsySTag:
01229     case xsySETag:
01230       return TXmlTok::New(xsyTag, Lx.TagNm, Lx.ArgNmValKdV);
01231     default: Fail; return NULL;
01232   }
01233 }
01234 
01236 // Xml-Document
01237 void TXmlDoc::LoadTxtMiscStar(TXmlLx& Lx){
01238   // [27] Misc ::=  Comment | PI |  S
01239   while ((Lx.Sym==xsyComment)||(Lx.Sym==xsyPI)||(Lx.Sym==xsyWs)){
01240     Lx.GetSym();}
01241 }
01242 
01243 PXmlTok TXmlDoc::LoadTxtElement(TXmlLx& Lx){
01244   // [39]  element ::=  EmptyElemTag | STag content ETag
01245   PXmlTok Tok;
01246   if (Lx.Sym==xsySETag){
01247     Tok=TXmlTok::GetTok(Lx);
01248   } else
01249   if (Lx.Sym==xsySTag){
01250     Tok=TXmlTok::GetTok(Lx);
01251     forever {
01252       Lx.GetSym();
01253       if (Lx.Sym==xsyETag){
01254         if (Tok->GetStr()==Lx.TagNm){
01255           break;
01256         } else {
01257           TStr MsgStr=TStr("Invalid End-Tag '")+Lx.TagNm+
01258            "' ('"+Tok->GetStr()+"' expected).";
01259           Lx.EThrow(MsgStr);
01260         }
01261       } else {
01262         PXmlTok SubTok;
01263         switch (Lx.Sym){
01264           case xsySTag:
01265             SubTok=LoadTxtElement(Lx); break;
01266           case xsySETag:
01267           case xsyStr:
01268           case xsyQStr:
01269           case xsyWs:
01270             SubTok=TXmlTok::GetTok(Lx); break;
01271           case xsyPI:
01272           case xsyComment:
01273             break;
01274           default: Lx.EThrow("Content or End-Tag expected.");
01275         }
01276         if (!SubTok.Empty()){
01277           Tok->AddSubTok(SubTok);}
01278       }
01279     }
01280   } else
01281   if (Lx.Sym==xsyETag){
01282     TStr MsgStr=
01283      TStr("Xml-Element (Start-Tag or Empty-Element-Tag) required.")+
01284      TStr::GetStr(Lx.TagNm, " End-Tag </%s> encountered.");
01285     Lx.EThrow(MsgStr);
01286   } else {
01287     Lx.EThrow("Xml-Element (Start-Tag or Empty-Element-Tag) required.");
01288   }
01289   return Tok;
01290 }
01291 
01292 PXmlTok TXmlDoc::GetTagTok(const TStr& TagPath) const {
01293   if (TagPath.Empty()){
01294     return Tok;
01295   } else {
01296     TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath);
01297     if ((Tok->GetSym()==xsyTag)&&(Tok->GetStr()==TagNm)){
01298       if (RestTagPath.Empty()){return Tok;}
01299       else {return Tok->GetTagTok(RestTagPath);}
01300     } else {
01301       return NULL;
01302     }
01303   }
01304 }
01305 
01306 void TXmlDoc::PutTagTokStr(const TStr& TagPath, const TStr& TokStr) const {
01307   PXmlTok Tok=GetTagTok(TagPath);
01308   Tok->ClrSubTok();
01309   PXmlTok StrTok=TXmlTok::New(xsyStr, TokStr);
01310   Tok->AddSubTok(StrTok);
01311 }
01312 
01313 void TXmlDoc::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const {
01314   XmlTokV.Clr();
01315   TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm);
01316   PXmlTok Tok=GetTagTok(PreTagPath);
01317   if (!Tok.Empty()){
01318     for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){
01319       PXmlTok SubTok=Tok->GetSubTok(SubTokN);
01320       if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){
01321         XmlTokV.Add(SubTok);}
01322     }
01323   }
01324 }
01325 
01326 bool TXmlDoc::GetTagTokBoolArgVal(
01327  const TStr& TagPath, const TStr& ArgNm, const bool& DfVal) const {
01328   PXmlTok TagTok;
01329   if (IsTagTok(TagPath, TagTok)){
01330     return TagTok->GetBoolArgVal(ArgNm, DfVal);}
01331   else {return DfVal;}
01332 }
01333 
01334 int TXmlDoc::GetTagTokIntArgVal(
01335  const TStr& TagPath, const TStr& ArgNm, const int& DfVal) const {
01336   PXmlTok TagTok;
01337   if (IsTagTok(TagPath, TagTok)){
01338     return TagTok->GetIntArgVal(ArgNm, DfVal);}
01339   else {return DfVal;}
01340 }
01341 
01342 double TXmlDoc::GetTagTokFltArgVal(
01343  const TStr& TagPath, const TStr& ArgNm, const double& DfVal) const {
01344   PXmlTok TagTok;
01345   if (IsTagTok(TagPath, TagTok)){
01346     return TagTok->GetFltArgVal(ArgNm, DfVal);}
01347   else {return DfVal;}
01348 }
01349 
01350 TStr TXmlDoc::GetTagTokStrArgVal(
01351  const TStr& TagPath, const TStr& ArgNm, const TStr& DfVal) const {
01352   PXmlTok TagTok;
01353   if (IsTagTok(TagPath, TagTok)){
01354     return TagTok->GetStrArgVal(ArgNm, DfVal);}
01355   else {return DfVal;}
01356 }
01357 
01358 TStr TXmlDoc::GetXmlStr(const TStr& Str){
01359   TChA ChA=Str;
01360   TChA XmlChA;
01361   for (int ChN=0; ChN<ChA.Len(); ChN++){
01362     uchar Ch=ChA[ChN];
01363     if ((' '<=Ch)&&(Ch<='~')){
01364       if (Ch=='&'){XmlChA+="&amp;";}
01365       else if (Ch=='>'){XmlChA+="&lt;";}
01366       else if (Ch=='<'){XmlChA+="&gt;";}
01367       else if (Ch=='\''){XmlChA+="&apos;";}
01368       else if (Ch=='\"'){XmlChA+="&quot;";}
01369       else {XmlChA+=Ch;}
01370     } else {
01371       XmlChA+="&#"; XmlChA+=TUInt::GetStr(Ch); XmlChA+=";";
01372     }
01373   }
01374   return XmlChA;
01375 }
01376 
01377 bool TXmlDoc::SkipTopTag(const PSIn& SIn){
01378   bool Ok=true;
01379   TXmlLx Lx(SIn, xspIntact);
01380   try {
01381     Lx.GetSym();
01382     // [22] prolog ::=  XMLDecl? Misc* (doctypedecl Misc*)?
01383     if (Lx.Sym==xsyXmlDecl){Lx.GetSym();}
01384     LoadTxtMiscStar(Lx);
01385     if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();}
01386     LoadTxtMiscStar(Lx);
01387     Ok=true;
01388   }
01389   catch (PExcept Except){
01390     Ok=false;
01391   }
01392   return Ok;
01393 }
01394 
01395 PXmlDoc TXmlDoc::LoadTxt(TXmlLx& Lx){
01396   PXmlDoc Doc=TXmlDoc::New();
01397   // [1]  document ::=  prolog element Misc*
01398   try {
01399     Lx.GetSym();
01400     // [22] prolog ::=  XMLDecl? Misc* (doctypedecl Misc*)?
01401     if (Lx.Sym==xsyXmlDecl){Lx.GetSym();}
01402     LoadTxtMiscStar(Lx);
01403     if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();}
01404     LoadTxtMiscStar(Lx);
01405     Doc->Tok=LoadTxtElement(Lx);
01406     LoadTxtMiscStar(Lx);
01407     Doc->Ok=true; Doc->MsgStr="Ok";
01408   }
01409   catch (PExcept& Except){
01410     Doc->Ok=false; Doc->MsgStr=Except->GetMsgStr();
01411   }
01412   return Doc;
01413 }
01414 
01415 PXmlDoc TXmlDoc::LoadTxt(const PSIn& SIn, const TXmlSpacing& Spacing){
01416   TXmlLx Lx(SIn, Spacing); return LoadTxt(Lx);
01417 }
01418 
01419 PXmlDoc TXmlDoc::LoadTxt(const TStr& FNm, const TXmlSpacing& Spacing){
01420   PSIn SIn=TFIn::New(FNm); return LoadTxt(SIn, Spacing);
01421 }
01422 
01423 void TXmlDoc::LoadTxt(
01424  const TStr& FNm, TXmlDocV& XmlDocV, const TXmlSpacing& Spacing){
01425   XmlDocV.Clr();
01426   PSIn SIn=TFIn::New(FNm);
01427   TXmlLx Lx(SIn, Spacing);
01428   PXmlDoc XmlDoc;
01429   forever {
01430     Lx.SkipWs();
01431     XmlDoc=LoadTxt(Lx);
01432     if (XmlDoc->IsOk()){XmlDocV.Add(XmlDoc);}
01433     else {break;}
01434   }
01435 }
01436 
01437 PXmlDoc TXmlDoc::LoadStr(const TStr& Str){
01438   PSIn SIn=TStrIn::New(Str);
01439   return LoadTxt(SIn);
01440 }
01441 
01442 void TXmlDoc::SaveStr(TStr& Str){
01443   PSOut SOut=TMOut::New(); TMOut& MOut=*(TMOut*)SOut();
01444   SaveTxt(SOut);
01445   Str=MOut.GetAsStr();
01446 }
01447 
01449 // Fast and dirty XML parser
01450 // very basic it does only <item>string</item>, no comments, no arguments
01451 TXmlLxSym TXmlParser::GetSym() {
01452   if (NextSym != xsyUndef) {
01453     Sym = NextSym;  NextSym=xsyUndef;
01454     SymStr=NextSymStr;  NextSymStr.Clr();
01455     return Sym;
01456   }
01457   SymStr.Clr();
01458   char Ch;
01459   while (TCh::IsWs(Ch=GetCh())) { }
01460   if (Ch == TCh::EofCh) { Sym = xsyEof; return xsyEof; }
01461   if (Ch == '<') { // load tag
01462     Ch = GetCh();
01463     if (Ch == '/') { Sym = xsyETag; }
01464     else { Sym = xsySTag;  SymStr.Push(Ch); }
01465     while((Ch=GetCh())!='>' && Ch!=TCh::EofCh) { SymStr.Push(Ch); }
01466     const int StrLen = SymStr.Len();
01467     if (StrLen > 1 && SymStr[StrLen-1] == '/') {
01468       Sym = xsyETag; SymStr[StrLen-1] = 0;
01469       for (char *c = SymStr.CStr()+StrLen-2; TCh::IsWs(*c); c--) { *c=0; }
01470     }
01471   } else { // load string
01472     _SymStr.Clr();  _SymStr.Push(Ch);
01473     while (! RSIn.Eof() && RSIn.PeekCh() != '<') { _SymStr.Push(GetCh()); }
01474     GetPlainStrFromXmlStr(_SymStr, SymStr);
01475     Sym = xsyStr;
01476   }
01477   if (Ch == TCh::EofCh) { SymStr.Clr(); Sym = xsyEof; return xsyEof; }
01478   return Sym;
01479 }
01480 
01481 TXmlLxSym TXmlParser::GetSym(TChA& _SymStr) {
01482   GetSym();
01483   _SymStr = SymStr;
01484   return Sym;
01485 }
01486 
01487 TXmlLxSym TXmlParser::PeekSym() {
01488   if (NextSym == xsyUndef) {
01489     const TXmlLxSym TmpSim=Sym;
01490     const TChA TmpSymStr=SymStr;
01491     NextSym=GetSym(NextSymStr);
01492     Sym=TmpSim;
01493     SymStr=TmpSymStr;
01494   }
01495   return NextSym;
01496 }
01497 
01498 TXmlLxSym TXmlParser::PeekSym(TChA& _SymStr) {
01499   PeekSym();
01500   _SymStr = NextSymStr;
01501   return NextSym;
01502 }
01503 
01504 void TXmlParser::SkipTillTag(const TChA& _SymStr) {
01505   while(PeekSym() != xsyEof) {
01506     if (NextSymStr == _SymStr) { return; }
01507     GetSym();
01508   }
01509 }
01510 
01511 // get <tag>value</tag>
01512 void TXmlParser::GetTagVal(const TChA& TagStr, TChA& TagVal) {
01513   EAssertR(GetTag(TagStr) == xsySTag, TStr::Fmt("Expected '<%s>'. Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr());
01514   EAssertR(GetSym(TagVal) == xsyStr, "Expected string tag.");
01515   EAssertR(GetTag(TagStr) == xsyETag, TStr::Fmt("Expected '</%s>'. Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr());
01516 }
01517 
01518 TXmlLxSym TXmlParser::GetTag(const TChA& TagStr) {
01519   GetSym();
01520   EAssertR(TagStr==SymStr, TStr::Fmt("Expected xml symbol '%s'. Found '%s'",
01521     TagStr.CStr(), SymStr.CStr()).CStr());
01522   return Sym;
01523 }
01524 
01525 void TXmlParser::GetPlainStrFromXmlStr(const TChA& XmlStr, TChA& PlainChA) {
01526   static TChA EntityNm;
01527   PlainChA.Clr();
01528   const char *Ch = XmlStr.CStr();
01529   while (*Ch){
01530     if (*Ch!='&'){ PlainChA+=*Ch; Ch++; }
01531     else {
01532       if (*++Ch=='#'){
01533         TChA RefChA; int RefCd=0;
01534         if (*++Ch=='x'){
01535           forever {  Ch++;
01536             if (TCh::IsHex(*Ch)){ RefChA+=*Ch;  RefCd=RefCd*16+TCh::GetHex(*Ch); }
01537             else { break; } }
01538         } else { // decimal character code
01539           forever {
01540             if (TCh::IsNum(*Ch)){ RefChA+=*Ch; RefCd=RefCd*10+TCh::GetNum(*Ch); }
01541             else { break; } Ch++; }
01542         }
01543         if ((!RefChA.Empty())&&(*Ch==';')){
01544           Ch++;  const uchar RefCh=uchar(RefCd);  PlainChA+=RefCh; }
01545       } else {
01546         EntityNm.Clr();
01547         while ((*Ch)&&(*Ch!=';')){EntityNm+=*Ch; Ch++;}
01548         if ((!EntityNm.Empty())&&(*Ch==';')){  Ch++;
01549           if (EntityNm=="quot"){PlainChA+='"';}
01550           else if (EntityNm=="amp"){PlainChA+='&';}
01551           else if (EntityNm=="apos"){PlainChA+='\'';}
01552           else if (EntityNm=="lt"){PlainChA+='<';}
01553           else if (EntityNm=="gt"){PlainChA+='>';}
01554         }
01555       }
01556     }
01557   }
01558 }