SNAP Library 2.0, User Reference
2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // Xml-Object-Saving 00003 TStrStrH TXmlObjSer::TypeNmToTagNmH; 00004 00005 TStr TXmlObjSer::GetTagNm(const TStr& TypeNm){ 00006 TStr& XmlTagNm=TypeNmToTagNmH.AddDat(TypeNm); 00007 if (XmlTagNm.Empty()){ 00008 TChA XmlTagChA=TypeNm; 00009 for (int ChN=0; ChN<XmlTagChA.Len(); ChN++){ 00010 char Ch=XmlTagChA[ChN]; 00011 if (!((('A'<=Ch)&&(Ch<='Z'))||(('a'<=Ch)&&(Ch<='z'))||(('0'<=Ch)&&(Ch<='9')))){ 00012 XmlTagChA.PutCh(ChN, '_'); 00013 } 00014 } 00015 while ((XmlTagChA.Len()>0)&&(XmlTagChA.LastCh()=='_')){ 00016 XmlTagChA.Pop();} 00017 XmlTagNm=XmlTagChA; 00018 } 00019 return XmlTagNm; 00020 } 00021 00022 void TXmlObjSer::AssertXmlHd( 00023 const PXmlTok& XmlTok, const TStr& Nm, const TStr& TypeNm){ 00024 // check if the token is full 00025 EAssertR(!XmlTok.Empty(), "Xml-Token Empty"); 00026 // if name is empty then tag=type else tag=name 00027 if (!Nm.Empty()){ 00028 // check if the token is tag 00029 if (!XmlTok->IsTag()){ 00030 TStr ArgStr1="Expected: Tag"; 00031 TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr(); 00032 TExcept::Throw("Invalid Xml-Token", ArgStr1, ArgStr2); 00033 } 00034 if (Nm!="-"){ 00035 // check if the tag is correct 00036 if (!XmlTok->IsTag(Nm)){ 00037 TStr ArgStr1=TStr("Expected: ")+Nm; 00038 TStr ArgStr2=TStr("Found: ")+XmlTok->GetStr(); 00039 TExcept::Throw("Invalid Xml-Tag", ArgStr1, ArgStr2); 00040 } 00041 // check if the type is correct 00042 TStr TypeArgVal=XmlTok->GetStrArgVal("Type"); 00043 if (TypeArgVal!=TypeNm){ 00044 TStr ArgStr1=TStr("Expected: ")+TypeNm; 00045 TStr ArgStr2=TStr("Found: ")+TypeArgVal; 00046 TExcept::Throw("Invalid Xml-Type", ArgStr1, ArgStr2); 00047 } 00048 } 00049 } else { 00050 // check if the tag is correct 00051 if (!XmlTok->IsTag(TypeNm)){ 00052 TStr ArgStr1=TStr("Expected: ")+TypeNm; 00053 TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr(); 00054 TExcept::Throw("Invalid Xml-Type-Tag", ArgStr1, ArgStr2); 00055 } 00056 } 00057 } 00058 00059 bool TXmlObjSer::GetBoolArg(const PXmlTok& XmlTok, const TStr& Nm){ 00060 TStr ValStr; 00061 if (XmlTok->IsArg(Nm, ValStr)){ 00062 bool Val; 00063 if (ValStr.IsBool(Val)){ 00064 return Val; 00065 } else { 00066 TExcept::Throw("Invalid Xml-Argument Boolean-Value", Nm, ValStr); 00067 } 00068 } else { 00069 TExcept::Throw("Xml-Argument Missing", Nm); 00070 } 00071 Fail; return 0; 00072 } 00073 00074 int TXmlObjSer::GetIntArg(const PXmlTok& XmlTok, const TStr& Nm){ 00075 TStr ValStr; 00076 if (XmlTok->IsArg(Nm, ValStr)){ 00077 int Val; 00078 if (ValStr.IsInt(Val)){ 00079 return Val; 00080 } else { 00081 TExcept::Throw("Invalid Xml-Argument Integer-Value", Nm, ValStr); 00082 } 00083 } else { 00084 TExcept::Throw("Xml-Argument Missing", Nm); 00085 } 00086 Fail; return 0; 00087 } 00088 00089 int64 TXmlObjSer::GetInt64Arg(const PXmlTok& XmlTok, const TStr& Nm){ 00090 TStr ValStr; 00091 if (XmlTok->IsArg(Nm, ValStr)){ 00092 int64 Val; 00093 if (ValStr.IsInt64(Val)){ 00094 return Val; 00095 } else { 00096 TExcept::Throw("Invalid Xml-Argument Integer64-Value", Nm, ValStr); 00097 } 00098 } else { 00099 TExcept::Throw("Xml-Argument Missing", Nm); 00100 } 00101 Fail; return 0; 00102 } 00103 00104 double TXmlObjSer::GetFltArg(const PXmlTok& XmlTok, const TStr& Nm){ 00105 TStr ValStr; 00106 if (XmlTok->IsArg(Nm, ValStr)){ 00107 double Val; 00108 if (ValStr.IsFlt(Val)){ 00109 return Val; 00110 } else { 00111 TExcept::Throw("Invalid Xml-Argument Double-Value", Nm, ValStr); 00112 } 00113 } else { 00114 TExcept::Throw("Xml-Argument Missing", Nm); 00115 } 00116 Fail; return 0; 00117 } 00118 00120 // Xml-Object-Serialization-Tag-Name 00121 TXmlObjSerTagNm::TXmlObjSerTagNm( 00122 TSOut& _SOut, const bool& ETagP, 00123 const TStr& Nm, const TStr& TypeNm, 00124 const TStr& ArgNm, const TStr& ArgVal): 00125 TagNm(), SOut(&_SOut){ 00126 if (Nm!="-"){ 00127 SOut->PutCh('<'); 00128 if (Nm.Empty()){ 00129 SOut->PutStr(TagNm=TypeNm); 00130 } else { 00131 SOut->PutStr(TagNm=Nm); 00132 SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"'); 00133 } 00134 if (!ArgNm.Empty()){ 00135 SOut->PutCh(' '); SOut->PutStr(ArgNm); SOut->PutCh('='); 00136 SOut->PutCh('"'); SOut->PutStr(ArgVal); SOut->PutCh('"'); 00137 } 00138 if (ETagP){ 00139 SOut->PutCh('/'); TagNm="";} 00140 SOut->PutCh('>'); 00141 } 00142 } 00143 00144 TXmlObjSerTagNm::TXmlObjSerTagNm( 00145 TSOut& _SOut, const bool& ETagP, 00146 const TStr& Nm, const TStr& TypeNm, 00147 const TStr& ArgNm1, const TStr& ArgVal1, 00148 const TStr& ArgNm2, const TStr& ArgVal2, 00149 const TStr& ArgNm3, const TStr& ArgVal3, 00150 const TStr& ArgNm4, const TStr& ArgVal4): 00151 TagNm(), SOut(&_SOut){ 00152 if (Nm!="-"){ 00153 SOut->PutCh('<'); 00154 if (Nm.Empty()){ 00155 SOut->PutStr(TagNm=TypeNm); 00156 } else { 00157 SOut->PutStr(TagNm=Nm); 00158 SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"'); 00159 } 00160 if (!ArgNm1.Empty()){ 00161 SOut->PutCh(' '); SOut->PutStr(ArgNm1); SOut->PutCh('='); 00162 SOut->PutCh('"'); SOut->PutStr(ArgVal1); SOut->PutCh('"'); 00163 } 00164 if (!ArgNm2.Empty()){ 00165 SOut->PutCh(' '); SOut->PutStr(ArgNm2); SOut->PutCh('='); 00166 SOut->PutCh('"'); SOut->PutStr(ArgVal2); SOut->PutCh('"'); 00167 } 00168 if (!ArgNm3.Empty()){ 00169 SOut->PutCh(' '); SOut->PutStr(ArgNm3); SOut->PutCh('='); 00170 SOut->PutCh('"'); SOut->PutStr(ArgVal3); SOut->PutCh('"'); 00171 } 00172 if (!ArgNm4.Empty()){ 00173 SOut->PutCh(' '); SOut->PutStr(ArgNm4); SOut->PutCh('='); 00174 SOut->PutCh('"'); SOut->PutStr(ArgVal4); SOut->PutCh('"'); 00175 } 00176 if (ETagP){ 00177 SOut->PutCh('/'); TagNm="";} 00178 SOut->PutCh('>'); 00179 } 00180 } 00181 00182 TXmlObjSerTagNm::~TXmlObjSerTagNm(){ 00183 if (!TagNm.Empty()){ 00184 SOut->PutCh('<'); SOut->PutCh('/'); SOut->PutStr(TagNm); SOut->PutCh('>'); 00185 } 00186 } 00187 00189 // Xml-Chars 00190 void TXmlChDef::SetChTy(TBSet& ChSet, const int& MnCh, const int& MxCh){ 00191 IAssert((0<=MnCh)&&((MxCh==-1)||((MnCh<=MxCh)&&(MxCh<Chs)))); 00192 ChSet.Incl(MnCh); 00193 for (int Ch=MnCh+1; Ch<=MxCh; Ch++){ 00194 ChSet.Incl(Ch);} 00195 } 00196 00197 void TXmlChDef::SetChTy(TBSet& ChSet, const TStr& Str){ 00198 for (int ChN=0; ChN<Str.Len(); ChN++){ 00199 uchar Ch=Str[ChN]; 00200 ChSet.Incl(Ch); 00201 } 00202 } 00203 00204 void TXmlChDef::SetEntityVal(const TStr& Nm, const TStr& Val){ 00205 EntityNmToValH.AddDat(Nm, Val); 00206 } 00207 00208 TXmlChDef::TXmlChDef(): 00209 Chs(TUCh::Vals), 00210 CharChSet(), CombChSet(), ExtChSet(), 00211 LetterChSet(), DigitChSet(), NameChSet(), PubidChSet(), 00212 EntityNmToValH(100){ 00213 00214 // Character-Sets 00215 // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | ... 00216 CharChSet.Gen(Chs); 00217 // ... because of DMoz (temporary patch) 00218 SetChTy(CharChSet, 0x1); SetChTy(CharChSet, 0x3); SetChTy(CharChSet, 0x6); 00219 SetChTy(CharChSet, 11); SetChTy(CharChSet, 24); SetChTy(CharChSet, 27); 00220 // regular characters 00221 SetChTy(CharChSet, 0x9); SetChTy(CharChSet, 0xA); SetChTy(CharChSet, 0xD); 00222 SetChTy(CharChSet, 0x20, TUCh::Mx); 00223 // BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | 00224 // [#x00D8-#x00F6] | [#x00F8-#x00FF] | ... 00225 TBSet BaseChSet(Chs); 00226 SetChTy(BaseChSet, 0x41, 0x5A); SetChTy(BaseChSet, 0x61, 0x7A); 00227 SetChTy(BaseChSet, 0xC0, 0xD6); SetChTy(BaseChSet, 0xD8, 0xF6); 00228 SetChTy(BaseChSet, 0xF8, 0xFF); 00229 // Ideographic ::= ... 00230 TBSet IdeoChSet(Chs); 00231 // CombiningChar ::= ... 00232 CombChSet.Gen(Chs); 00233 // Extender ::= #x00B7 | ... 00234 ExtChSet.Gen(Chs); 00235 SetChTy(ExtChSet, 0xB7); 00236 // Letter ::= BaseChar | Ideographic 00237 LetterChSet=BaseChSet|IdeoChSet; 00238 // Digit ::= [#x0030-#x0039] | ... 00239 DigitChSet.Gen(Chs); 00240 SetChTy(DigitChSet, 0x30, 0x39); 00241 // NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar 00242 NameChSet=LetterChSet|DigitChSet| 00243 uchar('.')|uchar('-')|uchar('_')|uchar(':')|CombChSet; 00244 // PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 00245 PubidChSet.Gen(Chs); 00246 SetChTy(PubidChSet, 0x20); SetChTy(PubidChSet, 0xD); SetChTy(PubidChSet, 0xA); 00247 SetChTy(PubidChSet, 'a', 'z'); SetChTy(PubidChSet, 'A', 'Z'); 00248 SetChTy(PubidChSet, '0', '9'); SetChTy(PubidChSet, "-'()+,./:=?;!*#@$_%"); 00249 00250 // Standard-Entity-Sequences 00251 SetEntityVal("amp", "&"); 00252 SetEntityVal("lt", "<"); SetEntityVal("gt", ">"); 00253 SetEntityVal("apos", "'"); SetEntityVal("quot", "\""); 00254 } 00255 00257 // Xml-Lexical 00258 TXmlChDef TXmlLx::ChDef; 00259 00260 uchar TXmlLx::GetCh(){ 00261 EAssert(Ch!=TCh::EofCh); 00262 PrevCh=Ch; 00263 if (ChStack.Empty()){Ch=(RSIn.Eof()) ? TCh::EofCh : RSIn.GetCh();} 00264 else {Ch=ChStack.Pop();} 00265 ChN++; if (Ch==TCh::LfCh){LnN++; LnChN=0;} else {LnChN++;} 00266 //putchar(Ch); 00267 return Ch; 00268 } 00269 00270 void TXmlLx::ToNrSpacing(){ 00271 if (Spacing==xspIntact){ 00272 } else 00273 if (Spacing==xspPreserve){ 00274 int SrcChN=0; int DstChN=0; 00275 while (SrcChN<TxtChA.Len()){ 00276 if (TxtChA[SrcChN]==TCh::CrCh){ 00277 TxtChA.PutCh(DstChN, TCh::LfCh); SrcChN++; DstChN++; 00278 if ((SrcChN<TxtChA.Len())&&(TxtChA[SrcChN]==TCh::LfCh)){SrcChN++;} 00279 } else { 00280 if (SrcChN!=DstChN){ 00281 TxtChA.PutCh(DstChN, TxtChA[SrcChN]);} 00282 SrcChN++; DstChN++; 00283 } 00284 } 00285 TxtChA.Trunc(DstChN); 00286 } else 00287 if (Spacing==xspSeparate){ 00288 // squeeze series of white-spaces to single space 00289 int SrcChN=0; int DstChN=0; 00290 while (SrcChN<TxtChA.Len()){ 00291 if (ChDef.IsWs(TxtChA[SrcChN])){ 00292 if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){ 00293 SrcChN++; 00294 } else { 00295 TxtChA.PutCh(DstChN, ' '); 00296 SrcChN++; DstChN++; 00297 } 00298 } else { 00299 TxtChA.PutCh(DstChN, TxtChA[SrcChN]); 00300 SrcChN++; DstChN++; 00301 } 00302 } 00303 TxtChA.Trunc(DstChN); 00304 } else 00305 if (Spacing==xspTruncate){ 00306 // cut leading and trailing white-spaces and 00307 // squeeze series of white-spaces to single space 00308 int SrcChN=0; int DstChN=0; 00309 while (SrcChN<TxtChA.Len()){ 00310 if (ChDef.IsWs(TxtChA[SrcChN])){ 00311 if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){ 00312 SrcChN++; 00313 } else { 00314 TxtChA.PutCh(DstChN, ' '); 00315 SrcChN++; DstChN++; 00316 } 00317 } else { 00318 TxtChA.PutCh(DstChN, TxtChA[SrcChN]); 00319 SrcChN++; DstChN++; 00320 } 00321 } 00322 TxtChA.Trunc(DstChN); 00323 // delete trailing white-spaces 00324 while ((TxtChA.Len()>0)&&(ChDef.IsWs(TxtChA.LastCh()))){ 00325 TxtChA.Pop();} 00326 } else { 00327 Fail; 00328 } 00329 } 00330 00331 void TXmlLx::GetWs(const bool& IsRq){ 00332 // [3] S ::= (#x20 | #x9 | #xD | #xA)+ 00333 int WSpaces=0; TxtChA.Clr(); 00334 while (ChDef.IsWs(Ch)){ 00335 WSpaces++; TxtChA+=Ch; GetCh();} 00336 if (IsRq&&(WSpaces==0)){ 00337 EThrow("White-space required.");} 00338 } 00339 00340 TStr TXmlLx::GetReference(){ 00341 // [67] Reference ::= EntityRef | CharRef 00342 if (Ch=='#'){ 00343 // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 00344 TChA RefChA; int RefCd=0; 00345 if (GetCh()=='x'){ 00346 // hex-decimal character code 00347 forever { 00348 GetCh(); 00349 if (TCh::IsHex(Ch)){ 00350 RefChA+=Ch; 00351 RefCd=RefCd*16+TCh::GetHex(Ch); 00352 } else { 00353 break; 00354 } 00355 } 00356 } else { 00357 // decimal character code 00358 forever { 00359 if (TCh::IsNum(Ch)){ 00360 RefChA+=Ch; 00361 RefCd=RefCd*10+TCh::GetNum(Ch); 00362 } else { 00363 break; 00364 } 00365 GetCh(); 00366 } 00367 } 00368 if ((!RefChA.Empty())&&(Ch==';')){ 00369 GetCh(); 00370 if (RefCd < 0x80) { 00371 // 8-bit char 00372 uchar RefCh=uchar(RefCd); 00373 return TStr(RefCh); 00374 } else { 00375 TStr ResStr = TUnicode::EncodeUtf8(RefCd); 00376 return ResStr; 00377 } 00378 } else { 00379 EThrow("Invalid Char-Reference."); Fail; return TStr(); 00380 } 00381 } else { 00382 // [68] EntityRef ::= '&' Name ';' 00383 TStr EntityNm=GetName(); 00384 if ((!EntityNm.Empty())&&(Ch==';')){ 00385 GetCh(); 00386 TStr EntityVal; 00387 if (IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/} 00388 else if (ChDef.IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/} 00389 else {EThrow(TStr("Entity-Reference (")+EntityNm+") does not exist.");} 00390 return EntityVal; 00391 } else { 00392 EThrow("Invalid Entity-Reference."); Fail; return TStr(); 00393 } 00394 } 00395 } 00396 00397 TStr TXmlLx::GetPEReference(){ 00398 // [69] PEReference ::= '%' Name ';' 00399 TStr EntityNm=GetName(); 00400 if ((EntityNm.Empty())||(Ch!=';')){EThrow("Invalid PEntity-Reference.");} 00401 GetCh(); 00402 TStr EntityVal; 00403 if (IsPEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/} 00404 else {EThrow(TStr("PEntity-Reference (")+EntityNm+") does not exist.");} 00405 return EntityVal; 00406 } 00407 00408 void TXmlLx::GetEq(){ 00409 // [25] Eq ::= S? '=' S? 00410 GetWs(false); 00411 if (Ch=='='){GetCh();} 00412 else {EThrow("Equality ('=') character expected.");} 00413 GetWs(false); 00414 } 00415 00416 TStr TXmlLx::GetName(){ 00417 // [5] Name ::= (Letter | '_' | ':') (NameChar)* 00418 TChA NmChA; 00419 if (ChDef.IsFirstNameCh(Ch)){ 00420 do {NmChA+=Ch;} while (ChDef.IsName(GetCh())); 00421 } else { 00422 EThrow("Invalid first name character."); 00423 // EThrow(TStr::Fmt("Invalid first name character [%u:'%c%c%c%c%c'].", 00424 // uint(Ch), Ch, RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh())); 00425 } 00426 return NmChA; 00427 } 00428 00429 TStr TXmlLx::GetName(const TStr& RqNm){ 00430 TStr Nm=GetName(); 00431 // test if the name is equal to the required name 00432 if (Nm==RqNm){return RqNm;} 00433 else {EThrow(TStr("Name '")+RqNm+"' expected."); Fail; return TStr();} 00434 } 00435 00436 void TXmlLx::GetComment(){ 00437 // [15] Comment ::= {{'<!-}}-' ((Char - '-') | ('-' (Char - '-')))* '-->' 00438 if (GetCh()!='-'){EThrow("Invalid comment start.");} 00439 TxtChA.Clr(); 00440 forever { 00441 GetCh(); 00442 if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");} 00443 if (Ch=='-'){ 00444 if (GetCh()=='-'){ 00445 if (GetCh()=='>'){GetCh(); break;} // final bracket 00446 else {EThrow("Invalid comment end.");} 00447 } else { 00448 if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");} 00449 TxtChA+='-'; TxtChA+=Ch; // special case if single '-' 00450 } 00451 } else { 00452 TxtChA+=Ch; // usual char 00453 } 00454 } 00455 } 00456 00457 TStr TXmlLx::GetAttValue(){ 00458 // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' 00459 // | "'" ([^<&'] | Reference)* "'" 00460 uchar QCh=Ch; 00461 if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid attribute-value start.");} 00462 TChA ValChA; GetCh(); 00463 forever { 00464 if ((Ch=='<')||(!ChDef.IsChar(Ch))){ 00465 EThrow("Invalid attribute-value character.");} 00466 if (Ch==QCh){GetCh(); break;} // final quote 00467 else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference 00468 else {ValChA+=Ch; GetCh();} // usual char 00469 } 00470 return ValChA; 00471 } 00472 00473 TStr TXmlLx::GetVersionNum(){ 00474 // [24] VersionInfo ::= {{S 'version' Eq}} (' VersionNum ' | " VersionNum ") 00475 // [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 00476 char QCh=Ch; 00477 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 00478 TChA VerNumChA; 00479 GetCh(); 00480 do { 00481 if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))|| 00482 (('0'<=Ch)&&(Ch<='9'))||(Ch=='_')||(Ch=='.')||(Ch==':')||(Ch=='-')){ 00483 VerNumChA+=Ch; 00484 } else { 00485 EThrow("Invalid version-number character."); 00486 } 00487 GetCh(); 00488 } while (Ch!=QCh); 00489 GetCh(); 00490 return VerNumChA; 00491 } 00492 00493 TStr TXmlLx::GetEncName(){ 00494 // [80] EncodingDecl ::= {{S 'encoding' Eq}} ('"' EncName '"' | "'" EncName "'" ) 00495 // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 00496 char QCh=Ch; 00497 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 00498 TChA EncNmChA; 00499 GetCh(); 00500 if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))){EncNmChA+=Ch;} 00501 else {EThrow("Invalid encoding-name character.");} 00502 GetCh(); 00503 while (Ch!=QCh){ 00504 if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))|| 00505 (('0'<=Ch)&&(Ch<='9'))||(Ch=='.')||(Ch=='_')||(Ch=='-')){EncNmChA+=Ch;} 00506 else {EThrow("Invalid version-number character.");} 00507 GetCh(); 00508 } 00509 GetCh(); 00510 return EncNmChA; 00511 } 00512 00513 TStr TXmlLx::GetStalVal(){ 00514 // [32] SDDecl ::= {{S 'standalone' Eq}} 00515 // (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) 00516 char QCh=Ch; 00517 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 00518 TChA StalChA; 00519 GetCh(); 00520 while (Ch!=QCh){ 00521 if (('a'<=Ch)&&(Ch<='z')){StalChA+=Ch;} 00522 else {EThrow("Invalid standalone-value character.");} 00523 GetCh(); 00524 } 00525 GetCh(); 00526 TStr StalVal=StalChA; 00527 if ((StalVal=="yes")||(StalVal=="no")){return StalVal;} 00528 else {EThrow("Invalid standalone-value."); Fail; return TStr();} 00529 } 00530 00531 void TXmlLx::GetXmlDecl(){ 00532 // [23] XMLDecl ::= {{'<?xml'}}... VersionInfo EncodingDecl? SDDecl? S? '?>' 00533 // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 00534 GetWs(true); 00535 TStr VerNm=GetName("version"); GetEq(); TStr VerVal=GetVersionNum(); 00536 if (VerVal!="1.0"){EThrow("Invalid XML version.");} 00537 AddArg(VerNm, VerVal); 00538 GetWs(false); 00539 if (Ch!='?'){ 00540 // EncodingDecl ::= {{S}} 'encoding' Eq 00541 // ('"' EncName '"' | "'" EncName "'" ) 00542 TStr EncNm=GetName("encoding"); GetEq(); TStr EncVal=GetEncName(); 00543 AddArg(EncNm, EncVal); 00544 } 00545 GetWs(false); 00546 if (Ch!='?'){ 00547 // SDDecl ::= {{S}} 'standalone' Eq 00548 // (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) 00549 TStr StalNm=GetName("standalone"); GetEq(); TStr StalVal=GetStalVal(); 00550 AddArg(StalNm, StalVal); 00551 } 00552 GetWs(false); 00553 if (Ch=='?'){ 00554 GetCh(); 00555 if (Ch=='>'){GetCh();} 00556 else {EThrow("Invalid end-of-tag in XML-declaration.");} 00557 } else { 00558 EThrow("Invalid end-of-tag in XML-declaration."); 00559 } 00560 } 00561 00562 void TXmlLx::GetPI(){ 00563 // [16] PI ::= {{'<?' PITarget}} (S (Char* - (Char* '?>' Char*)))? '?>' 00564 // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 00565 GetWs(false); 00566 TxtChA.Clr(); 00567 forever { 00568 if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");} 00569 if (Ch=='?'){ 00570 if (GetCh()=='>'){ 00571 GetCh(); break; 00572 } else { 00573 if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");} 00574 TxtChA+='?'; TxtChA+=Ch; // special case if single '?' 00575 } 00576 } else { 00577 TxtChA+=Ch; // usual char 00578 } 00579 GetCh(); 00580 } 00581 } 00582 00583 TStr TXmlLx::GetSystemLiteral(){ 00584 // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 00585 char QCh=Ch; 00586 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 00587 TChA LitChA; GetCh(); 00588 while (Ch!=QCh){ 00589 if (!ChDef.IsChar(Ch)){EThrow("Invalid System-Literal character.");} 00590 LitChA+=Ch; GetCh(); 00591 } 00592 GetCh(); 00593 return LitChA; 00594 } 00595 00596 TStr TXmlLx::GetPubidLiteral(){ 00597 // [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 00598 char QCh=Ch; 00599 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 00600 TChA LitChA; GetCh(); 00601 while (Ch!=QCh){ 00602 if (!ChDef.IsPubid(Ch)){EThrow("Invalid Public-Id-Literal character.");} 00603 LitChA+=Ch; GetCh(); 00604 } 00605 GetCh(); 00606 return LitChA; 00607 } 00608 00609 void TXmlLx::GetExternalId(){ 00610 // ExternalID ::= 'SYSTEM' S SystemLiteral 00611 // | 'PUBLIC' S PubidLiteral S SystemLiteral 00612 TStr ExtIdNm=GetName(); 00613 if (ExtIdNm=="SYSTEM"){ 00614 GetWs(true); GetSystemLiteral(); 00615 } else if (ExtIdNm=="PUBLIC"){ 00616 GetWs(true); GetPubidLiteral(); GetWs(true); GetSystemLiteral(); 00617 } else { 00618 EThrow("Invalid external-id ('SYSTEM' or 'PUBLIC' expected)."); 00619 } 00620 } 00621 00622 void TXmlLx::GetNData(){ 00623 // [76] NDataDecl ::= S 'NDATA' S Name 00624 GetName("NDATA"); GetWs(true); GetName(); 00625 } 00626 00627 void TXmlLx::GetDocTypeDecl(){ 00628 // [28] doctypedecl ::= {{'<!DOCTYPE'}} S Name (S ExternalID)? S? 00629 // ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 00630 GetWs(true); 00631 TStr DocTypeDeclNm=GetName(); 00632 GetWs(false); 00633 if (Ch=='>'){GetCh(); return;} 00634 if (Ch!='['){GetExternalId();} 00635 GetWs(false); 00636 if (Ch=='['){ 00637 GetCh(); 00638 // [28] (markupdecl | PEReference | S)* 00639 GetWs(false); 00640 while (Ch!=']'){ 00641 if (ChDef.IsWs(Ch)){GetWs(true);} 00642 else if (Ch=='%'){GetPEReference();} 00643 else { 00644 GetSym(); 00645 } 00646 } 00647 GetCh(); 00648 } 00649 GetWs(false); 00650 // '>' 00651 if (Ch=='>'){GetCh();} 00652 else {EThrow("Invalid end-of-tag in document-type-declaration.");} 00653 TagNm=DocTypeDeclNm; 00654 } 00655 00656 void TXmlLx::GetElement(){ 00657 TxtChA.Clr(); 00658 while (Ch!='>'){ 00659 if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");} 00660 TxtChA+=Ch; GetCh(); 00661 } 00662 GetCh(); 00663 } 00664 00665 void TXmlLx::GetAttList(){ 00666 TxtChA.Clr(); 00667 while (Ch!='>'){ 00668 if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");} 00669 TxtChA+=Ch; GetCh(); 00670 } 00671 GetCh(); 00672 } 00673 00674 TStr TXmlLx::GetEntityValue(){ 00675 // [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' 00676 // | "'" ([^%&'] | PEReference | Reference)* "'" 00677 uchar QCh=Ch; 00678 if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid entity-value start.");} 00679 TChA ValChA; GetCh(); 00680 forever { 00681 if (!ChDef.IsChar(Ch)){EThrow("Invalid entity-value character.");} 00682 if (Ch==QCh){GetCh(); break;} // final quote 00683 else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference 00684 else if (Ch=='%'){GetCh(); ValChA+=GetPEReference();} // pereference 00685 else {ValChA+=Ch; GetCh();} // usual char 00686 } 00687 return ValChA; 00688 } 00689 00690 void TXmlLx::GetEntity(){ 00691 // [70] EntityDecl ::= GEDecl | PEDecl 00692 // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 00693 // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 00694 GetWs(true); TStr EntityNm; 00695 if (Ch=='%'){ 00696 GetCh(); GetWs(true); EntityNm=GetName(); GetWs(true); 00697 // [74] PEDef ::= EntityValue | ExternalID 00698 if ((Ch=='\"')||(Ch=='\'')){ 00699 TStr EntityVal=GetEntityValue(); 00700 PutPEntityVal(EntityNm, EntityVal); 00701 } else { 00702 GetExternalId(); 00703 GetWs(false); 00704 if (Ch!='>'){GetNData();} 00705 } 00706 } else { 00707 EntityNm=GetName(); GetWs(true); 00708 // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 00709 if ((Ch=='\"')||(Ch=='\'')){ 00710 TStr EntityVal=GetEntityValue(); 00711 PutEntityVal(EntityNm, EntityVal); 00712 } else { 00713 GetExternalId(); 00714 } 00715 } 00716 GetWs(false); 00717 if (Ch=='>'){GetCh();} 00718 else {EThrow("Invalid end-of-tag in entity-declaration.");} 00719 TagNm=EntityNm; 00720 } 00721 00722 void TXmlLx::GetNotation(){ 00723 // [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 00724 // [83] PublicID ::= 'PUBLIC' S PubidLiteral 00725 TxtChA.Clr(); 00726 while (Ch!='>'){ 00727 if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");} 00728 TxtChA+=Ch; GetCh(); 00729 } 00730 GetCh(); 00731 } 00732 00733 void TXmlLx::GetCDSect(){ 00734 // [18] CDSect ::= CDStart CData CDEnd 00735 // [19] CDStart ::= '<![CDATA{{['}} 00736 // [20] CData ::= (Char* - (Char* ']]>' Char*)) 00737 // [21] CDEnd ::= ']]>' 00738 if (Ch=='['){GetCh();} 00739 else {EThrow("Invalid start of CDATA section.");} 00740 TxtChA.Clr(); 00741 forever { 00742 if (!ChDef.IsChar(Ch)){EThrow("Invalid CDATA character.");} 00743 if ((Ch=='>')&&(TxtChA.Len()>=2)&& 00744 (TxtChA.LastLastCh()==']') && (TxtChA.LastCh()==']')){ 00745 GetCh(); TxtChA.Pop(); TxtChA.Pop(); break; 00746 } else { 00747 TxtChA+=Ch; GetCh(); 00748 } 00749 } 00750 } 00751 00752 void TXmlLx::SkipWs(){ 00753 // [3] S ::= (#x20 | #x9 | #xD | #xA)+ 00754 while (ChDef.IsWs(Ch)){GetCh();} 00755 } 00756 00757 TXmlLxSym TXmlLx::GetSym(){ 00758 if (Ch=='<'){ 00759 GetCh(); ClrArgV(); 00760 if (Ch=='?'){ 00761 GetCh(); TagNm=GetName(); 00762 if (TagNm.GetLc()=="xml"){Sym=xsyXmlDecl; GetXmlDecl();} 00763 else {Sym=xsyPI; GetPI();} 00764 } else 00765 if (Ch=='!'){ 00766 GetCh(); 00767 if (Ch=='['){ 00768 GetCh(); TagNm=GetName(); 00769 if (TagNm=="CDATA"){Sym=xsyQStr; GetCDSect();} 00770 else {EThrow(TStr("Invalid tag after '<![' (")+TagNm+").");} 00771 } else 00772 if (Ch=='-'){ 00773 Sym=xsyComment; GetComment(); 00774 } else { 00775 TagNm=GetName(); 00776 if (TagNm=="DOCTYPE"){GetDocTypeDecl(); Sym=xsyDocTypeDecl;} 00777 else if (TagNm=="ELEMENT"){GetElement(); Sym=xsyElement;} 00778 else if (TagNm=="ATTLIST"){GetAttList(); Sym=xsyAttList;} 00779 else if (TagNm=="ENTITY"){GetEntity(); Sym=xsyEntity;} 00780 else if (TagNm=="NOTATION"){GetNotation(); Sym=xsyNotation;} 00781 else {EThrow(TStr("Invalid tag (")+TagNm+").");} 00782 } 00783 } else 00784 if (Ch=='/'){ 00785 // xsyETag 00786 GetCh(); Sym=xsyETag; TagNm=GetName(); GetWs(false); 00787 if (Ch=='>'){GetCh();} 00788 else {EThrow("Invalid End-Tag.");} 00789 } else { 00790 // xsySTag or xsySETag 00791 TagNm=GetName(); GetWs(false); 00792 while ((Ch!='>')&&(Ch!='/')){ 00793 TStr AttrNm=GetName(); 00794 GetEq(); 00795 TStr AttrVal=GetAttValue(); 00796 GetWs(false); 00797 AddArg(AttrNm, AttrVal); 00798 } 00799 if (Ch=='/'){ 00800 if (GetCh()=='>'){Sym=xsySETag; GetCh();} 00801 else {EThrow("Invalid Empty-Element-Tag.");} 00802 } else { 00803 Sym=xsySTag; GetCh(); 00804 } 00805 } 00806 if (Spacing==xspTruncate){SkipWs();} 00807 } else 00808 if (ChDef.IsWs(Ch)){ 00809 Sym=xsyWs; GetWs(true); ToNrSpacing(); 00810 if (Spacing==xspTruncate){GetSym();} 00811 } else 00812 if (Ch==TCh::EofCh){ 00813 Sym=xsyEof; 00814 } else { 00815 Sym=xsyStr; TxtChA.Clr(); 00816 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 00817 forever { 00818 if (!ChDef.IsChar(Ch)){ 00819 EThrow(TUInt::GetStr(Ch, "Invalid character (%d)."));} 00820 // GetCh(); continue; // skip invalid characters 00821 if (Ch=='<'){break;} // tag 00822 if (Ch=='&'){GetCh(); TxtChA+=GetReference();} // reference 00823 else { 00824 if ((Ch=='>')&&(TxtChA.Len()>=2)&& 00825 (TxtChA.LastLastCh()==']')&&(TxtChA.LastCh()==']')){ 00826 EThrow("Forbidden substring ']]>' in character data.");} 00827 TxtChA+=Ch; GetCh(); // usual char 00828 } 00829 } 00830 ToNrSpacing(); 00831 } 00832 return Sym; 00833 } 00834 00835 TStr TXmlLx::GetSymStr() const { 00836 TChA SymChA; 00837 switch (Sym){ 00838 case xsyUndef: 00839 SymChA="{Undef}"; break; 00840 case xsyWs: 00841 SymChA+="{Space:'"; SymChA+=TStr(TxtChA).GetHex(); SymChA+="'}"; break; 00842 case xsyComment: 00843 SymChA+="<!--"; SymChA+=TxtChA; SymChA+="-->"; break; 00844 case xsyXmlDecl:{ 00845 SymChA+="<?"; SymChA+=TagNm; 00846 for (int ArgN=0; ArgN<GetArgs(); ArgN++){ 00847 TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal); 00848 char ArgValQCh=GetArgValQCh(ArgVal); 00849 SymChA+=' '; SymChA+=ArgNm; SymChA+='='; 00850 SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh; 00851 } 00852 SymChA+="?>"; break;} 00853 case xsyPI: 00854 SymChA+="<?"; SymChA+=TagNm; 00855 if (!TxtChA.Empty()){SymChA+=' '; SymChA+=TxtChA;} 00856 SymChA+="?>"; break; 00857 case xsyDocTypeDecl: 00858 SymChA+="<!DOCTYPE "; SymChA+=TagNm; SymChA+=">"; break; 00859 case xsySTag: 00860 case xsySETag:{ 00861 SymChA+="<"; SymChA+=TagNm; 00862 for (int ArgN=0; ArgN<GetArgs(); ArgN++){ 00863 TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal); 00864 char ArgValQCh=GetArgValQCh(ArgVal); 00865 SymChA+=' '; SymChA+=ArgNm; SymChA+='='; 00866 SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh; 00867 } 00868 if (Sym==xsySTag){SymChA+=">";} 00869 else if (Sym==xsySETag){SymChA+="/>";} 00870 else {Fail;} 00871 break;} 00872 case xsyETag: 00873 SymChA+="</"; SymChA+=TagNm; SymChA+=">"; break; 00874 case xsyStr: 00875 SymChA="{String:'"; SymChA+=TxtChA; SymChA+="'}"; break; 00876 case xsyQStr: 00877 SymChA="{QString:'"; SymChA+=TxtChA; SymChA+="'}"; break; 00878 case xsyEof: 00879 SymChA="{Eof}"; break; 00880 default: Fail; 00881 } 00882 return SymChA; 00883 } 00884 00885 void TXmlLx::EThrow(const TStr& MsgStr) const { 00886 TChA FPosChA; 00887 FPosChA+=" [File:"; FPosChA+=SIn->GetSNm(); 00888 FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN); 00889 FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN); 00890 FPosChA+="]"; 00891 TStr FullMsgStr=MsgStr+FPosChA; 00892 TExcept::Throw(FullMsgStr); 00893 } 00894 00895 TStr TXmlLx::GetFPosStr() const { 00896 TChA FPosChA; 00897 FPosChA+=" [File:"; FPosChA+=SIn->GetSNm(); 00898 FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN); 00899 FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN); 00900 FPosChA+="]"; 00901 return FPosChA; 00902 } 00903 00904 TStr TXmlLx::GetXmlLxSymStr(const TXmlLxSym& XmlLxSym){ 00905 switch (XmlLxSym){ 00906 case xsyUndef: return "Undef"; 00907 case xsyWs: return "White-Space"; 00908 case xsyComment: return "Comment"; 00909 case xsyXmlDecl: return "Declaration"; 00910 case xsyPI: return "PI"; 00911 case xsyDocTypeDecl: return "Document-Type"; 00912 case xsyElement: return "Element"; 00913 case xsyAttList: return "Attribute-List"; 00914 case xsyEntity: return "Entity"; 00915 case xsyNotation: return "Notation"; 00916 case xsyTag: return "Tag"; 00917 case xsySTag: return "Start-Tag"; 00918 case xsyETag: return "End-Tag"; 00919 case xsySETag: return "Start-End-Tag"; 00920 case xsyStr: return "String"; 00921 case xsyQStr: return "Quoted-String"; 00922 case xsyEof: return "Eon-Of-File"; 00923 default: return "Undef"; 00924 } 00925 } 00926 00927 bool TXmlLx::IsTagNm(const TStr& Str){ 00928 TChA ChA=Str; 00929 if (ChA.Len()>0){ 00930 if (TXmlLx::ChDef.IsFirstNameCh(ChA[0])){ 00931 for (int ChN=1; ChN<ChA.Len(); ChN++){ 00932 if (!TXmlLx::ChDef.IsName(ChA[ChN])){ 00933 return false; 00934 } 00935 } 00936 return true; 00937 } else { 00938 return false; 00939 } 00940 } else { 00941 return false; 00942 } 00943 } 00944 00945 TStr TXmlLx::GetXmlStrFromPlainMem(const TMem& PlainMem){ 00946 TChA XmlChA; 00947 for (int ChN=0; ChN<PlainMem.Len(); ChN++){ 00948 uchar Ch=PlainMem[ChN]; 00949 if ((' '<=Ch)&&(Ch<='~')){ 00950 switch (Ch){ 00951 case '"': XmlChA+="""; break; 00952 case '&': XmlChA+="&"; break; 00953 case '\'': XmlChA+="'"; break; 00954 case '<': XmlChA+="<"; break; 00955 case '>': XmlChA+=">"; break; 00956 default: XmlChA+=Ch; 00957 } 00958 } else 00959 if ((Ch=='\r')||(Ch=='\n')){ 00960 XmlChA+=Ch; 00961 } else { 00962 XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';'; 00963 } 00964 } 00965 return XmlChA; 00966 } 00967 00968 TStr TXmlLx::GetXmlStrFromPlainStr(const TChA& PlainChA){ 00969 TChA XmlChA; 00970 for (int ChN=0; ChN<PlainChA.Len(); ChN++){ 00971 uchar Ch=PlainChA[ChN]; 00972 if ((' '<=Ch)&&(Ch<='~')){ 00973 switch (Ch){ 00974 case '"': XmlChA+="""; break; 00975 case '&': XmlChA+="&"; break; 00976 case '\'': XmlChA+="'"; break; 00977 case '<': XmlChA+="<"; break; 00978 case '>': XmlChA+=">"; break; 00979 default: XmlChA+=Ch; 00980 } 00981 } else 00982 if ((Ch=='\r')||(Ch=='\n')){ 00983 XmlChA+=Ch; 00984 } else { 00985 XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';'; 00986 } 00987 } 00988 return XmlChA; 00989 } 00990 00991 TStr TXmlLx::GetPlainStrFromXmlStr(const TStr& XmlStr){ 00992 TChA PlainChA; 00993 TChRet Ch(TStrIn::New(XmlStr)); 00994 Ch.GetCh(); 00995 while (!Ch.Eof()){ 00996 if (Ch()!='&'){ 00997 PlainChA+=Ch(); Ch.GetCh(); 00998 } else { 00999 // [67] Reference ::= EntityRef | CharRef 01000 if (Ch.GetCh()=='#'){ 01001 // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 01002 TChA RefChA; int RefCd=0; 01003 if (Ch.GetCh()=='x'){ 01004 // hex-decimal character code 01005 forever { 01006 Ch.GetCh(); 01007 if (TCh::IsHex(Ch())){ 01008 RefChA+=Ch(); 01009 RefCd=RefCd*16+TCh::GetHex(Ch()); 01010 } else { 01011 break; 01012 } 01013 } 01014 } else { 01015 // decimal character code 01016 forever { 01017 if (TCh::IsNum(Ch())){ 01018 RefChA+=Ch(); 01019 RefCd=RefCd*10+TCh::GetNum(Ch()); 01020 } else { 01021 break; 01022 } 01023 Ch.GetCh(); 01024 } 01025 } 01026 if ((!RefChA.Empty())&&(Ch()==';')){ 01027 Ch.GetCh(); 01028 if (RefCd < 0x80) { 01029 // ascii character 01030 uchar RefCh=uchar(RefCd); 01031 PlainChA+=RefCh; 01032 } else { 01033 // unicode 01034 TUnicode::EncodeUtf8(RefCd, PlainChA); 01035 } 01036 } 01037 } else { 01038 // [68] EntityRef ::= '&' Name ';' 01039 TChA EntityNm; 01040 while ((!Ch.Eof())&&(Ch()!=';')){ 01041 EntityNm+=Ch(); Ch.GetCh();} 01042 if ((!EntityNm.Empty())&&(Ch()==';')){ 01043 Ch.GetCh(); 01044 if (EntityNm=="quot"){PlainChA+='"';} 01045 else if (EntityNm=="amp"){PlainChA+='&';} 01046 else if (EntityNm=="apos"){PlainChA+='\'';} 01047 else if (EntityNm=="lt"){PlainChA+='<';} 01048 else if (EntityNm=="gt"){PlainChA+='>';} 01049 } 01050 } 01051 } 01052 } 01053 return PlainChA; 01054 } 01055 01056 TStr TXmlLx::GetUsAsciiStrFromXmlStr(const TStr& XmlStr){ 01057 TStr UsAsciiStr=XmlStr; 01058 UsAsciiStr.ChangeStrAll("è", "c"); 01059 UsAsciiStr.ChangeStrAll("È", "C"); 01060 UsAsciiStr.ChangeStrAll("š", "s"); 01061 UsAsciiStr.ChangeStrAll("Š", "S"); 01062 UsAsciiStr.ChangeStrAll("ž", "z"); 01063 UsAsciiStr.ChangeStrAll("Ž", "Z"); 01064 TChA UsAsciiChA=TXmlLx::GetPlainStrFromXmlStr(UsAsciiStr); 01065 for (int ChN=0; ChN<UsAsciiChA.Len(); ChN++){ 01066 char Ch=UsAsciiChA[ChN]; 01067 if ((Ch<' ')||('~'<Ch)){UsAsciiChA.PutCh(ChN, 'x');} 01068 } 01069 return UsAsciiChA; 01070 } 01071 01072 TStr TXmlLx::GetChRefFromYuEntRef(const TStr& YuEntRefStr){ 01073 TStr ChRefStr=YuEntRefStr; 01074 ChRefStr.ChangeStrAll("&ch;", "è"); 01075 ChRefStr.ChangeStrAll("&Ch;", "È"); 01076 ChRefStr.ChangeStrAll("&sh;", "š"); 01077 ChRefStr.ChangeStrAll("&Sh;", "Š"); 01078 ChRefStr.ChangeStrAll("&zh;", "ž"); 01079 ChRefStr.ChangeStrAll("&Zh;", "Ž"); 01080 ChRefStr.ChangeStrAll("&cs", "c"); 01081 ChRefStr.ChangeStrAll("&Cs;", "C"); 01082 ChRefStr.ChangeStrAll("&dz;", "dz"); 01083 ChRefStr.ChangeStrAll("&Dz;", "Dz"); 01084 return ChRefStr; 01085 } 01086 01088 // Xml-Token 01089 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm, const bool& DfVal) const { 01090 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01091 return (ArgN==-1) ? DfVal : (ArgNmValV[ArgN].Dat==TBool::TrueStr); 01092 } 01093 01094 bool TXmlTok::GetBoolArgVal( 01095 const TStr& ArgNm, const TStr& TrueVal, const bool& DfVal) const { 01096 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01097 return (ArgN==-1) ? DfVal : (ArgNmValV[ArgN].Dat==TrueVal); 01098 } 01099 01100 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm, 01101 const TStr& TrueVal, const TStr& FalseVal, const bool& DfVal) const { 01102 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01103 if (ArgN==-1){return DfVal;} 01104 TStr ArgVal=ArgNmValV[ArgN].Dat; 01105 if (ArgVal==TrueVal){return true;} 01106 IAssert(ArgVal == FalseVal); return false; 01107 } 01108 01109 int TXmlTok::GetIntArgVal(const TStr& ArgNm, const int& DfVal) const { 01110 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01111 if (ArgN==-1){ 01112 return DfVal; 01113 } else { 01114 int Val; 01115 if (ArgNmValV[ArgN].Dat.IsInt(Val)){return Val;} else {return DfVal;} 01116 } 01117 } 01118 01119 double TXmlTok::GetFltArgVal(const TStr& ArgNm, const double& DfVal) const { 01120 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01121 if (ArgN==-1){ 01122 return DfVal; 01123 } else { 01124 double Val; 01125 if (ArgNmValV[ArgN].Dat.IsFlt(Val)){return Val;} else {return DfVal;} 01126 } 01127 } 01128 01129 TStr TXmlTok::GetStrArgVal(const TStr& ArgNm, const TStr& DfVal) const { 01130 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01131 return (ArgN==-1) ? DfVal : ArgNmValV[ArgN].Dat; 01132 } 01133 01134 void TXmlTok::PutSubTok(const PXmlTok& Tok, const int& SubTokN){ 01135 if (SubTokN==-1){ 01136 ClrSubTok(); AddSubTok(Tok); 01137 } else { 01138 SubTokV[SubTokN]=Tok; 01139 } 01140 } 01141 01142 PXmlTok TXmlTok::GetTagTok(const TStr& TagPath) const { 01143 if (TagPath.Empty()){ 01144 return (TXmlTok*)this; 01145 } else { 01146 TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath); 01147 PXmlTok SubTok; 01148 for (int SubTokN=0; SubTokN<SubTokV.Len(); SubTokN++){ 01149 SubTok=SubTokV[SubTokN]; 01150 if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){break;} 01151 else {SubTok=NULL;} 01152 } 01153 if ((SubTok.Empty())||(RestTagPath.Empty())){return SubTok;} 01154 else {return SubTok->GetTagTok(RestTagPath);} 01155 } 01156 } 01157 01158 void TXmlTok::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const { 01159 XmlTokV.Clr(); 01160 TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm); 01161 PXmlTok Tok=GetTagTok(PreTagPath); 01162 if (!Tok.Empty()){ 01163 for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){ 01164 PXmlTok SubTok=Tok->GetSubTok(SubTokN); 01165 if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){ 01166 XmlTokV.Add(SubTok);} 01167 } 01168 } 01169 } 01170 01171 void TXmlTok::GetTagValV(const TStr& TagNm, const bool& XmlP, TStrV& ValV) const { 01172 if ((Sym==xsyTag)&&(Str==TagNm)){ 01173 ValV.Add(GetTokStr(XmlP)); 01174 } else { 01175 for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){ 01176 GetSubTok(SubTokN)->GetTagValV(TagNm, XmlP, ValV);} 01177 } 01178 } 01179 01180 TStr TXmlTok::GetTagVal(const TStr& TagNm, const bool& XmlP) const { 01181 TStrV ValV; GetTagValV(TagNm, XmlP, ValV); 01182 if (ValV.Len()>0){return ValV[0];} else {return "";} 01183 } 01184 01185 void TXmlTok::AddTokToChA(const bool& XmlP, TChA& ChA) const { 01186 switch (Sym){ 01187 case xsyWs: 01188 ChA+=Str; break; 01189 case xsyStr: 01190 if (XmlP){ChA+=TXmlLx::GetXmlStrFromPlainStr(Str);} else {ChA+=Str;} break; 01191 case xsyQStr: 01192 if (XmlP){ChA+="<![CDATA[";} 01193 ChA+=Str; 01194 if (XmlP){ChA+="]]>";} break; 01195 case xsyTag: 01196 if (XmlP){ 01197 ChA+='<'; ChA+=Str; 01198 for (int ArgN=0; ArgN<GetArgs(); ArgN++){ 01199 TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal); 01200 if (XmlP){ArgVal=TXmlLx::GetXmlStrFromPlainStr(ArgVal);} 01201 char ArgValQCh=TXmlLx::GetArgValQCh(ArgVal); 01202 ChA+=' '; ChA+=ArgNm; ChA+='='; 01203 ChA+=ArgValQCh; ChA+=ArgVal; ChA+=ArgValQCh; 01204 } 01205 } 01206 if (GetSubToks()==0){ 01207 if (XmlP){ChA+="/>";} 01208 } else { 01209 if (XmlP){ChA+=">";} 01210 for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){ 01211 GetSubTok(SubTokN)->AddTokToChA(XmlP, ChA);} 01212 if (XmlP){ChA+="</"; ChA+=Str; ChA+='>';} 01213 } 01214 break; 01215 default: Fail; 01216 } 01217 } 01218 01219 TStr TXmlTok::GetTokVStr(const TXmlTokV& TokV, const bool& XmlP){ 01220 TChA TokVChA; 01221 for (int TokN=0; TokN<TokV.Len(); TokN++){ 01222 if (TokN>0){TokVChA+=' ';} 01223 TokVChA+=TokV[TokN]->GetTokStr(XmlP); 01224 } 01225 return TokVChA; 01226 } 01227 01228 PXmlTok TXmlTok::GetTok(TXmlLx& Lx){ 01229 switch (Lx.Sym){ 01230 case xsyWs: 01231 case xsyStr: 01232 case xsyQStr: 01233 return TXmlTok::New(Lx.Sym, Lx.TxtChA); 01234 case xsySTag: 01235 case xsySETag: 01236 return TXmlTok::New(xsyTag, Lx.TagNm, Lx.ArgNmValKdV); 01237 default: Fail; return NULL; 01238 } 01239 } 01240 01242 // Xml-Document 01243 void TXmlDoc::LoadTxtMiscStar(TXmlLx& Lx){ 01244 // [27] Misc ::= Comment | PI | S 01245 while ((Lx.Sym==xsyComment)||(Lx.Sym==xsyPI)||(Lx.Sym==xsyWs)){ 01246 Lx.GetSym();} 01247 } 01248 01249 PXmlTok TXmlDoc::LoadTxtElement(TXmlLx& Lx){ 01250 // [39] element ::= EmptyElemTag | STag content ETag 01251 PXmlTok Tok; 01252 if (Lx.Sym==xsySETag){ 01253 Tok=TXmlTok::GetTok(Lx); 01254 } else 01255 if (Lx.Sym==xsySTag){ 01256 Tok=TXmlTok::GetTok(Lx); 01257 forever { 01258 Lx.GetSym(); 01259 if (Lx.Sym==xsyETag){ 01260 if (Tok->GetStr()==Lx.TagNm){ 01261 break; 01262 } else { 01263 TStr MsgStr=TStr("Invalid End-Tag '")+Lx.TagNm+ 01264 "' ('"+Tok->GetStr()+"' expected)."; 01265 Lx.EThrow(MsgStr); 01266 } 01267 } else { 01268 PXmlTok SubTok; 01269 switch (Lx.Sym){ 01270 case xsySTag: 01271 SubTok=LoadTxtElement(Lx); break; 01272 case xsySETag: 01273 case xsyStr: 01274 case xsyQStr: 01275 case xsyWs: 01276 SubTok=TXmlTok::GetTok(Lx); break; 01277 case xsyPI: 01278 case xsyComment: 01279 break; 01280 default: Lx.EThrow("Content or End-Tag expected."); 01281 } 01282 if (!SubTok.Empty()){ 01283 Tok->AddSubTok(SubTok);} 01284 } 01285 } 01286 } else 01287 if (Lx.Sym==xsyETag){ 01288 TStr MsgStr= 01289 TStr("Xml-Element (Start-Tag or Empty-Element-Tag) required.")+ 01290 TStr::GetStr(Lx.TagNm, " End-Tag </%s> encountered."); 01291 Lx.EThrow(MsgStr); 01292 } else { 01293 Lx.EThrow("Xml-Element (Start-Tag or Empty-Element-Tag) required."); 01294 } 01295 return Tok; 01296 } 01297 01298 PXmlTok TXmlDoc::GetTagTok(const TStr& TagPath) const { 01299 if (TagPath.Empty()){ 01300 return Tok; 01301 } else { 01302 TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath); 01303 if ((Tok->GetSym()==xsyTag)&&(Tok->GetStr()==TagNm)){ 01304 if (RestTagPath.Empty()){return Tok;} 01305 else {return Tok->GetTagTok(RestTagPath);} 01306 } else { 01307 return NULL; 01308 } 01309 } 01310 } 01311 01312 void TXmlDoc::PutTagTokStr(const TStr& TagPath, const TStr& TokStr) const { 01313 PXmlTok Tok=GetTagTok(TagPath); 01314 Tok->ClrSubTok(); 01315 PXmlTok StrTok=TXmlTok::New(xsyStr, TokStr); 01316 Tok->AddSubTok(StrTok); 01317 } 01318 01319 void TXmlDoc::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const { 01320 XmlTokV.Clr(); 01321 TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm); 01322 PXmlTok Tok=GetTagTok(PreTagPath); 01323 if (!Tok.Empty()){ 01324 for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){ 01325 PXmlTok SubTok=Tok->GetSubTok(SubTokN); 01326 if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){ 01327 XmlTokV.Add(SubTok);} 01328 } 01329 } 01330 } 01331 01332 bool TXmlDoc::GetTagTokBoolArgVal( 01333 const TStr& TagPath, const TStr& ArgNm, const bool& DfVal) const { 01334 PXmlTok TagTok; 01335 if (IsTagTok(TagPath, TagTok)){ 01336 return TagTok->GetBoolArgVal(ArgNm, DfVal);} 01337 else {return DfVal;} 01338 } 01339 01340 int TXmlDoc::GetTagTokIntArgVal( 01341 const TStr& TagPath, const TStr& ArgNm, const int& DfVal) const { 01342 PXmlTok TagTok; 01343 if (IsTagTok(TagPath, TagTok)){ 01344 return TagTok->GetIntArgVal(ArgNm, DfVal);} 01345 else {return DfVal;} 01346 } 01347 01348 double TXmlDoc::GetTagTokFltArgVal( 01349 const TStr& TagPath, const TStr& ArgNm, const double& DfVal) const { 01350 PXmlTok TagTok; 01351 if (IsTagTok(TagPath, TagTok)){ 01352 return TagTok->GetFltArgVal(ArgNm, DfVal);} 01353 else {return DfVal;} 01354 } 01355 01356 TStr TXmlDoc::GetTagTokStrArgVal( 01357 const TStr& TagPath, const TStr& ArgNm, const TStr& DfVal) const { 01358 PXmlTok TagTok; 01359 if (IsTagTok(TagPath, TagTok)){ 01360 return TagTok->GetStrArgVal(ArgNm, DfVal);} 01361 else {return DfVal;} 01362 } 01363 01364 TStr TXmlDoc::GetXmlStr(const TStr& Str){ 01365 TChA ChA=Str; 01366 TChA XmlChA; 01367 for (int ChN=0; ChN<ChA.Len(); ChN++){ 01368 uchar Ch=ChA[ChN]; 01369 if ((' '<=Ch)&&(Ch<='~')){ 01370 if (Ch=='&'){XmlChA+="&";} 01371 else if (Ch=='>'){XmlChA+="<";} 01372 else if (Ch=='<'){XmlChA+=">";} 01373 else if (Ch=='\''){XmlChA+="'";} 01374 else if (Ch=='\"'){XmlChA+=""";} 01375 else {XmlChA+=Ch;} 01376 } else { 01377 XmlChA+="&#"; XmlChA+=TUInt::GetStr(Ch); XmlChA+=";"; 01378 } 01379 } 01380 return XmlChA; 01381 } 01382 01383 bool TXmlDoc::SkipTopTag(const PSIn& SIn){ 01384 bool Ok=true; 01385 TXmlLx Lx(SIn, xspIntact); 01386 try { 01387 Lx.GetSym(); 01388 // [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 01389 if (Lx.Sym==xsyXmlDecl){Lx.GetSym();} 01390 LoadTxtMiscStar(Lx); 01391 if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();} 01392 LoadTxtMiscStar(Lx); 01393 Ok=true; 01394 } 01395 catch (PExcept Except){ 01396 Ok=false; 01397 } 01398 return Ok; 01399 } 01400 01401 PXmlDoc TXmlDoc::LoadTxt(TXmlLx& Lx){ 01402 PXmlDoc Doc=TXmlDoc::New(); 01403 // [1] document ::= prolog element Misc* 01404 try { 01405 Lx.GetSym(); 01406 // [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 01407 if (Lx.Sym==xsyXmlDecl){Lx.GetSym();} 01408 LoadTxtMiscStar(Lx); 01409 if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();} 01410 LoadTxtMiscStar(Lx); 01411 Doc->Tok=LoadTxtElement(Lx); 01412 LoadTxtMiscStar(Lx); 01413 Doc->Ok=true; Doc->MsgStr="Ok"; 01414 } 01415 catch (PExcept& Except){ 01416 Doc->Ok=false; Doc->MsgStr=Except->GetMsgStr(); 01417 } 01418 return Doc; 01419 } 01420 01421 PXmlDoc TXmlDoc::LoadTxt(const PSIn& SIn, const TXmlSpacing& Spacing){ 01422 TXmlLx Lx(SIn, Spacing); return LoadTxt(Lx); 01423 } 01424 01425 PXmlDoc TXmlDoc::LoadTxt(const TStr& FNm, const TXmlSpacing& Spacing){ 01426 PSIn SIn=TFIn::New(FNm); return LoadTxt(SIn, Spacing); 01427 } 01428 01429 void TXmlDoc::LoadTxt( 01430 const TStr& FNm, TXmlDocV& XmlDocV, const TXmlSpacing& Spacing){ 01431 XmlDocV.Clr(); 01432 PSIn SIn=TFIn::New(FNm); 01433 TXmlLx Lx(SIn, Spacing); 01434 PXmlDoc XmlDoc; 01435 forever { 01436 Lx.SkipWs(); 01437 XmlDoc=LoadTxt(Lx); 01438 if (XmlDoc->IsOk()){XmlDocV.Add(XmlDoc);} 01439 else {break;} 01440 } 01441 } 01442 01443 PXmlDoc TXmlDoc::LoadStr(const TStr& Str){ 01444 PSIn SIn=TStrIn::New(Str); 01445 return LoadTxt(SIn); 01446 } 01447 01448 void TXmlDoc::SaveStr(TStr& Str){ 01449 PSOut SOut=TMOut::New(); TMOut& MOut=*(TMOut*)SOut(); 01450 SaveTxt(SOut); 01451 Str=MOut.GetAsStr(); 01452 } 01453 01455 // Fast and dirty XML parser 01456 // very basic it does only <item>string</item>, no comments, no arguments 01457 TXmlLxSym TXmlParser::GetSym() { 01458 if (NextSym != xsyUndef) { 01459 Sym = NextSym; NextSym=xsyUndef; 01460 SymStr=NextSymStr; NextSymStr.Clr(); 01461 return Sym; 01462 } 01463 SymStr.Clr(); 01464 char Ch; 01465 while (TCh::IsWs(Ch=GetCh())) { } 01466 if (Ch == TCh::EofCh) { Sym = xsyEof; return xsyEof; } 01467 if (Ch == '<') { // load tag 01468 Ch = GetCh(); 01469 if (Ch == '/') { Sym = xsyETag; } 01470 else { Sym = xsySTag; SymStr.Push(Ch); } 01471 while((Ch=GetCh())!='>' && Ch!=TCh::EofCh) { SymStr.Push(Ch); } 01472 const int StrLen = SymStr.Len(); 01473 if (StrLen > 1 && SymStr[StrLen-1] == '/') { 01474 Sym = xsyETag; SymStr[StrLen-1] = 0; 01475 for (char *c = SymStr.CStr()+StrLen-2; TCh::IsWs(*c); c--) { *c=0; } 01476 } 01477 } else { // load string 01478 _SymStr.Clr(); _SymStr.Push(Ch); 01479 while (! RSIn.Eof() && RSIn.PeekCh() != '<') { _SymStr.Push(GetCh()); } 01480 GetPlainStrFromXmlStr(_SymStr, SymStr); 01481 Sym = xsyStr; 01482 } 01483 if (Ch == TCh::EofCh) { SymStr.Clr(); Sym = xsyEof; return xsyEof; } 01484 return Sym; 01485 } 01486 01487 TXmlLxSym TXmlParser::GetSym(TChA& _SymStr) { 01488 GetSym(); 01489 _SymStr = SymStr; 01490 return Sym; 01491 } 01492 01493 TXmlLxSym TXmlParser::PeekSym() { 01494 if (NextSym == xsyUndef) { 01495 const TXmlLxSym TmpSim=Sym; 01496 const TChA TmpSymStr=SymStr; 01497 NextSym=GetSym(NextSymStr); 01498 Sym=TmpSim; 01499 SymStr=TmpSymStr; 01500 } 01501 return NextSym; 01502 } 01503 01504 TXmlLxSym TXmlParser::PeekSym(TChA& _SymStr) { 01505 PeekSym(); 01506 _SymStr = NextSymStr; 01507 return NextSym; 01508 } 01509 01510 void TXmlParser::SkipTillTag(const TChA& _SymStr) { 01511 while(PeekSym() != xsyEof) { 01512 if (NextSymStr == _SymStr) { return; } 01513 GetSym(); 01514 } 01515 } 01516 01517 // get <tag>value</tag> 01518 void TXmlParser::GetTagVal(const TChA& TagStr, TChA& TagVal) { 01519 EAssertR(GetTag(TagStr) == xsySTag, TStr::Fmt("Expected '<%s>'. Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr()); 01520 EAssertR(GetSym(TagVal) == xsyStr, "Expected string tag."); 01521 EAssertR(GetTag(TagStr) == xsyETag, TStr::Fmt("Expected '</%s>'. Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr()); 01522 } 01523 01524 TXmlLxSym TXmlParser::GetTag(const TChA& TagStr) { 01525 GetSym(); 01526 EAssertR(TagStr==SymStr, TStr::Fmt("Expected xml symbol '%s'. Found '%s'", 01527 TagStr.CStr(), SymStr.CStr()).CStr()); 01528 return Sym; 01529 } 01530 01531 void TXmlParser::GetPlainStrFromXmlStr(const TChA& XmlStr, TChA& PlainChA) { 01532 static TChA EntityNm; 01533 PlainChA.Clr(); 01534 const char *Ch = XmlStr.CStr(); 01535 while (*Ch){ 01536 if (*Ch!='&'){ PlainChA+=*Ch; Ch++; } 01537 else { 01538 if (*++Ch=='#'){ 01539 TChA RefChA; int RefCd=0; 01540 if (*++Ch=='x'){ 01541 forever { Ch++; 01542 if (TCh::IsHex(*Ch)){ RefChA+=*Ch; RefCd=RefCd*16+TCh::GetHex(*Ch); } 01543 else { break; } } 01544 } else { // decimal character code 01545 forever { 01546 if (TCh::IsNum(*Ch)){ RefChA+=*Ch; RefCd=RefCd*10+TCh::GetNum(*Ch); } 01547 else { break; } Ch++; } 01548 } 01549 if ((!RefChA.Empty())&&(*Ch==';')){ 01550 Ch++; const uchar RefCh=uchar(RefCd); PlainChA+=RefCh; } 01551 } else { 01552 EntityNm.Clr(); 01553 while ((*Ch)&&(*Ch!=';')){EntityNm+=*Ch; Ch++;} 01554 if ((!EntityNm.Empty())&&(*Ch==';')){ Ch++; 01555 if (EntityNm=="quot"){PlainChA+='"';} 01556 else if (EntityNm=="amp"){PlainChA+='&';} 01557 else if (EntityNm=="apos"){PlainChA+='\'';} 01558 else if (EntityNm=="lt"){PlainChA+='<';} 01559 else if (EntityNm=="gt"){PlainChA+='>';} 01560 } 01561 } 01562 } 01563 } 01564 }