SNAP Library, Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // Xml-Object-Saving 00003 TStrStrH TXmlObjSer::TypeNmToTagNmH; 00004 00005 TStr TXmlObjSer::GetTagNm(const TStr& TypeNm){ 00006 TStr& XmlTagNm=TypeNmToTagNmH.AddDat(TypeNm); 00007 if (XmlTagNm.Empty()){ 00008 TChA XmlTagChA=TypeNm; 00009 for (int ChN=0; ChN<XmlTagChA.Len(); ChN++){ 00010 char Ch=XmlTagChA[ChN]; 00011 if (!((('A'<=Ch)&&(Ch<='Z'))||(('a'<=Ch)&&(Ch<='z'))||(('0'<=Ch)&&(Ch<='9')))){ 00012 XmlTagChA.PutCh(ChN, '_'); 00013 } 00014 } 00015 while ((XmlTagChA.Len()>0)&&(XmlTagChA.LastCh()=='_')){ 00016 XmlTagChA.Pop();} 00017 XmlTagNm=XmlTagChA; 00018 } 00019 return XmlTagNm; 00020 } 00021 00022 void TXmlObjSer::AssertXmlHd( 00023 const PXmlTok& XmlTok, const TStr& Nm, const TStr& TypeNm){ 00024 // check if the token is full 00025 EAssertR(!XmlTok.Empty(), "Xml-Token Empty"); 00026 // if name is empty then tag=type else tag=name 00027 if (!Nm.Empty()){ 00028 // check if the token is tag 00029 if (!XmlTok->IsTag()){ 00030 TStr ArgStr1="Expected: Tag"; 00031 TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr(); 00032 TExcept::Throw("Invalid Xml-Token", ArgStr1, ArgStr2); 00033 } 00034 if (Nm!="-"){ 00035 // check if the tag is correct 00036 if (!XmlTok->IsTag(Nm)){ 00037 TStr ArgStr1=TStr("Expected: ")+Nm; 00038 TStr ArgStr2=TStr("Found: ")+XmlTok->GetStr(); 00039 TExcept::Throw("Invalid Xml-Tag", ArgStr1, ArgStr2); 00040 } 00041 // check if the type is correct 00042 TStr TypeArgVal=XmlTok->GetStrArgVal("Type"); 00043 if (TypeArgVal!=TypeNm){ 00044 TStr ArgStr1=TStr("Expected: ")+TypeNm; 00045 TStr ArgStr2=TStr("Found: ")+TypeArgVal; 00046 TExcept::Throw("Invalid Xml-Type", ArgStr1, ArgStr2); 00047 } 00048 } 00049 } else { 00050 // check if the tag is correct 00051 if (!XmlTok->IsTag(TypeNm)){ 00052 TStr ArgStr1=TStr("Expected: ")+TypeNm; 00053 TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr(); 00054 TExcept::Throw("Invalid Xml-Type-Tag", ArgStr1, ArgStr2); 00055 } 00056 } 00057 } 00058 00059 bool TXmlObjSer::GetBoolArg(const PXmlTok& XmlTok, const 
TStr& Nm){ 00060 TStr ValStr; 00061 if (XmlTok->IsArg(Nm, ValStr)){ 00062 bool Val; 00063 if (ValStr.IsBool(Val)){ 00064 return Val; 00065 } else { 00066 TExcept::Throw("Invalid Xml-Argument Boolean-Value", Nm, ValStr); 00067 } 00068 } else { 00069 TExcept::Throw("Xml-Argument Missing", Nm); 00070 } 00071 Fail; return 0; 00072 } 00073 00074 int TXmlObjSer::GetIntArg(const PXmlTok& XmlTok, const TStr& Nm){ 00075 TStr ValStr; 00076 if (XmlTok->IsArg(Nm, ValStr)){ 00077 int Val; 00078 if (ValStr.IsInt(Val)){ 00079 return Val; 00080 } else { 00081 TExcept::Throw("Invalid Xml-Argument Integer-Value", Nm, ValStr); 00082 } 00083 } else { 00084 TExcept::Throw("Xml-Argument Missing", Nm); 00085 } 00086 Fail; return 0; 00087 } 00088 00089 int64 TXmlObjSer::GetInt64Arg(const PXmlTok& XmlTok, const TStr& Nm){ 00090 TStr ValStr; 00091 if (XmlTok->IsArg(Nm, ValStr)){ 00092 int64 Val; 00093 if (ValStr.IsInt64(Val)){ 00094 return Val; 00095 } else { 00096 TExcept::Throw("Invalid Xml-Argument Integer64-Value", Nm, ValStr); 00097 } 00098 } else { 00099 TExcept::Throw("Xml-Argument Missing", Nm); 00100 } 00101 Fail; return 0; 00102 } 00103 00104 double TXmlObjSer::GetFltArg(const PXmlTok& XmlTok, const TStr& Nm){ 00105 TStr ValStr; 00106 if (XmlTok->IsArg(Nm, ValStr)){ 00107 double Val; 00108 if (ValStr.IsFlt(Val)){ 00109 return Val; 00110 } else { 00111 TExcept::Throw("Invalid Xml-Argument Double-Value", Nm, ValStr); 00112 } 00113 } else { 00114 TExcept::Throw("Xml-Argument Missing", Nm); 00115 } 00116 Fail; return 0; 00117 } 00118 00120 // Xml-Object-Serialization-Tag-Name 00121 TXmlObjSerTagNm::TXmlObjSerTagNm( 00122 TSOut& _SOut, const bool& ETagP, 00123 const TStr& Nm, const TStr& TypeNm, 00124 const TStr& ArgNm, const TStr& ArgVal): 00125 TagNm(), SOut(&_SOut){ 00126 if (Nm!="-"){ 00127 SOut->PutCh('<'); 00128 if (Nm.Empty()){ 00129 SOut->PutStr(TagNm=TypeNm); 00130 } else { 00131 SOut->PutStr(TagNm=Nm); 00132 SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"'); 
00133 } 00134 if (!ArgNm.Empty()){ 00135 SOut->PutCh(' '); SOut->PutStr(ArgNm); SOut->PutCh('='); 00136 SOut->PutCh('"'); SOut->PutStr(ArgVal); SOut->PutCh('"'); 00137 } 00138 if (ETagP){ 00139 SOut->PutCh('/'); TagNm="";} 00140 SOut->PutCh('>'); 00141 } 00142 } 00143 00144 TXmlObjSerTagNm::TXmlObjSerTagNm( 00145 TSOut& _SOut, const bool& ETagP, 00146 const TStr& Nm, const TStr& TypeNm, 00147 const TStr& ArgNm1, const TStr& ArgVal1, 00148 const TStr& ArgNm2, const TStr& ArgVal2, 00149 const TStr& ArgNm3, const TStr& ArgVal3, 00150 const TStr& ArgNm4, const TStr& ArgVal4): 00151 TagNm(), SOut(&_SOut){ 00152 if (Nm!="-"){ 00153 SOut->PutCh('<'); 00154 if (Nm.Empty()){ 00155 SOut->PutStr(TagNm=TypeNm); 00156 } else { 00157 SOut->PutStr(TagNm=Nm); 00158 SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"'); 00159 } 00160 if (!ArgNm1.Empty()){ 00161 SOut->PutCh(' '); SOut->PutStr(ArgNm1); SOut->PutCh('='); 00162 SOut->PutCh('"'); SOut->PutStr(ArgVal1); SOut->PutCh('"'); 00163 } 00164 if (!ArgNm2.Empty()){ 00165 SOut->PutCh(' '); SOut->PutStr(ArgNm2); SOut->PutCh('='); 00166 SOut->PutCh('"'); SOut->PutStr(ArgVal2); SOut->PutCh('"'); 00167 } 00168 if (!ArgNm3.Empty()){ 00169 SOut->PutCh(' '); SOut->PutStr(ArgNm3); SOut->PutCh('='); 00170 SOut->PutCh('"'); SOut->PutStr(ArgVal3); SOut->PutCh('"'); 00171 } 00172 if (!ArgNm4.Empty()){ 00173 SOut->PutCh(' '); SOut->PutStr(ArgNm4); SOut->PutCh('='); 00174 SOut->PutCh('"'); SOut->PutStr(ArgVal4); SOut->PutCh('"'); 00175 } 00176 if (ETagP){ 00177 SOut->PutCh('/'); TagNm="";} 00178 SOut->PutCh('>'); 00179 } 00180 } 00181 00182 TXmlObjSerTagNm::~TXmlObjSerTagNm(){ 00183 if (!TagNm.Empty()){ 00184 SOut->PutCh('<'); SOut->PutCh('/'); SOut->PutStr(TagNm); SOut->PutCh('>'); 00185 } 00186 } 00187 00189 // Xml-Chars 00190 void TXmlChDef::SetChTy(TBSet& ChSet, const int& MnCh, const int& MxCh){ 00191 IAssert((0<=MnCh)&&((MxCh==-1)||((MnCh<=MxCh)&&(MxCh<Chs)))); 00192 ChSet.Incl(MnCh); 00193 for (int Ch=MnCh+1; Ch<=MxCh; Ch++){ 
00194 ChSet.Incl(Ch);} 00195 } 00196 00197 void TXmlChDef::SetChTy(TBSet& ChSet, const TStr& Str){ 00198 for (int ChN=0; ChN<Str.Len(); ChN++){ 00199 uchar Ch=Str[ChN]; 00200 ChSet.Incl(Ch); 00201 } 00202 } 00203 00204 void TXmlChDef::SetEntityVal(const TStr& Nm, const TStr& Val){ 00205 EntityNmToValH.AddDat(Nm, Val); 00206 } 00207 00208 TXmlChDef::TXmlChDef(): 00209 Chs(TUCh::Vals), 00210 CharChSet(), CombChSet(), ExtChSet(), 00211 LetterChSet(), DigitChSet(), NameChSet(), PubidChSet(), 00212 EntityNmToValH(100){ 00213 00214 // Character-Sets 00215 // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | ... 00216 CharChSet.Gen(Chs); 00217 // ... because of DMoz (temporary patch) 00218 SetChTy(CharChSet, 0x1); SetChTy(CharChSet, 0x3); SetChTy(CharChSet, 0x6); 00219 SetChTy(CharChSet, 11); SetChTy(CharChSet, 24); SetChTy(CharChSet, 27); 00220 // regular characters 00221 SetChTy(CharChSet, 0x9); SetChTy(CharChSet, 0xA); SetChTy(CharChSet, 0xD); 00222 SetChTy(CharChSet, 0x20, TUCh::Mx); 00223 // BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | 00224 // [#x00D8-#x00F6] | [#x00F8-#x00FF] | ... 00225 TBSet BaseChSet(Chs); 00226 SetChTy(BaseChSet, 0x41, 0x5A); SetChTy(BaseChSet, 0x61, 0x7A); 00227 SetChTy(BaseChSet, 0xC0, 0xD6); SetChTy(BaseChSet, 0xD8, 0xF6); 00228 SetChTy(BaseChSet, 0xF8, 0xFF); 00229 // Ideographic ::= ... 00230 TBSet IdeoChSet(Chs); 00231 // CombiningChar ::= ... 00232 CombChSet.Gen(Chs); 00233 // Extender ::= #x00B7 | ... 00234 ExtChSet.Gen(Chs); 00235 SetChTy(ExtChSet, 0xB7); 00236 // Letter ::= BaseChar | Ideographic 00237 LetterChSet=BaseChSet|IdeoChSet; 00238 // Digit ::= [#x0030-#x0039] | ... 00239 DigitChSet.Gen(Chs); 00240 SetChTy(DigitChSet, 0x30, 0x39); 00241 // NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | CombiningChar 00242 NameChSet=LetterChSet|DigitChSet| 00243 uchar('.')|uchar('-')|uchar('_')|uchar(':')|CombChSet; 00244 // PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 00245 PubidChSet.Gen(Chs); 00246 SetChTy(PubidChSet, 0x20); SetChTy(PubidChSet, 0xD); SetChTy(PubidChSet, 0xA); 00247 SetChTy(PubidChSet, 'a', 'z'); SetChTy(PubidChSet, 'A', 'Z'); 00248 SetChTy(PubidChSet, '0', '9'); SetChTy(PubidChSet, "-'()+,./:=?;!*#@$_%"); 00249 00250 // Standard-Entity-Sequences 00251 SetEntityVal("amp", "&"); 00252 SetEntityVal("lt", "<"); SetEntityVal("gt", ">"); 00253 SetEntityVal("apos", "'"); SetEntityVal("quot", "\""); 00254 } 00255 00257 // Xml-Lexical 00258 TXmlChDef TXmlLx::ChDef; 00259 00260 uchar TXmlLx::GetCh(){ 00261 EAssert(Ch!=TCh::EofCh); 00262 PrevCh=Ch; 00263 if (ChStack.Empty()){Ch=(RSIn.Eof()) ? TCh::EofCh : RSIn.GetCh();} 00264 else {Ch=ChStack.Pop();} 00265 ChN++; if (Ch==TCh::LfCh){LnN++; LnChN=0;} else {LnChN++;} 00266 //putchar(Ch); 00267 return Ch; 00268 } 00269 00270 void TXmlLx::ToNrSpacing(){ 00271 if (Spacing==xspIntact){ 00272 } else 00273 if (Spacing==xspPreserve){ 00274 int SrcChN=0; int DstChN=0; 00275 while (SrcChN<TxtChA.Len()){ 00276 if (TxtChA[SrcChN]==TCh::CrCh){ 00277 TxtChA.PutCh(DstChN, TCh::LfCh); SrcChN++; DstChN++; 00278 if ((SrcChN<TxtChA.Len())&&(TxtChA[SrcChN]==TCh::LfCh)){SrcChN++;} 00279 } else { 00280 if (SrcChN!=DstChN){ 00281 TxtChA.PutCh(DstChN, TxtChA[SrcChN]);} 00282 SrcChN++; DstChN++; 00283 } 00284 } 00285 TxtChA.Trunc(DstChN); 00286 } else 00287 if (Spacing==xspSeparate){ 00288 // squeeze series of white-spaces to single space 00289 int SrcChN=0; int DstChN=0; 00290 while (SrcChN<TxtChA.Len()){ 00291 if (ChDef.IsWs(TxtChA[SrcChN])){ 00292 if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){ 00293 SrcChN++; 00294 } else { 00295 TxtChA.PutCh(DstChN, ' '); 00296 SrcChN++; DstChN++; 00297 } 00298 } else { 00299 TxtChA.PutCh(DstChN, TxtChA[SrcChN]); 00300 SrcChN++; DstChN++; 00301 } 00302 } 
00303 TxtChA.Trunc(DstChN); 00304 } else 00305 if (Spacing==xspTruncate){ 00306 // cut leading and trailing white-spaces and 00307 // squeeze series of white-spaces to single space 00308 int SrcChN=0; int DstChN=0; 00309 while (SrcChN<TxtChA.Len()){ 00310 if (ChDef.IsWs(TxtChA[SrcChN])){ 00311 if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){ 00312 SrcChN++; 00313 } else { 00314 TxtChA.PutCh(DstChN, ' '); 00315 SrcChN++; DstChN++; 00316 } 00317 } else { 00318 TxtChA.PutCh(DstChN, TxtChA[SrcChN]); 00319 SrcChN++; DstChN++; 00320 } 00321 } 00322 TxtChA.Trunc(DstChN); 00323 // delete trailing white-spaces 00324 while ((TxtChA.Len()>0)&&(ChDef.IsWs(TxtChA.LastCh()))){ 00325 TxtChA.Pop();} 00326 } else { 00327 Fail; 00328 } 00329 } 00330 00331 void TXmlLx::GetWs(const bool& IsRq){ 00332 // [3] S ::= (#x20 | #x9 | #xD | #xA)+ 00333 int WSpaces=0; TxtChA.Clr(); 00334 while (ChDef.IsWs(Ch)){ 00335 WSpaces++; TxtChA+=Ch; GetCh();} 00336 if (IsRq&&(WSpaces==0)){ 00337 EThrow("White-space required.");} 00338 } 00339 00340 TStr TXmlLx::GetReference(){ 00341 // [67] Reference ::= EntityRef | CharRef 00342 if (Ch=='#'){ 00343 // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 00344 TChA RefChA; int RefCd=0; 00345 if (GetCh()=='x'){ 00346 // hex-decimal character code 00347 forever { 00348 GetCh(); 00349 if (TCh::IsHex(Ch)){ 00350 RefChA+=Ch; 00351 RefCd=RefCd*16+TCh::GetHex(Ch); 00352 } else { 00353 break; 00354 } 00355 } 00356 } else { 00357 // decimal character code 00358 forever { 00359 if (TCh::IsNum(Ch)){ 00360 RefChA+=Ch; 00361 RefCd=RefCd*10+TCh::GetNum(Ch); 00362 } else { 00363 break; 00364 } 00365 GetCh(); 00366 } 00367 } 00368 if ((!RefChA.Empty())&&(Ch==';')){ 00369 GetCh(); 00370 if (RefCd < 0x100) { 00371 // 8-bit char 00372 uchar RefCh=uchar(RefCd); 00373 return TStr(RefCh); 00374 } else { 00375 TStr ResStr = TUStr::EncodeUtf8(RefCd); 00376 return ResStr; 00377 } 00378 } else { 00379 EThrow("Invalid Char-Reference."); Fail; return TStr(); 00380 } 00381 } else { 
00382 // [68] EntityRef ::= '&' Name ';' 00383 TStr EntityNm=GetName(); 00384 if ((!EntityNm.Empty())&&(Ch==';')){ 00385 GetCh(); 00386 TStr EntityVal; 00387 if (IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/} 00388 else if (ChDef.IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/} 00389 else {EThrow(TStr("Entity-Reference (")+EntityNm+") does not exist.");} 00390 return EntityVal; 00391 } else { 00392 EThrow("Invalid Entity-Reference."); Fail; return TStr(); 00393 } 00394 } 00395 } 00396 00397 TStr TXmlLx::GetPEReference(){ 00398 // [69] PEReference ::= '%' Name ';' 00399 TStr EntityNm=GetName(); 00400 if ((EntityNm.Empty())||(Ch!=';')){EThrow("Invalid PEntity-Reference.");} 00401 GetCh(); 00402 TStr EntityVal; 00403 if (IsPEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/} 00404 else {EThrow(TStr("PEntity-Reference (")+EntityNm+") does not exist.");} 00405 return EntityVal; 00406 } 00407 00408 void TXmlLx::GetEq(){ 00409 // [25] Eq ::= S? '=' S? 00410 GetWs(false); 00411 if (Ch=='='){GetCh();} 00412 else {EThrow("Equality ('=') character expected.");} 00413 GetWs(false); 00414 } 00415 00416 TStr TXmlLx::GetName(){ 00417 // [5] Name ::= (Letter | '_' | ':') (NameChar)* 00418 TChA NmChA; 00419 if (ChDef.IsFirstNameCh(Ch)){ 00420 do {NmChA+=Ch;} while (ChDef.IsName(GetCh())); 00421 } else { 00422 EThrow("Invalid first name character."); 00423 // EThrow(TStr::Fmt("Invalid first name character [%u:'%c%c%c%c%c'].", 00424 // uint(Ch), Ch, RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh())); 00425 } 00426 return NmChA; 00427 } 00428 00429 TStr TXmlLx::GetName(const TStr& RqNm){ 00430 TStr Nm=GetName(); 00431 // test if the name is equal to the required name 00432 if (Nm==RqNm){return RqNm;} 00433 else {EThrow(TStr("Name '")+RqNm+"' expected."); Fail; return TStr();} 00434 } 00435 00436 void TXmlLx::GetComment(){ 00437 // [15] Comment ::= {{'<!-}}-' ((Char - '-') | ('-' (Char - '-')))* '-->' 00438 if (GetCh()!='-'){EThrow("Invalid comment start.");} 
00439 TxtChA.Clr(); 00440 forever { 00441 GetCh(); 00442 if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");} 00443 if (Ch=='-'){ 00444 if (GetCh()=='-'){ 00445 if (GetCh()=='>'){GetCh(); break;} // final bracket 00446 else {EThrow("Invalid comment end.");} 00447 } else { 00448 if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");} 00449 TxtChA+='-'; TxtChA+=Ch; // special case if single '-' 00450 } 00451 } else { 00452 TxtChA+=Ch; // usual char 00453 } 00454 } 00455 } 00456 00457 TStr TXmlLx::GetAttValue(){ 00458 // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' 00459 // | "'" ([^<&'] | Reference)* "'" 00460 uchar QCh=Ch; 00461 if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid attribute-value start.");} 00462 TChA ValChA; GetCh(); 00463 forever { 00464 if ((Ch=='<')||(!ChDef.IsChar(Ch))){ 00465 EThrow("Invalid attribute-value character.");} 00466 if (Ch==QCh){GetCh(); break;} // final quote 00467 else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference 00468 else {ValChA+=Ch; GetCh();} // usual char 00469 } 00470 return ValChA; 00471 } 00472 00473 TStr TXmlLx::GetVersionNum(){ 00474 // [24] VersionInfo ::= {{S 'version' Eq}} (' VersionNum ' | " VersionNum ") 00475 // [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 00476 char QCh=Ch; 00477 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 00478 TChA VerNumChA; 00479 GetCh(); 00480 do { 00481 if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))|| 00482 (('0'<=Ch)&&(Ch<='9'))||(Ch=='_')||(Ch=='.')||(Ch==':')||(Ch=='-')){ 00483 VerNumChA+=Ch; 00484 } else { 00485 EThrow("Invalid version-number character."); 00486 } 00487 GetCh(); 00488 } while (Ch!=QCh); 00489 GetCh(); 00490 return VerNumChA; 00491 } 00492 00493 TStr TXmlLx::GetEncName(){ 00494 // [80] EncodingDecl ::= {{S 'encoding' Eq}} ('"' EncName '"' | "'" EncName "'" ) 00495 // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 00496 char QCh=Ch; 00497 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 
00498 TChA EncNmChA; 00499 GetCh(); 00500 if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))){EncNmChA+=Ch;} 00501 else {EThrow("Invalid encoding-name character.");} 00502 GetCh(); 00503 while (Ch!=QCh){ 00504 if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))|| 00505 (('0'<=Ch)&&(Ch<='9'))||(Ch=='.')||(Ch=='_')||(Ch=='-')){EncNmChA+=Ch;} 00506 else {EThrow("Invalid version-number character.");} 00507 GetCh(); 00508 } 00509 GetCh(); 00510 return EncNmChA; 00511 } 00512 00513 TStr TXmlLx::GetStalVal(){ 00514 // [32] SDDecl ::= {{S 'standalone' Eq}} 00515 // (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) 00516 char QCh=Ch; 00517 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 00518 TChA StalChA; 00519 GetCh(); 00520 while (Ch!=QCh){ 00521 if (('a'<=Ch)&&(Ch<='z')){StalChA+=Ch;} 00522 else {EThrow("Invalid standalone-value character.");} 00523 GetCh(); 00524 } 00525 GetCh(); 00526 TStr StalVal=StalChA; 00527 if ((StalVal=="yes")||(StalVal=="no")){return StalVal;} 00528 else {EThrow("Invalid standalone-value."); Fail; return TStr();} 00529 } 00530 00531 void TXmlLx::GetXmlDecl(){ 00532 // [23] XMLDecl ::= {{'<?xml'}}... VersionInfo EncodingDecl? SDDecl? S? 
'?>' 00533 // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 00534 GetWs(true); 00535 TStr VerNm=GetName("version"); GetEq(); TStr VerVal=GetVersionNum(); 00536 if (VerVal!="1.0"){EThrow("Invalid XML version.");} 00537 AddArg(VerNm, VerVal); 00538 GetWs(false); 00539 if (Ch!='?'){ 00540 // EncodingDecl ::= {{S}} 'encoding' Eq 00541 // ('"' EncName '"' | "'" EncName "'" ) 00542 TStr EncNm=GetName("encoding"); GetEq(); TStr EncVal=GetEncName(); 00543 AddArg(EncNm, EncVal); 00544 } 00545 GetWs(false); 00546 if (Ch!='?'){ 00547 // SDDecl ::= {{S}} 'standalone' Eq 00548 // (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) 00549 TStr StalNm=GetName("standalone"); GetEq(); TStr StalVal=GetStalVal(); 00550 AddArg(StalNm, StalVal); 00551 } 00552 GetWs(false); 00553 if (Ch=='?'){ 00554 GetCh(); 00555 if (Ch=='>'){GetCh();} 00556 else {EThrow("Invalid end-of-tag in XML-declaration.");} 00557 } else { 00558 EThrow("Invalid end-of-tag in XML-declaration."); 00559 } 00560 } 00561 00562 void TXmlLx::GetPI(){ 00563 // [16] PI ::= {{'<?' PITarget}} (S (Char* - (Char* '?>' Char*)))? '?>' 00564 // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 00565 GetWs(false); 00566 TxtChA.Clr(); 00567 forever { 00568 if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");} 00569 if (Ch=='?'){ 00570 if (GetCh()=='>'){ 00571 GetCh(); break; 00572 } else { 00573 if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");} 00574 TxtChA+='?'; TxtChA+=Ch; // special case if single '?' 
00575 } 00576 } else { 00577 TxtChA+=Ch; // usual char 00578 } 00579 GetCh(); 00580 } 00581 } 00582 00583 TStr TXmlLx::GetSystemLiteral(){ 00584 // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 00585 char QCh=Ch; 00586 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 00587 TChA LitChA; GetCh(); 00588 while (Ch!=QCh){ 00589 if (!ChDef.IsChar(Ch)){EThrow("Invalid System-Literal character.");} 00590 LitChA+=Ch; GetCh(); 00591 } 00592 GetCh(); 00593 return LitChA; 00594 } 00595 00596 TStr TXmlLx::GetPubidLiteral(){ 00597 // [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 00598 char QCh=Ch; 00599 if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");} 00600 TChA LitChA; GetCh(); 00601 while (Ch!=QCh){ 00602 if (!ChDef.IsPubid(Ch)){EThrow("Invalid Public-Id-Literal character.");} 00603 LitChA+=Ch; GetCh(); 00604 } 00605 GetCh(); 00606 return LitChA; 00607 } 00608 00609 void TXmlLx::GetExternalId(){ 00610 // ExternalID ::= 'SYSTEM' S SystemLiteral 00611 // | 'PUBLIC' S PubidLiteral S SystemLiteral 00612 TStr ExtIdNm=GetName(); 00613 if (ExtIdNm=="SYSTEM"){ 00614 GetWs(true); GetSystemLiteral(); 00615 } else if (ExtIdNm=="PUBLIC"){ 00616 GetWs(true); GetPubidLiteral(); GetWs(true); GetSystemLiteral(); 00617 } else { 00618 EThrow("Invalid external-id ('SYSTEM' or 'PUBLIC' expected)."); 00619 } 00620 } 00621 00622 void TXmlLx::GetNData(){ 00623 // [76] NDataDecl ::= S 'NDATA' S Name 00624 GetName("NDATA"); GetWs(true); GetName(); 00625 } 00626 00627 void TXmlLx::GetDocTypeDecl(){ 00628 // [28] doctypedecl ::= {{'<!DOCTYPE'}} S Name (S ExternalID)? S? 00629 // ('[' (markupdecl | PEReference | S)* ']' S?)? 
'>' 00630 GetWs(true); 00631 TStr DocTypeDeclNm=GetName(); 00632 GetWs(false); 00633 if (Ch=='>'){GetCh(); return;} 00634 if (Ch!='['){GetExternalId();} 00635 GetWs(false); 00636 if (Ch=='['){ 00637 GetCh(); 00638 // [28] (markupdecl | PEReference | S)* 00639 GetWs(false); 00640 while (Ch!=']'){ 00641 if (ChDef.IsWs(Ch)){GetWs(true);} 00642 else if (Ch=='%'){GetPEReference();} 00643 else { 00644 GetSym(); 00645 } 00646 } 00647 GetCh(); 00648 } 00649 GetWs(false); 00650 // '>' 00651 if (Ch=='>'){GetCh();} 00652 else {EThrow("Invalid end-of-tag in document-type-declaration.");} 00653 TagNm=DocTypeDeclNm; 00654 } 00655 00656 void TXmlLx::GetElement(){ 00657 TxtChA.Clr(); 00658 while (Ch!='>'){ 00659 if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");} 00660 TxtChA+=Ch; GetCh(); 00661 } 00662 GetCh(); 00663 } 00664 00665 void TXmlLx::GetAttList(){ 00666 TxtChA.Clr(); 00667 while (Ch!='>'){ 00668 if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");} 00669 TxtChA+=Ch; GetCh(); 00670 } 00671 GetCh(); 00672 } 00673 00674 TStr TXmlLx::GetEntityValue(){ 00675 // [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' 00676 // | "'" ([^%&'] | PEReference | Reference)* "'" 00677 uchar QCh=Ch; 00678 if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid entity-value start.");} 00679 TChA ValChA; GetCh(); 00680 forever { 00681 if (!ChDef.IsChar(Ch)){EThrow("Invalid entity-value character.");} 00682 if (Ch==QCh){GetCh(); break;} // final quote 00683 else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference 00684 else if (Ch=='%'){GetCh(); ValChA+=GetPEReference();} // pereference 00685 else {ValChA+=Ch; GetCh();} // usual char 00686 } 00687 return ValChA; 00688 } 00689 00690 void TXmlLx::GetEntity(){ 00691 // [70] EntityDecl ::= GEDecl | PEDecl 00692 // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 00693 // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? 
'>' 00694 GetWs(true); TStr EntityNm; 00695 if (Ch=='%'){ 00696 GetCh(); GetWs(true); EntityNm=GetName(); GetWs(true); 00697 // [74] PEDef ::= EntityValue | ExternalID 00698 if ((Ch=='\"')||(Ch=='\'')){ 00699 TStr EntityVal=GetEntityValue(); 00700 PutPEntityVal(EntityNm, EntityVal); 00701 } else { 00702 GetExternalId(); 00703 GetWs(false); 00704 if (Ch!='>'){GetNData();} 00705 } 00706 } else { 00707 EntityNm=GetName(); GetWs(true); 00708 // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 00709 if ((Ch=='\"')||(Ch=='\'')){ 00710 TStr EntityVal=GetEntityValue(); 00711 PutEntityVal(EntityNm, EntityVal); 00712 } else { 00713 GetExternalId(); 00714 } 00715 } 00716 GetWs(false); 00717 if (Ch=='>'){GetCh();} 00718 else {EThrow("Invalid end-of-tag in entity-declaration.");} 00719 TagNm=EntityNm; 00720 } 00721 00722 void TXmlLx::GetNotation(){ 00723 // [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 00724 // [83] PublicID ::= 'PUBLIC' S PubidLiteral 00725 TxtChA.Clr(); 00726 while (Ch!='>'){ 00727 if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");} 00728 TxtChA+=Ch; GetCh(); 00729 } 00730 GetCh(); 00731 } 00732 00733 void TXmlLx::GetCDSect(){ 00734 // [18] CDSect ::= CDStart CData CDEnd 00735 // [19] CDStart ::= '<![CDATA{{['}} 00736 // [20] CData ::= (Char* - (Char* ']]>' Char*)) 00737 // [21] CDEnd ::= ']]>' 00738 if (Ch=='['){GetCh();} 00739 else {EThrow("Invalid start of CDATA section.");} 00740 TxtChA.Clr(); 00741 forever { 00742 if (!ChDef.IsChar(Ch)){EThrow("Invalid CDATA character.");} 00743 if ((Ch=='>')&&(TxtChA.Len()>=2)&& 00744 (TxtChA.LastLastCh()==']') && (TxtChA.LastCh()==']')){ 00745 GetCh(); TxtChA.Pop(); TxtChA.Pop(); break; 00746 } else { 00747 TxtChA+=Ch; GetCh(); 00748 } 00749 } 00750 } 00751 00752 void TXmlLx::SkipWs(){ 00753 // [3] S ::= (#x20 | #x9 | #xD | #xA)+ 00754 while (ChDef.IsWs(Ch)){GetCh();} 00755 } 00756 00757 TXmlLxSym TXmlLx::GetSym(){ 00758 if (Ch=='<'){ 00759 GetCh(); ClrArgV(); 00760 if 
(Ch=='?'){ 00761 GetCh(); TagNm=GetName(); 00762 if (TagNm.GetLc()=="xml"){Sym=xsyXmlDecl; GetXmlDecl();} 00763 else {Sym=xsyPI; GetPI();} 00764 } else 00765 if (Ch=='!'){ 00766 GetCh(); 00767 if (Ch=='['){ 00768 GetCh(); TagNm=GetName(); 00769 if (TagNm=="CDATA"){Sym=xsyQStr; GetCDSect();} 00770 else {EThrow(TStr("Invalid tag after '<![' (")+TagNm+").");} 00771 } else 00772 if (Ch=='-'){ 00773 Sym=xsyComment; GetComment(); 00774 } else { 00775 TagNm=GetName(); 00776 if (TagNm=="DOCTYPE"){GetDocTypeDecl(); Sym=xsyDocTypeDecl;} 00777 else if (TagNm=="ELEMENT"){GetElement(); Sym=xsyElement;} 00778 else if (TagNm=="ATTLIST"){GetAttList(); Sym=xsyAttList;} 00779 else if (TagNm=="ENTITY"){GetEntity(); Sym=xsyEntity;} 00780 else if (TagNm=="NOTATION"){GetNotation(); Sym=xsyNotation;} 00781 else {EThrow(TStr("Invalid tag (")+TagNm+").");} 00782 } 00783 } else 00784 if (Ch=='/'){ 00785 // xsyETag 00786 GetCh(); Sym=xsyETag; TagNm=GetName(); GetWs(false); 00787 if (Ch=='>'){GetCh();} 00788 else {EThrow("Invalid End-Tag.");} 00789 } else { 00790 // xsySTag or xsySETag 00791 TagNm=GetName(); GetWs(false); 00792 while ((Ch!='>')&&(Ch!='/')){ 00793 TStr AttrNm=GetName(); 00794 GetEq(); 00795 TStr AttrVal=GetAttValue(); 00796 GetWs(false); 00797 AddArg(AttrNm, AttrVal); 00798 } 00799 if (Ch=='/'){ 00800 if (GetCh()=='>'){Sym=xsySETag; GetCh();} 00801 else {EThrow("Invalid Empty-Element-Tag.");} 00802 } else { 00803 Sym=xsySTag; GetCh(); 00804 } 00805 } 00806 if (Spacing==xspTruncate){SkipWs();} 00807 } else 00808 if (ChDef.IsWs(Ch)){ 00809 Sym=xsyWs; GetWs(true); ToNrSpacing(); 00810 if (Spacing==xspTruncate){GetSym();} 00811 } else 00812 if (Ch==TCh::EofCh){ 00813 Sym=xsyEof; 00814 } else { 00815 Sym=xsyStr; TxtChA.Clr(); 00816 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 00817 forever { 00818 if (!ChDef.IsChar(Ch)){ 00819 EThrow(TUInt::GetStr(Ch, "Invalid character (%d)."));} 00820 // GetCh(); continue; // skip invalid characters 00821 if (Ch=='<'){break;} // tag 00822 
if (Ch=='&'){GetCh(); TxtChA+=GetReference();} // reference 00823 else { 00824 if ((Ch=='>')&&(TxtChA.Len()>=2)&& 00825 (TxtChA.LastLastCh()==']')&&(TxtChA.LastCh()==']')){ 00826 EThrow("Forbidden substring ']]>' in character data.");} 00827 TxtChA+=Ch; GetCh(); // usual char 00828 } 00829 } 00830 ToNrSpacing(); 00831 } 00832 return Sym; 00833 } 00834 00835 TStr TXmlLx::GetSymStr() const { 00836 TChA SymChA; 00837 switch (Sym){ 00838 case xsyUndef: 00839 SymChA="{Undef}"; break; 00840 case xsyWs: 00841 SymChA+="{Space:'"; SymChA+=TStr(TxtChA).GetHex(); SymChA+="'}"; break; 00842 case xsyComment: 00843 SymChA+="<!--"; SymChA+=TxtChA; SymChA+="-->"; break; 00844 case xsyXmlDecl:{ 00845 SymChA+="<?"; SymChA+=TagNm; 00846 for (int ArgN=0; ArgN<GetArgs(); ArgN++){ 00847 TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal); 00848 char ArgValQCh=GetArgValQCh(ArgVal); 00849 SymChA+=' '; SymChA+=ArgNm; SymChA+='='; 00850 SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh; 00851 } 00852 SymChA+="?>"; break;} 00853 case xsyPI: 00854 SymChA+="<?"; SymChA+=TagNm; 00855 if (!TxtChA.Empty()){SymChA+=' '; SymChA+=TxtChA;} 00856 SymChA+="?>"; break; 00857 case xsyDocTypeDecl: 00858 SymChA+="<!DOCTYPE "; SymChA+=TagNm; SymChA+=">"; break; 00859 case xsySTag: 00860 case xsySETag:{ 00861 SymChA+="<"; SymChA+=TagNm; 00862 for (int ArgN=0; ArgN<GetArgs(); ArgN++){ 00863 TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal); 00864 char ArgValQCh=GetArgValQCh(ArgVal); 00865 SymChA+=' '; SymChA+=ArgNm; SymChA+='='; 00866 SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh; 00867 } 00868 if (Sym==xsySTag){SymChA+=">";} 00869 else if (Sym==xsySETag){SymChA+="/>";} 00870 else {Fail;} 00871 break;} 00872 case xsyETag: 00873 SymChA+="</"; SymChA+=TagNm; SymChA+=">"; break; 00874 case xsyStr: 00875 SymChA="{String:'"; SymChA+=TxtChA; SymChA+="'}"; break; 00876 case xsyQStr: 00877 SymChA="{QString:'"; SymChA+=TxtChA; SymChA+="'}"; break; 00878 case xsyEof: 00879 SymChA="{Eof}"; break; 00880 
default: Fail; 00881 } 00882 return SymChA; 00883 } 00884 00885 void TXmlLx::EThrow(const TStr& MsgStr) const { 00886 TChA FPosChA; 00887 FPosChA+=" [File:"; FPosChA+=SIn->GetSNm(); 00888 FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN); 00889 FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN); 00890 FPosChA+="]"; 00891 TStr FullMsgStr=MsgStr+FPosChA; 00892 TExcept::Throw(FullMsgStr); 00893 } 00894 00895 TStr TXmlLx::GetFPosStr() const { 00896 TChA FPosChA; 00897 FPosChA+=" [File:"; FPosChA+=SIn->GetSNm(); 00898 FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN); 00899 FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN); 00900 FPosChA+="]"; 00901 return FPosChA; 00902 } 00903 00904 TStr TXmlLx::GetXmlLxSymStr(const TXmlLxSym& XmlLxSym){ 00905 switch (XmlLxSym){ 00906 case xsyUndef: return "Undef"; 00907 case xsyWs: return "White-Space"; 00908 case xsyComment: return "Comment"; 00909 case xsyXmlDecl: return "Declaration"; 00910 case xsyPI: return "PI"; 00911 case xsyDocTypeDecl: return "Document-Type"; 00912 case xsyElement: return "Element"; 00913 case xsyAttList: return "Attribute-List"; 00914 case xsyEntity: return "Entity"; 00915 case xsyNotation: return "Notation"; 00916 case xsyTag: return "Tag"; 00917 case xsySTag: return "Start-Tag"; 00918 case xsyETag: return "End-Tag"; 00919 case xsySETag: return "Start-End-Tag"; 00920 case xsyStr: return "String"; 00921 case xsyQStr: return "Quoted-String"; 00922 case xsyEof: return "Eon-Of-File"; 00923 default: return "Undef"; 00924 } 00925 } 00926 00927 bool TXmlLx::IsTagNm(const TStr& Str){ 00928 TChA ChA=Str; 00929 if (ChA.Len()>0){ 00930 if (TXmlLx::ChDef.IsFirstNameCh(ChA[0])){ 00931 for (int ChN=1; ChN<ChA.Len(); ChN++){ 00932 if (!TXmlLx::ChDef.IsName(ChA[ChN])){ 00933 return false; 00934 } 00935 } 00936 return true; 00937 } else { 00938 return false; 00939 } 00940 } else { 00941 return false; 00942 } 00943 } 00944 00945 TStr TXmlLx::GetXmlStrFromPlainMem(const TMem& PlainMem){ 00946 TChA XmlChA; 00947 for (int ChN=0; 
ChN<PlainMem.Len(); ChN++){ 00948 uchar Ch=PlainMem[ChN]; 00949 if ((' '<=Ch)&&(Ch<='~')){ 00950 switch (Ch){ 00951 case '"': XmlChA+="""; break; 00952 case '&': XmlChA+="&"; break; 00953 case '\'': XmlChA+="'"; break; 00954 case '<': XmlChA+="<"; break; 00955 case '>': XmlChA+=">"; break; 00956 default: XmlChA+=Ch; 00957 } 00958 } else 00959 if ((Ch=='\r')||(Ch=='\n')){ 00960 XmlChA+=Ch; 00961 } else { 00962 XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';'; 00963 } 00964 } 00965 return XmlChA; 00966 } 00967 00968 TStr TXmlLx::GetXmlStrFromPlainStr(const TChA& PlainChA){ 00969 TChA XmlChA; 00970 for (int ChN=0; ChN<PlainChA.Len(); ChN++){ 00971 uchar Ch=PlainChA[ChN]; 00972 if ((' '<=Ch)&&(Ch<='~')){ 00973 switch (Ch){ 00974 case '"': XmlChA+="""; break; 00975 case '&': XmlChA+="&"; break; 00976 case '\'': XmlChA+="'"; break; 00977 case '<': XmlChA+="<"; break; 00978 case '>': XmlChA+=">"; break; 00979 default: XmlChA+=Ch; 00980 } 00981 } else 00982 if ((Ch=='\r')||(Ch=='\n')){ 00983 XmlChA+=Ch; 00984 } else { 00985 XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';'; 00986 } 00987 } 00988 return XmlChA; 00989 } 00990 00991 TStr TXmlLx::GetPlainStrFromXmlStr(const TStr& XmlStr){ 00992 TChA PlainChA; 00993 TChRet Ch(TStrIn::New(XmlStr)); 00994 Ch.GetCh(); 00995 while (!Ch.Eof()){ 00996 if (Ch()!='&'){ 00997 PlainChA+=Ch(); Ch.GetCh(); 00998 } else { 00999 // [67] Reference ::= EntityRef | CharRef 01000 if (Ch.GetCh()=='#'){ 01001 // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 01002 TChA RefChA; int RefCd=0; 01003 if (Ch.GetCh()=='x'){ 01004 // hex-decimal character code 01005 forever { 01006 Ch.GetCh(); 01007 if (TCh::IsHex(Ch())){ 01008 RefChA+=Ch(); 01009 RefCd=RefCd*16+TCh::GetHex(Ch()); 01010 } else { 01011 break; 01012 } 01013 } 01014 } else { 01015 // decimal character code 01016 forever { 01017 if (TCh::IsNum(Ch())){ 01018 RefChA+=Ch(); 01019 RefCd=RefCd*10+TCh::GetNum(Ch()); 01020 } else { 01021 break; 01022 } 01023 
Ch.GetCh(); 01024 } 01025 } 01026 if ((!RefChA.Empty())&&(Ch()==';')){ 01027 Ch.GetCh(); 01028 uchar RefCh=uchar(RefCd); 01029 PlainChA+=RefCh; 01030 } 01031 } else { 01032 // [68] EntityRef ::= '&' Name ';' 01033 TChA EntityNm; 01034 while ((!Ch.Eof())&&(Ch()!=';')){ 01035 EntityNm+=Ch(); Ch.GetCh();} 01036 if ((!EntityNm.Empty())&&(Ch()==';')){ 01037 Ch.GetCh(); 01038 if (EntityNm=="quot"){PlainChA+='"';} 01039 else if (EntityNm=="amp"){PlainChA+='&';} 01040 else if (EntityNm=="apos"){PlainChA+='\'';} 01041 else if (EntityNm=="lt"){PlainChA+='<';} 01042 else if (EntityNm=="gt"){PlainChA+='>';} 01043 } 01044 } 01045 } 01046 } 01047 return PlainChA; 01048 } 01049 01050 TStr TXmlLx::GetUsAsciiStrFromXmlStr(const TStr& XmlStr){ 01051 TStr UsAsciiStr=XmlStr; 01052 UsAsciiStr.ChangeStrAll("è", "c"); 01053 UsAsciiStr.ChangeStrAll("È", "C"); 01054 UsAsciiStr.ChangeStrAll("š", "s"); 01055 UsAsciiStr.ChangeStrAll("Š", "S"); 01056 UsAsciiStr.ChangeStrAll("ž", "z"); 01057 UsAsciiStr.ChangeStrAll("Ž", "Z"); 01058 TChA UsAsciiChA=TXmlLx::GetPlainStrFromXmlStr(UsAsciiStr); 01059 for (int ChN=0; ChN<UsAsciiChA.Len(); ChN++){ 01060 char Ch=UsAsciiChA[ChN]; 01061 if ((Ch<' ')||('~'<Ch)){UsAsciiChA.PutCh(ChN, 'x');} 01062 } 01063 return UsAsciiChA; 01064 } 01065 01066 TStr TXmlLx::GetChRefFromYuEntRef(const TStr& YuEntRefStr){ 01067 TStr ChRefStr=YuEntRefStr; 01068 ChRefStr.ChangeStrAll("&ch;", "è"); 01069 ChRefStr.ChangeStrAll("&Ch;", "È"); 01070 ChRefStr.ChangeStrAll("&sh;", "š"); 01071 ChRefStr.ChangeStrAll("&Sh;", "Š"); 01072 ChRefStr.ChangeStrAll("&zh;", "ž"); 01073 ChRefStr.ChangeStrAll("&Zh;", "Ž"); 01074 ChRefStr.ChangeStrAll("&cs", "c"); 01075 ChRefStr.ChangeStrAll("&Cs;", "C"); 01076 ChRefStr.ChangeStrAll("&dz;", "dz"); 01077 ChRefStr.ChangeStrAll("&Dz;", "Dz"); 01078 return ChRefStr; 01079 } 01080 01082 // Xml-Token 01083 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm, const bool& DfVal) const { 01084 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01085 return (ArgN==-1) ? 
DfVal : (ArgNmValV[ArgN].Dat==TBool::TrueStr); 01086 } 01087 01088 bool TXmlTok::GetBoolArgVal( 01089 const TStr& ArgNm, const TStr& TrueVal, const bool& DfVal) const { 01090 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01091 return (ArgN==-1) ? DfVal : (ArgNmValV[ArgN].Dat==TrueVal); 01092 } 01093 01094 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm, 01095 const TStr& TrueVal, const TStr& FalseVal, const bool& DfVal) const { 01096 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01097 if (ArgN==-1){return DfVal;} 01098 TStr ArgVal=ArgNmValV[ArgN].Dat; 01099 if (ArgVal==TrueVal){return true;} 01100 IAssert(ArgVal == FalseVal); return false; 01101 } 01102 01103 int TXmlTok::GetIntArgVal(const TStr& ArgNm, const int& DfVal) const { 01104 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01105 if (ArgN==-1){ 01106 return DfVal; 01107 } else { 01108 int Val; 01109 if (ArgNmValV[ArgN].Dat.IsInt(Val)){return Val;} else {return DfVal;} 01110 } 01111 } 01112 01113 double TXmlTok::GetFltArgVal(const TStr& ArgNm, const double& DfVal) const { 01114 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01115 if (ArgN==-1){ 01116 return DfVal; 01117 } else { 01118 double Val; 01119 if (ArgNmValV[ArgN].Dat.IsFlt(Val)){return Val;} else {return DfVal;} 01120 } 01121 } 01122 01123 TStr TXmlTok::GetStrArgVal(const TStr& ArgNm, const TStr& DfVal) const { 01124 int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); 01125 return (ArgN==-1) ? 
DfVal : ArgNmValV[ArgN].Dat; 01126 } 01127 01128 void TXmlTok::PutSubTok(const PXmlTok& Tok, const int& SubTokN){ 01129 if (SubTokN==-1){ 01130 ClrSubTok(); AddSubTok(Tok); 01131 } else { 01132 SubTokV[SubTokN]=Tok; 01133 } 01134 } 01135 01136 PXmlTok TXmlTok::GetTagTok(const TStr& TagPath) const { 01137 if (TagPath.Empty()){ 01138 return (TXmlTok*)this; 01139 } else { 01140 TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath); 01141 PXmlTok SubTok; 01142 for (int SubTokN=0; SubTokN<SubTokV.Len(); SubTokN++){ 01143 SubTok=SubTokV[SubTokN]; 01144 if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){break;} 01145 else {SubTok=NULL;} 01146 } 01147 if ((SubTok.Empty())||(RestTagPath.Empty())){return SubTok;} 01148 else {return SubTok->GetTagTok(RestTagPath);} 01149 } 01150 } 01151 01152 void TXmlTok::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const { 01153 XmlTokV.Clr(); 01154 TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm); 01155 PXmlTok Tok=GetTagTok(PreTagPath); 01156 if (!Tok.Empty()){ 01157 for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){ 01158 PXmlTok SubTok=Tok->GetSubTok(SubTokN); 01159 if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){ 01160 XmlTokV.Add(SubTok);} 01161 } 01162 } 01163 } 01164 01165 void TXmlTok::GetTagValV(const TStr& TagNm, const bool& XmlP, TStrV& ValV) const { 01166 if ((Sym==xsyTag)&&(Str==TagNm)){ 01167 ValV.Add(GetTokStr(XmlP)); 01168 } else { 01169 for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){ 01170 GetSubTok(SubTokN)->GetTagValV(TagNm, XmlP, ValV);} 01171 } 01172 } 01173 01174 TStr TXmlTok::GetTagVal(const TStr& TagNm, const bool& XmlP) const { 01175 TStrV ValV; GetTagValV(TagNm, XmlP, ValV); 01176 if (ValV.Len()>0){return ValV[0];} else {return "";} 01177 } 01178 01179 void TXmlTok::AddTokToChA(const bool& XmlP, TChA& ChA) const { 01180 switch (Sym){ 01181 case xsyWs: 01182 ChA+=Str; break; 01183 case xsyStr: 01184 if 
(XmlP){ChA+=TXmlLx::GetXmlStrFromPlainStr(Str);} else {ChA+=Str;} break; 01185 case xsyQStr: 01186 if (XmlP){ChA+="<![CDATA[";} 01187 ChA+=Str; 01188 if (XmlP){ChA+="]]>";} break; 01189 case xsyTag: 01190 if (XmlP){ 01191 ChA+='<'; ChA+=Str; 01192 for (int ArgN=0; ArgN<GetArgs(); ArgN++){ 01193 TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal); 01194 if (XmlP){ArgVal=TXmlLx::GetXmlStrFromPlainStr(ArgVal);} 01195 char ArgValQCh=TXmlLx::GetArgValQCh(ArgVal); 01196 ChA+=' '; ChA+=ArgNm; ChA+='='; 01197 ChA+=ArgValQCh; ChA+=ArgVal; ChA+=ArgValQCh; 01198 } 01199 } 01200 if (GetSubToks()==0){ 01201 if (XmlP){ChA+="/>";} 01202 } else { 01203 if (XmlP){ChA+=">";} 01204 for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){ 01205 GetSubTok(SubTokN)->AddTokToChA(XmlP, ChA);} 01206 if (XmlP){ChA+="</"; ChA+=Str; ChA+='>';} 01207 } 01208 break; 01209 default: Fail; 01210 } 01211 } 01212 01213 TStr TXmlTok::GetTokVStr(const TXmlTokV& TokV, const bool& XmlP){ 01214 TChA TokVChA; 01215 for (int TokN=0; TokN<TokV.Len(); TokN++){ 01216 if (TokN>0){TokVChA+=' ';} 01217 TokVChA+=TokV[TokN]->GetTokStr(XmlP); 01218 } 01219 return TokVChA; 01220 } 01221 01222 PXmlTok TXmlTok::GetTok(TXmlLx& Lx){ 01223 switch (Lx.Sym){ 01224 case xsyWs: 01225 case xsyStr: 01226 case xsyQStr: 01227 return TXmlTok::New(Lx.Sym, Lx.TxtChA); 01228 case xsySTag: 01229 case xsySETag: 01230 return TXmlTok::New(xsyTag, Lx.TagNm, Lx.ArgNmValKdV); 01231 default: Fail; return NULL; 01232 } 01233 } 01234 01236 // Xml-Document 01237 void TXmlDoc::LoadTxtMiscStar(TXmlLx& Lx){ 01238 // [27] Misc ::= Comment | PI | S 01239 while ((Lx.Sym==xsyComment)||(Lx.Sym==xsyPI)||(Lx.Sym==xsyWs)){ 01240 Lx.GetSym();} 01241 } 01242 01243 PXmlTok TXmlDoc::LoadTxtElement(TXmlLx& Lx){ 01244 // [39] element ::= EmptyElemTag | STag content ETag 01245 PXmlTok Tok; 01246 if (Lx.Sym==xsySETag){ 01247 Tok=TXmlTok::GetTok(Lx); 01248 } else 01249 if (Lx.Sym==xsySTag){ 01250 Tok=TXmlTok::GetTok(Lx); 01251 forever { 01252 Lx.GetSym(); 01253 if 
(Lx.Sym==xsyETag){ 01254 if (Tok->GetStr()==Lx.TagNm){ 01255 break; 01256 } else { 01257 TStr MsgStr=TStr("Invalid End-Tag '")+Lx.TagNm+ 01258 "' ('"+Tok->GetStr()+"' expected)."; 01259 Lx.EThrow(MsgStr); 01260 } 01261 } else { 01262 PXmlTok SubTok; 01263 switch (Lx.Sym){ 01264 case xsySTag: 01265 SubTok=LoadTxtElement(Lx); break; 01266 case xsySETag: 01267 case xsyStr: 01268 case xsyQStr: 01269 case xsyWs: 01270 SubTok=TXmlTok::GetTok(Lx); break; 01271 case xsyPI: 01272 case xsyComment: 01273 break; 01274 default: Lx.EThrow("Content or End-Tag expected."); 01275 } 01276 if (!SubTok.Empty()){ 01277 Tok->AddSubTok(SubTok);} 01278 } 01279 } 01280 } else 01281 if (Lx.Sym==xsyETag){ 01282 TStr MsgStr= 01283 TStr("Xml-Element (Start-Tag or Empty-Element-Tag) required.")+ 01284 TStr::GetStr(Lx.TagNm, " End-Tag </%s> encountered."); 01285 Lx.EThrow(MsgStr); 01286 } else { 01287 Lx.EThrow("Xml-Element (Start-Tag or Empty-Element-Tag) required."); 01288 } 01289 return Tok; 01290 } 01291 01292 PXmlTok TXmlDoc::GetTagTok(const TStr& TagPath) const { 01293 if (TagPath.Empty()){ 01294 return Tok; 01295 } else { 01296 TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath); 01297 if ((Tok->GetSym()==xsyTag)&&(Tok->GetStr()==TagNm)){ 01298 if (RestTagPath.Empty()){return Tok;} 01299 else {return Tok->GetTagTok(RestTagPath);} 01300 } else { 01301 return NULL; 01302 } 01303 } 01304 } 01305 01306 void TXmlDoc::PutTagTokStr(const TStr& TagPath, const TStr& TokStr) const { 01307 PXmlTok Tok=GetTagTok(TagPath); 01308 Tok->ClrSubTok(); 01309 PXmlTok StrTok=TXmlTok::New(xsyStr, TokStr); 01310 Tok->AddSubTok(StrTok); 01311 } 01312 01313 void TXmlDoc::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const { 01314 XmlTokV.Clr(); 01315 TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm); 01316 PXmlTok Tok=GetTagTok(PreTagPath); 01317 if (!Tok.Empty()){ 01318 for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){ 01319 PXmlTok 
SubTok=Tok->GetSubTok(SubTokN); 01320 if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){ 01321 XmlTokV.Add(SubTok);} 01322 } 01323 } 01324 } 01325 01326 bool TXmlDoc::GetTagTokBoolArgVal( 01327 const TStr& TagPath, const TStr& ArgNm, const bool& DfVal) const { 01328 PXmlTok TagTok; 01329 if (IsTagTok(TagPath, TagTok)){ 01330 return TagTok->GetBoolArgVal(ArgNm, DfVal);} 01331 else {return DfVal;} 01332 } 01333 01334 int TXmlDoc::GetTagTokIntArgVal( 01335 const TStr& TagPath, const TStr& ArgNm, const int& DfVal) const { 01336 PXmlTok TagTok; 01337 if (IsTagTok(TagPath, TagTok)){ 01338 return TagTok->GetIntArgVal(ArgNm, DfVal);} 01339 else {return DfVal;} 01340 } 01341 01342 double TXmlDoc::GetTagTokFltArgVal( 01343 const TStr& TagPath, const TStr& ArgNm, const double& DfVal) const { 01344 PXmlTok TagTok; 01345 if (IsTagTok(TagPath, TagTok)){ 01346 return TagTok->GetFltArgVal(ArgNm, DfVal);} 01347 else {return DfVal;} 01348 } 01349 01350 TStr TXmlDoc::GetTagTokStrArgVal( 01351 const TStr& TagPath, const TStr& ArgNm, const TStr& DfVal) const { 01352 PXmlTok TagTok; 01353 if (IsTagTok(TagPath, TagTok)){ 01354 return TagTok->GetStrArgVal(ArgNm, DfVal);} 01355 else {return DfVal;} 01356 } 01357 01358 TStr TXmlDoc::GetXmlStr(const TStr& Str){ 01359 TChA ChA=Str; 01360 TChA XmlChA; 01361 for (int ChN=0; ChN<ChA.Len(); ChN++){ 01362 uchar Ch=ChA[ChN]; 01363 if ((' '<=Ch)&&(Ch<='~')){ 01364 if (Ch=='&'){XmlChA+="&";} 01365 else if (Ch=='>'){XmlChA+="<";} 01366 else if (Ch=='<'){XmlChA+=">";} 01367 else if (Ch=='\''){XmlChA+="'";} 01368 else if (Ch=='\"'){XmlChA+=""";} 01369 else {XmlChA+=Ch;} 01370 } else { 01371 XmlChA+="&#"; XmlChA+=TUInt::GetStr(Ch); XmlChA+=";"; 01372 } 01373 } 01374 return XmlChA; 01375 } 01376 01377 bool TXmlDoc::SkipTopTag(const PSIn& SIn){ 01378 bool Ok=true; 01379 TXmlLx Lx(SIn, xspIntact); 01380 try { 01381 Lx.GetSym(); 01382 // [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 
01383 if (Lx.Sym==xsyXmlDecl){Lx.GetSym();} 01384 LoadTxtMiscStar(Lx); 01385 if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();} 01386 LoadTxtMiscStar(Lx); 01387 Ok=true; 01388 } 01389 catch (PExcept Except){ 01390 Ok=false; 01391 } 01392 return Ok; 01393 } 01394 01395 PXmlDoc TXmlDoc::LoadTxt(TXmlLx& Lx){ 01396 PXmlDoc Doc=TXmlDoc::New(); 01397 // [1] document ::= prolog element Misc* 01398 try { 01399 Lx.GetSym(); 01400 // [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 01401 if (Lx.Sym==xsyXmlDecl){Lx.GetSym();} 01402 LoadTxtMiscStar(Lx); 01403 if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();} 01404 LoadTxtMiscStar(Lx); 01405 Doc->Tok=LoadTxtElement(Lx); 01406 LoadTxtMiscStar(Lx); 01407 Doc->Ok=true; Doc->MsgStr="Ok"; 01408 } 01409 catch (PExcept& Except){ 01410 Doc->Ok=false; Doc->MsgStr=Except->GetMsgStr(); 01411 } 01412 return Doc; 01413 } 01414 01415 PXmlDoc TXmlDoc::LoadTxt(const PSIn& SIn, const TXmlSpacing& Spacing){ 01416 TXmlLx Lx(SIn, Spacing); return LoadTxt(Lx); 01417 } 01418 01419 PXmlDoc TXmlDoc::LoadTxt(const TStr& FNm, const TXmlSpacing& Spacing){ 01420 PSIn SIn=TFIn::New(FNm); return LoadTxt(SIn, Spacing); 01421 } 01422 01423 void TXmlDoc::LoadTxt( 01424 const TStr& FNm, TXmlDocV& XmlDocV, const TXmlSpacing& Spacing){ 01425 XmlDocV.Clr(); 01426 PSIn SIn=TFIn::New(FNm); 01427 TXmlLx Lx(SIn, Spacing); 01428 PXmlDoc XmlDoc; 01429 forever { 01430 Lx.SkipWs(); 01431 XmlDoc=LoadTxt(Lx); 01432 if (XmlDoc->IsOk()){XmlDocV.Add(XmlDoc);} 01433 else {break;} 01434 } 01435 } 01436 01437 PXmlDoc TXmlDoc::LoadStr(const TStr& Str){ 01438 PSIn SIn=TStrIn::New(Str); 01439 return LoadTxt(SIn); 01440 } 01441 01442 void TXmlDoc::SaveStr(TStr& Str){ 01443 PSOut SOut=TMOut::New(); TMOut& MOut=*(TMOut*)SOut(); 01444 SaveTxt(SOut); 01445 Str=MOut.GetAsStr(); 01446 } 01447 01449 // Fast and dirty XML parser 01450 // very basic it does only <item>string</item>, no comments, no arguments 01451 TXmlLxSym TXmlParser::GetSym() { 01452 if (NextSym != xsyUndef) { 01453 Sym = NextSym; 
NextSym=xsyUndef; 01454 SymStr=NextSymStr; NextSymStr.Clr(); 01455 return Sym; 01456 } 01457 SymStr.Clr(); 01458 char Ch; 01459 while (TCh::IsWs(Ch=GetCh())) { } 01460 if (Ch == TCh::EofCh) { Sym = xsyEof; return xsyEof; } 01461 if (Ch == '<') { // load tag 01462 Ch = GetCh(); 01463 if (Ch == '/') { Sym = xsyETag; } 01464 else { Sym = xsySTag; SymStr.Push(Ch); } 01465 while((Ch=GetCh())!='>' && Ch!=TCh::EofCh) { SymStr.Push(Ch); } 01466 const int StrLen = SymStr.Len(); 01467 if (StrLen > 1 && SymStr[StrLen-1] == '/') { 01468 Sym = xsyETag; SymStr[StrLen-1] = 0; 01469 for (char *c = SymStr.CStr()+StrLen-2; TCh::IsWs(*c); c--) { *c=0; } 01470 } 01471 } else { // load string 01472 _SymStr.Clr(); _SymStr.Push(Ch); 01473 while (! RSIn.Eof() && RSIn.PeekCh() != '<') { _SymStr.Push(GetCh()); } 01474 GetPlainStrFromXmlStr(_SymStr, SymStr); 01475 Sym = xsyStr; 01476 } 01477 if (Ch == TCh::EofCh) { SymStr.Clr(); Sym = xsyEof; return xsyEof; } 01478 return Sym; 01479 } 01480 01481 TXmlLxSym TXmlParser::GetSym(TChA& _SymStr) { 01482 GetSym(); 01483 _SymStr = SymStr; 01484 return Sym; 01485 } 01486 01487 TXmlLxSym TXmlParser::PeekSym() { 01488 if (NextSym == xsyUndef) { 01489 const TXmlLxSym TmpSim=Sym; 01490 const TChA TmpSymStr=SymStr; 01491 NextSym=GetSym(NextSymStr); 01492 Sym=TmpSim; 01493 SymStr=TmpSymStr; 01494 } 01495 return NextSym; 01496 } 01497 01498 TXmlLxSym TXmlParser::PeekSym(TChA& _SymStr) { 01499 PeekSym(); 01500 _SymStr = NextSymStr; 01501 return NextSym; 01502 } 01503 01504 void TXmlParser::SkipTillTag(const TChA& _SymStr) { 01505 while(PeekSym() != xsyEof) { 01506 if (NextSymStr == _SymStr) { return; } 01507 GetSym(); 01508 } 01509 } 01510 01511 // get <tag>value</tag> 01512 void TXmlParser::GetTagVal(const TChA& TagStr, TChA& TagVal) { 01513 EAssertR(GetTag(TagStr) == xsySTag, TStr::Fmt("Expected '<%s>'. 
Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr()); 01514 EAssertR(GetSym(TagVal) == xsyStr, "Expected string tag."); 01515 EAssertR(GetTag(TagStr) == xsyETag, TStr::Fmt("Expected '</%s>'. Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr()); 01516 } 01517 01518 TXmlLxSym TXmlParser::GetTag(const TChA& TagStr) { 01519 GetSym(); 01520 EAssertR(TagStr==SymStr, TStr::Fmt("Expected xml symbol '%s'. Found '%s'", 01521 TagStr.CStr(), SymStr.CStr()).CStr()); 01522 return Sym; 01523 } 01524 01525 void TXmlParser::GetPlainStrFromXmlStr(const TChA& XmlStr, TChA& PlainChA) { 01526 static TChA EntityNm; 01527 PlainChA.Clr(); 01528 const char *Ch = XmlStr.CStr(); 01529 while (*Ch){ 01530 if (*Ch!='&'){ PlainChA+=*Ch; Ch++; } 01531 else { 01532 if (*++Ch=='#'){ 01533 TChA RefChA; int RefCd=0; 01534 if (*++Ch=='x'){ 01535 forever { Ch++; 01536 if (TCh::IsHex(*Ch)){ RefChA+=*Ch; RefCd=RefCd*16+TCh::GetHex(*Ch); } 01537 else { break; } } 01538 } else { // decimal character code 01539 forever { 01540 if (TCh::IsNum(*Ch)){ RefChA+=*Ch; RefCd=RefCd*10+TCh::GetNum(*Ch); } 01541 else { break; } Ch++; } 01542 } 01543 if ((!RefChA.Empty())&&(*Ch==';')){ 01544 Ch++; const uchar RefCh=uchar(RefCd); PlainChA+=RefCh; } 01545 } else { 01546 EntityNm.Clr(); 01547 while ((*Ch)&&(*Ch!=';')){EntityNm+=*Ch; Ch++;} 01548 if ((!EntityNm.Empty())&&(*Ch==';')){ Ch++; 01549 if (EntityNm=="quot"){PlainChA+='"';} 01550 else if (EntityNm=="amp"){PlainChA+='&';} 01551 else if (EntityNm=="apos"){PlainChA+='\'';} 01552 else if (EntityNm=="lt"){PlainChA+='<';} 01553 else if (EntityNm=="gt"){PlainChA+='>';} 01554 } 01555 } 01556 } 01557 } 01558 }