// Expands to `_<b7><b6><b5><b4>_<b3><b2><b1><b0> = <value>`, i.e. a named constant such as
// _1100_0000 whose value is the byte spelled by the eight bit arguments (b7 is the most
// significant bit). Each argument has to be a literal 0 or 1 because it is token-pasted
// into the generated name; in the value expression every argument is parenthesized.
#define DefineByte(b7, b6, b5, b4, b3, b2, b1, b0) _ ## b7 ## b6 ## b5 ## b4 ## _ ## b3 ## b2 ## b1 ## b0 = (((b7) << 7) | ((b6) << 6) | ((b5) << 5) | ((b4) << 4) | ((b3) << 3) | ((b2) << 2) | ((b1) << 1) | (b0))
  131         template<
typename TSrcVec, 
typename TDestCh>
 
  133                 const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
  135         template<
typename TSrcVec, 
typename TDestCh>
 
  140         template<
typename TSrcVec, 
typename TDestCh>
 
  142                 const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
  144         template<
typename TSrcVec, 
typename TDestCh>
 
  170         template<
typename TSrcVec, 
typename TDestCh>
 
  172                 const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
  185         template<
typename TSrcVec, 
typename TDestCh>
 
  187                 const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
  207         template<
typename TSrcVec, 
typename TDestCh>
 
  209                 const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
  210                 TVec<TDestCh>& dest, 
const bool clrDest, 
const bool insertBom,
 
  213         template<
typename TSrcVec, 
typename TDestCh>
 
  215                 const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
  216                 TVec<TDestCh>& dest, 
const bool clrDest, 
const bool insertBom,
 
  233         void TestUtf8(
bool decode, 
size_t expectedRetVal, 
bool expectedThrow, 
const TIntV& src, 
const TIntV& expectedDest, FILE *f);
 
  246         void TestUtf16(
bool decode, 
size_t expectedRetVal, 
bool expectedThrow, 
const TIntV& src, 
const TIntV& expectedDest,
 
  251                 return ((x >> 8) & 0xff) | ((x & 0xff) << 8); }
 
  257                 const bool insertBom);
 
  277         template<
typename TSrcDat, 
typename TDestDat>
 
  279                 for (
int i = 0; i < src.
Len(); i++) dest.
Add(src[i]); }
 
  292         template<
typename TSrcVec, 
typename TDestCh>
 
  293         void Fold(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
  294                 TVec<TDestCh>& dest, 
const bool clrDest, 
const bool full, 
const bool turkic)
 const 
  296                 for (
const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; )
 
  298                         int c = src[
TVecIdx(srcIdx)], i; srcIdx++;
 
  306         template<
typename TSrcVec>
 
  307         void FoldInPlace(TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
const bool turkic)
 const 
  309                 for (
const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; srcIdx++)
 
  311                         int c = src[
TVecIdx(srcIdx)], i;
 
  319         void Test(
const TIntV& src, 
const TIntV& expectedDest, 
const bool full, 
const bool turkic, FILE *f);
 
  340         template<
class TCodecImpl>
 
  341         static PCodecBase 
New(); 
 
  349         virtual size_t ToUnicode(
const TIntV& src, 
size_t srcIdx, 
const size_t srcCount, 
TIntV& dest, 
const bool clrDest = 
true) 
const = 0;
 
  350         virtual size_t ToUnicode(
const TStr& src, 
size_t srcIdx, 
const size_t srcCount, 
TIntV& dest, 
const bool clrDest = 
true) 
const = 0;
 
  357         virtual size_t FromUnicode(
const TIntV& src, 
size_t srcIdx, 
const size_t srcCount, 
TIntV& dest, 
const bool clrDest = 
true) 
const = 0;
 
  358         virtual size_t FromUnicode(
const TIntV& src, 
size_t srcIdx, 
const size_t srcCount, 
TChA& dest, 
const bool clrDest = 
true) 
const = 0;
 
  359         virtual size_t FromUnicode(
const TIntV& src, 
size_t srcIdx, 
const size_t srcCount, 
TStr& dest, 
const bool clrDest = 
true) 
const = 0;
 
  376 template<
class TCodecImpl_>
 
  388         virtual size_t ToUnicode(
const TIntV& src, 
size_t srcIdx, 
const size_t srcCount, 
TIntV& dest, 
const bool clrDest = 
true)
 const {
 
  389                 return impl.ToUnicode(src, srcIdx, srcCount, dest, clrDest); }
 
  390         virtual size_t ToUnicode(
const TStr& src, 
size_t srcIdx, 
const size_t srcCount, 
TIntV& dest, 
const bool clrDest = 
true)
 const {
 
  391                 return impl.ToUnicode(src, srcIdx, srcCount, dest, clrDest); }
 
  393         virtual size_t FromUnicode(
const TIntV& src, 
size_t srcIdx, 
const size_t srcCount, 
TIntV& dest, 
const bool clrDest = 
true)
 const {
 
  394                 return impl.FromUnicode(src, srcIdx, srcCount, dest, clrDest); }
 
  395         virtual size_t FromUnicode(
const TIntV& src, 
size_t srcIdx, 
const size_t srcCount, 
TChA& dest, 
const bool clrDest = 
true)
 const {
 
  396                 return impl.FromUnicode(src, srcIdx, srcCount, dest, clrDest); }
 
  397         virtual size_t FromUnicode(
const TIntV& src, 
size_t srcIdx, 
const size_t srcCount, 
TStr& dest, 
const bool clrDest = 
true)
 const {
 
  398                 TChA buf; 
size_t retVal = 
impl.FromUnicode(src, srcIdx, srcCount, buf, 
false);
 
  399                 if (clrDest) dest += buf.
CStr(); 
else dest = buf.
CStr();
 
  403 template<
class TCodecImpl>
 
  412 template<
class TVector_>
 
  // Maps a code point to its single-byte code: values 0..255 map to themselves, every
  // other value (including negatives) yields -1.
  static int FromUnicode(int c)
  {
    const bool representable = (c >= 0) && (c <= 255);
    return representable ? c : -1;
  }
 
  456                 if (0 <= c && c < 0xa0) 
return c;
 
  470                 if (0 <= c && c < 0xa0) 
return c;
 
  484                 if (0 <= c && c < 0xa0) 
return c;
 
  503                 if (0 <= c && c <= 255) 
return c; 
else return -1; }
 
  514                 if (0 <= c && c < 0x80) 
return c;
 
  519                 else if (c == 0x192) 
return 0x9f;
 
  520                 else if (c == 0x207f) 
return 0xfc;
 
  521                 else if (c == 0x20a7) 
return 0x9e;
 
  522                 else if (c == 0x2310) 
return 0xa9;
 
  523                 else if (c == 0x2320) 
return 0xf4;
 
  524                 else if (c == 0x2321) 
return 0xf5;
 
  536                 if (0 <= c && c < 0x80) 
return c;
 
  551                 if (0 <= c && c < 0x80) 
return c;
 
  555                 else if (c == 0x20ac) 
return 0x80;
 
  556                 else if (c == 0x2122) 
return 0x99;
 
  560 template<
class TEncoding_>
 
  578                 for (
int c = 0; c <= 255; c++) {
 
  579                         int cu = TEncoding::ToUnicode(c); 
if (cu == -1) 
continue;
 
  581                         IAssert(0 <= cu && cu < 0x110000);
 
  582                         int c2 = TEncoding::FromUnicode(cu);
 
  585                 for (
int cu = 0; cu < 0x110000; cu++) {
 
  586                         int c = TEncoding::FromUnicode(cu); 
if (c == -1) 
continue;
 
  589                         int cu2 = TEncoding::ToUnicode(c);
 
  596         template<
typename TSrcVec, 
typename TDestCh>
 
  598                 const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
  601                 if (clrDest) dest.
Clr();
 
  602                 size_t toDo = srcCount;
 
  604                         int chSrc = ((int) src[
TVecIdx(srcIdx)]) & 0xff; srcIdx++;
 
  605                         int chDest = TEncoding::ToUnicode(chSrc);
 
  609         template<
typename TSrcVec, 
typename TDestCh>
 
  617         template<
typename TSrcVec, 
typename TDestVec>
 
  619                 const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
  620                 TDestVec& dest, 
const bool clrDest = 
true)
 const 
  623                 if (clrDest) dest.Clr();
 
  624                 size_t toDo = srcCount, nEncoded = 0;
 
  626                         int chSrc = (int) src[
TVecIdx(srcIdx)]; srcIdx++;
 
  627                         int chDest = TEncoding::FromUnicode(chSrc);
 
  639         template<
typename TSrcVec, 
typename TDestVec>
 
  640         size_t FromUnicode(
const TSrcVec& src, TDestVec& dest, 
const bool clrDest = 
true)
 const { 
return FromUnicode(src, 0, src.Len(), dest, clrDest); }
 
  642         size_t UniToStr(
const TIntV& src, 
size_t srcIdx, 
const size_t srcCount, 
TStr& dest, 
const bool clrDest = 
true)
 const {
 
  643                 TChA buf; 
size_t retVal = 
FromUnicode(src, srcIdx, srcCount, buf, 
false);
 
  644                 if (clrDest) dest += buf.
CStr(); 
else dest = buf.
CStr();
 
  // Expands to `uc<cat> = <code of c>`: a constant named after the category whose value is
  // the character c converted through uchar and masked to 0..255.
  #define DefineUniCat(cat, c) uc ## cat = (int(uchar(c)) & 0xff)
  // Expands to `uc<cat><subCat> = <value>`, where the value carries the already-defined
  // uc<cat> constant in bits 8 and up and the code of character c (0..255) in the low byte.
  // The value is wrapped in one outer pair of parentheses so it stays a single operand.
  #define DefineUniSubCat(cat, subCat, c) uc ## cat ## subCat = (((uc ## cat) << 8) | (int(uchar(c)) & 0xff))
 1031         static inline ushort GetLineBreakCode(
char c1, 
char c2) { 
return ((static_cast<ushort>(static_cast<uchar>(c1)) & 0xff) << 8) | ((
static_cast<ushort>(
static_cast<uchar>(c2)) & 0xff)); }
 
 1165                 static const char s[] = 
"LuLlLtLmLoMnMcMeNdNlNoPcPdPsPePiPfPoSmScSkSoZsZlZpCcCfCsCoCn";
 
 1166                 for (
const char *p = s; *p; p += 2)
 
 1167                         if (chCat == p[0] && chSubCat == p[1]) 
return true;
 
 1176 template<
typename TItem_>
 
 1207                 int keyId = 
roots.GetKeyId(
TItemTr(last, butLast, butButLast));
 
 1208                 if (keyId < 0) 
return 0; 
else return roots[keyId]; }
 
 1210                 for (
int childIdx = 
nodes[parentIdx].child; childIdx >= 0; ) {
 
 1211                         const TNode &node = 
nodes[childIdx];
 
 1212                         if (node.item == item) 
return childIdx;
 
 1213                         childIdx = node.sib; }
 
 1219         template<
typename TSrcVec>
 
 1220         void Add(
const TSrcVec& src, 
const size_t srcIdx, 
const size_t srcCount)
 
 1225                 size_t srcLast = srcIdx + (srcCount - 1);
 
 1227                 int keyId = 
roots.GetKeyId(tr), curNodeIdx = -1;
 
 1228                 if (keyId >= 0) curNodeIdx = 
roots[keyId];
 
 1229                 else { curNodeIdx = 
nodes.
Add(TNode(
TItem(0), -1, -1, 
false)); 
roots.AddDat(tr, curNodeIdx); }
 
 1231                 if (srcCount > 3) 
for (
size_t srcPos = srcLast - 3; ; )
 
 1234                         int childNodeIdx = 
nodes[curNodeIdx].child;
 
 1235                         while (childNodeIdx >= 0) {
 
 1236                                 TNode &childNode = 
nodes[childNodeIdx];
 
 1237                                 if (childNode.item == curItem) 
break;
 
 1238                                 childNodeIdx = childNode.sib; }
 
 1239                         if (childNodeIdx < 0) {
 
 1240                                 childNodeIdx = 
nodes.
Add(TNode(curItem, -1, 
nodes[curNodeIdx].child, 
false));
 
 1241                                 nodes[curNodeIdx].child = childNodeIdx; }
 
 1242                         curNodeIdx = childNodeIdx;
 
 1243                         if (srcPos == srcIdx) 
break; 
else srcPos--;
 
 1245                 nodes[curNodeIdx].terminal = 
true;
 
 // Whole-sequence overload: calls the three-argument Add with srcIdx = 0 and
 // srcCount = src.Len().
 template<typename TSrcVec>
 void Add(const TSrcVec& src)
 {
   Add(src, 0, static_cast<size_t>(src.Len()));
 }
 
 1335                 char buf[20]; sprintf(buf, 
"U+%04x", cp); 
return TStr(buf); }
 
 1336         template<
class TSrcVec> 
void PrintCharNames(FILE *f, 
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
const TStr& prefix)
 const {
 
 1337                 if (! f) f = stdout;
 
 1338                 for (
const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; srcIdx++) {
 
 1339                         fprintf(f, 
"%s", prefix.
CStr());
 
 1340                         int cp = src[
TVecIdx(srcIdx)]; fprintf(f, (cp >= 0x10000 ? 
"U+%05x" : 
"U+%04x "), cp);
 
 1352                 if (i < 0) 
return false; 
else { ChInfo=
h[i]; 
return true; }}
 
 // Generators for per-code-point property predicates.
 // ___UniFwd1(name) defines `bool name(const int cp) const`, which asks h for the key id of
 // cp and returns h[i].name() when that id is non-negative, or false otherwise.
 // ___UniFwdN(name1, ..., nameN) emits ___UniFwd1 once for each of its N names.
 #define ___UniFwd1(name) bool name(const int cp) const { const int i = h.GetKeyId(cp); if (i >= 0) return h[i].name(); return false; }
 #define ___UniFwd2(name1, name2) ___UniFwd1(name1) ___UniFwd1(name2)
 #define ___UniFwd3(name1, name2, name3) ___UniFwd2(name1, name2) ___UniFwd1(name3)
 #define ___UniFwd4(name1, name2, name3, name4) ___UniFwd3(name1, name2, name3) ___UniFwd1(name4)
 #define ___UniFwd5(name1, name2, name3, name4, name5) ___UniFwd4(name1, name2, name3, name4) ___UniFwd1(name5)
 // Expands to one forwarding predicate per listed property, using whichever ___UniFwd1
 // is in effect at the point of use. Each continuation backslash must be the very last
 // character on its line.
 #define DECLARE_FORWARDED_PROPERTY_METHODS \
   ___UniFwd5(IsAsciiHexDigit, IsBidiControl, IsDash, IsDeprecated, IsDiacritic) \
   ___UniFwd5(IsExtender, IsGraphemeLink, IsHexDigit, IsHyphen, IsIdeographic) \
   ___UniFwd5(IsJoinControl, IsLogicalOrderException, IsNoncharacter, IsQuotationMark, IsSoftDotted) \
   ___UniFwd4(IsSTerminal, IsTerminalPunctuation, IsVariationSelector, IsWhiteSpace) \
   ___UniFwd5(IsAlphabetic, IsUppercase, IsLowercase, IsMath, IsDefaultIgnorable) \
   ___UniFwd4(IsGraphemeBase, IsGraphemeExtend, IsIdStart, IsIdContinue) \
   ___UniFwd2(IsXidStart, IsXidContinue) \
   ___UniFwd3(IsCompositionExclusion, IsCompatibilityDecomposition, IsSbSep) \
   ___UniFwd1(IsGbExtend) \
   ___UniFwd2(IsCased, IsCurrency)
 1384                 int i = 
h.
GetKeyId(cp); 
if (i >= 0) 
return h[i].IsPrivateUse();
 
 1385                 return (0xe000 <= cp && cp <= 0xf8ff) ||  
 
 1387                         (0xf0000 <= cp && cp <= 0xffffd) || (0x100000 <= cp && cp <= 0x10fffd); }
 
 1393                 int i = 
h.
GetKeyId(cp); 
if (i >= 0) 
return h[i].IsSurrogate();
 
 1394                 return 0xd800 <= cp && cp <= 0xdcff; }
 
 1425         template<
typename TSrcVec> 
void WbFindNextNonIgnored(
const TSrcVec& src, 
size_t& position, 
const size_t srcEnd)
 const {
 
 1426                 if (position >= srcEnd) 
return;
 
 1427                 position++; 
while (position < srcEnd && 
IsWbIgnored(src[
TVecIdx(position)])) position++; }
 
 1429         template<
typename TSrcVec> 
void WbFindNextNonIgnoredS(
const TSrcVec& src, 
size_t& position, 
const size_t srcEnd)
 const {
 
 1430                 if (position >= srcEnd) 
return;
 
 1431                 if (IsSbSep(src[
TVecIdx(position)])) { position++; 
return; }
 
 1432                 position++; 
while (position < srcEnd && 
IsWbIgnored(src[
TVecIdx(position)])) position++; }
 
 1434         template<
typename TSrcVec> 
bool WbFindPrevNonIgnored(
const TSrcVec& src, 
const size_t srcStart, 
size_t& position)
 const {
 
 1435                 if (position <= srcStart) 
return false;
 
 1436                 while (position > srcStart) {
 
 1446         template<
typename TSrcVec>
 
 1447         bool FindNextWordBoundary(
const TSrcVec& src, 
const size_t srcIdx, 
const size_t srcCount, 
size_t &position) 
const;
 
 1451         template<
typename TSrcVec>
 
 1465         template<
typename TSrcVec>
 
 1466         bool CanSentenceEndHere(
const TSrcVec& src, 
const size_t srcIdx, 
const size_t position) 
const;
 
 1472         template<
typename TSrcVec>
 
 1473         bool FindNextSentenceBoundary(
const TSrcVec& src, 
const size_t srcIdx, 
const size_t srcCount, 
size_t &position) 
const;
 
 1477         template<
typename TSrcVec>
 
 1500                 static const TStr data = 
"Ms|Mrs|Mr|Rev|Dr|Prof|Gov|Sen|Rep|Gen|Brig|Col|Capt|Lieut|Lt|Sgt|Pvt|Cmdr|Adm|Corp|St|Mt|Ft|e.g|e. g.|i.e|i. e|ib|ibid|s.v|s. v|s.vv|s. vv";
 
 1510         template<
typename TDestCh>
 
 1516         template<
typename TSrcVec, 
typename TDestCh>
 
 1517         void Decompose(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
 1518                         TVec<TDestCh>& dest, 
bool compatibility, 
bool clrDest = 
true) 
const;
 
 1519         template<
typename TSrcVec, 
typename TDestCh>
 
 1521                 Decompose(src, 0, src.Len(), dest, compatibility, clrDest); }
 
 1528         template<
typename TSrcVec, 
typename TDestCh>
 
 1529         void Compose(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
 1531         template<
typename TSrcVec, 
typename TDestCh>
 
 1533                 Compose(src, 0, src.Len(), dest, clrDest); }
 
 1538         template<
typename TSrcVec, 
typename TDestCh>
 
 1540                         TVec<TDestCh>& dest, 
bool compatibility, 
bool clrDest = 
true) 
const;
 
 1541         template<
typename TSrcVec, 
typename TDestCh>
 
 1547         template<
typename TSrcVec, 
typename TDestCh>
 
 1548         size_t ExtractStarters(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
 1550         template<
typename TSrcVec, 
typename TDestCh>
 
 1554         template<
typename TSrcVec>
 
 1557                 src.Clr(); 
for (
int i = 0; i < temp.
Len(); i++) src.Add(temp[i]);
 
 1589         template<
typename TSrcVec, 
typename TDestCh> 
void GetCaseConverted(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
TVec<TDestCh>& dest, 
const bool clrDest, 
const TCaseConversion how, 
const bool turkic, 
const bool lithuanian) 
const;
 
 1590         template<
typename TSrcVec, 
typename TDestCh> 
void GetLowerCase(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true, 
const bool turkic = 
false, 
const bool lithuanian = 
false)
 const { 
GetCaseConverted(src, srcIdx, srcCount, dest, clrDest, 
ccLower, turkic, lithuanian); }
 
 1591         template<
typename TSrcVec, 
typename TDestCh> 
void GetUpperCase(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true, 
const bool turkic = 
false, 
const bool lithuanian = 
false)
 const { 
GetCaseConverted(src, srcIdx, srcCount, dest, clrDest, 
ccUpper, turkic, lithuanian); }
 
 1592         template<
typename TSrcVec, 
typename TDestCh> 
void GetTitleCase(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true, 
const bool turkic = 
false, 
const bool lithuanian = 
false)
 const { 
GetCaseConverted(src, srcIdx, srcCount, dest, clrDest, 
ccTitle, turkic, lithuanian); }
 
 1593         template<
typename TSrcVec, 
typename TDestCh> 
void GetLowerCase(
const TSrcVec& src, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true, 
const bool turkic = 
false, 
const bool lithuanian = 
false)
 const { 
GetLowerCase(src, 0, src.Len(), dest, clrDest, turkic, lithuanian); }
 
 1594         template<
typename TSrcVec, 
typename TDestCh> 
void GetUpperCase(
const TSrcVec& src, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true, 
const bool turkic = 
false, 
const bool lithuanian = 
false)
 const { 
GetUpperCase(src, 0, src.Len(), dest, clrDest, turkic, lithuanian); }
 
 1595         template<
typename TSrcVec, 
typename TDestCh> 
void GetTitleCase(
const TSrcVec& src, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true, 
const bool turkic = 
false, 
const bool lithuanian = 
false)
 const { 
GetTitleCase(src, 0, src.Len(), dest, clrDest, turkic, lithuanian); }
 
 1601         template<
typename TSrcVec, 
typename TDestCh> 
void GetSimpleLowerCase(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true)
 const { 
GetSimpleCaseConverted(src, srcIdx, srcCount, dest, clrDest, 
ccLower); }
 
 1602         template<
typename TSrcVec, 
typename TDestCh> 
void GetSimpleUpperCase(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true)
 const { 
GetSimpleCaseConverted(src, srcIdx, srcCount, dest, clrDest, 
ccUpper); }
 
 1603         template<
typename TSrcVec, 
typename TDestCh> 
void GetSimpleTitleCase(
const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true)
 const { 
GetSimpleCaseConverted(src, srcIdx, srcCount, dest, clrDest, 
ccTitle); }
 
 1628         template<
typename TSrcVec, 
typename TDestCh>
 
 1630                 TVec<TDestCh>& dest, 
const bool clrDest, 
const bool full, 
const bool turkic = 
false)
 const { 
caseFolding.
Fold(src, srcIdx, srcCount, dest, clrDest, full, turkic); }
 
 1631         template<
typename TSrcVec, 
typename TDestCh>
 
 1632         void GetCaseFolded(
const TSrcVec& src, 
TVec<TDestCh>& dest, 
const bool clrDest = 
true, 
const bool full = 
true, 
const bool turkic = 
false)
 const {
 
 1633                 GetCaseFolded(src, 0, src.Len(), dest, clrDest, full, turkic); }
 
 1636         template<
typename TSrcVec> 
void ToCaseFolded(TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount, 
const bool turkic = 
false)
 const { 
caseFolding.
FoldInPlace(src, srcIdx, srcCount, turkic); }
 
 1637         template<
typename TSrcVec> 
void ToCaseFolded(TSrcVec& src, 
const bool turkic = 
false)
 const { 
ToCaseFolded(src, 0, src.Len(), turkic); }
 
 1665                         bool inComment = 
false, first = 
true;
 
 1668                                 if (c == EOF) 
return ! first;
 
 1672                                 else if (c == 10) 
return true;
 
 1673                                 else if (c == 
'#') inComment = 
true;
 
 1674                                 if (! inComment) 
buf += char(c);
 
 1691                                 if (line.
Len() <= 0) 
continue;
 
 1693                                 for (
int i = 0; i < dest.
Len(); i++) dest[i].ToTrunc();
 
 1696                         int c; 
bool ok = s.
IsHexInt(
true, 0, 0x10ffff, c); 
IAssertR(ok, s); 
return c; }
 
 1698                         if (ClrDestP) dest.
Clr();
 
 1700                         for (
int i = 0; i < parts.
Len(); i++) {
 
 1701                                 int c; 
bool ok = parts[i].IsHexInt(
true, 0, 0x10ffff, c); 
IAssertR(ok, s);
 
 1759                                 printf(
"Invalid cat code(s) in the comments: ");
 
 1875                 for (
int i = 0; i < names.
Len(); i++)
 
 1879                 for (
int i = 0; i < names.
Len(); i++)
 
 1885                 PCodecBase p; 
if (! 
codecs.IsKeyGetDat(s, p)) p.
Clr();
 
 1889                 for (
int i = 
codecs.FFirstKeyId(); 
codecs.FNextKeyId(i); ) {
 
 1891                         for (
int j = 0; j < dest.
Len(); j++) 
if (dest[j]() == 
codec()) { found = 
true; 
break; }
 
 1892                         if (! found) dest.
Add(codec); }}
 
 1902                 if (position < 0) { position = 0; 
return true; }
 
 1903                 size_t position_; 
bool retVal = 
ucd.
FindNextWordBoundary(src, 0, src.
Len(), position_); position = int(position_); 
return retVal; }
 
 1917                 if (position < 0) { position = 0; 
return true; }
 
 1995                 bool isAscii = 
true;
 
 1996                 for (
int i = 0, n = s.
Len(); i < n; i++) if (uchar(s[i]) >= 128) { isAscii = 
false; 
break; }
 
 1997                 if (isAscii) 
return s.
GetLc();
 
 // Variant of ___UniFwd1 whose generated predicate `bool name(const int cp) const` simply
 // returns ucd.name(cp).
 #define ___UniFwd1(name) bool name(const int cp) const { return ucd.name(cp); }
 2016 #undef DECLARE_FORWARDED_PROPERTY_METHODS 
 2035 template<
typename TSrcVec, 
typename TDestCh>
 
 2037         const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
 2040         size_t nDecoded = 0;
 
 2041         if (clrDest) dest.
Clr();
 
 2042         const size_t origSrcIdx = srcIdx;
 
 2043         const size_t srcEnd = srcIdx + srcCount;
 
 2044         while (srcIdx < srcEnd)
 
 2046                 const size_t charSrcIdx = srcIdx;
 
 2048                 if ((c & _1000_0000) == 0) {
 
 2050                         dest.
Add(TDestCh(c)); nDecoded++; 
continue; }
 
 2051                 else if ((c & _1100_0000) == _1000_0000) {
 
 2064                         uint nMoreBytes = 0, nBits = 0, minVal = 0;
 
 2065                         if ((c & _1110_0000) == _1100_0000) nMoreBytes = 1, nBits = 5, minVal = 0x80;
 
 2066                         else if ((c & _1111_0000) == _1110_0000) nMoreBytes = 2, nBits = 4, minVal = 0x800;
 
 2067                         else if ((c & _1111_1000) == _1111_0000) nMoreBytes = 3, nBits = 3, minVal = 0x10000;
 
 2068                         else if ((c & _1111_1100) == _1111_1000) nMoreBytes = 4, nBits = 2, minVal = 0x200000;
 
 2069                         else if ((c & _1111_1110) == _1111_1100) nMoreBytes = 5, nBits = 1, minVal = 0x4000000;
 
 2090                                 nMoreBytes = 5; nBits = 2; minVal = 0x80000000u; }
 
 2092                         uint cOut = c & ((1 << nBits) - 1); 
 
 2093                         bool cancel = 
false;
 
 2094                         for (
uint i = 0; i < nMoreBytes && ! cancel; i++) {
 
 2096                                 if (! (srcIdx < srcEnd)) {
 
 2101                                         case uehIgnore: cancel = 
true; 
continue;
 
 2104                                 c = src[
TVecIdx(srcIdx)] & 0xff; srcIdx++;
 
 2105                                 if ((c & _1100_0000) != _1000_0000) { 
 
 2110                                         case uehIgnore: srcIdx--; cancel = 
true; 
continue;
 
 2112                                 cOut <<= 6; cOut |= (c & _0011_1111); }
 
 2113                         if (cancel) 
continue;
 
 2121                                 bool err1 = (cOut < minVal);
 
 2125                                 bool err2 = (nMoreBytes > 3 || (nMoreBytes == 3 && cOut > 0x10ffff));
 
 2129                                                 else if (err2) 
throw TUnicodeException(charSrcIdx, c, 
"Invalid multibyte sequence: it decodes into 0x" + 
TInt::GetStr(cOut, 
"%08x") + 
", but only codepoints 0..0x10ffff are valid.");
 
 2130                                                 else { 
Fail; 
break; }
 
 2138                         if (! (
skipBom && (cOut == 0xfffe || cOut == 0xfeff) && charSrcIdx == origSrcIdx)) {
 
 2139                                 dest.
Add(cOut); nDecoded++; }
 
 2151 template<
typename TSrcVec, 
typename TDestCh>
 
 2153         const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
 2156         size_t nEncoded = 0;
 
 2157         for (
const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; srcIdx++)
 
 2161                 if (
strict && c > 0x10ffff) {
 
 2170                         dest.
Add(TDestCh(c & 0xffu));
 
 2171                 else if (c < 0x800u) {
 
 2172                         dest.
Add(TDestCh(_1100_0000 | ((c >> 6) & _0001_1111)));
 
 2173                         dest.
Add(TDestCh(_1000_0000 | (c & _0011_1111))); }
 
 2174                 else if (c < 0x10000u) {
 
 2175                         dest.
Add(TDestCh(_1110_0000 | ((c >> 12) & _0000_1111)));
 
 2176                         dest.
Add(TDestCh(_1000_0000 | ((c >> 6) & _0011_1111)));
 
 2177                         dest.
Add(TDestCh(_1000_0000 | (c & _0011_1111))); }
 
 2178                 else if (c < 0x200000u) {
 
 2179                         dest.
Add(TDestCh(_1111_0000 | ((c >> 18) & _0000_0111)));
 
 2180                         dest.
Add(TDestCh(_1000_0000 | ((c >> 12) & _0011_1111)));
 
 2181                         dest.
Add(TDestCh(_1000_0000 | ((c >> 6) & _0011_1111)));
 
 2182                         dest.
Add(TDestCh(_1000_0000 | (c & _0011_1111))); }
 
 2183                 else if (c < 0x4000000u) {
 
 2184                         dest.
Add(TDestCh(_1111_1000 | ((c >> 24) & _0000_0011)));
 
 2185                         dest.
Add(TDestCh(_1000_0000 | ((c >> 18) & _0011_1111)));
 
 2186                         dest.
Add(TDestCh(_1000_0000 | ((c >> 12) & _0011_1111)));
 
 2187                         dest.
Add(TDestCh(_1000_0000 | ((c >> 6) & _0011_1111)));
 
 2188                         dest.
Add(TDestCh(_1000_0000 | (c & _0011_1111))); }
 
 2190                         dest.
Add(TDestCh(_1111_1100 | ((c >> 30) & _0000_0011)));
 
 2191                         dest.
Add(TDestCh(_1000_0000 | ((c >> 24) & _0011_1111)));
 
 2192                         dest.
Add(TDestCh(_1000_0000 | ((c >> 18) & _0011_1111)));
 
 2193                         dest.
Add(TDestCh(_1000_0000 | ((c >> 12) & _0011_1111)));
 
 2194                         dest.
Add(TDestCh(_1000_0000 | ((c >> 6) & _0011_1111)));
 
 2195                         dest.
Add(TDestCh(_1000_0000 | (c & _0011_1111))); }
 
 2196                 if (! err) nEncoded++;
 
 2209 template<
typename TSrcVec, 
typename TDestCh>
 
 2211         const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
 2219         if (clrDest) dest.
Clr();
 
 2220         size_t nDecoded = 0;
 
 2221         if (srcCount <= 0) 
return nDecoded;
 
 2222         const size_t origSrcIdx = srcIdx, srcEnd = srcIdx + srcCount;
 
 2223         bool littleEndian = 
false;
 
 2225         if (bomHandling == 
bomIgnored) littleEndian = leDefault;
 
 2229                 if (byte1 == 0xfe && byte2 == 0xff) { littleEndian = 
false; 
if (
skipBom) srcIdx += 2; }
 
 2230                 else if (byte1 == 0xff && byte2 == 0xfe) { littleEndian = 
true; 
if (
skipBom) srcIdx += 2; }
 
 2231                 else if (bomHandling == 
bomAllowed) littleEndian = leDefault;
 
 2239         while (srcIdx < srcEnd)
 
 2241                 const size_t charSrcIdx = srcIdx;
 
 2243                 uint c = littleEndian ? (byte1 | (byte2 << 8)) : (byte2 | (byte1 << 8));
 
 2247                         if (! (srcIdx + 2 <= srcEnd)) {
 
 2255                         uint c2 = littleEndian ? (byte1 | (byte2 << 8)) : (byte2 | (byte1 << 8));
 
 2268                         dest.
Add(TDestCh(cc)); nDecoded++; 
continue;
 
 2272                         case uehThrow: 
throw TUnicodeException(charSrcIdx, c, 
"This 16-bit value should be used only as the second character of a surrogate pair.");
 
 2278                 if (charSrcIdx == origSrcIdx && (c == 0xfffeu || c == 0xfeffu) && 
skipBom) 
continue;
 
 2280                 dest.
Add(TDestCh(c)); nDecoded++;
 
 2293 template<
typename TSrcVec, 
typename TDestCh>
 
 2295         const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
 2302         if (clrDest) dest.
Clr();
 
 2303         size_t nDecoded = 0;
 
 2304         if (srcCount <= 0) 
return nDecoded;
 
 2305         const size_t origSrcIdx = srcIdx, srcEnd = srcIdx + srcCount;
 
 2309         if (bomHandling == 
bomIgnored) swap = (isDefaultLe != isMachineLe);
 
 2313                 if (c == 0xfeff) { swap = 
false; 
if (
skipBom) srcIdx += 1; }
 
 2314                 else if (c == 0xfffe) { swap = 
true; 
if (
skipBom) srcIdx += 1; }
 
 2315                 else if (bomHandling == 
bomAllowed) swap = (isMachineLe != isDefaultLe);
 
 2323         while (srcIdx < srcEnd)
 
 2325                 const size_t charSrcIdx = srcIdx;
 
 2327                 if (swap) c = ((c >> 8) & 0xff) | ((c & 0xff) << 8);
 
 2331                         if (! (srcIdx < srcEnd)) {
 
 2339                         if (swap) c2 = ((c2 >> 8) & 0xff) | ((c2 & 0xff) << 8);
 
 2352                         dest.
Add(TDestCh(cc)); nDecoded++; 
continue;
 
 2356                         case uehThrow: 
throw TUnicodeException(charSrcIdx, c, 
"This 16-bit value should be used only as the second character of a surrogate pair.");
 
 2362                 if (charSrcIdx == origSrcIdx && (c == 0xfffeu || c == 0xfeffu) && 
skipBom) 
continue;
 
 2364                 dest.
Add(TDestCh(c)); nDecoded++;
 
 2375 template<
typename TSrcVec, 
typename TDestCh>
 
 2377         const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
 2378         TVec<TDestCh>& dest, 
const bool clrDest, 
const bool insertBom,
 
 2383         size_t nEncoded = 0, srcEnd = srcIdx + srcCount;
 
 2384         if (insertBom) { dest.
Add(TDestCh(swap ? 0xfffeu : 0xfeffu)); nEncoded++; }
 
 2385         while (srcIdx < srcEnd)
 
 2388                 if (! (c <= 0x10ffffu)) {
 
 2411                         if (swap) c = ((c >> 8) & 0xff) | ((c & 0xff) << 8);
 
 2412                         dest.
Add(TDestCh(c)); nEncoded++; 
continue; }
 
 2414                 c -= 0x10000u; 
IAssert( c <= 0xfffffu);
 
 2415                 uint c1 = (c >> 10) & 1023, c2 = c & 1023;
 
 2418                         c1 = ((c1 >> 8) & 0xff) | ((c1 & 0xff) << 8);
 
 2419                         c2 = ((c2 >> 8) & 0xff) | ((c2 & 0xff) << 8); }
 
 2420                 dest.
Add(TDestCh(c1));
 
 2421                 dest.
Add(TDestCh(c2));
 
 2422                 nEncoded++; 
continue;
 
 2427 template<
typename TSrcVec, 
typename TDestCh>
 
 2429         const TSrcVec& src, 
size_t srcIdx, 
const size_t srcCount,
 
 2430         TVec<TDestCh>& dest, 
const bool clrDest, 
const bool insertBom,
 
 2434         size_t nEncoded = 0, srcEnd = srcIdx + srcCount;
 
 2435         if (insertBom) { dest.
Add(isDestLe ? 0xff : 0xfe); dest.
Add(isDestLe ? 0xfe : 0xff); nEncoded++; }
 
 2436         while (srcIdx < srcEnd)
 
 2439                 if (! (c <= 0x10ffffu)) {
 
 2443 #define ___OutRepl if (isDestLe) { dest.Add(replacementChar & 0xff); dest.Add((replacementChar >> 8) & 0xff); } else { dest.Add((replacementChar >> 8) & 0xff); dest.Add(replacementChar & 0xff); } 
 2464                         if (isDestLe) { dest.
Add(c & 0xff); dest.
Add((c >> 8) & 0xff); }
 
 2465                         else { dest.
Add((c >> 8) & 0xff); dest.
Add(c & 0xff); }
 
 2466                         nEncoded++; 
continue; }
 
 2468                 c -= 0x10000u; 
IAssert( c <= 0xfffffu);
 
 2469                 uint c1 = (c >> 10) & 1023, c2 = c & 1023;
 
 2471                 if (isDestLe) { dest.
Add(c1 & 0xff); dest.
Add((c1 >> 8) & 0xff); dest.
Add(c2 & 0xff); dest.
Add((c2 >> 8) & 0xff); }
 
 2472                 else { dest.
Add((c1 >> 8) & 0xff); dest.
Add(c1 & 0xff); dest.
Add((c2 >> 8) & 0xff); dest.
Add(c2 & 0xff); }
 
 2473                 nEncoded++; 
continue;
 
 2482 template<
typename TSrcVec>
 
 2486         if (position < srcIdx) { position = srcIdx; 
return true; }
 
 2488         const size_t srcEnd = srcIdx + srcCount;
 
 2489         if (position >= srcEnd) 
return false;
 
 2491         size_t origPos = position;
 
 2497         size_t posPrev = position;
 
 2500         if (position == origPos && position + 1 < srcEnd && IsSbSep(src[
TVecIdx(position)]) && 
IsWbIgnored(src[
TVecIdx(position + 1)])) { position += 1; 
return true; }
 
 2504         int cPrev = (posPrev < position ? (int) src[
TVecIdx(posPrev)] : -1), cCur = (position < srcEnd ? (
int) src[
TVecIdx(position)] : -1);
 
 2505         int cNext = (position < posNext && posNext < srcEnd ? (int) src[
TVecIdx(posNext)] : -1);
 
 2507         int cNext2, wbfNext2;
 
 2509         for ( ; position < srcEnd; posPrev = position, position = posNext, posNext = posNext2,
 
 2510                                                            cPrev = cCur, cCur = cNext, cNext = cNext2,
 
 2511                                                            wbfPrev = wbfCur, wbfCur = wbfNext, wbfNext = wbfNext2)
 
 2517                 cNext2 = (posNext < posNext2 && posNext2 < srcEnd ? (int) src[
TVecIdx(posNext2)] : -1);
 
 2519 #define TestCurNext(curFlag, nextFlag) if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue 
 2520 #define TestCurNext2(curFlag, nextFlag, next2Flag) if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag && (wbfNext2 & next2Flag) == next2Flag) continue 
 2521 #define TestPrevCurNext(prevFlag, curFlag, nextFlag) if ((wbfPrev & prevFlag) == prevFlag && (wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue 
 2523                 if (cCur == 13 && cNext == 10) 
continue;
 
 2546                 if ((wbfCur & ucfWbExtendNumLet) == ucfWbExtendNumLet &&
 
 2549                 position = posNext; 
return true;
 
 2552 #undef TestPrevCurNext 
 2560 template<
typename TSrcVec>
 
 2563         if (
size_t(dest.
Len()) != srcCount + 1) dest.
Gen(
TVecIdx(srcCount + 1));
 
 2565         size_t position = srcIdx;
 
 2566         dest[
TVecIdx(position - srcIdx)] = 
true;
 
 2567         while (position < srcIdx + srcCount)
 
 2569                 size_t oldPos = position;
 
 2571     if (oldPos < position) {
 
 2572                   Assert(oldPos < position);
 
 2574     Assert(position <= srcIdx + srcCount);
 
 2575                 dest[
TVecIdx(position - srcIdx)] = 
true;
 
 2584 template<
typename TSrcVec>
 
 2589         size_t pos = position;
 
 2597         while ((sfb & 
ucfSbSp) == ucfSbSp) {
 
 2601         while ((sfb & ucfSbSp) == ucfSbSp) {
 
 2609         int cLast = c, cButLast = -1, cButButLast = -1, len = 1, node = -1;
 
 2613                 c = (atEnd ? -1 : (int) src[
TVecIdx(pos)]);
 
 2615                 if (atEnd || ! (cat == ucLetter || cat == ucNumber || cat == ucSymbol)) {
 
 2621                         if (atEnd) 
return true; }
 
 2622                 if (len == 1) { cButLast = c; len++; }
 
 2623                 else if (len == 2) { cButButLast = c; len++;
 
 2626                         if (node < 0) 
return true; }
 
 2630                         if (node < 0) 
return true; }
 
 2635 template<
typename TSrcVec>
 
 2639         if (position < srcIdx) { position = srcIdx; 
return true; }
 
 2641         const size_t srcEnd = srcIdx + srcCount;
 
 2642         if (position >= srcEnd) 
return false;
 
 2644         size_t origPos = position;
 
 2650         size_t posPrev = position;
 
 2653         if (position == origPos && position + 1 < srcEnd && IsSbSep(src[
TVecIdx(position)]) && 
IsWbIgnored(src[
TVecIdx(position + 1)])) { position += 1; 
return true; }
 
 2657         int cPrev = (posPrev < position ? (int) src[
TVecIdx(posPrev)] : -1), cCur = (position < srcEnd ? (
int) src[
TVecIdx(position)] : -1);
 
 2658         int cNext = (position < posNext && posNext < srcEnd ? (int) src[
TVecIdx(posNext)] : -1);
 
 2660         int cNext2, sbfNext2;
 
 2662         typedef enum { stInit, stATerm, stATermSp, stATermSep, stSTerm, stSTermSp, stSTermSep } TPeekBackState;
 
 2663         TPeekBackState backState;
 
 2665                 size_t pos = position;
 
 2666                 bool wasSep = 
false, wasSp = 
false, wasATerm = 
false, wasSTerm = 
false;
 
 2672                         if ((sbf & 
ucfSbSep) == ucfSbSep) {
 
 2678                         while ((sbf & 
ucfSbSp) == ucfSbSp) {
 
 2693                 if (wasATerm) backState = (wasSep ? stATermSep : wasSp ? stATermSp : stATerm);
 
 2694                 else if (wasSTerm) backState = (wasSep ? stSTermSep : wasSp ? stSTermSp : stSTerm);
 
 2695                 else backState = stInit;
 
 2701         typedef enum { stUnknown, stLower, stNotLower } TPeekAheadState;
 
 2702         TPeekAheadState aheadState = stUnknown;
 
 2704         for ( ; position < srcEnd; posPrev = position, position = posNext, posNext = posNext2,
 
 2705                                                            cPrev = cCur, cCur = cNext, cNext = cNext2,
 
 2706                                                            sbfPrev = sbfCur, sbfCur = sbfNext, sbfNext = sbfNext2)
 
 2712                 cNext2 = (posNext < posNext2 && posNext2 < srcEnd ? (int) src[
TVecIdx(posNext2)] : -1);
 
 2715 #define TestCur(curFlag) ((sbfCur & ucfSb##curFlag) == ucfSb##curFlag) 
 2716 #define Trans(curFlag, newState) if (TestCur(curFlag)) { backState = st##newState; break; } 
 2717                 switch (backState) {
 
 2718                         case stInit: 
Trans(ATerm, ATerm); 
Trans(STerm, STerm); 
break;
 
 2719                         case stATerm: 
Trans(Sp, ATermSp); 
Trans(Sep, ATermSep); 
Trans(ATerm, ATerm); 
Trans(STerm, STerm); 
Trans(Close, ATerm); backState = stInit; 
break;
 
 2720                         case stSTerm: 
Trans(Sp, STermSp); 
Trans(Sep, STermSep); 
Trans(ATerm, ATerm); 
Trans(STerm, STerm); 
Trans(Close, STerm); backState = stInit; 
break;
 
 2721                         case stATermSp: 
Trans(Sp, ATermSp); 
Trans(Sep, ATermSep); 
Trans(ATerm, ATerm); 
Trans(STerm, STerm); backState = stInit; 
break;
 
 2722                         case stSTermSp: 
Trans(Sp, STermSp); 
Trans(Sep, STermSep); 
Trans(ATerm, ATerm); 
Trans(STerm, STerm); backState = stInit; 
break;
 
 2723                         case stATermSep: 
Trans(ATerm, ATerm); 
Trans(STerm, STerm); backState = stInit; 
break;
 
 2724                         case stSTermSep: 
Trans(ATerm, ATerm); 
Trans(STerm, STerm); backState = stInit; 
break;
 
 2729 #define IsPeekAheadSkippable(sbf) ((sbf & (ucfSbOLetter | ucfSbUpper | ucfSbLower | ucfSbSep | ucfSbSTerm | ucfSbATerm)) == 0) 
 2732                         if (aheadState == stLower) 
IAssert(isLower);
 
 2733                         else if (aheadState == stNotLower) 
IAssert(! isLower);
 
 2735                         aheadState = stUnknown; }
 
 2736                 if (aheadState == stUnknown)
 
 2739                         size_t pos = posNext;
 
 2740                         while (pos < srcEnd) {
 
 2743                                         if ((sbf & 
ucfSbLower) == ucfSbLower) aheadState = stLower;
 
 2744                                         else aheadState = stNotLower;
 
 2747                         if (! (pos < srcEnd)) aheadState = stNotLower;
 
 2749 #undef IsPeekAheadSkippable 
 2751 #define TestCurNext(curFlag, nextFlag) if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue 
 2752 #define TestCurNext2(curFlag, nextFlag, next2Flag) if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag && (sbfNext2 & next2Flag) == next2Flag) continue 
 2753 #define TestPrevCurNext(prevFlag, curFlag, nextFlag) if ((sbfPrev & prevFlag) == prevFlag && (sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue 
 2755                 if (cCur == 13 && cNext == 10) 
continue;
 
 2757                 if ((sbfCur & 
ucfSbSep) == ucfSbSep) {
 
 2759                         position = posNext; 
return true; }
 
 2767                 if ((backState == stATerm || backState == stATermSp || backState == stSTerm || backState == stSTermSp) &&
 
 2770                 if ((backState == stATerm || backState == stATermSp) && aheadState == stLower) 
continue;
 
 2773                 if ((backState == stATerm || backState == stSTerm) && (sbfNext & (
ucfSbClose | 
ucfSbSp | ucfSbSep)) != 0) 
continue;
 
 2776                 if (backState == stATerm || backState == stATermSp || backState == stATermSep || backState == stSTerm || backState == stSTermSp || backState == stSTermSep) {
 
 2777                         if ((sbfNext & (
ucfSbSp | ucfSbSep)) != 0) 
continue; 
 
 2779                         position = posNext; 
return true; } 
 
 2784 #undef TestPrevCurNext 
 2792 template<
typename TSrcVec>
 
 2795         if (
size_t(dest.
Len()) != srcCount + 1) dest.
Gen(
TVecIdx(srcCount + 1));
 
 2797         size_t position = srcIdx;
 
 2798         dest[
TVecIdx(position - srcIdx)] = 
true;
 
 2799         while (position < srcIdx + srcCount)
 
 2801                 size_t oldPos = position;
 
 2803     if (oldPos < position) {
 
 2804                   Assert(oldPos < position);
 
 2806     Assert(position <= srcIdx + srcCount);
 
 2807                 dest[
TVecIdx(position - srcIdx)] = 
true;
 
 2816 template<
typename TSrcVec, 
typename TDestCh>
 
 2820                                                                 const bool turkic, 
const bool lithuanian)
 const 
 2823         if (clrDest) dest.
Clr();
 
 2825                 GreekCapitalLetterSigma = 0x3a3,
 
 2826                 GreekSmallLetterSigma = 0x3c3,
 
 2827                 GreekSmallLetterFinalSigma = 0x3c2,
 
 2828                 LatinCapitalLetterI = 0x49,
 
 2829                 LatinCapitalLetterJ = 0x4a,
 
 2830                 LatinCapitalLetterIWithOgonek = 0x12e,
 
 2831                 LatinCapitalLetterIWithGrave = 0xcc,
 
 2832                 LatinCapitalLetterIWithAcute = 0xcd,
 
 2833                 LatinCapitalLetterIWithTilde = 0x128,
 
 2834                 LatinCapitalLetterIWithDotAbove = 0x130,
 
 2835                 LatinSmallLetterI = 0x69,
 
 2836                 CombiningDotAbove = 0x307
 
 2839         bool seenCased = 
false, seenTwoCased = 
false; 
int cpFirstCased = -1;
 
 2840         size_t nextWordBoundary = srcIdx;
 
 2841         TBoolV wordBoundaries; 
bool wbsKnown = 
false;
 
 2842         for (
const size_t origSrcIdx = srcIdx, srcEnd = srcIdx + srcCount; srcIdx < srcEnd; )
 
 2844                 int cp = src[
TVecIdx(srcIdx)]; srcIdx++;
 
 2850                 if (how != 
ccTitle) howHere = how;
 
 2852                         if (srcIdx - 1 == nextWordBoundary) { 
 
 2853                                 seenCased = 
false; seenTwoCased = 
false; cpFirstCased = -1;
 
 2855                                 IAssert(next > nextWordBoundary); nextWordBoundary = next; }
 
 2856                         bool isCased = IsCased(cp);
 
 2857                         if (isCased && ! seenCased) { howHere = 
ccTitle; seenCased = 
true; cpFirstCased = cp; }
 
 2859                                 if (isCased && seenCased) seenTwoCased = 
true; }
 
 2864                 if (cp == GreekCapitalLetterSigma && howHere == 
ccLower)
 
 2871                         if (! wbsKnown) { 
FindWordBoundaries(src, origSrcIdx, srcCount, wordBoundaries); wbsKnown = 
true; }
 
 2872                         size_t srcIdx2 = srcIdx; 
bool casedAfter = 
false;
 
 2876                         while (! wordBoundaries[
TVecIdx(srcIdx2 - origSrcIdx)])
 
 2878                                 int cp2 = src[
TVecIdx(srcIdx2)]; srcIdx2++;
 
 2879                                 if (IsCased(cp2)) { casedAfter = 
true; 
break; }
 
 2885                                 srcIdx2 = srcIdx - 1; 
bool casedBefore = 
false;
 
 2887                                 while (! wordBoundaries[
TVecIdx(srcIdx2 - origSrcIdx)])
 
 2889                                         --srcIdx2; 
int cp2 = src[
TVecIdx(srcIdx2)];
 
 2890                                         if (IsCased(cp2)) { casedBefore = 
true; 
break; }
 
 2894                                         dest.
Add(GreekSmallLetterFinalSigma); 
Assert(howHere == 
ccLower); 
continue; }
 
 2897                         dest.
Add(GreekSmallLetterSigma); 
continue;
 
 2899                 else if (lithuanian)
 
 2903                                 if (cp == LatinCapitalLetterI || cp == LatinCapitalLetterJ || cp == LatinCapitalLetterIWithOgonek)
 
 2905                                         bool moreAbove = 
false;
 
 2906                                         for (
size_t srcIdx2 = srcIdx; srcIdx2 < srcEnd; )
 
 2908                                                 const int cp2 = src[
TVecIdx(srcIdx2)]; srcIdx2++;
 
 2915                                                 if (cp == LatinCapitalLetterI) { dest.
Add(0x69); dest.
Add(0x307); 
continue; }
 
 2916                                                 if (cp == LatinCapitalLetterJ) { dest.
Add(0x6a); dest.
Add(0x307); 
continue; }
 
 2917                                                 if (cp == LatinCapitalLetterIWithOgonek) { dest.
Add(0x12f); dest.
Add(0x307); 
continue; }
 
 2920                                 else if (cp == LatinCapitalLetterIWithGrave) { dest.
Add(0x69); dest.
Add(0x307); dest.
Add(0x300); 
continue; }
 
 2921                                 else if (cp == LatinCapitalLetterIWithAcute) { dest.
Add(0x69); dest.
Add(0x307); dest.
Add(0x301); 
continue; }
 
 2922                                 else if (cp == LatinCapitalLetterIWithTilde) { dest.
Add(0x69); dest.
Add(0x307); dest.
Add(0x303); 
continue; }
 
 2924                         if (cp == CombiningDotAbove)
 
 2930                                 bool afterSoftDotted = 
false;
 
 2931                                 size_t srcIdx2 = srcIdx - 1; 
 
 2932                                 while (origSrcIdx < srcIdx2)
 
 2934                                         --srcIdx2; 
int cp2 = src[
TVecIdx(srcIdx2)];
 
 2938                                                 afterSoftDotted = IsSoftDotted(cp2); 
break; }
 
 2940                                 if (afterSoftDotted)
 
 2946                                         if (how == 
ccLower) { dest.
Add(0x307); 
continue; }
 
 2950                                         if (seenCased && ! seenTwoCased) 
continue; 
 
 2951                                         dest.
Add(0x307); 
continue;
 
 2959                         if (cp == LatinCapitalLetterIWithDotAbove) {
 
 2960                                 dest.
Add(howHere == 
ccLower ? 0x69 : 0x130); 
continue; }
 
 2964                         else if (cp == CombiningDotAbove)
 
 2968                                 bool afterI = 
false;
 
 2969                                 size_t srcIdx2 = srcIdx - 1; 
 
 2970                                 while (origSrcIdx < srcIdx2)
 
 2972                                         --srcIdx2; 
int cp2 = src[
TVecIdx(srcIdx2)];
 
 2973                                         if (cp2 == LatinCapitalLetterI) { afterI = 
true; 
break; }
 
 2978                                         if (how == 
ccTitle && seenCased && ! seenTwoCased) {
 
 2988                                                 IAssert(cpFirstCased == LatinCapitalLetterI);
 
 2989                                                 dest.
Add(0x307); 
continue; }
 
 2995                         else if (cp == LatinCapitalLetterI)
 
 3001                                 bool beforeDot = 
false;
 
 3002                                 for (
size_t srcIdx2 = srcIdx; srcIdx2 < srcEnd; )
 
 3004                                         const int cp2 = src[
TVecIdx(srcIdx2)]; srcIdx2++;
 
 3005                                         if (cp2 == 0x307) { beforeDot = 
true; 
break; }
 
 3010                                         dest.
Add(howHere == 
ccLower ? 0x131 : 0x49); 
continue; }
 
 3013                         else if (cp == LatinSmallLetterI)
 
 3015                                 dest.
Add(howHere == 
ccLower ? 0x69 : 0x130); 
continue;
 
 3019                 const TIntIntVH &specHere = (
 
 3020                         howHere == how ? specials :
 
 3034                         if (cpNew < 0) cpNew = cp;
 
 3035                         dest.
Add(cpNew); 
continue; }
 
 3041 template<
typename TSrcVec, 
typename TDestCh>
 
 3045         if (clrDest) dest.
Clr();
 
 3046         bool seenCased = 
false; 
size_t nextWordBoundary = srcIdx;
 
 3047         for (
const size_t origSrcIdx = srcIdx, srcEnd = srcIdx + srcCount; srcIdx < srcEnd; )
 
 3049                 const int cp = src[
TVecIdx(srcIdx)]; srcIdx++;
 
 3050                 int i = 
h.
GetKeyId(cp); 
if (i < 0) { dest.
Add(cp); 
continue; }
 
 3055                 if (how != 
ccTitle) howHere = how;
 
 3057                         if (srcIdx - 1 == nextWordBoundary) { 
 
 3060                                 IAssert(next > nextWordBoundary); nextWordBoundary = next; }
 
 3061                         bool isCased = IsCased(cp);
 
 3062                         if (isCased && ! seenCased) { howHere = 
ccTitle; seenCased = 
true; }
 
 3066                 if (cpNew < 0) cpNew = cp;
 
 3071 template<
typename TSrcVec>
 
 3074         bool seenCased = 
false; 
size_t nextWordBoundary = srcIdx;
 
 3075         for (
const size_t origSrcIdx = srcIdx, srcEnd = srcIdx + srcCount; srcIdx < srcEnd; srcIdx++)
 
 3077                 const int cp = src[
TVecIdx(srcIdx)];
 
 3078                 int i = 
h.
GetKeyId(cp); 
if (i < 0) 
continue;
 
 3083                 if (how != 
ccTitle) howHere = how;
 
 3085                         if (srcIdx == nextWordBoundary) { 
 
 3088                                 IAssert(next > nextWordBoundary); nextWordBoundary = next; }
 
 3089                         bool isCased = IsCased(cp);
 
 3090                         if (isCased && ! seenCased) { howHere = 
ccTitle; seenCased = 
true; }
 
 3094                 if (cpNew >= 0) src[
TVecIdx(srcIdx)] = cpNew;
 
 3102 template<
typename TDestCh>
 
 3112                 dest.
Add(L); dest.
Add(V);
 
 3116         int i = 
h.
GetKeyId(codePoint); 
if (i < 0) { dest.
Add(codePoint); 
return; }
 
 3118         int ofs = ci.
decompOffset; 
if (ofs < 0) { dest.
Add(codePoint); 
return; }
 
 3125 template<
typename TSrcVec, 
typename TDestCh>
 
 3127                 TVec<TDestCh>& dest, 
const bool compatibility, 
bool clrDest)
 const 
 3129         if (clrDest) dest.
Clr();
 
 3130         const size_t destStart = dest.
Len();
 
 3132         while (srcIdx < srcCount) {
 
 3135         for (
size_t destIdx = destStart, destEnd = dest.
Len(); destIdx < destEnd; )
 
 3138                 int cp = dest[
TVecIdx(destIdx)]; destIdx++;
 
 3147 template<
typename TSrcVec, 
typename TDestCh>
 
 3149                 TVec<TDestCh>& dest, 
bool compatibility, 
bool clrDest)
 const 
 3151         if (clrDest) dest.
Clr();
 
 3153         Decompose(src, srcIdx, srcCount, temp, compatibility);
 
 3157 template<
typename TSrcVec, 
typename TDestCh>
 
 3161         if (clrDest) dest.
Clr();
 
 3162         bool lastStarterKnown = 
false; 
 
 3163         size_t lastStarterPos = size_t(-1);  
 
 3164         int cpLastStarter = -1; 
 
 3165         const size_t srcEnd = srcIdx + srcCount;
 
 3167         while (srcIdx < srcEnd)
 
 3169                 const int cp = src[
TVecIdx(srcIdx)]; srcIdx++;
 
 3174                 if (lastStarterKnown && ccMax < cpClass)
 
 3177                         int cpCombined = -1;
 
 3180                                 if (j >= 0) { cpCombined = 
inverseDec[j]; 
break; }
 
 3194                                         if (0 <= TIndex && TIndex < HangulTCount) {
 
 3195                                                 cpCombined = cpLastStarter + TIndex;
 
 3200                         if (cpCombined >= 0) {
 
 3201                                 dest[
TVecIdx(lastStarterPos)] = cpCombined;
 
 3204                                 cpLastStarter = cpCombined; 
continue; }
 
 3207                         lastStarterKnown = 
true; lastStarterPos = dest.
Len(); cpLastStarter = cp; ccMax = cpClass - 1; }
 
 3208                 else if (cpClass > ccMax) 
 
 3214 template<
typename TSrcVec, 
typename TDestCh>
 
 3218         if (clrDest) dest.
Clr();
 
 3220         for (
const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; srcIdx++) {
 
 3221                 const int cp = src[
TVecIdx(srcIdx)];
 
 3223                         { dest.
Add(cp); retVal++; } }
 
 3230         for (
int i = 0; i < 5; i++) sum += i;
 
 3237         for (
int i = 0; i < 5; i++) sum += i;
 
bool IsVariationSelector() const 
 
void ToSimpleTitleCase(TSrcVec &src, size_t srcIdx, const size_t srcCount) const 
 
static int SwapBytes(int x)
 
TPair< TInt, TInt > TIntPr
 
bool IsSbFlag(const int cp, const TUniChFlags flag) const 
 
static int FromUnicode(int c)
 
void ToSimpleUpperCase(TIntV &src) const 
 
void GetUpperCase(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const 
 
static void Add(TVector &vector, const TElement &element)
 
void TestDecodeUtf16(TRnd &rnd, const TStr &testCaseDesc, const TUtf16BomHandling bomHandling, const TUniByteOrder defaultByteOrder, const bool insertBom)
 
static const int fromUnicodeTable1[6 *16]
 
void DecomposeAndCompose(const TSrcVec &src, TVec< TDestCh > &dest, bool compatibility, bool clrDest=true) const 
 
const char * GetCharName(const int cp) const 
 
void Compose(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool clrDest=true) const 
 
enum TUniChProperties_ TUniChProperties
 
T8BitCodec< TEncoding_ISO8859_4 > iso8859_4
 
#define IAssertR(Cond, Reason)
 
bool Has1Gram(const TItem &item) const 
 
void ToSimpleTitleCase(TSrcVec &src) const 
 
void SetPropertyX(const TUniChPropertiesX flag)
 
static void AppendVector(const TVec< TSrcDat > &src, TVec< TDestDat > &dest)
 
TUniChCategory GetCat(const int cp) const 
 
static int ToUnicode(int c)
 
int GetScriptByName(const TStr &scriptName) const 
 
TUcdFileReader & operator=(const TUcdFileReader &r)
 
void ToSimpleUpperCase(TSrcVec &src) const 
 
void SbEx_Add(const TStr &s)
 
TStr EncodeUtf8Str(const TIntV &src) const 
 
enum TUniChFlags_ TUniChFlags
 
bool IsCompositionExclusion() const 
 
static void Add(TVector &vector, const TElement &element)
 
THash< TItemPr, TVoid > pairs
 
TUniCaseFolding(TSIn &SIn)
 
void SaveBin(const TStr &fnBinUcd)
 
TUniChDb::TCaseConversion TCaseConversion
 
bool IsDcpFlag(const TUniChFlags flag) const 
 
static const ushort LineBreak_Quotation
 
void SetProperty(const TUniChProperties flag)
 
void Add(const TSrcVec &src)
 
bool IsGraphemeExtend() const 
 
void SetSbFlag(const TUniChFlags flag)
 
void GetSimpleCaseConverted(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const TCaseConversion how) const 
 
static const int fromUnicodeTable1[14 *16]
 
static TStr GetSpecialCasingFn()
 
enum TUniChSubCategory_ TUniChSubCategory
 
TPair< TItem, TItem > TItemPr
 
T8BitCodec< TEncoding_ISO8859_1 > iso8859_1
 
void Save(TSOut &SOut) const 
 
int GetWbFlags(const int cp) const 
 
TPt< TCodecBase > PCodecBase
 
virtual size_t FromUnicode(const TIntV &src, size_t srcIdx, const size_t srcCount, TIntV &dest, const bool clrDest=true) const =0
 
void WbFindNextNonIgnoredS(const TSrcVec &src, size_t &position, const size_t srcEnd) const 
 
void SetDcpFlag(const TUniChFlags flag)
 
void SetWbFlag(const TUniChFlags flag)
 
virtual void Test() const 
 
bool IsGraphemeBase() const 
 
TStr EncodeUtf8Str(const TSrcVec &src, size_t srcIdx, const size_t srcCount) const 
 
void ToSimpleUpperCase(TSrcVec &src, size_t srcIdx, const size_t srcCount) const 
 
enum TUniChCategory_ TUniChCategory
 
enum TUnicodeErrorHandling_ TUnicodeErrorHandling
 
T8BitCodec< TEncoding_ISO8859_3 > TCodec_ISO8859_3
 
static int ToUnicode(int c)
 
TUniChSubCategory GetSubCat(const int cp) const 
 
TSubcatHelper(TUniChDb &owner_)
 
size_t FromUnicode(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TDestVec &dest, const bool clrDest=true) const 
 
size_t ToUnicode(const TStr &src, TIntV &dest, const bool clrDest=true) const 
 
static TStr GetScriptNameKatakana()
 
TUniTrie< TInt > sbExTrie
 
const char * GetCStr(const uint &Offset) const 
 
static const ushort LineBreak_InfixNumeric
 
static uint GetRndUint(TRnd &rnd)
 
bool IsLogicalOrderException() const 
 
T8BitCodec< TEncoding_ISO8859_4 > TCodec_ISO8859_4
 
TSizeTy Len() const 
Returns the number of elements in the vector. 
 
static int ToUnicode(int c)
 
void InitPropList(const TStr &basePath)
 
static const int toUnicodeTable[8 *16]
 
TUniChSubCategory GetSubCat() const 
 
void ToCaseFolded(TSrcVec &src, const bool turkic=false) const 
 
void Save(TSOut &SOut) const 
 
void GetSimpleTitleCase(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
bool IsDefaultIgnorable() const 
 
void ToSimpleLowerCase(TSrcVec &src) const 
 
T8BitCodec(TUnicodeErrorHandling errorHandling_, int replacementChar_=TUniCodec::DefaultReplacementChar)
 
enum TUniChDb::TCaseConversion_ TCaseConversion
 
void GetSimpleUpperCase(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
bool IsAlphabetic() const 
 
void GetLowerCase(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const 
 
int GetSbFlags(const int cp) const 
 
void WbFindCurOrNextNonIgnored(const TSrcVec &src, size_t &position, const size_t srcEnd) const 
 
virtual size_t FromUnicode(const TIntV &src, size_t srcIdx, const size_t srcCount, TStr &dest, const bool clrDest=true) const 
 
static const ushort LineBreak_ComplexContext
 
T8BitCodec< TEncoding_CP852 > cp852
 
const TStr & GetScriptName(const int scriptId) const 
 
TUniChCategory GetCat() const 
 
TIntIntVH specialCasingUpper
 
int GetScript(const TUniChInfo &ci) const 
 
static const int yuAsciiChars[10]
 
TStr GetSubStr(const int &BChN, const int &EChN) const 
 
void GetUpperCase(const TIntV &src, TIntV &dest) const 
 
T8BitCodec< TEncoding_ISO8859_3 > iso8859_3
 
THash< TItem, TVoid > singles
 
void RegisterCodec(const TStr &nameList, const PCodecBase &codec)
 
void InitDerivedCoreProperties(const TStr &basePath)
 
virtual void Test() const 
 
static int FromUnicode(int c)
 
bool IsWhiteSpace() const 
 
static int FromUnicode(int c)
 
void InitLineBreaks(const TStr &basePath)
 
static const int uniChars[10]
 
void Decompose(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool compatibility, bool clrDest=true) const 
 
void GetLowerCase(const TIntV &src, TIntV &dest) const 
 
void WbFindNextNonIgnored(const TSrcVec &src, size_t &position, const size_t srcEnd) const 
 
void ToSimpleLowerCase(TIntV &src) const 
 
bool WbFindPrevNonIgnored(const TSrcVec &src, const size_t srcStart, size_t &position) const 
 
#define Trans(curFlag, newState)
 
void Add(const TSrcVec &src, const size_t srcIdx, const size_t srcCount)
 
static TStr GetNormalizationTestFn()
 
T8BitCodec< TEncoding_ISO8859_2 > TCodec_ISO8859_2
 
void GetCaseFolded(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true, const bool full=true, const bool turkic=false) const 
 
enum TUniChPropertiesX_ TUniChPropertiesX
 
void GetSimpleLowerCase(const TIntV &src, TIntV &dest) const 
 
bool IsWbFlag(const int cp, const TUniChFlags flag) const 
 
static void SaveUShort(TSOut &SOut, ushort u)
 
TUnicodeErrorHandling errorHandling
 
THash< TInt, TIntV > TIntIntVH
 
void ToCaseFolded(TIntV &src) const 
 
void DecomposeAndCompose(const TIntV &src, TIntV &dest, bool compatibility) const 
 
void Test(const TStr &basePath)
 
static const int fromUnicodeTable2[2 *16]
 
static const int fromUnicodeTable2[4 *16]
 
T8BitCodec< TEncoding_CP437 > cp437
 
static void ParseCodePointRange(const TStr &s, int &from, int &to)
 
TIntIntVH specialCasingLower
 
virtual TStr GetName() const =0
 
virtual TStr GetName() const 
 
int simpleUpperCaseMapping
 
size_t ExtractStarters(const TSrcVec &src, TVec< TDestCh > &dest, bool clrDest=true) const 
 
bool FindNextWordBoundary(const TIntV &src, int &position) const 
 
size_t ToUnicode(const TStr &src, TIntV &dest, const bool clrDest=true) const 
 
static void LoadUShort(TSIn &SIn, ushort &u)
 
size_t UniToStr(const TIntV &src, TStr &dest, const bool clrDest=true) const 
 
void TestCaseConversion(const TStr &source, const TStr &trueLc, const TStr &trueTc, const TStr &trueUc, bool turkic, bool lithuanian)
 
static int FromUnicode(int c)
 
void Save(TSOut &SOut) const 
 
static TStr GetUnicodeDataFn()
 
T8BitCodec< TEncoding_YuAscii > TCodec_YuAscii
 
THash< TIntPr, TInt > inverseDec
 
void FindWordBoundaries(const TIntV &src, TBoolV &dest) const 
 
bool IsPropertyX(const TUniChPropertiesX flag) const 
 
bool FindNextSentenceBoundary(const TSrcVec &src, const size_t srcIdx, const size_t srcCount, size_t &position) const 
 
size_t EncodeUtf8(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
void ClrSentenceBoundaryExceptions()
 
bool IsPrivateUse() const 
 
size_t DecodeUtf16FromWords(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool clrDest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const 
 
TStr GetWbFlagsStr() const 
 
static TStr GetScriptsFn()
 
static const int fromUnicodeTable3[6 *16]
 
void ToSimpleCaseConverted(TSrcVec &src, size_t srcIdx, const size_t srcCount, const TCaseConversion how) const 
 
void FindSentenceBoundaries(const TIntV &src, TBoolV &dest) const 
 
static int ToUnicode(int c)
 
void Clr(bool DoDel=false)
 
size_t FromUnicode(const TIntV &src, TChA &dest, const bool clrDest=true) const 
 
bool IsDeprecated() const 
 
void TestCaseConversions()
 
int simpleTitleCaseMapping
 
static PSIn New(const TStr &FNm)
 
void DelKey(const TKey &Key)
 
void SetCatAndSubCat(const TUniChSubCategory catAndSubCat)
 
static TStr GetWbFlagsStr(const int flags)
 
size_t UniToStr(const TIntV &src, size_t srcIdx, const size_t srcCount, TStr &dest, const bool clrDest=true) const 
 
size_t ToUnicode(const TIntV &src, TIntV &dest, const bool clrDest=true) const 
 
TStr GetCharNameS(const int cp) const 
 
PCodecBase GetCodec(const TStr &name) const 
 
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector. 
 
int ChangeStrAll(const TStr &SrcStr, const TStr &DstStr, const bool &FromStartP=false)
 
static const int fromUnicodeTable2[2 *16]
 
int DecodeUtf16FromWords(const TIntV &src, TIntV &dest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const 
 
TUcdFileReader(const TUcdFileReader &r)
 
void TestDecodeUtf8(TRnd &rnd, const TStr &testCaseDesc)
 
int ExtractStarters(TIntV &src) const 
 
bool IsCompatibilityDecomposition() const 
 
void ToSimpleTitleCase(TIntV &src) const 
 
void PrintCharNames(FILE *f, const TSrcVec &src, const TStr &prefix) const 
 
static TStr GetScriptNameUnknown()
 
bool IsHexInt(const bool &Check, const int &MnVal, const int &MxVal, int &Val) const 
 
virtual size_t FromUnicode(const TIntV &src, size_t srcIdx, const size_t srcCount, TChA &dest, const bool clrDest=true) const 
 
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val. 
 
void GetSimpleUpperCase(const TIntV &src, TIntV &dest) const 
 
TVec< PCodecBase > TCodecBaseV
 
bool IsNodeTerminal(const int nodeIdx) const 
 
static TStr GetSentenceBreakTestFn()
 
#define TestCurNext(curFlag, nextFlag)
 
size_t EncodeUtf16ToBytes(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const 
 
int SearchStr(const TStr &Str, const int &BChN=0) const 
 
void Save(TSOut &SOut) const 
 
TStr GetUtf8CaseFolded(const TStr &s) const 
 
bool IsAsciiHexDigit() const 
 
#define DefineUniSubCat(cat, subCat, c)
 
T8BitCodec< TEncoding_ISO8859_1 > TCodec_ISO8859_1
 
bool IsWbIgnored(const int cp) const 
 
enum TUniByteOrder_ TUniByteOrder
 
bool FNextKeyId(int &KeyId) const 
 
void GetCaseFolded(const TIntV &src, TIntV &dest, const bool full=true) const 
 
static const int fromUnicodeTable3[3 *16]
 
int ExtractStarters(const TIntV &src, TIntV &dest) const 
 
TStr GetSbFlagsStr() const 
 
size_t ExtractStarters(TSrcVec &src) const 
 
void LoadTxt_ProcessDecomposition(TUniChInfo &ci, TStr s)
 
bool FindNextWordBoundary(const TSrcVec &src, const size_t srcIdx, const size_t srcCount, size_t &position) const 
 
size_t FromUnicode(const TIntV &src, TIntV &dest, const bool clrDest=true) const 
 
static const int toUnicodeTable[6 *16]
 
void InitSpecialCasing(const TStr &basePath)
 
void AddDecomposition(const int codePoint, TVec< TDestCh > &dest, const bool compatibility) const 
 
size_t DecodeUtf8(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
static TStr GetDerivedCorePropsFn()
 
bool IsSurrogate(const int cp) const 
 
THash< TItemTr, TInt > roots
 
TUnicodeException(size_t srcIdx_, int srcChar_, const TStr &message_)
 
bool IsNoncharacter() const 
 
static void LoadSChar(TSIn &SIn, signed char &u)
 
static TStr GetWordBreakPropertyFn()
 
static const int fromUnicodeTable3[11 *16]
 
THash< TInt, TUniChInfo > h
 
bool GetNextLine(TStrV &dest)
 
void SbEx_AddUtf8(const TStr &s)
 
void GetSimpleLowerCase(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
bool CanSentenceEndHere(const TSrcVec &src, const size_t srcIdx, const size_t position) const 
 
void Save(TSOut &SOut) const 
 
void GetSimpleLowerCase(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
void GetTitleCase(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const 
 
int DecodeUtf16FromBytes(const TIntV &src, TIntV &dest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const 
 
void Open(const TStr &fileName)
 
T8BitCodec< TEncoding_CP852 > TCodec_CP852
 
void UseEnglishSentenceBoundaryExceptions()
 
static bool IsValidSubCat(const char chCat, const char chSubCat)
 
static const int fromUnicodeTable1[14 *16]
 
bool IsSbFlag(const TUniChFlags flag) const 
 
static int ToUnicode(int c)
 
static const ushort LineBreak_Numeric
 
void SaveBf(const void *Bf, const TSize &BfL)
 
void LoadBin(const TStr &fnBin)
 
void InitScripts(const TStr &basePath)
 
virtual size_t ToUnicode(const TStr &src, size_t srcIdx, const size_t srcCount, TIntV &dest, const bool clrDest=true) const 
 
void TestComposition(const TStr &basePath)
 
enum TUtf16BomHandling_ TUtf16BomHandling
 
void GetCaseFolded(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic=false) const 
 
static TStr GetLineBreakFn()
 
void Fold(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const 
 
static const int fromUnicodeTable1[14 *16]
 
static void ParseCodePointList(const TStr &s, TIntV &dest, bool ClrDestP=true)
 
bool IsGetChInfo(const int cp, TUniChInfo &ChInfo)
 
bool FindNextSentenceBoundary(const TIntV &src, int &position) const 
 
T8BitCodec< TEncoding_YuAscii > yuAscii
 
static TStr GetWordBreakTestFn()
 
TUniCodec(TUnicodeErrorHandling errorHandling_, bool strict_, int replacementChar_, bool skipBom_)
 
size_t ExtractStarters(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool clrDest=true) const 
 
TUcdFileReader(const TStr &fileName)
 
size_t DecodeUtf16FromBytes(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const 
 
static bool IsWbIgnored(const TUniChInfo &ci)
 
void Save(const bool &Bool)
 
void Decompose(const TSrcVec &src, TVec< TDestCh > &dest, bool compatibility, bool clrDest=true) const 
 
size_t FromUnicode(const TSrcVec &src, TDestVec &dest, const bool clrDest=true) const 
 
static int ToUnicode(int c)
 
int Get3GramRoot(const TItem &last, const TItem &butLast, const TItem &butButLast) const 
 
static TStr NormalizeCodecName(const TStr &name)
 
static const int toUnicodeTable[8 *16]
 
static TStr GetSbFlagsStr(const int flags)
 
TStr EncodeUtf8Str(const TSrcVec &src) const 
 
TStr GetCharNameS(const int cp) const 
 
void UnregisterCodec(const TStr &nameList)
 
T8BitCodec< TEncoding_CP437 > TCodec_CP437
 
int GetScript(const int cp) const 
 
int DecodeUtf8(const TStr &src, TIntV &dest) const 
 
TUniChSubCategory GetSubCat(const int cp) const 
 
int GetKeyId(const TKey &Key) const 
 
int DecodeUtf8(const TIntV &src, TIntV &dest) const 
 
#define IsPeekAheadSkippable(sbf)
 
static int FromUnicode(int c)
 
DECLARE_FORWARDED_PROPERTY_METHODS bool IsPrivateUse(const int cp) const 
 
void SetCat(const int cp)
 
static int FromUnicode(int c)
 
void ToCaseFolded(TSrcVec &src, size_t srcIdx, const size_t srcCount, const bool turkic=false) const 
 
int GetCombiningClass(const int cp) const 
 
static const int toUnicodeTable[8 *16]
 
static TStr GetSentenceBreakPropertyFn()
 
void SbEx_Add(const TSrcVec &v)
 
void GetSimpleTitleCase(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
void LoadTxt(const TStr &fileName)
 
void PrintCharNames(FILE *f, const TSrcVec &src, size_t srcIdx, const size_t srcCount, const TStr &prefix) const 
 
static bool IsMachineLittleEndian()
 
int AddKey(const TKey &Key)
 
void InitWordAndSentenceBoundaryFlags(const TStr &basePath)
 
void Compose(const TSrcVec &src, TVec< TDestCh > &dest, bool clrDest=true) const 
 
#define DECLARE_FORWARDED_PROPERTY_METHODS
 
void TestFindNextWordOrSentenceBoundary(const TStr &basePath, bool sentence)
 
int SbEx_AddMulti(const TStr &words, const bool wordsAreUtf8=true)
 
static const int toUnicodeTable[6 *16]
 
int simpleLowerCaseMapping
 
bool IsXidContinue() const 
 
static const ushort LineBreak_Unknown
 
static TStr GetCompositionExclusionsFn()
 
virtual size_t ToUnicode(const TIntV &src, size_t srcIdx, const size_t srcCount, TIntV &dest, const bool clrDest=true) const 
 
static int ToUnicode(int c)
 
bool IsGraphemeLink() const 
 
bool IsQuotationMark() const 
 
void LoadTxt(const TStr &basePath)
 
T8BitCodec< TEncoding_CP1250 > TCodec_CP1250
 
size_t EncodeUtf16ToWords(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const 
 
DECLARE_FORWARDED_PROPERTY_METHODS ___UniFwd2(IsPrivateUse, IsSurrogate) TUniChCategory GetCat(const int cp) const 
 
void ProcessComment(TUniChDb::TUcdFileReader &reader)
 
T8BitCodec< TEncoding_CP1250 > cp1250
 
size_t ToUnicode(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
void FindWordBoundaries(const TSrcVec &src, const size_t srcIdx, const size_t srcCount, TBoolV &dest) const 
 
bool Has2Gram(const TItem &last, const TItem &butLast) const 
 
static TStr GetPropListFn()
 
void ToSimpleLowerCase(TSrcVec &src, size_t srcIdx, const size_t srcCount) const 
 
void TestCat(const int cp)
 
size_t FromUnicode(const TIntV &src, TStr &dest, const bool clrDest=true) const 
 
void SplitOnAllCh(const char &SplitCh, TStrV &StrV, const bool &SkipEmpty=true) const 
 
const char * GetCharName(const int cp) const 
 
void LoadBf(const void *Bf, const TSize &BfL)
 
void Compose(const TIntV &src, TIntV &dest) const 
 
void GetSimpleTitleCase(const TIntV &src, TIntV &dest) const 
 
size_t DecodeUtf8(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
bool IsIdContinue() const 
 
void GetUpperCase(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const 
 
void GetLowerCase(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const 
 
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
 
#define TestCurNext2(curFlag, nextFlag, next2Flag)
 
virtual size_t ToUnicode(const TIntV &src, size_t srcIdx, const size_t srcCount, TIntV &dest, const bool clrDest=true) const =0
 
TTriple< TItem, TItem, TItem > TItemTr
 
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements. 
 
void GetSimpleUpperCase(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
void Save(TSOut &SOut) const 
 
void FindSentenceBoundaries(const TSrcVec &src, const size_t srcIdx, const size_t srcCount, TBoolV &dest) const 
 
void Decompose(const TIntV &src, TIntV &dest, bool compatibility) const 
 
int EncodeUtf8(const TIntV &src, TIntV &dest) const 
 
static const int toUnicodeTable[6 *16]
 
bool IsProperty(const TUniChProperties flag) const 
 
void GetTitleCase(const TIntV &src, TIntV &dest) const 
 
static int FromUnicode(int c)
 
void SplitOnWs(TStrV &StrV) const 
 
bool IsWbFlag(const TUniChFlags flag) const 
 
void GetAllCodecs(TCodecBaseV &dest) const 
 
TUnicode(const TStr &fnBinUcd)
 
TUnicodeErrorHandling errorHandling
 
static void SaveSChar(TSOut &SOut, signed char u)
 
static int FromUnicode(int c)
 
bool IsKey(const TKey &Key) const 
 
size_t ToUnicode(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
bool IsTerminalPunctuation() const 
 
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element. 
 
static const int fromUnicodeTable2[2 *16]
 
int EncodeUtf16ToBytes(const TIntV &src, TIntV &dest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const 
 
TUniCaseFolding caseFolding
 
static const int fromUnicodeTable2[2 *16]
 
TNode(const TItem &item_, const int child_, const int sib_, const bool terminal_)
 
bool IsIdeographic() const 
 
TIntIntVH specialCasingTitle
 
bool IsJoinControl() const 
 
void FoldInPlace(TSrcVec &src, size_t srcIdx, const size_t srcCount, const bool turkic) const 
 
int EncodeUtf16ToWords(const TIntV &src, TIntV &dest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const 
 
void DecomposeAndCompose(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool compatibility, bool clrDest=true) const 
 
static const int fromUnicodeTable1[14 *16]
 
static const int fromUnicodeTable1[14 *16]
 
THash< TStr, PCodecBase > codecs
 
static const int fromUnicodeTable4[11 *16]
 
void SbEx_Set(const TUniTrie< TInt > &newTrie)
 
const TKey & GetKey(const int &KeyId) const 
 
static const int fromUnicodeTable2[2]
 
void TestWbFindNonIgnored() const 
 
static int ParseCodePoint(const TStr &s)
 
virtual size_t FromUnicode(const TIntV &src, size_t srcIdx, const size_t srcCount, TIntV &dest, const bool clrDest=true) const 
 
static TStr GetCaseFoldingFn()
 
static int ToUnicode(int c)
 
bool IsSoftDotted() const 
 
bool IsBidiControl() const 
 
int GetChild(const int parentIdx, const TItem &item) const 
 
#define TestPrevCurNext(prevFlag, curFlag, nextFlag)
 
size_t EncodeUtf8(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const 
 
void GetTitleCase(const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const 
 
Vector is a sequence of TVal objects representing an array that can change in size. 
 
static TStr GetAuxiliaryDir()
 
T8BitCodec< TEncoding_ISO8859_2 > iso8859_2
 
static TStr GetScriptNameHiragana()
 
static ushort GetLineBreakCode(char c1, char c2)
 
#define DefineUniCat(cat, c)
 
void GetCaseConverted(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const TCaseConversion how, const bool turkic, const bool lithuanian) const 
 
size_t ToUnicode(const TIntV &src, TIntV &dest, const bool clrDest=true) const 
 
void WordsToBytes(const TIntV &src, TIntV &dest)