| 
    SNAP Library 2.0, Developer Reference
    2013-05-13 16:33:57
    
   SNAP, a general purpose, high performance system for analysis and manipulation of large networks 
   | 
  
  
  
 


Go to the source code of this file.
Classes | |
| class | TUnicodeException | 
| class | TUniCodec | 
| class | TUniCaseFolding | 
| class | TCodecBase | 
| class | TCodecWrapper< TCodecImpl_ > | 
| class | TVecElt< TVector_ > | 
| class | TVecElt< TVec< TDat > > | 
| class | TVecElt< TChA > | 
| class | TEncoding_ISO8859_1 | 
| class | TEncoding_ISO8859_2 | 
| class | TEncoding_ISO8859_3 | 
| class | TEncoding_ISO8859_4 | 
| class | TEncoding_YuAscii | 
| class | TEncoding_CP437 | 
| class | TEncoding_CP852 | 
| class | TEncoding_CP1250 | 
| class | T8BitCodec< TEncoding_ > | 
| class | TUniChInfo | 
| class | TUniTrie< TItem_ > | 
| class | TUniTrie< TItem_ >::TNode | 
| class | TUniChDb | 
| class | TUniChDb::TUcdFileReader | 
| class | TUniChDb::TSubcatHelper | 
| class | TUnicode | 
Defines | |
| #define | DefineByte(b7, b6, b5, b4, b3, b2, b1, b0) _ ## b7 ## b6 ## b5 ## b4 ## _ ## b3 ## b2 ## b1 ## b0 = (b7 << 7) | (b6 << 6) | (b5 << 5) | (b4 << 4) | (b3 << 3) | (b2 << 2) | (b1 << 1) | b0 | 
| #define | DefineUniCat(cat, c) uc ## cat = (int(uchar(c)) & 0xff) | 
| #define | DefineUniSubCat(cat, subCat, c) uc ## cat ## subCat = ((uc ## cat) << 8) | (int(uchar(c)) & 0xff) | 
| #define | ___UniFwd1(name) bool name(const int cp) const { int i = h.GetKeyId(cp); if (i < 0) return false; else return h[i].name(); } | 
| #define | ___UniFwd2(name1, name2) ___UniFwd1(name1) ___UniFwd1(name2) | 
| #define | ___UniFwd3(name1, name2, name3) ___UniFwd2(name1, name2) ___UniFwd1(name3) | 
| #define | ___UniFwd4(name1, name2, name3, name4) ___UniFwd3(name1, name2, name3) ___UniFwd1(name4) | 
| #define | ___UniFwd5(name1, name2, name3, name4, name5) ___UniFwd4(name1, name2, name3, name4) ___UniFwd1(name5) | 
| #define | DECLARE_FORWARDED_PROPERTY_METHODS | 
| #define | ___UniFwd1(name) bool name(const int cp) const { return ucd.name(cp); } | 
| #define | ___OutRepl if (isDestLe) { dest.Add(replacementChar & 0xff); dest.Add((replacementChar >> 8) & 0xff); } else { dest.Add((replacementChar >> 8) & 0xff); dest.Add(replacementChar & 0xff); } | 
| #define | TestCurNext(curFlag, nextFlag) if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue | 
| #define | TestCurNext2(curFlag, nextFlag, next2Flag) if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag && (wbfNext2 & next2Flag) == next2Flag) continue | 
| #define | TestPrevCurNext(prevFlag, curFlag, nextFlag) if ((wbfPrev & prevFlag) == prevFlag && (wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue | 
| #define | TestCur(curFlag) ((sbfCur & ucfSb##curFlag) == ucfSb##curFlag) | 
| #define | Trans(curFlag, newState) if (TestCur(curFlag)) { backState = st##newState; break; } | 
| #define | IsPeekAheadSkippable(sbf) ((sbf & (ucfSbOLetter | ucfSbUpper | ucfSbLower | ucfSbSep | ucfSbSTerm | ucfSbATerm)) == 0) | 
| #define | TestCurNext(curFlag, nextFlag) if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue | 
| #define | TestCurNext2(curFlag, nextFlag, next2Flag) if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag && (sbfNext2 & next2Flag) == next2Flag) continue | 
| #define | TestPrevCurNext(prevFlag, curFlag, nextFlag) if ((sbfPrev & prevFlag) == prevFlag && (sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue | 
Typedefs | |
| typedef int | TUniVecIdx | 
| typedef enum TUnicodeErrorHandling_ | TUnicodeErrorHandling | 
| typedef enum TUniByteOrder_ | TUniByteOrder | 
| typedef enum TUtf16BomHandling_ | TUtf16BomHandling | 
| typedef THash< TInt, TIntV > | TIntIntVH | 
| typedef TPt< TCodecBase > | PCodecBase | 
| typedef TVec< PCodecBase > | TCodecBaseV | 
| typedef T8BitCodec < TEncoding_ISO8859_1 >  | TCodec_ISO8859_1 | 
| typedef T8BitCodec < TEncoding_ISO8859_2 >  | TCodec_ISO8859_2 | 
| typedef T8BitCodec < TEncoding_ISO8859_3 >  | TCodec_ISO8859_3 | 
| typedef T8BitCodec < TEncoding_ISO8859_4 >  | TCodec_ISO8859_4 | 
| typedef T8BitCodec < TEncoding_CP852 >  | TCodec_CP852 | 
| typedef T8BitCodec < TEncoding_CP437 >  | TCodec_CP437 | 
| typedef T8BitCodec < TEncoding_CP1250 >  | TCodec_CP1250 | 
| typedef T8BitCodec < TEncoding_YuAscii >  | TCodec_YuAscii | 
| typedef enum TUniChCategory_ | TUniChCategory | 
| typedef enum TUniChSubCategory_ | TUniChSubCategory | 
| typedef enum TUniChFlags_ | TUniChFlags | 
| typedef enum TUniChProperties_ | TUniChProperties | 
| typedef enum TUniChPropertiesX_ | TUniChPropertiesX | 
Enumerations | |
| enum | TUnicodeErrorHandling_ { uehIgnore = 0, uehThrow = 1, uehReplace = 2, uehAbort = 3 } | 
| enum | TUniByteOrder_ { boMachineEndian = 0, boLittleEndian = 1, boBigEndian = 2 } | 
| enum | TUtf16BomHandling_ { bomAllowed = 0, bomRequired = 1, bomIgnored = 2 } | 
| enum | TUniChCategory_ {  DefineUniCat = (Letter, 'L'), DefineUniCat = (Letter, 'L'), DefineUniCat = (Letter, 'L'), DefineUniCat = (Letter, 'L'), DefineUniCat = (Letter, 'L'), DefineUniCat = (Letter, 'L'), DefineUniCat = (Letter, 'L'), DefineUniCat = (Letter, 'L') }  | 
| enum | TUniChSubCategory_ {  DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u'), DefineUniSubCat = (Letter, Uppercase, 'u') }  | 
| enum | TUniChFlags_ {  ucfCompatibilityDecomposition = 1, ucfCompositionExclusion = 1 << 1, ucfWbFormat = 1 << 2, ucfWbKatakana = 1 << 3, ucfWbALetter = 1 << 4, ucfWbMidLetter = 1 << 5, ucfWbMidNum = 1 << 6, ucfWbNumeric = 1 << 7, ucfWbExtendNumLet = 1 << 8, ucfSbSep = 1 << 9, ucfSbFormat = 1 << 10, ucfSbSp = 1 << 11, ucfSbLower = 1 << 12, ucfSbUpper = 1 << 13, ucfSbOLetter = 1 << 14, ucfSbNumeric = 1 << 15, ucfSbATerm = 1 << 16, ucfSbSTerm = 1 << 17, ucfSbClose = 1 << 18, ucfSbMask = ucfSbSep | ucfSbFormat | ucfSbSp | ucfSbLower | ucfSbUpper | ucfSbOLetter | ucfSbNumeric | ucfSbATerm | ucfSbSTerm | ucfSbClose, ucfWbMask = ucfWbFormat | ucfWbKatakana | ucfWbALetter | ucfWbMidLetter | ucfWbMidNum | ucfWbNumeric | ucfWbExtendNumLet | ucfSbSep, ucfDcpAlphabetic = 1 << 19, ucfDcpDefaultIgnorableCodePoint = 1 << 20, ucfDcpLowercase = 1 << 21, ucfDcpGraphemeBase = 1 << 22, ucfDcpGraphemeExtend = 1 << 23, ucfDcpIdStart = 1 << 24, ucfDcpIdContinue = 1 << 25, ucfDcpMath = 1 << 26, ucfDcpUppercase = 1 << 27, ucfDcpXidStart = 1 << 28, ucfDcpXidContinue = 1 << 29, ucfDcpMask }  | 
| enum | TUniChProperties_ {  ucfPrAsciiHexDigit = 1, ucfPrBidiControl = 2, ucfPrDash = 4, ucfPrDeprecated = 8, ucfPrDiacritic = 0x10, ucfPrExtender = 0x20, ucfPrGraphemeLink = 0x40, ucfPrHexDigit = 0x80, ucfPrHyphen = 0x100, ucfPrIdeographic = 0x200, ucfPrJoinControl = 0x400, ucfPrLogicalOrderException = 0x800, ucfPrNoncharacterCodePoint = 0x1000, ucfPrPatternSyntax = 0x2000, ucfPrPatternWhiteSpace = 0x4000, ucfPrQuotationMark = 0x8000, ucfPrSoftDotted = 0x10000, ucfPrSTerm = 0x20000, ucfPrTerminalPunctuation = 0x40000, ucfPrVariationSelector = 0x80000, ucfPrWhiteSpace = 0x100000 }  | 
| enum | TUniChPropertiesX_ {  ucfPxOtherAlphabetic = 1, ucfPxOtherDefaultIgnorableCodePoint = 2, ucfPxOtherGraphemeExtend = 4, ucfPxOtherIdContinue = 8, ucfPxOtherIdStart = 0x10, ucfPxOtherLowercase = 0x20, ucfPxOtherMath = 0x40, ucfPxOtherUppercase = 0x80, ucfPxIdsBinaryOperator = 0x100, ucfPxIdsTrinaryOperator = 0x200, ucfPxRadical = 0x400, ucfPxUnifiedIdeograph = 0x800 }  | 
Functions | |
| bool | AlwaysFalse () | 
| bool | AlwaysTrue () | 
| #define ___OutRepl if (isDestLe) { dest.Add(replacementChar & 0xff); dest.Add((replacementChar >> 8) & 0xff); } else { dest.Add((replacementChar >> 8) & 0xff); dest.Add(replacementChar & 0xff); } | 
Referenced by TUniCodec::EncodeUtf16ToBytes().
| #define ___UniFwd1 | ( | name | ) | bool name(const int cp) const { int i = h.GetKeyId(cp); if (i < 0) return false; else return h[i].name(); } | 
| #define ___UniFwd1 | ( | name | ) | bool name(const int cp) const { return ucd.name(cp); } | 
| #define ___UniFwd2 | ( | name1, | |
| name2 | |||
| ) | ___UniFwd1(name1) ___UniFwd1(name2) | 
| #define ___UniFwd3 | ( | name1, | |
| name2, | |||
| name3 | |||
| ) | ___UniFwd2(name1, name2) ___UniFwd1(name3) | 
| #define ___UniFwd4 | ( | name1, | |
| name2, | |||
| name3, | |||
| name4 | |||
| ) | ___UniFwd3(name1, name2, name3) ___UniFwd1(name4) | 
| #define ___UniFwd5 | ( | name1, | |
| name2, | |||
| name3, | |||
| name4, | |||
| name5 | |||
| ) | ___UniFwd4(name1, name2, name3, name4) ___UniFwd1(name5) | 
___UniFwd5(IsAsciiHexDigit, IsBidiControl, IsDash, IsDeprecated, IsDiacritic) \ ___UniFwd5(IsExtender, IsGraphemeLink, IsHexDigit, IsHyphen, IsIdeographic) \ ___UniFwd5(IsJoinControl, IsLogicalOrderException, IsNoncharacter, IsQuotationMark, IsSoftDotted) \ ___UniFwd4(IsSTerminal, IsTerminalPunctuation, IsVariationSelector, IsWhiteSpace) \ ___UniFwd5(IsAlphabetic, IsUppercase, IsLowercase, IsMath, IsDefaultIgnorable) \ ___UniFwd4(IsGraphemeBase, IsGraphemeExtend, IsIdStart, IsIdContinue) \ ___UniFwd2(IsXidStart, IsXidContinue) \ ___UniFwd3(IsCompositionExclusion, IsCompatibilityDecomposition, IsSbSep) \ ___UniFwd1(IsGbExtend) \ ___UniFwd2(IsCased, IsCurrency)
| #define DefineByte | ( | b7, | |
| b6, | |||
| b5, | |||
| b4, | |||
| b3, | |||
| b2, | |||
| b1, | |||
| b0 | |||
| ) | _ ## b7 ## b6 ## b5 ## b4 ## _ ## b3 ## b2 ## b1 ## b0 = (b7 << 7) | (b6 << 6) | (b5 << 5) | (b4 << 4) | (b3 << 3) | (b2 << 2) | (b1 << 1) | b0 | 
| #define DefineUniCat | ( | cat, | |
| c | |||
| ) | uc ## cat = (int(uchar(c)) & 0xff) | 
| #define DefineUniSubCat | ( | cat, | |
| subCat, | |||
| c | |||
| ) | uc ## cat ## subCat = ((uc ## cat) << 8) | (int(uchar(c)) & 0xff) | 
| #define IsPeekAheadSkippable | ( | sbf | ) | ((sbf & (ucfSbOLetter | ucfSbUpper | ucfSbLower | ucfSbSep | ucfSbSTerm | ucfSbATerm)) == 0) | 
Referenced by TUniChDb::FindNextSentenceBoundary().
| #define TestCur | ( | curFlag | ) | ((sbfCur & ucfSb##curFlag) == ucfSb##curFlag) | 
| #define TestCurNext | ( | curFlag, | |
| nextFlag | |||
| ) | if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue | 
Referenced by TUniChDb::FindNextSentenceBoundary(), and TUniChDb::FindNextWordBoundary().
| #define TestCurNext | ( | curFlag, | |
| nextFlag | |||
| ) | if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue | 
| #define TestCurNext2 | ( | curFlag, | |
| nextFlag, | |||
| next2Flag | |||
| ) | if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag && (wbfNext2 & next2Flag) == next2Flag) continue | 
Referenced by TUniChDb::FindNextWordBoundary().
| #define TestCurNext2 | ( | curFlag, | |
| nextFlag, | |||
| next2Flag | |||
| ) | if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag && (sbfNext2 & next2Flag) == next2Flag) continue | 
| #define TestPrevCurNext | ( | prevFlag, | |
| curFlag, | |||
| nextFlag | |||
| ) | if ((wbfPrev & prevFlag) == prevFlag && (wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue | 
Referenced by TUniChDb::FindNextSentenceBoundary(), and TUniChDb::FindNextWordBoundary().
| #define TestPrevCurNext | ( | prevFlag, | |
| curFlag, | |||
| nextFlag | |||
| ) | if ((sbfPrev & prevFlag) == prevFlag && (sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue | 
Referenced by TUniChDb::FindNextSentenceBoundary().
| typedef TPt<TCodecBase> PCodecBase | 
| typedef T8BitCodec<TEncoding_CP1250> TCodec_CP1250 | 
| typedef T8BitCodec<TEncoding_CP437> TCodec_CP437 | 
| typedef T8BitCodec<TEncoding_CP852> TCodec_CP852 | 
| typedef T8BitCodec<TEncoding_YuAscii> TCodec_YuAscii | 
| typedef TVec<PCodecBase> TCodecBaseV | 
| typedef enum TUniByteOrder_ TUniByteOrder | 
| typedef enum TUniChCategory_ TUniChCategory | 
| typedef enum TUniChFlags_ TUniChFlags | 
| typedef enum TUniChProperties_ TUniChProperties | 
| typedef enum TUniChPropertiesX_ TUniChPropertiesX | 
| typedef enum TUniChSubCategory_ TUniChSubCategory | 
| typedef enum TUnicodeErrorHandling_ TUnicodeErrorHandling | 
| typedef int TUniVecIdx | 
| typedef enum TUtf16BomHandling_ TUtf16BomHandling | 
| enum TUniByteOrder_ | 
Definition at line 38 of file unicode.h.
{
        boMachineEndian = 0,
        boLittleEndian = 1,
        boBigEndian = 2
}
| enum TUniChCategory_ | 
| DefineUniCat | |
| DefineUniCat | |
| DefineUniCat | |
| DefineUniCat | |
| DefineUniCat | |
| DefineUniCat | |
| DefineUniCat | |
| DefineUniCat | 
Definition at line 662 of file unicode.h.
{
#define DefineUniCat(cat, c) uc ## cat = (int(uchar(c)) & 0xff)
        DefineUniCat(Letter, 'L'),             // ucLetter
        DefineUniCat(Mark, 'M'),
        DefineUniCat(Number, 'N'),
        DefineUniCat(Punctuation, 'P'),
        DefineUniCat(Symbol, 'S'),
        DefineUniCat(Separator, 'Z'),
        DefineUniCat(Other, 'C')
#undef DefineUniCat
}
| enum TUniChFlags_ | 
Definition at line 712 of file unicode.h.
{
        ucfCompatibilityDecomposition = 1, // if this flag is not set, the decomposition is canonical
        ucfCompositionExclusion = 1 << 1,       // from CompositionExclusions.txt
        // Flags used when searching for word boundaries.  See UAX #29.
        ucfWbFormat = 1 << 2,
        ucfWbKatakana = 1 << 3,
        ucfWbALetter = 1 << 4,
        ucfWbMidLetter = 1 << 5,
        ucfWbMidNum = 1 << 6,
        ucfWbNumeric = 1 << 7,
        ucfWbExtendNumLet = 1 << 8,
        // Flags used with sentence boundaries (Sep is also used with word boundaries).  See UAX #29.
        ucfSbSep = 1 << 9,
        ucfSbFormat = 1 << 10,
        ucfSbSp = 1 << 11,
        ucfSbLower = 1 << 12,
        ucfSbUpper = 1 << 13,
        ucfSbOLetter = 1 << 14,
        ucfSbNumeric = 1 << 15,
        ucfSbATerm = 1 << 16,
        ucfSbSTerm = 1 << 17,
        ucfSbClose = 1 << 18,
        ucfSbMask = ucfSbSep | ucfSbFormat | ucfSbSp | ucfSbLower | ucfSbUpper | ucfSbOLetter | ucfSbNumeric | ucfSbATerm | ucfSbSTerm | ucfSbClose,
        ucfWbMask = ucfWbFormat | ucfWbKatakana | ucfWbALetter | ucfWbMidLetter | ucfWbMidNum | ucfWbNumeric | ucfWbExtendNumLet | ucfSbSep,
        // Flags from DerivedCoreProperties.txt.
        // [The comments are from UCD.html.]
        // - Characters with the Alphabetic property. For more information, see Chapter 4 in [Unicode].
        //   Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl
        ucfDcpAlphabetic = 1 << 19,
        // - For programmatic determination of default-ignorable code points.
        //   New characters that should be ignored in processing (unless explicitly supported)
        //   will be assigned in these ranges, permitting programs to correctly handle the default
        //   behavior of such characters when not otherwise supported.  For more information, see
        //   UAX #29: Text Boundaries [Breaks].
        //   Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs + Noncharacters - White_Space - annotation characters
        //   [Examples: soft hyphen, zero-width space, noncharacters (e.g. U+fffe, U+ffff, U+1fffe, U+1ffff, etc.), surrogates, language tags, variation selectors]
        ucfDcpDefaultIgnorableCodePoint = 1 << 20,
        // - Characters with the Lowercase property.  For more information, see Chapter 4 in [Unicode].
        //   Generated from: Other_Lowercase + Ll
        ucfDcpLowercase = 1 << 21,
        // - For programmatic determination of grapheme cluster boundaries.
        //   For more information, see UAX #29: Text Boundaries [Breaks].
        //   Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend
        ucfDcpGraphemeBase = 1 << 22,
        // - For programmatic determination of grapheme cluster boundaries.
        //   For more information, see UAX #29: Text Boundaries [Breaks].
        //   Generated from: Other_Grapheme_Extend + Me + Mn
        //   Note: depending on an application's interpretation of Co (private use), they may be either
        //         in Grapheme_Base, or in Grapheme_Extend, or in neither.
        ucfDcpGraphemeExtend = 1 << 23,
        // - Used to determine programming identifiers, as described in UAX #31: Identifier and Pattern Syntax.
        ucfDcpIdStart = 1 << 24,
        ucfDcpIdContinue = 1 << 25,
        // - Characters with the Math property. For more information, see Chapter 4 in [Unicode].
        //   Generated from: Sm + Other_Math
        ucfDcpMath = 1 << 26,
        // - Characters with the Uppercase property. For more information, see Chapter 4 in [Unicode].
        //   Generated from: Lu + Other_Uppercase
        ucfDcpUppercase = 1 << 27,
        // - Used to determine programming identifiers, as described in UAX #31: Identifier and Pattern Syntax.
        ucfDcpXidStart = 1 << 28,
        ucfDcpXidContinue = 1 << 29,
        ucfDcpMask = ucfDcpAlphabetic | ucfDcpDefaultIgnorableCodePoint | ucfDcpLowercase | ucfDcpGraphemeBase | ucfDcpGraphemeExtend |
                ucfDcpIdStart | ucfDcpIdContinue | ucfDcpMath | ucfDcpUppercase | ucfDcpXidStart | ucfDcpXidContinue,
}
| enum TUniChProperties_ | 
Definition at line 780 of file unicode.h.
{
        // The flags from PropList.txt.
        // [The comments are from UCD.html.]
        // - ASCII characters commonly used for the representation of hexadecimal numbers.
        //   [= 0123456789abcdefABCDEF]
        ucfPrAsciiHexDigit = 1,
        // - Those format control characters which have specific functions in the Bidirectional Algorithm.
        ucfPrBidiControl = 2,
        // - Those punctuation characters explicitly called out as dashes in the Unicode Standard,
        //   plus compatibility equivalents to those. Most of these have the Pd General Category,
        //   but some have the Sm General Category because of their use in mathematics.
        //     U+0002d  HYPHEN-MINUS
        //     U+0058a  ARMENIAN HYPHEN
        //     U+005be  HEBREW PUNCTUATION MAQAF
        //     U+01806  MONGOLIAN TODO SOFT HYPHEN
        //     U+02010  HYPHEN
        //     U+02011  NON-BREAKING HYPHEN
        //     U+02012  FIGURE DASH
        //     U+02013  EN DASH
        //     U+02014  EM DASH
        //     U+02015  HORIZONTAL BAR
        //     U+02053  SWUNG DASH
        //     U+0207b  SUPERSCRIPT MINUS
        //     U+0208b  SUBSCRIPT MINUS
        //     U+02212  MINUS SIGN
        //     U+02e17  DOUBLE OBLIQUE HYPHEN
        //     U+0301c  WAVE DASH
        //     U+03030  WAVY DASH
        //     U+030a0  KATAKANA-HIRAGANA DOUBLE HYPHEN
        //     U+0fe31  PRESENTATION FORM FOR VERTICAL EM DASH
        //     U+0fe32  PRESENTATION FORM FOR VERTICAL EN DASH
        //     U+0fe58  SMALL EM DASH
        //     U+0fe63  SMALL HYPHEN-MINUS
        //     U+0ff0d  FULLWIDTH HYPHEN-MINUS
        ucfPrDash = 4,
        // - For a machine-readable list of deprecated characters.  No characters will ever be removed
        //   from the standard, but the usage of deprecated characters is strongly discouraged.
        ucfPrDeprecated = 8,
        // - Characters that linguistically modify the meaning of another character to which they apply.
        //   Some diacritics are not combining characters, and some combining characters are not diacritics.
        ucfPrDiacritic = 0x10,
        // - Characters whose principal function is to extend the value or shape of a preceding alphabetic
        //   character.  Typical of these are length and iteration marks.
        ucfPrExtender = 0x20,
        // - Used in determining default grapheme cluster boundaries.  For more information, see UAX #29: Text Boundaries.
        ucfPrGraphemeLink = 0x40,
        // - Characters commonly used for the representation of hexadecimal numbers, plus their compatibility equivalents.
        //   [= AsciiHexDigit + fullwidth digit {0..9} + fullwidth latin {small|capital} letter {a..f}]
        ucfPrHexDigit = 0x80,
        // - Those dashes used to mark connections between pieces of words, plus the Katakana middle dot.
        //   The Katakana middle dot functions like a hyphen, but is shaped like a dot rather than a dash.
        //     U+0002d  HYPHEN-MINUS
        //     U+000ad  SOFT HYPHEN
        //     U+0058a  ARMENIAN HYPHEN
        //     U+01806  MONGOLIAN TODO SOFT HYPHEN
        //     U+02010  HYPHEN
        //     U+02011  NON-BREAKING HYPHEN
        //     U+02e17  DOUBLE OBLIQUE HYPHEN
        //     U+030fb  KATAKANA MIDDLE DOT
        //     U+0fe63  SMALL HYPHEN-MINUS
        //     U+0ff0d  FULLWIDTH HYPHEN-MINUS
        //     U+0ff65  HALFWIDTH KATAKANA MIDDLE DOT
        ucfPrHyphen = 0x100,
        // - Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese) ideographs.
        ucfPrIdeographic = 0x200,
        // - Those format control characters which have specific functions for control of cursive joining and ligation.
        ucfPrJoinControl = 0x400,
        // - There are a small number of characters that do not use logical order.
        //   These characters require special handling in most processing.
        ucfPrLogicalOrderException = 0x800,
        // - Code points that are permanently reserved for internal use.
        ucfPrNoncharacterCodePoint = 0x1000,
        // - Used for pattern syntax as described in UAX #31: Identifier and Pattern Syntax.
        ucfPrPatternSyntax = 0x2000,
        ucfPrPatternWhiteSpace = 0x4000,
        // - Those punctuation characters that function as quotation marks.
        //     U+00022  QUOTATION MARK
        //     U+00027  APOSTROPHE
        //     U+000ab  LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
        //     U+000bb  RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
        //     U+02018  LEFT SINGLE QUOTATION MARK
        //     U+02019  RIGHT SINGLE QUOTATION MARK
        //     U+0201a  SINGLE LOW-9 QUOTATION MARK
        //     U+0201b  SINGLE HIGH-REVERSED-9 QUOTATION MARK
        //     U+0201c  LEFT DOUBLE QUOTATION MARK
        //     U+0201d  RIGHT DOUBLE QUOTATION MARK
        //     U+0201e  DOUBLE LOW-9 QUOTATION MARK
        //     U+0201f  DOUBLE HIGH-REVERSED-9 QUOTATION MARK
        //     U+02039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        //     U+0203a  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        //     U+0300c  LEFT CORNER BRACKET
        //     U+0300d  RIGHT CORNER BRACKET
        //     U+0300e  LEFT WHITE CORNER BRACKET
        //     U+0300f  RIGHT WHITE CORNER BRACKET
        //     U+0301d  REVERSED DOUBLE PRIME QUOTATION MARK
        //     U+0301e  DOUBLE PRIME QUOTATION MARK
        //     U+0301f  LOW DOUBLE PRIME QUOTATION MARK
        //     U+0fe41  PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
        //     U+0fe42  PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
        //     U+0fe43  PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
        //     U+0fe44  PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
        //     U+0ff02  FULLWIDTH QUOTATION MARK
        //     U+0ff07  FULLWIDTH APOSTROPHE
        //     U+0ff62  HALFWIDTH LEFT CORNER BRACKET
        //     U+0ff63  HALFWIDTH RIGHT CORNER BRACKET
        ucfPrQuotationMark = 0x8000,
        // - Characters with a "soft dot", like i or j. An accent placed on these characters causes the dot to disappear.
        //   An explicit _dot above_ can be added where required, such as in Lithuanian.
        ucfPrSoftDotted = 0x10000,
        // - Sentence Terminal. Used in UAX #29: Text Boundaries.
        //     U+00021  EXCLAMATION MARK
        //     U+0002e  FULL STOP
        //     U+0003f  QUESTION MARK
        //     U+0203c  DOUBLE EXCLAMATION MARK
        //     U+0203d  INTERROBANG
        //     U+02047  DOUBLE QUESTION MARK
        //     U+02048  QUESTION EXCLAMATION MARK
        //     U+02049  EXCLAMATION QUESTION MARK
        //     U+03002  IDEOGRAPHIC FULL STOP
        //     [plus many characters from other writing systems]
        ucfPrSTerm = 0x20000,
        // - Those punctuation characters that generally mark the end of textual units.
        //   [JB note: this set contains more character than STerm.  For example, it contains
        //   the comma, colon and semicolon, whereas STerm doesn't.]
        //     U+00021  EXCLAMATION MARK
        //     U+0002c  COMMA
        //     U+0002e  FULL STOP
        //     U+0003a  COLON
        //     U+0003b  SEMICOLON
        //     U+0003f  QUESTION MARK
        //     U+0203c  DOUBLE EXCLAMATION MARK
        //     U+0203d  INTERROBANG
        //     U+02047  DOUBLE QUESTION MARK
        //     U+02048  QUESTION EXCLAMATION MARK
        //     U+02049  EXCLAMATION QUESTION MARK
        //     [plus *lots* of charcters from other writing systems]
        ucfPrTerminalPunctuation = 0x40000,
        // - Indicates all those characters that qualify as Variation Selectors.
        //   For details on the behavior of these characters, see StandardizedVariants.html and
        //   Section 16.4, Variation Selectors in [Unicode].
        ucfPrVariationSelector = 0x80000,
        // - Those separator characters and control characters which should be treated by
        //   programming languages as "white space" for the purpose of parsing elements.
        //   Note: ZERO WIDTH SPACE and ZERO WIDTH NO-BREAK SPACE are not included,
        //         since their functions are restricted to line-break control.
        //         Their names are unfortunately misleading in this respect.
        //   Note: There are other senses of "whitespace" that encompass a different set of characters.
        //         [JB note: e.g. there's a BIDI class for whitespace ('WS') in UnicodeData.txt.
        //         There's also a "Sp" class in the sentence boundary algorithm, see UAX #29, sec. 5.1.]
        //   This includes the following characters:
        //     U+0009  <control>
        //     U+000a  <control>
        //     U+000b  <control>
        //     U+000c  <control>
        //     U+000d  <control>
        //     U+0020  SPACE
        //     U+0085  <control>
        //     U+00a0  NO-BREAK SPACE
        //     U+1680  OGHAM SPACE MARK
        //     U+180e  MONGOLIAN VOWEL SEPARATOR
        //     U+2000  EN QUAD
        //     U+2001  EM QUAD
        //     U+2002  EN SPACE
        //     U+2003  EM SPACE
        //     U+2004  THREE-PER-EM SPACE
        //     U+2005  FOUR-PER-EM SPACE
        //     U+2006  SIX-PER-EM SPACE
        //     U+2007  FIGURE SPACE
        //     U+2008  PUNCTUATION SPACE
        //     U+2009  THIN SPACE
        //     U+200a  HAIR SPACE
        //     U+2028  LINE SEPARATOR
        //     U+2029  PARAGRAPH SEPARATOR
        //     U+202f  NARROW NO-BREAK SPACE
        //     U+205f  MEDIUM MATHEMATICAL SPACE
        //     U+3000  IDEOGRAPHIC SPACE
        ucfPrWhiteSpace = 0x100000
}
| enum TUniChPropertiesX_ | 
Definition at line 961 of file unicode.h.
{
        // More properties from PropList.txt.
        // - Used to derive the properties in DerivedCoreProperties.txt.
        ucfPxOtherAlphabetic = 1,
        ucfPxOtherDefaultIgnorableCodePoint = 2,
        ucfPxOtherGraphemeExtend = 4,
        ucfPxOtherIdContinue = 8,
        ucfPxOtherIdStart = 0x10,
        ucfPxOtherLowercase = 0x20,
        ucfPxOtherMath = 0x40,
        ucfPxOtherUppercase = 0x80,
        // - Used in ideographic description sequences.
        ucfPxIdsBinaryOperator = 0x100,
        ucfPxIdsTrinaryOperator = 0x200,
        ucfPxRadical = 0x400,
        ucfPxUnifiedIdeograph = 0x800
}
| enum TUniChSubCategory_ | 
Definition at line 676 of file unicode.h.
{
#define DefineUniSubCat(cat, subCat, c) uc ## cat ## subCat = ((uc ## cat) << 8) | (int(uchar(c)) & 0xff)
        DefineUniSubCat(Letter, Uppercase, 'u'),            // ucLetterUppercase
        DefineUniSubCat(Letter, Lowercase, 'l'),
        DefineUniSubCat(Letter, Titlecase, 't'),
        DefineUniSubCat(Letter, Modifier, 'm'),
        DefineUniSubCat(Letter, Other, 'o'),
        DefineUniSubCat(Mark, Nonspacing, 'n'),
        DefineUniSubCat(Mark, SpacingCombining, 'c'),
        DefineUniSubCat(Mark, Enclosing, 'e'),
        DefineUniSubCat(Number, DecimalDigit, 'd'),
        DefineUniSubCat(Number, Letter, 'l'),
        DefineUniSubCat(Number, Other, 'o'),
        DefineUniSubCat(Punctuation, Connector, 'c'),
        DefineUniSubCat(Punctuation, Dash, 'd'),
        DefineUniSubCat(Punctuation, Open, 's'),
        DefineUniSubCat(Punctuation, Close, 'e'),
        DefineUniSubCat(Punctuation, InitialQuote, 'i'),
        DefineUniSubCat(Punctuation, FinalQuote, 'f'),
        DefineUniSubCat(Punctuation, Other, 'o'),
        DefineUniSubCat(Symbol, Math, 'm'),
        DefineUniSubCat(Symbol, Currency, 'c'),
        DefineUniSubCat(Symbol, Modifier, 'k'),
        DefineUniSubCat(Symbol, Other, 'o'),
        DefineUniSubCat(Separator, Space, 's'),
        DefineUniSubCat(Separator, Line, 'l'),
        DefineUniSubCat(Separator, Paragraph, 'p'),
        DefineUniSubCat(Other, Control, 'c'),
        DefineUniSubCat(Other, Format, 'f'),
        DefineUniSubCat(Other, Surrogate, 's'),
        DefineUniSubCat(Other, PrivateUse, 'o'),
        DefineUniSubCat(Other, NotAssigned, 'n')
}
Definition at line 18 of file unicode.h.
{
        // What happens when an error occurs:
        uehIgnore = 0,  // - it is silently ignored (nothing is added to the output vector)
        uehThrow = 1,   // - an exception is thrown (TUnicodeException)
        uehReplace = 2, // - the replacement character is added to the output vector
        uehAbort = 3    // - the encoding/decoding process stops immediately
}
| enum TUtf16BomHandling_ | 
Definition at line 46 of file unicode.h.
{
        bomAllowed = 0,   // if a BOM is present, it is used to determine the byte order; otherwise, the default byte order is used
        bomRequired = 1,  // if a BOM is present, it is used to determine the byte order; otherwise, an error is reported
        bomIgnored = 2    // the default byte order is used; if a BOM is present, it is treated like any other character
}
| bool AlwaysFalse | ( | ) |  [inline] | 
        
Definition at line 3221 of file unicode.h.
Referenced by TUniChDb::InitScripts(), and TUniChDb::TestFindNextWordOrSentenceBoundary().
{
        int sum = 0;
        for (int i = 0; i < 5; i++) sum += i;
        return sum > 100;
}

| bool AlwaysTrue | ( | ) |  [inline] |