|
SNAP Library 3.0, Developer Reference
2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|


Go to the source code of this file.
Classes | |
| class | TUnicodeException |
| class | TUniCodec |
| class | TUniCaseFolding |
| class | TCodecBase |
| class | TCodecWrapper< TCodecImpl_ > |
| class | TVecElt< TVector_ > |
| class | TVecElt< TVec< TDat > > |
| class | TVecElt< TChA > |
| class | TEncoding_ISO8859_1 |
| class | TEncoding_ISO8859_2 |
| class | TEncoding_ISO8859_3 |
| class | TEncoding_ISO8859_4 |
| class | TEncoding_YuAscii |
| class | TEncoding_CP437 |
| class | TEncoding_CP852 |
| class | TEncoding_CP1250 |
| class | T8BitCodec< TEncoding_ > |
| class | TUniChInfo |
| class | TUniTrie< TItem_ > |
| class | TUniTrie< TItem_ >::TNode |
| class | TUniChDb |
| class | TUniChDb::TUcdFileReader |
| class | TUniChDb::TSubcatHelper |
| class | TUnicode |
Macros | |
| #define | DefineByte(b7, b6, b5, b4, b3, b2, b1, b0) _ ## b7 ## b6 ## b5 ## b4 ## _ ## b3 ## b2 ## b1 ## b0 = (b7 << 7) | (b6 << 6) | (b5 << 5) | (b4 << 4) | (b3 << 3) | (b2 << 2) | (b1 << 1) | b0 |
| #define | DefineUniCat(cat, c) uc ## cat = (int(uchar(c)) & 0xff) |
| #define | DefineUniSubCat(cat, subCat, c) uc ## cat ## subCat = ((uc ## cat) << 8) | (int(uchar(c)) & 0xff) |
| #define | ___UniFwd1(name) bool name(const int cp) const { int i = h.GetKeyId(cp); if (i < 0) return false; else return h[i].name(); } |
| #define | ___UniFwd2(name1, name2) ___UniFwd1(name1) ___UniFwd1(name2) |
| #define | ___UniFwd3(name1, name2, name3) ___UniFwd2(name1, name2) ___UniFwd1(name3) |
| #define | ___UniFwd4(name1, name2, name3, name4) ___UniFwd3(name1, name2, name3) ___UniFwd1(name4) |
| #define | ___UniFwd5(name1, name2, name3, name4, name5) ___UniFwd4(name1, name2, name3, name4) ___UniFwd1(name5) |
| #define | DECLARE_FORWARDED_PROPERTY_METHODS |
| #define | ___UniFwd1(name) bool name(const int cp) const { return ucd.name(cp); } |
| #define | ___OutRepl if (isDestLe) { dest.Add(replacementChar & 0xff); dest.Add((replacementChar >> 8) & 0xff); } else { dest.Add((replacementChar >> 8) & 0xff); dest.Add(replacementChar & 0xff); } |
| #define | TestCurNext(curFlag, nextFlag) if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue |
| #define | TestCurNext2(curFlag, nextFlag, next2Flag) if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag && (wbfNext2 & next2Flag) == next2Flag) continue |
| #define | TestPrevCurNext(prevFlag, curFlag, nextFlag) if ((wbfPrev & prevFlag) == prevFlag && (wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue |
| #define | TestCur(curFlag) ((sbfCur & ucfSb##curFlag) == ucfSb##curFlag) |
| #define | Trans(curFlag, newState) if (TestCur(curFlag)) { backState = st##newState; break; } |
| #define | IsPeekAheadSkippable(sbf) ((sbf & (ucfSbOLetter | ucfSbUpper | ucfSbLower | ucfSbSep | ucfSbSTerm | ucfSbATerm)) == 0) |
| #define | TestCurNext(curFlag, nextFlag) if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue |
| #define | TestCurNext2(curFlag, nextFlag, next2Flag) if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag && (sbfNext2 & next2Flag) == next2Flag) continue |
| #define | TestPrevCurNext(prevFlag, curFlag, nextFlag) if ((sbfPrev & prevFlag) == prevFlag && (sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue |
Typedefs | |
| typedef int | TUniVecIdx |
| typedef enum TUnicodeErrorHandling_ | TUnicodeErrorHandling |
| typedef enum TUniByteOrder_ | TUniByteOrder |
| typedef enum TUtf16BomHandling_ | TUtf16BomHandling |
| typedef THash< TInt, TIntV > | TIntIntVH |
| typedef TPt< TCodecBase > | PCodecBase |
| typedef TVec< PCodecBase > | TCodecBaseV |
| typedef T8BitCodec < TEncoding_ISO8859_1 > | TCodec_ISO8859_1 |
| typedef T8BitCodec < TEncoding_ISO8859_2 > | TCodec_ISO8859_2 |
| typedef T8BitCodec < TEncoding_ISO8859_3 > | TCodec_ISO8859_3 |
| typedef T8BitCodec < TEncoding_ISO8859_4 > | TCodec_ISO8859_4 |
| typedef T8BitCodec < TEncoding_CP852 > | TCodec_CP852 |
| typedef T8BitCodec < TEncoding_CP437 > | TCodec_CP437 |
| typedef T8BitCodec < TEncoding_CP1250 > | TCodec_CP1250 |
| typedef T8BitCodec < TEncoding_YuAscii > | TCodec_YuAscii |
| typedef enum TUniChCategory_ | TUniChCategory |
| typedef enum TUniChSubCategory_ | TUniChSubCategory |
| typedef enum TUniChFlags_ | TUniChFlags |
| typedef enum TUniChProperties_ | TUniChProperties |
| typedef enum TUniChPropertiesX_ | TUniChPropertiesX |
Enumerations | |
| enum | TUnicodeErrorHandling_ { uehIgnore = 0, uehThrow = 1, uehReplace = 2, uehAbort = 3 } |
| enum | TUniByteOrder_ { boMachineEndian = 0, boLittleEndian = 1, boBigEndian = 2 } |
| enum | TUtf16BomHandling_ { bomAllowed = 0, bomRequired = 1, bomIgnored = 2 } |
| enum | TUniChCategory_ { DefineUniCat =(Letter, 'L'), DefineUniCat =(Letter, 'L'), DefineUniCat =(Letter, 'L'), DefineUniCat =(Letter, 'L'), DefineUniCat =(Letter, 'L'), DefineUniCat =(Letter, 'L'), DefineUniCat =(Letter, 'L'), DefineUniCat =(Letter, 'L') } |
| enum | TUniChSubCategory_ { DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u'), DefineUniSubCat =(Letter, Uppercase, 'u') } |
| enum | TUniChFlags_ { ucfCompatibilityDecomposition = 1, ucfCompositionExclusion = 1 << 1, ucfWbFormat = 1 << 2, ucfWbKatakana = 1 << 3, ucfWbALetter = 1 << 4, ucfWbMidLetter = 1 << 5, ucfWbMidNum = 1 << 6, ucfWbNumeric = 1 << 7, ucfWbExtendNumLet = 1 << 8, ucfSbSep = 1 << 9, ucfSbFormat = 1 << 10, ucfSbSp = 1 << 11, ucfSbLower = 1 << 12, ucfSbUpper = 1 << 13, ucfSbOLetter = 1 << 14, ucfSbNumeric = 1 << 15, ucfSbATerm = 1 << 16, ucfSbSTerm = 1 << 17, ucfSbClose = 1 << 18, ucfSbMask = ucfSbSep | ucfSbFormat | ucfSbSp | ucfSbLower | ucfSbUpper | ucfSbOLetter | ucfSbNumeric | ucfSbATerm | ucfSbSTerm | ucfSbClose, ucfWbMask = ucfWbFormat | ucfWbKatakana | ucfWbALetter | ucfWbMidLetter | ucfWbMidNum | ucfWbNumeric | ucfWbExtendNumLet | ucfSbSep, ucfDcpAlphabetic = 1 << 19, ucfDcpDefaultIgnorableCodePoint = 1 << 20, ucfDcpLowercase = 1 << 21, ucfDcpGraphemeBase = 1 << 22, ucfDcpGraphemeExtend = 1 << 23, ucfDcpIdStart = 1 << 24, ucfDcpIdContinue = 1 << 25, ucfDcpMath = 1 << 26, ucfDcpUppercase = 1 << 27, ucfDcpXidStart = 1 << 28, ucfDcpXidContinue = 1 << 29, ucfDcpMask } |
| enum | TUniChProperties_ { ucfPrAsciiHexDigit = 1, ucfPrBidiControl = 2, ucfPrDash = 4, ucfPrDeprecated = 8, ucfPrDiacritic = 0x10, ucfPrExtender = 0x20, ucfPrGraphemeLink = 0x40, ucfPrHexDigit = 0x80, ucfPrHyphen = 0x100, ucfPrIdeographic = 0x200, ucfPrJoinControl = 0x400, ucfPrLogicalOrderException = 0x800, ucfPrNoncharacterCodePoint = 0x1000, ucfPrPatternSyntax = 0x2000, ucfPrPatternWhiteSpace = 0x4000, ucfPrQuotationMark = 0x8000, ucfPrSoftDotted = 0x10000, ucfPrSTerm = 0x20000, ucfPrTerminalPunctuation = 0x40000, ucfPrVariationSelector = 0x80000, ucfPrWhiteSpace = 0x100000 } |
| enum | TUniChPropertiesX_ { ucfPxOtherAlphabetic = 1, ucfPxOtherDefaultIgnorableCodePoint = 2, ucfPxOtherGraphemeExtend = 4, ucfPxOtherIdContinue = 8, ucfPxOtherIdStart = 0x10, ucfPxOtherLowercase = 0x20, ucfPxOtherMath = 0x40, ucfPxOtherUppercase = 0x80, ucfPxIdsBinaryOperator = 0x100, ucfPxIdsTrinaryOperator = 0x200, ucfPxRadical = 0x400, ucfPxUnifiedIdeograph = 0x800 } |
Functions | |
| bool | AlwaysFalse () |
| bool | AlwaysTrue () |
| #define ___OutRepl if (isDestLe) { dest.Add(replacementChar & 0xff); dest.Add((replacementChar >> 8) & 0xff); } else { dest.Add((replacementChar >> 8) & 0xff); dest.Add(replacementChar & 0xff); } |
Referenced by TUniCodec::EncodeUtf16ToBytes().
| #define ___UniFwd1 | ( | name | ) | bool name(const int cp) const { int i = h.GetKeyId(cp); if (i < 0) return false; else return h[i].name(); } |
| #define ___UniFwd1 | ( | name | ) | bool name(const int cp) const { return ucd.name(cp); } |
| #define ___UniFwd2 | ( | name1, | |
| name2 | |||
| ) | ___UniFwd1(name1) ___UniFwd1(name2) |
| #define ___UniFwd3 | ( | name1, | |
| name2, | |||
| name3 | |||
| ) | ___UniFwd2(name1, name2) ___UniFwd1(name3) |
| #define ___UniFwd4 | ( | name1, | |
| name2, | |||
| name3, | |||
| name4 | |||
| ) | ___UniFwd3(name1, name2, name3) ___UniFwd1(name4) |
| #define ___UniFwd5 | ( | name1, | |
| name2, | |||
| name3, | |||
| name4, | |||
| name5 | |||
| ) | ___UniFwd4(name1, name2, name3, name4) ___UniFwd1(name5) |
| #define DECLARE_FORWARDED_PROPERTY_METHODS |
| #define DefineByte | ( | b7, | |
| b6, | |||
| b5, | |||
| b4, | |||
| b3, | |||
| b2, | |||
| b1, | |||
| b0 | |||
| ) | _ ## b7 ## b6 ## b5 ## b4 ## _ ## b3 ## b2 ## b1 ## b0 = (b7 << 7) | (b6 << 6) | (b5 << 5) | (b4 << 4) | (b3 << 3) | (b2 << 2) | (b1 << 1) | b0 |
| #define DefineUniCat | ( | cat, | |
| c | |||
| ) | uc ## cat = (int(uchar(c)) & 0xff) |
| #define DefineUniSubCat | ( | cat, | |
| subCat, | |||
| c | |||
| ) | uc ## cat ## subCat = ((uc ## cat) << 8) | (int(uchar(c)) & 0xff) |
| #define IsPeekAheadSkippable | ( | sbf | ) | ((sbf & (ucfSbOLetter | ucfSbUpper | ucfSbLower | ucfSbSep | ucfSbSTerm | ucfSbATerm)) == 0) |
Referenced by TUniChDb::FindNextSentenceBoundary().
| #define TestCur | ( | curFlag | ) | ((sbfCur & ucfSb##curFlag) == ucfSb##curFlag) |
| #define TestCurNext | ( | curFlag, | |
| nextFlag | |||
| ) | if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue |
Referenced by TUniChDb::FindNextSentenceBoundary(), and TUniChDb::FindNextWordBoundary().
| #define TestCurNext | ( | curFlag, | |
| nextFlag | |||
| ) | if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue |
| #define TestCurNext2 | ( | curFlag, | |
| nextFlag, | |||
| next2Flag | |||
| ) | if ((wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag && (wbfNext2 & next2Flag) == next2Flag) continue |
Referenced by TUniChDb::FindNextWordBoundary().
| #define TestCurNext2 | ( | curFlag, | |
| nextFlag, | |||
| next2Flag | |||
| ) | if ((sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag && (sbfNext2 & next2Flag) == next2Flag) continue |
| #define TestPrevCurNext | ( | prevFlag, | |
| curFlag, | |||
| nextFlag | |||
| ) | if ((wbfPrev & prevFlag) == prevFlag && (wbfCur & curFlag) == curFlag && (wbfNext & nextFlag) == nextFlag) continue |
Referenced by TUniChDb::FindNextSentenceBoundary(), and TUniChDb::FindNextWordBoundary().
| #define TestPrevCurNext | ( | prevFlag, | |
| curFlag, | |||
| nextFlag | |||
| ) | if ((sbfPrev & prevFlag) == prevFlag && (sbfCur & curFlag) == curFlag && (sbfNext & nextFlag) == nextFlag) continue |
| #define Trans | ( | curFlag, | |
| newState | |||
| ) | if (TestCur(curFlag)) { backState = st##newState; break; } |
Referenced by TUniChDb::FindNextSentenceBoundary().
| typedef TPt<TCodecBase> PCodecBase |
| typedef T8BitCodec<TEncoding_CP1250> TCodec_CP1250 |
| typedef T8BitCodec<TEncoding_CP437> TCodec_CP437 |
| typedef T8BitCodec<TEncoding_CP852> TCodec_CP852 |
| typedef T8BitCodec<TEncoding_YuAscii> TCodec_YuAscii |
| typedef TVec<PCodecBase> TCodecBaseV |
| typedef enum TUniByteOrder_ TUniByteOrder |
| typedef enum TUniChCategory_ TUniChCategory |
| typedef enum TUniChFlags_ TUniChFlags |
| typedef enum TUniChProperties_ TUniChProperties |
| typedef enum TUniChPropertiesX_ TUniChPropertiesX |
| typedef enum TUniChSubCategory_ TUniChSubCategory |
| typedef enum TUnicodeErrorHandling_ TUnicodeErrorHandling |
| typedef int TUniVecIdx |
| typedef enum TUtf16BomHandling_ TUtf16BomHandling |
| enum TUniByteOrder_ |
| Enumerator | |
|---|---|
| boMachineEndian | |
| boLittleEndian | |
| boBigEndian | |
| enum TUniChCategory_ |
| enum TUniChFlags_ |
Definition at line 712 of file unicode.h.
| enum TUniChProperties_ |
Definition at line 780 of file unicode.h.
| enum TUniChPropertiesX_ |
Definition at line 961 of file unicode.h.
| enum TUniChSubCategory_ |
| enum TUnicodeErrorHandling_ |
| Enumerator | |
|---|---|
| uehIgnore | |
| uehThrow | |
| uehReplace | |
| uehAbort | |
| enum TUtf16BomHandling_ |
| Enumerator | |
|---|---|
| bomAllowed | |
| bomRequired | |
| bomIgnored | |
Definition at line 46 of file unicode.h.
|
inline |
Definition at line 3227 of file unicode.h.
Referenced by TUniChDb::InitScripts(), and TUniChDb::TestFindNextWordOrSentenceBoundary().
