|
SNAP Library 3.0, User Reference
2016-07-20 17:56:49
SNAP, a general-purpose, high-performance system for analysis and manipulation of large networks
|
#include <unicode.h>
Classes | |
| class | TSubcatHelper |
| class | TUcdFileReader |
Public Types | |
| enum | { HangulSBase = 0xAC00, HangulLBase = 0x1100, HangulVBase = 0x1161, HangulTBase = 0x11A7, HangulLCount = 19, HangulVCount = 21, HangulTCount = 28, HangulNCount = HangulVCount * HangulTCount, HangulSCount = HangulLCount * HangulNCount } |
| enum | TCaseConversion_ { ccLower = 0, ccUpper = 1, ccTitle = 2, ccMax = 3 } |
| typedef enum TUniChDb::TCaseConversion_ | TCaseConversion |
Public Member Functions | |
| TUniChDb () | |
| TUniChDb (TSIn &SIn) | |
| void | Clr () |
| void | Save (TSOut &SOut) const |
| void | Load (TSIn &SIn) |
| void | LoadBin (const TStr &fnBin) |
| void | Test (const TStr &basePath) |
| const TStr & | GetScriptName (const int scriptId) const |
| int | GetScriptByName (const TStr &scriptName) const |
| int | GetScript (const TUniChInfo &ci) const |
| int | GetScript (const int cp) const |
| const char * | GetCharName (const int cp) const |
| TStr | GetCharNameS (const int cp) const |
| template<class TSrcVec > | |
| void | PrintCharNames (FILE *f, const TSrcVec &src, size_t srcIdx, const size_t srcCount, const TStr &prefix) const |
| template<class TSrcVec > | |
| void | PrintCharNames (FILE *f, const TSrcVec &src, const TStr &prefix) const |
| bool | IsGetChInfo (const int cp, TUniChInfo &ChInfo) |
| TUniChCategory | GetCat (const int cp) const |
| TUniChSubCategory | GetSubCat (const int cp) const |
| bool | IsWbFlag (const int cp, const TUniChFlags flag) const |
| int | GetWbFlags (const int cp) const |
| bool | IsSbFlag (const int cp, const TUniChFlags flag) const |
| int | GetSbFlags (const int cp) const |
| DECLARE_FORWARDED_PROPERTY_METHODS bool | IsPrivateUse (const int cp) const |
| bool | IsSurrogate (const int cp) const |
| int | GetCombiningClass (const int cp) const |
| template<typename TSrcVec > | |
| bool | FindNextWordBoundary (const TSrcVec &src, const size_t srcIdx, const size_t srcCount, size_t &position) const |
| template<typename TSrcVec > | |
| void | FindWordBoundaries (const TSrcVec &src, const size_t srcIdx, const size_t srcCount, TBoolV &dest) const |
| template<typename TSrcVec > | |
| bool | FindNextSentenceBoundary (const TSrcVec &src, const size_t srcIdx, const size_t srcCount, size_t &position) const |
| template<typename TSrcVec > | |
| void | FindSentenceBoundaries (const TSrcVec &src, const size_t srcIdx, const size_t srcCount, TBoolV &dest) const |
| void | SbEx_Clr () |
| template<class TSrcVec > | |
| void | SbEx_Add (const TSrcVec &v) |
| void | SbEx_Add (const TStr &s) |
| void | SbEx_AddUtf8 (const TStr &s) |
| int | SbEx_AddMulti (const TStr &words, const bool wordsAreUtf8=true) |
| void | SbEx_Set (const TUniTrie< TInt > &newTrie) |
| int | SbEx_SetStdEnglish () |
| template<typename TSrcVec , typename TDestCh > | |
| void | Decompose (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool compatibility, bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | Decompose (const TSrcVec &src, TVec< TDestCh > &dest, bool compatibility, bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | Compose (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | Compose (const TSrcVec &src, TVec< TDestCh > &dest, bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | DecomposeAndCompose (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool compatibility, bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | DecomposeAndCompose (const TSrcVec &src, TVec< TDestCh > &dest, bool compatibility, bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| size_t | ExtractStarters (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| size_t | ExtractStarters (const TSrcVec &src, TVec< TDestCh > &dest, bool clrDest=true) const |
| template<typename TSrcVec > | |
| size_t | ExtractStarters (TSrcVec &src) const |
| void | LoadTxt (const TStr &basePath) |
| void | SaveBin (const TStr &fnBinUcd) |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetCaseConverted (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const TCaseConversion how, const bool turkic, const bool lithuanian) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetLowerCase (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetUpperCase (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetTitleCase (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetLowerCase (const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetUpperCase (const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetTitleCase (const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true, const bool turkic=false, const bool lithuanian=false) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetSimpleCaseConverted (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const TCaseConversion how) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetSimpleLowerCase (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetSimpleUpperCase (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetSimpleTitleCase (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetSimpleLowerCase (const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetSimpleUpperCase (const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetSimpleTitleCase (const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const |
| template<typename TSrcVec > | |
| void | ToSimpleCaseConverted (TSrcVec &src, size_t srcIdx, const size_t srcCount, const TCaseConversion how) const |
| template<typename TSrcVec > | |
| void | ToSimpleUpperCase (TSrcVec &src, size_t srcIdx, const size_t srcCount) const |
| template<typename TSrcVec > | |
| void | ToSimpleLowerCase (TSrcVec &src, size_t srcIdx, const size_t srcCount) const |
| template<typename TSrcVec > | |
| void | ToSimpleTitleCase (TSrcVec &src, size_t srcIdx, const size_t srcCount) const |
| template<typename TSrcVec > | |
| void | ToSimpleUpperCase (TSrcVec &src) const |
| template<typename TSrcVec > | |
| void | ToSimpleLowerCase (TSrcVec &src) const |
| template<typename TSrcVec > | |
| void | ToSimpleTitleCase (TSrcVec &src) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetCaseFolded (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic=false) const |
| template<typename TSrcVec , typename TDestCh > | |
| void | GetCaseFolded (const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true, const bool full=true, const bool turkic=false) const |
| template<typename TSrcVec > | |
| void | ToCaseFolded (TSrcVec &src, size_t srcIdx, const size_t srcCount, const bool turkic=false) const |
| template<typename TSrcVec > | |
| void | ToCaseFolded (TSrcVec &src, const bool turkic=false) const |
Static Public Member Functions | |
| static TStr | GetCaseFoldingFn () |
| static TStr | GetSpecialCasingFn () |
| static TStr | GetUnicodeDataFn () |
| static TStr | GetCompositionExclusionsFn () |
| static TStr | GetScriptsFn () |
| static TStr | GetDerivedCorePropsFn () |
| static TStr | GetLineBreakFn () |
| static TStr | GetPropListFn () |
| static TStr | GetAuxiliaryDir () |
| static TStr | GetWordBreakTestFn () |
| static TStr | GetWordBreakPropertyFn () |
| static TStr | GetSentenceBreakTestFn () |
| static TStr | GetSentenceBreakPropertyFn () |
| static TStr | GetNormalizationTestFn () |
| static TStr | GetBinFn () |
| static TStr | GetScriptNameUnknown () |
| static TStr | GetScriptNameKatakana () |
| static TStr | GetScriptNameHiragana () |
Protected Types | |
| typedef TUniVecIdx | TVecIdx |
Protected Member Functions | |
| void | InitAfterLoad () |
| bool | IsWbIgnored (const int cp) const |
| template<typename TSrcVec > | |
| void | WbFindCurOrNextNonIgnored (const TSrcVec &src, size_t &position, const size_t srcEnd) const |
| template<typename TSrcVec > | |
| void | WbFindNextNonIgnored (const TSrcVec &src, size_t &position, const size_t srcEnd) const |
| template<typename TSrcVec > | |
| void | WbFindNextNonIgnoredS (const TSrcVec &src, size_t &position, const size_t srcEnd) const |
| template<typename TSrcVec > | |
| bool | WbFindPrevNonIgnored (const TSrcVec &src, const size_t srcStart, size_t &position) const |
| void | TestWbFindNonIgnored (const TIntV &src) const |
| void | TestWbFindNonIgnored () const |
| void | TestFindNextWordOrSentenceBoundary (const TStr &basePath, bool sentence) |
| template<typename TSrcVec > | |
| bool | CanSentenceEndHere (const TSrcVec &src, const size_t srcIdx, const size_t position) const |
| template<typename TDestCh > | |
| void | AddDecomposition (const int codePoint, TVec< TDestCh > &dest, const bool compatibility) const |
| void | TestComposition (const TStr &basePath) |
| void | InitWordAndSentenceBoundaryFlags (const TStr &basePath) |
| void | InitScripts (const TStr &basePath) |
| void | InitLineBreaks (const TStr &basePath) |
| void | InitDerivedCoreProperties (const TStr &basePath) |
| void | InitPropList (const TStr &basePath) |
| void | InitSpecialCasing (const TStr &basePath) |
| void | LoadTxt_ProcessDecomposition (TUniChInfo &ci, TStr s) |
| void | TestCaseConversion (const TStr &source, const TStr &trueLc, const TStr &trueTc, const TStr &trueUc, bool turkic, bool lithuanian) |
| void | TestCaseConversions () |
Static Protected Member Functions | |
| static bool | IsWbIgnored (const TUniChInfo &ci) |
Protected Attributes | |
| TUniTrie< TInt > | sbExTrie |
Friends | |
| class | TUniCaseFolding |
| typedef enum TUniChDb::TCaseConversion_ TUniChDb::TCaseConversion |
|
protected |
| anonymous enum |
| Enumerator | |
|---|---|
| HangulSBase | |
| HangulLBase | |
| HangulVBase | |
| HangulTBase | |
| HangulLCount | |
| HangulVCount | |
| HangulTCount | |
| HangulNCount | |
| HangulSCount | |
Definition at line 1405 of file unicode.h.
| Enumerator | |
|---|---|
| ccLower | |
| ccUpper | |
| ccTitle | |
| ccMax | |
Definition at line 1584 of file unicode.h.
|
protected |
Definition at line 3103 of file unicode.h.
|
protected |
Definition at line 2585 of file unicode.h.
|
inline |
Definition at line 1276 of file unicode.h.
| void TUniChDb::Compose | ( | const TSrcVec & | src, |
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TVec< TDestCh > & | dest, | ||
| bool | clrDest = true |
||
| ) | const |
Definition at line 3158 of file unicode.h.
|
inline |
Definition at line 1532 of file unicode.h.
| void TUniChDb::Decompose | ( | const TSrcVec & | src, |
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TVec< TDestCh > & | dest, | ||
| bool | compatibility, | ||
| bool | clrDest = true |
||
| ) | const |
Definition at line 3126 of file unicode.h.
|
inline |
Definition at line 1520 of file unicode.h.
| void TUniChDb::DecomposeAndCompose | ( | const TSrcVec & | src, |
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TVec< TDestCh > & | dest, | ||
| bool | compatibility, | ||
| bool | clrDest = true |
||
| ) | const |
Definition at line 3148 of file unicode.h.
|
inline |
Definition at line 1542 of file unicode.h.
| size_t TUniChDb::ExtractStarters | ( | const TSrcVec & | src, |
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TVec< TDestCh > & | dest, | ||
| bool | clrDest = true |
||
| ) | const |
Definition at line 3215 of file unicode.h.
|
inline |
Definition at line 1551 of file unicode.h.
|
inline |
Definition at line 1555 of file unicode.h.
| bool TUniChDb::FindNextSentenceBoundary | ( | const TSrcVec & | src, |
| const size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| size_t & | position | ||
| ) | const |
Definition at line 2636 of file unicode.h.
| bool TUniChDb::FindNextWordBoundary | ( | const TSrcVec & | src, |
| const size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| size_t & | position | ||
| ) | const |
Definition at line 2483 of file unicode.h.
| void TUniChDb::FindSentenceBoundaries | ( | const TSrcVec & | src, |
| const size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TBoolV & | dest | ||
| ) | const |
Definition at line 2793 of file unicode.h.
| void TUniChDb::FindWordBoundaries | ( | const TSrcVec & | src, |
| const size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TBoolV & | dest | ||
| ) | const |
Definition at line 2561 of file unicode.h.
|
inline static |
|
inline static |
| void TUniChDb::GetCaseConverted | ( | const TSrcVec & | src, |
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TVec< TDestCh > & | dest, | ||
| const bool | clrDest, | ||
| const TCaseConversion | how, | ||
| const bool | turkic, | ||
| const bool | lithuanian | ||
| ) | const |
Definition at line 2817 of file unicode.h.
|
inline |
Definition at line 1629 of file unicode.h.
|
inline |
Definition at line 1632 of file unicode.h.
|
inline static |
|
inline |
|
inline |
|
inline |
Definition at line 1399 of file unicode.h.
|
inline static |
|
inline static |
|
inline static |
|
inline |
Definition at line 1590 of file unicode.h.
|
inline |
Definition at line 1593 of file unicode.h.
|
inline static |
|
inline static |
|
inline |
|
inline |
|
inline |
|
inline static |
|
inline static |
|
inline static |
|
inline static |
|
inline static |
|
inline static |
| void TUniChDb::GetSimpleCaseConverted | ( | const TSrcVec & | src, |
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TVec< TDestCh > & | dest, | ||
| const bool | clrDest, | ||
| const TCaseConversion | how | ||
| ) | const |
Definition at line 3042 of file unicode.h.
|
inline |
Definition at line 1601 of file unicode.h.
|
inline |
Definition at line 1604 of file unicode.h.
|
inline |
Definition at line 1603 of file unicode.h.
|
inline |
Definition at line 1606 of file unicode.h.
|
inline |
Definition at line 1602 of file unicode.h.
|
inline |
Definition at line 1605 of file unicode.h.
|
inline static |
|
inline |
|
inline |
Definition at line 1592 of file unicode.h.
|
inline |
Definition at line 1595 of file unicode.h.
|
inline static |
|
inline |
Definition at line 1591 of file unicode.h.
|
inline |
Definition at line 1594 of file unicode.h.
|
inline static |
|
inline static |
|
protected |
Definition at line 1368 of file unicode.cpp.
|
protected |
Definition at line 1007 of file unicode.cpp.
|
protected |
Definition at line 1046 of file unicode.cpp.
|
protected |
Definition at line 950 of file unicode.cpp.
|
protected |
Definition at line 1073 of file unicode.cpp.
|
protected |
Definition at line 1225 of file unicode.cpp.
|
protected |
Definition at line 1100 of file unicode.cpp.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline static protected |
|
inline protected |
| void TUniChDb::LoadTxt | ( | const TStr & | basePath | ) |
Definition at line 1249 of file unicode.cpp.
|
protected |
Definition at line 937 of file unicode.cpp.
|
inline |
|
inline |
Definition at line 1342 of file unicode.h.
| void TUniChDb::SaveBin | ( | const TStr & | fnBinUcd | ) |
Definition at line 1362 of file unicode.cpp.
|
inline |
Definition at line 1490 of file unicode.h.
|
inline |
|
inline |
Definition at line 1495 of file unicode.h.
|
inline |
Definition at line 1494 of file unicode.h.
|
inline |
Definition at line 1499 of file unicode.h.
| void TUniChDb::Test | ( | const TStr & | basePath | ) |
Definition at line 1377 of file unicode.cpp.
|
protected |
Definition at line 825 of file unicode.cpp.
|
protected |
Definition at line 853 of file unicode.cpp.
|
protected |
Definition at line 745 of file unicode.cpp.
|
protected |
Definition at line 649 of file unicode.cpp.
|
protected |
Definition at line 579 of file unicode.cpp.
|
protected |
Definition at line 619 of file unicode.cpp.
|
inline |
Definition at line 1636 of file unicode.h.
|
inline |
Definition at line 1637 of file unicode.h.
| void TUniChDb::ToSimpleCaseConverted | ( | TSrcVec & | src, |
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| const TCaseConversion | how | ||
| ) | const |
Definition at line 3072 of file unicode.h.
|
inline |
Definition at line 1610 of file unicode.h.
|
inline |
Definition at line 1613 of file unicode.h.
|
inline |
Definition at line 1611 of file unicode.h.
|
inline |
Definition at line 1614 of file unicode.h.
|
inline |
Definition at line 1609 of file unicode.h.
|
inline |
Definition at line 1612 of file unicode.h.
|
inline protected |
|
inline protected |
|
inline protected |
|
inline protected |
|
friend |
| TUniCaseFolding TUniChDb::caseFolding |
| THash<TInt, TUniChInfo> TUniChDb::h |