SNAP Library 3.0, Developer Reference
2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <unicode.h>
Public Types | |
typedef TUniChDb::TCaseConversion | TCaseConversion |
Public Member Functions | |
TUnicode () | |
TUnicode (const TStr &fnBinUcd) | |
void | Init () |
int | DecodeUtf8 (const TIntV &src, TIntV &dest) const |
int | DecodeUtf8 (const TStr &src, TIntV &dest) const |
int | EncodeUtf8 (const TIntV &src, TIntV &dest) const |
TStr | EncodeUtf8Str (const TIntV &src) const |
int | DecodeUtf16FromBytes (const TIntV &src, TIntV &dest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const |
int | DecodeUtf16FromWords (const TIntV &src, TIntV &dest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const |
int | EncodeUtf16ToWords (const TIntV &src, TIntV &dest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const |
int | EncodeUtf16ToBytes (const TIntV &src, TIntV &dest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const |
void | RegisterCodec (const TStr &nameList, const PCodecBase &codec) |
void | UnregisterCodec (const TStr &nameList) |
void | ClrCodecs () |
void | InitCodecs () |
PCodecBase | GetCodec (const TStr &name) const |
void | GetAllCodecs (TCodecBaseV &dest) const |
bool | FindNextWordBoundary (const TIntV &src, int &position) const |
void | FindWordBoundaries (const TIntV &src, TBoolV &dest) const |
bool | FindNextSentenceBoundary (const TIntV &src, int &position) const |
void | FindSentenceBoundaries (const TIntV &src, TBoolV &dest) const |
void | ClrSentenceBoundaryExceptions () |
void | UseEnglishSentenceBoundaryExceptions () |
void | Decompose (const TIntV &src, TIntV &dest, bool compatibility) const |
void | Compose (const TIntV &src, TIntV &dest) const |
void | DecomposeAndCompose (const TIntV &src, TIntV &dest, bool compatibility) const |
int | ExtractStarters (const TIntV &src, TIntV &dest) const |
int | ExtractStarters (TIntV &src) const |
void | GetLowerCase (const TIntV &src, TIntV &dest) const |
void | GetUpperCase (const TIntV &src, TIntV &dest) const |
void | GetTitleCase (const TIntV &src, TIntV &dest) const |
void | GetSimpleLowerCase (const TIntV &src, TIntV &dest) const |
void | GetSimpleUpperCase (const TIntV &src, TIntV &dest) const |
void | GetSimpleTitleCase (const TIntV &src, TIntV &dest) const |
void | ToSimpleUpperCase (TIntV &src) const |
void | ToSimpleLowerCase (TIntV &src) const |
void | ToSimpleTitleCase (TIntV &src) const |
void | GetCaseFolded (const TIntV &src, TIntV &dest, const bool full=true) const |
void | ToCaseFolded (TIntV &src) const |
TStr | GetUtf8CaseFolded (const TStr &s) const |
DECLARE_FORWARDED_PROPERTY_METHODS | ___UniFwd2 (IsPrivateUse, IsSurrogate) |
TUniChCategory | GetCat (const int cp) const |
TUniChSubCategory | GetSubCat (const int cp) const |
const char * | GetCharName (const int cp) const |
TStr | GetCharNameS (const int cp) const |
Static Public Member Functions | |
static void | EncodeUtf8 (const uint &Ch, TChA &Dest) |
static TStr | EncodeUtf8 (const uint &Ch) |
Static Protected Member Functions | |
static TStr | NormalizeCodecName (const TStr &name) |
Protected Attributes | |
THash< TStr, PCodecBase > | codecs |
|
inline |
|
inline explicit |
Definition at line 1778 of file unicode.h.
References Init(), TUniChDb::LoadBin(), and ucd.
|
inline |
Definition at line 2018 of file unicode.h.
References TUniChDb::GetCat(), and ucd.
|
inline |
Definition at line 1881 of file unicode.h.
References codecs.
Referenced by InitCodecs().
|
inline |
Definition at line 1924 of file unicode.h.
References TUniChDb::SbEx_Clr(), and ucd.
Definition at line 1941 of file unicode.h.
References TUniChDb::Compose(), and ucd.
|
inline |
Definition at line 1810 of file unicode.h.
References codec, TUniCodec::DecodeUtf16FromBytes(), and TVec< TVal, TSizeTy >::Len().
|
inline |
Definition at line 1823 of file unicode.h.
References codec, TUniCodec::DecodeUtf16FromWords(), and TVec< TVal, TSizeTy >::Len().
Definition at line 1787 of file unicode.h.
References codec, and TUniCodec::DecodeUtf8().
Referenced by TJsonVal::AddEscapeChAFromStr(), GetUtf8CaseFolded(), and TUStr::TUStr().
Definition at line 1788 of file unicode.h.
References codec, and TUniCodec::DecodeUtf8().
Definition at line 1934 of file unicode.h.
References TUniChDb::Decompose(), and ucd.
Referenced by TUStr::GetStarterLowerCaseStr(), TUStr::GetStarterStr(), TUStr::ToStarterCase(), and TUStr::TUStr().
|
inline |
Definition at line 1946 of file unicode.h.
References TUniChDb::DecomposeAndCompose(), and ucd.
|
inline |
Definition at line 1838 of file unicode.h.
References codec, TUniCodec::EncodeUtf16ToBytes(), and TVec< TVal, TSizeTy >::Len().
|
inline |
Definition at line 1834 of file unicode.h.
References codec, TUniCodec::EncodeUtf16ToWords(), and TVec< TVal, TSizeTy >::Len().
Definition at line 1792 of file unicode.h.
References codec, and TUniCodec::EncodeUtf8().
Referenced by EncodeUtf8(), TXmlLx::GetPlainStrFromXmlStr(), TXmlLx::GetReference(), and TILx::GetSym().
Definition at line 1696 of file unicode.cpp.
References TChA::AddCh(), TStr::Fmt(), and TExcept::New().
Definition at line 1728 of file unicode.cpp.
References EncodeUtf8().
Definition at line 1796 of file unicode.h.
References codec, and TUniCodec::EncodeUtf8Str().
Referenced by TUStr::EncodeUtf8(), TUStr::GetStarterLowerCaseStr(), TUStr::GetStarterStr(), TUStr::GetStr(), and GetUtf8CaseFolded().
Definition at line 1951 of file unicode.h.
References TUniChDb::ExtractStarters(), and ucd.
Referenced by TUStr::GetStarterLowerCaseStr(), TUStr::GetStarterStr(), and TUStr::ToStarterCase().
|
inline |
Definition at line 1953 of file unicode.h.
References TUniChDb::ExtractStarters(), and ucd.
|
inline |
Definition at line 1916 of file unicode.h.
References TUniChDb::FindNextSentenceBoundary(), TVec< TVal, TSizeTy >::Len(), and ucd.
|
inline |
Definition at line 1901 of file unicode.h.
References TUniChDb::FindNextWordBoundary(), TVec< TVal, TSizeTy >::Len(), and ucd.
Definition at line 1922 of file unicode.h.
References TUniChDb::FindSentenceBoundaries(), TVec< TVal, TSizeTy >::Len(), and ucd.
Definition at line 1907 of file unicode.h.
References TUniChDb::FindWordBoundaries(), TVec< TVal, TSizeTy >::Len(), and ucd.
Referenced by TUStr::GetWordBoundPV().
|
inline |
Definition at line 1887 of file unicode.h.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), codec, codecs, and TVec< TVal, TSizeTy >::Len().
|
inline |
Definition at line 1989 of file unicode.h.
References TUniChDb::GetCaseFolded(), and ucd.
Referenced by GetUtf8CaseFolded().
|
inline |
Definition at line 2024 of file unicode.h.
References TUniChDb::GetCharName(), and ucd.
|
inline |
Definition at line 2025 of file unicode.h.
References TUniChDb::GetCharNameS(), and ucd.
Referenced by TUStr::GetChNm().
|
inline |
Definition at line 1883 of file unicode.h.
References TPt< TRec >::Clr(), codecs, and NormalizeCodecName().
Definition at line 1965 of file unicode.h.
References TUniChDb::GetLowerCase(), and ucd.
Definition at line 1972 of file unicode.h.
References TUniChDb::GetSimpleLowerCase(), and ucd.
Referenced by TUStr::GetStarterLowerCaseStr().
Definition at line 1974 of file unicode.h.
References TUniChDb::GetSimpleTitleCase(), and ucd.
Definition at line 1973 of file unicode.h.
References TUniChDb::GetSimpleUpperCase(), and ucd.
|
inline |
Definition at line 2021 of file unicode.h.
References TUniChDb::GetSubCat(), and ucd.
Definition at line 1967 of file unicode.h.
References TUniChDb::GetTitleCase(), and ucd.
Definition at line 1966 of file unicode.h.
References TUniChDb::GetUpperCase(), and ucd.
Definition at line 1994 of file unicode.h.
References DecodeUtf8(), EncodeUtf8Str(), GetCaseFolded(), TStr::GetLc(), and TStr::Len().
|
inline |
Definition at line 1779 of file unicode.h.
References InitCodecs().
Referenced by TUnicode().
void TUnicode::InitCodecs () |
Definition at line 1683 of file unicode.cpp.
References ClrCodecs(), and RegisterCodec().
Referenced by Init().
Definition at line 1870 of file unicode.h.
References TStr::ChangeStrAll(), and TStr::GetLc().
Referenced by GetCodec(), RegisterCodec(), and UnregisterCodec().
|
inline |
Definition at line 1873 of file unicode.h.
References codecs, TVec< TVal, TSizeTy >::Len(), NormalizeCodecName(), and TStr::SplitOnWs().
Referenced by InitCodecs().
|
inline |
Definition at line 1992 of file unicode.h.
References TUniChDb::ToCaseFolded(), and ucd.
|
inline |
Definition at line 1978 of file unicode.h.
References TUniChDb::ToSimpleLowerCase(), and ucd.
Referenced by TUStr::ToLowerCase().
|
inline |
Definition at line 1979 of file unicode.h.
References TUniChDb::ToSimpleTitleCase(), and ucd.
|
inline |
Definition at line 1977 of file unicode.h.
References TUniChDb::ToSimpleUpperCase(), and ucd.
Referenced by TUStr::ToUpperCase().
|
inline |
Definition at line 1877 of file unicode.h.
References codecs, TVec< TVal, TSizeTy >::Len(), NormalizeCodecName(), and TStr::SplitOnWs().
|
inline |
Definition at line 1925 of file unicode.h.
References TUniChDb::SbEx_SetStdEnglish(), and ucd.
TUniCodec TUnicode::codec |
Definition at line 1774 of file unicode.h.
Referenced by DecodeUtf16FromBytes(), DecodeUtf16FromWords(), DecodeUtf8(), EncodeUtf16ToBytes(), EncodeUtf16ToWords(), EncodeUtf8(), EncodeUtf8Str(), and GetAllCodecs().
|
protected |
Definition at line 1869 of file unicode.h.
Referenced by ClrCodecs(), GetAllCodecs(), GetCodec(), RegisterCodec(), and UnregisterCodec().
T8BitCodec<TEncoding_CP1250> TUnicode::cp1250 |
T8BitCodec<TEncoding_CP437> TUnicode::cp437 |
T8BitCodec<TEncoding_CP852> TUnicode::cp852 |
T8BitCodec<TEncoding_ISO8859_1> TUnicode::iso8859_1 |
T8BitCodec<TEncoding_ISO8859_2> TUnicode::iso8859_2 |
T8BitCodec<TEncoding_ISO8859_3> TUnicode::iso8859_3 |
T8BitCodec<TEncoding_ISO8859_4> TUnicode::iso8859_4 |
TUniChDb TUnicode::ucd |
Definition at line 1775 of file unicode.h.
Referenced by ___UniFwd2(), ClrSentenceBoundaryExceptions(), Compose(), Decompose(), DecomposeAndCompose(), ExtractStarters(), FindNextSentenceBoundary(), FindNextWordBoundary(), FindSentenceBoundaries(), FindWordBoundaries(), GetCaseFolded(), GetCharName(), GetCharNameS(), TUStr::GetChScriptId(), GetLowerCase(), TUStr::GetScriptId(), TUStr::GetScriptNm(), GetSimpleLowerCase(), GetSimpleTitleCase(), GetSimpleUpperCase(), GetSubCat(), GetTitleCase(), GetUpperCase(), ToCaseFolded(), ToSimpleLowerCase(), ToSimpleTitleCase(), ToSimpleUpperCase(), TUnicode(), and UseEnglishSentenceBoundaryExceptions().
T8BitCodec<TEncoding_YuAscii> TUnicode::yuAscii |