SNAP Library 2.2, Developer Reference
2014-03-11 19:15:55
SNAP, a general-purpose, high-performance system for analysis and manipulation of large networks
TUStr Class Reference
#include <unicodestring.h>
Public Member Functions | |
TUStr () | |
TUStr (const TUStr &UStr) | |
TUStr (const TIntV &_UniChV) | |
TUStr (const TStr &Str) | |
~TUStr () | |
TUStr (TSIn &SIn) | |
void | Save (TSOut &SOut) const |
void | LoadXml (const PXmlTok &XmlTok, const TStr &Nm) |
void | SaveXml (TSOut &SOut, const TStr &Nm) const |
TUStr & | operator= (const TUStr &UStr) |
bool | operator== (const TUStr &UStr) const |
TUStr & | operator+= (const TUStr &UStr) |
int | operator[] (const int &UniChN) const |
void | Clr () |
int | Len () const |
bool | Empty () const |
void | ToLowerCase () |
void | ToUpperCase () |
void | ToStarterCase () |
void | GetWordBoundPV (TBoolV &WordBoundPV) |
void | GetWordUStrV (TUStrV &UStrV) |
TStr | GetStr () const |
TStr | GetStarterStr () const |
TStr | GetStarterLowerCaseStr () const |
Static Public Member Functions | |
static int | GetScriptId (const TStr &ScriptNm) |
static TStr | GetScriptNm (const int &ScriptId) |
static int | GetChScriptId (const int &UniCh) |
static TStr | GetChScriptNm (const int &UniCh) |
static TStr | GetChNm (const int &UniCh) |
static TStr | GetChTypeStr (const int &UniCh) |
static bool | IsCase (const int &UniCh) |
static bool | IsUpperCase (const int &UniCh) |
static bool | IsLowerCase (const int &UniCh) |
static bool | IsAlphabetic (const int &UniCh) |
static bool | IsMath (const int &UniCh) |
static TStr | EncodeUtf8 (const int &UniCh) |
Static Private Member Functions | |
static void | AssertUnicodeDefOk () |
Private Attributes | |
TIntV | UniChV |
Definition at line 32 of file unicodestring.h.
TUStr::TUStr | ( | ) | [inline] |
Definition at line 38 of file unicodestring.h.
References AssertUnicodeDefOk().
: UniChV(){AssertUnicodeDefOk();}
TUStr::TUStr | ( | const TUStr & | UStr | ) | [inline] |
Definition at line 39 of file unicodestring.h.
References AssertUnicodeDefOk().
: UniChV(UStr.UniChV){AssertUnicodeDefOk();}
TUStr::TUStr | ( | const TIntV & | _UniChV | ) | [inline] |
Definition at line 40 of file unicodestring.h.
References AssertUnicodeDefOk().
: UniChV(_UniChV){AssertUnicodeDefOk();}
TUStr::TUStr | ( | const TStr & | Str | ) |
Definition at line 12 of file unicodestring.cpp.
References AssertUnicodeDefOk(), TUnicode::DecodeUtf8(), TUnicode::Decompose(), TUnicodeDef::GetDef(), and UniChV.
{ AssertUnicodeDefOk(); TUnicodeDef::GetDef()->DecodeUtf8(Str, UniChV); TIntV NfcUniChV; TUnicodeDef::GetDef()->Decompose(UniChV, NfcUniChV, true); UniChV=NfcUniChV; }
TUStr::~TUStr | ( | ) | [inline] |
Definition at line 42 of file unicodestring.h.
{}
TUStr::TUStr | ( | TSIn & | SIn | ) | [inline] |
Definition at line 43 of file unicodestring.h.
References AssertUnicodeDefOk().
: UniChV(SIn){AssertUnicodeDefOk();}
static void TUStr::AssertUnicodeDefOk | ( | ) | [inline, static, private] |
Definition at line 35 of file unicodestring.h.
References EAssertR, and TUnicodeDef::IsDef().
Referenced by EncodeUtf8(), and TUStr().
{ EAssertR(TUnicodeDef::IsDef(), "Unicode-Definition-File not loaded!");}
void TUStr::Clr | ( | ) | [inline] |
Definition at line 56 of file unicodestring.h.
References TVec< TVal, TSizeTy >::Clr(), and UniChV.
bool TUStr::Empty | ( | ) | const [inline] |
Definition at line 58 of file unicodestring.h.
References TVec< TVal, TSizeTy >::Empty(), and UniChV.
TStr TUStr::EncodeUtf8 | ( | const int & | UniCh | ) | [static] |
Definition at line 157 of file unicodestring.cpp.
References AssertUnicodeDefOk(), TUnicode::EncodeUtf8Str(), TUnicodeDef::GetDef(), and TVec< TInt >::GetV().
{ AssertUnicodeDefOk(); return TUnicodeDef::GetDef()->EncodeUtf8Str(TIntV::GetV(UniCh)); }
TStr TUStr::GetChNm | ( | const int & | UniCh | ) | [static] |
Definition at line 104 of file unicodestring.cpp.
References TUnicode::GetCharNameS(), and TUnicodeDef::GetDef().
{ TStr UniChNm(TUnicodeDef::GetDef()->ucd.GetCharNameS(UniCh)); return UniChNm; }
int TUStr::GetChScriptId | ( | const int & | UniCh | ) | [static] |
Definition at line 96 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUniChDb::GetScript(), and TUnicode::ucd.
Referenced by GetChScriptNm().
{ return TUnicodeDef::GetDef()->ucd.GetScript(UniCh); }
TStr TUStr::GetChScriptNm | ( | const int & | UniCh | ) | [static] |
Definition at line 100 of file unicodestring.cpp.
References GetChScriptId(), and GetScriptNm().
{ return GetScriptNm(GetChScriptId(UniCh)); }
TStr TUStr::GetChTypeStr | ( | const int & | UniCh | ) | [static] |
Definition at line 109 of file unicodestring.cpp.
References IsAlphabetic(), IsCase(), IsLowerCase(), IsMath(), IsUpperCase(), TChA::LastCh(), and TChA::Len().
{ TChA ChTypeChA; ChTypeChA+='['; if (IsCase(UniCh)){ChTypeChA+="Case,";} if (IsUpperCase(UniCh)){ChTypeChA+="UpperCase,";} if (IsLowerCase(UniCh)){ChTypeChA+="LowerCase,";} if (IsAlphabetic(UniCh)){ChTypeChA+="Alphabetic,";} if (IsMath(UniCh)){ChTypeChA+="Math,";} if (ChTypeChA.LastCh()=='['){ChTypeChA+=']';} else {ChTypeChA[ChTypeChA.Len()-1]=']';} return ChTypeChA; }
int TUStr::GetScriptId | ( | const TStr & | ScriptNm | ) | [static] |
Definition at line 88 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUniChDb::GetScriptByName(), and TUnicode::ucd.
{ return TUnicodeDef::GetDef()->ucd.GetScriptByName(ScriptNm); }
TStr TUStr::GetScriptNm | ( | const int & | ScriptId | ) | [static] |
Definition at line 92 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUniChDb::GetScriptName(), and TUnicode::ucd.
Referenced by GetChScriptNm().
{ return TUnicodeDef::GetDef()->ucd.GetScriptName(ScriptId); }
TStr TUStr::GetStarterLowerCaseStr | ( | ) | const |
Definition at line 79 of file unicodestring.cpp.
References TUnicode::Decompose(), TUnicode::EncodeUtf8Str(), TUnicode::ExtractStarters(), TUnicodeDef::GetDef(), TUnicode::GetSimpleLowerCase(), and UniChV.
{ TIntV UniChV1; TIntV UniChV2; TIntV UniChV3; TUnicodeDef::GetDef()->GetSimpleLowerCase(UniChV, UniChV1); TUnicodeDef::GetDef()->ExtractStarters(UniChV1, UniChV2); TUnicodeDef::GetDef()->Decompose(UniChV2, UniChV3, true); TStr Str=TUnicodeDef::GetDef()->EncodeUtf8Str(UniChV3); return Str; }
TStr TUStr::GetStarterStr | ( | ) | const |
Definition at line 71 of file unicodestring.cpp.
References TUnicode::Decompose(), TUnicode::EncodeUtf8Str(), TUnicode::ExtractStarters(), TUnicodeDef::GetDef(), and UniChV.
{ TIntV UniChV1; TIntV UniChV2; TUnicodeDef::GetDef()->ExtractStarters(UniChV, UniChV1); TUnicodeDef::GetDef()->Decompose(UniChV1, UniChV2, true); TStr Str=TUnicodeDef::GetDef()->EncodeUtf8Str(UniChV2); return Str; }
TStr TUStr::GetStr | ( | ) | const |
Definition at line 66 of file unicodestring.cpp.
References TUnicode::EncodeUtf8Str(), TUnicodeDef::GetDef(), and UniChV.
{ TStr Str=TUnicodeDef::GetDef()->EncodeUtf8Str(UniChV); return Str; }
void TUStr::GetWordBoundPV | ( | TBoolV & | WordBoundPV | ) |
Definition at line 33 of file unicodestring.cpp.
References TUnicode::FindWordBoundaries(), TUnicodeDef::GetDef(), and UniChV.
Referenced by GetWordUStrV().
{ TUnicodeDef::GetDef()->FindWordBoundaries(UniChV, WordBoundPV); }
void TUStr::GetWordUStrV | ( | TUStrV & | UStrV | ) |
Definition at line 37 of file unicodestring.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), TVec< TVal, TSizeTy >::Empty(), GetWordBoundPV(), IAssert, IsAlphabetic(), TVec< TVal, TSizeTy >::Last(), Len(), TVec< TVal, TSizeTy >::Len(), and UniChV.
{
  // clear word vector
  WordUStrV.Clr();
  // create boundaries
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  // traverse characters and bounds
  int UniChs=Len(); TIntV WordUniChV;
  for (int UniChN=0; UniChN<=UniChs; UniChN++){
    if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary
      if (UniChN<UniChs){ // if not finish
        // if last-word-char or single-alphabetic-char
        if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){
          WordUniChV.Add(UniChV[UniChN]); // add char
        }
      }
      if (!WordUniChV.Empty()){ // add current word to vector
        TUStr WordUStr(WordUniChV); // construct word from char-vector
        WordUStrV.Add(WordUStr); // add word to word-vector
        WordUniChV.Clr(false); // clear char-vector
      }
    } else {
      // add character to char-vector
      WordUniChV.Add(UniChV[UniChN]);
    }
  }
}
bool TUStr::IsAlphabetic | ( | const int & | UniCh | ) | [static] |
Definition at line 143 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsAlphabetic().
Referenced by GetChTypeStr(), and GetWordUStrV().
{ TUniChInfo ChInfo; if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){ return ChInfo.IsAlphabetic();} else {return false;} }
bool TUStr::IsCase | ( | const int & | UniCh | ) | [static] |
Definition at line 122 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsCased().
Referenced by GetChTypeStr().
{ TUniChInfo ChInfo; if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){ return ChInfo.IsCased();} else {return false;} }
bool TUStr::IsLowerCase | ( | const int & | UniCh | ) | [static] |
Definition at line 136 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsLowercase().
Referenced by GetChTypeStr().
{ TUniChInfo ChInfo; if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){ return ChInfo.IsLowercase();} else {return false;} }
bool TUStr::IsMath | ( | const int & | UniCh | ) | [static] |
Definition at line 150 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsMath().
Referenced by GetChTypeStr().
{ TUniChInfo ChInfo; if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){ return ChInfo.IsMath();} else {return false;} }
bool TUStr::IsUpperCase | ( | const int & | UniCh | ) | [static] |
Definition at line 129 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsUppercase().
Referenced by GetChTypeStr().
{ TUniChInfo ChInfo; if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){ return ChInfo.IsUppercase();} else {return false;} }
int TUStr::Len | ( | ) | const [inline] |
Definition at line 57 of file unicodestring.h.
References TVec< TVal, TSizeTy >::Len(), and UniChV.
Referenced by GetWordUStrV().
void TUStr::LoadXml | ( | const PXmlTok & | XmlTok, const TStr & | Nm | ) |
TUStr & TUStr::operator+= | ( | const TUStr & | UStr | ) | [inline] |
Definition at line 52 of file unicodestring.h.
References TVec< TVal, TSizeTy >::AddV(), and UniChV.
TUStr & TUStr::operator= | ( | const TUStr & | UStr | ) | [inline] |
Definition at line 48 of file unicodestring.h.
References UniChV.
bool TUStr::operator== | ( | const TUStr & | UStr | ) | const [inline] |
int TUStr::operator[] | ( | const int & | UniChN | ) | const [inline] |
void TUStr::Save | ( | TSOut & | SOut | ) | const [inline] |
Definition at line 44 of file unicodestring.h.
References TVec< TVal, TSizeTy >::Save(), and UniChV.
void TUStr::SaveXml | ( | TSOut & | SOut, const TStr & | Nm | ) | const |
void TUStr::ToLowerCase | ( | ) |
Definition at line 19 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUnicode::ToSimpleLowerCase(), and UniChV.
{ TUnicodeDef::GetDef()->ToSimpleLowerCase(UniChV); }
void TUStr::ToStarterCase | ( | ) |
Definition at line 27 of file unicodestring.cpp.
References TUnicode::Decompose(), TUnicode::ExtractStarters(), TUnicodeDef::GetDef(), and UniChV.
{ TIntV StarterUniChV; TUnicodeDef::GetDef()->ExtractStarters(UniChV, StarterUniChV); TUnicodeDef::GetDef()->Decompose(StarterUniChV, UniChV, true); }
void TUStr::ToUpperCase | ( | ) |
Definition at line 23 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUnicode::ToSimpleUpperCase(), and UniChV.
{ TUnicodeDef::GetDef()->ToSimpleUpperCase(UniChV); }
TIntV TUStr::UniChV [private] |
Definition at line 34 of file unicodestring.h.
Referenced by Clr(), Empty(), GetStarterLowerCaseStr(), GetStarterStr(), GetStr(), GetWordBoundPV(), GetWordUStrV(), Len(), operator+=(), operator=(), operator==(), operator[](), Save(), ToLowerCase(), ToStarterCase(), ToUpperCase(), and TUStr().