| 
    SNAP Library 2.0, Developer Reference
    2013-05-13 16:33:57
    
   SNAP, a general purpose, high performance system for analysis and manipulation of large networks 
   | 
  
  
  
 
#include <unicodestring.h>

Public Member Functions | |
| TUStr () | |
| TUStr (const TUStr &UStr) | |
| TUStr (const TIntV &_UniChV) | |
| TUStr (const TStr &Str) | |
| ~TUStr () | |
| TUStr (TSIn &SIn) | |
| void | Save (TSOut &SOut) const | 
| void | LoadXml (const PXmlTok &XmlTok, const TStr &Nm) | 
| void | SaveXml (TSOut &SOut, const TStr &Nm) const | 
| TUStr & | operator= (const TUStr &UStr) | 
| bool | operator== (const TUStr &UStr) const | 
| TUStr & | operator+= (const TUStr &UStr) | 
| int | operator[] (const int &UniChN) const | 
| void | Clr () | 
| int | Len () const | 
| bool | Empty () const | 
| void | ToLowerCase () | 
| void | ToUpperCase () | 
| void | ToStarterCase () | 
| void | GetWordBoundPV (TBoolV &WordBoundPV) | 
| void | GetWordUStrV (TUStrV &UStrV) | 
| TStr | GetStr () const | 
| TStr | GetStarterStr () const | 
| TStr | GetStarterLowerCaseStr () const | 
Static Public Member Functions | |
| static int | GetScriptId (const TStr &ScriptNm) | 
| static TStr | GetScriptNm (const int &ScriptId) | 
| static int | GetChScriptId (const int &UniCh) | 
| static TStr | GetChScriptNm (const int &UniCh) | 
| static TStr | GetChNm (const int &UniCh) | 
| static TStr | GetChTypeStr (const int &UniCh) | 
| static bool | IsCase (const int &UniCh) | 
| static bool | IsUpperCase (const int &UniCh) | 
| static bool | IsLowerCase (const int &UniCh) | 
| static bool | IsAlphabetic (const int &UniCh) | 
| static bool | IsMath (const int &UniCh) | 
| static TStr | EncodeUtf8 (const int &UniCh) | 
Static Private Member Functions | |
| static void | AssertUnicodeDefOk () | 
Private Attributes | |
| TIntV | UniChV | 
Definition at line 32 of file unicodestring.h.
| TUStr::TUStr | ( | ) |  [inline] | 
        
Definition at line 38 of file unicodestring.h.
References AssertUnicodeDefOk().
: UniChV(){AssertUnicodeDefOk();}

| TUStr::TUStr | ( | const TUStr & | UStr | ) |  [inline] | 
        
Definition at line 39 of file unicodestring.h.
References AssertUnicodeDefOk().
: UniChV(UStr.UniChV){AssertUnicodeDefOk();}

| TUStr::TUStr | ( | const TIntV & | _UniChV | ) |  [inline] | 
        
Definition at line 40 of file unicodestring.h.
References AssertUnicodeDefOk().
: UniChV(_UniChV){AssertUnicodeDefOk();}

| TUStr::TUStr | ( | const TStr & | Str | ) | 
Definition at line 12 of file unicodestring.cpp.
References AssertUnicodeDefOk(), TUnicode::DecodeUtf8(), TUnicode::Decompose(), TUnicodeDef::GetDef(), and UniChV.
                           {
  AssertUnicodeDefOk();
  TUnicodeDef::GetDef()->DecodeUtf8(Str, UniChV);
  TIntV NfcUniChV; TUnicodeDef::GetDef()->Decompose(UniChV, NfcUniChV, true);
  UniChV=NfcUniChV;
}

| TUStr::~TUStr | ( | ) |  [inline] | 
        
Definition at line 42 of file unicodestring.h.
{}
| TUStr::TUStr | ( | TSIn & | SIn | ) |  [inline] | 
        
Definition at line 43 of file unicodestring.h.
References AssertUnicodeDefOk().
: UniChV(SIn){AssertUnicodeDefOk();}

| static void TUStr::AssertUnicodeDefOk | ( | ) |  [inline, static, private] | 
        
Definition at line 35 of file unicodestring.h.
References EAssertR, and TUnicodeDef::IsDef().
Referenced by EncodeUtf8(), and TUStr().
                                  {
    EAssertR(TUnicodeDef::IsDef(), "Unicode-Definition-File not loaded!");}


| void TUStr::Clr | ( | ) |  [inline] | 
        
Definition at line 56 of file unicodestring.h.
References TVec< TVal, TSizeTy >::Clr(), and UniChV.

| bool TUStr::Empty | ( | ) |  const [inline] | 
        
Definition at line 58 of file unicodestring.h.
References TVec< TVal, TSizeTy >::Empty(), and UniChV.

| TStr TUStr::EncodeUtf8 | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 157 of file unicodestring.cpp.
References AssertUnicodeDefOk(), TUnicode::EncodeUtf8Str(), TUnicodeDef::GetDef(), and TVec< TInt >::GetV().
                                       {
  AssertUnicodeDefOk();
  return TUnicodeDef::GetDef()->EncodeUtf8Str(TIntV::GetV(UniCh));
}

| TStr TUStr::GetChNm | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 104 of file unicodestring.cpp.
References TUnicode::GetCharNameS(), and TUnicodeDef::GetDef().
                                   {
  TStr UniChNm(TUnicodeDef::GetDef()->ucd.GetCharNameS(UniCh));
  return UniChNm;
}

| int TUStr::GetChScriptId | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 96 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUniChDb::GetScript(), and TUnicode::ucd.
Referenced by GetChScriptNm().
                                        {
  return TUnicodeDef::GetDef()->ucd.GetScript(UniCh);
}


| TStr TUStr::GetChScriptNm | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 100 of file unicodestring.cpp.
References GetChScriptId(), and GetScriptNm().
                                         {
  return GetScriptNm(GetChScriptId(UniCh));
}

| TStr TUStr::GetChTypeStr | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 109 of file unicodestring.cpp.
References IsAlphabetic(), IsCase(), IsLowerCase(), IsMath(), IsUpperCase(), TChA::LastCh(), and TChA::Len().
                                        {
  TChA ChTypeChA;
  ChTypeChA+='[';
  if (IsCase(UniCh)){ChTypeChA+="Case,";}
  if (IsUpperCase(UniCh)){ChTypeChA+="UpperCase,";}
  if (IsLowerCase(UniCh)){ChTypeChA+="LowerCase,";}
  if (IsAlphabetic(UniCh)){ChTypeChA+="Alphabetic,";}
  if (IsMath(UniCh)){ChTypeChA+="Math,";}
  if (ChTypeChA.LastCh()=='['){ChTypeChA+=']';}
  else {ChTypeChA[ChTypeChA.Len()-1]=']';}
  return ChTypeChA;
}

| int TUStr::GetScriptId | ( | const TStr & | ScriptNm | ) |  [static] | 
        
Definition at line 88 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUniChDb::GetScriptByName(), and TUnicode::ucd.
                                          {
  return TUnicodeDef::GetDef()->ucd.GetScriptByName(ScriptNm);
}

| TStr TUStr::GetScriptNm | ( | const int & | ScriptId | ) |  [static] | 
        
Definition at line 92 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUniChDb::GetScriptName(), and TUnicode::ucd.
Referenced by GetChScriptNm().
                                          {
  return TUnicodeDef::GetDef()->ucd.GetScriptName(ScriptId);
}


| TStr TUStr::GetStarterLowerCaseStr | ( | ) | const | 
Definition at line 79 of file unicodestring.cpp.
References TUnicode::Decompose(), TUnicode::EncodeUtf8Str(), TUnicode::ExtractStarters(), TUnicodeDef::GetDef(), TUnicode::GetSimpleLowerCase(), and UniChV.
                                         {
  TIntV UniChV1; TIntV UniChV2; TIntV UniChV3;
  TUnicodeDef::GetDef()->GetSimpleLowerCase(UniChV, UniChV1);
  TUnicodeDef::GetDef()->ExtractStarters(UniChV1, UniChV2);
  TUnicodeDef::GetDef()->Decompose(UniChV2, UniChV3, true);
  TStr Str=TUnicodeDef::GetDef()->EncodeUtf8Str(UniChV3);
  return Str;
}

| TStr TUStr::GetStarterStr | ( | ) | const | 
Definition at line 71 of file unicodestring.cpp.
References TUnicode::Decompose(), TUnicode::EncodeUtf8Str(), TUnicode::ExtractStarters(), TUnicodeDef::GetDef(), and UniChV.
                                {
  TIntV UniChV1; TIntV UniChV2;
  TUnicodeDef::GetDef()->ExtractStarters(UniChV, UniChV1);
  TUnicodeDef::GetDef()->Decompose(UniChV1, UniChV2, true);
  TStr Str=TUnicodeDef::GetDef()->EncodeUtf8Str(UniChV2);
  return Str;
}

| TStr TUStr::GetStr | ( | ) | const | 
Definition at line 66 of file unicodestring.cpp.
References TUnicode::EncodeUtf8Str(), TUnicodeDef::GetDef(), and UniChV.
                         {
  TStr Str=TUnicodeDef::GetDef()->EncodeUtf8Str(UniChV);
  return Str;
}

| void TUStr::GetWordBoundPV | ( | TBoolV & | WordBoundPV | ) | 
Definition at line 33 of file unicodestring.cpp.
References TUnicode::FindWordBoundaries(), TUnicodeDef::GetDef(), and UniChV.
Referenced by GetWordUStrV().
                                             {
  TUnicodeDef::GetDef()->FindWordBoundaries(UniChV, WordBoundPV);
}


| void TUStr::GetWordUStrV | ( | TUStrV & | WordUStrV | ) | 
Definition at line 37 of file unicodestring.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), TVec< TVal, TSizeTy >::Empty(), GetWordBoundPV(), IAssert, IsAlphabetic(), TVec< TVal, TSizeTy >::Last(), Len(), TVec< TVal, TSizeTy >::Len(), and UniChV.
                                         {
  // clear word vector
  WordUStrV.Clr();
  // create boundaries
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  // traverse characters and bounds
  int UniChs=Len(); TIntV WordUniChV;
  for (int UniChN=0; UniChN<=UniChs; UniChN++){
    if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary
      if (UniChN<UniChs){ // if not finish
        // if last-word-char or single-alphabetic-char
        if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){
          WordUniChV.Add(UniChV[UniChN]); // add char
        }
      }
      if (!WordUniChV.Empty()){ // add current word to vector
        TUStr WordUStr(WordUniChV); // construct word from char-vector
        WordUStrV.Add(WordUStr); // add word to word-vector
        WordUniChV.Clr(false); // clear char-vector
      }
    } else {
      // add character to char-vector
      WordUniChV.Add(UniChV[UniChN]);
    }
  }
}

| bool TUStr::IsAlphabetic | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 143 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsAlphabetic().
Referenced by GetChTypeStr(), and GetWordUStrV().
                                        {
  TUniChInfo ChInfo;
  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
    return ChInfo.IsAlphabetic();}
  else {return false;}
}


| bool TUStr::IsCase | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 122 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsCased().
Referenced by GetChTypeStr().
                                  {
  TUniChInfo ChInfo;
  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
    return ChInfo.IsCased();}
  else {return false;}
}


| bool TUStr::IsLowerCase | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 136 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsLowercase().
Referenced by GetChTypeStr().
                                       {
  TUniChInfo ChInfo;
  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
    return ChInfo.IsLowercase();}
  else {return false;}
}


| bool TUStr::IsMath | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 150 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsMath().
Referenced by GetChTypeStr().
                                  {
  TUniChInfo ChInfo;
  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
    return ChInfo.IsMath();}
  else {return false;}
}


| bool TUStr::IsUpperCase | ( | const int & | UniCh | ) |  [static] | 
        
Definition at line 129 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), and TUniChInfo::IsUppercase().
Referenced by GetChTypeStr().
                                       {
  TUniChInfo ChInfo;
  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
    return ChInfo.IsUppercase();}
  else {return false;}
}


| int TUStr::Len | ( | ) |  const [inline] | 
        
Definition at line 57 of file unicodestring.h.
References TVec< TVal, TSizeTy >::Len(), and UniChV.
Referenced by GetWordUStrV().


| void TUStr::LoadXml | ( | const PXmlTok & | XmlTok, | 
| const TStr & | Nm | ||
| ) | 
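| TUStr & TUStr::operator+= | ( | const TUStr & | UStr | ) |  [inline] | 
        
Definition at line 52 of file unicodestring.h.
References TVec< TVal, TSizeTy >::AddV(), and UniChV.

| TUStr & TUStr::operator= | ( | const TUStr & | UStr | ) |  [inline] | 
        
Definition at line 48 of file unicodestring.h.
References UniChV.
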
| bool TUStr::operator== | ( | const TUStr & | UStr | ) |  const [inline] | 
        
| int TUStr::operator[] | ( | const int & | UniChN | ) |  const [inline] | 
        
| void TUStr::Save | ( | TSOut & | SOut | ) |  const [inline] | 
        
Definition at line 44 of file unicodestring.h.
References TVec< TVal, TSizeTy >::Save(), and UniChV.

| void TUStr::SaveXml | ( | TSOut & | SOut, | 
| const TStr & | Nm | ||
| ) | const | 
| void TUStr::ToLowerCase | ( | ) | 
Definition at line 19 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUnicode::ToSimpleLowerCase(), and UniChV.
                       {
  TUnicodeDef::GetDef()->ToSimpleLowerCase(UniChV);
}

| void TUStr::ToStarterCase | ( | ) | 
Definition at line 27 of file unicodestring.cpp.
References TUnicode::Decompose(), TUnicode::ExtractStarters(), TUnicodeDef::GetDef(), and UniChV.
                         {
  TIntV StarterUniChV;
  TUnicodeDef::GetDef()->ExtractStarters(UniChV, StarterUniChV);
  TUnicodeDef::GetDef()->Decompose(StarterUniChV, UniChV, true);
}

| void TUStr::ToUpperCase | ( | ) | 
Definition at line 23 of file unicodestring.cpp.
References TUnicodeDef::GetDef(), TUnicode::ToSimpleUpperCase(), and UniChV.
                       {
  TUnicodeDef::GetDef()->ToSimpleUpperCase(UniChV);
}

TIntV TUStr::UniChV [private] | 
        
Definition at line 34 of file unicodestring.h.
Referenced by Clr(), Empty(), GetStarterLowerCaseStr(), GetStarterStr(), GetStr(), GetWordBoundPV(), GetWordUStrV(), Len(), operator+=(), operator=(), operator==(), operator[](), Save(), ToLowerCase(), ToStarterCase(), ToUpperCase(), and TUStr().