SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
TUniCaseFolding Class Reference

#include <unicode.h>

Collaboration diagram for TUniCaseFolding:

Public Member Functions

 TUniCaseFolding ()
 
 TUniCaseFolding (TSIn &SIn)
 
void Load (TSIn &SIn)
 
void Save (TSOut &SOut) const
 
void Clr ()
 
void LoadTxt (const TStr &fileName)
 
template<typename TSrcVec , typename TDestCh >
void Fold (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const
 
template<typename TSrcVec >
void FoldInPlace (TSrcVec &src, size_t srcIdx, const size_t srcCount, const bool turkic) const
 
void Test ()
 

Protected Types

typedef TUniVecIdx TVecIdx
 

Protected Member Functions

void Test (const TIntV &src, const TIntV &expectedDest, const bool full, const bool turkic, FILE *f)
 

Static Protected Member Functions

template<typename TSrcDat , typename TDestDat >
static void AppendVector (const TVec< TSrcDat > &src, TVec< TDestDat > &dest)
 

Protected Attributes

TIntH cfCommon
 
TIntH cfSimple
 
TIntH cfTurkic
 
TIntIntVH cfFull
 

Friends

class TUniChDb
 

Detailed Description

Definition at line 271 of file unicode.h.

Member Typedef Documentation

typedef TUniVecIdx TUniCaseFolding::TVecIdx
protected

Definition at line 281 of file unicode.h.

Constructor & Destructor Documentation

TUniCaseFolding::TUniCaseFolding ( )
inline

Definition at line 284 of file unicode.h.

284 { }
TUniCaseFolding::TUniCaseFolding ( TSIn &  SIn)
inline explicit

Definition at line 285 of file unicode.h.

References TSIn::LoadCs().

285 : cfCommon(SIn), cfSimple(SIn), cfTurkic(SIn), cfFull(SIn) { SIn.LoadCs(); }
TIntIntVH cfFull
Definition: unicode.h:275
virtual void LoadCs()
Definition: fl.cpp:28
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
TIntH cfSimple
Definition: unicode.h:274

Here is the call graph for this function:

Member Function Documentation

template<typename TSrcDat , typename TDestDat >
static void TUniCaseFolding::AppendVector ( const TVec< TSrcDat > &  src,
TVec< TDestDat > &  dest 
)
inline static protected

Definition at line 278 of file unicode.h.

References TVec< TVal, TSizeTy >::Add(), and TVec< TVal, TSizeTy >::Len().

Referenced by Fold(), and TUniChDb::GetCaseConverted().

278  {
279  for (int i = 0; i < src.Len(); i++) dest.Add(src[i]); }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Clr ( )
inline

Definition at line 288 of file unicode.h.

References cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::Clr().

Referenced by TUniChDb::Clr(), and LoadTxt().

288 { cfCommon.Clr(); cfSimple.Clr(); cfFull.Clr(); cfTurkic.Clr(); }
TIntIntVH cfFull
Definition: unicode.h:275
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
TIntH cfSimple
Definition: unicode.h:274
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361

Here is the call graph for this function:

Here is the caller graph for this function:

template<typename TSrcVec , typename TDestCh >
void TUniCaseFolding::Fold ( const TSrcVec &  src,
size_t  srcIdx,
const size_t  srcCount,
TVec< TDestCh > &  dest,
const bool  clrDest,
const bool  full,
const bool  turkic 
) const
inline

Definition at line 293 of file unicode.h.

References TVec< TVal, TSizeTy >::Add(), AppendVector(), cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().

Referenced by TUniChDb::GetCaseFolded(), and Test().

295  {
296  for (const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; )
297  {
298  int c = src[TVecIdx(srcIdx)], i; srcIdx++;
299  if (turkic && ((i = cfTurkic.GetKeyId(c)) >= 0)) { dest.Add(cfTurkic[i]); continue; }
300  if (full && ((i = cfFull.GetKeyId(c)) >= 0)) { AppendVector(cfFull[i], dest); continue; }
301  if ((! full) && ((i = cfSimple.GetKeyId(c)) >= 0)) { dest.Add(cfSimple[i]); continue; }
302  i = cfCommon.GetKeyId(c); if (i >= 0) dest.Add(cfCommon[i]); else dest.Add(c);
303  }
304  }
static void AppendVector(const TVec< TSrcDat > &src, TVec< TDestDat > &dest)
Definition: unicode.h:278
TUniVecIdx TVecIdx
Definition: unicode.h:281
TIntIntVH cfFull
Definition: unicode.h:275
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
int GetKeyId(const TKey &Key) const
Definition: hash.h:466
TIntH cfSimple
Definition: unicode.h:274
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602

Here is the call graph for this function:

Here is the caller graph for this function:

template<typename TSrcVec >
void TUniCaseFolding::FoldInPlace ( TSrcVec &  src,
size_t  srcIdx,
const size_t  srcCount,
const bool  turkic 
) const
inline

Definition at line 307 of file unicode.h.

References cfCommon, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().

Referenced by TUniChDb::ToCaseFolded().

308  {
309  for (const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; srcIdx++)
310  {
311  int c = src[TVecIdx(srcIdx)], i;
312  if (turkic && ((i = cfTurkic.GetKeyId(c)) >= 0)) { src[TVecIdx(srcIdx)] = cfTurkic[i]; continue; }
313  if ((i = cfSimple.GetKeyId(c)) >= 0) { src[TVecIdx(srcIdx)] = cfSimple[i]; continue; }
314  i = cfCommon.GetKeyId(c); if (i >= 0) src[TVecIdx(srcIdx)] = cfCommon[i];
315  }
316  }
TUniVecIdx TVecIdx
Definition: unicode.h:281
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
int GetKeyId(const TKey &Key) const
Definition: hash.h:466
TIntH cfSimple
Definition: unicode.h:274

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Load ( TSIn &  SIn)
inline

Definition at line 286 of file unicode.h.

References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Load(), and TSIn::LoadCs().

Referenced by TUniChDb::Load().

286 { cfCommon.Load(SIn); cfSimple.Load(SIn); cfFull.Load(SIn); cfTurkic.Load(SIn); SIn.LoadCs(); }
TIntIntVH cfFull
Definition: unicode.h:275
virtual void LoadCs()
Definition: fl.cpp:28
void Load(TSIn &SIn)
Definition: hash.h:177
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
TIntH cfSimple
Definition: unicode.h:274

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::LoadTxt ( const TStr &  fileName)

Definition at line 505 of file unicode.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), cfCommon, cfFull, cfSimple, cfTurkic, Clr(), TStr::CStr(), FailR, TUniChDb::TUcdFileReader::GetNextLine(), IAssert, THash< TKey, TDat, THashFunc >::IsKey(), THash< TKey, TDat, THashFunc >::Len(), TUniChDb::TUcdFileReader::Open(), TUniChDb::TUcdFileReader::ParseCodePoint(), and TUniChDb::TUcdFileReader::ParseCodePointList().

Referenced by TUniChDb::LoadTxt().

506 {
507  Clr();
508  TUniChDb::TUcdFileReader reader; reader.Open(fileName);
509  TStrV fields;
510  while (reader.GetNextLine(fields))
511  {
512  int cp = reader.ParseCodePoint(fields[0]);
513  const TStr status = fields[1], mapsTo = fields[2];
514  if (status == "C" || status == "S" || status == "T") {
515  TIntH &dest = (status == "C" ? cfCommon : status == "S" ? cfSimple : cfTurkic);
516  IAssert(! dest.IsKey(cp));
517  int cp2 = reader.ParseCodePoint(mapsTo);
518  dest.AddDat(cp, cp2); }
519  else if (status == "F") {
520  TIntIntVH &dest = cfFull;
521  IAssert(! dest.IsKey(cp));
522  TIntV cps; reader.ParseCodePointList(mapsTo, cps); IAssert(cps.Len() > 0);
523  dest.AddDat(cp, cps); }
524  else
525  FailR(status.CStr());
526  }
527  printf("TUniCaseFolding(\"%s\"): %d common, %d simple, %d full, %d Turkic.\n",
528  fileName.CStr(), cfCommon.Len(), cfSimple.Len(), cfFull.Len(), cfTurkic.Len());
529 }
#define IAssert(Cond)
Definition: bd.h:262
TIntIntVH cfFull
Definition: unicode.h:275
void Clr()
Definition: unicode.h:288
bool GetNextLine(TStrV &dest)
Definition: unicode.h:1686
void Open(const TStr &fileName)
Definition: unicode.h:1683
#define FailR(Reason)
Definition: bd.h:240
TIntH cfTurkic
Definition: unicode.h:274
static void ParseCodePointList(const TStr &s, TIntV &dest, bool ClrDestP=true)
Definition: unicode.h:1697
TIntH cfCommon
Definition: unicode.h:274
Definition: dt.h:412
TIntH cfSimple
Definition: unicode.h:274
char * CStr()
Definition: dt.h:476
bool IsKey(const TKey &Key) const
Definition: hash.h:258
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
static int ParseCodePoint(const TStr &s)
Definition: unicode.h:1695

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Save ( TSOut &  SOut) const
inline

Definition at line 287 of file unicode.h.

References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Save(), and TSOut::SaveCs().

Referenced by TUniChDb::Save().

287 { cfCommon.Save(SOut); cfSimple.Save(SOut); cfFull.Save(SOut); cfTurkic.Save(SOut); SOut.SaveCs(); }
void Save(TSOut &SOut) const
Definition: hash.h:183
TIntIntVH cfFull
Definition: unicode.h:275
void SaveCs()
Definition: fl.h:171
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
TIntH cfSimple
Definition: unicode.h:274

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Test ( const TIntV &  src,
const TIntV &  expectedDest,
const bool  full,
const bool  turkic,
FILE *  f 
)
protected

Definition at line 531 of file unicode.cpp.

References Fold(), IAssert, and TVec< TVal, TSizeTy >::Len().

Referenced by TUniChDb::Test().

532 {
533  fprintf(f, "TUniCaseFolding(%s%s): ", (full ? "full" : "simple"), (turkic ? ", turkic" : ""));
534  for (int i = 0; i < src.Len(); i++) fprintf(f, " %04x", int(src[i]));
535  TIntV dest; Fold(src, 0, src.Len(), dest, true, full, turkic);
536  fprintf(f, "\n -> ");
537  for (int i = 0; i < dest.Len(); i++) fprintf(f, " %04x", int(dest[i]));
538  fprintf(f, "\n");
539  IAssert(dest.Len() == expectedDest.Len());
540  for (int i = 0; i < dest.Len(); i++) IAssert(dest[i] == expectedDest[i]);
541 }
#define IAssert(Cond)
Definition: bd.h:262
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void Fold(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const
Definition: unicode.h:293

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Test ( )

Definition at line 549 of file unicode.cpp.

References anonymous_namespace{unicode.cpp}::VB.

550 {
551  FILE *f = stderr;
552  TVectorBuilder VB;
553  // simple
554  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0xdf), false, false, f);
555  // simple + turkic
556  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0xdf), false, true, f);
557  // full
558  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0x73, 0x73), true, false, f);
559  // full + turkic
560  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0x73, 0x73), true, true, f);
561 }

Friends And Related Function Documentation

friend class TUniChDb
friend

Definition at line 280 of file unicode.h.

Member Data Documentation

TIntH TUniCaseFolding::cfCommon
protected

Definition at line 274 of file unicode.h.

Referenced by Clr(), Fold(), FoldInPlace(), Load(), LoadTxt(), and Save().

TIntIntVH TUniCaseFolding::cfFull
protected

Definition at line 275 of file unicode.h.

Referenced by Clr(), Fold(), Load(), LoadTxt(), and Save().

TIntH TUniCaseFolding::cfSimple
protected

Definition at line 274 of file unicode.h.

Referenced by Clr(), Fold(), FoldInPlace(), Load(), LoadTxt(), and Save().

TIntH TUniCaseFolding::cfTurkic
protected

Definition at line 274 of file unicode.h.

Referenced by Clr(), Fold(), FoldInPlace(), Load(), LoadTxt(), and Save().


The documentation for this class was generated from the following files: