SNAP Library, Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <unicode.h>
Public Member Functions | |
TUniCaseFolding () | |
TUniCaseFolding (TSIn &SIn) | |
void | Load (TSIn &SIn) |
void | Save (TSOut &SOut) const |
void | Clr () |
void | LoadTxt (const TStr &fileName) |
template<typename TSrcVec , typename TDestCh > | |
void | Fold (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const |
template<typename TSrcVec > | |
void | FoldInPlace (TSrcVec &src, size_t srcIdx, const size_t srcCount, const bool turkic) const |
void | Test () |
Protected Types | |
typedef TUniVecIdx | TVecIdx |
Protected Member Functions | |
void | Test (const TIntV &src, const TIntV &expectedDest, const bool full, const bool turkic, FILE *f) |
Static Protected Member Functions | |
template<typename TSrcDat , typename TDestDat > | |
static void | AppendVector (const TVec< TSrcDat > &src, TVec< TDestDat > &dest) |
Protected Attributes | |
TIntH | cfCommon |
TIntH | cfSimple |
TIntH | cfTurkic |
TIntIntVH | cfFull |
Friends | |
class | TUniChDb |
typedef TUniVecIdx TUniCaseFolding::TVecIdx [protected] |
TUniCaseFolding::TUniCaseFolding | ( | ) | [inline] |
TUniCaseFolding::TUniCaseFolding | ( | TSIn & | SIn | ) | [inline, explicit] |
static void TUniCaseFolding::AppendVector | ( | const TVec< TSrcDat > & | src, |
TVec< TDestDat > & | dest | ||
) | [inline, static, protected] |
Definition at line 277 of file unicode.h.
References TVec< TVal >::Add(), and TVec< TVal >::Len().
Referenced by Fold(), and TUniChDb::GetCaseConverted().
void TUniCaseFolding::Clr | ( | ) | [inline] |
Definition at line 287 of file unicode.h.
References cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::Clr().
Referenced by TUniChDb::Clr(), and LoadTxt().
void TUniCaseFolding::Fold | ( | const TSrcVec & | src, |
size_t | srcIdx, | ||
const size_t | srcCount, | ||
TVec< TDestCh > & | dest, | ||
const bool | clrDest, | ||
const bool | full, | ||
const bool | turkic | ||
) | const [inline] |
Definition at line 292 of file unicode.h.
References TVec< TVal >::Add(), AppendVector(), cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().
Referenced by TUniChDb::GetCaseFolded(), and Test().
{
  // Case-folds srcCount code points of src, starting at srcIdx, and appends
  // the folded code points to dest.
  //   clrDest - when true, dest is emptied before anything is appended
  //   full    - when true, use the multi-code-point mappings in cfFull;
  //             when false, use the one-to-one mappings in cfSimple
  //   turkic  - when true, cfTurkic is consulted before any other table
  // A code point with no entry in any consulted table is copied unchanged.
  //
  // The body previously never read clrDest, so dest was never emptied even
  // when the caller asked for it.
  // NOTE(review): if a caller passes the same vector as both src and dest
  // with clrDest set, the input is cleared before it is read -- confirm that
  // no caller does this.
  if (clrDest) dest.Clr();
  for (const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; ) {
    int c = src[TVecIdx(srcIdx)], i;
    srcIdx++;
    // Lookup priority: Turkic (if requested), then full or simple, then
    // common. A non-negative key id is treated as "found".
    if (turkic && ((i = cfTurkic.GetKeyId(c)) >= 0)) { dest.Add(cfTurkic[i]); continue; }
    if (full && ((i = cfFull.GetKeyId(c)) >= 0)) { AppendVector(cfFull[i], dest); continue; }
    if ((! full) && ((i = cfSimple.GetKeyId(c)) >= 0)) { dest.Add(cfSimple[i]); continue; }
    i = cfCommon.GetKeyId(c);
    if (i >= 0) dest.Add(cfCommon[i]);
    else dest.Add(c);  // no mapping at all: keep the code point as is
  }
}
void TUniCaseFolding::FoldInPlace | ( | TSrcVec & | src, |
size_t | srcIdx, | ||
const size_t | srcCount, | ||
const bool | turkic | ||
) | const [inline] |
Definition at line 306 of file unicode.h.
References cfCommon, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().
Referenced by TUniChDb::ToCaseFolded().
{
  // Replaces each of the srcCount code points of src, starting at srcIdx,
  // with its one-to-one case folding. Only cfTurkic (when turkic is set),
  // cfSimple and cfCommon are consulted, in that order; cfFull is not used
  // here. Elements without a mapping are left untouched.
  const size_t srcEnd = srcIdx + srcCount;
  while (srcIdx < srcEnd) {
    const TVecIdx pos = TVecIdx(srcIdx);
    srcIdx++;
    const int codePoint = src[pos];
    int keyId = -1;
    if (turkic && ((keyId = cfTurkic.GetKeyId(codePoint)) >= 0)) {
      src[pos] = cfTurkic[keyId];
    } else if ((keyId = cfSimple.GetKeyId(codePoint)) >= 0) {
      src[pos] = cfSimple[keyId];
    } else if ((keyId = cfCommon.GetKeyId(codePoint)) >= 0) {
      src[pos] = cfCommon[keyId];
    }
  }
}
void TUniCaseFolding::Load | ( | TSIn & | SIn | ) | [inline] |
Definition at line 285 of file unicode.h.
References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Load(), and TSIn::LoadCs().
Referenced by TUniChDb::Load().
void TUniCaseFolding::LoadTxt | ( | const TStr & | fileName | ) |
Definition at line 509 of file unicode.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), cfCommon, cfFull, cfSimple, cfTurkic, Clr(), TStr::CStr(), FailR, TUniChDb::TUcdFileReader::GetNextLine(), IAssert, THash< TKey, TDat, THashFunc >::IsKey(), THash< TKey, TDat, THashFunc >::Len(), TUniChDb::TUcdFileReader::Open(), TUniChDb::TUcdFileReader::ParseCodePoint(), and TUniChDb::TUcdFileReader::ParseCodePointList().
Referenced by TUniChDb::LoadTxt().
{
  // Rebuilds the four folding tables from the text file fileName.
  // Each line delivered by the reader is used as: field 0 = source code
  // point, field 1 = status letter, field 2 = mapping. Status "C", "S" and
  // "T" store a single target code point in cfCommon, cfSimple and cfTurkic
  // respectively; "F" stores a non-empty list in cfFull; any other status
  // is reported through FailR. A code point seen twice for the same table
  // trips an IAssert.
  Clr();
  TUniChDb::TUcdFileReader reader;
  reader.Open(fileName);
  TStrV fields;
  while (reader.GetNextLine(fields)) {
    const int srcCp = reader.ParseCodePoint(fields[0]);
    const TStr status = fields[1];
    const TStr mapsTo = fields[2];
    if (status == "F") {
      // One code point folds to a sequence of code points.
      IAssert(! cfFull.IsKey(srcCp));
      TIntV targetCps;
      reader.ParseCodePointList(mapsTo, targetCps);
      IAssert(targetCps.Len() > 0);
      cfFull.AddDat(srcCp, targetCps);
    } else if (status == "C" || status == "S" || status == "T") {
      // One code point folds to exactly one code point; pick the table
      // that matches the status letter.
      TIntH &table = (status == "C") ? cfCommon
                   : ((status == "S") ? cfSimple : cfTurkic);
      IAssert(! table.IsKey(srcCp));
      const int targetCp = reader.ParseCodePoint(mapsTo);
      table.AddDat(srcCp, targetCp);
    } else {
      FailR(status.CStr());
    }
  }
  // Summary of how many entries ended up in each table.
  printf("TUniCaseFolding(\"%s\"): %d common, %d simple, %d full, %d Turkic.\n",
         fileName.CStr(), cfCommon.Len(), cfSimple.Len(), cfFull.Len(), cfTurkic.Len());
}
void TUniCaseFolding::Save | ( | TSOut & | SOut | ) | const [inline] |
Definition at line 286 of file unicode.h.
References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Save(), and TSOut::SaveCs().
Referenced by TUniChDb::Save().
{
  // Writes the four folding tables to SOut, then calls SOut.SaveCs().
  // The write order (common, simple, full, Turkic) is part of the stored
  // format and must not change.
  // NOTE(review): Load() presumably reads the tables back in the same
  // order -- its body is not shown here, confirm before reordering.
  cfCommon.Save(SOut);
  cfSimple.Save(SOut);
  cfFull.Save(SOut);
  cfTurkic.Save(SOut);
  SOut.SaveCs();
}
void TUniCaseFolding::Test | ( | const TIntV & | src, |
const TIntV & | expectedDest, | ||
const bool | full, | ||
const bool | turkic, | ||
FILE * | f | ||
) | [protected] |
Definition at line 535 of file unicode.cpp.
References Fold(), IAssert, and TVec< TVal >::Len().
Referenced by TUniChDb::Test().
{
  // Folds src with the given full/turkic settings, logs the input and the
  // output as hexadecimal code points to f, and asserts that the output is
  // element-for-element equal to expectedDest.
  const char *modeName = full ? "full" : "simple";
  const char *turkicSuffix = turkic ? ", turkic" : "";
  fprintf(f, "TUniCaseFolding(%s%s): ", modeName, turkicSuffix);
  for (int srcPos = 0; srcPos < src.Len(); srcPos++) {
    fprintf(f, " %04x", int(src[srcPos]));
  }

  // Fold into a fresh vector so that only this call's output is compared.
  TIntV folded;
  Fold(src, 0, src.Len(), folded, true, full, turkic);

  fprintf(f, "\n -> ");
  for (int outPos = 0; outPos < folded.Len(); outPos++) {
    fprintf(f, " %04x", int(folded[outPos]));
  }
  fprintf(f, "\n");

  // Check the length first so the element loop never indexes past the end
  // of expectedDest.
  IAssert(folded.Len() == expectedDest.Len());
  for (int cmpPos = 0; cmpPos < folded.Len(); cmpPos++) {
    IAssert(folded[cmpPos] == expectedDest[cmpPos]);
  }
}
void TUniCaseFolding::Test | ( | ) |
Definition at line 553 of file unicode.cpp.
References anonymous_namespace{unicode.cpp}::VB.
{
  // Self-test: folds the fixed input 0x41 0x62 0x49 0x43 0xdf
  // ('A', 'b', 'I', 'C', U+00DF) under all four combinations of the
  // full/turkic flags and checks each result against the expected code
  // points. Progress is logged to stderr.
  //
  // Each comment below sits on its own line. When the comments shared a
  // line with the code, the first "//" turned every following Test(...)
  // call and the closing brace into comment text.
  FILE *f = stderr;
  TVectorBuilder VB;
  // simple: one-to-one folding, U+00DF stays as it is
  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0xdf), false, false, f);
  // simple + turkic: 0x49 ('I') is expected to fold to 0x131
  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0xdf), false, true, f);
  // full: U+00DF is expected to expand to 0x73 0x73
  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0x73, 0x73), true, false, f);
  // full + turkic: both of the above effects together
  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0x73, 0x73), true, true, f);
}
TIntH TUniCaseFolding::cfCommon [protected] |
TIntIntVH TUniCaseFolding::cfFull [protected] |
TIntH TUniCaseFolding::cfSimple [protected] |
TIntH TUniCaseFolding::cfTurkic [protected] |