|
SNAP Library, Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <unicode.h>

Public Member Functions | |
| TUniCaseFolding () | |
| TUniCaseFolding (TSIn &SIn) | |
| void | Load (TSIn &SIn) |
| void | Save (TSOut &SOut) const |
| void | Clr () |
| void | LoadTxt (const TStr &fileName) |
| template<typename TSrcVec , typename TDestCh > | |
| void | Fold (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const |
| template<typename TSrcVec > | |
| void | FoldInPlace (TSrcVec &src, size_t srcIdx, const size_t srcCount, const bool turkic) const |
| void | Test () |
Protected Types | |
| typedef TUniVecIdx | TVecIdx |
Protected Member Functions | |
| void | Test (const TIntV &src, const TIntV &expectedDest, const bool full, const bool turkic, FILE *f) |
Static Protected Member Functions | |
| template<typename TSrcDat , typename TDestDat > | |
| static void | AppendVector (const TVec< TSrcDat > &src, TVec< TDestDat > &dest) |
Protected Attributes | |
| TIntH | cfCommon |
| TIntH | cfSimple |
| TIntH | cfTurkic |
| TIntIntVH | cfFull |
Friends | |
| class | TUniChDb |
typedef TUniVecIdx TUniCaseFolding::TVecIdx [protected] |
| TUniCaseFolding::TUniCaseFolding | ( | ) | [inline] |
| TUniCaseFolding::TUniCaseFolding | ( | TSIn & | SIn | ) | [inline, explicit] |
| static void TUniCaseFolding::AppendVector | ( | const TVec< TSrcDat > & | src, |
| TVec< TDestDat > & | dest | ||
| ) | [inline, static, protected] |
Definition at line 277 of file unicode.h.
References TVec< TVal >::Add(), and TVec< TVal >::Len().
Referenced by Fold(), and TUniChDb::GetCaseConverted().


| void TUniCaseFolding::Clr | ( | ) | [inline] |
Definition at line 287 of file unicode.h.
References cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::Clr().
Referenced by TUniChDb::Clr(), and LoadTxt().


| void TUniCaseFolding::Fold | ( | const TSrcVec & | src, |
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TVec< TDestCh > & | dest, | ||
| const bool | clrDest, | ||
| const bool | full, | ||
| const bool | turkic | ||
| ) | const [inline] |
Definition at line 292 of file unicode.h.
References TVec< TVal >::Add(), AppendVector(), cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().
Referenced by TUniChDb::GetCaseFolded(), and Test().
{
  // Case-folds the srcCount code points of src starting at index srcIdx and
  // appends the folded code points to dest.
  //
  //   clrDest - if true, dest is emptied before anything is appended;
  //             if false, the folded output is appended after dest's
  //             existing contents.
  //   full    - if true, cfFull is consulted (one code point may fold to a
  //             whole sequence); if false, cfSimple is consulted instead
  //             (one code point folds to one code point).
  //   turkic  - if true, cfTurkic is consulted first and overrides every
  //             other table.
  //
  // Code points with no entry in the selected tables fall back to cfCommon,
  // and are copied through unchanged if cfCommon has no entry either.
  //
  // NOTE: dest is cleared before src is read, so dest must not be the same
  // object as src when clrDest is true.
  if (clrDest) dest.Clr();
  for (const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; srcIdx++)
  {
    const int c = src[TVecIdx(srcIdx)];
    int i;  // key id of the current lookup; negative when the key is absent
    if (turkic && ((i = cfTurkic.GetKeyId(c)) >= 0)) { dest.Add(cfTurkic[i]); continue; }
    if (full && ((i = cfFull.GetKeyId(c)) >= 0)) { AppendVector(cfFull[i], dest); continue; }
    if ((! full) && ((i = cfSimple.GetKeyId(c)) >= 0)) { dest.Add(cfSimple[i]); continue; }
    i = cfCommon.GetKeyId(c);
    if (i >= 0) dest.Add(cfCommon[i]);
    else dest.Add(c);
  }
}


| void TUniCaseFolding::FoldInPlace | ( | TSrcVec & | src, |
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| const bool | turkic | ||
| ) | const [inline] |
Definition at line 306 of file unicode.h.
References cfCommon, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().
Referenced by TUniChDb::ToCaseFolded().
{
  // Replaces each of the srcCount code points of src, starting at srcIdx,
  // with its single-code-point case folding. Table precedence:
  // cfTurkic (only when turkic is set), then cfSimple, then cfCommon.
  // A code point found in none of them is left untouched.
  const size_t srcEnd = srcIdx + srcCount;
  while (srcIdx < srcEnd)
  {
    const TVecIdx pos = TVecIdx(srcIdx);
    const int ch = src[pos];
    int keyId;  // negative when the looked-up key is absent
    if (turkic && ((keyId = cfTurkic.GetKeyId(ch)) >= 0))
      src[pos] = cfTurkic[keyId];
    else if ((keyId = cfSimple.GetKeyId(ch)) >= 0)
      src[pos] = cfSimple[keyId];
    else if ((keyId = cfCommon.GetKeyId(ch)) >= 0)
      src[pos] = cfCommon[keyId];
    srcIdx++;
  }
}


| void TUniCaseFolding::Load | ( | TSIn & | SIn | ) | [inline] |
Definition at line 285 of file unicode.h.
References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Load(), and TSIn::LoadCs().
Referenced by TUniChDb::Load().


| void TUniCaseFolding::LoadTxt | ( | const TStr & | fileName | ) |
Definition at line 509 of file unicode.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), cfCommon, cfFull, cfSimple, cfTurkic, Clr(), TStr::CStr(), FailR, TUniChDb::TUcdFileReader::GetNextLine(), IAssert, THash< TKey, TDat, THashFunc >::IsKey(), THash< TKey, TDat, THashFunc >::Len(), TUniChDb::TUcdFileReader::Open(), TUniChDb::TUcdFileReader::ParseCodePoint(), and TUniChDb::TUcdFileReader::ParseCodePointList().
Referenced by TUniChDb::LoadTxt().
{
  // Rebuilds the four folding tables from the text file fileName.
  // Each parsed line supplies a code point (fields[0]), a one-letter status
  // (fields[1]) and its mapping (fields[2]). Status selects the table:
  // "C" -> cfCommon, "S" -> cfSimple, "T" -> cfTurkic, "F" -> cfFull.
  // Any other status is reported through FailR. A code point may appear at
  // most once per table.
  Clr();
  TUniChDb::TUcdFileReader reader;
  reader.Open(fileName);
  TStrV fields;
  while (reader.GetNextLine(fields))
  {
    const int cp = reader.ParseCodePoint(fields[0]);
    const TStr status = fields[1];
    const TStr mapsTo = fields[2];
    const bool isCommon = (status == "C");
    const bool isSimple = (status == "S");
    const bool isTurkic = (status == "T");
    if (isCommon || isSimple || isTurkic)
    {
      // Single code point -> single code point mapping.
      TIntH &dest = isCommon ? cfCommon : (isSimple ? cfSimple : cfTurkic);
      IAssert(! dest.IsKey(cp));
      const int cp2 = reader.ParseCodePoint(mapsTo);
      dest.AddDat(cp, cp2);
    }
    else if (status == "F")
    {
      // Single code point -> non-empty sequence of code points.
      TIntIntVH &dest = cfFull;
      IAssert(! dest.IsKey(cp));
      TIntV cps;
      reader.ParseCodePointList(mapsTo, cps);
      IAssert(cps.Len() > 0);
      dest.AddDat(cp, cps);
    }
    else
    {
      FailR(status.CStr());
    }
  }
  // Summary of how many mappings ended up in each table.
  printf("TUniCaseFolding(\"%s\"): %d common, %d simple, %d full, %d Turkic.\n",
    fileName.CStr(), cfCommon.Len(), cfSimple.Len(), cfFull.Len(), cfTurkic.Len());
}


| void TUniCaseFolding::Save | ( | TSOut & | SOut | ) | const [inline] |
Definition at line 286 of file unicode.h.
References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Save(), and TSOut::SaveCs().
Referenced by TUniChDb::Save().
{
  // Writes the four folding tables to SOut in the fixed order
  // common, simple, full, Turkic, and then calls SOut.SaveCs().
  // NOTE(review): Load() presumably reads them back in the same order -- confirm.
  cfCommon.Save(SOut);
  cfSimple.Save(SOut);
  cfFull.Save(SOut);
  cfTurkic.Save(SOut);
  SOut.SaveCs();
}


| void TUniCaseFolding::Test | ( | const TIntV & | src, |
| const TIntV & | expectedDest, | ||
| const bool | full, | ||
| const bool | turkic, | ||
| FILE * | f | ||
| ) | [protected] |
Definition at line 535 of file unicode.cpp.
References Fold(), IAssert, and TVec< TVal >::Len().
Referenced by TUniChDb::Test().
{
  // Folds src with the given full/turkic settings, logs the input and the
  // output as hexadecimal code points to f, and asserts that the output
  // matches expectedDest element by element.
  const char *modeLabel = full ? "full" : "simple";
  const char *turkicLabel = turkic ? ", turkic" : "";
  fprintf(f, "TUniCaseFolding(%s%s): ", modeLabel, turkicLabel);
  for (int srcPos = 0; srcPos < src.Len(); srcPos++)
    fprintf(f, " %04x", int(src[srcPos]));

  TIntV dest;
  Fold(src, 0, src.Len(), dest, true, full, turkic);

  fprintf(f, "\n -> ");
  for (int destPos = 0; destPos < dest.Len(); destPos++)
    fprintf(f, " %04x", int(dest[destPos]));
  fprintf(f, "\n");

  // Lengths are compared first so the element-wise check below never
  // indexes past the end of expectedDest.
  IAssert(dest.Len() == expectedDest.Len());
  for (int i = 0; i < dest.Len(); i++)
  {
    IAssert(dest[i] == expectedDest[i]);
  }
}


| void TUniCaseFolding::Test | ( | ) |
Definition at line 553 of file unicode.cpp.
References anonymous_namespace{unicode.cpp}::VB.
{
// Self-test: folds the same five code points (0x41, 0x62, 0x49, 0x43, 0xdf)
// under every combination of the full/turkic flags and checks each result
// against a hard-coded expected sequence. Diagnostics are written to stderr.
FILE *f = stderr;
// NOTE(review): (VB, a, b, ...) presumably builds a TIntV holding a, b, ...
// through an overloaded comma operator -- confirm against TVectorBuilder.
TVectorBuilder VB;
// simple: 0x41/0x49/0x43 become 0x61/0x69/0x63; 0xdf stays as it is
Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0xdf), false, false, f);
// simple + turkic: as above, except 0x49 becomes 0x131 instead of 0x69
Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0xdf), false, true, f);
// full: 0xdf expands to the two code points 0x73, 0x73
Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0x73, 0x73), true, false, f);
// full + turkic: 0x49 becomes 0x131 and 0xdf expands to 0x73, 0x73
Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0x73, 0x73), true, true, f);
}
TIntH TUniCaseFolding::cfCommon [protected] |
TIntIntVH TUniCaseFolding::cfFull [protected] |
TIntH TUniCaseFolding::cfSimple [protected] |
TIntH TUniCaseFolding::cfTurkic [protected] |