| 
    SNAP Library 2.0, Developer Reference
    2013-05-13 16:33:57
    
   SNAP, a general-purpose, high-performance system for the analysis and manipulation of large networks 
   | 
  
  
  
 
#include <unicode.h>

Public Member Functions | |
| TUniCaseFolding () | |
| TUniCaseFolding (TSIn &SIn) | |
| void | Load (TSIn &SIn) | 
| void | Save (TSOut &SOut) const | 
| void | Clr () | 
| void | LoadTxt (const TStr &fileName) | 
| template<typename TSrcVec , typename TDestCh > | |
| void | Fold (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const | 
| template<typename TSrcVec > | |
| void | FoldInPlace (TSrcVec &src, size_t srcIdx, const size_t srcCount, const bool turkic) const | 
| void | Test () | 
Protected Types | |
| typedef TUniVecIdx | TVecIdx | 
Protected Member Functions | |
| void | Test (const TIntV &src, const TIntV &expectedDest, const bool full, const bool turkic, FILE *f) | 
Static Protected Member Functions | |
| template<typename TSrcDat , typename TDestDat > | |
| static void | AppendVector (const TVec< TSrcDat > &src, TVec< TDestDat > &dest) | 
Protected Attributes | |
| TIntH | cfCommon | 
| TIntH | cfSimple | 
| TIntH | cfTurkic | 
| TIntIntVH | cfFull | 
Friends | |
| class | TUniChDb | 
typedef TUniVecIdx TUniCaseFolding::TVecIdx [protected] | 
        
| TUniCaseFolding::TUniCaseFolding | ( | ) |  [inline] | 
        
| TUniCaseFolding::TUniCaseFolding | ( | TSIn & | SIn | ) |  [inline, explicit] | 
        
| static void TUniCaseFolding::AppendVector | ( | const TVec< TSrcDat > & | src, | 
| TVec< TDestDat > & | dest | ||
| ) |  [inline, static, protected] | 
        
Definition at line 278 of file unicode.h.
References TVec< TVal, TSizeTy >::Add(), and TVec< TVal, TSizeTy >::Len().
Referenced by Fold(), and TUniChDb::GetCaseConverted().


| void TUniCaseFolding::Clr | ( | ) |  [inline] | 
        
Definition at line 288 of file unicode.h.
References cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::Clr().
Referenced by TUniChDb::Clr(), and LoadTxt().


| void TUniCaseFolding::Fold | ( | const TSrcVec & | src, | 
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| TVec< TDestCh > & | dest, | ||
| const bool | clrDest, | ||
| const bool | full, | ||
| const bool | turkic | ||
| ) |  const [inline] | 
        
Definition at line 293 of file unicode.h.
References TVec< TVal, TSizeTy >::Add(), AppendVector(), cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().
Referenced by TUniChDb::GetCaseFolded(), and Test().
        {
                // Case-folds the code points src[srcIdx .. srcIdx + srcCount) and appends the
                // result to dest.
                //
                //   clrDest - if true, dest is emptied first; if false, the folded code points
                //             are appended after whatever dest already holds.
                //   full    - if true, mappings from cfFull are used (one code point may expand
                //             to several); otherwise the one-to-one mappings from cfSimple.
                //   turkic  - if true, mappings from cfTurkic take precedence over all others.
                //
                // A code point with no mapping in the selected tables falls back to cfCommon,
                // and is copied unchanged if it is not there either.
                //
                // The clrDest flag used to be silently ignored; it is honored now.  dest must
                // not be the same vector as src when clrDest is true.
                if (clrDest) dest.Clr();
                for (const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; )
                {
                        int c = src[TVecIdx(srcIdx)], i; srcIdx++;
                        // Lookup order: Turkic (if requested), then full or simple, then common.
                        if (turkic && ((i = cfTurkic.GetKeyId(c)) >= 0)) { dest.Add(cfTurkic[i]); continue; }
                        if (full && ((i = cfFull.GetKeyId(c)) >= 0)) { AppendVector(cfFull[i], dest); continue; }
                        if ((! full) && ((i = cfSimple.GetKeyId(c)) >= 0)) { dest.Add(cfSimple[i]); continue; }
                        i = cfCommon.GetKeyId(c); if (i >= 0) dest.Add(cfCommon[i]); else dest.Add(c);
                }
        }


| void TUniCaseFolding::FoldInPlace | ( | TSrcVec & | src, | 
| size_t | srcIdx, | ||
| const size_t | srcCount, | ||
| const bool | turkic | ||
| ) |  const [inline] | 
        
Definition at line 307 of file unicode.h.
References cfCommon, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().
Referenced by TUniChDb::ToCaseFolded().
        {
                // Overwrites each code point of src[srcIdx .. srcIdx + srcCount) with its
                // one-to-one case folding.  Tables are consulted in the order cfTurkic (only
                // when 'turkic' is set), cfSimple, cfCommon; a code point found in none of
                // them is left untouched.
                const size_t endIdx = srcIdx + srcCount;
                while (srcIdx < endIdx)
                {
                        const TVecIdx pos = TVecIdx(srcIdx);
                        srcIdx++;
                        int codePoint = src[pos];
                        int keyId;
                        if (turkic && ((keyId = cfTurkic.GetKeyId(codePoint)) >= 0))
                        {
                                src[pos] = cfTurkic[keyId];
                        }
                        else if ((keyId = cfSimple.GetKeyId(codePoint)) >= 0)
                        {
                                src[pos] = cfSimple[keyId];
                        }
                        else if ((keyId = cfCommon.GetKeyId(codePoint)) >= 0)
                        {
                                src[pos] = cfCommon[keyId];
                        }
                }
        }


| void TUniCaseFolding::Load | ( | TSIn & | SIn | ) |  [inline] | 
        
Definition at line 286 of file unicode.h.
References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Load(), and TSIn::LoadCs().
Referenced by TUniChDb::Load().


| void TUniCaseFolding::LoadTxt | ( | const TStr & | fileName | ) | 
Definition at line 509 of file unicode.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), cfCommon, cfFull, cfSimple, cfTurkic, Clr(), TStr::CStr(), FailR, TUniChDb::TUcdFileReader::GetNextLine(), IAssert, THash< TKey, TDat, THashFunc >::IsKey(), THash< TKey, TDat, THashFunc >::Len(), TUniChDb::TUcdFileReader::Open(), TUniChDb::TUcdFileReader::ParseCodePoint(), and TUniChDb::TUcdFileReader::ParseCodePointList().
Referenced by TUniChDb::LoadTxt().
{
        // Rebuilds all four folding tables from the text file 'fileName'.
        // Each record supplies a code point (field 0), a status (field 1) and a mapping
        // (field 2).  Status "C", "S" and "T" add a single-code-point mapping to cfCommon,
        // cfSimple and cfTurkic respectively; "F" adds a code-point list to cfFull.
        // Any other status is reported through FailR, and a code point that is already
        // present in its target table trips an assertion.
        Clr();
        TUniChDb::TUcdFileReader reader;
        reader.Open(fileName);
        TStrV fields;
        while (reader.GetNextLine(fields))
        {
                int cp = reader.ParseCodePoint(fields[0]);
                const TStr status = fields[1];
                const TStr mapsTo = fields[2];
                const bool isCommon = (status == "C");
                const bool isSimple = (status == "S");
                if (isCommon || isSimple || status == "T")
                {
                        // One-to-one mapping: pick the table that matches the status.
                        TIntH &dest = (isCommon ? cfCommon : (isSimple ? cfSimple : cfTurkic));
                        IAssert(! dest.IsKey(cp));
                        const int target = reader.ParseCodePoint(mapsTo);
                        dest.AddDat(cp, target);
                }
                else if (status == "F")
                {
                        // Full mapping: the target is a non-empty list of code points.
                        TIntIntVH &dest = cfFull;
                        IAssert(! dest.IsKey(cp));
                        TIntV cps;
                        reader.ParseCodePointList(mapsTo, cps);
                        IAssert(cps.Len() > 0);
                        dest.AddDat(cp, cps);
                }
                else
                {
                        FailR(status.CStr());
                }
        }
        // Summary of how many mappings of each kind were loaded.
        printf("TUniCaseFolding(\"%s\"): %d common, %d simple, %d full, %d Turkic.\n",
                fileName.CStr(), cfCommon.Len(), cfSimple.Len(), cfFull.Len(), cfTurkic.Len());
}


| void TUniCaseFolding::Save | ( | TSOut & | SOut | ) |  const [inline] | 
        
Definition at line 287 of file unicode.h.
References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Save(), and TSOut::SaveCs().
Referenced by TUniChDb::Save().
{
        // Writes the four folding tables to SOut in the order common, simple, full,
        // Turkic, and finishes with SOut.SaveCs().
        cfCommon.Save(SOut);
        cfSimple.Save(SOut);
        cfFull.Save(SOut);
        cfTurkic.Save(SOut);
        SOut.SaveCs();
}


| void TUniCaseFolding::Test | ( | const TIntV & | src, | 
| const TIntV & | expectedDest, | ||
| const bool | full, | ||
| const bool | turkic, | ||
| FILE * | f | ||
| ) |  [protected] | 
        
Definition at line 535 of file unicode.cpp.
References Fold(), IAssert, and TVec< TVal, TSizeTy >::Len().
Referenced by TUniChDb::Test().
{
        // Folds 'src' with the given 'full' / 'turkic' settings, writes the input and the
        // folded output to 'f' as hexadecimal code points, and asserts that the output
        // equals 'expectedDest' element by element.
        const char *modeName = (full ? "full" : "simple");
        const char *turkicSuffix = (turkic ? ", turkic" : "");
        fprintf(f, "TUniCaseFolding(%s%s): ", modeName, turkicSuffix);
        for (int srcN = 0; srcN < src.Len(); srcN++)
        {
                fprintf(f, " %04x", int(src[srcN]));
        }

        TIntV dest;
        Fold(src, 0, src.Len(), dest, true, full, turkic);

        fprintf(f, "\n  -> ");
        for (int destN = 0; destN < dest.Len(); destN++)
        {
                fprintf(f, " %04x", int(dest[destN]));
        }
        fprintf(f, "\n");

        // Same length first, then the same code point at every position.
        IAssert(dest.Len() == expectedDest.Len());
        for (int i = 0; i < dest.Len(); i++)
        {
                IAssert(dest[i] == expectedDest[i]);
        }
}


| void TUniCaseFolding::Test | ( | ) | 
Definition at line 553 of file unicode.cpp.
References anonymous_namespace{unicode.cpp}::VB.
{
        // Self-test: folds one fixed input under all four (full, turkic) combinations by
        // calling the five-argument Test overload, with the trace written to stderr.
        // The input is always U+0041, U+0062, U+0049, U+0043, U+00DF.
        // NOTE(review): (VB, a, b, ...) presumably builds an integer vector through an
        // overloaded comma operator on TVectorBuilder, which is defined elsewhere -- confirm.
        FILE *f = stderr;
        TVectorBuilder VB;
        // simple: 0x41 -> 0x61, 0x49 -> 0x69, 0x43 -> 0x63; 0x62 and 0xdf are expected unchanged
        Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0xdf), false, false, f);
        // simple + turkic: as above, except that 0x49 is expected to fold to 0x131
        Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0xdf), false, true, f);
        // full: as simple, except that 0xdf is expected to expand to 0x73 0x73
        Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0x73, 0x73), true, false, f);
        // full + turkic: 0x49 -> 0x131 and 0xdf -> 0x73 0x73
        Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0x73, 0x73), true, true, f);
}
TIntH TUniCaseFolding::cfCommon [protected] | 
        
TIntIntVH TUniCaseFolding::cfFull [protected] | 
        
TIntH TUniCaseFolding::cfSimple [protected] | 
        
TIntH TUniCaseFolding::cfTurkic [protected] |