SNAP Library, Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // Spread-Sheet 00003 TStr& TSs::At(const int& X, const int& Y){ 00004 // Fail; 00005 if (Y>=CellStrVV.Len()){CellStrVV.Reserve(Y+1, Y+1);} 00006 if (X>=CellStrVV[Y]->Len()){CellStrVV[Y]->V.Reserve(X+1, X+1);} 00007 return CellStrVV[Y]->V[X]; 00008 } 00009 00010 void TSs::PutVal(const int& X, const int& Y, const TStr& Str){ 00011 if (Y>=CellStrVV.Len()){CellStrVV.Reserve(Y+1, Y+1);} 00012 if (X>=CellStrVV[Y]->Len()){CellStrVV[Y]->V.Reserve(X+1, X+1);} 00013 CellStrVV[Y]->V[X]=Str; 00014 } 00015 00016 TStr TSs::GetVal(const int& X, const int& Y) const { 00017 if ((0<=Y)&&(Y<CellStrVV.Len())){ 00018 if ((0<=X)&&(X<CellStrVV[Y]->Len())){ 00019 return CellStrVV[Y]->V[X]; 00020 } else { 00021 return TStr::GetNullStr(); 00022 } 00023 } else { 00024 return TStr::GetNullStr(); 00025 } 00026 } 00027 00028 int TSs::GetXLen() const { 00029 if (CellStrVV.Len()==0){ 00030 return 0; 00031 } else { 00032 int MxXLen=CellStrVV[0]->Len(); 00033 for (int Y=1; Y<CellStrVV.Len(); Y++){ 00034 MxXLen=TInt::GetMx(MxXLen, CellStrVV[Y]->Len());} 00035 return MxXLen; 00036 } 00037 } 00038 00039 int TSs::GetXLen(const int& Y) const { 00040 if ((0<=Y)&&(Y<CellStrVV.Len())){ 00041 return CellStrVV[Y]->Len(); 00042 } else { 00043 return 0; 00044 } 00045 } 00046 00047 int TSs::GetYLen() const { 00048 return CellStrVV.Len(); 00049 } 00050 00051 int TSs::SearchX(const int& Y, const TStr& Str) const { 00052 return CellStrVV[Y]->V.SearchForw(Str); 00053 } 00054 00055 int TSs::SearchY(const int& X, const TStr& Str) const { 00056 int YLen=GetYLen(); 00057 for (int Y=0; Y<YLen; Y++){ 00058 if (Str==GetVal(X, Y)){return Y;}} 00059 return -1; 00060 } 00061 00062 void TSs::DelX(const int& X){ 00063 int YLen=GetYLen(); 00064 for (int Y=0; Y<YLen; Y++){ 00065 CellStrVV[Y]->V.Del(X); 00066 } 00067 } 00068 00069 void TSs::DelY(const int& Y){ 00070 CellStrVV.Del(Y); 00071 } 00072 00073 int TSs::GetFldX(const TStr& FldNm, const TStr& NewFldNm, const int& Y) const { 00074 if (GetYLen()>Y){ 00075 int 
XLen=GetXLen(Y); 00076 for (int X=0; X<XLen; X++){ 00077 if (GetVal(X, Y).GetTrunc()==FldNm){ 00078 if (!NewFldNm.Empty()){GetVal(X, Y)=NewFldNm;} 00079 return X; 00080 } 00081 } 00082 return -1; 00083 } else { 00084 return -1; 00085 } 00086 } 00087 00088 int TSs::GetFldY(const TStr& FldNm, const TStr& NewFldNm, const int& X) const { 00089 for (int Y=0; Y<GetYLen(); Y++){ 00090 if (GetXLen(Y)>X){ 00091 if (GetVal(X, Y).GetTrunc()==FldNm){ 00092 if (!NewFldNm.Empty()){GetVal(X, Y)=NewFldNm;} 00093 return Y; 00094 } 00095 } 00096 } 00097 return -1; 00098 } 00099 00100 PSs TSs::LoadTxt( 00101 const TSsFmt& SsFmt, const TStr& FNm, 00102 const PNotify& Notify, const bool& IsExcelEoln, 00103 const int& MxY, const TIntV& AllowedColNV, const bool& IsQStr){ 00104 TNotify::OnNotify(Notify, ntInfo, TStr("Loading File ")+FNm+" ..."); 00105 PSIn SIn=TFIn::New(FNm); 00106 PSs Ss=TSs::New(); 00107 if (!SIn->Eof()){ 00108 int X=0; int Y=0; int PrevX=-1; int PrevY=-1; 00109 char Ch=SIn->GetCh(); TChA ChA; 00110 while (!SIn->Eof()){ 00111 // compose value 00112 ChA.Clr(); 00113 if (IsQStr&&(Ch=='"')){ 00114 // quoted string ('""' sequence means '"') 00115 Ch=SIn->GetCh(); 00116 forever { 00117 while ((!SIn->Eof())&&(Ch!='"')){ 00118 ChA+=Ch; Ch=SIn->GetCh();} 00119 if (Ch=='"'){ 00120 Ch=SIn->GetCh(); 00121 if (Ch=='"'){ChA+=Ch; Ch=SIn->GetCh();} 00122 else {break;} 00123 } 00124 } 00125 } else { 00126 if (SsFmt==ssfTabSep){ 00127 while ((!SIn->Eof())&&(Ch!='\t')&&(Ch!='\r')&&((Ch!='\n')||IsExcelEoln)){ 00128 ChA+=Ch; Ch=SIn->GetCh(); 00129 } 00130 } else 00131 if (SsFmt==ssfCommaSep){ 00132 while ((!SIn->Eof())&&(Ch!=',')&&(Ch!='\r')&&((Ch!='\n')||IsExcelEoln)){ 00133 ChA+=Ch; Ch=SIn->GetCh(); 00134 } 00135 } else 00136 if (SsFmt==ssfSemicolonSep){ 00137 while ((!SIn->Eof())&&(Ch!=';')&&(Ch!='\r')&&((Ch!='\n')||IsExcelEoln)){ 00138 ChA+=Ch; Ch=SIn->GetCh(); 00139 } 00140 } else 00141 if (SsFmt==ssfVBar){ 00142 while ((!SIn->Eof())&&(Ch!='|')&&(Ch!='\r')&&((Ch!='\n')||IsExcelEoln)){ 
00143 ChA+=Ch; Ch=SIn->GetCh(); 00144 } 00145 } else 00146 if (SsFmt==ssfSpaceSep){ 00147 while ((!SIn->Eof())&&(Ch!=' ')&&(Ch!='\r')&&((Ch!='\n')||IsExcelEoln)){ 00148 ChA+=Ch; Ch=SIn->GetCh(); 00149 } 00150 } else { 00151 Fail; 00152 } 00153 } 00154 // add new line if neccessary 00155 if (PrevY!=Y){ 00156 if ((MxY!=-1)&&(Ss->CellStrVV.Len()==MxY)){break;} 00157 Ss->CellStrVV.Add(TStrVP::New()); PrevY=Y; 00158 int Recs=Ss->CellStrVV.Len(); 00159 if (Recs%1000==0){ 00160 TNotify::OnStatus(Notify, TStr::Fmt(" %d\r", Recs));} 00161 } 00162 // add value to spreadsheet 00163 if (AllowedColNV.Empty()||AllowedColNV.IsIn(X)){ 00164 Ss->CellStrVV[Y]->V.Add(ChA); 00165 } 00166 // process delimiters 00167 if (SIn->Eof()){ 00168 break; 00169 } else 00170 if ((SsFmt==ssfTabSep)&&(Ch=='\t')){ 00171 X++; Ch=SIn->GetCh(); 00172 } else 00173 if ((SsFmt==ssfCommaSep)&&(Ch==',')){ 00174 X++; Ch=SIn->GetCh(); 00175 } else 00176 if ((SsFmt==ssfSemicolonSep)&&(Ch==';')){ 00177 X++; Ch=SIn->GetCh(); 00178 } else 00179 if ((SsFmt==ssfVBar)&&(Ch=='|')){ 00180 X++; Ch=SIn->GetCh(); 00181 } else 00182 if ((SsFmt==ssfSpaceSep)&&(Ch==' ')){ 00183 X++; Ch=SIn->GetCh(); 00184 } else 00185 if (Ch=='\r'){ 00186 if ((PrevX!=-1)&&(X!=PrevX)){ 00187 TNotify::OnNotify(Notify, ntWarn, "Number of fields is not the same!");} 00188 PrevX=X; X=0; Y++; Ch=SIn->GetCh(); 00189 if ((Ch=='\n')&&(!SIn->Eof())){Ch=SIn->GetCh();} 00190 //if (Ss->CellStrVV.Len()%1000==0){Y--; break;} 00191 } else 00192 if (Ch=='\n'){ 00193 if ((PrevX!=-1)&&(X!=PrevX)){ 00194 TNotify::OnNotify(Notify, ntWarn, "Number of fields is not the same!");} 00195 PrevX=X; X=0; Y++; Ch=SIn->GetCh(); 00196 if ((Ch=='\r')&&(!SIn->Eof())){Ch=SIn->GetCh();} 00197 //if (Ss->CellStrVV.Len()%1000==0){Y--; break;} 00198 } else { 00199 Fail; 00200 } 00201 } 00202 } 00203 int Recs=Ss->CellStrVV.Len(); 00204 TNotify::OnNotify(Notify, ntInfo, TStr::Fmt(" %d records read.", Recs)); 00205 TNotify::OnNotify(Notify, ntInfo, "... 
Done."); 00206 return Ss; 00207 } 00208 00209 void TSs::SaveTxt(const TStr& FNm, const PNotify&) const { 00210 PSOut SOut=TFOut::New(FNm); 00211 for (int Y=0; Y<CellStrVV.Len(); Y++){ 00212 for (int X=0; X<CellStrVV[Y]->Len(); X++){ 00213 if (X>0){SOut->PutCh('\t');} 00214 TStr Str=CellStrVV[Y]->V[X]; 00215 TChA ChA(Str); 00216 for (int ChN=0; ChN<ChA.Len(); ChN++){ 00217 char Ch=ChA[ChN]; 00218 if ((Ch=='\t')||(Ch=='\r')||(Ch=='\n')){ 00219 ChA.PutCh(ChN, ' '); 00220 } 00221 } 00222 SOut->PutStr(ChA); 00223 } 00224 SOut->PutCh('\r'); SOut->PutCh('\n'); 00225 } 00226 } 00227 00228 void TSs::LoadTxtFldV( 00229 const TSsFmt& SsFmt, const PSIn& SIn, char& Ch, 00230 TStrV& FldValV, const bool& IsExcelEoln, const bool& IsQStr){ 00231 if (!SIn->Eof()){ 00232 FldValV.Clr(false); int X=0; 00233 if (Ch==TCh::NullCh){Ch=SIn->GetCh();} 00234 TChA ChA; 00235 while (!SIn->Eof()){ 00236 // compose value 00237 ChA.Clr(); 00238 if (IsQStr&&(Ch=='"')){ 00239 // quoted string ('""' sequence means '"') 00240 Ch=SIn->GetCh(); 00241 forever { 00242 while ((!SIn->Eof())&&(Ch!='"')){ 00243 ChA+=Ch; Ch=SIn->GetCh();} 00244 if (Ch=='"'){ 00245 Ch=SIn->GetCh(); 00246 if (Ch=='"'){ChA+=Ch; Ch=SIn->GetCh();} 00247 else {break;} 00248 } 00249 } 00250 } else { 00251 if (SsFmt==ssfTabSep){ 00252 while ((!SIn->Eof())&&(Ch!='\t')&&(Ch!='\r')&& 00253 ((Ch!='\n')||IsExcelEoln)){ 00254 ChA+=Ch; Ch=SIn->GetCh(); 00255 } 00256 if ((!ChA.Empty())&&(ChA.LastCh()=='\"')){ 00257 ChA.Pop();} 00258 } else 00259 if (SsFmt==ssfCommaSep){ 00260 while ((!SIn->Eof())&&(Ch!=',')&&(Ch!='\r')&& 00261 ((Ch!='\n')||IsExcelEoln)){ 00262 ChA+=Ch; Ch=SIn->GetCh(); 00263 } 00264 } else 00265 if (SsFmt==ssfSemicolonSep){ 00266 while ((!SIn->Eof())&&(Ch!=';')&&(Ch!='\r')&& 00267 ((Ch!='\n')||IsExcelEoln)){ 00268 ChA+=Ch; Ch=SIn->GetCh(); 00269 } 00270 } else 00271 if (SsFmt==ssfVBar){ 00272 while ((!SIn->Eof())&&(Ch!='|')&&(Ch!='\r')&& 00273 ((Ch!='\n')||IsExcelEoln)){ 00274 ChA+=Ch; Ch=SIn->GetCh(); 00275 } 00276 } else { 
00277 Fail; 00278 } 00279 } 00280 // add value to spreadsheet 00281 ChA.Trunc(); 00282 FldValV.Add(ChA); 00283 // process delimiters 00284 if (SIn->Eof()){ 00285 break; 00286 } else 00287 if ((SsFmt==ssfTabSep)&&(Ch=='\t')){ 00288 X++; Ch=SIn->GetCh(); 00289 } else 00290 if ((SsFmt==ssfCommaSep)&&(Ch==',')){ 00291 X++; Ch=SIn->GetCh(); 00292 } else 00293 if ((SsFmt==ssfSemicolonSep)&&(Ch==';')){ 00294 X++; Ch=SIn->GetCh(); 00295 } else 00296 if ((SsFmt==ssfVBar)&&(Ch=='|')){ 00297 X++; Ch=SIn->GetCh(); 00298 } else 00299 if (Ch=='\r'){ 00300 Ch=SIn->GetCh(); 00301 if ((Ch=='\n')&&(!SIn->Eof())){Ch=SIn->GetCh();} 00302 break; 00303 } else 00304 if (Ch=='\n'){ 00305 X=0; Ch=SIn->GetCh(); 00306 if ((Ch=='\r')&&(!SIn->Eof())){Ch=SIn->GetCh();} 00307 break; 00308 } else { 00309 Fail; 00310 } 00311 } 00312 } 00313 } 00314 00315 TSsFmt TSs::GetSsFmtFromStr(const TStr& SsFmtNm){ 00316 TStr LcSsFmtNm=SsFmtNm.GetLc(); 00317 if (LcSsFmtNm=="tab"){return ssfTabSep;} 00318 else if (LcSsFmtNm=="comma"){return ssfCommaSep;} 00319 else if (LcSsFmtNm=="semicolon"){return ssfSemicolonSep;} 00320 else if (LcSsFmtNm=="vbar"){return ssfVBar;} 00321 else if (LcSsFmtNm=="space"){return ssfSpaceSep;} 00322 else if (LcSsFmtNm=="white"){return ssfWhiteSep;} 00323 else {return ssfUndef;} 00324 } 00325 00326 TStr TSs::GetStrFromSsFmt(const TSsFmt& SsFmt){ 00327 switch (SsFmt){ 00328 case ssfTabSep: return "tab"; 00329 case ssfCommaSep: return "comma"; 00330 case ssfSemicolonSep: return "semicolon"; 00331 case ssfVBar: return "vbar"; 00332 case ssfSpaceSep: return "space"; 00333 case ssfWhiteSep: return "white"; 00334 default: return "undef"; 00335 } 00336 } 00337 00338 TStr TSs::GetSsFmtNmVStr(){ 00339 TChA ChA; 00340 ChA+='('; 00341 ChA+="tab"; ChA+=", "; 00342 ChA+="comma"; ChA+=", "; 00343 ChA+="semicolon"; ChA+=", "; 00344 ChA+="space"; ChA+=", "; 00345 ChA+="white"; ChA+=")"; 00346 return ChA; 00347 } 00348 00350 // Fast-Spread-Sheet-Parser 00351 TSsParser::TSsParser(const TStr& FNm, 
const TSsFmt _SsFmt, const bool& _SkipLeadBlanks, const bool& _SkipCmt, const bool& _SkipEmptyFld) : SsFmt(_SsFmt), 00352 SkipLeadBlanks(_SkipLeadBlanks), SkipCmt(_SkipCmt), SkipEmptyFld(_SkipEmptyFld), LineCnt(0), /*Bf(NULL),*/ SplitCh('\t'), FldV(), FInPt(NULL) { 00353 if (TZipIn::IsZipExt(FNm.GetFExt())) { FInPt = TZipIn::New(FNm); } 00354 else { FInPt = TFIn::New(FNm); } 00355 //Bf = new char [BfLen]; 00356 switch(SsFmt) { 00357 case ssfTabSep : SplitCh = '\t'; break; 00358 case ssfCommaSep : SplitCh = ','; break; 00359 case ssfSemicolonSep : SplitCh = ';'; break; 00360 case ssfVBar : SplitCh = '|'; break; 00361 case ssfSpaceSep : SplitCh = ' '; break; 00362 case ssfWhiteSep: SplitCh = ' '; break; 00363 default: FailR("Unknown separator character."); 00364 } 00365 } 00366 00367 TSsParser::TSsParser(const TStr& FNm, const char& Separator, const bool& _SkipLeadBlanks, const bool& _SkipCmt, const bool& _SkipEmptyFld) : SsFmt(ssfSpaceSep), 00368 SkipLeadBlanks(_SkipLeadBlanks), SkipCmt(_SkipCmt), SkipEmptyFld(_SkipEmptyFld), LineCnt(0), /*Bf(NULL),*/ SplitCh('\t'), FldV(), FInPt(NULL) { 00369 if (TZipIn::IsZipExt(FNm.GetFExt())) { FInPt = TZipIn::New(FNm); } 00370 else { FInPt = TFIn::New(FNm); } 00371 SplitCh = Separator; 00372 } 00373 00374 TSsParser::~TSsParser() { 00375 //if (Bf != NULL) { delete [] Bf; } 00376 } 00377 00378 bool TSsParser::Next() { // split on SplitCh 00379 FldV.Clr(false); 00380 LineStr.Clr(); 00381 FldV.Clr(); 00382 LineCnt++; 00383 if (! FInPt->GetNextLn(LineStr)) { return false; } 00384 if (SkipCmt && LineStr.Len()>0 && LineStr[0]=='#') { return Next(); } 00385 00386 char* cur = LineStr.CStr(); 00387 if (SkipLeadBlanks) { // skip leadning blanks 00388 while (*cur && TCh::IsWs(*cur)) { cur++; } 00389 } 00390 char *last = cur; 00391 while (*cur) { 00392 if (SsFmt == ssfWhiteSep) { while (*cur && ! 
TCh::IsWs(*cur)) { cur++; } } 00393 else { while (*cur && *cur!=SplitCh) { cur++; } } 00394 if (*cur == 0) { break; } 00395 *cur = 0; cur++; 00396 FldV.Add(last); last = cur; 00397 if (SkipEmptyFld && strlen(FldV.Last())==0) { FldV.DelLast(); } // skip empty fields 00398 } 00399 FldV.Add(last); // add last field 00400 if (SkipEmptyFld && FldV.Empty()) { return Next(); } // skip empty lines 00401 return true; 00402 } 00403 00404 void TSsParser::ToLc() { 00405 for (int f = 0; f < FldV.Len(); f++) { 00406 for (char *c = FldV[f]; *c; c++) { 00407 *c = tolower(*c); } 00408 } 00409 } 00410 00411 bool TSsParser::GetInt(const int& FldN, int& Val) const { 00412 // parsing format {ws} [+/-] +{ddd} 00413 int _Val = -1; 00414 bool Minus=false; 00415 const char *c = GetFld(FldN); 00416 while (TCh::IsWs(*c)) { c++; } 00417 if (*c=='-') { Minus=true; c++; } 00418 if (! TCh::IsNum(*c)) { return false; } 00419 _Val = TCh::GetNum(*c); c++; 00420 while (TCh::IsNum(*c)){ 00421 _Val = 10 * _Val + TCh::GetNum(*c); 00422 c++; 00423 } 00424 if (Minus) { _Val = -_Val; } 00425 if (*c != 0) { return false; } 00426 Val = _Val; 00427 return true; 00428 } 00429 00430 bool TSsParser::GetFlt(const int& FldN, double& Val) const { 00431 // parsing format {ws} [+/-] +{d} ([.]{d}) ([E|e] [+/-] +{d}) 00432 const char *c = GetFld(FldN); 00433 while (TCh::IsWs(*c)) { c++; } 00434 if (*c=='+' || *c=='-') { c++; } 00435 if (! TCh::IsNum(*c) && *c!='.') { return false; } 00436 while (TCh::IsNum(*c)) { c++; } 00437 if (*c == '.') { 00438 c++; 00439 while (TCh::IsNum(*c)) { c++; } 00440 } 00441 if (*c=='e' || *c == 'E') { 00442 c++; 00443 if (*c == '+' || *c == '-' ) { c++; } 00444 if (! 
TCh::IsNum(*c)) { return false; } 00445 while (TCh::IsNum(*c)) { c++; } 00446 } 00447 if (*c != 0) { return false; } 00448 Val = atof(GetFld(FldN)); 00449 return true; 00450 } 00451 00452 const char* TSsParser::DumpStr() const { 00453 static TChA ChA(10*1024); 00454 ChA.Clr(); 00455 for (int i = 0; i < FldV.Len(); i++) { 00456 ChA += TStr::Fmt(" %d: '%s'\n", i, FldV[i]); 00457 } 00458 return ChA.CStr(); 00459 } 00460