SNAP Library , Developer Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <zipfl.h>
Public Member Functions | |
TZipIn (const TStr &FNm) | |
TZipIn (const TStr &FNm, bool &OpenedP) | |
~TZipIn () | |
bool | Eof () |
int | Len () const |
char | GetCh () |
char | PeekCh () |
int | GetBf (const void *LBf, const TSize &LBfL) |
uint64 | GetFLen () const |
uint64 | GetCurFPos () const |
Static Public Member Functions | |
static PSIn | New (const TStr &FNm) |
static PSIn | New (const TStr &FNm, bool &OpenedP) |
static bool | IsZipFNm (const TStr &FNm) |
Check whether the file extension of FNm is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2, .bz2). | |
static bool | IsZipExt (const TStr &FNmExt) |
Check whether the file extension FNmExt is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2, .bz2). | |
static TStr | GetCmd (const TStr &ZipFNm) |
Return a command-line string that is executed in order to decompress a file to standard output. | |
static uint64 | GetFLen (const TStr &ZipFNm) |
Return the uncompressed size (in bytes) of the compressed file ZipFNm. | |
Private Member Functions | |
void | FillBf () |
void | CreateZipProcess (const TStr &Cmd, const TStr &ZipFNm) |
TZipIn () | |
TZipIn (const TZipIn &) | |
TZipIn & | operator= (const TZipIn &) |
Static Private Member Functions | |
static void | FillFExtToCmdH () |
Private Attributes | |
FILE * | ZipStdoutRd |
FILE * | ZipStdoutWr |
uint64 | FLen |
uint64 | CurFPos |
char * | Bf |
int | BfC |
int | BfL |
Static Private Attributes | |
static TStrStrH | FExtToCmdH |
static const int | MxBfL = 32*1024 |
Compressed File Input Stream. The class reads from a compressed file without explicitly uncompressing it. This is achieved by running the external 7-Zip program, which uncompresses the file to standard output; that output is then piped to TZipIn. The class requires 7-Zip to be installed on the machine. Go to http://www.7-zip.org to install the software. 7z (7z.exe) is an executable that can decompress the following formats: .gz, .7z, .rar, .zip, .cab, .arj, .bzip2, .bz2. The class TZipIn expects that '7z' ('7z.exe') is in the working path (on non-Windows builds the code invokes '7za' instead). Make sure you can execute '7z e -y -bd -so <FILENAME>'. Note: You can only load .gz files of uncompressed size <2GB. If you load some other format (like .bz2 or .rar) there is no such limitation. Note 2: For 7z to work properly you need both the 7z executable and the directory 'Codecs'.
TZipIn::TZipIn | ( | ) | [private] |
TZipIn::TZipIn | ( | const TZipIn & | ) | [private] |
TZipIn::TZipIn | ( | const TStr & | FNm | ) |
Definition at line 53 of file zipfl.cpp.
References Bf, BfC, BfL, CreateZipProcess(), TStr::CStr(), EAssertR, TStr::Empty(), TFile::Exists(), FillBf(), FLen, TStr::Fmt(), GetCmd(), GetFLen(), MxBfL, ZipStdoutRd, and ZipStdoutWr.
: TSBase(FNm.CStr()), TSIn(FNm),
  ZipStdoutRd(NULL), ZipStdoutWr(NULL),
  FLen(0), CurFPos(0),
  Bf(NULL), BfC(0), BfL(0) {
  // Opens the compressed file FNm for reading: validates the name, queries the
  // uncompressed length, starts the decompression process and pre-loads the
  // first buffer of data.
  EAssertR(! FNm.Empty(), "Empty file-name.");
  EAssertR(TFile::Exists(FNm), TStr::Fmt("File %s does not exist", FNm.CStr()).CStr());
  FLen = TZipIn::GetFLen(FNm);
  // A zero length means an empty file: no process is started and no buffer is allocated.
  if (FLen != 0) {
#ifdef GLib_WIN
    // Describe a pipe whose handles can be inherited by the child process.
    SECURITY_ATTRIBUTES PipeAttr;
    PipeAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
    PipeAttr.bInheritHandle = TRUE;
    PipeAttr.lpSecurityDescriptor = NULL;
    // Create a pipe for the child process's STDOUT.
    const int PipeBufferSz = 32*1024;
    EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &PipeAttr, PipeBufferSz), "Stdout pipe creation failed");
    // Ensure the read handle to the pipe for STDOUT is not inherited.
    SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
#endif
    // On non-Windows builds CreateZipProcess() opens the pipe itself via popen().
    CreateZipProcess(GetCmd(FNm), FNm);
    Bf = new char[MxBfL];
    // Both cursors start at -1 so that FillBf()'s BfC==BfL precondition holds.
    BfL = -1;
    BfC = -1;
    FillBf();
  }
}
TZipIn::TZipIn | ( | const TStr & | FNm, |
bool & | OpenedP | ||
) |
Definition at line 78 of file zipfl.cpp.
References Bf, BfC, BfL, CreateZipProcess(), EAssertR, TStr::Empty(), TFile::Exists(), FillBf(), FLen, GetCmd(), TStr::GetFExt(), GetFLen(), MxBfL, ZipStdoutRd, and ZipStdoutWr.
: TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
  FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
  // Opens the compressed file FNm for reading without failing on a missing file.
  //   FNm     - name of the compressed file; must not be empty.
  //   OpenedP - set to true when FNm exists, false otherwise. When false, no
  //             decompression process is started and the stream stays empty.
  EAssertR(! FNm.Empty(), "Empty file-name.");
  // Check existence first, so the external listing command behind GetFLen()
  // is never run for a file that is not there.
  OpenedP = TFile::Exists(FNm);
  if (! OpenedP) { return; }
  FLen = TZipIn::GetFLen(FNm);
  // Empty file: FillBf() asserts CurFPos < FLen, so there is nothing to start.
  if (FLen == 0) { return; }
#ifdef GLib_WIN
  SECURITY_ATTRIBUTES saAttr;
  saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  saAttr.bInheritHandle = TRUE;
  saAttr.lpSecurityDescriptor = NULL;
  // Create a pipe for the child process's STDOUT.
  EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, 0), "Stdout pipe creation failed");
  // Ensure the read handle to the pipe for STDOUT is not inherited.
  SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
#endif
  // GetCmd() extracts and lower-cases the extension itself, so it is given the
  // full file name, exactly as the single-argument constructor does.
  CreateZipProcess(GetCmd(FNm), FNm);
  Bf = new char[MxBfL];
  // Both cursors start at -1 so that FillBf()'s BfC==BfL precondition holds.
  BfC = BfL = -1;
  FillBf();
}
TZipIn::~TZipIn | ( | ) |
Definition at line 110 of file zipfl.cpp.
References Bf, EAssertR, ZipStdoutRd, and ZipStdoutWr.
{
  // Closes the pipe to the decompression process (if one was opened) and
  // releases the read buffer.
#ifdef GLib_WIN
  if (ZipStdoutRd != NULL) {
    EAssertR(CloseHandle(ZipStdoutRd), "Closing read-end of pipe failed");
  }
  if (ZipStdoutWr != NULL) {
    EAssertR(CloseHandle(ZipStdoutWr)!=0, "Closing write-end of pipe failed");
  }
#else
  // Only the read end is used on this branch; it was opened with popen().
  if (ZipStdoutRd != NULL) {
    EAssertR(pclose(ZipStdoutRd) != -1, "Closing of the process failed");
  }
#endif
  // delete[] on a null pointer is a no-op, so no explicit NULL test is needed.
  delete[] Bf;
}
void TZipIn::CreateZipProcess | ( | const TStr & | Cmd, |
const TStr & | ZipFNm | ||
) | [private] |
Definition at line 6 of file zipfl.cpp.
References TStr::CStr(), EAssertR, TStr::Fmt(), ZipStdoutRd, and ZipStdoutWr.
Referenced by TZipIn().
{
  // Starts the external decompression command and connects its standard
  // output to this stream.
  //   Cmd    - command prefix, as returned by GetCmd().
  //   ZipFNm - compressed file to decompress; appended to Cmd as the last argument.
  // The file name is wrapped in double quotes so that paths containing spaces
  // (or characters such as ';' and '&') reach the program as a single argument.
  // NOTE(review): double quotes do not neutralise '"', '$' or backticks under a
  // POSIX shell; file names from untrusted sources still need validation.
  const TStr CmdLine = TStr::Fmt("%s \"%s\"", Cmd.CStr(), ZipFNm.CStr());
#ifdef GLib_WIN
  PROCESS_INFORMATION piProcInfo;
  STARTUPINFO siStartInfo;
  ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
  ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
  siStartInfo.cb = sizeof(STARTUPINFO);
  // Redirect the child's STDOUT into the write end of the pipe.
  siStartInfo.hStdOutput = ZipStdoutWr;
  siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
  // Create the child process.
  const BOOL FuncRetn = CreateProcess(NULL,
    (LPSTR) CmdLine.CStr(),  // command line
    NULL,          // process security attributes
    NULL,          // primary thread security attributes
    TRUE,          // handles are inherited
    0,             // creation flags
    NULL,          // use parent's environment
    NULL,          // use parent's current directory
    &siStartInfo,  // STARTUPINFO pointer
    &piProcInfo);  // receives PROCESS_INFORMATION
  EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
  // The process and thread handles are not used afterwards.
  CloseHandle(piProcInfo.hProcess);
  CloseHandle(piProcInfo.hThread);
#else
  // popen() runs the command and returns a stream for reading its output.
  ZipStdoutRd = popen(CmdLine.CStr(), "r");
  EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
#endif
}
bool TZipIn::Eof | ( | ) | [inline, virtual] |
void TZipIn::FillBf | ( | ) | [private] |
Definition at line 36 of file zipfl.cpp.
References Bf, BfC, BfL, CurFPos, EAssert, EAssertR, FLen, TSBase::GetSNm(), MxBfL, and ZipStdoutRd.
Referenced by GetBf(), GetCh(), PeekCh(), and TZipIn().
{
  // Reads the next chunk (up to MxBfL bytes) of decompressed data into Bf and
  // resets the read cursor to the start of the buffer.
  // Preconditions: the stream is not yet at FLen, and the current buffer has
  // been fully consumed (BfC == BfL).
  EAssertR(CurFPos < FLen, "End of file "+GetSNm()+" reached.");
  EAssertR((BfC==BfL)/*&&((BfL==-1)||(BfL==MxBfL))*/, "Error reading file '"+GetSNm()+"'.");
#ifdef GLib_WIN
  // Read output from the child process
  DWORD BytesRead;
  EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
#else
  size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
  EAssert(BytesRead != 0);
#endif
  // Advance the stream position and record how many bytes the buffer now holds.
  CurFPos += BytesRead;
  BfL = (int) BytesRead;
  EAssertR((BfC!=0)||(BfL!=0), "Error reading file '"+GetSNm()+"'.");
  BfC = 0;
}
void TZipIn::FillFExtToCmdH | ( | ) | [static, private] |
Definition at line 141 of file zipfl.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), THash< TKey, TDat, THashFunc >::Empty(), and FExtToCmdH.
Referenced by GetCmd(), and IsZipExt().
{
  // Populates FExtToCmdH, the table mapping a compressed-file extension to the
  // command that decompresses it to standard output. Does nothing when the
  // table already has entries.
  if (! FExtToCmdH.Empty()) { return; }
  // 7za decompress: "e -y -bd -so";
#ifdef GLib_WIN
  const char* ZipCmd = "7z.exe e -y -bd -so";
#else
  const char* ZipCmd = "7za e -y -bd -so";
#endif
  // Every supported extension maps to the same command.
  static const char* const ExtV[] = {
    ".gz", ".7z", ".rar", ".zip", ".cab", ".arj", ".bzip2", ".bz2"
  };
  const int Exts = (int) (sizeof(ExtV) / sizeof(ExtV[0]));
  for (int ExtN = 0; ExtN < Exts; ExtN++) {
    FExtToCmdH.AddDat(ExtV[ExtN], ZipCmd);
  }
}
int TZipIn::GetBf | ( | const void * | LBf, |
const TSize & | LBfL | ||
) | [virtual] |
Implements TSIn.
Definition at line 123 of file zipfl.cpp.
References Bf, BfC, BfL, and FillBf().
{
  // Copies LBfL bytes from the stream into the caller's buffer LBf and returns
  // the sum of the copied bytes. The internal buffer is refilled through
  // FillBf() whenever it runs out in the middle of the request.
  char* const Dst = (char*) LBf;  // the interface passes a const pointer, but the buffer is written to
  int ByteSum = 0;
  if (TSize(BfC+LBfL) <= TSize(BfL)) {
    // Fast path: the whole request is already buffered, so no refill checks.
    for (TSize DstN = 0; DstN < LBfL; DstN++) {
      const char Ch = Bf[BfC++];
      Dst[DstN] = Ch;
      ByteSum += Ch;
    }
    return ByteSum;
  }
  // Slow path: the request crosses the end of the buffer.
  for (TSize DstN = 0; DstN < LBfL; DstN++) {
    if (BfC == BfL) { FillBf(); }
    const char Ch = Bf[BfC++];
    Dst[DstN] = Ch;
    ByteSum += Ch;
  }
  return ByteSum;
}
char TZipIn::GetCh | ( | ) | [inline, virtual] |
TStr TZipIn::GetCmd | ( | const TStr & | ZipFNm | ) | [static] |
Return a command-line string that is executed in order to decompress a file to standard output.
Definition at line 160 of file zipfl.cpp.
References TStr::CStr(), EAssertR, THash< TKey, TDat, THashFunc >::Empty(), FExtToCmdH, FillFExtToCmdH(), TStr::Fmt(), THash< TKey, TDat, THashFunc >::GetDat(), TStr::GetFExt(), TStr::GetLc(), and THash< TKey, TDat, THashFunc >::IsKey().
Referenced by TZipIn().
{
  // Looks up the decompression command for ZipFNm by its lower-cased file
  // extension; asserts when the extension is not in the table.
  if (FExtToCmdH.Empty()) {
    FillFExtToCmdH();  // fill the extension table on first use
  }
  const TStr LcExt = ZipFNm.GetFExt().GetLc();
  EAssertR(FExtToCmdH.IsKey(LcExt), TStr::Fmt("Unsupported file extension '%s'", LcExt.CStr()));
  return FExtToCmdH.GetDat(LcExt);
}
uint64 TZipIn::GetCurFPos | ( | ) | const [inline] |
uint64 TZipIn::GetFLen | ( | ) | const [inline] |
uint64 TZipIn::GetFLen | ( | const TStr & | ZipFNm | ) | [static] |
Return the uncompressed size (in bytes) of the compressed file ZipFNm.
Definition at line 167 of file zipfl.cpp.
References Bf, BfC, BfL, TStr::CStr(), EAssert, EAssertR, TStr::Fmt(), IAssert, TVec< TVal >::Len(), MxBfL, SaveToErrLog(), TStr::SplitOnWs(), WrNotify(), ZipStdoutRd, and ZipStdoutWr.
{
  // Returns the uncompressed size (in bytes) of the compressed file ZipFNm.
  // Runs the archiver's list command ("l"), reads the first chunk of its
  // output and parses the size column of the last entry before the closing
  // "-----" separator. Returns 0, after logging the archiver's output, when
  // that layout is not found.
  // The file name is double-quoted so that paths containing spaces stay a
  // single argument.
#ifdef GLib_WIN
  HANDLE ZipStdoutRd, ZipStdoutWr;
  // create pipes
  SECURITY_ATTRIBUTES saAttr;
  saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  saAttr.bInheritHandle = TRUE;
  saAttr.lpSecurityDescriptor = NULL;
  // Create a pipe for the child process's STDOUT.
  const int PipeBufferSz = 32*1024;
  EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
  // Ensure the read handle to the pipe for STDOUT is not inherited.
  SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
  {
    const TStr CmdLine = TStr::Fmt("7z.exe l \"%s\"", ZipFNm.CStr());
    PROCESS_INFORMATION piProcInfo;
    STARTUPINFO siStartInfo;
    ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
    ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
    siStartInfo.cb = sizeof(STARTUPINFO);
    siStartInfo.hStdOutput = ZipStdoutWr;
    siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
    // Create the child process.
    const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(),
      NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo);
    EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
    CloseHandle(piProcInfo.hProcess);
    CloseHandle(piProcInfo.hThread);
  }
#else
  const TStr CmdLine = TStr::Fmt("7za l \"%s\"", ZipFNm.CStr());
  FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r");
  EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
#endif
  // Read output from the child process.
  // One extra byte is allocated so the terminating 0 written below always
  // fits, even when the read fills all BfSz bytes.
  const int BfSz = 32*1024;
  char* Bf = new char [BfSz+1];
  memset(Bf, 0, BfSz+1);
#ifdef GLib_WIN
  DWORD BytesRead = 0;
  const BOOL ReadOk = ReadFile(ZipStdoutRd, Bf, BfSz, &BytesRead, NULL);
  // Both pipe ends are local to this function; close them so they do not leak.
  CloseHandle(ZipStdoutWr);
  CloseHandle(ZipStdoutRd);
  EAssert(ReadOk != 0);
#else
  size_t BytesRead = fread(Bf, 1, BfSz, ZipStdoutRd);
  EAssert(BytesRead != 0);
  EAssert(pclose(ZipStdoutRd) != -1);
#endif
  const int BfL = (int) BytesRead;
  IAssert(BfL != 0);
  Bf[BfL] = 0;
  // find file length
  TStr Str(Bf);
  delete [] Bf;
  TStrV StrV;
  Str.SplitOnWs(StrV);
  // Scan backwards for the last token that starts the "-----" separator.
  int n = StrV.Len()-1;
  while (n > 0 && ! StrV[n].IsPrefix("-----")) { n--; }
  // The size is read 7 tokens before that separator; without that many
  // tokens the listing is not in the expected format.
  if (n-7 <= 0) {
    WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr());
    SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr());
    return 0;
  }
  return StrV[n-7].GetInt64();
}
bool TZipIn::IsZipExt | ( | const TStr & | FNmExt | ) | [static] |
Check whether the file extension FNmExt is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2, .bz2).
Definition at line 136 of file zipfl.cpp.
References THash< TKey, TDat, THashFunc >::Empty(), FExtToCmdH, FillFExtToCmdH(), and THash< TKey, TDat, THashFunc >::IsKey().
Referenced by IsZipFNm(), and TSsParser::TSsParser().
{
  // Returns true when FNmExt (an extension including the leading dot, e.g.
  // ".gz") is one of the registered compressed-file extensions.
  // The extension is lower-cased before the lookup, matching GetCmd(), so
  // that ".GZ" and ".gz" are treated alike by both functions.
  if (FExtToCmdH.Empty()) { FillFExtToCmdH(); }
  const TStr LcExt = TStr(FNmExt).GetLc();
  return FExtToCmdH.IsKey(LcExt);
}
static bool TZipIn::IsZipFNm | ( | const TStr & | FNm | ) | [inline, static] |
Check whether the file extension of FNm is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2, .bz2).
Definition at line 49 of file zipfl.h.
References TStr::GetFExt(), and IsZipExt().
Referenced by TSnap::LoadDyNet(), and TSnap::LoadDyNetGraphV().
int TZipIn::Len | ( | ) | const [inline, virtual] |
PSIn TZipIn::New | ( | const TStr & | FNm | ) | [static] |
Definition at line 102 of file zipfl.cpp.
References TZipIn().
Referenced by TSnap::LoadDyNet(), and TSnap::LoadDyNetGraphV().
PSIn TZipIn::New | ( | const TStr & | FNm, |
bool & | OpenedP | ||
) | [static] |
char TZipIn::PeekCh | ( | ) | [inline, virtual] |
char* TZipIn::Bf [private] |
int TZipIn::BfC [private] |
int TZipIn::BfL [private] |
uint64 TZipIn::CurFPos [private] |
TStrStrH TZipIn::FExtToCmdH [static, private] |
Definition at line 14 of file zipfl.h.
Referenced by FillFExtToCmdH(), GetCmd(), and IsZipExt().
uint64 TZipIn::FLen [private] |
const int TZipIn::MxBfL = 32*1024 [static, private] |
FILE* TZipIn::ZipStdoutRd [private] |
FILE * TZipIn::ZipStdoutWr [private] |