2013-01-31 36 views
3

我想使用 [http://www.zlib.net/] 提供的 minizip 封裝來壓縮文件夾。只要文件名是英文的,它就可以正常工作。有沒有人嘗試過修改 minizip 來支持 Unicode?如何將 minizip 封裝轉換爲支持 Unicode?

修改後的代碼張貼如下。問題在於這個函數,第二個參數以const char *作爲輸入。當我進行轉換時,它會丟失數據和文件名稱不一樣。

例如:中文 - 統一碼.txt變成中文-t +ƒS+Çtáü.txt裏面的zip。

err = zipOpenNewFileInZip3_64( zf,outstr.c_str(),&zi, 
             NULL,0,NULL,0,NULL /* comment*/, 
             (opt_compress_level != 0) ? Z_DEFLATED : 0, 
             opt_compress_level,0, 
             /* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */ 
             -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, 
             password,crcFile, zip64); 








/*
minizip.c 
Version 1.1, February 14h, 2010 
sample part of the MiniZip project - (http://www.winimage.com/zLibDll/minizip.html) 

Copyright (C) 1998-2010 Gilles Vollant (minizip) (http://www.winimage.com/zLibDll/minizip.html) 

Modifications of Unzip for Zip64 
Copyright (C) 2007-2008 Even Rouault 

Modifications for Zip64 support on both zip and unzip 
Copyright (C) 2009-2010 Mathias Svensson (http://result42.com) 
*/ 


/*
 * Large-file support setup. These feature-test macros only take effect when
 * they are defined before the first system header is included.
 *
 * The glibc macro that selects a 64-bit off_t is _FILE_OFFSET_BITS (with a
 * trailing S); the previous spelling _FILE_OFFSET_BIT was not recognised by
 * any header and therefore had no effect.
 * NOTE(review): the __USE_* names are glibc-internal and are normally derived
 * from the public macros; they are kept here unchanged.
 */
#if (!defined(_WIN32)) && (!defined(WIN32)) && (!defined(__APPLE__))
#ifndef __USE_FILE_OFFSET64
#define __USE_FILE_OFFSET64
#endif
#ifndef __USE_LARGEFILE64
#define __USE_LARGEFILE64
#endif
#ifndef _LARGEFILE64_SOURCE
#define _LARGEFILE64_SOURCE
#endif
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif
#endif

/*
 * Thin wrappers used by this file to open a file and to query/set a 64-bit
 * file position.
 */
#ifdef __APPLE__
// In darwin and perhaps other BSD variants off_t is a 64 bit value, hence no need for specific 64 bit functions
#define FOPEN_FUNC(filename, mode) fopen(filename, mode)
#define FTELLO_FUNC(stream) ftello(stream)
#define FSEEKO_FUNC(stream, offset, origin) fseeko(stream, offset, origin)
#else
#define FOPEN_FUNC(filename, mode) fopen64(filename, mode)
#define FTELLO_FUNC(stream) ftello64(stream)
#define FSEEKO_FUNC(stream, offset, origin) fseeko64(stream, offset, origin)
#endif

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <time.h> 
#include <errno.h> 
#include <fcntl.h> 
#ifdef _WIN32 
# include <direct.h> 
# include <io.h> 
#define GetCurrentDir _getcwd 
#else 
# include <unistd.h> 
# include <utime.h> 
# include <sys/types.h> 
# include <sys/stat.h> 
#endif 

#include "zip.h" 
#include "Shlwapi.h" 

#ifdef _WIN32 
#define USEWIN32IOAPI 
#include "iowin32.h" 
#endif 

#include <windows.h> 
#include <string> 
#include <iostream> 
#include <list> 
#include <fstream> 
#include <sstream> 
#include <set> 
using namespace std; 

/* Size in bytes of the scratch buffer that zipper() allocates for reading input files. */
#define WRITEBUFFERSIZE (16384) 
/* Number of characters (excluding the terminator) held by the local name buffer in the unix filetime(). */
#define MAXFILENAME (256) 

#ifdef _WIN32
/*
 * Looks up the last-write time of file `f` and stores it in `*dt` in DOS
 * date/time format (date in the high 16 bits, time in the low 16 bits).
 *
 *   f      name of the file to get info on
 *   tmzip  not written by this Windows variant
 *   dt     receives the DOS date/time; left untouched on failure
 *
 * Returns 1 when the time was obtained and converted, 0 otherwise.
 */
uLong filetime(
    wchar_t *f,    /* name of file to get info on */
    tm_zip *tmzip,    /* return value: access, modific. and creation times */
    uLong *dt)   /* dostime */
{
    uLong ret = 0;
    _WIN32_FIND_DATAW ff32;
    HANDLE hFind = FindFirstFileW(f,&ff32);

    (void)tmzip;

    if (hFind != INVALID_HANDLE_VALUE)
    {
        FILETIME ftLocal;
        WORD dosDate = 0;
        WORD dosTime = 0;

        /* Convert into local WORDs rather than writing through a cast of
           `dt`, and only report success when both conversions succeed. */
        if (FileTimeToLocalFileTime(&(ff32.ftLastWriteTime),&ftLocal) &&
            FileTimeToDosDateTime(&ftLocal,&dosDate,&dosTime))
        {
            *dt = (((uLong)dosDate) << 16) | (uLong)dosTime;
            ret = 1;
        }
        FindClose(hFind);
    }
    return ret;
}
#else
#if defined(unix) || defined(__unix__) || defined(__APPLE__)
/*
 * Fills `*tmzip` with the local-time modification time of file `f`.
 *
 *   f      name of the file to get info on; "-" is not stat'ed
 *   tmzip  receives the broken-down time; when the file cannot be stat'ed it
 *          receives the local-time representation of time_t 0
 *   dt     not written by this variant
 *
 * Returns 1 when stat() succeeded, 0 otherwise.
 */
uLong filetime(
    char *f,    /* name of file to get info on */
    tm_zip *tmzip,   /* return value: access, modific. and creation times */
    uLong *dt)    /* dostime */
{
    uLong ret=0;
    struct stat s;  /* results of stat() */
    struct tm* filedate;
    time_t tm_t=0;

    (void)dt;

    if (strcmp(f,"-")!=0)
    {
        char name[MAXFILENAME+1];
        size_t len = strlen(f);
        if (len > MAXFILENAME)
            len = MAXFILENAME;

        /* Copy exactly `len` characters and terminate right after them, so
           no byte of `name` that is read below is left uninitialised. */
        memcpy(name, f, len);
        name[len] = '\0';

        /* not all systems allow stat'ing a file with / appended */
        if (len > 0 && name[len - 1] == '/')
            name[len - 1] = '\0';

        if (stat(name,&s)==0)
        {
            tm_t = s.st_mtime;
            ret = 1;
        }
    }

    filedate = localtime(&tm_t);
    if (filedate == NULL)
        return 0;

    tmzip->tm_sec = filedate->tm_sec;
    tmzip->tm_min = filedate->tm_min;
    tmzip->tm_hour = filedate->tm_hour;
    tmzip->tm_mday = filedate->tm_mday;
    tmzip->tm_mon = filedate->tm_mon ;
    tmzip->tm_year = filedate->tm_year;

    return ret;
}
#else
/*
 * Fallback for platforms without a file-time implementation: writes nothing
 * and always returns 0.
 */
uLong filetime(
    char *f,    /* name of file to get info on */
    tm_zip *tmzip,    /* return value: access, modific. and creation times */
    uLong *dt)    /* dostime */
{
    (void)f;
    (void)tmzip;
    (void)dt;
    return 0;
}
#endif
#endif

/*
 * Appends `directory + fileName` to `fileList` unless the file's extension is
 * listed in `excludeFilterSet`.
 *
 * The extension is the text after the last '.' in `fileName` (without the
 * dot). A name that contains no '.' has no extension and is always added;
 * previously such a name was compared against the filter in its entirety.
 * The comparison is an exact, case-sensitive set lookup.
 */
void addFileToList(list<wstring>& fileList, const wstring& directory, const set<wstring>& excludeFilterSet, const wstring& fileName)
{
    const wstring::size_type dotPos = fileName.find_last_of(L'.');
    if (dotPos != wstring::npos)
    {
        const wstring fileExtension = fileName.substr(dotPos + 1);
        if (!fileExtension.empty() &&
            excludeFilterSet.find(fileExtension) != excludeFilterSet.end())
        {
            return;
        }
    }

    fileList.push_back(directory + fileName);
}

void GetFileListing(list<wstring>& fileList, wstring directory,const set<wstring>& excludeFilterSet,bool recursively=true) 
{ 
    directory = directory + L"\\"; 
    wstring filter = directory + L"*"; 

    _WIN32_FIND_DATAW FindFileData; 
    HANDLE hFind = FindFirstFileW(filter.c_str(), &FindFileData); 

    if (hFind == INVALID_HANDLE_VALUE) 
    { 
     DWORD dwError = GetLastError(); 
     if (dwError != ERROR_FILE_NOT_FOUND) 
     { 
      //cout << "Invalid file handle for filter " << filter << ". Error is " << GetLastError() << endl; 
     } 
     return; 
    } 

    do 
    { 
     if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) 
     { 
      if ((recursively) && (wcscmp(FindFileData.cFileName, L".") != 0) && (wcscmp(FindFileData.cFileName, L"..") != 0)) 
      { 
       GetFileListing(fileList, directory + FindFileData.cFileName, excludeFilterSet); 
      } 
     } 
     else 
     { 
      addFileToList(fileList, directory, excludeFilterSet,FindFileData.cFileName); 
     } 
    } while (FindNextFileW(hFind, &FindFileData) != 0); 

    DWORD dwError = GetLastError(); 
    FindClose(hFind); 

    if (dwError != ERROR_NO_MORE_FILES) 
    { 
     //cout << "FindNextFile error. Error is "<< dwError << endl; 
    } 
} 


/*
 * Reports whether `filename` can be opened for binary reading.
 * Returns 1 when the open succeeds (the file is closed again), 0 otherwise.
 */
int check_exist_file(wchar_t* filename)
{
    FILE* const probe = _wfopen(filename,L"rb");
    if (probe == NULL)
    {
        return 0;
    }

    fclose(probe);
    return 1;
}

/*
 * Calculates the CRC32 of a file; the CRC must be known before a file can be
 * encrypted.
 *
 *   filenameinzip  path of the file to read
 *   buf, size_buf  caller-provided scratch buffer and its size in bytes
 *   result_crc     receives the CRC of the bytes read (0 if the file could
 *                  not be opened)
 *
 * Returns ZIP_OK on success, ZIP_ERRNO if the file cannot be opened or a read
 * stops short without reaching end of file. Progress and errors are printed
 * to stdout.
 */
int getFileCrc(const wchar_t * filenameinzip,void*buf,unsigned long size_buf,unsigned long* result_crc)
{
    unsigned long calculate_crc=0;
    int err=ZIP_OK;
    FILE * fin = _wfopen(filenameinzip,L"rb");

    if (fin==NULL)
    {
        err = ZIP_ERRNO;
    }
    else
    {
        unsigned long size_read = 0;
        do
        {
            size_read = (unsigned long)fread(buf,1,size_buf,fin);
            if ((size_read < size_buf) && (feof(fin)==0))
            {
                /* %ls: the file name is a wide string */
                printf("error in reading %ls\n",filenameinzip);
                err = ZIP_ERRNO;
            }

            if (size_read>0)
                calculate_crc = crc32(calculate_crc,(const Bytef *)buf,size_read);

        } while ((err == ZIP_OK) && (size_read>0));

        fclose(fin);
    }

    *result_crc=calculate_crc;
    printf("file %ls crc %lx\n", filenameinzip, calculate_crc);
    return err;
}

/*
 * Reports whether `filename` is at least 0xffffffff bytes long.
 *
 * Returns 1 for such a file, 0 otherwise, including when the file cannot be
 * opened or its size cannot be determined. The size found is printed to
 * stdout whenever the file could be opened.
 */
int isLargeFile(const wchar_t * filename)
{
    int largeFile = 0;
    FILE* pFile = _wfopen(filename, L"rb");

    if(pFile != NULL)
    {
        ZPOS64_T pos = 0;

        if (FSEEKO_FUNC(pFile, 0, SEEK_END) == 0)
        {
            /* A negative result signals failure; do not let it wrap around
               to a huge unsigned size. */
            const long long endPos = (long long)FTELLO_FUNC(pFile);
            if (endPos >= 0)
                pos = (ZPOS64_T)endPos;
        }

        /* %ls: wide file name; %llu: the size is unsigned */
        printf("File : %ls is %llu bytes\n", filename, (unsigned long long)pos);

        if(pos >= 0xffffffff)
            largeFile = 1;

        fclose(pFile);
    }

    return largeFile;
}

/*
 * Splits `text` at every `delimiter` and inserts each piece into `result`
 * after removing the piece's first two characters (pieces shorter than two
 * characters become empty). The text after the last delimiter - or the whole
 * text when there is no delimiter - is always processed, so an empty `text`
 * inserts one empty string.
 */
void split(const wstring& text, wchar_t delimiter,set<wstring>& result)
{
    wstring::size_type pieceBegin = 0;

    for (;;)
    {
        const wstring::size_type pieceEnd = text.find(delimiter, pieceBegin);
        const bool isLastPiece = (pieceEnd == wstring::npos);

        wstring piece = isLastPiece
            ? text.substr(pieceBegin)
            : text.substr(pieceBegin, pieceEnd - pieceBegin);
        piece.erase(0,2);
        result.insert(piece);

        if (isLastPiece)
            break;

        pieceBegin = pieceEnd + 1;
    }
}

// Original author's note: not meant to be called directly.
// Counts the bytes needed for a wide string when each element below 0x80
// takes one byte, each element below 0x800 takes two, and every other
// element takes three. The terminator is not counted; a null pointer yields 0.
long getUTF8size(const wchar_t *string){
    long byteCount=0;
    if (string){
        const wchar_t *cursor=string;
        while (*cursor){
            const wchar_t unit=*cursor;
            ++cursor;
            byteCount+=(unit<0x80) ? 1 : ((unit<0x800) ? 2 : 3);
        }
    }
    return byteCount;
}

// Converts a null-terminated wide string to a newly allocated,
// null-terminated UTF-8 string.
//
// Handles both 16-bit wchar_t (UTF-16, where a high/low surrogate pair is
// combined into one code point) and 32-bit wchar_t (one code point per
// element). Code points above U+FFFF are written as four bytes. Unpaired
// surrogates and values above U+10FFFF are replaced by U+FFFD.
//
// A null `string` yields an empty result. The returned buffer is allocated
// with new[] and must be released by the caller with delete[].
char *WChar_to_UTF8(const wchar_t *string){
    std::string utf8;
    if (string){
        for (const wchar_t *p=string;*p;++p){
            unsigned long cp=static_cast<unsigned long>(*p);
            if (cp>=0xD800 && cp<=0xDBFF){
                // p[1] is safe to read: *p is not the terminator.
                const unsigned long low=static_cast<unsigned long>(p[1]);
                if (low>=0xDC00 && low<=0xDFFF){
                    cp=0x10000+((cp-0xD800)<<10)+(low-0xDC00);
                    ++p;
                }else{
                    cp=0xFFFD;
                }
            }else if ((cp>=0xDC00 && cp<=0xDFFF) || cp>0x10FFFF){
                cp=0xFFFD;
            }

            if (cp<0x80){
                utf8+=static_cast<char>(cp);
            }else if (cp<0x800){
                utf8+=static_cast<char>(0xC0|(cp>>6));
                utf8+=static_cast<char>(0x80|(cp&0x3F));
            }else if (cp<0x10000){
                utf8+=static_cast<char>(0xE0|(cp>>12));
                utf8+=static_cast<char>(0x80|((cp>>6)&0x3F));
                utf8+=static_cast<char>(0x80|(cp&0x3F));
            }else{
                utf8+=static_cast<char>(0xF0|(cp>>18));
                utf8+=static_cast<char>(0x80|((cp>>12)&0x3F));
                utf8+=static_cast<char>(0x80|((cp>>6)&0x3F));
                utf8+=static_cast<char>(0x80|(cp&0x3F));
            }
        }
    }

    // The buffer is sized from the bytes actually produced.
    char *res=new char[utf8.size()+1];
    utf8.copy(res,utf8.size());
    res[utf8.size()]=0;
    return res;
}


// Converts a wide string to UTF-8 using WideCharToMultiByte(CP_UTF8).
// Returns an empty string when the input is empty, when its length does not
// fit the API's int length parameter, or when the conversion fails.
std::string utf8_encode(const std::wstring &wstr)
{
    // WideCharToMultiByte rejects a source length of 0, so answer directly.
    if (wstr.empty())
        return std::string();

    const int wide_len = (int)wstr.size();
    if (wide_len <= 0 || (size_t)wide_len != wstr.size())
        return std::string();

    // First call: ask how many bytes the UTF-8 form needs.
    const int size_needed = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wide_len, NULL, 0, NULL, NULL);
    if (size_needed <= 0)
        return std::string();

    // Second call: perform the conversion into a buffer of that size.
    std::string strTo(size_needed, 0);
    const int written = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wide_len, &strTo[0], size_needed, NULL, NULL);
    if (written <= 0)
        return std::string();

    strTo.resize(written);
    return strTo;
}


wstring zipper(const wstring& directoryPath, const wstring& strExcludeFilter, wstring & zipFileName) 
{ 
    int opt_overwrite=0,opt_compress_level=Z_BEST_COMPRESSION,opt_exclude_path=0,err=0,size_buf=0; 
    void* buf=NULL; 
    const char* password=NULL; 
    list<wstring> fileList; 
    DWORD dwRet; 

    wchar_t cCurrentPath[MAX_PATH]; 
    dwRet = GetCurrentDirectoryW(MAX_PATH, cCurrentPath); 
    if(dwRet == 0) 
    { 
     return wstring(); 
    } 

    // Change the directory to the current folder 
    _wchdir(directoryPath.c_str()); 
    set<wstring> excludeFilterSet; 
    split(strExcludeFilter,'|',excludeFilterSet); 

    GetFileListing(fileList, directoryPath,excludeFilterSet); 
    opt_overwrite = 1; 

    size_buf = WRITEBUFFERSIZE; 
    buf = (void*)malloc(size_buf); 
    if (buf==NULL) return wstring(); 

    wchar_t tempDirPath[MAX_PATH]; 
    dwRet = GetTempPathW (MAX_PATH, tempDirPath); 
    if(dwRet == 0) return wstring(); 

    wstring directoryName,zipFilePath; 
    _WIN32_FIND_DATAW FindFileData; 
    HANDLE hFind = FindFirstFileW(directoryPath.c_str(), &FindFileData); 
    if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) 
    { 
     directoryName = FindFileData.cFileName; 
    } 

    zipFilePath = wstring(tempDirPath)+L"\\"+directoryName+L".zip"; 
    zipFile zf; 
    int errclose; 

    #ifdef USEWIN32IOAPI 
     zlib_filefunc64_def ffunc; 
     fill_win32_filefunc64W (&ffunc); 
     zf = zipOpen2_64(zipFilePath.c_str(),(opt_overwrite==2) ? 2 : 0,NULL,&ffunc); 
    # else 
     zf = zipOpen64(zipFilePath.c_str(),(opt_overwrite==2) ? 2 : 0); 
    # endif 

    if (zf == NULL) 
    { 
     //printf("error opening %s\n",filename_try); 
     err= ZIP_ERRNO; 
    } 
    else 
    { 
     //printf("creating %s\n",filename_try); 
    } 

    for(list<wstring>::iterator it = fileList.begin() ; it!=fileList.end();++it) 
    { 
     FILE * fin; 
     int size_read; 
     //const char* filenameinzip = (*it).c_str(); 
     wstring filenameinzip = (*it).c_str(); 
     wchar_t szOut[MAX_PATH]; 

     PathRelativePathToW(szOut, 
          directoryPath.c_str(), 
          FILE_ATTRIBUTE_DIRECTORY, 
          filenameinzip.c_str(), 
          FILE_ATTRIBUTE_NORMAL); 

     wchar_t *savefilenameinzip; 
     zip_fileinfo zi; 
     unsigned long crcFile=0; 
     int zip64 = 0; 

     zi.tmz_date.tm_sec = zi.tmz_date.tm_min = zi.tmz_date.tm_hour = 
      zi.tmz_date.tm_mday = zi.tmz_date.tm_mon = zi.tmz_date.tm_year = 0; 
     zi.dosDate = 0; 
     zi.internal_fa = 0; 
    zi.external_fa = 0; 
     filetime(szOut,&zi.tmz_date,&zi.dosDate); 

     if ((password != NULL) && (err==ZIP_OK)) 
      err = getFileCrc(szOut,buf,size_buf,&crcFile); 

     zip64 = isLargeFile(szOut); 

     /* The path name saved, should not include a leading slash. */ 
     /*if it did, windows/xp and dynazip couldn't read the zip file. */ 
     savefilenameinzip = szOut; 
     while(savefilenameinzip[0] == '\\' || savefilenameinzip[0] == '/') 
     { 
      savefilenameinzip++; 
     } 

     string outstr = utf8_encode(savefilenameinzip); 
     //char * op = (char*)outstr.c_str(); 


     err = zipOpenNewFileInZip3_64( zf,outstr.c_str(),&zi, 
             NULL,0,NULL,0,NULL /* comment*/, 
             (opt_compress_level != 0) ? Z_DEFLATED : 0, 
             opt_compress_level,0, 
             /* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */ 
             -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, 
             password,crcFile, zip64); 

     if (err != ZIP_OK) 
     { 
      //printf("error in opening %s in zipfile\n",szOut); 
     } 
     else 
     { 
      //fin = FOPEN_FUNC(szOut,"rb"); 
      fin = _wfopen(szOut,L"rb"); 

      if (fin==NULL) 
      { 
       err=ZIP_ERRNO; 
       //printf("error in opening %s for reading\n",szOut); 
      } 
     } 

     if (err == ZIP_OK) 
      do 
      { 
       err = ZIP_OK; 
       size_read = (int)fread(buf,1,size_buf,fin); 
       if (size_read < size_buf) 
        if (feof(fin)==0) 
        { 
         //printf("error in reading %s\n",szOut); 
         err = ZIP_ERRNO; 
        } 

        if (size_read>0) 
        { 
         err = zipWriteInFileInZip (zf,buf,size_read); 
         if (err<0) 
         { 
          //printf("error in writing %s in the zipfile\n",szOut); 
         } 

        } 
      } while ((err == ZIP_OK) && (size_read>0)); 

      if (fin) 
       fclose(fin); 

      if (err<0) 
       err=ZIP_ERRNO; 
      else 
      { 
       err = zipCloseFileInZip(zf); 
       if (err!=ZIP_OK) 
       { 
        //printf("error in closing %s in the zipfile\n",szOut); 
       } 
      } 
    } 

    errclose = zipClose(zf,NULL); 
    if (errclose != ZIP_OK) 
    { 
     //printf("error in closing %s\n",filename_try); 
    } 

    free(buf); 
    // Change back the executabe context 
    _wchdir(cCurrentPath); 
    return zipFilePath; 
} 
+0

即使我試圖修改zipOpenNewFileInZip3_64以及所有與api相關的接受wchar_t *作爲輸入。還修改了內存分配和字符串長度計算到等價的wchar_t函數。但是在做完所有這些更改後,最後的zip表示損壞的頭文件。所以這就是爲什麼我沒有觸及這個函數,只是將wchar_t *字符串轉換爲等價的char *字符串。 –

+0

除了作爲「Mac特定數據」的屬性外,ZIP規範根本沒有字符編碼的概念,所以我沒有看到minizip或任何其他庫可能以明確定義的方式做這樣的事情。就ZIP而言,文件名最多可達64k「字節」(不管是什麼),並具有一些要求,例如沒有驅動器號和正斜槓而不是反斜槓。你可以將你的中文字符轉換成UTF-8(和後面),這是「只是字節」,但我不確定這是多麼「有效」。 – Damon

+1

@Damon,你的陳述是不正確的。[ZIP格式的定義](http://www.pkware.com/documents/casestudies/APPNOTE.TXT)過去引用的是「代碼頁437」,但是現在已有存儲例如UTF-8文件名的規定。參見上面引用的文本的附錄D。不確定應用程序或庫是否支持,但該標準至少留有餘地。 – MvG

回答

7

根據the standard,在ZIP文件中存儲UTF-8文件名的正式方式是設置「通用位標誌」(general purpose bit flag)的第11位。看看minizip的源代碼,在我看來minizip在任何時候都不會爲你設置這個位,並且zipOpenNewFileInZip3_64沒有辦法傳入這個位。然而,有一個zipOpenNewFileInZip4_64,它多接受兩個參數:versionMadeBy 和 flagBase。所以,你也許可以通過把調用改成下面這樣,按照標準存儲UTF-8文件名:

err = zipOpenNewFileInZip4_64(zf, outstr.c_str(), […], crcFile, 36, 1<<11, zip64); 

這假設outstr確實包含文件名的有效UTF-8編碼;源代碼表明應該如此,但我沒有驗證。我建議你打印outstr各字節的十六進制值來驗證這一點。除非字符串在過程中被弄亂,否則您的「統一碼.txt」的UTF-8十六進制表示應爲e7 b5 b1 e4 b8 80 e7 a2 bc 2e 74 78 74

有關此versionMadeBy字段(我在我的調用中設置爲36)的詳細信息,請參閱標準的第4.4.2節。這取決於你使用的是什麼平臺、zipfi參數(你的案例中的&zi)中的文件屬性採用什麼格式,以及你遵循的是哪個版本的標準。在使用unicode文件名時,我會說你使用的是標準的6.3.*版,所以低位字節應該是36。由於minizip.c包裝程序根本不存儲任何文件屬性,因此您不必在此處指定平臺。缺少文件屬性這一點可以從以下幾行看出:

zi.internal_fa = 0; 
    zi.external_fa = 0; 

請注意,即使標準確實提供了一種表示Unicode文件名的方式,這部分也是在2006年才加入的,可能仍有很多ZIP應用程序不支持它。因此,即使您的存檔是正確的,您的解壓縮實用程序仍可能會錯誤地解壓縮此文件,將UTF-8字節解釋爲代碼頁437或拉丁文1或類似編碼。

+0

謝謝MvG!我正在嘗試修改源代碼並更新結果! –

+0

MvG,有關zipOpenNewFileInZip4_64中第17個參數'uLong versionMadeBy'的任何想法?應該傳遞什麼價值? –

+1

@RP .:我錯過了'versionMadeBy'字段。編輯我的答案,建議一個合適的價值。 – MvG

0

更改程序以正確處理UTF-8不是微不足道的,請看Unicode FAQ。處理可能的惡意數據(如壓縮/解壓縮)的程序必須格外小心。一個值得的努力,絕對不是微不足道的。

+0

這裏的unicode只在文件名中。而這個問題只是指壓縮,而不是解壓縮。所以這隻需要將一些已知的編碼本地文件名轉換爲UTF-8表示。我看到那裏沒有安全風險。對於解壓縮,只要UTF-8解碼器檢測並處理無效輸入,其餘的應該像以前一樣工作。所以,雖然我一般同意,但這個特定案例的變化看起來相對簡單和安全。 – MvG