2013-09-26 116 views
1

我想從pdf中提取圖像,並將圖像句柄保存到std :: vector,有時候背景不正確,我的代碼如下。如何使用mupdf從PDF中提取圖像?

BOOL CTextEditorDoc::loadImage() 
{ 
    if(m_strPDFPath.IsEmpty()) 
     return FALSE; 

    CString strFile; 
    fz_context *ctx; 
    fz_document* doc; 

    fz_device *dev; 

    fz_irect bbox; 
    fz_rect bounds; 
    fz_matrix ctm; 
    fz_pixmap *image; 
    fz_colorspace *colorspace; 

    int i,j,rotation = 0; 
    int pagecount = 0; 
    fz_page *page; 

    BITMAPINFO bmi; 
    HBITMAP hBitmap; 
    LPBYTE pDest,pImage; 

    if(!gb2312toutf8(m_strPDFPath,strFile)) 
     return FALSE; 

    ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); 
    fz_try(ctx){ 
     doc = fz_open_document(ctx,strFile.GetBuffer(0)); 
    }fz_catch(ctx){ 
     fz_free_context(ctx); 
     return FALSE; 
    } 

    fz_rotate(&ctm, rotation); 
    colorspace = fz_device_rgb(ctx); 
    pagecount = fz_count_pages(doc); 

    pDest = NULL; 
    ::ZeroMemory(&bmi, sizeof(BITMAPINFO)); 
    bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); 
    bmi.bmiHeader.biCompression = BI_RGB; 
    bmi.bmiHeader.biPlanes = 1; 
    bmi.bmiHeader.biBitCount = 32; 
    bmi.bmiHeader.biWidth = 180; 
    bmi.bmiHeader.biHeight = -180; 
    bmi.bmiHeader.biSizeImage = 180*180*4; 

    for(i=0;i<pagecount;i++){ 
     page = fz_load_page(doc,i); 

     if(i == 0){ 
      fz_bound_page(doc,page,&bounds); 
      fz_pre_scale(&ctm,180/(bounds.x1 - bounds.x0),180/(bounds.y1 - bounds.y0)); 
      fz_transform_rect(&bounds, &ctm); 
      fz_round_rect(&bbox, &bounds); 
     } 

     image = fz_new_pixmap_with_bbox(ctx,colorspace,&bbox); 
     dev = fz_new_draw_device(ctx,image); 

     fz_try(ctx){ 
      fz_run_page(doc,page,dev,&ctm,NULL); 
     }fz_catch(ctx){ 
      fz_drop_pixmap(ctx,image); 
      fz_free_device(dev); 
      fz_free_page(doc, page); 
      continue; 
     } 

     pImage = image->samples; 
     if(pImage){ 
      pDest = NULL; 
      hBitmap = ::CreateDIBSection(NULL,&bmi,DIB_RGB_COLORS,(void**)&pDest,NULL,0); 
      ASSERT(hBitmap); 

      if(image->n == 2){ //not pallet 
       for (j = 180* 180; j > 0 ; j--){ 
        pDest[0] = pDest[1] = pDest[2] = *pImage++; 
        pDest[3] = *pImage++; 
        pDest += 4; 
       } 
      }else if(image->n == 4){ 
       //memcpy(pDest,pImage,m_thumbWidth * m_thumbHeight*4); 
       for (j = 180* 180; j > 0 ; j--){ 
        pDest[0] = *pImage++; 
        pDest[1] = *pImage++; 
        pDest[2] = *pImage++; 
        pDest[3] = *pImage++; 
        pDest += 4; 
       } 
      }else ASSERT(FALSE); 

      m_imageVector.push_back(hBitmap);// save it to std::vector 
     } 

     fz_drop_pixmap(ctx,image); 
     fz_free_device(dev); 
     fz_free_page(doc, page); 
    } 

    fz_close_document(doc); 
    fz_free_context(ctx); 
    return TRUE; 
} 

這段代碼可以提取PDF的所有圖像,但它可能太慢,如何改善它? 有一段時間圖像的背景不正確?

下面的圖片的左邊是不正確的,下面的圖片的右邊是真實的。

screentshot http://s22.postimg.org/bsdgn57ml/result.jpg

回答