2010-03-19 117 views
3

我對從iPhone應用程序中提供的PDF文檔中提取特定圖像(僅圖像)有一些疑問。從PDF中提取圖像

我已經查閱了蘋果的文檔,但沒能找到相關的方法。

我已經做了以下努力,從PDF文檔中獲取圖像。

/**
 * Debug action: for every page of the PDF at `filePath`, looks up the XObject named
 * "Im<pageIndex>" in the page resources, validates that it is a 1-bit FlateDecode image,
 * and logs its decoded byte length and its resolution relative to the page's media box.
 *
 * Any lookup whose result is needed later skips the page on failure instead of
 * continuing with an uninitialized value. Nothing is returned; results go to NSLog.
 *
 * @param sender The control that triggered the action (unused).
 */
-(IBAction)btnTappedImages:(id)sender{

    // MyGetPDFDocumentRef is a custom C function; filePath is the path to the PDF document.
    const char *path = [filePath UTF8String];
    CGPDFDocumentRef document = path ? MyGetPDFDocumentRef(path) : NULL;
    if(!document) {
        NSLog(@"Couldn't open document.");
        return;
    }

    int pgcnt = (int)CGPDFDocumentGetNumberOfPages(document);

    for(int i1 = 0; i1 < pgcnt; ++i1) {
        // 1. Open document page (PDF page numbers are 1-based)
        CGPDFPageRef pg = CGPDFDocumentGetPage(document, i1 + 1);
        if(!pg) {
            NSLog(@"Couldn't open page.");
            continue;
        }

        // 2. Get page dictionary
        CGPDFDictionaryRef dict = CGPDFPageGetDictionary(pg);
        if(!dict) {
            NSLog(@"Couldn't open page dictionary.");
            continue;
        }

        // 3. Get page contents stream. The stream is not used below, so a failure is
        //    only reported (a page's Contents may also be an array of streams).
        CGPDFStreamRef cont = NULL;
        if(!CGPDFDictionaryGetStream(dict, "Contents", &cont)) {
            NSLog(@"Couldn't open page stream.");
        }

        // 5-7. Media box size. CGPDFPageGetBoxRect yields a CGRect, so width/height no
        //      longer depend on reading each array entry with the right numeric type or
        //      on subtracting the matching coordinates.
        CGRect mediaBox = CGPDFPageGetBoxRect(pg, kCGPDFMediaBox);
        if(CGRectIsEmpty(mediaBox)) {
            NSLog(@"Couldn't open page Media Box.");
            continue;
        }
        double mediawidth = mediaBox.size.width, mediaheight = mediaBox.size.height;

        // 8. Get page resources
        CGPDFDictionaryRef res = NULL;
        if(!CGPDFDictionaryGetDictionary(dict, "Resources", &res)) {
            NSLog(@"Couldn't Open Page Media Reopsources.");
            continue;
        }

        // 9. Get the XObject dictionary from the resources
        CGPDFDictionaryRef xobj = NULL;
        if(!CGPDFDictionaryGetDictionary(res, "XObject", &xobj)) {
            NSLog(@"Couldn't load page Xobjects.");
            continue;
        }

        // NOTE(review): this assumes page i holds an image resource named "Im<i>".
        // Resource names are chosen by the PDF producer, so that is not guaranteed.
        char imagestr[16];
        snprintf(imagestr, sizeof(imagestr), "Im%d", i1);

        // 10. Get the XObject stream
        CGPDFStreamRef strm = NULL;
        if(!CGPDFDictionaryGetStream(xobj, imagestr, &strm)) {
            NSLog(@"Couldn't load stream for xObject");
            continue;
        }

        // 11. Get the dictionary of the XObject stream
        CGPDFDictionaryRef strmdict = CGPDFStreamGetDictionary(strm);
        if(!strmdict) {
            NSLog(@"Failed to load dictionary for xObject");
            continue;
        }

        // 12. Type is optional for XObjects; reject only when present and wrong.
        const char *type = NULL;
        if(CGPDFDictionaryGetName(strmdict, "Type", &type) && strcmp(type, "XObject") != 0) {
            NSLog(@"Couldn't load xObject Type");
            continue;
        }

        // 13. Check whether the subtype is Image
        const char *subtype = NULL;
        if(!CGPDFDictionaryGetName(strmdict, "Subtype", &subtype) || strcmp(subtype, "Image") != 0) {
            NSLog(@"xObject is not image");
            continue;
        }

        // 14. Bits per component: the row-size check below assumes 1 bit per pixel
        CGPDFInteger bitsper = 0;
        if(!CGPDFDictionaryGetInteger(strmdict, "BitsPerComponent", &bitsper) || bitsper != 1) {
            NSLog(@"Bits per component not loaded");
            continue;
        }

        // 15. Filter of the image data
        const char *filter = NULL;
        if(!CGPDFDictionaryGetName(strmdict, "Filter", &filter) || strcmp(filter, "FlateDecode") != 0) {
            NSLog(@"Filter not loaded");
            continue;
        }

        // 16. Image width and height
        CGPDFInteger width = 0, height = 0;
        if(!CGPDFDictionaryGetInteger(strmdict, "Width", &width) ||
           !CGPDFDictionaryGetInteger(strmdict, "Height", &height) ||
           width <= 0 || height <= 0) {
            NSLog(@"Image Height - width not loaded.");
            continue;
        }

        // 17. Load the image bytes and verify them. The copy is owned by us.
        CGPDFDataFormat fmt = CGPDFDataFormatRaw;
        CFDataRef data = CGPDFStreamCopyData(strm, &fmt);
        if(!data) {
            NSLog(@"Couldn't load stream for xObject");
            continue;
        }

        CFIndex len = CFDataGetLength(data);
        NSLog(@"Image bytes length - %i",(int)len);

        // 1-bit rows are padded to a whole number of bytes.
        CFIndex rowbytes = (CFIndex)((width + 7) / 8);
        if(rowbytes * (CFIndex)height != len) {
            NSLog(@"Invalid Image");
        }

        // Pixels per inch, given that PDF user space is 72 units per inch.
        double xres = width / mediawidth * 72.0, yres = height / mediaheight * 72.0;
        xres = round(xres * 1000) / 1000;
        yres = round(yres * 1000) / 1000;
        NSLog(@"Image resolution - %f x %f", xres, yres);

        CFRelease(data);
    }

    // Balance the create performed inside MyGetPDFDocumentRef.
    CGPDFDocumentRelease(document);
}

回答

13

是的!我找到了。但它看起來非常可怕 - 巨大的代碼。

// File-level pointer shared between the C helper functions and the view controller.
// It starts out as nil until setRefImgs() is called.
NSMutableArray *aRefImgs;

/// Remembers `ref` as the shared image array. The pointer is stored by plain
/// assignment; this function sends no retain or release.
void setRefImgs(NSMutableArray *ref)
{
    aRefImgs = ref;
}

/// Returns the array most recently handed to setRefImgs(), or nil if none was set.
NSMutableArray *ImgArrRef()
{
    return aRefImgs;
}

/**
 * Opens the PDF file at a POSIX path.
 *
 * @param filename UTF-8 encoded file system path. NULL is tolerated.
 * @return A document the caller owns and must release with CGPDFDocumentRelease(), or
 *         NULL when the path is unusable, the file cannot be opened as a PDF, or the
 *         document has no pages (a message is printed for the last two cases).
 */
CGPDFDocumentRef MyGetPDFDocumentRef (const char *filename) {
    if (filename == NULL) {
        return NULL;
    }

    // Returns NULL when the bytes are not valid UTF-8; CFRelease(NULL) would crash.
    CFStringRef path = CFStringCreateWithCString(NULL, filename, kCFStringEncodingUTF8);
    if (path == NULL) {
        return NULL;
    }

    CFURLRef url = CFURLCreateWithFileSystemPath(NULL, path, kCFURLPOSIXPathStyle, false);
    CFRelease(path);
    if (url == NULL) {
        return NULL;
    }

    CGPDFDocumentRef document = CGPDFDocumentCreateWithURL(url);
    CFRelease(url);

    if (document == NULL || CGPDFDocumentGetNumberOfPages(document) == 0) {
        printf("`%s' needs at least one page!", filename);
        // A zero-page document was still created, so it has to be released here.
        CGPDFDocumentRelease(document);
        return NULL;
    }
    return document;
}


// Allocates `pairs` consecutive (0.0, maxValue) entries. Returns NULL when `pairs` is
// zero or the allocation fails.
static CGFloat *createDefaultDecodePairs(size_t pairs, CGFloat maxValue) {
    if (pairs == 0) {
        return NULL;
    }
    CGFloat *values = malloc(sizeof(CGFloat) * pairs * 2);
    if (values == NULL) {
        return NULL;
    }
    for (size_t i = 0; i < pairs; i++) {
        values[2 * i] = 0.0;
        values[2 * i + 1] = maxValue;
    }
    return values;
}

/**
 * Builds the decode array to hand to CGImageCreate for a PDF image dictionary.
 *
 * When the dictionary has a "Decode" entry its numbers are copied. Otherwise a default
 * is produced from the color space model: (0, 1) per component for monochrome, RGB,
 * CMYK and DeviceN, and (0, 2^bitsPerComponent - 1) for indexed spaces.
 *
 * @param dict             Image stream dictionary.
 * @param cgColorSpace     Color space the image will be created with.
 * @param bitsPerComponent Bits per component of the image.
 * @return A malloc'ed array the caller must free(), or NULL when no decode array
 *         applies (Lab/unknown models, allocation failure, or a "Decode" entry that is
 *         malformed or has fewer than two values per color component).
 */
CGFloat *decodeValuesFromImageDictionary(CGPDFDictionaryRef dict, CGColorSpaceRef cgColorSpace, NSInteger bitsPerComponent) {
    CGPDFArrayRef decodeArray = NULL;
    size_t componentCount = cgColorSpace ? CGColorSpaceGetNumberOfComponents(cgColorSpace) : 0;

    if (dict != NULL && CGPDFDictionaryGetArray(dict, "Decode", &decodeArray)) {
        size_t count = CGPDFArrayGetCount(decodeArray);
        // Core Graphics needs a (min, max) pair for every component of the color
        // space; a shorter array fails its color_components <= decode_components
        // assertion, so treat it as "no remapping".
        if (count == 0 || count < componentCount * 2) {
            return NULL;
        }
        CGFloat *decodeValues = malloc(sizeof(CGFloat) * count);
        if (decodeValues == NULL) {
            return NULL;
        }
        for (size_t i = 0; i < count; i++) {
            CGPDFReal realValue = 0.0;
            if (!CGPDFArrayGetNumber(decodeArray, i, &realValue)) {
                free(decodeValues);
                return NULL;
            }
            decodeValues[i] = realValue;
        }
        return decodeValues;
    }

    if (cgColorSpace == NULL) {
        return NULL;
    }

    // The buffer is plain malloc memory, not a CF object, so it is returned as-is
    // rather than passed through CFMakeCollectable.
    switch (CGColorSpaceGetModel(cgColorSpace)) {
        case kCGColorSpaceModelMonochrome:
            return createDefaultDecodePairs(1, 1.0);
        case kCGColorSpaceModelRGB:
            return createDefaultDecodePairs(3, 1.0);
        case kCGColorSpaceModelCMYK:
            return createDefaultDecodePairs(4, 1.0);
        case kCGColorSpaceModelDeviceN:
            return createDefaultDecodePairs(componentCount, 1.0);
        case kCGColorSpaceModelIndexed:
            return createDefaultDecodePairs(1, (CGFloat)(pow(2.0, (double)bitsPerComponent) - 1));
        case kCGColorSpaceModelLab:
            // No default is provided for Lab.
            return NULL;
        default:
            return NULL;
    }
}

// Chooses a device color space for the image's "ColorSpace" entry. The caller owns the
// result. Returns NULL when no color space can be determined.
//  - name DeviceGray / DeviceRGB / DeviceCMYK: the matching device space
//  - array [/ICCBased stream]: device space matching the profile's "N" component count
//  - any other array: DeviceRGB, as an approximation
//  - otherwise, 1 bit per component implies gray
static CGColorSpaceRef createDeviceColorSpaceForImage(CGPDFDictionaryRef dict, CGPDFInteger bps) {
    CGPDFArrayRef colorSpaceArray = NULL;
    const char *colorSpaceName = NULL;
    CGPDFInteger componentCount = 0;

    if (CGPDFDictionaryGetArray(dict, "ColorSpace", &colorSpaceArray)) {
        const char *family = NULL;
        CGPDFStreamRef profile = NULL;
        componentCount = 3;
        if (CGPDFArrayGetName(colorSpaceArray, 0, &family) && strcmp(family, "ICCBased") == 0 &&
            CGPDFArrayGetStream(colorSpaceArray, 1, &profile)) {
            CGPDFInteger n = 0;
            CGPDFDictionaryRef profileDict = CGPDFStreamGetDictionary(profile);
            if (profileDict != NULL && CGPDFDictionaryGetInteger(profileDict, "N", &n)) {
                componentCount = n;
            }
        }
    } else if (CGPDFDictionaryGetName(dict, "ColorSpace", &colorSpaceName)) {
        if (strcmp(colorSpaceName, "DeviceRGB") == 0) {
            componentCount = 3;
        } else if (strcmp(colorSpaceName, "DeviceCMYK") == 0) {
            componentCount = 4;
        } else if (strcmp(colorSpaceName, "DeviceGray") == 0) {
            componentCount = 1;
        } else if (bps == 1) {
            componentCount = 1;
        }
    } else if (bps == 1) {
        // No ColorSpace entry at all: a 1-bit image can still be treated as gray.
        componentCount = 1;
    }

    switch (componentCount) {
        case 1:  return CGColorSpaceCreateDeviceGray();
        case 3:  return CGColorSpaceCreateDeviceRGB();
        case 4:  return CGColorSpaceCreateDeviceCMYK();
        default: return NULL;
    }
}

/**
 * Converts a PDF image XObject stream into a UIImage.
 *
 * @param myStream Stream of an XObject whose "Subtype" is "Image". May be NULL.
 * @return An autoreleased UIImage, or nil when the stream is not an image, lacks
 *         Width/Height/BitsPerComponent, has no usable color space, holds too little
 *         data for its declared geometry, or uses a data format other than raw / JPEG /
 *         JPEG 2000.
 *
 * Every Core Graphics / Core Foundation object created here is released before
 * returning; the UIImage keeps its own reference to the CGImage.
 */
UIImage *getImageRef(CGPDFStreamRef myStream) {
    if (myStream == NULL)
        return nil;

    CGPDFDictionaryRef dict = CGPDFStreamGetDictionary(myStream);
    if (dict == NULL)
        return nil;

    // Obtain the basic image information.
    const char *name = NULL;
    if (!CGPDFDictionaryGetName(dict, "Subtype", &name) || strcmp(name, "Image") != 0)
        return nil;

    CGPDFInteger width = 0, height = 0, bps = 0;
    if (!CGPDFDictionaryGetInteger(dict, "Width", &width) ||
        !CGPDFDictionaryGetInteger(dict, "Height", &height) ||
        !CGPDFDictionaryGetInteger(dict, "BitsPerComponent", &bps))
        return nil;

    // Reject geometry that cannot describe a bitmap (also keeps the size math sane).
    if (width <= 0 || height <= 0 || bps <= 0 || bps > 16)
        return nil;

    CGPDFBoolean interpolation = 0;
    if (!CGPDFDictionaryGetBoolean(dict, "Interpolate", &interpolation))
        interpolation = 0;

    // The "Intent" entry is not mapped yet; the default intent is always used.
    CGColorRenderingIntent renderingIntent = kCGRenderingIntentDefault;

    CGPDFDataFormat format = CGPDFDataFormatRaw;
    CFDataRef imageData = CGPDFStreamCopyData(myStream, &format);
    if (imageData == NULL)
        return nil;

    CGColorSpaceRef cgColorSpace = createDeviceColorSpaceForImage(dict, bps);
    CGFloat *decodeValues = cgColorSpace ? decodeValuesFromImageDictionary(dict, cgColorSpace, bps) : NULL;
    CGDataProviderRef dataProvider = CGDataProviderCreateWithCFData(imageData);
    CGImageRef cgImage = NULL;

    if (dataProvider != NULL) {
        if (format == CGPDFDataFormatRaw) {
            if (cgColorSpace != NULL) {
                size_t spp = CGColorSpaceGetNumberOfComponents(cgColorSpace);
                size_t bitsPerPixel = (size_t)bps * spp;
                if (bitsPerPixel > 0 && (size_t)width <= (((size_t)-1) - 7) / bitsPerPixel) {
                    // PDF image rows are padded to byte alignment.
                    size_t rowBytes = (bitsPerPixel * (size_t)width + 7) / 8;
                    // Only build the image when the stream holds enough bytes for the
                    // geometry we are about to claim (e.g. an indexed image treated as
                    // RGB would otherwise be read past its end).
                    if ((size_t)CFDataGetLength(imageData) / rowBytes >= (size_t)height) {
                        cgImage = CGImageCreate((size_t)width, (size_t)height, (size_t)bps, bitsPerPixel,
                                                rowBytes, cgColorSpace, (CGBitmapInfo)kCGImageAlphaNone,
                                                dataProvider, decodeValues, interpolation, renderingIntent);
                    }
                }
            }
        } else if (format == CGPDFDataFormatJPEGEncoded || format == CGPDFDataFormatJPEG2000) {
            // NOTE(review): JPEG 2000 data is sent through the JPEG provider as before;
            // this presumably yields NULL for real JPEG 2000 streams, which properly
            // need ImageIO. Confirm before relying on it.
            cgImage = CGImageCreateWithJPEGDataProvider(dataProvider, decodeValues, interpolation, renderingIntent);
        }
        // Any other format is unknown, or an error in the PDF: cgImage stays NULL.
    }

    UIImage *image = cgImage ? [UIImage imageWithCGImage:cgImage] : nil;

    // NOTE(review): freeing decodeValues here assumes Core Graphics copies the decode
    // array at image creation time — confirm.
    CGImageRelease(cgImage);
    CGDataProviderRelease(dataProvider);
    CGColorSpaceRelease(cgColorSpace);
    free(decodeValues);
    CFRelease(imageData);

    return image;
}

@implementation DashBoard 

// Additional setup after the view is loaded (typically from a nib): caches the path of
// the bundled "per.pdf" in filePath. filePath stays nil when the resource is missing.
- (void)viewDidLoad {
    [super viewDidLoad];
    // -pathForResource:ofType: returns nil for a missing resource and -initWithString:
    // raises on nil; -copy gives the same owned string and simply yields nil for nil.
    NSString *bundledPath = [[NSBundle mainBundle] pathForResource:@"per" ofType:@"pdf"];
    filePath = [bundledPath copy];
}



/**
 * Collects the XObjects of every page of the PDF at `filePath` and shows them in an
 * ImgVCtr pushed onto the navigation stack.
 *
 * A fresh `arrImgs` array is created and published through setRefImgs(); each page's
 * XObject dictionary is then enumerated with pdfDictionaryFunction (defined elsewhere).
 *
 * @param sender The control that triggered the action (unused).
 */
-(IBAction)btnTappedText:(id)sender{
    // Messaging nil is a no-op, so no nil or retainCount guard is needed.
    [arrImgs release];
    arrImgs=[[NSMutableArray alloc] init];

    setRefImgs(arrImgs);

    // 1. Open the document
    const char *path = [filePath UTF8String];
    CGPDFDocumentRef document = path ? MyGetPDFDocumentRef(path) : NULL;
    size_t pgcnt = document ? CGPDFDocumentGetNumberOfPages(document) : 0;

    for(size_t i1 = 0; i1 < pgcnt; ++i1) {
        // PDF page numbers are 1-based.
        CGPDFPageRef pg = CGPDFDocumentGetPage(document, i1 + 1);
        if(!pg) {
            NSLog(@"Couldn't open page.");
            continue;
        }

        // 2. Get page dictionary
        CGPDFDictionaryRef dict = CGPDFPageGetDictionary(pg);
        if(!dict) {
            NSLog(@"Couldn't open page dictionary.");
            continue;
        }

        // The page's Contents and MediaBox are no longer required here: neither value
        // was used, and requiring them skipped pages whose Contents is an array or
        // whose MediaBox holds real numbers.

        // 3. Get page resources
        CGPDFDictionaryRef res = NULL;
        if(!CGPDFDictionaryGetDictionary(dict, "Resources", &res)) {
            NSLog(@"Couldn't Open Page Media Reopsources.");
            continue;
        }

        // 4. Get the XObject dictionary and visit each of its entries
        CGPDFDictionaryRef xobj = NULL;
        if(!CGPDFDictionaryGetDictionary(res, "XObject", &xobj)) {
            NSLog(@"Couldn't load page Xobjects.");
            continue;
        }
        CGPDFDictionaryApplyFunction(xobj, pdfDictionaryFunction, NULL);
    }

    // Balance the create performed inside MyGetPDFDocumentRef.
    // NOTE(review): assumes pdfDictionaryFunction keeps no CGPDF object references
    // beyond this point (only finished images) — confirm.
    CGPDFDocumentRelease(document);

    NSLog(@"Total images are - %lu",(unsigned long)[arrImgs count]);

    [nxtImgVCtr release];
    nxtImgVCtr=[[ImgVCtr alloc] initWithNibName:@"ImgVCtr" bundle:nil];
    nxtImgVCtr.arrImg=arrImgs;
    [self.navigationController pushViewController:nxtImgVCtr animated:YES];
}
+0

請指導我http://stackoverflow.com/questions/18574559/how-to-get-image-from-fileattachment-annotation-in-pdf-file –

+0

+1,很棒的答案。在iOS 6中運行良好,但在iOS 7中崩潰。控制檯輸出:'Assertion failed: (color_components <= decode_components), function cgimage_set_decode_array, file Images/CGImage.c, line 233.' 你能幫我解決iOS 7上的這個問題嗎? –

+1

這個答案似乎缺少關鍵的'pdfDictionaryFunction()'實現。我錯過了什麼嗎? –

1

將CGPDFStreamRef轉換爲UIImage並沒有真正的概念意義; PDF表示可能具有多個頁面並且沒有明顯映射到位圖圖像的文檔。

即使你知道你的PDF只包含一個頁面,你仍然需要做一些解析和finagling。這裏有一個很短的討論:http://lists.apple.com/archives/Cocoa-dev/2006/Sep/msg01407.html

+0

啊,我看你在做什麼。調用'CGPDFStreamCopyData'後,'t'的值是多少?看起來它會告訴你它是否是圖像數據。 – Tom

+0

順便說一句,設置't = CGPDFDataFormatJPEG2000'沒有意義。該參數被函數決定輸出的格式覆蓋。 – Tom

+0

好的。從你的答案我正在更新我的問題。 –

0
// Fragment: renders one page of a bundled PDF into the current graphics context, scaled
// to fit this view's bounds. `context`, `currentpage` and `aspectFit` are declared
// outside this fragment, and the fragment ends before the image context opened on its
// last line is used or closed.
// NOTE(review): PDFfile is not released anywhere in the visible lines.
    CGPDFDocumentRef PDFfile;  
    // Locate the PDF inside the main bundle and open it; the URL is released right after.
    CFURLRef pdfURL = CFBundleCopyResourceURL(CFBundleGetMainBundle(), CFSTR("iPhoneAppProgrammingGuide.pdf"), NULL, NULL); 
       PDFfile = CGPDFDocumentCreateWithURL((CFURLRef)pdfURL); 
       CFRelease(pdfURL); 

      CGPDFPageRef page = CGPDFDocumentGetPage(PDFfile,currentpage); 

      context = UIGraphicsGetCurrentContext(); 
      CGContextSaveGState(context); 
      // Paint an opaque white background over the whole view.
      CGContextSetRGBFillColor(context, 1.0, 1.0, 1.0, 1.0); 
      CGContextFillRect(context,self.bounds); 
      // Move the origin down by the view height and negate the y scale (vertical flip).
      CGContextTranslateCTM(context, -1.0, [self bounds].size.height); 
      CGContextScaleCTM(context, 1.0, -1.0); 
      // Map the page's art box into the view bounds (no rotation, aspect ratio preserved).
      CGContextConcatCTM(context, CGPDFPageGetDrawingTransform(page, kCGPDFArtBox, [self bounds], 0, true)); 
      CGContextDrawPDFPage(context, page);  
      CGContextRestoreGState(context); 
      // Computed after the page is drawn and the graphics state restored.
      CGAffineTransform transform = aspectFit(CGPDFPageGetBoxRect(page, kCGPDFMediaBox), 
                 CGContextGetClipBoundingBox(context)); 

       CGContextConcatCTM(context, transform); 
       // Opens a bitmap image context the size of the view.
       UIGraphicsBeginImageContext(CGSizeMake(self.bounds.size.width, self.bounds.size.height));