[ 
https://issues.apache.org/jira/browse/PDFBOX-2128?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Tilman Hausherr updated PDFBOX-2128:
------------------------------------

    Description: 
I have a PDF with CMYK images inside and I need to extract the images in 
RGB format. But the PDJpeg class does not seem to work correctly; the colors 
are wrong.  Example:

- Original image in the PDF : http://ludoda.free.fr/IMAGE_IN_PDF.jpg
- Extracted image: http://ludoda.free.fr/IMAGE_EXTRACTED.jpg


You can download the PDF : http://ludoda.free.fr/PORSCHE_CMYK.PDF

and try my simple Test Case (I'm using PDFbox 1.8.5): 
{code}
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDJpeg;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;

/**
 * Reproduction case for PDFBOX-2128: extracts every JPEG image XObject from
 * a PDF twice, once through {@code PDJpeg.write2file} and once through
 * {@code PDJpeg.getRGBImage()} + {@code ImageIO.write}, so the colors of the
 * two outputs can be compared with the image embedded in the PDF.
 */
public class TestCase {

    /** PDF that contains the CMYK JPEG images. */
    private static final String INPUT_PDF = "C:\\temp\\PORSCHE_CMYK.pdf";

    /** Common prefix of every output file; the image counter is appended. */
    private static final String OUTPUT_PREFIX =
            "C:\\workspace\\JAVA_PDFTools\\temp\\image";

    /**
     * Runs the extraction and reports any I/O failure on standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        try
        {
            System.out.println("START EXTRACTING IMAGES...");
            read_pdf();
            System.out.println("COMPLETE");
        }
        catch (IOException ex)
        {
            System.out.println("" + ex);
        }
    }

    /**
     * Loads {@link #INPUT_PDF}, walks all pages and writes each JPEG image
     * XObject to disk in two ways (see the class comment). Output files are
     * numbered consecutively across pages, starting at 1.
     *
     * @throws IOException if the PDF cannot be read or an image cannot be
     *                     written
     */
    public static void read_pdf() throws IOException
    {
        PDDocument document = PDDocument.load(INPUT_PDF);
        try
        {
            @SuppressWarnings("unchecked")
            List<PDPage> pages = document.getDocumentCatalog().getAllPages();
            int i = 1;

            for (PDPage page : pages)
            {
                PDResources resources = page.getResources();
                if (resources == null)
                {
                    // Page without a resource dictionary: nothing to extract.
                    continue;
                }
                Map<String, PDXObject> pageImages = resources.getXObjects();
                if (pageImages == null)
                {
                    continue;
                }

                for (Map.Entry<String, PDXObject> entry : pageImages.entrySet())
                {
                    PDXObject xobject = entry.getValue();
                    if (!(xobject instanceof PDXObjectImage))
                    {
                        continue;
                    }
                    if (!(xobject instanceof PDJpeg))
                    {
                        // Casting any other image type to PDJpeg would throw
                        // ClassCastException; this test is about PDJpeg only.
                        System.out.println("Skipping non-JPEG image "
                                + entry.getKey());
                        continue;
                    }
                    PDJpeg image = (PDJpeg) xobject;

                    // Test 1 : write2file
                    image.write2file(OUTPUT_PREFIX + i);

                    // Test 2: getRGBImage
                    BufferedImage bimage = image.getRGBImage();
                    File outputfile = new File(OUTPUT_PREFIX + i + "_buffered.jpg");
                    if (!ImageIO.write(bimage, "jpg", outputfile))
                    {
                        // ImageIO.write returns false when no suitable writer
                        // exists; without this check the file is silently missing.
                        System.out.println("No JPEG writer accepted image " + i);
                    }
                    i++;
                }
            }
        }
        finally
        {
            // Always release the document, even if extraction fails midway.
            document.close();
        }
    }
}
{code}


  was:
I have a PDF with CMYK images inside and I need to extract the images in 
RGB format. But the PDJpeg class does not seem to work correctly; the colors 
are wrong.  Example:

- Original image in the PDF : http://ludoda.free.fr/IMAGE_IN_PDF.jpg
- Extracted image: http://ludoda.free.fr/IMAGE_EXTRACTED.jpg


You can download the PDF : http://ludoda.free.fr/PORSCHE_CMYK.PDF

and try my simple Test Case (I'm using PDFbox 1.8.5): 

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDJpeg;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;

/**
 * Reproduction case for PDFBOX-2128: extracts every JPEG image XObject from
 * a PDF twice, once through {@code PDJpeg.write2file} and once through
 * {@code PDJpeg.getRGBImage()} + {@code ImageIO.write}, so the colors of the
 * two outputs can be compared with the image embedded in the PDF.
 */
public class TestCase {

    /** PDF that contains the CMYK JPEG images. */
    private static final String INPUT_PDF = "C:\\temp\\PORSCHE_CMYK.pdf";

    /** Common prefix of every output file; the image counter is appended. */
    private static final String OUTPUT_PREFIX =
            "C:\\workspace\\JAVA_PDFTools\\temp\\image";

    /**
     * Runs the extraction and reports any I/O failure on standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        try
        {
            System.out.println("START EXTRACTING IMAGES...");
            read_pdf();
            System.out.println("COMPLETE");
        }
        catch (IOException ex)
        {
            System.out.println("" + ex);
        }
    }

    /**
     * Loads {@link #INPUT_PDF}, walks all pages and writes each JPEG image
     * XObject to disk in two ways (see the class comment). Output files are
     * numbered consecutively across pages, starting at 1.
     *
     * @throws IOException if the PDF cannot be read or an image cannot be
     *                     written
     */
    public static void read_pdf() throws IOException
    {
        PDDocument document = PDDocument.load(INPUT_PDF);
        try
        {
            @SuppressWarnings("unchecked")
            List<PDPage> pages = document.getDocumentCatalog().getAllPages();
            int i = 1;

            for (PDPage page : pages)
            {
                PDResources resources = page.getResources();
                if (resources == null)
                {
                    // Page without a resource dictionary: nothing to extract.
                    continue;
                }
                Map<String, PDXObject> pageImages = resources.getXObjects();
                if (pageImages == null)
                {
                    continue;
                }

                for (Map.Entry<String, PDXObject> entry : pageImages.entrySet())
                {
                    PDXObject xobject = entry.getValue();
                    if (!(xobject instanceof PDXObjectImage))
                    {
                        continue;
                    }
                    if (!(xobject instanceof PDJpeg))
                    {
                        // Casting any other image type to PDJpeg would throw
                        // ClassCastException; this test is about PDJpeg only.
                        System.out.println("Skipping non-JPEG image "
                                + entry.getKey());
                        continue;
                    }
                    PDJpeg image = (PDJpeg) xobject;

                    // Test 1 : write2file
                    image.write2file(OUTPUT_PREFIX + i);

                    // Test 2: getRGBImage
                    BufferedImage bimage = image.getRGBImage();
                    File outputfile = new File(OUTPUT_PREFIX + i + "_buffered.jpg");
                    if (!ImageIO.write(bimage, "jpg", outputfile))
                    {
                        // ImageIO.write returns false when no suitable writer
                        // exists; without this check the file is silently missing.
                        System.out.println("No JPEG writer accepted image " + i);
                    }
                    i++;
                }
            }
        }
        finally
        {
            // Always release the document, even if extraction fails midway.
            document.close();
        }
    }
}




> CMYK images are not supported correctly in the PDJpeg class
> -----------------------------------------------------------
>
>                 Key: PDFBOX-2128
>                 URL: https://issues.apache.org/jira/browse/PDFBOX-2128
>             Project: PDFBox
>          Issue Type: Bug
>          Components: PDModel
>    Affects Versions: 1.8.5
>         Environment: Windows 7 Professional
> Running jvm: Java HotSpot(TM) 64-Bit Server VM - 1.6.0_26-b03 - 20.1-b02 - 
> Sun Microsystems Inc
>            Reporter: Ludovic Davoine
>              Labels: PDJpeg, cmyk, images
>             Fix For: 1.8.5
>
>   Original Estimate: 1h
>  Remaining Estimate: 1h
>
> I have a PDF with CMYK images inside and I need to extract the images in 
> RGB format. But the PDJpeg class does not seem to work correctly; the colors 
> are wrong.  Example:
> - Original image in the PDF : http://ludoda.free.fr/IMAGE_IN_PDF.jpg
> - Extracted image: http://ludoda.free.fr/IMAGE_EXTRACTED.jpg
> You can download the PDF : http://ludoda.free.fr/PORSCHE_CMYK.PDF
> and try my simple Test Case (I'm using PDFbox 1.8.5): 
> {code}
> import java.awt.image.BufferedImage;
> import java.io.File;
> import java.io.IOException;
> import java.util.Iterator;
> import java.util.List;
> import java.util.Map;
> import javax.imageio.ImageIO;
> import org.apache.pdfbox.pdmodel.PDDocument;
> import org.apache.pdfbox.pdmodel.PDPage;
> import org.apache.pdfbox.pdmodel.PDResources;
> import org.apache.pdfbox.pdmodel.graphics.xobject.PDJpeg;
> import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
> import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
> public class TestCase {
>       
>       public static void main(String[] args) 
>       {
>               try 
>               {
>                       System.out.println("START EXTRACTING IMAGES...");
>                       read_pdf();
>                       System.out.println("COMPLETE");
>               }
>               catch (IOException ex) 
>               {
>                   System.out.println("" + ex);
>               }
>       }
>       public static void read_pdf() throws IOException 
>       {
>                   PDDocument document = null; 
>                   document = PDDocument.load("C:\\temp\\PORSCHE_CMYK.pdf");
>                   @SuppressWarnings("unchecked")
>                   List<PDPage> pages = 
> document.getDocumentCatalog().getAllPages();
>                   Iterator<PDPage> iter = pages.iterator(); 
>                   int i =1;
>                   while (iter.hasNext())
>                   {
>                       PDPage page = (PDPage) iter.next();
>                       PDResources resources = page.getResources();
>                       Map<String, PDXObject> pageImages = 
> resources.getXObjects();
>                       if (pageImages != null)
>                       { 
>                           Iterator<String> imageIter = 
> pageImages.keySet().iterator();
>                           while (imageIter.hasNext())
>                           {
>                               String key = (String) imageIter.next();
>                               if(pageImages.get(key) instanceof 
> PDXObjectImage)
>                               {
>                                       PDJpeg image = (PDJpeg) 
> pageImages.get(key);
>                                       
>                                       // Test 1 : write2file
>                                       
> image.write2file("C:\\workspace\\JAVA_PDFTools\\temp\\image" + i);
>                                       
>                                       // Test 2: getRGBImage
>                                       BufferedImage 
> bimage=image.getRGBImage();
>                                       File outputfile = new 
> File("C:\\workspace\\JAVA_PDFTools\\temp\\image" + i+"_buffered.jpg");
>                                       ImageIO.write(bimage, "jpg", 
> outputfile);
>                                       i ++;
>                               }
>                           }
>                       }
>                   }
>               }
> }
> {code}



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to