Dear experts, I am trying to use iTextSharp to extract images from pdf. Some images are extracted fine, but many are extracted as negatives of the original images. Could you please help me with it? Here is the code I am using: public static void ExtractImagesFromPDF(string sourcePdf, string outputPath) { PdfReader pdf = new PdfReader(sourcePdf);
try { for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++) { PdfDictionary pg = pdf.GetPageN(pageNumber); var pageImages = new List<byte[]>(); getAllImages(pg, pageImages, pdf); if ((pageImages != null)) { int imgNum = 1; foreach (byte[] bytes in pageImages) { using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes)) { memStream.Position = 0; System.Drawing.Image img = System.Drawing.Image.FromStream(memStream); // must save the file while stream is open. if (!System.IO.Directory.Exists(outputPath)) System.IO.Directory.CreateDirectory(outputPath); string path = System.IO.Path.Combine(outputPath, String.Format(@"Page_{0}_Image_{1}.png", pageNumber, imgNum)); System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1); parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0); // GetImageEncoder is found below this method System.Drawing.Imaging.ImageCodecInfo jpegEncoder = GetImageEncoder("PNG"); img.Save(path, jpegEncoder, parms); } imgNum++; } } } } catch { throw; } finally { pdf.Close(); } } public static System.Drawing.Imaging.ImageCodecInfo GetImageEncoder(string imageType) { imageType = imageType.ToUpperInvariant(); foreach (ImageCodecInfo info in ImageCodecInfo.GetImageEncoders()) { if (info.FormatDescription == imageType) { return info; } } return null; } private static void getAllImages(PdfDictionary dict, List<byte[]> images, PdfReader doc) { PdfDictionary res = (PdfDictionary)PdfReader.GetPdfObject(dict.Get(PdfName.RESOURCES)); PdfDictionary xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)); if (xobj != null) { foreach (PdfName name in xobj.Keys) { PdfObject obj = xobj.Get(name); if ((obj.IsIndirect())) { PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj); PdfName subtype = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE)); if (PdfName.IMAGE.Equals(subtype)) { int xrefIdx = ((PRIndirectReference)obj).Number; PdfObject pdfObj = doc.GetPdfObject(xrefIdx); PdfStream str = (PdfStream)pdfObj; byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)str); string filter = tg.Get(PdfName.FILTER).ToString(); string width = tg.Get(PdfName.WIDTH).ToString(); string height = tg.Get(PdfName.HEIGHT).ToString(); string bpp = tg.Get(PdfName.BITSPERCOMPONENT).ToString(); if (filter == "/FlateDecode") { bytes = PdfReader.FlateDecode(bytes, true); System.Drawing.Imaging.PixelFormat pixelFormat = default(System.Drawing.Imaging.PixelFormat); switch (int.Parse(bpp)) { case 1: pixelFormat = System.Drawing.Imaging.PixelFormat.Format1bppIndexed; break; case 8: pixelFormat = System.Drawing.Imaging.PixelFormat.Format8bppIndexed; break; case 16: pixelFormat = System.Drawing.Imaging.PixelFormat.Format16bppArgb1555; break; case 24: pixelFormat = System.Drawing.Imaging.PixelFormat.Format24bppRgb; break; case 48: pixelFormat = System.Drawing.Imaging.PixelFormat.Format48bppRgb; break; default: throw new Exception("Unknown pixel format " + bpp); } System.Drawing.Bitmap bmp = new System.Drawing.Bitmap(Int32.Parse(width), Int32.Parse(height), pixelFormat); System.Drawing.Imaging.BitmapData bmd = bmp.LockBits(new System.Drawing.Rectangle(0, 0, Int32.Parse(width), Int32.Parse(height)), System.Drawing.Imaging.ImageLockMode.WriteOnly, pixelFormat); System.Runtime.InteropServices.Marshal.Copy(bytes, 0, bmd.Scan0, bytes.Length); bmp.UnlockBits(bmd); using (var ms = new System.IO.MemoryStream()) { bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Png); bytes = ms.GetBuffer(); } } images.Add(bytes); } else if (PdfName.FORM.Equals(subtype) | PdfName.GROUP.Equals(subtype)) { getAllImages(tg, images, doc); } } } } } -- View this message in context: http://itext-general.2136553.n4.nabble.com/Images-extract-problem-tp3079140p3079140.html Sent from the iText - General mailing list archive at Nabble.com. ------------------------------------------------------------------------------ This SF Dev2Dev email is sponsored by: WikiLeaks The End of the Free Internet http://p.sf.net/sfu/therealnews-com _______________________________________________ iText-questions mailing list iText-questions@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/itext-questions Many questions posted to this list can (and will) be answered with a reference to the iText book: http://www.itextpdf.com/book/ Please check the keywords list before you ask for examples: http://itextpdf.com/themes/keywords.php