Hello,
I am
trying to replace the duplicate images with available images in PDF file, but
the result is corrupted.
PdfReader.KillIndirect
nulls the duplicate image, but writer.AddDirectImageSimple won't replace it
with the reference of the previously available image. What's is the problem
here? Thanks!
Here is
the code:
using
System.Collections.Generic;
using
System.Diagnostics;
using
System.IO;
using
System.Security.Cryptography;
using
System.Text;
using
iTextSharp.text;
using
iTextSharp.text.pdf;
namespace
ReplaceDuplicateImages
{
class Program
{
/// <summary>
/// Adding one image, 2 times.
/// </summary>
private static void createSampleFile()
{
using (var pdfDoc = new
Document(PageSize.A4))
{
var pdfWriter =
PdfWriter.GetInstance(pdfDoc, new FileStream("Test.pdf",
FileMode.Create));
pdfDoc.Open();
var table = new PdfPTable(new
float[] { 1, 2 });
table.AddCell(Image.GetInstance("01.png"));
table.AddCell(Image.GetInstance("01.png"));
pdfDoc.Add(table);
}
}
private static void
RemoveDuplicateImagesFromPdfFile(string inFile, string outFile)
{
var pdfReader = new
PdfReader(inFile);
var pdfStamper = new
PdfStamper(pdfReader, new FileStream(outFile, FileMode.Create));
var writer = pdfStamper.Writer;
var md5Service = new
MD5CryptoServiceProvider();
var enc = new UTF8Encoding();
var imagesDictionary = new
Dictionary<string, PRIndirectReference>();
int pageNum =
pdfReader.NumberOfPages;
for (int i = 1; i <= pageNum;
i++)
{
var page =
pdfReader.GetPageN(i);
var resources =
PdfReader.GetPdfObject(page.Get(PdfName.RESOURCES)) as PdfDictionary;
if (resources == null)
continue;
var xObject =
PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT)) as PdfDictionary;
if (xObject == null) continue;
foreach (var name in
xObject.Keys)
{
var pdfObject =
xObject.Get(name);
if
(!pdfObject.IsIndirect()) continue;
var imgObject =
PdfReader.GetPdfObject(pdfObject) as PdfDictionary;
if (imgObject == null)
continue;
var subType =
PdfReader.GetPdfObject(imgObject.Get(PdfName.SUBTYPE)) as PdfName;
if (subType == null)
continue;
if
(!PdfName.IMAGE.Equals(subType)) continue;
var imageBytes =
PdfReader.GetStreamBytesRaw((PRStream)imgObject);
var md5 =
enc.GetString(md5Service.ComputeHash(imageBytes));
if
(!imagesDictionary.ContainsKey(md5)) // is it duplicate?
{
imagesDictionary.Add(md5,
(PRIndirectReference)pdfObject);
}
else
{
PdfReader.KillIndirect(pdfObject); // nulls the
duplicate image
// trying to replace it
with the reference of the available image
var imageRef =
imagesDictionary[md5];
var image =
Image.GetInstance(imageRef);
Image maskImage =
image.ImageMask; // it's always null here.
if (maskImage != null)
writer.AddDirectImageSimple(maskImage);
writer.AddDirectImageSimple(image,
(PRIndirectReference)pdfObject);
}
}
}
pdfReader.RemoveUnusedObjects();
pdfReader.Close();
pdfStamper.Close();
}
static void Main(string[] args)
{
createSampleFile();
RemoveDuplicateImagesFromPdfFile("test.pdf",
"Optimized.pdf");
Process.Start("Optimized.pdf");
}
}
}using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace ReplaceDuplicateImages
{
class Program
{
/// <summary>
/// Adding one image, 2 times.
/// </summary>
private static void createSampleFile()
{
using (var pdfDoc = new Document(PageSize.A4))
{
var pdfWriter = PdfWriter.GetInstance(pdfDoc, new FileStream("Test.pdf", FileMode.Create));
pdfDoc.Open();
var table = new PdfPTable(new float[] { 1, 2 });
table.AddCell(Image.GetInstance("01.png"));
table.AddCell(Image.GetInstance("01.png"));
pdfDoc.Add(table);
}
}
private static void RemoveDuplicateImagesFromPdfFile(string inFile, string outFile)
{
var pdfReader = new PdfReader(inFile);
var pdfStamper = new PdfStamper(pdfReader, new FileStream(outFile, FileMode.Create));
var writer = pdfStamper.Writer;
var md5Service = new MD5CryptoServiceProvider();
var enc = new UTF8Encoding();
var imagesDictionary = new Dictionary<string, PRIndirectReference>();
int pageNum = pdfReader.NumberOfPages;
for (int i = 1; i <= pageNum; i++)
{
var page = pdfReader.GetPageN(i);
var resources = PdfReader.GetPdfObject(page.Get(PdfName.RESOURCES)) as PdfDictionary;
if (resources == null) continue;
var xObject = PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT)) as PdfDictionary;
if (xObject == null) continue;
foreach (var name in xObject.Keys)
{
var pdfObject = xObject.Get(name);
if (!pdfObject.IsIndirect()) continue;
var imgObject = PdfReader.GetPdfObject(pdfObject) as PdfDictionary;
if (imgObject == null) continue;
var subType = PdfReader.GetPdfObject(imgObject.Get(PdfName.SUBTYPE)) as PdfName;
if (subType == null) continue;
if (!PdfName.IMAGE.Equals(subType)) continue;
var imageBytes = PdfReader.GetStreamBytesRaw((PRStream)imgObject);
var md5 = enc.GetString(md5Service.ComputeHash(imageBytes));
if (!imagesDictionary.ContainsKey(md5)) // is it duplicate?
{
imagesDictionary.Add(md5, (PRIndirectReference)pdfObject);
}
else
{
PdfReader.KillIndirect(pdfObject); // nulls the duplicate image
// trying to replace it with the reference of the available image
var imageRef = imagesDictionary[md5];
var image = Image.GetInstance(imageRef);
Image maskImage = image.ImageMask; // it's always null here.
if (maskImage != null)
writer.AddDirectImageSimple(maskImage);
writer.AddDirectImageSimple(image, (PRIndirectReference)pdfObject);
}
}
}
pdfReader.RemoveUnusedObjects();
pdfReader.Close();
pdfStamper.Close();
}
static void Main(string[] args)
{
createSampleFile();
RemoveDuplicateImagesFromPdfFile("test.pdf", "Optimized.pdf");
Process.Start("Optimized.pdf");
}
}
}------------------------------------------------------------------------------
LogMeIn Rescue: Anywhere, Anytime Remote support for IT. Free Trial
Remotely access PCs and mobile devices and provide instant support
Improve your efficiency, and focus on delivering more value-add services
Discover what IT Professionals Know. Rescue delivers
http://p.sf.net/sfu/logmein_12329d2d
_______________________________________________
iText-questions mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/itext-questions
iText(R) is a registered trademark of 1T3XT BVBA.
Many questions posted to this list can (and will) be answered with a reference
to the iText book: http://www.itextpdf.com/book/
Please check the keywords list before you ask for examples:
http://itextpdf.com/themes/keywords.php