This should work. Use PdfStamper to get the result out.
import java.io.*;
import java.awt.Color;
import com.lowagie.text.*;
import com.lowagie.text.pdf.*;
import java.util.*;
/**
 * Sample that strips every {@code PdfName.METADATA} entry from the object
 * graph of a PDF opened with iText's {@code PdfReader}.
 *
 * <p>The graph is walked starting at the trailer. Dictionaries and streams
 * have their metadata key removed, arrays are walked element by element, and
 * indirect references are followed at most once each.
 */
public class metadata {

    /**
     * Opens a hard-coded sample PDF and strips its metadata entries.
     *
     * <p>Any exception is caught and its stack trace printed. This method
     * does not write the modified document anywhere; the changes only exist
     * in the in-memory reader.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        try {
            String path = "C:\\downloads\\XMP_for_CreativePros2004.pdf";
            PdfReader reader = new PdfReader(path);
            removeMetadata(reader);
        } catch (Exception de) {
            de.printStackTrace();
        }
    }

    /**
     * Removes the metadata key from every dictionary or stream reachable
     * from the reader's trailer, then asks the reader to drop unused objects.
     *
     * @param reader the document to modify in place
     */
    public static void removeMetadata(PdfReader reader) {
        // One flag per cross-reference slot, so each indirect object is
        // expanded only once even if it is referenced from several places.
        boolean[] visited = new boolean[reader.getXrefSize()];
        removeMetadataNode(reader.getTrailer(), visited);
        // Clean up whatever became unreferenced after the keys were removed.
        reader.removeUnusedObjects();
    }

    /**
     * Visits one object and dispatches on its type. Object types other than
     * dictionary, stream, array and indirect reference are ignored.
     *
     * @param obj  the object to visit; {@code null} is ignored
     * @param hits per-object-number "already followed" flags
     */
    private static void removeMetadataNode(PdfObject obj, boolean hits[]) {
        if (obj == null) {
            return;
        }
        int kind = obj.type();
        if (kind == PdfObject.DICTIONARY || kind == PdfObject.STREAM) {
            // Streams are handled through their dictionary view.
            stripDictionary((PdfDictionary) obj, hits);
        } else if (kind == PdfObject.ARRAY) {
            walkArray((PdfArray) obj, hits);
        } else if (kind == PdfObject.INDIRECT) {
            followReference((PRIndirectReference) obj, hits);
        }
    }

    /** Removes the metadata key from {@code dic} and recurses into every other value. */
    private static void stripDictionary(PdfDictionary dic, boolean hits[]) {
        Iterator keys = dic.getKeys().iterator();
        while (keys.hasNext()) {
            PdfName key = (PdfName) keys.next();
            if (!key.equals(PdfName.METADATA)) {
                removeMetadataNode(dic.get(key), hits);
            } else {
                System.out.println("Metadata found");
                // Remove through the iterator so the key set is not
                // modified behind the iterator's back.
                keys.remove();
            }
        }
    }

    /** Recurses into each element of {@code array}; the array itself is not modified. */
    private static void walkArray(PdfArray array, boolean hits[]) {
        Iterator items = array.getArrayList().iterator();
        while (items.hasNext()) {
            removeMetadataNode((PdfObject) items.next(), hits);
        }
    }

    /** Resolves {@code ref} and recurses into its target unless that object number was already followed. */
    private static void followReference(PRIndirectReference ref, boolean hits[]) {
        int num = ref.getNumber();
        if (hits[num]) {
            return;
        }
        // Mark before recursing so reference cycles terminate.
        hits[num] = true;
        removeMetadataNode(PdfReader.getPdfObject(ref), hits);
    }
}
> -----Original Message-----
> From: Greg Sabatino [mailto:[EMAIL PROTECTED]
> Sent: Tuesday, April 12, 2005 10:06 PM
> To: Paulo Soares
> Subject: Re: [iText-questions] Need to strip all XMP metadata
> when using PdfCopy
>
> Thanks for that reply Paulo. I know that you are busy and I
> appreciate
> your answering me. I am a novice when it comes to pdf processing
> and have found iText to be terrific so far. You and Bruno have done
> some
> excellent work! I am hoping that I can ask you to take one more look
> at this.
>
> I looked at PdfReader.removeUnusedObjects() and came up
> with the code below. It doesn't seem to find any streams or Metadata
> embedded
> in an example pdf file found at
> http://www.adobe.com/products/xmp/pdfs/XMP_for_CreativePros2004.pdf
>
> What did I miss?
>
> Thanks again,
> Greg Sabatino
>
>
>
> public void removeMetadata(PdfReader reader) {
> removeMetadataNode(reader.getTrailer());
> reader.removeUnusedObjects();
> }
>
> private void removeMetadataNode(PdfObject obj) {
> if (obj == null) return;
> System.out.println("*Looking at obj " + obj);
> if(obj.isDictionary() || obj.isStream()){
> System.out.println("*Looking for metadata.* " + obj);
> PdfDictionary dic = (PdfDictionary)obj;
> if(dic.contains(PdfName.METADATA))
> System.out.println("*Found metadata.*");
> dic.remove(PdfName.METADATA);
> for (Iterator it = dic.getKeys().iterator();
> it.hasNext();)
> {
> PdfName key = (PdfName)it.next();
> System.out.println("Key" + key);
> PdfObject v = dic.get(key);
> removeMetadataNode(v);
> }
> }
> }
>
> On Apr 12, 2005, at 11:50 AM, Paulo Soares wrote:
>
> > That's quite easy. Look at the source of
> > PdfReader.removeUnusedObjects() and get what you need to
> > reach all the
> > pdf objects. If the PdfObject is a dictionary or a PRStream use
> > remove(PdfName.METADATA). Finally to clean up what was removed call
> > PdfReader.removeUnusedObjects().
> >
> >> -----Original Message-----
> >> From: [EMAIL PROTECTED]
> >> [mailto:[EMAIL PROTECTED] On
> >> Behalf Of Greg Sabatino
> >> Sent: Tuesday, April 12, 2005 4:27 PM
> >> To: [email protected]
> >> Subject: [iText-questions] Need to strip all XMP metadata
> >> when using PdfCopy
> >>
> >> Hello, I am using PdfCopy to copy existing pdf's but need a
> >> way to positively remove any XMP metadata. As a first attempt
> >> I am using "reader.getCatalog().remove(PdfName.METADATA)"
> >> prior to creating a new Document using the reader. This seems
> >> to work just fine but I see in the PDF spec that XMP metadata
> >> can be contained in any components that are represented as a
> >> stream or dictionary. Does anyone know if there is a way to
> >> find and remove all those metadata streams if they exist
> >> anywhere in the document? Thanks, Greg
> >>
> >
>
>
-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now.
http://ads.osdn.com/?ad_ide95&alloc_id396&op=click
_______________________________________________
iText-questions mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/itext-questions