Tilman Hausherr, Roberto Nibali: Thanks for this excellent piece of code. It seems to deal very well with AcroForms. But here are some stupid questions I have:
I may have some problems understanding the API, but it seems to me that those PDActionJavaScript objects are nothing more than COSString objects stored under COSName("JS") in a dictionary. Why can't I just list all objects by this name and get them? I assume it's because they are inside some dictionary, but then isn't there a utility method that can exhaustively list all "primitive" objects (string, long)? Can you share an example of how one would achieve this? I don't mind writing some regexes afterwards to select only the ones that contain JavaScript. I expect most of the JavaScript code to be placed inside AcroForms, and there can obviously be some code in the OpenAction ... do you have any idea where else it could be? I am new to the PDF format, but I wouldn't think that it allows executable JavaScript code to be put just anywhere in the file. Thanks for your answers, Alin On Tue, Jan 19, 2016 at 9:45 AM Alin Ghitulan <alinghitu...@gmail.com> wrote: > Thanks a lot ! I will try it out tonight and see how it goes :). > > On Tue, Jan 19, 2016, 08:18 Tilman Hausherr <thaush...@t-online.de> wrote: > >> Am 18.01.2016 um 23:19 schrieb Roberto Nibali: >> > This then calls dumpJavaScriptEntries() for all >> non-PDFNonTerminalFields, >> > which finally dumps the javascript portions of your PDF (courtesy of >> Tilman >> > Hausherr): >> >> Ah, I forgot that I had written something at that time. Here's the >> original code I wrote, although this was for that file only and other >> files can have javascript elsewhere too. >> >> >> >> /* >> * Licensed to the Apache Software Foundation (ASF) under one or more >> * contributor license agreements. See the NOTICE file distributed with >> * this work for additional information regarding copyright ownership. >> * The ASF licenses this file to You under the Apache License, Version >> 2.0 >> * (the "License"); you may not use this file except in compliance with >> * the License. 
You may obtain a copy of the License at >> * >> * http://www.apache.org/licenses/LICENSE-2.0 >> * >> * Unless required by applicable law or agreed to in writing, software >> * distributed under the License is distributed on an "AS IS" BASIS, >> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or >> implied. >> * See the License for the specific language governing permissions and >> * limitations under the License. >> */ >> package pdfboxpageimageextraction; >> >> import java.io.File; >> import java.io.IOException; >> import java.util.List; >> import org.apache.pdfbox.pdmodel.PDDocument; >> import org.apache.pdfbox.pdmodel.PDDocumentCatalog; >> import org.apache.pdfbox.pdmodel.interactive.action.PDAction; >> import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript; >> import >> org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions; >> import >> org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; >> import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; >> import org.apache.pdfbox.pdmodel.interactive.form.PDField; >> import org.apache.pdfbox.pdmodel.interactive.form.PDNonTerminalField; >> import org.apache.pdfbox.pdmodel.interactive.form.PDTerminalField; >> >> /** >> * This example will take a PDF document and print all the fields from >> the file. >> * >> * @author Ben Litchfield >> * >> */ >> public class PrintJavaScriptFields >> { >> >> /** >> * This will print all the fields from the document. >> * >> * @param pdfDocument The PDF to get the fields from. >> * >> * @throws IOException If there is an error getting the fields. 
>> */ >> public void printFields(PDDocument pdfDocument) throws IOException >> { >> PDDocumentCatalog docCatalog = pdfDocument.getDocumentCatalog(); >> PDAcroForm acroForm = docCatalog.getAcroForm(); >> List<PDField> fields = acroForm.getFields(); >> >> //System.out.println(fields.size() + " top-level fields were >> found on the form"); >> for (PDField field : fields) >> { >> processField(field, "|--", field.getPartialName()); >> } >> } >> >> private void processField(PDField field, String sLevel, String >> sParent) throws IOException >> { >> String partialName = field.getPartialName(); >> >> if (field instanceof PDTerminalField) >> { >> PDTerminalField termField = (PDTerminalField) field; >> PDFormFieldAdditionalActions fieldActions = >> field.getActions(); >> if (fieldActions != null) >> { >> System.out.println(field.getFullyQualifiedName() + ": " >> + fieldActions.getClass().getSimpleName() + " js field actionS:\n" + >> fieldActions.getCOSObject()); >> printPossibleJS(fieldActions.getK()); >> printPossibleJS(fieldActions.getC()); >> printPossibleJS(fieldActions.getF()); >> printPossibleJS(fieldActions.getV()); >> } >> for (PDAnnotationWidget widgetAction : >> termField.getWidgets()) >> { >> PDAction action = widgetAction.getAction(); >> if (action instanceof PDActionJavaScript) >> { >> System.out.println(field.getFullyQualifiedName() + >> ": " + action.getClass().getSimpleName() + " js widget action:\n" + >> action.getCOSObject()); >> printPossibleJS(action); >> } >> } >> } >> >> if (field instanceof PDNonTerminalField) >> { >> if (!sParent.equals(field.getPartialName())) >> { >> if (partialName != null) >> { >> sParent = sParent + "." 
+ partialName; >> } >> } >> //System.out.println(sLevel + sParent); >> >> for (PDField child : ((PDNonTerminalField) >> field).getChildren()) >> { >> processField(child, "| " + sLevel, sParent); >> } >> } >> else >> { >> String fieldValue = field.getValueAsString(); >> StringBuilder outputString = new StringBuilder(sLevel); >> outputString.append(sParent); >> if (partialName != null) >> { >> outputString.append(".").append(partialName); >> } >> outputString.append(" = ").append(fieldValue); >> outputString.append(", >> type=").append(field.getClass().getName()); >> //System.out.println(outputString); >> } >> } >> >> private void printPossibleJS(PDAction kAction) >> { >> if (kAction instanceof PDActionJavaScript) >> { >> PDActionJavaScript jsAction = (PDActionJavaScript) kAction; >> String jsString = jsAction.getAction(); >> if (!jsString.contains("\n")) >> { >> // Sonst erscheint in Netbeans nichts?! >> jsString = jsString.replaceAll("\r", >> "\n").replaceAll("\n\n", "\n"); >> } >> System.out.println(jsString); >> System.out.println(); >> } >> } >> >> /** >> * This will read a PDF file and print out the form elements. <br /> >> * see usage() for commandline >> * >> * @param args command line arguments >> * >> * @throws IOException If there is an error importing the FDF >> document. >> */ >> public static void main(String[] args) throws IOException >> { >> PDDocument pdf = null; >> try >> { >> pdf = PDDocument.load(new File("XXXX", "YYYYY.pdf")); >> PrintJavaScriptFields exporter = new PrintJavaScriptFields(); >> exporter.printFields(pdf); >> } >> finally >> { >> if (pdf != null) >> { >> pdf.close(); >> } >> } >> } >> >> } >> >> >> --------------------------------------------------------------------- >> To unsubscribe, e-mail: users-unsubscr...@pdfbox.apache.org >> For additional commands, e-mail: users-h...@pdfbox.apache.org >> >>