Author: tilman Date: Thu May 18 17:30:33 2023 New Revision: 1909913 URL: http://svn.apache.org/viewvc?rev=1909913&view=rev Log: PDFBOX-5598: create a command line utility to extract document XMP content or page XMP content
Added: pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java (with props) Modified: pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java Added: pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java?rev=1909913&view=auto ============================================================================== --- pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java (added) +++ pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java Thu May 18 17:30:33 2023 @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.tools; + +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; + +import org.apache.pdfbox.io.IOUtils; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.common.PDMetadata; + +/** + * Extract the XMP metadata from the document or from a page. 
+ * + * @author Tilman Hausherr + */ +public class ExtractXMP +{ + @SuppressWarnings({"squid:S2068"}) + private static final String PASSWORD = "-password"; + private static final String CONSOLE = "-console"; + private static final String PAGE = "-page"; + + /** + * Public no-argument constructor; main() creates an instance to run the extraction. + */ + public ExtractXMP() + { + // nothing to initialize + } + + /** + * Command line entry point; delegates to startExtraction(String[]). + * + * @param args Command line arguments: optional -password, -page and -console options, the input PDF file and an optional output file. + * + * @throws IOException if there is an error reading the document or extracting the XMP data. + */ + public static void main( String[] args ) throws IOException + { + // suppress the Dock icon on OS X + System.setProperty("apple.awt.UIElement", "true"); + + ExtractXMP extractor = new ExtractXMP(); + extractor.startExtraction(args); + } + + public void startExtraction(String[] args) throws IOException + { + boolean toConsole = false; + @SuppressWarnings({"squid:S2068"}) + String password = ""; + int page = 0; + String pdfFile = null; + String outputFile = null; + + for (int i = 0; i < args.length; i++) + { + if (args[i].equals(PASSWORD)) + { + i++; + if (i >= args.length) + { + usage(); + } + password = args[i]; + } + else if (args[i].equals(PAGE)) + { + i++; + if (i >= args.length) + { + usage(); + } + page = Integer.parseInt(args[i]); + } + else if (args[i].equals(CONSOLE)) + { + toConsole = true; + } + else + { + if (pdfFile == null) + { + pdfFile = args[i]; + } + else + { + outputFile = args[i]; + } + } + } + + if (pdfFile == null) + { + usage(); + } + else + { + PrintStream output = null; + PDDocument document = null; + try + { + if (outputFile == null && pdfFile.length() > 4) + { + outputFile = new File(pdfFile.substring(0, pdfFile.length() - 4) + ".xml").getAbsolutePath(); + } + document = PDDocument.load(new File(pdfFile), password); + + PDDocumentCatalog catalog = document.getDocumentCatalog(); + PDMetadata meta; + if (page == 0) + { + meta = catalog.getMetadata(); + } + else + { + if (page > 
document.getNumberOfPages()) + { + System.err.println("Page " + page + " doesn't exist"); + System.exit(1); + } + meta = document.getPage(page - 1).getMetadata(); + } + if (meta == null) + { + System.err.println("No XMP metadata available"); + System.exit(1); + } + if (toConsole) + { + output = System.out; + } + else + { + output = new PrintStream(outputFile); + } + output.write(meta.toByteArray()); + output.close(); + } + finally + { + IOUtils.closeQuietly(output); + IOUtils.closeQuietly(document); + } + } + System.exit(0); + } + + /** + * Prints the usage message to System.err and exits with status 1. + */ + private static void usage() + { + String message = "Usage: java -jar pdfbox-app-x.y.z.jar ExtractXMP [options] <inputfile> [output-text-file]\n" + + "\nOptions:\n" + + " -password <password> : Password to decrypt document\n" + + " -console : Send text to console instead of file\n" + + " -page <number> : The optional page to extract XMP (1 based)\n" + + " <inputfile> : The PDF document to use\n" + + " [output-xml-file] : The file to write the XMP to"; + System.err.println(message); + System.exit( 1 ); + } + +} Propchange: pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java?rev=1909913&r1=1909912&r2=1909913&view=diff ============================================================================== --- pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java (original) +++ pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java Thu May 18 17:30:33 2023 @@ -59,6 +59,10 @@ public final class PDFBox { ExtractText.main(arguments); } + else if (command.equals("ExtractXMP")) + { + ExtractXMP.main(arguments); + } else if 
(command.equals("ExtractImages")) { ExtractImages.main(arguments); @@ -123,6 +127,7 @@ public final class PDFBox + " Decrypt\n" + " Encrypt\n" + " ExtractText\n" + + " ExtractXMP\n" + " ExtractImages\n" + " OverlayPDF\n" + " PrintPDF\n"