Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java?rev=1211082&r1=1211081&r2=1211082&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java Tue Dec 6 20:19:29 2011
@@ -1,172 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.pdfbox.examples.util;
-
-import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.exceptions.InvalidPasswordException;
-import org.apache.pdfbox.exceptions.WrappedIOException;
-
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
-import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
-import org.apache.pdfbox.util.Matrix;
-import org.apache.pdfbox.util.PDFOperator;
-import org.apache.pdfbox.util.PDFStreamEngine;
-import org.apache.pdfbox.util.ResourceLoader;
-
-import java.awt.geom.AffineTransform;
-import java.awt.geom.NoninvertibleTransformException;
-import java.io.IOException;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * This is an example on how to get the x/y coordinates of image locations.
- *
- * Every page of the input document is run through the stream engine; each
- * time a "Do" operator paints an image XObject, its name, position and size
- * are printed to standard output.
- *
- * Usage: java org.apache.pdfbox.examples.util.PrintImageLocations <input-pdf>
- *
- * @author <a href="mailto:[email protected]">Ben Litchfield</a>
- * @version $Revision: 1.5 $
- */
-public class PrintImageLocations extends PDFStreamEngine
-{
-    /**
-     * Default constructor. Configures the stream engine with the operator
-     * mapping loaded from PDFTextStripper.properties.
-     *
-     * @throws IOException If there is an error loading text stripper properties.
-     */
-    public PrintImageLocations() throws IOException
-    {
-        super( ResourceLoader.loadProperties(
-                "org/apache/pdfbox/resources/PDFTextStripper.properties", true ) );
-    }
-
-    /**
-     * This will print the image locations of every page of the document.
-     *
-     * Exactly one argument is expected; otherwise the usage message is printed.
-     * An encrypted document is decrypted with the empty password; if that
-     * fails, an error is printed and the JVM exits with status 1.
-     *
-     * @param args The command line arguments.
-     *
-     * @throws Exception If there is an error parsing the document.
-     */
-    public static void main( String[] args ) throws Exception
-    {
-        if( args.length != 1 )
-        {
-            usage();
-        }
-        else
-        {
-            PDDocument document = null;
-            try
-            {
-                document = PDDocument.load( args[0] );
-                if( document.isEncrypted() )
-                {
-                    try
-                    {
-                        document.decrypt( "" );
-                    }
-                    catch( InvalidPasswordException e )
-                    {
-                        System.err.println( "Error: Document is encrypted with a password." );
-                        // System.exit does not return, so the finally block below
-                        // is not executed on this path.
-                        System.exit( 1 );
-                    }
-                }
-                PrintImageLocations printer = new PrintImageLocations();
-                List allPages = document.getDocumentCatalog().getAllPages();
-                for( int i=0; i<allPages.size(); i++ )
-                {
-                    PDPage page = (PDPage)allPages.get( i );
-                    // the printed page index is zero-based
-                    System.out.println( "Processing page: " + i );
-                    // A page may have no content stream (PrintTextLocations guards
-                    // the same call); skip it instead of failing on null.
-                    if( page.getContents() != null )
-                    {
-                        printer.processStream( page, page.findResources(), page.getContents().getStream() );
-                    }
-                }
-            }
-            finally
-            {
-                if( document != null )
-                {
-                    document.close();
-                }
-            }
-        }
-    }
-
-    /**
-     * This is used to handle an operation.
-     *
-     * For the "Do" operator the named XObject is looked up in the current
-     * resources; if it is an image, its position and size are printed after
-     * the page rotation has been taken out of the current transformation
-     * matrix. A "Do" on any other kind of XObject is ignored here (it is not
-     * passed to the superclass). All other operators are delegated to the
-     * superclass.
-     *
-     * @param operator The operation to perform.
-     * @param arguments The list of arguments.
-     *
-     * @throws IOException If there is an error processing the operation.
-     */
-    protected void processOperator( PDFOperator operator, List arguments ) throws IOException
-    {
-        String operation = operator.getOperation();
-        if( operation.equals( "Do" ) )
-        {
-            COSName objectName = (COSName)arguments.get( 0 );
-            Map xobjects = getResources().getXObjects();
-            PDXObject xobject = (PDXObject)xobjects.get( objectName.getName() );
-            if( xobject instanceof PDXObjectImage )
-            {
-                try
-                {
-                    PDXObjectImage image = (PDXObjectImage)xobject;
-                    PDPage page = getCurrentPage();
-                    Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
-                    double rotationInRadians =(page.findRotation() * Math.PI)/180;
-
-                    // Build the inverse of the page rotation so it can be
-                    // multiplied out of the CTM.
-                    AffineTransform rotation = new AffineTransform();
-                    rotation.setToRotation( rotationInRadians );
-                    AffineTransform rotationInverse = rotation.createInverse();
-                    Matrix rotationInverseMatrix = new Matrix();
-                    rotationInverseMatrix.setFromAffineTransform( rotationInverse );
-
-                    Matrix unrotatedCTM = ctm.multiply( rotationInverseMatrix );
-                    float xScale = unrotatedCTM.getXScale();
-                    float yScale = unrotatedCTM.getYScale();
-
-                    // NOTE(review): the division by 100 in the size calculation is
-                    // kept from the original; its unit/meaning is not evident here - confirm.
-                    System.out.println( "Found image[" + objectName.getName() + "] " +
-                            "at " + unrotatedCTM.getXPosition() + "," + unrotatedCTM.getYPosition() +
-                            " size=" + (xScale/100f*image.getWidth()) + "," + (yScale/100f*image.getHeight() ));
-                }
-                catch( NoninvertibleTransformException e )
-                {
-                    throw new WrappedIOException( e );
-                }
-            }
-        }
-        else
-        {
-            super.processOperator( operator, arguments );
-        }
-    }
-
-    /**
-     * This will print the usage for this example.
-     */
-    private static void usage()
-    {
-        System.err.println( "Usage: java org.apache.pdfbox.examples.util.PrintImageLocations <input-pdf>" );
-    }
-
-}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java?rev=1211082&r1=1211081&r2=1211082&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java Tue Dec 6 20:19:29 2011
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.pdfbox.examples.util;
-
-import org.apache.pdfbox.exceptions.InvalidPasswordException;
-
-
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.common.PDStream;
-import org.apache.pdfbox.util.PDFTextStripper;
-import org.apache.pdfbox.util.TextPosition;
-
-import java.io.IOException;
-
-import java.util.List;
-
-/**
- * This is an example on how to get some x/y coordinates of text.
- *
- * Every page that has a content stream is processed, and one line is
- * printed to standard output for each text position reported by the stripper.
- *
- * Usage: java org.apache.pdfbox.examples.util.PrintTextLocations <input-pdf>
- *
- * @author <a href="mailto:[email protected]">Ben Litchfield</a>
- * @version $Revision: 1.7 $
- */
-public class PrintTextLocations extends PDFTextStripper
-{
-    /**
-     * Default constructor. Turns on sorting by position.
-     *
-     * @throws IOException If there is an error loading text stripper properties.
-     */
-    public PrintTextLocations() throws IOException
-    {
-        super.setSortByPosition( true );
-    }
-
-    /**
-     * This will print the text locations of every page of the document.
-     *
-     * Exactly one argument is expected; otherwise the usage message is printed.
-     * An encrypted document is decrypted with the empty password; if that
-     * fails, an error is printed and the JVM exits with status 1.
-     *
-     * @param args The command line arguments.
-     *
-     * @throws Exception If there is an error parsing the document.
-     */
-    public static void main( String[] args ) throws Exception
-    {
-        if( args.length != 1 )
-        {
-            usage();
-        }
-        else
-        {
-            PDDocument document = null;
-            try
-            {
-                document = PDDocument.load( args[0] );
-                if( document.isEncrypted() )
-                {
-                    try
-                    {
-                        document.decrypt( "" );
-                    }
-                    catch( InvalidPasswordException e )
-                    {
-                        System.err.println( "Error: Document is encrypted with a password." );
-                        // System.exit does not return, so the finally block below
-                        // is not executed on this path.
-                        System.exit( 1 );
-                    }
-                }
-                PrintTextLocations printer = new PrintTextLocations();
-                List allPages = document.getDocumentCatalog().getAllPages();
-                for( int i=0; i<allPages.size(); i++ )
-                {
-                    PDPage page = (PDPage)allPages.get( i );
-                    // the printed page index is zero-based
-                    System.out.println( "Processing page: " + i );
-                    PDStream contents = page.getContents();
-                    if( contents != null )
-                    {
-                        // use the stream that was just null-checked rather than
-                        // fetching it from the page a second time
-                        printer.processStream( page, page.findResources(), contents.getStream() );
-                    }
-                }
-            }
-            finally
-            {
-                if( document != null )
-                {
-                    document.close();
-                }
-            }
-        }
-    }
-
-    /**
-     * A method provided as an event interface to allow a subclass to perform
-     * some specific functionality when text needs to be processed. Here it
-     * prints the adjusted x/y position, font size, x scale, height, width of
-     * a space, adjusted width and the character(s) of the text position.
-     *
-     * @param text The text to be processed
-     */
-    protected void processTextPosition( TextPosition text )
-    {
-        System.out.println( "String[" + text.getXDirAdj() + "," +
-                text.getYDirAdj() + " fs=" + text.getFontSize() + " xscale=" +
-                text.getXScale() + " height=" + text.getHeightDir() + " space=" +
-                text.getWidthOfSpace() + " width=" +
-                text.getWidthDirAdj() + "]" + text.getCharacter() );
-    }
-
-    /**
-     * This will print the usage for this example.
-     */
-    private static void usage()
-    {
-        System.err.println( "Usage: java org.apache.pdfbox.examples.util.PrintTextLocations <input-pdf>" );
-    }
-
-}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java?rev=1211082&r1=1211081&r2=1211082&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java Tue Dec 6 20:19:29 2011
@@ -1,122 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.pdfbox.examples.util;
-
-import org.apache.pdfbox.pdfparser.PDFStreamParser;
-import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
-
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.common.PDStream;
-import org.apache.pdfbox.util.PDFOperator;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * This is an example on how to remove all text from PDF document.
- *
- * Each page's content stream is tokenized, the "Tj" and "TJ" operators are
- * dropped together with their single operand, and the remaining tokens are
- * written back as the page's new, compressed content stream.
- *
- * Usage: java org.apache.pdfbox.examples.util.RemoveAllText <input-pdf> <output-pdf>
- *
- * @author <a href="mailto:[email protected]">Ben Litchfield</a>
- * @version $Revision: 1.2 $
- */
-public class RemoveAllText
-{
-    /**
-     * Default constructor.
-     */
-    private RemoveAllText()
-    {
-        //example class should not be instantiated
-    }
-
-    /**
-     * This will remove all text from a PDF document.
-     *
-     * Exactly two arguments are expected (input and output file); otherwise
-     * the usage message is printed. Encrypted documents are rejected with an
-     * error message and exit status 1.
-     *
-     * @param args The command line arguments.
-     *
-     * @throws Exception If there is an error parsing the document.
-     */
-    public static void main( String[] args ) throws Exception
-    {
-        if( args.length != 2 )
-        {
-            usage();
-        }
-        else
-        {
-            PDDocument document = null;
-            try
-            {
-                document = PDDocument.load( args[0] );
-                if( document.isEncrypted() )
-                {
-                    System.err.println( "Error: Encrypted documents are not supported for this example." );
-                    // System.exit does not return, so the finally block below
-                    // is not executed on this path.
-                    System.exit( 1 );
-                }
-                List allPages = document.getDocumentCatalog().getAllPages();
-                for( int i=0; i<allPages.size(); i++ )
-                {
-                    PDPage page = (PDPage)allPages.get( i );
-                    PDStream contents = page.getContents();
-                    if( contents == null )
-                    {
-                        // A page may have no content stream (PrintTextLocations
-                        // guards the same call); there is nothing to strip.
-                        continue;
-                    }
-                    PDFStreamParser parser = new PDFStreamParser( contents );
-                    parser.parse();
-                    List tokens = parser.getTokens();
-                    List newTokens = new ArrayList();
-                    for( int j=0; j<tokens.size(); j++ )
-                    {
-                        Object token = tokens.get( j );
-                        if( token instanceof PDFOperator )
-                        {
-                            PDFOperator op = (PDFOperator)token;
-                            // NOTE(review): only Tj and TJ are handled; confirm whether
-                            // the ' and " text-showing operators should be removed too.
-                            if( op.getOperation().equals( "TJ") || op.getOperation().equals( "Tj" ))
-                            {
-                                //remove the one argument to this operator; a stream
-                                //that starts with the operator has nothing to remove
-                                if( !newTokens.isEmpty() )
-                                {
-                                    newTokens.remove( newTokens.size() -1 );
-                                }
-                                continue;
-                            }
-                        }
-                        newTokens.add( token );
-
-                    }
-                    PDStream newContents = new PDStream( document );
-                    ContentStreamWriter writer = new ContentStreamWriter( newContents.createOutputStream() );
-                    writer.writeTokens( newTokens );
-                    newContents.addCompression();
-                    page.setContents( newContents );
-                }
-                document.save( args[1] );
-            }
-            finally
-            {
-                if( document != null )
-                {
-                    document.close();
-                }
-            }
-        }
-    }
-
-    /**
-     * This will print the usage for this example.
-     */
-    private static void usage()
-    {
-        System.err.println( "Usage: java org.apache.pdfbox.examples.util.RemoveAllText <input-pdf> <output-pdf>" );
-    }
-
-}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/package.html
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/package.html?rev=1211082&r1=1211081&r2=1211082&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/package.html (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/examples/util/package.html Tue Dec 6 20:19:29 2011
@@ -1,25 +0,0 @@
-<!--
- ! Licensed to the Apache Software Foundation (ASF) under one or more
- ! contributor license agreements. See the NOTICE file distributed with
- !
this work for additional information regarding copyright ownership.
- ! The ASF licenses this file to You under the Apache License, Version 2.0
- ! (the "License"); you may not use this file except in compliance with
- ! the License. You may obtain a copy of the License at
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head>
-
-</head>
-<body>
-The classes in this package show how to use the PDFBox util API.
-</body>
-</html>
