Author: tilman
Date: Wed Oct 28 17:38:45 2015
New Revision: 1711080

URL: http://svn.apache.org/viewvc?rev=1711080&view=rev
Log:
PDFBOX-2246: add example how to sometimes get text colors with PDFTextStripper

Added:
    
pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextColors.java
   (with props)

Added: 
pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextColors.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextColors.java?rev=1711080&view=auto
==============================================================================
--- 
pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextColors.java
 (added)
+++ 
pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextColors.java
 Wed Oct 28 17:38:45 2015
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.examples.util;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
+import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
+import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
+import 
org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
+import 
org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
+import 
org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
+import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
+import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
+import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
+import 
org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
+import 
org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
+import 
org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
+import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.apache.pdfbox.text.TextPosition;
+
+/**
+ * This is an example on how to get the colors of text. Note that this will not tell the
+ * background, and will only work properly if the text is not overwritten later, and only if the
+ * text rendering modes are 0, 1 or 2. In the PDF 32000 specification, please read 9.3.6
+ * "Text Rendering Mode" to know more. Mode 0 (FILL) is the default. Mode 1 (STROKE) will make
+ * glyphs look "hollow". Mode 2 (FILL_STROKE) will make glyphs look "fat".
+ *
+ * @author Ben Litchfield
+ * @author Tilman Hausherr
+ */
+public class PrintTextColors extends PDFTextStripper
+{
+    /**
+     * Creates the stripper and registers the stroking / non-stroking color operators, so that
+     * the graphics state queried in {@link #processTextPosition(TextPosition)} reflects the
+     * colors set by the content stream.
+     *
+     * @throws IOException If the parent text stripper cannot be initialized.
+     */
+    public PrintTextColors() throws IOException
+    {
+        // color space selection
+        addOperator(new SetStrokingColorSpace());
+        addOperator(new SetNonStrokingColorSpace());
+        // device color spaces (CMYK, RGB, gray)
+        addOperator(new SetStrokingDeviceCMYKColor());
+        addOperator(new SetNonStrokingDeviceCMYKColor());
+        addOperator(new SetNonStrokingDeviceRGBColor());
+        addOperator(new SetStrokingDeviceRGBColor());
+        addOperator(new SetNonStrokingDeviceGrayColor());
+        addOperator(new SetStrokingDeviceGrayColor());
+        // generic color operators
+        addOperator(new SetStrokingColor());
+        addOperator(new SetStrokingColorN());
+        addOperator(new SetNonStrokingColor());
+        addOperator(new SetNonStrokingColorN());
+    }
+
+    /**
+     * Loads the PDF named on the command line and prints, for every text position, its unicode
+     * text, text rendering mode, stroking color and non-stroking color to standard output.
+     * Prints a usage message instead if the argument count is not exactly one.
+     *
+     * @param args The command line arguments; expects a single PDF file name.
+     *
+     * @throws IOException If there is an error parsing the document.
+     */
+    public static void main(String[] args) throws IOException
+    {
+        if (args.length != 1)
+        {
+            usage();
+        }
+        else
+        {
+            PDDocument document = null;
+            try
+            {
+                document = PDDocument.load(new File(args[0]));
+
+                PDFTextStripper stripper = new PrintTextColors();
+                stripper.setSortByPosition(true);
+                // page numbers of the text stripper are 1-based
+                stripper.setStartPage(1);
+                stripper.setEndPage(document.getNumberOfPages());
+
+                // The extracted text itself is not needed; the interesting output is printed
+                // by processTextPosition(), so the text goes to a throw-away in-memory writer.
+                Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
+                stripper.writeText(document, dummy);
+            }
+            finally
+            {
+                if (document != null)
+                {
+                    document.close();
+                }
+            }
+        }
+    }
+
+    /**
+     * Lets the parent class handle the text position, then prints the unicode text together
+     * with the rendering mode and the stroking / non-stroking colors taken from the current
+     * graphics state.
+     *
+     * @param text The text position that is being processed.
+     */
+    @Override
+    protected void processTextPosition(TextPosition text)
+    {
+        super.processTextPosition(text);
+
+        PDColor strokingColor = getGraphicsState().getStrokingColor();
+        PDColor nonStrokingColor = getGraphicsState().getNonStrokingColor();
+        String unicode = text.getUnicode();
+        RenderingMode renderingMode = getGraphicsState().getTextState().getRenderingMode();
+        System.out.println("Unicode:            " + unicode);
+        System.out.println("Rendering mode:     " + renderingMode);
+        System.out.println("Stroking color:     " + strokingColor);
+        System.out.println("Non-Stroking color: " + nonStrokingColor);
+        System.out.println();
+
+        // See the PrintTextLocations for more attributes
+    }
+
+    /**
+     * This will print the usage for this program.
+     */
+    private static void usage()
+    {
+        System.err.println("Usage: java " + PrintTextColors.class.getName() + " <input-pdf>");
+    }
+}

Propchange: 
pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextColors.java
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to