[ https://issues.apache.org/jira/browse/PDFBOX-4062?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Tilman Hausherr updated PDFBOX-4062: ------------------------------------ Description: I Need to Fetch the Color of Text in a PDF using pdfbox 2.0.0 , for the same i have written java code as {code} import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor; import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor; import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN; import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace; import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor; import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor; import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition; /** * This is an example on how to get the colors of text. Note that this will not tell the background, * and will only work properly if the text is not overwritten later, and only if the text rendering * modes are 0, 1 or 2. In the PDF 32000 specification, please read 9.3.6 "Text Rendering Mode" to * know more. Mode 0 (FILL) is the default. Mode 1 (STROKE) will make glyphs look "hollow". Mode 2 * (FILL_STROKE) will make glyphs look "fat". 
* * @author Ben Litchfield * @author Tilman Hausherr */ public class PDF_Box_1 extends PDFTextStripper { /** * Instantiate a new PDFTextStripper object. * * @throws IOException If there is an error loading the properties. */ public PDF_Box_1() throws IOException { addOperator(new SetStrokingColorSpace()); addOperator(new SetNonStrokingColorSpace()); addOperator(new SetStrokingDeviceCMYKColor()); addOperator(new SetNonStrokingDeviceCMYKColor()); addOperator(new SetNonStrokingDeviceRGBColor()); addOperator(new SetStrokingDeviceRGBColor()); addOperator(new SetNonStrokingDeviceGrayColor()); addOperator(new SetStrokingDeviceGrayColor()); addOperator(new SetStrokingColor()); addOperator(new SetStrokingColorN()); addOperator(new SetNonStrokingColor()); addOperator(new SetNonStrokingColorN()); } /** * This will print the documents data. * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. */ public static void main(String[] args) throws IOException { try (PDDocument document = PDDocument.load(new File("D://Vimal//New folder//ab.pdf"))) { PDFTextStripper stripper = new PDF_Box_1(); stripper.setSortByPosition(true); stripper.setStartPage(0); stripper.setEndPage(document.getNumberOfPages()); stripper.getText(document); } } @Override protected void processTextPosition(TextPosition text) { super.processTextPosition(text); PDColor strokingColor = getGraphicsState().getStrokingColor(); PDColor nonStrokingColor = getGraphicsState().getNonStrokingColor(); String unicode = text.getUnicode(); RenderingMode renderingMode = getGraphicsState().getTextState().getRenderingMode(); System.out.println("Unicode: " + unicode); System.out.println("Rendering mode: " + renderingMode); System.out.println("Stroking color: " + strokingColor); System.out.println("Non-Stroking color: " + nonStrokingColor); System.out.println("Non-Stroking color: " + nonStrokingColor); System.out.println(); // See the PrintTextLocations for more attributes } /** 
* This will print the usage for this document. */ /* private static void usage() { System.err.println("Usage: java " + PDF_Box_1.class.getName() + "D://Vimal//New folder//b1.pdf"); }*/ } {code} But by using this I am getting the text of a byte, but I want it for the word. Can you please help me with how to do this? was: I Need to Fetch the Color of Text in a PDF using pdfbox 2.0.0 , for the same i have written java code as import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor; import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor; import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN; import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace; import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor; import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor; import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition; /** * This is an example on how to get the colors of text. 
Note that this will not tell the background, * and will only work properly if the text is not overwritten later, and only if the text rendering * modes are 0, 1 or 2. In the PDF 32000 specification, please read 9.3.6 "Text Rendering Mode" to * know more. Mode 0 (FILL) is the default. Mode 1 (STROKE) will make glyphs look "hollow". Mode 2 * (FILL_STROKE) will make glyphs look "fat". * * @author Ben Litchfield * @author Tilman Hausherr */ public class PDF_Box_1 extends PDFTextStripper { /** * Instantiate a new PDFTextStripper object. * * @throws IOException If there is an error loading the properties. */ public PDF_Box_1() throws IOException { addOperator(new SetStrokingColorSpace()); addOperator(new SetNonStrokingColorSpace()); addOperator(new SetStrokingDeviceCMYKColor()); addOperator(new SetNonStrokingDeviceCMYKColor()); addOperator(new SetNonStrokingDeviceRGBColor()); addOperator(new SetStrokingDeviceRGBColor()); addOperator(new SetNonStrokingDeviceGrayColor()); addOperator(new SetStrokingDeviceGrayColor()); addOperator(new SetStrokingColor()); addOperator(new SetStrokingColorN()); addOperator(new SetNonStrokingColor()); addOperator(new SetNonStrokingColorN()); } /** * This will print the documents data. * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. 
*/ public static void main(String[] args) throws IOException { try (PDDocument document = PDDocument.load(new File("D://Vimal//New folder//ab.pdf"))) { PDFTextStripper stripper = new PDF_Box_1(); stripper.setSortByPosition(true); stripper.setStartPage(0); stripper.setEndPage(document.getNumberOfPages()); stripper.getText(document); } } @Override protected void processTextPosition(TextPosition text) { super.processTextPosition(text); PDColor strokingColor = getGraphicsState().getStrokingColor(); PDColor nonStrokingColor = getGraphicsState().getNonStrokingColor(); String unicode = text.getUnicode(); RenderingMode renderingMode = getGraphicsState().getTextState().getRenderingMode(); System.out.println("Unicode: " + unicode); System.out.println("Rendering mode: " + renderingMode); System.out.println("Stroking color: " + strokingColor); System.out.println("Non-Stroking color: " + nonStrokingColor); System.out.println("Non-Stroking color: " + nonStrokingColor); System.out.println(); // See the PrintTextLocations for more attributes } /** * This will print the usage for this document. */ /* private static void usage() { System.err.println("Usage: java " + PDF_Box_1.class.getName() + "D://Vimal//New folder//b1.pdf"); }*/ } But by using this I am getting Text of a Byte , but i want it for the word . Can you please help me in how to do this. 
> Fetch Color of Text using PDFBox > -------------------------------- > > Key: PDFBOX-4062 > URL: https://issues.apache.org/jira/browse/PDFBOX-4062 > Project: PDFBox > Issue Type: Bug > Affects Versions: 2.0.0 > Reporter: Vimal Kumar > Priority: Blocker > Attachments: b1.pdf > > > I Need to Fetch the Color of Text in a PDF using pdfbox 2.0.0 , for the same > i have written java code as > {code} > import java.io.ByteArrayOutputStream; > import java.io.File; > import java.io.IOException; > import java.io.OutputStreamWriter; > import java.io.Writer; > import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor; > import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN; > import > org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace; > import > org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor; > import > org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor; > import > org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor; > import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor; > import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN; > import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace; > import > org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor; > import > org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor; > import > org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor; > import org.apache.pdfbox.pdmodel.PDDocument; > import org.apache.pdfbox.pdmodel.graphics.color.PDColor; > import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; > import org.apache.pdfbox.text.PDFTextStripper; > import org.apache.pdfbox.text.TextPosition; > /** > * This is an example on how to get the colors of text. 
Note that this will > not tell the background, > * and will only work properly if the text is not overwritten later, and only > if the text rendering > * modes are 0, 1 or 2. In the PDF 32000 specification, please read 9.3.6 > "Text Rendering Mode" to > * know more. Mode 0 (FILL) is the default. Mode 1 (STROKE) will make glyphs > look "hollow". Mode 2 > * (FILL_STROKE) will make glyphs look "fat". > * > * @author Ben Litchfield > * @author Tilman Hausherr > */ > public class PDF_Box_1 extends PDFTextStripper > { > /** > * Instantiate a new PDFTextStripper object. > * > * @throws IOException If there is an error loading the properties. > */ > public PDF_Box_1() throws IOException > { > addOperator(new SetStrokingColorSpace()); > addOperator(new SetNonStrokingColorSpace()); > addOperator(new SetStrokingDeviceCMYKColor()); > addOperator(new SetNonStrokingDeviceCMYKColor()); > addOperator(new SetNonStrokingDeviceRGBColor()); > addOperator(new SetStrokingDeviceRGBColor()); > addOperator(new SetNonStrokingDeviceGrayColor()); > addOperator(new SetStrokingDeviceGrayColor()); > addOperator(new SetStrokingColor()); > addOperator(new SetStrokingColorN()); > addOperator(new SetNonStrokingColor()); > addOperator(new SetNonStrokingColorN()); > } > /** > * This will print the documents data. > * > * @param args The command line arguments. > * > * @throws IOException If there is an error parsing the document. 
> */ > public static void main(String[] args) throws IOException > { > > try (PDDocument document = PDDocument.load(new > File("D://Vimal//New folder//ab.pdf"))) > { > PDFTextStripper stripper = new PDF_Box_1(); > stripper.setSortByPosition(true); > stripper.setStartPage(0); > stripper.setEndPage(document.getNumberOfPages()); > stripper.getText(document); > } > > } > @Override > protected void processTextPosition(TextPosition text) > { > super.processTextPosition(text); > PDColor strokingColor = getGraphicsState().getStrokingColor(); > PDColor nonStrokingColor = getGraphicsState().getNonStrokingColor(); > String unicode = text.getUnicode(); > RenderingMode renderingMode = > getGraphicsState().getTextState().getRenderingMode(); > System.out.println("Unicode: " + unicode); > System.out.println("Rendering mode: " + renderingMode); > System.out.println("Stroking color: " + strokingColor); > System.out.println("Non-Stroking color: " + nonStrokingColor); > System.out.println("Non-Stroking color: " + nonStrokingColor); > System.out.println(); > // See the PrintTextLocations for more attributes > } > /** > * This will print the usage for this document. > */ > /* private static void usage() > { > System.err.println("Usage: java " + PDF_Box_1.class.getName() + > "D://Vimal//New folder//b1.pdf"); > }*/ > } > {code} > But by using this I am getting Text of a Byte , but i want it for the word . > Can you please help me in how to do this. -- This message was sent by Atlassian JIRA (v6.4.14#64029) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@pdfbox.apache.org For additional commands, e-mail: dev-h...@pdfbox.apache.org