[ https://issues.apache.org/jira/browse/PDFBOX-4062?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Tilman Hausherr closed PDFBOX-4062. ----------------------------------- Resolution: Not A Bug > Fetch Color of Text using PDFBox > -------------------------------- > > Key: PDFBOX-4062 > URL: https://issues.apache.org/jira/browse/PDFBOX-4062 > Project: PDFBox > Issue Type: Bug > Affects Versions: 2.0.0 > Reporter: Vimal Kumar > Priority: Blocker > Attachments: b1.pdf > > > I need to fetch the color of text in a PDF using PDFBox 2.0.0. For this, > I have written the following Java code: > {code} > import java.io.ByteArrayOutputStream; > import java.io.File; > import java.io.IOException; > import java.io.OutputStreamWriter; > import java.io.Writer; > import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor; > import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN; > import > org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace; > import > org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor; > import > org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor; > import > org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor; > import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor; > import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN; > import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace; > import > org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor; > import > org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor; > import > org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor; > import org.apache.pdfbox.pdmodel.PDDocument; > import org.apache.pdfbox.pdmodel.graphics.color.PDColor; > import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; > import org.apache.pdfbox.text.PDFTextStripper; > import org.apache.pdfbox.text.TextPosition; > /** > * This is an example on how to get the colors of text. 
Note that this will > not tell the background, > * and will only work properly if the text is not overwritten later, and only > if the text rendering > * modes are 0, 1 or 2. In the PDF 32000 specification, please read 9.3.6 > "Text Rendering Mode" to > * know more. Mode 0 (FILL) is the default. Mode 1 (STROKE) will make glyphs > look "hollow". Mode 2 > * (FILL_STROKE) will make glyphs look "fat". > * > * @author Ben Litchfield > * @author Tilman Hausherr > */ > public class PDF_Box_1 extends PDFTextStripper > { > /** > * Instantiate a new PDFTextStripper object. > * > * @throws IOException If there is an error loading the properties. > */ > public PDF_Box_1() throws IOException > { > addOperator(new SetStrokingColorSpace()); > addOperator(new SetNonStrokingColorSpace()); > addOperator(new SetStrokingDeviceCMYKColor()); > addOperator(new SetNonStrokingDeviceCMYKColor()); > addOperator(new SetNonStrokingDeviceRGBColor()); > addOperator(new SetStrokingDeviceRGBColor()); > addOperator(new SetNonStrokingDeviceGrayColor()); > addOperator(new SetStrokingDeviceGrayColor()); > addOperator(new SetStrokingColor()); > addOperator(new SetStrokingColorN()); > addOperator(new SetNonStrokingColor()); > addOperator(new SetNonStrokingColorN()); > } > /** > * This will print the documents data. > * > * @param args The command line arguments. > * > * @throws IOException If there is an error parsing the document. 
> */ > public static void main(String[] args) throws IOException > { > > try (PDDocument document = PDDocument.load(new > File("D://Vimal//New folder//ab.pdf"))) > { > PDFTextStripper stripper = new PDF_Box_1(); > stripper.setSortByPosition(true); > stripper.setStartPage(0); > stripper.setEndPage(document.getNumberOfPages()); > stripper.getText(document); > } > > } > @Override > protected void processTextPosition(TextPosition text) > { > super.processTextPosition(text); > PDColor strokingColor = getGraphicsState().getStrokingColor(); > PDColor nonStrokingColor = getGraphicsState().getNonStrokingColor(); > String unicode = text.getUnicode(); > RenderingMode renderingMode = > getGraphicsState().getTextState().getRenderingMode(); > System.out.println("Unicode: " + unicode); > System.out.println("Rendering mode: " + renderingMode); > System.out.println("Stroking color: " + strokingColor); > System.out.println("Non-Stroking color: " + nonStrokingColor); > System.out.println("Non-Stroking color: " + nonStrokingColor); > System.out.println(); > // See the PrintTextLocations for more attributes > } > /** > * This will print the usage for this document. > */ > /* private static void usage() > { > System.err.println("Usage: java " + PDF_Box_1.class.getName() + > "D://Vimal//New folder//b1.pdf"); > }*/ > } > {code} > But using this, I am getting the text one byte at a time, whereas I want it for the whole word. > Can you please help me with how to do this? -- This message was sent by Atlassian JIRA (v6.4.14#64029) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@pdfbox.apache.org For additional commands, e-mail: dev-h...@pdfbox.apache.org