Sun Peter created PDFBOX-3885:
---------------------------------
Summary: Get wrong image location
Key: PDFBOX-3885
URL: https://issues.apache.org/jira/browse/PDFBOX-3885
Project: PDFBox
Issue Type: Bug
Components: Rendering
Affects Versions: 2.0.7
Environment: osx, java8
Reporter: Sun Peter
Fix For: 2.0.7
Attachments: src.pdf
I'm using the code below to extract images to an XML file.
{code:java}
public class ImageExtractor extends PDFStreamEngine {
private List<Image> images = new ArrayList<>();
public ImageExtractor() {
addOperator(new Concatenate());
addOperator(new DrawObject());
addOperator(new SetGraphicsStateParameters());
addOperator(new Save());
addOperator(new Restore());
addOperator(new SetMatrix());
}
/**
* This is used to handle an operation.
*
* @param operator The operation to perform.
* @param operands The list of arguments.
* @throws IOException If there is an error processing the operation.
*/
@Override
protected void processOperator(Operator operator, List<COSBase> operands)
throws IOException {
String operation = operator.getName();
if ("Do".equals(operation)) {
COSName objectName = (COSName) operands.get(0);
PDXObject xobject = getResources().getXObject(objectName);
if (xobject instanceof PDImageXObject) {
PDImageXObject image = (PDImageXObject) xobject;
String name = objectName.getName();
String format = image.getSuffix();
Matrix ctmNew =
getGraphicsState().getCurrentTransformationMatrix();
float imageWidth = ctmNew.getScalingFactorX();
float imageHeight = ctmNew.getScalingFactorY();
// position in user space units. 1 unit = 1/72 inch at 72 dpi
float x = ctmNew.getTranslateX();
float y = ctmNew.getTranslateY();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
BufferedImage bufferedImage = image.getOpaqueImage();
ImageIO.write(bufferedImage, format, bos);
images.add(new Image(x, y, imageWidth, imageHeight, name,
format, bos.toByteArray()));
}
} else {
super.processOperator(operator, operands);
}
}
public byte[] toZipFile(PDDocument doc) throws IOException {
int pageNum = 0;
StringBuilder builder = new StringBuilder("<?xml version=\"1.0\"
encoding=\"UTF-8\"?>\n<document>\n");
ByteArrayOutputStream bos = new ByteArrayOutputStream();
ZipOutputStream zipOut = new ZipOutputStream(bos);
try {
for (PDPage page : doc.getPages()) {
builder.append(String.format("<page width=\"%f\"
height=\"%f\">\n", page.getBBox().getWidth(), page.getBBox().getHeight()));
pageNum++;
this.processPage(page);
for (Image image : images) {
byte[] data = image.getData();
ZipEntry zipEntry = new ZipEntry(image.getName() + "." +
image.getFormat());
zipOut.putNextEntry(zipEntry);
zipOut.write(data);
builder.append(String.format("<image x=\"%f\" y=\"%f\"
width=\"%f\" height=\"%f\" name=\"%s\" format=\"%s\"></image>\n", image.getX(),
image.getY(), image.getWidth(), image.getHeight(), image.getName(),
image.getFormat()));
}
builder.append("</page>\n");
}
builder.append("</document>");
System.out.println(builder);
ZipEntry xmlFile = new ZipEntry("images.xml");
zipOut.putNextEntry(xmlFile);
zipOut.write(builder.toString().getBytes(Charset.defaultCharset()));
} finally {
zipOut.close();
}
return bos.toByteArray();
}
}
{code}
The output is below. The y location is wrong; it should be 434.
{code:xml}
<document>
<page width="595.000000" height="842.000000">
<image x="48.000000" y="108.000000" width="315.000000" height="300.000000"
name="Im12" format="jpg"></image>
</page>
</document>
{code}
I got the right output with poppler.
{code:xml}
<image top="434" left="48" width="315" height="300" src="src-1_1.jpg"/>
{code}
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]