[
https://issues.apache.org/jira/browse/PDFBOX-2688?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14324878#comment-14324878
]
Ankit Khanal commented on PDFBOX-2688:
--------------------------------------
The following single-file servlet application, deployed in Tomcat, should
reproduce the problem.
The problem can be triggered with Apache Benchmark (ab), for example:
ab -n 100000 -c 10 http://localhost:8980/appContext/PDFToImage
package pdfbox2688;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;
import javax.imageio.ImageIO;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
/**
* Servlet implementation class PDFToImageConversionServlet
*/
@WebServlet("/PDFToImage")
public class PDFToImageConversionServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
/**
* @see HttpServlet#HttpServlet()
*/
public PDFToImageConversionServlet() {
super();
// TODO Auto-generated constructor stub
}
/**
* @see HttpServlet#doGet(HttpServletRequest request,
HttpServletResponse response)
*/
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse
response) throws ServletException, IOException {
File pdfFile = getPDFFile();
if (pdfFile != null) {
BufferedImage image = rasterizeUsingPdfBox(pdfFile);
ByteArrayOutputStream baos = new
ByteArrayOutputStream();
ImageIO.write(image, "png", baos);
response.setContentType("image/png");
response.getOutputStream().write(baos.toByteArray());
response.flushBuffer();
}
}
private BufferedImage rasterizeUsingPdfBox(File pdfFile) throws
IOException {
BufferedImage image = null;
PDDocument document = null;
PDPage page = null;
try {
document = PDDocument.loadNonSeq(pdfFile, null);
@SuppressWarnings("unchecked")
List<PDPage> pages =
document.getDocumentCatalog().getAllPages();
page = pages.get(0);
int imageType = BufferedImage.TYPE_INT_ARGB;
image = page.convertToImage(imageType, 300);
} catch (Exception e) {
e.printStackTrace();
} finally {
if (document != null) {
if (page != null) {
page.clear();
}
document.close();
}
}
return image;
}
private File getPDFFile() throws IOException {
InputStream is = null;
FileOutputStream pdfOS = null;
try {
URL url = new
URL("http://www.xmlpdf.com/manualfiles/hello-world.pdf");
//
"https://github.com/mozilla/pdf.js/raw/master/examples/helloworld/helloworld.pdf");
is = url.openStream();
File pdfFile = File.createTempFile("Testpdf", ".pdf",
new File("/tmp"));
pdfOS = new FileOutputStream(pdfFile);
byte[] buf = new byte[4096];
int n;
while ((n = is.read(buf)) >= 0) {
pdfOS.write(buf, 0, n);
}
pdfOS.close();
is.close();
return pdfFile;
} catch (Exception e) {
return null;
} finally {
if (is != null) {
is.close();
}
if (pdfOS != null) {
pdfOS.close();
}
}
}
}
> sun.java2d.Disposer leak when using pdf to image conversion in a
> server(tomcat)
> -------------------------------------------------------------------------------
>
> Key: PDFBOX-2688
> URL: https://issues.apache.org/jira/browse/PDFBOX-2688
> Project: PDFBox
> Issue Type: Bug
> Affects Versions: 1.8.8
> Reporter: Ankit Khanal
>
> I am running PDF to PNG conversion in a servlet container (Tomcat) with 6GB
> of heap space. The problem appears only after running thousands of
> conversion requests.
> JVM memory statistics show heap usage never going above 1GB, and non-heap
> memory is also constant, but the Linux or Windows process consumes around
> 8GB of memory.
> A heap dump shows that the largest object is sun.java2d.Disposer, at around
> 200MB.
> It seems that the leaked memory is native memory used by Java2D, which is not
> accounted for in the heap statistics; the growth of sun.java2d.Disposer is
> proportional to the growth of process memory (Linux 'top' command).
> {code}
> BufferedImage image = null;
> ByteArrayInputStream pdfStream = getpdfbytesfromExistingDoc();
> PDDocument document = null;
> PDPage page = null;
> COSDocument cosDoc = null;
> PDFParser parser = null;
> try {
> parser = new PDFParser(pdfStream);
> parser.parse();
> cosDoc = parser.getDocument();
> document = new PDDocument(cosDoc);
> @SuppressWarnings("unchecked")
> List<PDPage> pages =
> document.getDocumentCatalog().getAllPages();
> page = pages.get(0);
> int imageType = BufferedImage.TYPE_INT_ARGB;
> image = page.convertToImage(imageType, 72);
> } finally {
> if (cosDoc != null) {
> cosDoc.close();
> }
> if (parser != null) {
> parser.clearResources();
> }
> if (document != null) {
> if (page != null) {
> page.clear();
> }
> document.close();
> }
> }
> return image;
> }
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]