[ 
https://issues.apache.org/jira/browse/PDFBOX-2688?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14324878#comment-14324878
 ] 

Andreas Lehmkühler edited comment on PDFBOX-2688 at 2/18/15 10:45 AM:
----------------------------------------------------------------------

The following is a single-file servlet application, running in Tomcat, that 
should reproduce the problem.

The problem can be triggered with Apache Benchmark (ab), for example:

ab -n 100000 -c 10 http://localhost:8980/appContext/PDFToImage

{code}
package pdfbox2688;

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;

import javax.imageio.ImageIO;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;

/**
 * Servlet implementation class PDFToImageConversionServlet
 */
@WebServlet("/PDFToImage")
public class PDFToImageConversionServlet extends HttpServlet {
        private static final long serialVersionUID = 1L;

        /**
         * @see HttpServlet#HttpServlet()
         */
        public PDFToImageConversionServlet() {
                super();
                // TODO Auto-generated constructor stub
        }

        /**
         * @see HttpServlet#doGet(HttpServletRequest request, 
HttpServletResponse response)
         */
        @Override
        protected void doGet(HttpServletRequest request, HttpServletResponse 
response) throws ServletException, IOException {
                File pdfFile = getPDFFile();
                if (pdfFile != null) {
                        BufferedImage image = rasterizeUsingPdfBox(pdfFile);
                        ByteArrayOutputStream baos = new 
ByteArrayOutputStream();
                        ImageIO.write(image, "png", baos);
                        response.setContentType("image/png");
                        response.getOutputStream().write(baos.toByteArray());
                        response.flushBuffer();
                }
        }

        private BufferedImage rasterizeUsingPdfBox(File pdfFile) throws 
IOException {
                BufferedImage image = null;
                PDDocument document = null;
                PDPage page = null;
                try {
                        document = PDDocument.loadNonSeq(pdfFile, null);
                        @SuppressWarnings("unchecked")
                        List<PDPage> pages = 
document.getDocumentCatalog().getAllPages();
                        page = pages.get(0);
                        int imageType = BufferedImage.TYPE_INT_ARGB;
                        image = page.convertToImage(imageType, 300);
                } catch (Exception e) {
                        e.printStackTrace();
                } finally {
                        if (document != null) {
                                if (page != null) {
                                        page.clear();
                                }
                                document.close();
                        }
                }
                return image;
        }

        private File getPDFFile() throws IOException {
                InputStream is = null;
                FileOutputStream pdfOS = null;
                try {
                        URL url = new 
URL("http://www.xmlpdf.com/manualfiles/hello-world.pdf";);
                        // 
"https://github.com/mozilla/pdf.js/raw/master/examples/helloworld/helloworld.pdf";);
                        is = url.openStream();

                        File pdfFile = File.createTempFile("Testpdf", ".pdf", 
new File("/tmp"));
                        pdfOS = new FileOutputStream(pdfFile);

                        byte[] buf = new byte[4096];
                        int n;
                        while ((n = is.read(buf)) >= 0) {
                                pdfOS.write(buf, 0, n);
                        }
                        pdfOS.close();
                        is.close();
                        return pdfFile;
                } catch (Exception e) {
                        return null;
                } finally {
                        if (is != null) {
                                is.close();
                        }
                        if (pdfOS != null) {
                                pdfOS.close();
                        }
                }
        }

}
{code}


was (Author: akhanal):
The following is a single-file servlet application, running in Tomcat, that 
should reproduce the problem.

The problem can be triggered with Apache Benchmark (ab), for example:

ab -n 100000 -c 10 http://localhost:8980/appContext/PDFToImage


package pdfbox2688;

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;

import javax.imageio.ImageIO;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;

/**
 * Servlet implementation class PDFToImageConversionServlet
 */
@WebServlet("/PDFToImage")
public class PDFToImageConversionServlet extends HttpServlet {
        private static final long serialVersionUID = 1L;

        /**
         * @see HttpServlet#HttpServlet()
         */
        public PDFToImageConversionServlet() {
                super();
                // TODO Auto-generated constructor stub
        }

        /**
         * @see HttpServlet#doGet(HttpServletRequest request, 
HttpServletResponse response)
         */
        @Override
        protected void doGet(HttpServletRequest request, HttpServletResponse 
response) throws ServletException, IOException {
                File pdfFile = getPDFFile();
                if (pdfFile != null) {
                        BufferedImage image = rasterizeUsingPdfBox(pdfFile);
                        ByteArrayOutputStream baos = new 
ByteArrayOutputStream();
                        ImageIO.write(image, "png", baos);
                        response.setContentType("image/png");
                        response.getOutputStream().write(baos.toByteArray());
                        response.flushBuffer();
                }
        }

        private BufferedImage rasterizeUsingPdfBox(File pdfFile) throws 
IOException {
                BufferedImage image = null;
                PDDocument document = null;
                PDPage page = null;
                try {
                        document = PDDocument.loadNonSeq(pdfFile, null);
                        @SuppressWarnings("unchecked")
                        List<PDPage> pages = 
document.getDocumentCatalog().getAllPages();
                        page = pages.get(0);
                        int imageType = BufferedImage.TYPE_INT_ARGB;
                        image = page.convertToImage(imageType, 300);
                } catch (Exception e) {
                        e.printStackTrace();
                } finally {
                        if (document != null) {
                                if (page != null) {
                                        page.clear();
                                }
                                document.close();
                        }
                }
                return image;
        }

        private File getPDFFile() throws IOException {
                InputStream is = null;
                FileOutputStream pdfOS = null;
                try {
                        URL url = new 
URL("http://www.xmlpdf.com/manualfiles/hello-world.pdf";);
                        // 
"https://github.com/mozilla/pdf.js/raw/master/examples/helloworld/helloworld.pdf";);
                        is = url.openStream();

                        File pdfFile = File.createTempFile("Testpdf", ".pdf", 
new File("/tmp"));
                        pdfOS = new FileOutputStream(pdfFile);

                        byte[] buf = new byte[4096];
                        int n;
                        while ((n = is.read(buf)) >= 0) {
                                pdfOS.write(buf, 0, n);
                        }
                        pdfOS.close();
                        is.close();
                        return pdfFile;
                } catch (Exception e) {
                        return null;
                } finally {
                        if (is != null) {
                                is.close();
                        }
                        if (pdfOS != null) {
                                pdfOS.close();
                        }
                }
        }

}


> sun.java2d.Disposer leak when using pdf to image conversion in a 
> server(tomcat)
> -------------------------------------------------------------------------------
>
>                 Key: PDFBOX-2688
>                 URL: https://issues.apache.org/jira/browse/PDFBOX-2688
>             Project: PDFBox
>          Issue Type: Bug
>    Affects Versions: 1.8.8
>            Reporter: Ankit Khanal
>
> I am running with 6GB of heap space and running PDF to PNG conversion in a 
> servlet container(tomcat). This happens only when running thousands of 
> requests for conversion.
> JVM memory statistics shows heap space never going above 1GB and non-heap 
> memory is also constant but the linux process or windows process seems to 
> consume around 8GB of memory.
> Heap dump shows that the largest object is sun.java2d.Disposer and is around 
> 200MB.
> It seems that the leaked memory is native memory used by java2d and not 
> accounted in the heap memory statistic but this growth of sun.java2d.Disposer 
> memory is proportional to the growth of process memory(linux 'top' command).
> {code}
>               BufferedImage image = null;
>               ByteArrayInputStream pdfStream = getpdfbytesfromExistingDoc();
>               PDDocument document = null;
>               PDPage page = null;
>               COSDocument cosDoc = null;
>               PDFParser parser = null;
>               try {
>                       parser = new PDFParser(pdfStream);
>                       parser.parse();
>                       cosDoc = parser.getDocument();
>                       document = new PDDocument(cosDoc);
>                       @SuppressWarnings("unchecked")
>                       List<PDPage> pages = 
> document.getDocumentCatalog().getAllPages();
>                       page = pages.get(0);
>                       int imageType = BufferedImage.TYPE_INT_ARGB;
>                       image = page.convertToImage(imageType, 72);
>               } finally {
>                       if (cosDoc != null) {
>                               cosDoc.close();
>                       }
>                       if (parser != null) {
>                               parser.clearResources();
>                       }
>                       if (document != null) {
>                               if (page != null) {
>                                       page.clear();
>                               }
>                               document.close();
>                       }
>               }
>               return image;
>       }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@pdfbox.apache.org
For additional commands, e-mail: dev-h...@pdfbox.apache.org

Reply via email to