jiangpeiheng created PDFBOX-4857: ------------------------------------ Summary: Render the first page for PDF cost long time Key: PDFBOX-4857 URL: https://issues.apache.org/jira/browse/PDFBOX-4857 Project: PDFBox Issue Type: Bug Affects Versions: 2.0.19 Reporter: jiangpeiheng Attachments: contract_input_jira.pdf
Hi, dear PDFBox developers: I'm using PDFBox 2.0.19 to render my PDF files to JPG images. Over time, I have found a problem. There is a PDF file that has only 2 pages, yet it takes 10 or more seconds to render all pages. The file is attached, and here is my rendering code: {code:java} package com.bytedance.esign.pdfrender.processor; import com.google.common.collect.Maps; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ArrayUtils; import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.rendering.PDFRenderer; import org.springframework.stereotype.Service; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.List; import java.util.Map; /** * @author jiangpeiheng create on 2020/5/9 */ @Service @Slf4j public class RenderingProcessor { private static final String LOG_PERFIX = "RENDERING_PROCESSOR"; // some settings for rendering private static final int IMAGE_DPI = 200; private static final String IMAGE_FORMAT = "jpg"; /** * 渲染主入口 * * @param originPdf 原始PDF * @param pages 需要渲染的页 * @return */ public static Map<Integer, byte[]> render(byte[] originPdf, List<Integer> pages) { long startTime = System.currentTimeMillis(); Map<Integer, byte[]> result = Maps.newHashMap(); try ( PDDocument doc = load(originPdf) ) { log.info("[{}]载入PDDocument耗时:{}", LOG_PERFIX, System.currentTimeMillis() - startTime); long rendererInitStopWatch = System.currentTimeMillis(); PDFRenderer renderer = new PDFRenderer(doc); log.info("[{}]建立PDFRenderer完成,cost:{}", LOG_PERFIX, System.currentTimeMillis() - rendererInitStopWatch); pages.forEach(pageIndex -> { long pageStopWatch = System.currentTimeMillis(); byte[] res = renderSinglePage(renderer, pageIndex); log.info("[{}]单页渲染完成,page index:{}, cost:{}", LOG_PERFIX, pageIndex, System.currentTimeMillis() - pageStopWatch); 
pageStopWatch = System.currentTimeMillis(); if (ArrayUtils.isNotEmpty(res)) { result.put(pageIndex, res); } log.info("[{}]单页put to map完成,page index:{}, cost:{}", LOG_PERFIX, pageIndex, System.currentTimeMillis() - pageStopWatch); }); log.info("[{}]渲染PDF成功, successSize:{}", LOG_PERFIX, result.size()); } catch (Exception e) { log.error("[{}]渲染PDF异常, successSize:{}, e:", LOG_PERFIX, result.size(), e); } finally { log.info("[{}]渲染PDF完成, successSize:{}, cost:{}", LOG_PERFIX, result.size(), System.currentTimeMillis() - startTime); } return result; } private static PDDocument load(byte[] docBytes) throws IOException { return PDDocument.load(new ByteArrayInputStream(docBytes), MemoryUsageSetting.setupTempFileOnly()); } /** * 渲染单页 * * @param renderer * @param pageIndex * @return */ private static byte[] renderSinglePage(PDFRenderer renderer, int pageIndex) { try { // 渲染第一页,则这里传入的pageIndex需要减1 return transformImage(renderer.renderImageWithDPI(pageIndex - 1, IMAGE_DPI)); } catch (Exception e) { log.error("[{}]渲染单页异常, pageIndex:{}, e:", LOG_PERFIX, pageIndex, e); return null; } } /** * BufferedImage -> byte[] * * @param bim * @return * @throws IOException */ private static byte[] transformImage(BufferedImage bim) throws IOException { ByteArrayOutputStream os = new ByteArrayOutputStream(); ImageIO.write(bim, IMAGE_FORMAT, os); return os.toByteArray(); } } {code} Here is my UT code as well: {code:java} package com.bytedance.esign.pdfrender.processor; import com.google.common.collect.Lists; import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.IOUtils; import org.junit.Test; import java.io.FileInputStream; import java.util.List; import java.util.stream.Collectors; import java.util.stream.IntStream; import static org.junit.Assert.*; /** * @author jiangpeiheng create on 2020/6/3 */ @Slf4j public class RenderingProcessorTest { private static final String INPUT_PATH = "/Users/jiangpeiheng/myhome/work_stuff/esign/optimize/pdfrender/contract_input_jira.pdf"; private static 
final List<Integer> PAGES; static { PAGES = IntStream.rangeClosed(1, 2).boxed().collect(Collectors.toList()); } @Test public void render() { try ( FileInputStream is = new FileInputStream(INPUT_PATH) ) { byte[] pdfBytes = IOUtils.toByteArray(is); // single RenderingProcessor.render(pdfBytes, PAGES); // loop // IntStream.rangeClosed(1, 10).forEach(i -> { // log.info("Loop render, index:{}", i); // RenderingProcessor.render(pdfBytes, PAGES); // }); } catch (Exception e) { log.error("Exception, e:", e); } } } {code} Could anybody find out why it takes so long to render the first page? Thank you, Jiang Peiheng -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@pdfbox.apache.org For additional commands, e-mail: dev-h...@pdfbox.apache.org