[ https://issues.apache.org/jira/browse/PDFBOX-4857?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17126717#comment-17126717 ]
jiangpeiheng commented on PDFBOX-4857: -------------------------------------- Hi Tilman Ok, I understand the reason now, but I still have some questions. # Is that watermark loaded only once? That is, for the same watermark in the same PDF file, does PDFRenderer load it only once? # Is there any way to cache this watermark, so that other PDFs with the same watermark will be loaded faster? The same watermark could be identified by a hash such as MD5. Thanks Jiang Peiheng > Render the first page for PDF cost long time > -------------------------------------------- > > Key: PDFBOX-4857 > URL: https://issues.apache.org/jira/browse/PDFBOX-4857 > Project: PDFBox > Issue Type: Bug > Affects Versions: 2.0.19 > Reporter: jiangpeiheng > Priority: Major > Attachments: contract_input_jira.pdf > > > Hi, dear PDFBox developers: > I'm now using pdfbox 2.0.19 to render my PDF file to jpg pictures. Over > time, I have found a problem. There is a PDF file which has only 2 > pages; however, it takes 10 or more seconds to render all pages. 
The file is > in attachment, and here is my rendering code: > {code:java} > package com.bytedance.esign.pdfrender.processor; > import com.google.common.collect.Maps; > import lombok.extern.slf4j.Slf4j; > import org.apache.commons.lang3.ArrayUtils; > import org.apache.pdfbox.io.MemoryUsageSetting; > import org.apache.pdfbox.pdmodel.PDDocument; > import org.apache.pdfbox.rendering.PDFRenderer; > import org.springframework.stereotype.Service; > import javax.imageio.ImageIO; > import java.awt.image.BufferedImage; > import java.io.ByteArrayInputStream; > import java.io.ByteArrayOutputStream; > import java.io.IOException; > import java.util.List; > import java.util.Map; > /** > * @author jiangpeiheng create on 2020/5/9 > */ > @Service > @Slf4j > public class RenderingProcessor { > private static final String LOG_PERFIX = "RENDERING_PROCESSOR"; > // some settings for rendering > private static final int IMAGE_DPI = 200; > private static final String IMAGE_FORMAT = "jpg"; > /** > * 渲染主入口 > * > * @param originPdf 原始PDF > * @param pages 需要渲染的页 > * @return > */ > public static Map<Integer, byte[]> render(byte[] originPdf, List<Integer> > pages) { > long startTime = System.currentTimeMillis(); > Map<Integer, byte[]> result = Maps.newHashMap(); > try ( > PDDocument doc = load(originPdf) > ) { > log.info("[{}]载入PDDocument耗时:{}", > LOG_PERFIX, System.currentTimeMillis() - startTime); > long rendererInitStopWatch = System.currentTimeMillis(); > PDFRenderer renderer = new PDFRenderer(doc); > log.info("[{}]建立PDFRenderer完成,cost:{}", > LOG_PERFIX, System.currentTimeMillis() - > rendererInitStopWatch); > pages.forEach(pageIndex -> { > long pageStopWatch = System.currentTimeMillis(); > byte[] res = renderSinglePage(renderer, pageIndex); > log.info("[{}]单页渲染完成,page index:{}, cost:{}", > LOG_PERFIX, pageIndex, System.currentTimeMillis() - > pageStopWatch); > pageStopWatch = System.currentTimeMillis(); > if (ArrayUtils.isNotEmpty(res)) { > result.put(pageIndex, res); > } > 
log.info("[{}]单页put to map完成,page index:{}, cost:{}", > LOG_PERFIX, pageIndex, System.currentTimeMillis() - > pageStopWatch); > }); > log.info("[{}]渲染PDF成功, successSize:{}", > LOG_PERFIX, result.size()); > } catch (Exception e) { > log.error("[{}]渲染PDF异常, successSize:{}, e:", > LOG_PERFIX, result.size(), e); > } finally { > log.info("[{}]渲染PDF完成, successSize:{}, cost:{}", > LOG_PERFIX, result.size(), > System.currentTimeMillis() - startTime); > } > return result; > } > private static PDDocument load(byte[] docBytes) throws IOException { > return PDDocument.load(new ByteArrayInputStream(docBytes), > MemoryUsageSetting.setupTempFileOnly()); > } > /** > * 渲染单页 > * > * @param renderer > * @param pageIndex > * @return > */ > private static byte[] renderSinglePage(PDFRenderer renderer, int > pageIndex) { > try { > // 渲染第一页,则这里传入的pageIndex需要减1 > return transformImage(renderer.renderImageWithDPI(pageIndex - 1, > IMAGE_DPI)); > } catch (Exception e) { > log.error("[{}]渲染单页异常, pageIndex:{}, e:", > LOG_PERFIX, pageIndex, e); > return null; > } > } > /** > * BufferedImage -> byte[] > * > * @param bim > * @return > * @throws IOException > */ > private static byte[] transformImage(BufferedImage bim) throws > IOException { > ByteArrayOutputStream os = new ByteArrayOutputStream(); > ImageIO.write(bim, IMAGE_FORMAT, os); > return os.toByteArray(); > } > } > {code} > Here is my UT code as well: > {code:java} > package com.bytedance.esign.pdfrender.processor; > import com.google.common.collect.Lists; > import lombok.extern.slf4j.Slf4j; > import org.apache.commons.io.IOUtils; > import org.junit.Test; > import java.io.FileInputStream; > import java.util.List; > import java.util.stream.Collectors; > import java.util.stream.IntStream; > import static org.junit.Assert.*; > /** > * @author jiangpeiheng create on 2020/6/3 > */ > @Slf4j > public class RenderingProcessorTest { > private static final String INPUT_PATH = > 
"/Users/jiangpeiheng/myhome/work_stuff/esign/optimize/pdfrender/contract_input_jira.pdf"; > private static final List<Integer> PAGES; > static { > PAGES = IntStream.rangeClosed(1, > 2).boxed().collect(Collectors.toList()); > } > @Test > public void render() { > try ( > FileInputStream is = new FileInputStream(INPUT_PATH) > ) { > byte[] pdfBytes = IOUtils.toByteArray(is); > // single > RenderingProcessor.render(pdfBytes, PAGES); > // loop > // IntStream.rangeClosed(1, 10).forEach(i -> { > // log.info("Loop render, index:{}", i); > // RenderingProcessor.render(pdfBytes, PAGES); > // }); > } catch (Exception e) { > log.error("Exception, e:", e); > } > } > } > {code} > Could anybody find out why it takes such a long time to render the first > page? > Thank you > Jiang Peiheng -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@pdfbox.apache.org For additional commands, e-mail: dev-h...@pdfbox.apache.org