jiangpeiheng created PDFBOX-4857:
------------------------------------
Summary: Render the first page for PDF cost long time
Key: PDFBOX-4857
URL: https://issues.apache.org/jira/browse/PDFBOX-4857
Project: PDFBox
Issue Type: Bug
Affects Versions: 2.0.19
Reporter: jiangpeiheng
Attachments: contract_input_jira.pdf
Hi, dear PDFBox developers:
I'm using PDFBox 2.0.19 to render my PDF files to JPG images. Over time I
have found a problem: there is a PDF file which has only 2 pages, yet it
takes 10 or more seconds to render all of its pages. The file is attached,
and here is my rendering code:
{code:java}
package com.bytedance.esign.pdfrender.processor;
import com.google.common.collect.Maps;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.springframework.stereotype.Service;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Renders selected pages of a PDF document to encoded image bytes and logs how long each
 * step (document load, renderer creation, per-page rendering) takes.
 *
 * @author jiangpeiheng create on 2020/5/9
 */
@Service
@Slf4j
public class RenderingProcessor {
    private static final String LOG_PREFIX = "RENDERING_PROCESSOR";
    // some settings for rendering
    private static final int IMAGE_DPI = 200;
    private static final String IMAGE_FORMAT = "jpg";

    /**
     * Main rendering entry point.
     *
     * <p>Failures are logged rather than propagated: a page that cannot be rendered is left
     * out of the result, and a failure to load the document yields whatever has been
     * collected so far (possibly an empty map).
     *
     * @param originPdf the original PDF as raw bytes
     * @param pages     the 1-based page numbers to render
     * @return a map from requested page number to the encoded image bytes of that page;
     *         only successfully rendered pages are present
     */
    public static Map<Integer, byte[]> render(byte[] originPdf, List<Integer> pages) {
        long startTime = System.currentTimeMillis();
        Map<Integer, byte[]> result = Maps.newHashMap();
        try (PDDocument doc = load(originPdf)) {
            log.info("[{}]载入PDDocument耗时:{}",
                    LOG_PREFIX, System.currentTimeMillis() - startTime);

            long rendererInitStopWatch = System.currentTimeMillis();
            PDFRenderer renderer = new PDFRenderer(doc);
            log.info("[{}]建立PDFRenderer完成,cost:{}",
                    LOG_PREFIX, System.currentTimeMillis() - rendererInitStopWatch);

            for (Integer pageIndex : pages) {
                // Time the rendering of this page.
                long pageStopWatch = System.currentTimeMillis();
                byte[] res = renderSinglePage(renderer, pageIndex);
                log.info("[{}]单页渲染完成,page index:{}, cost:{}",
                        LOG_PREFIX, pageIndex, System.currentTimeMillis() - pageStopWatch);

                // Time the map insertion separately; null/empty results are skipped.
                pageStopWatch = System.currentTimeMillis();
                if (ArrayUtils.isNotEmpty(res)) {
                    result.put(pageIndex, res);
                }
                log.info("[{}]单页put to map完成,page index:{}, cost:{}",
                        LOG_PREFIX, pageIndex, System.currentTimeMillis() - pageStopWatch);
            }
            log.info("[{}]渲染PDF成功, successSize:{}",
                    LOG_PREFIX, result.size());
        } catch (Exception e) {
            log.error("[{}]渲染PDF异常, successSize:{}, e:",
                    LOG_PREFIX, result.size(), e);
        } finally {
            log.info("[{}]渲染PDF完成, successSize:{}, cost:{}",
                    LOG_PREFIX, result.size(), System.currentTimeMillis() - startTime);
        }
        return result;
    }

    /**
     * Loads a PDF document from raw bytes using the
     * {@code MemoryUsageSetting.setupTempFileOnly()} memory setting.
     *
     * @param docBytes the PDF content
     * @return the loaded document; the caller is responsible for closing it
     * @throws IOException if the document cannot be loaded
     */
    private static PDDocument load(byte[] docBytes) throws IOException {
        return PDDocument.load(new ByteArrayInputStream(docBytes),
                MemoryUsageSetting.setupTempFileOnly());
    }

    /**
     * Renders a single page.
     *
     * @param renderer  the renderer bound to the loaded document
     * @param pageIndex the 1-based page number to render
     * @return the encoded image bytes, or {@code null} if rendering or encoding failed
     *         (the failure is logged)
     */
    private static byte[] renderSinglePage(PDFRenderer renderer, int pageIndex) {
        try {
            // Callers pass 1 for the first page, while the renderer is given pageIndex - 1.
            return transformImage(renderer.renderImageWithDPI(pageIndex - 1, IMAGE_DPI));
        } catch (Exception e) {
            log.error("[{}]渲染单页异常, pageIndex:{}, e:",
                    LOG_PREFIX, pageIndex, e);
            return null;
        }
    }

    /**
     * Encodes a {@link BufferedImage} into a byte array in {@code IMAGE_FORMAT}.
     *
     * @param bim the image to encode
     * @return the encoded image bytes
     * @throws IOException if writing fails or no ImageIO writer can encode the image
     */
    private static byte[] transformImage(BufferedImage bim) throws IOException {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        // ImageIO.write signals "no appropriate writer found" by returning false instead of
        // throwing; surface that as an error rather than returning an empty array.
        if (!ImageIO.write(bim, IMAGE_FORMAT, os)) {
            throw new IOException("No ImageIO writer available for format: " + IMAGE_FORMAT);
        }
        return os.toByteArray();
    }
}
{code}
Here is my unit test (UT) code as well:
{code:java}
package com.bytedance.esign.pdfrender.processor;
import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.junit.Test;
import java.io.FileInputStream;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static org.junit.Assert.*;
/**
 * Reproduction test: reads the sample PDF from {@code INPUT_PATH} and renders the pages
 * listed in {@code PAGES} through {@link RenderingProcessor#render}.
 *
 * @author jiangpeiheng create on 2020/6/3
 */
@Slf4j
public class RenderingProcessorTest {
    private static final String INPUT_PATH =
            "/Users/jiangpeiheng/myhome/work_stuff/esign/optimize/pdfrender/contract_input_jira.pdf";
    // Page numbers 1 and 2.
    private static final List<Integer> PAGES =
            IntStream.rangeClosed(1, 2).boxed().collect(Collectors.toList());

    /**
     * Renders every page in {@code PAGES} once and checks that each one produced an image.
     *
     * @throws Exception if the input file cannot be read; propagated so the test fails
     *                   instead of passing silently
     */
    @Test
    public void render() throws Exception {
        try (FileInputStream is = new FileInputStream(INPUT_PATH)) {
            byte[] pdfBytes = IOUtils.toByteArray(is);
            // single
            // render() logs and skips pages that fail, so compare the result size with the
            // number of requested pages to detect failures.
            assertEquals(PAGES.size(), RenderingProcessor.render(pdfBytes, PAGES).size());
            // loop
            // IntStream.rangeClosed(1, 10).forEach(i -> {
            // log.info("Loop render, index:{}", i);
            // RenderingProcessor.render(pdfBytes, PAGES);
            // });
        }
    }
}
{code}
Could anybody find out why rendering the first page takes such a long
time?
Thank you
Jiang Peiheng
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]