jiangpeiheng created PDFBOX-4857:
------------------------------------

             Summary: Render the first page for PDF cost long time
                 Key: PDFBOX-4857
                 URL: https://issues.apache.org/jira/browse/PDFBOX-4857
             Project: PDFBox
          Issue Type: Bug
    Affects Versions: 2.0.19
            Reporter: jiangpeiheng
         Attachments: contract_input_jira.pdf

Hi, dear PDFBox developers:

I'm using PDFBox 2.0.19 to render my PDF files to JPG images. Over time 
I have found a problem: there is a PDF file that has only 2 pages, 
yet it takes 10 or more seconds to render all of its pages. The file is 
attached, and here is my rendering code:
{code:java}
package com.bytedance.esign.pdfrender.processor;

import com.google.common.collect.Maps;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.springframework.stereotype.Service;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * Renders selected pages of a PDF document to JPEG images using PDFBox,
 * logging how long each stage takes.
 *
 * @author jiangpeiheng create on 2020/5/9
 */
@Service
@Slf4j
public class RenderingProcessor {

    private static final String LOG_PREFIX = "RENDERING_PROCESSOR";

    // some settings for rendering
    private static final int IMAGE_DPI = 200;
    private static final String IMAGE_FORMAT = "jpg";

    /**
     * Main rendering entry point.
     *
     * <p>Failures never propagate to the caller: a page that cannot be rendered is
     * logged and left out of the result, and any other exception is logged and the
     * pages rendered so far are returned.
     *
     * @param originPdf the original PDF as a byte array
     * @param pages     1-based numbers of the pages to render
     * @return map from page number to the encoded image bytes of that page;
     *         never {@code null}, but possibly empty or partial
     */
    public static Map<Integer, byte[]> render(byte[] originPdf, List<Integer> pages) {
        long startTime = System.currentTimeMillis();
        Map<Integer, byte[]> result = Maps.newHashMap();
        try (PDDocument doc = load(originPdf)) {
            log.info("[{}]载入PDDocument耗时:{}",
                    LOG_PREFIX, System.currentTimeMillis() - startTime);
            long rendererInitStopWatch = System.currentTimeMillis();
            PDFRenderer renderer = new PDFRenderer(doc);
            log.info("[{}]建立PDFRenderer完成,cost:{}",
                    LOG_PREFIX, System.currentTimeMillis() - rendererInitStopWatch);
            for (Integer pageIndex : pages) {
                long pageStopWatch = System.currentTimeMillis();
                byte[] res = renderSinglePage(renderer, pageIndex);
                log.info("[{}]单页渲染完成,page index:{}, cost:{}",
                        LOG_PREFIX, pageIndex, System.currentTimeMillis() - pageStopWatch);
                pageStopWatch = System.currentTimeMillis();
                // renderSinglePage returns null on failure; such pages are skipped.
                if (ArrayUtils.isNotEmpty(res)) {
                    result.put(pageIndex, res);
                }
                log.info("[{}]单页put to map完成,page index:{}, cost:{}",
                        LOG_PREFIX, pageIndex, System.currentTimeMillis() - pageStopWatch);
            }
            log.info("[{}]渲染PDF成功, successSize:{}",
                    LOG_PREFIX, result.size());
        } catch (Exception e) {
            log.error("[{}]渲染PDF异常, successSize:{}, e:",
                    LOG_PREFIX, result.size(), e);
        } finally {
            log.info("[{}]渲染PDF完成, successSize:{}, cost:{}",
                    LOG_PREFIX, result.size(),
                    System.currentTimeMillis() - startTime);
        }
        return result;
    }

    /**
     * Parses the given bytes into a {@link PDDocument}, using the
     * {@code MemoryUsageSetting.setupTempFileOnly()} memory configuration.
     *
     * @param docBytes the PDF content
     * @return the loaded document; the caller is responsible for closing it
     * @throws IOException if PDFBox cannot load the document
     */
    private static PDDocument load(byte[] docBytes) throws IOException {
        return PDDocument.load(new ByteArrayInputStream(docBytes),
                MemoryUsageSetting.setupTempFileOnly());
    }

    /**
     * Renders a single page and encodes it as an image.
     *
     * @param renderer  renderer bound to the loaded document
     * @param pageIndex 1-based page number
     * @return the encoded image bytes, or {@code null} if rendering or encoding failed
     *         (the failure is logged)
     */
    private static byte[] renderSinglePage(PDFRenderer renderer, int pageIndex) {
        try {
            // Callers pass 1 for the first page, so subtract 1 before handing the
            // index to the renderer.
            return transformImage(renderer.renderImageWithDPI(pageIndex - 1, IMAGE_DPI));
        } catch (Exception e) {
            log.error("[{}]渲染单页异常, pageIndex:{}, e:",
                    LOG_PREFIX, pageIndex, e);
            return null;
        }
    }

    /**
     * Encodes a {@link BufferedImage} into a byte array in {@link #IMAGE_FORMAT}.
     *
     * @param bim the image to encode
     * @return the encoded image bytes
     * @throws IOException if encoding fails or no ImageIO writer accepts the image
     */
    private static byte[] transformImage(BufferedImage bim) throws IOException {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        // ImageIO.write signals "no suitable writer" by returning false rather than
        // throwing; surface it so the page is not silently dropped as an empty array.
        if (!ImageIO.write(bim, IMAGE_FORMAT, os)) {
            throw new IOException("No ImageIO writer available for format: " + IMAGE_FORMAT);
        }
        return os.toByteArray();
    }

}
{code}
Here is my unit test code as well:
{code:java}
package com.bytedance.esign.pdfrender.processor;

import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.junit.Test;

import java.io.FileInputStream;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static org.junit.Assert.*;

/**
 * Reproduction test for slow rendering: loads the sample PDF from a local path
 * and renders pages 1 and 2 through {@link RenderingProcessor#render}.
 *
 * @author jiangpeiheng create on 2020/6/3
 */
@Slf4j
public class RenderingProcessorTest {

    // Absolute path on the reporter's machine; adjust before running elsewhere.
    private static final String INPUT_PATH =
            "/Users/jiangpeiheng/myhome/work_stuff/esign/optimize/pdfrender/contract_input_jira.pdf";

    // 1-based page numbers to render: [1, 2].
    private static final List<Integer> PAGES =
            IntStream.rangeClosed(1, 2).boxed().collect(Collectors.toList());

    /**
     * Renders the requested pages once and checks that every one of them produced
     * an image. Exceptions propagate so that a missing file or read error fails the
     * test instead of being logged and ignored.
     *
     * @throws Exception if the input file cannot be opened or read
     */
    @Test
    public void render() throws Exception {
        try (FileInputStream is = new FileInputStream(INPUT_PATH)) {
            byte[] pdfBytes = IOUtils.toByteArray(is);
            // single
            assertEquals(PAGES.size(), RenderingProcessor.render(pdfBytes, PAGES).size());

            // loop (disabled): repeat the render to compare the first run with later runs
//            IntStream.rangeClosed(1, 10).forEach(i -> {
//                log.info("Loop render, index:{}", i);
//                RenderingProcessor.render(pdfBytes, PAGES);
//            });
        }
    }
}
{code}
Could anybody help find out why rendering the first page takes such a long 
time?

Thank you

Jiang Peiheng



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@pdfbox.apache.org
For additional commands, e-mail: dev-h...@pdfbox.apache.org

Reply via email to