Dear Poppler developers,

First of all, I would like to thank the Poppler developers for writing
excellent software. The addition of CairoOutputDev is very interesting.

I am now trying to extend pdftoppm so that it can draw on CairoOutputDev.
My motivation is to split a large table in a PDF document
into small PDFs, one for each cell.

Recent versions of Poppler can draw on a cairo surface, so
I think this can be done by having pdftoppm draw a single cell
(selected by specifying the geometry of the cell) on a cairo surface,
with something like:

  pdftoppm \
           -f [page_num] -l [page_num] \
           -r [dpi_to_specify_the_unit_of_geometry] \
           -x [cell_pos_x] -y [cell_pos_y]          \
           -w [cell_width] -h [cell_height]         \
           -pdf [input_table.pdf] [output_cell_prefix]

The attached patch is an experiment that does this; please comment
on what should be improved for official adoption.

By default, the "-r" option of "pdftoppm -pdf" is used only
as the unit for calculating the geometry to be cropped; it
does not change the resolution of the output PDF. This is
inconsistent with the "-r" option in the SplashOutputDev case.
If MODIFY_RESOLUTION_IN_PDF2CAIRO is defined at compile time,
the behaviour of "-r" becomes consistent with the
SplashOutputDev case.

The problems that I've already recognized are:

* If a PDF that includes a large image (e.g. a PDF generated
  by an image scanner) is given, the cropped PDF contains the
  whole image object, not a cropped image object, so the
  file size of the cropped PDF is not reduced.

* When multiple pages are rendered (e.g. pdftoppm -pdf
  -f 1 -l 100 ...), startDoc() is invoked for each
  output file. As a result, the rendering speed is
  slower than that of SplashOutputDev.

Regards,
mpsuzuki
diff --git a/utils/Makefile.am b/utils/Makefile.am
index e57c71b..48bb7a7 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -4,6 +4,11 @@ pdftoppm_SOURCES =				\
 	pdftoppm.cc				\
 	$(common)
 
+pdftoppm_LDADD =				\
+	$(LDADD)				\
+	$(top_builddir)/poppler/libpoppler-cairo.la	\
+	$(CAIRO_LIBS)
+
 pdftoppm_binary = pdftoppm
 
 pdftoppm_manpage = pdftoppm.1
@@ -17,6 +22,7 @@ INCLUDES =					\
 	-I$(top_srcdir)/poppler			\
 	$(UTILS_CFLAGS)				\
 	$(FONTCONFIG_CFLAGS)			\
+	$(CAIRO_CFLAGS)				\
 	$(ABIWORD_CFLAGS)
 
 LDADD =						\
diff --git a/utils/pdftoppm.cc b/utils/pdftoppm.cc
index e27aa44..8d486df 100644
--- a/utils/pdftoppm.cc
+++ b/utils/pdftoppm.cc
@@ -37,6 +37,8 @@
 #include "splash/SplashBitmap.h"
 #include "splash/Splash.h"
 #include "SplashOutputDev.h"
+#include "CairoOutputDev.h"
+#include <cairo-pdf.h>
 
 #define PPM_FILE_SZ 512
 
@@ -58,6 +60,7 @@ static GBool mono = gFalse;
 static GBool gray = gFalse;
 static GBool png = gFalse;
 static GBool jpeg = gFalse;
+static GBool pdf = gFalse;
 static char enableFreeTypeStr[16] = "";
 static char antialiasStr[16] = "";
 static char vectorAntialiasStr[16] = "";
@@ -111,6 +114,10 @@ static const ArgDesc argDesc[] = {
   {"-jpeg",    argFlag,     &jpeg,           0,
    "generate a JPEG file"},
 #endif
+#if HAVE_CAIRO
+  {"-pdf",    argFlag,     &pdf,           0,
+   "generate a PDF file"},
+#endif
 #if HAVE_FREETYPE_FREETYPE_H | HAVE_FREETYPE_H
   {"-freetype",   argString,      enableFreeTypeStr, sizeof(enableFreeTypeStr),
    "enable FreeType font rasterizer: yes, no"},
@@ -177,6 +184,22 @@ static void savePageSlice(PDFDoc *doc,
   }
 }
 
+static void savePageSliceCairo(PDFDoc *doc,
+                   CairoOutputDev *cairoOut,
+                   int pg, int x, int y, int w, int h, 
+                   double pg_w, double pg_h) { // draws the slice (x, y, w, h) of page pg on cairoOut
+  if (w == 0) w = (int)ceil(pg_w); // w == 0: fall back to the page width, rounded up
+  if (h == 0) h = (int)ceil(pg_h); // h == 0: fall back to the page height, rounded up
+  w = (x+w > pg_w ? (int)ceil(pg_w-x) : w); // shrink w when x+w would exceed pg_w
+  h = (y+h > pg_h ? (int)ceil(pg_h-y) : h); // shrink h when y+h would exceed pg_h
+
+#ifdef MODIFY_RESOLUTION_IN_PDF2CAIRO
+  doc->displayPageSlice(cairoOut, pg, x_resolution, y_resolution, 0, !useCropBox, gFalse, gFalse, x, y, w, h); // passes the file-level x_resolution/y_resolution
+#else
+  doc->displayPageSlice(cairoOut, pg, 72, 72, 0, !useCropBox, gFalse, gFalse, x, y, w, h); // passes a fixed 72 for both resolutions instead
+#endif
+}
+
 int main(int argc, char *argv[]) {
   PDFDoc *doc;
   GooString *fileName = NULL;
@@ -185,6 +208,11 @@ int main(int argc, char *argv[]) {
   GooString *ownerPW, *userPW;
   SplashColor paperColor;
   SplashOutputDev *splashOut;
+
+  cairo_surface_t *surface = NULL;
+  cairo_t* cr = NULL;
+  CairoOutputDev *cairoOut = NULL;
+
   GBool ok;
   int exitCode;
   int pg, pg_num_len;
@@ -272,15 +300,18 @@ int main(int argc, char *argv[]) {
   if (lastPage < 1 || lastPage > doc->getNumPages())
     lastPage = doc->getNumPages();
 
-  // write PPM files
-  paperColor[0] = 255;
-  paperColor[1] = 255;
-  paperColor[2] = 255;
-  splashOut = new SplashOutputDev(mono ? splashModeMono1 :
+  // CairoOutputDev is bound to output file,
+  // initialization for PDF is postponed.
+  if (!pdf) {
+    paperColor[0] = 255;
+    paperColor[1] = 255;
+    paperColor[2] = 255;
+    splashOut = new SplashOutputDev(mono ? splashModeMono1 :
 				    gray ? splashModeMono8 :
 				             splashModeRGB8, 4,
 				  gFalse, paperColor);
-  splashOut->startDoc(doc->getXRef());
+    splashOut->startDoc(doc->getXRef());
+  }
   if (sz != 0) w = h = sz;
   pg_num_len = (int)ceil(log((double)doc->getNumPages()) / log((double)10));
   for (pg = firstPage; pg <= lastPage; ++pg) {
@@ -310,16 +341,46 @@ int main(int argc, char *argv[]) {
       pg_w = pg_h;
       pg_h = tmp;
     }
+
     if (ppmRoot != NULL) {
       snprintf(ppmFile, PPM_FILE_SZ, "%.*s-%0*d.%s",
               PPM_FILE_SZ - 32, ppmRoot, pg_num_len, pg,
-              png ? "png" : jpeg ? "jpg" : mono ? "pbm" : gray ? "pgm" : "ppm");
-      savePageSlice(doc, splashOut, pg, x, y, w, h, pg_w, pg_h, ppmFile);
-    } else {
-      savePageSlice(doc, splashOut, pg, x, y, w, h, pg_w, pg_h, NULL);
+              pdf ? "pdf" : png ? "png" : jpeg ? "jpg" : mono ? "pbm" : gray ? "pgm" : "ppm");
     }
+
+
+    if (pdf) {
+      // postponed initialization for cairo output device
+#ifdef MODIFY_RESOLUTION_IN_PDF2CAIRO
+      surface = cairo_pdf_surface_create( ppmFile, w, h );
+#else
+      surface = cairo_pdf_surface_create( ppmFile,
+                                          72 * w / x_resolution,
+                                          72 * h / y_resolution );
+#endif
+      cr = cairo_create( surface );
+      cairo_surface_destroy( surface );
+      cairoOut = new CairoOutputDev;
+      cairoOut->setCairo( cr );
+      cairo_destroy( cr );
+      cairoOut->startDoc( doc->getXRef(), doc->getCatalog() );
+#ifdef MODIFY_RESOLUTION_IN_PDF2CAIRO
+      savePageSliceCairo(doc, cairoOut, pg, x, y, w, h, pg_w, pg_h);
+#else
+      savePageSliceCairo(doc, cairoOut, pg,
+                         72 * x / x_resolution,
+                         72 * y / y_resolution,
+                         72 * w / x_resolution,
+                         72 * h / y_resolution,
+                         pg_w, pg_h);
+#endif
+      cairoOut->setCairo( NULL );
+      delete cairoOut;
+    } else
+      savePageSlice(doc, splashOut, pg, x, y, w, h, pg_w, pg_h, ppmFile);
   }
-  delete splashOut;
+  if (!pdf)
+    delete splashOut;
 
   exitCode = 0;
 
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to