Dear developers,

The poppler patch related to "accessibility of PDF documents" is attached;
please check it and give us your feedback.



On Mon, Jun 14, 2010 at 12:44 PM, leena chourey <[email protected]> wrote:

> Dear Albert,
>
> Thanks for responding.
> The poppler patch related with our changes will be submitted soon to
> poppler developers for feedback.
>
> with regards
> Leena
>
>
>
> On Mon, Jun 14, 2010 at 3:02 AM, Albert Astals Cid <[email protected]> wrote:
>
>> A Divendres, 11 de juny de 2010, leena chourey va escriure:
>> > Dear all,
>> >
>> > This is a continuation of our last communication regarding
>> > *accessibility of PDF documents.* As discussed last time, we have now
>> > completed the first prototype version of the same and we are making it
>> > available for your testing and valuable comments/feedback. As mentioned
>> > last time, we convert the PDF file into HTML format and open the same
>> > automatically in Firefox.
>> >
>> > Enclosed with this email is the ReadMe file which explains the steps to
>> > download, install and run this enhancement. Please try out the same and
>> > give us your valuable feedback.
>> >
>> >
>> > We have tested it at our end by different teams and have found that it
>> > works well in most cases. Some places where we feel work is required are
>> > as follows:
>> >
>> >    - Sometimes Orca is not able to read the header and footer of a page
>> >    - Some character combinations, such as 'ft' and 'tt', are displayed
>> >    as garbage
>> >    - Content in table format is spoken by Orca, but without the row and
>> >    column reference that Orca announces for tables in OpenOffice Writer,
>> >    so a user cannot tell that Orca is reading table content.
>> >
>> > We are working on these. Waiting to hear more from all.
>>
>> Quick question, do you have any patches on top of the poppler you use?
>>
>> Albert
>>
>> >
>> >
>> >
>> >
>> > With regards
>> > Leena C
>> _______________________________________________
>> poppler mailing list
>> [email protected]
>> http://lists.freedesktop.org/mailman/listinfo/poppler
>>
>
>
>
> --
> Leena C
>


Thanks & Regards
-- 
Leena C
(for CDAC Mumbai team)
From da2edbd45208384a5500d87440fe87a92c2855c3 Mon Sep 17 00:00:00 2001
From: onkar <[email protected]>, leena <[email protected]>
Date: Tue, 15 Jun 2010 10:51:04 +0530
Subject: [PATCH] By CDAC Developers
 This will help to make pdftohtml more accessible and usable.
 A function similar to dumpComplex (complexHtml) is defined to combine all pages into a single HTML file.
 This change generates a <file>-html.html file and makes the CSS class names produced by CSStyle/getCSStyle page-specific.
 ---
 utils/HtmlFonts.cc     |   13 ++++--
 utils/HtmlFonts.h      |    5 +-
 utils/HtmlOutputDev.cc |  109 +++++++++++++++++++++++++++++++++++++++++++++++-
 utils/HtmlOutputDev.h  |    6 ++-
 4 files changed, 125 insertions(+), 8 deletions(-)

diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc
index d2cbfd5..88cbf65 100644
--- a/utils/HtmlFonts.cc
+++ b/utils/HtmlFonts.cc
@@ -288,12 +288,14 @@ int HtmlFontAccu::AddFont(const HtmlFont& font){
 }
 
 // get CSS font name for font #i 
-GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){
+// One more parameter for page no (int j) is added in the getCSStyle function by CDAC developer Team
+GooString* HtmlFontAccu::getCSStyle(int i, int j, GooString* content){
   GooString *tmp;
   GooString *iStr=GooString::fromInt(i);
-  
+  GooString *jStr=GooString::fromInt(j);
   if (!xml) {
     tmp = new GooString("<span class=\"ft");
+    tmp->append(jStr);
     tmp->append(iStr);
     tmp->append("\">");
     tmp->append(content);
@@ -308,10 +310,11 @@ GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){
 }
 
 // get CSS font definition for font #i 
-GooString* HtmlFontAccu::CSStyle(int i){
+// One more parameter for page no (int j) is added in the CSStyle function by CDAC developer Team
+GooString* HtmlFontAccu::CSStyle(int i, int j){
    GooString *tmp=new GooString();
    GooString *iStr=GooString::fromInt(i);
-
+   GooString *jStr=GooString::fromInt(j);
    GooVector<HtmlFont>::iterator g=accu->begin();
    g+=i;
    HtmlFont font=*g;
@@ -322,6 +325,7 @@ GooString* HtmlFontAccu::CSStyle(int i){
    
    if(!xml){
      tmp->append(".ft");
+     tmp->append(jStr); 	
      tmp->append(iStr);
      tmp->append("{font-size:");
      tmp->append(Size);
@@ -352,6 +356,7 @@ GooString* HtmlFontAccu::CSStyle(int i){
 
    delete fontName;
    delete colorStr;
+   delete jstr;
    delete iStr;
    delete Size;
    return tmp;
diff --git a/utils/HtmlFonts.h b/utils/HtmlFonts.h
index df2b570..39c38c0 100644
--- a/utils/HtmlFonts.h
+++ b/utils/HtmlFonts.h
@@ -89,8 +89,9 @@ public:
     g+=i;  
     return g;
   } 
-  GooString* getCSStyle (int i, GooString* content);
-  GooString* CSStyle(int i);
+  // One more parameter for page no (int j) is added in the getCSStyle and CSStyle function by CDAC developer Team
+  GooString* getCSStyle (int i, int j, GooString* content);
+  GooString* CSStyle(int i,int j=0);
   int size() const {return accu->size();}
   
 };  
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 81f8b88..297e838 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -630,6 +630,109 @@ void HtmlPage::coalesce() {
 
 }
 
+
+/*
+ * complexHtml(): variant of dumpComplex() by the CDAC developer team. With
+ * frames (!noframes) each page is appended to one "<DocName>-html.html" file;
+ * `page` is also handed to the font accumulator for the CSS class names.
+ */
+void HtmlPage::complexHtml(FILE *file, int page){
+
+  FILE* pageFile;
+  GooString* tmp;
+  char* htmlEncoding;
+
+  if( firstPage == -1 ) firstPage = page;
+
+  if( !noframes )
+  {
+      tmp = new GooString(DocName);
+      tmp->append("-html")->append(".html");
+
+      // "a": every page is appended to the same combined file
+      if (!(pageFile = fopen(tmp->getCString(), "a"))) {
+	  error(-1, "Couldn't open html file '%s'", tmp->getCString());
+	  delete tmp;
+	  return;
+      }
+
+      fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE> %s</TITLE>\n\n",
+	      DOCTYPE, tmp->getCString());
+      // the file name is used as the title above, so free it only now
+      delete tmp;
+
+      htmlEncoding = HtmlOutputDev::mapEncodingToHtml
+	  (globalParams->getTextEncodingName());
+      fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n<br>\n", htmlEncoding);
+  }
+  else
+  {
+      pageFile = file;
+      fprintf(pageFile,"<!-- Page %d -->\n", page);
+      fprintf(pageFile,"<a name=\"%d\"></a>\n", page);
+  }
+
+  fprintf(pageFile,"<DIV style=\"position:relative;width:%d;height:%d;\">\n",
+	pageWidth, pageHeight);
+
+  tmp=basename(DocName);
+  // CSS definitions for the fonts from fontsPageMarker onwards
+  fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile);
+  for(int i=fontsPageMarker;i!=fonts->size();i++) {
+    GooString *fontCSStyle = fonts->CSStyle(i, page);
+    fprintf(pageFile,"\t%s\n",fontCSStyle->getCString());
+    delete fontCSStyle;
+  }
+
+  fputs("-->\n</STYLE>\n",pageFile);
+
+  if( !noframes )
+  {
+      fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile);
+  }
+
+  if( !ignore )
+  {
+    fprintf(pageFile,
+	    "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\">\n",
+	    pageWidth, pageHeight, tmp->getCString(),
+		(page-firstPage+1), imgExt->getCString());
+  }
+
+  delete tmp;
+
+  for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
+    if (tmp1->htext){
+      GooString *str=new GooString(tmp1->htext);
+      // reset per string so a NULL or already-deleted pointer is never used
+      GooString *str1=NULL;
+      fprintf(pageFile,
+	      "<DIV style=\"position:absolute;top:%d;left:%d\">",
+	      xoutRound(tmp1->yMin),
+	      xoutRound(tmp1->xMin));
+      fputs("<nobr>",pageFile);
+      if (tmp1->fontpos!=-1){
+	str1=fonts->getCSStyle(tmp1->fontpos, page, str);
+      }
+      // no font recorded for this string: write the text without a <span>
+      fputs(str1 ? str1->getCString() : str->getCString(),pageFile);
+
+      delete str;
+      delete str1;
+      fputs("</nobr></DIV>\n",pageFile);
+    }
+  }
+
+  fputs("</DIV>\n", pageFile);
+
+  if( !noframes )
+  {
+      fputs("</BODY>\n</HTML>",pageFile);
+      fclose(pageFile);
+  }
+}
+
+
 void HtmlPage::dumpAsXML(FILE* f,int page){  
   fprintf(f, "<page number=\"%d\" position=\"absolute\"", page);
   fprintf(f," top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight,pageWidth);
@@ -754,7 +857,11 @@ void HtmlPage::dump(FILE *f, int pageNum)
   if (complexMode)
   {
     if (xml) dumpAsXML(f, pageNum);
-    if (!xml) dumpComplex(f, pageNum);  
+    if (!xml) 
+	{
+        //dumpComplex(f, pageNum); // this was the call made in poppler 0.12.4
+        complexHtml(f, pageNum);   // replaces dumpComplex(); complexHtml() was developed by the CDAC developer team
+	}   
   }
   else
   {
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
index 24ccfd1..a0d1671 100644
--- a/utils/HtmlOutputDev.h
+++ b/utils/HtmlOutputDev.h
@@ -170,7 +170,11 @@ private:
   void setDocName(char* fname);
   void dumpAsXML(FILE* f,int page);
   void dumpComplex(FILE* f, int page);
-
+  // complexHtml (by the CDAC developer team): called in place of dumpComplex
+  // for the non-XML complex output. Without noframes it appends each page to
+  // a single <DocName>-html.html file; with noframes it writes to `file`.
+  // `page` is also passed on when the font CSS class names are generated.
+  void complexHtml(FILE *file, int page);
   // marks the position of the fonts that belong to current page (for noframes)
   int fontsPageMarker; 
   HtmlFontAccu *fonts;
-- 
1.7.0.4

_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to