Dear developers, The poppler patch related to "accessibility of PDF documents" is attached. Please check it and give your feedback.
On Mon, Jun 14, 2010 at 12:44 PM, leena chourey <[email protected]> wrote: > Dear Albert, > > Thanks for responding. > The poppler patch related with our changes will be submitted soon to > poppler developers for feedback. > > with regards > Leena > > > > On Mon, Jun 14, 2010 at 3:02 AM, Albert Astals Cid <[email protected]> wrote: > >> A Divendres, 11 de juny de 2010, leena chourey va escriure: >> > Dear all, >> > >> > This is in continuation to our last communication related with >> > *Accessibility of Pdf document.* As discussed last time, we have now >> > completed the first prototype version of the same and we are making it >> > available for your testing and valuable comments/feedback. As mentioned >> > last time, we convert the PDF file into HTML format and open the same >> > automatically in Firefox. >> > >> > Enclosed with this email is the ReadMe file which explains the steps to >> > download, install and run this enhancement. Please try out the same and >> > give us your valuable feedback. >> > >> > >> > We have tested it at our end by different teams and have found that it >> > works well in most cases. Some places where we feel work is required are >> > as follows: >> > >> > - Sometimes Orca is not able to read header and footer of page >> > - Some character combinations are displayed as garbage like 'ft', >> 'tt' >> > - Content in table format is spoken by orca but it does not say the >> row >> > and column reference. This means a user cannot find out that orca is >> reading >> > table content. (as orca does table reading in openoffice writer) >> > >> > We are working on these. Waiting to hear more from all. >> >> Quick question, do you have any patches on top of the poppler you use? 
>> >> Albert >> >> > >> > >> > >> > >> > With regards >> > Leena C >> _______________________________________________ >> poppler mailing list >> [email protected] >> http://lists.freedesktop.org/mailman/listinfo/poppler >> > > > > -- > Leena C > Thanks & Regards -- Leena C (for CDAC Mumbai team)
From da2edbd45208384a5500d87440fe87a92c2855c3 Mon Sep 17 00:00:00 2001 From: onkar <[email protected]>, leena <[email protected]> Date: Tue, 15 Jun 2010 10:51:04 +0530 Subject: [PATCH] By CDAC Developers This will help to make pdftohtml more accessible and usable. A function similar to dumpComplex is defined to combine all pages in html format. This change will generate <file>-html.html file and will modify CSStyle classes according to page. --- utils/HtmlFonts.cc | 13 ++++-- utils/HtmlFonts.h | 5 +- utils/HtmlOutputDev.cc | 109 +++++++++++++++++++++++++++++++++++++++++++++++- utils/HtmlOutputDev.h | 6 ++- 4 files changed, 125 insertions(+), 8 deletions(-) diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc index d2cbfd5..88cbf65 100644 --- a/utils/HtmlFonts.cc +++ b/utils/HtmlFonts.cc @@ -288,12 +288,14 @@ int HtmlFontAccu::AddFont(const HtmlFont& font){ } // get CSS font name for font #i -GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){ +// One more parameter for page no (int j) is added in the getCSStyle function by CDAC developer Team +GooString* HtmlFontAccu::getCSStyle(int i, int j, GooString* content){ GooString *tmp; GooString *iStr=GooString::fromInt(i); - + GooString *jStr=GooString::fromInt(j); if (!xml) { tmp = new GooString("<span class=\"ft"); + tmp->append(jStr); tmp->append(iStr); tmp->append("\">"); tmp->append(content); @@ -308,10 +310,11 @@ GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){ } // get CSS font definition for font #i -GooString* HtmlFontAccu::CSStyle(int i){ +// One more parameter for page no (int j) is added in the CSStyle function by CDAC developer Team +GooString* HtmlFontAccu::CSStyle(int i, int j){ GooString *tmp=new GooString(); GooString *iStr=GooString::fromInt(i); - + GooString *jStr=GooString::fromInt(j); GooVector<HtmlFont>::iterator g=accu->begin(); g+=i; HtmlFont font=*g; @@ -322,6 +325,7 @@ GooString* HtmlFontAccu::CSStyle(int i){ if(!xml){ tmp->append(".ft"); + tmp->append(jStr); 
tmp->append(iStr); tmp->append("{font-size:"); tmp->append(Size); @@ -352,6 +356,7 @@ GooString* HtmlFontAccu::CSStyle(int i){ delete fontName; delete colorStr; + delete jstr; delete iStr; delete Size; return tmp; diff --git a/utils/HtmlFonts.h b/utils/HtmlFonts.h index df2b570..39c38c0 100644 --- a/utils/HtmlFonts.h +++ b/utils/HtmlFonts.h @@ -89,8 +89,9 @@ public: g+=i; return g; } - GooString* getCSStyle (int i, GooString* content); - GooString* CSStyle(int i); + // One more parameter for page no (int j) is added in the getCSStyle and CSStyle function by CDAC developer Team + GooString* getCSStyle (int i, int j, GooString* content); + GooString* CSStyle(int i,int j=0); int size() const {return accu->size();} }; diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc index 81f8b88..297e838 100644 --- a/utils/HtmlOutputDev.cc +++ b/utils/HtmlOutputDev.cc @@ -630,6 +630,109 @@ void HtmlPage::coalesce() { } + +/* + this function (complexHtml) will write all pages in one file *html.html + marks the position of the fonts that belong to current page (for noframes). 
Developed by CDAC developer Team + */ +void HtmlPage::complexHtml(FILE *file, int page){ + + FILE* pageFile; + GooString* tmp; + char* htmlEncoding; + + if( firstPage == -1 ) firstPage = page; + + if( !noframes ) + { + GooString* pgNum=GooString::fromInt(page); + tmp = new GooString(DocName); + tmp->append("-html")->append(".html"); + //printf("\ntmp=%s", tmp->getCString()); + delete pgNum; + + if (!(pageFile = fopen(tmp->getCString(), "a"))) { + error(-1, "Couldn't open html file '%s'", tmp->getCString()); + delete tmp; + return; + } + delete tmp; + + fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE> %s</TITLE>\n\n", + DOCTYPE, tmp->getCString()); + + htmlEncoding = HtmlOutputDev::mapEncodingToHtml + (globalParams->getTextEncodingName()); + fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n<br>\n", htmlEncoding); + } + else + { + pageFile = file; + fprintf(pageFile,"<!-- Page %d -->\n", page); + fprintf(pageFile,"<a name=\"%d\"></a>\n", page); + } + + fprintf(pageFile,"<DIV style=\"position:relative;width:%d;height:%d;\">\n", + pageWidth, pageHeight); + + tmp=basename(DocName); + //printf("filename::%s",tmp); + fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile); + for(int i=fontsPageMarker;i!=fonts->size();i++) { + GooString *fontCSStyle = fonts->CSStyle(i, page); + fprintf(pageFile,"\t%s\n",fontCSStyle->getCString()); + //printf("\n fontCSStyle:: %s", fontCSStyle->getCString()); + delete fontCSStyle; + } + + fputs("-->\n</STYLE>\n",pageFile); + + if( !noframes ) + { + fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile); + } + + if( !ignore ) + { + fprintf(pageFile, + "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\">\n", + pageWidth, pageHeight, tmp->getCString(), + (page-firstPage+1), imgExt->getCString()); + } + + delete tmp; + + GooString *str, *str1 = NULL; + for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){ + if (tmp1->htext){ + str=new 
GooString(tmp1->htext); + fprintf(pageFile, + "<DIV style=\"position:absolute;top:%d;left:%d\">", + xoutRound(tmp1->yMin), + xoutRound(tmp1->xMin)); + fputs("<nobr>",pageFile); + if (tmp1->fontpos!=-1){ + str1=fonts->getCSStyle(tmp1->fontpos, page, str); + } + //printf("%s\n", str1->getCString()); + fputs(str1->getCString(),pageFile); + + delete str; + delete str1; + fputs("</nobr></DIV>\n",pageFile); + } + } + + fputs("</DIV>\n", pageFile); + + if( !noframes ) + { + fputs("</BODY>\n</HTML>",pageFile); + fclose(pageFile); + } +} + + void HtmlPage::dumpAsXML(FILE* f,int page){ fprintf(f, "<page number=\"%d\" position=\"absolute\"", page); fprintf(f," top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight,pageWidth); @@ -754,7 +857,11 @@ void HtmlPage::dump(FILE *f, int pageNum) if (complexMode) { if (xml) dumpAsXML(f, pageNum); - if (!xml) dumpComplex(f, pageNum); + if (!xml) + { + //dumpComplex(f, pageNum); // This function used to call in poppler 0.12.4 + complexHtml(f, pageNum); // call of dumpComplex() is replaced by call of complerHtml()which is developed by CDAC Developer Team + } } else { diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h index 24ccfd1..a0d1671 100644 --- a/utils/HtmlOutputDev.h +++ b/utils/HtmlOutputDev.h @@ -170,7 +170,11 @@ private: void setDocName(char* fname); void dumpAsXML(FILE* f,int page); void dumpComplex(FILE* f, int page); - + void complexHtml(FILE *file, int page);//by CDAC developer team// + /* + this function (complexHtml) will write all pages in one file *html.html + marks the position of the fonts that belong to current page (for noframes) + */ // marks the position of the fonts that belong to current page (for noframes) int fontsPageMarker; HtmlFontAccu *fonts; -- 1.7.0.4
_______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
