Hi Gabriela, attached is a patch that reorganises the ODF world to be more like the way Word documents are processed.
I changed the top level, in Operations, to use an ODFGet, which in turn uses an ODFConverter. The heart of the ODFGet function is:

    ODFConverter *converter = ODFConverterNew(html,abstractStorage,package,idPrefix);
    //Get the styles data
    //CSSSheetRelease(converter->styleSheet);
    converter->styleSheet = ODFParseStyles(converter);
    //Convert the content.xml to an html beastie
    ODFTextGet(converter);
    char *cssText = CSSSheetCopyCSSText(converter->styleSheet);
    HTMLAddInternalStyleSheet(converter->html, cssText);
    HTML_safeIndent(converter->html->docNode,0);

This parses the styles as I did before (so it still needs some work), then calls an edited ODFTextGet, which is much as it was. The code has just been twisted around to match the structure of the Word world, which means I can't help thinking that we could/should abstract out the common aspects of the converters. It converts the headers.odt document to HTML, which shows the headers OK. I have also attached my version of headers.odt, since I changed some of the styles to try to emphasize their differences. I hope it makes sense to you and that your patch tool can digest it.
-- 
Cheers, Ian C
diff --git a/CMakeLists.txt b/CMakeLists.txt index 655fe6b..55b7843 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,4 +63,4 @@ add_subdirectory(DocFormats) add_subdirectory(consumers/dftest/src) add_subdirectory(consumers/dfconvert/src) add_subdirectory(consumers/dfutil/src) -add_subdirectory(consumers/corinthia/src) +#add_subdirectory(consumers/corinthia/src) diff --git a/DocFormats/api/src/Operations.c b/DocFormats/api/src/Operations.c index f605ad8..5b71d50 100644 --- a/DocFormats/api/src/Operations.c +++ b/DocFormats/api/src/Operations.c @@ -21,7 +21,7 @@ #include "DFString.h" #include <DocFormats/DFStorage.h> #include "Word.h" -#include "ODFText.h" +#include "ODF.h" #include "DFHTML.h" #include "DFDOM.h" #include "DFXML.h" @@ -242,10 +242,10 @@ int DFGet(DFConcreteDocument *concrete, error); break; case DFFileFormatOdt: - htmlDoc = ODFTextGet(concrete->storage, - abstract->storage, - idPrefix, - error); + htmlDoc = ODFGet(concrete->storage, + abstract->storage, + idPrefix, + error); break; default: DFErrorFormat(error,"Unsupported file format"); @@ -308,11 +308,11 @@ int DFPut(DFConcreteDocument *concreteDoc, error); break; case DFFileFormatOdt: - ok = ODFTextPut(concreteDoc->storage, - abstractDoc->storage, - abstractDoc->htmlDoc, - idPrefix, - error); + ok = ODFPut(concreteDoc->storage, + abstractDoc->storage, + abstractDoc->htmlDoc, + idPrefix, + error); break; default: DFErrorFormat(error,"Unsupported file format"); @@ -340,7 +340,7 @@ int DFCreate(DFConcreteDocument *concreteDoc, error); break; case DFFileFormatOdt: - ok = ODFTextCreate(concreteDoc->storage, + ok = ODFCreate(concreteDoc->storage, abstractDoc->storage, abstractDoc->htmlDoc, error); diff --git a/DocFormats/filters/odf/CMakeLists.txt b/DocFormats/filters/odf/CMakeLists.txt index 521b915..72153aa 100644 --- a/DocFormats/filters/odf/CMakeLists.txt +++ b/DocFormats/filters/odf/CMakeLists.txt @@ -22,6 +22,10 @@ set(GroupSrc src/ODFManifest.h src/ODFPackage.c src/ODFPackage.h + 
src/ODFConverter.c + src/ODFConverter.h + src/ODFStyles.c + src/ODFStyles.h src/ODFSheet.c src/ODFSheet.h) diff --git a/DocFormats/filters/odf/src/ODF.c b/DocFormats/filters/odf/src/ODF.c index f2091de..aea9176 100644 --- a/DocFormats/filters/odf/src/ODF.c +++ b/DocFormats/filters/odf/src/ODF.c @@ -18,3 +18,49 @@ #include "DFPlatform.h" #include "ODF.h" #include "DFCommon.h" +#include "ODFConverter.h" + +#include <stdio.h> + +DFDocument *ODFGet(DFStorage *concreteStorage, DFStorage *abstractStorage, const char *idPrefix, DFError **error) +{ + int ok = 0; + DFDocument *html = NULL; + ODFPackage *package = NULL; + + + ODFConverter *conv = NULL; + + printf("ODFGet\n"); + + package = ODFPackageOpenFrom(concreteStorage, error); + if (package == NULL) + goto end; + + printf("ODFGet\n"); + html = DFDocumentNewWithRoot(HTML_HTML); + + if(ODFConverterGet(html, abstractStorage, package, idPrefix, error) == 0) + goto end; + + ok = 1; + +end: + ODFPackageRelease(package); + if (!ok) { + DFDocumentRelease(html); + return NULL; + } + return html; +} + +int ODFPut(DFStorage *concreteStorage, DFStorage *abstractStorage, DFDocument *htmlDoc, const char *idPrefix, DFError **error) +{ + //TBD +} + +int ODFCreate(DFStorage *concreteStorage, DFStorage *abstractStorage, DFDocument *htmlDoc, DFError **error) +{ + //TBD + +} \ No newline at end of file diff --git a/DocFormats/filters/odf/src/ODF.h b/DocFormats/filters/odf/src/ODF.h index b9afb52..e5ee24c 100644 --- a/DocFormats/filters/odf/src/ODF.h +++ b/DocFormats/filters/odf/src/ODF.h @@ -18,6 +18,14 @@ #ifndef DocFormats_ODF_h #define DocFormats_ODF_h +#include <DocFormats/DFError.h> +#include <DocFormats/DFStorage.h> + #include "ODFPackage.h" +DFDocument *ODFGet(DFStorage *concreteStorage, DFStorage *abstractStorage, const char *idPrefix, DFError **error); +int ODFPut(DFStorage *concreteStorage, DFStorage *abstractStorage, DFDocument *htmlDoc, const char *idPrefix, DFError **error); +int ODFCreate(DFStorage *concreteStorage, 
DFStorage *abstractStorage, DFDocument *htmlDoc, DFError **error); + + #endif diff --git a/DocFormats/filters/odf/src/ODFPackage.c b/DocFormats/filters/odf/src/ODFPackage.c index 3507586..e559c3c 100644 --- a/DocFormats/filters/odf/src/ODFPackage.c +++ b/DocFormats/filters/odf/src/ODFPackage.c @@ -93,9 +93,6 @@ ODFPackage *ODFPackageOpenNew(DFStorage *storage, DFError **error) ODFManifestAddEntry(package->manifest,"settings.xml","text/xml",NULL); ODFManifestAddEntry(package->manifest,"styles.xml","text/xml",NULL); - // Setup ODF objects - package->sheet = ODFSheetNew(package->stylesDoc,package->contentDoc); - return package; } @@ -106,21 +103,28 @@ ODFPackage *ODFPackageOpenFrom(DFStorage *storage, DFError **error) package->storage = DFStorageRetain(storage); // Read XML documents - if ((package->contentDoc = readDocument(package,"content.xml",error)) == NULL) + if ((package->contentDoc = readDocument(package,"content.xml",error)) == NULL) { + DFErrorFormat(error,"Unable to read content.xml"); goto end; - if ((package->metaDoc = readDocument(package,"meta.xml",error)) == NULL) + } + if ((package->metaDoc = readDocument(package,"meta.xml",error)) == NULL) { + DFErrorFormat(error,"Unable to read meta.xml"); goto end; - if ((package->settingsDoc = readDocument(package,"settings.xml",error)) == NULL) + } + if ((package->settingsDoc = readDocument(package,"settings.xml",error)) == NULL) { + DFErrorFormat(error,"Unable to read settings.xml"); goto end; - if ((package->stylesDoc = readDocument(package,"styles.xml",error)) == NULL) + } + if ((package->stylesDoc = readDocument(package,"styles.xml",error)) == NULL) { + DFErrorFormat(error,"Unable to read styles.xml"); goto end; + } // Read manifest - if ((package->manifest = readManifest(package,error)) == NULL) + if ((package->manifest = readManifest(package,error)) == NULL) { + DFErrorFormat(error,"Unable to read manifest.xml"); goto end; - - // Setup ODF objects - package->sheet = 
ODFSheetNew(package->stylesDoc,package->contentDoc); + } return package; @@ -143,7 +147,6 @@ void ODFPackageRelease(ODFPackage *package) DFStorageRelease(package->storage); ODFManifestRelease(package->manifest); - ODFSheetRelease(package->sheet); DFDocumentRelease(package->contentDoc); DFDocumentRelease(package->metaDoc); DFDocumentRelease(package->settingsDoc); diff --git a/DocFormats/filters/odf/src/ODFPackage.h b/DocFormats/filters/odf/src/ODFPackage.h index e56ebc2..3db01b6 100644 --- a/DocFormats/filters/odf/src/ODFPackage.h +++ b/DocFormats/filters/odf/src/ODFPackage.h @@ -22,7 +22,6 @@ #include <DocFormats/DFError.h> #include <DocFormats/DFStorage.h> #include "ODFManifest.h" -#include "ODFSheet.h" typedef struct ODFPackage ODFPackage; @@ -34,7 +33,7 @@ struct ODFPackage { DFDocument *settingsDoc; DFDocument *stylesDoc; ODFManifest *manifest; - ODFSheet *sheet; + }; ODFPackage *ODFPackageOpenNew(DFStorage *storage, DFError **error); diff --git a/DocFormats/filters/odf/src/ODFSheet.c b/DocFormats/filters/odf/src/ODFSheet.c index c2ea092..609bca5 100644 --- a/DocFormats/filters/odf/src/ODFSheet.c +++ b/DocFormats/filters/odf/src/ODFSheet.c @@ -24,35 +24,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// // // -// ODFStyle // -// // -//////////////////////////////////////////////////////////////////////////////////////////////////// - -ODFStyle *ODFStyleNew() -{ - ODFStyle *style = (ODFStyle *)xcalloc(1,sizeof(ODFStyle)); - style->retainCount = 1; - return style; -} - -ODFStyle *ODFStyleRetain(ODFStyle *style) -{ - if (style != NULL) - style->retainCount++; - return style; -} - -void ODFStyleRelease(ODFStyle *style) -{ - if ((style == NULL) || (--style->retainCount > 0)) - return; - - free(style->selector); - free(style); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// // // ODFSheet // // // 
//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/DocFormats/filters/odf/src/ODFSheet.h b/DocFormats/filters/odf/src/ODFSheet.h index eee9697..faeb00c 100644 --- a/DocFormats/filters/odf/src/ODFSheet.h +++ b/DocFormats/filters/odf/src/ODFSheet.h @@ -20,24 +20,7 @@ #include <DocFormats/DFXMLForward.h> #include "DFTypes.h" - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// ODFStyle // -// // -//////////////////////////////////////////////////////////////////////////////////////////////////// - -typedef struct ODFStyle ODFStyle; - -struct ODFStyle { - size_t retainCount; - DFNode *element; - char *selector; -}; - -ODFStyle *ODFStyleNew(); -ODFStyle *ODFStyleRetain(ODFStyle *style); -void ODFStyleRelease(ODFStyle *style); +#include "ODFStyles.h" //////////////////////////////////////////////////////////////////////////////////////////////////// // // diff --git a/DocFormats/filters/odf/src/text/ODFText.c b/DocFormats/filters/odf/src/text/ODFText.c index 4ef0068..ebfb531 100644 --- a/DocFormats/filters/odf/src/text/ODFText.c +++ b/DocFormats/filters/odf/src/text/ODFText.c @@ -21,6 +21,9 @@ #include "ODFPackage.h" #include "ODFTextConverter.h" #include "DFDOM.h" +#include "DFHTML.h" +#include "DFHTMLNormalization.h" +#include "CSS.h" #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -36,13 +39,13 @@ typedef struct { } ODFPutData; // I'm not sure what ODFTextConverter ise used here for. -static void traverseContent(ODFTextConverter *conv, DFNode *odfNode, DFNode *htmlNode) +static void traverseContent(ODFConverter *conv, DFNode *odfNode, DFNode *htmlNode) { for (DFNode *odfChild = odfNode->first; odfChild != NULL; odfChild = odfChild->next) { if (odfChild->tag == DOM_TEXT) { // we have some text or a text modfier here. 
// DFNode *check = - DFCreateChildTextNode(htmlNode, odfChild->value); +// DFCreateChildTextNode(htmlNode, odfChild->value); printf(YELLOW "DOM_TEXT: %s \n" RESET, odfChild->value ); @@ -70,8 +73,33 @@ static void traverseContent(ODFTextConverter *conv, DFNode *odfNode, DFNode *htm // DFNode *newChild = DFCreateChildElement(htmlNode, newTag); } else { - DFCreateChildElement(htmlNode, newTag); - } + //what do we have here + + DFNode *node = NULL; + const char * styleName = DFGetAttribute(odfChild,TEXT_STYLE_NAME); + const char * outlevel = DFGetAttribute(odfChild,TEXT_OUTLINE_LEVEL); + if(outlevel != NULL) { + int s_val = atoi(&outlevel[strlen(outlevel)-1]) - 1; + //if (s_val >= 0 && s_val < 6) { + // HTML_H1 + s_val; + //} + node = DFCreateChildElement(htmlNode, HTML_H1 + s_val); + } else { + node = DFCreateChildElement(htmlNode, HTML_P); + } + printf("Found style name %s\n", styleName); + DFSetAttribute(node, HTML_CLASS, styleName); //DFGetAttribute(odfNode,TEXT_STYLE_NAME)); + for (DFNode *domChild = odfChild->first; domChild != NULL; domChild = domChild->next) + { + if (domChild->tag == DOM_TEXT) { // we have some text or a text modfier here. 
+ // DFNode *check = + DFCreateChildTextNode(node, domChild->value); + printf(YELLOW "DOM_TEXT: %s \n" RESET, + domChild->value + ); + } + } + } } traverseContent(conv,odfChild,htmlNode); } @@ -81,40 +109,29 @@ static void traverseContent(ODFTextConverter *conv, DFNode *odfNode, DFNode *htm // split it up into several functions } -DFDocument *ODFTextGet(DFStorage *concreteStorage, DFStorage *abstractStorage, const char *idPrefix, DFError **error) +DFDocument *ODFTextGet(ODFConverter *converter) { - int ok = 0; - DFDocument *html = NULL; - ODFPackage *package = NULL; - ODFTextConverter *conv = NULL; - DFNode *body = NULL; - - package = ODFPackageOpenFrom(concreteStorage, error); - if (package == NULL) - goto end; - - html = DFDocumentNewWithRoot(HTML_HTML); - body = DFCreateChildElement(html->root, HTML_BODY); - conv = ODFTextConverterNew(html, abstractStorage, package, idPrefix); + print_line(2); + print_line(2); + print_line(2); printf(YELLOW "============================================================\n" - "Showing ODF nodes prior to the traverseContent function\n" + "Showing ODF content nodes prior to the traverseContent function\n" "============================================================\n" RESET); - show_nodes(package->contentDoc->root); - + show_nodes(converter->package->contentDoc->root, 0); print_line(2); print_line(2); print_line(2); + // TODO: Traverse the DOM tree of package->contentDoc, adding elements to the HTML document. // contentDoc is loaded from content.xml, and represents the most important information in // the document, i.e. the text, tables, lists, etc. - traverseContent(conv, package->contentDoc->root, body); - + traverseContent(converter, converter->package->contentDoc->root, converter->body); // uncomment to see the result. (spammy!) 
printf(GREEN "============================================================\n" @@ -122,7 +139,7 @@ DFDocument *ODFTextGet(DFStorage *concreteStorage, DFStorage *abstractStorage, c "============================================================\n" RESET); - show_nodes(body); + show_nodes(converter->body, 0); // TODO: Once this basic traversal is implemented and is capable of producing paragraphs, @@ -136,17 +153,6 @@ DFDocument *ODFTextGet(DFStorage *concreteStorage, DFStorage *abstractStorage, c // // See WordConverterCreateAbstract and WordConverterGetConcrete for how this is done in the // Word filter. - - ok = 1; - - end: - ODFPackageRelease(package); - ODFTextConverterRelease(conv); - if (!ok) { - DFDocumentRelease(html); - return NULL; - } - return html; } int ODFTextPut(DFStorage *concreteStorage, DFStorage *abstractStorage, DFDocument *htmlDoc, const char *idPrefix, DFError **error) diff --git a/DocFormats/filters/odf/src/text/ODFText.h b/DocFormats/filters/odf/src/text/ODFText.h index 70486ab..a69779d 100644 --- a/DocFormats/filters/odf/src/text/ODFText.h +++ b/DocFormats/filters/odf/src/text/ODFText.h @@ -22,7 +22,9 @@ #include <DocFormats/DFStorage.h> #include <DocFormats/DFXMLForward.h> -DFDocument *ODFTextGet(DFStorage *concreteStorage, DFStorage *abstractStorage, const char *idPrefix, DFError **error); +#include "ODFConverter.h" + +DFDocument *ODFTextGet(ODFConverter *converter); int ODFTextPut(DFStorage *concreteStorage, DFStorage *abstractStorage, DFDocument *htmlDoc, const char *idPrefix, DFError **error); int ODFTextCreate(DFStorage *concreteStorage, DFStorage *abstractStorage, DFDocument *htmlDoc, DFError **error); diff --git a/DocFormats/filters/odf/src/text/gbg_test.c b/DocFormats/filters/odf/src/text/gbg_test.c index fae6c42..79afde8 100644 --- a/DocFormats/filters/odf/src/text/gbg_test.c +++ b/DocFormats/filters/odf/src/text/gbg_test.c @@ -3,6 +3,9 @@ #include "ODFPackage.h" #include "ODFTextConverter.h" #include "DFDOM.h" + +#include "CSS.h" 
+#include "CSSSheet.h" #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -189,11 +192,144 @@ Tag find_HTML(DFNode *odfNode, DFNode *htmlNode) /** * Dev tool: List all the nodes following the given one. */ -void show_nodes(DFNode *odfNode) +void show_nodes(DFNode *odfNode, int level) +{ + printf("Level: %d\n",level); + level++; + print_node_info(odfNode); + for (DFNode *odfChild = odfNode->first; odfChild != NULL; odfChild = odfChild->next) { + walkChildren(odfChild, level); + } +} + +/** + * Dev tool: List all the nodes following the given one. + */ +void walkChildren(DFNode *odfNode, int level) { + printf("Level: %d\n",level); + level++; + print_node_info(odfNode); for (DFNode *odfChild = odfNode->first; odfChild != NULL; odfChild = odfChild->next) { - print_node_info(odfChild); - print_line(0); + walkChildren(odfChild, level); + } +} + +/** + * Dev tool: List all the nodes below the given one. + */ +/*void show_nodes(DFNode *odfNode, int level) +{ + DFNode *nextNode = odfNode; + do { + print_node_info(nextNode); + nextNode = DFNextNode(nextNode); + }while(nextNode != NULL); +}*/ + +//give me the styles document +void buildCSS_Styles(CSSSheet * cssSheet, DFNode *odfNode) +{ + //walk through the nodes + // go to the office:styles can we find it? 
+ //iterate each style:style + // make a css + // dip down to get its attributes + printf("buildCSS_Styles\n"); + printf("name = %s\n", translateXMLEnumName[odfNode->tag]); + + //manually play with the functions first + + +/* CSSStyle* cssStyle = CSSSheetLookupElement(cssSheet, + "elementName", + "className", + 1, + 0); + CSSProperties * localproperties = CSSStyleRule(cssStyle); + CSSPut(localproperties,"font-weight","bold");*/ + + + + for (DFNode *odfChild = odfNode->first; odfChild != NULL; odfChild = odfChild->next) + { + if(odfChild->tag == OFFICE_STYLES) + { + printf("Processing office styles\n"); + for (DFNode *styleNode = odfChild->first; styleNode != NULL; styleNode = styleNode->next) + { + if(styleNode->tag == STYLE_STYLE) + { + for (unsigned int i = 0; i < styleNode->attrsCount; i++) + { + Tag t = styleNode->attrs[i].tag; + if(t == STYLE_NAME) + { + printf("Create CSS Properties for %s\n", styleNode->attrs[i].value); + + //if this is a heading look for the TEXT_OUTLINE_LEVEL + + // use the attrbute fetch thing... 
+ const char* outlevel = DFGetAttribute(styleNode, STYLE_DEFAULT_OUTLINE_LEVEL); + CSSStyle* cssStyle = NULL; + if(outlevel != NULL) { + char hlevel[4] = "h"; + hlevel[1] = outlevel[0]; + hlevel[2] = 0; + + cssStyle = CSSSheetLookupElement(cssSheet, + hlevel, + styleNode->attrs[i].value, + 1, + 0); + } else { + cssStyle = CSSSheetLookupElement(cssSheet, + "div", + styleNode->attrs[i].value, + 1, + 0); + } + for (DFNode *styleInfo = styleNode->first; styleInfo != NULL; styleInfo = styleInfo->next) + { + if(styleInfo->tag == STYLE_TEXT_PROPERTIES) + { + //just looking for bolds as a first cut + for (unsigned int i = 0; i < styleInfo->attrsCount; i++) + { + Tag t = styleInfo->attrs[i].tag; + switch(t) + { + case FO_FONT_WEIGHT: + { + CSSProperties * localproperties = CSSStyleRule(cssStyle); + CSSPut(localproperties,"font-weight",styleInfo->attrs[i].value); + break; + } + case FO_FONT_SIZE: + { + CSSProperties * localproperties = CSSStyleRule(cssStyle); + CSSPut(localproperties,"font-size",styleInfo->attrs[i].value); + break; + } + case STYLE_FONT_NAME: + { + CSSProperties * localproperties = CSSStyleRule(cssStyle); + CSSPut(localproperties,"font-family",styleInfo->attrs[i].value); + break; + } + } + } + } + else if(styleInfo->tag == STYLE_PARAGRAPH_PROPERTIES) + { + //TBD + } + } + } + } + } + } + } } } diff --git a/DocFormats/filters/odf/src/text/gbg_test.h b/DocFormats/filters/odf/src/text/gbg_test.h index 636165f..3645c8e 100644 --- a/DocFormats/filters/odf/src/text/gbg_test.h +++ b/DocFormats/filters/odf/src/text/gbg_test.h @@ -3,10 +3,14 @@ #define TAG_NOT_FOUND 4040404 #define TAG_NOT_MATCHED 777777 +#include "CSS.h" +#include "CSSSheet.h" Tag find_HTML(DFNode *odfNode, DFNode *htmlNode); void print_node_info(DFNode *node); -void show_nodes(DFNode *node); +void show_nodes(DFNode *node, int level); +void walkChildren(DFNode *odfNode, int level); +void buildCSS_Styles(CSSSheet * cssSheet, DFNode *odfNode); char *node_id_info(DFNode *node); char 
*missing_tag_info(DFNode *node); void print_line(int style);
headers.odt
Description: application/vnd.oasis.opendocument.text