The branch, odt2lyx, has been updated. - Log -----------------------------------------------------------------
commit 822903a652ea88bf464eaee5aa06598b82c9ea98 Author: Prannoy Pilligundla <[email protected]> Date: Sat Jul 12 18:16:02 2014 +0530 Import working for Sectioning Document, header is yet to be taken up for conversion. Dummy header is in place to be able to open the file in LyX. After running the Python script, open odt2lyx.lyx to see the imported LyX file diff --git a/odt2lyx/LyX-Word-roundtrip-Sectioning.odt b/odt2lyx/LyX-Word-roundtrip-Sectioning.odt new file mode 100644 index 0000000..ba555aa Binary files /dev/null and b/odt2lyx/LyX-Word-roundtrip-Sectioning.odt differ diff --git a/odt2lyx/odt2lyx.lyx b/odt2lyx/odt2lyx.lyx index 12048fd..4173e34 100644 --- a/odt2lyx/odt2lyx.lyx +++ b/odt2lyx/odt2lyx.lyx @@ -1,19 +1,104 @@ -#LyX 2.1 created this file. For more info see http://www.lyx.org/ +#LyX created this file. For more info see http://www.lyx.org/ +\lyxformat 474 +\begin_document +\begin_header +\textclass memoir +\end_header \begin_body -start text:p Title -start text:span ec-qtmr-x-x-172 -Hello World! -end text:span ec-qtmr-x-x-172 -end text:p Title -start text:p author -end text:p author -start text:p date -start text:span ec-qtmr-x-x-120 -July 2, 2014 -end text:span ec-qtmr-x-x-120 +\begin_layout Chapter +Sectioning environments +\end_layout -end text:p date +\begin_layout Standard +Sectioning levels are class-dependent in LATEX. Standard classes have 5 levels: Part, +Chapter, Section, Subsection, Subsubsection, and Paragraph. The memoir class adds a +further level: Subparagraph. Koma script may add levels as well (?????). In this section we +ignore Memoir’s and Koma-Script’s deviation from standard LATEX. Part and +Chapter are used throughout the document and are not repeated here. We limit this +section to samples of the 4-lowest levels: Section, Subsection, Subsubsection, and +Paragraph. + +\end_layout +\begin_layout Section +Section +\end_layout + +\begin_layout Standard +Quisque ullamcorper placerat ipsum. Cras nibh. 
Morbi vel justo vitae lacus tincidunt +ultrices. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. In hac habitasse platea +dictumst. Integer tempus convallis augue. Etiam facilisis. Nunc elementum fermentum wisi. +Aenean placerat. Ut imperdiet, enim sed gravida sollicitudin, felis odio placerat quam, ac +pulvinar elit purus eget enim. Nunc vitae tortor. Proin tempus nibh sit amet nisl. Vivamus +quis tortor vitae risus porta vehicula. + +\end_layout + +\begin_layout Standard + + +\end_layout + +\begin_layout Subsection +Subsection +\end_layout + +\begin_layout Standard +lacus vel est. Curabitur consectetuer. Suspendisse vel felis. Ut lorem lorem, interdum eu, +tincidunt sit amet, laoreet vitae, arcu. Aenean faucibus pede eu ante. Praesent enim elit, +rutrum at, molestie non, nonummy vel, nisl. Ut lectus eros, malesuada sit amet, +fermentum eu, sodales cursus, magna. Donec eu purus. Quisque vehicula, urna sed +ultricies auctor, pede lorem egestas dui, et convallis elit erat sed nulla. Donec +luctus. Curabitur et nunc. Aliquam dolor odio, commodo pretium, ultricies non, +pharetra in, velit. Integer arcu est, nonummy in, fermentum faucibus, egestas vel, +odio. + +\end_layout + +\begin_layout Standard + + +\end_layout + +\begin_layout Subsubsection +Subsubsection +\end_layout + +\begin_layout Standard +lum turpis. Pellentesque cursus luctus mauris. Nulla malesuada porttitor diam. Donec felis +erat, congue non, volutpat at, tin- cidunt tristique, libero. Vivamus viverra fermentum felis. +Donec nonummy pellen- tesque ante. Phasellus adipiscing semper elit. Proin fermentum +massa ac quam. Sed diam turpis, molestie vitae, placerat a, molestie nec, leo. Maecenas + + + +lacinia. Nam ipsum ligula, eleifend at, accumsan nec, suscipit a, ipsum. Morbi +blandit ligula feugiat magna. Nunc eleifend consequat lorem. Sed lacinia nulla vitae +enim. Pellentesque tincidunt purus vel magna. Integer non enim. Praesent euismod +nunc eu purus. Donec bibendum quam in tellus. 
Nullam cursus pulvinar lectus. +Donec et mi. Nam vulputate metus eu enim. Vestibulum pellentesque felis eu +massa. + +\end_layout + +\begin_layout Paragraph +Paragraph +\end_layout + +\begin_layout Standard + + + Nam dui ligula, fringilla a, euismod sodales, sollicitudin vel, wisi. Morbi auc- tor lorem +non justo. Nam lacus libero, pretium at, lobortis vitae, ultricies et, tellus. Donec aliquet, +tortor sed accumsan bibendum, erat ligula aliquet magna, vitae ornare odio metus a mi. +Morbi ac orci et nisl hendrerit mollis. Suspendisse ut massa. Cras nec ante. Pellentesque a +nulla. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus +mus. Aliquam tincidunt urna. Nulla ullam corper vestibulum turpis. Pellentesque cursus +luctus mauris. + +\end_layout + +\end_body \end_document diff --git a/odt2lyx/parseodt.py b/odt2lyx/parseodt.py index 0d7c763..2117fcf 100644 --- a/odt2lyx/parseodt.py +++ b/odt2lyx/parseodt.py @@ -6,7 +6,7 @@ import xml.parsers.expat import codecs # get content xml data from OpenDocument file -ziparchive = zipfile.ZipFile("Hello-World.odt", "r") +ziparchive = zipfile.ZipFile("LyX-Word-roundtrip-Sectioning.odt", "r") xmldata = ziparchive.read("content.xml") ziparchive.close() @@ -38,34 +38,74 @@ parser.EndElementHandler = treebuilder.end_element parser.CharacterDataHandler = treebuilder.char_data # parse the data +xmldata = xmldata.replace("<text:s/>"," ") +xmldata = xmldata.replace("<text:line-break/>","") parser.Parse(xmldata, True) docbody=False -doc_content="#LyX 2.1 created this file. For more info see http://www.lyx.org/" +doc_content = "" +#Header is not yet processes, this is temporary to be able to open the outpt file with LyX +doc_header="""#LyX created this file. 
For more info see http://www.lyx.org/ +\\lyxformat 474 +\\begin_document +\\begin_header +\\textclass memoir +\\end_header""" + +headingStyles = { + 'Heading-1':'Chapter', + 'Heading-2':'Section', + 'Heading-3':'Subsection', + 'Heading-4':'Subsubsection', + 'Heading-5':'Paragraph', + 'Heading-6':'Subparagraph' + } + +paragraphStyles = { + 'Text-body':'Standard' + } + +def processParagraph(node): + e = node[0] + if isinstance(e, Element) and e.attrs[u'text:style-name'] == "paragraph-h": + return True + else: + return False def showtree(node): - global docbody - global doc_content - if node.name=="office:body": - docbody=True - doc_content = doc_content + u'\n\\begin_body\n\n' - #At the point node.name and node.attrs will be processed and appropriate lyx command for it will be written into the lyx file - if docbody and node.name!="office:body" and node.name!="office:text": - doc_content = doc_content + "start " + node.name + "\t" + node.attrs[u'text:style-name'] + "\n" - if len(node)==0: - doc_content = doc_content + "end " + node.name + "\t" + node.attrs[u'text:style-name'] + "\n" - for e in node: - if isinstance(e, Element): - showtree(e) - if node.name=="office:body": docbody=False - if docbody and node.name!="office:body" and node.name!="office:text": - doc_content = doc_content + "\nend " + node.name + "\t" + node.attrs[u'text:style-name'] + "\n" - else: - #Here as "e"contains content, e will be written as it is in the lyx document - if docbody and node.name!="office:body" and node.name!="office:text": - doc_content = doc_content + e + "\nend " + node.name + "\t" + node.attrs[u'text:style-name'] + "\n" + global docbody,doc_content,paragraphStyles,headingStyles + noSupport = False + try: + if node.name=="office:body": + docbody=True + doc_content = doc_content + u'\n\\begin_body\n\n' + #At the point node.name and node.attrs will be processed and appropriate lyx command for it will be written into the lyx file + if docbody and node.name!="office:body" and 
node.name!="office:text": + if node.attrs[u'text:style-name'] in headingStyles.keys(): + doc_content = doc_content + "\n\\begin_layout " + headingStyles[node.attrs[u'text:style-name']] + "\n" + del node[0] + elif node.attrs[u'text:style-name'] in paragraphStyles.keys(): + if processParagraph(node): + doc_content = doc_content + "\n\\begin_layout Paragraph\n" + node[0][0] + "\n\\end_layout\n" + del node[0] + doc_content = doc_content + "\n\\begin_layout " + paragraphStyles[node.attrs[u'text:style-name']] + "\n" + else: + noSupport=True + #doc_content = doc_content + "\n#start " + node.name + "\t" + node.attrs[u'text:style-name'] + "\n" + for e in node: + if isinstance(e, Element): + showtree(e) + if node.name=="office:body": docbody=False + else: + #Here as "e"contains content, e will be written as it is in the lyx document + if docbody and node.name!="office:body" and node.name!="office:text": + doc_content = doc_content + e + if docbody and node.name!="office:body" and node.name!="office:text" and not noSupport: + doc_content = doc_content + "\n\\end_layout\n" + except: + raise showtree(treebuilder.root) lyxoutput = codecs.open("odt2lyx.lyx", 'w', 'utf-8') -doc_content = doc_content + '\n\\end_document\n' +doc_content = doc_header + doc_content + '\n\\end_body\n\\end_document\n' lyxoutput.write(doc_content) lyxoutput.close() \ No newline at end of file ----------------------------------------------------------------------- Summary of changes: odt2lyx/LyX-Word-roundtrip-Sectioning.odt | Bin 0 -> 8601 bytes odt2lyx/odt2lyx.lyx | 111 +++++++++++++++++++++++++---- odt2lyx/parseodt.py | 86 ++++++++++++++++------ 3 files changed, 161 insertions(+), 36 deletions(-) create mode 100644 odt2lyx/LyX-Word-roundtrip-Sectioning.odt hooks/post-receive -- Repositories for GSOC work
