The branch, odt2lyx, has been created.
at a91895b87c5f878a7ae68254c61b1c925e9dee21 (commit)
- Log -----------------------------------------------------------------
commit a91895b87c5f878a7ae68254c61b1c925e9dee21
Author: Prannoy Pilligundla <[email protected]>
Date: Wed Jul 9 22:56:32 2014 +0530
Basic Python script to parse the complete ODT file and write the
appropriate text into a new LyX file.
Warning: for now, open the generated LyX file with a text editor. LyX
syntax is not yet incorporated.
diff --git a/odt2lyx/Hello-World.odt b/odt2lyx/Hello-World.odt
new file mode 100644
index 0000000..ac27677
Binary files /dev/null and b/odt2lyx/Hello-World.odt differ
diff --git a/odt2lyx/odt2lyx.lyx b/odt2lyx/odt2lyx.lyx
new file mode 100644
index 0000000..12048fd
--- /dev/null
+++ b/odt2lyx/odt2lyx.lyx
@@ -0,0 +1,19 @@
+#LyX 2.1 created this file. For more info see http://www.lyx.org/
+\begin_body
+
+start text:p Title
+start text:span ec-qtmr-x-x-172
+Hello World!
+end text:span ec-qtmr-x-x-172
+
+end text:p Title
+start text:p author
+end text:p author
+start text:p date
+start text:span ec-qtmr-x-x-120
+July 2, 2014
+end text:span ec-qtmr-x-x-120
+
+end text:p date
+
+\end_document
diff --git a/odt2lyx/parseodt.py b/odt2lyx/parseodt.py
new file mode 100644
index 0000000..0d7c763
--- /dev/null
+++ b/odt2lyx/parseodt.py
@@ -0,0 +1,71 @@
#!/usr/bin/env python

# import the needed modules
import zipfile
import xml.parsers.expat
import codecs

# Get the content.xml data from the OpenDocument file.  The .odt file is
# opened as a zip archive and its "content.xml" member is read into memory.
# The with-block closes the archive even when the read fails (ZipFile.read
# raises KeyError if the archive has no such member).
with zipfile.ZipFile("Hello-World.odt", "r") as ziparchive:
    xmldata = ziparchive.read("content.xml")

class Element(list):
    """XML element node: a list of its children plus a tag name and attributes.

    The list items are the node's children, in the order they were appended.

    name  -- tag name of the element, e.g. "text:p"
    attrs -- attribute mapping of the element (None is accepted and is used
             for a synthetic root node that has no attributes)
    """

    def __init__(self, name, attrs):
        # Initialise the list part explicitly so the node always starts empty.
        list.__init__(self)
        self.name = name
        self.attrs = attrs

class TreeBuilder:
    """Turn expat-style parser callbacks into a tree of Element nodes.

    root -- synthetic Element named "root" under which the document is built
    path -- stack of the elements that are currently open; path[-1] is the
            innermost one and receives new children
    """

    def __init__(self):
        self.root = Element("root", None)
        self.path = [self.root]

    def start_element(self, name, attrs):
        """Open element `name`: attach it to its parent and descend into it."""
        element = Element(name, attrs)
        self.path[-1].append(element)
        self.path.append(element)

    def end_element(self, name):
        """Close the innermost open element, which must be named `name`."""
        # Internal consistency check only: start/end callbacks must pair up.
        assert name == self.path[-1].name
        self.path.pop()

    def char_data(self, data):
        """Add character data `data` to the innermost open element.

        expat may report one contiguous run of text through several
        callbacks, so a chunk that directly follows another text chunk is
        merged into it instead of being stored as a separate child.
        """
        siblings = self.path[-1]
        # Element subclasses list, so a non-list last child is text.
        if siblings and not isinstance(siblings[-1], list):
            siblings[-1] += data
        else:
            siblings.append(data)

# Build the tree-collecting handler object and the expat parser.
treebuilder = TreeBuilder()
parser = xml.parsers.expat.ParserCreate()

# Route the parser's text, end-tag and start-tag callbacks to the builder.
parser.CharacterDataHandler = treebuilder.char_data
parser.EndElementHandler = treebuilder.end_element
parser.StartElementHandler = treebuilder.start_element

# Feed the whole document at once; True marks this as the final chunk.
parser.Parse(xmldata, True)

# Conversion state shared with showtree():
# docbody     -- True while the walk is inside the "office:body" element
# doc_content -- output text accumulated so far, starting with the file header
docbody = False
doc_content = "#LyX 2.1 created this file. For more info see http://www.lyx.org/"


def showtree(node):
    """Append a start/end trace of `node` and its subtree to doc_content.

    Nothing is written until an "office:body" element is entered; entering
    it writes the begin_body marker.  Inside the body, every element except
    "office:body" and "office:text" contributes a "start <name>\\t<style>"
    line, its text children verbatim, and exactly one "end <name>\\t<style>"
    line.  <style> is the element's "text:style-name" attribute, or an empty
    string when the element has none.

    node -- Element-like node: has .name and .attrs, and iterates over its
            children (Element instances or text strings)

    The result is accumulated in the module-level doc_content; docbody is
    updated as a side effect.
    """
    global docbody
    global doc_content
    if node.name == "office:body":
        docbody = True
        doc_content = doc_content + u'\n\\begin_body\n\n'
    # At this point node.name and node.attrs are processed; eventually the
    # appropriate LyX command for them is to be written instead of a trace.
    traced = docbody and node.name not in ("office:body", "office:text")
    if traced:
        # .get() instead of [] so elements without a style do not abort the
        # whole conversion with a KeyError.
        label = node.name + "\t" + node.attrs.get(u'text:style-name', u'') + "\n"
        doc_content = doc_content + "start " + label
    for e in node:
        if isinstance(e, Element):
            showtree(e)
        elif traced:
            # Here "e" is text content, written as is into the document.
            doc_content = doc_content + e
    if node.name == "office:body":
        docbody = False
    if traced:
        # One closing line per element.  A leading newline terminates the
        # content line when the element had children.
        if len(node) == 0:
            doc_content = doc_content + "end " + label
        else:
            doc_content = doc_content + "\nend " + label

# Walk the parsed tree (this fills doc_content), append the document footer
# and write the result to odt2lyx.lyx as UTF-8.  The with-block closes the
# output file even if the write fails.
showtree(treebuilder.root)
doc_content = doc_content + '\n\\end_document\n'
with codecs.open("odt2lyx.lyx", 'w', 'utf-8') as lyxoutput:
    lyxoutput.write(doc_content)
\ No newline at end of file
commit cdbeea666d2bedc4ab61ed8c480dcbd189684809
Author: Prannoy Pilligundla <[email protected]>
Date: Mon Jun 30 23:36:27 2014 +0530
Added Python script to inject metadata into the TeX file
Run this script after the LyX-to-LaTeX conversion and before calling mk4ht
diff --git a/tests/parselyx.py b/tests/parselyx.py
new file mode 100644
index 0000000..f41ad07
--- /dev/null
+++ b/tests/parselyx.py
@@ -0,0 +1,43 @@
import re

# An equation is left alone when any of its occurrences in the .tex text is
# directly preceded by one of these strings ...
_BLOCKING_PREFIXES = ('{', '(', ': ')
# ... or directly followed by one of these.
# NOTE(review): presumably these are contexts where the extra markup would
# break the surrounding LaTeX construct -- confirm with the author.
_BLOCKING_SUFFIXES = ('}', ')', ' &')


def _metadata_pair(formula):
    """Return (old, new) replacement texts for one LyX formula inset body.

    formula -- text captured between "\\begin_inset Formula" and
               "\\end_inset"; must be at least two characters long.

    The first character and the trailing newline(s) are stripped to get the
    text searched for in the .tex file.  A formula whose second character is
    a newline, or that spans more than one line, gets a metadata environment
    appended; any other formula gets an inline \\inlinemeta{\\verb|...|} tag.
    """
    end = -2 if formula[-2] == "\n" else -1
    if formula[1] == "\n" or formula[1:].count('\n') > 1:
        old = formula[1:end]
        new = old + "\n\\begin{metadata}\n" + formula[2:end] + "\n\\end{metadata}"
    else:
        # The search text always keeps everything but the last character,
        # while the replacement is built from the fully stripped formula.
        old = formula[1:-1]
        stripped = formula[1:end]
        new = stripped + " \\inlinemeta{\\verb|" + stripped + "|}"
    return old, new


def _all_occurrences_safe(content, eqn):
    """Return True if no occurrence of `eqn` in `content` is in a blocked context.

    Every non-overlapping occurrence is examined.  An occurrence at the very
    start or end of `content` simply has no neighbour on that side.
    """
    start = content.find(eqn)
    while start != -1:
        end = start + len(eqn)
        if (content.endswith(_BLOCKING_PREFIXES, 0, start)
                or content.startswith(_BLOCKING_SUFFIXES, end)):
            return False
        start = content.find(eqn, end)
    return True


def inject_metadata(lyx_text, tex_text):
    """Return `tex_text` with metadata added after equations found in `lyx_text`.

    lyx_text -- contents of the .lyx file; its formula insets define which
                equations are looked for
    tex_text -- contents of the .tex file generated from that .lyx file

    An equation is rewritten (all of its occurrences at once) only when none
    of its occurrences sits in a blocked context; otherwise it is left
    untouched everywhere.
    """
    formulas = re.findall(r'\\begin_inset Formula(.*?)\\end_inset',
                          lyx_text, re.DOTALL)
    # sorted() makes the replacement order, and so the output, reproducible.
    for formula in sorted(set(formulas)):
        if len(formula) < 2:
            # Too short to hold a formula; indexing it would fail.
            continue
        old, new = _metadata_pair(formula)
        # An empty search string would match at every position.
        if old and _all_occurrences_safe(tex_text, old):
            tex_text = tex_text.replace(old, new)
    return tex_text


def main():
    """Read the .lyx/.tex pair, inject the metadata, write metatest.tex."""
    with open("LyX-Word-roundtrip-Mathematical-expressions.lyx", "r") as lyxf:
        lyx_text = lyxf.read()
    with open("LyX-Word-roundtrip-Mathematical-expressions.tex", "r") as texf:
        tex_text = texf.read()
    with open("metatest.tex", "w") as texf1:
        texf1.write(inject_metadata(lyx_text, tex_text))


if __name__ == "__main__":
    main()
\ No newline at end of file
-----------------------------------------------------------------------
hooks/post-receive
--
Repositories for GSOC work