[LyX GSoC/odt2lyx] Basic Python Script to parse the complete odt file and write appropriate text into a new lyx file.

Prannoy Pilligundla Wed, 09 Jul 2014 10:29:47 -0700

The branch, odt2lyx, has been created.
        at  a91895b87c5f878a7ae68254c61b1c925e9dee21 (commit)


- Log -----------------------------------------------------------------

commit a91895b87c5f878a7ae68254c61b1c925e9dee21
Author: Prannoy Pilligundla <[email protected]>
Date:   Wed Jul 9 22:56:32 2014 +0530

    Basic Python Script to parse the complete odt file and write appropriate text into a new lyx file.
    
    Warning: For now, open the generated lyx file with a text editor. LyX syntax is not yet incorporated.

diff --git a/odt2lyx/Hello-World.odt b/odt2lyx/Hello-World.odt
new file mode 100644
index 0000000..ac27677
Binary files /dev/null and b/odt2lyx/Hello-World.odt differ
diff --git a/odt2lyx/odt2lyx.lyx b/odt2lyx/odt2lyx.lyx
new file mode 100644
index 0000000..12048fd
--- /dev/null
+++ b/odt2lyx/odt2lyx.lyx
@@ -0,0 +1,19 @@
+#LyX 2.1 created this file. For more info see http://www.lyx.org/
+\begin_body
+
+start text:p   Title
+start text:span        ec-qtmr-x-x-172
+Hello World!
+end text:span  ec-qtmr-x-x-172
+
+end text:p     Title
+start text:p   author
+end text:p     author
+start text:p   date
+start text:span        ec-qtmr-x-x-120
+July 2, 2014
+end text:span  ec-qtmr-x-x-120
+
+end text:p     date
+
+\end_document
diff --git a/odt2lyx/parseodt.py b/odt2lyx/parseodt.py
new file mode 100644
index 0000000..0d7c763
--- /dev/null
+++ b/odt2lyx/parseodt.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+ 
+# import the needed modules
+import zipfile
+import xml.parsers.expat
+import codecs
+ 
+# get content xml data from OpenDocument file
+ziparchive = zipfile.ZipFile("Hello-World.odt", "r")
+xmldata = ziparchive.read("content.xml")
+ziparchive.close()
+ 
+class Element(list):
+    def __init__(self, name, attrs):
+        self.name = name
+        self.attrs = attrs
+ 
+class TreeBuilder:
+    def __init__(self):
+        self.root = Element("root", None)
+        self.path = [self.root]
+    def start_element(self, name, attrs):
+        element = Element(name, attrs)
+        self.path[-1].append(element)
+        self.path.append(element)
+    def end_element(self, name):
+        assert name == self.path[-1].name
+        self.path.pop()
+    def char_data(self, data):
+        self.path[-1].append(data)
+ 
+# create parser and parsehandler
+parser = xml.parsers.expat.ParserCreate()
+treebuilder = TreeBuilder()
+# assign the handler functions
+parser.StartElementHandler  = treebuilder.start_element
+parser.EndElementHandler    = treebuilder.end_element
+parser.CharacterDataHandler = treebuilder.char_data
+ 
+# parse the data
+parser.Parse(xmldata, True)
+
+docbody=False
+doc_content="#LyX 2.1 created this file. For more info see http://www.lyx.org/";
+def showtree(node):
+    global docbody
+    global doc_content
+    if node.name=="office:body":
+        docbody=True
+        doc_content = doc_content + u'\n\\begin_body\n\n'
+    #At the point node.name and node.attrs will be processed and appropriate lyx command for it will be written into the lyx file
+    if docbody and node.name!="office:body" and node.name!="office:text":
+        doc_content = doc_content + "start " + node.name + "\t" + node.attrs[u'text:style-name'] + "\n"
+        if len(node)==0:
+            doc_content = doc_content + "end " + node.name + "\t" + node.attrs[u'text:style-name'] + "\n"
+    for e in node:
+        if isinstance(e, Element):
+            showtree(e)
+            if node.name=="office:body": docbody=False
+            if docbody and node.name!="office:body" and node.name!="office:text":
+                doc_content = doc_content + "\nend " + node.name + "\t" + node.attrs[u'text:style-name'] + "\n"
+        else:
+           #Here as "e"contains content, e will be written as it is in the lyx document
+           if docbody and node.name!="office:body" and node.name!="office:text":
+               doc_content = doc_content + e +  "\nend " + node.name + "\t" + node.attrs[u'text:style-name'] + "\n"
+
+showtree(treebuilder.root)
+lyxoutput = codecs.open("odt2lyx.lyx", 'w', 'utf-8')
+doc_content = doc_content + '\n\\end_document\n'
+lyxoutput.write(doc_content)
+lyxoutput.close()
\ No newline at end of file

commit cdbeea666d2bedc4ab61ed8c480dcbd189684809
Author: Prannoy Pilligundla <[email protected]>
Date:   Mon Jun 30 23:36:27 2014 +0530

    Added Python script to inject meta data into the tex file
    
    Run this script after lyx to latex conversion and before calling mk4ht

diff --git a/tests/parselyx.py b/tests/parselyx.py
new file mode 100644
index 0000000..f41ad07
--- /dev/null
+++ b/tests/parselyx.py
@@ -0,0 +1,43 @@
+import re
+lyxf = open("LyX-Word-roundtrip-Mathematical-expressions.lyx", "r")
+content = lyxf.read()
+lyxf.close()
+
+lyxeqn = re.findall(r'\\begin_inset Formula(.*?)\\end_inset',content,re.DOTALL)
+lyxeqn1=set(lyxeqn)
+texeqn_old=[]
+texeqn_new=[]
+
+for i in lyxeqn1:
+    if i[1]=="\n" or i[1:].count('\n')>1:
+        if i[-2]=="\n":
+            texeqn_old.append(i[1:-2])
+            texeqn_new.append(i[1:-2]+"\n\\begin{metadata}\n"+i[2:-2]+"\n\\end{metadata}")
+        else:
+            texeqn_old.append(i[1:-1])
+            texeqn_new.append(i[1:-1]+"\n\\begin{metadata}\n"+i[2:-1]+"\n\\end{metadata}")
+    else:
+        texeqn_old.append(i[1:-1])
+        if i[-2]=="\n":
+            texeqn_new.append(i[1:-2]+" \\inlinemeta{\\verb|"+i[1:-2]+"|}")
+        else:
+            texeqn_new.append(i[1:-1]+" \\inlinemeta{\\verb|"+i[1:-1]+"|}")
+    
+    
+texf = open("LyX-Word-roundtrip-Mathematical-expressions.tex", "r")
+content = texf.read()
+
+flag=1
+for i,j in zip(texeqn_old,texeqn_new):
+     eqncount=content.count(i)
+     for k in range(eqncount):
+         if ((content.split(i)[:1])[0][-1] in {'{','('} or (content.split(i)[:1])[0][-2:] in {': '} or (content.split(i)[1:])[0][0] in {'}',')'} or (content.split(i)[1:])[0][:2] in {' &'}):
+             flag=0
+     if flag:
+         content = content.replace(i,j)
+     flag=1
+texf.close()
+    
+texf1 = open("metatest.tex", "w")
+texf1.writelines(content)
+texf1.close()
\ No newline at end of file

-----------------------------------------------------------------------


hooks/post-receive
-- 
Repositories for GSOC work

[LyX GSoC/odt2lyx] Basic Python Script to parse the complete odt file and write appropriate text into a new lyx file.

Reply via email to