Hi all, I wrote a converter for NoteCase files (http://www.virtual-sky.com/). Not all features are supported, but it seems to work with the basic features, which are the ones I use.
The Python script is my first one and is mainly for my own needs (yes, I'm joining the Zim fans!), but others may find it useful.
Script is enclosed in this e-mail, along with a sample NoteCase file (.ncd). I could post it to the Wiki (http://zim-wiki.org/wiki/doku.php?id=tips_and_tricks) if needed.
Jigho
notecase.ncd
Description: application/notecase-plain
#!/usr/bin/python
# Simple script to convert a NoteCase document to a Zim notebook folder
#
# NoteCase reference:
#   http://notecase.sourceforge.net/ (Free version, discontinued)
#   http://www.virtual-sky.com/ (Pro version)
#
# Based on BeautifulSoup (you need to install it before running notecase2zim):
#   http://www.crummy.com/software/BeautifulSoup/
#
# Written for Python 2 and BeautifulSoup 3 (see the import below).
#
# Adapted to my use of NoteCase and Zim => others may want to adapt it
# For instance:
#   Color "red" in NoteCase => I use "italic" in Zim
#   Background Color "grey" in NoteCase => Title 3 in Zim
#
# Usage :
# -------
# 1. Save NoteCase document to .ncd format (plain text, no compression)
# 2. This script assumes the name is "notecase.ncd". This can be changed below
# 3. Run: python notecase2zim.py
# 4. Get a Folder named "notecase.zim" with the main file "notebook.zim" inside
#
# v1.1
# Jigho 2011
# Contact: https://launchpad.net/~jigho
#
import os
import shutil
import sys
import re
import datetime
sys.path.append('./BeautifulSoup')
from BeautifulSoup import BeautifulSoup

# Name of the NoteCase document to convert (read from the current directory)
notecasefile = 'notecase.ncd'

# Zim markup written before and after the content of the basic formatting tags
_INLINE_MARKUP = {
    'u': '__',   # underline
    'b': '**',   # bold
    'i': '//',   # italic
    's': '~~',   # strike-through
}

# Zim markup used for the <span> text colours of NoteCase
# You may add more cases according to your needs
_SPAN_MARKUP = {
    'color:#ff0000': '//',   # red   => italic
    'color:#0000ff': '**',   # blue  => bold
    'color:#00ff00': '**',   # green => bold
}

# <span> style (grey background) converted to a Zim title
_HEADING_STYLE = 'background-color:#bfbfbf'


def create_file_zim():
    """Write the "notebook.zim" description file in the current directory."""
    # You may change the name and endofline mode here
    with open('notebook.zim', 'w') as fileZim:
        fileZim.write('[Notebook]\nname=Notes\nversion=0.4\nendofline=dos')


def process_title(titre, date):
    """Create the Zim page file for one note and return it, open for writing.

    titre -- the <dt> element holding the note title
    date  -- value written in the "Creation-Date" header of the page

    The file is created in the current directory. Its name is the title
    followed by ".txt", with spaces replaced by "_" and any "/" or '"'
    removed. The caller is responsible for closing the returned file.
    """
    # Some titles are plain, but some have information that we do not use
    # in Zim. In every case the title is kept as a UTF-8 byte string, so that
    # it can be concatenated with the other byte strings and written as is.
    # (str() on a BeautifulSoup 3 element renders it as UTF-8.)
    # NOTE(review): the title is taken from the second child, presumably
    # because the first child is NoteCase markup - confirm on your documents.
    if titre.span:
        titre2 = str(titre.span.contents[1])
    elif titre.string:
        # A plain title is a unicode string: encode it explicitly, otherwise
        # any non-ASCII character raises UnicodeEncodeError further down
        titre2 = titre.string.encode('utf-8')
    else:
        titre2 = str(titre.contents[1])

    # Delete white space, / and " in the filename
    output = (titre2 + '.txt').replace(' ', '_')
    output = output.replace('/', '').replace('"', '')
    output = output.decode('utf-8', 'ignore')

    # Some verbosity, useful on large contents
    # to be aware that the program is still processing...
    print('Creating file:  ' + output)
    fileOut = open(output, 'w')

    # Standard information at the start of any Zim file
    fileOut.write('Content-Type: text/x-zim-wiki\n')
    fileOut.write('Wiki-Format: zim 0.4\n')
    fileOut.write('Creation-Date: ' + str(date) + '\n')
    fileOut.write('\n====== ' + titre2 + ' ======\n')
    fileOut.write('\n')
    return fileOut


def create_subdir(repertoire):
    """Create the sub-folder of a page and make it the current directory.

    repertoire -- the open page file; the folder gets the name of that file
                  without its ".txt" extension
    """
    # Only the extension is removed: a ".txt" inside the title is kept
    rep = os.path.splitext(repertoire.name)[0]
    os.mkdir(rep)
    os.chdir(rep)


def process_format(c, fichier, formatString):
    """Write the content of tag "c" surrounded by the Zim markup formatString.

    Used for the basic formatting tags (underline, bold, italic,...).
    """
    newLine = False
    # Open Wiki format
    fichier.write(formatString)

    # Another trick in case the formatted content ends with a newline:
    # I then prefer to close the formatting tag and then write the
    # new line without formatting
    if len(c.contents) > 1:
        avantDernier = c.contents[-2]
        if (avantDernier.__class__.__name__ == 'Tag'
                and avantDernier.name == 'br'):
            # Remove the <br>, then the element that followed it
            c.contents[-2].extract()
            c.contents[-1].extract()
            newLine = True

    # Process content (recursively !)
    process_content(c, fichier, formatString)

    # Close Wiki format
    fichier.write(formatString)

    # End of the trick for content finishing with a newline
    if newLine:
        fichier.write('\n')


def process_content(contenu, fichier, currentFormat):
    """Convert every element of "contenu" and write the result to "fichier".

    contenu       -- iterable of parsed elements (a tag or a list of contents)
    fichier       -- open Zim page file
    currentFormat -- Zim markup currently open ('' if none)

    "currentFormat" is a trick to close the Wiki format at the end of each
    line even if the format is applied to multiple lines.
    Nota: this trick would need to be enhanced when multiple formats are
    nested.
    """
    for c in contenu:
        if c.__class__.__name__ != 'Tag':
            ligne = c.string.encode("UTF-8")
            # Delete the new line symbol at start of the line
            # This happens when there was a <br> just before
            # but <br> is already taken into account
            fichier.write(re.sub("^\n", '', ligne))
            continue

        nom = c.name
        # <dl> tag stands for new note, ie new Zim file
        if nom == 'dl':
            create_subdir(fichier)
            process_page(c)
            os.chdir('..')
        # <br> tag stands for new line
        # use the "currentFormat" trick to properly close the format tag
        # and then reopen it on the new line
        elif nom == 'br':
            fichier.write(currentFormat)
            fichier.write('\n')
            fichier.write(currentFormat)
        # <u>, <b>, <i> and <s> tags: underline, bold, italic, strike-through
        elif nom in _INLINE_MARKUP:
            process_format(c, fichier, _INLINE_MARKUP[nom])
        # <span> tag can have different purposes according to its arguments
        elif nom == 'span':
            # A <span> without "style" is handled as an unknown span
            # instead of raising KeyError
            style = c.get('style')
            if style in _SPAN_MARKUP:
                process_format(c, fichier, _SPAN_MARKUP[style])
            # Background Color "grey" in NoteCase => Title 3 in Zim
            elif style == _HEADING_STYLE:
                fichier.write('===== ')
                # Do not use the "currentFormat" trick,
                # since title format is not symmetrical
                # (which adds difficulty)
                # and Zim seems to autoclose this format at the end of line
                process_content(c, fichier, currentFormat)
                currentFormat = ''
            # Other <span> contents are treated as plain text
            else:
                print('WARNING : unknown SPAN type %s' % (c.attrs,))
                process_content(c, fichier, currentFormat)
        # <p> tag is not taken into account
        elif nom == 'p':
            process_content(c, fichier, currentFormat)
        # <a> tag stands for links
        elif nom == 'a':
            # An anchor without "href" gives an empty link target
            # instead of raising KeyError
            cible = c.get('href') or u''
            fichier.write('[[')
            fichier.write(cible.encode('utf-8'))
            fichier.write('|')
            process_content(c, fichier, currentFormat)
            fichier.write(']]')
        # In case the program encounters a Tag which is not dealt with,
        # you can then add a specific block according to your needs
        else:
            print('WARNING, unknown tag:  ' + nom)
            # "c.string" is None unless the tag holds one single string
            texte = c.string or u''
            fichier.write('TAG ')
            fichier.write(nom)
            fichier.write(' / ')
            fichier.write(texte.encode("UTF-8"))


def process_page(page):
    """Convert one <dl> element: each <dt> opens a page, each <dd> fills it.

    The creation date of the pages defaults to today and is replaced by the
    value of a "date_created" property comment when one is met.
    """
    creation = datetime.date.today()
    fileOut = None
    try:
        for a in page.contents:
            genre = a.__class__.__name__
            if genre == 'Tag':
                if a.name == 'dt':
                    # The previous page is complete: close it
                    if fileOut is not None:
                        fileOut.close()
                    fileOut = process_title(a, creation)
                elif a.name == 'dd':
                    if fileOut is None:
                        print('WARNING, <dd> content without <dt> title: '
                              'skipped')
                        continue
                    process_content(a.contents, fileOut, '')
            elif genre == 'Comment':
                m = re.match(r"<!--property:date_created=(.*)-->$", str(a))
                if m:
                    creation = datetime.date.fromtimestamp(float(m.group(1)))
    finally:
        # Close the last page, also when the conversion fails
        if fileOut is not None:
            fileOut.close()


def main(repertoire):
    """Convert the document "notecasefile" into the existing folder given."""
    # The document is read before changing the current directory
    with open(notecasefile, 'r') as source:
        xml = source.read()
    soup = BeautifulSoup(xml, convertEntities=BeautifulSoup.XML_ENTITIES)
    level0 = soup.html.body.dl
    os.chdir(repertoire)
    create_file_zim()
    process_page(level0)


if __name__ == '__main__':
    zimdir = re.sub(r"\.ncd$", ".zim", notecasefile)
    if zimdir == notecasefile:
        # No ".ncd" extension: do not reuse the name of the input file
        zimdir = notecasefile + '.zim'
    # Uncomment the next line to delete the result of a previous run
    #shutil.rmtree(zimdir)
    os.mkdir(zimdir)
    main(zimdir)
_______________________________________________ Mailing list: https://launchpad.net/~zim-wiki Post to : zim-wiki@lists.launchpad.net Unsubscribe : https://launchpad.net/~zim-wiki More help : https://help.launchpad.net/ListHelp