Hello,
I don't know whether anyone has done this already, but a quick search drew a
blank. The script below needs Python 3; the GCIDE dictionary it converts can
be downloaded here: http://gcide.gnu.org.ua/download
Extract the CIDE files into a directory, then run this program in that
directory. Merge the resultant text files together:
$ cat CIDE.*.txt > gcide.txt
Import into Mnemosyne, and you're away. There are still a few rough edges -
I haven't figured out what all the diacritics and ligatures are supposed to
be yet, and there are probably other problems. Just thought I'd post here
in case anyone wants to suggest improvements (I'm very much a beginner at
Python...)
Thanks for a very interesting program!
--------------->8-------------------
import os
import glob
def clean(buff):
    """Return *buff* with dictionary markup rewritten or stripped.

    Three passes are applied, in this order:

    1. Literal substitutions: tabs become ``<br>``, ``<note>`` gains a
       "Note:" prefix, a handful of entity-style tokens are replaced by
       plain characters, and several tokens are deleted outright.
    2. A comma directly following ``</hw>``, ``</pr>`` or ``</wf>`` is dropped.
    3. Every ``<mhw>``, ``<hw>``, ``<pr>`` and ``<wf>`` element, and every
       ``<-- ... -->`` span, is deleted together with its contents via
       ``remove``.
    """
    # Applied strictly top to bottom.  Order matters where one pattern is a
    # prefix or substring of another ("<hand/>" before "<hand/", "(<?/)"
    # before "<?/"), and the comma fix-ups must run before the elements
    # they refer to are removed below.
    substitutions = (
        # Presumably tabs are replaced because the caller uses a tab as the
        # field separator in its output; verify against caller.
        ("\t", "<br>"),
        ("<note>", "<note>Note:"),
        ("<ldquo/", '\"'),
        ("<rdquo/", '\"'),
        ("<eacute/", 'é'),
        ("<acir/", 'â'),
        ("<ae/", 'ae'),
        ("<hand/>", ""),
        ("<hand/", ""),
        ("<br/", ""),
        ("(<?/)", ""),
        ("<?/", ""),
        ("\\'d8", ""),
        ("<img", ""),
        ("</img>", ""),
        ("src='images\\", ""),
        ("</hw>,", "</hw>"),
        ("</pr>,", "</pr>"),
        ("</wf>,", "</wf>"),
    )
    for old, new in substitutions:
        buff = buff.replace(old, new)

    # Spans that are dropped wholesale, delimiters and contents included.
    dropped_spans = (
        ("<mhw>", "</mhw>"),
        ("<hw>", "</hw>"),
        ("<pr>", "</pr>"),
        ("<wf>", "</wf>"),
        ("<--", "-->"),
    )
    for opening, closing in dropped_spans:
        buff = remove(buff, opening, closing)
    return buff
def remove(buff, startstring, endstring):
    """Delete every ``startstring ... endstring`` span from *buff*.

    The first occurrence of *startstring* is located, then the first
    occurrence of *endstring* after it; both delimiters and everything
    between them are cut out.  The search then restarts from the beginning
    of the shortened string, so a span that only comes into existence once
    the pieces around a cut are joined is removed as well.

    Args:
        buff: Text to process.
        startstring: Opening delimiter of a span to delete.
        endstring: Closing delimiter of a span to delete.

    Returns:
        The text with all complete spans removed.  A *startstring* with no
        following *endstring* is left in place, along with everything
        after it.
    """
    if not startstring and not endstring:
        # Two empty delimiters match a zero-length span at index 0 on every
        # pass, so the loop below would never make progress.
        return buff
    while True:
        startindex = buff.find(startstring)
        if startindex == -1:
            break
        # Search for the terminator only after the opening delimiter, using
        # find()'s start offset instead of copying the tail of the string.
        endindex = buff.find(endstring, startindex + len(startstring))
        if endindex == -1:
            break
        buff = buff[:startindex] + buff[endindex + len(endstring):]
    return buff
# Convert every CIDE.? file in the current directory into a tab-separated
# text file named <input>.txt: one line per headword, laid out as
# "headword<TAB>cleaned definition text".
for path in glob.glob(os.path.join(os.getcwd(), 'CIDE.?')):
    # `with` closes both files even if the conversion fails part-way.
    # The output encoding is explicit so that writing 'é' / 'â' does not
    # depend on the platform's locale.
    # NOTE(review): UTF-8 is assumed to be what the importer expects - confirm.
    with open(path, "r", encoding='latin1') as source, \
            open(path + '.txt', "w", encoding='utf-8') as target:
        entry = ""        # headword of the entry currently being collected
        pieces = []       # body lines gathered for `entry`
        started = False   # becomes True at the first '<p><ent>' line

        for line in source:
            is_headword = line.startswith('<p><ent>')
            if not started:
                if not is_headword:
                    # Everything before the first headword is skipped.
                    continue
                started = True

            if is_headword:
                # Drops the leading '<p><ent>' and the last 11 characters
                # (assumes the line ends with '</ent><br/' plus a newline
                # - TODO confirm against the data files).
                headword = line[8:-11]
                if headword != entry:
                    # Flush the previous entry's body, then open a new
                    # record.  The newline is written *before* each record,
                    # so the file starts with a newline and ends without
                    # one; concatenating the outputs with `cat` therefore
                    # still yields exactly one record per line.
                    body = ''.join(pieces).replace('"', "'")
                    target.write(clean(body))
                    target.write('\n')
                    entry = headword
                    target.write(entry + '\t')
                    pieces = []
                # A repeated headword line adds nothing; later body lines
                # keep accumulating under the same entry.
            else:
                pieces.append(line.rstrip('\n') + ' ')

        # Flush the final entry with the same quote replacement that every
        # earlier entry receives.
        body = ''.join(pieces).replace('"', "'")
        target.write(clean(body))
--
You received this message because you are subscribed to the Google Groups
"mnemosyne-proj-users" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
To view this discussion on the web visit
https://groups.google.com/d/msg/mnemosyne-proj-users/-/3MlhwkkUbYoJ.
For more options, visit https://groups.google.com/groups/opt_out.