I am trying to convert a bunch of HTML files to markmin, but I am getting strange results. I think the HTML-to-markmin conversion doesn't work well with Unicode (UTF-8 in my case). An example is attached.
This is the controller I use to do the conversion:
def mmtest():
    """Dump every article as HTML and as its markmin conversion.

    For each row selected from ``db.articles``, the row's ``content`` is
    parsed with ``TAG`` and flattened with ``markmin_serializer``. The
    original text and the converted text are then written to
    ``d:/mmtest/<id>.html`` and ``d:/mmtest/<id>.mm`` respectively.

    Returns:
        The string ``"ok"`` once every selected row has been written.
    """
    from gluon.html import markmin_serializer

    for row in db(db.articles).select():
        html = row.content
        markmin = TAG(html).flatten(markmin_serializer)
        base = 'd:/mmtest/' + str(row.id)
        # Context managers flush and close each file as soon as it is
        # written, instead of leaving the handle open until it happens to
        # be garbage-collected.
        # NOTE(review): presumably `html` and `markmin` are byte strings
        # (e.g. UTF-8) here -- confirm before changing the write mode.
        with open(base + '.html', 'w') as html_file:
            html_file.write(html)
        with open(base + '.mm', 'w') as mm_file:
            mm_file.write(markmin)
    return "ok"
Also, at some point I get this error:
Traceback (most recent call last):
File "D:\web2py\gluon\restricted.py", line 194, in restricted
exec ccode in environment
File "D:/web2py/applications/fermer/controllers/tests.py", line 79, in
<module>
File "D:\web2py\gluon\globals.py", line 149, in <lambda>
self._caller = lambda f: f()
File "D:/web2py/applications/fermer/controllers/tests.py", line 6, in
mmtest
markmin = TAG(html).flatten(markmin_serializer)
File "D:\web2py\gluon\html.py", line 1054, in __call__
return web2pyHTMLParser(decoder.decoder(html)).tree
File "D:\web2py\gluon\html.py", line 2172, in __init__
self.feed(text)
File "C:\Python27\lib\HTMLParser.py", line 108, in feed
self.goahead(0)
File "C:\Python27\lib\HTMLParser.py", line 148, in goahead
k = self.parse_starttag(i)
File "C:\Python27\lib\HTMLParser.py", line 229, in parse_starttag
endpos = self.check_for_whole_start_tag(i)
File "C:\Python27\lib\HTMLParser.py", line 304, in
check_for_whole_start_tag
self.error("malformed start tag")
File "C:\Python27\lib\HTMLParser.py", line 115, in error
raise HTMLParseError(message, self.getpos())
HTMLParseError: malformed start tag, at line 1, column 4219
Взято из Книга рекордов Гиннеса
Источник:http://skuky.net
1.mm
Description: Binary data

