I am trying to convert a bunch of HTML files to markmin, but I am getting strange results. I think the HTML-to-markmin conversion doesn't work well with Unicode (UTF-8 in my case). Attached is an example.
And this is my controller to do that:

def mmtest():
    from gluon.html import markmin_serializer
    #markmin = TAG(html).flatten(markmin_serializer)
    for row in db(db.articles).select():
        html = row.content
        markmin = TAG(html).flatten(markmin_serializer)
        open('d:/mmtest/'+str(row.id)+'.html', 'w').write(html)
        open('d:/mmtest/'+str(row.id)+'.mm', 'w').write(markmin)
    return "ok"

Also, at some point I am getting this error:

Traceback (most recent call last):
  File "D:\web2py\gluon\restricted.py", line 194, in restricted
    exec ccode in environment
  File "D:/web2py/applications/fermer/controllers/tests.py", line 79, in <module>
  File "D:\web2py\gluon\globals.py", line 149, in <lambda>
    self._caller = lambda f: f()
  File "D:/web2py/applications/fermer/controllers/tests.py", line 6, in mmtest
    markmin = TAG(html).flatten(markmin_serializer)
  File "D:\web2py\gluon\html.py", line 1054, in __call__
    return web2pyHTMLParser(decoder.decoder(html)).tree
  File "D:\web2py\gluon\html.py", line 2172, in __init__
    self.feed(text)
  File "C:\Python27\lib\HTMLParser.py", line 108, in feed
    self.goahead(0)
  File "C:\Python27\lib\HTMLParser.py", line 148, in goahead
    k = self.parse_starttag(i)
  File "C:\Python27\lib\HTMLParser.py", line 229, in parse_starttag
    endpos = self.check_for_whole_start_tag(i)
  File "C:\Python27\lib\HTMLParser.py", line 304, in check_for_whole_start_tag
    self.error("malformed start tag")
  File "C:\Python27\lib\HTMLParser.py", line 115, in error
    raise HTMLParseError(message, self.getpos())
HTMLParseError: malformed start tag, at line 1, column 4219
Взято из Книга рекордов ГиннесаАвгуст – пора поедания сочных арбузов. Самый большой арбуз в мире вырастила семья из Арканзас, США. Летом 2005 года они вырастили невероятный арбуз весом целых 122 кг. Это средний вес двух взрослых людей. В 2006 году этот рекорд был занесен в Книгу рекордов Гиннеса.
Источник:http://skuky.net
1.mm
Description: Binary data