This is what I have so far. It is not really a priority of mine.
Implementing tables may be beyond what I was planning to do.
--
You received this message because you are subscribed to the Google Groups
"web2py-users" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/web2py?hl=en.
import re
import cgi
import sanitizer
class Markdown(object):
    """Small regex-driven Markdown-to-HTML converter.

    ``convert`` walks ``self.actions`` in order.  Each entry is a
    ``(compiled_regex, handler)`` pair; every match is cut out of the text and
    replaced by a placeholder token (``self.pre + index + self.post``) while
    the HTML returned by the handler is stored aside.  Once the remaining
    text has been sanitized, the placeholders are swapped back for the stored
    HTML.

    Because handler output is re-inserted *after* the final sanitize pass,
    every handler is responsible for sanitizing or escaping what it emits.
    """

    def __init__(self):
        """Set up the placeholder delimiters and the ordered rule table."""
        # Delimiters wrapped around the substitution index to form a placeholder.
        self.pre = 'filu'
        self.post = 'fila'
        # Whitespace ending in a newline, optionally followed by one more
        # blank line; ``convert`` replaces each match with a single newline.
        self.regex_cleanup = re.compile(r'\s*\n(\s*(\n|\r))?', re.MULTILINE)
        # Leading list markers removed from individual list items.
        self.regex_ul_marker = re.compile(r'^(\*|\-|\+)(\s+|$)')
        self.regex_ol_marker = re.compile(r'^\d+(\s+|$)')
        # Text that already looks like one of our placeholders must be
        # protected first, so the pattern is derived from pre/post rather
        # than hard-coded.
        placeholder = r'(?P<s>%s\d+%s)' % (re.escape(self.pre),
                                           re.escape(self.post))
        # Order matters: rules run top to bottom and earlier rules consume
        # text before later ones see it.
        self.actions = [
            (re.compile(placeholder), self.t_ignore),
            # Indented code block: handler strips four characters per line,
            # so the rule requires the same four-space indent.
            (re.compile(r'\n(?P<s>( {4}.*?\n)+)', re.MULTILINE), self.t_code2),
            (re.compile(r'`\s*(?P<s>.+?)\s*`', re.MULTILINE), self.t_code),
            (re.compile(r'((\*|\-)\s*){3}((\*|\-)\s*)*', re.MULTILINE), self.t_hr),
            (re.compile(r'\*\*\s*(?P<s>.+?)\s*\*\*', re.MULTILINE), self.t_strong),
            (re.compile(r'__\s*(?P<s>.+?)\s*__', re.MULTILINE), self.t_strong),
            (re.compile(r'\*\s*(?P<s>.+?)\s*\*', re.MULTILINE), self.t_emphasize),
            (re.compile(r'_\s*(?P<s>.+?)\s*_', re.MULTILINE), self.t_emphasize),
            # Longest heading prefix first so '######' is not taken as '#'.
            (re.compile(r'^######\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h6),
            (re.compile(r'^#####\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h5),
            (re.compile(r'^####\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h4),
            (re.compile(r'^###\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h3),
            (re.compile(r'^##\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h2),
            (re.compile(r'^#\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h1),
            # NOTE(review): matches any text up to a left-over backtick and
            # renders it as code; kept as-is, intent should be confirmed.
            (re.compile(r'(?P<s>.+?)`', re.MULTILINE), self.t_code),
            (re.compile(r'!\[(?P<key>.+?)\]\s*\((?P<link>.+?)\)'), self.t_image),
            (re.compile(r'\[(?P<key>.+?)\]\s*\[(?P<link>.*?)\]'), self.t_ref_link),
            # Titled links must be tried before the plain link rule.
            (re.compile(r'\[(?P<key>.+?)\]\s*\((?P<link>.+?)\s+"(?P<title>.*?)"\s*\)'),
             self.t_title_link),
            (re.compile(r'\[(?P<key>.+?)\]\s*\((?P<link>.+?)\s+\((?P<title>.*?)\)\s*\)'),
             self.t_title_link),
            (re.compile(r'\[(?P<key>.+?)\]\s*\((?P<link>.+?)\)'), self.t_link),
            # The parenthesised-title form has a mandatory title and runs
            # first; the quoted form's title is optional, so it would
            # otherwise swallow every definition and leave '(title)' behind.
            (re.compile(r'\[(?P<key>\w+?)\]\:\s+(?P<link>\S+)\s+\((?P<title>.*)\)'),
             self.t_reference),
            (re.compile(r'\[(?P<key>\w+?)\]\:\s+(?P<link>\S+)(\s+"(?P<title>.*)")?'),
             self.t_reference),
            (re.compile(r'\n(?P<s>((\*|\-|\+)\s+.+?\n)+)', re.MULTILINE), self.t_ul),
            (re.compile(r'\n(?P<s>(\d+\s+.+?\n)+)', re.MULTILINE), self.t_ol),
        ]

    def sanitize(self, text):
        """Run *text* through ``sanitizer.sanitize`` with a/div/img/span permitted."""
        return sanitizer.sanitize(text, permitted_tags=['a', 'div', 'img', 'span'])

    def _escape(self, text):
        """Return *text* with ``&``, ``<``, ``>`` and ``"`` replaced by entities."""
        # '&' must be replaced first so the entities added below survive.
        return (text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;')
                    .replace('"', '&quot;'))

    def _wrap(self, tag, match):
        """Return the sanitized ``s`` group of *match* wrapped in ``<tag>``."""
        return '<%s>%s</%s>' % (tag, self.sanitize(match.group('s')), tag)

    def t_ignore(self, match):
        """Return the matched text unchanged (protects literal placeholders)."""
        return match.group('s')

    def t_newlines(self, match):
        """Return a single newline; not referenced by the rule table."""
        return '\n'

    def t_strong(self, match):
        """Render the match as ``<strong>``."""
        return self._wrap('strong', match)

    def t_emphasize(self, match):
        """Render the match as ``<em>``."""
        return self._wrap('em', match)

    def t_code(self, match):
        """Render the match as inline ``<code>``."""
        return self._wrap('code', match)

    def t_code2(self, match):
        """Render an indented block as ``<pre><code>``.

        The first four characters (the indent) of every line are dropped and
        the remaining text is HTML-escaped so code is shown verbatim.
        """
        body = '\n'.join([line[4:] for line in match.group('s').split('\n')])
        return '<pre><code>' + self._escape(body) + '</code></pre>'

    def t_ul(self, match):
        """Render consecutive bullet lines as a ``<ul>``, one ``<li>`` per line."""
        lines = match.group('s').strip().split('\n')
        items = ['<li>%s</li>' % self.sanitize(self.regex_ul_marker.sub('', line, 1))
                 for line in lines]
        return '<ul>\n' + '\n'.join(items) + '</ul>'

    def t_ol(self, match):
        """Render consecutive numbered lines as an ``<ol>``, one ``<li>`` per line."""
        lines = match.group('s').strip().split('\n')
        # Stripping the marker with a regex (instead of indexing the result
        # of split) tolerates a line that has a number but no text.
        items = ['<li>%s</li>' % self.sanitize(self.regex_ol_marker.sub('', line, 1))
                 for line in lines]
        return '<ol>\n' + '\n'.join(items) + '</ol>'

    def t_h6(self, match):
        """Render the match as ``<h6>``."""
        return self._wrap('h6', match)

    def t_h5(self, match):
        """Render the match as ``<h5>``."""
        return self._wrap('h5', match)

    def t_h4(self, match):
        """Render the match as ``<h4>``."""
        return self._wrap('h4', match)

    def t_h3(self, match):
        """Render the match as ``<h3>``."""
        return self._wrap('h3', match)

    def t_h2(self, match):
        """Render the match as ``<h2>``."""
        return self._wrap('h2', match)

    def t_h1(self, match):
        """Render the match as ``<h1>``."""
        return self._wrap('h1', match)

    def t_hr(self, match):
        """Render a horizontal rule; the match content is not used."""
        return '<hr/>'

    def t_image(self, match):
        """Render ``![key](link)`` as an ``<img>`` with escaped attributes."""
        return '<img src="%s" alt="%s" />' % (
            self._escape(match.group('link')),
            self._escape(match.group('key')))

    def t_ref_link(self, match):
        """Render ``[key][link]`` as a link to the in-page anchor ``#link``.

        An empty ``link`` falls back to ``key``; the anchor name is lowercased.
        """
        key = match.group('key')
        link = match.group('link') or key
        return '<a href="#%s">%s</a>' % (self._escape(link.lower()),
                                         self.sanitize(key))

    def t_link(self, match):
        """Render ``[key](link)`` as an ``<a>`` element."""
        return '<a href="%s">%s</a>' % (self._escape(match.group('link')),
                                        self.sanitize(match.group('key')))

    def t_title_link(self, match):
        """Render ``[key](link "title")`` as an ``<a>`` with a ``title``."""
        return '<a href="%s" title="%s">%s</a>' % (
            self._escape(match.group('link')),
            self._escape(match.group('title')),
            self.sanitize(match.group('key')))

    def t_reference(self, match):
        """Render a ``[key]: link "title"`` definition as a named anchor.

        The anchor id is the lowercased key; the visible text is the title,
        or the key when no title was given.
        """
        key = match.group('key')
        title = match.group('title') or key
        return '<a id="%s" href="%s">%s</a><br/>' % (
            key.lower(),
            self._escape(match.group('link')),
            self.sanitize(title))

    def convert(self, text):
        """Convert Markdown *text* to HTML and return it.

        The text is normalised (a trailing newline is appended, blank lines
        are collapsed, tabs are expanded), every rule in ``self.actions`` is
        applied in order, the leftover text is sanitized, and finally the
        placeholders are replaced by the HTML the handlers produced.
        """
        substitutions = []
        # Tabs become four spaces so tab-indented lines qualify as code blocks.
        text = self.regex_cleanup.sub('\n', text + '\n').replace('\t', '    ')
        for regex, action in self.actions:
            pos = 0
            while True:
                match = regex.search(text, pos)
                if not match:
                    break
                key = self.pre + str(len(substitutions)) + self.post
                substitutions.append(action(match))
                text = text[:match.start()] + key + text[match.end():]
                # Resume after the placeholder so it is never re-matched
                # by the rule that just produced it.
                pos = match.start() + len(key)
        text = self.sanitize(text)
        # Newest first: a later substitution may embed an earlier
        # placeholder, which is then resolved when its own turn comes.
        for k in range(len(substitutions) - 1, -1, -1):
            text = text.replace(self.pre + str(k) + self.post, substitutions[k])
        return text
# Demo: render a sample document that touches the supported constructs and
# print the resulting HTML.  The call form of print with a single argument
# behaves the same on Python 2 and Python 3.
# NOTE(review): the sample's code lines ("for i in range(10): ...") carry no
# leading indentation here, so they will not be picked up as an indented
# code block -- the indent was probably lost in transit; confirm.
print(Markdown().convert("""
# This
## is
### a test
***
This is a **strong** *emphasized* piece of `code` <div>in a div</div>
and invalid < code.
and this [is a link](http://www.google.com) to something
and this [is a link](http://www.google.com "with title") to something
Here is an image ![image] (http://www.google.com)
This if code:
for i in range(10):
print(i)
here is a referene to [Google][1] and to [Yahoo][]
here is a list
- dog
- cat
- mouse
and another
1 dog
2 cat
3 mouse
This mess up with this: xyz2t
- - -
[1]: http://www.google.com
"google"
[Yahoo]: http://www.yahoo.com
"Yahoo"
"""))