This is an automated email from the ASF dual-hosted git repository. wave pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/openwebbeans-site.git
commit b084c395697a8078f3e5d95cbd330887a0da43a5 Author: Dave Fisher <[email protected]> AuthorDate: Mon Jun 7 09:32:59 2021 -0700 fixup html content in blogs --- theme/plugins/asfdata.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/theme/plugins/asfdata.py b/theme/plugins/asfdata.py index 6095537..7b72ee2 100644 --- a/theme/plugins/asfdata.py +++ b/theme/plugins/asfdata.py @@ -24,6 +24,7 @@ import os.path import sys import random import json +import re import traceback import operator import pprint @@ -37,12 +38,18 @@ import xml.dom.minidom import pelican.plugins.signals import pelican.utils +from bs4 import BeautifulSoup ASF_DATA = { 'metadata': { }, 'debug': False, } +FIXUP_HTML = [ + (re.compile(r'&lt;'),'<'), + (re.compile(r'&gt;'),'>'), +] + # read the asfdata configuration in order to get data load and transformation instructions. def read_config(config_yaml): with pelican.utils.pelican_open(config_yaml) as text: @@ -362,6 +369,12 @@ def process_blog(feed, count, words, debug): content_text = '' if words: content_text = ' '.join(get_element_text(entry, 'content').split(' ')[:words]) + "..." + for regex, replace in FIXUP_HTML: + m = regex.search(content_text) + if m: + content_text = re.sub(regex, replace, content_text) + tree_soup = BeautifulSoup(content_text, 'html.parser') + content_text = tree_soup.decode(formatter='html') # we want the title and href v.append( {
