This is an automated email from the ASF dual-hosted git repository. kentontaylor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/allura.git
commit 3c8b539229a239bc62a85e837e97f678bc514cc4 Author: Dave Brondsema <[email protected]> AuthorDate: Tue May 7 11:57:16 2024 -0400 use urlopen in blog external rss feed processing --- ForgeBlog/forgeblog/command/rssfeeds.py | 4 +- ForgeBlog/forgeblog/tests/test_commands.py | 92 +++++++++++++++--------------- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/ForgeBlog/forgeblog/command/rssfeeds.py b/ForgeBlog/forgeblog/command/rssfeeds.py index 2e3ed3ad7..de15dd9f3 100644 --- a/ForgeBlog/forgeblog/command/rssfeeds.py +++ b/ForgeBlog/forgeblog/command/rssfeeds.py @@ -28,6 +28,7 @@ from ming.odm import session from tg import tmpl_context as c from allura import model as M +from allura.lib import helpers as h from forgeblog import model as BM from forgeblog.main import ForgeBlogApp from allura.lib import exceptions @@ -106,7 +107,8 @@ class RssFeedsCommand(base.BlogCommand): c.app = app allura_base.log.info(f"Getting {app.url} feed {feed_url}") - f = feedparser.parse(feed_url) + content = h.urlopen(feed_url).read() + f = feedparser.parse(content) if f.bozo: allura_base.log.warning(f"{app.url} feed {feed_url} errored: {f.bozo_exception}") return diff --git a/ForgeBlog/forgeblog/tests/test_commands.py b/ForgeBlog/forgeblog/tests/test_commands.py index 1420ce8b8..b6d2f7c01 100644 --- a/ForgeBlog/forgeblog/tests/test_commands.py +++ b/ForgeBlog/forgeblog/tests/test_commands.py @@ -14,12 +14,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- +import html from datetime import datetime, timedelta +from io import BytesIO from unittest import skipIf import pkg_resources import mock import feedparser +from mock import patch from ming.odm.odmsession import ThreadLocalODMSession @@ -37,42 +39,9 @@ def setup_module(module): setup_global_objects() -def _mock_feed(*entries): - class attrdict(dict): - - def __getattr__(self, name): - return self[name] - - feed = mock.Mock() - feed.bozo = False - feed.entries = [] - for e in entries: - _mock_feed.i += 1 - entry = attrdict( - content_type='text/plain', - title='Default Title %d' % _mock_feed.i, - subtitle='', - summary='', - link='http://example.com/', - updated=datetime.utcnow() + timedelta(days=_mock_feed.i - 100)) - entry.update(e) - entry['updated_parsed'] = entry['updated'].timetuple() - if 'content' in entry: - entry['content'] = [ - attrdict(type=entry['content_type'], value=entry['content'])] - if 'summary_detail' in entry: - entry['summary_detail'] = attrdict(entry['summary_detail']) - feed.entries.append(entry) - - return feed - - -_mock_feed.i = 0 - - @skipIf(module_not_available('html2text'), 'requires html2text') -@mock.patch.object(feedparser, 'parse') -def test_pull_rss_feeds(parsefeed): +@patch('urllib.request.urlopen') +def test_pull_rss_feeds(urlopen): html_content = ( "<p>1. foo</p>\n" "\n" @@ -85,6 +54,7 @@ def test_pull_rss_feeds(parsefeed): "baz\n" "</a></p>\n" ) + html_in_feed = html.escape(html_content).encode('utf-8') rendered_html_content = "\n".join([ r"1\. 
foo", @@ -96,12 +66,40 @@ def test_pull_rss_feeds(parsefeed): " [link](http://example.com/)", ]) - parsefeed.return_value = _mock_feed( - dict(title='Test', subtitle='test', summary='This is a test'), - dict(content_type='text/plain', content='Test feed'), - dict(content_type='text/html', content=html_content), - dict(summary_detail=dict(type='text/html', value=html_content)), - ) + urlopen.return_value = BytesIO(b'''<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Test</title> + <updated>2003-12-13T18:30:02Z</updated> + <author><name>John Doe</name></author> + <subtitle>test</subtitle> + <summary>This is a test</summary> + + <entry> + <title>Test summary</title> + <subtitle>test</subtitle> + <link href="http://example.com/"/> + <updated>2003-12-13T18:30:02Z</updated> + <summary>This is a test</summary> + </entry> + <entry> + <title>Test content</title> + <link href="http://example.com/"/> + <updated>2003-12-13T18:30:02Z</updated> + <content>Test feed</content> + </entry> + <entry> + <title>Test html content</title> + <link href="http://example.com/"/> + <updated>2003-12-13T18:30:02Z</updated> + <content type="html">''' + html_in_feed + b'''</content> + </entry> + <entry> + <title>Test html summary</title> + <link href="http://example.com/"/> + <updated>2003-12-13T18:30:02Z</updated> + <summary type="html">'''+ html_in_feed + b'''</summary> + </entry> + </feed>''') base_app = M.AppConfig.query.find().all()[0] tmp_app = M.AppConfig( @@ -119,16 +117,16 @@ def test_pull_rss_feeds(parsefeed): cmd = rssfeeds.RssFeedsCommand('pull-rss-feeds') cmd.run([test_config, '-a', tmp_app._id]) cmd.command() - parsefeed.assert_called_with('http://example.com/news/feed/') + urlopen.assert_called_with('http://example.com/news/feed/', timeout=None) posts = BM.BlogPost.query.find( {'app_config_id': tmp_app._id}).sort('timestamp', 1) assert posts.count() == 4 posts = posts.all() - assert posts[0].title == 'Test' + assert posts[0].title == 
'Test summary' assert posts[0].text == 'This is a test [link](http://example.com/)' - assert posts[1].title == 'Default Title 2' + assert posts[1].title == 'Test content' assert posts[1].text == 'Test feed [link](http://example.com/)' - assert posts[2].title == 'Default Title 3' + assert posts[2].title == 'Test html content' assert posts[2].text == rendered_html_content - assert posts[3].title == 'Default Title 4' + assert posts[3].title == 'Test html summary' assert posts[3].text == rendered_html_content
