[GitHub] [airflow] ashb commented on a change in pull request #4911: [AIRFLOW-3768] Escape search parameter in pagination controls

GitBox Thu, 14 Mar 2019 02:58:38 -0700

ashb commented on a change in pull request #4911: [AIRFLOW-3768] Escape search parameter in pagination controls
URL: https://github.com/apache/airflow/pull/4911#discussion_r265492740


 ##########
 File path: tests/www/test_utils.py
 ##########
 @@ -45,39 +47,36 @@ def test_sensitive_variable_should_be_hidden_ic(self):
     def check_generate_pages_html(self, current_page, total_pages,
                                   window=7, check_middle=False):
         extra_links = 4  # first, prev, next, last
-        html_str = utils.generate_pages(current_page, total_pages)
+        search = "'>\"/><img src=x onerror=alert(1)>"
+        html_str = utils.generate_pages(current_page, total_pages,
+                                        search=search)
+
+        self.assertNotIn(search, html_str,
+                         "The raw search string shouldn't appear in the 
output")
 
-        # dom parser has issues with special &laquo; and &raquo;
-        html_str = html_str.replace('&laquo;', '')
-        html_str = html_str.replace('&raquo;', '')
-        dom = minidom.parseString(html_str)
+        dom = BeautifulSoup(html_str, 'html.parser')
         self.assertIsNotNone(dom)
 
-        ulist = dom.getElementsByTagName('ul')[0]
-        ulist_items = ulist.getElementsByTagName('li')
+        ulist = dom.ul
+        ulist_items = ulist.find_all('li')
         self.assertEqual(min(window, total_pages) + extra_links, 
len(ulist_items))
 
-        def get_text(nodelist):
-            rc = []
-            for node in nodelist:
-                if node.nodeType == node.TEXT_NODE:
-                    rc.append(node.data)
-            return ''.join(rc)
-
         page_items = ulist_items[2:-2]
         mid = int(len(page_items) / 2)
         for i, item in enumerate(page_items):
-            a_node = item.getElementsByTagName('a')[0]
-            href_link = a_node.getAttribute('href')
-            node_text = get_text(a_node.childNodes)
+            a_node = item.a
+            href_link = a_node['href']
+            node_text = a_node.string
             if node_text == str(current_page + 1):
                 if check_middle:
                     self.assertEqual(mid, i)
-                self.assertEqual('javascript:void(0)', 
a_node.getAttribute('href'))
-                self.assertIn('active', item.getAttribute('class'))
+                self.assertEqual('javascript:void(0)', href_link)
+                self.assertIn('active', item['class'])
             else:
-                link_str = '?page=' + str(int(node_text) - 1)
-                self.assertEqual(link_str, href_link)
+                self.assertRegex(href_link, r'^\?', 'Link is page-relative')
 
 Review comment:
   Oh, whoops. The other way we've dealt with this in the past is to use `import unittest2 as unittest` (which is already a py2-only dependency). For consistency, I'll do that.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

[GitHub] [airflow] ashb commented on a change in pull request #4911: [AIRFLOW-3768] Escape search parameter in pagination controls

Reply via email to