https://github.com/python/cpython/commit/de1428f8c234a8731ced99cbfe5cd6c5c719e31d
commit: de1428f8c234a8731ced99cbfe5cd6c5c719e31d
branch: main
author: Ali Hamdan <ali.hamdan....@gmail.com>
committer: encukou <encu...@gmail.com>
date: 2024-05-07T09:28:51+02:00
summary:

gh-62090: Simplify argparse usage formatting (GH-105039)

Rationale
=========

argparse performs a complex formatting of the usage for argument grouping
and for line wrapping to fit the terminal width. This formatting has been
a constant source of bugs for at least 10 years (see linked issues below)
where defensive assertion errors are triggered or brackets and paranthesis
are not properly handeled.

Problem
=======

The current implementation of argparse usage formatting relies on regular
expressions to group arguments usage only to separate them again later
with another set of regular expressions. This is a complex and error prone
approach that caused all the issues linked below. Special casing certain
argument formats has not solved the problem. The following are some of
the most common issues:
- empty `metavar`
- mutually exclusive groups with `SUPPRESS`ed arguments
- metavars with whitespace
- metavars with brackets or paranthesis

Solution
========

The following two comments summarize the solution:
- https://github.com/python/cpython/issues/82091#issuecomment-1093832187
- https://github.com/python/cpython/issues/77048#issuecomment-1093776995

Mainly, the solution is to rewrite the usage formatting to avoid the
group-then-separate approach. Instead, the usage parts are kept separate
and only joined together at the end. This allows for a much simpler
implementation that is easier to understand and maintain. It avoids the
regular expressions approach and fixes the corresponding issues.

This closes the following GitHub issues:
-  #62090
-  #62549
-  #77048
-  #82091
-  #89743
-  #96310
-  #98666

These PRs become obsolete:
-  #15372
-  #96311

files:
A Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst
M Lib/argparse.py
M Lib/test/test_argparse.py

diff --git a/Lib/argparse.py b/Lib/argparse.py
index 0dbdd67a82f391..55bf8cdd875a8d 100644
--- a/Lib/argparse.py
+++ b/Lib/argparse.py
@@ -328,17 +328,8 @@ def _format_usage(self, usage, actions, groups, prefix):
             if len(prefix) + len(usage) > text_width:
 
                 # break usage into wrappable parts
-                part_regexp = (
-                    r'\(.*?\)+(?=\s|$)|'
-                    r'\[.*?\]+(?=\s|$)|'
-                    r'\S+'
-                )
-                opt_usage = format(optionals, groups)
-                pos_usage = format(positionals, groups)
-                opt_parts = _re.findall(part_regexp, opt_usage)
-                pos_parts = _re.findall(part_regexp, pos_usage)
-                assert ' '.join(opt_parts) == opt_usage
-                assert ' '.join(pos_parts) == pos_usage
+                opt_parts = self._get_actions_usage_parts(optionals, groups)
+                pos_parts = self._get_actions_usage_parts(positionals, groups)
 
                 # helper for wrapping lines
                 def get_lines(parts, indent, prefix=None):
@@ -391,6 +382,9 @@ def get_lines(parts, indent, prefix=None):
         return '%s%s\n\n' % (prefix, usage)
 
     def _format_actions_usage(self, actions, groups):
+        return ' '.join(self._get_actions_usage_parts(actions, groups))
+
+    def _get_actions_usage_parts(self, actions, groups):
         # find group indices and identify actions in groups
         group_actions = set()
         inserts = {}
@@ -398,58 +392,26 @@ def _format_actions_usage(self, actions, groups):
             if not group._group_actions:
                 raise ValueError(f'empty group {group}')
 
+            if all(action.help is SUPPRESS for action in group._group_actions):
+                continue
+
             try:
                 start = actions.index(group._group_actions[0])
             except ValueError:
                 continue
             else:
-                group_action_count = len(group._group_actions)
-                end = start + group_action_count
+                end = start + len(group._group_actions)
                 if actions[start:end] == group._group_actions:
-
-                    suppressed_actions_count = 0
-                    for action in group._group_actions:
-                        group_actions.add(action)
-                        if action.help is SUPPRESS:
-                            suppressed_actions_count += 1
-
-                    exposed_actions_count = group_action_count - 
suppressed_actions_count
-                    if not exposed_actions_count:
-                        continue
-
-                    if not group.required:
-                        if start in inserts:
-                            inserts[start] += ' ['
-                        else:
-                            inserts[start] = '['
-                        if end in inserts:
-                            inserts[end] += ']'
-                        else:
-                            inserts[end] = ']'
-                    elif exposed_actions_count > 1:
-                        if start in inserts:
-                            inserts[start] += ' ('
-                        else:
-                            inserts[start] = '('
-                        if end in inserts:
-                            inserts[end] += ')'
-                        else:
-                            inserts[end] = ')'
-                    for i in range(start + 1, end):
-                        inserts[i] = '|'
+                    group_actions.update(group._group_actions)
+                    inserts[start, end] = group
 
         # collect all actions format strings
         parts = []
-        for i, action in enumerate(actions):
+        for action in actions:
 
             # suppressed arguments are marked with None
-            # remove | separators for suppressed arguments
             if action.help is SUPPRESS:
-                parts.append(None)
-                if inserts.get(i) == '|':
-                    inserts.pop(i)
-                elif inserts.get(i + 1) == '|':
-                    inserts.pop(i + 1)
+                part = None
 
             # produce all arg strings
             elif not action.option_strings:
@@ -461,9 +423,6 @@ def _format_actions_usage(self, actions, groups):
                     if part[0] == '[' and part[-1] == ']':
                         part = part[1:-1]
 
-                # add the action string to the list
-                parts.append(part)
-
             # produce the first way to invoke the option in brackets
             else:
                 option_string = action.option_strings[0]
@@ -484,26 +443,23 @@ def _format_actions_usage(self, actions, groups):
                 if not action.required and action not in group_actions:
                     part = '[%s]' % part
 
-                # add the action string to the list
-                parts.append(part)
+            # add the action string to the list
+            parts.append(part)
 
-        # insert things at the necessary indices
-        for i in sorted(inserts, reverse=True):
-            parts[i:i] = [inserts[i]]
-
-        # join all the action items with spaces
-        text = ' '.join([item for item in parts if item is not None])
-
-        # clean up separators for mutually exclusive groups
-        open = r'[\[(]'
-        close = r'[\])]'
-        text = _re.sub(r'(%s) ' % open, r'\1', text)
-        text = _re.sub(r' (%s)' % close, r'\1', text)
-        text = _re.sub(r'%s *%s' % (open, close), r'', text)
-        text = text.strip()
+        # group mutually exclusive actions
+        for start, end in sorted(inserts, reverse=True):
+            group = inserts[start, end]
+            group_parts = [item for item in parts[start:end] if item is not 
None]
+            if group.required:
+                open, close = "()" if len(group_parts) > 1 else ("", "")
+            else:
+                open, close = "[]"
+            parts[start] = open + " | ".join(group_parts) + close
+            for i in range(start + 1, end):
+                parts[i] = None
 
-        # return the text
-        return text
+        # return the usage parts
+        return [item for item in parts if item is not None]
 
     def _format_text(self, text):
         if '%(prog)' in text:
diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py
index 617b1721f3dbb1..02b499145f6c43 100644
--- a/Lib/test/test_argparse.py
+++ b/Lib/test/test_argparse.py
@@ -4255,6 +4255,140 @@ class TestHelpUsagePositionalsOnlyWrap(HelpTestCase):
     version = ''
 
 
+class TestHelpUsageMetavarsSpacesParentheses(HelpTestCase):
+    # https://github.com/python/cpython/issues/62549
+    # https://github.com/python/cpython/issues/89743
+    parser_signature = Sig(prog='PROG')
+    argument_signatures = [
+        Sig('-n1', metavar='()', help='n1'),
+        Sig('-o1', metavar='(1, 2)', help='o1'),
+        Sig('-u1', metavar=' (uu) ', help='u1'),
+        Sig('-v1', metavar='( vv )', help='v1'),
+        Sig('-w1', metavar='(w)w', help='w1'),
+        Sig('-x1', metavar='x(x)', help='x1'),
+        Sig('-y1', metavar='yy)', help='y1'),
+        Sig('-z1', metavar='(zz', help='z1'),
+        Sig('-n2', metavar='[]', help='n2'),
+        Sig('-o2', metavar='[1, 2]', help='o2'),
+        Sig('-u2', metavar=' [uu] ', help='u2'),
+        Sig('-v2', metavar='[ vv ]', help='v2'),
+        Sig('-w2', metavar='[w]w', help='w2'),
+        Sig('-x2', metavar='x[x]', help='x2'),
+        Sig('-y2', metavar='yy]', help='y2'),
+        Sig('-z2', metavar='[zz', help='z2'),
+    ]
+
+    usage = '''\
+        usage: PROG [-h] [-n1 ()] [-o1 (1, 2)] [-u1  (uu) ] [-v1 ( vv )] [-w1 
(w)w]
+                    [-x1 x(x)] [-y1 yy)] [-z1 (zz] [-n2 []] [-o2 [1, 2]] [-u2  
[uu] ]
+                    [-v2 [ vv ]] [-w2 [w]w] [-x2 x[x]] [-y2 yy]] [-z2 [zz]
+        '''
+    help = usage + '''\
+
+        options:
+          -h, --help  show this help message and exit
+          -n1 ()      n1
+          -o1 (1, 2)  o1
+          -u1  (uu)   u1
+          -v1 ( vv )  v1
+          -w1 (w)w    w1
+          -x1 x(x)    x1
+          -y1 yy)     y1
+          -z1 (zz     z1
+          -n2 []      n2
+          -o2 [1, 2]  o2
+          -u2  [uu]   u2
+          -v2 [ vv ]  v2
+          -w2 [w]w    w2
+          -x2 x[x]    x2
+          -y2 yy]     y2
+          -z2 [zz     z2
+        '''
+    version = ''
+
+
+class TestHelpUsageNoWhitespaceCrash(TestCase):
+
+    def test_all_suppressed_mutex_followed_by_long_arg(self):
+        # https://github.com/python/cpython/issues/62090
+        # https://github.com/python/cpython/issues/96310
+        parser = argparse.ArgumentParser(prog='PROG')
+        mutex = parser.add_mutually_exclusive_group()
+        mutex.add_argument('--spam', help=argparse.SUPPRESS)
+        parser.add_argument('--eggs-eggs-eggs-eggs-eggs-eggs')
+        usage = textwrap.dedent('''\
+        usage: PROG [-h]
+                    [--eggs-eggs-eggs-eggs-eggs-eggs 
EGGS_EGGS_EGGS_EGGS_EGGS_EGGS]
+        ''')
+        self.assertEqual(parser.format_usage(), usage)
+
+    def test_newline_in_metavar(self):
+        # https://github.com/python/cpython/issues/77048
+        mapping = ['123456', '12345', '12345', '123']
+        parser = argparse.ArgumentParser('11111111111111')
+        parser.add_argument('-v', '--verbose',
+                            help='verbose mode', action='store_true')
+        parser.add_argument('targets',
+                            help='installation targets',
+                            nargs='+',
+                            metavar='\n'.join(mapping))
+        usage = textwrap.dedent('''\
+        usage: 11111111111111 [-h] [-v]
+                              123456
+        12345
+        12345
+        123 [123456
+        12345
+        12345
+        123 ...]
+        ''')
+        self.assertEqual(parser.format_usage(), usage)
+
+    def test_empty_metavar_required_arg(self):
+        # https://github.com/python/cpython/issues/82091
+        parser = argparse.ArgumentParser(prog='PROG')
+        parser.add_argument('--nil', metavar='', required=True)
+        parser.add_argument('--a', metavar='A' * 70)
+        usage = (
+            'usage: PROG [-h] --nil \n'
+            '            [--a AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
+            'AAAAAAAAAAAAAAAAAAAAAAA]\n'
+        )
+        self.assertEqual(parser.format_usage(), usage)
+
+    def test_all_suppressed_mutex_with_optional_nargs(self):
+        # https://github.com/python/cpython/issues/98666
+        parser = argparse.ArgumentParser(prog='PROG')
+        mutex = parser.add_mutually_exclusive_group()
+        mutex.add_argument(
+            '--param1',
+            nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS)
+        mutex.add_argument(
+            '--param2',
+            nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS)
+        usage = 'usage: PROG [-h]\n'
+        self.assertEqual(parser.format_usage(), usage)
+
+    def test_nested_mutex_groups(self):
+        parser = argparse.ArgumentParser(prog='PROG')
+        g = parser.add_mutually_exclusive_group()
+        g.add_argument("--spam")
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore', DeprecationWarning)
+            gg = g.add_mutually_exclusive_group()
+        gg.add_argument("--hax")
+        gg.add_argument("--hox", help=argparse.SUPPRESS)
+        gg.add_argument("--hex")
+        g.add_argument("--eggs")
+        parser.add_argument("--num")
+
+        usage = textwrap.dedent('''\
+        usage: PROG [-h] [--spam SPAM | [--hax HAX | --hex HEX] | --eggs EGGS]
+                    [--num NUM]
+        ''')
+        self.assertEqual(parser.format_usage(), usage)
+
+
 class TestHelpVariableExpansion(HelpTestCase):
     """Test that variables are expanded properly in help messages"""
 
diff --git 
a/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst 
b/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst
new file mode 100644
index 00000000000000..c5abf7178563e8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst
@@ -0,0 +1,2 @@
+Fix assertion errors caused by whitespace in metavars or ``SUPPRESS``-ed groups
+in :mod:`argparse` by simplifying usage formatting. Patch by Ali Hamdan.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to