https://github.com/python/cpython/commit/fc0ec2988999be05db67186ca01ed6563ba27f9e commit: fc0ec2988999be05db67186ca01ed6563ba27f9e branch: main author: Jon Crall <erote...@gmail.com> committer: methane <songofaca...@gmail.com> date: 2025-04-18T17:39:30+09:00 summary:
gh-103997: Automatically dedent the argument to "-c" (#103998) Co-authored-by: sunmy2019 <59365878+sunmy2...@users.noreply.github.com> Co-authored-by: Kirill Podoprigora <80244920+ecli...@users.noreply.github.com> Co-authored-by: Inada Naoki <songofaca...@gmail.com> Co-authored-by: Adam Turner <9087854+aa-tur...@users.noreply.github.com> Co-authored-by: Bénédikt Tran <10796600+picn...@users.noreply.github.com> files: A Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst M Doc/using/cmdline.rst M Doc/whatsnew/3.14.rst M Include/internal/pycore_unicodeobject.h M Lib/test/test_cmd_line.py M Modules/main.c M Objects/unicodeobject.c diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 9b5c6eb863e56d..fa7c9cddf9c6d6 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -73,6 +73,9 @@ source. .. audit-event:: cpython.run_command command cmdoption-c + .. versionchanged:: next + *command* is automatically dedented before execution. + .. option:: -m <module-name> Search :data:`sys.path` for the named module and execute its contents as diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 7d469e83dc27ad..aaa4702d53df93 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -474,6 +474,12 @@ Other language changes explicitly overridden in the subclass. (Contributed by Tomasz Pytel in :gh:`132329`.) +* The command line option :option:`-c` now automatically dedents its code + argument before execution. The auto-dedentation behavior mirrors + :func:`textwrap.dedent`. + (Contributed by Jon Crall and Steven Sun in :gh:`103998`.) + + .. 
_whatsnew314-pep765: PEP 765: Disallow return/break/continue that exit a finally block diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 5fea3247e8f68d..c85d53b89accdb 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -247,6 +247,12 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( Py_UCS4 *maxchar, int forward); +/* Dedent a string. + Behaviour is expected to be an exact match of `textwrap.dedent`. + Return a new reference on success, NULL with exception set on error. + */ +extern PyObject* _PyUnicode_Dedent(PyObject *unicode); + /* --- Misc functions ----------------------------------------------------- */ extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int); diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index b949b310ac0f5f..e1d1d03d4ff698 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -17,6 +17,8 @@ spawn_python, kill_python, assert_python_ok, assert_python_failure, interpreter_requires_environment ) +from textwrap import dedent + if not support.has_subprocess_support: raise unittest.SkipTest("test module requires subprocess") @@ -1051,6 +1053,88 @@ def test_int_max_str_digits(self): ) self.assertEqual(res2int(res), (6000, 6000)) + def test_cmd_dedent(self): + # test that -c auto-dedents its arguments + test_cases = [ + ( + """ + print('space-auto-dedent') + """, + "space-auto-dedent", + ), + ( + dedent( + """ + ^^^print('tab-auto-dedent') + """ + ).replace("^", "\t"), + "tab-auto-dedent", + ), + ( + dedent( + """ + ^^if 1: + ^^^^print('mixed-auto-dedent-1') + ^^print('mixed-auto-dedent-2') + """ + ).replace("^", "\t \t"), + "mixed-auto-dedent-1\nmixed-auto-dedent-2", + ), + ( + ''' + data = """$ + + this data has an empty newline above and a newline with spaces below $ + $ + """$ + if 1: $ + print(repr(data))$ + '''.replace( + "$", "" + ), + # Note: entirely blank lines are normalized to \n, 
even if they + # are part of a data string. This is consistent with + # textwrap.dedent behavior, but might not be intuitive. + "'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'", + ), + ( + '', + '', + ), + ( + ' \t\n\t\n \t\t\t \t\t \t\n\t\t \n\n\n\t\t\t ', + '', + ), + ] + for code, expected in test_cases: + # Run the auto-dedent case + args1 = sys.executable, '-c', code + proc1 = subprocess.run(args1, stdout=subprocess.PIPE) + self.assertEqual(proc1.returncode, 0, proc1) + output1 = proc1.stdout.strip().decode(encoding='utf-8') + + # Manually dedent beforehand, check the result is the same. + args2 = sys.executable, '-c', dedent(code) + proc2 = subprocess.run(args2, stdout=subprocess.PIPE) + self.assertEqual(proc2.returncode, 0, proc2) + output2 = proc2.stdout.strip().decode(encoding='utf-8') + + self.assertEqual(output1, output2) + self.assertEqual(output1.replace('\r\n', '\n'), expected) + + def test_cmd_dedent_failcase(self): + # Mixing tabs and spaces is not allowed + from textwrap import dedent + template = dedent( + ''' + -+if 1: + +-++ print('will fail') + ''') + code = template.replace('-', ' ').replace('+', '\t') + assert_python_failure('-c', code) + code = template.replace('-', '\t').replace('+', ' ') + assert_python_failure('-c', code) + def test_cpu_count(self): code = "import os; print(os.cpu_count(), os.process_cpu_count())" res = assert_python_ok('-X', 'cpu_count=4321', '-c', code) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst new file mode 100644 index 00000000000000..511ca8fa732fa6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst @@ -0,0 +1,4 @@ +String arguments passed to "-c" are now automatically dedented as if by +:func:`textwrap.dedent`. 
This allows "python -c" invocations to be indented +in shell scripts without causing indentation errors. (Patch by Jon Crall and +Steven Sun) diff --git a/Modules/main.c b/Modules/main.c index c2b7bfde2abd7c..ea1239ecc57f00 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -11,6 +11,7 @@ #include "pycore_pylifecycle.h" // _Py_PreInitializeFromPyArgv() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_pythonrun.h" // _PyRun_AnyFileObject() +#include "pycore_unicodeobject.h" // _PyUnicode_Dedent() /* Includes for exit_sigint() */ #include <stdio.h> // perror() @@ -244,6 +245,11 @@ pymain_run_command(wchar_t *command) return pymain_exit_err_print(); } + Py_SETREF(unicode, _PyUnicode_Dedent(unicode)); + if (unicode == NULL) { + goto error; + } + bytes = PyUnicode_AsUTF8String(unicode); Py_DECREF(unicode); if (bytes == NULL) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7c735685e89389..e01a10fc19e904 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14270,6 +14270,163 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored)) return Py_BuildValue("(N)", copy); } +/* +This function searches for the longest common leading whitespace +of all lines in [src, end). +It returns the length of the common leading whitespace and sets `output` to +point to the beginning of the common leading whitespace if length > 0. 
+*/ +static Py_ssize_t +search_longest_common_leading_whitespace( + const char *const src, + const char *const end, + const char **output) +{ + // [_start, _start + _len) + // describes the current longest common leading whitespace + const char *_start = NULL; + Py_ssize_t _len = 0; + + for (const char *iter = src; iter < end; ++iter) { + const char *line_start = iter; + const char *leading_whitespace_end = NULL; + + // scan the whole line + while (iter < end && *iter != '\n') { + if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') { + /* `iter` points to the first non-whitespace character + in this line */ + if (iter == line_start) { + // some line has no indent, fast exit! + return 0; + } + leading_whitespace_end = iter; + } + ++iter; + } + + // if this line has all white space, skip it + if (!leading_whitespace_end) { + continue; + } + + if (!_start) { + // update the first leading whitespace + _start = line_start; + _len = leading_whitespace_end - line_start; + assert(_len > 0); + } + else { + /* We then compare with the current longest leading whitespace. + + [line_start, leading_whitespace_end) is the leading + whitespace of this line, + + [_start, _start + _len) is the leading whitespace of the + current longest leading whitespace. */ + Py_ssize_t new_len = 0; + const char *_iter = _start, *line_iter = line_start; + + while (_iter < _start + _len && line_iter < leading_whitespace_end + && *_iter == *line_iter) + { + ++_iter; + ++line_iter; + ++new_len; + } + + _len = new_len; + if (_len == 0) { + // No common things now, fast exit! + return 0; + } + } + } + + assert(_len >= 0); + if (_len > 0) { + *output = _start; + } + return _len; +} + +/* Dedent a string. + Behaviour is expected to be an exact match of `textwrap.dedent`. + Return a new reference on success, NULL with exception set on error. 
+ */ +PyObject * +_PyUnicode_Dedent(PyObject *unicode) +{ + Py_ssize_t src_len = 0; + const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len); + if (!src) { + return NULL; + } + assert(src_len >= 0); + if (src_len == 0) { + return Py_NewRef(unicode); + } + + const char *const end = src + src_len; + + // [whitespace_start, whitespace_start + whitespace_len) + // describes the current longest common leading whitespace + const char *whitespace_start = NULL; + Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( + src, end, &whitespace_start); + + if (whitespace_len == 0) { + return Py_NewRef(unicode); + } + + // now we should trigger a dedent + char *dest = PyMem_Malloc(src_len); + if (!dest) { + PyErr_NoMemory(); + return NULL; + } + char *dest_iter = dest; + + for (const char *iter = src; iter < end; ++iter) { + const char *line_start = iter; + bool in_leading_space = true; + + // iterate over a line to find the end of a line + while (iter < end && *iter != '\n') { + if (in_leading_space && *iter != ' ' && *iter != '\t') { + in_leading_space = false; + } + ++iter; + } + + // invariant: *iter == '\n' or iter == end + bool append_newline = iter < end; + + // if this line has all white space, write '\n' and continue + if (in_leading_space && append_newline) { + *dest_iter++ = '\n'; + continue; + } + + /* copy [line_start + whitespace_len, iter) to buffer, then + conditionally append '\n' */ + + Py_ssize_t new_line_len = iter - line_start - whitespace_len; + assert(new_line_len >= 0); + memcpy(dest_iter, line_start + whitespace_len, new_line_len); + + dest_iter += new_line_len; + + if (append_newline) { + *dest_iter++ = '\n'; + } + } + + PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest); + PyMem_Free(dest); + return res; +} + static PyMethodDef unicode_methods[] = { UNICODE_ENCODE_METHODDEF UNICODE_REPLACE_METHODDEF _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To 
unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com