https://github.com/python/cpython/commit/06a26fda607fb1a5e108cf82a0458c8ebf97f5d2
commit: 06a26fda607fb1a5e108cf82a0458c8ebf97f5d2
branch: main
author: Adam Turner <9087854+aa-tur...@users.noreply.github.com>
committer: AA-Turner <9087854+aa-tur...@users.noreply.github.com>
date: 2025-04-24T16:10:46+01:00
summary:

gh-118761: Optimise import time for ``shlex`` (#132036)

files:
A Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst
M Lib/shlex.py
M Lib/test/test_shlex.py

diff --git a/Lib/shlex.py b/Lib/shlex.py
index f4821616b62a0f..5bf6e0d70e0012 100644
--- a/Lib/shlex.py
+++ b/Lib/shlex.py
@@ -7,11 +7,7 @@
 # iterator interface by Gustavo Niemeyer, April 2003.
 # changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
 
-import os
-import re
 import sys
-from collections import deque
-
 from io import StringIO
 
 __all__ = ["shlex", "split", "quote", "join"]
@@ -20,6 +16,8 @@ class shlex:
     "A lexical analyzer class for simple shell-like syntaxes."
     def __init__(self, instream=None, infile=None, posix=False,
                  punctuation_chars=False):
+        from collections import deque  # deferred import for performance
+
         if isinstance(instream, str):
             instream = StringIO(instream)
         if instream is not None:
@@ -278,6 +276,7 @@ def read_token(self):
 
     def sourcehook(self, newfile):
         "Hook called on a filename to be sourced."
+        import os.path
         if newfile[0] == '"':
             newfile = newfile[1:-1]
         # This implements cpp-like semantics for relative-path inclusion.
@@ -318,13 +317,17 @@ def join(split_command):
     return ' '.join(quote(arg) for arg in split_command)
 
 
-_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search
-
 def quote(s):
     """Return a shell-escaped version of the string *s*."""
     if not s:
         return "''"
-    if _find_unsafe(s) is None:
+
+    # Use bytes.translate() for performance
+    safe_chars = (b'%+,-./0123456789:=@'
+                  b'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
+                  b'abcdefghijklmnopqrstuvwxyz')
+    # No quoting is needed if `s` is an ASCII string consisting only of `safe_chars`
+    if s.isascii() and not s.encode().translate(None, delete=safe_chars):
         return s
 
     # use single quotes, and put single quotes into double quotes
diff --git a/Lib/test/test_shlex.py b/Lib/test/test_shlex.py
index 797c91ee7effdf..f35571ea88654d 100644
--- a/Lib/test/test_shlex.py
+++ b/Lib/test/test_shlex.py
@@ -3,6 +3,7 @@
 import shlex
 import string
 import unittest
+from test.support import import_helper
 
 
 # The original test data set was from shellwords, by Hartmut Goebel.
@@ -363,6 +364,9 @@ def testPunctuationCharsReadOnly(self):
         with self.assertRaises(AttributeError):
             shlex_instance.punctuation_chars = False
 
+    def test_lazy_imports(self):
+        import_helper.ensure_lazy_imports('shlex', {'collections', 're', 'os'})
+
 
 # Allow this test to be used with old shlex.py
 if not getattr(shlex, "split", None):
diff --git a/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst b/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst
new file mode 100644
index 00000000000000..6b4b3ed7526a8b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst
@@ -0,0 +1,3 @@
+Improve import times by up to 33x for the :mod:`shlex` module,
+and improve the performance of :func:`shlex.quote` by up to 12x.
+Patch by Adam Turner.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com