https://github.com/python/cpython/commit/2078eb45ca0db495972a20fcaf96df8fcf48451d
commit: 2078eb45ca0db495972a20fcaf96df8fcf48451d
branch: main
author: Ruben Vorderman <r.h.p.vorder...@lumc.nl>
committer: vstinner <vstin...@python.org>
date: 2024-06-13T16:28:59+02:00
summary:

gh-120397: Optimize str.count() for single characters (#120398)

files:
A Misc/NEWS.d/next/Core and 
Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst
M Objects/stringlib/fastsearch.h

diff --git a/Misc/NEWS.d/next/Core and 
Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst b/Misc/NEWS.d/next/Core 
and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst
new file mode 100644
index 00000000000000..05c55e8a45eb12
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and 
Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst 
@@ -0,0 +1,2 @@
+Improve the throughput by up to two times for the :meth:`str.count`, 
:meth:`bytes.count` and :meth:`bytearray.count`
+methods for counting single characters.
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 309ed1554f4699..05e700b06258f0 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -753,6 +753,22 @@ STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t 
n,
 }
 
 
+static inline Py_ssize_t
+STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
+                                  const STRINGLIB_CHAR p0)
+/* A specialized function of count_char that does not cut off at a maximum.
+   As a result, the compiler is able to vectorize the loop. */
+{
+    Py_ssize_t count = 0;
+    for (Py_ssize_t i = 0; i < n; i++) {
+        if (s[i] == p0) {
+            count++;
+        }
+    }
+    return count;
+}
+
+
 Py_LOCAL_INLINE(Py_ssize_t)
 FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
            const STRINGLIB_CHAR* p, Py_ssize_t m,
@@ -773,6 +789,9 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
         else if (mode == FAST_RSEARCH)
             return STRINGLIB(rfind_char)(s, n, p[0]);
         else {
+            if (maxcount == PY_SSIZE_T_MAX) {
+                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
+            }
             return STRINGLIB(count_char)(s, n, p[0], maxcount);
         }
     }

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to