jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/924492 )

Change subject: [IMPR] Use hashlib.file_digest with Python 3.11
......................................................................

[IMPR] Use hashlib.file_digest with Python 3.11

hashlib.file_digest was introduced with Python 3.11. Use this function
in tools.compute_file_hash() if no bytes_to_read is given.

- refactor compute_file_hash
- enable a hash constructor or a callable to be used with
  compute_file_hash like in hashlib.file_digest()
- update documentation
- add some tests

Change-Id: I9d58150c67123e619f15c8c502aaaaf2abe78ed8
---
M pywikibot/tools/__init__.py
M tests/tools_tests.py
2 files changed, 80 insertions(+), 38 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified




diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 25f2395..ae9e715 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -19,7 +19,7 @@
 from functools import total_ordering, wraps
 from importlib import import_module
 from types import TracebackType
-from typing import Any, Optional, Type
+from typing import Any, Optional, Type, Union
 from warnings import catch_warnings, showwarning, warn

 import pkg_resources
@@ -721,40 +721,48 @@
             warn(warn_str.format(filename, st_mode - stat.S_IFREG, mode))


-def compute_file_hash(filename: str, sha: str = 'sha1', bytes_to_read=None):
+def compute_file_hash(filename: Union[str, os.PathLike],
+                      sha: Union[str, Callable[[], Any]] = 'sha1',
+                      bytes_to_read: Optional[int] = None) -> str:
     """Compute file hash.

     Result is expressed as hexdigest().

     .. versionadded:: 3.0
+    .. versionchanged:: 8.2
+       *sha* may be  also a hash constructor, or a callable that returns
+       a hash object.
+

     :param filename: filename path
-    :param sha: hashing function among the following in hashlib:
-        md5(), sha1(), sha224(), sha256(), sha384(), and sha512()
-        function name shall be passed as string, e.g. 'sha1'.
-    :param bytes_to_read: only the first bytes_to_read will be considered;
-        if file size is smaller, the whole file will be considered.
-    :type bytes_to_read: None or int
-
+    :param sha: hash algorithm available with hashlib: ``sha1()``,
+        ``sha224()``, ``sha256()``, ``sha384()``, ``sha512()``,
+        ``blake2b()``, and ``blake2s()``. Additional algorithms like
+        ``md5()``, ``sha3_224()``, ``sha3_256()``, ``sha3_384()``,
+        ``sha3_512()``, ``shake_128()`` and ``shake_256()`` may also be
+        available. *sha* must either be a hash algorithm name as a str
+        like ``'sha1'`` (default), a hash constructor like
+        ``hashlib.sha1``, or a callable that returns a hash object like
+        ``lambda: hashlib.sha1()``.
+    :param bytes_to_read: only the first bytes_to_read will be
+        considered; if file size is smaller, the whole file will be
+        considered.
     """
-    size = os.path.getsize(filename)
-    if bytes_to_read is None:
-        bytes_to_read = size
-    else:
-        bytes_to_read = min(bytes_to_read, size)
-    step = 1 << 20
-
-    shas = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512']
-    assert sha in shas
-    sha = getattr(hashlib, sha)()  # sha instance
-
     with open(filename, 'rb') as f:
-        while bytes_to_read > 0:
-            read_bytes = f.read(min(bytes_to_read, step))
-            assert read_bytes  # make sure we actually read bytes
-            bytes_to_read -= len(read_bytes)
-            sha.update(read_bytes)
-    return sha.hexdigest()
+        if PYTHON_VERSION < (3, 11) or bytes_to_read is not None:
+            digest = sha() if callable(sha) else hashlib.new(sha)
+            size = os.path.getsize(filename)
+            bytes_to_read = min(bytes_to_read or size, size)
+            step = 1 << 20
+            while bytes_to_read > 0:
+                read_bytes = f.read(min(bytes_to_read, step))
+                assert read_bytes  # make sure we actually read bytes
+                bytes_to_read -= len(read_bytes)
+                digest.update(read_bytes)
+        else:
+            digest = hashlib.file_digest(f, sha)
+
+    return digest.hexdigest()


 def cached(*arg: Callable) -> Any:
diff --git a/tests/tools_tests.py b/tests/tools_tests.py
index 2084d77..15471ac 100755
--- a/tests/tools_tests.py
+++ b/tests/tools_tests.py
@@ -5,6 +5,7 @@
 #
 # Distributed under the terms of the MIT license.
 import decimal
+import hashlib
 import os
 import subprocess
 import tempfile
@@ -12,6 +13,7 @@
 from collections import Counter, OrderedDict
 from collections.abc import Mapping
 from contextlib import suppress
+from functools import partial
 from unittest import mock
 
 from pywikibot import config, tools
@@ -599,35 +601,49 @@
         self.chmod.assert_called_once_with(self.file, 0o600)


+def hash_func(digest):
+    """Function who gives a hashlib function."""
+    return hashlib.new(digest)
+
+
 class TestFileShaCalculator(TestCase):

     r"""Test calculator of sha of a file.

     There are two possible hash values for each test. The second one is for
     files with Windows line endings (\r\n).
-
     """

     net = False

     filename = join_xml_data_path('article-pear-0.10.xml')

+    md5_tests = {
+        'str': 'md5',
+        'hash': hashlib.md5,
+        'function': partial(hash_func, 'md5')
+    }
+
     def test_md5_complete_calculation(self):
         """Test md5 of complete file."""
-        res = tools.compute_file_hash(self.filename, sha='md5')
-        self.assertIn(res, (
-            '5d7265e290e6733e1e2020630262a6f3',
-            '2c941f2fa7e6e629d165708eb02b67f7',
-        ))
+        for test, sha in self.md5_tests.items():
+            with self.subTest(test=test):
+                res = tools.compute_file_hash(self.filename, sha=sha)
+                self.assertIn(res, (
+                    '5d7265e290e6733e1e2020630262a6f3',
+                    '2c941f2fa7e6e629d165708eb02b67f7',
+                ))

     def test_md5_partial_calculation(self):
         """Test md5 of partial file (1024 bytes)."""
-        res = tools.compute_file_hash(self.filename, sha='md5',
-                                      bytes_to_read=1024)
-        self.assertIn(res, (
-            'edf6e1accead082b6b831a0a600704bc',
-            'be0227b6d490baa49e6d7e131c7f596b',
-        ))
+        for test, sha in self.md5_tests.items():
+            with self.subTest(test=test):
+                res = tools.compute_file_hash(self.filename, sha=sha,
+                                              bytes_to_read=1024)
+                self.assertIn(res, (
+                    'edf6e1accead082b6b831a0a600704bc',
+                    'be0227b6d490baa49e6d7e131c7f596b',
+                ))

     def test_sha1_complete_calculation(self):
         """Test sha1 of complete file."""

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/924492
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I9d58150c67123e619f15c8c502aaaaf2abe78ed8
Gerrit-Change-Number: 924492
Gerrit-PatchSet: 5
Gerrit-Owner: Xqt <i...@gno.de>
Gerrit-Reviewer: Xqt <i...@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- pywikibot-commits@lists.wikimedia.org
To unsubscribe send an email to pywikibot-commits-le...@lists.wikimedia.org

Reply via email to