jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/924492 )
Change subject: [IMPR] Use hashlib.file_digest with Python 3.11 ...................................................................... [IMPR] Use hashlib.file_digest with Python 3.11 hashlib.file_digest was introduced with Python 3.11. Use this function in tools.compute_file_hash() function if no bytes_to_read is given. - refactor compute_file_hash - enable a hash constructor or a callable to be used with compute_file_hash like in hashlib.file_digest() - update documentation - add some tests Change-Id: I9d58150c67123e619f15c8c502aaaaf2abe78ed8 --- M pywikibot/tools/__init__.py M tests/tools_tests.py 2 files changed, 80 insertions(+), 38 deletions(-) Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index 25f2395..ae9e715 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -19,7 +19,7 @@ from functools import total_ordering, wraps from importlib import import_module from types import TracebackType -from typing import Any, Optional, Type +from typing import Any, Optional, Type, Union from warnings import catch_warnings, showwarning, warn import pkg_resources @@ -721,40 +721,48 @@ warn(warn_str.format(filename, st_mode - stat.S_IFREG, mode)) -def compute_file_hash(filename: str, sha: str = 'sha1', bytes_to_read=None): +def compute_file_hash(filename: Union[str, os.PathLike], + sha: Union[str, Callable[[], Any]] = 'sha1', + bytes_to_read: Optional[int] = None) -> str: """Compute file hash. Result is expressed as hexdigest(). .. versionadded:: 3.0 + .. versionchanged:: 8.2 + *sha* may be also a hash constructor, or a callable that returns + a hash object. + :param filename: filename path - :param sha: hashing function among the following in hashlib: - md5(), sha1(), sha224(), sha256(), sha384(), and sha512() - function name shall be passed as string, e.g. 'sha1'. 
- :param bytes_to_read: only the first bytes_to_read will be considered; - if file size is smaller, the whole file will be considered. - :type bytes_to_read: None or int - + :param sha: hash algorithm available with hashlib: ``sha1()``, + ``sha224()``, ``sha256()``, ``sha384()``, ``sha512()``, + ``blake2b()``, and ``blake2s()``. Additional algorithms like + ``md5()``, ``sha3_224()``, ``sha3_256()``, ``sha3_384()``, + ``sha3_512()``, ``shake_128()`` and ``shake_256()`` may also be + available. *sha* must either be a hash algorithm name as a str + like ``'sha1'`` (default), a hash constructor like + ``hashlib.sha1``, or a callable that returns a hash object like + ``lambda: hashlib.sha1()``. + :param bytes_to_read: only the first bytes_to_read will be + considered; if file size is smaller, the whole file will be + considered. """ - size = os.path.getsize(filename) - if bytes_to_read is None: - bytes_to_read = size - else: - bytes_to_read = min(bytes_to_read, size) - step = 1 << 20 - - shas = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'] - assert sha in shas - sha = getattr(hashlib, sha)() # sha instance - with open(filename, 'rb') as f: - while bytes_to_read > 0: - read_bytes = f.read(min(bytes_to_read, step)) - assert read_bytes # make sure we actually read bytes - bytes_to_read -= len(read_bytes) - sha.update(read_bytes) - return sha.hexdigest() + if PYTHON_VERSION < (3, 11) or bytes_to_read is not None: + digest = sha() if callable(sha) else hashlib.new(sha) + size = os.path.getsize(filename) + bytes_to_read = min(bytes_to_read or size, size) + step = 1 << 20 + while bytes_to_read > 0: + read_bytes = f.read(min(bytes_to_read, step)) + assert read_bytes # make sure we actually read bytes + bytes_to_read -= len(read_bytes) + digest.update(read_bytes) + else: + digest = hashlib.file_digest(f, sha) + + return digest.hexdigest() def cached(*arg: Callable) -> Any: diff --git a/tests/tools_tests.py b/tests/tools_tests.py index 2084d77..15471ac 100755 --- 
a/tests/tools_tests.py +++ b/tests/tools_tests.py @@ -5,6 +5,7 @@ # # Distributed under the terms of the MIT license. import decimal +import hashlib import os import subprocess import tempfile @@ -12,6 +13,7 @@ from collections import Counter, OrderedDict from collections.abc import Mapping from contextlib import suppress +from functools import partial from unittest import mock from pywikibot import config, tools @@ -599,35 +601,49 @@ self.chmod.assert_called_once_with(self.file, 0o600) +def hash_func(digest): + """Function who gives a hashlib function.""" + return hashlib.new(digest) + + class TestFileShaCalculator(TestCase): r"""Test calculator of sha of a file. There are two possible hash values for each test. The second one is for files with Windows line endings (\r\n). - """ net = False filename = join_xml_data_path('article-pear-0.10.xml') + md5_tests = { + 'str': 'md5', + 'hash': hashlib.md5, + 'function': partial(hash_func, 'md5') + } + def test_md5_complete_calculation(self): """Test md5 of complete file.""" - res = tools.compute_file_hash(self.filename, sha='md5') - self.assertIn(res, ( - '5d7265e290e6733e1e2020630262a6f3', - '2c941f2fa7e6e629d165708eb02b67f7', - )) + for test, sha in self.md5_tests.items(): + with self.subTest(test=test): + res = tools.compute_file_hash(self.filename, sha=sha) + self.assertIn(res, ( + '5d7265e290e6733e1e2020630262a6f3', + '2c941f2fa7e6e629d165708eb02b67f7', + )) def test_md5_partial_calculation(self): """Test md5 of partial file (1024 bytes).""" - res = tools.compute_file_hash(self.filename, sha='md5', - bytes_to_read=1024) - self.assertIn(res, ( - 'edf6e1accead082b6b831a0a600704bc', - 'be0227b6d490baa49e6d7e131c7f596b', - )) + for test, sha in self.md5_tests.items(): + with self.subTest(test=test): + res = tools.compute_file_hash(self.filename, sha=sha, + bytes_to_read=1024) + self.assertIn(res, ( + 'edf6e1accead082b6b831a0a600704bc', + 'be0227b6d490baa49e6d7e131c7f596b', + )) def test_sha1_complete_calculation(self): 
"""Test sha1 of complete file.""" -- To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/924492 To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Change-Id: I9d58150c67123e619f15c8c502aaaaf2abe78ed8 Gerrit-Change-Number: 924492 Gerrit-PatchSet: 5 Gerrit-Owner: Xqt <i...@gno.de> Gerrit-Reviewer: Xqt <i...@gno.de> Gerrit-Reviewer: jenkins-bot Gerrit-MessageType: merged
_______________________________________________ Pywikibot-commits mailing list -- pywikibot-commits@lists.wikimedia.org To unsubscribe send an email to pywikibot-commits-le...@lists.wikimedia.org