Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package python-pytools for openSUSE:Factory checked in at 2024-06-07 15:04:30
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-pytools (Old)
 and      /work/SRC/openSUSE:Factory/.python-pytools.new.24587 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-pytools" Fri Jun 7 15:04:30 2024 rev:19 rq:1179086 version:2024.1.4 Changes: -------- --- /work/SRC/openSUSE:Factory/python-pytools/python-pytools.changes 2024-04-29 08:58:32.128910890 +0200 +++ /work/SRC/openSUSE:Factory/.python-pytools.new.24587/python-pytools.changes 2024-06-07 15:04:59.475323534 +0200 @@ -1,0 +2,9 @@ +Thu Jun 6 20:03:37 UTC 2024 - Dirk Müller <[email protected]> + +- update to 2024.1.4: + * remove get_read_from_map_from_permutation, + get_write_to_map_from_permutation + * KeyBuilder: support function hashing + * PersistentDict: Use sqlite as backend storage v2 + +------------------------------------------------------------------- Old: ---- pytools-2024.1.2.tar.gz New: ---- pytools-2024.1.4.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-pytools.spec ++++++ --- /var/tmp/diff_new_pack.lcfvdg/_old 2024-06-07 15:05:01.643402517 +0200 +++ /var/tmp/diff_new_pack.lcfvdg/_new 2024-06-07 15:05:01.643402517 +0200 @@ -18,7 +18,7 @@ %{?sle15_python_module_pythons} Name: python-pytools -Version: 2024.1.2 +Version: 2024.1.4 Release: 0 Summary: A collection of tools for Python License: MIT @@ -30,6 +30,7 @@ BuildRequires: %{python_module platformdirs >= 2.2.0} BuildRequires: %{python_module pytest} BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module sqlite3} BuildRequires: %{python_module typing_extensions if %python-base < 3.11} BuildRequires: %{python_module wheel} BuildRequires: fdupes ++++++ pytools-2024.1.2.tar.gz -> pytools-2024.1.4.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytools-2024.1.2/PKG-INFO new/pytools-2024.1.4/PKG-INFO --- old/pytools-2024.1.2/PKG-INFO 2024-04-24 16:58:13.700250000 +0200 +++ new/pytools-2024.1.4/PKG-INFO 2024-05-31 19:41:22.161351700 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: pytools -Version: 2024.1.2 +Version: 2024.1.4 Summary: A collection of tools for Python Home-page: http://pypi.python.org/pypi/pytools Author: Andreas Kloeckner diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytools-2024.1.2/pytools/__init__.py new/pytools-2024.1.4/pytools/__init__.py --- old/pytools-2024.1.2/pytools/__init__.py 2024-03-20 19:39:03.000000000 +0100 +++ new/pytools-2024.1.4/pytools/__init__.py 2024-05-30 19:19:07.000000000 +0200 @@ -189,6 +189,7 @@ ---------------- .. autofunction:: strtobool +.. autofunction:: to_identifier Sequence utilities ------------------ @@ -448,7 +449,7 @@ return "{}({})".format( self.__class__.__name__, ", ".join(f"{fld}={getattr(self, fld)!r}" - for fld in self.__class__.fields + for fld in sorted(self.__class__.fields) if hasattr(self, fld))) def register_fields(self, new_fields): @@ -1524,68 +1525,6 @@ # }}} -# {{{ index mangling - -def get_read_from_map_from_permutation(original, permuted): - """With a permutation given by *original* and *permuted*, - generate a list *rfm* of indices such that - ``permuted[i] == original[rfm[i]]``. - - Requires that the permutation can be inferred from - *original* and *permuted*. - - .. doctest :: - - >>> for p1 in generate_permutations(list(range(5))): - ... for p2 in generate_permutations(list(range(5))): - ... rfm = get_read_from_map_from_permutation(p1, p2) - ... p2a = [p1[rfm[i]] for i in range(len(p1))] - ... 
assert p2 == p2a - """ - from warnings import warn - warn("get_read_from_map_from_permutation is deprecated and will be " - "removed in 2019", DeprecationWarning, stacklevel=2) - - assert len(original) == len(permuted) - where_in_original = { - original[i]: i for i in range(len(original))} - assert len(where_in_original) == len(original) - return tuple(where_in_original[pi] for pi in permuted) - - -def get_write_to_map_from_permutation(original, permuted): - """With a permutation given by *original* and *permuted*, - generate a list *wtm* of indices such that - ``permuted[wtm[i]] == original[i]``. - - Requires that the permutation can be inferred from - *original* and *permuted*. - - .. doctest :: - - >>> for p1 in generate_permutations(list(range(5))): - ... for p2 in generate_permutations(list(range(5))): - ... wtm = get_write_to_map_from_permutation(p1, p2) - ... p2a = [0] * len(p2) - ... for i, oi in enumerate(p1): - ... p2a[wtm[i]] = oi - ... assert p2 == p2a - """ - from warnings import warn - warn("get_write_to_map_from_permutation is deprecated and will be " - "removed in 2019", DeprecationWarning, stacklevel=2) - - assert len(original) == len(permuted) - - where_in_permuted = { - permuted[i]: i for i in range(len(permuted))} - - assert len(where_in_permuted) == len(permuted) - return tuple(where_in_permuted[oi] for oi in original) - -# }}} - - # {{{ graph algorithms from pytools.graph import a_star as a_star_moved @@ -2998,6 +2937,33 @@ # }}} + +# {{{ to_identifier + +def to_identifier(s: str) -> str: + """Convert a string to a valid Python identifier, by removing + non-alphanumeric, non-underscore characters, and prepending an underscore + if the string starts with a numeric character. + + :param s: The string to convert to an identifier. + + :returns: The converted string. + """ + if s.isidentifier(): + return s + + s = "".join(c for c in s if c.isalnum() or c == "_") + + if len(s) == 0: + return "_" + + if s[0].isdigit(): + s = "_" + s + + return s + +# }}} + # {{{ unique diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytools-2024.1.2/pytools/persistent_dict.py new/pytools-2024.1.4/pytools/persistent_dict.py --- old/pytools-2024.1.2/pytools/persistent_dict.py 2024-04-23 17:15:32.000000000 +0200 +++ new/pytools-2024.1.4/pytools/persistent_dict.py 2024-05-30 19:19:07.000000000 +0200 @@ -29,15 +29,17 @@ THE SOFTWARE. """ -import errno + import hashlib import logging import os -import shutil +import pickle +import sqlite3 import sys from dataclasses import fields as dc_fields, is_dataclass from enum import Enum -from typing import TYPE_CHECKING, Any, Generic, Mapping, Optional, Protocol, TypeVar +from typing import ( + TYPE_CHECKING, Any, Generator, Mapping, Optional, Protocol, Tuple, TypeVar) if TYPE_CHECKING: @@ -64,8 +66,6 @@ This module also provides a disk-backed dictionary that uses persistent hashing. .. autoexception:: NoSuchEntryError -.. autoexception:: NoSuchEntryInvalidKeyError -.. autoexception:: NoSuchEntryInvalidContentsError .. autoexception:: NoSuchEntryCollisionError .. 
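
As a quick reference for the new helper above, a usage sketch whose expected
values come straight from test_to_identifier in this update:

    from pytools import to_identifier

    assert to_identifier("a_123") == "a_123"  # already an identifier: unchanged
    assert to_identifier("a 123") == "a123"   # invalid characters are dropped
    assert to_identifier("123") == "_123"     # leading digit gains an underscore
    assert to_identifier("") == "_"           # empty input falls back to "_"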
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytools-2024.1.2/pytools/persistent_dict.py new/pytools-2024.1.4/pytools/persistent_dict.py
--- old/pytools-2024.1.2/pytools/persistent_dict.py    2024-04-23 17:15:32.000000000 +0200
+++ new/pytools-2024.1.4/pytools/persistent_dict.py    2024-05-30 19:19:07.000000000 +0200
@@ -29,15 +29,17 @@
 THE SOFTWARE.
 """
 
-import errno
+
 import hashlib
 import logging
 import os
-import shutil
+import pickle
+import sqlite3
 import sys
 from dataclasses import fields as dc_fields, is_dataclass
 from enum import Enum
-from typing import TYPE_CHECKING, Any, Generic, Mapping, Optional, Protocol, TypeVar
+from typing import (
+    TYPE_CHECKING, Any, Generator, Mapping, Optional, Protocol, Tuple, TypeVar)
 
 
 if TYPE_CHECKING:
@@ -64,8 +66,6 @@
 This module also provides a disk-backed dictionary that uses persistent hashing.
 
 .. autoexception:: NoSuchEntryError
-.. autoexception:: NoSuchEntryInvalidKeyError
-.. autoexception:: NoSuchEntryInvalidContentsError
 .. autoexception:: NoSuchEntryCollisionError
 .. autoexception:: ReadOnlyEntryError
@@ -90,108 +90,6 @@
 """
 
 
-# {{{ cleanup managers
-
-class CleanupBase:
-    pass
-
-
-class CleanupManager(CleanupBase):
-    def __init__(self):
-        self.cleanups = []
-
-    def register(self, c):
-        self.cleanups.insert(0, c)
-
-    def clean_up(self):
-        for c in self.cleanups:
-            c.clean_up()
-
-    def error_clean_up(self):
-        for c in self.cleanups:
-            c.error_clean_up()
-
-
-class LockManager(CleanupBase):
-    def __init__(self, cleanup_m, lock_file, stacklevel=0):
-        self.lock_file = lock_file
-
-        attempts = 0
-        while True:
-            try:
-                self.fd = os.open(self.lock_file,
-                        os.O_CREAT | os.O_WRONLY | os.O_EXCL)
-                break
-            except OSError:
-                pass
-
-            # This value was chosen based on the py-filelock package:
-            # https://github.com/tox-dev/py-filelock/blob/a6c8fabc4192fa7a4ae19b1875ee842ec5eb4f61/src/filelock/_api.py#L113
-            wait_time_seconds = 0.05
-
-            # Warn every 10 seconds if not able to acquire lock
-            warn_attempts = int(10/wait_time_seconds)
-
-            # Exit after 60 seconds if not able to acquire lock
-            exit_attempts = int(60/wait_time_seconds)
-
-            from time import sleep
-            sleep(wait_time_seconds)
-
-            attempts += 1
-
-            if attempts % warn_attempts == 0:
-                from warnings import warn
-                warn("could not obtain lock -- "
-                        f"delete '{self.lock_file}' if necessary",
-                        stacklevel=1 + stacklevel)
-
-            if attempts > exit_attempts:
-                raise RuntimeError("waited more than one minute "
-                        f"on the lock file '{self.lock_file}' "
-                        "-- something is wrong")
-
-        cleanup_m.register(self)
-
-    def clean_up(self):
-        os.close(self.fd)
-        os.unlink(self.lock_file)
-
-    def error_clean_up(self):
-        pass
-
-
-class ItemDirManager(CleanupBase):
-    def __init__(self, cleanup_m, path, delete_on_error):
-        from os.path import isdir
-
-        self.existed = isdir(path)
-        self.path = path
-        self.delete_on_error = delete_on_error
-
-        cleanup_m.register(self)
-
-    def reset(self):
-        try:
-            shutil.rmtree(self.path)
-        except OSError as e:
-            if e.errno != errno.ENOENT:
-                raise
-
-    def mkdir(self):
-        from os import makedirs
-        makedirs(self.path, exist_ok=True)
-
-    def clean_up(self):
-        pass
-
-    def error_clean_up(self):
-        if self.delete_on_error:
-            self.reset()
-
-# }}}
-
-
 # {{{ key generation
 
 class Hash(Protocol):
@@ -402,7 +300,7 @@
         key_hash.update(b"<None>")
 
     @staticmethod
-    def update_for_dtype(key_hash, key):
+    def update_for_dtype(key_hash: Hash, key: Any) -> None:
         key_hash.update(key.str.encode("utf8"))
 
     # Handling numpy >= 1.20, for which
@@ -410,11 +308,11 @@
     # Introducing this method allows subclasses to specially handle all those
     # dtypes.
     @staticmethod
-    def update_for_specific_dtype(key_hash, key):
+    def update_for_specific_dtype(key_hash: Hash, key: Any) -> None:
         key_hash.update(key.str.encode("utf8"))
 
     @staticmethod
-    def update_for_numpy_scalar(key_hash: Hash, key) -> None:
+    def update_for_numpy_scalar(key_hash: Hash, key: Any) -> None:
         import numpy as np
         if hasattr(np, "complex256") and key.dtype == np.dtype("complex256"):
             key_hash.update(repr(complex(key)).encode("utf8"))
@@ -430,7 +328,7 @@
             self.rec(key_hash, fld.name)
             self.rec(key_hash, getattr(key, fld.name, None))
 
-    def update_for_attrs(self, key_hash: Hash, key) -> None:
+    def update_for_attrs(self, key_hash: Hash, key: Any) -> None:
         self.rec(key_hash, f"{type(key).__qualname__}.{type(key).__name__}")
 
         for fld in attrs.fields(key.__class__):
@@ -449,6 +347,43 @@
     update_for_PMap = update_for_frozendict  # noqa: N815
     update_for_Map = update_for_frozendict  # noqa: N815
 
+    # {{{ date, time, datetime, timezone
+
+    def update_for_date(self, key_hash: Hash, key: Any) -> None:
+        # 'date' has no timezone information; it is always naive
+        self.rec(key_hash, key.isoformat())
+
+    def update_for_time(self, key_hash: Hash, key: Any) -> None:
+        # 'time' should differentiate between naive and aware
+        import datetime
+
+        # Convert to datetime object
+        self.rec(key_hash, datetime.datetime.combine(datetime.date.min, key))
+        self.rec(key_hash, "<time>")
+
+    def update_for_datetime(self, key_hash: Hash, key: Any) -> None:
+        # 'datetime' should differentiate between naive and aware
+
+        # https://docs.python.org/3.11/library/datetime.html#determining-if-an-object-is-aware-or-naive
+        if key.tzinfo is not None and key.tzinfo.utcoffset(key) is not None:
+            self.rec(key_hash, key.timestamp())
+            self.rec(key_hash, "<aware>")
+        else:
+            from datetime import timezone
+            self.rec(key_hash, key.replace(tzinfo=timezone.utc).timestamp())
+            self.rec(key_hash, "<naive>")
+
+    def update_for_timezone(self, key_hash: Hash, key: Any) -> None:
+        self.rec(key_hash, repr(key))
+
+    # }}}
+
+    def update_for_function(self, key_hash: Hash, key: Any) -> None:
+        self.rec(key_hash, key.__module__ + key.__qualname__)
+
+        if key.__closure__:
+            self.rec(key_hash, tuple(c.cell_contents for c in key.__closure__))
+
     # }}}
 
 # }}}
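
What the new update_for_function support means in practice, as a short sketch
mirroring the closure cases in test_hash_function further down (the get_fun
helper is illustrative only):

    from pytools.persistent_dict import KeyBuilder

    keyb = KeyBuilder()

    def get_fun(x):
        def add_x(y):
            return x + y
        return add_x

    # The hash covers __module__ + __qualname__ plus closure cell contents,
    # so equal closures agree and different closures do not:
    assert keyb(get_fun(1)) == keyb(get_fun(1))
    assert keyb(get_fun(1)) != keyb(get_fun(2))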
and os.getenv("XDG_CACHE_HOME") is not None: # platformdirs does not handle XDG_CACHE_HOME on macOS # https://github.com/platformdirs/platformdirs/issues/269 - cache_dir = join(os.getenv("XDG_CACHE_HOME"), "pytools") + container_dir = join(os.getenv("XDG_CACHE_HOME"), "pytools") else: - cache_dir = platformdirs.user_cache_dir("pytools", "pytools") + container_dir = platformdirs.user_cache_dir("pytools", "pytools") - container_dir = join( - cache_dir, - "pdict-v4-{}-py{}".format( - identifier, - ".".join(str(i) for i in sys.version_info))) + self.filename = join(container_dir, f"pdict-v5-{identifier}" + + ".".join(str(i) for i in sys.version_info) + + ".sqlite") self.container_dir = container_dir - self._make_container_dir() - @staticmethod - def _warn(msg: str, category: Any = UserWarning, stacklevel: int = 0) -> None: - from warnings import warn - warn(msg, category, stacklevel=1 + stacklevel) - - def store_if_not_present(self, key: K, value: V, - _stacklevel: int = 0) -> None: - """Store (*key*, *value*) if *key* is not already present.""" - self.store(key, value, _skip_if_present=True, _stacklevel=1 + _stacklevel) - - def store(self, key: K, value: V, _skip_if_present: bool = False, - _stacklevel: int = 0) -> None: - """Store (*key*, *value*) in the dictionary.""" - raise NotImplementedError() - - def fetch(self, key: K, _stacklevel: int = 0) -> V: - """Return the value associated with *key* in the dictionary.""" - raise NotImplementedError() - - @staticmethod - def _read(path: str) -> V: - from pickle import load - with open(path, "rb") as inf: - return load(inf) - - @staticmethod - def _write(path: str, value: V) -> None: - from pickle import HIGHEST_PROTOCOL, dump - with open(path, "wb") as outf: - dump(value, outf, protocol=HIGHEST_PROTOCOL) - - def _item_dir(self, hexdigest_key: str) -> str: - from os.path import join - - # Some file systems limit the number of directories in a directory. - # For ext4, that limit appears to be 64K for example. - # This doesn't solve that problem, but it makes it much less likely - - return join(self.container_dir, - hexdigest_key[:3], - hexdigest_key[3:6], - hexdigest_key[6:]) - - def _key_file(self, hexdigest_key: str) -> str: - from os.path import join - return join(self._item_dir(hexdigest_key), "key") - - def _contents_file(self, hexdigest_key: str) -> str: - from os.path import join - return join(self._item_dir(hexdigest_key), "contents") - - def _lock_file(self, hexdigest_key: str) -> str: - from os.path import join - return join(self.container_dir, str(hexdigest_key) + ".lock") - - def _make_container_dir(self) -> None: - """Create the container directory to store the dictionary.""" - os.makedirs(self.container_dir, exist_ok=True) + # isolation_level=None: enable autocommit mode + # https://www.sqlite.org/lang_transaction.html#implicit_versus_explicit_transactions + self.conn = sqlite3.connect(self.filename, isolation_level=None) + + self.conn.execute( + "CREATE TABLE IF NOT EXISTS dict " + "(keyhash TEXT NOT NULL PRIMARY KEY, key_value TEXT NOT NULL)" + ) + + # https://www.sqlite.org/wal.html + if enable_wal: + self.conn.execute("PRAGMA journal_mode = 'WAL'") + + # Note: the following configuration values were taken from litedict: + # https://github.com/litements/litedict/blob/377603fa597453ffd9997186a493ed4fd23e5399/litedict.py#L67-L70 + # They result in fast operations while maintaining database integrity + # even in the face of concurrent accesses and power loss. 
+ + # temp_store=2: use in-memory temp store + # https://www.sqlite.org/pragma.html#pragma_temp_store + self.conn.execute("PRAGMA temp_store = 2") + + # https://www.sqlite.org/pragma.html#pragma_synchronous + self.conn.execute("PRAGMA synchronous = NORMAL") + + # 64 MByte of cache + # https://www.sqlite.org/pragma.html#pragma_cache_size + self.conn.execute("PRAGMA cache_size = -64000") + + def __del__(self) -> None: + if self.conn: + self.conn.close() - def _collision_check(self, key: K, stored_key: K, _stacklevel: int) -> None: + def _collision_check(self, key: K, stored_key: K) -> None: if stored_key != key: # Key collision, oh well. - self._warn(f"{self.identifier}: key collision in cache at " + from warnings import warn + warn(f"{self.identifier}: key collision in cache at " f"'{self.container_dir}' -- these are sufficiently unlikely " "that they're often indicative of a broken hash key " "implementation (that is not considering some elements " "relevant for equality comparison)", - CollisionWarning, - 1 + _stacklevel) + CollisionWarning + ) # This is here so we can step through equality comparison to # see what is actually non-equal. stored_key == key # pylint:disable=pointless-statement # noqa: B015 raise NoSuchEntryCollisionError(key) + def store_if_not_present(self, key: K, value: V) -> None: + """Store (*key*, *value*) if *key* is not already present.""" + self.store(key, value, _skip_if_present=True) + + def store(self, key: K, value: V, _skip_if_present: bool = False) -> None: + """Store (*key*, *value*) in the dictionary.""" + raise NotImplementedError() + + def fetch(self, key: K) -> V: + """Return the value associated with *key* in the dictionary.""" + raise NotImplementedError() + + def _make_container_dir(self) -> None: + """Create the container directory to store the dictionary.""" + os.makedirs(self.container_dir, exist_ok=True) + def __getitem__(self, key: K) -> V: """Return the value associated with *key* in the dictionary.""" - return self.fetch(key, _stacklevel=1) + return self.fetch(key) def __setitem__(self, key: K, value: V) -> None: """Store (*key*, *value*) in the dictionary.""" - self.store(key, value, _stacklevel=1) + self.store(key, value) + + def __len__(self) -> int: + """Return the number of entries in the dictionary.""" + return next(self.conn.execute("SELECT COUNT(*) FROM dict"))[0] + + def __iter__(self) -> Generator[K, None, None]: + """Return an iterator over the keys in the dictionary.""" + return self.keys() + + def keys(self) -> Generator[K, None, None]: + """Return an iterator over the keys in the dictionary.""" + for row in self.conn.execute("SELECT key_value FROM dict ORDER BY rowid"): + yield pickle.loads(row[0])[0] + + def values(self) -> Generator[V, None, None]: + """Return an iterator over the values in the dictionary.""" + for row in self.conn.execute("SELECT key_value FROM dict ORDER BY rowid"): + yield pickle.loads(row[0])[1] + + def items(self) -> Generator[tuple[K, V], None, None]: + """Return an iterator over the items in the dictionary.""" + for row in self.conn.execute("SELECT key_value FROM dict ORDER BY rowid"): + yield pickle.loads(row[0]) + + def nbytes(self) -> int: + """Return the size of the dictionary in bytes.""" + return next(self.conn.execute("SELECT page_size * page_count FROM " + "pragma_page_size(), pragma_page_count()"))[0] + + def __repr__(self) -> str: + """Return a string representation of the dictionary.""" + return f"{type(self).__name__}({self.filename}, nitems={len(self)})" def clear(self) -> None: """Remove all 
     def clear(self) -> None:
         """Remove all entries from the dictionary."""
-        try:
-            shutil.rmtree(self.container_dir)
-        except OSError as e:
-            if e.errno != errno.ENOENT:
-                raise
-
-        self._make_container_dir()
+        self.conn.execute("DELETE FROM dict")
 
 
 class WriteOncePersistentDict(_PersistentDictBase[K, V]):
@@ -627,6 +576,13 @@
     Compared with :class:`PersistentDict`, this class has faster
     retrieval times because it uses an LRU cache to cache entries in memory.
 
+    .. note::
+
+        This class intentionally does not store all values with a certain
+        key, based on the assumption that key conflicts are highly unlikely,
+        and if they occur, almost always due to a bug in the hash key
+        generation code (:class:`KeyBuilder`).
+
     .. automethod:: __init__
     .. automethod:: __getitem__
     .. automethod:: __setitem__
@@ -639,19 +595,23 @@
 
     def __init__(self, identifier: str,
                  key_builder: Optional[KeyBuilder] = None,
                  container_dir: Optional[str] = None,
+                 enable_wal: bool = False,
                  in_mem_cache_size: int = 256) -> None:
         """
-        :arg identifier: a file-name-compatible string identifying this
+        :arg identifier: a filename-compatible string identifying this
             dictionary
         :arg key_builder: a subclass of :class:`KeyBuilder`
         :arg container_dir: the directory in which to store this
             dictionary. If ``None``, the default cache directory from
            :func:`platformdirs.user_cache_dir` is used
+        :arg enable_wal: enable write-ahead logging (WAL) mode. This mode
+            is faster than the default rollback journal mode, but it is
+            not compatible with network filesystems.
         :arg in_mem_cache_size: retain an in-memory cache of up to
             *in_mem_cache_size* items (with an LRU replacement policy)
         """
-        _PersistentDictBase.__init__(self, identifier, key_builder, container_dir)
-        self._in_mem_cache_size = in_mem_cache_size
+        _PersistentDictBase.__init__(self, identifier, key_builder,
+                                     container_dir, enable_wal)
         from functools import lru_cache
         self._fetch = lru_cache(maxsize=in_mem_cache_size)(self._fetch)
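
A minimal construction sketch for the new keyword arguments (the identifier
and path below are placeholders, not values from this diff):

    from pytools.persistent_dict import WriteOncePersistentDict

    cache = WriteOncePersistentDict(
        "demo-cache",                        # placeholder identifier
        container_dir="/tmp/pytools-demo",   # placeholder path
        enable_wal=True,         # WAL: faster, but avoid network filesystems
        in_mem_cache_size=512)   # LRU cache of up to 512 fetched entries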
@@ -661,129 +621,38 @@
 
         .. versionadded:: 2023.1.1
         """
         self._fetch.cache_clear()
 
-    def _spin_until_removed(self, lock_file: str, stacklevel: int) -> None:
-        from os.path import exists
-
-        attempts = 0
-        while exists(lock_file):
-            from time import sleep
-            sleep(1)
-
-            attempts += 1
-
-            if attempts > 10:
-                self._warn(
-                    f"waiting until unlocked--delete '{lock_file}' if necessary",
-                    stacklevel=1 + stacklevel)
-
-            if attempts > 3 * 60:
-                raise RuntimeError("waited more than three minutes "
-                        f"on the lock file '{lock_file}'"
-                        "--something is wrong")
-
-    def store(self, key: K, value: V, _skip_if_present: bool = False,
-            _stacklevel: int = 0) -> None:
-        hexdigest_key = self.key_builder(key)
-
-        cleanup_m = CleanupManager()
-        try:
-            try:
-                LockManager(cleanup_m, self._lock_file(hexdigest_key),
-                        1 + _stacklevel)
-                item_dir_m = ItemDirManager(
-                        cleanup_m, self._item_dir(hexdigest_key),
-                        delete_on_error=False)
-
-                if item_dir_m.existed:
-                    if _skip_if_present:
-                        return
-                    raise ReadOnlyEntryError(key)
-
-                item_dir_m.mkdir()
-
-                key_path = self._key_file(hexdigest_key)
-                value_path = self._contents_file(hexdigest_key)
-
-                self._write(value_path, value)
-                self._write(key_path, key)
-
-                logger.debug("%s: disk cache store [key=%s]",
-                        self.identifier, hexdigest_key)
-            except Exception:
-                cleanup_m.error_clean_up()
-                raise
-        finally:
-            cleanup_m.clean_up()
-
-    def fetch(self, key: K, _stacklevel: int = 0) -> Any:
-        hexdigest_key = self.key_builder(key)
-
-        (stored_key, stored_value) = self._fetch(hexdigest_key, 1 + _stacklevel)
-
-        self._collision_check(key, stored_key, 1 + _stacklevel)
-
-        return stored_value
-
-    def _fetch(self, hexdigest_key: str,  # pylint:disable=method-hidden
-            _stacklevel: int = 0) -> V:
-        # This is separate from fetch() to allow for LRU caching
-
-        # {{{ check path exists and is unlocked
-
-        item_dir = self._item_dir(hexdigest_key)
-
-        from os.path import isdir
-        if not isdir(item_dir):
-            logger.debug("%s: disk cache miss [key=%s]",
-                    self.identifier, hexdigest_key)
-            raise NoSuchEntryError(hexdigest_key)
-
-        lock_file = self._lock_file(hexdigest_key)
-        self._spin_until_removed(lock_file, 1 + _stacklevel)
-
-        # }}}
-
-        key_file = self._key_file(hexdigest_key)
-        contents_file = self._contents_file(hexdigest_key)
-
-        # Note: Unlike PersistentDict, this doesn't autodelete invalid entires,
-        # because that would lead to a race condition.
-
-        # {{{ load key file and do equality check
+    def store(self, key: K, value: V, _skip_if_present: bool = False) -> None:
+        keyhash = self.key_builder(key)
+        v = pickle.dumps((key, value))
 
         try:
-            read_key = self._read(key_file)
-        except Exception as e:
-            self._warn(f"{type(self).__name__}({self.identifier}) "
-                    f"encountered an invalid key file for key {hexdigest_key}. "
-                    f"Remove the directory '{item_dir}' if necessary. "
-                    f"(caught: {type(e).__name__}: {e})",
-                    stacklevel=1 + _stacklevel)
-            raise NoSuchEntryInvalidKeyError(hexdigest_key)
-
-        # }}}
+            self.conn.execute("INSERT INTO dict VALUES (?, ?)", (keyhash, v))
+        except sqlite3.IntegrityError:
+            if not _skip_if_present:
+                raise ReadOnlyEntryError("WriteOncePersistentDict, "
+                                         "tried overwriting key")
+
+    def _fetch(self, keyhash: str) -> Tuple[K, V]:  # pylint:disable=method-hidden
+        # This method is separate from fetch() to allow for LRU caching
+        c = self.conn.execute("SELECT key_value FROM dict WHERE keyhash=?",
+                              (keyhash,))
+        row = c.fetchone()
+        if row is None:
+            raise KeyError
+        return pickle.loads(row[0])
 
-        logger.debug("%s: disk cache hit [key=%s]",
-                self.identifier, hexdigest_key)
-
-        # {{{ load contents
+    def fetch(self, key: K) -> V:
+        keyhash = self.key_builder(key)
 
         try:
-            read_contents = self._read(contents_file)
-        except Exception as e:
-            self._warn(f"{type(self).__name__}({self.identifier}) "
-                    f"encountered an invalid contents file for key {hexdigest_key}. "
-                    f"Remove the directory '{item_dir}' if necessary."
-                    f"(caught: {type(e).__name__}: {e})",
-                    stacklevel=1 + _stacklevel)
-            raise NoSuchEntryInvalidContentsError(hexdigest_key)
-
-        # }}}
-
-        return (read_key, read_contents)
+            stored_key, value = self._fetch(keyhash)
+        except KeyError:
+            raise NoSuchEntryError(key)
+        else:
+            self._collision_check(key, stored_key)
+            return value
 
     def clear(self) -> None:
         _PersistentDictBase.clear(self)
""" - _PersistentDictBase.__init__(self, identifier, key_builder, container_dir) + _PersistentDictBase.__init__(self, identifier, key_builder, + container_dir, enable_wal) - def store(self, key: K, value: V, _skip_if_present: bool = False, - _stacklevel: int = 0) -> None: - hexdigest_key = self.key_builder(key) + def store(self, key: K, value: V, _skip_if_present: bool = False) -> None: + keyhash = self.key_builder(key) + v = pickle.dumps((key, value)) + + if _skip_if_present: + self.conn.execute("INSERT OR IGNORE INTO dict VALUES (?, ?)", + (keyhash, v)) + else: + self.conn.execute("INSERT OR REPLACE INTO dict VALUES (?, ?)", + (keyhash, v)) - cleanup_m = CleanupManager() - try: - try: - LockManager(cleanup_m, self._lock_file(hexdigest_key), - 1 + _stacklevel) - item_dir_m = ItemDirManager( - cleanup_m, self._item_dir(hexdigest_key), - delete_on_error=True) - - if item_dir_m.existed: - if _skip_if_present: - return - item_dir_m.reset() - - item_dir_m.mkdir() - - key_path = self._key_file(hexdigest_key) - value_path = self._contents_file(hexdigest_key) - - self._write(value_path, value) - self._write(key_path, key) - - logger.debug("%s: cache store [key=%s]", - self.identifier, hexdigest_key) - except Exception: - cleanup_m.error_clean_up() - raise - finally: - cleanup_m.clean_up() - - def fetch(self, key: K, _stacklevel: int = 0) -> V: - hexdigest_key = self.key_builder(key) - item_dir = self._item_dir(hexdigest_key) - - from os.path import isdir - if not isdir(item_dir): - logger.debug("%s: cache miss [key=%s]", - self.identifier, hexdigest_key) + def fetch(self, key: K) -> V: + keyhash = self.key_builder(key) + + c = self.conn.execute("SELECT key_value FROM dict WHERE keyhash=?", + (keyhash,)) + row = c.fetchone() + if row is None: raise NoSuchEntryError(key) - cleanup_m = CleanupManager() - try: - try: - LockManager(cleanup_m, self._lock_file(hexdigest_key), - 1 + _stacklevel) - item_dir_m = ItemDirManager( - cleanup_m, item_dir, delete_on_error=False) - - key_path = self._key_file(hexdigest_key) - value_path = self._contents_file(hexdigest_key) - - # {{{ load key - - try: - read_key = self._read(key_path) - except Exception as e: - item_dir_m.reset() - self._warn(f"{type(self).__name__}({self.identifier}) " - "encountered an invalid key file for key " - f"{hexdigest_key}. Entry deleted." - f"(caught: {type(e).__name__}: {e})", - stacklevel=1 + _stacklevel) - raise NoSuchEntryInvalidKeyError(key) - - self._collision_check(key, read_key, 1 + _stacklevel) - - # }}} - - logger.debug("%s: cache hit [key=%s]", - self.identifier, hexdigest_key) - - # {{{ load value - - try: - read_contents = self._read(value_path) - except Exception as e: - item_dir_m.reset() - self._warn(f"{type(self).__name__}({self.identifier}) " - "encountered an invalid contents file for key " - f"{hexdigest_key}. Entry deleted." 
- f"(caught: {type(e).__name__}: {e})", - stacklevel=1 + _stacklevel) - raise NoSuchEntryInvalidContentsError(key) - - return read_contents - - # }}} - - except Exception: - cleanup_m.error_clean_up() - raise - finally: - cleanup_m.clean_up() + stored_key, value = pickle.loads(row[0]) + self._collision_check(key, stored_key) + return value - def remove(self, key: K, _stacklevel: int = 0) -> None: + def remove(self, key: K) -> None: """Remove the entry associated with *key* from the dictionary.""" - hexdigest_key = self.key_builder(key) + keyhash = self.key_builder(key) - item_dir = self._item_dir(hexdigest_key) - from os.path import isdir - if not isdir(item_dir): - raise NoSuchEntryError(key) + self.conn.execute("BEGIN EXCLUSIVE TRANSACTION") - cleanup_m = CleanupManager() try: - try: - LockManager(cleanup_m, self._lock_file(hexdigest_key), - 1 + _stacklevel) - item_dir_m = ItemDirManager( - cleanup_m, item_dir, delete_on_error=False) - key_file = self._key_file(hexdigest_key) - - # {{{ load key - - try: - read_key = self._read(key_file) - except Exception as e: - item_dir_m.reset() - self._warn(f"{type(self).__name__}({self.identifier}) " - "encountered an invalid key file for key " - f"{hexdigest_key}. Entry deleted" - f"(caught: {type(e).__name__}: {e})", - stacklevel=1 + _stacklevel) - raise NoSuchEntryInvalidKeyError(key) - - self._collision_check(key, read_key, 1 + _stacklevel) - - # }}} - - item_dir_m.reset() - - except Exception: - cleanup_m.error_clean_up() - raise - finally: - cleanup_m.clean_up() + # This is split into SELECT/DELETE to allow for a collision check + c = self.conn.execute("SELECT key_value FROM dict WHERE keyhash=?", + (keyhash,)) + row = c.fetchone() + if row is None: + raise NoSuchEntryError(key) + + stored_key, _value = pickle.loads(row[0]) + self._collision_check(key, stored_key) + + self.conn.execute("DELETE FROM dict WHERE keyhash=?", (keyhash,)) + self.conn.execute("COMMIT") + except Exception as e: + self.conn.execute("ROLLBACK") + raise e def __delitem__(self, key: K) -> None: """Remove the entry associated with *key* from the dictionary.""" - self.remove(key, _stacklevel=1) + self.remove(key) # }}} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytools-2024.1.2/pytools/test/test_persistent_dict.py new/pytools-2024.1.4/pytools/test/test_persistent_dict.py --- old/pytools-2024.1.2/pytools/test/test_persistent_dict.py 2024-04-23 17:15:32.000000000 +0200 +++ new/pytools-2024.1.4/pytools/test/test_persistent_dict.py 2024-05-30 19:19:07.000000000 +0200 @@ -169,6 +169,9 @@ del pdict[0] with pytest.raises(NoSuchEntryError): + pdict.remove(0) + + with pytest.raises(NoSuchEntryError): pdict.fetch(0) with pytest.raises(NoSuchEntryError): @@ -598,6 +601,105 @@ != keyb(MyAttrs("hi", 1))) # type: ignore[call-arg] +def test_datetime_hashing() -> None: + keyb = KeyBuilder() + + import datetime + + # {{{ date + # No timezone info; date is always naive + assert (keyb(datetime.date(2020, 1, 1)) + == keyb(datetime.date(2020, 1, 1)) + == "9fb97d7faabc3603f3e334ca5eb1eb0fe0c92665e5611cb1b5aa77fa0f70f5e3") + assert keyb(datetime.date(2020, 1, 1)) != keyb(datetime.date(2020, 1, 2)) + + # }}} + + # {{{ time + + # Must distinguish between naive and aware time objects + + # Naive time + assert (keyb(datetime.time(12, 0)) + == keyb(datetime.time(12, 0)) + == keyb(datetime.time(12, 0, 0)) + == keyb(datetime.time(12, 0, 0, 0)) + == "288ec82f6a00ac15968d4d257d4aca1089b863c61ef2ee200e64351238397705") + assert keyb(datetime.time(12, 0)) != 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytools-2024.1.2/pytools/test/test_persistent_dict.py new/pytools-2024.1.4/pytools/test/test_persistent_dict.py
--- old/pytools-2024.1.2/pytools/test/test_persistent_dict.py  2024-04-23 17:15:32.000000000 +0200
+++ new/pytools-2024.1.4/pytools/test/test_persistent_dict.py  2024-05-30 19:19:07.000000000 +0200
@@ -169,6 +169,9 @@
     del pdict[0]
 
     with pytest.raises(NoSuchEntryError):
+        pdict.remove(0)
+
+    with pytest.raises(NoSuchEntryError):
         pdict.fetch(0)
 
     with pytest.raises(NoSuchEntryError):
@@ -598,6 +601,105 @@
             != keyb(MyAttrs("hi", 1)))  # type: ignore[call-arg]
 
 
+def test_datetime_hashing() -> None:
+    keyb = KeyBuilder()
+
+    import datetime
+
+    # {{{ date
+    # No timezone info; date is always naive
+    assert (keyb(datetime.date(2020, 1, 1))
+            == keyb(datetime.date(2020, 1, 1))
+            == "9fb97d7faabc3603f3e334ca5eb1eb0fe0c92665e5611cb1b5aa77fa0f70f5e3")
+    assert keyb(datetime.date(2020, 1, 1)) != keyb(datetime.date(2020, 1, 2))
+
+    # }}}
+
+    # {{{ time
+
+    # Must distinguish between naive and aware time objects
+
+    # Naive time
+    assert (keyb(datetime.time(12, 0))
+            == keyb(datetime.time(12, 0))
+            == keyb(datetime.time(12, 0, 0))
+            == keyb(datetime.time(12, 0, 0, 0))
+            == "288ec82f6a00ac15968d4d257d4aca1089b863c61ef2ee200e64351238397705")
+    assert keyb(datetime.time(12, 0)) != keyb(datetime.time(12, 1))
+
+    # Aware time
+    t1 = datetime.time(12, 0, tzinfo=datetime.timezone.utc)
+    t2 = datetime.time(7, 0,
+                       tzinfo=datetime.timezone(datetime.timedelta(hours=-5)))
+    t3 = datetime.time(7, 0,
+                       tzinfo=datetime.timezone(datetime.timedelta(hours=-4)))
+
+    assert t1 == t2
+    assert (keyb(t1)
+            == keyb(t2)
+            == "3587427ca9d581779d532b397df206ddeadfcf4e38b1ee69c19174e8e1268cc4")
+
+    assert t1 != t3
+    assert keyb(t1) != keyb(t3)
+
+    # }}}
+
+    # {{{ datetime
+
+    # must distinguish between naive and aware datetime objects
+
+    # Aware datetime
+    dt1 = datetime.datetime(2020, 1, 1, 12, tzinfo=datetime.timezone.utc)
+    dt2 = datetime.datetime(2020, 1, 1, 7,
+                            tzinfo=datetime.timezone(datetime.timedelta(hours=-5)))
+
+    assert dt1 == dt2
+    assert (keyb(dt1)
+            == keyb(dt2)
+            == "cd35722af47e42cb3bc81c389b87eb2e78ee8e20298bb1d8a193b30940d1c142")
+
+    dt3 = datetime.datetime(2020, 1, 1, 7,
+                            tzinfo=datetime.timezone(datetime.timedelta(hours=-4)))
+
+    assert dt1 != dt3
+    assert keyb(dt1) != keyb(dt3)
+
+    # Naive datetime
+    dt4 = datetime.datetime(2020, 1, 1, 6)  # matches dt1 'naively'
+    assert dt1 != dt4  # naive and aware datetime objects are never equal
+    assert keyb(dt1) != keyb(dt4)
+
+    assert (keyb(datetime.datetime(2020, 1, 1))
+            == keyb(datetime.datetime(2020, 1, 1))
+            == keyb(datetime.datetime(2020, 1, 1, 0, 0, 0, 0))
+            == "8f3b843d7b9176afd8e2ce97ebc19789098a1c7774c4ec00d4054ec954ce2b88"
+            )
+    assert keyb(datetime.datetime(2020, 1, 1)) != keyb(datetime.datetime(2020, 1, 2))
+    assert (keyb(datetime.datetime(2020, 1, 1))
+            != keyb(datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)))
+
+    # }}}
+
+    # {{{ timezone
+
+    tz1 = datetime.timezone(datetime.timedelta(hours=-4))
+    tz2 = datetime.timezone(datetime.timedelta(hours=0))
+    tz3 = datetime.timezone.utc
+
+    assert tz1 != tz2
+    assert keyb(tz1) != keyb(tz2)
+
+    assert tz1 != tz3
+    assert keyb(tz1) != keyb(tz3)
+
+    assert tz2 == tz3
+    assert (keyb(tz2)
+            == keyb(tz3)
+            == "89bd615f32c1f209b0853b1fc7d06ddb6fda7f367a00a8621d60337d52cb8d10")
+
+    # }}}
+
+
 def test_xdg_cache_home() -> None:
     import os
     xdg_dir = "tmpdir_pytools_xdg_test"
@@ -620,6 +722,172 @@
         shutil.rmtree(xdg_dir)
 
 
+def test_speed():
+    import time
+
+    tmpdir = tempfile.mkdtemp()
+    pdict = WriteOncePersistentDict("pytools-test", container_dir=tmpdir)
+
+    start = time.time()
+    for i in range(10000):
+        pdict[i] = i
+    end = time.time()
+    print("persistent dict write time: ", end-start)
+
+    start = time.time()
+    for _ in range(5):
+        for i in range(10000):
+            pdict[i]
+    end = time.time()
+    print("persistent dict read time: ", end-start)
+
+    shutil.rmtree(tmpdir)
+
+
+def test_size():
+    try:
+        tmpdir = tempfile.mkdtemp()
+        pdict = PersistentDict("pytools-test", container_dir=tmpdir)
+
+        for i in range(10000):
+            pdict[f"foobarbazfoobbb{i}"] = i
+
+        size = pdict.nbytes()
+        print("sqlite size: ", size/1024/1024, " MByte")
+        assert 1*1024*1024 < size < 2*1024*1024
+    finally:
+        shutil.rmtree(tmpdir)
+
+
+def test_len():
+    try:
+        tmpdir = tempfile.mkdtemp()
+        pdict = PersistentDict("pytools-test", container_dir=tmpdir)
+
+        assert len(pdict) == 0
+
+        for i in range(10000):
+            pdict[i] = i
+
+        assert len(pdict) == 10000
+
+        pdict.clear()
+
+        assert len(pdict) == 0
+    finally:
+        shutil.rmtree(tmpdir)
+
+
+def test_repr():
+    try:
+        tmpdir = tempfile.mkdtemp()
+        pdict = PersistentDict("pytools-test", container_dir=tmpdir)
+
+        assert repr(pdict)[:15] == "PersistentDict("
+    finally:
+        shutil.rmtree(tmpdir)
+
+
+def test_keys_values_items():
+    try:
+        tmpdir = tempfile.mkdtemp()
+        pdict = PersistentDict("pytools-test", container_dir=tmpdir)
+
+        for i in range(10000):
+            pdict[i] = i
+
+        # This also tests deterministic iteration order
+        assert len(list(pdict.keys())) == 10000 == len(set(pdict.keys()))
+        assert list(pdict.keys()) == list(range(10000))
+        assert list(pdict.values()) == list(range(10000))
+        assert list(pdict.items()) == list(zip(list(pdict.keys()), range(10000)))
+
+        assert ([k for k in pdict.keys()]  # noqa: C416
+                == list(pdict.keys())
+                == list(pdict)
+                == [k for k in pdict])  # noqa: C416
+
+    finally:
+        shutil.rmtree(tmpdir)
+
+
+def global_fun():
+    pass
+
+
+def global_fun2():
+    pass
+
+
+def test_hash_function() -> None:
+    keyb = KeyBuilder()
+
+    # {{{ global functions
+
+    assert keyb(global_fun) == keyb(global_fun) == \
+        "51b5980dd3a8aa13f6e83869e4a04c22973d7aaf96cb22899abdfdc55e15c9b2"
+    assert keyb(global_fun) != keyb(global_fun2)
+
+    # }}}
+
+    # {{{ closures
+
+    def get_fun(x):
+        def add_x(y):
+            return x + y
+        return add_x
+
+    f1 = get_fun(1)
+    f11 = get_fun(1)
+    f2 = get_fun(2)
+
+    fa = get_fun
+    fb = get_fun
+
+    assert fa == fb
+    assert keyb(fa) == keyb(fb)
+
+    assert f1 != f2
+    assert keyb(f1) != keyb(f2)
+
+    # FIXME: inconsistency!
+    assert f1 != f11
+    assert hash(f1) != hash(f11)
+    assert keyb(f1) == keyb(f11)
+
+    # }}}
+
+    # {{{ local functions
+
+    def local_fun():
+        pass
+
+    def local_fun2():
+        pass
+
+    assert keyb(local_fun) == keyb(local_fun) == \
+        "fc58f5b0130df821913c848749eb03f5dcd4da7a568c6130f1c0cfb96ed0d12d"
+    assert keyb(local_fun) != keyb(local_fun2)
+
+    # }}}
+
+    # {{{ methods
+
+    class C1:
+        def method(self):
+            pass
+
+    class C2:
+        def method(self):
+            pass
+
+    assert keyb(C1.method) == keyb(C1.method) == \
+        "3013eb424dac133a57bd70cb6084d2a2f349a247714efc508fe3b10b99b6f717"
+    assert keyb(C1.method) != keyb(C2.method)
+
+    # }}}
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytools-2024.1.2/pytools/test/test_pytools.py new/pytools-2024.1.4/pytools/test/test_pytools.py
--- old/pytools-2024.1.2/pytools/test/test_pytools.py  2024-04-23 17:15:32.000000000 +0200
+++ new/pytools-2024.1.4/pytools/test/test_pytools.py  2024-05-21 19:51:19.000000000 +0200
@@ -26,6 +26,8 @@
 
 import pytest
 
+from pytools import Record
+
 
 logger = logging.getLogger(__name__)
 
 from typing import FrozenSet
@@ -738,6 +740,23 @@
     assert strtobool(None, False) is False
 
 
+def test_to_identifier() -> None:
+    from pytools import to_identifier
+
+    assert to_identifier("_a_123_") == "_a_123_"
+    assert to_identifier("a_123") == "a_123"
+    assert to_identifier("a 123") == "a123"
+    assert to_identifier("123") == "_123"
+    assert to_identifier("_123") == "_123"
+    assert to_identifier("123A") == "_123A"
+    assert to_identifier("") == "_"
+
+    assert not "a 123".isidentifier()
+    assert to_identifier("a 123").isidentifier()
+    assert to_identifier("123").isidentifier()
+    assert to_identifier("").isidentifier()
+
+
 def test_typedump():
     from pytools import typedump
     assert typedump("") == "str"
@@ -783,6 +802,78 @@
     assert next(unique([]), None) is None
 
 
+# This class must be defined globally to be picklable
+class SimpleRecord(Record):
+    pass
+
+
+def test_record():
+    r = SimpleRecord(c=3, b=2, a=1)
+
+    assert r.a == 1
+    assert r.b == 2
+    assert r.c == 3
+
+    # Fields are sorted alphabetically in records
+    assert str(r) == "SimpleRecord(a=1, b=2, c=3)"
+
+    # Unregistered fields are (silently) ignored for printing
+    r.f = 6
+    assert str(r) == "SimpleRecord(a=1, b=2, c=3)"
+
+    # Registered fields are printed
+    r.register_fields({"d", "e"})
+    assert str(r) == "SimpleRecord(a=1, b=2, c=3)"
+
+    r.d = 4
+    r.e = 5
+    assert str(r) == "SimpleRecord(a=1, b=2, c=3, d=4, e=5)"
+
+    with pytest.raises(AttributeError):
+        r.ff
+
+    # Test pickling
+    import pickle
+    r_pickled = pickle.loads(pickle.dumps(r))
+    assert r == r_pickled
+
+    # }}}
+
+    # {{{ __slots__, __dict__, __weakref__ handling
+
+    class RecordWithEmptySlots(Record):
+        __slots__ = []
+
+    assert hasattr(RecordWithEmptySlots(), "__slots__")
+    assert not hasattr(RecordWithEmptySlots(), "__dict__")
+    assert not hasattr(RecordWithEmptySlots(), "__weakref__")
+
+    class RecordWithUnsetSlots(Record):
+        pass
+
+    assert hasattr(RecordWithUnsetSlots(), "__slots__")
+    assert hasattr(RecordWithUnsetSlots(), "__dict__")
+    assert hasattr(RecordWithUnsetSlots(), "__weakref__")
+
+    from pytools import ImmutableRecord
+
+    class ImmutableRecordWithEmptySlots(ImmutableRecord):
+        __slots__ = []
+
+    assert hasattr(ImmutableRecordWithEmptySlots(), "__slots__")
+    assert hasattr(ImmutableRecordWithEmptySlots(), "__dict__")
+    assert hasattr(ImmutableRecordWithEmptySlots(), "__weakref__")
+
+    class ImmutableRecordWithUnsetSlots(ImmutableRecord):
+        pass
+
+    assert hasattr(ImmutableRecordWithUnsetSlots(), "__slots__")
+    assert hasattr(ImmutableRecordWithUnsetSlots(), "__dict__")
+    assert hasattr(ImmutableRecordWithUnsetSlots(), "__weakref__")
+
+    # }}}
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytools-2024.1.2/pytools/version.py new/pytools-2024.1.4/pytools/version.py
--- old/pytools-2024.1.2/pytools/version.py    2024-04-24 16:57:12.000000000 +0200
+++ new/pytools-2024.1.4/pytools/version.py    2024-05-31 19:39:32.000000000 +0200
@@ -1,3 +1,3 @@
-VERSION = (2024, 1, 2)
+VERSION = (2024, 1, 4)
 VERSION_STATUS = ""
 VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytools-2024.1.2/pytools.egg-info/PKG-INFO new/pytools-2024.1.4/pytools.egg-info/PKG-INFO
--- old/pytools-2024.1.2/pytools.egg-info/PKG-INFO     2024-04-24 16:58:13.000000000 +0200
+++ new/pytools-2024.1.4/pytools.egg-info/PKG-INFO     2024-05-31 19:41:22.000000000 +0200
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pytools
-Version: 2024.1.2
+Version: 2024.1.4
 Summary: A collection of tools for Python
 Home-page: http://pypi.python.org/pypi/pytools
 Author: Andreas Kloeckner
