Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-fsspec for openSUSE:Factory checked in at 2022-11-21 16:19:36 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-fsspec (Old) and /work/SRC/openSUSE:Factory/.python-fsspec.new.1597 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-fsspec" Mon Nov 21 16:19:36 2022 rev:23 rq:1036969 version:2022.11.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-fsspec/python-fsspec.changes 2022-11-01 13:43:36.888230136 +0100 +++ /work/SRC/openSUSE:Factory/.python-fsspec.new.1597/python-fsspec.changes 2022-11-21 16:19:38.787870435 +0100 @@ -1,0 +2,23 @@ +Sat Nov 19 14:48:38 UTC 2022 - Ben Greiner <c...@bnavigator.de> + +- Update to 2022.11.0 + ## Enhancements + * Speed up FSMap._key_to_str (#1101) + * Add modified/created to Memory and Arrow (#1096) + * Clear expired cache method (#1092) + * Allow seekable arrow file (#1091) + * Allow append for arrow (#1089) + * recursive for sftp.get (#1082) + * topdown arg to walk() (#1081) + ## Fixes + * fix doc warnings (#1106, #1084) + * Fix HDFS _strip_protocol (#1103) + * Allow URLs with protocol for HDFS (#1099) + * yarl in doc deps (#1095) + * missing await in genericFS.cp (#1094) + * explicit IPv4 for test HTTP server (#1088) + * sort when merging ranges for referenceFS (#1087) + ## Other + * Check that snappy is snappy (#1079) + +------------------------------------------------------------------- Old: ---- fsspec-2022.10.0.tar.gz New: ---- fsspec-2022.11.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-fsspec.spec ++++++ --- /var/tmp/diff_new_pack.B4qWzo/_old 2022-11-21 16:19:39.299873065 +0100 +++ /var/tmp/diff_new_pack.B4qWzo/_new 2022-11-21 16:19:39.303873085 +0100 @@ -24,17 +24,15 @@ %define psuffix %{nil} %bcond_with test %endif -%define skip_python2 1 Name: python-fsspec%{psuffix} -Version: 2022.10.0 +Version: 2022.11.0 Release: 0 Summary: Filesystem specification package License: BSD-3-Clause URL: https://github.com/fsspec/filesystem_spec # the tests are only in the GitHub archive Source: https://github.com/fsspec/filesystem_spec/archive/%{version}.tar.gz#/fsspec-%{version}.tar.gz -BuildRequires: %{python_module base >= 3.6} -BuildRequires: %{python_module importlib_metadata if %python-base < 3.8} +BuildRequires: %{python_module base >= 3.7} BuildRequires: %{python_module setuptools} BuildRequires: fdupes BuildRequires: fuse @@ -53,9 +51,6 @@ Suggests: python-s3fs Suggests: python-smbprotocol BuildArch: noarch -%if 0%{?python_version_nodots} < 38 -Requires: python-importlib_metadata -%endif %if %{with test} BuildRequires: %{python_module aiohttp} BuildRequires: %{python_module cloudpickle} ++++++ fsspec-2022.10.0.tar.gz -> fsspec-2022.11.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/ci/environment-win.yml new/filesystem_spec-2022.11.0/ci/environment-win.yml --- old/filesystem_spec-2022.10.0/ci/environment-win.yml 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/ci/environment-win.yml 2022-11-10 03:21:38.000000000 +0100 @@ -18,6 +18,7 @@ - pytest-mock - pytest-vcr - python-libarchive-c + - py - numpy - nomkl - tqdm diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/docs/environment.yml new/filesystem_spec-2022.11.0/docs/environment.yml --- old/filesystem_spec-2022.10.0/docs/environment.yml 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/docs/environment.yml 2022-11-10 03:21:38.000000000 +0100 @@ -3,4 +3,6 @@ - defaults dependencies: - python=3.9 + - docutils<0.17 - numpydoc + - yarl diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/docs/source/api.rst new/filesystem_spec-2022.11.0/docs/source/api.rst --- old/filesystem_spec-2022.10.0/docs/source/api.rst 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/docs/source/api.rst 2022-11-10 03:21:38.000000000 +0100 @@ -130,6 +130,7 @@ fsspec.implementations.dbfs.DatabricksFileSystem fsspec.implementations.reference.ReferenceFileSystem fsspec.implementations.dirfs.DirFileSystem + fsspec.implementations.tar.TarFileSystem .. autoclass:: fsspec.implementations.ftp.FTPFileSystem :members: __init__ @@ -194,6 +195,9 @@ .. autoclass:: fsspec.implementations.dirfs.DirFileSystem :members: __init__ +.. autoclass:: fsspec.implementations.tar.TarFileSystem + :members: __init__ + Other Known Implementations --------------------------- @@ -244,3 +248,12 @@ .. autoclass:: fsspec.caching.BlockCache :members: + +Utilities +--------- + +.. autosummary:: + + fsspec.utils.read_block + +.. autofunction:: fsspec.utils.read_block diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/docs/source/changelog.rst new/filesystem_spec-2022.11.0/docs/source/changelog.rst --- old/filesystem_spec-2022.10.0/docs/source/changelog.rst 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/docs/source/changelog.rst 2022-11-10 03:21:38.000000000 +0100 @@ -1,6 +1,33 @@ Changelog ========= +2022.11.0 +--------- + +Enhancements + +- Speed up FSMap._key_to_str (#1101) +- Add modified/created to Memory and Arrow (#1096) +- Clear expired cache method (#1092) +- Allow seekable arrow file (#1091) +- Allow append for arrow (#1089) +- recursive for sftp.get (#1082) +- topdown arg to walk() (#1081) + +Fixes + +- fix doc warnings (#1106, #1084) +- Fix HDFS _strip_protocol (#1103) +- Allow URLs with protocol for HDFS (#1099) +- yarl in doc deps (#1095) +- missing await in genericFS.cp (#1094) +- explicit IPv4 for test HTTP server (#1088) +- sort when merging ranges for referenceFS (#1087) + +Other + +- Check that snappy is snappy (#1079) + 2022.10.0 --------- @@ -218,7 +245,7 @@ Fixes -- Removed inaccurate `ZipFileSystem.cat()` override so that the base +- Removed inaccurate ``ZipFileSystem.cat()`` override so that the base class' version is used (#789) - fix entrypoint processing (#784) - case where no blocks of a block-cache have yet been loaded (#801) @@ -475,7 +502,7 @@ Features: - error options for cat -- memory fs created time in detailed `ls` +- memory fs created time in detailed ``ls``` Fixes: @@ -604,7 +631,7 @@ file reads in blocks (:pr:`191`). * Fixed equality checks for file system instance to return ``False`` when compared to objects other than file systems (:pr:`192`) -* Fixed a bug in :meth:`fsspec.FSMap.keys` returning a generator, which was consumed upon iteration (:pr:`189`). +* Fixed a bug in ``fsspec.FSMap.keys`` returning a generator, which was consumed upon iteration (:pr:`189`). * Removed the magic addition of aliases in ``AbstractFileSystem.__init__``. Now alias methods are always present (:pr:`177`) * Deprecated passing ``trim`` to :class:`fsspec.spec.AbstractBufferedFile`. Pass it in ``storage_options`` instead (:pr:`188`) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/docs/source/conf.py new/filesystem_spec-2022.11.0/docs/source/conf.py --- old/filesystem_spec-2022.10.0/docs/source/conf.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/docs/source/conf.py 2022-11-10 03:21:38.000000000 +0100 @@ -70,13 +70,6 @@ # The full version, including alpha/beta/rc tags. release = fsspec.__version__ -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path @@ -192,6 +185,6 @@ ] extlinks = { - "issue": ("https://github.com/fsspec/filesystem_spec/issues/%s", "GH#"), - "pr": ("https://github.com/fsspec/filesystem_spec/pull/%s", "GH#"), + "issue": ("https://github.com/fsspec/filesystem_spec/issues/%s", "GH#%s"), + "pr": ("https://github.com/fsspec/filesystem_spec/pull/%s", "GH#%s"), } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/docs/source/features.rst new/filesystem_spec-2022.11.0/docs/source/features.rst --- old/filesystem_spec-2022.10.0/docs/source/features.rst 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/docs/source/features.rst 2022-11-10 03:21:38.000000000 +0100 @@ -252,7 +252,7 @@ This local cache of data might be temporary (i.e., attached to the process and discarded when the process ends) or at some specific location in your local storage. -Two mechanisms are provided, and both involve wrapping a `target` filesystem. The following example +Two mechanisms are provided, and both involve wrapping a ``target`` filesystem. The following example creates a file-based cache. .. code-block:: python diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/docs/source/index.rst new/filesystem_spec-2022.11.0/docs/source/index.rst --- old/filesystem_spec-2022.10.0/docs/source/index.rst 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/docs/source/index.rst 2022-11-10 03:21:38.000000000 +0100 @@ -72,7 +72,7 @@ Installation ------------ -`fsspec` can be installed from PyPI or conda and has no dependencies of its own +``fsspec`` can be installed from PyPI or conda and has no dependencies of its own .. code-block:: sh @@ -88,7 +88,7 @@ pip install fsspec[gcs] conda install -c conda-forge gcsfs -`fsspec` attempts to provide the right message when you attempt to use a filesystem +``fsspec``` attempts to provide the right message when you attempt to use a filesystem for which you need additional dependencies. The current list of known implementations can be found as follows diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/docs/source/intro.rst new/filesystem_spec-2022.11.0/docs/source/intro.rst --- old/filesystem_spec-2022.10.0/docs/source/intro.rst 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/docs/source/intro.rst 2022-11-10 03:21:38.000000000 +0100 @@ -33,7 +33,7 @@ particularly a common interface to local and HDFS files, for example the `hdfs`_ interface (which actually communicated with HDFS with a choice of driver). These are mostly used internally within Arrow, but Dask was modified in order to be able -to use the alternate HDFS interface (which solves some security issues with `hdfs3`). In the process, a +to use the alternate HDFS interface (which solves some security issues with ``hdfs3``). In the process, a `conversation`_ was started, and I invite all interested parties to continue the conversation in this location. @@ -51,7 +51,7 @@ The following places to consider, when choosing the definitions of how we would like the file-system specification to look: -#. python's `os`_ module and its `path` namespace; also other file-connected +#. python's `os`_ module and its `path`_ namespace; also other file-connected functionality in the standard library #. posix/bash method naming conventions that linux/unix/osx users are familiar with; or perhaps their Windows variants #. the existing implementations for the various backends (e.g., @@ -62,6 +62,7 @@ validation code. .. _os: https://docs.python.org/3/library/os.html +.. _path: https://docs.python.org/3/library/os.path.html .. _gcsfs: http://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem .. _pyfilesystems: https://docs.pyfilesystem.org/en/latest/index.html diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/docs/source/usage.rst new/filesystem_spec-2022.11.0/docs/source/usage.rst --- old/filesystem_spec-2022.10.0/docs/source/usage.rst 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/docs/source/usage.rst 2022-11-10 03:21:38.000000000 +0100 @@ -12,7 +12,7 @@ file-like objects. Some concrete implementations are bundled with ``fsspec`` and others can be installed separately. They -can be instantiated directly, or the `registry` can be used to find them. +can be instantiated directly, or the ``registry`` can be used to find them. Direct instantiation: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/_version.py new/filesystem_spec-2022.11.0/fsspec/_version.py --- old/filesystem_spec-2022.10.0/fsspec/_version.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/_version.py 2022-11-10 03:21:38.000000000 +0100 @@ -22,9 +22,9 @@ # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). - git_refnames = " (tag: 2022.10.0)" - git_full = "0912785ada9d8b7179bd0188eea8624380f20149" - git_date = "2022-10-19 10:54:46 -0400" + git_refnames = " (tag: 2022.11.0)" + git_full = "acad158dbfb71d4a58f8a928f667c2191b09bfc4" + git_date = "2022-11-09 21:21:38 -0500" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/archive.py new/filesystem_spec-2022.11.0/fsspec/archive.py --- old/filesystem_spec-2022.10.0/fsspec/archive.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/archive.py 2022-11-10 03:21:38.000000000 +0100 @@ -6,8 +6,10 @@ """ A generic superclass for implementing Archive-based filesystems. - Currently, it is shared amongst `ZipFileSystem`, `LibArchiveFileSystem` and - `TarFileSystem`. + Currently, it is shared amongst + :class:`~fsspec.implementations.zip.ZipFileSystem`, + :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and + :class:`~fsspec.implementations.tar.TarFileSystem`. """ def __str__(self): @@ -19,7 +21,8 @@ return tokenize(path, self.fo, self.protocol) def _all_dirnames(self, paths): - """Returns *all* directory names for each path in paths, including intermediate ones. + """Returns *all* directory names for each path in paths, including intermediate + ones. Parameters ---------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/asyn.py new/filesystem_spec-2022.11.0/fsspec/asyn.py --- old/filesystem_spec-2022.10.0/fsspec/asyn.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/asyn.py 2022-11-10 03:21:38.000000000 +0100 @@ -24,7 +24,7 @@ def get_lock(): """Allocate or return a threading lock. - The lock is allocatted on first use to allow setting one lock per forked process. + The lock is allocated on first use to allow setting one lock per forked process. """ global _lock if not _lock: @@ -61,8 +61,10 @@ """ Make loop run coroutine until it returns. Runs in other thread - Example usage: - fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args, timeout=timeout, **kwargs) + Examples + -------- + >>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args, + timeout=timeout, **kwargs) """ timeout = timeout if timeout else None # convert 0 or 0.0 to None # NB: if the loop is not running *yet*, it is OK to submit work diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/caching.py new/filesystem_spec-2022.11.0/fsspec/caching.py --- old/filesystem_spec-2022.10.0/fsspec/caching.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/caching.py 2022-11-10 03:21:38.000000000 +0100 @@ -192,15 +192,15 @@ """ Cache holding memory as a set of blocks. - Requests are only ever made `blocksize` at a time, and are + Requests are only ever made ``blocksize`` at a time, and are stored in an LRU cache. The least recently accessed block is - discarded when more than `maxblocks` are stored. + discarded when more than ``maxblocks`` are stored. Parameters ---------- blocksize : int The number of bytes to store in each block. - Requests are only ever made for `blocksize`, so this + Requests are only ever made for ``blocksize``, so this should balance the overhead of making a request against the granularity of the blocks. fetcher : Callable diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/callbacks.py new/filesystem_spec-2022.11.0/fsspec/callbacks.py --- old/filesystem_spec-2022.10.0/fsspec/callbacks.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/callbacks.py 2022-11-10 03:21:38.000000000 +0100 @@ -201,7 +201,7 @@ callback=TqdmCallback(), ) - You can forward args to tqdm using the `tqdm_kwargs` parameter. + You can forward args to tqdm using the ``tqdm_kwargs`` parameter. >>> fs.upload( ".", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/compression.py new/filesystem_spec-2022.11.0/fsspec/compression.py --- old/filesystem_spec-2022.10.0/fsspec/compression.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/compression.py 2022-11-10 03:21:38.000000000 +0100 @@ -140,7 +140,7 @@ # standard implementation. register_compression("snappy", SnappyFile, []) -except (ImportError, NameError): +except (ImportError, NameError, AttributeError): pass try: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/core.py new/filesystem_spec-2022.11.0/fsspec/core.py --- old/filesystem_spec-2022.10.0/fsspec/core.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/core.py 2022-11-10 03:21:38.000000000 +0100 @@ -36,7 +36,7 @@ are typically binary-only. These instances are safe to serialize, as the low-level file object - is not created until invoked using `with`. + is not created until invoked using ``with``. Parameters ---------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/fuse.py new/filesystem_spec-2022.11.0/fsspec/fuse.py --- old/filesystem_spec-2022.10.0/fsspec/fuse.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/fuse.py 2022-11-10 03:21:38.000000000 +0100 @@ -176,8 +176,8 @@ within the mounter directory. Operation will typically be more stable if False. ready_file: bool - Whether the FUSE process is ready. The `.fuse_ready` file will - exist in the `mount_point` directory if True. Debugging purpose. + Whether the FUSE process is ready. The ``.fuse_ready`` file will + exist in the ``mount_point`` directory if True. Debugging purpose. ops_class: FUSEr or Subclass of FUSEr To override the default behavior of FUSEr. For Example, logging to file. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/generic.py new/filesystem_spec-2022.11.0/fsspec/generic.py --- old/filesystem_spec-2022.10.0/fsspec/generic.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/generic.py 2022-11-10 03:21:38.000000000 +0100 @@ -170,7 +170,7 @@ if hasattr(fs, "open_async") else fs.open(url, "rb", **kw) ) - callback.set_size(maybe_await(f1.size)) + callback.set_size(await maybe_await(f1.size)) f2 = ( await fs2.open_async(url2, "wb") if hasattr(fs2, "open_async") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/arrow.py new/filesystem_spec-2022.11.0/fsspec/implementations/arrow.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/arrow.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/arrow.py 2022-11-10 03:21:38.000000000 +0100 @@ -7,7 +7,7 @@ from functools import wraps from fsspec.spec import AbstractFileSystem -from fsspec.utils import infer_storage_options, mirror_from, stringify_path +from fsspec.utils import infer_storage_options, mirror_from def wrap_exceptions(func): @@ -52,13 +52,15 @@ @classmethod def _strip_protocol(cls, path): - path = stringify_path(path) - if "://" in path: - _, _, path = path.partition("://") - + ops = infer_storage_options(path) + path = ops["path"] + if path.startswith("//"): + # special case for "hdfs://path" (without the triple slash) + path = path[1:] return path def ls(self, path, detail=False, **kwargs): + path = self._strip_protocol(path) from pyarrow.fs import FileSelector entries = [ @@ -109,7 +111,7 @@ path2 = self._strip_protocol(path2).rstrip("/") with self._open(path1, "rb") as lstream: - tmp_fname = "/".join([self._parent(path2), f".tmp.{secrets.token_hex(16)}"]) + tmp_fname = f"{path2}.tmp.{secrets.token_hex(6)}" try: with self.open(tmp_fname, "wb") as rstream: shutil.copyfileobj(lstream, rstream) @@ -144,18 +146,24 @@ self.fs.delete_file(path) @wrap_exceptions - def _open(self, path, mode="rb", block_size=None, **kwargs): + def _open(self, path, mode="rb", block_size=None, seekable=False, **kwargs): if mode == "rb": - method = self.fs.open_input_stream + if seekable: + method = self.fs.open_input_file + else: + method = self.fs.open_input_stream elif mode == "wb": method = self.fs.open_output_stream + elif mode == "ab": + method = self.fs.open_append_stream else: raise ValueError(f"unsupported mode for Arrow filesystem: {mode!r}") _kwargs = {} - if PYARROW_VERSION[0] >= 4: - # disable compression auto-detection - _kwargs["compression"] = None + if mode != "rb" or not seekable: + if PYARROW_VERSION[0] >= 4: + # disable compression auto-detection + _kwargs["compression"] = None stream = method(path, **_kwargs) return ArrowFile(self, stream, path, mode, block_size, **kwargs) @@ -178,9 +186,14 @@ path = self._strip_protocol(path) self.fs.delete_dir(path) + @wrap_exceptions + def modified(self, path): + path = self._strip_protocol(path) + return self.fs.get_file_info(path).mtime + @mirror_from( - "stream", ["read", "seek", "tell", "write", "readable", "writable", "close"] + "stream", ["read", "seek", "tell", "write", "readable", "writable", "close", "size"] ) class ArrowFile(io.IOBase): def __init__(self, fs, stream, path, mode, block_size=None, **kwargs): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/cached.py new/filesystem_spec-2022.11.0/fsspec/implementations/cached.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/cached.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/cached.py 2022-11-10 03:21:38.000000000 +0100 @@ -15,7 +15,7 @@ from fsspec.spec import AbstractBufferedFile from fsspec.utils import infer_compression -logger = logging.getLogger("fsspec") +logger = logging.getLogger("fsspec.cached") class CachingFileSystem(AbstractFileSystem): @@ -232,6 +232,44 @@ rmtree(self.storage[-1]) self.load_cache() + def clear_expired_cache(self, expiry_time=None): + """Remove all expired files and metadata from the cache + + In the case of multiple cache locations, this clears only the last one, + which is assumed to be the read/write one. + + Parameters + ---------- + expiry_time: int + The time in seconds after which a local copy is considered useless. + If not defined the default is equivalent to the attribute from the + file caching instantiation. + """ + + if not expiry_time: + expiry_time = self.expiry + + self._check_cache() + + for path, detail in self.cached_files[-1].copy().items(): + if time.time() - detail["time"] > expiry_time: + if self.same_names: + basename = os.path.basename(detail["original"]) + fn = os.path.join(self.storage[-1], basename) + else: + fn = os.path.join(self.storage[-1], detail["fn"]) + if os.path.exists(fn): + os.remove(fn) + self.cached_files[-1].pop(path) + + if self.cached_files[-1]: + cache_path = os.path.join(self.storage[-1], "cache") + with open(cache_path, "wb") as fc: + pickle.dump(self.cached_files[-1], fc) + else: + rmtree(self.storage[-1]) + self.load_cache() + def pop_from_cache(self, path): """Remove cached version of given file @@ -389,6 +427,7 @@ "_check_cache", "_mkcache", "clear_cache", + "clear_expired_cache", "pop_from_cache", "_mkcache", "local_file", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/http.py new/filesystem_spec-2022.11.0/fsspec/implementations/http.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/http.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/http.py 2022-11-10 03:21:38.000000000 +0100 @@ -348,7 +348,7 @@ if block_size and size: return HTTPFile( self, - self.encode_url(path), + path, session=session, block_size=block_size, mode=mode, @@ -361,7 +361,7 @@ else: return HTTPStreamFile( self, - self.encode_url(path), + path, mode=mode, loop=self.loop, session=session, @@ -377,7 +377,7 @@ pass return AsyncStreamFile( self, - self.encode_url(path), + path, loop=self.loop, session=session, size=size, @@ -597,7 +597,7 @@ """ logger.debug(f"Fetch all for {self}") if not isinstance(self.cache, AllBytes): - r = await self.session.get(self.url, **self.kwargs) + r = await self.session.get(self.fs.encode_url(self.url), **self.kwargs) async with r: r.raise_for_status() out = await r.read() @@ -621,7 +621,9 @@ headers = kwargs.pop("headers", {}).copy() headers["Range"] = "bytes=%i-%i" % (start, end - 1) logger.debug(str(self.url) + " : " + headers["Range"]) - r = await self.session.get(self.url, headers=headers, **kwargs) + r = await self.session.get( + self.fs.encode_url(self.url), headers=headers, **kwargs + ) async with r: if r.status == 416: # range request outside file @@ -690,7 +692,7 @@ super().__init__(fs=fs, path=url, mode=mode, cache_type="none", **kwargs) async def cor(): - r = await self.session.get(url, **kwargs).__aenter__() + r = await self.session.get(self.fs.encode_url(url), **kwargs).__aenter__() self.fs._raise_not_found_for_status(r, url) return r @@ -733,7 +735,9 @@ async def read(self, num=-1): if self.r is None: - r = await self.session.get(self.url, **self.kwargs).__aenter__() + r = await self.session.get( + self.fs.encode_url(self.url), **self.kwargs + ).__aenter__() self.fs._raise_not_found_for_status(r, self.url) self.r = r out = await self.r.content.read(num) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/libarchive.py new/filesystem_spec-2022.11.0/fsspec/implementations/libarchive.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/libarchive.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/libarchive.py 2022-11-10 03:21:38.000000000 +0100 @@ -107,7 +107,7 @@ ---------- fo: str or file-like Contains ZIP, and must exist. If a str, will fetch file using - `open_files()`, which must return one file exactly. + :meth:`~fsspec.open_files`, which must return one file exactly. mode: str Currently, only 'r' accepted target_protocol: str (optional) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/memory.py new/filesystem_spec-2022.11.0/fsspec/implementations/memory.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/memory.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/memory.py 2022-11-10 03:21:38.000000000 +0100 @@ -40,7 +40,7 @@ "name": path, "size": self.store[path].size, "type": "file", - "created": self.store[path].created, + "created": self.store[path].created.timestamp(), } ] paths = set() @@ -55,7 +55,7 @@ "name": p2, "size": self.store[p2].size, "type": "file", - "created": self.store[p2].created, + "created": self.store[p2].created.timestamp(), } ) elif len(p2) > len(starter): @@ -221,6 +221,20 @@ except KeyError as e: raise FileNotFoundError(path) from e + def modified(self, path): + path = self._strip_protocol(path) + try: + return self.store[path].modified + except KeyError: + raise FileNotFoundError(path) + + def created(self, path): + path = self._strip_protocol(path) + try: + return self.store[path].created + except KeyError: + raise FileNotFoundError(path) + def rm(self, path, recursive=False, maxdepth=None): if isinstance(path, str): path = self._strip_protocol(path) @@ -252,7 +266,8 @@ logger.debug("open file %s", path) self.fs = fs self.path = path - self.created = datetime.utcnow().timestamp() + self.created = datetime.utcnow() + self.modified = datetime.utcnow() if data: super().__init__(data) self.seek(0) @@ -272,3 +287,4 @@ def commit(self): self.fs.store[self.path] = self + self.modified = datetime.utcnow() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/reference.py new/filesystem_spec-2022.11.0/fsspec/implementations/reference.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/reference.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/reference.py 2022-11-10 03:21:38.000000000 +0100 @@ -125,11 +125,11 @@ max_gap, max_block: int For merging multiple concurrent requests to the same remote file. Neighboring byte ranges will only be merged when their - inter-range gap is <= `max_gap`. Default is 64KB. Set to 0 + inter-range gap is <= ``max_gap``. Default is 64KB. Set to 0 to only merge when it requires no extra bytes. Pass a negative number to disable merging, appropriate for local target files. Neighboring byte ranges will only be merged when the size of - the aggregated range is <= `max_block`. Default is 256MB. + the aggregated range is <= ``max_block``. Default is 256MB. kwargs : passed to parent class """ super().__init__(loop=loop, **kwargs) @@ -338,7 +338,7 @@ list(urls2), list(starts2), list(ends2), - sort=False, + sort=True, max_gap=self.max_gap, max_block=self.max_block, ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/sftp.py new/filesystem_spec-2022.11.0/fsspec/implementations/sftp.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/sftp.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/sftp.py 2022-11-10 03:21:38.000000000 +0100 @@ -1,5 +1,6 @@ import datetime import logging +import os import types import uuid from stat import S_ISDIR, S_ISLNK @@ -129,9 +130,11 @@ logger.debug("Put file %s into %s" % (lpath, rpath)) self.ftp.put(lpath, rpath) - def get(self, rpath, lpath, callback=None, **kwargs): - logger.debug("Get file %s into %s" % (rpath, lpath)) - self.ftp.get(rpath, lpath) + def get_file(self, rpath, lpath, **kwargs): + if self.isdir(rpath): + os.makedirs(lpath, exist_ok=True) + else: + self.ftp.get(self._strip_protocol(rpath), lpath) def _open(self, path, mode="rb", block_size=None, **kwargs): """ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/tests/conftest.py new/filesystem_spec-2022.11.0/fsspec/implementations/tests/conftest.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/tests/conftest.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/tests/conftest.py 2022-11-10 03:21:38.000000000 +0100 @@ -2,21 +2,33 @@ import pytest +from fsspec.implementations.arrow import ArrowFSWrapper from fsspec.implementations.local import LocalFileSystem - +from fsspec.implementations.memory import MemoryFileSystem # A dummy filesystem that has a list of protocols + + class MultiProtocolFileSystem(LocalFileSystem): protocol = ["file", "other"] -FILESYSTEMS = {"local": LocalFileSystem, "multi": MultiProtocolFileSystem} +FILESYSTEMS = { + "local": LocalFileSystem, + "multi": MultiProtocolFileSystem, + "memory": MemoryFileSystem, +} READ_ONLY_FILESYSTEMS = [] @pytest.fixture(scope="function") def fs(request): + pyarrow_fs = pytest.importorskip("pyarrow.fs") + FileSystem = pyarrow_fs.FileSystem + if request.param == "arrow": + fs = ArrowFSWrapper(FileSystem.from_uri("file:///")[0]) + return fs cls = FILESYSTEMS[request.param] return cls() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_archive.py new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_archive.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_archive.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_archive.py 2022-11-10 03:21:38.000000000 +0100 @@ -301,7 +301,8 @@ assert fs.find("deeply") == ["deeply/nested/path"] assert fs.find("deeply/") == fs.find("deeply") - def test_walk(self, scenario: ArchiveTestScenario): + @pytest.mark.parametrize("topdown", [True, False]) + def test_walk(self, scenario: ArchiveTestScenario, topdown): with scenario.provider(archive_data) as archive: fs = fsspec.filesystem(scenario.protocol, fo=archive) expected = [ @@ -310,7 +311,9 @@ ("deeply", ["nested"], []), ("deeply/nested", [], ["path"]), ] - for lhs, rhs in zip(fs.walk(""), expected): + if not topdown: + expected.reverse() + for lhs, rhs in zip(fs.walk("", topdown=topdown), expected): assert lhs[0] == rhs[0] assert sorted(lhs[1]) == sorted(rhs[1]) assert sorted(lhs[2]) == sorted(rhs[2]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_arrow.py new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_arrow.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_arrow.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_arrow.py 2022-11-10 03:21:38.000000000 +0100 @@ -14,11 +14,11 @@ return ArrowFSWrapper(fs) -@pytest.fixture(scope="function") -def remote_dir(fs): +@pytest.fixture(scope="function", params=[False, True]) +def remote_dir(fs, request): directory = secrets.token_hex(16) fs.makedirs(directory) - yield directory + yield ("hdfs://" if request.param else "/") + directory fs.rm(directory, recursive=True) @@ -28,20 +28,28 @@ return entry +def test_strip(fs): + assert fs._strip_protocol("/a/file") == "/a/file" + assert fs._strip_protocol("hdfs:///a/file") == "/a/file" + assert fs._strip_protocol("hdfs://1.1.1.1/a/file") == "/a/file" + assert fs._strip_protocol("hdfs://1.1.1.1:8888/a/file") == "/a/file" + + def test_info(fs, remote_dir): fs.touch(remote_dir + "/a.txt") + remote_dir_strip_protocol = fs._strip_protocol(remote_dir) details = fs.info(remote_dir + "/a.txt") assert details["type"] == "file" - assert details["name"] == remote_dir + "/a.txt" + assert details["name"] == remote_dir_strip_protocol + "/a.txt" assert details["size"] == 0 fs.mkdir(remote_dir + "/dir") details = fs.info(remote_dir + "/dir") assert details["type"] == "directory" - assert details["name"] == remote_dir + "/dir" + assert details["name"] == remote_dir_strip_protocol + "/dir" details = fs.info(remote_dir + "/dir/") - assert details["name"] == remote_dir + "/dir/" + assert details["name"] == remote_dir_strip_protocol + "/dir/" def test_move(fs, remote_dir): @@ -114,12 +122,16 @@ def test_ls(fs, remote_dir): + if remote_dir != "/": + remote_dir = remote_dir + "/" + remote_dir_strip_protocol = fs._strip_protocol(remote_dir) fs.mkdir(remote_dir + "dir/") files = set() for no in range(8): file = remote_dir + f"dir/test_{no}" + # we also want to make sure `fs.touch` works with protocol fs.touch(file) - files.add(file) + files.add(remote_dir_strip_protocol + f"dir/test_{no}") assert set(fs.ls(remote_dir + "dir/")) == files @@ -134,6 +146,8 @@ def test_mkdir(fs, remote_dir): + if remote_dir != "/": + remote_dir = remote_dir + "/" fs.mkdir(remote_dir + "dir/") assert fs.isdir(remote_dir + "dir/") assert len(fs.ls(remote_dir + "dir/")) == 0 @@ -181,3 +195,27 @@ with fs.open(remote_dir + "/b.txt", "rb") as stream: assert stream.read() == data * 400 + + +def test_open_append(fs, remote_dir): + data = b"dvc.org" + + with fs.open(remote_dir + "/a.txt", "wb") as stream: + stream.write(data) + + with fs.open(remote_dir + "/a.txt", "ab") as stream: + stream.write(data) + + with fs.open(remote_dir + "/a.txt") as stream: + assert stream.read() == 2 * data + + +def test_open_seekable(fs, remote_dir): + data = b"dvc.org" + + with fs.open(remote_dir + "/a.txt", "wb") as stream: + stream.write(data) + + with fs.open(remote_dir + "/a.txt", "rb", seekable=True) as file: + file.seek(2) + assert file.read() == data[2:] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_cached.py new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_cached.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_cached.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_cached.py 2022-11-10 03:21:38.000000000 +0100 @@ -153,6 +153,109 @@ assert len(os.listdir(cache1)) < 2 +def test_clear_expired(tmp_path): + def __ager(cache_fn, fn): + """ + Modify the cache file to virtually add time lag to selected files. + + Parameters + --------- + cache_fn: str + cache path + fn: str + file name to be modified + """ + import pathlib + import time + + if os.path.exists(cache_fn): + with open(cache_fn, "rb") as f: + cached_files = pickle.load(f) + fn_posix = pathlib.Path(fn).as_posix() + cached_files[fn_posix]["time"] = cached_files[fn_posix]["time"] - 691200 + assert os.access(cache_fn, os.W_OK), "Cache is not writable" + with open(cache_fn, "wb") as f: + pickle.dump(cached_files, f) + time.sleep(1) + + origin = tmp_path.joinpath("origin") + cache1 = tmp_path.joinpath("cache1") + cache2 = tmp_path.joinpath("cache2") + cache3 = tmp_path.joinpath("cache3") + + origin.mkdir() + cache1.mkdir() + cache2.mkdir() + cache3.mkdir() + + data = b"test data" + f1 = origin.joinpath("afile") + f2 = origin.joinpath("bfile") + f3 = origin.joinpath("cfile") + f4 = origin.joinpath("dfile") + + with open(f1, "wb") as f: + f.write(data) + with open(f2, "wb") as f: + f.write(data) + with open(f3, "wb") as f: + f.write(data) + with open(f4, "wb") as f: + f.write(data) + + # populates first cache + fs = fsspec.filesystem( + "filecache", target_protocol="file", cache_storage=str(cache1), cache_check=1 + ) + assert fs.cat(str(f1)) == data + + # populates "last" cache if file not found in first one + fs = fsspec.filesystem( + "filecache", + target_protocol="file", + cache_storage=[str(cache1), str(cache2)], + cache_check=1, + ) + assert fs.cat(str(f2)) == data + assert fs.cat(str(f3)) == data + assert len(os.listdir(cache2)) == 3 + + # force the expiration + cache_fn = os.path.join(fs.storage[-1], "cache") + __ager(cache_fn, f2) + + # remove from cache2 the expired files + fs.clear_expired_cache() + assert len(os.listdir(cache2)) == 2 + + # check complete cleanup + __ager(cache_fn, f3) + + fs.clear_expired_cache() + assert not fs._check_file(f2) + assert not fs._check_file(f3) + assert len(os.listdir(cache2)) < 2 + + # check cache1 to be untouched after cleaning + assert len(os.listdir(cache1)) == 2 + + # check cleaning with 'same_name' option enabled + fs = fsspec.filesystem( + "filecache", + target_protocol="file", + cache_storage=[str(cache1), str(cache2), str(cache3)], + same_names=True, + cache_check=1, + ) + assert fs.cat(str(f4)) == data + + cache_fn = os.path.join(fs.storage[-1], "cache") + __ager(cache_fn, f4) + + fs.clear_expired_cache() + assert not fs._check_file(str(f4)) + + def test_pop(): import tempfile diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_common.py new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_common.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_common.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_common.py 2022-11-10 03:21:38.000000000 +0100 @@ -18,14 +18,15 @@ fs.rm(temp_file) -@pytest.mark.parametrize("fs", ["local"], indirect=["fs"]) +@pytest.mark.parametrize("fs", ["local", "memory", "arrow"], indirect=["fs"]) def test_modified(fs: AbstractFileSystem, temp_file): try: fs.touch(temp_file) - created = fs.created(path=temp_file) + # created = fs.created(path=temp_file) + created = datetime.datetime.utcnow() # pyarrow only have modified time.sleep(0.05) fs.touch(temp_file) - modified = fs.modified(path=temp_file) + modified = fs.modified(path=temp_file).replace(tzinfo=None) assert isinstance(modified, datetime.datetime) assert modified > created finally: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_http.py new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_http.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_http.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_http.py 2022-11-10 03:21:38.000000000 +0100 @@ -452,3 +452,12 @@ fs = fsspec.filesystem("http", encoded=False) out = fs.cat(server + "/Hello: Günter", headers={"give_path": "true"}) assert json.loads(out)["path"] == "/Hello:%20G%C3%BCnter" + + +def test_with_cache(server): + fs = fsspec.filesystem("http", headers={"head_ok": "true", "give_length": "true"}) + fn = server + "/index/realfile" + fs1 = fsspec.filesystem("blockcache", fs=fs) + with fs1.open(fn, "rb") as f: + out = f.read() + assert out == fs1.cat(fn) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_sftp.py new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_sftp.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/tests/test_sftp.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/tests/test_sftp.py 2022-11-10 03:21:38.000000000 +0100 @@ -1,3 +1,4 @@ +import os import shlex import subprocess import time @@ -93,6 +94,27 @@ assert f.read() == b"hello" +@pytest.mark.parametrize("protocol", ["sftp", "ssh"]) +def test_get_dir(protocol, ssh, root_path): + f = fsspec.filesystem(protocol, **ssh) + f.mkdirs(root_path + "deeper", exist_ok=True) + f.touch(root_path + "deeper/afile") + f.get(root_path, ".", recursive=True) + + assert os.path.isdir("./deeper") + assert os.path.isfile("./deeper/afile") + + f.get( + protocol + "://{username}:{password}@{host}:{port}" + "{root_path}".format(root_path=root_path, **ssh), + "./test2", + recursive=True, + ) + + assert os.path.isdir("./test2/deeper") + assert os.path.isfile("./test2/deeper/afile") + + @pytest.fixture(scope="module") def netloc(ssh): username = ssh.get("username") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/webhdfs.py new/filesystem_spec-2022.11.0/fsspec/implementations/webhdfs.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/webhdfs.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/webhdfs.py 2022-11-10 03:21:38.000000000 +0100 @@ -24,7 +24,7 @@ Three auth mechanisms are supported: insecure: no auth is done, and the user is assumed to be whoever they - say they are (parameter `user`), or a predefined value such as + say they are (parameter ``user``), or a predefined value such as "dr.who" if not given spnego: when kerberos authentication is enabled, auth is negotiated by requests_kerberos https://github.com/requests/requests-kerberos . @@ -79,9 +79,9 @@ HDFS cluster is behind a proxy, running on Docker or otherwise has a mismatch between the host-names given by the name-node and the address by which to refer to them from the client. If a dict, - maps host names `host->data_proxy[host]`; if a callable, full + maps host names ``host->data_proxy[host]``; if a callable, full URLs are passed, and function must conform to - `url->data_proxy(url)`. + ``url->data_proxy(url)``. use_https: bool Whether to connect to the Name-node using HTTPS instead of HTTP kwargs diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/implementations/zip.py new/filesystem_spec-2022.11.0/fsspec/implementations/zip.py --- old/filesystem_spec-2022.10.0/fsspec/implementations/zip.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/implementations/zip.py 2022-11-10 03:21:38.000000000 +0100 @@ -34,7 +34,7 @@ ---------- fo: str or file-like Contains ZIP, and must exist. If a str, will fetch file using - `open_files()`, which must return one file exactly. + :meth:`~fsspec.open_files`, which must return one file exactly. mode: str Currently, only 'r' accepted target_protocol: str (optional) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/mapping.py new/filesystem_spec-2022.11.0/fsspec/mapping.py --- old/filesystem_spec-2022.10.0/fsspec/mapping.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/mapping.py 2022-11-10 03:21:38.000000000 +0100 @@ -1,4 +1,6 @@ import array +import posixpath +import warnings from collections.abc import MutableMapping from .core import url_to_fs @@ -34,9 +36,8 @@ def __init__(self, root, fs, check=False, create=False, missing_exceptions=None): self.fs = fs - self.root = fs._strip_protocol(root).rstrip( - "/" - ) # we join on '/' in _key_to_str + self.root = fs._strip_protocol(root).rstrip("/") + self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1] if missing_exceptions is None: missing_exceptions = ( FileNotFoundError, @@ -120,11 +121,16 @@ def _key_to_str(self, key): """Generate full path for the key""" - if isinstance(key, (tuple, list)): - key = str(tuple(key)) - else: + if not isinstance(key, str): + # raise TypeError("key must be of type `str`, got `{type(key).__name__}`" + warnings.warn( + "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError", + DeprecationWarning, + ) + if isinstance(key, list): + key = tuple(key) key = str(key) - return self.fs._strip_protocol("/".join([self.root, key]) if self.root else key) + return f"{self._root_key_to_str}{key}" def _str_to_key(self, s): """Strip path of to leave key name""" @@ -142,6 +148,7 @@ return result def pop(self, key, default=None): + """Pop data""" result = self.__getitem__(key, default) try: del self[key] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/spec.py new/filesystem_spec-2022.11.0/fsspec/spec.py --- old/filesystem_spec-2022.10.0/fsspec/spec.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/spec.py 2022-11-10 03:21:38.000000000 +0100 @@ -315,7 +315,7 @@ - type of entry, "file", "directory" or other Additional information - may be present, aproriate to the file-system, e.g., generation, + may be present, appropriate to the file-system, e.g., generation, checksum, etc. May use refresh=True|False to allow use of self._ls_from_cache to @@ -342,7 +342,7 @@ def _ls_from_cache(self, path): """Check cache for listing - Returns listing, if found (may me empty list for a directly that exists + Returns listing, if found (may be empty list for a directly that exists but contains nothing), None if not in cache. """ parent = self._parent(path) @@ -362,7 +362,7 @@ except KeyError: pass - def walk(self, path, maxdepth=None, **kwargs): + def walk(self, path, maxdepth=None, topdown=True, **kwargs): """Return all files belows path List all files, recursing into subdirectories; output is iterator-style, @@ -378,6 +378,9 @@ maxdepth: int Maximum recursion depth. None means limitless, but not recommended on link-based file-systems. + topdown: bool (True) + Whether to walk the directory tree from the top downwards or from + the bottom upwards. kwargs: passed to ``ls`` """ path = self._strip_protocol(path) @@ -408,10 +411,13 @@ else: files[name] = info - if detail: + if not detail: + dirs = list(dirs) + files = list(files) + + if topdown: + # Yield before recursion if walking top down yield path, dirs, files - else: - yield path, list(dirs), list(files) if maxdepth is not None: maxdepth -= 1 @@ -419,7 +425,13 @@ return for d in full_dirs: - yield from self.walk(d, maxdepth=maxdepth, detail=detail, **kwargs) + yield from self.walk( + d, maxdepth=maxdepth, detail=detail, topdown=topdown, **kwargs + ) + + if not topdown: + # Yield after recursion if walking bottom up + yield path, dirs, files def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): """List all files below path. @@ -1167,7 +1179,7 @@ See Also -------- - utils.read_block + :func:`fsspec.utils.read_block` """ with self.open(fn, "rb") as f: size = f.size diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/tests/conftest.py new/filesystem_spec-2022.11.0/fsspec/tests/conftest.py --- old/filesystem_spec-2022.10.0/fsspec/tests/conftest.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/tests/conftest.py 2022-11-10 03:21:38.000000000 +0100 @@ -10,7 +10,7 @@ requests = pytest.importorskip("requests") port = 9898 data = b"\n".join([b"some test data"] * 1000) -realfile = "http://localhost:%i/index/realfile" % port +realfile = "http://127.0.0.1:%i/index/realfile" % port index = b'<a href="%s">Link</a>' % realfile.encode() listing = open( os.path.join(os.path.dirname(__file__), "data", "listing.html"), "rb" @@ -139,7 +139,7 @@ th.daemon = True th.start() try: - yield "http://localhost:%i" % port + yield "http://127.0.0.1:%i" % port finally: httpd.socket.close() httpd.shutdown() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/fsspec/tests/test_mapping.py new/filesystem_spec-2022.11.0/fsspec/tests/test_mapping.py --- old/filesystem_spec-2022.10.0/fsspec/tests/test_mapping.py 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/fsspec/tests/test_mapping.py 2022-11-10 03:21:38.000000000 +0100 @@ -1,6 +1,8 @@ import os import pickle +import platform import sys +import uuid import pytest @@ -149,3 +151,66 @@ def test_empty_url(): m = fsspec.get_mapper() assert isinstance(m.fs, LocalFileSystem) + + +def test_fsmap_access_with_root_prefix(tmp_path): + # "/a" and "a" are the same for LocalFileSystem + tmp_path.joinpath("a").write_bytes(b"data") + m = fsspec.get_mapper(f"file://{tmp_path}") + assert m["/a"] == m["a"] == b"data" + + # "/a" and "a" differ for MemoryFileSystem + m = fsspec.get_mapper(f"memory://{uuid.uuid4()}") + m["/a"] = b"data" + + assert m["/a"] == b"data" + with pytest.raises(KeyError): + _ = m["a"] + + +@pytest.mark.parametrize( + "key", + [ + pytest.param(b"k", id="bytes"), + pytest.param(1234, id="int"), + pytest.param((1,), id="tuple"), + pytest.param([""], id="list"), + ], +) +def test_fsmap_non_str_keys(key): + m = fsspec.get_mapper() + + # Once the deprecation period passes + # FSMap.__getitem__ should raise TypeError for non-str keys + # with pytest.raises(TypeError): + # _ = m[key] + + with pytest.warns(DeprecationWarning): + with pytest.raises(KeyError): + _ = m[key] + + +def test_fsmap_error_on_protocol_keys(): + root = uuid.uuid4() + m = fsspec.get_mapper(f"memory://{root}", create=True) + m["a"] = b"data" + + assert m["a"] == b"data" + with pytest.raises(KeyError): + _ = m[f"memory://{root}/a"] + + +# on Windows opening a directory will raise PermissionError +# see: https://bugs.python.org/issue43095 +@pytest.mark.skipif( + platform.system() == "Windows", reason="raises PermissionError on windows" +) +def test_fsmap_access_with_suffix(tmp_path): + tmp_path.joinpath("b").mkdir() + tmp_path.joinpath("b", "a").write_bytes(b"data") + m = fsspec.get_mapper(f"file://{tmp_path}") + + with pytest.raises(KeyError): + _ = m["b/"] + + assert m["b/a/"] == b"data" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2022.10.0/tox.ini new/filesystem_spec-2022.11.0/tox.ini --- old/filesystem_spec-2022.10.0/tox.ini 2022-10-19 16:54:46.000000000 +0200 +++ new/filesystem_spec-2022.11.0/tox.ini 2022-11-10 03:21:38.000000000 +0100 @@ -30,6 +30,7 @@ pytest-cov pytest-mock pytest-vcr + py fusepy tomli < 2 msgpack-python @@ -64,7 +65,7 @@ {[core]conda_deps} httpretty aiobotocore - "moto<3.0" + "moto>=4" flask changedir=.tox/s3fs/tmp whitelist_externals=