Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-fsspec for openSUSE:Factory checked in at 2021-09-22 22:12:59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-fsspec (Old) and /work/SRC/openSUSE:Factory/.python-fsspec.new.1899 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-fsspec" Wed Sep 22 22:12:59 2021 rev:15 rq:920851 version:2021.8.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-fsspec/python-fsspec.changes 2021-08-28 22:29:56.410026760 +0200 +++ /work/SRC/openSUSE:Factory/.python-fsspec.new.1899/python-fsspec.changes 2021-09-22 22:13:20.284342023 +0200 @@ -1,0 +2,27 @@ +Tue Sep 21 10:00:47 UTC 2021 - Ben Greiner <c...@bnavigator.de> + +- Update to 2021.8.1 + * HTTP get_file/put_file APIs now support callbacks (#731) + * New HTTP put_file method for transferring data to the remote + server (chunked) (#731) + * Customizable HTTP client initializers (through passing + get_client argument) (#731, #701) + * Support for various checksum / fingerprint headers in HTTP + info() (#731) + * local implementation of rm_file (#736) + * local speed improvements (#711) + * sharing options in SMB (#706) + * streaming cat/get for ftp (#700) + * check for remote directory when putting (#737) + * storage_option update handling (#734( + * await HTTP call before checking status (#726) + * ftp connect (#722) + * bytes conversion of times in mapper (#721) + * variable overwrite in WholeFileCache cat (#719) + * http file size again (#718) + * rm and create directories in ftp (#716, #703) + * list of files in async put (#713) + * bytes to dict in cat (#710) +- Drop fsspec-pr710-bytesreturn.patch merged upstream + +------------------------------------------------------------------- Old: ---- fsspec-2021.07.0.tar.gz fsspec-pr710-bytesreturn.patch New: ---- fsspec-2021.08.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-fsspec.spec ++++++ --- /var/tmp/diff_new_pack.ntXj21/_old 2021-09-22 22:13:20.876342515 +0200 +++ /var/tmp/diff_new_pack.ntXj21/_new 2021-09-22 22:13:20.880342518 +0200 @@ -26,17 +26,15 @@ %bcond_with test %endif %define skip_python2 1 -%define ghversion 2021.07.0 +%define ghversion 2021.08.1 Name: python-fsspec%{psuffix} 
-Version: 2021.7.0 +Version: 2021.8.1 Release: 0 Summary: Filesystem specification package License: BSD-3-Clause URL: https://github.com/intake/filesystem_spec # the tests are only in the GitHub archive Source: %{url}/archive/%{ghversion}.tar.gz#/fsspec-%{ghversion}.tar.gz -# PATCH-FIX-UPSTREAM fsspec-pr710-bytesreturn.patch -- gh#intake/filesystem_spec#710 and gh#zarr-developers/zarr-python#812 -Patch0: %{url}/pull/710.patch#/fsspec-pr710-bytesreturn.patch BuildRequires: %{python_module base >= 3.6} BuildRequires: %{python_module importlib_metadata if %python-base < 3.8} BuildRequires: %{python_module setuptools} ++++++ fsspec-2021.07.0.tar.gz -> fsspec-2021.08.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/docs/source/changelog.rst new/filesystem_spec-2021.08.1/docs/source/changelog.rst --- old/filesystem_spec-2021.07.0/docs/source/changelog.rst 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/docs/source/changelog.rst 2021-08-31 16:14:55.000000000 +0200 @@ -1,8 +1,38 @@ Changelog ========= -Dev ---- +2021.08.1 +--------- + +Enhancements + +- HTTP get_file/put_file APIs now support callbacks (#731) +- New HTTP put_file method for transferring data to the remote server (chunked) (#731) +- Customizable HTTP client initializers (through passing ``get_client`` argument) (#731, #701) +- Support for various checksum / fingerprint headers in HTTP ``info()`` (#731) +- local implementation of rm_file (#736) +- local speed improvements (#711) +- sharing options in SMB (#706) +- streaming cat/get for ftp (#700) + +Fixes + +- check for remote directory when putting (#737) +- storage_option update handling (#734( +- await HTTP call before checking status (#726) +- ftp connect (#722) +- bytes conversion of times in mapper (#721) +- variable overwrite in WholeFileCache cat (#719) +- http file size again (#718) +- rm and create directories in ftp (#716, #703) +- list of 
files in async put (#713) +- bytes to dict in cat (#710) + + +2021.07.0 +--------- + +Enhancements - callbacks (#697) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/_version.py new/filesystem_spec-2021.08.1/fsspec/_version.py --- old/filesystem_spec-2021.07.0/fsspec/_version.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/_version.py 2021-08-31 16:14:55.000000000 +0200 @@ -22,9 +22,9 @@ # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). - git_refnames = " (tag: 2021.07.0)" - git_full = "6de675e24adcbc1a8c3d74f5956a79bc95c34c2b" - git_date = "2021-07-12 20:42:46 -0400" + git_refnames = " (tag: 2021.08.1)" + git_full = "399cf82e6fdfa6d83ba219472f3fb2536c0ed702" + git_date = "2021-08-31 10:14:55 -0400" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/asyn.py new/filesystem_spec-2021.08.1/fsspec/asyn.py --- old/filesystem_spec-2021.07.0/fsspec/asyn.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/asyn.py 2021-08-31 16:14:55.000000000 +0200 @@ -378,18 +378,22 @@ lpath = make_path_posix(lpath) fs = LocalFileSystem() lpaths = fs.expand_path(lpath, recursive=recursive) - dirs = [l for l in lpaths if os.path.isdir(l)] - rdirs = other_paths(dirs, rpath) + rpaths = other_paths( + lpaths, rpath, exists=isinstance(rpath, str) and await self._isdir(rpath) + ) + + is_dir = {l: os.path.isdir(l) for l in lpaths} + rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]] + file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]] + await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs]) - files = sorted(set(lpaths) - set(dirs)) - rpaths = 
other_paths(files, rpath) batch_size = kwargs.pop("batch_size", self.batch_size) coros = [] - callback.call("set_size", len(files)) - for lpath, rpath in zip(files, rpaths): - callback.branch(lpath, rpath, kwargs) - coros.append(self._put_file(lpath, rpath, **kwargs)) + callback.call("set_size", len(file_pairs)) + for lfile, rfile in file_pairs: + callback.branch(lfile, rfile, kwargs) + coros.append(self._put_file(lfile, rfile, **kwargs)) return await _run_coros_in_chunks( coros, batch_size=batch_size, callback=callback diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/implementations/cached.py new/filesystem_spec-2021.08.1/fsspec/implementations/cached.py --- old/filesystem_spec-2021.07.0/fsspec/implementations/cached.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/implementations/cached.py 2021-08-31 16:14:55.000000000 +0200 @@ -572,8 +572,8 @@ out = {} callback.set_size(len(paths)) - for path, fn in zip(paths, fns): - out[path] = open(fn, "rb").read() + for p, fn in zip(paths, fns): + out[p] = open(fn, "rb").read() callback.relative_update(1) if isinstance(path, str) and len(paths) == 1 and recursive is False: out = out[paths[0]] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/implementations/ftp.py new/filesystem_spec-2021.08.1/fsspec/implementations/ftp.py --- old/filesystem_spec-2021.07.0/fsspec/implementations/ftp.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/implementations/ftp.py 2021-08-31 16:14:55.000000000 +0200 @@ -183,6 +183,14 @@ self.ftp.delete(path) self.invalidate_cache(self._parent(path)) + def rm(self, path, recursive=False, maxdepth=None): + paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth) + for p in reversed(paths): + if self.isfile(p): + self.rm_file(p) + else: + self.rmdir(p) + def mkdir(self, path: str, 
create_parents: bool = True, **kwargs: Any) -> None: path = self._strip_protocol(path) parent = self._parent(path) @@ -302,7 +310,7 @@ self.fs.ftp.abort() self.fs.ftp.getmultiline() except Error: - self.fs.ftp._connect() + self.fs._connect() return b"".join(out) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/implementations/http.py new/filesystem_spec-2021.08.1/fsspec/implementations/http.py --- old/filesystem_spec-2021.07.0/fsspec/implementations/http.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/implementations/http.py 2021-08-31 16:14:55.000000000 +0200 @@ -11,6 +11,7 @@ import requests from fsspec.asyn import AsyncFileSystem, sync, sync_wrapper +from fsspec.callbacks import _DEFAULT_CALLBACK from fsspec.exceptions import FSTimeoutError from fsspec.spec import AbstractBufferedFile from fsspec.utils import DEFAULT_BLOCK_SIZE, tokenize @@ -58,6 +59,7 @@ asynchronous=False, loop=None, client_kwargs=None, + get_client=get_client, **storage_options, ): """ @@ -79,6 +81,10 @@ Passed to aiohttp.ClientSession, see https://docs.aiohttp.org/en/stable/client_reference.html For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}`` + get_client: Callable[..., aiohttp.ClientSession] + A callable which takes keyword arguments and constructs + an aiohttp.ClientSession. It's state will be managed by + the HTTPFileSystem class. 
storage_options: key-value Any other parameters passed on to requests cache_type, cache_options: defaults used in open @@ -90,6 +96,7 @@ self.cache_type = cache_type self.cache_options = cache_options self.client_kwargs = client_kwargs or {} + self.get_client = get_client self.kwargs = storage_options self._session = None @@ -121,7 +128,7 @@ async def set_session(self): if self._session is None: - self._session = await get_client(loop=self.loop, **self.client_kwargs) + self._session = await self.get_client(loop=self.loop, **self.client_kwargs) if not self.asynchronous: weakref.finalize(self, self.close_session, self.loop, self._session) return self._session @@ -219,22 +226,65 @@ kw["headers"] = headers session = await self.set_session() async with session.get(url, **kw) as r: - self._raise_not_found_for_status(r, url) out = await r.read() + self._raise_not_found_for_status(r, url) return out - async def _get_file(self, rpath, lpath, chunk_size=5 * 2 ** 20, **kwargs): + async def _get_file( + self, rpath, lpath, chunk_size=5 * 2 ** 20, callback=_DEFAULT_CALLBACK, **kwargs + ): kw = self.kwargs.copy() kw.update(kwargs) logger.debug(rpath) session = await self.set_session() async with session.get(rpath, **self.kwargs) as r: + try: + size = int(r.headers["content-length"]) + except (ValueError, KeyError): + size = None + + callback.set_size(size) self._raise_not_found_for_status(r, rpath) with open(lpath, "wb") as fd: chunk = True while chunk: chunk = await r.content.read(chunk_size) fd.write(chunk) + callback.relative_update(len(chunk)) + + async def _put_file( + self, + rpath, + lpath, + chunk_size=5 * 2 ** 20, + callback=_DEFAULT_CALLBACK, + method="post", + **kwargs, + ): + async def gen_chunks(): + with open(rpath, "rb") as f: + callback.set_size(f.seek(0, 2)) + f.seek(0) + + chunk = f.read(64 * 1024) + while chunk: + yield chunk + callback.relative_update(len(chunk)) + chunk = f.read(64 * 1024) + + kw = self.kwargs.copy() + kw.update(kwargs) + session = await 
self.set_session() + + method = method.lower() + if method not in ("post", "put"): + raise ValueError( + f"method has to be either 'post' or 'put', not: {method!r}" + ) + + meth = getattr(session, method) + async with meth(lpath, data=gen_chunks(), **kw) as resp: + self._raise_not_found_for_status(resp, lpath) async def _exists(self, path, **kwargs): kw = self.kwargs.copy() @@ -316,22 +366,29 @@ which case size will be given as None (and certain operations on the corresponding file will not work). """ - size = False - kw = self.kwargs.copy() - kw.update(kwargs) + info = {} + session = await self.set_session() + for policy in ["head", "get"]: try: - session = await self.set_session() - size = await _file_size(url, size_policy=policy, session=session, **kw) - if size: + info.update( + await _file_info( + url, + size_policy=policy, + session=session, + **self.kwargs, + **kwargs, + ) + ) + if info.get("size") is not None: break - except Exception as e: - logger.debug((str(e))) - else: - # get failed, so conclude URL does not exist - if size is False: - raise FileNotFoundError(url) - return {"name": url, "size": size or None, "type": "file"} + except Exception as exc: + if policy == "get": + # If get failed, then raise a FileNotFoundError + raise FileNotFoundError(url) from exc + logger.debug(str(exc)) + + return {"name": url, "size": None, **info, "type": "file"} async def _glob(self, path, **kwargs): """ @@ -573,13 +630,16 @@ pass def __reduce__(self): - return reopen, ( - self.fs, - self.url, - self.mode, - self.blocksize, - self.cache.name, - self.size, + return ( + reopen, + ( + self.fs, + self.url, + self.mode, + self.blocksize, + self.cache.name, + self.size, + ), ) @@ -610,6 +670,7 @@ async def cor(): r = await self.session.get(url, **kwargs).__aenter__() + self.fs._raise_not_found_for_status(r, url) return r self.r = sync(self.loop, cor) @@ -651,8 +712,8 @@ return out -async def _file_size(url, session=None, size_policy="head", **kwargs): - """Call HEAD on the 
server to get file size +async def _file_info(url, session, size_policy="head", **kwargs): + """Call HEAD on the server to get details about the file (size/checksum etc.) Default operation is to explicitly allow redirects and use encoding 'identity' (no compression) to get the true size of the target. @@ -663,7 +724,8 @@ head = kwargs.get("headers", {}).copy() head["Accept-Encoding"] = "identity" kwargs["headers"] = head - session = session or await get_client() + + info = {} if size_policy == "head": r = await session.head(url, allow_redirects=ar, **kwargs) elif size_policy == "get": @@ -671,14 +733,29 @@ else: raise TypeError('size_policy must be "head" or "get", got %s' "" % size_policy) async with r: + r.raise_for_status() + # TODO: - # recognise lack of 'Accept-Ranges', or 'Accept-Ranges': 'none' (not 'bytes') + # recognise lack of 'Accept-Ranges', + # or 'Accept-Ranges': 'none' (not 'bytes') # to mean streaming only, no random access => return None if "Content-Length" in r.headers: - return int(r.headers["Content-Length"]) + info["size"] = int(r.headers["Content-Length"]) elif "Content-Range" in r.headers: - return int(r.headers["Content-Range"].split("/")[1]) - r.close() + info["size"] = int(r.headers["Content-Range"].split("/")[1]) + + for checksum_field in ["ETag", "Content-MD5", "Digest"]: + if r.headers.get(checksum_field): + info[checksum_field] = r.headers[checksum_field] + + return info + + +async def _file_size(url, session=None, *args, **kwargs): + if session is None: + session = await get_client() + info = await _file_info(url, session=session, *args, **kwargs) + return info.get("size") file_size = sync_wrapper(_file_size) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/implementations/local.py new/filesystem_spec-2021.08.1/fsspec/implementations/local.py --- old/filesystem_spec-2021.07.0/fsspec/implementations/local.py 2021-07-13 02:42:46.000000000 +0200 +++ 
new/filesystem_spec-2021.08.1/fsspec/implementations/local.py 2021-08-31 16:14:55.000000000 +0200 @@ -1,9 +1,11 @@ import datetime import io import os +import os.path as osp import posixpath import re import shutil +import stat import tempfile from fsspec import AbstractFileSystem @@ -76,10 +78,12 @@ # str or path-like path = self._strip_protocol(path) out = os.stat(path, follow_symlinks=False) - link = os.path.islink(path) - if os.path.isdir(path): + link = stat.S_ISLNK(out.st_mode) + if link: + out = os.stat(path, follow_symlinks=True) + if stat.S_ISDIR(out.st_mode): t = "directory" - elif os.path.isfile(path): + elif stat.S_ISREG(out.st_mode): t = "file" else: t = "other" @@ -124,6 +128,9 @@ path2 = self._strip_protocol(path2).rstrip("/") shutil.move(path1, path2) + def rm_file(self, path): + os.remove(path) + def rm(self, path, recursive=False, maxdepth=None): if isinstance(path, str): path = [path] @@ -132,7 +139,7 @@ p = self._strip_protocol(p).rstrip("/") if recursive and self.isdir(p): - if os.path.abspath(p) == os.getcwd(): + if osp.abspath(p) == os.getcwd(): raise ValueError("Cannot delete current working directory") shutil.rmtree(p) else: @@ -188,7 +195,7 @@ if isinstance(path, (list, set, tuple)): return type(path)(make_path_posix(p) for p in path) if "~" in path: - path = os.path.expanduser(path) + path = osp.expanduser(path) if sep == "/": # most common fast case for posix if path.startswith("/"): @@ -202,7 +209,7 @@ # relative path like "path" or "rel\\path" (win) or rel/path" if os.sep == "\\": # abspath made some more '\\' separators - return make_path_posix(os.path.abspath(path)) + return make_path_posix(osp.abspath(path)) else: return os.getcwd() + "/" + path if re.match("/[A-Za-z]:", path): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/implementations/smb.py new/filesystem_spec-2021.08.1/fsspec/implementations/smb.py --- 
old/filesystem_spec-2021.07.0/fsspec/implementations/smb.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/implementations/smb.py 2021-08-31 16:14:55.000000000 +0200 @@ -66,6 +66,7 @@ password=None, timeout=60, encrypt=None, + share_access=None, **kwargs, ): """ @@ -83,13 +84,22 @@ Port to connect with. Usually 445, sometimes 139. username: str or None Username to connect with. Required if Kerberos auth is not being used. - password: str of None + password: str or None User's password on the server, if using username timeout: int Connection timeout in seconds encrypt: bool Whether to force encryption or not, once this has been set to True the session cannot be changed back to False. + share_access: str or None + Specifies the default access applied to file open operations + performed with this file system object. + This affects whether other processes can concurrently open a handle + to the same file. + None (the default): exclusively locks the file until closed. + 'r': Allow other handles to be opened with read access. + 'w': Allow other handles to be opened with write access. + 'd': Allow other handles to be opened with delete access. """ super(SMBFileSystem, self).__init__(**kwargs) self.host = host @@ -99,6 +109,7 @@ self.timeout = timeout self.encrypt = encrypt self.temppath = kwargs.pop("temppath", "") + self.share_access = share_access self._connect() def _connect(self): @@ -193,16 +204,24 @@ """ block_size: int or None If 0, no buffering, 1, line buffering, >1, buffer that many bytes + + Notes + ----- + By specifying 'share_access' in 'kwargs' it is possible to override the + default shared access setting applied in the constructor of this object. 
""" bls = block_size if block_size is not None and block_size >= 0 else -1 wpath = _as_unc_path(self.host, path) + share_access = kwargs.pop("share_access", self.share_access) if "w" in mode and autocommit is False: temp = _as_temp_path(self.host, path, self.temppath) return SMBFileOpener(wpath, temp, mode, block_size=bls, **kwargs) - return smbclient.open_file(wpath, mode, buffering=bls, **kwargs) + return smbclient.open_file( + wpath, mode, buffering=bls, share_access=share_access, **kwargs + ) def copy(self, path1, path2, **kwargs): - """ Copy within two locations in the same filesystem""" + """Copy within two locations in the same filesystem""" wpath1 = _as_unc_path(self.host, path1) wpath2 = _as_unc_path(self.host, path2) smbclient.copyfile(wpath1, wpath2, **kwargs) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/implementations/tests/test_ftp.py new/filesystem_spec-2021.08.1/fsspec/implementations/tests/test_ftp.py --- old/filesystem_spec-2021.07.0/fsspec/implementations/tests/test_ftp.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/implementations/tests/test_ftp.py 2021-08-31 16:14:55.000000000 +0200 @@ -9,6 +9,7 @@ from fsspec import open_files from fsspec.implementations.ftp import FTPFileSystem +ftplib = pytest.importorskip("ftplib") here = os.path.dirname(os.path.abspath(__file__)) @@ -146,8 +147,6 @@ def test_mkdir(ftp_writable): - import ftplib - host, port, user, pw = ftp_writable fs = FTPFileSystem(host, port, user, pw) with pytest.raises(ftplib.error_perm): @@ -159,3 +158,18 @@ fs.makedirs("/tmp/not/exist", exist_ok=False) fs.makedirs("/tmp/not/exist/inner/inner") assert fs.isdir("/tmp/not/exist/inner/inner") + + +def test_rm_recursive(ftp_writable): + host, port, user, pw = ftp_writable + fs = FTPFileSystem(host, port, user, pw) + fs.mkdir("/tmp/topdir") + fs.mkdir("/tmp/topdir/underdir") + fs.touch("/tmp/topdir/afile") + 
fs.touch("/tmp/topdir/underdir/afile") + + with pytest.raises(ftplib.error_perm): + fs.rmdir("/tmp/topdir") + + fs.rm("/tmp/topdir", recursive=True) + assert not fs.exists("/tmp/topdir") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/implementations/tests/test_http.py new/filesystem_spec-2021.08.1/fsspec/implementations/tests/test_http.py --- old/filesystem_spec-2021.07.0/fsspec/implementations/tests/test_http.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/implementations/tests/test_http.py 2021-08-31 16:14:55.000000000 +0200 @@ -4,6 +4,7 @@ import sys import threading import time +from collections import ChainMap from http.server import BaseHTTPRequestHandler, HTTPServer import pytest @@ -25,6 +26,19 @@ class HTTPTestHandler(BaseHTTPRequestHandler): + static_files = { + "/index/realfile": data, + "/index/otherfile": data, + "/index": index, + "/data/20020401": listing, + } + dynamic_files = {} + + files = ChainMap(dynamic_files, static_files) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + def _respond(self, code=200, headers=None, data=b""): headers = headers or {} headers.update({"User-Agent": "test"}) @@ -36,50 +50,79 @@ self.wfile.write(data) def do_GET(self): - if self.path.rstrip("/") not in [ - "/index/realfile", - "/index/otherfile", - "/index", - "/data/20020401", - ]: - self._respond(404) - return - - d = data if self.path in ["/index/realfile", "/index/otherfile"] else index - if self.path == "/data/20020401": - d = listing + file_path = self.path.rstrip("/") + file_data = self.files.get(file_path) + if file_data is None: + return self._respond(404) if "Range" in self.headers: ran = self.headers["Range"] b, ran = ran.split("=") start, end = ran.split("-") if start: - d = d[int(start) : (int(end) + 1) if end else None] + file_data = file_data[int(start) : (int(end) + 1) if end else None] else: # suffix only - d = 
d[-int(end) :] + file_data = file_data[-int(end) :] if "give_length" in self.headers: - response_headers = {"Content-Length": len(d)} - self._respond(200, response_headers, d) + response_headers = {"Content-Length": len(file_data)} + self._respond(200, response_headers, file_data) elif "give_range" in self.headers: - self._respond(200, {"Content-Range": "0-%i/%i" % (len(d) - 1, len(d))}, d) + self._respond( + 200, + {"Content-Range": "0-%i/%i" % (len(file_data) - 1, len(file_data))}, + file_data, + ) else: - self._respond(200, data=d) + self._respond(200, data=file_data) + + def do_POST(self): + length = self.headers.get("Content-Length") + file_path = self.path.rstrip("/") + if length is None: + assert self.headers.get("Transfer-Encoding") == "chunked" + self.files[file_path] = b"".join(self.read_chunks()) + else: + self.files[file_path] = self.rfile.read(length) + self._respond(200) + + do_PUT = do_POST + + def read_chunks(self): + length = -1 + while length != 0: + line = self.rfile.readline().strip() + if len(line) == 0: + length = 0 + else: + length = int(line, 16) + yield self.rfile.read(length) + self.rfile.readline() def do_HEAD(self): - if "head_ok" not in self.headers: - self._respond(405) - return - d = data if self.path == "/index/realfile" else index - if self.path.rstrip("/") not in ["/index/realfile", "/index"]: - self._respond(404) - elif "give_length" in self.headers: - response_headers = {"Content-Length": len(d)} + if "head_not_auth" in self.headers: + return self._respond( + 403, {"Content-Length": 123}, b"not authorized for HEAD request" + ) + elif "head_ok" not in self.headers: + return self._respond(405) + + file_path = self.path.rstrip("/") + file_data = self.files.get(file_path) + if file_data is None: + return self._respond(404) + + if "give_length" in self.headers: + response_headers = {"Content-Length": len(file_data)} if "zero_length" in self.headers: response_headers["Content-Length"] = 0 self._respond(200, response_headers) elif 
"give_range" in self.headers: - self._respond(200, {"Content-Range": "0-%i/%i" % (len(d) - 1, len(d))}) + self._respond( + 200, {"Content-Range": "0-%i/%i" % (len(file_data) - 1, len(file_data))} + ) + elif "give_etag" in self.headers: + self._respond(200, {"ETag": "xxx"}) else: self._respond(200) # OK response, but no useful info @@ -105,6 +148,15 @@ yield s +@pytest.fixture +def reset_files(): + yield + + # Reset the newly added files after the + # test is completed. + HTTPTestHandler.dynamic_files.clear() + + def test_list(server): h = fsspec.filesystem("http") out = h.glob(server + "/index/*") @@ -286,6 +338,8 @@ {"give_length": "true"}, {"give_length": "true", "head_ok": "true"}, {"give_range": "true"}, + {"give_length": "true", "head_not_auth": "true"}, + {"give_range": "true", "head_not_auth": "true"}, ], ) def test_random_access(server, headers): @@ -387,6 +441,30 @@ assert out == {server + "/index/realfile": data} +def test_info(server): + fs = fsspec.filesystem("http", headers={"give_etag": "true", "head_ok": "true"}) + info = fs.info(server + "/index/realfile") + assert info["ETag"] == "xxx" + + +@pytest.mark.parametrize("method", ["POST", "PUT"]) +def test_put_file(server, tmp_path, method, reset_files): + src_file = tmp_path / "file_1" + src_file.write_bytes(data) + + dwl_file = tmp_path / "down_1" + + fs = fsspec.filesystem("http", headers={"head_ok": "true", "give_length": "true"}) + with pytest.raises(FileNotFoundError): + fs.info(server + "/hey") + + fs.put_file(src_file, server + "/hey", method=method) + assert fs.info(server + "/hey")["size"] == len(data) + + fs.get_file(server + "/hey", dwl_file) + assert dwl_file.read_bytes() == data + + @pytest.mark.xfail( condition=sys.flags.optimize > 1, reason="no docstrings when optimised" ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/implementations/tests/test_local.py 
new/filesystem_spec-2021.08.1/fsspec/implementations/tests/test_local.py --- old/filesystem_spec-2021.07.0/fsspec/implementations/tests/test_local.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/implementations/tests/test_local.py 2021-08-31 16:14:55.000000000 +0200 @@ -401,9 +401,20 @@ files = [tmpdir + "/afile4", tmpdir + "/afile5"] [fs.touch(f) for f in files] + + with pytest.raises(TypeError): + fs.rm_file(files) fs.rm(files) assert all(not fs.exists(f) for f in files) + fs.touch(tmpdir + "/afile6") + fs.rm_file(tmpdir + "/afile6") + assert not fs.exists(tmpdir + "/afile6") + + # IsADirectoryError raised on Linux, PermissionError on Windows + with pytest.raises((IsADirectoryError, PermissionError)): + fs.rm_file(tmpdir) + fs.rm(tmpdir, recursive=True) assert not fs.exists(tmpdir) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/implementations/tests/test_memory.py new/filesystem_spec-2021.08.1/fsspec/implementations/tests/test_memory.py --- old/filesystem_spec-2021.07.0/fsspec/implementations/tests/test_memory.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/implementations/tests/test_memory.py 2021-08-31 16:14:55.000000000 +0200 @@ -1,3 +1,5 @@ +import os + import pytest @@ -19,6 +21,17 @@ assert m._strip_protocol("/b/c/") == "/b/c" +def test_put_single(m, tmpdir): + fn = os.path.join(str(tmpdir), "dir") + os.mkdir(fn) + open(os.path.join(fn, "abc"), "w").write("text") + m.put(fn, "/test") # no-op, no files + assert not m.exists("/test/abc") + assert not m.exists("/test/dir") + m.put(fn + "/", "/test", recursive=True) + assert m.cat("/test/dir/abc") == b"text" + + def test_ls(m): m.mkdir("/dir") m.mkdir("/dir/dir1") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/mapping.py new/filesystem_spec-2021.08.1/fsspec/mapping.py --- 
old/filesystem_spec-2021.07.0/fsspec/mapping.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/mapping.py 2021-08-31 16:14:55.000000000 +0200 @@ -88,6 +88,8 @@ oe = on_error if on_error == "raise" else "return" try: out = self.fs.cat(keys2, on_error=oe) + if isinstance(out, bytes): + out = {keys2[0]: out} except self.missing_exceptions as e: raise KeyError from e out = { @@ -176,6 +178,10 @@ def maybe_convert(value): if isinstance(value, array.array) or hasattr(value, "__array__"): # bytes-like things + if hasattr(value, "dtype") and value.dtype.kind in "Mm": + # The buffer interface doesn't support datetime64/timdelta64 numpy + # arrays + value = value.view("int64") value = bytearray(memoryview(value)) return value diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/spec.py new/filesystem_spec-2021.08.1/fsspec/spec.py --- old/filesystem_spec-2021.07.0/fsspec/spec.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/spec.py 2021-08-31 16:14:55.000000000 +0200 @@ -802,7 +802,9 @@ lpath = make_path_posix(lpath) fs = LocalFileSystem() lpaths = fs.expand_path(lpath, recursive=recursive) - rpaths = other_paths(lpaths, rpath) + rpaths = other_paths( + lpaths, rpath, exists=isinstance(rpath, str) and self.isdir(rpath) + ) callback.set_size(len(rpaths)) for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): @@ -1219,7 +1221,7 @@ def _isfilestore(self): # Originally inherited from pyarrow DaskFileSystem. Keeping this # here for backwards compatibility as long as pyarrow uses its - # legacy ffspec-compatible filesystems and thus accepts fsspec + # legacy fsspec-compatible filesystems and thus accepts fsspec # filesystems as well return False @@ -1230,7 +1232,7 @@ In the case that the backend does not provide a pythonic file-like object already, this class contains much of the logic to build one. 
The only methods that need to be overridden are ``_upload_chunk``, - ``_initate_upload`` and ``_fetch_range``. + ``_initiate_upload`` and ``_fetch_range``. """ DEFAULT_BLOCK_SIZE = 5 * 2 ** 20 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/tests/test_mapping.py new/filesystem_spec-2021.08.1/fsspec/tests/test_mapping.py --- old/filesystem_spec-2021.07.0/fsspec/tests/test_mapping.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/tests/test_mapping.py 2021-08-31 16:14:55.000000000 +0200 @@ -119,3 +119,27 @@ assert m["c"] == b"\x01\x00\x00\x00" m["c"] = np.array([1, 2], dtype="<i4") # array assert m["c"] == b"\x01\x00\x00\x00\x02\x00\x00\x00" + m["c"] = np.array( + np.datetime64("2000-01-01T23:59:59.999999999"), dtype="<M8[ns]" + ) # datetime64 scalar + assert m["c"] == b"\xff\xff\x91\xe3c\x9b#\r" + m["c"] = np.array( + [ + np.datetime64("1900-01-01T23:59:59.999999999"), + np.datetime64("2000-01-01T23:59:59.999999999"), + ], + dtype="<M8[ns]", + ) # datetime64 array + assert m["c"] == b"\xff\xff}p\xf8fX\xe1\xff\xff\x91\xe3c\x9b#\r" + m["c"] = np.array( + np.timedelta64(3155673612345678901, "ns"), dtype="<m8[ns]" + ) # timedelta64 scalar + assert m["c"] == b"5\x1c\xf0Rn4\xcb+" + m["c"] = np.array( + [ + np.timedelta64(450810516049382700, "ns"), + np.timedelta64(3155673612345678901, "ns"), + ], + dtype="<m8[ns]", + ) # timedelta64 scalar + assert m["c"] == b',M"\x9e\xc6\x99A\x065\x1c\xf0Rn4\xcb+' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/tests/test_registry.py new/filesystem_spec-2021.08.1/fsspec/tests/test_registry.py --- old/filesystem_spec-2021.07.0/fsspec/tests/test_registry.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/tests/test_registry.py 2021-08-31 16:14:55.000000000 +0200 @@ -93,6 +93,7 @@ def 
test_entry_points_registered_on_import(clear_registry, clean_imports): mock_ep = create_autospec(EntryPoint, module="fsspec.spec.AbstractFileSystem") mock_ep.name = "test" # this can't be set in the constructor... + mock_ep.value = "fsspec.spec.AbstractFileSystem" if sys.version_info < (3, 8): import_location = "importlib_metadata.entry_points" else: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/tests/test_spec.py new/filesystem_spec-2021.08.1/fsspec/tests/test_spec.py --- old/filesystem_spec-2021.07.0/fsspec/tests/test_spec.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/tests/test_spec.py 2021-08-31 16:14:55.000000000 +0200 @@ -462,7 +462,7 @@ if file: # The reason that there is a relative_update(0) at the # end is that, we don't have an early exit on the - # impleementations of get_file/put_file so it needs to + # implementations of get_file/put_file so it needs to # go through the callback to get caught by the while's # condition and then it will stop the transfer. 
events.append(("relative_update", 0)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2021.07.0/fsspec/utils.py new/filesystem_spec-2021.08.1/fsspec/utils.py --- old/filesystem_spec-2021.07.0/fsspec/utils.py 2021-07-13 02:42:46.000000000 +0200 +++ new/filesystem_spec-2021.08.1/fsspec/utils.py 2021-08-31 16:14:55.000000000 +0200 @@ -4,7 +4,7 @@ import pathlib import re import sys -from hashlib import sha256 +from hashlib import md5 from urllib.parse import urlsplit DEFAULT_BLOCK_SIZE = 5 * 2 ** 20 @@ -98,11 +98,12 @@ inherited = {} collisions = set(options) & set(inherited) if collisions: - collisions = "\n".join("- %r" % k for k in collisions) - raise KeyError( - "Collision between inferred and specified storage " - "options:\n%s" % collisions - ) + for collision in collisions: + if options.get(collision) != inherited.get(collision): + raise KeyError( + "Collision between inferred and specified storage " + "option:\n%s" % collision + ) options.update(inherited) @@ -278,7 +279,7 @@ """ if kwargs: args += (kwargs,) - return sha256(str(args).encode()).hexdigest() + return md5(str(args).encode()).hexdigest() def stringify_path(filepath): @@ -334,7 +335,7 @@ return "/".join(parts[0][:i]) -def other_paths(paths, path2, is_dir=None): +def other_paths(paths, path2, is_dir=None, exists=False): """In bulk file operations, construct a new file tree from a list of files Parameters @@ -348,6 +349,9 @@ For the special case where the input in one element, whether to regard the value as the target path, or as a directory to put a file path within. If None, a directory is inferred if the path ends in '/' + exists: bool (optional) + For a str destination, if it already exists (and is a dir), files should + end up inside. 
Returns ------- @@ -358,6 +362,8 @@ path2 = path2.rstrip("/") if len(paths) > 1: cp = common_prefix(paths) + if exists: + cp = cp.rsplit("/", 1)[0] path2 = [p.replace(cp, path2, 1) for p in paths] else: if is_dir: