Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-s3fs for openSUSE:Factory checked in at 2022-07-04 11:32:39
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-s3fs (Old)
 and      /work/SRC/openSUSE:Factory/.python-s3fs.new.1548 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-s3fs"

Mon Jul  4 11:32:39 2022 rev:14 rq:985786 version:2022.5.0

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-s3fs/python-s3fs.changes 2022-04-28 23:07:48.936677529 +0200
+++ /work/SRC/openSUSE:Factory/.python-s3fs.new.1548/python-s3fs.changes       2022-07-04 11:32:41.564005520 +0200
@@ -1,0 +2,7 @@
+Wed Jun 29 08:56:40 UTC 2022 - Ben Greiner <[email protected]>
+
+- Update to 2022.5.0
+  * aiobotocore 2.3 (#622, fixes #558)
+  * rate limiting (#619, #620)
+
+-------------------------------------------------------------------

Old:
----
  s3fs-2022.3.0.tar.gz

New:
----
  s3fs-2022.5.0.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-s3fs.spec ++++++
--- /var/tmp/diff_new_pack.svb1fB/_old  2022-07-04 11:32:42.104006390 +0200
+++ /var/tmp/diff_new_pack.svb1fB/_new  2022-07-04 11:32:42.108006396 +0200
@@ -19,14 +19,14 @@
 %{?!python_module:%define python_module() python3-%{**}}
 %define skip_python2 1
 Name:           python-s3fs
-Version:        2022.3.0
+Version:        2022.5.0
 Release:        0
 Summary:        Python filesystem interface for S3
 License:        BSD-3-Clause
 URL:            https://github.com/fsspec/s3fs/
 Source:         https://files.pythonhosted.org/packages/source/s/s3fs/s3fs-%{version}.tar.gz
 BuildRequires:  %{python_module Flask}
-BuildRequires:  %{python_module aiobotocore >= 2.1.0}
+BuildRequires:  %{python_module aiobotocore >= 2.3.0}
 BuildRequires:  %{python_module aiohttp}
 BuildRequires:  %{python_module boto3}
 BuildRequires:  %{python_module fsspec = %{version}}
@@ -36,10 +36,7 @@
 BuildRequires:  %{python_module setuptools}
 BuildRequires:  fdupes
 BuildRequires:  python-rpm-macros
-%if %{with python2}
-BuildRequires:  python-mock
-%endif
-Requires:       python-aiobotocore >= 2.1.0
+Requires:       python-aiobotocore >= 2.3.0
 Requires:       python-aiohttp
 Requires:       python-fsspec = %{version}
 Recommends:     aws-cli
@@ -63,9 +60,11 @@
 %python_expand %fdupes %{buildroot}%{$python_sitelib}
 
 %check
-# test_anonymous_access - online test
-# test_async_close - online test
-%pytest -k 'not (test_anonymous_access or test_async_close)'
+# online test (not mocked through moto)
+donttest="test_async_close"
+# not deleting fast enough obs serverside
+donttest+=" or test_s3_big_ls"
+%pytest -k "not ($donttest)"
 
 %files %{python_files}
 %doc README.rst
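A note on the new %check logic above: the two donttest lines concatenate into a single
pytest deselection expression, so the composed call is
%pytest -k "not (test_async_close or test_s3_big_ls)". A minimal Python sketch of the
equivalent invocation (pytest.main accepts the same arguments as the command line; the
test names are the ones deselected in the spec):

    import pytest

    # Build the same -k expression the spec assembles via donttest.
    deselected = ["test_async_close", "test_s3_big_ls"]
    expr = "not (" + " or ".join(deselected) + ")"
    # Equivalent to: pytest -k "not (test_async_close or test_s3_big_ls)"
    raise SystemExit(pytest.main(["-k", expr]))
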
++++++ s3fs-2022.3.0.tar.gz -> s3fs-2022.5.0.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.3.0/PKG-INFO new/s3fs-2022.5.0/PKG-INFO
--- old/s3fs-2022.3.0/PKG-INFO  2022-03-31 20:01:37.321205100 +0200
+++ new/s3fs-2022.5.0/PKG-INFO  2022-05-19 20:31:30.113800800 +0200
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: s3fs
-Version: 2022.3.0
+Version: 2022.5.0
 Summary: Convenient Filesystem interface over S3
 Home-page: http://github.com/fsspec/s3fs/
 Maintainer: Martin Durant
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.3.0/docs/source/changelog.rst new/s3fs-2022.5.0/docs/source/changelog.rst
--- old/s3fs-2022.3.0/docs/source/changelog.rst 2022-03-31 20:00:49.000000000 +0200
+++ new/s3fs-2022.5.0/docs/source/changelog.rst 2022-05-19 20:15:42.000000000 +0200
@@ -1,6 +1,12 @@
 Changelog
 =========
 
+2022.5.0
+--------
+
+- aiobotocore 2.3 (#622, fixes #558)
+- rate limiting (#619, #620)
+
 2022.3.0
 --------
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.3.0/release-procedure.md new/s3fs-2022.5.0/release-procedure.md
--- old/s3fs-2022.3.0/release-procedure.md      2021-11-05 20:51:13.000000000 +0100
+++ new/s3fs-2022.5.0/release-procedure.md      2022-05-19 20:15:42.000000000 +0200
@@ -1,6 +1,9 @@
 1. Verify tests on Linux, OS-X, and Windows
 
-2. Update version in setup.py and s3fs/__init__.py and commit
+2. Complete entries in `docs/source/changelog.rst`.
+
+   There's no need for changing version numbers in source files.
+   The release version will be determined from the git tag (see below).
 
 3. Tag the commit
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.3.0/requirements.txt new/s3fs-2022.5.0/requirements.txt
--- old/s3fs-2022.3.0/requirements.txt  2022-03-31 20:00:49.000000000 +0200
+++ new/s3fs-2022.5.0/requirements.txt  2022-05-19 20:30:51.000000000 +0200
@@ -1,3 +1,3 @@
-aiobotocore~=2.2.0
-fsspec==2022.3.0
+aiobotocore~=2.3.0
+fsspec==2022.5.0
 aiohttp<=4
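For context on the pins above: ~= is PEP 440's compatible-release operator, so
aiobotocore~=2.3.0 means ">=2.3.0 but <2.4" — which is why the spec's BuildRequires
moved to aiobotocore >= 2.3.0. A small sketch checking this with the packaging
library (assumed to be installed; it is not part of this diff):

    from packaging.specifiers import SpecifierSet

    # aiobotocore~=2.3.0 is equivalent to: >=2.3.0, ==2.3.*
    spec = SpecifierSet("~=2.3.0")
    print("2.3.4" in spec)  # True  - compatible bugfix release
    print("2.4.0" in spec)  # False - outside the compatible range
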
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.3.0/s3fs/_version.py new/s3fs-2022.5.0/s3fs/_version.py
--- old/s3fs-2022.3.0/s3fs/_version.py  2022-03-31 20:01:37.322401800 +0200
+++ new/s3fs-2022.5.0/s3fs/_version.py  2022-05-19 20:31:30.114924400 +0200
@@ -8,11 +8,11 @@
 
 version_json = '''
 {
- "date": "2022-03-31T14:00:38-0400",
+ "date": "2022-05-19T14:30:47-0400",
  "dirty": false,
  "error": null,
- "full-revisionid": "c4fb41f7cc2f2aede6bbb7755096c38b9e4cc553",
- "version": "2022.3.0"
+ "full-revisionid": "3e9b7d4a62f6efb3d282d9ded2baf4625fab0515",
+ "version": "2022.5.0"
 }
 '''  # END VERSION_JSON
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.3.0/s3fs/core.py new/s3fs-2022.5.0/s3fs/core.py
--- old/s3fs-2022.3.0/s3fs/core.py      2022-03-29 19:28:02.000000000 +0200
+++ new/s3fs-2022.5.0/s3fs/core.py      2022-05-19 20:15:42.000000000 +0200
@@ -12,7 +12,13 @@
 from fsspec.spec import AbstractBufferedFile
 from fsspec.utils import infer_storage_options, tokenize, setup_logging as setup_logger
-from fsspec.asyn import AsyncFileSystem, sync, sync_wrapper, FSTimeoutError
+from fsspec.asyn import (
+    AsyncFileSystem,
+    sync,
+    sync_wrapper,
+    FSTimeoutError,
+    _run_coros_in_chunks,
+)
 from fsspec.callbacks import _DEFAULT_CALLBACK
 
 import aiobotocore
@@ -20,6 +26,7 @@
 import aiobotocore.session
 from aiobotocore.config import AioConfig
 from botocore.exceptions import ClientError, HTTPClientError, ParamValidationError
+from botocore.parsers import ResponseParserError
 
 from s3fs.errors import translate_boto_error
 from s3fs.utils import S3BucketRegionCache, ParamKwargsHelper, _get_brange, FileExpired
@@ -46,7 +53,17 @@
 
 MANAGED_COPY_THRESHOLD = 5 * 2**30
 
-S3_RETRYABLE_ERRORS = (socket.timeout, HTTPClientError, IncompleteRead)
+# Certain rate-limiting responses can send invalid XML
+# (see https://github.com/fsspec/s3fs/issues/484), which can result in a parser error
+# deep within botocore. So we treat those as retryable as well, even though there could
+# be some false positives.
+S3_RETRYABLE_ERRORS = (
+    socket.timeout,
+    HTTPClientError,
+    IncompleteRead,
+    FSTimeoutError,
+    ResponseParserError,
+)
 
 if ClientPayloadError is not None:
     S3_RETRYABLE_ERRORS += (ClientPayloadError,)
@@ -85,6 +102,41 @@
 buck_acls = {"private", "public-read", "public-read-write", "authenticated-read"}
 
 
+async def _error_wrapper(func, *, args=(), kwargs=None, retries):
+    if kwargs is None:
+        kwargs = {}
+    for i in range(retries):
+        try:
+            return await func(*args, **kwargs)
+        except S3_RETRYABLE_ERRORS as e:
+            err = e
+            logger.debug("Retryable error: %s", e)
+            await asyncio.sleep(min(1.7**i * 0.1, 15))
+        except ClientError as e:
+            logger.debug("Client error (maybe retryable): %s", e)
+            err = e
+            if "SlowDown" in str(e):
+                await asyncio.sleep(min(1.7**i * 0.1, 15))
+            else:
+                break
+        except Exception as e:
+            logger.debug("Nonretryable error: %s", e)
+            err = e
+            break
+
+    if "'coroutine'" in str(err):
+        # aiobotocore internal error - fetch original botocore error
+        tb = err.__traceback__
+        while tb.tb_next:
+            tb = tb.tb_next
+        try:
+            await tb.tb_frame.f_locals["response"]
+        except Exception as e:
+            err = e
+    err = translate_boto_error(err)
+    raise err
+
+
 def version_id_kw(version_id):
     """Helper to make versionId kwargs.
 
@@ -216,7 +268,7 @@
         cache_regions=False,
         asynchronous=False,
         loop=None,
-        **kwargs
+        **kwargs,
     ):
         if key and username:
             raise KeyError("Supply either key or username, not both")
@@ -277,29 +329,9 @@
         kw2.pop("Body", None)
         logger.debug("CALL: %s - %s - %s", method.__name__, akwarglist, kw2)
         additional_kwargs = self._get_s3_method_kwargs(method, *akwarglist, **kwargs)
-        for i in range(self.retries):
-            try:
-                out = await method(**additional_kwargs)
-                return out
-            except S3_RETRYABLE_ERRORS as e:
-                logger.debug("Retryable error: %s", e)
-                err = e
-                await asyncio.sleep(min(1.7**i * 0.1, 15))
-            except Exception as e:
-                logger.debug("Nonretryable error: %s", e)
-                err = e
-                break
-        if "'coroutine'" in str(err):
-            # aiobotocore internal error - fetch original botocore error
-            tb = err.__traceback__
-            while tb.tb_next:
-                tb = tb.tb_next
-            try:
-                await tb.tb_frame.f_locals["response"]
-            except Exception as e:
-                err = e
-        err = translate_boto_error(err)
-        raise err
+        return await _error_wrapper(
+            method, kwargs=additional_kwargs, retries=self.retries
+        )
 
     call_s3 = sync_wrapper(_call_s3)
 
@@ -514,7 +546,7 @@
         autocommit=True,
         requester_pays=None,
         cache_options=None,
-        **kwargs
+        **kwargs,
     ):
         """Open a file for reading or writing
 
@@ -905,17 +937,22 @@
             head = {"Range": await self._process_limits(path, start, end)}
         else:
             head = {}
-        resp = await self._call_s3(
-            "get_object",
-            Bucket=bucket,
-            Key=key,
-            **version_id_kw(version_id or vers),
-            **head,
-            **self.req_kw,
-        )
-        data = await resp["Body"].read()
-        resp["Body"].close()
-        return data
+
+        async def _call_and_read():
+            resp = await self._call_s3(
+                "get_object",
+                Bucket=bucket,
+                Key=key,
+                **version_id_kw(version_id or vers),
+                **head,
+                **self.req_kw,
+            )
+            try:
+                return await resp["Body"].read()
+            finally:
+                resp["Body"].close()
+
+        return await _error_wrapper(_call_and_read, retries=self.retries)
 
     async def _pipe_file(self, path, data, chunksize=50 * 2**20, **kwargs):
         bucket, key, _ = self.split_path(path)
@@ -1019,28 +1056,66 @@
     async def _get_file(
         self, rpath, lpath, callback=_DEFAULT_CALLBACK, version_id=None
     ):
-        bucket, key, vers = self.split_path(rpath)
         if os.path.isdir(lpath):
             return
-        resp = await self._call_s3(
-            "get_object",
-            Bucket=bucket,
-            Key=key,
-            **version_id_kw(version_id or vers),
-            **self.req_kw,
-        )
-        body = resp["Body"]
-        callback.set_size(resp.get("ContentLength", None))
+        bucket, key, vers = self.split_path(rpath)
+
+        async def _open_file(range: int):
+            kw = self.req_kw.copy()
+            if range:
+                kw["Range"] = f"bytes={range}-"
+            resp = await self._call_s3(
+                "get_object",
+                Bucket=bucket,
+                Key=key,
+                **version_id_kw(version_id or vers),
+                **self.req_kw,
+            )
+            return resp["Body"], resp.get("ContentLength", None)
+
+        body, content_length = await _open_file(range=0)
+        callback.set_size(content_length)
+
+        failed_reads = 0
+        bytes_read = 0
+
         try:
             with open(lpath, "wb") as f0:
                 while True:
-                    chunk = await body.read(2**16)
+                    try:
+                        chunk = await body.read(2**16)
+                    except S3_RETRYABLE_ERRORS:
+                        failed_reads += 1
+                        if failed_reads >= self.retries:
+                            # Give up if we've failed too many times.
+                            raise
+                        # Closing the body may result in an exception if we've failed to read from it.
+                        try:
+                            body.close()
+                        except Exception:
+                            pass
+
+                        await asyncio.sleep(min(1.7**failed_reads * 0.1, 15))
+                        # Byte ranges are inclusive, which means we need to be careful to not read the same data twice
+                        # in a failure.
+                        # Examples:
+                        # Read 1 byte -> failure, retry with read_range=0, byte range should be 0-
+                        # Read 1 byte, success. Read 1 byte: failure. Retry with read_range=2, byte-range should be 2-
+                        # Read 1 bytes, success. Read 1 bytes: success. Read 1 byte, failure. Retry with read_range=3,
+                        # byte-range should be 3-.
+                        body, _ = await _open_file(bytes_read + 1)
+                        continue
+
                     if not chunk:
                         break
+
+                    bytes_read += len(chunk)
                     segment_len = f0.write(chunk)
                     callback.relative_update(segment_len)
         finally:
-            body.close()
+            try:
+                body.close()
+            except Exception:
+                pass
 
     async def _info(self, path, bucket=None, key=None, refresh=False, version_id=None):
         path = self._strip_protocol(path)
@@ -1665,11 +1740,13 @@
         files = [p for p in paths if self.split_path(p)[1]]
         dirs = [p for p in paths if not self.split_path(p)[1]]
         # TODO: fails if more than one bucket in list
-        await asyncio.gather(
-            *[
+        await _run_coros_in_chunks(
+            [
                 self._bulk_delete(files[i : i + 1000])
                 for i in range(0, len(files), 1000)
-            ]
+            ],
+            batch_size=3,
+            nofiles=True,
         )
         await asyncio.gather(*[self._rmdir(d) for d in dirs])
         [
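The retry behaviour that the new _error_wrapper centralizes (and that the _get_file
read loop reuses with failed_reads as the exponent) sleeps min(1.7**i * 0.1, 15)
seconds before the next attempt; a ClientError is only retried when "SlowDown" appears
in its message. A standalone sketch of that backoff schedule, with the constants taken
verbatim from the hunks above:

    # Exponential backoff as used in _error_wrapper and _get_file:
    # 0.1 s base, factor 1.7, capped at 15 s.
    for i in range(12):
        delay = min(1.7**i * 0.1, 15)
        print(f"attempt {i}: sleep {delay:.2f}s")
    # attempt 0 sleeps 0.10 s, attempt 9 about 11.86 s, and from
    # attempt 10 onwards (1.7**10 * 0.1 is about 20.2) the 15 s cap applies.
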
resp["Body"] - callback.set_size(resp.get("ContentLength", None)) + bucket, key, vers = self.split_path(rpath) + + async def _open_file(range: int): + kw = self.req_kw.copy() + if range: + kw["Range"] = f"bytes={range}-" + resp = await self._call_s3( + "get_object", + Bucket=bucket, + Key=key, + **version_id_kw(version_id or vers), + **self.req_kw, + ) + return resp["Body"], resp.get("ContentLength", None) + + body, content_length = await _open_file(range=0) + callback.set_size(content_length) + + failed_reads = 0 + bytes_read = 0 + try: with open(lpath, "wb") as f0: while True: - chunk = await body.read(2**16) + try: + chunk = await body.read(2**16) + except S3_RETRYABLE_ERRORS: + failed_reads += 1 + if failed_reads >= self.retries: + # Give up if we've failed too many times. + raise + # Closing the body may result in an exception if we've failed to read from it. + try: + body.close() + except Exception: + pass + + await asyncio.sleep(min(1.7**failed_reads * 0.1, 15)) + # Byte ranges are inclusive, which means we need to be careful to not read the same data twice + # in a failure. + # Examples: + # Read 1 byte -> failure, retry with read_range=0, byte range should be 0- + # Read 1 byte, success. Read 1 byte: failure. Retry with read_range=2, byte-range should be 2- + # Read 1 bytes, success. Read 1 bytes: success. Read 1 byte, failure. Retry with read_range=3, + # byte-range should be 3-. + body, _ = await _open_file(bytes_read + 1) + continue + if not chunk: break + bytes_read += len(chunk) segment_len = f0.write(chunk) callback.relative_update(segment_len) finally: - body.close() + try: + body.close() + except Exception: + pass async def _info(self, path, bucket=None, key=None, refresh=False, version_id=None): path = self._strip_protocol(path) @@ -1665,11 +1740,13 @@ files = [p for p in paths if self.split_path(p)[1]] dirs = [p for p in paths if not self.split_path(p)[1]] # TODO: fails if more than one bucket in list - await asyncio.gather( - *[ + await _run_coros_in_chunks( + [ self._bulk_delete(files[i : i + 1000]) for i in range(0, len(files), 1000) - ] + ], + batch_size=3, + nofiles=True, ) await asyncio.gather(*[self._rmdir(d) for d in dirs]) [ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.3.0/s3fs.egg-info/PKG-INFO new/s3fs-2022.5.0/s3fs.egg-info/PKG-INFO --- old/s3fs-2022.3.0/s3fs.egg-info/PKG-INFO 2022-03-31 20:01:36.000000000 +0200 +++ new/s3fs-2022.5.0/s3fs.egg-info/PKG-INFO 2022-05-19 20:31:29.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: s3fs -Version: 2022.3.0 +Version: 2022.5.0 Summary: Convenient Filesystem interface over S3 Home-page: http://github.com/fsspec/s3fs/ Maintainer: Martin Durant diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.3.0/s3fs.egg-info/requires.txt new/s3fs-2022.5.0/s3fs.egg-info/requires.txt --- old/s3fs-2022.3.0/s3fs.egg-info/requires.txt 2022-03-31 20:01:36.000000000 +0200 +++ new/s3fs-2022.5.0/s3fs.egg-info/requires.txt 2022-05-19 20:31:29.000000000 +0200 @@ -1,9 +1,9 @@ -aiobotocore~=2.2.0 -fsspec==2022.3.0 +aiobotocore~=2.3.0 +fsspec==2022.5.0 aiohttp<=4 [awscli] -aiobotocore[awscli]~=2.2.0 +aiobotocore[awscli]~=2.3.0 [boto3] -aiobotocore[boto3]~=2.2.0 +aiobotocore[boto3]~=2.3.0
