Hello community, here is the log from the commit of package python-s3fs for openSUSE:Factory checked in at 2019-10-30 14:47:51 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-s3fs (Old) and /work/SRC/openSUSE:Factory/.python-s3fs.new.2990 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-s3fs" Wed Oct 30 14:47:51 2019 rev:4 rq:743964 version:0.3.5 Changes: -------- --- /work/SRC/openSUSE:Factory/python-s3fs/python-s3fs.changes 2019-08-19 23:02:08.311454091 +0200 +++ /work/SRC/openSUSE:Factory/.python-s3fs.new.2990/python-s3fs.changes 2019-10-30 14:47:58.202191945 +0100 @@ -1,0 +2,7 @@ +Tue Sep 24 11:00:09 UTC 2019 - Tomáš Chvátal <[email protected]> + +- Update to 0.3.5: + * Test expansion + * Minor bugfixes + +------------------------------------------------------------------- Old: ---- s3fs-0.3.3.tar.gz New: ---- s3fs-0.3.5.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-s3fs.spec ++++++ --- /var/tmp/diff_new_pack.JvEAYm/_old 2019-10-30 14:48:00.014193871 +0100 +++ /var/tmp/diff_new_pack.JvEAYm/_new 2019-10-30 14:48:00.022193880 +0100 @@ -19,17 +19,16 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} %define skip_python2 1 Name: python-s3fs -Version: 0.3.3 +Version: 0.3.5 Release: 0 Summary: Python filesystem interface over S3 License: BSD-3-Clause -Group: Development/Languages/Python URL: https://github.com/dask/s3fs/ Source: https://files.pythonhosted.org/packages/source/s/s3fs/s3fs-%{version}.tar.gz BuildRequires: %{python_module boto3 >= 1.9.91} BuildRequires: %{python_module botocore >= 1.12.91} BuildRequires: %{python_module fsspec >= 0.2.2} -BuildRequires: %{python_module moto >= 1.3.7} +BuildRequires: %{python_module moto >= 1.3.12} BuildRequires: %{python_module pytest >= 4.2.0} BuildRequires: %{python_module setuptools} BuildRequires: fdupes ++++++ s3fs-0.3.3.tar.gz -> s3fs-0.3.5.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/PKG-INFO new/s3fs-0.3.5/PKG-INFO --- old/s3fs-0.3.3/PKG-INFO 2019-08-08 15:06:16.000000000 +0200 +++ new/s3fs-0.3.5/PKG-INFO 2019-10-06 18:26:35.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.2 Name: s3fs -Version: 0.3.3 +Version: 0.3.5 Summary: Convenient Filesystem interface over S3 Home-page: http://github.com/dask/s3fs/ Maintainer: Martin Durant diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/docs/source/index.rst new/s3fs-0.3.5/docs/source/index.rst --- old/s3fs-0.3.3/docs/source/index.rst 2019-08-04 22:54:14.000000000 +0200 +++ new/s3fs-0.3.5/docs/source/index.rst 2019-09-09 15:14:23.000000000 +0200 @@ -78,6 +78,13 @@ - no permissions/access-control (i.e., no chmod/chown methods) +Logging +------- + +The logger ``s3fs.core.logger`` provides information about the operations of the +file system. To see messages, set its level to DEBUG. You can also achieve this via +an environment variable ``S3FS_LOGGING_LEVEL=DEBUG``. 
+ Credentials ----------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/s3fs/_version.py new/s3fs-0.3.5/s3fs/_version.py --- old/s3fs-0.3.3/s3fs/_version.py 2019-08-08 15:06:16.000000000 +0200 +++ new/s3fs-0.3.5/s3fs/_version.py 2019-10-06 18:26:35.000000000 +0200 @@ -8,11 +8,11 @@ version_json = ''' { - "date": "2019-08-08T09:02:10-0400", + "date": "2019-10-06T11:15:43-0400", "dirty": false, "error": null, - "full-revisionid": "990ceebb5ba73030819ddd09d5696506f0f865d7", - "version": "0.3.3" + "full-revisionid": "571a6463ac7aaaf1a6f80ee776e79e3b0d76a4f4", + "version": "0.3.5" } ''' # END VERSION_JSON diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/s3fs/core.py new/s3fs-0.3.5/s3fs/core.py --- old/s3fs-0.3.3/s3fs/core.py 2019-08-08 14:57:46.000000000 +0200 +++ new/s3fs-0.3.5/s3fs/core.py 2019-10-06 17:15:47.000000000 +0200 @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -import errno import logging +import os import socket +import time from hashlib import md5 from fsspec import AbstractFileSystem @@ -13,7 +14,14 @@ from s3fs.errors import translate_boto_error from s3fs.utils import ParamKwargsHelper -logger = logging.getLogger(__name__) +logger = logging.getLogger('s3fs') +handle = logging.StreamHandler() +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s ' + '- %(message)s') +handle.setFormatter(formatter) +logger.addHandler(handle) +if "S3FS_LOGGING_LEVEL" in os.environ: + logger.setLevel(os.environ["S3FS_LOGGING_LEVEL"]) logging.getLogger('boto3').setLevel(logging.WARNING) logging.getLogger('botocore').setLevel(logging.WARNING) @@ -100,12 +108,16 @@ client_kwargs : dict of parameters for the boto3 client requester_pays : bool (False) If RequesterPays buckets are supported. - default_block_size: None, int + default_block_size: int (None) If given, the default block size value used for ``open()``, if no specific value is given at all time. The built-in default is 5MB. default_fill_cache : Bool (True) Whether to use cache filling with open by default. Refer to ``S3File.open``. + default_cache_type : string ('bytes') + If given, the default cache_type value used for ``open()``. Set to "none" + if no caching is desired. See fsspec's documentation for other available + cache_type values. Default cache_type is 'bytes'. version_aware : bool (False) Whether to support bucket versioning. 
If enable this will require the user to have the necessary IAM permissions for dealing with versioned @@ -135,7 +147,7 @@ def __init__(self, anon=False, key=None, secret=None, token=None, use_ssl=True, client_kwargs=None, requester_pays=False, default_block_size=None, default_fill_cache=True, - version_aware=False, config_kwargs=None, + default_cache_type='bytes', version_aware=False, config_kwargs=None, s3_additional_kwargs=None, session=None, username=None, password=None, **kwargs): if key and username: @@ -164,6 +176,7 @@ config_kwargs = {} self.default_block_size = default_block_size or self.default_block_size self.default_fill_cache = default_fill_cache + self.default_cache_type = default_cache_type self.version_aware = version_aware self.client_kwargs = client_kwargs self.config_kwargs = config_kwargs @@ -177,6 +190,9 @@ return self._kwargs_helper.filter_dict(s3_method.__name__, kwargs) def _call_s3(self, method, *akwarglist, **kwargs): + kw2 = kwargs.copy() + kw2.pop('Body', None) + logger.debug("CALL: %s - %s - %s" % (method.__name__, akwarglist, kw2)) additional_kwargs = self._get_s3_method_kwargs(method, *akwarglist, **kwargs) return method(**additional_kwargs) @@ -223,6 +239,7 @@ self.session = boto3.Session(self.key, self.secret, self.token, **self.kwargs) + logger.debug("Setting up s3fs instance") self.s3 = self.session.client('s3', config=conf, use_ssl=ssl, **self.client_kwargs) return self.s3 @@ -253,7 +270,7 @@ 'token': cred['SessionToken'], 'anon': False} def _open(self, path, mode='rb', block_size=None, acl='', version_id=None, - fill_cache=None, cache_type='bytes', autocommit=True, **kwargs): + fill_cache=None, cache_type=None, autocommit=True, **kwargs): """ Open a file for reading or writing Parameters @@ -280,7 +297,8 @@ The encoding to use if opening the file in text mode. The platform's default text encoding is used if not given. cache_type : str - "bytes", "mmap" or "none" + See fsspec's documentation for available cache_type values. Set to "none" + if no caching is desired. If None, defaults to ``self.default_cache_type``. kwargs: dict-like Additional parameters used for s3 methods. Typically used for ServerSideEncryption. 
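For the new default_cache_type keyword described in the docstring changes above, a short usage sketch; the bucket and key names are placeholders, not part of this change:

    import s3fs

    # Every open() on this instance defaults to mmap caching.
    fs = s3fs.S3FileSystem(anon=False, default_cache_type='mmap')

    with fs.open('my-bucket/big-object', 'rb') as f:   # inherits 'mmap'
        head = f.read(1024)

    # A per-call cache_type still overrides the instance default.
    with fs.open('my-bucket/big-object', 'rb', cache_type='none') as f:
        body = f.read()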
@@ -297,6 +315,9 @@ raise ValueError("version_id cannot be specified if the filesystem " "is not version aware") + if cache_type is None: + cache_type = self.default_cache_type + return S3File(self, path, mode, block_size=block_size, acl=acl, version_id=version_id, fill_cache=fill_cache, s3_additional_kwargs=kw, cache_type=cache_type, @@ -310,6 +331,7 @@ prefix = prefix + '/' if prefix else "" if path not in self.dircache or refresh: try: + logger.debug("Get directory listing page for %s" % path) pag = self.s3.get_paginator('list_objects_v2') config = {} if max_items is not None: @@ -443,6 +465,17 @@ except FileNotFoundError: return False + def touch(self, path, truncate=True, data=None, **kwargs): + """Create empty file or truncate""" + bucket, key = split_path(path) + if not truncate and self.exists(path): + raise ValueError("S3 does not support touching existent files") + try: + self._call_s3(self.s3.put_object, kwargs, Bucket=bucket, Key=key) + except ClientError as ex: + raise translate_boto_error(ex) + self.invalidate_cache(self._parent(path)) + def info(self, path, version_id=None): if path in ['/', '']: return {'name': path, 'size': 0, 'type': 'directory'} @@ -912,6 +945,8 @@ self.key = key self.version_id = version_id self.acl = acl + if self.acl and self.acl not in key_acls: + raise ValueError('ACL not in %s', key_acls) self.mpu = None self.parts = None self.fill_cache = fill_cache @@ -929,20 +964,20 @@ self.size = self.details['size'] elif self.fs.version_aware: self.version_id = self.details.get('VersionId') - # In this case we have not managed to get the VersionId out of details and - # we should invalidate the cache and perform a full head_object since it + # In this case we have not managed to get the VersionId out of details and + # we should invalidate the cache and perform a full head_object since it # has likely been partially populated by ls. 
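The touch() method added in the hunk above behaves roughly as sketched here (placeholder bucket name, not taken from the diff):

    import s3fs

    fs = s3fs.S3FileSystem(anon=False)
    fs.touch('my-bucket/marker')       # creates a zero-byte object
    fs.touch('my-bucket/marker')       # truncate=True (default): resets it to zero bytes
    try:
        fs.touch('my-bucket/marker', truncate=False)
    except ValueError:
        # S3 cannot update only the timestamp of an existing object,
        # so touching an existing key without truncating is rejected.
        pass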
if self.version_id is None: self.fs.invalidate_cache(self.path) self.details = self.fs.info(self.path) self.version_id = self.details.get('VersionId') + self.append_block = False if 'a' in mode and s3.exists(path): loc = s3.info(path)['size'] if loc < 5 * 2 ** 20: # existing file too small for multi-upload: download self.write(self.fs.cat(self.path)) - self.append_block = False else: self.append_block = True self.loc = loc @@ -952,12 +987,11 @@ **kwargs) def _initiate_upload(self): - if self.acl and self.acl not in key_acls: - raise ValueError('ACL not in %s', key_acls) + if not self.append_block and self.tell() < self.blocksize: + # only happens when closing small file, use on-shot PUT + return + logger.debug("Initiate upload for %s" % self) self.parts = [] - self.size = 0 - if self.blocksize < 5 * 2 ** 20: - raise ValueError('Block size must be >=5MB') try: self.mpu = self._call_s3( self.fs.s3.create_multipart_upload, @@ -967,20 +1001,19 @@ except ParamValidationError as e: raise ValueError('Initiating write to %r failed: %s' % (self.path, e)) - if 'a' in self.mode and self.fs.exists(self.path): - if self.append_block: - # use existing data in key when appending, - # and block is big enough - out = self.fs._call_s3( - self.fs.s3.upload_part_copy, - self.s3_additional_kwargs, - Bucket=self.bucket, - Key=self.key, - PartNumber=1, - UploadId=self.mpu['UploadId'], - CopySource=self.path) - self.parts.append({'PartNumber': 1, - 'ETag': out['CopyPartResult']['ETag']}) + if self.append_block: + # use existing data in key when appending, + # and block is big enough + out = self.fs._call_s3( + self.fs.s3.upload_part_copy, + self.s3_additional_kwargs, + Bucket=self.bucket, + Key=self.key, + PartNumber=1, + UploadId=self.mpu['UploadId'], + CopySource=self.path) + self.parts.append({'PartNumber': 1, + 'ETag': out['CopyPartResult']['ETag']}) def metadata(self, refresh=False, **kwargs): """ Return metadata of file. 
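The reworked upload path above defers multipart setup for small writes; assuming the default 5 MB block size and placeholder names, the observable effect is roughly:

    import s3fs

    fs = s3fs.S3FileSystem(anon=False)

    # Stays below the block size: commit() now issues a single PutObject
    # instead of create/complete multipart upload calls.
    with fs.open('my-bucket/small.txt', 'wb') as f:
        f.write(b'x' * 1024)

    # Crosses the block size: chunks are still uploaded as multipart parts
    # and the upload is completed when the file is closed.
    with fs.open('my-bucket/large.bin', 'wb') as f:
        f.write(b'x' * (12 * 2 ** 20))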
@@ -1024,8 +1057,16 @@ def _upload_chunk(self, final=False): bucket, key = split_path(self.path) - self.buffer.seek(0) - (data0, data1) = (None, self.buffer.read(self.blocksize)) + logger.debug("Upload for %s, final=%s, loc=%s, buffer loc=%s" % ( + self, final, self.loc, self.buffer.tell() + )) + if not self.append_block and final and self.tell() < self.blocksize: + # only happens when closing small file, use on-shot PUT + data1 = False + else: + self.buffer.seek(0) + (data0, data1) = (None, self.buffer.read(self.blocksize)) + while data1: (data0, data1) = (data1, self.buffer.read(self.blocksize)) data1_size = len(data1) @@ -1041,6 +1082,7 @@ (data0, data1) = (remainder[:partition], remainder[partition:]) part = len(self.parts) + 1 + logger.debug("Upload chunk %s, %s" % (self, part)) for attempt in range(self.retries + 1): try: @@ -1054,6 +1096,7 @@ if attempt < self.retries: logger.debug('Exception %r on S3 write, retrying', exc, exc_info=True) + time.sleep(1.7**attempt * 0.1) except Exception as exc: raise IOError('Write failed: %r' % exc) else: @@ -1063,21 +1106,41 @@ if self.autocommit and final: self.commit() + return not final def commit(self): - logger.debug("COMMIT") - part_info = {'Parts': self.parts} - write_result = self._call_s3( - self.fs.s3.complete_multipart_upload, - Bucket=self.bucket, - Key=self.key, - UploadId=self.mpu['UploadId'], - MultipartUpload=part_info) - if self.fs.version_aware: - self.version_id = write_result.get('VersionId') + logger.debug("Commit %s" % self) + if self.tell() == 0: + if self.buffer is not None: + logger.debug("Empty file committed %s" % self) + self._abort_mpu() + self.fs.touch(self.path) + elif not self.parts: + if self.buffer is not None: + logger.debug("One-shot upload of %s" % self) + self.buffer.seek(0) + data = self.buffer.read() + self._call_s3( + self.fs.s3.put_object, + Key=self.key, Bucket=self.bucket, Body=data, **self.kwargs + ) + else: + raise RuntimeError + else: + logger.debug("Complete multi-part upload for %s " % self) + part_info = {'Parts': self.parts} + write_result = self._call_s3( + self.fs.s3.complete_multipart_upload, + Bucket=self.bucket, + Key=self.key, + UploadId=self.mpu['UploadId'], + MultipartUpload=part_info) + if self.fs.version_aware: + self.version_id = write_result.get('VersionId') # complex cache invalidation, since file's appearance can cause several # directories + self.buffer = None parts = self.path.split('/') path = parts[0] for p in parts[1:]: @@ -1088,20 +1151,36 @@ path = path + '/' + p def discard(self): - if self.autocommit: - raise ValueError("Cannot discard when autocommit is enabled") - self._call_s3( - self.fs.s3.abort_multipart_upload, - Bucket=self.bucket, - Key=self.key, - UploadId=self.mpu['UploadId'], - ) + self._abort_mpu() + self.buffer = None # file becomes unusable + + def _abort_mpu(self): + if self.mpu: + self._call_s3( + self.fs.s3.abort_multipart_upload, + Bucket=self.bucket, + Key=self.key, + UploadId=self.mpu['UploadId'], + ) + self.mpu = None def _fetch_range(client, bucket, key, version_id, start, end, max_attempts=10, req_kw=None): if req_kw is None: req_kw = {} + if start == end: + # When these match, we would make a request with `range=start-end - 1` + # According to RFC2616, servers are supposed to ignore the Range + # field when it's invalid like this. S3 does ignore it, moto doesn't. + # To avoid differences in behavior under mocking, we just avoid + # making these requests. It's hoped that since we're being called + # from a caching object, this won't end up mattering. 
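The retry loops above now sleep between attempts; the backoff schedule implied by the 1.7**attempt * 0.1 formula works out to roughly:

    # Seconds slept before retrying after failed attempt i (rounded)
    delays = [round(1.7 ** i * 0.1, 3) for i in range(5)]
    # -> [0.1, 0.17, 0.289, 0.491, 0.835]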
+ logger.debug( + 'skip fetch for negative range - bucket=%s,key=%s,start=%d,end=%d', + bucket, key, start, end + ) + return b'' logger.debug("Fetch: %s/%s, %s-%s", bucket, key, start, end) for i in range(max_attempts): try: @@ -1116,10 +1195,12 @@ except S3_RETRYABLE_ERRORS as e: logger.debug('Exception %r on S3 download, retrying', e, exc_info=True) + time.sleep(1.7**i * 0.1) continue except ConnectionError as e: logger.debug('ConnectionError %r on S3 download, retrying', e, exc_info=True) + time.sleep(1.7**i * 0.1) continue except ClientError as e: if e.response['Error'].get('Code', 'Unknown') in ['416', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/s3fs/tests/test_s3fs.py new/s3fs-0.3.5/s3fs/tests/test_s3fs.py --- old/s3fs-0.3.3/s3fs/tests/test_s3fs.py 2019-08-08 00:14:52.000000000 +0200 +++ new/s3fs-0.3.5/s3fs/tests/test_s3fs.py 2019-08-27 20:52:18.000000000 +0200 @@ -4,10 +4,10 @@ import json from concurrent.futures import ProcessPoolExecutor import io -import re import time import pytest from itertools import chain +import fsspec.core from s3fs.core import S3FileSystem from s3fs.utils import seek_delimiter, ignoring, SSEParams import moto @@ -134,6 +134,21 @@ assert out == data [email protected]('default_cache_type', ['none', 'bytes', 'mmap']) +def test_default_cache_type(s3, default_cache_type): + data = b'a' * (10 * 2 ** 20) + s3 = S3FileSystem(anon=False, default_cache_type=default_cache_type) + + with s3.open(a, 'wb') as f: + f.write(data) + + with s3.open(a, 'rb') as f: + assert isinstance(f.cache, fsspec.core.caches[default_cache_type]) + out = f.read(len(data)) + assert len(data) == len(out) + assert out == data + + def test_ssl_off(): s3 = S3FileSystem(use_ssl=False) assert s3.s3.meta.endpoint_url.startswith('http://') @@ -1211,6 +1226,7 @@ S3FileSystem.default_block_size = 5 * (1024 ** 2) S3FileSystem.cachable = True + def test_passed_in_session_set_correctly(s3): session = boto3.session.Session() s3 = S3FileSystem(session=session) @@ -1279,3 +1295,46 @@ # Cannot commit a file that was discarded with pytest.raises(Exception): fo.commit() + + +def test_touch(s3): + # create + fn = test_bucket_name + "/touched" + assert not s3.exists(fn) + s3.touch(fn) + assert s3.exists(fn) + assert s3.size(fn) == 0 + + # truncates + with s3.open(fn, 'wb') as f: + f.write(b'data') + assert s3.size(fn) == 4 + s3.touch(fn, truncate=True) + assert s3.size(fn) == 0 + + # exists error + with s3.open(fn, 'wb') as f: + f.write(b'data') + assert s3.size(fn) == 4 + with pytest.raises(ValueError): + s3.touch(fn, truncate=False) + assert s3.size(fn) == 4 + + +def test_seek_reads(s3): + fn = test_bucket_name + "/myfile" + with s3.open(fn, 'wb') as f: + f.write(b'a' * 175_627_146) + with s3.open(fn, 'rb', blocksize=100) as f: + f.seek(175561610) + d1 = f.read(65536) + + f.seek(4) + size = 17562198 + d2 = f.read(size) + assert len(d2) == size + + f.seek(17562288) + size = 17562187 + d3 = f.read(size) + assert len(d3) == size diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/s3fs.egg-info/PKG-INFO new/s3fs-0.3.5/s3fs.egg-info/PKG-INFO --- old/s3fs-0.3.3/s3fs.egg-info/PKG-INFO 2019-08-08 15:06:16.000000000 +0200 +++ new/s3fs-0.3.5/s3fs.egg-info/PKG-INFO 2019-10-06 18:26:35.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.2 Name: s3fs -Version: 0.3.3 +Version: 0.3.5 Summary: Convenient Filesystem interface over S3 Home-page: http://github.com/dask/s3fs/ Maintainer: Martin Durant diff -urN 
'--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/setup.cfg new/s3fs-0.3.5/setup.cfg --- old/s3fs-0.3.3/setup.cfg 2019-08-08 15:06:16.000000000 +0200 +++ new/s3fs-0.3.5/setup.cfg 2019-10-06 18:26:35.000000000 +0200 @@ -1,6 +1,3 @@ -[bdist_wheel] -universal = 1 - [metadata] long_description = file: README.rst