Hello community, here is the log from the commit of package python-minio for openSUSE:Factory checked in at 2019-10-02 14:55:32 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-minio (Old) and /work/SRC/openSUSE:Factory/.python-minio.new.2352 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-minio" Wed Oct 2 14:55:32 2019 rev:7 rq:734320 version:5.0.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-minio/python-minio.changes 2019-09-13 14:58:32.309277795 +0200 +++ /work/SRC/openSUSE:Factory/.python-minio.new.2352/python-minio.changes 2019-10-02 14:55:36.291332169 +0200 @@ -1,0 +2,8 @@ +Tue Oct 1 12:55:45 UTC 2019 - Marketa Calabkova <[email protected]> + +- Update to 5.0.1 + * BREAKING API CHANGE: re-implement select_object_content + * Remove white-space characters before parsing XML + * Do not encode ~ in V4 S3 signing + +------------------------------------------------------------------- Old: ---- minio-4.0.21.tar.gz New: ---- minio-5.0.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-minio.spec ++++++ --- /var/tmp/diff_new_pack.dDiIIA/_old 2019-10-02 14:55:37.383329304 +0200 +++ /var/tmp/diff_new_pack.dDiIIA/_new 2019-10-02 14:55:37.399329262 +0200 @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-minio -Version: 4.0.21 +Version: 5.0.1 Release: 0 Summary: Minio library for Amazon S3 compatible cloud storage License: Apache-2.0 ++++++ minio-4.0.21.tar.gz -> minio-5.0.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/PKG-INFO new/minio-5.0.1/PKG-INFO --- old/minio-4.0.21/PKG-INFO 2019-08-28 21:41:17.000000000 +0200 +++ new/minio-5.0.1/PKG-INFO 2019-09-18 23:08:50.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: minio -Version: 4.0.21 +Version: 5.0.1 Summary: MinIO Python Library for Amazon S3 Compatible Cloud Storage for Python Home-page: https://github.com/minio/minio-py Author: MinIO, Inc. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/docs/API.md new/minio-5.0.1/docs/API.md --- old/minio-4.0.21/docs/API.md 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/docs/API.md 2019-09-18 23:07:19.000000000 +0200 @@ -690,7 +690,6 @@ |``obj``| _SelectObjectReader_ |Select_object_reader object. | - __Example__ @@ -736,7 +735,7 @@ # Get the stats print(data.stats()) -except CRCValidationError as err: +except SelectCRCValidationError as err: print(err) except ResponseError as err: print(err) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/examples/select_object_content.py new/minio-5.0.1/examples/select_object_content.py --- old/minio-4.0.21/examples/select_object_content.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/examples/select_object_content.py 2019-09-18 23:07:19.000000000 +0200 @@ -17,12 +17,12 @@ from minio import Minio from minio.error import ResponseError -from minio.select_object_reader import CRCValidationError -from minio.select_object_options import (SelectObjectOptions, CSVInput, - JSONInput, RequestProgress, - ParquetInput, InputSerialization, - OutputSerialization, CSVOutput, - JsonOutput) +from minio.select.errors import SelectCRCValidationError, SelectMessageError +from minio.select.options import (SelectObjectOptions, CSVInput, + JSONInput, RequestProgress, + ParquetInput, InputSerialization, + OutputSerialization, CSVOutput, + JsonOutput) client = Minio('s3.amazonaws.com', access_key='YOUR-ACCESSKEY', @@ -71,7 +71,11 @@ # Get the stats print(data.stats()) -except CRCValidationError as err: +except SelectMessageError as err: print(err) + +except SelectCRCValidationError as err: + print(err) + except ResponseError as err: print(err) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/__init__.py new/minio-5.0.1/minio/__init__.py --- old/minio-4.0.21/minio/__init__.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/minio/__init__.py 2019-09-18 23:07:19.000000000 +0200 @@ -29,7 +29,7 @@ __title__ = 'minio-py' __author__ = 'MinIO, Inc.' -__version__ = '4.0.21' +__version__ = '5.0.1' __license__ = 'Apache 2.0' __copyright__ = 'Copyright 2015, 2016, 2017, 2018, 2019 MinIO, Inc.' @@ -38,6 +38,3 @@ from .post_policy import PostPolicy from .copy_conditions import CopyConditions from .definitions import Bucket, Object -from .select_object_reader import SelectObjectReader - - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/api.py new/minio-5.0.1/minio/api.py --- old/minio-4.0.21/minio/api.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/minio/api.py 2019-09-18 23:07:19.000000000 +0200 @@ -77,8 +77,7 @@ is_valid_bucket_notification_config, is_valid_policy_type, mkdir_p, dump_http, amzprefix_user_metadata, is_supported_header,is_amz_header) -from .helpers import (MAX_MULTIPART_OBJECT_SIZE, - MAX_PART_SIZE, +from .helpers import (MAX_PART_SIZE, MAX_POOL_SIZE, MIN_PART_SIZE, DEFAULT_PART_SIZE, @@ -94,7 +93,7 @@ xml_marshal_select) from .fold_case_dict import FoldCaseDict from .thread_pool import ThreadPool -from .select_object_reader import SelectObjectReader +from .select import SelectObjectReader # Comment format. _COMMENTS = '({0}; {1})' @@ -664,13 +663,13 @@ # Verify if we wrote data properly. if total_written < content_size: - msg = 'Data written {0} bytes is smaller than the' \ + msg = 'Data written {0} bytes is smaller than the ' \ 'specified size {1} bytes'.format(total_written, content_size) raise InvalidSizeError(msg) if total_written > content_size: - msg = 'Data written {0} bytes is in excess than the' \ + msg = 'Data written {0} bytes is in excess than the ' \ 'specified size {1} bytes'.format(total_written, content_size) raise InvalidSizeError(msg) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/helpers.py new/minio-5.0.1/minio/helpers.py --- old/minio-4.0.21/minio/helpers.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/minio/helpers.py 2019-09-18 23:07:19.000000000 +0200 @@ -55,17 +55,6 @@ MIN_PART_SIZE = 5 * 1024 * 1024 # 5MiB DEFAULT_PART_SIZE = MIN_PART_SIZE # Currently its 5MiB - -# Select Object Content -READ_SIZE_SELECT = 32 * 1024 # Buffer size -SQL = 'SQL' # Value for ExpressionType -EVENT_RECORDS = 'Records' # Event Type is Records -EVENT_PROGRESS = 'Progress' # Event Type Progress -EVENT_STATS = 'Stats' # Event Type Stats -EVENT = 'event' # Message Type is event -EVENT_END = 'End' # Event Type is End -ERROR = 'error' # Message Type is error - _VALID_BUCKETNAME_REGEX = re.compile('^[a-z0-9][a-z0-9\\.\\-]+[a-z0-9]$') _ALLOWED_HOSTNAME_REGEX = re.compile( '^((?!-)(?!_)[A-Z_\\d-]{1,63}(?<!-)(?<!_)\\.)*((?!_)(?!-)[A-Z_\\d-]{1,63}(?<!-)(?<!_))$', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/parsers.py new/minio-5.0.1/minio/parsers.py --- old/minio-4.0.21/minio/parsers.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/minio/parsers.py 2019-09-18 23:07:19.000000000 +0200 @@ -69,7 +69,7 @@ :return: Returns an S3Element. """ try: - return cls(root_name, cElementTree.fromstring(data)) + return cls(root_name, cElementTree.fromstring(data.strip())) except _ETREE_EXCEPTIONS as error: raise InvalidXMLError( '"{}" XML is not parsable. Message: {}'.format( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select/__init__.py new/minio-5.0.1/minio/select/__init__.py --- old/minio-4.0.21/minio/select/__init__.py 1970-01-01 01:00:00.000000000 +0100 +++ new/minio-5.0.1/minio/select/__init__.py 2019-09-18 23:07:19.000000000 +0200 @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# MinIO Python Library for Amazon S3 Compatible Cloud Storage, +# (C) 2019 MinIO, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +:copyright: (c) 2019 by MinIO, Inc. +:license: Apache 2.0, see LICENSE for more details. +""" + +__title__ = 'minio-py' +__author__ = 'MinIO, Inc.' +__version__ = '0.0.1' +__license__ = 'Apache 2.0' +__copyright__ = 'Copyright 2019 MinIO, Inc.' + +from .reader import * +from .helpers import * +from .errors import * +from .options import * diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select/errors.py new/minio-5.0.1/minio/select/errors.py --- old/minio-4.0.21/minio/select/errors.py 1970-01-01 01:00:00.000000000 +0100 +++ new/minio-5.0.1/minio/select/errors.py 2019-09-18 23:07:19.000000000 +0200 @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C) +# 2019 MinIO, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +minio.select.errors +~~~~~~~~~~~~~~~ + +This module implements the error classes for SelectObject responses. + +:copyright: (c) 2019 by MinIO, Inc. +:license: Apache 2.0, see LICENSE for more details. + +""" + +class SelectMessageError(Exception): + ''' + Raised in case of message type 'error' + ''' + +class SelectCRCValidationError(Exception): + ''' + Raised in case of CRC mismatch + ''' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select/helpers.py new/minio-5.0.1/minio/select/helpers.py --- old/minio-4.0.21/minio/select/helpers.py 1970-01-01 01:00:00.000000000 +0100 +++ new/minio-5.0.1/minio/select/helpers.py 2019-09-18 23:07:19.000000000 +0200 @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C) +# 2019 MinIO, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +minio.select.helpers +~~~~~~~~~~~~~~~ + +This module implements the helper functions for SelectObject responses. + +:copyright: (c) 2019 by MinIO, Inc. +:license: Apache 2.0, see LICENSE for more details. + +""" + +import codecs +from binascii import crc32 + +SQL = 'SQL' # Value for ExpressionType +EVENT_RECORDS = 'Records' # Event Type is Records +EVENT_PROGRESS = 'Progress' # Event Type Progress +EVENT_STATS = 'Stats' # Event Type Stats +EVENT_CONT = 'Cont' # Event Type continue +EVENT_END = 'End' # Event Type is End +EVENT_CONTENT_TYPE = "text/xml" # Event content xml type +EVENT = 'event' # Message Type is event +ERROR = 'error' # Message Type is error + +def calculate_crc(value): + ''' + Returns the CRC using crc32 + ''' + return crc32(value) & 0xffffffff + +def validate_crc(current_value, expected_value): + ''' + Validate through CRC check + ''' + crc_current = calculate_crc(current_value) + crc_expected = byte_int(expected_value) + if crc_current == crc_expected: + return True + return False + +def byte_int(data_bytes): + ''' + Convert bytes to big-endian integer + ''' + return int(codecs.encode(data_bytes, 'hex'), 16) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select/options.py new/minio-5.0.1/minio/select/options.py --- old/minio-4.0.21/minio/select/options.py 1970-01-01 01:00:00.000000000 +0100 +++ new/minio-5.0.1/minio/select/options.py 2019-09-18 23:07:19.000000000 +0200 @@ -0,0 +1,122 @@ +# -*- coding: utf-8 -*- +# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C) +# 2019 MinIO, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +minio.select.options +~~~~~~~~~~~~~~~ + +This module implements the SelectOption definition for SelectObject API. + +:copyright: (c) 2019 by MinIO, Inc. +:license: Apache 2.0, see LICENSE for more details. + +""" + +from .helpers import (SQL) + +class CSVInput: + """ + CSVInput: Input Format as CSV. + """ + def __init__(self, FileHeaderInfo=None, RecordDelimiter="\n", + FieldDelimiter=",", QuoteCharacter='"', + QuoteEscapeCharacter='"', Comments="#", + AllowQuotedRecordDelimiter=False): + self.FileHeaderInfo = FileHeaderInfo + self.RecordDelimiter = RecordDelimiter + self.FieldDelimiter = FieldDelimiter + self.QuoteCharacter = QuoteCharacter + self.QuoteEscapeCharacter = QuoteEscapeCharacter + self.Comments = Comments + self.AllowQuotedRecordDelimiter = AllowQuotedRecordDelimiter + +class JSONInput: + """ + JSONInput: Input format as JSON. + """ + def __init__(self, Type=None): + self.Type = Type + + +class ParquetInput: + """ + ParquetInput: Input format as Parquet + """ + + +class InputSerialization: + """ + InputSerialization: nput Format. + """ + def __init__(self, compression_type="NONE", csv=None, json=None, par=None): + self.compression_type = compression_type + self.csv_input = csv + self.json_input = json + self.parquet_input = par + + +class CSVOutput: + """ + CSVOutput: Output as CSV. + + """ + def __init__(self, QuoteFields="ASNEEDED", RecordDelimiter="\n", + FieldDelimiter=",", QuoteCharacter='"', + QuoteEscapeCharacter='"'): + self.QuoteFields = QuoteFields + self.RecordDelimiter = RecordDelimiter + self.FieldDelimiter = FieldDelimiter + self.QuoteCharacter = QuoteCharacter + self.QuoteEscapeCharacter = QuoteEscapeCharacter + + +class JsonOutput: + """ + JsonOutput- Output as JSON. + """ + def __init__(self, RecordDelimiter="\n"): + self.RecordDelimiter = RecordDelimiter + + +class OutputSerialization: + """ + OutputSerialization: Output Format. + """ + def __init__(self, csv=None, json=None): + self.csv_output = csv + self.json_output = json + + +class RequestProgress: + """ + RequestProgress: Sends progress message. + """ + def __init__(self, enabled=False): + self.enabled = enabled + + +class SelectObjectOptions: + """ + SelectObjectOptions: Options for select object + """ + expression_type = SQL + + def __init__(self, expression, input_serialization, + output_serialization, request_progress): + self.expression = expression + self.in_ser = input_serialization + self.out_ser = output_serialization + self.req_progress = request_progress diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select/reader.py new/minio-5.0.1/minio/select/reader.py --- old/minio-4.0.21/minio/select/reader.py 1970-01-01 01:00:00.000000000 +0100 +++ new/minio-5.0.1/minio/select/reader.py 2019-09-18 23:07:19.000000000 +0200 @@ -0,0 +1,229 @@ +# -*- coding: utf-8 -*- +# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C) +# 2019 MinIO, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +minio.select.reader +~~~~~~~~~~~~~~~ + +This module implements the reader for SelectObject response body. + +:copyright: (c) 2019 by MinIO, Inc. +:license: Apache 2.0, see LICENSE for more details. + +""" + +from __future__ import absolute_import + +import io +import sys + +from binascii import crc32 +from xml.etree import cElementTree +from xml.etree.cElementTree import ParseError + +from .helpers import (EVENT_RECORDS, EVENT_PROGRESS, + EVENT_STATS, EVENT_CONT, + EVENT, EVENT_CONTENT_TYPE, + EVENT_END, ERROR) + +from .helpers import (validate_crc, calculate_crc, byte_int) +from .errors import (SelectMessageError, SelectCRCValidationError) + +def _extract_header(header_bytes): + """ + populates the header map after reading the header in bytes + """ + header_map = {} + header_byte_parsed = 0 + # While loop ends when all the headers present are read + # header contains multipe headers + while header_byte_parsed < len(header_bytes): + header_name_byte_length = byte_int(header_bytes[header_byte_parsed:header_byte_parsed+1]) + header_byte_parsed += 1 + header_name = \ + header_bytes[header_byte_parsed: + header_byte_parsed+header_name_byte_length] + header_byte_parsed += header_name_byte_length + # Header Value Type is of 1 bytes and is skipped + header_byte_parsed += 1 + value_string_byte_length = \ + byte_int(header_bytes[header_byte_parsed: + header_byte_parsed+2]) + header_byte_parsed += 2 + header_value = \ + header_bytes[header_byte_parsed: + header_byte_parsed+value_string_byte_length] + header_byte_parsed += value_string_byte_length + header_map[header_name.decode("utf-8").lstrip(":")] = \ + header_value.decode("utf-8").lstrip(":") + return header_map + +def _parse_stats(stats): + """ + Parses stats XML and populates the stat dict. + """ + stat = {} + for attribute in cElementTree.fromstring(stats): + if attribute.tag == 'BytesScanned': + stat['BytesScanned'] = attribute.text + elif attribute.tag == 'BytesProcessed': + stat['BytesProcessed'] = attribute.text + elif attribute.tag == 'BytesReturned': + stat['BytesReturned'] = attribute.text + + return stat + +class SelectObjectReader(object): + """ + SelectObjectReader returns a Reader that upon read + returns queried data, but stops when the response ends. + LimitedRandomReader is compatible with BufferedIOBase. + """ + def __init__(self, response): + self.response = response + self.remaining_bytes = bytes() + self.stat = {} + self.prog = {} + + def readable(self): + return True + + def writeable(self): + return False + + def close(self): + self.response.close() + + def stats(self): + return self.stat + + def progress(self): + return self.prog + + def __extract_message(self): + """ + Process the response sent from server. + https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html + """ + + crc_bytes = io.BytesIO() + total_bytes_len = self.response.read(4) + if len(total_bytes_len) == 0: + return {} + + total_length = byte_int(total_bytes_len) + header_bytes_len = self.response.read(4) + if len(header_bytes_len) == 0: + return {} + + header_len = byte_int(header_bytes_len) + + crc_bytes.write(total_bytes_len) + crc_bytes.write(header_bytes_len) + + prelude_bytes_crc = self.response.read(4) + if not validate_crc(crc_bytes.getvalue(), prelude_bytes_crc): + raise SelectCRCValidationError( + {"Checksum Mismatch, PreludeCRC of " + + str(calculate_crc(crc_bytes.getvalue())) + + " does not equal expected CRC of " + + str(byte_int(prelude_bytes_crc))}) + + crc_bytes.write(prelude_bytes_crc) + + header_bytes = self.response.read(header_len) + if len(header_bytes) == 0: + raise SelectMessageError( + "Premature truncation of select message header"+ + ", server is sending corrupt message?") + + crc_bytes.write(header_bytes) + + header_map = _extract_header(header_bytes) + payload_length = total_length - header_len - int(16) + payload_bytes = b'' + event_type = header_map["event-type"] + if header_map["message-type"] == ERROR: + raise SelectMessageError( + header_map["error-code"] + ":\"" + \ + header_map["error-message"] + "\"") + elif header_map["message-type"] == EVENT: + if event_type == EVENT_END: + pass + elif event_type == EVENT_CONT: + pass + elif event_type == EVENT_STATS: + content_type = header_map["content-type"] + if content_type != EVENT_CONTENT_TYPE: + raise SelectMessageError( + "Unrecognized content-type {0}".format(content_type)) + else: + payload_bytes = self.response.read(payload_length) + self.stat = _parse_stats(payload_bytes) + + elif event_type == EVENT_RECORDS: + payload_bytes = self.response.read(payload_length) + else: + raise SelectMessageError( + "Unrecognized message-type {0}".format(header_map["message-type"]) + ) + + crc_bytes.write(payload_bytes) + + message_crc = self.response.read(4) + if len(message_crc) == 0: + return {} + + if not validate_crc(crc_bytes.getvalue(), + message_crc): + raise SelectCRCValidationError( + {"Checksum Mismatch, MessageCRC of " + + str(calculate_crc(crc_bytes.getvalue())) + + " does not equal expected CRC of " + + str(byte_int(message_crc))}) + + message = {event_type: payload_bytes} + return message + + def stream(self, num_bytes=32*1024): + """ + extract each record from the response body ... and buffer it. + send only up to requested bytes such as message[:num_bytes] + rest is buffered and added to the next iteration. + + caller should call self.close() to close the stream. + """ + while not self.response.isclosed(): + if len(self.remaining_bytes) == 0: + message = self.__extract_message() + if EVENT_RECORDS in message: + self.remaining_bytes = message.get(EVENT_RECORDS, b'') + else: + # For all other events continue + continue + + result = self.remaining_bytes + if num_bytes < len(self.remaining_bytes): + result = self.remaining_bytes[:num_bytes] + self.remaining_bytes = self.remaining_bytes[len(result):] + + if result == b'': + break + if sys.version_info.major == 3: + yield result.decode('utf-8', errors='ignore') + else: + # Python 2.x needs explicit conversion. + yield result.decode('utf-8', errors='ignore').encode('utf-8') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select_object_options.py new/minio-5.0.1/minio/select_object_options.py --- old/minio-4.0.21/minio/select_object_options.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/minio/select_object_options.py 1970-01-01 01:00:00.000000000 +0100 @@ -1,121 +0,0 @@ -# -*- coding: utf-8 -*- -# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C) -# 2019 MinIO, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" - -This module creates the request for Select - -:copyright: (c) 2019 by MinIO, Inc. -:license: Apache 2.0, see LICENSE for more details. - -""" -from .helpers import (SQL) - - -class CSVInput: - """ - CSVInput: Input Format as CSV. - """ - def __init__(self, FileHeaderInfo=None, RecordDelimiter="\n", - FieldDelimiter=",", QuoteCharacter='"', - QuoteEscapeCharacter='"', Comments="#", - AllowQuotedRecordDelimiter=False): - self.FileHeaderInfo = FileHeaderInfo - self.RecordDelimiter = RecordDelimiter - self.FieldDelimiter = FieldDelimiter - self.QuoteCharacter = QuoteCharacter - self.QuoteEscapeCharacter = QuoteEscapeCharacter - self.Comments = Comments - self.AllowQuotedRecordDelimiter = AllowQuotedRecordDelimiter - - -class JSONInput: - """ - JSONInput: Input format as JSON. - """ - def __init__(self, Type=None): - self.Type = Type - - -class ParquetInput: - """ - ParquetInput: Input format as Parquet - """ - - -class InputSerialization: - """ - InputSerialization: nput Format. - """ - def __init__(self, compression_type="NONE", csv=None, json=None, par=None): - self.compression_type = compression_type - self.csv_input = csv - self.json_input = json - self.parquet_input = par - - -class CSVOutput: - """ - CSVOutput: Output as CSV. - - """ - def __init__(self, QuoteFields="ASNEEDED", RecordDelimiter="\n", - FieldDelimiter=",", QuoteCharacter='"', - QuoteEscapeCharacter='"'): - self.QuoteFields = QuoteFields - self.RecordDelimiter = RecordDelimiter - self.FieldDelimiter = FieldDelimiter - self.QuoteCharacter = QuoteCharacter - self.QuoteEscapeCharacter = QuoteEscapeCharacter - - -class JsonOutput: - """ - JsonOutput- Output as JSON. - """ - def __init__(self, RecordDelimiter="\n"): - self.RecordDelimiter = RecordDelimiter - - -class OutputSerialization: - """ - OutputSerialization: Output Format. - """ - def __init__(self, csv=None, json=None): - self.csv_output = csv - self.json_output = json - - -class RequestProgress: - """ - RequestProgress: Sends progress message. - """ - def __init__(self, enabled=False): - self.enabled = enabled - - -class SelectObjectOptions: - """ - SelectObjectOptions: Options for select object - """ - expression_type = SQL - - def __init__(self, expression, input_serialization, - output_serialization, request_progress): - self.expression = expression - self.in_ser = input_serialization - self.out_ser = output_serialization - self.req_progress = request_progress diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select_object_reader.py new/minio-5.0.1/minio/select_object_reader.py --- old/minio-4.0.21/minio/select_object_reader.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/minio/select_object_reader.py 1970-01-01 01:00:00.000000000 +0100 @@ -1,294 +0,0 @@ -# -*- coding: utf-8 -*- -# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C) -# 2015, 2016, 2017, 2018, 2019 MinIO, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import io -import codecs - -from binascii import crc32 -from xml.etree import cElementTree -from .error import InvalidXMLError -from xml.etree.cElementTree import ParseError - -from .helpers import (READ_SIZE_SELECT, EVENT_RECORDS, - EVENT_PROGRESS, EVENT_STATS, EVENT, EVENT_END, ERROR) - - -class CRCValidationError(Exception): - ''' - Raised in case of CRC mismatch - ''' - - -def calculate_crc(value): - ''' - Returns the CRC using crc32 - ''' - return crc32(value) & 0xffffffff - - -def validate_crc(current_value, expected_value): - ''' - Validate through CRC check - ''' - crc_current = calculate_crc(current_value) - crc_expected = byte_int(expected_value) - if crc_current == crc_expected: - return True - return False - - -def byte_int(data_bytes): - ''' - Convert bytes to big-endian integer - ''' - return int(codecs.encode(data_bytes, 'hex'), 16) - - -class SelectObjectReader(object): - """ - SelectObjectReader returns a Reader that upon read - returns queried data, but stops when the response ends. - LimitedRandomReader is compatible with BufferedIOBase. - """ - def __init__(self, response): - self.response = response - self.remaining_bytes = bytearray() - self.stat = {} - self.prog = {} - - def readable(self): - return True - - def writeable(self): - return False - - @property - def closed(self): - return self.response.isclosed() - - def close(self): - self.response.close() - - def stats(self): - return self.stat - - def progress(self): - return self.prog - - def __extract_message(self): - """ - Process the response sent from server. - https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - """ - rec = bytearray() - read_buffer = READ_SIZE_SELECT - # Messages read in chunks of read_buffer bytes - chunked_message = self.response.read(read_buffer) - total_byte_parsed = 0 - if len(chunked_message) == 0: - self.close() - return b'' - - # The first 4 bytes gives the total_byte_length and then - # complete message is extracted - while total_byte_parsed < read_buffer: - # Case 1 - If the total_byte_length is partially read - # in the chunked message , then complete the total_byte_length - # by reading the required bytes from response and then - # generate the complete message - if read_buffer - total_byte_parsed <= 4: - value = chunked_message[total_byte_parsed: - total_byte_parsed + - (read_buffer - total_byte_parsed) + - 1] - rem_bytes = self.response.read(4 - (read_buffer - - total_byte_parsed)) - message = value + rem_bytes + \ - self.response.read(byte_int(value+rem_bytes)-4) - end_status = self.__decode_message(message, rec) - total_byte_parsed = 0 - break - else: - total_byte_length = chunked_message[total_byte_parsed: total_byte_parsed + 4] - # Case 2 - Incomplete message in chunked message , - # so creating the complete message by reading the - # total_byte_length- len_read from the response message. - if total_byte_parsed + byte_int(total_byte_length) > read_buffer: - len_read = len(chunked_message[total_byte_parsed:]) - message = chunked_message[total_byte_parsed:] + \ - self.response.read(byte_int(total_byte_length)-len_read) - end_status = self.__decode_message(message, rec) - total_byte_parsed += byte_int(total_byte_length) - # Case 3- the complete message is present in chunked - # messsage. - else: - message = chunked_message[total_byte_parsed: - total_byte_parsed + - byte_int(total_byte_length)] - total_byte_parsed += byte_int(total_byte_length) - end_status = self.__decode_message(message, rec) - if end_status: - break - return rec - - def __extract_header(self, header, header_length): - """ - populates the header map after reading the header - """ - header_map = {} - header_byte_parsed = 0 - # While loop ends when all the headers present are read - # header contains multipe headers - while header_byte_parsed < header_length: - header_name_byte_length = \ - byte_int(header[header_byte_parsed: header_byte_parsed+1]) - header_byte_parsed += 1 - header_name = \ - header[header_byte_parsed: - header_byte_parsed+header_name_byte_length] - header_byte_parsed += header_name_byte_length - # Header Value Type is of 1 bytes and is skipped - header_byte_parsed += 1 - value_string_byte_length = \ - byte_int(header[header_byte_parsed: - header_byte_parsed+2]) - header_byte_parsed += 2 - header_value = \ - header[header_byte_parsed: - header_byte_parsed+value_string_byte_length] - header_byte_parsed += value_string_byte_length - header_map[header_name.decode("utf-8").lstrip(":")] = \ - header_value.decode("utf-8").lstrip(":") - return header_map - - def __read_stats(self, stats): - """ - pupulates the stat dict. - """ - root = cElementTree.fromstring(stats) - for attribute in root: - if attribute.tag == 'BytesScanned': - self.stat['BytesScanned'] = attribute.text - elif attribute.tag == 'BytesProcessed': - self.stat['BytesProcessed'] = attribute.text - elif attribute.tag == 'BytesReturned': - self.stat['BytesReturned'] = attribute.text - - def __parse_message(self, header_map, payload, payload_length, record): - ''' - Parses the message - ''' - if header_map["message-type"] == ERROR: - error = header_map["error-code"] + ":\"" +\ - header_map["error-message"] + "\"" - if header_map["message-type"] == EVENT: - # Fetch the content-type - content_type = header_map["content-type"] - # Fetch the event-type - event_type = header_map["event-type"] - if event_type == EVENT_RECORDS: - record += payload[0:payload_length] - elif event_type == EVENT_PROGRESS: - if content_type == "text/xml": - progress = payload[0:payload_length] - elif event_type == EVENT_STATS: - if content_type == "text/xml": - self.__read_stats(payload[0:payload_length]) - - def __decode_message(self, message, rec): - end_status = False - total_byte_length = message[0:4] # total_byte_length is of 4 bytes - headers_byte_length = message[4: 8] # headers_byte_length is 4 bytes - prelude_crc = message[8:12] # prelude_crc is of 4 bytes - header = message[12:12+byte_int(headers_byte_length)] - payload_length = byte_int(total_byte_length) - \ - byte_int(headers_byte_length) - int(16) - payload = message[12 + byte_int(headers_byte_length): - 12 + byte_int(headers_byte_length) + payload_length] - message_crc = message[12 + byte_int(headers_byte_length) + - payload_length: 12 + - byte_int(headers_byte_length) + - payload_length + 4] - - if not validate_crc(total_byte_length + headers_byte_length, - prelude_crc): - raise CRCValidationError( - {"Checksum Mismatch, MessageCRC of " + - str(calculate_crc(total_byte_length + - headers_byte_length)) + - " does not equal expected CRC of " + - str(byte_int(prelude_crc))}) - - if not validate_crc(message[0:len(message)-4], message_crc): - raise CRCValidationError( - {"Checksum Mismatch, MessageCRC of " + - str(calculate_crc(message)) + - " does not equal expected CRC of " + - str(byte_int(message_crc))}) - - header_map = self.__extract_header(header, byte_int(headers_byte_length)) - - if header_map["message-type"] == EVENT: - # Parse message only when event-type is Records, - # Progress, Stats. Break the loop if event type is End - # Do nothing if event type is Cont - if header_map["event-type"] == EVENT_RECORDS or \ - header_map["event-type"] == EVENT_PROGRESS or \ - header_map["event-type"] == EVENT_STATS: - self.__parse_message(header_map, payload, - payload_length, rec) - - if header_map["event-type"] == EVENT_END: - end_status = True - if header_map["message-type"] == ERROR: - self.__parse_message(header_map, payload, payload_length, rec) - end_status = True - return end_status - - def __read(self, num_bytes): - """ - extract each record from the response body ... and buffer it. - send only up to requested bytes such as message[:num_bytes] - rest is buffered and added to the next iteration. - """ - if len(self.remaining_bytes) == 0: - res = self.__extract_message() - if len(res) == 0: - return b'' - else: - self.remaining_bytes = res - - if num_bytes < len(self.remaining_bytes): - result = self.remaining_bytes[:num_bytes] - del self.remaining_bytes[:num_bytes] - return result - else: - left_in_buffer = self.remaining_bytes[:len(self.remaining_bytes)] - del self.remaining_bytes[:len(left_in_buffer)] - return left_in_buffer - - def stream(self, num_bytes): - """ - streams the response - """ - while True: - x = self.__read(num_bytes) - if x == b'': - break - elif len(x) < num_bytes: - x += self.__read(num_bytes-len(x)) - yield x.decode('utf-8') if isinstance(x, bytearray) else x diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/signer.py new/minio-5.0.1/minio/signer.py --- old/minio-4.0.21/minio/signer.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/minio/signer.py 2019-09-18 23:07:19.000000000 +0200 @@ -251,7 +251,9 @@ :param headers: HTTP header dictionary. :param content_sha256: Content sha256 hexdigest string. """ - lines = [method, parsed_url.path, parsed_url.query] + # Should not encode ~. Decode it back if present. + parsed_url_path = parsed_url.path.replace("%7E", "~") + lines = [method, parsed_url_path, parsed_url.query] # Headers added to canonical request. header_lines = [] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio.egg-info/PKG-INFO new/minio-5.0.1/minio.egg-info/PKG-INFO --- old/minio-4.0.21/minio.egg-info/PKG-INFO 2019-08-28 21:41:17.000000000 +0200 +++ new/minio-5.0.1/minio.egg-info/PKG-INFO 2019-09-18 23:08:50.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: minio -Version: 4.0.21 +Version: 5.0.1 Summary: MinIO Python Library for Amazon S3 Compatible Cloud Storage for Python Home-page: https://github.com/minio/minio-py Author: MinIO, Inc. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio.egg-info/SOURCES.txt new/minio-5.0.1/minio.egg-info/SOURCES.txt --- old/minio-4.0.21/minio.egg-info/SOURCES.txt 2019-08-28 21:41:17.000000000 +0200 +++ new/minio-5.0.1/minio.egg-info/SOURCES.txt 2019-09-18 23:08:50.000000000 +0200 @@ -49,8 +49,6 @@ minio/helpers.py minio/parsers.py minio/post_policy.py -minio/select_object_options.py -minio/select_object_reader.py minio/signer.py minio/sse.py minio/thread_pool.py @@ -60,6 +58,11 @@ minio.egg-info/dependency_links.txt minio.egg-info/requires.txt minio.egg-info/top_level.txt +minio/select/__init__.py +minio/select/errors.py +minio/select/helpers.py +minio/select/options.py +minio/select/reader.py tests/__init__.py tests/functional_test.sh tests/unit_test.sh diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/setup.py new/minio-5.0.1/setup.py --- old/minio-4.0.21/setup.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/setup.py 2019-09-18 23:07:19.000000000 +0200 @@ -37,6 +37,7 @@ packages = [ 'minio', + 'minio.select', ] requires = [ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/tests/functional/tests.py new/minio-5.0.1/tests/functional/tests.py --- old/minio-4.0.21/tests/functional/tests.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/tests/functional/tests.py 2019-09-18 23:07:19.000000000 +0200 @@ -16,13 +16,13 @@ # limitations under the License. from __future__ import division +from __future__ import absolute_import import os import io import csv import sys -from io import BytesIO from sys import exit import uuid import shutil @@ -44,10 +44,10 @@ from minio.error import (APINotImplemented, NoSuchBucketPolicy, ResponseError, PreconditionFailed, BucketAlreadyOwnedByYou, BucketAlreadyExists, InvalidBucketError) -from minio.select_object_options import (SelectObjectOptions, CSVInput, - RequestProgress, InputSerialization, - OutputSerialization, CSVOutput) -from minio.select_object_reader import (calculate_crc) +from minio.select.options import (SelectObjectOptions, CSVInput, + RequestProgress, InputSerialization, + OutputSerialization, CSVOutput) +from minio.select.helpers import (calculate_crc) from minio.sse import SSE_C from minio.sse import copy_SSE_C @@ -291,8 +291,8 @@ try: client.make_bucket(bucket_name) content = io.BytesIO(b"col1,col2,col3\none,two,three\nX,Y,Z\n") - expected_crc = calculate_crc(content.getbuffer()) - client.put_object(bucket_name, csvfile, content, content.getbuffer().nbytes) + expected_crc = calculate_crc(content.getvalue()) + client.put_object(bucket_name, csvfile, content, len(content.getvalue())) options = SelectObjectOptions( expression="select * from s3object", @@ -319,11 +319,11 @@ ) data = client.select_object_content(bucket_name, csvfile, options) # Get the records - records = "" + records = io.BytesIO() for d in data.stream(10*1024): - records += d - generated_crc = calculate_crc(str.encode(records)) + records.write(d.encode('utf-8')) + generated_crc = calculate_crc(records.getvalue()) if expected_crc != generated_crc: raise ValueError('Data mismatch Expected : "col1,col2,col3\none,two,three\nX,Y,Z\n"', 'Received {}', records) @@ -2062,10 +2062,8 @@ log_output = LogOutput(client.get_bucket_notification, 'test_get_bucket_notification') test_get_bucket_notification(client, log_output) - # getBuffer() of io.BytesIO is supported in Python3. - if sys.version_info.major == 3: - log_output = LogOutput(client.select_object_content, 'test_select_object_content') - test_select_object_content(client, log_output) + log_output = LogOutput(client.select_object_content, 'test_select_object_content') + test_select_object_content(client, log_output) else: # Quick mode tests @@ -2114,10 +2112,8 @@ log_output = LogOutput(client.copy_object, 'test_copy_object_no_copy_condition') test_copy_object_no_copy_condition(client, log_output) - # getBuffer() of io.BytesIO is supported in Python3. - if sys.version_info.major == 3: - log_output = LogOutput(client.select_object_content, 'test_select_object_content') - test_select_object_content(client, log_output) + log_output = LogOutput(client.select_object_content, 'test_select_object_content') + test_select_object_content(client, log_output) if secure: log_output = LogOutput(client.copy_object, 'test_copy_object_with_sse') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/tests/unit/generate_xml_test.py new/minio-5.0.1/tests/unit/generate_xml_test.py --- old/minio-4.0.21/tests/unit/generate_xml_test.py 2019-08-28 21:40:03.000000000 +0200 +++ new/minio-5.0.1/tests/unit/generate_xml_test.py 2019-09-18 23:07:19.000000000 +0200 @@ -21,13 +21,12 @@ from minio.xml_marshal import (xml_marshal_bucket_constraint, xml_marshal_complete_multipart_upload, xml_marshal_select) -from minio.select_object_options import (SelectObjectOptions, - CSVInput, - RequestProgress, - InputSerialization, - OutputSerialization, - CSVOutput) - +from minio.select.options import (SelectObjectOptions, + CSVInput, + RequestProgress, + InputSerialization, + OutputSerialization, + CSVOutput) class GenerateRequestTest(TestCase): def test_generate_bucket_constraint(self):
