This is an automated email from the ASF dual-hosted git repository.

machristie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/airavata-django-portal-sdk.git
commit 94ec26a463888cebffbe468f90e6f1e893152d48
Author: Marcus Christie <[email protected]>
AuthorDate: Wed Aug 4 09:26:36 2021 -0400

    AIRAVATA-3420 Allow reading just a few bytes from very large files by streaming http download (initial use case is to determine file type)
---
 airavata_django_portal_sdk/user_storage/api.py               |  8 ++++----
 .../user_storage/backends/mft_provider.py                    | 11 +++++++----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/airavata_django_portal_sdk/user_storage/api.py b/airavata_django_portal_sdk/user_storage/api.py
index 74c8525..a15b4b7 100644
--- a/airavata_django_portal_sdk/user_storage/api.py
+++ b/airavata_django_portal_sdk/user_storage/api.py
@@ -843,10 +843,10 @@ def _determine_content_type(full_path, content_type=None, backend=None):
     # Check if file is Unicode text by trying to read some of it
     try:
         if backend is not None:
-            file = backend.open(full_path)
-            # Try to decode the first kb as UTF8
-            file.read(1024).decode('utf-8')
-            result = "text/plain"
+            with backend.open(full_path) as file:
+                # Try to decode the first kb as UTF8
+                file.read(1024).decode('utf-8')
+                result = "text/plain"
     except UnicodeDecodeError:
         logger.debug(f"Failed to read as Unicode text: {full_path}")
     return result
diff --git a/airavata_django_portal_sdk/user_storage/backends/mft_provider.py b/airavata_django_portal_sdk/user_storage/backends/mft_provider.py
index 927770a..44fb704 100644
--- a/airavata_django_portal_sdk/user_storage/backends/mft_provider.py
+++ b/airavata_django_portal_sdk/user_storage/backends/mft_provider.py
@@ -205,11 +205,14 @@ class MFTUserStorageProvider(UserStorageProvider, ProvidesDownloadUrl):
 
     def open(self, resource_path):
         download_url = self.get_download_url(resource_path)
-        r = requests.get(download_url)
+        r = requests.get(download_url, stream=True)
         r.raise_for_status()
-        file = io.BytesIO(r.content)
-        file.name = os.path.basename(resource_path)
-        return file
+        # raw stream doesn't automatically decode the response body based on the
+        # transfer encoding, but setting decode_content to True causes it to do
+        # the decoding.
+        r.raw.decode_content = True
+        r.raw.name = os.path.basename(resource_path)
+        return r.raw
 
     def _get_child_path(self, resource_path):
         """Convert resource path into child path appropriate for resource."""
