[
https://issues.apache.org/jira/browse/BEAM-4850?focusedWorklogId=167147&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-167147
]
ASF GitHub Bot logged work on BEAM-4850:
----------------------------------------
Author: ASF GitHub Bot
Created on: 17/Nov/18 02:13
Start Date: 17/Nov/18 02:13
Worklog Time Spent: 10m
Work Description: aaltay closed pull request #7051: [BEAM-4850] Remove
some dependencies of apitools
URL: https://github.com/apache/beam/pull/7051
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
index b833aaa61ae..c9848bfd196 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
@@ -24,7 +24,6 @@
from builtins import object
import codecs
import getpass
-import httplib2
import json
import logging
import os
@@ -33,6 +32,7 @@
import time
from datetime import datetime
import io
+import httplib2
from past.builtins import unicode
@@ -57,6 +57,15 @@
from apache_beam.transforms.display import DisplayData
from apache_beam.utils import retry
+# Protect against environments where google storage library is not available.
+# pylint: disable=wrong-import-order, wrong-import-position
+try:
+ from google.cloud import storage as gcloud_storage
+ from google.cloud.exceptions import GoogleCloudError
+except ImportError:
+ gcloud_storage = None
+# pylint: enable=wrong-import-order, wrong-import-position
+
# Environment version information. It is passed to the service during a
# a job submission and is used by the service to establish what features
# are expected by the workers.
@@ -461,8 +470,7 @@ def _stage_resources(self, options):
staging_location=google_cloud_options.staging_location)
return resources
- def stage_file(self, gcs_or_local_path, file_name, stream,
- mime_type='application/octet-stream'):
+ def stage_file(self, gcs_or_local_path, file_name, stream):
"""Stages a file at a GCS or local path with stream-supplied contents."""
if not gcs_or_local_path.startswith('gs://'):
local_path = FileSystems.join(gcs_or_local_path, file_name)
@@ -471,27 +479,25 @@ def stage_file(self, gcs_or_local_path, file_name, stream,
f.write(stream.read())
return
gcs_location = FileSystems.join(gcs_or_local_path, file_name)
- bucket, name = gcs_location[5:].split('/', 1)
+ bucket_name, file_name = gcs_location[5:].split('/', 1)
- request = storage.StorageObjectsInsertRequest(
- bucket=bucket, name=name)
+ client = gcloud_storage.Client(project=self.google_cloud_options.project)
+ blob = client.get_bucket(bucket_name).blob(file_name)
logging.info('Starting GCS upload to %s...', gcs_location)
- upload = storage.Upload(stream, mime_type)
try:
- response = self._storage_client.objects.Insert(request, upload=upload)
- except exceptions.HttpError as e:
+ blob.upload_from_file(stream)
+ except GoogleCloudError as e:
reportable_errors = {
403: 'access denied',
404: 'bucket not found',
}
- if e.status_code in reportable_errors:
+ if e.code in reportable_errors:
raise IOError(('Could not upload to GCS path %s: %s. Please verify '
'that credentials are valid and that you have write '
'access to the specified path.') %
- (gcs_or_local_path, reportable_errors[e.status_code]))
+ (gcs_or_local_path, reportable_errors[e.code]))
raise
logging.info('Completed GCS upload to %s', gcs_location)
- return response
@retry.no_retries # Using no_retries marks this as an integration point.
def create_job(self, job):
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 7ebecd32664..d0535880eee 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -142,6 +142,7 @@ def get_version():
'proto-google-cloud-datastore-v1>=0.90.0,<=0.90.4',
'googledatastore==7.0.1; python_version < "3.0"',
'google-cloud-pubsub==0.35.4',
+ 'google-cloud-storage==1.13.0',
# GCP packages required by tests
'google-cloud-bigquery>=1.6.0,<1.7.0',
]
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
Issue Time Tracking
-------------------
Worklog Id: (was: 167147)
Time Spent: 1h 20m (was: 1h 10m)
> Remove dependency on google-apitools package in Python SDK.
> -----------------------------------------------------------
>
> Key: BEAM-4850
> URL: https://issues.apache.org/jira/browse/BEAM-4850
> Project: Beam
> Issue Type: Improvement
> Components: sdk-py-core
> Reporter: Valentyn Tymofieiev
> Assignee: Charles Chen
> Priority: Major
> Time Spent: 1h 20m
> Remaining Estimate: 0h
>
> AFAIK google-apitools is not officially supported. For Google Cloud Platform
> APIs such as Datastore, Cloud Storage or Pub/Sub, using [Cloud Client
> Libraries for
> Python|https://github.com/GoogleCloudPlatform/google-cloud-python] is
> recommended. If that is not possible, we may be able to use
> [https://github.com/google/google-api-python-client] - officially supported,
> but now in maintenance mode.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)