[ 
https://issues.apache.org/jira/browse/BEAM-2264?focusedWorklogId=277009&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-277009
 ]

ASF GitHub Bot logged work on BEAM-2264:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 15/Jul/19 21:53
            Start Date: 15/Jul/19 21:53
    Worklog Time Spent: 10m 
      Work Description: aaltay commented on pull request #9060: [BEAM-2264] 
Reuse GCP credentials in GCS calls.
URL: https://github.com/apache/beam/pull/9060#discussion_r303653342
 
 

 ##########
 File path: sdks/python/apache_beam/internal/gcp/auth.py
 ##########
 @@ -57,77 +71,58 @@ def set_running_in_gce(worker_executing_project):
   executing_project = worker_executing_project
 
 
-class AuthenticationException(retry.PermanentException):
-  pass
-
-
-class _GCEMetadataCredentials(OAuth2Credentials):
-  """For internal use only; no backwards-compatibility guarantees.
-
-  Credential object initialized using access token from GCE VM metadata."""
-
-  def __init__(self, user_agent=None):
-    """Create an instance of GCEMetadataCredentials.
-
-    These credentials are generated by contacting the metadata server on a GCE
-    VM instance.
-
-    Args:
-      user_agent: string, The HTTP User-Agent to provide for this application.
-    """
-    super(_GCEMetadataCredentials, self).__init__(
-        None,  # access_token
-        None,  # client_id
-        None,  # client_secret
-        None,  # refresh_token
-        datetime.datetime(2010, 1, 1),  # token_expiry, set to time in past.
-        None,  # token_uri
-        user_agent)
-
-  @retry.with_exponential_backoff(
-      retry_filter=retry.retry_on_server_errors_and_timeout_filter)
-  def _refresh(self, http_request):
-    refresh_time = datetime.datetime.utcnow()
-    metadata_root = os.environ.get(
-        'GCE_METADATA_ROOT', 'metadata.google.internal')
-    token_url = ('http://{}/computeMetadata/v1/instance/service-accounts/'
-                 'default/token').format(metadata_root)
-    req = Request(token_url, headers={'Metadata-Flavor': 'Google'})
-    token_data = json.loads(urlopen(req, timeout=60).read().decode('utf-8'))
-    self.access_token = token_data['access_token']
-    self.token_expiry = (refresh_time +
-                         datetime.timedelta(seconds=token_data['expires_in']))
-
-
 def get_service_credentials():
   """For internal use only; no backwards-compatibility guarantees.
 
-  Get credentials to access Google services."""
-  user_agent = 'beam-python-sdk/1.0'
-  if is_running_in_gce:
-    # We are currently running as a GCE taskrunner worker.
-    #
-    # TODO(ccy): It's not entirely clear if these credentials are thread-safe.
-    # If so, we can cache these credentials to save the overhead of creating
-    # them again.
-    return _GCEMetadataCredentials(user_agent=user_agent)
-  else:
-    client_scopes = [
-        'https://www.googleapis.com/auth/bigquery',
-        'https://www.googleapis.com/auth/cloud-platform',
-        'https://www.googleapis.com/auth/devstorage.full_control',
-        'https://www.googleapis.com/auth/userinfo.email',
-        'https://www.googleapis.com/auth/datastore'
-    ]
-
-    try:
-      credentials = GoogleCredentials.get_application_default()
-      credentials = credentials.create_scoped(client_scopes)
-      logging.debug('Connecting using Google Application Default '
-                    'Credentials.')
-      return credentials
-    except Exception as e:
-      logging.warning(
-          'Unable to find default credentials to use: %s\n'
-          'Connecting anonymously.', e)
-      return None
+  Get credentials to access Google services.
+
+  Returns:
+    A ``oauth2client.client.OAuth2Credentials`` object or None if credentials
+    not found. Returned object is thread-safe.
+  """
+  return _Credentials.get_service_credentials()
+
+
+class _Credentials(object):
+  _credentials_lock = threading.Lock()
+  _credentials_init = False
+  _credentials = None
+
+  @classmethod
+  def get_service_credentials(cls):
+    if cls._credentials_init:
+      return cls._credentials
 
 Review comment:
   Is it possible for credentials to expire?
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 277009)
    Time Spent: 1h 20m  (was: 1h 10m)

> Re-use credential instead of generating a new one on each GCS call
> -------------------------------------------------------------------
>
>                 Key: BEAM-2264
>                 URL: https://issues.apache.org/jira/browse/BEAM-2264
>             Project: Beam
>          Issue Type: Improvement
>          Components: sdk-py-core
>            Reporter: Luke Cwik
>            Priority: Minor
>          Time Spent: 1h 20m
>  Remaining Estimate: 0h
>
> We should cache the credential used within a Pipeline and re-use it instead 
> of generating a new one on each GCS call. When executing (against 2.0.0 RC2):
> {code}
> python -m apache_beam.examples.wordcount --input 
> "gs://dataflow-samples/shakespeare/*" --output local_counts
> {code}
> Note that we seemingly generate a new access token on every call instead of 
> only when a refresh is required.
> {code}
>   super(GcsIO, cls).__new__(cls, storage_client))
> INFO:root:Starting the size estimation of the input
> INFO:oauth2client.transport:Attempting refresh to obtain initial access_token
> INFO:oauth2client.client:Refreshing access_token
> INFO:root:Finished the size estimation of the input at 1 files. Estimation 
> took 0.286200046539 seconds
> INFO:root:Running pipeline with DirectRunner.
> INFO:root:Starting the size estimation of the input
> INFO:oauth2client.transport:Attempting refresh to obtain initial access_token
> INFO:oauth2client.client:Refreshing access_token
> INFO:root:Finished the size estimation of the input at 43 files. Estimation 
> took 0.205624818802 seconds
> INFO:oauth2client.transport:Attempting refresh to obtain initial access_token
> INFO:oauth2client.client:Refreshing access_token
> INFO:oauth2client.transport:Attempting refresh to obtain initial access_token
> INFO:oauth2client.client:Refreshing access_token
> INFO:oauth2client.transport:Attempting refresh to obtain initial access_token
> INFO:oauth2client.client:Refreshing access_token
> INFO:oauth2client.transport:Attempting refresh to obtain initial access_token
> INFO:oauth2client.client:Refreshing access_token
> INFO:oauth2client.transport:Attempting refresh to obtain initial access_token
> ... many more times ...
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)

Reply via email to