Repository: beam Updated Branches: refs/heads/master a1e8ac4fa -> 9d3b0db2d
[BEAM-1218] Move the names and auth files to appropriate directories Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1512714d Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1512714d Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1512714d Branch: refs/heads/master Commit: 1512714d2c5fdc05f2012ed7584a060217d9ee65 Parents: a1e8ac4 Author: Sourabh Bajaj <[email protected]> Authored: Thu Feb 23 13:25:09 2017 -0800 Committer: Ahmet Altay <[email protected]> Committed: Thu Feb 23 14:12:13 2017 -0800 ---------------------------------------------------------------------- sdks/python/apache_beam/internal/auth.py | 185 ------------------- sdks/python/apache_beam/internal/auth_test.py | 44 ----- sdks/python/apache_beam/internal/gcp/auth.py | 185 +++++++++++++++++++ .../apache_beam/internal/gcp/auth_test.py | 44 +++++ sdks/python/apache_beam/io/gcp/bigquery.py | 2 +- .../apache_beam/io/gcp/datastore/v1/helper.py | 2 +- sdks/python/apache_beam/io/gcp/gcsio.py | 2 +- .../runners/dataflow/dataflow_runner.py | 6 +- .../runners/dataflow/internal/apiclient.py | 4 +- .../runners/dataflow/internal/dependency.py | 2 +- .../dataflow/internal/dependency_test.py | 2 +- .../runners/dataflow/internal/names.py | 82 ++++++++ sdks/python/apache_beam/utils/names.py | 82 -------- sdks/python/generate_pydoc.sh | 3 + 14 files changed, 324 insertions(+), 321 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/internal/auth.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/internal/auth.py b/sdks/python/apache_beam/internal/auth.py deleted file mode 100644 index ccc67c6..0000000 --- a/sdks/python/apache_beam/internal/auth.py +++ /dev/null @@ -1,185 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Dataflow credentials and authentication.""" - -import datetime -import json -import logging -import os -import sys -import urllib2 - -from oauth2client.client import GoogleCredentials -from oauth2client.client import OAuth2Credentials - -from apache_beam.utils import processes -from apache_beam.utils import retry -from apache_beam.utils.pipeline_options import GoogleCloudOptions -from apache_beam.utils.pipeline_options import PipelineOptions - - -# When we are running in GCE, we can authenticate with VM credentials. -is_running_in_gce = False - -# When we are running in GCE, this value is set based on worker startup -# information. -executing_project = None - - -def set_running_in_gce(worker_executing_project): - """Informs the authentication library that we are running in GCE. - - When we are running in GCE, we have the option of using the VM metadata - credentials for authentication to Google services. - - Args: - worker_executing_project: The project running the workflow. This information - comes from worker startup information. - """ - global is_running_in_gce - global executing_project - is_running_in_gce = True - executing_project = worker_executing_project - - -class AuthenticationException(retry.PermanentException): - pass - - -class GCEMetadataCredentials(OAuth2Credentials): - """Credential object initialized using access token from GCE VM metadata.""" - - def __init__(self, user_agent=None): - """Create an instance of GCEMetadataCredentials. - - These credentials are generated by contacting the metadata server on a GCE - VM instance. - - Args: - user_agent: string, The HTTP User-Agent to provide for this application. - """ - super(GCEMetadataCredentials, self).__init__( - None, # access_token - None, # client_id - None, # client_secret - None, # refresh_token - datetime.datetime(2010, 1, 1), # token_expiry, set to time in past. - None, # token_uri - user_agent) - - @retry.with_exponential_backoff( - retry_filter=retry.retry_on_server_errors_and_timeout_filter) - def _refresh(self, http_request): - refresh_time = datetime.datetime.now() - req = urllib2.Request('http://metadata.google.internal/computeMetadata/v1/' - 'instance/service-accounts/default/token', - headers={'Metadata-Flavor': 'Google'}) - token_data = json.loads(urllib2.urlopen(req).read()) - self.access_token = token_data['access_token'] - self.token_expiry = (refresh_time + - datetime.timedelta(seconds=token_data['expires_in'])) - - -class _GCloudWrapperCredentials(OAuth2Credentials): - """Credentials class wrapping gcloud credentials via shell.""" - - def __init__(self, user_agent, **kwds): - super(_GCloudWrapperCredentials, self).__init__( - None, None, None, None, None, None, user_agent, **kwds) - - def _refresh(self, http_request): - """Gets an access token using the gcloud client.""" - try: - gcloud_process = processes.Popen( - ['gcloud', 'auth', 'print-access-token'], stdout=processes.PIPE) - except OSError as exn: - logging.error('The gcloud tool was not found.', exc_info=True) - raise AuthenticationException('The gcloud tool was not found: %s' % exn) - output, _ = gcloud_process.communicate() - self.access_token = output.strip() - - -def get_service_credentials(): - """Get credentials to access Google services.""" - user_agent = 'beam-python-sdk/1.0' - if is_running_in_gce: - # We are currently running as a GCE taskrunner worker. - # - # TODO(ccy): It's not entirely clear if these credentials are thread-safe. - # If so, we can cache these credentials to save the overhead of creating - # them again. - return GCEMetadataCredentials(user_agent=user_agent) - else: - client_scopes = [ - 'https://www.googleapis.com/auth/bigquery', - 'https://www.googleapis.com/auth/cloud-platform', - 'https://www.googleapis.com/auth/devstorage.full_control', - 'https://www.googleapis.com/auth/userinfo.email', - 'https://www.googleapis.com/auth/datastore' - ] - - # TODO(BEAM-1068): Do not recreate options from sys.argv. - # We are currently being run from the command line. - google_cloud_options = PipelineOptions( - sys.argv).view_as(GoogleCloudOptions) - if google_cloud_options.service_account_name: - if not google_cloud_options.service_account_key_file: - raise AuthenticationException( - 'key file not provided for service account.') - if not os.path.exists(google_cloud_options.service_account_key_file): - raise AuthenticationException( - 'Specified service account key file does not exist.') - - # The following code uses oauth2client >=2.0.0 functionality and if this - # is not available due to import errors will use 1.5.2 functionality. - try: - from oauth2client.service_account import ServiceAccountCredentials - return ServiceAccountCredentials.from_p12_keyfile( - google_cloud_options.service_account_name, - google_cloud_options.service_account_key_file, - private_key_password=None, - scopes=client_scopes) - except ImportError: - with file(google_cloud_options.service_account_key_file) as f: - service_account_key = f.read() - from oauth2client.client import SignedJwtAssertionCredentials - return SignedJwtAssertionCredentials( - google_cloud_options.service_account_name, - service_account_key, - client_scopes, - user_agent=user_agent) - else: - try: - credentials = _GCloudWrapperCredentials(user_agent) - # Check if we are able to get an access token. If not fallback to - # application default credentials. - credentials.get_access_token() - return credentials - except AuthenticationException: - logging.warning('Unable to find credentials from gcloud.') - - # Falling back to application default credentials. - try: - credentials = GoogleCredentials.get_application_default() - credentials = credentials.create_scoped(client_scopes) - logging.debug('Connecting using Google Application Default ' - 'Credentials.') - return credentials - except Exception: - logging.warning('Unable to find default credentials to use.') - raise http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/internal/auth_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/internal/auth_test.py b/sdks/python/apache_beam/internal/auth_test.py deleted file mode 100644 index e4fd96f..0000000 --- a/sdks/python/apache_beam/internal/auth_test.py +++ /dev/null @@ -1,44 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Unit tests for the auth module.""" - -import os -import sys -import unittest - -import mock - -from apache_beam.internal import auth - - -class AuthTest(unittest.TestCase): - - def test_create_application_client(self): - try: - test_args = [ - 'test', '--service_account_name', 'abc', '--service_account_key_file', - os.path.join(os.path.dirname(__file__), '..', 'tests', - 'data', 'privatekey.p12')] - with mock.patch.object(sys, 'argv', test_args): - credentials = auth.get_service_credentials() - self.assertIsNotNone(credentials) - except NotImplementedError: - self.skipTest('service account tests require pyOpenSSL module.') - - -if __name__ == '__main__': - unittest.main() http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/internal/gcp/auth.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/internal/gcp/auth.py b/sdks/python/apache_beam/internal/gcp/auth.py new file mode 100644 index 0000000..ccc67c6 --- /dev/null +++ b/sdks/python/apache_beam/internal/gcp/auth.py @@ -0,0 +1,185 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Dataflow credentials and authentication.""" + +import datetime +import json +import logging +import os +import sys +import urllib2 + +from oauth2client.client import GoogleCredentials +from oauth2client.client import OAuth2Credentials + +from apache_beam.utils import processes +from apache_beam.utils import retry +from apache_beam.utils.pipeline_options import GoogleCloudOptions +from apache_beam.utils.pipeline_options import PipelineOptions + + +# When we are running in GCE, we can authenticate with VM credentials. +is_running_in_gce = False + +# When we are running in GCE, this value is set based on worker startup +# information. +executing_project = None + + +def set_running_in_gce(worker_executing_project): + """Informs the authentication library that we are running in GCE. + + When we are running in GCE, we have the option of using the VM metadata + credentials for authentication to Google services. + + Args: + worker_executing_project: The project running the workflow. This information + comes from worker startup information. + """ + global is_running_in_gce + global executing_project + is_running_in_gce = True + executing_project = worker_executing_project + + +class AuthenticationException(retry.PermanentException): + pass + + +class GCEMetadataCredentials(OAuth2Credentials): + """Credential object initialized using access token from GCE VM metadata.""" + + def __init__(self, user_agent=None): + """Create an instance of GCEMetadataCredentials. + + These credentials are generated by contacting the metadata server on a GCE + VM instance. + + Args: + user_agent: string, The HTTP User-Agent to provide for this application. + """ + super(GCEMetadataCredentials, self).__init__( + None, # access_token + None, # client_id + None, # client_secret + None, # refresh_token + datetime.datetime(2010, 1, 1), # token_expiry, set to time in past. + None, # token_uri + user_agent) + + @retry.with_exponential_backoff( + retry_filter=retry.retry_on_server_errors_and_timeout_filter) + def _refresh(self, http_request): + refresh_time = datetime.datetime.now() + req = urllib2.Request('http://metadata.google.internal/computeMetadata/v1/' + 'instance/service-accounts/default/token', + headers={'Metadata-Flavor': 'Google'}) + token_data = json.loads(urllib2.urlopen(req).read()) + self.access_token = token_data['access_token'] + self.token_expiry = (refresh_time + + datetime.timedelta(seconds=token_data['expires_in'])) + + +class _GCloudWrapperCredentials(OAuth2Credentials): + """Credentials class wrapping gcloud credentials via shell.""" + + def __init__(self, user_agent, **kwds): + super(_GCloudWrapperCredentials, self).__init__( + None, None, None, None, None, None, user_agent, **kwds) + + def _refresh(self, http_request): + """Gets an access token using the gcloud client.""" + try: + gcloud_process = processes.Popen( + ['gcloud', 'auth', 'print-access-token'], stdout=processes.PIPE) + except OSError as exn: + logging.error('The gcloud tool was not found.', exc_info=True) + raise AuthenticationException('The gcloud tool was not found: %s' % exn) + output, _ = gcloud_process.communicate() + self.access_token = output.strip() + + +def get_service_credentials(): + """Get credentials to access Google services.""" + user_agent = 'beam-python-sdk/1.0' + if is_running_in_gce: + # We are currently running as a GCE taskrunner worker. + # + # TODO(ccy): It's not entirely clear if these credentials are thread-safe. + # If so, we can cache these credentials to save the overhead of creating + # them again. + return GCEMetadataCredentials(user_agent=user_agent) + else: + client_scopes = [ + 'https://www.googleapis.com/auth/bigquery', + 'https://www.googleapis.com/auth/cloud-platform', + 'https://www.googleapis.com/auth/devstorage.full_control', + 'https://www.googleapis.com/auth/userinfo.email', + 'https://www.googleapis.com/auth/datastore' + ] + + # TODO(BEAM-1068): Do not recreate options from sys.argv. + # We are currently being run from the command line. + google_cloud_options = PipelineOptions( + sys.argv).view_as(GoogleCloudOptions) + if google_cloud_options.service_account_name: + if not google_cloud_options.service_account_key_file: + raise AuthenticationException( + 'key file not provided for service account.') + if not os.path.exists(google_cloud_options.service_account_key_file): + raise AuthenticationException( + 'Specified service account key file does not exist.') + + # The following code uses oauth2client >=2.0.0 functionality and if this + # is not available due to import errors will use 1.5.2 functionality. + try: + from oauth2client.service_account import ServiceAccountCredentials + return ServiceAccountCredentials.from_p12_keyfile( + google_cloud_options.service_account_name, + google_cloud_options.service_account_key_file, + private_key_password=None, + scopes=client_scopes) + except ImportError: + with file(google_cloud_options.service_account_key_file) as f: + service_account_key = f.read() + from oauth2client.client import SignedJwtAssertionCredentials + return SignedJwtAssertionCredentials( + google_cloud_options.service_account_name, + service_account_key, + client_scopes, + user_agent=user_agent) + else: + try: + credentials = _GCloudWrapperCredentials(user_agent) + # Check if we are able to get an access token. If not fallback to + # application default credentials. + credentials.get_access_token() + return credentials + except AuthenticationException: + logging.warning('Unable to find credentials from gcloud.') + + # Falling back to application default credentials. + try: + credentials = GoogleCredentials.get_application_default() + credentials = credentials.create_scoped(client_scopes) + logging.debug('Connecting using Google Application Default ' + 'Credentials.') + return credentials + except Exception: + logging.warning('Unable to find default credentials to use.') + raise http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/internal/gcp/auth_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/internal/gcp/auth_test.py b/sdks/python/apache_beam/internal/gcp/auth_test.py new file mode 100644 index 0000000..b1f503e --- /dev/null +++ b/sdks/python/apache_beam/internal/gcp/auth_test.py @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Unit tests for the auth module.""" + +import os +import sys +import unittest + +import mock + +from apache_beam.internal.gcp import auth + + +class AuthTest(unittest.TestCase): + + def test_create_application_client(self): + try: + test_args = [ + 'test', '--service_account_name', 'abc', '--service_account_key_file', + os.path.join(os.path.dirname(__file__), '..', '..', 'tests', + 'data', 'privatekey.p12')] + with mock.patch.object(sys, 'argv', test_args): + credentials = auth.get_service_credentials() + self.assertIsNotNone(credentials) + except NotImplementedError: + self.skipTest('service account tests require pyOpenSSL module.') + + +if __name__ == '__main__': + unittest.main() http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/io/gcp/bigquery.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py index 7822cc8..3186a55 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery.py +++ b/sdks/python/apache_beam/io/gcp/bigquery.py @@ -111,7 +111,7 @@ import time import uuid from apache_beam import coders -from apache_beam.internal import auth +from apache_beam.internal.gcp import auth from apache_beam.internal.gcp.json_value import from_json_value from apache_beam.internal.gcp.json_value import to_json_value from apache_beam.runners.dataflow.native_io import iobase as dataflow_io http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py index e15e43b..45c794f 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py @@ -33,7 +33,7 @@ except ImportError: QUERY_NOT_FINISHED = None # pylint: enable=wrong-import-order, wrong-import-position -from apache_beam.internal import auth +from apache_beam.internal.gcp import auth from apache_beam.utils import retry http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/io/gcp/gcsio.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py index dbf8e06..cf00bb2 100644 --- a/sdks/python/apache_beam/io/gcp/gcsio.py +++ b/sdks/python/apache_beam/io/gcp/gcsio.py @@ -35,7 +35,7 @@ import apitools.base.py.transfer as transfer from apitools.base.py.batch import BatchApiRequest from apitools.base.py.exceptions import HttpError -from apache_beam.internal import auth +from apache_beam.internal.gcp import auth from apache_beam.utils import retry # Issue a friendlier error message if the storage library is not available. http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py index 5a8f547..ebc9024 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py @@ -33,16 +33,16 @@ from apache_beam.internal import pickler from apache_beam.internal.gcp import json_value from apache_beam.pvalue import PCollectionView from apache_beam.runners.dataflow.dataflow_metrics import DataflowMetrics +from apache_beam.runners.dataflow.internal import names from apache_beam.runners.dataflow.internal.clients import dataflow as dataflow_api +from apache_beam.runners.dataflow.internal.names import PropertyNames +from apache_beam.runners.dataflow.internal.names import TransformNames from apache_beam.runners.runner import PValueCache from apache_beam.runners.runner import PipelineResult from apache_beam.runners.runner import PipelineRunner from apache_beam.runners.runner import PipelineState from apache_beam.transforms.display import DisplayData from apache_beam.typehints import typehints -from apache_beam.utils import names -from apache_beam.utils.names import PropertyNames -from apache_beam.utils.names import TransformNames from apache_beam.utils.pipeline_options import StandardOptions http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py index 481ab70..d4fa3ce 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py @@ -31,17 +31,17 @@ from apitools.base.py import encoding from apitools.base.py import exceptions from apache_beam import utils -from apache_beam.internal.auth import get_service_credentials +from apache_beam.internal.gcp.auth import get_service_credentials from apache_beam.internal.gcp.json_value import to_json_value from apache_beam.io.gcp.internal.clients import storage from apache_beam.runners.dataflow.internal import dependency from apache_beam.runners.dataflow.internal.clients import dataflow from apache_beam.runners.dataflow.internal.dependency import get_required_container_version from apache_beam.runners.dataflow.internal.dependency import get_sdk_name_and_version +from apache_beam.runners.dataflow.internal.names import PropertyNames from apache_beam.transforms import cy_combiners from apache_beam.transforms.display import DisplayData from apache_beam.utils import retry -from apache_beam.utils.names import PropertyNames from apache_beam.utils.pipeline_options import DebugOptions from apache_beam.utils.pipeline_options import GoogleCloudOptions from apache_beam.utils.pipeline_options import StandardOptions http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/runners/dataflow/internal/dependency.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/dataflow/internal/dependency.py b/sdks/python/apache_beam/runners/dataflow/internal/dependency.py index 97998c9..4cd6895 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/dependency.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/dependency.py @@ -64,7 +64,7 @@ import tempfile from apache_beam import utils from apache_beam import version as beam_version from apache_beam.internal import pickler -from apache_beam.utils import names +from apache_beam.runners.dataflow.internal import names from apache_beam.utils import processes from apache_beam.utils.pipeline_options import GoogleCloudOptions from apache_beam.utils.pipeline_options import SetupOptions http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/runners/dataflow/internal/dependency_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/dataflow/internal/dependency_test.py b/sdks/python/apache_beam/runners/dataflow/internal/dependency_test.py index 0657a07..545bcd6 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/dependency_test.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/dependency_test.py @@ -25,7 +25,7 @@ import unittest from apache_beam import utils from apache_beam.runners.dataflow.internal import dependency -from apache_beam.utils import names +from apache_beam.runners.dataflow.internal import names from apache_beam.utils.pipeline_options import GoogleCloudOptions from apache_beam.utils.pipeline_options import PipelineOptions from apache_beam.utils.pipeline_options import SetupOptions http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/runners/dataflow/internal/names.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py new file mode 100644 index 0000000..182f27e --- /dev/null +++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py @@ -0,0 +1,82 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Various names for properties, transforms, etc.""" + + +# Standard file names used for staging files. +PICKLED_MAIN_SESSION_FILE = 'pickled_main_session' +DATAFLOW_SDK_TARBALL_FILE = 'dataflow_python_sdk.tar' + +# String constants related to sources framework +SOURCE_FORMAT = 'custom_source' +SOURCE_TYPE = 'CustomSourcesType' +SERIALIZED_SOURCE_KEY = 'serialized_source' + + +class TransformNames(object): + """Transform strings as they are expected in the CloudWorkflow protos.""" + COLLECTION_TO_SINGLETON = 'CollectionToSingleton' + COMBINE = 'CombineValues' + CREATE_PCOLLECTION = 'CreateCollection' + DO = 'ParallelDo' + FLATTEN = 'Flatten' + GROUP = 'GroupByKey' + READ = 'ParallelRead' + WRITE = 'ParallelWrite' + + +class PropertyNames(object): + """Property strings as they are expected in the CloudWorkflow protos.""" + BIGQUERY_CREATE_DISPOSITION = 'create_disposition' + BIGQUERY_DATASET = 'dataset' + BIGQUERY_QUERY = 'bigquery_query' + BIGQUERY_USE_LEGACY_SQL = 'bigquery_use_legacy_sql' + BIGQUERY_FLATTEN_RESULTS = 'bigquery_flatten_results' + BIGQUERY_EXPORT_FORMAT = 'bigquery_export_format' + BIGQUERY_TABLE = 'table' + BIGQUERY_PROJECT = 'project' + BIGQUERY_SCHEMA = 'schema' + BIGQUERY_WRITE_DISPOSITION = 'write_disposition' + DISPLAY_DATA = 'display_data' + ELEMENT = 'element' + ELEMENTS = 'elements' + ENCODING = 'encoding' + FILE_PATTERN = 'filepattern' + FILE_NAME_PREFIX = 'filename_prefix' + FILE_NAME_SUFFIX = 'filename_suffix' + FORMAT = 'format' + INPUTS = 'inputs' + NON_PARALLEL_INPUTS = 'non_parallel_inputs' + NUM_SHARDS = 'num_shards' + OUT = 'out' + OUTPUT = 'output' + OUTPUT_INFO = 'output_info' + OUTPUT_NAME = 'output_name' + PARALLEL_INPUT = 'parallel_input' + PUBSUB_TOPIC = 'pubsub_topic' + PUBSUB_SUBSCRIPTION = 'pubsub_subscription' + PUBSUB_ID_LABEL = 'pubsub_id_label' + SERIALIZED_FN = 'serialized_fn' + SHARD_NAME_TEMPLATE = 'shard_template' + SOURCE_STEP_INPUT = 'custom_source_step_input' + STEP_NAME = 'step_name' + USER_FN = 'user_fn' + USER_NAME = 'user_name' + VALIDATE_SINK = 'validate_sink' + VALIDATE_SOURCE = 'validate_source' + VALUE = 'value' http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/apache_beam/utils/names.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/utils/names.py b/sdks/python/apache_beam/utils/names.py deleted file mode 100644 index 182f27e..0000000 --- a/sdks/python/apache_beam/utils/names.py +++ /dev/null @@ -1,82 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Various names for properties, transforms, etc.""" - - -# Standard file names used for staging files. -PICKLED_MAIN_SESSION_FILE = 'pickled_main_session' -DATAFLOW_SDK_TARBALL_FILE = 'dataflow_python_sdk.tar' - -# String constants related to sources framework -SOURCE_FORMAT = 'custom_source' -SOURCE_TYPE = 'CustomSourcesType' -SERIALIZED_SOURCE_KEY = 'serialized_source' - - -class TransformNames(object): - """Transform strings as they are expected in the CloudWorkflow protos.""" - COLLECTION_TO_SINGLETON = 'CollectionToSingleton' - COMBINE = 'CombineValues' - CREATE_PCOLLECTION = 'CreateCollection' - DO = 'ParallelDo' - FLATTEN = 'Flatten' - GROUP = 'GroupByKey' - READ = 'ParallelRead' - WRITE = 'ParallelWrite' - - -class PropertyNames(object): - """Property strings as they are expected in the CloudWorkflow protos.""" - BIGQUERY_CREATE_DISPOSITION = 'create_disposition' - BIGQUERY_DATASET = 'dataset' - BIGQUERY_QUERY = 'bigquery_query' - BIGQUERY_USE_LEGACY_SQL = 'bigquery_use_legacy_sql' - BIGQUERY_FLATTEN_RESULTS = 'bigquery_flatten_results' - BIGQUERY_EXPORT_FORMAT = 'bigquery_export_format' - BIGQUERY_TABLE = 'table' - BIGQUERY_PROJECT = 'project' - BIGQUERY_SCHEMA = 'schema' - BIGQUERY_WRITE_DISPOSITION = 'write_disposition' - DISPLAY_DATA = 'display_data' - ELEMENT = 'element' - ELEMENTS = 'elements' - ENCODING = 'encoding' - FILE_PATTERN = 'filepattern' - FILE_NAME_PREFIX = 'filename_prefix' - FILE_NAME_SUFFIX = 'filename_suffix' - FORMAT = 'format' - INPUTS = 'inputs' - NON_PARALLEL_INPUTS = 'non_parallel_inputs' - NUM_SHARDS = 'num_shards' - OUT = 'out' - OUTPUT = 'output' - OUTPUT_INFO = 'output_info' - OUTPUT_NAME = 'output_name' - PARALLEL_INPUT = 'parallel_input' - PUBSUB_TOPIC = 'pubsub_topic' - PUBSUB_SUBSCRIPTION = 'pubsub_subscription' - PUBSUB_ID_LABEL = 'pubsub_id_label' - SERIALIZED_FN = 'serialized_fn' - SHARD_NAME_TEMPLATE = 'shard_template' - SOURCE_STEP_INPUT = 'custom_source_step_input' - STEP_NAME = 'step_name' - USER_FN = 'user_fn' - USER_NAME = 'user_name' - VALIDATE_SINK = 'validate_sink' - VALIDATE_SOURCE = 'validate_source' - VALUE = 'value' http://git-wip-us.apache.org/repos/asf/beam/blob/1512714d/sdks/python/generate_pydoc.sh ---------------------------------------------------------------------- diff --git a/sdks/python/generate_pydoc.sh b/sdks/python/generate_pydoc.sh index 6dfda4d..1fe7ddf 100755 --- a/sdks/python/generate_pydoc.sh +++ b/sdks/python/generate_pydoc.sh @@ -26,6 +26,9 @@ set -e # Create docs directory if it does not exist +mkdir -p target/docs +rm -rf target/docs/* + mkdir -p target/docs/source # Exclude autogenerated API message definition files that aren't part of SDK.
