Adding documentation. Setting Python classes as STRING types.
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/bfcff012 Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/bfcff012 Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/bfcff012 Branch: refs/heads/python-sdk Commit: bfcff012694b3049b895f1c9464ce9324c3758d1 Parents: d864d96 Author: Pablo <pabl...@google.com> Authored: Tue Oct 18 14:49:33 2016 -0700 Committer: Robert Bradshaw <rober...@google.com> Committed: Thu Oct 27 10:39:32 2016 -0700 ---------------------------------------------------------------------- sdks/python/apache_beam/runners/runner_test.py | 2 +- sdks/python/apache_beam/transforms/display.py | 113 ++++++++++++++++++- .../apache_beam/transforms/display_test.py | 2 +- 3 files changed, 109 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/bfcff012/sdks/python/apache_beam/runners/runner_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/runner_test.py b/sdks/python/apache_beam/runners/runner_test.py index 19160c3..891f9dc 100644 --- a/sdks/python/apache_beam/runners/runner_test.py +++ b/sdks/python/apache_beam/runners/runner_test.py @@ -111,7 +111,7 @@ class RunnerTest(unittest.TestCase): expected_data = [{'type': 'TIMESTAMP', 'namespace': nspace+'SpecialParDo', 'value': DisplayDataItem._format_value(now, 'TIMESTAMP'), 'key': 'a_time'}, - {'type': 'JAVA_CLASS', 'namespace': nspace+'SpecialParDo', + {'type': 'STRING', 'namespace': nspace+'SpecialParDo', 'value': nspace+'SpecialParDo', 'key': 'a_class', 'shortValue': 'SpecialParDo'}, {'type': 'INTEGER', 'namespace': nspace+'SpecialDoFn', http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/bfcff012/sdks/python/apache_beam/transforms/display.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/transforms/display.py b/sdks/python/apache_beam/transforms/display.py index 87d3046..372cbf5 100644 --- a/sdks/python/apache_beam/transforms/display.py +++ b/sdks/python/apache_beam/transforms/display.py @@ -17,6 +17,19 @@ """ DisplayData, its classes, interfaces and methods. + +The classes in this module allow users and transform developers to define +static display data to be displayed when a pipeline runs. PTransforms, DoFns +and other pipeline components are subclasses of the HasDisplayData mixin. To +add static display data to a component, you can override the display_data +method of the HasDisplayData class. + +Available classes: +- HasDisplayData - Components that inherit from this class can have static + display data shown in the UI. +- DisplayDataItem - This class represents static display data elements. +- DisplayData - Internal class that is used to create display data and + communicate it to the API. """ from __future__ import absolute_import @@ -30,11 +43,30 @@ __all__ = ['HasDisplayData', 'DisplayDataItem', 'DisplayData'] class HasDisplayData(object): - """ Basic interface for elements that contain display data. + """ Basic mixin for elements that contain display data. - It contains only the display_data method and a namespace method. + It implements only the display_data method and a _namespace method. """ + def display_data(self): + """ Returns the display data associated to a pipeline component. + + It should be reimplemented in pipeline components that wish to have + static display data. + + Returns: + A dictionary containing key:value pairs. The value might be an + integer, float or string value; a DisplayDataItem for values that + have more data (e.g. short value, label, url); or a HasDisplayData + instance that has more display data that should be picked up. For + example: + + { 'key1': 'string_value', + 'key2': 1234, + 'key3': 3.14159265, + 'key4': DisplayDataItem('apache.org', url='http://apache.org'), + 'key5': subComponent } + """ return {} def _namespace(self): @@ -42,12 +74,17 @@ class HasDisplayData(object): class DisplayData(object): + """ Static display data associated with a pipeline component. + """ + def __init__(self, namespace, display_data_dict): self.namespace = namespace self.items = [] self.populate_items(display_data_dict) def populate_items(self, display_data_dict): + """ Populates the list of display data items. + """ for key, element in display_data_dict.items(): if isinstance(element, HasDisplayData): subcomponent_display_data = DisplayData(element._namespace(), @@ -69,10 +106,21 @@ class DisplayData(object): key=key)) def output(self): + """ Returns the JSON-API list of display data items to send to the runner. + """ return [item.get_dict() for item in self.items] @classmethod def create_from(cls, has_display_data): + """ Creates DisplayData from a HasDisplayData instance. + + Returns: + A DisplayData instance with populated items. + + Raises: + ValueError: If the has_display_data argument is not an instance of + HasDisplayData. + """ if not isinstance(has_display_data, HasDisplayData): raise ValueError('Element of class {}.{} does not subclass HasDisplayData' .format(has_display_data.__module__, @@ -81,6 +129,11 @@ class DisplayData(object): class DisplayDataItem(object): + """ A DisplayDataItem represents a unit of static display data. + + Each item is identified by a key and the namespace of the component the + display item belongs to. + """ typeDict = {str:'STRING', int:'INTEGER', float:'FLOAT', @@ -99,6 +152,12 @@ class DisplayDataItem(object): self.label = label def is_valid(self): + """ Checks that all the necessary fields of the DisplayDataItem are + filled in. It checks that neither key, namespace, value or type are None. + + Raises: + ValueError: If the item does not have a key, namespace, value or type. + """ if self.key is None: raise ValueError('Key must not be None') if self.namespace is None: @@ -109,11 +168,22 @@ class DisplayDataItem(object): raise ValueError('Value {} is of an unsupported type.'.format(self.value)) def get_dict(self): + """ Returns the internal-API dictionary representing the DisplayDataItem. + + Returns: + A dictionary. The internal-API dictionary representing the + DisplayDataItem + + Raises: + ValueError: if the item is not valid. + """ self.is_valid() res = {'key': self.key, 'namespace': self.namespace, - 'type': self.type} + 'type': self.type if self.type != 'CLASS' else 'STRING'} + # TODO: Python Class types should not be special-cased once + # the Fn API is in. if self.url is not None: res['url'] = self.url @@ -129,8 +199,17 @@ class DisplayDataItem(object): @classmethod def _format_value(cls, value, type_): + """ Returns the API representation of a value given its type. + + Args: + value: The value of the item that needs to be shortened. + type_(string): The type of the value. + + Returns: + A formatted value in the form of a float, int, or string. + """ res = value - if type_ == 'JAVA_CLASS': + if type_ == 'CLASS': res = '{}.{}'.format(value.__module__, value.__name__) if type_ == 'DURATION': res = value.total_seconds()*1000 @@ -140,13 +219,35 @@ class DisplayDataItem(object): @classmethod def _get_short_value(cls, value, type_): - if type_ == 'JAVA_CLASS': + """ Calculates the short value for an item. + + Args: + value: The value of the item that needs to be shortened. + type_(string): The type of the value. + + Returns: + The unqualified name of a class if type_ is 'CLASS'. None otherwise. + """ + if type_ == 'CLASS': return value.__name__ return None @classmethod def _get_value_type(cls, value): + """ Infers the type of a given value. + + Args: + value: The value whose type needs to be inferred. For 'DURATION' and + 'TIMESTAMP', the corresponding Python type is datetime.timedelta and + datetime.datetime respectively. For Python classes, the API type is + just 'STRING' at the moment. + + Returns: + One of 'STRING', 'INTEGER', 'FLOAT', 'CLASS', 'DURATION', or + 'TIMESTAMP', depending on the type of the value. + """ + #TODO: Fix Args: documentation once the Python classes handling has changed type_ = cls.typeDict.get(type(value)) if type_ is None: - type_ = 'JAVA_CLASS' if inspect.isclass(value) else None + type_ = 'CLASS' if inspect.isclass(value) else None return type_ http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/bfcff012/sdks/python/apache_beam/transforms/display_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/transforms/display_test.py b/sdks/python/apache_beam/transforms/display_test.py index 227f3bc..906bb8f 100644 --- a/sdks/python/apache_beam/transforms/display_test.py +++ b/sdks/python/apache_beam/transforms/display_test.py @@ -78,7 +78,7 @@ class DisplayDataTest(unittest.TestCase): 'key': 'complex_url', 'type': 'STRING'}, {'type': 'TIMESTAMP', 'namespace': nspace, 'key': 'my_dd', 'value': DisplayDataItem._format_value(now, 'TIMESTAMP')}, - {'type': 'JAVA_CLASS', 'namespace': nspace, + {'type': 'STRING', 'namespace': nspace, 'shortValue': 'HasDisplayData', 'key': 'python_class', 'value': 'apache_beam.transforms.display.HasDisplayData'}, {'type': 'INTEGER', 'namespace': nspace,