Adding documentation. Setting Python classes as STRING types.

Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/bfcff012
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/bfcff012
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/bfcff012

Branch: refs/heads/python-sdk
Commit: bfcff012694b3049b895f1c9464ce9324c3758d1
Parents: d864d96
Author: Pablo <pabl...@google.com>
Authored: Tue Oct 18 14:49:33 2016 -0700
Committer: Robert Bradshaw <rober...@google.com>
Committed: Thu Oct 27 10:39:32 2016 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/runners/runner_test.py  |   2 +-
 sdks/python/apache_beam/transforms/display.py   | 113 ++++++++++++++++++-
 .../apache_beam/transforms/display_test.py      |   2 +-
 3 files changed, 109 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/bfcff012/sdks/python/apache_beam/runners/runner_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/runner_test.py b/sdks/python/apache_beam/runners/runner_test.py
index 19160c3..891f9dc 100644
--- a/sdks/python/apache_beam/runners/runner_test.py
+++ b/sdks/python/apache_beam/runners/runner_test.py
@@ -111,7 +111,7 @@ class RunnerTest(unittest.TestCase):
     expected_data = [{'type': 'TIMESTAMP', 'namespace': nspace+'SpecialParDo',
                       'value': DisplayDataItem._format_value(now, 'TIMESTAMP'),
                       'key': 'a_time'},
-                     {'type': 'JAVA_CLASS', 'namespace': nspace+'SpecialParDo',
+                     {'type': 'STRING', 'namespace': nspace+'SpecialParDo',
                       'value': nspace+'SpecialParDo', 'key': 'a_class',
                       'shortValue': 'SpecialParDo'},
                      {'type': 'INTEGER', 'namespace': nspace+'SpecialDoFn',

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/bfcff012/sdks/python/apache_beam/transforms/display.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/display.py b/sdks/python/apache_beam/transforms/display.py
index 87d3046..372cbf5 100644
--- a/sdks/python/apache_beam/transforms/display.py
+++ b/sdks/python/apache_beam/transforms/display.py
@@ -17,6 +17,19 @@
 
 """
 DisplayData, its classes, interfaces and methods.
+
+The classes in this module allow users and transform developers to define
+static display data to be displayed when a pipeline runs. PTransforms, DoFns
+and other pipeline components are subclasses of the HasDisplayData mixin. To
+add static display data to a component, you can override the display_data
+method of the HasDisplayData class.
+
+Available classes:
+- HasDisplayData - Components that inherit from this class can have static
+    display data shown in the UI.
+- DisplayDataItem - This class represents static display data elements.
+- DisplayData - Internal class that is used to create display data and
+    communicate it to the API.
 """
 
 from __future__ import absolute_import
@@ -30,11 +43,30 @@ __all__ = ['HasDisplayData', 'DisplayDataItem', 'DisplayData']
 
 
 class HasDisplayData(object):
-  """ Basic interface for elements that contain display data.
+  """ Basic mixin for elements that contain display data.
 
-  It contains only the display_data method and a namespace method.
+  It implements only the display_data method and a _namespace method.
   """
+
   def display_data(self):
+    """ Returns the display data associated to a pipeline component.
+
+    It should be reimplemented in pipeline components that wish to have
+    static display data.
+
+    Returns:
+      A dictionary containing key:value pairs. The value might be an
+      integer, float or string value; a DisplayDataItem for values that
+      have more data (e.g. short value, label, url); or a HasDisplayData
+      instance that has more display data that should be picked up. For
+      example:
+
+      { 'key1': 'string_value',
+        'key2': 1234,
+        'key3': 3.14159265,
+        'key4': DisplayDataItem('apache.org', url='http://apache.org'),
+        'key5': subComponent }
+    """
     return {}
 
   def _namespace(self):
@@ -42,12 +74,17 @@ class HasDisplayData(object):
 
 
 class DisplayData(object):
+  """ Static display data associated with a pipeline component.
+  """
+
   def __init__(self, namespace, display_data_dict):
     self.namespace = namespace
     self.items = []
     self.populate_items(display_data_dict)
 
   def populate_items(self, display_data_dict):
+    """ Populates the list of display data items.
+    """
     for key, element in display_data_dict.items():
       if isinstance(element, HasDisplayData):
         subcomponent_display_data = DisplayData(element._namespace(),
@@ -69,10 +106,21 @@ class DisplayData(object):
                           key=key))
 
   def output(self):
+    """ Returns the JSON-API list of display data items to send to the runner.
+    """
     return [item.get_dict() for item in self.items]
 
   @classmethod
   def create_from(cls, has_display_data):
+    """ Creates DisplayData from a HasDisplayData instance.
+
+    Returns:
+      A DisplayData instance with populated items.
+
+    Raises:
+      ValueError: If the has_display_data argument is not an instance of
+        HasDisplayData.
+    """
     if not isinstance(has_display_data, HasDisplayData):
       raise ValueError('Element of class {}.{} does not subclass HasDisplayData'
                        .format(has_display_data.__module__,
@@ -81,6 +129,11 @@ class DisplayData(object):
 
 
 class DisplayDataItem(object):
+  """ A DisplayDataItem represents a unit of static display data.
+
+  Each item is identified by a key and the namespace of the component the
+  display item belongs to.
+  """
   typeDict = {str:'STRING',
               int:'INTEGER',
               float:'FLOAT',
@@ -99,6 +152,12 @@ class DisplayDataItem(object):
     self.label = label
 
   def is_valid(self):
+    """ Checks that all the necessary fields of the DisplayDataItem are
+    filled in. It checks that neither key, namespace, value or type are None.
+
+    Raises:
+      ValueError: If the item does not have a key, namespace, value or type.
+    """
     if self.key is None:
       raise ValueError('Key must not be None')
     if self.namespace is None:
@@ -109,11 +168,22 @@ class DisplayDataItem(object):
       raise ValueError('Value {} is of an unsupported type.'.format(self.value))
 
   def get_dict(self):
+    """ Returns the internal-API dictionary representing the DisplayDataItem.
+
+    Returns:
+      A dictionary. The internal-API dictionary representing the
+      DisplayDataItem
+
+    Raises:
+     ValueError: if the item is not valid.
+    """
     self.is_valid()
 
     res = {'key': self.key,
            'namespace': self.namespace,
-           'type': self.type}
+           'type': self.type if self.type != 'CLASS' else 'STRING'}
+    # TODO: Python Class types should not be special-cased once
+    # the Fn API is in.
 
     if self.url is not None:
       res['url'] = self.url
@@ -129,8 +199,17 @@ class DisplayDataItem(object):
 
   @classmethod
   def _format_value(cls, value, type_):
+    """ Returns the API representation of a value given its type.
+
+    Args:
+      value: The value of the item that needs to be shortened.
+      type_(string): The type of the value.
+
+    Returns:
+      A formatted value in the form of a float, int, or string.
+    """
     res = value
-    if type_ == 'JAVA_CLASS':
+    if type_ == 'CLASS':
       res = '{}.{}'.format(value.__module__, value.__name__)
     if type_ == 'DURATION':
       res = value.total_seconds()*1000
@@ -140,13 +219,35 @@ class DisplayDataItem(object):
 
   @classmethod
   def _get_short_value(cls, value, type_):
-    if type_ == 'JAVA_CLASS':
+    """ Calculates the short value for an item.
+
+    Args:
+      value: The value of the item that needs to be shortened.
+      type_(string): The type of the value.
+
+    Returns:
+      The unqualified name of a class if type_ is 'CLASS'. None otherwise.
+    """
+    if type_ == 'CLASS':
       return value.__name__
     return None
 
   @classmethod
   def _get_value_type(cls, value):
+    """ Infers the type of a given value.
+
+    Args:
+      value: The value whose type needs to be inferred. For 'DURATION' and
+        'TIMESTAMP', the corresponding Python type is datetime.timedelta and
+        datetime.datetime respectively. For Python classes, the API type is
+        just 'STRING' at the moment.
+
+    Returns:
+      One of 'STRING', 'INTEGER', 'FLOAT', 'CLASS', 'DURATION', or
+      'TIMESTAMP', depending on the type of the value.
+    """
+    #TODO: Fix Args: documentation once the Python classes handling has changed
     type_ = cls.typeDict.get(type(value))
     if type_ is None:
-      type_ = 'JAVA_CLASS' if inspect.isclass(value) else None
+      type_ = 'CLASS' if inspect.isclass(value) else None
     return type_

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/bfcff012/sdks/python/apache_beam/transforms/display_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/display_test.py b/sdks/python/apache_beam/transforms/display_test.py
index 227f3bc..906bb8f 100644
--- a/sdks/python/apache_beam/transforms/display_test.py
+++ b/sdks/python/apache_beam/transforms/display_test.py
@@ -78,7 +78,7 @@ class DisplayDataTest(unittest.TestCase):
          'key': 'complex_url', 'type': 'STRING'},
         {'type': 'TIMESTAMP', 'namespace': nspace, 'key': 'my_dd',
          'value': DisplayDataItem._format_value(now, 'TIMESTAMP')},
-        {'type': 'JAVA_CLASS', 'namespace': nspace,
+        {'type': 'STRING', 'namespace': nspace,
          'shortValue': 'HasDisplayData', 'key': 'python_class',
          'value': 'apache_beam.transforms.display.HasDisplayData'},
         {'type': 'INTEGER', 'namespace': nspace,

Reply via email to