Repository: beam
Updated Branches:
  refs/heads/master 843b663cf -> cfc0a9955
[BEAM-1694] Fix docstring inaccuracies in Python-SDK


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/257d0d25
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/257d0d25
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/257d0d25

Branch: refs/heads/master
Commit: 257d0d255b1e2d877c48cca6c3c2bb193e589488
Parents: 843b663
Author: Tibor Kiss <[email protected]>
Authored: Sat Mar 11 07:13:04 2017 +0100
Committer: Ahmet Altay <[email protected]>
Committed: Tue Mar 14 12:22:32 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/coders/typecoders.py            | 2 +-
 sdks/python/apache_beam/io/avroio.py                     | 9 +++++----
 sdks/python/apache_beam/io/gcp/gcsio.py                  | 4 +++-
 sdks/python/apache_beam/pipeline.py                      | 1 +
 sdks/python/apache_beam/transforms/core.py               | 2 +-
 sdks/python/apache_beam/typehints/trivial_inference.py   | 4 +++-
 sdks/python/apache_beam/utils/counters.py                | 5 ++---
 sdks/python/apache_beam/utils/retry.py                   | 6 +++---
 8 files changed, 19 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/257d0d25/sdks/python/apache_beam/coders/typecoders.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/coders/typecoders.py b/sdks/python/apache_beam/coders/typecoders.py
index 767d791..1bd4312 100644
--- a/sdks/python/apache_beam/coders/typecoders.py
+++ b/sdks/python/apache_beam/coders/typecoders.py
@@ -160,7 +160,7 @@ class CoderRegistry(object):
 
 
 class FirstOf(object):
-  "A class used to get the first matching coder from a list of coders."
+  """A class used to get the first matching coder from a list of coders."""
 
   def __init__(self, coders):
     self._coders = coders

http://git-wip-us.apache.org/repos/asf/beam/blob/257d0d25/sdks/python/apache_beam/io/avroio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/avroio.py b/sdks/python/apache_beam/io/avroio.py
index ab98530..6fdd798 100644
--- a/sdks/python/apache_beam/io/avroio.py
+++ b/sdks/python/apache_beam/io/avroio.py
@@ -67,13 +67,11 @@ class ReadFromAvro(PTransform):
       {u'name': u'Alyssa', u'favorite_number': 256, u'favorite_color': None}).
 
     Args:
-      label: label of the PTransform.
       file_pattern: the set of files to be read.
       min_bundle_size: the minimum size in bytes, to be considered when
         splitting the input into bundles.
       validate: flag to verify that the files exist during the pipeline
         creation time.
-      **kwargs: Additional keyword arguments to be passed to the base class.
     """
     super(ReadFromAvro, self).__init__()
     self._source = _AvroSource(file_pattern, min_bundle_size,
                                validate=validate)
@@ -129,6 +127,11 @@ class _AvroUtils(object):
 
     Args:
       f: Avro file to read.
+      codec: The codec to use for block-level decompression.
+        Supported codecs: 'null', 'deflate', 'snappy'
+      schema: Avro Schema definition represented as JSON string.
+      expected_sync_marker: Avro synchronization marker. If the block's sync
+        marker does not match with this parameter then ValueError is thrown.
 
     Returns:
       A single _AvroBlock.
@@ -302,8 +305,6 @@ class WriteToAvro(beam.transforms.PTransform):
       codec: The codec to use for block-level compression. Any string supported
         by the Avro specification is accepted (for example 'null').
       file_name_suffix: Suffix for the files written.
-      append_trailing_newlines: indicate whether this sink should write an
-        additional newline char after writing each element.
       num_shards: The number of files (shards) used for output. If not
         set, the service will decide on the optimal number of shards.
         Constraining the number of shards is likely to reduce

http://git-wip-us.apache.org/repos/asf/beam/blob/257d0d25/sdks/python/apache_beam/io/gcp/gcsio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py
index cf00bb2..020c38f 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio.py
@@ -155,6 +155,8 @@ class GcsIO(object):
 
     Args:
       pattern: GCS file path pattern in the form gs://<bucket>/<name_pattern>.
+      limit: Maximal number of path names to return.
+        All matching paths are returned if set to None.
 
     Returns:
       list of GCS file paths matching the given pattern.
@@ -370,7 +372,7 @@ class GcsIO(object):
     """Returns the size of all the files in the glob as a dictionary
 
     Args:
-      path: a file path pattern that reads the size of all the files
+      pattern: a file path pattern that reads the size of all the files
     """
     bucket, name_pattern = parse_gcs_path(pattern)
     # Get the prefix with which we can list objects in the given bucket.

http://git-wip-us.apache.org/repos/asf/beam/blob/257d0d25/sdks/python/apache_beam/pipeline.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py
index 7db39a9..dc05bd3 100644
--- a/sdks/python/apache_beam/pipeline.py
+++ b/sdks/python/apache_beam/pipeline.py
@@ -191,6 +191,7 @@ class Pipeline(object):
     Args:
       transform: the PTranform to apply.
       pvalueish: the input for the PTransform (typically a PCollection).
+      label: label of the PTransform.
 
     Raises:
       TypeError: if the transform object extracted from the argument list is

http://git-wip-us.apache.org/repos/asf/beam/blob/257d0d25/sdks/python/apache_beam/transforms/core.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py
index 3251671..7a52828 100644
--- a/sdks/python/apache_beam/transforms/core.py
+++ b/sdks/python/apache_beam/transforms/core.py
@@ -521,7 +521,7 @@ class PartitionFn(WithTypeHints):
   def default_label(self):
     return self.__class__.__name__
 
-  def partition_for(self, context, num_partitions, *args, **kwargs):
+  def partition_for(self, element, num_partitions, *args, **kwargs):
     """Specify which partition will receive this element.
 
     Args:

http://git-wip-us.apache.org/repos/asf/beam/blob/257d0d25/sdks/python/apache_beam/typehints/trivial_inference.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py
index e1fbc42..b611103 100644
--- a/sdks/python/apache_beam/typehints/trivial_inference.py
+++ b/sdks/python/apache_beam/typehints/trivial_inference.py
@@ -218,9 +218,10 @@ def infer_return_type(c, input_types, debug=False, depth=5):
   """Analyses a callable to deduce its return type.
 
   Args:
-    f: A Python function object to infer the return type of.
+    c: A Python callable to infer the return type of.
     input_types: A sequence of inputs corresponding to the input types.
     debug: Whether to print verbose debugging information.
+    depth: Maximum inspection depth during type inference.
 
   Returns:
     A TypeConstraint that that the return value of this function will (likely)
@@ -268,6 +269,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
     f: A Python function object to infer the return type of.
     input_types: A sequence of inputs corresponding to the input types.
     debug: Whether to print verbose debugging information.
+    depth: Maximum inspection depth during type inference.
 
   Returns:
     A TypeConstraint that that the return value of this function will (likely)

http://git-wip-us.apache.org/repos/asf/beam/blob/257d0d25/sdks/python/apache_beam/utils/counters.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/utils/counters.py b/sdks/python/apache_beam/utils/counters.py
index f6b432a..e41d732 100644
--- a/sdks/python/apache_beam/utils/counters.py
+++ b/sdks/python/apache_beam/utils/counters.py
@@ -37,9 +37,8 @@ class Counter(object):
 
   Attributes:
     name: the name of the counter, a string
-    aggregation_kind: one of the aggregation kinds defined by this class.
-    total: the total size of all the items passed to update()
-    elements: the number of times update() was called
+    combine_fn: the CombineFn to use for aggregation
+    accumulator: the accumulator created for the combine_fn
   """
 
   # Handy references to common counters.

http://git-wip-us.apache.org/repos/asf/beam/blob/257d0d25/sdks/python/apache_beam/utils/retry.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/utils/retry.py b/sdks/python/apache_beam/utils/retry.py
index 05973c5..8f7152a 100644
--- a/sdks/python/apache_beam/utils/retry.py
+++ b/sdks/python/apache_beam/utils/retry.py
@@ -59,7 +59,7 @@ class FuzzedExponentialIntervals(object):
     fuzz: A value between 0 and 1, indicating the fraction of fuzz. For a
       given delay d, the fuzzed delay is randomly chosen between
       [(1 - fuzz) * d, d].
-    max_delay_sec: Maximum delay (in seconds). After this limit is reached,
+    max_delay_secs: Maximum delay (in seconds). After this limit is reached,
      further tries use max_delay_sec instead of exponentially increasing
      the time. Defaults to 1 hour.
   """
@@ -143,9 +143,9 @@ def with_exponential_backoff(
      can be used so that the delays are not randomized.
    factor: The exponential factor to use on subsequent retries. Default
      is 2 (doubling).
-    max_delay_sec: Maximum delay (in seconds). After this limit is reached,
+    max_delay_secs: Maximum delay (in seconds). After this limit is reached,
      further tries use max_delay_sec instead of exponentially increasing
-      the time. Defaults to 4 hours.
+      the time. Defaults to 1 hour.
 
   Returns:
     As per Python decorators with arguments pattern returns a decorator
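For reference (not part of the diff above): the partition_for fix in
transforms/core.py renames the first positional argument to element, i.e. the
value being routed rather than a context object. A minimal, hypothetical sketch
of a PartitionFn subclass used with beam.Partition follows; the class name and
sample data are invented for illustration only.

    import apache_beam as beam
    from apache_beam.transforms.core import PartitionFn


    class ByLengthPartitionFn(PartitionFn):
      """Illustrative only: routes each element by its string length."""

      def partition_for(self, element, num_partitions, *args, **kwargs):
        # The first positional argument is the element itself, matching the
        # corrected signature; return a partition index in [0, num_partitions).
        return len(element) % num_partitions


    p = beam.Pipeline()
    partitions = (p
                  | beam.Create(['a', 'bb', 'ccc', 'dddd'])
                  | beam.Partition(ByLengthPartitionFn(), 3))
    p.run()

beam.Partition accepts either a PartitionFn instance or a plain callable taking
(element, num_partitions), and yields one output PCollection per partition.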

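Also for reference (not part of the diff above): a rough sketch of the
decorator whose docstring is corrected in utils/retry.py. The decorated
function name and the num_retries value are hypothetical; only
with_exponential_backoff itself comes from the module.

    from apache_beam.utils import retry


    @retry.with_exponential_backoff(num_retries=3)
    def fetch_flaky_resource(url):
      # Hypothetical flaky call. Each failed attempt that passes the
      # decorator's retry filter is re-run after an exponentially growing,
      # fuzzed delay, capped at max_delay_secs.
      ...

As the docstring notes, with_exponential_backoff follows the
decorator-with-arguments pattern and returns a decorator, so it must be
called even when only default arguments are used.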