This is an automated email from the ASF dual-hosted git repository.
robertwb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 4208b86fdd1 [YAML] Combine docstrings for different language variants
of the same transform. (#30708)
4208b86fdd1 is described below
commit 4208b86fdd117ed875973f4639293dc574ab15bb
Author: Robert Bradshaw <[email protected]>
AuthorDate: Fri Mar 22 15:31:50 2024 -0700
[YAML] Combine docstrings for different language variants of the same
transform. (#30708)
---
sdks/python/apache_beam/yaml/generate_yaml_docs.py | 114 ++++++++++++---------
1 file changed, 68 insertions(+), 46 deletions(-)
diff --git a/sdks/python/apache_beam/yaml/generate_yaml_docs.py b/sdks/python/apache_beam/yaml/generate_yaml_docs.py
index 6ff4063088c..b11062cce4d 100644
--- a/sdks/python/apache_beam/yaml/generate_yaml_docs.py
+++ b/sdks/python/apache_beam/yaml/generate_yaml_docs.py
@@ -17,6 +17,7 @@
import argparse
import io
+import itertools
import re
import yaml
@@ -66,8 +67,8 @@ def _fake_row(schema):
return {f.name: _fake_value(f.name, f.type) for f in schema.fields}
-def pretty_example(provider, t):
- spec = {'type': t}
+def pretty_example(provider, t, base_t=None):
+ spec = {'type': base_t or t}
try:
requires_inputs = provider.requires_inputs(t, {})
except Exception:
@@ -150,22 +151,33 @@ SKIP = [
]
-def transform_docs(t, providers):
+def transform_docs(transform_base, transforms, providers, extra_docs=''):
return '\n'.join([
- f'## {t}',
+ f'## {transform_base}',
'',
- longest(lambda p: p.description(t),
- providers).replace('::\n', '\n\n :::yaml\n'),
+ longest(
+ lambda t: longest(lambda p: p.description(t), providers[t]),
+ transforms).replace('::\n', '\n\n :::yaml\n'),
+ '',
+ extra_docs,
'',
'### Configuration',
'',
- longest(lambda p: config_docs(p.config_schema(t)), providers),
+ longest(
+ lambda t: longest(
+ lambda p: config_docs(p.config_schema(t)), providers[t]),
+ transforms),
'',
'### Usage',
'',
' :::yaml',
'',
- indent(longest(lambda p: pretty_example(p, t), providers), 4),
+ indent(
+ longest(
+ lambda t: longest(
+ lambda p: pretty_example(p, t, transform_base), providers[t]),
+ transforms),
+ 4),
])
@@ -175,53 +187,63 @@ def main():
parser.add_argument('--html_file')
parser.add_argument('--schema_file')
parser.add_argument('--include', default='.*')
- parser.add_argument(
- '--exclude', default='(Combine)|(Filter)|(MapToFields)-.*')
+ parser.add_argument('--exclude', default='')
options = parser.parse_args()
include = re.compile(options.include).match
- exclude = re.compile(options.exclude).match
+ exclude = (
+ re.compile(options.exclude).match if options.exclude else lambda _: False)
with subprocess_server.SubprocessServer.cache_subprocesses():
json_config_schemas = []
markdown_out = io.StringIO()
providers = yaml_provider.standard_providers()
- for transform in sorted(providers.keys(), key=io_grouping_key):
- if include(transform) and not exclude(transform):
- print(transform)
- if options.markdown_file:
- markdown_out.write(transform_docs(transform, providers[transform]))
+ for transform_base, transforms in itertools.groupby(
+ sorted(providers.keys(), key=io_grouping_key),
+ key=lambda s: s.split('-')[0]):
+ transforms = list(transforms)
+ if include(transform_base) and not exclude(transform_base):
+ print(transform_base)
+ if options.markdown_file or options.html_file:
+ if '-' in transforms[0]:
+ extra_docs = 'Supported languages: ' + ', '.join(
+ t.split('-')[-1] for t in sorted(transforms))
+ else:
+ extra_docs = ''
+ markdown_out.write(
+ transform_docs(transform_base, transforms, providers, extra_docs))
markdown_out.write('\n\n')
if options.schema_file:
- schema = providers[transform][0].config_schema(transform)
- if schema:
- json_config_schemas.append({
- 'if': {
- 'properties': {
- 'type': {
- 'const': transform
- }
- }
- },
- 'then': {
- 'properties': {
- 'config': {
- 'type': 'object',
- 'properties': {
- '__line__': {
- 'type': 'integer'
- },
- '__uuid__': {},
- **{
- f.name: json_utils.beam_type_to_json_type(
- f.type)
- for f in schema.fields
- }
- },
- 'additionalProperties': False,
- }
- }
- }
- })
+ for transform in transforms:
+ schema = providers[transform][0].config_schema(transform)
+ if schema:
+ json_config_schemas.append({
+ 'if': {
+ 'properties': {
+ 'type': {
+ 'const': transform
+ }
+ }
+ },
+ 'then': {
+ 'properties': {
+ 'config': {
+ 'type': 'object',
+ 'properties': {
+ '__line__': {
+ 'type': 'integer'
+ },
+ '__uuid__': {},
+ **{
+ f.name: #
+ json_utils.beam_type_to_json_type(f.type)
+ for f in schema.fields
+ }
+ },
+ 'additionalProperties': False,
+ }
+ }
+ }
+ })
if options.schema_file:
with open(options.schema_file, 'w') as fout: