This is an automated email from the ASF dual-hosted git repository.

robertwb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 4208b86fdd1 [YAML] Combine docstrings for different language variants 
of the same transform. (#30708)
4208b86fdd1 is described below

commit 4208b86fdd117ed875973f4639293dc574ab15bb
Author: Robert Bradshaw <[email protected]>
AuthorDate: Fri Mar 22 15:31:50 2024 -0700

    [YAML] Combine docstrings for different language variants of the same 
transform. (#30708)
---
 sdks/python/apache_beam/yaml/generate_yaml_docs.py | 114 ++++++++++++---------
 1 file changed, 68 insertions(+), 46 deletions(-)

diff --git a/sdks/python/apache_beam/yaml/generate_yaml_docs.py 
b/sdks/python/apache_beam/yaml/generate_yaml_docs.py
index 6ff4063088c..b11062cce4d 100644
--- a/sdks/python/apache_beam/yaml/generate_yaml_docs.py
+++ b/sdks/python/apache_beam/yaml/generate_yaml_docs.py
@@ -17,6 +17,7 @@
 
 import argparse
 import io
+import itertools
 import re
 
 import yaml
@@ -66,8 +67,8 @@ def _fake_row(schema):
   return {f.name: _fake_value(f.name, f.type) for f in schema.fields}
 
 
-def pretty_example(provider, t):
-  spec = {'type': t}
+def pretty_example(provider, t, base_t=None):
+  spec = {'type': base_t or t}
   try:
     requires_inputs = provider.requires_inputs(t, {})
   except Exception:
@@ -150,22 +151,33 @@ SKIP = [
 ]
 
 
-def transform_docs(t, providers):
+def transform_docs(transform_base, transforms, providers, extra_docs=''):
   return '\n'.join([
-      f'## {t}',
+      f'## {transform_base}',
       '',
-      longest(lambda p: p.description(t),
-              providers).replace('::\n', '\n\n    :::yaml\n'),
+      longest(
+          lambda t: longest(lambda p: p.description(t), providers[t]),
+          transforms).replace('::\n', '\n\n    :::yaml\n'),
+      '',
+      extra_docs,
       '',
       '### Configuration',
       '',
-      longest(lambda p: config_docs(p.config_schema(t)), providers),
+      longest(
+          lambda t: longest(
+              lambda p: config_docs(p.config_schema(t)), providers[t]),
+          transforms),
       '',
       '### Usage',
       '',
       '    :::yaml',
       '',
-      indent(longest(lambda p: pretty_example(p, t), providers), 4),
+      indent(
+          longest(
+              lambda t: longest(
+                  lambda p: pretty_example(p, t, transform_base), 
providers[t]),
+              transforms),
+          4),
   ])
 
 
@@ -175,53 +187,63 @@ def main():
   parser.add_argument('--html_file')
   parser.add_argument('--schema_file')
   parser.add_argument('--include', default='.*')
-  parser.add_argument(
-      '--exclude', default='(Combine)|(Filter)|(MapToFields)-.*')
+  parser.add_argument('--exclude', default='')
   options = parser.parse_args()
   include = re.compile(options.include).match
-  exclude = re.compile(options.exclude).match
+  exclude = (
+      re.compile(options.exclude).match if options.exclude else lambda _: 
False)
 
   with subprocess_server.SubprocessServer.cache_subprocesses():
     json_config_schemas = []
     markdown_out = io.StringIO()
     providers = yaml_provider.standard_providers()
-    for transform in sorted(providers.keys(), key=io_grouping_key):
-      if include(transform) and not exclude(transform):
-        print(transform)
-        if options.markdown_file:
-          markdown_out.write(transform_docs(transform, providers[transform]))
+    for transform_base, transforms in itertools.groupby(
+        sorted(providers.keys(), key=io_grouping_key),
+        key=lambda s: s.split('-')[0]):
+      transforms = list(transforms)
+      if include(transform_base) and not exclude(transform_base):
+        print(transform_base)
+        if options.markdown_file or options.html_file:
+          if '-' in transforms[0]:
+            extra_docs = 'Supported languages: ' + ', '.join(
+                t.split('-')[-1] for t in sorted(transforms))
+          else:
+            extra_docs = ''
+          markdown_out.write(
+              transform_docs(transform_base, transforms, providers, 
extra_docs))
           markdown_out.write('\n\n')
         if options.schema_file:
-          schema = providers[transform][0].config_schema(transform)
-          if schema:
-            json_config_schemas.append({
-                'if': {
-                    'properties': {
-                        'type': {
-                            'const': transform
-                        }
-                    }
-                },
-                'then': {
-                    'properties': {
-                        'config': {
-                            'type': 'object',
-                            'properties': {
-                                '__line__': {
-                                    'type': 'integer'
-                                },
-                                '__uuid__': {},
-                                **{
-                                    f.name: json_utils.beam_type_to_json_type(
-                                        f.type)
-                                    for f in schema.fields
-                                }
-                            },
-                            'additionalProperties': False,
-                        }
-                    }
-                }
-            })
+          for transform in transforms:
+            schema = providers[transform][0].config_schema(transform)
+            if schema:
+              json_config_schemas.append({
+                  'if': {
+                      'properties': {
+                          'type': {
+                              'const': transform
+                          }
+                      }
+                  },
+                  'then': {
+                      'properties': {
+                          'config': {
+                              'type': 'object',
+                              'properties': {
+                                  '__line__': {
+                                      'type': 'integer'
+                                  },
+                                  '__uuid__': {},
+                                  **{
+                                      f.name:  #
+                                      json_utils.beam_type_to_json_type(f.type)
+                                      for f in schema.fields
+                                  }
+                              },
+                              'additionalProperties': False,
+                          }
+                      }
+                  }
+              })
 
     if options.schema_file:
       with open(options.schema_file, 'w') as fout:

Reply via email to