derrickaw commented on code in PR #35914: URL: https://github.com/apache/beam/pull/35914#discussion_r2292581951
########## sdks/python/apache_beam/yaml/examples/testing/examples_test.py: ########## @@ -538,8 +557,64 @@ word = element.split('=')[1].split(',')[0].replace("'", '') count = int(element.split('=')[2].replace(')', '')) all_words += [word] * count - random.shuffle(all_words) + return _wordcount_random_shuffler(test_spec, all_words, env) + + +@YamlExamplesTestSuite.register_test_preprocessor( + ['test_wordCountInclude_yaml']) +def _wordcount_test_preprocessor( + test_spec: dict, expected: List[str], env: TestEnvironment): + """ + Preprocessor for the wordcount Jinja tests. + + This preprocessor generates a random input file based on the expected output + of the wordcount example. This allows the test to verify the pipeline's + correctness without relying on a fixed input file. + + Based on this expected output: # Row(output='king - 311') + + Args: + test_spec: The dictionary representation of the YAML pipeline specification. + expected: A list of strings representing the expected output of the + pipeline. + env: The TestEnvironment object providing utilities for creating temporary + files. + + Returns: + The modified test_spec dictionary with the input file path replaced. + """ + all_words = [] + for element in expected: + word = element.split('=')[1].split(' - ')[0].replace("'", '') + count = int(element.split('=')[1].split(' - ')[1].replace("')", '')) + all_words += [word] * count Review Comment: Done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@beam.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org