derrickaw commented on code in PR #35914:
URL: https://github.com/apache/beam/pull/35914#discussion_r2292581951


##########
sdks/python/apache_beam/yaml/examples/testing/examples_test.py:
##########
@@ -538,8 +557,64 @@
     word = element.split('=')[1].split(',')[0].replace("'", '')
     count = int(element.split('=')[2].replace(')', ''))
     all_words += [word] * count
-  random.shuffle(all_words)
 
+  return _wordcount_random_shuffler(test_spec, all_words, env)
+
+
+@YamlExamplesTestSuite.register_test_preprocessor(
+    ['test_wordCountInclude_yaml'])
+def _wordcount_test_preprocessor(
+    test_spec: dict, expected: List[str], env: TestEnvironment):
+  """
+  Preprocessor for the wordcount Jinja tests.
+
+  This preprocessor generates a random input file based on the expected output
+  of the wordcount example. This allows the test to verify the pipeline's
+  correctness without relying on a fixed input file.
+
+  Based on this expected output: #  Row(output='king - 311')
+
+  Args:
+    test_spec: The dictionary representation of the YAML pipeline specification.
+    expected: A list of strings representing the expected output of the
+      pipeline.
+    env: The TestEnvironment object providing utilities for creating temporary
+      files.
+
+  Returns:
+    The modified test_spec dictionary with the input file path replaced.
+  """
+  all_words = []
+  for element in expected:
+    word = element.split('=')[1].split(' - ')[0].replace("'", '')
+    count = int(element.split('=')[1].split(' - ')[1].replace("')", ''))
+    all_words += [word] * count

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@beam.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to