This is an automated email from the ASF dual-hosted git repository.

jrmccluskey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 3c3e2024d98 add some debug checks in test case and remove an extra line (#37237)
3c3e2024d98 is described below

commit 3c3e2024d98bf25b82d0b69f0765f4d444b25864
Author: Derrick Williams <[email protected]>
AuthorDate: Wed Jan 7 10:20:19 2026 -0500

    add some debug checks in test case and remove an extra line (#37237)
---
 sdks/python/apache_beam/dataframe/io.py             |  1 -
 sdks/python/apache_beam/yaml/yaml_transform_test.py | 12 ++++++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/sdks/python/apache_beam/dataframe/io.py b/sdks/python/apache_beam/dataframe/io.py
index d16191bc4cc..02423f517ee 100644
--- a/sdks/python/apache_beam/dataframe/io.py
+++ b/sdks/python/apache_beam/dataframe/io.py
@@ -793,7 +793,6 @@ class ReadViaPandas(beam.PTransform):
     if format == 'csv':
       kwargs['filename_column'] = filename_column
     self._reader = globals()['read_%s' % format](*args, **kwargs)
-    self._reader = globals()['read_%s' % format](*args, **kwargs)
     self._include_indexes = include_indexes
     self._objects_as_strings = objects_as_strings
     self._filename_column = filename_column
diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py
index 89e4dc8b951..5cf2fa00f15 100644
--- a/sdks/python/apache_beam/yaml/yaml_transform_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py
@@ -34,6 +34,8 @@ try:
 except ImportError:
   jsonschema = None
 
+_LOGGER = logging.getLogger(__name__)
+
 
 class CreateTimestamped(beam.PTransform):
   _yaml_requires_inputs = False
@@ -244,6 +246,10 @@ class YamlTransformE2ETest(unittest.TestCase):
       input = os.path.join(tmpdir, 'input.csv')
       output = os.path.join(tmpdir, 'output.json')
       data.to_csv(input, index=False)
+      with open(input, 'r') as f:
+        lines = f.readlines()
+      _LOGGER.debug("input.csv has these {lines} lines.")
+      self.assertEqual(len(lines), len(data) + 1)  # +1 for header
 
       with beam.Pipeline() as p:
         result = p | YamlTransform(
@@ -256,9 +262,11 @@ class YamlTransformE2ETest(unittest.TestCase):
               - type: WriteToJson
                 config:
                     path: %s
-                num_shards: 1
+                    num_shards: 1
+              - type: LogForTesting
             ''' % (repr(input), repr(output)))
-
+      all_output = list(glob.glob(output + "*"))
+      self.assertEqual(len(all_output), 1)
       output_shard = list(glob.glob(output + "*"))[0]
       result = pd.read_json(
           output_shard, orient='records',

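The checks added to YamlTransformE2ETest can be read as a standalone pattern: after writing the input CSV, assert that it contains one header line plus one line per DataFrame row, and after the pipeline runs with num_shards set to 1, assert that exactly one output shard was produced. Below is a minimal sketch of that pattern outside Beam; the sample DataFrame and the simulated single-shard write are assumptions standing in for the real YamlTransform pipeline in the test.

    # Standalone sketch of the debug checks added in this commit; not Beam code.
    import glob
    import logging
    import os
    import tempfile

    import pandas as pd

    _LOGGER = logging.getLogger(__name__)


    def main():
      # Illustrative data; the real test uses its own DataFrame.
      data = pd.DataFrame({'a': [1, 2, 3]})
      with tempfile.TemporaryDirectory() as tmpdir:
        input_path = os.path.join(tmpdir, 'input.csv')
        output_path = os.path.join(tmpdir, 'output.json')
        data.to_csv(input_path, index=False)

        # Check 1: the CSV has one header line plus one line per DataFrame row.
        with open(input_path, 'r') as f:
          lines = f.readlines()
        _LOGGER.debug("input.csv has these %s lines.", lines)
        assert len(lines) == len(data) + 1

        # Stand-in for the pipeline: write a single JSON shard at the output path.
        data.to_json(output_path + '-00000-of-00001', orient='records', lines=True)

        # Check 2: with num_shards set to 1, exactly one output shard exists.
        all_output = list(glob.glob(output_path + "*"))
        assert len(all_output) == 1
        _LOGGER.debug("found output shard %s", all_output[0])


    if __name__ == '__main__':
      logging.basicConfig(level=logging.DEBUG)
      main()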