TheNeuralBit commented on a change in pull request #12705:
URL: https://github.com/apache/beam/pull/12705#discussion_r478697347



##########
File path: sdks/python/apache_beam/dataframe/transforms_test.py
##########
@@ -266,6 +266,17 @@ def check(actual):
           lambda x: {'res': 3 * x}, proxy)
       assert_that(res['res'], equal_to_series(three_series), 'CheckDictOut')
 
+  def test_cat(self):
+    # verify that cat works with a List[Series] since this is missing from doctests
+    df = pd.DataFrame({
+        'one': ['A', 'B', 'C'],
+        'two': ['BB', 'CC', 'A'],
+        'three': ['CCC', 'AA', 'B'],
+    })
+    self.run_scenario(df, lambda df: df.two.str.cat([df.three], join='outer'))
+    self.run_scenario(
+        df, lambda df: df.one.str.cat([df.two, df.three], join='outer'))
+

Review comment:
       I've marked this PR WIP because this test does not pass. It yields the
following error, which I'm not sure how to diagnose. @robertwb, can you provide
any insight here?
   
   ```
   apache_beam/runners/common.py:806: in _invoke_process_per_window
       [si[global_window] for si in self.side_inputs]))
   apache_beam/runners/common.py:806: in <listcomp>
       [si[global_window] for si in self.side_inputs]))
   apache_beam/runners/worker/bundle_processor.py:422: in __getitem__
       self._cache[target_window] = self._side_input_data.view_fn(raw_view)
   apache_beam/pvalue.py:395: in <lambda>
       lambda iterable: from_runtime_iterable(iterable, view_options))
   _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

   it = <apache_beam.runners.worker.bundle_processor._StateBackedIterable object at 0x7f3b0e5cf310>
   options = {'coder': WindowedValueCoder[FastPrimitivesCoder], 'window_mapping_fn': <function _global_window_mapping_fn at 0x7f3b1b31b710>}

       @staticmethod
       def _from_runtime_iterable(it, options):
         head = list(itertools.islice(it, 2))
         if not head:
           return options.get('default', EmptySideInput())
         elif len(head) == 1:
           return head[0]
         raise ValueError(
             'PCollection of size %d with more than one element accessed as a '
             'singleton view. First two elements encountered are "%s", "%s".' %
   >         (len(head), str(head[0]), str(head[1])))
   E     ValueError: PCollection of size 2 with more than one element accessed as a singleton view. First two elements encountered are "<pandas.core.strings.StringMethods object at 0x7f3b0e32ec90>", "<pandas.core.strings.StringMethods object at 0x7f3b0e3d7510>". [while running 'DataframeTransform/Eval/[ComputedExpression[cat_Series_139891620772240]]:139891619912080/FlatMap(evaluate)/FlatMap(evaluate)']
  
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to