parthchandra commented on code in PR #3000:
URL: https://github.com/apache/datafusion-comet/pull/3000#discussion_r2669474732


##########
native/core/src/execution/expressions/strings.rs:
##########
@@ -123,3 +123,56 @@ impl ExpressionBuilder for FromJsonBuilder {
         Ok(Arc::new(FromJson::new(child, schema, &expr.timezone)))
     }
 }
+
+/// Builder for StartsWith expressions
+pub struct StartsWithBuilder;
+
+impl ExpressionBuilder for StartsWithBuilder {
+    fn build(
+        &self,
+        spark_expr: &Expr,
+        input_schema: SchemaRef,
+        planner: &PhysicalPlanner,
+    ) -> Result<Arc<dyn PhysicalExpr>, ExecutionError> {
+        let expr = extract_expr!(spark_expr, StartsWith);
+        let left = planner.create_expr(expr.left.as_ref().unwrap(), 
Arc::clone(&input_schema))?;
+        let right = planner.create_expr(expr.right.as_ref().unwrap(), 
input_schema)?;
+
+        let pattern = extract_string_literal(&right)?;
+        Ok(Arc::new(StartsWithExpr::new(left, pattern)))
+    }
+}
+
+/// Builder for EndsWith expressions
+pub struct EndsWithBuilder;
+
+impl ExpressionBuilder for EndsWithBuilder {
+    fn build(
+        &self,
+        spark_expr: &Expr,
+        input_schema: SchemaRef,
+        planner: &PhysicalPlanner,
+    ) -> Result<Arc<dyn PhysicalExpr>, ExecutionError> {
+        let expr = extract_expr!(spark_expr, EndsWith);
+        let left = planner.create_expr(expr.left.as_ref().unwrap(), 
Arc::clone(&input_schema))?;
+        let right = planner.create_expr(expr.right.as_ref().unwrap(), 
input_schema)?;
+
+        let pattern = extract_string_literal(&right)?;
+        Ok(Arc::new(EndsWithExpr::new(left, pattern)))
+    }
+}
+
+/// Helper function to extract a string literal from a physical expression
+fn extract_string_literal(expr: &Arc<dyn PhysicalExpr>) -> Result<String, 
ExecutionError> {
+    match expr.as_any().downcast_ref::<Literal>() {
+        Some(literal) => match literal.value() {
+            ScalarValue::Utf8(Some(s)) => Ok(s.clone()),
+            _ => Err(ExecutionError::GeneralError(
+                "StartsWith/EndsWith pattern must be a string 
literal".to_string(),
+            )),
+        },
+        None => Err(ExecutionError::GeneralError(

Review Comment:
   While a large number of cases will have the pattern as a literal, the 
pattern can be an expression of type string or binary (Spark handles both in 
its `UTF8String` class). This means that when the pattern is not a literal, we 
need to process the pattern as a physical expression every time.  
   We can verify this with the following query:
   ```
   select startsWith(c1, substring(c1, 0, 3)) from parquetV1Table
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to