Copilot commented on code in PR #19598:
URL: https://github.com/apache/datafusion/pull/19598#discussion_r2656667733


##########
datafusion/functions/src/string/bit_length.rs:
##########
@@ -90,17 +95,44 @@ impl ScalarUDFImpl for BitLengthFunc {
         let [array] = take_function_args(self.name(), &args.args)?;
 
         match array {
-            ColumnarValue::Array(v) => 
Ok(ColumnarValue::Array(bit_length(v.as_ref())?)),
+            ColumnarValue::Array(v) => {
+                if let Some(arr) = v.as_any().downcast_ref::<StringArray>() {
+                    let mut builder = Int32Builder::with_capacity(arr.len());
+                    for i in 0..arr.len() {
+                        if arr.is_null(i) {
+                            builder.append_null();
+                        } else {
+                            let byte_len = arr.value(i).as_bytes().len();
+                            builder.append_value((byte_len * 8) as i32);

Review Comment:
   The cast `(byte_len * 8) as i32` could overflow if the string is longer than 
i32::MAX / 8 bytes (approximately 268 MB). Consider adding overflow checking or 
using `checked_mul` and returning an error for excessively large strings to 
prevent silent overflow.



##########
datafusion/functions/src/string/bit_length.rs:
##########
@@ -90,17 +95,44 @@ impl ScalarUDFImpl for BitLengthFunc {
         let [array] = take_function_args(self.name(), &args.args)?;
 
         match array {
-            ColumnarValue::Array(v) => 
Ok(ColumnarValue::Array(bit_length(v.as_ref())?)),
+            ColumnarValue::Array(v) => {
+                if let Some(arr) = v.as_any().downcast_ref::<StringArray>() {
+                    let mut builder = Int32Builder::with_capacity(arr.len());
+                    for i in 0..arr.len() {
+                        if arr.is_null(i) {
+                            builder.append_null();
+                        } else {
+                            let byte_len = arr.value(i).as_bytes().len();
+                            builder.append_value((byte_len * 8) as i32);
+                        }
+                    }
+                    Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+                } else if let Some(arr) = 
v.as_any().downcast_ref::<LargeStringArray>() {
+                    let mut builder = Int32Builder::with_capacity(arr.len());
+                    for i in 0..arr.len() {
+                        if arr.is_null(i) {
+                            builder.append_null();
+                        } else {
+                            let byte_len = arr.value(i).as_bytes().len();
+                            builder.append_value((byte_len * 8) as i32);
+                        }
+                    }
+                    Ok(ColumnarValue::Array(Arc::new(builder.finish())))

Review Comment:
   There is significant code duplication between the StringArray and 
LargeStringArray implementations. Consider extracting the common logic into a 
generic helper function or using a macro to reduce duplication and improve 
maintainability. The only differences are the array types being downcast and 
the builder types used.



##########
datafusion/functions/src/string/bit_length.rs:
##########
@@ -90,17 +95,44 @@ impl ScalarUDFImpl for BitLengthFunc {
         let [array] = take_function_args(self.name(), &args.args)?;
 
         match array {
-            ColumnarValue::Array(v) => 
Ok(ColumnarValue::Array(bit_length(v.as_ref())?)),
+            ColumnarValue::Array(v) => {
+                if let Some(arr) = v.as_any().downcast_ref::<StringArray>() {
+                    let mut builder = Int32Builder::with_capacity(arr.len());
+                    for i in 0..arr.len() {
+                        if arr.is_null(i) {
+                            builder.append_null();
+                        } else {
+                            let byte_len = arr.value(i).as_bytes().len();
+                            builder.append_value((byte_len * 8) as i32);
+                        }
+                    }
+                    Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+                } else if let Some(arr) = 
v.as_any().downcast_ref::<LargeStringArray>() {
+                    let mut builder = Int32Builder::with_capacity(arr.len());
+                    for i in 0..arr.len() {
+                        if arr.is_null(i) {
+                            builder.append_null();
+                        } else {
+                            let byte_len = arr.value(i).as_bytes().len();
+                            builder.append_value((byte_len * 8) as i32);
+                        }
+                    }
+                    Ok(ColumnarValue::Array(Arc::new(builder.finish())))

Review Comment:
   The return type for LargeStringArray should be Int64, not Int32. The 
`return_type` method returns Int64 for LargeUtf8 (via `utf8_to_int_type`), and 
the scalar handling for LargeUtf8 also returns Int64 (line 130-132). This 
implementation should use `Int64Builder` to maintain consistency with the 
declared return type and scalar handling.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to