Copilot commented on code in PR #19598:
URL: https://github.com/apache/datafusion/pull/19598#discussion_r2656667733
##########
datafusion/functions/src/string/bit_length.rs:
##########
@@ -90,17 +95,44 @@ impl ScalarUDFImpl for BitLengthFunc {
let [array] = take_function_args(self.name(), &args.args)?;
match array {
- ColumnarValue::Array(v) =>
Ok(ColumnarValue::Array(bit_length(v.as_ref())?)),
+ ColumnarValue::Array(v) => {
+ if let Some(arr) = v.as_any().downcast_ref::<StringArray>() {
+ let mut builder = Int32Builder::with_capacity(arr.len());
+ for i in 0..arr.len() {
+ if arr.is_null(i) {
+ builder.append_null();
+ } else {
+ let byte_len = arr.value(i).as_bytes().len();
+ builder.append_value((byte_len * 8) as i32);
Review Comment:
The cast `(byte_len * 8) as i32` could overflow if the string is longer than
i32::MAX / 8 bytes (approximately 268 MB). Consider adding overflow checking or
using `checked_mul` and returning an error for excessively large strings to
prevent silent overflow.
##########
datafusion/functions/src/string/bit_length.rs:
##########
@@ -90,17 +95,44 @@ impl ScalarUDFImpl for BitLengthFunc {
let [array] = take_function_args(self.name(), &args.args)?;
match array {
- ColumnarValue::Array(v) =>
Ok(ColumnarValue::Array(bit_length(v.as_ref())?)),
+ ColumnarValue::Array(v) => {
+ if let Some(arr) = v.as_any().downcast_ref::<StringArray>() {
+ let mut builder = Int32Builder::with_capacity(arr.len());
+ for i in 0..arr.len() {
+ if arr.is_null(i) {
+ builder.append_null();
+ } else {
+ let byte_len = arr.value(i).as_bytes().len();
+ builder.append_value((byte_len * 8) as i32);
+ }
+ }
+ Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+ } else if let Some(arr) =
v.as_any().downcast_ref::<LargeStringArray>() {
+ let mut builder = Int32Builder::with_capacity(arr.len());
+ for i in 0..arr.len() {
+ if arr.is_null(i) {
+ builder.append_null();
+ } else {
+ let byte_len = arr.value(i).as_bytes().len();
+ builder.append_value((byte_len * 8) as i32);
+ }
+ }
+ Ok(ColumnarValue::Array(Arc::new(builder.finish())))
Review Comment:
There is significant code duplication between the StringArray and
LargeStringArray implementations. Consider extracting the common logic into a
generic helper function or using a macro to reduce duplication and improve
maintainability. The only differences are the array types being downcast and
the builder types used.
##########
datafusion/functions/src/string/bit_length.rs:
##########
@@ -90,17 +95,44 @@ impl ScalarUDFImpl for BitLengthFunc {
let [array] = take_function_args(self.name(), &args.args)?;
match array {
- ColumnarValue::Array(v) =>
Ok(ColumnarValue::Array(bit_length(v.as_ref())?)),
+ ColumnarValue::Array(v) => {
+ if let Some(arr) = v.as_any().downcast_ref::<StringArray>() {
+ let mut builder = Int32Builder::with_capacity(arr.len());
+ for i in 0..arr.len() {
+ if arr.is_null(i) {
+ builder.append_null();
+ } else {
+ let byte_len = arr.value(i).as_bytes().len();
+ builder.append_value((byte_len * 8) as i32);
+ }
+ }
+ Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+ } else if let Some(arr) =
v.as_any().downcast_ref::<LargeStringArray>() {
+ let mut builder = Int32Builder::with_capacity(arr.len());
+ for i in 0..arr.len() {
+ if arr.is_null(i) {
+ builder.append_null();
+ } else {
+ let byte_len = arr.value(i).as_bytes().len();
+ builder.append_value((byte_len * 8) as i32);
+ }
+ }
+ Ok(ColumnarValue::Array(Arc::new(builder.finish())))
Review Comment:
The return type for LargeStringArray should be Int64, not Int32. The
`return_type` method returns Int64 for LargeUtf8 (via `utf8_to_int_type`), and
the scalar handling for LargeUtf8 also returns Int64 (line 130-132). This
implementation should use `Int64Builder` to maintain consistency with the
declared return type and scalar handling.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]