Copilot commented on code in PR #81:
URL: https://github.com/apache/fluss-rust/pull/81#discussion_r2605598270


##########
crates/fluss/src/record/arrow.rs:
##########
@@ -820,3 +885,118 @@ impl ArrowReader {
     }
 }
 pub struct MyVec<T>(pub StreamReader<T>);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::metadata::DataTypes;
+
+    #[test]
+    fn test_to_array_type() {
+        assert_eq!(to_arrow_type(&DataTypes::boolean()), 
ArrowDataType::Boolean);
+        assert_eq!(to_arrow_type(&DataTypes::tinyint()), ArrowDataType::Int8);
+        assert_eq!(to_arrow_type(&DataTypes::smallint()), 
ArrowDataType::Int16);
+        assert_eq!(to_arrow_type(&DataTypes::bigint()), ArrowDataType::Int64);
+        assert_eq!(to_arrow_type(&DataTypes::int()), ArrowDataType::Int32);
+        assert_eq!(to_arrow_type(&DataTypes::float()), ArrowDataType::Float32);
+        assert_eq!(to_arrow_type(&DataTypes::double()), 
ArrowDataType::Float64);
+        assert_eq!(to_arrow_type(&DataTypes::char(16)), ArrowDataType::Utf8);
+        assert_eq!(to_arrow_type(&DataTypes::string()), ArrowDataType::Utf8);
+        assert_eq!(
+            to_arrow_type(&DataTypes::decimal(10, 2)),
+            ArrowDataType::Decimal128(10, 2)
+        );
+        assert_eq!(to_arrow_type(&DataTypes::date()), ArrowDataType::Date32);
+        assert_eq!(
+            to_arrow_type(&DataTypes::time()),
+            ArrowDataType::Time32(arrow_schema::TimeUnit::Second)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::time_with_precision(3)),
+            ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::time_with_precision(6)),
+            ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::time_with_precision(9)),
+            ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(0)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(3)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(6)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(9)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(0)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(3)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(6)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(9)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)
+        );
+        assert_eq!(to_arrow_type(&DataTypes::bytes()), ArrowDataType::Binary);
+        assert_eq!(
+            to_arrow_type(&DataTypes::binary(16)),
+            ArrowDataType::FixedSizeBinary(16)
+        );
+
+        assert_eq!(
+            to_arrow_type(&DataTypes::array(DataTypes::int())),
+            ArrowDataType::List(Field::new_list_field(ArrowDataType::Int32, 
true).into())
+        );
+
+        assert_eq!(
+            to_arrow_type(&DataTypes::map(DataTypes::string(), 
DataTypes::int())),
+            ArrowDataType::Map(
+                Arc::new(Field::new(
+                    "my_entries",
+                    ArrowDataType::Struct(arrow_schema::Fields::from(vec![
+                        Field::new("key", ArrowDataType::Utf8, false),
+                        Field::new("value", ArrowDataType::Int32, true),
+                    ])),
+                    true,
+                )),
+                false,
+            )
+        );
+    }
+
+    #[test]
+    #[should_panic(expected = "Invalid precision value for TimeType: 10")]
+    fn test_time_invalid_precision() {
+        to_arrow_type(&DataTypes::time_with_precision(10));
+    }
+
+    #[test]
+    #[should_panic(expected = "Invalid precision value for TimestampType: 10")]
+    fn test_timestamp_invalid_precision() {
+        to_arrow_type(&&DataTypes::timestamp_with_precision(10));

Review Comment:
   Unnecessary double reference. The function signature accepts `&DataType`, so 
passing `&&DataTypes::timestamp_with_precision(10)` is redundant. Use a single 
reference: `&DataTypes::timestamp_with_precision(10)`.
   ```suggestion
           to_arrow_type(&DataTypes::timestamp_with_precision(10));
   ```



##########
crates/fluss/src/record/arrow.rs:
##########
@@ -820,3 +885,118 @@ impl ArrowReader {
     }
 }
 pub struct MyVec<T>(pub StreamReader<T>);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::metadata::DataTypes;
+
+    #[test]
+    fn test_to_array_type() {
+        assert_eq!(to_arrow_type(&DataTypes::boolean()), 
ArrowDataType::Boolean);
+        assert_eq!(to_arrow_type(&DataTypes::tinyint()), ArrowDataType::Int8);
+        assert_eq!(to_arrow_type(&DataTypes::smallint()), 
ArrowDataType::Int16);
+        assert_eq!(to_arrow_type(&DataTypes::bigint()), ArrowDataType::Int64);
+        assert_eq!(to_arrow_type(&DataTypes::int()), ArrowDataType::Int32);
+        assert_eq!(to_arrow_type(&DataTypes::float()), ArrowDataType::Float32);
+        assert_eq!(to_arrow_type(&DataTypes::double()), 
ArrowDataType::Float64);
+        assert_eq!(to_arrow_type(&DataTypes::char(16)), ArrowDataType::Utf8);
+        assert_eq!(to_arrow_type(&DataTypes::string()), ArrowDataType::Utf8);
+        assert_eq!(
+            to_arrow_type(&DataTypes::decimal(10, 2)),
+            ArrowDataType::Decimal128(10, 2)
+        );
+        assert_eq!(to_arrow_type(&DataTypes::date()), ArrowDataType::Date32);
+        assert_eq!(
+            to_arrow_type(&DataTypes::time()),
+            ArrowDataType::Time32(arrow_schema::TimeUnit::Second)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::time_with_precision(3)),
+            ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::time_with_precision(6)),
+            ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::time_with_precision(9)),
+            ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(0)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(3)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(6)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(9)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(0)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(3)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(6)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(9)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)
+        );
+        assert_eq!(to_arrow_type(&DataTypes::bytes()), ArrowDataType::Binary);
+        assert_eq!(
+            to_arrow_type(&DataTypes::binary(16)),
+            ArrowDataType::FixedSizeBinary(16)
+        );
+
+        assert_eq!(
+            to_arrow_type(&DataTypes::array(DataTypes::int())),
+            ArrowDataType::List(Field::new_list_field(ArrowDataType::Int32, 
true).into())
+        );
+
+        assert_eq!(
+            to_arrow_type(&DataTypes::map(DataTypes::string(), 
DataTypes::int())),
+            ArrowDataType::Map(
+                Arc::new(Field::new(
+                    "my_entries",
+                    ArrowDataType::Struct(arrow_schema::Fields::from(vec![
+                        Field::new("key", ArrowDataType::Utf8, false),
+                        Field::new("value", ArrowDataType::Int32, true),
+                    ])),
+                    true,
+                )),
+                false,
+            )
+        );
+    }
+
+    #[test]
+    #[should_panic(expected = "Invalid precision value for TimeType: 10")]
+    fn test_time_invalid_precision() {
+        to_arrow_type(&DataTypes::time_with_precision(10));
+    }
+
+    #[test]
+    #[should_panic(expected = "Invalid precision value for TimestampType: 10")]
+    fn test_timestamp_invalid_precision() {
+        to_arrow_type(&&DataTypes::timestamp_with_precision(10));
+    }
+
+    #[test]
+    #[should_panic(expected = "Invalid precision value for TimestampLTzType: 
10")]
+    fn test_timestamp_ltz_invalid_precision() {
+        to_arrow_type(&&&DataTypes::timestamp_ltz_with_precision(10));

Review Comment:
   Unnecessary triple reference. The function signature accepts `&DataType`, so 
passing `&&&DataTypes::timestamp_ltz_with_precision(10)` is redundant. Use a 
single reference: `&DataTypes::timestamp_ltz_with_precision(10)`.
   ```suggestion
           to_arrow_type(&DataTypes::timestamp_ltz_with_precision(10));
   ```



##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,16 +589,81 @@ pub fn to_arrow_type(fluss_type: &DataType) -> 
ArrowDataType {
         DataType::Double(_) => ArrowDataType::Float64,
         DataType::Char(_) => ArrowDataType::Utf8,
         DataType::String(_) => ArrowDataType::Utf8,
-        DataType::Decimal(_) => todo!(),
+        DataType::Decimal(decimal_type) => ArrowDataType::Decimal128(
+            decimal_type
+                .precision()
+                .try_into()
+                .expect("length exceeds u8::MAX"),

Review Comment:
   The error message is misleading. The precision field of `DecimalType` is 
being converted, not a length. The message should say "precision exceeds 
u8::MAX" instead of "length exceeds u8::MAX".
   ```suggestion
                   .expect("precision exceeds u8::MAX"),
   ```



##########
crates/fluss/src/record/arrow.rs:
##########
@@ -820,3 +885,118 @@ impl ArrowReader {
     }
 }
 pub struct MyVec<T>(pub StreamReader<T>);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::metadata::DataTypes;
+
+    #[test]
+    fn test_to_array_type() {
+        assert_eq!(to_arrow_type(&DataTypes::boolean()), 
ArrowDataType::Boolean);
+        assert_eq!(to_arrow_type(&DataTypes::tinyint()), ArrowDataType::Int8);
+        assert_eq!(to_arrow_type(&DataTypes::smallint()), 
ArrowDataType::Int16);
+        assert_eq!(to_arrow_type(&DataTypes::bigint()), ArrowDataType::Int64);
+        assert_eq!(to_arrow_type(&DataTypes::int()), ArrowDataType::Int32);
+        assert_eq!(to_arrow_type(&DataTypes::float()), ArrowDataType::Float32);
+        assert_eq!(to_arrow_type(&DataTypes::double()), 
ArrowDataType::Float64);
+        assert_eq!(to_arrow_type(&DataTypes::char(16)), ArrowDataType::Utf8);
+        assert_eq!(to_arrow_type(&DataTypes::string()), ArrowDataType::Utf8);
+        assert_eq!(
+            to_arrow_type(&DataTypes::decimal(10, 2)),
+            ArrowDataType::Decimal128(10, 2)
+        );
+        assert_eq!(to_arrow_type(&DataTypes::date()), ArrowDataType::Date32);
+        assert_eq!(
+            to_arrow_type(&DataTypes::time()),
+            ArrowDataType::Time32(arrow_schema::TimeUnit::Second)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::time_with_precision(3)),
+            ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::time_with_precision(6)),
+            ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::time_with_precision(9)),
+            ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(0)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(3)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(6)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_with_precision(9)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(0)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(3)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(6)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None)
+        );
+        assert_eq!(
+            to_arrow_type(&DataTypes::timestamp_ltz_with_precision(9)),
+            ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)
+        );
+        assert_eq!(to_arrow_type(&DataTypes::bytes()), ArrowDataType::Binary);
+        assert_eq!(
+            to_arrow_type(&DataTypes::binary(16)),
+            ArrowDataType::FixedSizeBinary(16)
+        );
+
+        assert_eq!(
+            to_arrow_type(&DataTypes::array(DataTypes::int())),
+            ArrowDataType::List(Field::new_list_field(ArrowDataType::Int32, 
true).into())
+        );
+
+        assert_eq!(
+            to_arrow_type(&DataTypes::map(DataTypes::string(), 
DataTypes::int())),
+            ArrowDataType::Map(
+                Arc::new(Field::new(
+                    "my_entries",
+                    ArrowDataType::Struct(arrow_schema::Fields::from(vec![
+                        Field::new("key", ArrowDataType::Utf8, false),
+                        Field::new("value", ArrowDataType::Int32, true),
+                    ])),
+                    true,
+                )),
+                false,
+            )
+        );
+    }

Review Comment:
   Missing test coverage for the Row type conversion. The `test_to_array_type` 
function tests Array and Map types but doesn't test the Row type conversion 
(lines 654-666). Consider adding a test case to verify Row type conversion 
works correctly with nested fields.



##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,16 +589,81 @@ pub fn to_arrow_type(fluss_type: &DataType) -> 
ArrowDataType {
         DataType::Double(_) => ArrowDataType::Float64,
         DataType::Char(_) => ArrowDataType::Utf8,
         DataType::String(_) => ArrowDataType::Utf8,
-        DataType::Decimal(_) => todo!(),
+        DataType::Decimal(decimal_type) => ArrowDataType::Decimal128(
+            decimal_type
+                .precision()
+                .try_into()
+                .expect("length exceeds u8::MAX"),
+            decimal_type
+                .scale()
+                .try_into()
+                .expect("length exceeds i8::MAX"),
+        ),
         DataType::Date(_) => ArrowDataType::Date32,
-        DataType::Time(_) => todo!(),
-        DataType::Timestamp(_) => todo!(),
-        DataType::TimestampLTz(_) => todo!(),
-        DataType::Bytes(_) => todo!(),
-        DataType::Binary(_) => todo!(),
-        DataType::Array(_data_type) => todo!(),
-        DataType::Map(_data_type) => todo!(),
-        DataType::Row(_data_fields) => todo!(),
+        DataType::Time(time_type) => match time_type.precision() {
+            0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+            1 | 2 | 3 => 
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+            4 | 5 | 6 => 
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+            7 | 8 | 9 => 
ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+            // This arm should never be reached due to validation in TimeType.
+            invalid => panic!("Invalid precision value for TimeType: {}", 
invalid),
+        },
+        DataType::Timestamp(timestamp_type) => match 
timestamp_type.precision() {
+            0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second, 
None),
+            1 | 2 | 3 => 
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+            4 | 5 | 6 => 
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+            7 | 8 | 9 => 
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None),
+            // This arm should never be reached due to validation in Timestamp.
+            invalid => panic!("Invalid precision value for TimestampType: {}", 
invalid),
+        },
+        DataType::TimestampLTz(timestamp_ltz_type) => match 
timestamp_ltz_type.precision() {
+            0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second, 
None),
+            1 | 2 | 3 => 
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+            4 | 5 | 6 => 
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+            7 | 8 | 9 => 
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None),
+            // This arm should never be reached due to validation in 
TimestampLTz.
+            invalid => panic!("Invalid precision value for TimestampLTzType: 
{}", invalid),
+        },
+        DataType::Bytes(_) => ArrowDataType::Binary,
+        DataType::Binary(binary_type) => ArrowDataType::FixedSizeBinary(
+            binary_type
+                .length()
+                .try_into()
+                .expect("length exceeds i32::MAX"),
+        ),
+        DataType::Array(arrow_type) => ArrowDataType::List(
+            
arrow_schema::Field::new_list_field(to_arrow_type(arrow_type.get_element_type()),
 true)
+                .into(),
+        ),
+        DataType::Map(map_type) => {
+            let key_type = to_arrow_type(map_type.key_type());
+            let value_type = to_arrow_type(map_type.value_type());
+            let entry_fields = vec![
+                arrow_schema::Field::new("key", key_type, false),
+                arrow_schema::Field::new("value", value_type, true),
+            ];
+            ArrowDataType::Map(
+                Arc::new(arrow_schema::Field::new(
+                    "my_entries",

Review Comment:
   The hardcoded field name "my_entries" appears arbitrary and unprofessional. 
According to the Arrow specification for Map types, the standard field name 
should be "entries". Consider using "entries" instead to follow Arrow 
conventions.
   ```suggestion
                       "entries",
   ```



##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,16 +589,81 @@ pub fn to_arrow_type(fluss_type: &DataType) -> 
ArrowDataType {
         DataType::Double(_) => ArrowDataType::Float64,
         DataType::Char(_) => ArrowDataType::Utf8,
         DataType::String(_) => ArrowDataType::Utf8,
-        DataType::Decimal(_) => todo!(),
+        DataType::Decimal(decimal_type) => ArrowDataType::Decimal128(
+            decimal_type
+                .precision()
+                .try_into()
+                .expect("length exceeds u8::MAX"),
+            decimal_type
+                .scale()
+                .try_into()
+                .expect("length exceeds i8::MAX"),

Review Comment:
   The error message is misleading. The scale field of `DecimalType` is being 
converted, not a length. The message should say "scale exceeds i8::MAX" instead 
of "length exceeds i8::MAX".
   ```suggestion
                   .expect("scale exceeds i8::MAX"),
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to