Copilot commented on code in PR #81:
URL: https://github.com/apache/fluss-rust/pull/81#discussion_r2601657628
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
+ }
DataType::Date(_) => ArrowDataType::Date32,
- DataType::Time(_) => todo!(),
- DataType::Timestamp(_) => todo!(),
- DataType::TimestampLTz(_) => todo!(),
- DataType::Bytes(_) => todo!(),
- DataType::Binary(_) => todo!(),
+ DataType::Time(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+ 1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+ 4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+ _ => ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+ },
+ DataType::Timestamp(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::TimestampLTz(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::Bytes(_) => ArrowDataType::Binary,
+ DataType::Binary(_data_type) =>
ArrowDataType::FixedSizeBinary(_data_type.length() as i32),
Review Comment:
Unsafe type cast from `usize` to `i32` for binary length. On 64-bit systems,
`usize` can hold values larger than `i32::MAX` (2,147,483,647). BinaryType has
MAX_LENGTH of `usize::MAX`, so this cast could overflow silently and produce
negative or incorrect sizes. Consider using `.try_into().expect("length exceeds
i32::MAX")` or validating the length fits within i32 range.
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
+ }
DataType::Date(_) => ArrowDataType::Date32,
- DataType::Time(_) => todo!(),
- DataType::Timestamp(_) => todo!(),
- DataType::TimestampLTz(_) => todo!(),
- DataType::Bytes(_) => todo!(),
- DataType::Binary(_) => todo!(),
+ DataType::Time(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+ 1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+ 4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+ _ => ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+ },
+ DataType::Timestamp(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::TimestampLTz(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
Review Comment:
Variable naming inconsistency. The pattern name `_data_type` with leading
underscore typically indicates an intentionally unused variable in Rust.
However, this variable is actively used. Consider renaming to `data_type` or
`timestamp_ltz_type` to follow standard Rust conventions and match the naming
pattern used elsewhere in the codebase.
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
Review Comment:
Unsafe type cast from `u32` to `i8` for scale. While DecimalType's scale is
constrained to valid ranges, casting `u32` to `i8` (max value 127) could cause
silent overflow if scale exceeds 127. Arrow's Decimal128 expects `i8` for scale
(allowing negative scales), so consider adding validation or using a checked
conversion like `.try_into().expect("scale out of range")`.
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
+ }
DataType::Date(_) => ArrowDataType::Date32,
- DataType::Time(_) => todo!(),
- DataType::Timestamp(_) => todo!(),
- DataType::TimestampLTz(_) => todo!(),
- DataType::Bytes(_) => todo!(),
- DataType::Binary(_) => todo!(),
+ DataType::Time(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+ 1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+ 4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+ _ => ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+ },
+ DataType::Timestamp(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::TimestampLTz(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
Review Comment:
[nitpick] The wildcard pattern `_` catches all precision values > 6,
including valid values 7-9 and potentially invalid values > 9 (MAX_PRECISION).
While TimeType validates precision on construction (MAX_PRECISION=9), consider
being more explicit with `7..=9` or adding a comment explaining that invalid
values are prevented at construction time. This makes the code more
self-documenting.
```suggestion
// Only precision values 0..=9 are valid; higher values are
prevented at construction time.
DataType::Time(_data_type) => match _data_type.precision() {
0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
7..=9 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
// This arm should never be reached due to validation in
TimeType.
invalid => panic!("Invalid precision value for TimeType: {}",
invalid),
},
DataType::Timestamp(_data_type) => match _data_type.precision() {
0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
7..=9 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None),
// This arm should never be reached due to validation in
TimeType.
invalid => panic!("Invalid precision value for TimestampType:
{}", invalid),
},
DataType::TimestampLTz(_data_type) => match _data_type.precision() {
0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
7..=9 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None),
// This arm should never be reached due to validation in
TimeType.
invalid => panic!("Invalid precision value for TimestampLTzType:
{}", invalid),
```
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
+ }
DataType::Date(_) => ArrowDataType::Date32,
- DataType::Time(_) => todo!(),
- DataType::Timestamp(_) => todo!(),
- DataType::TimestampLTz(_) => todo!(),
- DataType::Bytes(_) => todo!(),
- DataType::Binary(_) => todo!(),
+ DataType::Time(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+ 1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+ 4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+ _ => ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+ },
+ DataType::Timestamp(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::TimestampLTz(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::Bytes(_) => ArrowDataType::Binary,
+ DataType::Binary(_data_type) =>
ArrowDataType::FixedSizeBinary(_data_type.length() as i32),
Review Comment:
The new datatype conversions (Decimal, Time, Timestamp, TimestampLTz,
Binary) lack unit test coverage. While integration tests exercise some Arrow
functionality, consider adding unit tests in a `#[cfg(test)]` module to verify:
1. Precision/scale mappings for Decimal (especially edge cases like
precision=38, scale=0)
2. Precision-to-TimeUnit mappings for Time/Timestamp types (0, 1-3, 4-6, 7-9)
3. Binary length conversion behavior
This would help catch the type casting issues and ensure correctness of the
mappings.
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
+ }
DataType::Date(_) => ArrowDataType::Date32,
- DataType::Time(_) => todo!(),
- DataType::Timestamp(_) => todo!(),
- DataType::TimestampLTz(_) => todo!(),
- DataType::Bytes(_) => todo!(),
- DataType::Binary(_) => todo!(),
+ DataType::Time(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+ 1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+ 4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+ _ => ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+ },
+ DataType::Timestamp(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::TimestampLTz(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
Review Comment:
[nitpick] The wildcard pattern `_` catches all precision values > 6,
including valid values 7-9 and potentially invalid values > 9 (MAX_PRECISION).
While TimestampLTzType validates precision on construction (MAX_PRECISION=9),
consider being more explicit with `7..=9` or adding a comment explaining that
invalid values are prevented at construction time. This makes the code more
self-documenting.
```suggestion
// Only precision values 0..=9 are valid; invalid values are
prevented at construction time.
DataType::TimestampLTz(_data_type) => match _data_type.precision() {
0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
7..=9 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None),
// This arm should never be reached due to constructor
validation.
_ => unreachable!("Invalid precision for TimestampLTzType; must
be 0..=9"),
```
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
+ }
DataType::Date(_) => ArrowDataType::Date32,
- DataType::Time(_) => todo!(),
- DataType::Timestamp(_) => todo!(),
- DataType::TimestampLTz(_) => todo!(),
- DataType::Bytes(_) => todo!(),
- DataType::Binary(_) => todo!(),
+ DataType::Time(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+ 1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+ 4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+ _ => ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+ },
+ DataType::Timestamp(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::TimestampLTz(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
Review Comment:
[nitpick] The wildcard pattern `_` catches all precision values > 6,
including valid values 7-9 and potentially invalid values > 9 (MAX_PRECISION).
While TimestampType validates precision on construction (MAX_PRECISION=9),
consider being more explicit with `7..=9` or adding a comment explaining that
invalid values are prevented at construction time. This makes the code more
self-documenting.
```suggestion
// Only precision values up to 9 are valid; enforced at
construction (MAX_PRECISION=9)
7..=9 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
},
DataType::Timestamp(_data_type) => match _data_type.precision() {
0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
// Only precision values up to 9 are valid; enforced at
construction (MAX_PRECISION=9)
7..=9 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None),
},
DataType::TimestampLTz(_data_type) => match _data_type.precision() {
0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
// Only precision values up to 9 are valid; enforced at
construction (MAX_PRECISION=9)
7..=9 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None),
```
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
+ }
DataType::Date(_) => ArrowDataType::Date32,
- DataType::Time(_) => todo!(),
- DataType::Timestamp(_) => todo!(),
- DataType::TimestampLTz(_) => todo!(),
- DataType::Bytes(_) => todo!(),
- DataType::Binary(_) => todo!(),
+ DataType::Time(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+ 1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+ 4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+ _ => ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+ },
Review Comment:
Variable naming inconsistency. The pattern name `_data_type` with leading
underscore typically indicates an intentionally unused variable in Rust.
However, this variable is actively used. Consider renaming to `data_type` or
`time_type` to follow standard Rust conventions and match the naming pattern
used elsewhere in the codebase.
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
+ }
DataType::Date(_) => ArrowDataType::Date32,
- DataType::Time(_) => todo!(),
- DataType::Timestamp(_) => todo!(),
- DataType::TimestampLTz(_) => todo!(),
- DataType::Bytes(_) => todo!(),
- DataType::Binary(_) => todo!(),
+ DataType::Time(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+ 1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+ 4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+ _ => ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+ },
+ DataType::Timestamp(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::TimestampLTz(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
+ DataType::Bytes(_) => ArrowDataType::Binary,
+ DataType::Binary(_data_type) =>
ArrowDataType::FixedSizeBinary(_data_type.length() as i32),
Review Comment:
Variable naming inconsistency. The pattern name `_data_type` with leading
underscore typically indicates an intentionally unused variable in Rust.
However, this variable is actively used. Consider renaming to `data_type` or
`binary_type` to follow standard Rust conventions and match the naming pattern
used elsewhere in the codebase.
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
Review Comment:
Variable naming inconsistency. The pattern name `_data_type` with leading
underscore typically indicates an intentionally unused variable in Rust.
However, this variable is actively used. Consider renaming to `data_type` or
`decimal_type` to follow standard Rust conventions and match the naming pattern
used elsewhere in this file (e.g., line 105, 108 in json_serde.rs use `_type`).
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
Review Comment:
Unsafe type cast from `u32` to `u8` for precision. DecimalType has
MAX_PRECISION of 38, but `u8` can only hold values up to 255. While 38 < 255,
this cast should be safe in practice, but it's semantically incorrect. Arrow's
Decimal128 constructor expects `u8` for precision (max 38), so consider adding
a validation check or a comment explaining the safe range.
```suggestion
DataType::Decimal(_data_type) => {
// Arrow's Decimal128 expects precision in 0..=38, which fits in
u8.
// Assert here to catch any future changes that might violate
this invariant.
assert!(
_data_type.precision() <= 38,
"Decimal precision {} exceeds Arrow's maximum allowed value
of 38",
_data_type.precision()
);
```
##########
crates/fluss/src/record/arrow.rs:
##########
@@ -589,13 +589,30 @@ pub fn to_arrow_type(fluss_type: &DataType) ->
ArrowDataType {
DataType::Double(_) => ArrowDataType::Float64,
DataType::Char(_) => ArrowDataType::Utf8,
DataType::String(_) => ArrowDataType::Utf8,
- DataType::Decimal(_) => todo!(),
+ DataType::Decimal(_data_type) => {
+ ArrowDataType::Decimal128(_data_type.precision() as u8,
_data_type.scale() as i8)
+ }
DataType::Date(_) => ArrowDataType::Date32,
- DataType::Time(_) => todo!(),
- DataType::Timestamp(_) => todo!(),
- DataType::TimestampLTz(_) => todo!(),
- DataType::Bytes(_) => todo!(),
- DataType::Binary(_) => todo!(),
+ DataType::Time(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Time32(arrow_schema::TimeUnit::Second),
+ 1 | 2 | 3 =>
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond),
+ 4 | 5 | 6 =>
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond),
+ _ => ArrowDataType::Time64(arrow_schema::TimeUnit::Nanosecond),
+ },
+ DataType::Timestamp(_data_type) => match _data_type.precision() {
+ 0 => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Second,
None),
+ 1 | 2 | 3 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+ 4 | 5 | 6 =>
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+ _ => ArrowDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond,
None),
+ },
Review Comment:
Variable naming inconsistency. The pattern name `_data_type` with leading
underscore typically indicates an intentionally unused variable in Rust.
However, this variable is actively used. Consider renaming to `data_type` or
`timestamp_type` to follow standard Rust conventions and match the naming
pattern used elsewhere in the codebase.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]