Copilot commented on code in PR #265:
URL: https://github.com/apache/fluss-rust/pull/265#discussion_r2776723783


##########
bindings/cpp/src/types.rs:
##########
@@ -68,6 +78,7 @@ fn ffi_data_type_to_core(dt: i32) -> 
Result<fcore::metadata::DataType> {
         DATA_TYPE_TIME => Ok(fcore::metadata::DataTypes::time()),
         DATA_TYPE_TIMESTAMP => Ok(fcore::metadata::DataTypes::timestamp()),
         DATA_TYPE_TIMESTAMP_LTZ => 
Ok(fcore::metadata::DataTypes::timestamp_ltz()),
+        DATA_TYPE_DECIMAL => Ok(fcore::metadata::DataTypes::decimal(precision, 
scale)),

Review Comment:
   `DataTypes::decimal(precision, scale)` panics on invalid parameters (it 
calls `DecimalType::new(...).expect(...)`). Since precision/scale come from FFI 
input, this should not be able to panic. Consider validating precision/scale 
before calling this, or constructing via `DecimalType::new(...)?` and mapping 
the error into the `Result` returned by `ffi_data_type_to_core`.
   ```suggestion
           DATA_TYPE_DECIMAL => {
               if precision == 0 {
                   return Err(anyhow!(
                       "Invalid decimal type from FFI: precision must be > 0, 
got {precision}"
                   ));
               }
               if scale > precision {
                   return Err(anyhow!(
                       "Invalid decimal type from FFI: scale ({scale}) must be 
<= precision ({precision})"
                   ));
               }
               Ok(fcore::metadata::DataTypes::decimal(precision, scale))
           }
   ```



##########
bindings/cpp/src/types.rs:
##########
@@ -97,7 +109,7 @@ pub fn ffi_descriptor_to_core(
     let mut schema_builder = fcore::metadata::Schema::builder();
 
     for col in &descriptor.schema.columns {
-        let dt = ffi_data_type_to_core(col.data_type)?;
+        let dt = ffi_data_type_to_core(col.data_type, col.precision as u32, 
col.scale as u32)?;

Review Comment:
   `col.precision`/`col.scale` are `i32` and are cast to `u32` here. If a 
caller passes negative values, this will wrap to a huge `u32` and then 
`DataTypes::decimal(...)` will panic internally (it uses `expect("Invalid 
decimal parameters")`), which would abort across the CXX FFI boundary. Please 
validate precision/scale are non-negative and within the supported bounds (p in 
1..=38, s in 0..=p) and return a normal `Err` instead of panicking.
   ```suggestion
           let dt = if col.data_type == DATA_TYPE_DECIMAL {
               let precision = col.precision;
               let scale = col.scale;
   
               // Validate decimal precision/scale to avoid panics in decimal 
constructor
               if precision < 1 || precision > 38 {
                   return Err(anyhow!(
                       "Invalid decimal precision {} for column '{}'; expected 
1..=38",
                       precision,
                       col.name
                   ));
               }
               if scale < 0 || scale > precision {
                   return Err(anyhow!(
                       "Invalid decimal scale {} for column '{}' with precision 
{}; expected 0..=precision",
                       scale,
                       col.name,
                       precision
                   ));
               }
   
               ffi_data_type_to_core(col.data_type, precision as u32, scale as 
u32)?
           } else {
               ffi_data_type_to_core(col.data_type, col.precision as u32, 
col.scale as u32)?
           };
   ```



##########
bindings/cpp/src/types.rs:
##########
@@ -224,12 +263,23 @@ pub fn ffi_row_to_core(row: &ffi::FfiGenericRow) -> 
fcore::row::GenericRow<'_> {
             DATUM_TYPE_FLOAT64 => Datum::Float64(field.f64_val.into()),
             DATUM_TYPE_STRING => 
Datum::String(Cow::Borrowed(field.string_val.as_str())),
             DATUM_TYPE_BYTES => 
Datum::Blob(Cow::Borrowed(field.bytes_val.as_slice())),
-            _ => Datum::Null,
+            DATUM_TYPE_DECIMAL_STRING => {
+                let (precision, scale) = get_decimal_type(idx, schema)?;
+                let bd = 
bigdecimal::BigDecimal::from_str(field.string_val.as_str())
+                    .map_err(|e| anyhow!(
+                        "Column {idx}: invalid decimal string '{}': {e}",
+                        field.string_val
+                    ))?;
+                let decimal = fcore::row::Decimal::from_big_decimal(bd, 
precision, scale)
+                    .map_err(|e| anyhow!("Column {idx}: {e}"))?;
+                Datum::Decimal(decimal)
+            }

Review Comment:
   `ffi_row_to_core` only accepts `DATUM_TYPE_DECIMAL_STRING`, but the read 
path introduced `DATUM_TYPE_DECIMAL_I64`/`DATUM_TYPE_DECIMAL_I128` and C++ 
exposes those `DatumType` values. This makes it easy for users to take a 
scanned row (which will contain DecimalI64/I128) and append it back, but it 
will currently fail with "unknown datum type". Please handle 
`DATUM_TYPE_DECIMAL_I64`/`DATUM_TYPE_DECIMAL_I128` here (you already have 
`decimal_precision`/`decimal_scale` and the unscaled value in the datum) and 
convert them into `Datum::Decimal`.
   ```suggestion
               }
               DATUM_TYPE_DECIMAL_I64 => {
                   let (precision, scale) = get_decimal_type(idx, schema)?;
                   let unscaled = field.i64_val;
                   let s = if scale == 0 {
                       unscaled.to_string()
                   } else {
                       let negative = unscaled < 0;
                       let mut digits = unscaled
                           .abs()
                           .to_string();
                       let scale_usize = scale as usize;
                       if digits.len() <= scale_usize {
                           let mut padded = String::with_capacity(scale_usize + 
1);
                           for _ in 0..(scale_usize - digits.len() + 1) {
                               padded.push('0');
                           }
                           padded.push_str(&digits);
                           digits = padded;
                       }
                       let split_pos = digits.len() - scale_usize;
                       let (int_part, frac_part) = digits.split_at(split_pos);
                       let mut result = String::with_capacity(digits.len() + 2);
                       if negative {
                           result.push('-');
                       }
                       result.push_str(int_part);
                       result.push('.');
                       result.push_str(frac_part);
                       result
                   };
                   let bd = bigdecimal::BigDecimal::from_str(&s).map_err(|e| {
                       anyhow!("Column {idx}: invalid decimal I64 value '{}': 
{e}", s)
                   })?;
                   let decimal = fcore::row::Decimal::from_big_decimal(bd, 
precision, scale)
                       .map_err(|e| anyhow!("Column {idx}: {e}"))?;
                   Datum::Decimal(decimal)
               }
               DATUM_TYPE_DECIMAL_I128 => {
                   let (precision, scale) = get_decimal_type(idx, schema)?;
                   let unscaled = field.i128_val;
                   let s = if scale == 0 {
                       unscaled.to_string()
                   } else {
                       let negative = unscaled < 0;
                       let mut digits = unscaled
                           .abs()
                           .to_string();
                       let scale_usize = scale as usize;
                       if digits.len() <= scale_usize {
                           let mut padded = String::with_capacity(scale_usize + 
1);
                           for _ in 0..(scale_usize - digits.len() + 1) {
                               padded.push('0');
                           }
                           padded.push_str(&digits);
                           digits = padded;
                       }
                       let split_pos = digits.len() - scale_usize;
                       let (int_part, frac_part) = digits.split_at(split_pos);
                       let mut result = String::with_capacity(digits.len() + 2);
                       if negative {
                           result.push('-');
                       }
                       result.push_str(int_part);
                       result.push('.');
                       result.push_str(frac_part);
                       result
                   };
                   let bd = bigdecimal::BigDecimal::from_str(&s).map_err(|e| {
                       anyhow!("Column {idx}: invalid decimal I128 value '{}': 
{e}", s)
                   })?;
                   let decimal = fcore::row::Decimal::from_big_decimal(bd, 
precision, scale)
                       .map_err(|e| anyhow!("Column {idx}: {e}"))?;
                   Datum::Decimal(decimal)
               }
   ```



##########
bindings/cpp/include/fluss.hpp:
##########
@@ -269,6 +286,70 @@ struct Datum {
         d.bytes_val = std::move(v);
         return d;
     }
+    // Stores the decimal string as-is. Rust side will parse via BigDecimal,
+    // look up (p,s) from the schema, validate, and create the Decimal.
+    static Datum DecimalString(std::string str) {
+        Datum d;
+        d.type = DatumType::DecimalString;
+        d.string_val = std::move(str);
+        return d;
+    }
+
+    bool IsDecimal() const {
+        return type == DatumType::DecimalI64 || type == DatumType::DecimalI128
+            || type == DatumType::DecimalString;
+    }
+
+    std::string DecimalToString() const {
+        if (type == DatumType::DecimalI64) {
+            return FormatUnscaled64(i64_val, decimal_scale);
+        } else if (type == DatumType::DecimalI128) {
+            __int128 val = (static_cast<__int128>(i128_hi) << 64) |
+                           
static_cast<__int128>(static_cast<uint64_t>(i128_lo));

Review Comment:
   In `DecimalToString()`, reconstructing the `__int128` uses 
`(static_cast<__int128>(i128_hi) << 64)`, but left-shifting a negative signed 
value is undefined behavior in C++. For negative decimals, `i128_hi` will be 
negative, so this can miscompile. Reconstruct using an unsigned intermediate 
(bitwise composition in `unsigned __int128` from the `uint64_t` bit patterns) 
and then cast to `__int128`.
   ```suggestion
               unsigned __int128 uval = (static_cast<unsigned 
__int128>(static_cast<uint64_t>(i128_hi)) << 64) |
                                        static_cast<unsigned 
__int128>(static_cast<uint64_t>(i128_lo));
               __int128 val = static_cast<__int128>(uval);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to