Copilot commented on code in PR #117:
URL: https://github.com/apache/fluss-rust/pull/117#discussion_r2649448547


##########
crates/fluss/src/bucketing/mod.rs:
##########
@@ -0,0 +1,213 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::metadata::DataLakeFormat;
+use crate::util::murmur_hash;
+
+pub trait BucketingFunction: Sync + Send {
+    fn bucketing(&self, bucket_key: &[u8], num_buckets: i32) -> i32;
+}
+
+#[allow(dead_code)]
+impl dyn BucketingFunction {
+    pub fn of(lake_format: Option<&DataLakeFormat>) -> Box<dyn BucketingFunction> {
+        match lake_format {
+            None => Box::new(FlussBucketingFunction),
+            Some(DataLakeFormat::PAIMON) => Box::new(PaimonBucketingFunction),
+            Some(DataLakeFormat::LANCE) => Box::new(FlussBucketingFunction),
+            Some(DataLakeFormat::ICEBERG) => Box::new(IcebergBucketingFunction),
+        }
+    }
+}
+
+struct FlussBucketingFunction;
+impl BucketingFunction for FlussBucketingFunction {
+    fn bucketing(&self, bucket_key: &[u8], num_buckets: i32) -> i32 {
+        assert!(!bucket_key.is_empty(), "Assigned key must not be empty!");
+        let key_hash = murmur_hash::flink_hash_bytes(bucket_key);
+        murmur_hash::flink_hash_i32(key_hash) % num_buckets
+    }
+}
+
+struct PaimonBucketingFunction;
+impl BucketingFunction for PaimonBucketingFunction {
+    fn bucketing(&self, bucket_key: &[u8], num_buckets: i32) -> i32 {
+        assert!(!bucket_key.is_empty(), "Assigned key must not be empty!");
+        let key_hash = murmur_hash::flink_hash_bytes(bucket_key);
+        (key_hash % num_buckets).abs()
+    }
+}
+
+struct IcebergBucketingFunction;
+impl BucketingFunction for IcebergBucketingFunction {
+    fn bucketing(&self, bucket_key: &[u8], num_buckets: i32) -> i32 {
+        if bucket_key.is_empty() {
+            panic!("bucket_key must not be empty!");
+        }
+
+        if num_buckets <= 0 {
+            panic!("num_buckets must be positive!");
+        }
+

Review Comment:
   The panic message uses inconsistent formatting compared to the other 
bucketing functions. For consistency with the assert! macro used in 
FlussBucketingFunction and PaimonBucketingFunction (lines 40 and 49), either 
use assert! here or change all three to use panic! consistently.
   ```suggestion
           assert!(!bucket_key.is_empty(), "Assigned key must not be empty!");
           assert!(num_buckets > 0, "num_buckets must be positive!");
   ```



##########
crates/fluss/src/client/write/bucket_assigner.rs:
##########
@@ -100,3 +101,33 @@ impl BucketAssigner for StickyBucketAssigner {
         }
     }
 }
+
+pub struct HashBucketAssigner {
+    num_buckets: i32,
+    bucketing_function: Box<dyn BucketingFunction>,
+}
+
+#[allow(dead_code)]
+impl HashBucketAssigner {
+    pub fn new(num_buckets: i32, bucketing_function: Box<dyn BucketingFunction>) -> Self {
+        HashBucketAssigner {
+            num_buckets,
+            bucketing_function,
+        }
+    }

Review Comment:
   The HashBucketAssigner struct and its new() constructor lack documentation 
comments. Add doc comments explaining the purpose of this struct, what it does, 
and the meaning of its parameters (num_buckets and bucketing_function).
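   One possible shape for these doc comments, reusing the code from this diff (wording is only illustrative):
   ```rust
   /// A `BucketAssigner` that maps each record to a bucket by hashing its bucket key.
   ///
   /// The same bucket key deterministically maps to the same bucket via the
   /// configured `BucketingFunction`.
   pub struct HashBucketAssigner {
       /// Total number of buckets of the target table.
       num_buckets: i32,
       /// Hash-based function used to map a bucket key to a bucket id.
       bucketing_function: Box<dyn BucketingFunction>,
   }

   impl HashBucketAssigner {
       /// Creates a new `HashBucketAssigner` for a table with `num_buckets` buckets,
       /// using `bucketing_function` to compute the bucket id from the bucket key.
       pub fn new(num_buckets: i32, bucketing_function: Box<dyn BucketingFunction>) -> Self {
           HashBucketAssigner {
               num_buckets,
               bucketing_function,
           }
       }
   }
   ```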



##########
crates/fluss/src/client/write/bucket_assigner.rs:
##########
@@ -100,3 +101,33 @@ impl BucketAssigner for StickyBucketAssigner {
         }
     }
 }
+
+pub struct HashBucketAssigner {
+    num_buckets: i32,
+    bucketing_function: Box<dyn BucketingFunction>,
+}
+
+#[allow(dead_code)]
+impl HashBucketAssigner {
+    pub fn new(num_buckets: i32, bucketing_function: Box<dyn BucketingFunction>) -> Self {
+        HashBucketAssigner {
+            num_buckets,
+            bucketing_function,
+        }
+    }
+}
+
+impl BucketAssigner for HashBucketAssigner {
+    fn abort_if_batch_full(&self) -> bool {
+        false
+    }
+
+    fn on_new_batch(&self, _: &Cluster, _: i32) {
+        // do nothing
+    }
+
+    fn assign_bucket(&self, bucket_key: Option<&[u8]>, _: &Cluster) -> i32 {
+        let key = bucket_key.expect("no bucket key provided");

Review Comment:
   Using expect() will panic if bucket_key is None. Consider whether panic is 
the appropriate error handling strategy here, or if this should return a Result 
to allow the caller to handle the error gracefully. If panic is intentional, 
the message should be more descriptive about why this is a programming error.
   ```suggestion
           let key = match bucket_key {
               Some(k) => k,
               None => panic!(
                   "HashBucketAssigner::assign_bucket must be called with 
Some(bucket_key); \
                    received None (num_buckets = {})",
                   self.num_buckets
               ),
           };
   ```



##########
crates/fluss/src/util/murmur_hash.rs:
##########
@@ -0,0 +1,211 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/* This file is based on source code of Apache Flink Project (https://flink.apache.org/), licensed by the Apache
+ * Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE file distributed with this work for
+ * additional information regarding copyright ownership. */
+
+pub const MURMUR3_DEFAULT_SEED: i32 = 0;
+pub const FLINK_MURMUR3_DEFAULT_SEED: i32 = 42;
+
+const C1: i32 = 0xCC9E_2D51_u32 as i32;
+const C2: i32 = 0x1B87_3593;
+const R1: u32 = 15;
+const R2: u32 = 13;
+const M: i32 = 5;
+const N: i32 = 0xE654_6B64_u32 as i32;
+const CHUNK_SIZE: usize = 4;
+
+/// Hashes the data using 32-bit Murmur3 hash with 0 as seed
+///
+/// # Arguments
+/// * `data` - byte array containing data to be hashed
+///
+/// # Returns
+/// Returns hash value
+pub fn hash_bytes(data: &[u8]) -> i32 {
+    hash_bytes_with_seed(data, MURMUR3_DEFAULT_SEED)
+}
+
+#[inline(always)]
+fn hash_bytes_with_seed(data: &[u8], seed: i32) -> i32 {
+    let length = data.len();
+    let chunks = length / CHUNK_SIZE;
+    let length_aligned = chunks * CHUNK_SIZE;
+
+    let mut h1 = hash_full_chunks(data, seed, length_aligned);
+    let mut k1 = 0i32;
+
+    for (shift, &b) in data[length_aligned..].iter().enumerate() {
+        k1 |= (b as i32) << (8 * shift);
+    }
+
+    h1 ^= k1.wrapping_mul(C1).rotate_left(R1).wrapping_mul(C2);
+
+    fmix(h1, length)
+}
+
+/// Hashes the data using Flink's variant of 32-bit Murmur hash with 42 as seed and tail bytes mixed into hash byte-by-byte
+///
+/// # Arguments
+/// * `data` - byte array containing data to be hashed
+///
+/// # Returns
+/// Returns hash value
+pub fn flink_hash_bytes(data: &[u8]) -> i32 {
+    flink_hash_bytes_with_seed(data, FLINK_MURMUR3_DEFAULT_SEED)
+}
+#[inline(always)]
+fn flink_hash_bytes_with_seed(data: &[u8], seed: i32) -> i32 {
+    let length = data.len();
+    let chunks = length / CHUNK_SIZE;
+    let length_aligned = chunks * CHUNK_SIZE;
+
+    let mut h1 = hash_full_chunks(data, seed, length_aligned);
+
+    #[allow(clippy::needless_range_loop)]
+    for index in length_aligned..length {
+        let byte = i32::from(data[index]);
+        let k1 = mix_k1(byte);
+        h1 = mix_h1(h1, k1);
+    }
+
+    fmix(h1, length)
+}
+
+#[inline(always)]
+fn hash_full_chunks(data: &[u8], seed: i32, length_aligned: usize) -> i32 {
+    let mut h1 = seed;
+
+    for i in 0..length_aligned / CHUNK_SIZE {
+        let offset = i * 4;
+        let block = i32::from_le_bytes(data[offset..offset + 4].try_into().unwrap());

Review Comment:
   Using unwrap() here can cause a panic if the slice doesn't have exactly 4 
bytes. However, since this is called within a loop that ensures we only process 
complete 4-byte chunks (lines 94-99), this is actually safe. Consider adding a 
debug assertion or safety comment to document this invariant.
   ```suggestion
           let chunk = &data[offset..offset + CHUNK_SIZE];
           debug_assert_eq!(
               chunk.len(),
               CHUNK_SIZE,
               "chunk size must be exactly {} bytes for from_le_bytes",
               CHUNK_SIZE
           );
           let block = i32::from_le_bytes(chunk.try_into().unwrap());
   ```



##########
crates/fluss/src/client/write/bucket_assigner.rs:
##########
@@ -19,6 +19,7 @@ use crate::cluster::Cluster;
 use crate::metadata::TablePath;
 use rand::Rng;
 use std::sync::atomic::{AtomicI32, Ordering};
+use crate::bucketing::BucketingFunction;

Review Comment:
   The new import is placed after the external crate and std imports, which breaks the import ordering used in this file. Move it up so it is grouped with the other `crate::` imports at the top and the list stays alphabetically sorted, as in the suggestion below.
   ```suggestion
   use crate::bucketing::BucketingFunction;
   use rand::Rng;
   use std::sync::atomic::{AtomicI32, Ordering};
   ```



##########
crates/fluss/src/util/murmur_hash.rs:
##########
@@ -0,0 +1,211 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/* This file is based on source code of Apache Flink Project (https://flink.apache.org/), licensed by the Apache
+ * Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE file distributed with this work for
+ * additional information regarding copyright ownership. */
+
+pub const MURMUR3_DEFAULT_SEED: i32 = 0;
+pub const FLINK_MURMUR3_DEFAULT_SEED: i32 = 42;
+
+const C1: i32 = 0xCC9E_2D51_u32 as i32;
+const C2: i32 = 0x1B87_3593;
+const R1: u32 = 15;
+const R2: u32 = 13;
+const M: i32 = 5;
+const N: i32 = 0xE654_6B64_u32 as i32;
+const CHUNK_SIZE: usize = 4;
+
+/// Hashes the data using 32-bit Murmur3 hash with 0 as seed
+///
+/// # Arguments
+/// * `data` - byte array containing data to be hashed
+///
+/// # Returns
+/// Returns hash value
+pub fn hash_bytes(data: &[u8]) -> i32 {
+    hash_bytes_with_seed(data, MURMUR3_DEFAULT_SEED)
+}
+
+#[inline(always)]
+fn hash_bytes_with_seed(data: &[u8], seed: i32) -> i32 {
+    let length = data.len();
+    let chunks = length / CHUNK_SIZE;
+    let length_aligned = chunks * CHUNK_SIZE;
+
+    let mut h1 = hash_full_chunks(data, seed, length_aligned);
+    let mut k1 = 0i32;
+
+    for (shift, &b) in data[length_aligned..].iter().enumerate() {
+        k1 |= (b as i32) << (8 * shift);
+    }
+
+    h1 ^= k1.wrapping_mul(C1).rotate_left(R1).wrapping_mul(C2);
+
+    fmix(h1, length)
+}
+
+/// Hashes the data using Flink's variant of 32-bit Murmur hash with 42 as seed and tail bytes mixed into hash byte-by-byte
+///
+/// # Arguments
+/// * `data` - byte array containing data to be hashed
+///
+/// # Returns
+/// Returns hash value
+pub fn flink_hash_bytes(data: &[u8]) -> i32 {
+    flink_hash_bytes_with_seed(data, FLINK_MURMUR3_DEFAULT_SEED)
+}
+#[inline(always)]
+fn flink_hash_bytes_with_seed(data: &[u8], seed: i32) -> i32 {
+    let length = data.len();
+    let chunks = length / CHUNK_SIZE;
+    let length_aligned = chunks * CHUNK_SIZE;
+
+    let mut h1 = hash_full_chunks(data, seed, length_aligned);
+
+    #[allow(clippy::needless_range_loop)]
+    for index in length_aligned..length {
+        let byte = i32::from(data[index]);
+        let k1 = mix_k1(byte);
+        h1 = mix_h1(h1, k1);
+    }
+
+    fmix(h1, length)
+}
+
+#[inline(always)]
+fn hash_full_chunks(data: &[u8], seed: i32, length_aligned: usize) -> i32 {
+    let mut h1 = seed;
+
+    for i in 0..length_aligned / CHUNK_SIZE {
+        let offset = i * 4;
+        let block = i32::from_le_bytes(data[offset..offset + 4].try_into().unwrap());
+        let k1 = mix_k1(block);
+        h1 = mix_h1(h1, k1);
+    }
+    h1
+}
+
+#[inline(always)]
+fn mix_k1(k1: i32) -> i32 {
+    k1.wrapping_mul(C1).rotate_left(R1).wrapping_mul(C2)
+}
+
+#[inline(always)]
+fn mix_h1(h1: i32, k1: i32) -> i32 {
+    (h1 ^ k1).rotate_left(R2).wrapping_mul(M).wrapping_add(N)
+}
+
+// Finalization mix - force all bits of a hash block to avalanche
+#[inline(always)]
+fn fmix(mut h1: i32, length: usize) -> i32 {
+    h1 ^= length as i32;
+    bit_mix(h1)
+}
+
+/// Hashes an i32 using Flink's variant of Murmur
+///
+/// # Arguments
+/// * `code` - byte array containing data to be hashed
+///
+/// # Returns
+/// Returns hash value
+pub fn flink_hash_i32(code: i32) -> i32 {
+    let mut code = code.wrapping_mul(C1);
+    code = code.rotate_left(R1);
+    code = code.wrapping_mul(C2);
+    code = code.rotate_left(R2);
+
+    code = code.wrapping_mul(M).wrapping_add(N);
+    code ^= CHUNK_SIZE as i32;
+    code = bit_mix(code);
+
+    if code >= 0 {
+        code
+    } else if code != i32::MIN {
+        -code
+    } else {
+        0
+    }
+}
+
+const BIT_MIX_A: i32 = 0x85EB_CA6Bu32 as i32;
+const BIT_MIX_B: i32 = 0xC2B2_AE35u32 as i32;
+
+#[inline(always)]
+fn bit_mix(mut input: i32) -> i32 {
+    input = input ^ ((input as u32) >> 16) as i32;
+    input = input.wrapping_mul(BIT_MIX_A);
+    input = input ^ ((input as u32) >> 13) as i32;
+    input = input.wrapping_mul(BIT_MIX_B);
+    input = input ^ ((input as u32) >> 16) as i32;
+    input
+}

Review Comment:
   The comment incorrectly refers to "code" as the parameter name when the 
actual parameter is named "input". Update the comment to match the actual 
parameter name.



##########
crates/fluss/src/util/murmur_hash.rs:
##########
@@ -0,0 +1,211 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/* This file is based on source code of Apache Flink Project (https://flink.apache.org/), licensed by the Apache
+ * Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE file distributed with this work for
+ * additional information regarding copyright ownership. */
+
+pub const MURMUR3_DEFAULT_SEED: i32 = 0;
+pub const FLINK_MURMUR3_DEFAULT_SEED: i32 = 42;
+
+const C1: i32 = 0xCC9E_2D51_u32 as i32;
+const C2: i32 = 0x1B87_3593;
+const R1: u32 = 15;
+const R2: u32 = 13;
+const M: i32 = 5;
+const N: i32 = 0xE654_6B64_u32 as i32;
+const CHUNK_SIZE: usize = 4;
+
+/// Hashes the data using 32-bit Murmur3 hash with 0 as seed
+///
+/// # Arguments
+/// * `data` - byte array containing data to be hashed
+///
+/// # Returns
+/// Returns hash value
+pub fn hash_bytes(data: &[u8]) -> i32 {
+    hash_bytes_with_seed(data, MURMUR3_DEFAULT_SEED)
+}
+
+#[inline(always)]
+fn hash_bytes_with_seed(data: &[u8], seed: i32) -> i32 {
+    let length = data.len();
+    let chunks = length / CHUNK_SIZE;
+    let length_aligned = chunks * CHUNK_SIZE;
+
+    let mut h1 = hash_full_chunks(data, seed, length_aligned);
+    let mut k1 = 0i32;
+
+    for (shift, &b) in data[length_aligned..].iter().enumerate() {
+        k1 |= (b as i32) << (8 * shift);
+    }
+
+    h1 ^= k1.wrapping_mul(C1).rotate_left(R1).wrapping_mul(C2);
+
+    fmix(h1, length)
+}
+
+/// Hashes the data using Flink's variant of 32-bit Murmur hash with 42 as seed and tail bytes mixed into hash byte-by-byte
+///
+/// # Arguments
+/// * `data` - byte array containing data to be hashed
+///
+/// # Returns
+/// Returns hash value
+pub fn flink_hash_bytes(data: &[u8]) -> i32 {
+    flink_hash_bytes_with_seed(data, FLINK_MURMUR3_DEFAULT_SEED)
+}
+#[inline(always)]
+fn flink_hash_bytes_with_seed(data: &[u8], seed: i32) -> i32 {
+    let length = data.len();
+    let chunks = length / CHUNK_SIZE;
+    let length_aligned = chunks * CHUNK_SIZE;
+
+    let mut h1 = hash_full_chunks(data, seed, length_aligned);
+
+    #[allow(clippy::needless_range_loop)]
+    for index in length_aligned..length {
+        let byte = i32::from(data[index]);
+        let k1 = mix_k1(byte);
+        h1 = mix_h1(h1, k1);
+    }
+
+    fmix(h1, length)
+}
+
+#[inline(always)]
+fn hash_full_chunks(data: &[u8], seed: i32, length_aligned: usize) -> i32 {
+    let mut h1 = seed;
+
+    for i in 0..length_aligned / CHUNK_SIZE {
+        let offset = i * 4;
+        let block = i32::from_le_bytes(data[offset..offset + 4].try_into().unwrap());
+        let k1 = mix_k1(block);
+        h1 = mix_h1(h1, k1);
+    }
+    h1
+}
+
+#[inline(always)]
+fn mix_k1(k1: i32) -> i32 {
+    k1.wrapping_mul(C1).rotate_left(R1).wrapping_mul(C2)
+}
+
+#[inline(always)]
+fn mix_h1(h1: i32, k1: i32) -> i32 {
+    (h1 ^ k1).rotate_left(R2).wrapping_mul(M).wrapping_add(N)
+}
+
+// Finalization mix - force all bits of a hash block to avalanche
+#[inline(always)]
+fn fmix(mut h1: i32, length: usize) -> i32 {
+    h1 ^= length as i32;
+    bit_mix(h1)
+}
+
+/// Hashes an i32 using Flink's variant of Murmur
+///
+/// # Arguments
+/// * `code` - byte array containing data to be hashed
+///
+/// # Returns
+/// Returns hash value
+pub fn flink_hash_i32(code: i32) -> i32 {

Review Comment:
   The documentation comment incorrectly describes the parameter as "byte array 
containing data to be hashed" when the parameter is actually named "code" and 
is an i32 value, not a byte array.
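   A corrected header could read, for example (keeping the existing doc style, with only the parameter description fixed):
   ```rust
   /// Hashes an i32 using Flink's variant of Murmur
   ///
   /// # Arguments
   /// * `code` - the i32 value to be hashed
   ///
   /// # Returns
   /// Returns hash value
   pub fn flink_hash_i32(code: i32) -> i32 {
   ```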



##########
crates/fluss/src/bucketing/mod.rs:
##########
@@ -0,0 +1,213 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::metadata::DataLakeFormat;
+use crate::util::murmur_hash;
+
+pub trait BucketingFunction: Sync + Send {
+    fn bucketing(&self, bucket_key: &[u8], num_buckets: i32) -> i32;
+}
+
+#[allow(dead_code)]
+impl dyn BucketingFunction {
+    pub fn of(lake_format: Option<&DataLakeFormat>) -> Box<dyn 
BucketingFunction> {
+        match lake_format {
+            None => Box::new(FlussBucketingFunction),
+            Some(DataLakeFormat::PAIMON) => Box::new(PaimonBucketingFunction),
+            Some(DataLakeFormat::LANCE) => Box::new(FlussBucketingFunction),
+            Some(DataLakeFormat::ICEBERG) => 
Box::new(IcebergBucketingFunction),
+        }
+    }
+}

Review Comment:
   The BucketingFunction trait and the of() method lack documentation 
explaining their purpose and usage. Add doc comments describing what the trait 
represents, what the bucketing method does, and how the of() factory method 
selects different implementations based on the lake format.
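   For example, something along these lines (descriptions are illustrative and derived from the match arms shown above):
   ```rust
   /// Maps a serialized bucket key to a bucket id for a table with a fixed number of buckets.
   pub trait BucketingFunction: Sync + Send {
       /// Returns the bucket id (expected to be in `0..num_buckets`) for the given `bucket_key`.
       fn bucketing(&self, bucket_key: &[u8], num_buckets: i32) -> i32;
   }

   impl dyn BucketingFunction {
       /// Returns the bucketing function matching the table's data lake format:
       /// Paimon and Iceberg tables use their native bucketing, while tables without
       /// a lake format (and Lance tables) fall back to the default Fluss bucketing.
       pub fn of(lake_format: Option<&DataLakeFormat>) -> Box<dyn BucketingFunction> {
           match lake_format {
               None => Box::new(FlussBucketingFunction),
               Some(DataLakeFormat::PAIMON) => Box::new(PaimonBucketingFunction),
               Some(DataLakeFormat::LANCE) => Box::new(FlussBucketingFunction),
               Some(DataLakeFormat::ICEBERG) => Box::new(IcebergBucketingFunction),
           }
       }
   }
   ```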



##########
crates/fluss/src/metadata/data_lake_format.rs:
##########
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+pub enum DataLakeFormat {
+    PAIMON,
+    LANCE,
+    ICEBERG,

Review Comment:
   The enum variants use SCREAMING_SNAKE_CASE (PAIMON, LANCE, ICEBERG) which 
violates Rust naming conventions. Enum variants should use PascalCase. Change 
these to Paimon, Lance, and Iceberg. Note that this will also require updating 
all match patterns in bucketing/mod.rs lines 30-32.
   ```suggestion
       Paimon,
       Lance,
       Iceberg,
   ```
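   With the rename applied, the match in `BucketingFunction::of` in bucketing/mod.rs would become, roughly:
   ```rust
   match lake_format {
       None => Box::new(FlussBucketingFunction),
       Some(DataLakeFormat::Paimon) => Box::new(PaimonBucketingFunction),
       Some(DataLakeFormat::Lance) => Box::new(FlussBucketingFunction),
       Some(DataLakeFormat::Iceberg) => Box::new(IcebergBucketingFunction),
   }
   ```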



##########
crates/fluss/src/metadata/data_lake_format.rs:
##########
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+pub enum DataLakeFormat {
+    PAIMON,
+    LANCE,

Review Comment:
   The DataLakeFormat enum lacks documentation explaining what it represents 
and how it's used. Add a doc comment describing the purpose of this enum and 
what each variant represents.
   ```suggestion
   
   /// Identifies the logical format of a data lake table supported by this 
crate.
   /// 
   /// This enum is typically used in metadata and configuration to distinguish
   /// between different table formats so that the appropriate integration and
   /// semantics can be applied.
   pub enum DataLakeFormat {
       /// Apache Paimon data lake table format.
       PAIMON,
       /// Lance columnar data format / lakehouse table format.
       LANCE,
       /// Apache Iceberg data lake table format.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
