Xuanwo commented on code in PR #15:
URL: https://github.com/apache/paimon-rust/pull/15#discussion_r1669549883


##########
Makefile:
##########


Review Comment:
   Using a `Makefile` in Rust projects is uncommon. I recommend using 
[cargo-make](https://github.com/sagiegurari/cargo-make) instead. Additionally, 
we can separate this part into a new PR rather than including it with the 
data_file spec.



##########
.gitignore:
##########
@@ -17,3 +17,10 @@
 
 /target
 /Cargo.lock
+
+# Mac DS_Store
+**/*.DS_Store

Review Comment:
   Let's rebase this PR.



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;

Review Comment:
   Please use the shorter `super::schema::DataField` path here.



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field 
type, and an optional
+/// description. The most specific type of a row of a table is a row type. In 
this case, each column
+/// of the row corresponds to the field of the row type that has the same 
ordinal position as the
+/// column. Compared to the SQL standard, an optional field description 
simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,
+}
+
+impl Default for RowType {
+    fn default() -> Self {
+        Self {
+            fields: vec![],
+            nullable: true,
+        }
+    }
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self {
+            fields: list,
+            nullable: true,
+        }
+    }
+    pub fn with_data_fields<F: Into<Vec<DataField>>>(list: F) -> Self {
+        Self {
+            fields: list.into(),
+            nullable: true,
+        }
+    }
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        self.nullable = nullable;
+        self
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+// TODO: move me to a better place.
+#[derive(Debug, Eq, PartialEq)]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+type SimpleStats = ();
+
+type Timestamp = u64;
+
+/// The Source of a file.
+/// TODO: move me to the manifest module.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/manifest/FileSource.java>
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
+pub enum FileSource {
+    Append = 0,
+    COMPACT = 1,
+}
+
+/// Metadata of a data file.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java>
+#[derive(Debug, Eq, Serialize, Deserialize)]
+pub struct DataFileMeta {

Review Comment:
   Please add `#[serde(rename_all = "camelCase")]` to convert all fields to 
`camelCase`.
   
   https://serde.rs/container-attrs.html#rename_all



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field 
type, and an optional
+/// description. The most specific type of a row of a table is a row type. In 
this case, each column
+/// of the row corresponds to the field of the row type that has the same 
ordinal position as the
+/// column. Compared to the SQL standard, an optional field description 
simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,
+}
+
+impl Default for RowType {
+    fn default() -> Self {
+        Self {
+            fields: vec![],
+            nullable: true,
+        }
+    }
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self {
+            fields: list,
+            nullable: true,
+        }
+    }
+    pub fn with_data_fields<F: Into<Vec<DataField>>>(list: F) -> Self {
+        Self {
+            fields: list.into(),
+            nullable: true,
+        }
+    }
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        self.nullable = nullable;
+        self
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+// TODO: move me to a better place.
+#[derive(Debug, Eq, PartialEq)]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+type SimpleStats = ();
+
+type Timestamp = u64;
+
+/// The Source of a file.
+/// TODO: move me to the manifest module.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/manifest/FileSource.java>
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
+pub enum FileSource {
+    Append = 0,
+    COMPACT = 1,
+}
+
+/// Metadata of a data file.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java>
+#[derive(Debug, Eq, Serialize, Deserialize)]
+pub struct DataFileMeta {
+    pub file_name: String,
+    pub file_size: u64,
+    // row_count tells the total number of rows (including add & delete) in 
this file.
+    pub row_count: u64,
+    // rowCount = add_row_count + delete_row_count.
+    pub delete_row_count: u64,
+    pub min_key: Option<BinaryRow>,
+    pub max_key: Option<BinaryRow>,
+    pub key_stats: Option<SimpleStats>,
+    pub value_stats: SimpleStats,
+    pub min_seq: u64,
+    pub max_seq: u64,
+    pub schema_id: u64,
+    pub level: u32,
+    pub extra_files: Option<Vec<String>>,
+    pub creation_time: Timestamp,
+    // file index filter bytes, if it is small, store in data file meta
+    pub embedded_index: Option<Vec<u8>>,
+    pub file_source: Option<FileSource>,
+}
+
+impl Display for DataFileMeta {
+    fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result {
+        todo!()
+    }
+}
+
+impl PartialEq for DataFileMeta {

Review Comment:
   Most of the time, we can simply derive `PartialEq`, just as we do for `Clone`.



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field 
type, and an optional
+/// description. The most specific type of a row of a table is a row type. In 
this case, each column
+/// of the row corresponds to the field of the row type that has the same 
ordinal position as the
+/// column. Compared to the SQL standard, an optional field description 
simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,
+}
+
+impl Default for RowType {
+    fn default() -> Self {
+        Self {
+            fields: vec![],
+            nullable: true,
+        }
+    }
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self {
+            fields: list,
+            nullable: true,
+        }
+    }
+    pub fn with_data_fields<F: Into<Vec<DataField>>>(list: F) -> Self {
+        Self {
+            fields: list.into(),
+            nullable: true,
+        }
+    }
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        self.nullable = nullable;
+        self
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+// TODO: move me to a better place.
+#[derive(Debug, Eq, PartialEq)]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+type SimpleStats = ();
+
+type Timestamp = u64;

Review Comment:
   We can use 
[chrono::DateTime](https://docs.rs/chrono/latest/chrono/struct.DateTime.html) 
with the UTC timezone, i.e. `DateTime<Utc>`.



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field 
type, and an optional
+/// description. The most specific type of a row of a table is a row type. In 
this case, each column
+/// of the row corresponds to the field of the row type that has the same 
ordinal position as the
+/// column. Compared to the SQL standard, an optional field description 
simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {

Review Comment:
   We are porting the Java implementation to Rust. It would be helpful to 
provide a link to the implementation we are referring to for clearer 
understanding.



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field 
type, and an optional
+/// description. The most specific type of a row of a table is a row type. In 
this case, each column
+/// of the row corresponds to the field of the row type that has the same 
ordinal position as the
+/// column. Compared to the SQL standard, an optional field description 
simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,

Review Comment:
   My current understanding is that `nullable` is handled and stored in 
`DataType`, so we don't need to maintain it here.
   



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field 
type, and an optional
+/// description. The most specific type of a row of a table is a row type. In 
this case, each column
+/// of the row corresponds to the field of the row type that has the same 
ordinal position as the
+/// column. Compared to the SQL standard, an optional field description 
simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,
+}
+
+impl Default for RowType {
+    fn default() -> Self {
+        Self {
+            fields: vec![],
+            nullable: true,
+        }
+    }
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self {
+            fields: list,
+            nullable: true,
+        }
+    }
+    pub fn with_data_fields<F: Into<Vec<DataField>>>(list: F) -> Self {
+        Self {
+            fields: list.into(),
+            nullable: true,
+        }
+    }
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        self.nullable = nullable;
+        self
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+// TODO: move me to a better place.
+#[derive(Debug, Eq, PartialEq)]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+type SimpleStats = ();
+
+type Timestamp = u64;
+
+/// The Source of a file.
+/// TODO: move me to the manifest module.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/manifest/FileSource.java>
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
+pub enum FileSource {
+    Append = 0,

Review Comment:
   Please keep the variant naming consistent: `Append` is UpperCamelCase while 
`COMPACT` is all caps.



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field 
type, and an optional
+/// description. The most specific type of a row of a table is a row type. In 
this case, each column
+/// of the row corresponds to the field of the row type that has the same 
ordinal position as the
+/// column. Compared to the SQL standard, an optional field description 
simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,
+}
+
+impl Default for RowType {
+    fn default() -> Self {
+        Self {
+            fields: vec![],
+            nullable: true,
+        }
+    }
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self {
+            fields: list,
+            nullable: true,
+        }
+    }
+    pub fn with_data_fields<F: Into<Vec<DataField>>>(list: F) -> Self {
+        Self {
+            fields: list.into(),
+            nullable: true,
+        }
+    }
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        self.nullable = nullable;
+        self
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+// TODO: move me to a better place.
+#[derive(Debug, Eq, PartialEq)]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+type SimpleStats = ();
+
+type Timestamp = u64;
+
+/// The Source of a file.
+/// TODO: move me to the manifest module.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/manifest/FileSource.java>
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
+pub enum FileSource {
+    Append = 0,
+    COMPACT = 1,
+}
+
+/// Metadata of a data file.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java>
+#[derive(Debug, Eq, Serialize, Deserialize)]
+pub struct DataFileMeta {
+    pub file_name: String,
+    pub file_size: u64,
+    // row_count tells the total number of rows (including add & delete) in 
this file.
+    pub row_count: u64,
+    // rowCount = add_row_count + delete_row_count.
+    pub delete_row_count: u64,
+    pub min_key: Option<BinaryRow>,
+    pub max_key: Option<BinaryRow>,
+    pub key_stats: Option<SimpleStats>,

Review Comment:
   `key_stats` won't be `None`, so it doesn't need to be wrapped in an `Option`.



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field 
type, and an optional
+/// description. The most specific type of a row of a table is a row type. In 
this case, each column
+/// of the row corresponds to the field of the row type that has the same 
ordinal position as the
+/// column. Compared to the SQL standard, an optional field description 
simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,
+}
+
+impl Default for RowType {
+    fn default() -> Self {
+        Self {
+            fields: vec![],
+            nullable: true,
+        }
+    }
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self {
+            fields: list,
+            nullable: true,
+        }
+    }
+    pub fn with_data_fields<F: Into<Vec<DataField>>>(list: F) -> Self {
+        Self {
+            fields: list.into(),
+            nullable: true,
+        }
+    }
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        self.nullable = nullable;
+        self
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+// TODO: move me to a better place.
+#[derive(Debug, Eq, PartialEq)]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+type SimpleStats = ();
+
+type Timestamp = u64;
+
+/// The Source of a file.
+/// TODO: move me to the manifest module.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/manifest/FileSource.java>
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
+pub enum FileSource {
+    Append = 0,
+    COMPACT = 1,
+}
+
+/// Metadata of a data file.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java>
+#[derive(Debug, Eq, Serialize, Deserialize)]
+pub struct DataFileMeta {
+    pub file_name: String,
+    pub file_size: u64,
+    // row_count tells the total number of rows (including add & delete) in 
this file.
+    pub row_count: u64,
+    // rowCount = add_row_count + delete_row_count.
+    pub delete_row_count: u64,
+    pub min_key: Option<BinaryRow>,
+    pub max_key: Option<BinaryRow>,
+    pub key_stats: Option<SimpleStats>,
+    pub value_stats: SimpleStats,
+    pub min_seq: u64,
+    pub max_seq: u64,
+    pub schema_id: u64,
+    pub level: u32,
+    pub extra_files: Option<Vec<String>>,

Review Comment:
   We don't need the extra `Option` here.



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field 
type, and an optional
+/// description. The most specific type of a row of a table is a row type. In 
this case, each column
+/// of the row corresponds to the field of the row type that has the same 
ordinal position as the
+/// column. Compared to the SQL standard, an optional field description 
simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,
+}
+
+impl Default for RowType {
+    fn default() -> Self {
+        Self {
+            fields: vec![],
+            nullable: true,
+        }
+    }
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self {
+            fields: list,
+            nullable: true,
+        }
+    }
+    pub fn with_data_fields<F: Into<Vec<DataField>>>(list: F) -> Self {
+        Self {
+            fields: list.into(),
+            nullable: true,
+        }
+    }
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        self.nullable = nullable;
+        self
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+// TODO: move me to a better place.
+#[derive(Debug, Eq, PartialEq)]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+type SimpleStats = ();
+
+type Timestamp = u64;
+
+/// The Source of a file.
+/// TODO: move me to the manifest module.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/manifest/FileSource.java>
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
+pub enum FileSource {
+    Append = 0,
+    COMPACT = 1,
+}
+
+/// Metadata of a data file.
+///
+/// Impl References: 
<https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java>
+#[derive(Debug, Eq, Serialize, Deserialize)]
+pub struct DataFileMeta {
+    pub file_name: String,
+    pub file_size: u64,
+    // row_count tells the total number of rows (including add & delete) in 
this file.
+    pub row_count: u64,
+    // rowCount = add_row_count + delete_row_count.
+    pub delete_row_count: u64,
+    pub min_key: Option<BinaryRow>,
+    pub max_key: Option<BinaryRow>,
+    pub key_stats: Option<SimpleStats>,
+    pub value_stats: SimpleStats,
+    pub min_seq: u64,

Review Comment:
   Please make sure the field names are aligned.



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field type, and an optional
+/// description. The most specific type of a row of a table is a row type. In this case, each column
+/// of the row corresponds to the field of the row type that has the same ordinal position as the
+/// column. Compared to the SQL standard, an optional field description simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,
+}
+
+impl Default for RowType {
+    fn default() -> Self {
+        Self {
+            fields: vec![],
+            nullable: true,
+        }
+    }
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self {
+            fields: list,
+            nullable: true,
+        }
+    }
+    pub fn with_data_fields<F: Into<Vec<DataField>>>(list: F) -> Self {
+        Self {
+            fields: list.into(),
+            nullable: true,
+        }
+    }
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        self.nullable = nullable;
+        self
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+// TODO: move me to a better place.
+#[derive(Debug, Eq, PartialEq)]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+type SimpleStats = ();
+
+type Timestamp = u64;
+
+/// The Source of a file.
+/// TODO: move me to the manifest module.
+///
+/// Impl References: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/manifest/FileSource.java>
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
+pub enum FileSource {
+    Append = 0,
+    COMPACT = 1,
+}
+
+/// Metadata of a data file.
+///
+/// Impl References: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java>
+#[derive(Debug, Eq, Serialize, Deserialize)]
+pub struct DataFileMeta {
+    pub file_name: String,
+    pub file_size: u64,

Review Comment:
   Java's `long` should usually be treated as `i64` in Rust?



##########
crates/paimon/src/spec/data_file.rs:
##########
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::schema::DataField;
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field type, and an optional
+/// description. The most specific type of a row of a table is a row type. In this case, each column
+/// of the row corresponds to the field of the row type that has the same ordinal position as the
+/// column. Compared to the SQL standard, an optional field description simplifies the handling with
+/// complex structures.
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    fields: Vec<DataField>,
+    nullable: bool,
+}
+
+impl Default for RowType {
+    fn default() -> Self {
+        Self {
+            fields: vec![],
+            nullable: true,
+        }
+    }
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self {
+            fields: list,
+            nullable: true,
+        }
+    }
+    pub fn with_data_fields<F: Into<Vec<DataField>>>(list: F) -> Self {
+        Self {
+            fields: list.into(),
+            nullable: true,
+        }
+    }
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        self.nullable = nullable;
+        self
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+// TODO: move me to a better place.
+#[derive(Debug, Eq, PartialEq)]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+type SimpleStats = ();
+
+type Timestamp = u64;
+
+/// The Source of a file.
+/// TODO: move me to the manifest module.
+///
+/// Impl References: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/manifest/FileSource.java>
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
+pub enum FileSource {
+    Append = 0,
+    COMPACT = 1,
+}
+
+/// Metadata of a data file.
+///
+/// Impl References: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java>
+#[derive(Debug, Eq, Serialize, Deserialize)]
+pub struct DataFileMeta {
+    pub file_name: String,
+    pub file_size: u64,
+    // row_count tells the total number of rows (including add & delete) in this file.
+    pub row_count: u64,
+    // rowCount = add_row_count + delete_row_count.
+    pub delete_row_count: u64,

Review Comment:
   `delete_row_count` could be `None`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@paimon.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to