This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 0ff9a96  feat(spec): Add DataFileMeta (#15)
0ff9a96 is described below

commit 0ff9a968217daa95201d417e09370de4e4a45133
Author: Ryan Tan <[email protected]>
AuthorDate: Wed Jul 24 09:59:26 2024 +0800

    feat(spec): Add DataFileMeta (#15)
---
 crates/paimon/Cargo.toml            |   1 +
 crates/paimon/src/spec/data_file.rs | 125 ++++++++++++++++++++++++++++++++++++
 crates/paimon/src/spec/mod.rs       |   3 +
 3 files changed, 129 insertions(+)

diff --git a/crates/paimon/Cargo.toml b/crates/paimon/Cargo.toml
index aae592d..6d057d5 100644
--- a/crates/paimon/Cargo.toml
+++ b/crates/paimon/Cargo.toml
@@ -27,6 +27,7 @@ license.workspace = true
 version.workspace = true
 
 [dependencies]
+chrono = {version = "0.4.38", features = ["serde"]}
 serde = { version = "1", features = ["derive"] }
 serde_with = "3.8.3"
 snafu = "0.8.3"
diff --git a/crates/paimon/src/spec/data_file.rs b/crates/paimon/src/spec/data_file.rs
new file mode 100644
index 0000000..4f6c41f
--- /dev/null
+++ b/crates/paimon/src/spec/data_file.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use super::schema::DataField;
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+/// Data type of a sequence of fields. A field consists of a field name, field type, and an optional
+/// description. The most specific type of a row of a table is a row type. In this case, each column
+/// of the row corresponds to the field of the row type that has the same ordinal position as the
+/// column. Compared to the SQL standard, an optional field description simplifies the handling with
+/// complex structures.
+///
+/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/RowType.java>
+///
+/// TODO: make RowType extends DataType.
+/// TODO: move me to a better place.
+pub struct RowType {
+    _fields: Vec<DataField>,
+}
+
+impl RowType {
+    pub const fn new(list: Vec<DataField>) -> Self {
+        Self { _fields: list }
+    }
+}
+
+pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
+
+/// An implementation of InternalRow.
+///
+/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/data/BinaryRow.java>
+#[derive(Debug, Eq, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct BinaryRow {
+    arity: i32,
+    null_bits_size_in_bytes: i32,
+}
+
+impl BinaryRow {
+    pub const HEADER_SIZE_IN_BYTES: i32 = 8;
+    pub const fn cal_bit_set_width_in_bytes(arity: i32) -> i32 {
+        ((arity + 63 + Self::HEADER_SIZE_IN_BYTES) / 64) * 8
+    }
+    pub const fn cal_fix_part_size_in_bytes(arity: i32) -> i32 {
+        Self::cal_bit_set_width_in_bytes(arity) + 8 * arity
+    }
+    pub const fn new(arity: i32) -> Self {
+        Self {
+            arity,
+            null_bits_size_in_bytes: (arity + 7) / 8,
+        }
+    }
+}
+
+/// TODO: implement me.
+/// The statistics for columns, supports the following stats.
+///
+/// Impl References: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/stats/SimpleStats.java>
+type SimpleStats = ();
+
+/// The Source of a file.
+/// TODO: move me to the manifest module.
+///
+/// Impl References: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/manifest/FileSource.java>
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub enum FileSource {
+    Append = 0,
+    Compact = 1,
+}
+
+/// Metadata of a data file.
+///
+/// Impl References: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-core/src/main/java/org/apache/paimon/io/DataFileMeta.java>
+#[derive(Debug, Eq, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct DataFileMeta {
+    pub file_name: String,
+    pub file_size: i64,
+    // row_count tells the total number of rows (including add & delete) in this file.
+    pub row_count: i64,
+    pub min_key: BinaryRow,
+    pub max_key: BinaryRow,
+    pub key_stats: SimpleStats,
+    pub value_stats: SimpleStats,
+    pub min_sequence_number: i64,
+    pub max_sequence_number: i64,
+    pub schema_id: i64,
+    pub level: i32,
+    pub extra_files: Vec<String>,
+    pub creation_time: DateTime<Utc>,
+    // rowCount = add_row_count + delete_row_count.
+    pub delete_row_count: Option<i64>,
+    // file index filter bytes, if it is small, store in data file meta
+    pub embedded_index: Option<Vec<u8>>,
+    pub file_source: Option<FileSource>,
+}
+
+impl Display for DataFileMeta {
+    fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result {
+        todo!()
+    }
+}
+
+impl DataFileMeta {
+    // TODO: implement me
+    pub const SCHEMA: RowType = RowType::new(vec![]);
+}
diff --git a/crates/paimon/src/spec/mod.rs b/crates/paimon/src/spec/mod.rs
index b4b8370..eb25755 100644
--- a/crates/paimon/src/spec/mod.rs
+++ b/crates/paimon/src/spec/mod.rs
@@ -19,6 +19,9 @@
 //!
 //! All paimon specs types are defined here.
 
+mod data_file;
+pub use data_file::*;
+
 mod schema;
 pub use schema::*;
 

Reply via email to