This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 593be85 feat: introduce manifest (#94)
593be85 is described below
commit 593be85827ca584a62696ff4f427071277acc767
Author: yuxia Luo <[email protected]>
AuthorDate: Mon Mar 2 11:55:20 2026 +0800
feat: introduce manifest (#94)
---
crates/paimon/src/spec/manifest.rs | 100 +++++++++++++++++++++++++++++++
crates/paimon/src/spec/manifest_entry.rs | 4 +-
crates/paimon/src/spec/mod.rs | 2 +-
3 files changed, 103 insertions(+), 3 deletions(-)
diff --git a/crates/paimon/src/spec/manifest.rs b/crates/paimon/src/spec/manifest.rs
new file mode 100644
index 0000000..ff88510
--- /dev/null
+++ b/crates/paimon/src/spec/manifest.rs
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::io::FileIO;
+use crate::spec::manifest_entry::ManifestEntry;
+use apache_avro::types::Value;
+use apache_avro::{from_value, Reader};
+
+use crate::Error;
+use crate::Result;
+
+/// Manifest file reader and writer.
+///
+/// A manifest file contains a list of ManifestEntry records in Avro format.
+/// Each entry represents an addition or deletion of a data file.
+///
+/// Impl Reference: <https://github.com/apache/paimon/blob/release-1.3/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java>
+pub struct Manifest;
+
+#[allow(dead_code)] // NOTE(review): presumably nothing outside the tests calls these yet — confirm before removing
+impl Manifest {
+ /// Read every manifest entry stored in the file at `path`.
+ ///
+ /// * `file_io` - used to open `path` through `new_input`.
+ /// * `path` - location of the manifest file to read.
+ ///
+ /// Returns `Ok` with an empty vector when the file does not exist, and the
+ /// decoded entries otherwise. Errors from opening the input, the existence
+ /// check, reading the content, or decoding it are returned as `Err`.
+ pub async fn read(file_io: &FileIO, path: &str) ->
Result<Vec<ManifestEntry>> {
+ let input_file = file_io.new_input(path)?;
+
+ if !input_file.exists().await? {
+ return Ok(Vec::new());
+ }
+
+ let content = input_file.read().await?;
+ Self::read_from_bytes(&content)
+ }
+
+ /// Decode manifest entries from the raw content of a manifest file.
+ ///
+ /// `bytes` is handed to an `apache_avro::Reader`; every record it yields
+ /// is collected, wrapped in a single `Value::Array`, and deserialized
+ /// into `Vec<ManifestEntry>` with `from_value`.
+ ///
+ /// Reader and deserialization failures are converted with `Error::from`.
+ fn read_from_bytes(bytes: &[u8]) -> Result<Vec<ManifestEntry>> {
+ let reader = Reader::new(bytes).map_err(Error::from)?;
+ let records = reader
+ .collect::<std::result::Result<Vec<Value>, _>>()
+ .map_err(Error::from)?;
+ let values = Value::Array(records);
+ from_value::<Vec<ManifestEntry>>(&values).map_err(Error::from)
+ }
+}
+
+#[cfg(test)]
+#[cfg(not(windows))] // Excluded on Windows because of path compatibility issues
+mod tests {
+ use super::*;
+ use crate::io::FileIO;
+ use crate::spec::manifest_common::FileKind;
+ use std::env::current_dir;
+
+ #[tokio::test]
+ async fn test_read_manifest_from_file() {
+ let workdir = current_dir().unwrap();
+ let path =
+
workdir.join("tests/fixtures/manifest/manifest-8ded1f09-fcda-489e-9167-582ac0f9f846-0");
+
+ let file_io = FileIO::from_url("file://").unwrap().build().unwrap();
+ let entries = Manifest::read(&file_io, path.to_str().unwrap())
+ .await
+ .unwrap();
+ assert_eq!(entries.len(), 2);
+ // The fixture is expected to hold a Delete entry (bucket 1) followed by an Add entry (bucket 2).
+ let t1 = &entries[0];
+ assert_eq!(t1.kind(), &FileKind::Delete);
+ assert_eq!(t1.bucket(), 1);
+
+ let t2 = &entries[1];
+ assert_eq!(t2.kind(), &FileKind::Add);
+ assert_eq!(t2.bucket(), 2);
+ }
+}
diff --git a/crates/paimon/src/spec/manifest_entry.rs b/crates/paimon/src/spec/manifest_entry.rs
index df1dfab..c7a3b82 100644
--- a/crates/paimon/src/spec/manifest_entry.rs
+++ b/crates/paimon/src/spec/manifest_entry.rs
@@ -55,7 +55,7 @@ pub struct ManifestEntry {
#[allow(dead_code)]
impl ManifestEntry {
- fn kind(&self) -> &FileKind {
+ pub(crate) fn kind(&self) -> &FileKind {
&self.kind
}
@@ -63,7 +63,7 @@ impl ManifestEntry {
&self.partition
}
- fn bucket(&self) -> i32 {
+ pub(crate) fn bucket(&self) -> i32 {
self.bucket
}
diff --git a/crates/paimon/src/spec/mod.rs b/crates/paimon/src/spec/mod.rs
index 7f2f9c6..76d5614 100644
--- a/crates/paimon/src/spec/mod.rs
+++ b/crates/paimon/src/spec/mod.rs
@@ -38,10 +38,10 @@ mod index_file_meta;
pub use index_file_meta::*;
mod index_manifest;
+mod manifest;
mod manifest_common;
mod manifest_entry;
mod objects_file;
mod stats;
mod types;
-
pub use types::*;