Xuanwo commented on code in PR #59:
URL: https://github.com/apache/paimon-rust/pull/59#discussion_r1728658418


##########
crates/paimon/src/error.rs:
##########
@@ -52,6 +52,14 @@ pub enum Error {
         display("Paimon hitting invalid config: {}", message)
     )]
     ConfigInvalid { message: String },
+    #[snafu(
+        visibility(pub(crate)),
+        display("Paimon hitting unexpected avro error {}: {:?}", message, 
source)
+    )]
+    AvroFailed {

Review Comment:
   I believe users don't need to be aware of `AvroFailed`. How about using 
`DataInvalid` or `DataUnexpected` until we determine which is more appropriate?



##########
crates/paimon/src/spec/manifest_list.rs:
##########
@@ -16,15 +16,133 @@
 // under the License.
 
 use super::manifest_file_meta::ManifestFileMeta;
+use crate::io::FileIO;
+use crate::{Error, Result};
+use apache_avro::types::Value;
+use apache_avro::{from_value, Reader};
+use serde::{Deserialize, Serialize};
 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(transparent)]
 /// This file includes several [`ManifestFileMeta`], representing all data of 
the whole table at the corresponding snapshot.
-pub struct ManifestList {}
+pub struct ManifestList {
+    entries: Vec<ManifestFileMeta>,
+}
 
 impl ManifestList {
+    pub fn entries(&self) -> &Vec<ManifestFileMeta> {
+        &self.entries
+    }
+
+    pub fn from_avro_bytes(bytes: &[u8]) -> Result<ManifestList> {
+        let reader = Reader::new(bytes).map_err(Error::from)?;
+        let records = reader
+            .collect::<std::result::Result<Vec<Value>, _>>()
+            .map_err(Error::from)?;
+        let values = Value::Array(records);
+        from_value::<ManifestList>(&values).map_err(Error::from)
+    }
+}
+
+pub struct ManifestListFactory {
+    file_io: FileIO,
+}
+
+/// The factory to read and write [`ManifestList`]
+impl ManifestListFactory {
+    pub fn new(file_io: FileIO) -> ManifestListFactory {
+        Self { file_io }
+    }
+
     /// Write several [`ManifestFileMeta`]s into a manifest list.
     ///
     /// NOTE: This method is atomic.
     pub fn write(&mut self, _metas: Vec<ManifestFileMeta>) -> &str {
         todo!()
     }
+
+    /// Read [`ManifestList`] from the manifest file.
+    pub async fn read(&self, path: &str) -> Result<ManifestList> {
+        let bs = self.file_io.new_input(path)?.read().await?;
+        // todo support other formats
+        ManifestList::from_avro_bytes(bs.as_ref())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::spec::ManifestList;
+
+    #[tokio::test]
+    async fn test_read_manifest_list() {
+        let workdir =
+            std::env::current_dir().unwrap_or_else(|err| panic!("current_dir 
must exist: {err}"));
+        let path = workdir
+            
.join("tests/fixtures/manifest/manifest-list-5c7399a0-46ae-4a5e-9c13-3ab07212cdb6-0");
+        let v = std::fs::read(path.to_str().unwrap()).unwrap();
+        let res = ManifestList::from_avro_bytes(&v).unwrap();
+        assert_eq!(res.entries().len(), 2);
+        assert_eq!(
+            res.entries().first().unwrap().file_name(),
+            "manifest-7ff677cc-46c0-497d-8feb-1c6785707a4b"
+        );
+        assert_eq!(res.entries().first().unwrap().version(), 2);

Review Comment:
   How about `assert_eq!(res, ManifestList { ... })` directly? I think we can 
derive `Eq` for ManifestList.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to