This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 41b29ae  refactor: use `anyhow` for generic errors (#26)
41b29ae is described below

commit 41b29ae6d445266ba97b044025cd3ac3d5e7835e
Author: Shiyan Xu <[email protected]>
AuthorDate: Sat Jun 22 17:19:08 2024 -0500

    refactor: use `anyhow` for generic errors (#26)
---
 Cargo.toml                        |  1 -
 crates/core/Cargo.toml            |  1 -
 crates/core/src/error.rs          | 36 ------------------------------------
 crates/core/src/file_group/mod.rs | 31 +++++++++++++++++++++----------
 crates/core/src/lib.rs            |  1 -
 crates/core/src/table/config.rs   | 15 ++++++++-------
 crates/core/src/table/mod.rs      | 10 +++++-----
 crates/core/src/timeline/mod.rs   | 31 ++++++++++++++++---------------
 crates/datafusion/Cargo.toml      |  1 -
 crates/fs/Cargo.toml              |  1 -
 10 files changed, 50 insertions(+), 78 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 4a9f419..fa5f51b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,7 +62,6 @@ bytes = { version = "1" }
 chrono = { version = "=0.4.34", default-features = false, features = ["clock"] 
}
 tracing = { version = "0.1", features = ["log"] }
 regex = { version = "1" }
-thiserror = { version = "1" }
 url = { version = "2" }
 uuid = { version = "1" }
 
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index f97a526..04da3f2 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -61,7 +61,6 @@ bytes = { workspace = true }
 chrono = { workspace = true, default-features = false, features = ["clock"] }
 hashbrown = "0.14.3"
 regex = { workspace = true }
-thiserror = { workspace = true }
 uuid = { workspace = true, features = ["serde", "v4"] }
 url = { workspace = true }
 
diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs
deleted file mode 100644
index e8f76c9..0000000
--- a/crates/core/src/error.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-use std::fmt::Debug;
-
-use thiserror::Error;
-
-#[derive(Debug, Error)]
-pub enum HudiFileGroupError {
-    #[error("Commit time {0} is already present in File Group {1}")]
-    CommitTimeAlreadyExists(String, String),
-}
-
-#[derive(Debug, Error)]
-pub enum HudiCoreError {
-    #[error("Failed to load file group")]
-    FailToLoadFileGroup(#[from] HudiFileGroupError),
-    #[error("Failed to load table properties")]
-    LoadTablePropertiesError,
-}
diff --git a/crates/core/src/file_group/mod.rs 
b/crates/core/src/file_group/mod.rs
index b71c0ce..b0d791f 100644
--- a/crates/core/src/file_group/mod.rs
+++ b/crates/core/src/file_group/mod.rs
@@ -21,9 +21,8 @@ use std::collections::BTreeMap;
 use std::fmt;
 use std::fmt::Formatter;
 
-use crate::error::HudiFileGroupError;
-use crate::error::HudiFileGroupError::CommitTimeAlreadyExists;
 use crate::storage::file_metadata::FileMetadata;
+use anyhow::{anyhow, Result};
 
 #[derive(Clone, Debug)]
 pub struct BaseFile {
@@ -94,7 +93,6 @@ impl fmt::Display for FileGroup {
     }
 }
 
-#[allow(dead_code)]
 impl FileGroup {
     pub fn new(id: String, partition_path: Option<String>) -> Self {
         Self {
@@ -104,20 +102,19 @@ impl FileGroup {
         }
     }
 
-    pub fn add_base_file_from_name(
-        &mut self,
-        file_name: &str,
-    ) -> Result<&Self, HudiFileGroupError> {
+    #[allow(dead_code)]
+    pub fn add_base_file_from_name(&mut self, file_name: &str) -> 
Result<&Self> {
         let base_file = BaseFile::new(file_name);
         self.add_base_file(base_file)
     }
 
-    pub fn add_base_file(&mut self, base_file: BaseFile) -> Result<&Self, 
HudiFileGroupError> {
+    pub fn add_base_file(&mut self, base_file: BaseFile) -> Result<&Self> {
         let commit_time = base_file.commit_time.as_str();
         if self.file_slices.contains_key(commit_time) {
-            Err(CommitTimeAlreadyExists(
+            Err(anyhow!(
+                "Commit time {0} is already present in File Group {1}",
                 commit_time.to_owned(),
-                self.to_string(),
+                self.id,
             ))
         } else {
             self.file_slices.insert(
@@ -170,4 +167,18 @@ mod tests {
             "20240402144910683"
         )
     }
+
+    #[test]
+    fn add_base_file_with_same_commit_time_should_fail() {
+        let mut fg = 
FileGroup::new("5a226868-2934-4f84-a16f-55124630c68d-0".to_owned(), None);
+        let res1 = fg.add_base_file_from_name(
+            
"5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet",
+        );
+        assert!(res1.is_ok());
+        let res2 = fg.add_base_file_from_name(
+            
"5a226868-2934-4f84-a16f-55124630c68d-0_2-10-0_20240402144910683.parquet",
+        );
+        assert!(res2.is_err());
+        assert_eq!(res2.unwrap_err().to_string(), "Commit time 
20240402144910683 is already present in File Group 
5a226868-2934-4f84-a16f-55124630c68d-0");
+    }
 }
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index 3198cbc..aa7d66a 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -19,7 +19,6 @@
 
 use crate::table::Table;
 
-mod error;
 mod file_group;
 pub mod table;
 pub type HudiTable = Table;
diff --git a/crates/core/src/table/config.rs b/crates/core/src/table/config.rs
index 2f64cc0..6a1a6ce 100644
--- a/crates/core/src/table/config.rs
+++ b/crates/core/src/table/config.rs
@@ -17,7 +17,8 @@
  * under the License.
  */
 
-use crate::error::HudiCoreError;
+use anyhow::anyhow;
+use anyhow::Result;
 use std::str::FromStr;
 
 pub enum ConfigKey {
@@ -67,13 +68,13 @@ pub enum TableType {
 }
 
 impl FromStr for TableType {
-    type Err = HudiCoreError;
+    type Err = anyhow::Error;
 
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
+    fn from_str(s: &str) -> Result<Self> {
         match s.to_ascii_lowercase().as_str() {
             "copy_on_write" | "copy-on-write" | "cow" => Ok(Self::CopyOnWrite),
             "merge_on_read" | "merge-on-read" | "mor" => Ok(Self::MergeOnRead),
-            _ => Err(HudiCoreError::LoadTablePropertiesError),
+            _ => Err(anyhow!("Unsupported table type: {}", s)),
         }
     }
 }
@@ -84,12 +85,12 @@ pub enum BaseFileFormat {
 }
 
 impl FromStr for BaseFileFormat {
-    type Err = HudiCoreError;
+    type Err = anyhow::Error;
 
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
+    fn from_str(s: &str) -> Result<Self> {
         match s.to_ascii_lowercase().as_str() {
             "parquet" => Ok(Self::Parquet),
-            _ => Err(HudiCoreError::LoadTablePropertiesError),
+            _ => Err(anyhow!("Unsupported base file format: {}", s)),
         }
     }
 }
diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs
index d2584cd..50f5289 100644
--- a/crates/core/src/table/mod.rs
+++ b/crates/core/src/table/mod.rs
@@ -17,8 +17,8 @@
  * under the License.
  */
 
+use anyhow::Result;
 use std::collections::HashMap;
-use std::error::Error;
 use std::fs::File;
 use std::io::{BufRead, BufReader};
 use std::path::{Path, PathBuf};
@@ -55,7 +55,7 @@ impl Table {
         }
     }
 
-    fn load_properties(path: &Path) -> Result<HashMap<String, String>, 
std::io::Error> {
+    fn load_properties(path: &Path) -> Result<HashMap<String, String>> {
         let file = File::open(path)?;
         let reader = BufReader::new(file);
         let lines = reader.lines();
@@ -81,7 +81,7 @@ impl Table {
         }
     }
 
-    pub fn get_timeline(&self) -> Result<Timeline, std::io::Error> {
+    pub fn get_timeline(&self) -> Result<Timeline> {
         Timeline::new(self.base_path.as_path())
     }
 
@@ -99,7 +99,7 @@ impl Table {
         }
     }
 
-    pub fn get_latest_file_slices(&self) -> Result<Vec<FileSlice>, Box<dyn 
Error>> {
+    pub fn get_latest_file_slices(&self) -> Result<Vec<FileSlice>> {
         let mut file_slices = Vec::new();
         let mut fs_view = FileSystemView::new(self.base_path.as_path());
         for f in fs_view.get_latest_file_slices() {
@@ -108,7 +108,7 @@ impl Table {
         Ok(file_slices)
     }
 
-    pub fn get_latest_file_paths(&self) -> Result<Vec<String>, Box<dyn Error>> 
{
+    pub fn get_latest_file_paths(&self) -> Result<Vec<String>> {
         let mut file_paths = Vec::new();
         for f in self.get_latest_file_slices()? {
             if let Some(f) = f.base_file_path() {
diff --git a/crates/core/src/timeline/mod.rs b/crates/core/src/timeline/mod.rs
index 1cd9e13..04e66c8 100644
--- a/crates/core/src/timeline/mod.rs
+++ b/crates/core/src/timeline/mod.rs
@@ -17,16 +17,18 @@
  * under the License.
  */
 
-use hudi_fs::file_name_without_ext;
 use std::collections::HashMap;
+use std::fs;
+use std::fs::File;
+use std::io::Read;
+use std::path::{Path, PathBuf};
 
+use anyhow::{anyhow, Result};
 use arrow_schema::SchemaRef;
 use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
 use serde_json::Value;
-use std::fs::File;
-use std::io::{ErrorKind, Read};
-use std::path::{Path, PathBuf};
-use std::{fs, io};
+
+use hudi_fs::file_name_without_ext;
 
 #[allow(dead_code)]
 #[derive(Debug, Clone, PartialEq)]
@@ -64,7 +66,7 @@ pub struct Timeline {
 }
 
 impl Timeline {
-    pub fn new(base_path: &Path) -> Result<Self, io::Error> {
+    pub fn new(base_path: &Path) -> Result<Self> {
         let instants = Self::load_completed_commit_instants(base_path)?;
         Ok(Self {
             base_path: base_path.to_path_buf(),
@@ -72,7 +74,7 @@ impl Timeline {
         })
     }
 
-    fn load_completed_commit_instants(base_path: &Path) -> 
Result<Vec<Instant>, io::Error> {
+    fn load_completed_commit_instants(base_path: &Path) -> 
Result<Vec<Instant>> {
         let mut completed_commits = Vec::new();
         let mut timeline_path = base_path.to_path_buf();
         timeline_path.push(".hoodie");
@@ -91,7 +93,7 @@ impl Timeline {
         Ok(completed_commits)
     }
 
-    pub fn get_latest_commit_metadata(&self) -> Result<HashMap<String, Value>, 
io::Error> {
+    pub fn get_latest_commit_metadata(&self) -> Result<HashMap<String, Value>> 
{
         match self.instants.iter().next_back() {
             Some(instant) => {
                 let mut latest_instant_file_path = 
self.base_path.to_path_buf();
@@ -107,7 +109,7 @@ impl Timeline {
         }
     }
 
-    pub fn get_latest_schema(&self) -> Result<SchemaRef, io::Error> {
+    pub fn get_latest_schema(&self) -> Result<SchemaRef> {
         let commit_metadata = self.get_latest_commit_metadata()?;
         if let Some(partition_to_write_stats) = 
commit_metadata["partitionToWriteStats"].as_object()
         {
@@ -123,19 +125,18 @@ impl Timeline {
                 }
             }
         }
-        Err(io::Error::new(
-            ErrorKind::InvalidData,
-            "Failed to resolve schema.",
-        ))
+        Err(anyhow!("Failed to resolve schema."))
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use crate::timeline::{Instant, State, Timeline};
-    use hudi_fs::test_utils::extract_test_table;
     use std::path::Path;
 
+    use hudi_fs::test_utils::extract_test_table;
+
+    use crate::timeline::{Instant, State, Timeline};
+
     #[test]
     fn read_latest_schema() {
         let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
diff --git a/crates/datafusion/Cargo.toml b/crates/datafusion/Cargo.toml
index 5d9efc7..0a042ab 100644
--- a/crates/datafusion/Cargo.toml
+++ b/crates/datafusion/Cargo.toml
@@ -63,7 +63,6 @@ bytes = { workspace = true }
 chrono = { workspace = true, default-features = false, features = ["clock"] }
 hashbrown = "0.14.3"
 regex = { workspace = true }
-thiserror = { workspace = true }
 uuid = { workspace = true, features = ["serde", "v4"] }
 url = { workspace = true }
 
diff --git a/crates/fs/Cargo.toml b/crates/fs/Cargo.toml
index cd55c31..84d4950 100644
--- a/crates/fs/Cargo.toml
+++ b/crates/fs/Cargo.toml
@@ -58,7 +58,6 @@ bytes = { workspace = true }
 chrono = { workspace = true, default-features = false, features = ["clock"] }
 hashbrown = "0.14.3"
 regex = { workspace = true }
-thiserror = { workspace = true }
 uuid = { workspace = true, features = ["serde", "v4"] }
 url = { workspace = true }
 

Reply via email to