This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 41b29ae refactor: use `anyhow` for generic errors (#26)
41b29ae is described below
commit 41b29ae6d445266ba97b044025cd3ac3d5e7835e
Author: Shiyan Xu <[email protected]>
AuthorDate: Sat Jun 22 17:19:08 2024 -0500
refactor: use `anyhow` for generic errors (#26)
---
Cargo.toml | 1 -
crates/core/Cargo.toml | 1 -
crates/core/src/error.rs | 36 ------------------------------------
crates/core/src/file_group/mod.rs | 31 +++++++++++++++++++++----------
crates/core/src/lib.rs | 1 -
crates/core/src/table/config.rs | 15 ++++++++-------
crates/core/src/table/mod.rs | 10 +++++-----
crates/core/src/timeline/mod.rs | 31 ++++++++++++++++---------------
crates/datafusion/Cargo.toml | 1 -
crates/fs/Cargo.toml | 1 -
10 files changed, 50 insertions(+), 78 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 4a9f419..fa5f51b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,7 +62,6 @@ bytes = { version = "1" }
chrono = { version = "=0.4.34", default-features = false, features = ["clock"]
}
tracing = { version = "0.1", features = ["log"] }
regex = { version = "1" }
-thiserror = { version = "1" }
url = { version = "2" }
uuid = { version = "1" }
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index f97a526..04da3f2 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -61,7 +61,6 @@ bytes = { workspace = true }
chrono = { workspace = true, default-features = false, features = ["clock"] }
hashbrown = "0.14.3"
regex = { workspace = true }
-thiserror = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4"] }
url = { workspace = true }
diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs
deleted file mode 100644
index e8f76c9..0000000
--- a/crates/core/src/error.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-use std::fmt::Debug;
-
-use thiserror::Error;
-
-#[derive(Debug, Error)]
-pub enum HudiFileGroupError {
- #[error("Commit time {0} is already present in File Group {1}")]
- CommitTimeAlreadyExists(String, String),
-}
-
-#[derive(Debug, Error)]
-pub enum HudiCoreError {
- #[error("Failed to load file group")]
- FailToLoadFileGroup(#[from] HudiFileGroupError),
- #[error("Failed to load table properties")]
- LoadTablePropertiesError,
-}
diff --git a/crates/core/src/file_group/mod.rs
b/crates/core/src/file_group/mod.rs
index b71c0ce..b0d791f 100644
--- a/crates/core/src/file_group/mod.rs
+++ b/crates/core/src/file_group/mod.rs
@@ -21,9 +21,8 @@ use std::collections::BTreeMap;
use std::fmt;
use std::fmt::Formatter;
-use crate::error::HudiFileGroupError;
-use crate::error::HudiFileGroupError::CommitTimeAlreadyExists;
use crate::storage::file_metadata::FileMetadata;
+use anyhow::{anyhow, Result};
#[derive(Clone, Debug)]
pub struct BaseFile {
@@ -94,7 +93,6 @@ impl fmt::Display for FileGroup {
}
}
-#[allow(dead_code)]
impl FileGroup {
pub fn new(id: String, partition_path: Option<String>) -> Self {
Self {
@@ -104,20 +102,19 @@ impl FileGroup {
}
}
- pub fn add_base_file_from_name(
- &mut self,
- file_name: &str,
- ) -> Result<&Self, HudiFileGroupError> {
+ #[allow(dead_code)]
+ pub fn add_base_file_from_name(&mut self, file_name: &str) ->
Result<&Self> {
let base_file = BaseFile::new(file_name);
self.add_base_file(base_file)
}
- pub fn add_base_file(&mut self, base_file: BaseFile) -> Result<&Self,
HudiFileGroupError> {
+ pub fn add_base_file(&mut self, base_file: BaseFile) -> Result<&Self> {
let commit_time = base_file.commit_time.as_str();
if self.file_slices.contains_key(commit_time) {
- Err(CommitTimeAlreadyExists(
+ Err(anyhow!(
+ "Commit time {0} is already present in File Group {1}",
commit_time.to_owned(),
- self.to_string(),
+ self.id,
))
} else {
self.file_slices.insert(
@@ -170,4 +167,18 @@ mod tests {
"20240402144910683"
)
}
+
+ #[test]
+ fn add_base_file_with_same_commit_time_should_fail() {
+ let mut fg =
FileGroup::new("5a226868-2934-4f84-a16f-55124630c68d-0".to_owned(), None);
+ let res1 = fg.add_base_file_from_name(
+
"5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet",
+ );
+ assert!(res1.is_ok());
+ let res2 = fg.add_base_file_from_name(
+
"5a226868-2934-4f84-a16f-55124630c68d-0_2-10-0_20240402144910683.parquet",
+ );
+ assert!(res2.is_err());
+ assert_eq!(res2.unwrap_err().to_string(), "Commit time
20240402144910683 is already present in File Group
5a226868-2934-4f84-a16f-55124630c68d-0");
+ }
}
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index 3198cbc..aa7d66a 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -19,7 +19,6 @@
use crate::table::Table;
-mod error;
mod file_group;
pub mod table;
pub type HudiTable = Table;
diff --git a/crates/core/src/table/config.rs b/crates/core/src/table/config.rs
index 2f64cc0..6a1a6ce 100644
--- a/crates/core/src/table/config.rs
+++ b/crates/core/src/table/config.rs
@@ -17,7 +17,8 @@
* under the License.
*/
-use crate::error::HudiCoreError;
+use anyhow::anyhow;
+use anyhow::Result;
use std::str::FromStr;
pub enum ConfigKey {
@@ -67,13 +68,13 @@ pub enum TableType {
}
impl FromStr for TableType {
- type Err = HudiCoreError;
+ type Err = anyhow::Error;
- fn from_str(s: &str) -> Result<Self, Self::Err> {
+ fn from_str(s: &str) -> Result<Self> {
match s.to_ascii_lowercase().as_str() {
"copy_on_write" | "copy-on-write" | "cow" => Ok(Self::CopyOnWrite),
"merge_on_read" | "merge-on-read" | "mor" => Ok(Self::MergeOnRead),
- _ => Err(HudiCoreError::LoadTablePropertiesError),
+ _ => Err(anyhow!("Unsupported table type: {}", s)),
}
}
}
@@ -84,12 +85,12 @@ pub enum BaseFileFormat {
}
impl FromStr for BaseFileFormat {
- type Err = HudiCoreError;
+ type Err = anyhow::Error;
- fn from_str(s: &str) -> Result<Self, Self::Err> {
+ fn from_str(s: &str) -> Result<Self> {
match s.to_ascii_lowercase().as_str() {
"parquet" => Ok(Self::Parquet),
- _ => Err(HudiCoreError::LoadTablePropertiesError),
+ _ => Err(anyhow!("Unsupported base file format: {}", s)),
}
}
}
diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs
index d2584cd..50f5289 100644
--- a/crates/core/src/table/mod.rs
+++ b/crates/core/src/table/mod.rs
@@ -17,8 +17,8 @@
* under the License.
*/
+use anyhow::Result;
use std::collections::HashMap;
-use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
@@ -55,7 +55,7 @@ impl Table {
}
}
- fn load_properties(path: &Path) -> Result<HashMap<String, String>,
std::io::Error> {
+ fn load_properties(path: &Path) -> Result<HashMap<String, String>> {
let file = File::open(path)?;
let reader = BufReader::new(file);
let lines = reader.lines();
@@ -81,7 +81,7 @@ impl Table {
}
}
- pub fn get_timeline(&self) -> Result<Timeline, std::io::Error> {
+ pub fn get_timeline(&self) -> Result<Timeline> {
Timeline::new(self.base_path.as_path())
}
@@ -99,7 +99,7 @@ impl Table {
}
}
- pub fn get_latest_file_slices(&self) -> Result<Vec<FileSlice>, Box<dyn
Error>> {
+ pub fn get_latest_file_slices(&self) -> Result<Vec<FileSlice>> {
let mut file_slices = Vec::new();
let mut fs_view = FileSystemView::new(self.base_path.as_path());
for f in fs_view.get_latest_file_slices() {
@@ -108,7 +108,7 @@ impl Table {
Ok(file_slices)
}
- pub fn get_latest_file_paths(&self) -> Result<Vec<String>, Box<dyn Error>>
{
+ pub fn get_latest_file_paths(&self) -> Result<Vec<String>> {
let mut file_paths = Vec::new();
for f in self.get_latest_file_slices()? {
if let Some(f) = f.base_file_path() {
diff --git a/crates/core/src/timeline/mod.rs b/crates/core/src/timeline/mod.rs
index 1cd9e13..04e66c8 100644
--- a/crates/core/src/timeline/mod.rs
+++ b/crates/core/src/timeline/mod.rs
@@ -17,16 +17,18 @@
* under the License.
*/
-use hudi_fs::file_name_without_ext;
use std::collections::HashMap;
+use std::fs;
+use std::fs::File;
+use std::io::Read;
+use std::path::{Path, PathBuf};
+use anyhow::{anyhow, Result};
use arrow_schema::SchemaRef;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use serde_json::Value;
-use std::fs::File;
-use std::io::{ErrorKind, Read};
-use std::path::{Path, PathBuf};
-use std::{fs, io};
+
+use hudi_fs::file_name_without_ext;
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
@@ -64,7 +66,7 @@ pub struct Timeline {
}
impl Timeline {
- pub fn new(base_path: &Path) -> Result<Self, io::Error> {
+ pub fn new(base_path: &Path) -> Result<Self> {
let instants = Self::load_completed_commit_instants(base_path)?;
Ok(Self {
base_path: base_path.to_path_buf(),
@@ -72,7 +74,7 @@ impl Timeline {
})
}
- fn load_completed_commit_instants(base_path: &Path) ->
Result<Vec<Instant>, io::Error> {
+ fn load_completed_commit_instants(base_path: &Path) ->
Result<Vec<Instant>> {
let mut completed_commits = Vec::new();
let mut timeline_path = base_path.to_path_buf();
timeline_path.push(".hoodie");
@@ -91,7 +93,7 @@ impl Timeline {
Ok(completed_commits)
}
- pub fn get_latest_commit_metadata(&self) -> Result<HashMap<String, Value>,
io::Error> {
+ pub fn get_latest_commit_metadata(&self) -> Result<HashMap<String, Value>>
{
match self.instants.iter().next_back() {
Some(instant) => {
let mut latest_instant_file_path =
self.base_path.to_path_buf();
@@ -107,7 +109,7 @@ impl Timeline {
}
}
- pub fn get_latest_schema(&self) -> Result<SchemaRef, io::Error> {
+ pub fn get_latest_schema(&self) -> Result<SchemaRef> {
let commit_metadata = self.get_latest_commit_metadata()?;
if let Some(partition_to_write_stats) =
commit_metadata["partitionToWriteStats"].as_object()
{
@@ -123,19 +125,18 @@ impl Timeline {
}
}
}
- Err(io::Error::new(
- ErrorKind::InvalidData,
- "Failed to resolve schema.",
- ))
+ Err(anyhow!("Failed to resolve schema."))
}
}
#[cfg(test)]
mod tests {
- use crate::timeline::{Instant, State, Timeline};
- use hudi_fs::test_utils::extract_test_table;
use std::path::Path;
+ use hudi_fs::test_utils::extract_test_table;
+
+ use crate::timeline::{Instant, State, Timeline};
+
#[test]
fn read_latest_schema() {
let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
diff --git a/crates/datafusion/Cargo.toml b/crates/datafusion/Cargo.toml
index 5d9efc7..0a042ab 100644
--- a/crates/datafusion/Cargo.toml
+++ b/crates/datafusion/Cargo.toml
@@ -63,7 +63,6 @@ bytes = { workspace = true }
chrono = { workspace = true, default-features = false, features = ["clock"] }
hashbrown = "0.14.3"
regex = { workspace = true }
-thiserror = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4"] }
url = { workspace = true }
diff --git a/crates/fs/Cargo.toml b/crates/fs/Cargo.toml
index cd55c31..84d4950 100644
--- a/crates/fs/Cargo.toml
+++ b/crates/fs/Cargo.toml
@@ -58,7 +58,6 @@ bytes = { workspace = true }
chrono = { workspace = true, default-features = false, features = ["clock"] }
hashbrown = "0.14.3"
regex = { workspace = true }
-thiserror = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4"] }
url = { workspace = true }