This is an automated email from the ASF dual-hosted git repository.
xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new e93dd04e fix: fix location generator (#1479)
e93dd04e is described below
commit e93dd04e1ba661c783b05b5dced424c624be8af5
Author: Dylan <[email protected]>
AuthorDate: Mon Jun 30 20:54:20 2025 +0800
fix: fix location generator (#1479)
## Which issue does this PR close?
- It seems we enforce that the `write.data.path` must be a subdirectory
of the table location, but it is unnecessary. I met this issue when
integrating Databricks's managed iceberg table.
- Closes #.
## What changes are included in this PR?
- Use `write.data.path` directly if provided.
## Are these changes tested?
<!--
Specify what test covers (unit test, integration test, etc.).
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
---
.../src/writer/file_writer/location_generator.rs | 41 ++++++++--------------
1 file changed, 15 insertions(+), 26 deletions(-)
diff --git a/crates/iceberg/src/writer/file_writer/location_generator.rs
b/crates/iceberg/src/writer/file_writer/location_generator.rs
index 43609743..0c0032c4 100644
--- a/crates/iceberg/src/writer/file_writer/location_generator.rs
+++ b/crates/iceberg/src/writer/file_writer/location_generator.rs
@@ -20,8 +20,8 @@
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
+use crate::Result;
use crate::spec::{DataFileFormat, TableMetadata};
-use crate::{Error, ErrorKind, Result};
/// `LocationGenerator` used to generate the location of data file.
pub trait LocationGenerator: Clone + Send + 'static {
@@ -46,29 +46,16 @@ impl DefaultLocationGenerator {
/// Create a new `DefaultLocationGenerator`.
pub fn new(table_metadata: TableMetadata) -> Result<Self> {
let table_location = table_metadata.location();
- let rel_dir_path = {
- let prop = table_metadata.properties();
- let data_location = prop
- .get(WRITE_DATA_LOCATION)
- .or(prop.get(WRITE_FOLDER_STORAGE_LOCATION));
- if let Some(data_location) = data_location {
- data_location.strip_prefix(table_location).ok_or_else(|| {
- Error::new(
- ErrorKind::DataInvalid,
- format!(
- "data location {} is not a subpath of table
location {}",
- data_location, table_location
- ),
- )
- })?
- } else {
- DEFAULT_DATA_DIR
- }
+ let prop = table_metadata.properties();
+ let data_location = prop
+ .get(WRITE_DATA_LOCATION)
+ .or(prop.get(WRITE_FOLDER_STORAGE_LOCATION));
+ let dir_path = if let Some(data_location) = data_location {
+ data_location.clone()
+ } else {
+ format!("{}{}", table_location, DEFAULT_DATA_DIR)
};
-
- Ok(Self {
- dir_path: format!("{}{}", table_location, rel_dir_path),
- })
+ Ok(Self { dir_path })
}
}
@@ -222,13 +209,15 @@ pub(crate) mod test {
"s3://data.db/table/data_2/part-00002-test.parquet"
);
- // test invalid data location
table_metadata.properties.insert(
WRITE_DATA_LOCATION.to_string(),
// invalid table location
"s3://data.db/data_3".to_string(),
);
- let location_generator =
super::DefaultLocationGenerator::new(table_metadata.clone());
- assert!(location_generator.is_err());
+ let location_generator =
+
super::DefaultLocationGenerator::new(table_metadata.clone()).unwrap();
+ let location =
+
location_generator.generate_location(&file_name_genertaor.generate_file_name());
+ assert_eq!(location, "s3://data.db/data_3/part-00003-test.parquet");
}
}