This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new ce71aeb fix: renaming FileScanTask.data_file to data_manifest_entry (#300)
ce71aeb is described below
commit ce71aeb999f7272f3bcfaa8500480a5dc1dbab69
Author: Alon Agmon <[email protected]>
AuthorDate: Tue Mar 26 12:15:49 2024 +0200
fix: renaming FileScanTask.data_file to data_manifest_entry (#300)
* renaming FileScanTask.data_file to data_manifest_entry
* renaming data_file.content() to content_type()
* changing pub method to data()
---
crates/iceberg/src/arrow.rs | 2 +-
crates/iceberg/src/scan.rs | 14 +++++++-------
crates/iceberg/src/spec/manifest.rs | 8 +++++++-
3 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/crates/iceberg/src/arrow.rs b/crates/iceberg/src/arrow.rs
index 527fb19..47cbaa1 100644
--- a/crates/iceberg/src/arrow.rs
+++ b/crates/iceberg/src/arrow.rs
@@ -87,7 +87,7 @@ impl ArrowReader {
let projection_mask = self.get_arrow_projection_mask(&task);
let parquet_reader = file_io
- .new_input(task.data_file().file_path())?
+ .new_input(task.data().data_file().file_path())?
.reader()
.await?;
diff --git a/crates/iceberg/src/scan.rs b/crates/iceberg/src/scan.rs
index 852bcaf..358de5d 100644
--- a/crates/iceberg/src/scan.rs
+++ b/crates/iceberg/src/scan.rs
@@ -170,7 +170,7 @@ impl TableScan {
}
DataContentType::Data => {
let scan_task: crate::Result<FileScanTask> =
Ok(FileScanTask {
- data_file: manifest_entry.clone(),
+ data_manifest_entry: manifest_entry.clone(),
start: 0,
length: manifest_entry.file_size_in_bytes(),
});
@@ -198,7 +198,7 @@ impl TableScan {
/// A task to scan part of file.
#[derive(Debug)]
pub struct FileScanTask {
- data_file: ManifestEntryRef,
+ data_manifest_entry: ManifestEntryRef,
#[allow(dead_code)]
start: u64,
#[allow(dead_code)]
@@ -209,8 +209,8 @@ pub struct FileScanTask {
pub type ArrowRecordBatchStream = BoxStream<'static,
crate::Result<RecordBatch>>;
impl FileScanTask {
- pub fn data_file(&self) -> ManifestEntryRef {
- self.data_file.clone()
+ pub fn data(&self) -> ManifestEntryRef {
+ self.data_manifest_entry.clone()
}
}
@@ -504,17 +504,17 @@ mod tests {
assert_eq!(tasks.len(), 2);
- tasks.sort_by_key(|t| t.data_file.file_path().to_string());
+ tasks.sort_by_key(|t| t.data().data_file().file_path().to_string());
// Check first task is added data file
assert_eq!(
- tasks[0].data_file.file_path(),
+ tasks[0].data().data_file().file_path(),
format!("{}/1.parquet", &fixture.table_location)
);
// Check second task is existing data file
assert_eq!(
- tasks[1].data_file.file_path(),
+ tasks[1].data().data_file().file_path(),
format!("{}/3.parquet", &fixture.table_location)
);
}
diff --git a/crates/iceberg/src/spec/manifest.rs b/crates/iceberg/src/spec/manifest.rs
index 44ac7ca..3daa5c2 100644
--- a/crates/iceberg/src/spec/manifest.rs
+++ b/crates/iceberg/src/spec/manifest.rs
@@ -900,6 +900,12 @@ impl ManifestEntry {
pub fn file_size_in_bytes(&self) -> u64 {
self.data_file.file_size_in_bytes
}
+
+ /// get a reference to the actual data file
+ #[inline]
+ pub fn data_file(&self) -> &DataFile {
+ &self.data_file
+ }
}
/// Used to track additions and deletions in ManifestEntry.
@@ -1054,7 +1060,7 @@ pub struct DataFile {
impl DataFile {
/// Get the content type of the data file (data, equality deletes, or position deletes)
- pub fn content(&self) -> DataContentType {
+ pub fn content_type(&self) -> DataContentType {
self.content
}
/// Get the file path as full URI with FS scheme