This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new f0effeb fix: enable public access to ManifestEntry properties (#284)
f0effeb is described below
commit f0effeb4f746864e960d06ffe82e818ce7ee6d28
Author: Alon Agmon <[email protected]>
AuthorDate: Thu Mar 21 07:02:25 2024 +0200
fix: enable public access to ManifestEntry properties (#284)
* enable public access to ManifestEntry properties
* implementing getter methods instead of direct access
---
crates/iceberg/src/scan.rs | 2 +-
crates/iceberg/src/spec/manifest.rs | 83 +++++++++++++++++++++++++++++++++++++
2 files changed, 84 insertions(+), 1 deletion(-)
diff --git a/crates/iceberg/src/scan.rs b/crates/iceberg/src/scan.rs
index bd0e6ad..489161b 100644
--- a/crates/iceberg/src/scan.rs
+++ b/crates/iceberg/src/scan.rs
@@ -209,7 +209,7 @@ pub struct FileScanTask {
pub type ArrowRecordBatchStream = BoxStream<'static, crate::Result<RecordBatch>>;
impl FileScanTask {
- pub(crate) fn data_file(&self) -> ManifestEntryRef {
+ pub fn data_file(&self) -> ManifestEntryRef {
self.data_file.clone()
}
}
diff --git a/crates/iceberg/src/spec/manifest.rs b/crates/iceberg/src/spec/manifest.rs
index a6cdf3a..9908297 100644
--- a/crates/iceberg/src/spec/manifest.rs
+++ b/crates/iceberg/src/spec/manifest.rs
@@ -1052,6 +1052,89 @@ pub struct DataFile {
pub(crate) sort_order_id: Option<i32>,
}
+impl DataFile {
+ /// Get the content type of the data file (data, equality deletes, or
position deletes)
+ pub fn content(&self) -> DataContentType {
+ self.content
+ }
+ /// Get the file path as a full URI, including the file-system scheme.
+ pub fn file_path(&self) -> &str {
+ &self.file_path
+ }
+ /// Get the file format of the file (Avro, ORC or Parquet).
+ pub fn file_format(&self) -> DataFileFormat {
+ self.file_format
+ }
+ /// Get the partition values of the file, borrowed as a `Struct`.
+ pub fn partition(&self) -> &Struct {
+ &self.partition
+ }
+ /// Get the number of records in the data file.
+ pub fn record_count(&self) -> u64 {
+ self.record_count
+ }
+ /// Get the total file size in bytes.
+ pub fn file_size_in_bytes(&self) -> u64 {
+ self.file_size_in_bytes
+ }
+ /// Get the column sizes: a map from column id to the total size on disk
+ /// of all regions that store the column. Does not include bytes necessary
+ /// to read other columns, like footers. Described as null for row-oriented
+ /// formats (Avro); presumably an empty map in that case - TODO confirm.
+ pub fn column_sizes(&self) -> &HashMap<i32, u64> {
+ &self.column_sizes
+ }
+ /// Get the per-column value counts for the data file.
+ /// Map from column id to the number of values in the column (including
+ /// null and NaN values).
+ pub fn value_counts(&self) -> &HashMap<i32, u64> {
+ &self.value_counts
+ }
+ /// Get the per-column null value counts of the data file.
+ /// Map from column id to the number of null values in the column.
+ pub fn null_value_counts(&self) -> &HashMap<i32, u64> {
+ &self.null_value_counts
+ }
+ /// Get the per-column NaN value counts of the data file.
+ /// Map from column id to the number of NaN values in the column.
+ pub fn nan_value_counts(&self) -> &HashMap<i32, u64> {
+ &self.nan_value_counts
+ }
+ /// Get the lower bounds of the data file values per column.
+ /// Map from column id to lower bound in the column serialized as binary (returned as `Literal`).
+ pub fn lower_bounds(&self) -> &HashMap<i32, Literal> {
+ &self.lower_bounds
+ }
+ /// Get the upper bounds of the data file values per column.
+ /// Map from column id to upper bound in the column serialized as binary (returned as `Literal`).
+ pub fn upper_bounds(&self) -> &HashMap<i32, Literal> {
+ &self.upper_bounds
+ }
+ /// Get the implementation-specific key metadata for the data file, as a byte slice.
+ pub fn key_metadata(&self) -> &[u8] {
+ &self.key_metadata
+ }
+ /// Get the split offsets of the data file, as a slice of `i64` offsets.
+ /// For example, all row group offsets in a Parquet file.
+ pub fn split_offsets(&self) -> &[i64] {
+ &self.split_offsets
+ }
+ /// Get the equality ids of the data file.
+ /// Field ids used to determine row equality in equality delete files.
+ /// Described as null when content is not EqualityDeletes; presumably an empty slice here - TODO confirm.
+ pub fn equality_ids(&self) -> &[i32] {
+ &self.equality_ids
+ }
+ /// Get the sort order id of the data file, if one is set.
+ /// Only data files and equality delete files should be
+ /// written with a non-null order id. Position deletes are required to be
+ /// sorted by file and position, not a table order, and should set sort
+ /// order id to null (presumably `None` here - TODO confirm). Readers must
+ /// ignore sort order id for position delete files.
+ pub fn sort_order_id(&self) -> Option<i32> {
+ self.sort_order_id
+ }
+}
/// Type of content stored by the data file: data, equality deletes, or
/// position deletes (all v1 files are data files)
#[derive(Debug, PartialEq, Eq, Clone, Copy)]