(iceberg-rust) branch main updated: fix: enable public access to ManifestEntry properties (#284)

liurenjie1024 Wed, 20 Mar 2024 22:04:21 -0700

This is an automated email from the ASF dual-hosted git repository.

liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git



The following commit(s) were added to refs/heads/main by this push:
     new f0effeb  fix: enable public access to ManifestEntry properties (#284)
f0effeb is described below

commit f0effeb4f746864e960d06ffe82e818ce7ee6d28
Author: Alon Agmon <[email protected]>
AuthorDate: Thu Mar 21 07:02:25 2024 +0200

    fix: enable public access to ManifestEntry properties (#284)
    
    * enable public access to ManifestEntry properties
    
    * implementing getter methods instead of direct access
---
 crates/iceberg/src/scan.rs          |  2 +-
 crates/iceberg/src/spec/manifest.rs | 83 +++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/crates/iceberg/src/scan.rs b/crates/iceberg/src/scan.rs
index bd0e6ad..489161b 100644
--- a/crates/iceberg/src/scan.rs
+++ b/crates/iceberg/src/scan.rs
@@ -209,7 +209,7 @@ pub struct FileScanTask {
 pub type ArrowRecordBatchStream = BoxStream<'static, crate::Result<RecordBatch>>;
 
 impl FileScanTask {
-    pub(crate) fn data_file(&self) -> ManifestEntryRef {
+    pub fn data_file(&self) -> ManifestEntryRef {
         self.data_file.clone()
     }
 }
diff --git a/crates/iceberg/src/spec/manifest.rs b/crates/iceberg/src/spec/manifest.rs
index a6cdf3a..9908297 100644
--- a/crates/iceberg/src/spec/manifest.rs
+++ b/crates/iceberg/src/spec/manifest.rs
@@ -1052,6 +1052,89 @@ pub struct DataFile {
     pub(crate) sort_order_id: Option<i32>,
 }
 
+impl DataFile {
+    /// Get the content type of the data file (data, equality deletes, or position deletes)
+    pub fn content(&self) -> DataContentType {
+        self.content
+    }
+    /// Get the file path as full URI with FS scheme
+    pub fn file_path(&self) -> &str {
+        &self.file_path
+    }
+    /// Get the file format of the file (avro, orc or parquet).
+    pub fn file_format(&self) -> DataFileFormat {
+        self.file_format
+    }
+    /// Get the partition values of the file.
+    pub fn partition(&self) -> &Struct {
+        &self.partition
+    }
+    /// Get the record count in the data file.
+    pub fn record_count(&self) -> u64 {
+        self.record_count
+    }
+    /// Get the file size in bytes.
+    pub fn file_size_in_bytes(&self) -> u64 {
+        self.file_size_in_bytes
+    }
+    /// Get the column sizes.
+    /// Map from column id to the total size on disk of all regions that
+    /// store the column. Does not include bytes necessary to read other
+    /// columns, like footers. Null for row-oriented formats (Avro)
+    pub fn column_sizes(&self) -> &HashMap<i32, u64> {
+        &self.column_sizes
+    }
+    /// Get the columns value counts for the data file.
+    /// Map from column id to number of values in the column (including null
+    /// and NaN values)
+    pub fn value_counts(&self) -> &HashMap<i32, u64> {
+        &self.value_counts
+    }
+    /// Get the null value counts of the data file.
+    /// Map from column id to number of null values in the column
+    pub fn null_value_counts(&self) -> &HashMap<i32, u64> {
+        &self.null_value_counts
+    }
+    /// Get the nan value counts of the data file.
+    /// Map from column id to number of NaN values in the column
+    pub fn nan_value_counts(&self) -> &HashMap<i32, u64> {
+        &self.nan_value_counts
+    }
+    /// Get the lower bounds of the data file values per column.
+    /// Map from column id to lower bound in the column serialized as binary.
+    pub fn lower_bounds(&self) -> &HashMap<i32, Literal> {
+        &self.lower_bounds
+    }
+    /// Get the upper bounds of the data file values per column.
+    /// Map from column id to upper bound in the column serialized as binary.
+    pub fn upper_bounds(&self) -> &HashMap<i32, Literal> {
+        &self.upper_bounds
+    }
+    /// Get the Implementation-specific key metadata for the data file.
+    pub fn key_metadata(&self) -> &[u8] {
+        &self.key_metadata
+    }
+    /// Get the split offsets of the data file.
+    /// For example, all row group offsets in a Parquet file.
+    pub fn split_offsets(&self) -> &[i64] {
+        &self.split_offsets
+    }
+    /// Get the equality ids of the data file.
+    /// Field ids used to determine row equality in equality delete files.
+    /// null when content is not EqualityDeletes.
+    pub fn equality_ids(&self) -> &[i32] {
+        &self.equality_ids
+    }
+    /// Get the sort order id of the data file.
+    /// Only data files and equality delete files should be
+    /// written with a non-null order id. Position deletes are required to be
+    /// sorted by file and position, not a table order, and should set sort
+    /// order id to null. Readers must ignore sort order id for position
+    /// delete files.
+    pub fn sort_order_id(&self) -> Option<i32> {
+        self.sort_order_id
+    }
+}
 /// Type of content stored by the data file: data, equality deletes, or
 /// position deletes (all v1 files are data files)
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]

(iceberg-rust) branch main updated: fix: enable public access to ManifestEntry properties (#284)

Reply via email to