alamb commented on code in PR #17275:
URL: https://github.com/apache/datafusion/pull/17275#discussion_r2291910112


##########
datafusion/datasource-parquet/src/opener.rs:
##########
@@ -535,8 +535,8 @@ async fn load_page_index<T: AsyncFileReader>(
     if missing_column_index || missing_offset_index {
         let m = Arc::try_unwrap(Arc::clone(parquet_metadata))
             .unwrap_or_else(|e| e.as_ref().clone());
-        let mut reader =
-            ParquetMetaDataReader::new_with_metadata(m).with_page_indexes(true);
+        let mut reader = ParquetMetaDataReader::new_with_metadata(m)

Review Comment:
   This is due to the following change from @kczimm:
   
   - https://github.com/apache/arrow-rs/issues/8070



##########
Cargo.toml:
##########
@@ -90,62 +90,62 @@ ahash = { version = "0.8", default-features = false, features = [
     "runtime-rng",
 ] }
 apache-avro = { version = "0.17", default-features = false }
-arrow = { version = "56.0.0", features = [
+arrow = { version = "56.1.0", features = [
     "prettyprint",
     "chrono-tz",
 ] }
-arrow-buffer = { version = "56.0.0", default-features = false }
-arrow-flight = { version = "56.0.0", features = [
+arrow-buffer = { version = "56.1.0", default-features = false }
+arrow-flight = { version = "56.1.0", features = [
     "flight-sql-experimental",
 ] }
-arrow-ipc = { version = "56.0.0", default-features = false, features = [
+arrow-ipc = { version = "56.1.0", default-features = false, features = [
     "lz4",
 ] }
-arrow-ord = { version = "56.0.0", default-features = false }
-arrow-schema = { version = "56.0.0", default-features = false }
+arrow-ord = { version = "56.1.0", default-features = false }
+arrow-schema = { version = "56.1.0", default-features = false }
 async-trait = "0.1.89"
 bigdecimal = "0.4.8"
 bytes = "1.10"
 chrono = { version = "0.4.41", default-features = false }
 criterion = "0.5.1"
 ctor = "0.4.3"
 dashmap = "6.0.1"
-datafusion = { path = "datafusion/core", version = "49.0.0", default-features = false }
-datafusion-catalog = { path = "datafusion/catalog", version = "49.0.0" }
-datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "49.0.0" }
-datafusion-common = { path = "datafusion/common", version = "49.0.0", default-features = false }
-datafusion-common-runtime = { path = "datafusion/common-runtime", version = "49.0.0" }
-datafusion-datasource = { path = "datafusion/datasource", version = "49.0.0", default-features = false }
-datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "49.0.0", default-features = false }
-datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "49.0.0", default-features = false }
-datafusion-datasource-json = { path = "datafusion/datasource-json", version = "49.0.0", default-features = false }
-datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "49.0.0", default-features = false }
-datafusion-doc = { path = "datafusion/doc", version = "49.0.0" }
-datafusion-execution = { path = "datafusion/execution", version = "49.0.0" }
-datafusion-expr = { path = "datafusion/expr", version = "49.0.0" }
-datafusion-expr-common = { path = "datafusion/expr-common", version = "49.0.0" }
-datafusion-ffi = { path = "datafusion/ffi", version = "49.0.0" }
-datafusion-functions = { path = "datafusion/functions", version = "49.0.0" }
-datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "49.0.0" }
-datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "49.0.0" }
-datafusion-functions-nested = { path = "datafusion/functions-nested", version = "49.0.0" }
-datafusion-functions-table = { path = "datafusion/functions-table", version = "49.0.0" }
-datafusion-functions-window = { path = "datafusion/functions-window", version = "49.0.0" }
-datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "49.0.0" }
-datafusion-macros = { path = "datafusion/macros", version = "49.0.0" }
-datafusion-optimizer = { path = "datafusion/optimizer", version = "49.0.0", default-features = false }
-datafusion-physical-expr = { path = "datafusion/physical-expr", version = "49.0.0", default-features = false }
-datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "49.0.0", default-features = false }
-datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "49.0.0", default-features = false }
-datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "49.0.0" }
-datafusion-physical-plan = { path = "datafusion/physical-plan", version = "49.0.0" }
-datafusion-proto = { path = "datafusion/proto", version = "49.0.0" }
-datafusion-proto-common = { path = "datafusion/proto-common", version = "49.0.0" }
-datafusion-pruning = { path = "datafusion/pruning", version = "49.0.0" }
-datafusion-session = { path = "datafusion/session", version = "49.0.0" }
-datafusion-spark = { path = "datafusion/spark", version = "49.0.0" }
-datafusion-sql = { path = "datafusion/sql", version = "49.0.0" }
-datafusion-substrait = { path = "datafusion/substrait", version = "49.0.0" }
+datafusion = { path = "datafusion/core", version = "49.0.1", default-features = false }

Review Comment:
   Drive-by change: update all versions in Cargo.toml to the latest.



##########
datafusion-cli/src/main.rs:
##########
@@ -570,15 +570,15 @@ mod tests {
         let df = ctx.sql(sql).await?;
         let rbs = df.collect().await?;
 
-        assert_snapshot!(batches_to_string(&rbs),@r#"
+        assert_snapshot!(batches_to_string(&rbs),@r"
         +-----------------------------------+-----------------+---------------------+------+------------------+
         | filename                          | file_size_bytes | metadata_size_bytes | hits | extra            |
         +-----------------------------------+-----------------+---------------------+------+------------------+
         | alltypes_plain.parquet            | 1851            | 10181               | 2    | page_index=false |
-        | alltypes_tiny_pages.parquet       | 454233          | 881634              | 2    | page_index=true  |
+        | alltypes_tiny_pages.parquet       | 454233          | 881418              | 2    | page_index=true  |

Review Comment:
   I don't really know why the in-memory size of the ParquetMetadata has decreased, but it seems like a good improvement to me.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to