This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a40750033e Upgrade arrow-rs, parquet to `54.0.0` and pyo3 to `0.23.3` (#14153)
a40750033e is described below

commit a40750033e87b00dc9e12e19a64f64f42c027c1c
Author: Owen Leung <[email protected]>
AuthorDate: Sat Jan 18 02:15:48 2025 +0800

    Upgrade arrow-rs, parquet to `54.0.0` and pyo3 to `0.23.3` (#14153)
    
    * Upgrade arrow-rs, parquet and pyo3
    
    * Fix fmt CI
---
 Cargo.toml                                         | 16 ++---
 datafusion-cli/Cargo.lock                          | 68 ++++++++++------------
 datafusion-cli/Cargo.toml                          |  4 +-
 datafusion/common/Cargo.toml                       |  2 +-
 .../common/src/file_options/parquet_writer.rs      | 18 ++++--
 datafusion/common/src/pyarrow.rs                   | 54 +++++++++++------
 .../core/src/datasource/file_format/parquet.rs     |  8 +--
 .../physical_plan/parquet/row_group_filter.rs      |  6 +-
 datafusion/core/tests/fuzz_cases/pruning.rs        |  5 +-
 datafusion/physical-plan/src/aggregates/mod.rs     | 16 ++---
 datafusion/proto-common/src/from_proto/mod.rs      |  7 ++-
 datafusion/proto-common/src/to_proto/mod.rs        |  1 +
 .../proto/tests/cases/roundtrip_logical_plan.rs    |  2 +
 13 files changed, 113 insertions(+), 94 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 1581c115f5..aa412cba51 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -77,21 +77,21 @@ version = "44.0.0"
 ahash = { version = "0.8", default-features = false, features = [
     "runtime-rng",
 ] }
-arrow = { version = "53.3.0", features = [
+arrow = { version = "54.0.0", features = [
     "prettyprint",
 ] }
-arrow-array = { version = "53.3.0", default-features = false, features = [
+arrow-array = { version = "54.0.0", default-features = false, features = [
     "chrono-tz",
 ] }
-arrow-buffer = { version = "53.3.0", default-features = false }
-arrow-flight = { version = "53.3.0", features = [
+arrow-buffer = { version = "54.0.0", default-features = false }
+arrow-flight = { version = "54.0.0", features = [
     "flight-sql-experimental",
 ] }
-arrow-ipc = { version = "53.3.0", default-features = false, features = [
+arrow-ipc = { version = "54.0.0", default-features = false, features = [
     "lz4",
 ] }
-arrow-ord = { version = "53.3.0", default-features = false }
-arrow-schema = { version = "53.3.0", default-features = false }
+arrow-ord = { version = "54.0.0", default-features = false }
+arrow-schema = { version = "54.0.0", default-features = false }
 async-trait = "0.1.73"
 bigdecimal = "0.4.7"
 bytes = "1.4"
@@ -133,7 +133,7 @@ itertools = "0.14"
 log = "^0.4"
 object_store = { version = "0.11.0", default-features = false }
 parking_lot = "0.12"
-parquet = { version = "53.3.0", default-features = false, features = [
+parquet = { version = "54.0.0", default-features = false, features = [
     "arrow",
     "async",
     "object_store",
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index fbd6cc01ad..8c7f2113ee 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -175,9 +175,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
 [[package]]
 name = "arrow"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d"
+checksum = "d2ccdcc8fb14508ca20aaec7076032e5c0b0751b906036d4496786e2f227a37a"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -196,24 +196,23 @@ dependencies = [
 
 [[package]]
 name = "arrow-arith"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f"
+checksum = "a1aad8e27f32e411a0fc0bf5a625a35f0bf9b9f871cf4542abe31f7cef4beea2"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
  "arrow-data",
  "arrow-schema",
  "chrono",
- "half",
  "num",
 ]
 
 [[package]]
 name = "arrow-array"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f"
+checksum = "bd6ed90c28c6f73a706c55799b8cc3a094e89257238e5b1d65ca7c70bd3ae23f"
 dependencies = [
  "ahash",
  "arrow-buffer",
@@ -228,9 +227,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-buffer"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16"
+checksum = "fe4a40bdc1552ea10fbdeae4e5a945d8572c32f66bce457b96c13d9c46b80447"
 dependencies = [
  "bytes",
  "half",
@@ -239,9 +238,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-cast"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd"
+checksum = "430c0a21aa7f81bcf0f97c57216d7127795ea755f494d27bae2bd233be43c2cc"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -260,28 +259,25 @@ dependencies = [
 
 [[package]]
 name = "arrow-csv"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97"
+checksum = "b4444c8f8c57ac00e6a679ede67d1ae8872c170797dc45b46f75702437a77888"
 dependencies = [
  "arrow-array",
- "arrow-buffer",
  "arrow-cast",
- "arrow-data",
  "arrow-schema",
  "chrono",
  "csv",
  "csv-core",
  "lazy_static",
- "lexical-core",
  "regex",
 ]
 
 [[package]]
 name = "arrow-data"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623"
+checksum = "09af476cfbe9879937e50b1334c73189de6039186e025b1b1ac84b283b87b20e"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -291,13 +287,12 @@ dependencies = [
 
 [[package]]
 name = "arrow-ipc"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8"
+checksum = "136296e8824333a8a4c4a6e508e4aa65d5678b801246d0408825ae7b2523c628"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
- "arrow-cast",
  "arrow-data",
  "arrow-schema",
  "flatbuffers",
@@ -306,9 +301,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-json"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85"
+checksum = "e222ad0e419ab8276818c5605a5bb1e35ed86fa8c5e550726433cc63b09c3c78"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -326,26 +321,23 @@ dependencies = [
 
 [[package]]
 name = "arrow-ord"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd"
+checksum = "eddf14c5f03b679ec8ceac4dfac43f63cdc4ed54dab3cc120a4ef46af38481eb"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
  "arrow-data",
  "arrow-schema",
  "arrow-select",
- "half",
- "num",
 ]
 
 [[package]]
 name = "arrow-row"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff"
+checksum = "e9acdc58da19f383f4ba381fa0e3583534ae2ceb31269aaf4a03f08ff13e8443"
 dependencies = [
- "ahash",
  "arrow-array",
  "arrow-buffer",
  "arrow-data",
@@ -355,15 +347,15 @@ dependencies = [
 
 [[package]]
 name = "arrow-schema"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678"
+checksum = "3a1822a1a952955637e85e8f9d6b0e04dd75d65492b87ec548dd593d3a1f772b"
 
 [[package]]
 name = "arrow-select"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722"
+checksum = "5c4172e9a12dfe15303d3926269f9ead471ea93bdd067d113abc65cb6c48e246"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -375,9 +367,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-string"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf"
+checksum = "73683040445f4932342781926189901c9521bb1a787c35dbe628a3ce51372d3c"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -2903,9 +2895,9 @@ dependencies = [
 
 [[package]]
 name = "parquet"
-version = "53.3.0"
+version = "54.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191"
+checksum = "3334c50239d9f4951653d84fa6f636da86f53742e5e5849a30fbe852f3ff4383"
 dependencies = [
  "ahash",
  "arrow-array",
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index 8b5bb901b7..b9d190ac07 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -29,7 +29,7 @@ rust-version = "1.80.1"
 readme = "README.md"
 
 [dependencies]
-arrow = { version = "53.0.0" }
+arrow = { version = "54.0.0" }
 async-trait = "0.1.73"
 ## 1.5.13 requires a hiher MSRV 1.81 so lock until DataFusion MSRV catches up
 aws-config = "=1.5.10"
@@ -62,7 +62,7 @@ home = "=0.5.9"
 mimalloc = { version = "0.1", default-features = false }
 object_store = { version = "0.11.0", features = ["aws", "gcp", "http"] }
 parking_lot = { version = "0.12" }
-parquet = { version = "53.0.0", default-features = false }
+parquet = { version = "54.0.0", default-features = false }
 regex = "1.8"
 rustyline = "14.0"
 tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] }
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index feba589082..fe6d652be7 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -64,7 +64,7 @@ log = { workspace = true }
 object_store = { workspace = true, optional = true }
 parquet = { workspace = true, optional = true, default-features = true }
 paste = "1.0.15"
-pyo3 = { version = "0.22.0", optional = true }
+pyo3 = { version = "0.23.3", optional = true }
 recursive = { workspace = true, optional = true }
 sqlparser = { workspace = true }
 tokio = { workspace = true }
diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs
index 46bce06470..3f06e11bb3 100644
--- a/datafusion/common/src/file_options/parquet_writer.rs
+++ b/datafusion/common/src/file_options/parquet_writer.rs
@@ -26,6 +26,7 @@ use crate::{
 };
 
 use arrow_schema::Schema;
+#[allow(deprecated)]
 use parquet::{
     arrow::ARROW_SCHEMA_META_KEY,
     basic::{BrotliLevel, GzipLevel, ZstdLevel},
@@ -157,8 +158,10 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
             }
 
             if let Some(max_statistics_size) = options.max_statistics_size {
-                builder =
-                    builder.set_column_max_statistics_size(path, max_statistics_size);
+                builder = {
+                    #[allow(deprecated)]
+                    builder.set_column_max_statistics_size(path, max_statistics_size)
+                }
             }
         }
 
@@ -244,15 +247,19 @@ impl ParquetOptions {
                     .and_then(|s| parse_statistics_string(s).ok())
                     .unwrap_or(DEFAULT_STATISTICS_ENABLED),
             )
-            .set_max_statistics_size(
-                max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
-            )
             .set_max_row_group_size(*max_row_group_size)
             .set_created_by(created_by.clone())
             .set_column_index_truncate_length(*column_index_truncate_length)
             .set_data_page_row_count_limit(*data_page_row_count_limit)
             .set_bloom_filter_enabled(*bloom_filter_on_write);
 
+        builder = {
+            #[allow(deprecated)]
+            builder.set_max_statistics_size(
+                max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
+            )
+        };
+
         if let Some(bloom_filter_fpp) = bloom_filter_fpp {
             builder = builder.set_bloom_filter_fpp(*bloom_filter_fpp);
         };
@@ -528,6 +535,7 @@ mod tests {
             ),
             bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp),
             bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv),
+            #[allow(deprecated)]
             max_statistics_size: Some(props.max_statistics_size(&col)),
         }
     }
diff --git a/datafusion/common/src/pyarrow.rs b/datafusion/common/src/pyarrow.rs
index 29869c8da5..60dde78611 100644
--- a/datafusion/common/src/pyarrow.rs
+++ b/datafusion/common/src/pyarrow.rs
@@ -23,7 +23,7 @@ use arrow_array::Array;
 use pyo3::exceptions::PyException;
 use pyo3::prelude::PyErr;
 use pyo3::types::{PyAnyMethods, PyList};
-use pyo3::{Bound, FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python};
+use pyo3::{Bound, FromPyObject, IntoPyObject, PyAny, PyObject, PyResult, Python};
 
 use crate::{DataFusionError, ScalarValue};
 
@@ -40,8 +40,8 @@ impl FromPyArrow for ScalarValue {
         let val = value.call_method0("as_py")?;
 
         // construct pyarrow array from the python value and pyarrow type
-        let factory = py.import_bound("pyarrow")?.getattr("array")?;
-        let args = PyList::new_bound(py, [val]);
+        let factory = py.import("pyarrow")?.getattr("array")?;
+        let args = PyList::new(py, [val])?;
         let array = factory.call1((args, typ))?;
 
         // convert the pyarrow array to rust array using C data interface
@@ -69,14 +69,25 @@ impl<'source> FromPyObject<'source> for ScalarValue {
     }
 }
 
-impl IntoPy<PyObject> for ScalarValue {
-    fn into_py(self, py: Python) -> PyObject {
-        self.to_pyarrow(py).unwrap()
+impl<'source> IntoPyObject<'source> for ScalarValue {
+    type Target = PyAny;
+
+    type Output = Bound<'source, Self::Target>;
+
+    type Error = PyErr;
+
+    fn into_pyobject(self, py: Python<'source>) -> Result<Self::Output, Self::Error> {
+        let array = self.to_array()?;
+        // convert to pyarrow array using C data interface
+        let pyarray = array.to_data().to_pyarrow(py)?;
+        let pyarray_bound = pyarray.bind(py);
+        pyarray_bound.call_method1("__getitem__", (0,))
     }
 }
 
 #[cfg(test)]
 mod tests {
+    use pyo3::ffi::c_str;
     use pyo3::prepare_freethreaded_python;
     use pyo3::py_run;
     use pyo3::types::PyDict;
@@ -86,10 +97,12 @@ mod tests {
     fn init_python() {
         prepare_freethreaded_python();
         Python::with_gil(|py| {
-            if py.run_bound("import pyarrow", None, None).is_err() {
-                let locals = PyDict::new_bound(py);
-                py.run_bound(
-                    "import sys; executable = sys.executable; python_path = sys.path",
+            if py.run(c_str!("import pyarrow"), None, None).is_err() {
+                let locals = PyDict::new(py);
+                py.run(
+                    c_str!(
+                        "import sys; executable = sys.executable; python_path = sys.path"
+                    ),
                     None,
                     Some(&locals),
                 )
@@ -135,20 +148,25 @@ mod tests {
     }
 
     #[test]
-    fn test_py_scalar() {
+    fn test_py_scalar() -> PyResult<()> {
         init_python();
 
-        // TODO: remove this attribute when bumping pyo3 to v0.23.0
-        // See: <https://github.com/PyO3/pyo3/blob/v0.23.0/guide/src/migration.md#gil-refs-feature-removed>
-        #[allow(unexpected_cfgs)]
-        Python::with_gil(|py| {
+        Python::with_gil(|py| -> PyResult<()> {
             let scalar_float = ScalarValue::Float64(Some(12.34));
-            let py_float = scalar_float.into_py(py).call_method0(py, "as_py").unwrap();
+            let py_float = scalar_float
+                .into_pyobject(py)?
+                .call_method0("as_py")
+                .unwrap();
             py_run!(py, py_float, "assert py_float == 12.34");
 
             let scalar_string = ScalarValue::Utf8(Some("Hello!".to_string()));
-            let py_string = scalar_string.into_py(py).call_method0(py, "as_py").unwrap();
+            let py_string = scalar_string
+                .into_pyobject(py)?
+                .call_method0("as_py")
+                .unwrap();
             py_run!(py, py_string, "assert py_string == 'Hello!'");
-        });
+
+            Ok(())
+        })
     }
 }
diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs
index 9243f6f12b..4c7169764a 100644
--- a/datafusion/core/src/datasource/file_format/parquet.rs
+++ b/datafusion/core/src/datasource/file_format/parquet.rs
@@ -79,9 +79,7 @@ use parquet::arrow::arrow_writer::{
     ArrowLeafColumn, ArrowWriterOptions,
 };
 use parquet::arrow::async_reader::MetadataFetch;
-use parquet::arrow::{
-    arrow_to_parquet_schema, parquet_to_arrow_schema, AsyncArrowWriter,
-};
+use parquet::arrow::{parquet_to_arrow_schema, ArrowSchemaConverter, AsyncArrowWriter};
 use parquet::errors::ParquetError;
 use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData};
 use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder};
@@ -916,7 +914,7 @@ fn spawn_column_parallel_row_group_writer(
     max_buffer_size: usize,
     pool: &Arc<dyn MemoryPool>,
 ) -> Result<(Vec<ColumnWriterTask>, Vec<ColSender>)> {
-    let schema_desc = arrow_to_parquet_schema(&schema)?;
+    let schema_desc = ArrowSchemaConverter::new().convert(&schema)?;
     let col_writers = get_column_writers(&schema_desc, &parquet_props, &schema)?;
     let num_columns = col_writers.len();
 
@@ -1119,7 +1117,7 @@ async fn concatenate_parallel_row_groups(
     let mut file_reservation =
         MemoryConsumer::new("ParquetSink(SerializedFileWriter)").register(&pool);
 
-    let schema_desc = arrow_to_parquet_schema(schema.as_ref())?;
+    let schema_desc = ArrowSchemaConverter::new().convert(schema.as_ref())?;
     let mut parquet_writer = SerializedFileWriter::new(
         merged_buff.clone(),
         schema_desc.root_schema_ptr(),
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
index 3854f04566..7658663be5 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
@@ -456,8 +456,8 @@ mod tests {
     use datafusion_expr::{cast, col, lit, Expr};
     use datafusion_physical_expr::planner::logical2physical;
 
-    use parquet::arrow::arrow_to_parquet_schema;
     use parquet::arrow::async_reader::ParquetObjectReader;
+    use parquet::arrow::ArrowSchemaConverter;
     use parquet::basic::LogicalType;
     use parquet::data_type::{ByteArray, FixedLenByteArray};
     use parquet::file::metadata::ColumnChunkMetaData;
@@ -744,7 +744,7 @@ mod tests {
             Field::new("c1", DataType::Int32, false),
             Field::new("c2", DataType::Boolean, false),
         ]));
-        let schema_descr = arrow_to_parquet_schema(&schema).unwrap();
+        let schema_descr = ArrowSchemaConverter::new().convert(&schema).unwrap();
         let expr = col("c1").gt(lit(15)).and(col("c2").is_null());
         let expr = logical2physical(&expr, &schema);
         let pruning_predicate = PruningPredicate::try_new(expr, schema.clone()).unwrap();
@@ -773,7 +773,7 @@ mod tests {
             Field::new("c1", DataType::Int32, false),
             Field::new("c2", DataType::Boolean, false),
         ]));
-        let schema_descr = arrow_to_parquet_schema(&schema).unwrap();
+        let schema_descr = ArrowSchemaConverter::new().convert(&schema).unwrap();
         let expr = col("c1")
             .gt(lit(15))
             .and(col("c2").eq(lit(ScalarValue::Boolean(None))));
diff --git a/datafusion/core/tests/fuzz_cases/pruning.rs b/datafusion/core/tests/fuzz_cases/pruning.rs
index 3725e6d908..8ce980ee08 100644
--- a/datafusion/core/tests/fuzz_cases/pruning.rs
+++ b/datafusion/core/tests/fuzz_cases/pruning.rs
@@ -347,7 +347,10 @@ async fn write_parquet_file(
     let mut buf = BytesMut::new().writer();
     let mut props = WriterProperties::builder();
     if let Some(truncation_length) = truncation_length {
-        props = props.set_max_statistics_size(truncation_length);
+        props = {
+            #[allow(deprecated)]
+            props.set_max_statistics_size(truncation_length)
+        }
     }
     props = props.set_statistics_enabled(EnabledStatistics::Chunk); // row group level
     let props = props.build();
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
index fb29254249..4502466f0b 100644
--- a/datafusion/physical-plan/src/aggregates/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -2470,25 +2470,21 @@ mod tests {
                     "labels".to_string(),
                     DataType::Struct(
                         vec![
-                            Field::new_dict(
+                            Field::new(
                                 "a".to_string(),
                                 DataType::Dictionary(
                                     Box::new(DataType::Int32),
                                     Box::new(DataType::Utf8),
                                 ),
                                 true,
-                                0,
-                                false,
                             ),
-                            Field::new_dict(
+                            Field::new(
                                 "b".to_string(),
                                 DataType::Dictionary(
                                     Box::new(DataType::Int32),
                                     Box::new(DataType::Utf8),
                                 ),
                                 true,
-                                0,
-                                false,
                             ),
                         ]
                         .into(),
@@ -2500,15 +2496,13 @@ mod tests {
             vec![
                 Arc::new(StructArray::from(vec![
                     (
-                        Arc::new(Field::new_dict(
+                        Arc::new(Field::new(
                             "a".to_string(),
                             DataType::Dictionary(
                                 Box::new(DataType::Int32),
                                 Box::new(DataType::Utf8),
                             ),
                             true,
-                            0,
-                            false,
                         )),
                         Arc::new(
                             vec![Some("a"), None, Some("a")]
@@ -2517,15 +2511,13 @@ mod tests {
                         ) as ArrayRef,
                     ),
                     (
-                        Arc::new(Field::new_dict(
+                        Arc::new(Field::new(
                             "b".to_string(),
                             DataType::Dictionary(
                                 Box::new(DataType::Int32),
                                 Box::new(DataType::Utf8),
                             ),
                             true,
-                            0,
-                            false,
                         )),
                         Arc::new(
                             vec![Some("b"), Some("c"), Some("b")]
diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs
index eb75eb0948..37462acec7 100644
--- a/datafusion/proto-common/src/from_proto/mod.rs
+++ b/datafusion/proto-common/src/from_proto/mod.rs
@@ -321,6 +321,7 @@ impl TryFrom<&protobuf::Field> for Field {
     fn try_from(field: &protobuf::Field) -> Result<Self, Self::Error> {
         let datatype = field.arrow_type.as_deref().required("arrow_type")?;
         let field = if field.dict_id != 0 {
+            #[allow(deprecated)]
             Self::new_dict(
                 field.name.as_str(),
                 datatype,
@@ -434,7 +435,11 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
 
                     let id = dict_batch.id();
 
-                    let fields_using_this_dictionary = schema.fields_with_dict_id(id);
+                    let fields_using_this_dictionary = {
+                        #[allow(deprecated)]
+                        schema.fields_with_dict_id(id)
+                    };
+
                     let first_field = fields_using_this_dictionary.first().ok_or_else(|| {
                         Error::General("dictionary id not found in schema while deserializing ScalarValue::List".to_string())
                     })?;
diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs
index 79faaba864..c69f7b85f4 100644
--- a/datafusion/proto-common/src/to_proto/mod.rs
+++ b/datafusion/proto-common/src/to_proto/mod.rs
@@ -97,6 +97,7 @@ impl TryFrom<&Field> for protobuf::Field {
             nullable: field.is_nullable(),
             children: Vec::new(),
             metadata: field.metadata().clone(),
+            #[allow(deprecated)]
             dict_id: field.dict_id().unwrap_or(0),
             dict_ordered: field.dict_is_ordered().unwrap_or(false),
         })
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index d7620e65c4..3544e6c28b 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -1815,6 +1815,8 @@ fn round_trip_datatype() {
     }
 }
 
+// TODO file a ticket about handling deprecated dict_id attributes
+#[allow(deprecated)]
 #[test]
 fn roundtrip_dict_id() -> Result<()> {
     let dict_id = 42;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to