This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new a40750033e Upgrade arrow-rs, parquet to `54.0.0` and pyo3 to `0.23.3` (#14153)
a40750033e is described below
commit a40750033e87b00dc9e12e19a64f64f42c027c1c
Author: Owen Leung <[email protected]>
AuthorDate: Sat Jan 18 02:15:48 2025 +0800
Upgrade arrow-rs, parquet to `54.0.0` and pyo3 to `0.23.3` (#14153)
* Upgrade arrow-rs, parquet and pyo3
* Fix fmt CI
---
Cargo.toml | 16 ++---
datafusion-cli/Cargo.lock | 68 ++++++++++------------
datafusion-cli/Cargo.toml | 4 +-
datafusion/common/Cargo.toml | 2 +-
.../common/src/file_options/parquet_writer.rs | 18 ++++--
datafusion/common/src/pyarrow.rs | 54 +++++++++++------
.../core/src/datasource/file_format/parquet.rs | 8 +--
.../physical_plan/parquet/row_group_filter.rs | 6 +-
datafusion/core/tests/fuzz_cases/pruning.rs | 5 +-
datafusion/physical-plan/src/aggregates/mod.rs | 16 ++---
datafusion/proto-common/src/from_proto/mod.rs | 7 ++-
datafusion/proto-common/src/to_proto/mod.rs | 1 +
.../proto/tests/cases/roundtrip_logical_plan.rs | 2 +
13 files changed, 113 insertions(+), 94 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 1581c115f5..aa412cba51 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -77,21 +77,21 @@ version = "44.0.0"
ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
-arrow = { version = "53.3.0", features = [
+arrow = { version = "54.0.0", features = [
"prettyprint",
] }
-arrow-array = { version = "53.3.0", default-features = false, features = [
+arrow-array = { version = "54.0.0", default-features = false, features = [
"chrono-tz",
] }
-arrow-buffer = { version = "53.3.0", default-features = false }
-arrow-flight = { version = "53.3.0", features = [
+arrow-buffer = { version = "54.0.0", default-features = false }
+arrow-flight = { version = "54.0.0", features = [
"flight-sql-experimental",
] }
-arrow-ipc = { version = "53.3.0", default-features = false, features = [
+arrow-ipc = { version = "54.0.0", default-features = false, features = [
"lz4",
] }
-arrow-ord = { version = "53.3.0", default-features = false }
-arrow-schema = { version = "53.3.0", default-features = false }
+arrow-ord = { version = "54.0.0", default-features = false }
+arrow-schema = { version = "54.0.0", default-features = false }
async-trait = "0.1.73"
bigdecimal = "0.4.7"
bytes = "1.4"
@@ -133,7 +133,7 @@ itertools = "0.14"
log = "^0.4"
object_store = { version = "0.11.0", default-features = false }
parking_lot = "0.12"
-parquet = { version = "53.3.0", default-features = false, features = [
+parquet = { version = "54.0.0", default-features = false, features = [
"arrow",
"async",
"object_store",
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index fbd6cc01ad..8c7f2113ee 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -175,9 +175,9 @@ checksum =
"7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arrow"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d"
+checksum = "d2ccdcc8fb14508ca20aaec7076032e5c0b0751b906036d4496786e2f227a37a"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -196,24 +196,23 @@ dependencies = [
[[package]]
name = "arrow-arith"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f"
+checksum = "a1aad8e27f32e411a0fc0bf5a625a35f0bf9b9f871cf4542abe31f7cef4beea2"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"chrono",
- "half",
"num",
]
[[package]]
name = "arrow-array"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f"
+checksum = "bd6ed90c28c6f73a706c55799b8cc3a094e89257238e5b1d65ca7c70bd3ae23f"
dependencies = [
"ahash",
"arrow-buffer",
@@ -228,9 +227,9 @@ dependencies = [
[[package]]
name = "arrow-buffer"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16"
+checksum = "fe4a40bdc1552ea10fbdeae4e5a945d8572c32f66bce457b96c13d9c46b80447"
dependencies = [
"bytes",
"half",
@@ -239,9 +238,9 @@ dependencies = [
[[package]]
name = "arrow-cast"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd"
+checksum = "430c0a21aa7f81bcf0f97c57216d7127795ea755f494d27bae2bd233be43c2cc"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -260,28 +259,25 @@ dependencies = [
[[package]]
name = "arrow-csv"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97"
+checksum = "b4444c8f8c57ac00e6a679ede67d1ae8872c170797dc45b46f75702437a77888"
dependencies = [
"arrow-array",
- "arrow-buffer",
"arrow-cast",
- "arrow-data",
"arrow-schema",
"chrono",
"csv",
"csv-core",
"lazy_static",
- "lexical-core",
"regex",
]
[[package]]
name = "arrow-data"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623"
+checksum = "09af476cfbe9879937e50b1334c73189de6039186e025b1b1ac84b283b87b20e"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -291,13 +287,12 @@ dependencies = [
[[package]]
name = "arrow-ipc"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8"
+checksum = "136296e8824333a8a4c4a6e508e4aa65d5678b801246d0408825ae7b2523c628"
dependencies = [
"arrow-array",
"arrow-buffer",
- "arrow-cast",
"arrow-data",
"arrow-schema",
"flatbuffers",
@@ -306,9 +301,9 @@ dependencies = [
[[package]]
name = "arrow-json"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85"
+checksum = "e222ad0e419ab8276818c5605a5bb1e35ed86fa8c5e550726433cc63b09c3c78"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -326,26 +321,23 @@ dependencies = [
[[package]]
name = "arrow-ord"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd"
+checksum = "eddf14c5f03b679ec8ceac4dfac43f63cdc4ed54dab3cc120a4ef46af38481eb"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"arrow-select",
- "half",
- "num",
]
[[package]]
name = "arrow-row"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff"
+checksum = "e9acdc58da19f383f4ba381fa0e3583534ae2ceb31269aaf4a03f08ff13e8443"
dependencies = [
- "ahash",
"arrow-array",
"arrow-buffer",
"arrow-data",
@@ -355,15 +347,15 @@ dependencies = [
[[package]]
name = "arrow-schema"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678"
+checksum = "3a1822a1a952955637e85e8f9d6b0e04dd75d65492b87ec548dd593d3a1f772b"
[[package]]
name = "arrow-select"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722"
+checksum = "5c4172e9a12dfe15303d3926269f9ead471ea93bdd067d113abc65cb6c48e246"
dependencies = [
"ahash",
"arrow-array",
@@ -375,9 +367,9 @@ dependencies = [
[[package]]
name = "arrow-string"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf"
+checksum = "73683040445f4932342781926189901c9521bb1a787c35dbe628a3ce51372d3c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -2903,9 +2895,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "53.3.0"
+version = "54.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191"
+checksum = "3334c50239d9f4951653d84fa6f636da86f53742e5e5849a30fbe852f3ff4383"
dependencies = [
"ahash",
"arrow-array",
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index 8b5bb901b7..b9d190ac07 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -29,7 +29,7 @@ rust-version = "1.80.1"
readme = "README.md"
[dependencies]
-arrow = { version = "53.0.0" }
+arrow = { version = "54.0.0" }
async-trait = "0.1.73"
## 1.5.13 requires a hiher MSRV 1.81 so lock until DataFusion MSRV catches up
aws-config = "=1.5.10"
@@ -62,7 +62,7 @@ home = "=0.5.9"
mimalloc = { version = "0.1", default-features = false }
object_store = { version = "0.11.0", features = ["aws", "gcp", "http"] }
parking_lot = { version = "0.12" }
-parquet = { version = "53.0.0", default-features = false }
+parquet = { version = "54.0.0", default-features = false }
regex = "1.8"
rustyline = "14.0"
tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread",
"sync", "parking_lot", "signal"] }
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index feba589082..fe6d652be7 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -64,7 +64,7 @@ log = { workspace = true }
object_store = { workspace = true, optional = true }
parquet = { workspace = true, optional = true, default-features = true }
paste = "1.0.15"
-pyo3 = { version = "0.22.0", optional = true }
+pyo3 = { version = "0.23.3", optional = true }
recursive = { workspace = true, optional = true }
sqlparser = { workspace = true }
tokio = { workspace = true }
diff --git a/datafusion/common/src/file_options/parquet_writer.rs
b/datafusion/common/src/file_options/parquet_writer.rs
index 46bce06470..3f06e11bb3 100644
--- a/datafusion/common/src/file_options/parquet_writer.rs
+++ b/datafusion/common/src/file_options/parquet_writer.rs
@@ -26,6 +26,7 @@ use crate::{
};
use arrow_schema::Schema;
+#[allow(deprecated)]
use parquet::{
arrow::ARROW_SCHEMA_META_KEY,
basic::{BrotliLevel, GzipLevel, ZstdLevel},
@@ -157,8 +158,10 @@ impl TryFrom<&TableParquetOptions> for
WriterPropertiesBuilder {
}
if let Some(max_statistics_size) = options.max_statistics_size {
- builder =
- builder.set_column_max_statistics_size(path,
max_statistics_size);
+ builder = {
+ #[allow(deprecated)]
+ builder.set_column_max_statistics_size(path,
max_statistics_size)
+ }
}
}
@@ -244,15 +247,19 @@ impl ParquetOptions {
.and_then(|s| parse_statistics_string(s).ok())
.unwrap_or(DEFAULT_STATISTICS_ENABLED),
)
- .set_max_statistics_size(
- max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
- )
.set_max_row_group_size(*max_row_group_size)
.set_created_by(created_by.clone())
.set_column_index_truncate_length(*column_index_truncate_length)
.set_data_page_row_count_limit(*data_page_row_count_limit)
.set_bloom_filter_enabled(*bloom_filter_on_write);
+ builder = {
+ #[allow(deprecated)]
+ builder.set_max_statistics_size(
+ max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
+ )
+ };
+
if let Some(bloom_filter_fpp) = bloom_filter_fpp {
builder = builder.set_bloom_filter_fpp(*bloom_filter_fpp);
};
@@ -528,6 +535,7 @@ mod tests {
),
bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp),
bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv),
+ #[allow(deprecated)]
max_statistics_size: Some(props.max_statistics_size(&col)),
}
}
diff --git a/datafusion/common/src/pyarrow.rs b/datafusion/common/src/pyarrow.rs
index 29869c8da5..60dde78611 100644
--- a/datafusion/common/src/pyarrow.rs
+++ b/datafusion/common/src/pyarrow.rs
@@ -23,7 +23,7 @@ use arrow_array::Array;
use pyo3::exceptions::PyException;
use pyo3::prelude::PyErr;
use pyo3::types::{PyAnyMethods, PyList};
-use pyo3::{Bound, FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python};
+use pyo3::{Bound, FromPyObject, IntoPyObject, PyAny, PyObject, PyResult,
Python};
use crate::{DataFusionError, ScalarValue};
@@ -40,8 +40,8 @@ impl FromPyArrow for ScalarValue {
let val = value.call_method0("as_py")?;
// construct pyarrow array from the python value and pyarrow type
- let factory = py.import_bound("pyarrow")?.getattr("array")?;
- let args = PyList::new_bound(py, [val]);
+ let factory = py.import("pyarrow")?.getattr("array")?;
+ let args = PyList::new(py, [val])?;
let array = factory.call1((args, typ))?;
// convert the pyarrow array to rust array using C data interface
@@ -69,14 +69,25 @@ impl<'source> FromPyObject<'source> for ScalarValue {
}
}
-impl IntoPy<PyObject> for ScalarValue {
- fn into_py(self, py: Python) -> PyObject {
- self.to_pyarrow(py).unwrap()
+impl<'source> IntoPyObject<'source> for ScalarValue {
+ type Target = PyAny;
+
+ type Output = Bound<'source, Self::Target>;
+
+ type Error = PyErr;
+
+ fn into_pyobject(self, py: Python<'source>) -> Result<Self::Output,
Self::Error> {
+ let array = self.to_array()?;
+ // convert to pyarrow array using C data interface
+ let pyarray = array.to_data().to_pyarrow(py)?;
+ let pyarray_bound = pyarray.bind(py);
+ pyarray_bound.call_method1("__getitem__", (0,))
}
}
#[cfg(test)]
mod tests {
+ use pyo3::ffi::c_str;
use pyo3::prepare_freethreaded_python;
use pyo3::py_run;
use pyo3::types::PyDict;
@@ -86,10 +97,12 @@ mod tests {
fn init_python() {
prepare_freethreaded_python();
Python::with_gil(|py| {
- if py.run_bound("import pyarrow", None, None).is_err() {
- let locals = PyDict::new_bound(py);
- py.run_bound(
- "import sys; executable = sys.executable; python_path =
sys.path",
+ if py.run(c_str!("import pyarrow"), None, None).is_err() {
+ let locals = PyDict::new(py);
+ py.run(
+ c_str!(
+ "import sys; executable = sys.executable; python_path
= sys.path"
+ ),
None,
Some(&locals),
)
@@ -135,20 +148,25 @@ mod tests {
}
#[test]
- fn test_py_scalar() {
+ fn test_py_scalar() -> PyResult<()> {
init_python();
- // TODO: remove this attribute when bumping pyo3 to v0.23.0
- // See:
<https://github.com/PyO3/pyo3/blob/v0.23.0/guide/src/migration.md#gil-refs-feature-removed>
- #[allow(unexpected_cfgs)]
- Python::with_gil(|py| {
+ Python::with_gil(|py| -> PyResult<()> {
let scalar_float = ScalarValue::Float64(Some(12.34));
- let py_float = scalar_float.into_py(py).call_method0(py,
"as_py").unwrap();
+ let py_float = scalar_float
+ .into_pyobject(py)?
+ .call_method0("as_py")
+ .unwrap();
py_run!(py, py_float, "assert py_float == 12.34");
let scalar_string = ScalarValue::Utf8(Some("Hello!".to_string()));
- let py_string = scalar_string.into_py(py).call_method0(py,
"as_py").unwrap();
+ let py_string = scalar_string
+ .into_pyobject(py)?
+ .call_method0("as_py")
+ .unwrap();
py_run!(py, py_string, "assert py_string == 'Hello!'");
- });
+
+ Ok(())
+ })
}
}
diff --git a/datafusion/core/src/datasource/file_format/parquet.rs
b/datafusion/core/src/datasource/file_format/parquet.rs
index 9243f6f12b..4c7169764a 100644
--- a/datafusion/core/src/datasource/file_format/parquet.rs
+++ b/datafusion/core/src/datasource/file_format/parquet.rs
@@ -79,9 +79,7 @@ use parquet::arrow::arrow_writer::{
ArrowLeafColumn, ArrowWriterOptions,
};
use parquet::arrow::async_reader::MetadataFetch;
-use parquet::arrow::{
- arrow_to_parquet_schema, parquet_to_arrow_schema, AsyncArrowWriter,
-};
+use parquet::arrow::{parquet_to_arrow_schema, ArrowSchemaConverter,
AsyncArrowWriter};
use parquet::errors::ParquetError;
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader,
RowGroupMetaData};
use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder};
@@ -916,7 +914,7 @@ fn spawn_column_parallel_row_group_writer(
max_buffer_size: usize,
pool: &Arc<dyn MemoryPool>,
) -> Result<(Vec<ColumnWriterTask>, Vec<ColSender>)> {
- let schema_desc = arrow_to_parquet_schema(&schema)?;
+ let schema_desc = ArrowSchemaConverter::new().convert(&schema)?;
let col_writers = get_column_writers(&schema_desc, &parquet_props,
&schema)?;
let num_columns = col_writers.len();
@@ -1119,7 +1117,7 @@ async fn concatenate_parallel_row_groups(
let mut file_reservation =
MemoryConsumer::new("ParquetSink(SerializedFileWriter)").register(&pool);
- let schema_desc = arrow_to_parquet_schema(schema.as_ref())?;
+ let schema_desc = ArrowSchemaConverter::new().convert(schema.as_ref())?;
let mut parquet_writer = SerializedFileWriter::new(
merged_buff.clone(),
schema_desc.root_schema_ptr(),
diff --git
a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
index 3854f04566..7658663be5 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
@@ -456,8 +456,8 @@ mod tests {
use datafusion_expr::{cast, col, lit, Expr};
use datafusion_physical_expr::planner::logical2physical;
- use parquet::arrow::arrow_to_parquet_schema;
use parquet::arrow::async_reader::ParquetObjectReader;
+ use parquet::arrow::ArrowSchemaConverter;
use parquet::basic::LogicalType;
use parquet::data_type::{ByteArray, FixedLenByteArray};
use parquet::file::metadata::ColumnChunkMetaData;
@@ -744,7 +744,7 @@ mod tests {
Field::new("c1", DataType::Int32, false),
Field::new("c2", DataType::Boolean, false),
]));
- let schema_descr = arrow_to_parquet_schema(&schema).unwrap();
+ let schema_descr =
ArrowSchemaConverter::new().convert(&schema).unwrap();
let expr = col("c1").gt(lit(15)).and(col("c2").is_null());
let expr = logical2physical(&expr, &schema);
let pruning_predicate = PruningPredicate::try_new(expr,
schema.clone()).unwrap();
@@ -773,7 +773,7 @@ mod tests {
Field::new("c1", DataType::Int32, false),
Field::new("c2", DataType::Boolean, false),
]));
- let schema_descr = arrow_to_parquet_schema(&schema).unwrap();
+ let schema_descr =
ArrowSchemaConverter::new().convert(&schema).unwrap();
let expr = col("c1")
.gt(lit(15))
.and(col("c2").eq(lit(ScalarValue::Boolean(None))));
diff --git a/datafusion/core/tests/fuzz_cases/pruning.rs
b/datafusion/core/tests/fuzz_cases/pruning.rs
index 3725e6d908..8ce980ee08 100644
--- a/datafusion/core/tests/fuzz_cases/pruning.rs
+++ b/datafusion/core/tests/fuzz_cases/pruning.rs
@@ -347,7 +347,10 @@ async fn write_parquet_file(
let mut buf = BytesMut::new().writer();
let mut props = WriterProperties::builder();
if let Some(truncation_length) = truncation_length {
- props = props.set_max_statistics_size(truncation_length);
+ props = {
+ #[allow(deprecated)]
+ props.set_max_statistics_size(truncation_length)
+ }
}
props = props.set_statistics_enabled(EnabledStatistics::Chunk); // row
group level
let props = props.build();
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs
b/datafusion/physical-plan/src/aggregates/mod.rs
index fb29254249..4502466f0b 100644
--- a/datafusion/physical-plan/src/aggregates/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -2470,25 +2470,21 @@ mod tests {
"labels".to_string(),
DataType::Struct(
vec![
- Field::new_dict(
+ Field::new(
"a".to_string(),
DataType::Dictionary(
Box::new(DataType::Int32),
Box::new(DataType::Utf8),
),
true,
- 0,
- false,
),
- Field::new_dict(
+ Field::new(
"b".to_string(),
DataType::Dictionary(
Box::new(DataType::Int32),
Box::new(DataType::Utf8),
),
true,
- 0,
- false,
),
]
.into(),
@@ -2500,15 +2496,13 @@ mod tests {
vec![
Arc::new(StructArray::from(vec![
(
- Arc::new(Field::new_dict(
+ Arc::new(Field::new(
"a".to_string(),
DataType::Dictionary(
Box::new(DataType::Int32),
Box::new(DataType::Utf8),
),
true,
- 0,
- false,
)),
Arc::new(
vec![Some("a"), None, Some("a")]
@@ -2517,15 +2511,13 @@ mod tests {
) as ArrayRef,
),
(
- Arc::new(Field::new_dict(
+ Arc::new(Field::new(
"b".to_string(),
DataType::Dictionary(
Box::new(DataType::Int32),
Box::new(DataType::Utf8),
),
true,
- 0,
- false,
)),
Arc::new(
vec![Some("b"), Some("c"), Some("b")]
diff --git a/datafusion/proto-common/src/from_proto/mod.rs
b/datafusion/proto-common/src/from_proto/mod.rs
index eb75eb0948..37462acec7 100644
--- a/datafusion/proto-common/src/from_proto/mod.rs
+++ b/datafusion/proto-common/src/from_proto/mod.rs
@@ -321,6 +321,7 @@ impl TryFrom<&protobuf::Field> for Field {
fn try_from(field: &protobuf::Field) -> Result<Self, Self::Error> {
let datatype = field.arrow_type.as_deref().required("arrow_type")?;
let field = if field.dict_id != 0 {
+ #[allow(deprecated)]
Self::new_dict(
field.name.as_str(),
datatype,
@@ -434,7 +435,11 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
let id = dict_batch.id();
- let fields_using_this_dictionary =
schema.fields_with_dict_id(id);
+ let fields_using_this_dictionary = {
+ #[allow(deprecated)]
+ schema.fields_with_dict_id(id)
+ };
+
let first_field =
fields_using_this_dictionary.first().ok_or_else(|| {
Error::General("dictionary id not found in schema
while deserializing ScalarValue::List".to_string())
})?;
diff --git a/datafusion/proto-common/src/to_proto/mod.rs
b/datafusion/proto-common/src/to_proto/mod.rs
index 79faaba864..c69f7b85f4 100644
--- a/datafusion/proto-common/src/to_proto/mod.rs
+++ b/datafusion/proto-common/src/to_proto/mod.rs
@@ -97,6 +97,7 @@ impl TryFrom<&Field> for protobuf::Field {
nullable: field.is_nullable(),
children: Vec::new(),
metadata: field.metadata().clone(),
+ #[allow(deprecated)]
dict_id: field.dict_id().unwrap_or(0),
dict_ordered: field.dict_is_ordered().unwrap_or(false),
})
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index d7620e65c4..3544e6c28b 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -1815,6 +1815,8 @@ fn round_trip_datatype() {
}
}
+// TODO file a ticket about handling deprecated dict_id attributes
+#[allow(deprecated)]
#[test]
fn roundtrip_dict_id() -> Result<()> {
let dict_id = 42;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]