This is an automated email from the ASF dual-hosted git repository.
jiayuliu pushed a change to branch arrow2
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.
from 2008b1d Update docs to note support for VARIANCE and STDDEV (#1543)
add b42ebe7 Clarify docs about `Accumulator::update` and
`Accumulator::update_batch` (#1542)
add b05feda Mark ARRAY_AGG(DISTINCT ...) not implemented (#1534)
add 06d147a Add batch operations to stddev (#1547)
add e1e7b86 Address clippy warnings (#1553)
add 14176ff Update to arrow-7.0.0 (#1523)
add 794b92b Remove unused `update` and `merge` implementations from
Aggregates and supporting `ScalarValue` arithmetic (#1550)
add cf76969 Make call SchedulerServer::new once in ballista-scheduler
process (#1537)
add b4c77e5 Add covar operators (#1551)
add d7e465a Initial MemoryManager and DiskManager APIs for query
execution + External Sort implementation (#1526)
add 811bb51 Update to rust 1.58 (#1557)
add 0bddfb7 support cast/try_cast for decimal: signed numeric to decimal
(#1442)
add 1c39f5c support comparison for decimal data type and refactor the
binary coercion rule (#1483)
add b743610 add correlation function (#1561)
add 1dae7e2 Rename sql integration tests from `mod` to `sql_integration`
(#1575)
add bbfc2c0 update reference to python and update readme (#1581)
add 278e859 minor: improve the benchmark readme (#1567)
add 438b417 Tests for support try_cast/cast decimal to numeric (#1465)
add 6f7b2d2 implement Hash for various types and replace PartialOrd
(#1580)
add f027e5f add from_slice trait to ease arrow2 migration (#1588)
add 92a3e45 Consolidate `batch_size` configuration in `ExecutionConfig`,
`RuntimeConfig` and `PhysicalPlanConfig` (#1562)
add 30df911 support from_slice for binary, string, and boolean array
types (#1589)
add 059e52b update nightly version (#1597)
add 82e8003 remove update and merge (#1582)
No new revisions were added by this update.
Summary of changes:
.env | 2 +-
.github/workflows/rust.yml | 2 +-
Cargo.toml | 2 -
README.md | 13 +-
ballista-examples/Cargo.toml | 6 +-
ballista/rust/client/Cargo.toml | 2 +-
ballista/rust/core/Cargo.toml | 12 +-
ballista/rust/core/proto/ballista.proto | 16 +-
ballista/rust/core/src/client.rs | 14 +-
ballista/rust/core/src/error.rs | 2 +-
.../core/src/execution_plans/distributed_query.rs | 4 +-
.../core/src/execution_plans/shuffle_reader.rs | 7 +-
.../core/src/execution_plans/shuffle_writer.rs | 76 +-
.../core/src/execution_plans/unresolved_shuffle.rs | 6 +-
.../rust/core/src/serde/logical_plan/from_proto.rs | 4 +-
ballista/rust/core/src/serde/logical_plan/mod.rs | 99 +-
.../rust/core/src/serde/logical_plan/to_proto.rs | 41 +-
ballista/rust/core/src/serde/mod.rs | 25 +-
.../core/src/serde/physical_plan/from_proto.rs | 13 +-
ballista/rust/core/src/serde/physical_plan/mod.rs | 2 +-
.../rust/core/src/serde/physical_plan/to_proto.rs | 9 +-
ballista/rust/core/src/utils.rs | 9 +-
ballista/rust/executor/Cargo.toml | 6 +-
ballista/rust/executor/src/collect.rs | 6 +-
ballista/rust/executor/src/executor.rs | 11 +-
ballista/rust/scheduler/Cargo.toml | 6 +-
ballista/rust/scheduler/src/lib.rs | 4 +
ballista/rust/scheduler/src/main.rs | 15 +-
ballista/rust/scheduler/src/planner.rs | 6 +-
benchmarks/Cargo.toml | 2 +-
benchmarks/README.md | 2 +-
benchmarks/src/bin/nyctaxi.rs | 3 +-
benchmarks/src/bin/tpch.rs | 12 +-
ci/docker/linux-apt-lint.dockerfile | 2 +-
datafusion-cli/Cargo.toml | 4 +-
datafusion-cli/Dockerfile | 2 +-
datafusion-cli/src/print_format.rs | 15 +-
datafusion-examples/Cargo.toml | 8 +-
.../examples/dataframe_in_memory.rs | 6 +-
datafusion-examples/examples/simple_udaf.rs | 63 +-
datafusion-examples/examples/simple_udf.rs | 5 +-
datafusion/Cargo.toml | 8 +-
datafusion/benches/data_utils/mod.rs | 3 +-
datafusion/benches/filter_query_sql.rs | 5 +-
datafusion/benches/math_query_sql.rs | 8 +-
datafusion/benches/physical_plan.rs | 8 +-
datafusion/benches/sort_limit_query_sql.rs | 9 +-
datafusion/src/avro_to_arrow/schema.rs | 7 +-
datafusion/src/catalog/catalog.rs | 12 +
datafusion/src/catalog/mod.rs | 1 +
datafusion/src/catalog/schema.rs | 6 +
datafusion/src/datasource/datasource.rs | 1 -
datafusion/src/datasource/empty.rs | 1 -
datafusion/src/datasource/file_format/avro.rs | 54 +-
datafusion/src/datasource/file_format/csv.rs | 28 +-
datafusion/src/datasource/file_format/json.rs | 28 +-
datafusion/src/datasource/file_format/mod.rs | 4 +-
datafusion/src/datasource/file_format/parquet.rs | 102 +-
datafusion/src/datasource/listing/table.rs | 12 +-
datafusion/src/datasource/memory.rs | 69 +-
datafusion/src/datasource/mod.rs | 5 +-
datafusion/src/datasource/object_store/mod.rs | 8 +-
datafusion/src/error.rs | 8 +-
datafusion/src/execution/context.rs | 133 ++-
datafusion/src/execution/dataframe_impl.rs | 12 +-
datafusion/src/execution/disk_manager.rs | 125 +++
datafusion/src/execution/memory_manager.rs | 488 ++++++++++
datafusion/src/execution/mod.rs | 3 +
datafusion/src/execution/options.rs | 6 +
datafusion/src/execution/runtime_env.rs | 149 +++
datafusion/src/from_slice.rs | 116 +++
datafusion/src/lib.rs | 17 +-
datafusion/src/logical_plan/dfschema.rs | 25 +-
datafusion/src/logical_plan/expr.rs | 28 +-
datafusion/src/logical_plan/operators.rs | 2 +-
datafusion/src/logical_plan/plan.rs | 2 +-
datafusion/src/logical_plan/window_frames.rs | 11 +-
.../src/optimizer/common_subexpr_eliminate.rs | 6 +
datafusion/src/optimizer/eliminate_limit.rs | 1 +
datafusion/src/optimizer/filter_push_down.rs | 2 +-
datafusion/src/optimizer/limit_push_down.rs | 1 +
datafusion/src/optimizer/mod.rs | 1 +
datafusion/src/optimizer/projection_push_down.rs | 1 +
datafusion/src/optimizer/simplify_expressions.rs | 1 +
.../src/optimizer/single_distinct_to_groupby.rs | 1 +
.../src/physical_optimizer/aggregate_statistics.rs | 5 +-
.../src/physical_optimizer/coalesce_batches.rs | 3 +-
.../physical_optimizer/hash_build_probe_order.rs | 1 +
datafusion/src/physical_optimizer/merge_exec.rs | 1 +
datafusion/src/physical_optimizer/pruning.rs | 6 +-
datafusion/src/physical_optimizer/repartition.rs | 9 +-
datafusion/src/physical_plan/aggregates.rs | 261 ++++-
datafusion/src/physical_plan/analyze.rs | 12 +-
datafusion/src/physical_plan/coalesce_batches.rs | 15 +-
.../src/physical_plan/coalesce_partitions.rs | 21 +-
.../physical_plan/coercion_rule/aggregate_rule.rs | 30 +-
.../src/physical_plan/coercion_rule/binary_rule.rs | 229 +++++
datafusion/src/physical_plan/coercion_rule/mod.rs | 2 +
datafusion/src/physical_plan/common.rs | 85 +-
datafusion/src/physical_plan/cross_join.rs | 11 +-
.../src/physical_plan/distinct_expressions.rs | 64 +-
datafusion/src/physical_plan/empty.rs | 18 +-
datafusion/src/physical_plan/explain.rs | 7 +-
.../physical_plan/expressions/approx_distinct.rs | 17 -
.../src/physical_plan/expressions/array_agg.rs | 46 +-
.../src/physical_plan/expressions/average.rs | 41 +-
datafusion/src/physical_plan/expressions/binary.rs | 1005 ++++++++++++++++----
datafusion/src/physical_plan/expressions/case.rs | 10 +-
datafusion/src/physical_plan/expressions/cast.rs | 303 +++++-
.../src/physical_plan/expressions/coercion.rs | 15 +-
.../src/physical_plan/expressions/correlation.rs | 543 +++++++++++
datafusion/src/physical_plan/expressions/count.rs | 28 +-
.../src/physical_plan/expressions/covariance.rs | 715 ++++++++++++++
.../src/physical_plan/expressions/is_not_null.rs | 3 +-
.../src/physical_plan/expressions/is_null.rs | 3 +-
.../src/physical_plan/expressions/lead_lag.rs | 4 +-
.../src/physical_plan/expressions/min_max.rs | 71 +-
datafusion/src/physical_plan/expressions/mod.rs | 38 +-
.../src/physical_plan/expressions/nth_value.rs | 4 +-
datafusion/src/physical_plan/expressions/nullif.rs | 4 +-
datafusion/src/physical_plan/expressions/rank.rs | 22 +-
.../src/physical_plan/expressions/row_number.rs | 3 +-
datafusion/src/physical_plan/expressions/stddev.rs | 136 ++-
datafusion/src/physical_plan/expressions/sum.rs | 29 +-
.../src/physical_plan/expressions/try_cast.rs | 296 +++++-
.../src/physical_plan/expressions/variance.rs | 260 +++--
datafusion/src/physical_plan/file_format/avro.rs | 32 +-
datafusion/src/physical_plan/file_format/csv.rs | 40 +-
datafusion/src/physical_plan/file_format/json.rs | 30 +-
datafusion/src/physical_plan/file_format/mod.rs | 11 +-
.../src/physical_plan/file_format/parquet.rs | 47 +-
datafusion/src/physical_plan/filter.rs | 17 +-
datafusion/src/physical_plan/functions.rs | 29 +-
datafusion/src/physical_plan/hash_aggregate.rs | 47 +-
datafusion/src/physical_plan/hash_join.rs | 74 +-
datafusion/src/physical_plan/hash_utils.rs | 8 +-
datafusion/src/physical_plan/limit.rs | 25 +-
datafusion/src/physical_plan/memory.rs | 22 +-
datafusion/src/physical_plan/metrics/mod.rs | 6 +-
datafusion/src/physical_plan/metrics/value.rs | 26 +-
datafusion/src/physical_plan/mod.rs | 66 +-
datafusion/src/physical_plan/planner.rs | 19 +-
datafusion/src/physical_plan/projection.rs | 17 +-
datafusion/src/physical_plan/regex_expressions.rs | 11 +-
datafusion/src/physical_plan/repartition.rs | 70 +-
.../src/physical_plan/sorts/external_sort.rs | 657 +++++++++++++
datafusion/src/physical_plan/sorts/in_mem_sort.rs | 241 +++++
datafusion/src/physical_plan/sorts/mod.rs | 295 ++++++
datafusion/src/physical_plan/{ => sorts}/sort.rs | 38 +-
.../{ => sorts}/sort_preserving_merge.rs | 473 ++++-----
datafusion/src/physical_plan/udaf.rs | 14 +-
datafusion/src/physical_plan/udf.rs | 14 +-
datafusion/src/physical_plan/union.rs | 20 +-
datafusion/src/physical_plan/values.rs | 7 +-
datafusion/src/physical_plan/window_functions.rs | 4 +-
datafusion/src/physical_plan/windows/mod.rs | 12 +-
.../src/physical_plan/windows/window_agg_exec.rs | 9 +-
datafusion/src/scalar.rs | 583 +-----------
datafusion/src/test/exec.rs | 38 +-
datafusion/src/test/mod.rs | 5 +-
datafusion/src/test/variable.rs | 6 +-
datafusion/src/test_util.rs | 8 +-
datafusion/tests/custom_sources.rs | 26 +-
datafusion/tests/dataframe.rs | 16 +-
datafusion/tests/dataframe_functions.rs | 8 +-
datafusion/tests/parquet_pruning.rs | 7 +-
datafusion/tests/provider_filter_pushdown.rs | 8 +-
datafusion/tests/sql/aggregates.rs | 39 +-
datafusion/tests/sql/avro.rs | 5 +-
datafusion/tests/sql/errors.rs | 3 +-
datafusion/tests/sql/explain_analyze.rs | 40 +-
datafusion/tests/sql/functions.rs | 4 +-
datafusion/tests/sql/joins.rs | 15 +-
datafusion/tests/sql/mod.rs | 24 +-
datafusion/tests/sql/parquet.rs | 6 +-
datafusion/tests/sql/select.rs | 7 +-
datafusion/tests/sql/timestamp.rs | 6 +-
datafusion/tests/{mod.rs => sql_integration.rs} | 0
datafusion/tests/statistics.rs | 8 +-
datafusion/tests/user_defined_plan.rs | 19 +-
dev/docker/ballista-base.dockerfile | 2 +-
181 files changed, 7540 insertions(+), 2162 deletions(-)
create mode 100644 datafusion/src/execution/disk_manager.rs
create mode 100644 datafusion/src/execution/memory_manager.rs
create mode 100644 datafusion/src/execution/runtime_env.rs
create mode 100644 datafusion/src/from_slice.rs
create mode 100644 datafusion/src/physical_plan/coercion_rule/binary_rule.rs
create mode 100644 datafusion/src/physical_plan/expressions/correlation.rs
create mode 100644 datafusion/src/physical_plan/expressions/covariance.rs
create mode 100644 datafusion/src/physical_plan/sorts/external_sort.rs
create mode 100644 datafusion/src/physical_plan/sorts/in_mem_sort.rs
create mode 100644 datafusion/src/physical_plan/sorts/mod.rs
rename datafusion/src/physical_plan/{ => sorts}/sort.rs (94%)
rename datafusion/src/physical_plan/{ => sorts}/sort_preserving_merge.rs (79%)
rename datafusion/tests/{mod.rs => sql_integration.rs} (100%)