[arrow] branch master updated (325eb07 -> 4b902f7)
This is an automated email from the ASF dual-hosted git repository. kou pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from 325eb07 ARROW-12746: [Go][Flight] append instead of overwriting outgoing metadata add 4b902f7 ARROW-12780: [CI][C++] Install necessary packages for MinGW builds No new revisions were added by this update. Summary of changes: ci/scripts/msys2_setup.sh | 1 + 1 file changed, 1 insertion(+)
[arrow-datafusion] branch master updated: add --quiet/-q flag and allow timing info to be turned on/off (#323)
This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git The following commit(s) were added to refs/heads/master by this push: new b44238d add --quiet/-q flag and allow timing info to be turned on/off (#323) b44238d is described below commit b44238d05094ab0fa0171769ce8b890a0045e1e1 Author: Jiayu Liu AuthorDate: Fri May 14 02:38:19 2021 +0800 add --quiet/-q flag and allow timing info to be turned on/off (#323) * add print options and allow timing info to be turned on/off * remove self reference * use quiet --- datafusion-cli/src/format.rs| 17 --- datafusion-cli/src/lib.rs | 56 ++ datafusion-cli/src/main.rs | 63 + datafusion-cli/src/{format => }/print_format.rs | 0 4 files changed, 77 insertions(+), 59 deletions(-) diff --git a/datafusion-cli/src/format.rs b/datafusion-cli/src/format.rs deleted file mode 100644 index c5da78f..000 --- a/datafusion-cli/src/format.rs +++ /dev/null @@ -1,17 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-pub mod print_format; diff --git a/datafusion-cli/src/lib.rs b/datafusion-cli/src/lib.rs new file mode 100644 index 000..5bd16e3 --- /dev/null +++ b/datafusion-cli/src/lib.rs @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +pub mod print_format; + +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::error::Result; +use print_format::PrintFormat; +use std::time::Instant; + +#[derive(Debug, Clone)] +pub struct PrintOptions { +pub format: PrintFormat, +pub quiet: bool, +} + +fn print_timing_info(row_count: usize, now: Instant) { +println!( +"{} {} in set. 
Query took {} seconds.", +row_count, +if row_count == 1 { "row" } else { "rows" }, +now.elapsed().as_secs() +); +} + +impl PrintOptions { +/// print the batches to stdout using the specified format +pub fn print_batches(&self, batches: &[RecordBatch]) -> Result<()> { +let now = Instant::now(); +if batches.is_empty() { +if !self.quiet { +print_timing_info(0, now); +} +} else { +self.format.print_batches(batches)?; +if !self.quiet { +let row_count: usize = batches.iter().map(|b| b.num_rows()).sum(); +print_timing_info(row_count, now); +} +} +Ok(()) +} +} diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 2360d46..f36b5d9 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -17,19 +17,16 @@ #![allow(bare_trait_objects)] -mod format; - use clap::{crate_version, App, Arg}; use datafusion::error::Result; use datafusion::execution::context::{ExecutionConfig, ExecutionContext}; -use format::print_format::PrintFormat; +use datafusion_cli::{print_format::PrintFormat, PrintOptions}; use rustyline::Editor; use std::env; use std::fs::File; use std::io::prelude::*; use std::io::BufReader; use std::path::Path; -use std::time::Instant; #[tokio::main] pub async fn main() { @@ -72,6 +69,13 @@ pub async fn main() { .validator(is_valid_format) .takes_value(true), ) +.arg( +
[arrow] branch master updated (f1a7c50 -> 325eb07)
This is an automated email from the ASF dual-hosted git repository. emkornfield pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from f1a7c50 ARROW-5385: [Go] Implement EXTENSION datatype add 325eb07 ARROW-12746: [Go][Flight] append instead of overwriting outgoing metadata No new revisions were added by this update. Summary of changes: go/arrow/flight/client_auth.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
[arrow] branch master updated (9347731 -> f1a7c50)
This is an automated email from the ASF dual-hosted git repository. emkornfield pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from 9347731 ARROW-12731: [R] Use InMemoryDataset for Table/RecordBatch in dplyr code add f1a7c50 ARROW-5385: [Go] Implement EXTENSION datatype No new revisions were added by this update. Summary of changes: dev/archery/archery/integration/datagen.py | 3 +- docs/source/status.rst | 4 +- go/arrow/array/array.go| 2 +- go/arrow/array/array_test.go | 5 +- go/arrow/array/builder.go | 2 + go/arrow/array/compare.go | 6 + go/arrow/array/extension.go| 236 ++ go/arrow/array/extension_test.go | 96 go/arrow/compare.go| 53 ++- go/arrow/compare_test.go | 20 +- go/arrow/datatype_extension.go | 163 +++ go/arrow/datatype_extension_test.go| 86 go/arrow/datatype_nested.go| 14 +- go/arrow/internal/arrdata/arrdata.go | 99 go/arrow/internal/arrjson/arrjson.go | 180 +++- go/arrow/internal/arrjson/arrjson_test.go | 497 - go/arrow/internal/arrjson/reader.go| 5 +- go/arrow/internal/arrjson/writer.go| 77 +--- go/arrow/internal/testing/types/extension_types.go | 247 ++ .../ipc/cmd/arrow-json-integration-test/main.go| 4 + go/arrow/ipc/file_reader.go| 5 + go/arrow/ipc/metadata.go | 68 ++- go/arrow/ipc/metadata_test.go | 63 +++ go/arrow/ipc/reader.go | 9 +- go/arrow/ipc/writer.go | 9 + go/arrow/schema.go | 29 ++ 26 files changed, 1856 insertions(+), 126 deletions(-) create mode 100644 go/arrow/array/extension.go create mode 100644 go/arrow/array/extension_test.go create mode 100644 go/arrow/datatype_extension.go create mode 100644 go/arrow/datatype_extension_test.go create mode 100644 go/arrow/internal/testing/types/extension_types.go
[arrow] branch master updated (b34c8f6 -> 9347731)
This is an automated email from the ASF dual-hosted git repository. npr pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from b34c8f6 MINOR: [Docs] Add link to File.fbs in columnar format docs add 9347731 ARROW-12731: [R] Use InMemoryDataset for Table/RecordBatch in dplyr code No new revisions were added by this update. Summary of changes: r/DESCRIPTION| 11 +- r/NAMESPACE |5 +- r/R/arrow-datum.R| 71 +- r/R/arrow-package.R | 16 +- r/R/arrow-tabular.R |6 +- r/R/arrowExports.R |4 + r/R/dataset-scan.R | 14 +- r/R/dataset-write.R |8 - r/R/dplyr-arrange.R | 93 ++ r/R/dplyr-collect.R | 62 ++ r/R/dplyr-eval.R | 99 +++ r/R/dplyr-filter.R | 84 ++ r/R/dplyr-functions.R| 352 r/R/dplyr-group-by.R | 65 ++ r/R/dplyr-mutate.R | 117 +++ r/R/dplyr-select.R | 120 +++ r/{extra-tests/helpers.R => R/dplyr-summarize.R} | 30 +- r/R/dplyr.R | 1005 +- r/R/expression.R | 194 + r/man/contains_regex.Rd |2 +- r/man/get_stringr_pattern_options.Rd |2 +- r/src/arrowExports.cpp | 17 + r/src/expression.cpp |8 + r/tests/testthat/helper-arrow.R |2 +- r/tests/testthat/test-RecordBatch.R |7 +- r/tests/testthat/test-Table.R|7 +- r/tests/testthat/test-compute-arith.R|3 +- r/tests/testthat/test-compute-sort.R | 17 +- r/tests/testthat/test-dataset.R | 339 +--- r/tests/testthat/test-dplyr-arrange.R|2 + r/tests/testthat/test-dplyr-filter.R | 57 +- r/tests/testthat/test-dplyr-group-by.R |2 + r/tests/testthat/test-dplyr-mutate.R | 39 +- r/tests/testthat/test-dplyr-string-functions.R | 114 +-- r/tests/testthat/test-dplyr.R| 28 +- r/tests/testthat/test-expression.R | 56 +- r/tests/testthat/test-filesystem.R |4 + 37 files changed, 1339 insertions(+), 1723 deletions(-) create mode 100644 r/R/dplyr-arrange.R create mode 100644 r/R/dplyr-collect.R create mode 100644 r/R/dplyr-eval.R create mode 100644 r/R/dplyr-filter.R create mode 100644 r/R/dplyr-functions.R create mode 100644 r/R/dplyr-group-by.R create mode 100644 r/R/dplyr-mutate.R create mode 100644 
r/R/dplyr-select.R copy r/{extra-tests/helpers.R => R/dplyr-summarize.R} (53%)
[arrow] branch master updated (00a5152 -> b34c8f6)
This is an automated email from the ASF dual-hosted git repository. lidavidm pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from 00a5152 ARROW-12772: [CI] Merge script test fails due to missing dependency add b34c8f6 MINOR: [Docs] Add link to File.fbs in columnar format docs No new revisions were added by this update. Summary of changes: docs/source/format/Columnar.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
[arrow] branch master updated (b2cd200 -> 00a5152)
This is an automated email from the ASF dual-hosted git repository. kszucs pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from b2cd200 ARROW-12756: [C++] MSVC build fails with latest gtest from vcpkg add 00a5152 ARROW-12772: [CI] Merge script test fails due to missing dependency No new revisions were added by this update. Summary of changes: .github/workflows/dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[arrow] branch master updated (73d833c -> b2cd200)
This is an automated email from the ASF dual-hosted git repository. kszucs pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from 73d833c ARROW-12490: [Dev] Use only miniforge in verify-release-candidate.sh add b2cd200 ARROW-12756: [C++] MSVC build fails with latest gtest from vcpkg No new revisions were added by this update. Summary of changes: cpp/vcpkg.json| 6 +- dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat | 1 + docs/source/developers/cpp/building.rst | 3 ++- docs/source/developers/cpp/windows.rst| 1 + 4 files changed, 9 insertions(+), 2 deletions(-)
[arrow-rs] branch master updated: Fix subtraction underflow when sorting string arrays with many nulls (#285)
This is an automated email from the ASF dual-hosted git repository. nevime pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow-rs.git The following commit(s) were added to refs/heads/master by this push: new ce8e67c Fix subtraction underflow when sorting string arrays with many nulls (#285) ce8e67c is described below commit ce8e67c28ad1431cda36b38434e53871c2dd520a Author: Michael Edwards AuthorDate: Thu May 13 13:28:46 2021 +0200 Fix subtraction underflow when sorting string arrays with many nulls (#285) --- arrow/src/compute/kernels/sort.rs | 285 -- 1 file changed, 274 insertions(+), 11 deletions(-) diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index 9287425..7cd463d 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -410,24 +410,27 @@ fn sort_boolean( len = limit.min(len); } if !descending { -sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1)); +sort_by(&mut valids, len.saturating_sub(nulls_len), |a, b| { +cmp(a.1, b.1) +}); } else { -sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1).reverse()); +sort_by(&mut valids, len.saturating_sub(nulls_len), |a, b| { +cmp(a.1, b.1).reverse() +}); // reverse to keep a stable ordering nulls.reverse(); } // collect results directly into a buffer instead of a vec to avoid another aligned allocation -let mut result = MutableBuffer::new(values.len() * std::mem::size_of::<u32>()); +let result_capacity = len * std::mem::size_of::<u32>(); +let mut result = MutableBuffer::new(result_capacity); // sets len to capacity so we can access the whole buffer as a typed slice -result.resize(values.len() * std::mem::size_of::<u32>(), 0); +result.resize(result_capacity, 0); let result_slice: &mut [u32] = result.typed_data_mut(); -debug_assert_eq!(result_slice.len(), nulls_len + valids_len); - if options.nulls_first { let size = nulls_len.min(len); -result_slice[0..nulls_len.min(len)].copy_from_slice(&nulls); +result_slice[0..size].copy_from_slice(&nulls[0..size]); 
if nulls_len < len { insert_valid_values(result_slice, nulls_len, &valids[0..len - size]); } @@ -626,9 +629,13 @@ where len = limit.min(len); } if !descending { -sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1)); +sort_by(&mut valids, len.saturating_sub(nulls_len), |a, b| { +cmp(a.1, b.1) +}); } else { -sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1).reverse()); +sort_by(&mut valids, len.saturating_sub(nulls_len), |a, b| { +cmp(a.1, b.1).reverse() +}); // reverse to keep a stable ordering nulls.reverse(); } @@ -689,11 +696,11 @@ where len = limit.min(len); } if !descending { -sort_by(&mut valids, len - nulls_len, |a, b| { +sort_by(&mut valids, len.saturating_sub(nulls_len), |a, b| { cmp_array(a.1.as_ref(), b.1.as_ref()) }); } else { -sort_by(&mut valids, len - nulls_len, |a, b| { +sort_by(&mut valids, len.saturating_sub(nulls_len), |a, b| { cmp_array(a.1.as_ref(), b.1.as_ref()).reverse() }); // reverse to keep a stable ordering @@ -1285,6 +1292,48 @@ mod tests { None, vec![5, 0, 2, 1, 4, 3], ); + +// valid values less than limit with extra nulls +test_sort_to_indices_primitive_arrays::<Float64Type>( +vec![Some(2.0), None, None, Some(1.0)], +Some(SortOptions { +descending: false, +nulls_first: false, +}), +Some(3), +vec![3, 0, 1], +); + +test_sort_to_indices_primitive_arrays::<Float64Type>( +vec![Some(2.0), None, None, Some(1.0)], +Some(SortOptions { +descending: false, +nulls_first: true, +}), +Some(3), +vec![1, 2, 3], +); + +// more nulls than limit +test_sort_to_indices_primitive_arrays::<Float64Type>( +vec![Some(1.0), None, None, None], +Some(SortOptions { +descending: false, +nulls_first: true, +}), +Some(2), +vec![1, 2], +); + +test_sort_to_indices_primitive_arrays::<Float64Type>( +vec![Some(1.0), None, None, None], +Some(SortOptions { +descending: false, +nulls_first: false, +}), +Some(2), +vec![0, 1], +); } #[test] @@ -1329,6 +1378,48 @@ mod tests { Some(3), vec![5, 0, 2], ); + +// valid values less than limit with extra nulls +test_sort_to_indices_boolean_arrays( +
[arrow] branch master updated (950e18b -> 73d833c)
This is an automated email from the ASF dual-hosted git repository. kszucs pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from 950e18b ARROW-12757: [Archery] Fix spurious warning when running "archery docker run" add 73d833c ARROW-12490: [Dev] Use only miniforge in verify-release-candidate.sh No new revisions were added by this update. Summary of changes: dev/release/verify-release-candidate.sh | 15 +-- dev/tasks/tasks.yml | 3 ++- dev/tasks/verify-rc/github.linux.yml| 1 + 3 files changed, 8 insertions(+), 11 deletions(-)
[arrow] branch master updated (5c9200d -> 950e18b)
This is an automated email from the ASF dual-hosted git repository. kszucs pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from 5c9200d ARROW-12530: [C++] Remove Buffer::mutable_data_ add 950e18b ARROW-12757: [Archery] Fix spurious warning when running "archery docker run" No new revisions were added by this update. Summary of changes: docker-compose.yml | 1 - 1 file changed, 1 deletion(-)