[arrow-datafusion] branch master updated: automatically add python label to PRs (#791)

2021-07-28 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new a6baf88  automatically add python label to PRs (#791)
a6baf88 is described below

commit a6baf88501765ea4a8884e9e21be23a25b8e76f8
Author: QP Hou 
AuthorDate: Wed Jul 28 19:46:49 2021 -0700

automatically add python label to PRs (#791)

also adjust label detection logic for datafusion and ballista
---
 .github/workflows/dev_pr/labeler.yml | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml
index df9d412..c27fb2c 100644
--- a/.github/workflows/dev_pr/labeler.yml
+++ b/.github/workflows/dev_pr/labeler.yml
@@ -17,6 +17,12 @@
 
 datafusion:
   - datafusion/**/*
+  - datafusion-cli/**/*
+  - datafusion-examples/**/*
 
 ballista:
   - ballista/**/*
+  - ballista-examples/**/*
+
+python:
+  - python/**/*


[arrow-datafusion] branch master updated: Show the result of all optimizer passes in EXPLAIN VERBOSE (#759)

2021-07-20 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 30693df  Show the result of all optimizer passes in EXPLAIN VERBOSE (#759)
30693df is described below

commit 30693df8961dca300306dfd0c8fca130375b50b3
Author: Andrew Lamb 
AuthorDate: Tue Jul 20 16:43:51 2021 -0400

Show the result of all optimizer passes in EXPLAIN VERBOSE (#759)
---
 datafusion/src/execution/context.rs  | 106 ++---
 datafusion/src/logical_plan/builder.rs   |  15 ++-
 datafusion/src/logical_plan/mod.rs   |   2 +-
 datafusion/src/logical_plan/plan.rs  |  20 
 datafusion/src/optimizer/limit_push_down.rs  |  20 
 datafusion/src/optimizer/projection_push_down.rs |  22 +
 datafusion/src/optimizer/simplify_expressions.rs |  23 +
 datafusion/src/optimizer/utils.rs| 111 +-
 datafusion/src/physical_plan/display.rs  |  11 +++
 datafusion/src/physical_plan/explain.rs  |  24 -
 datafusion/src/physical_plan/planner.rs  | 115 +++
 datafusion/src/sql/planner.rs|   9 +-
 datafusion/tests/sql.rs  |   7 ++
 datafusion/tests/user_defined_plan.rs|   6 +-
 14 files changed, 252 insertions(+), 239 deletions(-)

diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs
index bd939ce..0cf8b3b 100644
--- a/datafusion/src/execution/context.rs
+++ b/datafusion/src/execution/context.rs
@@ -21,6 +21,7 @@ use crate::{
 catalog::{CatalogList, MemoryCatalogList},
 information_schema::CatalogWithInformationSchema,
 },
+logical_plan::{PlanType, ToStringifiedPlan},
 optimizer::{
 aggregate_statistics::AggregateStatistics, 
eliminate_limit::EliminateLimit,
 hash_build_probe_order::HashBuildProbeOrder,
@@ -446,19 +447,31 @@ impl ExecutionContext {
 
 /// Optimizes the logical plan by applying optimizer rules.
 pub fn optimize(, plan: ) -> Result {
-let state =  self.state.lock().unwrap();
-let execution_props =  state.execution_props.clone();
-let optimizers = 
-
-let execution_props = execution_props.start_execution();
-
-let mut new_plan = plan.clone();
-debug!("Logical plan:\n {:?}", plan);
-for optimizer in optimizers {
-new_plan = optimizer.optimize(_plan, execution_props)?;
+if let LogicalPlan::Explain {
+verbose,
+plan,
+stringified_plans,
+schema,
+} = plan
+{
+let mut stringified_plans = stringified_plans.clone();
+
+// optimize the child plan, capturing the output of each optimizer
+let plan = self.optimize_internal(plan, |optimized_plan, 
optimizer| {
+let optimizer_name = optimizer.name().to_string();
+let plan_type = PlanType::OptimizedLogicalPlan { 
optimizer_name };
+
stringified_plans.push(optimized_plan.to_stringified(plan_type));
+})?;
+
+Ok(LogicalPlan::Explain {
+verbose: *verbose,
+plan: Arc::new(plan),
+stringified_plans,
+schema: schema.clone(),
+})
+} else {
+self.optimize_internal(plan, |_, _| {})
 }
-debug!("Optimized logical plan:\n {:?}", new_plan);
-Ok(new_plan)
 }
 
 /// Creates a physical plan from a logical plan.
@@ -556,6 +569,32 @@ impl ExecutionContext {
 ))),
 }
 }
+
+/// Optimizes the logical plan by applying optimizer rules, and
+/// invoking observer function after each call
+fn optimize_internal(
+,
+plan: ,
+mut observer: F,
+) -> Result
+where
+F: FnMut(,  OptimizerRule),
+{
+let state =  self.state.lock().unwrap();
+let execution_props =  state.execution_props.clone();
+let optimizers = 
+
+let execution_props = execution_props.start_execution();
+
+let mut new_plan = plan.clone();
+debug!("Logical plan:\n {:?}", plan);
+for optimizer in optimizers {
+new_plan = optimizer.optimize(_plan, execution_props)?;
+observer(_plan, optimizer.as_ref());
+}
+debug!("Optimized logical plan:\n {:?}", new_plan);
+Ok(new_plan)
+}
 }
 
 impl From>> for ExecutionContext {
@@ -941,6 +980,49 @@ mod tests {
 use tempfile::TempDir;
 use test::*;
 
+#[test]
+fn optimize_explain() {
+let schema = Schema::new(vec![Field::new("id", DataType::Int32, 
false)]);
+
+let plan = LogicalPlanBuild

[arrow-experimental-rs-parquet2] branch master updated: Update .asf.yaml (#2)

2021-07-08 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository 
https://gitbox.apache.org/repos/asf/arrow-experimental-rs-parquet2.git


The following commit(s) were added to refs/heads/master by this push:
 new a0c187c  Update .asf.yaml (#2)
a0c187c is described below

commit a0c187c4e476134328d321d3279c2fe687a96389
Author: Andrew Lamb 
AuthorDate: Thu Jul 8 11:58:24 2021 -0400

Update .asf.yaml (#2)
---
 .asf.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.asf.yaml b/.asf.yaml
index 646bdac..be0839d 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -21,11 +21,11 @@ notifications:
   pullrequests: git...@arrow.apache.org
   jira_options: link label worklog
 github:
-  description: "Official Rust implementation of Apache Arrow"
+  description: "Experimental Rust implementation of Apache Arrow Parquet"
   homepage: https://arrow.apache.org/
   enabled_merge_buttons:
 squash: true
 merge: false
 rebase: false
   features:
-issues: true
\ No newline at end of file
+issues: true


[arrow-experimental-rs-arrow2] branch master updated: Update .asf.yaml (#2)

2021-07-08 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository 
https://gitbox.apache.org/repos/asf/arrow-experimental-rs-arrow2.git


The following commit(s) were added to refs/heads/master by this push:
 new d7356b4  Update .asf.yaml (#2)
d7356b4 is described below

commit d7356b4e45ec9b116d0cf15463751eb328f6fc1e
Author: Andrew Lamb 
AuthorDate: Thu Jul 8 11:58:02 2021 -0400

Update .asf.yaml (#2)
---
 .asf.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.asf.yaml b/.asf.yaml
index 646bdac..1695e7a 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -21,11 +21,11 @@ notifications:
   pullrequests: git...@arrow.apache.org
   jira_options: link label worklog
 github:
-  description: "Official Rust implementation of Apache Arrow"
+  description: "Experimental Rust implementation of Apache Arrow"
   homepage: https://arrow.apache.org/
   enabled_merge_buttons:
 squash: true
 merge: false
 rebase: false
   features:
-issues: true
\ No newline at end of file
+issues: true


[arrow-datafusion] branch master updated: Fix RAT check (#652)

2021-07-02 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new d97fc91  Fix RAT check (#652)
d97fc91 is described below

commit d97fc9145bfc41969bd30dee3d7ac3932aa5
Author: Andrew Lamb 
AuthorDate: Fri Jul 2 23:28:33 2021 -0400

Fix RAT check (#652)
---
 .github/workflows/dev.yml | 21 ++
 dev/release/check-rat-report.py   | 59 +++
 dev/release/rat_exclude_files.txt | 16 +++
 dev/release/run-rat.sh| 43 
 4 files changed, 133 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index a7e574e..8bb35f1 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -16,15 +16,11 @@
 # under the License.
 
 name: Dev
-
-on:
-  # always trigger
-  push:
-  pull_request:
+on: [push, pull_request]
 
 jobs:
   lint:
-name: Lint C++, Python, R, Rust, Docker, RAT
+name: Lint C++, Python, R, Rust, Docker
 runs-on: ubuntu-latest
 steps:
   - name: Checkout Arrow
@@ -42,6 +38,19 @@ jobs:
   - name: Lint
 run: archery lint --rat
 
+  rat:
+name: Release Audit Tool (RAT)
+runs-on: ubuntu-latest
+steps:
+  - name: Checkout
+uses: actions/checkout@v2
+  - name: Setup Python
+uses: actions/setup-python@v1
+with:
+  python-version: 3.8
+  - name: Audit licenses
+run: ./dev/release/run-rat.sh .
+
   prettier:
 name: Use prettier to check formatting of documents
 runs-on: ubuntu-latest
diff --git a/dev/release/check-rat-report.py b/dev/release/check-rat-report.py
new file mode 100644
index 0000000..e30d72b
--- /dev/null
+++ b/dev/release/check-rat-report.py
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+##
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+##
+import fnmatch
+import re
+import sys
+import xml.etree.ElementTree as ET
+
+if len(sys.argv) != 3:
+sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" %
+ sys.argv[0])
+sys.exit(1)
+
+exclude_globs_filename = sys.argv[1]
+xml_filename = sys.argv[2]
+
+globs = [line.strip() for line in open(exclude_globs_filename, "r")]
+
+tree = ET.parse(xml_filename)
+root = tree.getroot()
+resources = root.findall('resource')
+
+all_ok = True
+for r in resources:
+approvals = r.findall('license-approval')
+if not approvals or approvals[0].attrib['name'] == 'true':
+continue
+clean_name = re.sub('^[^/]+/', '', r.attrib['name'])
+excluded = False
+for g in globs:
+if fnmatch.fnmatch(clean_name, g):
+excluded = True
+break
+if not excluded:
+sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % (
+clean_name, r.attrib['name'], approvals[0].attrib['name']))
+all_ok = False
+
+if not all_ok:
+sys.exit(1)
+
+print('OK')
+sys.exit(0)
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 96beccd..5a7d351 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -106,3 +106,19 @@ ballista/rust/scheduler/testdata/*
 ballista/ui/scheduler/yarn.lock
 python/rust-toolchain
 python/requirements*.txt
+**/testdata/*
+benchmarks/queries/*
+benchmarks/data/*
+ci/*
+**/*.svg
+**/*.csv
+**/*.json
+venv/*
+testing/*
+target/*
+**/target/*
+Cargo.lock
+**/Cargo.lock
+.history
+parquet-testing/*
+*rat.txt
diff --git a/dev/release/run-rat.sh b/dev/release/run-rat.sh
new file mode 100755
index 0000000..94fa55f
--- /dev/null
+++ b/dev/release/run-rat.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses thi

[arrow-datafusion] branch master updated: Rename MergeExec to CoalescePartitionsExec (#635)

2021-06-28 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 4068f8b  Rename MergeExec to CoalescePartitionsExec (#635)
4068f8b is described below

commit 4068f8b3a212aff8d7cdf2183fd1834be0dc5e69
Author: Andy Grove 
AuthorDate: Mon Jun 28 02:05:00 2021 -0600

Rename MergeExec to CoalescePartitionsExec (#635)
---
 ballista/rust/core/proto/ballista.proto   |  4 ++--
 .../rust/core/src/serde/physical_plan/from_proto.rs   |  4 ++--
 .../rust/core/src/serde/physical_plan/to_proto.rs |  6 +++---
 ballista/rust/core/src/utils.rs   | 14 +-
 ballista/rust/scheduler/src/planner.rs| 19 ---
 ballista/rust/scheduler/src/test_utils.rs |  4 ++--
 datafusion/src/execution/context.rs   |  4 ++--
 datafusion/src/physical_optimizer/merge_exec.rs   | 18 +-
 .../{merge.rs => coalesce_partitions.rs}  | 12 ++--
 datafusion/src/physical_plan/cross_join.rs|  6 --
 datafusion/src/physical_plan/hash_aggregate.rs|  4 ++--
 datafusion/src/physical_plan/hash_join.rs |  4 ++--
 datafusion/src/physical_plan/limit.rs |  5 +++--
 datafusion/src/physical_plan/mod.rs   |  8 +---
 datafusion/src/physical_plan/sort.rs  |  4 ++--
 datafusion/src/physical_plan/sort_preserving_merge.rs |  4 ++--
 16 files changed, 67 insertions(+), 53 deletions(-)

diff --git a/ballista/rust/core/proto/ballista.proto b/ballista/rust/core/proto/ballista.proto
index 365d8e9..2aa6102 100644
--- a/ballista/rust/core/proto/ballista.proto
+++ b/ballista/rust/core/proto/ballista.proto
@@ -414,7 +414,7 @@ message PhysicalPlanNode {
 SortExecNode sort = 11;
 CoalesceBatchesExecNode coalesce_batches = 12;
 FilterExecNode filter = 13;
-MergeExecNode merge = 14;
+CoalescePartitionsExecNode merge = 14;
 UnresolvedShuffleExecNode unresolved = 15;
 RepartitionExecNode repartition = 16;
 WindowAggExecNode window = 17;
@@ -648,7 +648,7 @@ message CoalesceBatchesExecNode {
   uint32 target_batch_size = 2;
 }
 
-message MergeExecNode {
+message CoalescePartitionsExecNode {
   PhysicalPlanNode input = 1;
 }
 
diff --git a/ballista/rust/core/src/serde/physical_plan/from_proto.rs b/ballista/rust/core/src/serde/physical_plan/from_proto.rs
index 4b87be4..83cbdb4 100644
--- a/ballista/rust/core/src/serde/physical_plan/from_proto.rs
+++ b/ballista/rust/core/src/serde/physical_plan/from_proto.rs
@@ -37,9 +37,9 @@ use datafusion::execution::context::{
 };
 use datafusion::logical_plan::{window_frames::WindowFrame, DFSchema, Expr};
 use datafusion::physical_plan::aggregates::{create_aggregate_expr, 
AggregateFunction};
+use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
 use datafusion::physical_plan::hash_aggregate::{AggregateMode, 
HashAggregateExec};
 use datafusion::physical_plan::hash_join::PartitionMode;
-use datafusion::physical_plan::merge::MergeExec;
 use datafusion::physical_plan::planner::DefaultPhysicalPlanner;
 use datafusion::physical_plan::window_functions::{
 BuiltInWindowFunction, WindowFunction,
@@ -147,7 +147,7 @@ impl TryInto> for 
::PhysicalPlanNode {
 }
 PhysicalPlanType::Merge(merge) => {
 let input: Arc = 
convert_box_required!(merge.input)?;
-Ok(Arc::new(MergeExec::new(input)))
+Ok(Arc::new(CoalescePartitionsExec::new(input)))
 }
 PhysicalPlanType::Repartition(repart) => {
 let input: Arc = 
convert_box_required!(repart.input)?;
diff --git a/ballista/rust/core/src/serde/physical_plan/to_proto.rs b/ballista/rust/core/src/serde/physical_plan/to_proto.rs
index cf5401b..306abc1 100644
--- a/ballista/rust/core/src/serde/physical_plan/to_proto.rs
+++ b/ballista/rust/core/src/serde/physical_plan/to_proto.rs
@@ -59,8 +59,8 @@ use crate::execution_plans::{ShuffleReaderExec, 
UnresolvedShuffleExec};
 use crate::serde::protobuf::repartition_exec_node::PartitionMethod;
 use crate::serde::scheduler::PartitionLocation;
 use crate::serde::{protobuf, BallistaError};
+use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
 use datafusion::physical_plan::functions::{BuiltinScalarFunction, 
ScalarFunctionExpr};
-use datafusion::physical_plan::merge::MergeExec;
 use datafusion::physical_plan::repartition::RepartitionExec;
 
 impl TryInto for Arc {
@@ -292,11 +292,11 @@ impl TryInto for Arc {
 },
 )),
 })
-} else if let Some(exec) = plan.downcast_ref::() {
+} else if let Some(exec) = 
plan.downcast_ref::() {
 let input: protobuf::PhysicalPlanNode = 
exec.inpu

[arrow-experimental-rs-arrow2] branch master updated (5518118 -> 52fea24)

2021-06-27 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch master
in repository 
https://gitbox.apache.org/repos/asf/arrow-experimental-rs-arrow2.git.


 discard 5518118  Removed all.
 discard f2f75a2  Removed tooling related to arrow crate.
 discard c64ad28  Kept parquet
 new 52fea24  Removed all.

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (5518118)
\
 N -- N -- N   refs/heads/master (52fea24)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:


[arrow-rs] branch master updated: Bump flatbuffers (#499)

2021-06-24 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 7bea1f6  Bump flatbuffers (#499)
7bea1f6 is described below

commit 7bea1f665d502e67298451700816fee995045bb0
Author: Chojan Shang 
AuthorDate: Thu Jun 24 22:47:40 2021 +0800

Bump flatbuffers (#499)

Signed-off-by: Chojan Shang 
---
 arrow/Cargo.toml  |  2 +-
 arrow/src/ipc/gen/Message.rs  | 12 ++--
 arrow/src/ipc/gen/Schema.rs   | 40 +++
 arrow/src/ipc/gen/SparseTensor.rs |  8 
 4 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index c3ba8b1..0ed2a45 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -47,7 +47,7 @@ regex = "1.3"
 lazy_static = "1.4"
 packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" }
 chrono = "0.4"
-flatbuffers = { version = "=0.8.4", optional = true }
+flatbuffers = { version = "=2.0.0", optional = true }
 hex = "0.4"
 prettytable-rs = { version = "0.8.0", optional = true }
 lexical-core = "^0.7"
diff --git a/arrow/src/ipc/gen/Message.rs b/arrow/src/ipc/gen/Message.rs
index 7903844..707c62c 100644
--- a/arrow/src/ipc/gen/Message.rs
+++ b/arrow/src/ipc/gen/Message.rs
@@ -76,7 +76,7 @@ impl<'a> flatbuffers::Follow<'a> for CompressionType {
 type Inner = Self;
 #[inline]
 fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-let b = flatbuffers::read_scalar_at::(buf, loc);
+let b = unsafe { flatbuffers::read_scalar_at::(buf, loc) };
 Self(b)
 }
 }
@@ -85,7 +85,7 @@ impl flatbuffers::Push for CompressionType {
 type Output = CompressionType;
 #[inline]
 fn push(, dst:  [u8], _rest: &[u8]) {
-flatbuffers::emplace_scalar::(dst, self.0);
+unsafe { flatbuffers::emplace_scalar::(dst, self.0) };
 }
 }
 
@@ -173,7 +173,7 @@ impl<'a> flatbuffers::Follow<'a> for BodyCompressionMethod {
 type Inner = Self;
 #[inline]
 fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-let b = flatbuffers::read_scalar_at::(buf, loc);
+let b = unsafe { flatbuffers::read_scalar_at::(buf, loc) };
 Self(b)
 }
 }
@@ -182,7 +182,7 @@ impl flatbuffers::Push for BodyCompressionMethod {
 type Output = BodyCompressionMethod;
 #[inline]
 fn push(, dst:  [u8], _rest: &[u8]) {
-flatbuffers::emplace_scalar::(dst, self.0);
+unsafe { flatbuffers::emplace_scalar::(dst, self.0) };
 }
 }
 
@@ -292,7 +292,7 @@ impl<'a> flatbuffers::Follow<'a> for MessageHeader {
 type Inner = Self;
 #[inline]
 fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-let b = flatbuffers::read_scalar_at::(buf, loc);
+let b = unsafe { flatbuffers::read_scalar_at::(buf, loc) };
 Self(b)
 }
 }
@@ -301,7 +301,7 @@ impl flatbuffers::Push for MessageHeader {
 type Output = MessageHeader;
 #[inline]
 fn push(, dst:  [u8], _rest: &[u8]) {
-flatbuffers::emplace_scalar::(dst, self.0);
+unsafe { flatbuffers::emplace_scalar::(dst, self.0) };
 }
 }
 
diff --git a/arrow/src/ipc/gen/Schema.rs b/arrow/src/ipc/gen/Schema.rs
index f37f920..12af5b5 100644
--- a/arrow/src/ipc/gen/Schema.rs
+++ b/arrow/src/ipc/gen/Schema.rs
@@ -96,7 +96,7 @@ impl<'a> flatbuffers::Follow<'a> for MetadataVersion {
 type Inner = Self;
 #[inline]
 fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-let b = flatbuffers::read_scalar_at::(buf, loc);
+let b = unsafe { flatbuffers::read_scalar_at::(buf, loc) };
 Self(b)
 }
 }
@@ -105,7 +105,7 @@ impl flatbuffers::Push for MetadataVersion {
 type Output = MetadataVersion;
 #[inline]
 fn push(, dst:  [u8], _rest: &[u8]) {
-flatbuffers::emplace_scalar::(dst, self.0);
+unsafe { flatbuffers::emplace_scalar::(dst, self.0) };
 }
 }
 
@@ -217,7 +217,7 @@ impl<'a> flatbuffers::Follow<'a> for Feature {
 type Inner = Self;
 #[inline]
 fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-let b = flatbuffers::read_scalar_at::(buf, loc);
+let b = unsafe { flatbuffers::read_scalar_at::(buf, loc) };
 Self(b)
 }
 }
@@ -226,7 +226,7 @@ impl flatbuffers::Push for Feature {
 type Output = Feature;
 #[inline]
 fn push(, dst:  [u8], _rest: &[u8]) {
-flatbuffers::emplace_scalar::(dst, self.0);
+unsafe { flatbuffers::emplace_scalar::(dst, self.0) };
 }
 }
 
@@ -305,7 +305,7 @@ impl<'a> flatbuffers::Follow<'a> for UnionMode {
 type Inner = Self;
 #[inline]
  

[arrow-rs] branch master updated (4c7d418 -> 8672274)

2021-06-23 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


from 4c7d418  Fixed RAT check. (#482)
 add 8672274  Add C data interface for decimal128 and timestamp (#453)

No new revisions were added by this update.

Summary of changes:
 .github/workflows/rust.yml |   2 +-
 .../tests/test_sql.py  |  84 ++-
 arrow/src/ffi.rs   | 166 -
 3 files changed, 237 insertions(+), 15 deletions(-)


[arrow-rs] branch master updated: Derive Eq and PartialEq for SortOptions (#425)

2021-06-08 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 5adfd3d  Derive Eq and PartialEq for SortOptions (#425)
5adfd3d is described below

commit 5adfd3d339b426fc1267088c2295822f71a65450
Author: Raphael Taylor-Davies <1781103+tustv...@users.noreply.github.com>
AuthorDate: Tue Jun 8 18:27:30 2021 +0100

Derive Eq and PartialEq for SortOptions (#425)
---
 arrow/src/compute/kernels/sort.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index 0a02167..c5e05a6 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -361,7 +361,7 @@ pub fn sort_to_indices(
 }
 
 /// Options that define how sort kernels should behave
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct SortOptions {
 /// Whether to sort in descending order
 pub descending: bool,


[arrow-datafusion] branch master updated: Optimize cast function during planning stage (#513)

2021-06-05 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new b84789a  Optimize cast function during planning stage (#513)
b84789a is described below

commit b84789afc5a67e3f70cd8903bf96993b13414aaf
Author: sathis 
AuthorDate: Sun Jun 6 03:04:24 2021 +0530

Optimize cast function during planning stage (#513)

Co-authored-by: Sathis Kumar 
---
 datafusion/src/optimizer/constant_folding.rs | 58 
 1 file changed, 58 insertions(+)

diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs
index 97cc232..d2ac5ce 100644
--- a/datafusion/src/optimizer/constant_folding.rs
+++ b/datafusion/src/optimizer/constant_folding.rs
@@ -30,6 +30,7 @@ use crate::optimizer::utils;
 use crate::physical_plan::functions::BuiltinScalarFunction;
 use crate::scalar::ScalarValue;
 use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
+use arrow::compute::{kernels, DEFAULT_CAST_OPTIONS};
 
 /// Optimizer that simplifies comparison expressions involving boolean 
literals.
 ///
@@ -247,6 +248,25 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> {
 }
 }
 }
+Expr::Cast {
+expr: inner,
+data_type,
+} => match inner.as_ref() {
+Expr::Literal(val) => {
+let scalar_array = val.to_array();
+let cast_array = kernels::cast::cast_with_options(
+_array,
+_type,
+_CAST_OPTIONS,
+)?;
+let cast_scalar = ScalarValue::try_from_array(_array, 
0)?;
+Expr::Literal(cast_scalar)
+}
+_ => Expr::Cast {
+expr: inner,
+data_type,
+},
+},
 expr => {
 // no rewrite possible
 expr
@@ -725,6 +745,44 @@ mod tests {
 }
 
 #[test]
+fn cast_expr() {
+let table_scan = test_table_scan().unwrap();
+let proj = vec![Expr::Cast {
+expr: 
Box::new(Expr::Literal(ScalarValue::Utf8(Some("0".to_string(),
+data_type: DataType::Int32,
+}];
+let plan = LogicalPlanBuilder::from(_scan)
+.project(proj)
+.unwrap()
+.build()
+.unwrap();
+
+let expected = "Projection: Int32(0)\
+\n  TableScan: test projection=None";
+let actual = get_optimized_plan_formatted(, ::Utc::now());
+assert_eq!(expected, actual);
+}
+
+#[test]
+fn cast_expr_wrong_arg() {
+let table_scan = test_table_scan().unwrap();
+let proj = vec![Expr::Cast {
+expr: 
Box::new(Expr::Literal(ScalarValue::Utf8(Some("".to_string(),
+data_type: DataType::Int32,
+}];
+let plan = LogicalPlanBuilder::from(_scan)
+.project(proj)
+.unwrap()
+.build()
+.unwrap();
+
+let expected = "Projection: Int32(NULL)\
+\n  TableScan: test projection=None";
+let actual = get_optimized_plan_formatted(, ::Utc::now());
+assert_eq!(expected, actual);
+}
+
+#[test]
 fn single_now_expr() {
 let table_scan = test_table_scan().unwrap();
 let proj = vec![Expr::ScalarFunction {


[arrow-datafusion] branch master updated: simplify python function definitions (#477)

2021-06-03 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 53792ec  simplify python function definitions (#477)
53792ec is described below

commit 53792ecf0bcaca6f15d36ca6a7e7f2b591c45831
Author: Jiayu Liu 
AuthorDate: Fri Jun 4 11:37:03 2021 +0800

simplify python function definitions (#477)
---
 python/README.md|  16 ++-
 python/src/functions.rs | 352 
 2 files changed, 98 insertions(+), 270 deletions(-)

diff --git a/python/README.md b/python/README.md
index 1859fca..50143ae 100644
--- a/python/README.md
+++ b/python/README.md
@@ -115,7 +115,7 @@ df = df.aggregate(
 )
 ```
 
-## How to install
+## How to install (from pip)
 
 ```bash
 pip install datafusion
@@ -135,12 +135,18 @@ cd arrow-datafusion/python
 
 # prepare development environment (used to build wheel / install in 
development)
 python3 -m venv venv
-pip install maturin==0.10.4 toml==0.10.1 pyarrow==1.0.0
+
+# activate the venv
+source venv/bin/activate
+
+# install dependencies
+pip install maturin==0.10.6 toml==0.10.1 pyarrow==4.0.0
 ```
 
-Whenever rust code changes (your changes or via git pull):
+Whenever rust code changes (your changes or via `git pull`):
 
 ```bash
-venv/bin/maturin develop
-venv/bin/python -m unittest discover tests
+# make sure you activate the venv using "source venv/bin/activate" first
+maturin develop
+python -m unittest discover tests
 ```
diff --git a/python/src/functions.rs b/python/src/functions.rs
index f46dd3e..b03004f 100644
--- a/python/src/functions.rs
+++ b/python/src/functions.rs
@@ -15,16 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::sync::Arc;
-
-use datafusion::arrow::datatypes::DataType;
-use pyo3::{prelude::*, wrap_pyfunction};
-
-use datafusion::logical_plan;
-
 use crate::udaf;
 use crate::udf;
 use crate::{expression, types::PyDataType};
+use datafusion::arrow::datatypes::DataType;
+use datafusion::logical_plan;
+use pyo3::{prelude::*, wrap_pyfunction};
+use std::sync::Arc;
 
 /// Expression representing a column on the existing plan.
 #[pyfunction]
@@ -52,55 +49,6 @@ fn array(value: Vec) -> 
expression::Expression {
 }
 
 #[pyfunction]
-fn ascii(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::ascii(value.expr),
-}
-}
-
-#[pyfunction]
-fn sum(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::sum(value.expr),
-}
-}
-
-#[pyfunction]
-fn bit_length(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::bit_length(value.expr),
-}
-}
-
-#[pyfunction]
-fn btrim(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::btrim(value.expr),
-}
-}
-
-#[pyfunction]
-fn character_length(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::character_length(value.expr),
-}
-}
-
-#[pyfunction]
-fn chr(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::chr(value.expr),
-}
-}
-
-#[pyfunction]
-fn concat_ws(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::concat_ws(value.expr),
-}
-}
-
-#[pyfunction]
 fn in_list(
 expr: expression::Expression,
 value: Vec,
@@ -115,215 +63,87 @@ fn in_list(
 }
 }
 
-#[pyfunction]
-fn initcap(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::initcap(value.expr),
-}
-}
-
-#[pyfunction]
-fn left(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::left(value.expr),
-}
-}
-
-#[pyfunction]
-fn lower(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::lower(value.expr),
-}
-}
-
-#[pyfunction]
-fn lpad(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::lpad(value.expr),
-}
-}
-
-#[pyfunction]
-fn ltrim(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::ltrim(value.expr),
-}
-}
-
-#[pyfunction]
-fn md5(value: expression::Expression) -> expression::Expression {
-expression::Expression {
-expr: logical_plan::md5(value.expr),
-}
-}
-
-#[pyfunction]
-fn octet_length(value: expression::Expression) -> expression::Expression {
-expression::Expression

[arrow-datafusion] branch master updated: Speed up `create_batch_from_map` (#339)

2021-05-26 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 9e7bd2d  Speed up `create_batch_from_map` (#339)
9e7bd2d is described below

commit 9e7bd2d13643c81e474e023749998ec8efa770a4
Author: Daniël Heres 
AuthorDate: Thu May 27 06:57:48 2021 +0200

Speed up `create_batch_from_map` (#339)
---
 datafusion/src/physical_plan/hash_aggregate.rs | 156 +++--
 datafusion/src/scalar.rs   | 140 ++
 2 files changed, 182 insertions(+), 114 deletions(-)

diff --git a/datafusion/src/physical_plan/hash_aggregate.rs 
b/datafusion/src/physical_plan/hash_aggregate.rs
index 5008f49..ffb51b2 100644
--- a/datafusion/src/physical_plan/hash_aggregate.rs
+++ b/datafusion/src/physical_plan/hash_aggregate.rs
@@ -20,6 +20,7 @@
 use std::any::Any;
 use std::sync::Arc;
 use std::task::{Context, Poll};
+use std::vec;
 
 use ahash::RandomState;
 use futures::{
@@ -32,6 +33,7 @@ use crate::physical_plan::{
 Accumulator, AggregateExpr, DisplayFormatType, Distribution, ExecutionPlan,
 Partitioning, PhysicalExpr, SQLMetric,
 };
+use crate::scalar::ScalarValue;
 
 use arrow::{
 array::{Array, UInt32Builder},
@@ -623,10 +625,12 @@ fn create_key_for_col(col: , row: usize, vec: 
 Vec) -> Result<(
 DataType::UInt64 => {
 dictionary_create_key_for_col::(col, row, vec)?;
 }
-_ => return Err(DataFusionError::Internal(format!(
+_ => {
+return Err(DataFusionError::Internal(format!(
 "Unsupported GROUP BY type (dictionary index type not 
supported creating key) {}",
 col.data_type(),
-))),
+)))
+}
 },
 _ => {
 // This is internal because we should have caught this before.
@@ -957,20 +961,6 @@ impl RecordBatchStream for HashAggregateStream {
 }
 }
 
-/// Given `Vec<Vec<ArrayRef>>`, concatenates the inner `Vec<ArrayRef>` into 
`ArrayRef`, returning `Vec<ArrayRef>`
-/// This assumes that `arrays` is not empty.
-fn concatenate(arrays: Vec>) -> ArrowResult> {
-(0..arrays[0].len())
-.map(|column| {
-let array_list = arrays
-.iter()
-.map(|a| a[column].as_ref())
-.collect::>();
-compute::concat(_list)
-})
-.collect::>>()
-}
-
 /// Create a RecordBatch with all group keys and accumulator' states or values.
 fn create_batch_from_map(
 mode: ,
@@ -978,84 +968,72 @@ fn create_batch_from_map(
 num_group_expr: usize,
 output_schema: ,
 ) -> ArrowResult {
-// 1. for each key
-// 2. create single-row ArrayRef with all group expressions
-// 3. create single-row ArrayRef with all aggregate states or values
-// 4. collect all in a vector per key of vec, vec[i][j]
-// 5. concatenate the arrays over the second index [j] into a single 
vec.
-let arrays = accumulators
-.iter()
-.map(|(_, (group_by_values, accumulator_set, _))| {
-// 2.
-let mut groups = (0..num_group_expr)
-.map(|i| match _by_values[i] {
-GroupByScalar::Float32(n) => {
-Arc::new(Float32Array::from(vec![(*n).into()] as 
Vec))
-as ArrayRef
-}
-GroupByScalar::Float64(n) => {
-Arc::new(Float64Array::from(vec![(*n).into()] as 
Vec))
-as ArrayRef
-}
-GroupByScalar::Int8(n) => {
-Arc::new(Int8Array::from(vec![*n])) as ArrayRef
-}
-GroupByScalar::Int16(n) => 
Arc::new(Int16Array::from(vec![*n])),
-GroupByScalar::Int32(n) => 
Arc::new(Int32Array::from(vec![*n])),
-GroupByScalar::Int64(n) => 
Arc::new(Int64Array::from(vec![*n])),
-GroupByScalar::UInt8(n) => 
Arc::new(UInt8Array::from(vec![*n])),
-GroupByScalar::UInt16(n) => 
Arc::new(UInt16Array::from(vec![*n])),
-GroupByScalar::UInt32(n) => 
Arc::new(UInt32Array::from(vec![*n])),
-GroupByScalar::UInt64(n) => 
Arc::new(UInt64Array::from(vec![*n])),
-GroupByScalar::Utf8(str) => {
-Arc::new(StringArray::from(vec![&***str]))
-}
-GroupByScalar::LargeUtf8(str) => {
-Arc::new(LargeStringArray::from(vec![&***str]))
-}
-GroupByScalar::Boolean(b) => 
Arc::new(BooleanArray::from(vec![*b])),
-GroupByScalar

[arrow-rs] branch master updated (74d5957 -> b802895)

2021-05-26 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


from 74d5957  Disable MIRI check until it runs cleanly on CI (#360)
 add b802895  allow `SliceableCursor` to be constructed from an `Arc` 
directly (#369)

No new revisions were added by this update.

Summary of changes:
 parquet/src/util/cursor.rs | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)


[arrow-rs] branch master updated (94a82cd -> 74d5957)

2021-05-26 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


from 94a82cd  ensure null-counts are written for all-null columns (#307)
 add 74d5957  Disable MIRI check until it runs cleanly on CI (#360)

No new revisions were added by this update.

Summary of changes:
 .github/workflows/rust.yml | 71 --
 1 file changed, 37 insertions(+), 34 deletions(-)


[arrow-site] branch release-note-4.0.1 updated (fe06a99 -> c8c6e73)

2021-05-26 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch release-note-4.0.1
in repository https://gitbox.apache.org/repos/asf/arrow-site.git.


from fe06a99  [Website] Add release note for 4.0.1
 add c8c6e73  Tweaked message.

No new revisions were added by this update.

Summary of changes:
 _release/4.0.1.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


[arrow-site] branch release-note-4.0.1 updated: [Website] Add release note for 4.0.1

2021-05-26 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch release-note-4.0.1
in repository https://gitbox.apache.org/repos/asf/arrow-site.git


The following commit(s) were added to refs/heads/release-note-4.0.1 by this 
push:
 new fe06a99  [Website] Add release note for 4.0.1
fe06a99 is described below

commit fe06a9928bf9662bb51063c5905ede6645badbbe
Author: Jorge C. Leitao 
AuthorDate: Wed May 26 19:30:23 2021 +0200

[Website] Add release note for 4.0.1
---
 _release/4.0.1.md | 29 +
 1 file changed, 29 insertions(+)

diff --git a/_release/4.0.1.md b/_release/4.0.1.md
index 028ee32..6aefa86 100644
--- a/_release/4.0.1.md
+++ b/_release/4.0.1.md
@@ -70,6 +70,35 @@ $ git shortlog -csn apache-arrow-4.0.0..apache-arrow-4.0.1
 
 ## Changelog
 
+## Apache Arrow 4.0.1 (2021-05-26)
+
+### Bug Fixes
+
+* [ARROW-12568](https://issues.apache.org/jira/browse/ARROW-12568) - 
[Python][C++] Segfault when casting a sliced ListArray of int64 in v4.0.0
+* [ARROW-12601](https://issues.apache.org/jira/browse/ARROW-12601) - 
[R][Packaging] Fix pkg-config check in r/configure
+* [ARROW-12603](https://issues.apache.org/jira/browse/ARROW-12603) - [R] 
open\_dataset ignoring provided schema when using select
+* [ARROW-12604](https://issues.apache.org/jira/browse/ARROW-12604) - 
[R][Packaging] Dataset, Parquet off in autobrew and CRAN Mac builds
+* [ARROW-12617](https://issues.apache.org/jira/browse/ARROW-12617) - [Python] 
pyarrow.orc.write\_table signature reverses that of pyarrow.parquet.write\_table
+* [ARROW-12622](https://issues.apache.org/jira/browse/ARROW-12622) - [Python] 
Segfault when reading CSV inside Flight server
+* [ARROW-12642](https://issues.apache.org/jira/browse/ARROW-12642) - [R] 
LIBARROW\_MINIMAL, LIBARROW\_DOWNLOAD, NOT\_CRAN env vars should not be 
case-sensitive
+* [ARROW-12663](https://issues.apache.org/jira/browse/ARROW-12663) - [C++] 
segfault when arrow header is compiled with nvcc 11.2
+* [ARROW-12670](https://issues.apache.org/jira/browse/ARROW-12670) - [C++] 
extract\_regex gives bizarre behavior after nulls or non-matches
+* [ARROW-12746](https://issues.apache.org/jira/browse/ARROW-12746) - 
[Go][Flight] Client Auth handler overwrites outgoing metadata
+* [ARROW-12769](https://issues.apache.org/jira/browse/ARROW-12769) - [Python] 
Negative out of range slices yield invalid arrays
+* [ARROW-12774](https://issues.apache.org/jira/browse/ARROW-12774) - 
[C++][Compute] replace\_substring\_regex() creates invalid arrays =\> crash
+* [ARROW-12776](https://issues.apache.org/jira/browse/ARROW-12776) - 
[Archery][Integration] Fix decimal case generation in write\_js\_test\_json
+
+
+### New Features and Improvements
+
+* [ARROW-11926](https://issues.apache.org/jira/browse/ARROW-11926) - [R] Pass 
on the new UCRT CRAN windows builds
+* [ARROW-12520](https://issues.apache.org/jira/browse/ARROW-12520) - [R] Minor 
docs updates
+* [ARROW-12571](https://issues.apache.org/jira/browse/ARROW-12571) - [R][CI] 
Run nightly R with valgrind
+* [ARROW-12578](https://issues.apache.org/jira/browse/ARROW-12578) - [JS] 
Simplify UTF8 handling in NodeJS
+* [ARROW-12619](https://issues.apache.org/jira/browse/ARROW-12619) - [Python] 
pyarrow sdist should not require git
+* [ARROW-12806](https://issues.apache.org/jira/browse/ARROW-12806) - [Python] 
test\_write\_to\_dataset\_filesystem missing a dataset mark
+
+
 [1]: https://www.apache.org/dyn/closer.lua/arrow/arrow-4.0.1/
 [2]: https://bintray.com/apache/arrow/centos/4.0.1/
 [3]: https://bintray.com/apache/arrow/debian/4.0.1/


[arrow-site] branch release-note-4.0.1 created (now ce11ae3)

2021-05-26 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch release-note-4.0.1
in repository https://gitbox.apache.org/repos/asf/arrow-site.git.


  at ce11ae3  [Website] Add release note for 4.0.1

This branch includes the following new commits:

 new ce11ae3  [Website] Add release note for 4.0.1

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow-site] 01/01: [Website] Add release note for 4.0.1

2021-05-26 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch release-note-4.0.1
in repository https://gitbox.apache.org/repos/asf/arrow-site.git

commit ce11ae3452f6714ed04403100a09e337d324dd83
Author: Jorge C. Leitao 
AuthorDate: Wed May 26 19:26:24 2021 +0200

[Website] Add release note for 4.0.1
---
 _data/versions.yml |  24 +--
 _release/4.0.1.md  |  78 
 _release/index.md  | 114 +++--
 3 files changed, 148 insertions(+), 68 deletions(-)

diff --git a/_data/versions.yml b/_data/versions.yml
index 4de00ba..b2cdefa 100644
--- a/_data/versions.yml
+++ b/_data/versions.yml
@@ -16,16 +16,16 @@
 # Database of the current version
 #
 current:
-  number: '4.0.0'
+  number: '4.0.1'
   pinned_number: '4.0.*'
-  date: '26 April 2021'
-  git-tag: 'f959141ece4d660bce5f7fa545befc0116a7db79'
-  github-tag-link: 
'https://github.com/apache/arrow/releases/tag/apache-arrow-4.0.0'
-  release-notes: 'https://arrow.apache.org/release/4.0.0.html'
-  mirrors: 'https://www.apache.org/dyn/closer.lua/arrow/arrow-4.0.0/'
-  tarball-name: 'apache-arrow-4.0.0.tar.gz'
-  tarball-url: 
'https://www.apache.org/dyn/closer.lua?action=download=arrow/arrow-4.0.0/apache-arrow-4.0.0.tar.gz'
-  java-artifacts: 
'http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.arrow%22%20AND%20v%3A%224.0.0%22'
-  asc: 
'https://downloads.apache.org/arrow/arrow-4.0.0/apache-arrow-4.0.0.tar.gz.asc'
-  sha256: 
'https://downloads.apache.org/arrow/arrow-4.0.0/apache-arrow-4.0.0.tar.gz.sha256'
-  sha512: 
'https://downloads.apache.org/arrow/arrow-4.0.0/apache-arrow-4.0.0.tar.gz.sha512'
+  date: '26 May 2021'
+  git-tag: '81ff679c47754692224f655dab32cc0936bb5f55'
+  github-tag-link: 
'https://github.com/apache/arrow/releases/tag/apache-arrow-4.0.1'
+  release-notes: 'https://arrow.apache.org/release/4.0.1.html'
+  mirrors: 'https://www.apache.org/dyn/closer.lua/arrow/arrow-4.0.1/'
+  tarball-name: 'apache-arrow-4.0.1.tar.gz'
+  tarball-url: 
'https://www.apache.org/dyn/closer.lua?action=download=arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz'
+  java-artifacts: 
'http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.arrow%22%20AND%20v%3A%224.0.1%22'
+  asc: 
'https://downloads.apache.org/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.asc'
+  sha256: 
'https://downloads.apache.org/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha256'
+  sha512: 
'https://downloads.apache.org/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha512'
diff --git a/_release/4.0.1.md b/_release/4.0.1.md
new file mode 100644
index 000..028ee32
--- /dev/null
+++ b/_release/4.0.1.md
@@ -0,0 +1,78 @@
+---
+layout: default
+title: Apache Arrow 4.0.1 Release
+permalink: /release/4.0.1.html
+---
+
+
+# Apache Arrow 4.0.1 (26 May 2021)
+
+This is a major release covering more than 1 month of development.
+
+## Download
+
+* [**Source Artifacts**][1]
+* **Binary Artifacts**
+  * [For CentOS][2]
+  * [For Debian][3]
+  * [For Python][4]
+  * [For Ubuntu][5]
+* [Git tag][6]
+
+## Contributors
+
+This release includes 21 commits from 13 distinct contributors.
+
+```console
+$ git shortlog -sn apache-arrow-4.0.0..apache-arrow-4.0.1
+ 3 Jorge C. Leitao
+ 3 Joris Van den Bossche
+ 2 Krisztián Szűcs
+ 2 Ian Cook
+ 2 David Li
+ 2 Jonathan Keane
+ 1 Antoine Pitrou
+ 1 niranda perera
+ 1 Dominik Moritz
+ 1 GALI PREM SAGAR
+ 1 Jeroen Ooms
+ 1 Matthew Topol
+ 1 Yibo Cai
+```
+
+## Patch Committers
+
+The following Apache committers merged contributed patches to the repository.
+
+```console
+$ git shortlog -csn apache-arrow-4.0.0..apache-arrow-4.0.1
+17 Krisztián Szűcs
+ 3 Jorge C. Leitao
+ 1 GitHub
+```
+
+## Changelog
+
+[1]: https://www.apache.org/dyn/closer.lua/arrow/arrow-4.0.1/
+[2]: https://bintray.com/apache/arrow/centos/4.0.1/
+[3]: https://bintray.com/apache/arrow/debian/4.0.1/
+[4]: https://bintray.com/apache/arrow/python/4.0.1/
+[5]: https://bintray.com/apache/arrow/ubuntu/4.0.1/
+[6]: https://github.com/apache/arrow/releases/tag/apache-arrow-4.0.1
diff --git a/_release/index.md b/_release/index.md
index 97b99bf..8520324 100644
--- a/_release/index.md
+++ b/_release/index.md
@@ -26,60 +26,62 @@ limitations under the License.
 
 Navigate to the release page for downloads and the changelog.
 
-* [4.0.0 (26 April 2021)][1]
-* [3.0.0 (26 January 2021)][2]
-* [2.0.0 (19 October 2020)][3]
-* [1.0.1 (21 August 2020)][4]
-* [1.0.0 (24 July 2020)][5]
-* [0.17.1 (18 May 2020)][6]
-* [0.17.0 (20 April 2020)][7]
-* [0.16.0 (7 February 2020)][8]
-* [0.15.1 (1 November 2019)][9]
-* [0.15.0 (5 October 2019)][10]
-* [0.14.1 (22 July 2019)][11]
-* [0.14.0 (4 July 2019)][12]
-* [0.13.0 (1 April 2019)][13]
-* [0.12.0 (20 January 2019)][14]
-* [0.11.1 (19 October 2018)][15]
-* [0.11.0 (8 October 2018)][16]
-* [0.10.0 (6 August 2018)][17]
-* [0.9.0 (21 March 2018)][18]
-* [0.8.0 (18

svn commit: r47931 - in /release/arrow: arrow-1.0.1/ arrow-2.0.0/ arrow-4.0.1/ arrow-4.0.1/apache-arrow-4.0.1.tar.gz arrow-4.0.1/apache-arrow-4.0.1.tar.gz.asc arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha

2021-05-26 Thread jorgecarleitao
Author: jorgecarleitao
Date: Wed May 26 16:38:07 2021
New Revision: 47931

Log:
Apache Arrow 4.0.1

Added:
release/arrow/arrow-4.0.1/
release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz   (with props)
release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.asc
release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha256
release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha512
Removed:
release/arrow/arrow-1.0.1/
release/arrow/arrow-2.0.0/

Added: release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz
==
Binary file - no diff available.

Propchange: release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz
--
svn:mime-type = application/octet-stream

Added: release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.asc
==
--- release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.asc (added)
+++ release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.asc Wed May 26 16:38:07 
2021
@@ -0,0 +1,16 @@
+-BEGIN PGP SIGNATURE-
+
+iQIzBAABCAAdFiEErD3fXew3+nQzRMXT2QFQ96jyxu4FAmCki74ACgkQ2QFQ96jy
+xu5IqRAAjNyOjT65mv2UCB6RCe6bKTsN3gRCv/KxztpYoGVyJRZ331dodr86SlcD
+a5FSR0Sin+LSUaRUD/3hjV+vneCNSEUiiUEfpsD+PbkStUfsiDyP7s8eD+zqTXmn
+ErBvFggynwDfPeXTV1Z+eHDxfXUvlWRX1IwyxJjLQ3YuP2M8KCt8FzJtxPPdmOZi
+igvol7fDKxOrVe4i8pae5HwMbBd0h3MARJhDcYS16WPbtsYglonfgdCFk7k0TJH2
+dCXoC7m1CjbbqrPsGzdM/ZqXFdZUESNwj873eMz59bt3IJVp5o8vtaoC8Wdq8L+8
+/gLB+yy4e6SZ2sJDgdGjfsSBztnl6CBZaBsup1MswqaQ3VshVvH03EUUKhOE1eqS
+UxsrWMNFlQuujSYK7sMGoz84LA5Wwncp4iD0ncRGNlzy09IXO4nJZYHIEwntQpTq
+VRyPjUWHHPbOu8ACeqV2u5Am+td+0E4YkgcHThhM1g51I76CwQ6UVHDAt9zeUtlS
+JCBLrUJ6E2q+xXIQywWOfg64dOWrXIgiZ4FASqAu0cNqiVGnbdI327k5MBpECJeb
+FfG9+phVrimXUq5krAlm8hLKnJSIFNuqwY9sH107ZvVl8MekDFH218z9UZRqDkQR
+pf+nDk46Wsa2q1Q0rXstWdqqoW2T2KtThmQ2u/lfh4qSFzcgnnQ=
+=M4Jq
+-END PGP SIGNATURE-

Added: release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha256
==
--- release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha256 (added)
+++ release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha256 Wed May 26 
16:38:07 2021
@@ -0,0 +1 @@
+75ccbfa276b925c6b1c978a920ff2f30c4b0d3fdf8b51777915b6f69a211896e  
apache-arrow-4.0.1.tar.gz

Added: release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha512
==
--- release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha512 (added)
+++ release/arrow/arrow-4.0.1/apache-arrow-4.0.1.tar.gz.sha512 Wed May 26 
16:38:07 2021
@@ -0,0 +1 @@
+f76e5da1522ae3dce7a917e53d9b3ead9f4edf6f5bfa3e5bfadd9444084622c502eb265ebaa00eae73c1f3a38da580f67701fac94e182d3ec54ec8776811bee2
  apache-arrow-4.0.1.tar.gz




[arrow] branch master updated: ARROW-12643: [Governance] Added experimental repos guidelines.

2021-05-25 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 16199b7  ARROW-12643: [Governance] Added experimental repos guidelines.
16199b7 is described below

commit 16199b717ba133adf6c63911cb33b8bc98d3362e
Author: Jorge Leitao 
AuthorDate: Wed May 26 04:10:06 2021 +

ARROW-12643: [Governance] Added experimental repos guidelines.

Closes #10239 from jorgecarleitao/experimental

Lead-authored-by: Jorge Leitao 
Co-authored-by: Jorge C. Leitao 
Signed-off-by: Jorge C. Leitao 
---
 docs/source/developers/contributing.rst   |  2 +
 docs/source/developers/experimental_repos.rst | 65 +++
 2 files changed, 67 insertions(+)

diff --git a/docs/source/developers/contributing.rst 
b/docs/source/developers/contributing.rst
index e75d2c6..9b81a6f 100644
--- a/docs/source/developers/contributing.rst
+++ b/docs/source/developers/contributing.rst
@@ -311,6 +311,8 @@ In addition, the GitHub PR "suggestion" feature can also 
add commits to
 your branch, so it is possible that your local copy of your branch is missing
 some additions.
 
+.. include:: experimental_repos.rst
+
 Guidance for specific features
 ==
 
diff --git a/docs/source/developers/experimental_repos.rst 
b/docs/source/developers/experimental_repos.rst
new file mode 100644
index 000..6f800b5
--- /dev/null
+++ b/docs/source/developers/experimental_repos.rst
@@ -0,0 +1,65 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Experimental repositories
+=
+
+Apache Arrow has an explicit policy over developing experimental repositories
+in the context of
+`rules for revolutionaries 
<https://grep.codeconsult.ch/2020/04/07/rules-for-revolutionaries-2000-edition/>`_.
+
+The main motivation for this policy is to offer a lightweight mechanism to
+conduct experimental work, with the necessary creative freedom, within the ASF
+and the Apache Arrow governance model. This policy allows committers to work in
+new repositories, as these offer many important tools to manage such work (e.g. GitHub
+issues, “watch”, and “GitHub stars” to measure overall interest).
+
+Process
+---
+
+* A committer *may* initiate experimental work by creating a separate git
+  repository within the Apache Arrow project (e.g. via `selfserve 
<https://selfserve.apache.org/>`_)
+  and announcing it on the mailing list, together with its goals and a link 
to the
+  newly created repository.
+* The committer *must* initiate an email thread with the sole purpose of
+  presenting updates to the community about the status of the repo.
+* There *must not* be official releases from the repository.
+* Any decision to make the experimental repo official in any way, whether by 
merging or migrating, *must* be discussed and voted on in the mailing list.
+* The committer is responsible for managing issues, documentation, CI of the 
repository,
+  including licensing checks.
+* The committer decides when the repository is archived.
+
+Repository management
+-
+
+* The repository *must* be under `apache/`
+* The repository’s name *must* be prefixed by `arrow-experimental-`
+* The committer has full permissions over the repository (to the extent possible 
within the ASF)
+* Push / merge permissions *must only* be granted to Apache Arrow committers
+
+Development process
+---
+
+* The repository must follow the ASF requirements about 3rd party code.
+* The committer decides how to manage issues, PRs, etc.
+
+Divergences
+---
+
+* If any of the “must” requirements above is not met and no corrective measure
+  is taken by the committer upon request, the PMC *should* take ownership
+  and decide what to do.


[arrow-datafusion] branch master updated (8b31714 -> 0aea0df)

2021-05-24 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.


from 8b31714  Return Vec from PredicateBuilder rather than an `Fn` 
(#370)
 add 0aea0df  Implement fmt_as for ShuffleReaderExec (#400)

No new revisions were added by this update.

Summary of changes:
 .../core/src/execution_plans/shuffle_reader.rs | 30 +-
 1 file changed, 29 insertions(+), 1 deletion(-)


[arrow-datafusion] branch master updated: refactor datafusion/`scalar_value` to use more macro and avoid dup code (#392)

2021-05-24 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 1ba0eb0  refactor datafusion/`scalar_value` to use more macro and 
avoid dup code (#392)
1ba0eb0 is described below

commit 1ba0eb0ea4d3f2f91b70a670cda5cf395d9e3f94
Author: Jiayu Liu 
AuthorDate: Mon May 24 17:01:10 2021 +0800

refactor datafusion/`scalar_value` to use more macro and avoid dup code 
(#392)
---
 datafusion/src/scalar.rs | 242 ++-
 1 file changed, 133 insertions(+), 109 deletions(-)

diff --git a/datafusion/src/scalar.rs b/datafusion/src/scalar.rs
index e59d21e..e19e274 100644
--- a/datafusion/src/scalar.rs
+++ b/datafusion/src/scalar.rs
@@ -17,28 +17,17 @@
 
 //! This module provides ScalarValue, an enum that can be used for storage of 
single elements
 
-use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc};
-
-use arrow::datatypes::{ArrowDictionaryKeyType, DataType, Field, IntervalUnit, 
TimeUnit};
+use crate::error::{DataFusionError, Result};
 use arrow::{
 array::*,
 datatypes::{
-ArrowNativeType, Float32Type, Int16Type, Int32Type, Int64Type, 
Int8Type,
-TimestampNanosecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+ArrowDictionaryKeyType, ArrowNativeType, DataType, Field, Float32Type, 
Int16Type,
+Int32Type, Int64Type, Int8Type, IntervalUnit, TimeUnit, 
TimestampMicrosecondType,
+TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
+UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 },
 };
-use arrow::{
-array::{
-ArrayRef, Int16Builder, Int32Builder, Int64Builder, Int8Builder, 
ListBuilder,
-TimestampMicrosecondArray, TimestampMillisecondArray, 
TimestampNanosecondArray,
-UInt16Builder, UInt32Builder, UInt64Builder, UInt8Builder,
-},
-datatypes::{
-TimestampMicrosecondType, TimestampMillisecondType, 
TimestampSecondType,
-},
-};
-
-use crate::error::{DataFusionError, Result};
+use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc};
 
 /// Represents a dynamically typed, nullable single value.
 /// This is the single-valued counter-part of arrow’s `Array`.
@@ -192,6 +181,27 @@ macro_rules! build_values_list {
 }};
 }
 
+macro_rules! build_array_from_option {
+($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
+match $EXPR {
+Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
+None => new_null_array(::$DATA_TYPE, $SIZE),
+}
+}};
+($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) 
=> {{
+match $EXPR {
+Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
+None => new_null_array(::$DATA_TYPE($ENUM), $SIZE),
+}
+}};
+($DATA_TYPE:ident, $ENUM:expr, $ENUM2:expr, $ARRAY_TYPE:ident, $EXPR:expr, 
$SIZE:expr) => {{
+match $EXPR {
+Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
+None => new_null_array(::$DATA_TYPE($ENUM, $ENUM2), 
$SIZE),
+}
+}};
+}
+
 impl ScalarValue {
 /// Getter for the `DataType` of the value
 pub fn get_datatype() -> DataType {
@@ -289,80 +299,59 @@ impl ScalarValue {
 ScalarValue::Boolean(e) => {
 Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef
 }
-ScalarValue::Float64(e) => match e {
-Some(value) => Arc::new(Float64Array::from_value(*value, 
size)),
-None => new_null_array(::Float64, size),
-},
-ScalarValue::Float32(e) => match e {
-Some(value) => Arc::new(Float32Array::from_value(*value, 
size)),
-None => new_null_array(::Float32, size),
-},
-ScalarValue::Int8(e) => match e {
-Some(value) => Arc::new(Int8Array::from_value(*value, size)),
-None => new_null_array(::Int8, size),
-},
-ScalarValue::Int16(e) => match e {
-Some(value) => Arc::new(Int16Array::from_value(*value, size)),
-None => new_null_array(::Int16, size),
-},
-ScalarValue::Int32(e) => match e {
-Some(value) => Arc::new(Int32Array::from_value(*value, size)),
-None => new_null_array(::Int32, size),
-},
-ScalarValue::Int64(e) => match e {
-Some(value) => Arc::new(Int64Array::from_value(*value, size)),
-None => new_null_array(::Int64, size),
-},
-ScalarValue::UInt8(e) => match e {
-Some(value) => Arc::new

[arrow] annotated tag apache-arrow-4.0.1 updated (81ff679 -> 2004cc0)

2021-05-19 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to annotated tag apache-arrow-4.0.1
in repository https://gitbox.apache.org/repos/asf/arrow.git.


*** WARNING: tag apache-arrow-4.0.1 was modified! ***

from 81ff679  (commit)
  to 2004cc0  (tag)
 tagging 81ff679c47754692224f655dab32cc0936bb5f55 (commit)
 replaces apache-arrow-4.0.0
  by Jorge C. Leitao
  on Wed May 19 05:51:46 2021 +0200

- Log -
[Release] Apache Arrow Release 4.0.1
---


No new revisions were added by this update.

Summary of changes:


[arrow] branch release-4.0.1-rc1 created (now 81ff679)

2021-05-19 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch release-4.0.1-rc1
in repository https://gitbox.apache.org/repos/asf/arrow.git.


  at 81ff679  [Release] Update versions for 4.0.1

No new revisions were added by this update.


svn commit: r47807 - in /dev/arrow/apache-arrow-4.0.1-rc1: ./ apache-arrow-4.0.1.tar.gz apache-arrow-4.0.1.tar.gz.asc apache-arrow-4.0.1.tar.gz.sha256 apache-arrow-4.0.1.tar.gz.sha512

2021-05-18 Thread jorgecarleitao
Author: jorgecarleitao
Date: Wed May 19 03:53:48 2021
New Revision: 47807

Log:
Apache Arrow 4.0.1 RC1

Added:
dev/arrow/apache-arrow-4.0.1-rc1/
dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz   (with props)
dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.asc
dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.sha256
dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.sha512

Added: dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz
==
Binary file - no diff available.

Propchange: dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.asc
==
--- dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.asc (added)
+++ dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.asc Wed May 19 
03:53:48 2021
@@ -0,0 +1,16 @@
+-BEGIN PGP SIGNATURE-
+
+iQIzBAABCAAdFiEErD3fXew3+nQzRMXT2QFQ96jyxu4FAmCki74ACgkQ2QFQ96jy
+xu5IqRAAjNyOjT65mv2UCB6RCe6bKTsN3gRCv/KxztpYoGVyJRZ331dodr86SlcD
+a5FSR0Sin+LSUaRUD/3hjV+vneCNSEUiiUEfpsD+PbkStUfsiDyP7s8eD+zqTXmn
+ErBvFggynwDfPeXTV1Z+eHDxfXUvlWRX1IwyxJjLQ3YuP2M8KCt8FzJtxPPdmOZi
+igvol7fDKxOrVe4i8pae5HwMbBd0h3MARJhDcYS16WPbtsYglonfgdCFk7k0TJH2
+dCXoC7m1CjbbqrPsGzdM/ZqXFdZUESNwj873eMz59bt3IJVp5o8vtaoC8Wdq8L+8
+/gLB+yy4e6SZ2sJDgdGjfsSBztnl6CBZaBsup1MswqaQ3VshVvH03EUUKhOE1eqS
+UxsrWMNFlQuujSYK7sMGoz84LA5Wwncp4iD0ncRGNlzy09IXO4nJZYHIEwntQpTq
+VRyPjUWHHPbOu8ACeqV2u5Am+td+0E4YkgcHThhM1g51I76CwQ6UVHDAt9zeUtlS
+JCBLrUJ6E2q+xXIQywWOfg64dOWrXIgiZ4FASqAu0cNqiVGnbdI327k5MBpECJeb
+FfG9+phVrimXUq5krAlm8hLKnJSIFNuqwY9sH107ZvVl8MekDFH218z9UZRqDkQR
+pf+nDk46Wsa2q1Q0rXstWdqqoW2T2KtThmQ2u/lfh4qSFzcgnnQ=
+=M4Jq
+-END PGP SIGNATURE-

Added: dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.sha256
==
--- dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.sha256 (added)
+++ dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.sha256 Wed May 
19 03:53:48 2021
@@ -0,0 +1 @@
+75ccbfa276b925c6b1c978a920ff2f30c4b0d3fdf8b51777915b6f69a211896e  
apache-arrow-4.0.1.tar.gz

Added: dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.sha512
==
--- dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.sha512 (added)
+++ dev/arrow/apache-arrow-4.0.1-rc1/apache-arrow-4.0.1.tar.gz.sha512 Wed May 
19 03:53:48 2021
@@ -0,0 +1 @@
+f76e5da1522ae3dce7a917e53d9b3ead9f4edf6f5bfa3e5bfadd9444084622c502eb265ebaa00eae73c1f3a38da580f67701fac94e182d3ec54ec8776811bee2
  apache-arrow-4.0.1.tar.gz




[arrow] branch release-4.0.1 created (now d1ca32e)

2021-05-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch release-4.0.1
in repository https://gitbox.apache.org/repos/asf/arrow.git.


  at d1ca32e  ARROW-12603: [C++][Dataset] Backport fix for specifying CSV 
column types (#10344)

No new revisions were added by this update.


[arrow] 03/03: [Release] Update versions for 4.0.1

2021-05-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch release-4.0.1-rc0
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit a63795a28e63d4900092509dc39b3da367ed392d
Author: Jorge C. Leitao 
AuthorDate: Wed May 19 05:32:53 2021 +0200

[Release] Update versions for 4.0.1
---
 c_glib/meson.build   | 2 +-
 ci/scripts/PKGBUILD  | 2 +-
 cpp/CMakeLists.txt   | 2 +-
 cpp/vcpkg.json   | 2 +-
 csharp/Directory.Build.props | 2 +-
 dev/tasks/homebrew-formulae/apache-arrow.rb  | 2 +-
 dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb | 2 +-
 java/adapter/avro/pom.xml| 2 +-
 java/adapter/jdbc/pom.xml| 2 +-
 java/adapter/orc/pom.xml | 2 +-
 java/algorithm/pom.xml   | 2 +-
 java/compression/pom.xml | 2 +-
 java/dataset/pom.xml | 2 +-
 java/flight/flight-core/pom.xml  | 2 +-
 java/flight/flight-grpc/pom.xml  | 2 +-
 java/format/pom.xml  | 2 +-
 java/gandiva/pom.xml | 2 +-
 java/memory/memory-core/pom.xml  | 2 +-
 java/memory/memory-netty/pom.xml | 2 +-
 java/memory/memory-unsafe/pom.xml| 2 +-
 java/memory/pom.xml  | 2 +-
 java/performance/pom.xml | 4 ++--
 java/plasma/pom.xml  | 2 +-
 java/pom.xml | 2 +-
 java/tools/pom.xml   | 2 +-
 java/vector/pom.xml  | 2 +-
 js/package.json  | 2 +-
 matlab/CMakeLists.txt| 2 +-
 python/setup.py  | 2 +-
 r/DESCRIPTION| 2 +-
 ruby/red-arrow-cuda/lib/arrow-cuda/version.rb| 2 +-
 ruby/red-arrow-dataset/lib/arrow-dataset/version.rb  | 2 +-
 ruby/red-arrow/lib/arrow/version.rb  | 2 +-
 ruby/red-gandiva/lib/gandiva/version.rb  | 2 +-
 ruby/red-parquet/lib/parquet/version.rb  | 2 +-
 ruby/red-plasma/lib/plasma/version.rb| 2 +-
 rust/arrow-flight/Cargo.toml | 4 ++--
 rust/arrow-pyarrow-integration-testing/Cargo.toml| 4 ++--
 rust/arrow/Cargo.toml| 2 +-
 rust/benchmarks/Cargo.toml   | 2 +-
 rust/datafusion-examples/Cargo.toml  | 2 +-
 rust/datafusion/Cargo.toml   | 6 +++---
 rust/datafusion/README.md| 2 +-
 rust/integration-testing/Cargo.toml  | 2 +-
 rust/parquet/Cargo.toml  | 6 +++---
 rust/parquet/README.md   | 4 ++--
 rust/parquet_derive/Cargo.toml   | 4 ++--
 rust/parquet_derive/README.md| 4 ++--
 rust/parquet_derive_test/Cargo.toml  | 6 +++---
 49 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/c_glib/meson.build b/c_glib/meson.build
index 79abfa6..c5b12f9 100644
--- a/c_glib/meson.build
+++ b/c_glib/meson.build
@@ -23,7 +23,7 @@ project('arrow-glib', 'c', 'cpp',
   'cpp_std=c++11',
 ])
 
-version = '4.0.0'
+version = '4.0.1'
 if version.endswith('-SNAPSHOT')
   version_numbers = version.split('-')[0].split('.')
   version_tag = version.split('-')[1]
diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD
index 34cd434..3a4f241 100644
--- a/ci/scripts/PKGBUILD
+++ b/ci/scripts/PKGBUILD
@@ -18,7 +18,7 @@
 _realname=arrow
 pkgbase=mingw-w64-${_realname}
 pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
-pkgver=4.0.0
+pkgver=4.0.1
 pkgrel=8000
 pkgdesc="Apache Arrow is a cross-language development platform for in-memory 
data (mingw-w64)"
 arch=("any")
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index b7a1dae..1621b92 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -47,7 +47,7 @@ if(POLICY CMP0074)
   cmake_policy(SET CMP0074 NEW)
 endif()
 
-set(ARROW_VERSION "4.0.0")
+set(ARROW_VERSION "4.0.1")
 
 string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION 
"${ARROW_VERSION}")
 
diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json
index eea9cd9..129afe3 100644
--- a/cpp/vcpkg.json
+++ b/cpp/vcpkg.json
@@ -1,6 +1,6 @@
 {
   "name": "arrow",
-  "version-string": "4.0.0",
+  "version-string": "4.0.1",
   "dependencies": [
 "abseil",
 {

[arrow] branch release-4.0.1-rc0 created (now a63795a)

2021-05-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch release-4.0.1-rc0
in repository https://gitbox.apache.org/repos/asf/arrow.git.


  at a63795a  [Release] Update versions for 4.0.1

This branch includes the following new commits:

 new c3db3e5  [Release] Update CHANGELOG.md for 4.0.1
 new ebe4d52  [Release] Update .deb/.rpm changelogs for 4.0.1
 new a63795a  [Release] Update versions for 4.0.1

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow] 02/03: [Release] Update .deb/.rpm changelogs for 4.0.1

2021-05-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch release-4.0.1-rc0
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit ebe4d521229769428188bc9599ae087db2f699a6
Author: Jorge C. Leitao 
AuthorDate: Wed May 19 05:32:18 2021 +0200

[Release] Update .deb/.rpm changelogs for 4.0.1
---
 dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog   | 6 ++
 .../apache-arrow-release/yum/apache-arrow-release.spec.in   | 3 +++
 dev/tasks/linux-packages/apache-arrow/debian/changelog  | 6 ++
 dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 3 +++
 4 files changed, 18 insertions(+)

diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog 
b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog
index 47ae270..7101e26 100644
--- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog
+++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog
@@ -1,3 +1,9 @@
+apache-arrow-apt-source (4.0.1-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Jorge C. Leitao   Wed, 19 May 2021 03:32:16 -0000
+
 apache-arrow-apt-source (4.0.0-1) unstable; urgency=low
 
   * New upstream release.
diff --git 
a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
 
b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
index de02687..e2af30f 100644
--- 
a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
+++ 
b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
@@ -94,6 +94,9 @@ else
 fi
 
 %changelog
+* Wed May 19 2021 Jorge C. Leitao  - 4.0.1-1
+- New upstream release.
+
 * Wed Apr 21 2021 Krisztián Szűcs  - 4.0.0-1
 - New upstream release.
 
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog 
b/dev/tasks/linux-packages/apache-arrow/debian/changelog
index 7929097..487f42e 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/changelog
+++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog
@@ -1,3 +1,9 @@
+apache-arrow (4.0.1-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Jorge C. Leitao   Wed, 19 May 2021 03:32:16 -0000
+
 apache-arrow (4.0.0-1) unstable; urgency=low
 
   * New upstream release.
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in 
b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
index b247edc..e3a990d 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
@@ -750,6 +750,9 @@ Documentation for Apache Parquet GLib.
 %{_datadir}/gtk-doc/html/parquet-glib/
 
 %changelog
+* Wed May 19 2021 Jorge C. Leitao  - 4.0.1-1
+- New upstream release.
+
 * Wed Apr 21 2021 Krisztián Szűcs  - 4.0.0-1
 - New upstream release.
 


[arrow] 01/03: [Release] Update CHANGELOG.md for 4.0.1

2021-05-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch release-4.0.1-rc0
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit c3db3e5163295037e86c94c775cb1dd556cf6ac8
Author: Jorge C. Leitao 
AuthorDate: Wed May 19 05:32:16 2021 +0200

[Release] Update CHANGELOG.md for 4.0.1
---
 CHANGELOG.md | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 85f5d53..1acc148 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,34 @@
 
+# Apache Arrow 4.0.1 (2021-05-19)
+
+## Bug Fixes
+
+* [ARROW-12568](https://issues.apache.org/jira/browse/ARROW-12568) - 
[Python][C++] Segfault when casting a sliced ListArray of int64 in v4.0.0
+* [ARROW-12601](https://issues.apache.org/jira/browse/ARROW-12601) - 
[R][Packaging] Fix pkg-config check in r/configure
+* [ARROW-12603](https://issues.apache.org/jira/browse/ARROW-12603) - [R] 
open\_dataset ignoring provided schema when using select
+* [ARROW-12604](https://issues.apache.org/jira/browse/ARROW-12604) - 
[R][Packaging] Dataset, Parquet off in autobrew and CRAN Mac builds
+* [ARROW-12617](https://issues.apache.org/jira/browse/ARROW-12617) - [Python] 
pyarrow.orc.write\_table signature reverses that of pyarrow.parquet.write\_table
+* [ARROW-12622](https://issues.apache.org/jira/browse/ARROW-12622) - [Python] 
Segfault when reading CSV inside Flight server
+* [ARROW-12642](https://issues.apache.org/jira/browse/ARROW-12642) - [R] 
LIBARROW\_MINIMAL, LIBARROW\_DOWNLOAD, NOT\_CRAN env vars should not be 
case-sensitive
+* [ARROW-12663](https://issues.apache.org/jira/browse/ARROW-12663) - [C++] 
segfault when arrow header is compiled with nvcc 11.2
+* [ARROW-12670](https://issues.apache.org/jira/browse/ARROW-12670) - [C++] 
extract\_regex gives bizarre behavior after nulls or non-matches
+* [ARROW-12746](https://issues.apache.org/jira/browse/ARROW-12746) - 
[Go][Flight] Client Auth handler overwrites outgoing metadata
+* [ARROW-12769](https://issues.apache.org/jira/browse/ARROW-12769) - [Python] 
Negative out of range slices yield invalid arrays
+* [ARROW-12774](https://issues.apache.org/jira/browse/ARROW-12774) - 
[C++][Compute] replace\_substring\_regex() creates invalid arrays =\> crash
+* [ARROW-12776](https://issues.apache.org/jira/browse/ARROW-12776) - 
[Archery][Integration] Fix decimal case generation in write\_js\_test\_json
+
+
+## New Features and Improvements
+
+* [ARROW-11926](https://issues.apache.org/jira/browse/ARROW-11926) - [R] Pass 
on the new UCRT CRAN windows builds
+* [ARROW-12520](https://issues.apache.org/jira/browse/ARROW-12520) - [R] Minor 
docs updates
+* [ARROW-12571](https://issues.apache.org/jira/browse/ARROW-12571) - [R][CI] 
Run nightly R with valgrind
+* [ARROW-12578](https://issues.apache.org/jira/browse/ARROW-12578) - [JS] 
Simplify UTF8 handling in NodeJS
+* [ARROW-12619](https://issues.apache.org/jira/browse/ARROW-12619) - [Python] 
pyarrow sdist should not require git
+* [ARROW-12806](https://issues.apache.org/jira/browse/ARROW-12806) - [Python] 
test\_write\_to\_dataset\_filesystem missing a dataset mark
+
+
+
 # Apache Arrow 4.0.0 (2021-04-21)
 
 ## Bug Fixes


svn commit: r47806 - in /dev/arrow/apache-arrow-4.0.1-rc0: ./ apache-arrow-4.0.1.tar.gz apache-arrow-4.0.1.tar.gz.asc apache-arrow-4.0.1.tar.gz.sha256 apache-arrow-4.0.1.tar.gz.sha512

2021-05-18 Thread jorgecarleitao
Author: jorgecarleitao
Date: Wed May 19 03:36:20 2021
New Revision: 47806

Log:
Apache Arrow 4.0.1 RC0

Added:
dev/arrow/apache-arrow-4.0.1-rc0/
dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz   (with props)
dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.asc
dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.sha256
dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.sha512

Added: dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz
==
Binary file - no diff available.

Propchange: dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.asc
==
--- dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.asc (added)
+++ dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.asc Wed May 19 
03:36:20 2021
@@ -0,0 +1,11 @@
+-----BEGIN PGP SIGNATURE-----
+
+iQEzBAABCAAdFiEEyIEq4mz99FAcYpK9ApQnxohORI0FAmCkh6UACgkQApQnxohO
+RI0FCwf/bJSgIYBSBdsczKHFgyU3haaI4QfCx3KGLQLYdNf6LlmuKR22ViWlom/O
+YKnqPhdmtMJC/hMzXkyt1RYzMSFDZJz9CznpINrhXdhq6unsa8GOsUsQ+itug6Pi
+ML0qQcOV/rbjsHdFdNCPGqUOj5ZsiiHNW36K5R4rFHCSIjWyy90+Vi7nEr1Kfubf
+BI2L8n22SNKLLXYMolE+19iCiElkPCzoECmhnCwcOtduuOVupE68LRisInmng2y9
+RTzL0RenroDEmIDfLiSf+r3tL+MFCTA7rKDzCXyqPqB9V6n4l9umYWV4RcZuX2OG
+BDUFhs6JlACQNA7p72bQiQPU3mZ9YQ==
+=gf2q
+-----END PGP SIGNATURE-----

Added: dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.sha256
==
--- dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.sha256 (added)
+++ dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.sha256 Wed May 
19 03:36:20 2021
@@ -0,0 +1 @@
+371114164d802a0c4319cc32a94957d81ef60613a5241192a0b9d1b46d614991  
apache-arrow-4.0.1.tar.gz

Added: dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.sha512
==
--- dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.sha512 (added)
+++ dev/arrow/apache-arrow-4.0.1-rc0/apache-arrow-4.0.1.tar.gz.sha512 Wed May 
19 03:36:20 2021
@@ -0,0 +1 @@
+cb65e91cbab31ff87115d7bdd2f0e09b1ba745aa12e8ac58e69c13f8ac398b0fb104cbb54c4d13e1a661d11995bc081464b740fcb3e6ab26df2e1cd77cdf80de
  apache-arrow-4.0.1.tar.gz




svn commit: r47792 - /release/arrow/KEYS

2021-05-18 Thread jorgecarleitao
Author: jorgecarleitao
Date: Tue May 18 19:36:56 2021
New Revision: 47792

Log:
Key jorgecarleitao

Modified:
release/arrow/KEYS

Modified: release/arrow/KEYS
==
--- release/arrow/KEYS (original)
+++ release/arrow/KEYS Tue May 18 19:36:56 2021
@@ -927,3 +927,86 @@ Hg3Sc5RHLw/lIC/yEtIptzcpDLgDxS1Bf2c+RF+7
 pyqcs9cFy0tt3nAdWcVobS3PYhSacIDlPFPImatg3Nr+p50dx94OdWvoTzZPWTY=
 =FdOt
 -----END PGP PUBLIC KEY BLOCK-----
+pub   rsa4096 2021-05-17 [SC]
+  D6892EA1881BD9610330AD7B0801999AF78748E8
+uid   [ultimate] Jorge C. Leitao 
+sig 30801999AF78748E8 2021-05-17  Jorge C. Leitao 

+sig  25BCCA5220D84079 2021-05-18  Krisztian Szucs (apache) 

+sub   rsa4096 2021-05-17 [S]
+sig  0801999AF78748E8 2021-05-17  Jorge C. Leitao 

+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQINBGCiyecBEACu0h3wnLz0+K+uPlBZDPgvr/7d7+0/MNcBOj9z2pA9uqURUUwV
+c4GXs/SN/AUu4xoQ5yo5xENUYZHOA5lsfduylg7U6R39jkbS3VljVlkgsrgWfNl4
+7E5jG5mCZI+S8otf1/fLeYs7PvRLQwuXlsBG5E9+xdPWxlHNaSyiobW3sAiTgLBw
+ywWh1S9mbr9FF40oMrZxOvMLQrFDbB+Xnmz3fowrRV5QqE0hSRFUEMmCDuOeyirb
+b6g2avWp/BEyOcU/mTx219wmXQmvShJMauKTdjGBAFZavqUTs+QHXBzo3lzEjPoR
+e0iVix8MGcYWzmcUq3qT3QltlS0ozGPoXSibqojl15xF5aDKNZ3npVqOuUe+bAoa
+0l5OLpQn28AkI0kBkThT+BLTzyFv1MxUA/ZqSOVHOw25GbiGC7fm5MAS3LqtxfVT
+Ds0xChJvljS9R8sAIGUuNGUUkGp2F5WIUU8ymUmShA2AMMQE5vHFiphr+KpoajuD
+lsUEo/iK+IMu32oLaq9ZNuf36Pr7lY/N1JTQQaBm5Sh7EXqNa7t0NQCLo1oRYuRV
+qSAnfri6BX2SRHdkzmZOZW/rCndak+iXstw5Vxh101168cDXBhY1uOZnPmp63OCz
+PP0GbKmQI/5tqjIzbUF9W+rCGQWIqXiklFLWFG+oKrfyvlW/XtPJ2G7FhwARAQAB
+tCpKb3JnZSBDLiBMZWl0YW8gPGpvcmdlY2FybGVpdGFvQGdtYWlsLmNvbT6JAk4E
+EwEIADgCGwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AWIQTWiS6hiBvZYQMwrXsI
+AZma94dI6AUCYKLLugAKCRAIAZma94dI6C81D/4wE44s3gUsmq2A/b940nU2/gtU
+qmSyw7r5WMr/H38lf83TbsCwkzVpFiCrJlBud03hB5F7Kfrdhz1szHPFshG20VkY
+oixn8jMHmVrHLTGbmKPX5Mx/pzrIQeoV0kky+43AvSkOeXaRVE75/5sesJvB6IlZ
+u7uN/ejHGDKzO08K+0kqqo6UXp6La/l3YZ3+LHJo2Iuxeg13LBfaDss2k5voHdkN
+AqznxF2XypQna/jH68n6jaMCeeGdl/jxyblJTcJxhh8sY6ocs2FVztrttNi8VFtY
+qh5hQmaDqqWGrAVY/bVeqf4YcGOgoRALRQCO9fFwb/kkCQ56XwTD1RDWARRPeTrf
+YOUrfXGo0YTghB6WqXuoZ9vyEnSU6oYV+WHcqyURA5269uX3nEBXxyMKUtXoFdJU
+dRdvVfjMiIvDpAeDlT0qAsaLHSrKaDmM3K6/gO3oNmvWPPONOBMinjtTuKG1NsKt
+Kc4N89TIuP1H1KdcgL9hBrm/0Qw5WrWA75K82GyZN1PxzOvatjOZxeVpeGfls4PM
+iEjrCYEKUNXiPO0/z+gSJAcu5dxnqmrC+uaKP6bSb5ILmT7EmEZVqs36jYDBRztx
+XcymMtb3l7vCl+MobHCpRNnjRlJAeNpR/m3BGuz3V+hp18y0qBkkyOaVWk5jr81w
+gRPavtmfD6SoMqnaeIkCMwQQAQgAHRYhBCZfgKuE/gMSfhTwESW8ylIg2EB5BQJg
+o9qEAAoJECW8ylIg2EB5ttMP/i8dyOJi77YLn/NqDsnAmGeY/l9NwbSim2Q3+VwA
+EU6ghGo972whAEHZuvg1qnXls1zWXdoBg61o8/TCMqeEcR5kPJUSxb4dbHThW50K
+dQphsSxQzt7VODi1SE1aiQEbJggIUu4ZiwUdP7IO6eucqCKv/xVg07bOO7V1Eq1C
+ucLK9hJCu7xki6s2tz2OSxCf4QSWR0uD4OyMqRXw7rcLMnF67H3CO2RUI4XRq4y9
+7dBzE2SfTZO7wT2abxNnKApG+wqGRdPBEnyQrl6r8Y8r86E/dL1cAf5bySWQqb8a
+YktNQyMECnyfuJFpAr12DGSUI++IhbiDlvbpxs1wTTfgvREIbnLTqz/kjt4wXrwS
+4kLhMqG92v+OmDI60xHTSie0gBSXaJDmBe45jGL6861OOcviW+QubM3I/RO/f9dT
+/aXZNTGuRSdWbK0Y25ayKrIipaYCwZO2flIgltADxmUcWNZ5YzezuWL5SB4jTwwj
+7FtYJdzZKkZUwty24+02RwU7J7nKidoPJXTpw4VfU3ltTPyNmhAR5uvp53/1UN6E
+Yq2TpOFIHNPc0TK01SlTfc1ixFSRN/GyNFynocXeOkIWJtkw+AoRtxGTxqQffOU/
+Efn9UhV/aqv68nmHlD/r2woOpcU59+4WCq46LGVv1ahbszz+OrjDORJ0RUlwoeiZ
+qwZyuQINBGCi0IABEADIq1PKO9YCq+pPZDNjuKftAISwfI/CAcqBU4Io6yBUWerH
+mx1+WfAdcuhzbU84yEK8MlLsskWQ4KAFa7W1nFXmhtzcQsAtGLRB4RRei2/Sv4BM
+52+YNWOOWpUsg/yMcj2jsfhA7xU1oPxgnE58erJ4aSMKPQzRGJtytBr+eUvWULVJ
+Q10qtf8EIaIVclhiBu4HeLXctuZVmo5K0JgZ8ZZca3QYbKpUx7WeTqn+KzONJ1Nb
+33lCS4H7vGEe9EXtjua9MPLCVEd8ic2S5ZUMpIWcd8B8R0dk5biO46ZK23YGzw2y
+R6LQiiWgUvELip+L8WmEnphHtxVOlZI+9EmBhhcSP8GBzImk89RCFpdOqMPOlRmG
+A2z8ZgjJhF1NOu+2U/w33cSfiISCdeeQfKXqrI+RGRv668uLp+5aoFp1EdbjETGz
+nDOIAzzoFIDsqG8jWSEUEWag4+EV70xjaYK2XOt9/RLo8jQe34WsqE177bX0x/8C
+frllmC8MSBHX56/3PFN10BUqxwBV8qXmqyv64FI9jsK01aFhd0CLvEZuBQpiZzLQ
+LmaupvlrQgs5cKriyxITbyAS0RfY3zOj20rxMNXcaCpU2XhfOuuIh+llo2Z3gw17
+ZR08b4eIejV7d+8RdFf1o3A4HUIpeeDPHE3K1QcT2XvB1IN+S8TUNinKg/MliQAR
+AQABiQRsBBgBCAAgFiEE1okuoYgb2WEDMK17CAGZmveHSOgFAmCi0IACGwICQAkQ
+CAGZmveHSOjBdCAEGQEIAB0WIQSsPd9d7Df6dDNExdPZAVD3qPLG7gUCYKLQgAAK
+CRDZAVD3qPLG7sfJD/9yRT30wtPUaxwfBah3SKc+wZgotNVrVxJBK10NihOcO9fP
+kmNISgCm4KX3UVofb9j/AB8cg2ps1czz6k6kQJb/baCE751xnIHDQ7El9XGnBQiA
+PvGgAWWMwzldJTW3PQBcWbxBAujeL2q8iVE+aP7afI2yiyderP7C5StQMNaOlQUC
+WaNd04ZF06Ke0E5gfZKW5GlwiDIl5mCf+/cZCMtydXtYDK6RAPURcgqvitlRCeFZ
+e8CJSlEQBc+BcVg+ynunXkYgl2kSt6WiT5ugA17aVNdF+6dmg9ikqmAVrqAYQa00
+7KAueCffYwWAiLoh4PrKuwbY4yNJaQjCi3ety6pPh+PmnKJVuUpYszacsdRIbEzu
+TvcJp8rSYNXdfgp5j79HwSki0UkazJ0cofIGmopQoHgew+S8JB7wzEPVO0m1sVyg
+la1Tj7dxdCNr/qX/LZtAJd42zIfWhbrk5NVg1FmZYJSjxzZTxzjV+aH5AxKGqjEX
+qZVOi3ZvNq7HKEfEwv4tI9xqq6/2DjNpoJ59Jv6N12Dd1uLYtaE+sw8X0W097e72
+fw8JWmkBOwHueeSZAQkFLc+uPszWf4JakVGMo5a20MBlHxN+a+OE5bVBngmXC9oq
+tqUWPgIEos5d7nJWM0KYppf/w46+78qByxNMOgIJ8vShGkU83FRnBI2/eYwEJLIs
+D/sGr58dwtiBhpL53wyg17KaVhTya1HRFLKLpeAutJ71o1bsZF4guge35tTExJcT
+B2OlGmhUf0AhNRfBzsa9HgvACOgotfekSA3n6q1MAfMOBgohKk9l+UX0sQomDF+Z
+C1G29t2

svn commit: r47791 - /dev/arrow/KEYS

2021-05-18 Thread jorgecarleitao
Author: jorgecarleitao
Date: Tue May 18 19:35:28 2021
New Revision: 47791

Log:
Key jorgecarleitao

Modified:
dev/arrow/KEYS

Modified: dev/arrow/KEYS
==
--- dev/arrow/KEYS (original)
+++ dev/arrow/KEYS Tue May 18 19:35:28 2021
@@ -1023,3 +1023,86 @@ Hg3Sc5RHLw/lIC/yEtIptzcpDLgDxS1Bf2c+RF+7
 pyqcs9cFy0tt3nAdWcVobS3PYhSacIDlPFPImatg3Nr+p50dx94OdWvoTzZPWTY=
 =FdOt
 -----END PGP PUBLIC KEY BLOCK-----
+pub   rsa4096 2021-05-17 [SC]
+  D6892EA1881BD9610330AD7B0801999AF78748E8
+uid   [ultimate] Jorge C. Leitao 
+sig 30801999AF78748E8 2021-05-17  Jorge C. Leitao 

+sig  25BCCA5220D84079 2021-05-18  Krisztian Szucs (apache) 

+sub   rsa4096 2021-05-17 [S]
+sig  0801999AF78748E8 2021-05-17  Jorge C. Leitao 

+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQINBGCiyecBEACu0h3wnLz0+K+uPlBZDPgvr/7d7+0/MNcBOj9z2pA9uqURUUwV
+c4GXs/SN/AUu4xoQ5yo5xENUYZHOA5lsfduylg7U6R39jkbS3VljVlkgsrgWfNl4
+7E5jG5mCZI+S8otf1/fLeYs7PvRLQwuXlsBG5E9+xdPWxlHNaSyiobW3sAiTgLBw
+ywWh1S9mbr9FF40oMrZxOvMLQrFDbB+Xnmz3fowrRV5QqE0hSRFUEMmCDuOeyirb
+b6g2avWp/BEyOcU/mTx219wmXQmvShJMauKTdjGBAFZavqUTs+QHXBzo3lzEjPoR
+e0iVix8MGcYWzmcUq3qT3QltlS0ozGPoXSibqojl15xF5aDKNZ3npVqOuUe+bAoa
+0l5OLpQn28AkI0kBkThT+BLTzyFv1MxUA/ZqSOVHOw25GbiGC7fm5MAS3LqtxfVT
+Ds0xChJvljS9R8sAIGUuNGUUkGp2F5WIUU8ymUmShA2AMMQE5vHFiphr+KpoajuD
+lsUEo/iK+IMu32oLaq9ZNuf36Pr7lY/N1JTQQaBm5Sh7EXqNa7t0NQCLo1oRYuRV
+qSAnfri6BX2SRHdkzmZOZW/rCndak+iXstw5Vxh101168cDXBhY1uOZnPmp63OCz
+PP0GbKmQI/5tqjIzbUF9W+rCGQWIqXiklFLWFG+oKrfyvlW/XtPJ2G7FhwARAQAB
+tCpKb3JnZSBDLiBMZWl0YW8gPGpvcmdlY2FybGVpdGFvQGdtYWlsLmNvbT6JAk4E
+EwEIADgCGwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AWIQTWiS6hiBvZYQMwrXsI
+AZma94dI6AUCYKLLugAKCRAIAZma94dI6C81D/4wE44s3gUsmq2A/b940nU2/gtU
+qmSyw7r5WMr/H38lf83TbsCwkzVpFiCrJlBud03hB5F7Kfrdhz1szHPFshG20VkY
+oixn8jMHmVrHLTGbmKPX5Mx/pzrIQeoV0kky+43AvSkOeXaRVE75/5sesJvB6IlZ
+u7uN/ejHGDKzO08K+0kqqo6UXp6La/l3YZ3+LHJo2Iuxeg13LBfaDss2k5voHdkN
+AqznxF2XypQna/jH68n6jaMCeeGdl/jxyblJTcJxhh8sY6ocs2FVztrttNi8VFtY
+qh5hQmaDqqWGrAVY/bVeqf4YcGOgoRALRQCO9fFwb/kkCQ56XwTD1RDWARRPeTrf
+YOUrfXGo0YTghB6WqXuoZ9vyEnSU6oYV+WHcqyURA5269uX3nEBXxyMKUtXoFdJU
+dRdvVfjMiIvDpAeDlT0qAsaLHSrKaDmM3K6/gO3oNmvWPPONOBMinjtTuKG1NsKt
+Kc4N89TIuP1H1KdcgL9hBrm/0Qw5WrWA75K82GyZN1PxzOvatjOZxeVpeGfls4PM
+iEjrCYEKUNXiPO0/z+gSJAcu5dxnqmrC+uaKP6bSb5ILmT7EmEZVqs36jYDBRztx
+XcymMtb3l7vCl+MobHCpRNnjRlJAeNpR/m3BGuz3V+hp18y0qBkkyOaVWk5jr81w
+gRPavtmfD6SoMqnaeIkCMwQQAQgAHRYhBCZfgKuE/gMSfhTwESW8ylIg2EB5BQJg
+o9qEAAoJECW8ylIg2EB5ttMP/i8dyOJi77YLn/NqDsnAmGeY/l9NwbSim2Q3+VwA
+EU6ghGo972whAEHZuvg1qnXls1zWXdoBg61o8/TCMqeEcR5kPJUSxb4dbHThW50K
+dQphsSxQzt7VODi1SE1aiQEbJggIUu4ZiwUdP7IO6eucqCKv/xVg07bOO7V1Eq1C
+ucLK9hJCu7xki6s2tz2OSxCf4QSWR0uD4OyMqRXw7rcLMnF67H3CO2RUI4XRq4y9
+7dBzE2SfTZO7wT2abxNnKApG+wqGRdPBEnyQrl6r8Y8r86E/dL1cAf5bySWQqb8a
+YktNQyMECnyfuJFpAr12DGSUI++IhbiDlvbpxs1wTTfgvREIbnLTqz/kjt4wXrwS
+4kLhMqG92v+OmDI60xHTSie0gBSXaJDmBe45jGL6861OOcviW+QubM3I/RO/f9dT
+/aXZNTGuRSdWbK0Y25ayKrIipaYCwZO2flIgltADxmUcWNZ5YzezuWL5SB4jTwwj
+7FtYJdzZKkZUwty24+02RwU7J7nKidoPJXTpw4VfU3ltTPyNmhAR5uvp53/1UN6E
+Yq2TpOFIHNPc0TK01SlTfc1ixFSRN/GyNFynocXeOkIWJtkw+AoRtxGTxqQffOU/
+Efn9UhV/aqv68nmHlD/r2woOpcU59+4WCq46LGVv1ahbszz+OrjDORJ0RUlwoeiZ
+qwZyuQINBGCi0IABEADIq1PKO9YCq+pPZDNjuKftAISwfI/CAcqBU4Io6yBUWerH
+mx1+WfAdcuhzbU84yEK8MlLsskWQ4KAFa7W1nFXmhtzcQsAtGLRB4RRei2/Sv4BM
+52+YNWOOWpUsg/yMcj2jsfhA7xU1oPxgnE58erJ4aSMKPQzRGJtytBr+eUvWULVJ
+Q10qtf8EIaIVclhiBu4HeLXctuZVmo5K0JgZ8ZZca3QYbKpUx7WeTqn+KzONJ1Nb
+33lCS4H7vGEe9EXtjua9MPLCVEd8ic2S5ZUMpIWcd8B8R0dk5biO46ZK23YGzw2y
+R6LQiiWgUvELip+L8WmEnphHtxVOlZI+9EmBhhcSP8GBzImk89RCFpdOqMPOlRmG
+A2z8ZgjJhF1NOu+2U/w33cSfiISCdeeQfKXqrI+RGRv668uLp+5aoFp1EdbjETGz
+nDOIAzzoFIDsqG8jWSEUEWag4+EV70xjaYK2XOt9/RLo8jQe34WsqE177bX0x/8C
+frllmC8MSBHX56/3PFN10BUqxwBV8qXmqyv64FI9jsK01aFhd0CLvEZuBQpiZzLQ
+LmaupvlrQgs5cKriyxITbyAS0RfY3zOj20rxMNXcaCpU2XhfOuuIh+llo2Z3gw17
+ZR08b4eIejV7d+8RdFf1o3A4HUIpeeDPHE3K1QcT2XvB1IN+S8TUNinKg/MliQAR
+AQABiQRsBBgBCAAgFiEE1okuoYgb2WEDMK17CAGZmveHSOgFAmCi0IACGwICQAkQ
+CAGZmveHSOjBdCAEGQEIAB0WIQSsPd9d7Df6dDNExdPZAVD3qPLG7gUCYKLQgAAK
+CRDZAVD3qPLG7sfJD/9yRT30wtPUaxwfBah3SKc+wZgotNVrVxJBK10NihOcO9fP
+kmNISgCm4KX3UVofb9j/AB8cg2ps1czz6k6kQJb/baCE751xnIHDQ7El9XGnBQiA
+PvGgAWWMwzldJTW3PQBcWbxBAujeL2q8iVE+aP7afI2yiyderP7C5StQMNaOlQUC
+WaNd04ZF06Ke0E5gfZKW5GlwiDIl5mCf+/cZCMtydXtYDK6RAPURcgqvitlRCeFZ
+e8CJSlEQBc+BcVg+ynunXkYgl2kSt6WiT5ugA17aVNdF+6dmg9ikqmAVrqAYQa00
+7KAueCffYwWAiLoh4PrKuwbY4yNJaQjCi3ety6pPh+PmnKJVuUpYszacsdRIbEzu
+TvcJp8rSYNXdfgp5j79HwSki0UkazJ0cofIGmopQoHgew+S8JB7wzEPVO0m1sVyg
+la1Tj7dxdCNr/qX/LZtAJd42zIfWhbrk5NVg1FmZYJSjxzZTxzjV+aH5AxKGqjEX
+qZVOi3ZvNq7HKEfEwv4tI9xqq6/2DjNpoJ59Jv6N12Dd1uLYtaE+sw8X0W097e72
+fw8JWmkBOwHueeSZAQkFLc+uPszWf4JakVGMo5a20MBlHxN+a+OE5bVBngmXC9oq
+tqUWPgIEos5d7nJWM0KYppf/w46+78qByxNMOgIJ8vShGkU83FRnBI2/eYwEJLIs
+D/sGr58dwtiBhpL53wyg17KaVhTya1HRFLKLpeAutJ71o1bsZF4guge35tTExJcT
+B2OlGmhUf0AhNRfBzsa9HgvACOgotfekSA3n6q1MAfMOBgohKk9l+UX0sQomDF+Z
+C1G29t2/uMc7zRda7mJXoC56lLqrIR

[arrow] 05/05: ARROW-12617: [Python] Align orc.write_table keyword order with parquet.write_table

2021-05-14 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch maint-4.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit cbcdb33a78763cb39b201241229c0a41cff236e0
Author: Joris Van den Bossche 
AuthorDate: Mon May 3 17:29:16 2021 +0200

ARROW-12617: [Python] Align orc.write_table keyword order with 
parquet.write_table

Closes #10223 from jorisvandenbossche/ARROW-12617-orc-write_table-signature

Authored-by: Joris Van den Bossche 
Signed-off-by: Antoine Pitrou 
---
 python/pyarrow/_orc.pyx  |  3 ++-
 python/pyarrow/orc.py| 16 
 python/pyarrow/tests/test_orc.py | 10 +-
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/python/pyarrow/_orc.pyx b/python/pyarrow/_orc.pyx
index 2640057..e56a62d 100644
--- a/python/pyarrow/_orc.pyx
+++ b/python/pyarrow/_orc.pyx
@@ -29,6 +29,7 @@ from pyarrow.lib cimport (check_status, _Weakrefable,
   Schema, pyarrow_wrap_schema,
   pyarrow_wrap_batch,
   RecordBatch,
+  Table,
   pyarrow_wrap_table,
   pyarrow_unwrap_schema,
   pyarrow_unwrap_table,
@@ -127,7 +128,7 @@ cdef class ORCWriter(_Weakrefable):
 self.writer = move(GetResultValue[unique_ptr[ORCFileWriter]](
 ORCFileWriter.Open(self.rd_handle.get())))
 
-def write(self, object table):
+def write(self, Table table):
 cdef:
 shared_ptr[CTable] sp_table
 sp_table = pyarrow_unwrap_table(table)
diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py
index e1009bc..13af5a2 100644
--- a/python/pyarrow/orc.py
+++ b/python/pyarrow/orc.py
@@ -18,9 +18,10 @@
 
 from itertools import count
 from numbers import Integral
+import warnings
 
 from pyarrow import types
-from pyarrow.lib import Schema
+from pyarrow.lib import Schema, Table
 import pyarrow._orc as _orc
 
 
@@ -184,19 +185,26 @@ class ORCWriter:
 self.writer.close()
 
 
-def write_table(where, table):
+def write_table(table, where):
 """
 Write a table into an ORC file
 
 Parameters
 --
+table : pyarrow.lib.Table
+The table to be written into the ORC file
 where : str or pyarrow.io.NativeFile
 Writable target. For passing Python file objects or byte buffers,
 see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
 or pyarrow.io.FixedSizeBufferWriter.
-table : pyarrow.lib.Table
-The table to be written into the ORC file
 """
+if isinstance(where, Table):
+warnings.warn(
+"The order of the arguments has changed. Pass as "
+"'write_table(table, where)' instead. The old order will raise "
+"an error in the future.", FutureWarning, stacklevel=2
+)
+table, where = where, table
 writer = ORCWriter(where)
 writer.write(table)
 writer.close()
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index 14edad8..e71c452 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -176,7 +176,15 @@ def test_orcfile_readwrite():
 a = pa.array([1, None, 3, None])
 b = pa.array([None, "Arrow", None, "ORC"])
 table = pa.table({"int64": a, "utf8": b})
-orc.write_table(buffer_output_stream, table)
+orc.write_table(table, buffer_output_stream)
+buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
+output_table = orc.ORCFile(buffer_reader).read()
+assert table.equals(output_table)
+
+# deprecated keyword order
+buffer_output_stream = pa.BufferOutputStream()
+with pytest.warns(FutureWarning):
+orc.write_table(buffer_output_stream, table)
 buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
 output_table = orc.ORCFile(buffer_reader).read()
 assert table.equals(output_table)


[arrow] 04/05: ARROW-12622: [Python] Fix segfault in read_csv when not on main thread

2021-05-14 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch maint-4.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 2b5b15dd7c37ec2033f114b3e9c65e70d8dfc38a
Author: David Li 
AuthorDate: Mon May 3 16:53:16 2021 +0200

ARROW-12622: [Python] Fix segfault in read_csv when not on main thread

An uninitialized StopToken caused segfaults if you ever called read_csv 
with cancellation disabled or when not on the main thread (e.g. if used in a 
Flight server). If we have a 4.0.1 I think this qualifies as a regression.

Closes #10227 from lidavidm/arrow-12622

Authored-by: David Li 
Signed-off-by: Antoine Pitrou 
---
 python/pyarrow/error.pxi | 2 +-
 python/pyarrow/tests/test_csv.py | 8 
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi
index f9e45f2..2866848 100644
--- a/python/pyarrow/error.pxi
+++ b/python/pyarrow/error.pxi
@@ -188,8 +188,8 @@ cdef class SignalStopHandler:
 if signal.getsignal(sig) not in (signal.SIG_DFL,
  signal.SIG_IGN, None)]
 
+self._stop_token = StopToken()
 if not self._signals.empty():
-self._stop_token = StopToken()
 self._stop_token.init(GetResultValue(
 SetSignalStopSource()).token())
 self._enabled = True
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 395f948..34ab556 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -942,6 +942,14 @@ class BaseTestCSVRead:
 assert isinstance(e, pa.ArrowCancelled)
 assert e.signum == signal.SIGINT
 
+def test_cancellation_disabled(self):
+# ARROW-12622: reader would segfault when the cancelling signal
+# handler was not enabled (e.g. if disabled, or if not on the
+# main thread)
+t = threading.Thread(target=lambda: self.read_bytes(b"f64\n0.1"))
+t.start()
+t.join()
+
 
 class TestSerialCSVRead(BaseTestCSVRead, unittest.TestCase):
 


[arrow] 02/05: ARROW-12601: [R][Packaging] Fix pkg-config check in r/configure

2021-05-14 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch maint-4.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 3ed7fab9eb56a60e45a0de4374616a25c4091a17
Author: Ian Cook 
AuthorDate: Thu Apr 29 17:32:33 2021 -0400

ARROW-12601: [R][Packaging] Fix pkg-config check in r/configure

Fixes false positives in a check that pkg-config is installed

Closes #10198 from ianmcook/ARROW-12601

Authored-by: Ian Cook 
Signed-off-by: Ian Cook 
---
 r/configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/configure b/r/configure
index f6c1013..2da2b9f 100755
--- a/r/configure
+++ b/r/configure
@@ -77,7 +77,7 @@ elif [ "$INCLUDE_DIR" ] && [ "$LIB_DIR" ]; then
 else
   # Use pkg-config if available and allowed
   pkg-config --version >/dev/null 2>&1
-  if [ "$ARROW_USE_PKG_CONFIG" != "false" ] && [ $? -eq 0 ]; then
+  if [ $? -eq 0 ] && [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then
 PKGCONFIG_CFLAGS=`pkg-config --cflags --silence-errors ${PKG_CONFIG_NAME}`
 PKGCONFIG_LIBS=`pkg-config --libs-only-l --silence-errors 
${PKG_CONFIG_NAME}`
 PKGCONFIG_DIRS=`pkg-config --libs-only-L --silence-errors 
${PKG_CONFIG_NAME}`


[arrow] 03/05: ARROW-11926: [R] preparations for ucrt toolchains

2021-05-14 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch maint-4.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 0bebdbc26b41139433d97c23608afc2af4e18976
Author: Jeroen Ooms 
AuthorDate: Fri Apr 30 15:09:28 2021 -0500

ARROW-11926: [R] preparations for ucrt toolchains

Minimal changes to link to ucrt builds specifically once supported (and 
will be safely redundant until then).

Closes #10217 from jeroen/winucrt

Lead-authored-by: Jeroen Ooms 
Co-authored-by: Neal Richardson 
Signed-off-by: Jonathan Keane 
---
 r/configure.win |  2 +-
 r/src/Makevars.ucrt | 19 +++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/r/configure.win b/r/configure.win
index d645834..f31bf8f 100644
--- a/r/configure.win
+++ b/r/configure.win
@@ -50,7 +50,7 @@ AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer 
-laws-cpp-sdk-identity-man
 # NOTE: If you make changes to the libraries below, you should also change
 # ci/scripts/r_windows_build.sh and ci/scripts/PKGBUILD
 PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC 
-DARROW_DS_STATIC -DARROW_R_WITH_ARROW -DARROW_R_WITH_PARQUET 
-DARROW_R_WITH_DATASET"
-PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) 
'"-L${RWINLIB}/lib"'$(R_ARCH) '"-lparquet -larrow_dataset -larrow 
-larrow_bundled_dependencies -lutf8proc -lthrift -lsnappy -lz -lzstd -llz4 
${MIMALLOC_LIBS} ${OPENSSL_LIBS}"
+PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) 
'"-L${RWINLIB}/lib"'$(R_ARCH)$(CRT) '"-lparquet -larrow_dataset -larrow 
-larrow_bundled_dependencies -lutf8proc -lthrift -lsnappy -lz -lzstd -llz4 
${MIMALLOC_LIBS} ${OPENSSL_LIBS}"
 
 # S3 and re2 support only for Rtools40 (i.e. R >= 4.0)
 "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e 'R.version$major >= 4' | grep TRUE 
>/dev/null 2>&1
diff --git a/r/src/Makevars.ucrt b/r/src/Makevars.ucrt
new file mode 100644
index 0000000..52488eb
--- /dev/null
+++ b/r/src/Makevars.ucrt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+CRT=-ucrt
+include Makevars.win


[arrow] branch maint-4.0.x updated (f959141 -> cbcdb33)

2021-05-14 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch maint-4.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from f959141  [Release] Update versions for 4.0.0
 new 28c1fd6  ARROW-12520: [R] Minor docs updates
 new 3ed7fab  ARROW-12601: [R][Packaging] Fix pkg-config check in 
r/configure
 new 0bebdbc  ARROW-11926: [R] preparations for ucrt toolchains
 new 2b5b15d  ARROW-12622: [Python] Fix segfault in read_csv when not on 
main thread
 new cbcdb33  ARROW-12617: [Python] Align orc.write_table keyword order 
with parquet.write_table

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 python/pyarrow/_orc.pyx|  3 ++-
 python/pyarrow/error.pxi   |  2 +-
 python/pyarrow/orc.py  | 16 ++
 python/pyarrow/tests/test_csv.py   |  8 +++
 python/pyarrow/tests/test_orc.py   | 10 -
 r/configure|  2 +-
 r/configure.win|  2 +-
 .../arrow/gpu/.gitignore => r/src/Makevars.ucrt|  3 ++-
 r/vignettes/arrow.Rmd  |  2 +-
 r/vignettes/developing.Rmd | 25 +++---
 10 files changed, 50 insertions(+), 23 deletions(-)
 copy cpp/src/arrow/gpu/.gitignore => r/src/Makevars.ucrt (96%)


[arrow] 01/05: ARROW-12520: [R] Minor docs updates

2021-05-14 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch maint-4.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 28c1fd6175ef2b925562f4fd5f1afed5a02abd67
Author: Jonathan Keane 
AuthorDate: Mon Apr 26 12:23:47 2021 -0700

ARROW-12520: [R] Minor docs updates

Closes #10143 from jonkeane/ARROW-12520

Authored-by: Jonathan Keane 
Signed-off-by: Neal Richardson 
---
 r/vignettes/arrow.Rmd  |  2 +-
 r/vignettes/developing.Rmd | 25 +
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd
index 21cbbe4..4c5da50 100644
--- a/r/vignettes/arrow.Rmd
+++ b/r/vignettes/arrow.Rmd
@@ -104,7 +104,7 @@ corresponds to a class of the same name in the Arrow C++ 
library. The `arrow`
 package provides a variety of `R6` and S3 methods for interacting with 
instances
 of these classes.
 
-For convenience, the `arrow package also defines several synthetic classes that
+For convenience, the `arrow` package also defines several synthetic classes 
that
 do not exist in the C++ library, including:
 
 * `ArrowDatum`: inherited by `Scalar`, `Array`, and `ChunkedArray`
diff --git a/r/vignettes/developing.Rmd b/r/vignettes/developing.Rmd
index 38027a9..efb2c80 100644
--- a/r/vignettes/developing.Rmd
+++ b/r/vignettes/developing.Rmd
@@ -116,7 +116,7 @@ export ARROW_HOME=$(pwd)/dist
 mkdir $ARROW_HOME
 ```
 
-_Special instructions on Linux:_ You will need to set `LD_LIBRARY_PATH` to the 
`lib` directory that will is under where we set `$ARROW_HOME`, before launching 
R and using Arrow. One way to do this is to add it to your profile (we use 
`~/.bash_profile` here, but you might need to put this in a different file 
depending on your setup, e.g. if you use a shell other than `bash`). On macOS 
we do not need to do this because the macOS shared library paths are hardcoded 
to their locations during  [...]
+_Special instructions on Linux:_ You will need to set `LD_LIBRARY_PATH` to the 
`lib` directory that is under where we set `$ARROW_HOME`, before launching R 
and using Arrow. One way to do this is to add it to your profile (we use 
`~/.bash_profile` here, but you might need to put this in a different file 
depending on your setup, e.g. if you use a shell other than `bash`). On macOS 
we do not need to do this because the macOS shared library paths are hardcoded 
to their locations during build time.
 
 ```{bash, save=run & ubuntu & !sys_install}
 export LD_LIBRARY_PATH=$ARROW_HOME/lib:$LD_LIBRARY_PATH
@@ -140,14 +140,14 @@ cmake \
   -DARROW_COMPUTE=ON \
   -DARROW_CSV=ON \
   -DARROW_DATASET=ON \
+  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   -DARROW_FILESYSTEM=ON \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
   -DARROW_JEMALLOC=ON \
   -DARROW_JSON=ON \
   -DARROW_PARQUET=ON \
   -DARROW_WITH_SNAPPY=ON \
   -DARROW_WITH_ZLIB=ON \
-  -DARROW_INSTALL_NAME_RPATH=OFF \
-  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   ..
 ```
 
@@ -172,14 +172,14 @@ cmake \
   -DARROW_COMPUTE=ON \
   -DARROW_CSV=ON \
   -DARROW_DATASET=ON \
+  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   -DARROW_FILESYSTEM=ON \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
   -DARROW_JEMALLOC=ON \
   -DARROW_JSON=ON \
   -DARROW_PARQUET=ON \
   -DARROW_WITH_SNAPPY=ON \
   -DARROW_WITH_ZLIB=ON \
-  -DARROW_INSTALL_NAME_RPATH=OFF \
-  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   ..
 ```
 
@@ -191,6 +191,7 @@ To enable optional features including: S3 support, an 
alternative memory allocat
 
 ``` shell
   -DARROW_MIMALLOC=ON \
+  -DARROW_S3=ON \
   -DARROW_WITH_BROTLI=ON \
   -DARROW_WITH_BZ2=ON \
   -DARROW_WITH_LZ4=ON \
@@ -252,7 +253,7 @@ export ARROW_R_CXXFLAGS=-fno-omit-frame-pointer
 With the setups described here, you should not need to rebuild the Arrow 
library or even the C++ source in the R package as you iterated and work on the 
R package. The only time those should need to be rebuilt is if you have changed 
the C++ in the R package (and even then, `R CMD INSTALL .` should only need to 
recompile the files that have changed) _or_ if the Arrow library C++ has 
changed and there is a mismatch between the Arrow Library and the R package. If 
you find yourself rebuildin [...]
 
 
-For a full build: a `cmake` command with all of the R-relevant 
optional dependencies turned on
+For a full build: a `cmake` command with all of the R-relevant 
optional dependencies turned on. Development with other languages might require 
different flags as well. For example, to develop Python, you would need to also 
add `-DARROW_PYTHON=ON` (though all of the other flags used for Python are 
already included here).
 
 
 ``` shell
@@ -262,26 +263,26 @@ cmake \
   -DARROW_COMPUTE=ON \
   -DARROW_CSV=ON \
   -DARROW_DATASET=ON \
+  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   -DARROW_FILESYSTEM=ON \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
   -DARROW_JEMALLOC=ON \
   -DARROW_JSON=ON \
-  -DARROW_PARQUET=ON \
-  -DARROW_WITH_SNAPPY=ON \
-  -DARROW_W

[arrow] branch maint-4.0.x created (now f959141)

2021-05-14 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch maint-4.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git.


  at f959141  [Release] Update versions for 4.0.0

No new revisions were added by this update.


[arrow-datafusion] branch master updated: Make it easer for developers to find Ballista documentation (#330)

2021-05-14 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 1702d6c  Make it easer for developers to find Ballista documentation 
(#330)
1702d6c is described below

commit 1702d6c85ebfdbc968b1dc427a9799e74b64ff96
Author: Andy Grove 
AuthorDate: Fri May 14 15:03:53 2021 -0600

Make it easer for developers to find Ballista documentation (#330)
---
 DEVELOPERS.md |  3 ++
 README.md |  3 ++
 ballista/README.md| 15 +++---
 ballista/docs/README.md   |  7 +--
 ballista/docs/{dev-env-rust.md => dev-env.md} |  0
 ballista/docs/integration-testing.md  | 10 ++--
 ballista/docs/release-process.md  | 68 ---
 ballista/docs/rust-docker.md  | 66 --
 8 files changed, 18 insertions(+), 154 deletions(-)

diff --git a/DEVELOPERS.md b/DEVELOPERS.md
index 1dc9304..be8bb61 100644
--- a/DEVELOPERS.md
+++ b/DEVELOPERS.md
@@ -21,6 +21,9 @@
 
 This section describes how you can get started at developing DataFusion.
 
+For information on developing with Ballista, see the 
+[Ballista developer documentation](ballista/docs/README.md). 
+
 ### Bootstrap environment
 
 DataFusion is written in Rust and it uses a standard rust toolkit:
diff --git a/README.md b/README.md
index ded264a..f72c73b 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,9 @@ logical query plans as well as a query optimizer and 
execution engine
 capable of parallel execution against partitioned data sources (CSV
 and Parquet) using threads.
 
+DataFusion also supports distributed query execution via the  
+[Ballista](ballista/README.md) crate.
+
 ## Use Cases
 
 DataFusion is used to create modern, fast and efficient data
diff --git a/ballista/README.md b/ballista/README.md
index 288386f..276af3c 100644
--- a/ballista/README.md
+++ b/ballista/README.md
@@ -50,15 +50,14 @@ Although Ballista is largely inspired by Apache Spark, 
there are some key differ
 - The use of Apache Arrow as the memory model and network protocol means that 
data can be exchanged between executors
   in any programming language with minimal serialization overhead.
 
-# Status
+## Status
 
-The Ballista project was donated to Apache Arrow in April 2021 and work is 
underway to integrate more tightly with 
-DataFusion.
-
-One of the goals is to implement a common scheduler that can seamlessly scale 
queries across cores in DataFusion and 
-across nodes in Ballista.
-
-Ballista issues are tracked in ASF JIRA 
[here](https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20component%20%3D%20%22Rust%20-%20Ballista%22)
+Ballista was 
[donated](https://arrow.apache.org/blog/2021/04/12/ballista-donation/) to the 
Apache Arrow project in
+April 2021 and should be considered experimental.
 
+## Getting Started
 
+The [Ballista Developer Documentation](docs/README.md) and the 
+[DataFusion User 
Guide](https://github.com/apache/arrow-datafusion/tree/master/docs/user-guide) 
are currently the 
+best sources of information for getting started with Ballista.
 
diff --git a/ballista/docs/README.md b/ballista/docs/README.md
index 44c831d..6588c1d 100644
--- a/ballista/docs/README.md
+++ b/ballista/docs/README.md
@@ -20,7 +20,7 @@
 
 This directory contains documentation for developers that are contributing to 
Ballista. If you are looking for 
 end-user documentation for a published release, please start with the 
-[Ballista User Guide](https://ballistacompute.org/docs/) instead.
+[DataFusion User Guide](../../docs/user-guide) instead.
 
 ## Architecture & Design
 
@@ -29,9 +29,6 @@ end-user documentation for a published release, please start 
with the
 
 ## Build, Test, Release
 
-- Setting up a [Rust development environment](dev-env-rust.md).
-- Setting up a [Java development environment](dev-env-jvm.md).
-- Notes on building [Rust docker images](rust-docker.md)  
+- Setting up a [development environment](dev-env.md).
 - [Integration Testing](integration-testing.md)
-- [Release process](release-process.md)
 
diff --git a/ballista/docs/dev-env-rust.md b/ballista/docs/dev-env.md
similarity index 100%
rename from ballista/docs/dev-env-rust.md
rename to ballista/docs/dev-env.md
diff --git a/ballista/docs/integration-testing.md 
b/ballista/docs/integration-testing.md
index 2a979b6..3f818a4 100644
--- a/ballista/docs/integration-testing.md
+++ b/ballista/docs/integration-testing.md
@@ -18,15 +18,11 @@
 -->
 # Integration Testing
 
-Ballista has a [benchmark 
crate](https://github.com/ballista-compute/ballista/tree/main/rust/benchmarks/tpch)
 which is
-derived from TPC-H and this is currently the main form of integration testing. 
+We use the [DataFusion 
Benchmarks](https://

[arrow-datafusion] branch master updated: Update arrow dependencies again (#341)

2021-05-14 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 874132e  Update arrow dependencies again (#341)
874132e is described below

commit 874132ec752c251dcdd2541f80e6889d307219d6
Author: Andrew Lamb 
AuthorDate: Fri May 14 17:03:28 2021 -0400

Update arrow dependencies again (#341)
---
 ballista/rust/client/Cargo.toml| 2 +-
 ballista/rust/core/Cargo.toml  | 4 ++--
 ballista/rust/executor/Cargo.toml  | 4 ++--
 ballista/rust/scheduler/Cargo.toml | 2 +-
 datafusion-cli/Cargo.toml  | 2 +-
 datafusion-examples/Cargo.toml | 2 +-
 datafusion/Cargo.toml  | 4 ++--
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/ballista/rust/client/Cargo.toml b/ballista/rust/client/Cargo.toml
index 8b5d7af..f7ed273 100644
--- a/ballista/rust/client/Cargo.toml
+++ b/ballista/rust/client/Cargo.toml
@@ -31,5 +31,5 @@ futures = "0.3"
 log = "0.4"
 tokio = "1.0"
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"8226219fe7104f6c8a2740806f96f02c960d991c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"4449ee96fe3fd4a0b275da8dd25ce2792699bc98" }
 datafusion = { path = "../../../datafusion" }
diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml
index d98cc7e..7eec207 100644
--- a/ballista/rust/core/Cargo.toml
+++ b/ballista/rust/core/Cargo.toml
@@ -40,8 +40,8 @@ tokio = "1.0"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"8226219fe7104f6c8a2740806f96f02c960d991c" }
-arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"8226219fe7104f6c8a2740806f96f02c960d991c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"4449ee96fe3fd4a0b275da8dd25ce2792699bc98" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"4449ee96fe3fd4a0b275da8dd25ce2792699bc98" }
 
 datafusion = { path = "../../../datafusion" }
 
diff --git a/ballista/rust/executor/Cargo.toml 
b/ballista/rust/executor/Cargo.toml
index a5e4034..31fd9d0 100644
--- a/ballista/rust/executor/Cargo.toml
+++ b/ballista/rust/executor/Cargo.toml
@@ -44,8 +44,8 @@ tokio-stream = "0.1"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"8226219fe7104f6c8a2740806f96f02c960d991c" }
-arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"8226219fe7104f6c8a2740806f96f02c960d991c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"4449ee96fe3fd4a0b275da8dd25ce2792699bc98" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"4449ee96fe3fd4a0b275da8dd25ce2792699bc98" }
 
 datafusion = { path = "../../../datafusion" }
 
diff --git a/ballista/rust/scheduler/Cargo.toml 
b/ballista/rust/scheduler/Cargo.toml
index 43dc428..4793534 100644
--- a/ballista/rust/scheduler/Cargo.toml
+++ b/ballista/rust/scheduler/Cargo.toml
@@ -52,7 +52,7 @@ tonic = "0.4"
 tower = { version = "0.4" }
 warp = "0.3"
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"8226219fe7104f6c8a2740806f96f02c960d991c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"4449ee96fe3fd4a0b275da8dd25ce2792699bc98" }
 datafusion = { path = "../../../datafusion" }
 
 [dev-dependencies]
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index 2551b77..747a6b0 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -31,4 +31,4 @@ clap = "2.33"
 rustyline = "8.0"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", 
"sync"] }
 datafusion = { path = "../datafusion" }
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"8226219fe7104f6c8a2740806f96f02c960d991c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"4449ee96fe3fd4a0b275da8dd25ce2792699bc98" }
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index 8d8f20e..28175f8 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -29,7 +29,7 @@ publish = false
 
 
 [dev-dependencies]
-arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"8226219fe7104f6c8a2740806f96f02c960d991c" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"4449ee96fe3fd4a0b275da8dd25ce279

[arrow-datafusion] branch master updated: fix clippy warning (#286)

2021-05-06 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new b8805d4  fix clippy warning (#286)
b8805d4 is described below

commit b8805d4f44d4da8f16069d93ab342dc6f082ca07
Author: Jiayu Liu 
AuthorDate: Fri May 7 12:14:45 2021 +0800

fix clippy warning (#286)
---
 datafusion/src/physical_plan/distinct_expressions.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datafusion/src/physical_plan/distinct_expressions.rs 
b/datafusion/src/physical_plan/distinct_expressions.rs
index ffc138a..927f16f 100644
--- a/datafusion/src/physical_plan/distinct_expressions.rs
+++ b/datafusion/src/physical_plan/distinct_expressions.rs
@@ -64,10 +64,10 @@ impl DistinctCount {
 let state_data_types = 
input_data_types.into_iter().map(state_type).collect();
 
 Self {
-state_data_types,
-exprs,
 name,
 data_type,
+state_data_types,
+exprs,
 }
 }
 }


[arrow-datafusion] branch master updated: Use standard make_null_array for CASE (#223)

2021-04-30 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 23d02bb  Use standard make_null_array for CASE (#223)
23d02bb is described below

commit 23d02bb3c642ed69e7b963ed74df9687b91af970
Author: Andrew Lamb 
AuthorDate: Fri Apr 30 10:53:45 2021 -0400

Use standard make_null_array for CASE (#223)
---
 datafusion/src/physical_plan/expressions/case.rs | 34 ++--
 1 file changed, 2 insertions(+), 32 deletions(-)

diff --git a/datafusion/src/physical_plan/expressions/case.rs 
b/datafusion/src/physical_plan/expressions/case.rs
index 723438d..95ae532 100644
--- a/datafusion/src/physical_plan/expressions/case.rs
+++ b/datafusion/src/physical_plan/expressions/case.rs
@@ -234,36 +234,6 @@ fn if_then_else(
 }
 }
 
-macro_rules! make_null_array {
-($TY:ty, $N:expr) => {{
-let mut builder = <$TY>::new($N);
-for _ in 0..$N {
-builder.append_null()?;
-}
-Ok(Arc::new(builder.finish()))
-}};
-}
-
-fn build_null_array(data_type: &DataType, num_rows: usize) -> Result<ArrayRef> 
{
-match data_type {
-DataType::UInt8 => make_null_array!(array::UInt8Builder, num_rows),
-DataType::UInt16 => make_null_array!(array::UInt16Builder, num_rows),
-DataType::UInt32 => make_null_array!(array::UInt32Builder, num_rows),
-DataType::UInt64 => make_null_array!(array::UInt64Builder, num_rows),
-DataType::Int8 => make_null_array!(array::Int8Builder, num_rows),
-DataType::Int16 => make_null_array!(array::Int16Builder, num_rows),
-DataType::Int32 => make_null_array!(array::Int32Builder, num_rows),
-DataType::Int64 => make_null_array!(array::Int64Builder, num_rows),
-DataType::Float32 => make_null_array!(array::Float32Builder, num_rows),
-DataType::Float64 => make_null_array!(array::Float64Builder, num_rows),
-DataType::Utf8 => make_null_array!(array::StringBuilder, num_rows),
-other => Err(DataFusionError::Execution(format!(
-"CASE does not support '{:?}'",
-other
-))),
-}
-}
-
 macro_rules! array_equals {
 ($TY:ty, $L:expr, $R:expr, $eq_fn:expr) => {{
 let when_value = $L
@@ -347,7 +317,7 @@ impl CaseExpr {
 let mut current_value: Option<ArrayRef> = if let Some(e) = 
&self.else_expr {
 Some(e.evaluate(batch)?.into_array(batch.num_rows()))
 } else {
-Some(build_null_array(&return_type, batch.num_rows())?)
+Some(new_null_array(&return_type, batch.num_rows()))
 };
 
 // walk backwards through the when/then expressions
@@ -388,7 +358,7 @@ impl CaseExpr {
 let mut current_value: Option<ArrayRef> = if let Some(e) = 
&self.else_expr {
 Some(e.evaluate(batch)?.into_array(batch.num_rows()))
 } else {
-Some(build_null_array(&return_type, batch.num_rows())?)
+Some(new_null_array(&return_type, batch.num_rows()))
 };
 
 // walk backwards through the when/then expressions


[arrow-rs] branch master updated: Disabled rebase needed until demonstrate working. (#243)

2021-04-30 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 4865247  Disabled rebase needed until demonstrate working. (#243)
4865247 is described below

commit 486524733639d3c9e60e44bb07a65c628958b7b6
Author: Jorge Leitao 
AuthorDate: Fri Apr 30 13:26:41 2021 +0200

Disabled rebase needed until demonstrate working. (#243)
---
 .github/workflows/dev_pr.yml | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index c37cd4f..8202853 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -38,8 +38,8 @@ jobs:
   configuration-path: .github/workflows/dev_pr/labeler.yml
   sync-labels: true
 
-  - name: Checks if PR needs rebase
-uses: eps1lon/actions-label-merge-conflict@releases/2.x
-with:
-  dirtyLabel: "needs-rebase"
-  repoToken: "${{ secrets.GITHUB_TOKEN }}"
+  #- name: Checks if PR needs rebase
+  #  uses: eps1lon/actions-label-merge-conflict@releases/2.x
+  #  with:
+  #dirtyLabel: "needs-rebase"
+  #repoToken: "${{ secrets.GITHUB_TOKEN }}"


[arrow-datafusion] branch master updated: Fix Filter / where clause without column names is removed in optimization pass (#225)

2021-04-30 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 2423ff0  Fix Filter / where clause without column names is removed in 
optimization pass (#225)
2423ff0 is described below

commit 2423ff0dd1fe9c0932c1cb8d1776efa3acd69554
Author: Daniël Heres 
AuthorDate: Fri Apr 30 10:11:14 2021 +0200

Fix Filter / where clause without column names is removed in optimization 
pass (#225)

* Workaround where without columns

* Add some docs

* Remove print statement

* Bring back removed comment
---
 datafusion/src/optimizer/filter_push_down.rs | 34 
 datafusion/tests/sql.rs  | 14 
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/datafusion/src/optimizer/filter_push_down.rs 
b/datafusion/src/optimizer/filter_push_down.rs
index 4622e9f..356d497 100644
--- a/datafusion/src/optimizer/filter_push_down.rs
+++ b/datafusion/src/optimizer/filter_push_down.rs
@@ -237,17 +237,30 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> 
Result<LogicalPlan> {
 let mut predicates = vec![];
 split_members(predicate, &mut predicates);
 
+// Predicates without referencing columns (WHERE FALSE, WHERE 1=1, 
etc.)
+let mut no_col_predicates = vec![];
+
 predicates
 .into_iter()
 .try_for_each::<_, Result<()>>(|predicate| {
 let mut columns: HashSet<String> = HashSet::new();
 utils::expr_to_column_names(predicate, &mut columns)?;
-// collect the predicate
-state.filters.push((predicate.clone(), columns));
+if columns.is_empty() {
+no_col_predicates.push(predicate)
+} else {
+// collect the predicate
+state.filters.push((predicate.clone(), columns));
+}
 Ok(())
 })?;
-
-optimize(input, state)
+// Predicates without columns will not be pushed down.
+// As those contain only literals, they could be optimized using 
constant folding
+// and removal of WHERE TRUE / WHERE FALSE
+if !no_col_predicates.is_empty() {
+Ok(add_filter(optimize(input, state)?, &no_col_predicates))
+} else {
+optimize(input, state)
+}
 }
 LogicalPlan::Projection {
 input,
@@ -483,6 +496,19 @@ mod tests {
 }
 
 #[test]
+fn filter_no_columns() -> Result<()> {
+let table_scan = test_table_scan()?;
+let plan = LogicalPlanBuilder::from(&table_scan)
+.filter(lit(0i64).eq(lit(1i64)))?
+.build()?;
+let expected = "\
+Filter: Int64(0) Eq Int64(1)\
+\n  TableScan: test projection=None";
+assert_optimized_plan_eq(&plan, expected);
+Ok(())
+}
+
+#[test]
 fn filter_jump_2_plans() -> Result<()> {
 let table_scan = test_table_scan()?;
 let plan = LogicalPlanBuilder::from(&table_scan)
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 79baeae..7169294 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -475,6 +475,20 @@ async fn csv_query_group_by_and_having_and_where() -> 
Result<()> {
 }
 
 #[tokio::test]
+async fn all_where_empty() -> Result<()> {
+let mut ctx = ExecutionContext::new();
+register_aggregate_csv(&mut ctx)?;
+let sql = "SELECT *
+   FROM aggregate_test_100
+   WHERE 1=2";
+let mut actual = execute(&mut ctx, sql).await;
+actual.sort();
+let expected: Vec<Vec<String>> = vec![];
+assert_eq!(expected, actual);
+Ok(())
+}
+
+#[tokio::test]
 async fn csv_query_having_without_group_by() -> Result<()> {
 let mut ctx = ExecutionContext::new();
 register_aggregate_csv(&mut ctx)?;


[arrow-datafusion] branch master updated: Update arrow-rs deps (#224)

2021-04-29 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 88222b7  Update arrow-rs deps (#224)
88222b7 is described below

commit 88222b7dcf1baa5befbba5cd175fdd90ea5d
Author: Andrew Lamb 
AuthorDate: Thu Apr 29 15:07:31 2021 -0400

Update arrow-rs deps (#224)
---
 ballista/rust/client/Cargo.toml| 2 +-
 ballista/rust/core/Cargo.toml  | 4 ++--
 ballista/rust/executor/Cargo.toml  | 4 ++--
 ballista/rust/scheduler/Cargo.toml | 2 +-
 datafusion-examples/Cargo.toml | 2 +-
 datafusion/Cargo.toml  | 4 ++--
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/ballista/rust/client/Cargo.toml b/ballista/rust/client/Cargo.toml
index 013adc0..d812b65 100644
--- a/ballista/rust/client/Cargo.toml
+++ b/ballista/rust/client/Cargo.toml
@@ -31,5 +31,5 @@ futures = "0.3"
 log = "0.4"
 tokio = "1.0"
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"ed00e4d4a160cd5182bfafb81fee2240ec005014" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"d008f31b107c1030a1f5144c164e8ca8bf543576" }
 datafusion = { path = "../../../datafusion" }
diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml
index b92225f..b1fab62 100644
--- a/ballista/rust/core/Cargo.toml
+++ b/ballista/rust/core/Cargo.toml
@@ -40,8 +40,8 @@ tokio = "1.0"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"ed00e4d4a160cd5182bfafb81fee2240ec005014" }
-arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"ed00e4d4a160cd5182bfafb81fee2240ec005014" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"d008f31b107c1030a1f5144c164e8ca8bf543576" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"d008f31b107c1030a1f5144c164e8ca8bf543576" }
 
 datafusion = { path = "../../../datafusion" }
 
diff --git a/ballista/rust/executor/Cargo.toml 
b/ballista/rust/executor/Cargo.toml
index 6c9546e..2284d91 100644
--- a/ballista/rust/executor/Cargo.toml
+++ b/ballista/rust/executor/Cargo.toml
@@ -44,8 +44,8 @@ tokio-stream = "0.1"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"ed00e4d4a160cd5182bfafb81fee2240ec005014" }
-arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"ed00e4d4a160cd5182bfafb81fee2240ec005014" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"d008f31b107c1030a1f5144c164e8ca8bf543576" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"d008f31b107c1030a1f5144c164e8ca8bf543576" }
 
 datafusion = { path = "../../../datafusion" }
 
diff --git a/ballista/rust/scheduler/Cargo.toml 
b/ballista/rust/scheduler/Cargo.toml
index 1f488c0..93a0730 100644
--- a/ballista/rust/scheduler/Cargo.toml
+++ b/ballista/rust/scheduler/Cargo.toml
@@ -52,7 +52,7 @@ tonic = "0.4"
 tower = { version = "0.4" }
 warp = "0.3"
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"ed00e4d4a160cd5182bfafb81fee2240ec005014" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"d008f31b107c1030a1f5144c164e8ca8bf543576" }
 datafusion = { path = "../../../datafusion" }
 
 [dev-dependencies]
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index f8f4f35..77b1553 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -29,7 +29,7 @@ publish = false
 
 
 [dev-dependencies]
-arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"ed00e4d4a160cd5182bfafb81fee2240ec005014" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = 
"d008f31b107c1030a1f5144c164e8ca8bf543576" }
 datafusion = { path = "../datafusion" }
 prost = "0.7"
 tonic = "0.4"
diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml
index d7540db..5f743f6 100644
--- a/datafusion/Cargo.toml
+++ b/datafusion/Cargo.toml
@@ -51,8 +51,8 @@ unicode_expressions = ["unicode-segmentation"]
 [dependencies]
 ahash = "0.7"
 hashbrown = "0.11"
-arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"ed00e4d4a160cd5182bfafb81fee2240ec005014", features = ["prettyprint"] }
-parquet = { git = "https://github.com/apache/arrow-rs", rev = 
"ed00e4d4a160cd5182bfafb81fee2240ec005014", features = ["arrow"] }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = 
"d008f31b107c1030a1f5144c164e8ca8bf543576", features = ["prettyprint"] }
+parquet = { git = "https://github.com/apache/arrow-rs", rev = 
"d008f31b107c1030a1f5144c164e8ca8bf543576", features = ["arrow"] }
 sqlparser = "0.9.0"
 clap = "2.33"
 rustyline = {version = "7.0", optional = true}


[arrow-datafusion] branch master updated: Remove hard coded ballista versions. Fixes #32 (#49)

2021-04-24 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new fa99979  Remove hard coded ballista versions. Fixes #32 (#49)
fa99979 is described below

commit fa999799b65fc22abfada36955b0b0cc3ebf4c4f
Author: sathis 
AuthorDate: Sun Apr 25 10:35:52 2021 +0530

Remove hard coded ballista versions. Fixes #32 (#49)

Co-authored-by: Sathis Kumar 
---
 benchmarks/tpch-gen.sh   | 3 +--
 dev/build-rust-base.sh   | 3 ++-
 dev/build-rust.sh| 3 +--
 dev/{build-rust-base.sh => build-set-env.sh} | 5 ++---
 dev/build-ui.sh  | 3 +--
 5 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/benchmarks/tpch-gen.sh b/benchmarks/tpch-gen.sh
index fef3480..3cef3bd 100755
--- a/benchmarks/tpch-gen.sh
+++ b/benchmarks/tpch-gen.sh
@@ -16,10 +16,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-BALLISTA_VERSION=0.5.0-SNAPSHOT
-
 #set -e
 
+. ./dev/build-set-env.sh
 docker build -t ballistacompute/ballista-tpchgen:$BALLISTA_VERSION -f 
tpchgen.dockerfile .
 
 # Generate data into the ./data directory if it does not already exist
diff --git a/dev/build-rust-base.sh b/dev/build-rust-base.sh
index 1bedbd8..f2a4cc3 100755
--- a/dev/build-rust-base.sh
+++ b/dev/build-rust-base.sh
@@ -16,6 +16,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-BALLISTA_VERSION=0.5.0-SNAPSHOT
 set -e
+
+. ./dev/build-set-env.sh
 docker build -t ballistacompute/rust-base:$BALLISTA_VERSION -f 
dev/docker/rust-base.dockerfile .
diff --git a/dev/build-rust.sh b/dev/build-rust.sh
index 5777d1e..479cb2a 100755
--- a/dev/build-rust.sh
+++ b/dev/build-rust.sh
@@ -17,8 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-BALLISTA_VERSION=0.5.0-SNAPSHOT
-
 set -e
 
+. ./dev/build-set-env.sh
 docker build -t ballistacompute/ballista-rust:$BALLISTA_VERSION -f 
dev/docker/rust.dockerfile .
diff --git a/dev/build-rust-base.sh b/dev/build-set-env.sh
similarity index 85%
copy from dev/build-rust-base.sh
copy to dev/build-set-env.sh
index 1bedbd8..3eb29e7 100755
--- a/dev/build-rust-base.sh
+++ b/dev/build-set-env.sh
@@ -16,6 +16,5 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-BALLISTA_VERSION=0.5.0-SNAPSHOT
-set -e
-docker build -t ballistacompute/rust-base:$BALLISTA_VERSION -f 
dev/docker/rust-base.dockerfile .
+
+export BALLISTA_VERSION=$(awk -F'[ ="]+' '$1 == "version" { print $2 }' 
ballista/rust/core/Cargo.toml)
diff --git a/dev/build-ui.sh b/dev/build-ui.sh
index d39d610..bb5bff3 100755
--- a/dev/build-ui.sh
+++ b/dev/build-ui.sh
@@ -17,8 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-BALLISTA_VERSION=0.4.2-SNAPSHOT
-
 set -e
 
+. ./dev/build-set-env.sh
 docker build -t ballistacompute/ballista-scheduler-ui:$BALLISTA_VERSION -f 
dev/docker/ui.scheduler.dockerfile ballista/ui/scheduler


[arrow-rs] branch master updated: Support auto-vectorization for min/max using multiversion (#9)

2021-04-23 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 463f88f  Support auto-vectorization for min/max using multiversion (#9)
463f88f is described below

commit 463f88f5553a51a0498658bdc5a0ba13a2e24eda
Author: Daniël Heres 
AuthorDate: Fri Apr 23 09:59:34 2021 +0200

Support auto-vectorization for min/max using multiversion (#9)
---
 arrow/Cargo.toml   | 1 +
 arrow/src/compute/kernels/aggregate.rs | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 2346ced..f23217b 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -51,6 +51,7 @@ flatbuffers = "^0.8"
 hex = "0.4"
 prettytable-rs = { version = "0.8.0", optional = true }
 lexical-core = "^0.7"
+multiversion = "0.6.1"
 
 [features]
 default = []
diff --git a/arrow/src/compute/kernels/aggregate.rs 
b/arrow/src/compute/kernels/aggregate.rs
index d0e3f22..a01b29e 100644
--- a/arrow/src/compute/kernels/aggregate.rs
+++ b/arrow/src/compute/kernels/aggregate.rs
@@ -17,6 +17,7 @@
 
 //! Defines aggregations over Arrow arrays.
 
+use multiversion::multiversion;
 use std::ops::Add;
 
 use crate::array::{
@@ -103,6 +104,8 @@ pub fn min_string(
 }
 
 /// Helper function to perform min/max lambda function on values from a 
numeric array.
+#[multiversion]
+#[clone(target = "x86_64+avx")]
 fn min_max_helper<T, F>(array: &PrimitiveArray<T>, cmp: F) -> Option<T::Native>
 where
 T: ArrowNumericType,


[arrow-rs] branch master updated: Add GitHub templates (#17)

2021-04-22 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 861b572  Add GitHub templates (#17)
861b572 is described below

commit 861b5723cff4bed0a03dda6102fe02d7c09618a5
Author: Andy Grove 
AuthorDate: Thu Apr 22 10:08:23 2021 -0600

Add GitHub templates (#17)
---
 .github/ISSUE_TEMPLATE/bug_report.md  | 20 
 .github/ISSUE_TEMPLATE/feature_request.md | 21 +
 .github/pull_request_template.md  | 19 +++
 dev/release/rat_exclude_files.txt |  3 ++-
 4 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md 
b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..5600dab
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,20 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: bug
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md 
b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000..d9883dd
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,21 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: enhancement
+assignees: ''
+
+---
+
+**Is your feature request related to a problem or challenge? Please describe 
what you are trying to do.**
+A clear and concise description of what the problem is. Ex. I'm always 
frustrated when [...] 
+(This section helps Arrow developers understand the context and *why* for this 
feature, in addition to  the *what*)
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features 
you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..5da0d08
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,19 @@
+# Which issue does this PR close?
+
+We generally require a GitHub issue to be filed for all bug fixes and 
enhancements and this helps us generate change logs for our releases. You can 
link an issue to this PR using the GitHub syntax. For example `Closes #123` 
indicates that this PR will close issue #123.
+
+Closes #.
+
+ # Rationale for this change
+ Why are you proposing this change? If this is already explained clearly in 
the issue then this section is not needed.
+ Explaining clearly why changes are proposed helps reviewers understand your 
changes and offer better suggestions for fixes.  
+
+# What changes are included in this PR?
+
+There is no need to duplicate the description in the issue here but it is 
sometimes worth providing a summary of the individual changes in this PR.
+
+# Are there any user-facing changes?
+
+If there are user-facing changes then we may require documentation to be 
updated before approving the PR.
+
+If there are any breaking changes to public APIs, please add the `breaking 
change` label.
diff --git a/dev/release/rat_exclude_files.txt 
b/dev/release/rat_exclude_files.txt
index 58f937f..f3eb273 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -7,7 +7,8 @@
 *.csv
 *.json
 *.snap
-.github/ISSUE_TEMPLATE/question.md
+.github/ISSUE_TEMPLATE/*.md
+.github/pull_request_template.md
 ci/etc/rprofile
 ci/etc/*.patch
 ci/vcpkg/*.patch


[arrow-rs] branch master updated: Buffer::from_slice_ref set correct capacity (#18)

2021-04-22 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new c3fe3ba  Buffer::from_slice_ref set correct capacity (#18)
c3fe3ba is described below

commit c3fe3bab9905739fdda75301dab07a18c91731bd
Author: Raphael Taylor-Davies <1781103+tustv...@users.noreply.github.com>
AuthorDate: Thu Apr 22 14:09:59 2021 +0100

Buffer::from_slice_ref set correct capacity (#18)

Fixed ARROW-12504
---
 arrow/src/buffer/immutable.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arrow/src/buffer/immutable.rs b/arrow/src/buffer/immutable.rs
index cd6a2a3..f4aeae9 100644
--- a/arrow/src/buffer/immutable.rs
+++ b/arrow/src/buffer/immutable.rs
@@ -55,8 +55,8 @@ impl Buffer {
 /// Initializes a [Buffer] from a slice of items.
 pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: &T) -> 
Self {
 let slice = items.as_ref();
-let len = slice.len();
-let mut buffer = MutableBuffer::with_capacity(len);
+let capacity = slice.len() * std::mem::size_of::<U>();
+let mut buffer = MutableBuffer::with_capacity(capacity);
 buffer.extend_from_slice(slice);
 buffer.into()
 }


[arrow-rs] branch master updated: Added rebase-needed bot (#13)

2021-04-22 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new a0d5e11  Added rebase-needed bot (#13)
a0d5e11 is described below

commit a0d5e11d2938312ae5856be8cba8f9ae798cab03
Author: Jorge Leitao 
AuthorDate: Thu Apr 22 15:57:26 2021 +0200

Added rebase-needed bot (#13)
---
 .github/workflows/dev_pr.yml | 26 ++
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 89bc776..c37cd4f 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -18,15 +18,7 @@
 name: Dev PR
 
 on:
-  # TODO: Enable this when eps1lon/actions-label-merge-conflict is available.
-  # push:
-  #   branches:
-  # - master
-  pull_request_target:
-types:
-  - opened
-  - edited
-  - synchronize
+  pull_request:
 
 jobs:
   process:
@@ -46,14 +38,8 @@ jobs:
   configuration-path: .github/workflows/dev_pr/labeler.yml
   sync-labels: true
 
-  # TODO: Enable this when eps1lon/actions-label-merge-conflict is 
available.
-  # - name: Checks if PR needs rebase
-  #   if: |
-  # github.event_name == 'push' ||
-  # (github.event_name == 'pull_request_target' &&
-  #(github.event.action == 'opened' ||
-  # github.event.action == 'synchronize'))
-  #   uses: eps1lon/actions-label-merge-conflict@releases/2.x
-  #   with:
-  # dirtyLabel: "needs-rebase"
-  # repoToken: "${{ secrets.GITHUB_TOKEN }}"
+  - name: Checks if PR needs rebase
+uses: eps1lon/actions-label-merge-conflict@releases/2.x
+with:
+  dirtyLabel: "needs-rebase"
+  repoToken: "${{ secrets.GITHUB_TOKEN }}"


[arrow-datafusion] branch master updated: Fix some typos (#31)

2021-04-22 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 395d9d6  Fix some typos (#31)
395d9d6 is described below

commit 395d9d665800c7ea788c991b5847db828b4d88d6
Author: Yichen Wang <18348405+a...@users.noreply.github.com>
AuthorDate: Thu Apr 22 21:12:08 2021 +0800

Fix some typos (#31)
---
 datafusion/src/optimizer/constant_folding.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/datafusion/src/optimizer/constant_folding.rs 
b/datafusion/src/optimizer/constant_folding.rs
index 2fa03eb..d63177b 100644
--- a/datafusion/src/optimizer/constant_folding.rs
+++ b/datafusion/src/optimizer/constant_folding.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Boolean comparision rule rewrites redudant comparison expression involing 
boolean literal into
+//! Boolean comparison rule rewrites redundant comparison expression involving 
boolean literal into
 //! unary expression.
 
 use std::sync::Arc;
@@ -30,7 +30,7 @@ use crate::scalar::ScalarValue;
 
 /// Optimizer that simplifies comparison expressions involving boolean 
literals.
 ///
-/// Recursively go through all expressionss and simplify the following cases:
+/// Recursively go through all expressions and simplify the following cases:
 /// * `expr = true` and `expr != false` to `expr` when `expr` is of boolean 
type
 /// * `expr = false` and `expr != true` to `!expr` when `expr` is of boolean 
type
 /// * `true = true` and `false = false` to `true`
@@ -253,7 +253,7 @@ mod tests {
 }
 
 #[test]
-fn optimize_expr_null_comparision() -> Result<()> {
+fn optimize_expr_null_comparison() -> Result<()> {
 let schema = expr_test_schema();
 let mut rewriter = ConstantRewriter {
 schemas: vec![],


[arrow-rs] branch master updated: Update URLs (#14)

2021-04-21 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 8c1063c  Update URLs (#14)
8c1063c is described below

commit 8c1063c7b5e0c308ff5bcc4ba0283a2f0d67eeec
Author: Daniël Heres 
AuthorDate: Wed Apr 21 20:19:58 2021 +0200

Update URLs (#14)
---
 arrow-flight/Cargo.toml  | 4 ++--
 arrow-pyarrow-integration-testing/Cargo.toml | 4 ++--
 arrow/Cargo.toml | 4 ++--
 integration-testing/Cargo.toml   | 4 ++--
 parquet/Cargo.toml   | 4 ++--
 parquet_derive/Cargo.toml| 4 ++--
 parquet_derive_test/Cargo.toml   | 4 ++--
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml
index c607b56..ba92f4f 100644
--- a/arrow-flight/Cargo.toml
+++ b/arrow-flight/Cargo.toml
@@ -21,8 +21,8 @@ description = "Apache Arrow Flight"
 version = "4.0.0-SNAPSHOT"
 edition = "2018"
 authors = ["Apache Arrow "]
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
 license = "Apache-2.0"
 
 [dependencies]
diff --git a/arrow-pyarrow-integration-testing/Cargo.toml 
b/arrow-pyarrow-integration-testing/Cargo.toml
index ef356b0..d9ccc58 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -19,8 +19,8 @@
 name = "arrow-pyarrow-integration-testing"
 description = ""
 version = "4.0.0-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow "]
 license = "Apache-2.0"
 keywords = [ "arrow" ]
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 5ab1f8c..2346ced 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -19,8 +19,8 @@
 name = "arrow"
 version = "4.0.0-SNAPSHOT"
 description = "Rust implementation of Apache Arrow"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow "]
 license = "Apache-2.0"
 keywords = [ "arrow" ]
diff --git a/integration-testing/Cargo.toml b/integration-testing/Cargo.toml
index 12564c7..7580a8c 100644
--- a/integration-testing/Cargo.toml
+++ b/integration-testing/Cargo.toml
@@ -19,8 +19,8 @@
 name = "arrow-integration-testing"
 description = "Binaries used in the Arrow integration tests"
 version = "4.0.0-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow "]
 license = "Apache-2.0"
 edition = "2018"
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index e171196..bda23ef 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -20,8 +20,8 @@ name = "parquet"
 version = "4.0.0-SNAPSHOT"
 license = "Apache-2.0"
 description = "Apache Parquet implementation in Rust"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow "]
 keywords = [ "arrow", "parquet", "hadoop" ]
 readme = "README.md"
diff --git a/parquet_derive/Cargo.toml b/parquet_derive/Cargo.toml
index 6bed07d..732957d 100644
--- a/parquet_derive/Cargo.toml
+++ b/parquet_derive/Cargo.toml
@@ -20,8 +20,8 @@ name = "parquet_derive"
 version = "4.0.0-SNAPSHOT"
 license = "Apache-2.0"
 description = "Derive macros for the Rust implementation of Apache Parquet"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow "]
 keywords = [ "parquet" ]
 readme = "README.md"
diff --git a/parquet_derive_test/Cargo.toml b/parquet_derive_test/Cargo.toml
index 5914d7e..0a4c2de 100644
--- a/parquet_derive_test/Cargo.toml
+++ b/parquet_derive_test/Cargo.toml
@@ -20,8 +20,8 @@ name = "parquet_derive_test"
 version = "4.0.0-SNAPSHOT"
 license = "Apache-2.0"
 description = "Integration test package for parquet-derive"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow "]
 keywords = [ "parquet" ]
 edition = "2018"


[arrow-rs] 04/05: Allow creating issues. (#6)

2021-04-21 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 5918670a498a6514866e97b5d8ff75b494ef0ede
Author: Jorge Leitao 
AuthorDate: Mon Apr 19 17:05:17 2021 +0200

Allow creating issues. (#6)
---
 .github/.dir-locals.el | 19 --
 .github/CONTRIBUTING.md| 72 --
 .github/ISSUE_TEMPLATE/config.yml  | 22 
 .github/ISSUE_TEMPLATE/question.md | 26 --
 4 files changed, 139 deletions(-)

diff --git a/.github/.dir-locals.el b/.github/.dir-locals.el
deleted file mode 100644
index a880e4a..0000000
--- a/.github/.dir-locals.el
+++ /dev/null
@@ -1,19 +0,0 @@
-;;; Licensed to the Apache Software Foundation (ASF) under one
-;;; or more contributor license agreements.  See the NOTICE file
-;;; distributed with this work for additional information
-;;; regarding copyright ownership.  The ASF licenses this file
-;;; to you under the Apache License, Version 2.0 (the
-;;; "License"); you may not use this file except in compliance
-;;; with the License.  You may obtain a copy of the License at
-;;;
-;;;   http://www.apache.org/licenses/LICENSE-2.0
-;;;
-;;; Unless required by applicable law or agreed to in writing,
-;;; software distributed under the License is distributed on an
-;;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-;;; KIND, either express or implied.  See the License for the
-;;; specific language governing permissions and limitations
-;;; under the License.
-
-((js-mode . ((indent-tabs-mode . nil)
- (js-indent-level . 2))))
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
deleted file mode 100644
index bbabe35..0000000
--- a/.github/CONTRIBUTING.md
+++ /dev/null
@@ -1,72 +0,0 @@
-
-
-## Contributing to Apache Arrow
-
-There are many ways to contribute to Apache Arrow:
-
-* Contributing code (we call them "patches")
-* Writing documentation (another form of code, in a way)
-* Participating in discussions on JIRA or the mailing list
-* Helping users of the libraries
-
-## Reporting bugs and asking questions
-
-We support GitHub issues as a lightweight way to ask questions and engage with
-the Arrow developer community. We use [JIRA][3] for maintaining a queue of
-development work and as the public record for work on the project. So, feel
-free to open GitHub issues, but bugs and feature requests will eventually need
-to end up in JIRA, either before or after completing a pull request.
-
-## How to contribute patches
-
-We prefer to receive contributions in the form of GitHub pull requests. Please
-send pull requests against the [github.com/apache/arrow][4] repository 
following
-the procedure below.
-
-If you are looking for some ideas on what to contribute, check out the [JIRA
-issues][3] for the Apache Arrow project. Comment on the issue and/or contact
-[d...@arrow.apache.org](https://lists.apache.org/list.html?d...@arrow.apache.org)
-with your questions and ideas.
-
-If you’d like to report a bug but don’t have time to fix it, you can still post
-it on JIRA, or email the mailing list
-[d...@arrow.apache.org](https://lists.apache.org/list.html?d...@arrow.apache.org)
-
-To contribute a patch:
-
-1. Break your work into small, single-purpose patches if possible. It’s much
-harder to merge in a large change with a lot of disjoint features.
-2. If one doesn't already exist, create a JIRA for your patch on the [Arrow 
Project
-JIRA](https://issues.apache.org/jira/browse/ARROW).
-3. Submit the patch as a GitHub pull request against the master branch. For a
-tutorial, see the GitHub guides on [forking a 
repo](https://help.github.com/en/articles/fork-a-repo)
-and [sending a pull 
request](https://help.github.com/en/articles/creating-a-pull-request-from-a-fork).
 So that your pull request syncs with the JIRA issue, prefix your pull request
-name with the JIRA issue id (ex: [ARROW-767: [C++] Filesystem 
abstraction](https://github.com/apache/arrow/pull/4225))
-4. Make sure that your code passes the unit tests. You can find instructions
-how to run the unit tests for each Arrow component in its respective README
-file.
-5. Add new unit tests for your code.
-
-Thank you in advance for your contributions!
-
-[1]: mailto:dev-subscr...@arrow.apache.org
-[2]: https://github.com/apache/arrow/tree/master/format
-[3]: https://issues.apache.org/jira/browse/ARROW
-[4]: https://github.com/apache/arrow
diff --git a/.github/ISSUE_TEMPLATE/config.yml 
b/.github/ISSUE_TEMPLATE/config.yml
deleted file mode 100644
index 5a05012..0000000
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this f

[arrow-rs] 03/05: Removed bot comment about title and JIRA. (#4)

2021-04-21 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 08812b915b8a53197754bfe80e7ccbaed2d3fd39
Author: Jorge Leitao 
AuthorDate: Mon Apr 19 08:02:59 2021 +0200

Removed bot comment about title and JIRA. (#4)
---
 .github/workflows/dev_pr.yml| 24 
 .github/workflows/dev_pr/link.js| 69 -
 .github/workflows/dev_pr/title_check.js | 56 --
 .github/workflows/dev_pr/title_check.md | 37 --
 4 files changed, 186 deletions(-)

diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 7b92b89..89bc776 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -35,30 +35,6 @@ jobs:
 steps:
   - uses: actions/checkout@v2
 
-  - name: Comment JIRA link
-if: |
-  github.event_name == 'pull_request_target' &&
-(github.event.action == 'opened' ||
- github.event.action == 'edited')
-uses: actions/github-script@v3
-with:
-  github-token: ${{ secrets.GITHUB_TOKEN }}
-  script: |
-const script = 
require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/dev_pr/link.js`);
-script({github, context});
-
-  - name: Check title
-if: |
-  github.event_name == 'pull_request_target' &&
-(github.event.action == 'opened' ||
- github.event.action == 'edited')
-uses: actions/github-script@v3
-with:
-  github-token: ${{ secrets.GITHUB_TOKEN }}
-  script: |
-const script = 
require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/dev_pr/title_check.js`);
-script({github, context});
-
   - name: Assign GitHub labels
 if: |
   github.event_name == 'pull_request_target' &&
diff --git a/.github/workflows/dev_pr/link.js b/.github/workflows/dev_pr/link.js
deleted file mode 100644
index 550a9cd..0000000
--- a/.github/workflows/dev_pr/link.js
+++ /dev/null
@@ -1,69 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-function detectJIRAID(title) {
-  if (!title) {
-return null;
-  }
-  const matched = /^(WIP:?\s*)?((ARROW|PARQUET)-\d+)/.exec(title);
-  if (!matched) {
-return null;
-  }
-  return matched[2];
-}
-
-async function haveComment(github, context, pullRequestNumber, body) {
-  const options = {
-owner: context.repo.owner,
-repo: context.repo.repo,
-issue_number: pullRequestNumber,
-page: 1
-  };
-  while (true) {
-const response = await github.issues.listComments(options);
-if (response.data.some(comment => comment.body === body)) {
-  return true;
-}
-if (!/;\s*rel="next"/.test(response.headers.link || "")) {
-  break;
-}
-options.page++;
-  }
-  return false;
-}
-
-async function commentJIRAURL(github, context, pullRequestNumber, jiraID) {
-  const jiraURL = `https://issues.apache.org/jira/browse/${jiraID}`;
-  if (await haveComment(github, context, pullRequestNumber, jiraURL)) {
-return;
-  }
-  await github.issues.createComment({
-owner: context.repo.owner,
-repo: context.repo.repo,
-issue_number: pullRequestNumber,
-body: jiraURL
-  });
-}
-
-module.exports = async ({github, context}) => {
-  const pullRequestNumber = context.payload.number;
-  const title = context.payload.pull_request.title;
-  const jiraID = detectJIRAID(title);
-  if (jiraID) {
-await commentJIRAURL(github, context, pullRequestNumber, jiraID);
-  }
-};
diff --git a/.github/workflows/dev_pr/title_check.js 
b/.github/workflows/dev_pr/title_check.js
deleted file mode 100644
index c1ebd9d..0000000
--- a/.github/workflows/dev_pr/title_check.js
+++ /dev/null
@@ -1,56 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache Lice

[arrow-rs] 01/05: Fixed ident error.

2021-04-21 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 72eda5a255182455651cd45a20ed847756a2a9aa
Author: Jorge C. Leitao 
AuthorDate: Tue Apr 20 16:06:23 2021 +

Fixed ident error.
---
 .asf.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.asf.yaml b/.asf.yaml
index addd7dd..646bdac 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -22,7 +22,7 @@ notifications:
   jira_options: link label worklog
 github:
   description: "Official Rust implementation of Apache Arrow"
-homepage: https://arrow.apache.org/
+  homepage: https://arrow.apache.org/
   enabled_merge_buttons:
 squash: true
 merge: false


[arrow-rs] 02/05: Made CI run on any change. (#5)

2021-04-21 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 8707fd2b2d17b17bd3e79be0255a18ffaea6914a
Author: Jorge Leitao 
AuthorDate: Mon Apr 19 08:01:42 2021 +0200

Made CI run on any change. (#5)
---
 .github/workflows/rust.yml | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index d19dd93..53f9ac2 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -18,16 +18,9 @@
 name: Rust
 
 on:
+  # always trigger
   push:
-paths:
-  - '.github/workflows/rust.yml'
-  - 'rust/**'
-  - 'format/Flight.proto'
   pull_request:
-paths:
-  - '.github/workflows/rust.yml'
-  - 'rust/**'
-  - 'format/Flight.proto'
 
 jobs:
 


[arrow-rs] 05/05: Fixed labeler. (#8)

2021-04-21 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 3d7cefb41db55e7b3d0735a12b4cd2945a16f965
Author: Jorge Leitao 
AuthorDate: Tue Apr 20 13:20:25 2021 +0200

Fixed labeler. (#8)
---
 .github/workflows/dev_pr/labeler.yml | 53 
 1 file changed, 5 insertions(+), 48 deletions(-)

diff --git a/.github/workflows/dev_pr/labeler.yml 
b/.github/workflows/dev_pr/labeler.yml
index 65ca3f2..63b4154 100644
--- a/.github/workflows/dev_pr/labeler.yml
+++ b/.github/workflows/dev_pr/labeler.yml
@@ -15,54 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"lang-c++":
-  - cpp/**/*
+arrow:
+  - arrow/**/*
 
-lang-c-glib:
-  - c_glib/**/*
-
-lang-csharp:
-  - csharp/**/*
-
-lang-go:
-  - go/**/*
-
-lang-java:
-  - java/**/*
-
-lang-js:
-  - js/**/*
-
-lang-julia:
-  - julia/**/*
-
-lang-python:
-  - python/**/*
-
-lang-R:
-  - r/**/*
-
-lang-ruby:
-  - ruby/**/*
-
-lang-rust:
-  - rust/**/*
-
-flight:
-  - cpp/src/arrow/flight/**/*
-  - r/R/flight.*
-  - rust/arrow-flight/**/*
-  - python/pyarrow/*flight.*
-
-gandiva:
-  - c_glib/gandiva-glib/**/*
-  - cpp/src/gandiva/**/*
-  - ruby/red-gandiva/**/*
-  - python/pyarrow/gandiva.*
+arrow-flight:
+  - arrow-flight/**/*
 
 parquet:
-  - c_glib/parquet-glib/**/*
-  - cpp/src/parquet/**/*
-  - r/R/parquet.*
-  - ruby/red-parquet/**/*
-  - rust/parquet*/**/*
+  - parquet/**/*


[arrow-rs] branch master updated (5479e19 -> 3d7cefb)

2021-04-21 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


omit 5479e19  Fixed labeler. (#8)
omit 5b3298a  Allow creating issues. (#6)
omit 9d22485  Removed bot comment about title and JIRA. (#4)
omit 87df452  Made CI run on any change. (#5)
omit 1a2d445  Merge pull request #3 from andygrove/fix-github-settings
omit 2f8bf1b  fix indent
 new 72eda5a  Fixed ident error.
 new 8707fd2  Made CI run on any change. (#5)
 new 08812b9  Removed bot comment about title and JIRA. (#4)
 new 5918670  Allow creating issues. (#6)
 new 3d7cefb  Fixed labeler. (#8)

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (5479e19)
\
 N -- N -- N   refs/heads/master (3d7cefb)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:


[arrow-datafusion] branch master updated: [Ballista] Fix Ballista IT (#5)

2021-04-19 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new 8345b5e  [Ballista] Fix Ballista IT (#5)
8345b5e is described below

commit 8345b5e33ae3bae05cf0a998c183a31fd6c2fa71
Author: Andy Grove 
AuthorDate: Mon Apr 19 22:44:10 2021 -0600

[Ballista] Fix Ballista IT (#5)
---
 .dockerignore | 42 ++-
 ballista/.dockerignore| 18 
 ballista/docker/README.md | 29 --
 {ballista/dev => dev}/build-rust-base.sh  |  2 +-
 {ballista/dev => dev}/build-rust.sh   |  2 +-
 {ballista => dev}/docker/rust-base.dockerfile |  2 +-
 {ballista => dev}/docker/rust.dockerfile  | 30 +++
 {ballista/dev => dev}/integration-tests.sh|  3 +-
 8 files changed, 32 insertions(+), 96 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index eb71138..9a64a12 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -21,42 +21,6 @@
 # This setup requires to all of our docker containers have arrow's source
 # as a mounted directory.
 
-# exclude everything
-**
-
-# include explicitly
-!ci/**
-!c_glib/Gemfile
-!dev/archery/requirements*.txt
-!python/requirements*.txt
-!python/manylinux1/**
-!python/manylinux2010/**
-!r/DESCRIPTION
-!ruby/Gemfile
-!ruby/red-arrow/Gemfile
-!ruby/red-arrow/lib/arrow/version.rb
-!ruby/red-arrow/red-arrow.gemspec
-!ruby/red-arrow-cuda/Gemfile
-!ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
-!ruby/red-arrow-cuda/red-arrow-cuda.gemspec
-!ruby/red-gandiva/Gemfile
-!ruby/red-gandiva/lib/gandiva/version.rb
-!ruby/red-gandiva/red-gandiva.gemspec
-!ruby/red-parquet/Gemfile
-!ruby/red-parquet/lib/parquet/version.rb
-!ruby/red-parquet/red-parquet.gemspec
-!ruby/red-plasma/Gemfile
-!ruby/red-plasma/lib/plasma/version.rb
-!ruby/red-plasma/red-plasma.gemspec
-!rust/Cargo.toml
-!rust/benchmarks/Cargo.toml
-!rust/arrow/Cargo.toml
-!rust/arrow/benches
-!rust/arrow-flight/Cargo.toml
-!rust/parquet/Cargo.toml
-!rust/parquet/build.rs
-!rust/parquet_derive/Cargo.toml
-!rust/parquet_derive_test/Cargo.toml
-!rust/datafusion/Cargo.toml
-!rust/datafusion/benches
-!rust/integration-testing/Cargo.toml
+ci
+dev
+**/target/*
diff --git a/ballista/.dockerignore b/ballista/.dockerignore
deleted file mode 100644
index 3cde49e..000
--- a/ballista/.dockerignore
+++ /dev/null
@@ -1,18 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-rust/**/target
diff --git a/ballista/docker/README.md b/ballista/docker/README.md
deleted file mode 100644
index 8417d04..000
--- a/ballista/docker/README.md
+++ /dev/null
@@ -1,29 +0,0 @@
-
-
-# Ballista Docker Images
-
-Pre-built docker images are available from [Docker 
Hub](https://hub.docker.com/orgs/ballistacompute/repositories) but here are the 
commands to build the images from source.
-
-Run these commands from the root directory of the project.
-
-```bash
-./dev/build-all.sh
-```
-
diff --git a/ballista/dev/build-rust-base.sh b/dev/build-rust-base.sh
similarity index 96%
rename from ballista/dev/build-rust-base.sh
rename to dev/build-rust-base.sh
index ee4b32c..e424909 100755
--- a/ballista/dev/build-rust-base.sh
+++ b/dev/build-rust-base.sh
@@ -18,4 +18,4 @@
 # under the License.
 BALLISTA_VERSION=0.4.2-SNAPSHOT
 set -e
-docker build -t ballistacompute/rust-base:$BALLISTA_VERSION -f 
docker/rust-base.dockerfile .
+docker build -t ballistacompute/rust-base:$BALLISTA_VERSION -f 
dev/docker/rust-base.dockerfile .
diff --git a/ballista/dev/build-rust.sh b/dev/build-rust.sh
similarity index 96%
rename from ballista/dev/build-rust.sh
rename to dev/build-rust.sh
index 1916f8e..d31c524 100755
--- a/ballista/dev/build-rust.sh
+++ b/dev/build-rust.sh
@@ -21,4 +21,4 @@ BALLISTA_VERSION=0.4.2-SNAPSHOT
 
 set -e
 
-docker build -t ballistacompute/ballista-rust:$BALLISTA_VERSION -f 
docker/rust.dockerfile .
+docker build -t ballistacompute/ballista-rust:$BALLISTA_VERSION -f 
dev/docker/rust.dockerfile .
diff --git a/ballista/docker/rust-base.dockerfile 
b/dev/doc

[arrow-rs] branch master updated: Allow creating issues. (#6)

2021-04-19 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new ae812db  Allow creating issues. (#6)
ae812db is described below

commit ae812dbdf698c90159fa7dda3f8b3111e1c60e02
Author: Jorge Leitao 
AuthorDate: Mon Apr 19 17:05:17 2021 +0200

Allow creating issues. (#6)
---
 .github/.dir-locals.el | 19 --
 .github/CONTRIBUTING.md| 72 --
 .github/ISSUE_TEMPLATE/config.yml  | 22 
 .github/ISSUE_TEMPLATE/question.md | 26 --
 4 files changed, 139 deletions(-)

diff --git a/.github/.dir-locals.el b/.github/.dir-locals.el
deleted file mode 100644
index a880e4a..000
--- a/.github/.dir-locals.el
+++ /dev/null
@@ -1,19 +0,0 @@
-;;; Licensed to the Apache Software Foundation (ASF) under one
-;;; or more contributor license agreements.  See the NOTICE file
-;;; distributed with this work for additional information
-;;; regarding copyright ownership.  The ASF licenses this file
-;;; to you under the Apache License, Version 2.0 (the
-;;; "License"); you may not use this file except in compliance
-;;; with the License.  You may obtain a copy of the License at
-;;;
-;;;   http://www.apache.org/licenses/LICENSE-2.0
-;;;
-;;; Unless required by applicable law or agreed to in writing,
-;;; software distributed under the License is distributed on an
-;;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-;;; KIND, either express or implied.  See the License for the
-;;; specific language governing permissions and limitations
-;;; under the License.
-
-((js-mode . ((indent-tabs-mode . nil)
- (js-indent-level . 2
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
deleted file mode 100644
index bbabe35..000
--- a/.github/CONTRIBUTING.md
+++ /dev/null
@@ -1,72 +0,0 @@
-
-
-## Contributing to Apache Arrow
-
-There are many ways to contribute to Apache Arrow:
-
-* Contributing code (we call them "patches")
-* Writing documentation (another form of code, in a way)
-* Participating in discussions on JIRA or the mailing list
-* Helping users of the libraries
-
-## Reporting bugs and asking questions
-
-We support GitHub issues as a lightweight way to ask questions and engage with
-the Arrow developer community. We use [JIRA][3] for maintaining a queue of
-development work and as the public record for work on the project. So, feel
-free to open GitHub issues, but bugs and feature requests will eventually need
-to end up in JIRA, either before or after completing a pull request.
-
-## How to contribute patches
-
-We prefer to receive contributions in the form of GitHub pull requests. Please
-send pull requests against the [github.com/apache/arrow][4] repository following
-the procedure below.
-
-If you are looking for some ideas on what to contribute, check out the [JIRA
-issues][3] for the Apache Arrow project. Comment on the issue and/or contact
-[dev@arrow.apache.org](https://lists.apache.org/list.html?dev@arrow.apache.org)
-with your questions and ideas.
-
-If you’d like to report a bug but don’t have time to fix it, you can still post
-it on JIRA, or email the mailing list
-[dev@arrow.apache.org](https://lists.apache.org/list.html?dev@arrow.apache.org)
-
-To contribute a patch:
-
-1. Break your work into small, single-purpose patches if possible. It’s much
-harder to merge in a large change with a lot of disjoint features.
-2. If one doesn't already exist, create a JIRA for your patch on the [Arrow Project
-JIRA](https://issues.apache.org/jira/browse/ARROW).
-3. Submit the patch as a GitHub pull request against the master branch. For a
-tutorial, see the GitHub guides on [forking a repo](https://help.github.com/en/articles/fork-a-repo)
-and [sending a pull request](https://help.github.com/en/articles/creating-a-pull-request-from-a-fork). So that your pull request syncs with the JIRA issue, prefix your pull request
-name with the JIRA issue id (ex: [ARROW-767: [C++] Filesystem abstraction](https://github.com/apache/arrow/pull/4225))
-4. Make sure that your code passes the unit tests. You can find instructions
-how to run the unit tests for each Arrow component in its respective README
-file.
-5. Add new unit tests for your code.
-
-Thank you in advance for your contributions!
-
-[1]: mailto:dev-subscribe@arrow.apache.org
-[2]: https://github.com/apache/arrow/tree/master/format
-[3]: https://issues.apache.org/jira/browse/ARROW
-[4]: https://github.com/apache/arrow
diff --git a/.github/ISSUE_TEMPLATE/config.yml 
b/.github/ISSUE_TEMPLATE/config.yml
deleted file mode 100644
index 5a05012..000
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license ag

[arrow-rs] branch master updated: Removed bot comment about title and JIRA. (#4)

2021-04-19 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 3f13806  Removed bot comment about title and JIRA. (#4)
3f13806 is described below

commit 3f1380664251bf5dffdaed6daa6e0d6378e13ac7
Author: Jorge Leitao 
AuthorDate: Mon Apr 19 08:02:59 2021 +0200

Removed bot comment about title and JIRA. (#4)
---
 .github/workflows/dev_pr.yml| 24 
 .github/workflows/dev_pr/link.js| 69 -
 .github/workflows/dev_pr/title_check.js | 56 --
 .github/workflows/dev_pr/title_check.md | 37 --
 4 files changed, 186 deletions(-)

diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 7b92b89..89bc776 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -35,30 +35,6 @@ jobs:
 steps:
   - uses: actions/checkout@v2
 
-  - name: Comment JIRA link
-if: |
-  github.event_name == 'pull_request_target' &&
-(github.event.action == 'opened' ||
- github.event.action == 'edited')
-uses: actions/github-script@v3
-with:
-  github-token: ${{ secrets.GITHUB_TOKEN }}
-  script: |
-const script = 
require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/dev_pr/link.js`);
-script({github, context});
-
-  - name: Check title
-if: |
-  github.event_name == 'pull_request_target' &&
-(github.event.action == 'opened' ||
- github.event.action == 'edited')
-uses: actions/github-script@v3
-with:
-  github-token: ${{ secrets.GITHUB_TOKEN }}
-  script: |
-const script = 
require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/dev_pr/title_check.js`);
-script({github, context});
-
   - name: Assign GitHub labels
 if: |
   github.event_name == 'pull_request_target' &&
diff --git a/.github/workflows/dev_pr/link.js b/.github/workflows/dev_pr/link.js
deleted file mode 100644
index 550a9cd..000
--- a/.github/workflows/dev_pr/link.js
+++ /dev/null
@@ -1,69 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-function detectJIRAID(title) {
-  if (!title) {
-return null;
-  }
-  const matched = /^(WIP:?\s*)?((ARROW|PARQUET)-\d+)/.exec(title);
-  if (!matched) {
-return null;
-  }
-  return matched[2];
-}
-
-async function haveComment(github, context, pullRequestNumber, body) {
-  const options = {
-owner: context.repo.owner,
-repo: context.repo.repo,
-issue_number: pullRequestNumber,
-page: 1
-  };
-  while (true) {
-const response = await github.issues.listComments(options);
-if (response.data.some(comment => comment.body === body)) {
-  return true;
-}
-if (!/;\s*rel="next"/.test(response.headers.link || "")) {
-  break;
-}
-options.page++;
-  }
-  return false;
-}
-
-async function commentJIRAURL(github, context, pullRequestNumber, jiraID) {
-  const jiraURL = `https://issues.apache.org/jira/browse/${jiraID}`;
-  if (await haveComment(github, context, pullRequestNumber, jiraURL)) {
-return;
-  }
-  await github.issues.createComment({
-owner: context.repo.owner,
-repo: context.repo.repo,
-issue_number: pullRequestNumber,
-body: jiraURL
-  });
-}
-
-module.exports = async ({github, context}) => {
-  const pullRequestNumber = context.payload.number;
-  const title = context.payload.pull_request.title;
-  const jiraID = detectJIRAID(title);
-  if (jiraID) {
-await commentJIRAURL(github, context, pullRequestNumber, jiraID);
-  }
-};
diff --git a/.github/workflows/dev_pr/title_check.js 
b/.github/workflows/dev_pr/title_check.js
deleted file mode 100644
index c1ebd9d..000
--- a/.github/workflows/dev_pr/title_check.js
+++ /dev/null
@@ -1,56 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the

[arrow-rs] branch master updated: Made CI run on any change. (#5)

2021-04-19 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 30e3759  Made CI run on any change. (#5)
30e3759 is described below

commit 30e375919e79c2ff08c5b6df217a3e967418c6a2
Author: Jorge Leitao 
AuthorDate: Mon Apr 19 08:01:42 2021 +0200

Made CI run on any change. (#5)
---
 .github/workflows/rust.yml | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index d19dd93..53f9ac2 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -18,16 +18,9 @@
 name: Rust
 
 on:
+  # always trigger
   push:
-paths:
-  - '.github/workflows/rust.yml'
-  - 'rust/**'
-  - 'format/Flight.proto'
   pull_request:
-paths:
-  - '.github/workflows/rust.yml'
-  - 'rust/**'
-  - 'format/Flight.proto'
 
 jobs:
 


[arrow-datafusion] branch master updated: Bumped arrow. (#7)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
 new a07cd6f  Bumped arrow. (#7)
a07cd6f is described below

commit a07cd6ff70ea33886aaa32c41c92d5caf02a6a26
Author: Jorge Leitao 
AuthorDate: Sun Apr 18 21:20:07 2021 +0200

Bumped arrow. (#7)
---
 ballista/rust/benchmarks/tpch/Cargo.toml | 4 ++--
 ballista/rust/client/Cargo.toml  | 2 +-
 ballista/rust/core/Cargo.toml| 4 ++--
 ballista/rust/executor/Cargo.toml| 4 ++--
 ballista/rust/scheduler/Cargo.toml   | 2 +-
 benchmarks/Cargo.toml| 4 ++--
 datafusion-examples/Cargo.toml   | 4 ++--
 datafusion/Cargo.toml| 4 ++--
 8 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/ballista/rust/benchmarks/tpch/Cargo.toml 
b/ballista/rust/benchmarks/tpch/Cargo.toml
index 9a7d651..8d62e20 100644
--- a/ballista/rust/benchmarks/tpch/Cargo.toml
+++ b/ballista/rust/benchmarks/tpch/Cargo.toml
@@ -28,8 +28,8 @@ edition = "2018"
 ballista = { path="../../client" }
 datafusion = { path = "../../../../datafusion" }
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
-parquet = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
+parquet = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
 
 env_logger = "0.8"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread"] }
diff --git a/ballista/rust/client/Cargo.toml b/ballista/rust/client/Cargo.toml
index bf40cdb..6ac8687 100644
--- a/ballista/rust/client/Cargo.toml
+++ b/ballista/rust/client/Cargo.toml
@@ -31,5 +31,5 @@ futures = "0.3"
 log = "0.4"
 tokio = "1.0"
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
 datafusion = { path = "../../../datafusion" }
diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml
index 58e6d22..e9d7682 100644
--- a/ballista/rust/core/Cargo.toml
+++ b/ballista/rust/core/Cargo.toml
@@ -40,8 +40,8 @@ tokio = "1.0"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
-arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
 
 datafusion = { path = "../../../datafusion" }
 
diff --git a/ballista/rust/executor/Cargo.toml 
b/ballista/rust/executor/Cargo.toml
index ccf30cf..79ceabe 100644
--- a/ballista/rust/executor/Cargo.toml
+++ b/ballista/rust/executor/Cargo.toml
@@ -45,8 +45,8 @@ tokio-stream = "0.1"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
-arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
 
 datafusion = { path = "../../../datafusion" }
 
diff --git a/ballista/rust/scheduler/Cargo.toml 
b/ballista/rust/scheduler/Cargo.toml
index 197a231..ce8ca09 100644
--- a/ballista/rust/scheduler/Cargo.toml
+++ b/ballista/rust/scheduler/Cargo.toml
@@ -52,7 +52,7 @@ tonic = "0.4"
 tower = { version = "0.4" }
 warp = "0.3"
 
-arrow = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
 datafusion = { path = "../../../datafusion" }
 
 [dev-dependencies]
diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
index 7fd8444..66a81be 100644
--- a/benchmarks/Cargo.toml
+++ b/benchmarks/Cargo.toml
@@ -31,8 +31,8 @@ simd = ["datafusion/simd"]
 snmalloc = ["snmalloc-rs"]
 
 [dependencies]
-arrow = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
-parquet = { git = "https://github.com/apache/arrow-rs", rev = "e023b4c" }
+arrow = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
+parquet = { git = "https://github.com/apache/arrow-rs", rev = "08a662f" }
 dat

[arrow-rs] 01/01: Removed bot comment about title and JIRA.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch fix-pr
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit a48f27c6da95659e343f78bc4342cd718e3692fd
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 19:07:47 2021 +

Removed bot comment about title and JIRA.
---
 .github/workflows/dev_pr.yml| 24 
 .github/workflows/dev_pr/link.js| 69 -
 .github/workflows/dev_pr/title_check.js | 56 --
 .github/workflows/dev_pr/title_check.md | 37 --
 4 files changed, 186 deletions(-)

diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 7b92b89..89bc776 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -35,30 +35,6 @@ jobs:
 steps:
   - uses: actions/checkout@v2
 
-  - name: Comment JIRA link
-if: |
-  github.event_name == 'pull_request_target' &&
-(github.event.action == 'opened' ||
- github.event.action == 'edited')
-uses: actions/github-script@v3
-with:
-  github-token: ${{ secrets.GITHUB_TOKEN }}
-  script: |
-const script = 
require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/dev_pr/link.js`);
-script({github, context});
-
-  - name: Check title
-if: |
-  github.event_name == 'pull_request_target' &&
-(github.event.action == 'opened' ||
- github.event.action == 'edited')
-uses: actions/github-script@v3
-with:
-  github-token: ${{ secrets.GITHUB_TOKEN }}
-  script: |
-const script = 
require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/dev_pr/title_check.js`);
-script({github, context});
-
   - name: Assign GitHub labels
 if: |
   github.event_name == 'pull_request_target' &&
diff --git a/.github/workflows/dev_pr/link.js b/.github/workflows/dev_pr/link.js
deleted file mode 100644
index 550a9cd..000
--- a/.github/workflows/dev_pr/link.js
+++ /dev/null
@@ -1,69 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-function detectJIRAID(title) {
-  if (!title) {
-return null;
-  }
-  const matched = /^(WIP:?\s*)?((ARROW|PARQUET)-\d+)/.exec(title);
-  if (!matched) {
-return null;
-  }
-  return matched[2];
-}
-
-async function haveComment(github, context, pullRequestNumber, body) {
-  const options = {
-owner: context.repo.owner,
-repo: context.repo.repo,
-issue_number: pullRequestNumber,
-page: 1
-  };
-  while (true) {
-const response = await github.issues.listComments(options);
-if (response.data.some(comment => comment.body === body)) {
-  return true;
-}
-if (!/;\s*rel="next"/.test(response.headers.link || "")) {
-  break;
-}
-options.page++;
-  }
-  return false;
-}
-
-async function commentJIRAURL(github, context, pullRequestNumber, jiraID) {
-  const jiraURL = `https://issues.apache.org/jira/browse/${jiraID}`;
-  if (await haveComment(github, context, pullRequestNumber, jiraURL)) {
-return;
-  }
-  await github.issues.createComment({
-owner: context.repo.owner,
-repo: context.repo.repo,
-issue_number: pullRequestNumber,
-body: jiraURL
-  });
-}
-
-module.exports = async ({github, context}) => {
-  const pullRequestNumber = context.payload.number;
-  const title = context.payload.pull_request.title;
-  const jiraID = detectJIRAID(title);
-  if (jiraID) {
-await commentJIRAURL(github, context, pullRequestNumber, jiraID);
-  }
-};
diff --git a/.github/workflows/dev_pr/title_check.js 
b/.github/workflows/dev_pr/title_check.js
deleted file mode 100644
index c1ebd9d..000
--- a/.github/workflows/dev_pr/title_check.js
+++ /dev/null
@@ -1,56 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache Lice

[arrow-rs] branch fix-pr created (now a48f27c)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch fix-pr
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


  at a48f27c  Removed bot comment about title and JIRA.

This branch includes the following new commits:

 new a48f27c  Removed bot comment about title and JIRA.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow-datafusion] branch fix-ci updated (8a995c9 -> 72b2f9b)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch fix-ci
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.


 discard 8a995c9  More cleanups.
 add 72b2f9b  More cleanups.

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (8a995c9)
\
 N -- N -- N   refs/heads/fix-ci (72b2f9b)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

No new revisions were added by this update.

Summary of changes:
 .github/workflows/rust.yml  | 4 ++--
 ballista/rust/benchmarks/tpch/README.md | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)


[arrow-datafusion] branch fix-ci updated (eed1b82 -> 8a995c9)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch fix-ci
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.


from eed1b82  More cleanups.
 add 8a995c9  More cleanups.

No new revisions were added by this update.

Summary of changes:
 .github/workflows/rust.yml | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)


[arrow-datafusion] branch fix-ci updated: More cleanups.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch fix-ci
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/fix-ci by this push:
 new eed1b82  More cleanups.
eed1b82 is described below

commit eed1b82b1c2ff817129f75533170f8e6148dbf07
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 18:41:41 2021 +

More cleanups.
---
 .gitignore  |   6 ++
 dev/.gitignore  |   1 +
 dev/archery/archery/cli.py  |   5 +-
 dev/archery/archery/utils/source.py |   8 +--
 dev/release/rat_exclude_files.txt   | 111 ++--
 5 files changed, 15 insertions(+), 116 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5397fe3..31bdf49 100644
--- a/.gitignore
+++ b/.gitignore
@@ -85,3 +85,9 @@ cpp/Brewfile.lock.json
 target
 Cargo.lock
 
+rusty-tags.vi
+.history
+.flatbuffers/
+
+.vscode
+venv/*
diff --git a/dev/.gitignore b/dev/.gitignore
index b079293..399c309 100644
--- a/dev/.gitignore
+++ b/dev/.gitignore
@@ -18,3 +18,4 @@
 # Python virtual environments for dev tools
 .venv*/
 
+__pycache__
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index bcaddf1..4bbde75 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -289,8 +289,7 @@ def decorate_lint_command(cmd):
 
 
 @archery.command(short_help="Check Arrow source tree for errors")
-@click.option("--src", metavar="", default=None,
-  callback=validate_arrow_sources,
+@click.option("--src", metavar="", default=".",
   help="Specify Arrow source directory")
 @click.option("--fix", is_flag=True, type=BOOL, default=False,
   help="Toggle fixing the lint errors if the linter supports it.")
@@ -301,6 +300,8 @@ def decorate_lint_command(cmd):
 @decorate_lint_command
 @click.pass_context
 def lint(ctx, src, fix, iwyu_all, **checks):
+src = ArrowSources(src)
+
 if checks.pop('all'):
 # "--all" is given => enable all non-selected checks
 for k, v in checks.items():
diff --git a/dev/archery/archery/utils/source.py 
b/dev/archery/archery/utils/source.py
index d30b4f1..1ae0fe0 100644
--- a/dev/archery/archery/utils/source.py
+++ b/dev/archery/archery/utils/source.py
@@ -45,13 +45,7 @@ class ArrowSources:
 --
 path : src
 """
-path = Path(path)
-# validate by checking a specific path in the arrow source tree
-if not (path / 'cpp' / 'CMakeLists.txt').exists():
-raise InvalidArrowSource(
-"No Arrow C++ sources found in {}.".format(path)
-)
-self.path = path
+self.path = Path(path)
 
 @property
 def archery(self):
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 68f5668..ead9c8d 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -12,37 +12,6 @@ ci/etc/rprofile
 ci/etc/*.patch
 ci/vcpkg/*.patch
 CHANGELOG.md
-cpp/CHANGELOG_PARQUET.md
-cpp/src/arrow/io/mman.h
-cpp/src/arrow/util/random.h
-cpp/src/arrow/status.cc
-cpp/src/arrow/status.h
-cpp/src/arrow/vendored/*
-cpp/build-support/asan_symbolize.py
-cpp/build-support/cpplint.py
-cpp/build-support/lint_exclusions.txt
-cpp/build-support/iwyu/*
-cpp/cmake_modules/FindPythonLibsNew.cmake
-cpp/cmake_modules/SnappyCMakeLists.txt
-cpp/cmake_modules/SnappyConfig.h
-cpp/examples/parquet/parquet-arrow/cmake_modules/FindArrow.cmake
-cpp/src/parquet/.parquetcppversion
-cpp/src/generated/parquet_constants.cpp
-cpp/src/generated/parquet_constants.h
-cpp/src/generated/parquet_types.cpp
-cpp/src/generated/parquet_types.h
-cpp/src/plasma/thirdparty/ae/ae.c
-cpp/src/plasma/thirdparty/ae/ae.h
-cpp/src/plasma/thirdparty/ae/ae_epoll.c
-cpp/src/plasma/thirdparty/ae/ae_evport.c
-cpp/src/plasma/thirdparty/ae/ae_kqueue.c
-cpp/src/plasma/thirdparty/ae/ae_select.c
-cpp/src/plasma/thirdparty/ae/config.h
-cpp/src/plasma/thirdparty/ae/zmalloc.h
-cpp/src/plasma/thirdparty/dlmalloc.c
-cpp/thirdparty/flatbuffers/include/flatbuffers/base.h
-cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h
-cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h
 dev/requirements*.txt
 dev/archery/MANIFEST.in
 dev/archery/requirements*.txt
@@ -115,56 +84,11 @@ dev/tasks/linux-packages/apache-arrow/debian/source/format
 dev/tasks/linux-packages/apache-arrow/debian/watch
 dev/tasks/requirements*.txt
 dev/tasks/conda-recipes/*
-docs/requirements.txt
-go/arrow/flight/Flight_grpc.pb.go
-go/arrow/go.sum
-go/arrow/Gopkg.lock
-go/arrow/flight/Flight.pb.go
-go/arrow/flight/Flight_grpc.pb.go
-go/arrow/internal/cpu/*
-go/arrow/type_string.go
-go/*.tmpldata
-go/*.s
-go/parquet/go.sum
-go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go
-go/parq

[arrow-datafusion] branch fix-ci updated: More cleanup.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch fix-ci
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/fix-ci by this push:
 new e0908bc  More cleanup.
e0908bc is described below

commit e0908bcab760c099ff242dc034cdba73d45b26ff
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 18:39:26 2021 +

More cleanup.
---
 .github/workflows/archery.yml|  64 --
 .github/workflows/cpp.yml| 395 ---
 .github/workflows/cpp_cron.yml   | 149 -
 .github/workflows/csharp.yml | 121 ---
 .github/workflows/dev.yml|   4 -
 .github/workflows/dev_pr/labeler.yml |  52 -
 .github/workflows/go.yml | 125 ---
 .github/workflows/java.yml   | 112 --
 .github/workflows/java_jni.yml   |  83 
 .github/workflows/js.yml | 122 ---
 .github/workflows/julia.yml  |  53 -
 .github/workflows/python.yml | 154 --
 .github/workflows/python_cron.yml| 141 -
 .github/workflows/r.yml  | 255 --
 .github/workflows/ruby.yml   | 290 -
 15 files changed, 2120 deletions(-)

diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
deleted file mode 100644
index 761e045..000
--- a/.github/workflows/archery.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: Archery & Crossbow
-
-on:
-  push:
-paths:
-  - '.github/workflows/archery.yml'
-  - 'dev/archery/**'
-  - 'dev/tasks/**'
-  - 'docker-compose.yml'
-  pull_request:
-paths:
-  - '.github/workflows/archery.yml'
-  - 'dev/archery/**'
-  - 'dev/tasks/**'
-  - 'docker-compose.yml'
-
-jobs:
-
-  test:
-if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
-name: Archery Unittests and Crossbow Check Config
-runs-on: ubuntu-latest
-steps:
-  - name: Checkout Arrow
-uses: actions/checkout@v2
-with:
-  fetch-depth: 0
-  - name: Git Fixup
-if: ${{ github.event_name == 'pull_request' }}
-shell: bash
-run: git branch master origin/master
-  - name: Free Up Disk Space
-run: ci/scripts/util_cleanup.sh
-  - name: Setup Python
-uses: actions/setup-python@v1
-with:
-  python-version: '3.6'
-  - name: Install Archery, Crossbow- and Test Dependencies
-run: pip install pytest responses -e dev/archery[all]
-  - name: Archery Unittests
-working-directory: dev/archery
-run: pytest -v archery
-  - name: Archery Docker Validation
-run: archery docker
-  - name: Crossbow Check Config
-working-directory: dev/tasks
-run: archery crossbow check-config
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
deleted file mode 100644
index 0bcf346..000
--- a/.github/workflows/cpp.yml
+++ /dev/null
@@ -1,395 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: C++
-
-on:
-  push:
-paths:
-  - '.github/workflows/cpp.yml'
-  - 'ci/docker/**'
-  - 'ci/scripts/cpp_*'
-  - 'ci/scripts/msys2_*'
-  - 'ci/scripts/util_*'
-  - 'cpp/**'
-  - 'format/Flight.proto'
- 

[arrow-datafusion] 01/01: Fixed CI.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch fix-ci
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 21170cb87a9524ec3f828d989452218ff7195596
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 18:30:39 2021 +

Fixed CI.
---
 .github/workflows/cancel.yml  |  77 --
 .github/workflows/dev.yml |  59 +--
 .github/workflows/integration.yml |  83 
 .github/workflows/rust.yml| 204 +++---
 4 files changed, 18 insertions(+), 405 deletions(-)

diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml
index de980eb..e1c6ed9 100644
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@@ -31,20 +31,6 @@ jobs:
   # Unfortunately, we need to define a separate cancellation step for
   # each workflow where we want to cancel stale runs.
   - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale C++ runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: cpp.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale C# runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: csharp.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
 name: "Cancel stale Dev runs"
 with:
   cancelMode: allDuplicates
@@ -52,69 +38,6 @@ jobs:
   workflowFileName: dev.yml
   skipEventTypes: '["push", "schedule"]'
   - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Go runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: go.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Integration runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: integration.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Java JNI runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: java_jni.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Java runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: java.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale JS runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: js.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Julia runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: julia.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Python runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: python.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale R runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: r.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Ruby runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: ruby.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
 name: "Cancel stale Rust runs"
 with:
   cancelMode: allDuplicates
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index d1b0184..9d8146a 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -31,66 +31,13 @@ jobs:
   lint:
 name: Lint C++, Pyth

[arrow-datafusion] branch fix-ci created (now 21170cb)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch fix-ci
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.


  at 21170cb  Fixed CI.

This branch includes the following new commits:

 new 21170cb  Fixed CI.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow-rs] branch master updated: Specify GitHub settings (#2)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 1e4b0e7  Specify GitHub settings (#2)
1e4b0e7 is described below

commit 1e4b0e7fe45e17c30a4c332bb44e9bd279a2d42d
Author: Andy Grove 
AuthorDate: Sun Apr 18 12:22:39 2021 -0600

Specify GitHub settings (#2)
---
 .asf.yaml | 9 +
 1 file changed, 9 insertions(+)

diff --git a/.asf.yaml b/.asf.yaml
index 4bd5191..addd7dd 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -20,3 +20,12 @@ notifications:
   issues:   git...@arrow.apache.org
   pullrequests: git...@arrow.apache.org
   jira_options: link label worklog
+github:
+  description: "Official Rust implementation of Apache Arrow"
+homepage: https://arrow.apache.org/
+  enabled_merge_buttons:
+squash: true
+merge: false
+rebase: false
+  features:
+issues: true
\ No newline at end of file


[arrow-rs] 01/01: Disabled merge.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch rebase-only
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 72aa5bbb462da03e7bbc2114505edb821a728a6a
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 18:02:54 2021 +

Disabled merge.
---
 .asf.yaml | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/.asf.yaml b/.asf.yaml
index 4bd5191..67e7fa5 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -20,3 +20,9 @@ notifications:
   issues:   git...@arrow.apache.org
   pullrequests: git...@arrow.apache.org
   jira_options: link label worklog
+
+github:
+  enabled_merge_buttons:
+squash:  true
+merge:   false
+rebase:  true


[arrow-rs] branch rebase-only created (now 72aa5bb)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch rebase-only
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


  at 72aa5bb  Disabled merge.

This branch includes the following new commits:

 new 72aa5bb  Disabled merge.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow-rs] branch master updated (212a001 -> c7bd778)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


from 212a001  Fixed RAT and Rust linting.
 add 3f9e2d9  Fixed CI.
 add ab13ebe  Fiddle paths to submodules.
 add a0ddd29  Fiddle with CI paths
 add 878a8d8  Removed test from datafusion.
 add 7b61b9c  Fiddle paths for testing.
 add 18a3cf2  Trying dev fix.
 add cbf8ed2  Fixed Linting.
 add c3190ee  Removed unused cancel jobs
 add c7bd778  Temporarily removed integration in CI.

No new revisions were added by this update.

Summary of changes:
 .github/workflows/cancel.yml|  70 --
 .github/workflows/dev.yml   |   2 +-
 .github/workflows/integration.yml   |  83 --
 .github/workflows/rust.yml  |  68 +-
 .gitignore  |   3 +
 .pre-commit-config.yaml |   4 +-
 README.md   |   4 +-
 arrow/src/util/test_util.rs |   7 +--
 ci/scripts/python_sdist_test.sh |   2 +-
 ci/scripts/python_test.sh   |   2 +-
 ci/scripts/rust_build.sh|   2 +-
 dev/.gitignore  |   2 +
 dev/archery/archery/cli.py  |   5 +-
 dev/archery/archery/utils/lint.py   |   4 --
 dev/archery/archery/utils/source.py |   8 +--
 dev/release/rat_exclude_files.txt   | 113 ++--
 16 files changed, 52 insertions(+), 327 deletions(-)
 delete mode 100644 .github/workflows/integration.yml


[arrow-rs] branch ci-fix updated: Temporarily removed integration in CI.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/ci-fix by this push:
 new c7bd778  Temporarily removed integration in CI.
c7bd778 is described below

commit c7bd778fd57257eea9ba804e774148caea0cb09d
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 17:48:42 2021 +

Temporarily removed integration in CI.
---
 .github/workflows/integration.yml | 83 ---
 1 file changed, 83 deletions(-)

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
deleted file mode 100644
index 2011255..000
--- a/.github/workflows/integration.yml
+++ /dev/null
@@ -1,83 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: Integration
-
-on:
-  push:
-paths:
-  - '.github/workflows/integration.yml'
-  - 'ci/**'
-  - 'dev/archery/**'
-  - 'go/**'
-  - 'integration/**'
-  - 'js/**'
-  - 'cpp/**'
-  - 'java/**'
-  - 'format/**'
-  - 'rust/**'
-  pull_request:
-paths:
-  - '.github/workflows/integration.yml'
-  - 'ci/**'
-  - 'dev/archery/**'
-  - 'go/**'
-  - 'integration/**'
-  - 'js/**'
-  - 'cpp/**'
-  - 'java/**'
-  - 'format/**'
-  - 'rust/**'
-
-env:
-  DOCKER_VOLUME_PREFIX: ".docker/"
-  ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
-  ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
-
-jobs:
-
-  docker:
-name: AMD64 Conda Integration Test
-runs-on: ubuntu-latest
-if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
-steps:
-  - name: Checkout Arrow
-uses: actions/checkout@v2
-with:
-  fetch-depth: 0
-  - name: Fetch Submodules and Tags
-run: ci/scripts/util_checkout.sh
-  - name: Free Up Disk Space
-run: ci/scripts/util_cleanup.sh
-  - name: Cache Docker Volumes
-uses: actions/cache@v2
-with:
-  path: .docker
-  key: conda-${{ hashFiles('cpp/**') }}
-  restore-keys: conda-
-  - name: Setup Python
-uses: actions/setup-python@v1
-with:
-  python-version: 3.8
-  - name: Setup Archery
-run: pip install -e dev/archery[docker]
-  - name: Execute Docker Build
-run: archery docker run conda-integration
-  - name: Docker Push
-if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
-continue-on-error: true
-run: archery docker push conda-integration


[arrow-rs] branch ci-fix updated: Removed unused cancel jobs

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/ci-fix by this push:
 new c3190ee  Removed unused cancel jobs
c3190ee is described below

commit c3190ee6e8712b63b03202c4460ac63b89b414c3
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 17:44:44 2021 +

Removed unused cancel jobs
---
 .github/workflows/cancel.yml | 70 
 1 file changed, 70 deletions(-)

diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml
index de980eb..b4fb904 100644
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@@ -31,20 +31,6 @@ jobs:
   # Unfortunately, we need to define a separate cancellation step for
   # each workflow where we want to cancel stale runs.
   - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale C++ runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: cpp.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale C# runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: csharp.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
 name: "Cancel stale Dev runs"
 with:
   cancelMode: allDuplicates
@@ -52,13 +38,6 @@ jobs:
   workflowFileName: dev.yml
   skipEventTypes: '["push", "schedule"]'
   - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Go runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: go.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
 name: "Cancel stale Integration runs"
 with:
   cancelMode: allDuplicates
@@ -66,55 +45,6 @@ jobs:
   workflowFileName: integration.yml
   skipEventTypes: '["push", "schedule"]'
   - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Java JNI runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: java_jni.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Java runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: java.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale JS runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: js.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Julia runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: julia.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Python runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: python.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale R runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: r.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
-name: "Cancel stale Ruby runs"
-with:
-  cancelMode: allDuplicates
-  token: ${{ secrets.GITHUB_TOKEN }}
-  workflowFileName: ruby.yml
-  skipEventTypes: '["push", "schedule"]'
-  - uses: potiuk/cancel-workflow-runs@master
 name: "Cancel stale Rust runs"
 with:
   cancelMode: allDuplicates


[arrow-rs] 01/01: Fixed Linting.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit cbf8ed2b92deaa6c4a7ed1c21abcba52430b8d7f
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 17:35:17 2021 +

Fixed Linting.
---
 .github/workflows/dev.yml   |   2 +-
 .github/workflows/rust.yml  |  14 +
 .gitignore  |   3 +
 arrow/src/util/test_util.rs |   5 +-
 dev/.gitignore  |   2 +
 dev/archery/archery/cli.py  |   4 ++
 dev/archery/archery/utils/lint.py   |   4 --
 dev/archery/archery/utils/source.py |   8 +--
 dev/release/rat_exclude_files.txt   | 113 ++--
 9 files changed, 32 insertions(+), 123 deletions(-)

diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 12888a6..9d8146a 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -40,4 +40,4 @@ jobs:
   - name: Setup Archery
 run: pip install -e dev/archery[docker]
   - name: Lint
-run: archery lint --rat --rust
+run: archery lint --rat
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 9ab74f5..d19dd93 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -269,6 +269,20 @@ jobs:
   # Ignore MIRI errors until we can get a clean run
   cargo miri test || true
 
+  lint:
+name: Lint
+runs-on: ubuntu-latest
+container:
+  image: amd64/rust
+steps:
+  - uses: actions/checkout@v2
+  - name: Setup toolchain
+run: |
+  rustup toolchain install stable
+  rustup default stable
+  rustup component add rustfmt
+  - name: Run
+run: cargo fmt --all -- --check
   coverage:
 name: Coverage
 runs-on: ubuntu-latest
diff --git a/.gitignore b/.gitignore
index 389f4ab..5b3bf6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,6 @@ target
 rusty-tags.vi
 .history
 .flatbuffers/
+
+.vscode
+venv/*
diff --git a/arrow/src/util/test_util.rs b/arrow/src/util/test_util.rs
index cfbec03..261b245 100644
--- a/arrow/src/util/test_util.rs
+++ b/arrow/src/util/test_util.rs
@@ -98,10 +98,7 @@ pub fn arrow_test_data() -> String {
 /// assert!(std::path::PathBuf::from(filename).exists());
 /// ```
 pub fn parquet_test_data() -> String {
-match get_data_dir(
-"PARQUET_TEST_DATA",
-"../parquet-testing/data",
-) {
+match get_data_dir("PARQUET_TEST_DATA", "../parquet-testing/data") {
 Ok(pb) => pb.display().to_string(),
 Err(err) => panic!("failed to get parquet data dir: {}", err),
 }
diff --git a/dev/.gitignore b/dev/.gitignore
index b079293..c03a7c7 100644
--- a/dev/.gitignore
+++ b/dev/.gitignore
@@ -18,3 +18,5 @@
 # Python virtual environments for dev tools
 .venv*/
 
+__pycache__
+*.egg-info
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 1dfdaaf..4bbde75 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -289,6 +289,8 @@ def decorate_lint_command(cmd):
 
 
 @archery.command(short_help="Check Arrow source tree for errors")
+@click.option("--src", metavar="", default=".",
+  help="Specify Arrow source directory")
 @click.option("--fix", is_flag=True, type=BOOL, default=False,
   help="Toggle fixing the lint errors if the linter supports it.")
 @click.option("--iwyu_all", is_flag=True, type=BOOL, default=False,
@@ -298,6 +300,8 @@ def decorate_lint_command(cmd):
 @decorate_lint_command
 @click.pass_context
 def lint(ctx, src, fix, iwyu_all, **checks):
+src = ArrowSources(src)
+
 if checks.pop('all'):
 # "--all" is given => enable all non-selected checks
 for k, v in checks.items():
diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py
index 3b94d01..e81d6ac 100644
--- a/dev/archery/archery/utils/lint.py
+++ b/dev/archery/archery/utils/lint.py
@@ -265,10 +265,6 @@ def rat_linter(src, root):
 """Run apache-rat license linter."""
 logger.info("Running apache-rat linter")
 
-if src.git_dirty:
-logger.warn("Due to the usage of git-archive, uncommitted files will"
-" not be checked for rat violations. ")
-
 exclusion = exclusion_from_globs(
 os.path.join(src.dev, "release", "rat_exclude_files.txt"))
 
diff --git a/dev/archery/archery/utils/source.py b/dev/archery/archery/utils/source.py
index d30b4f1..1ae0fe0 100644
--- a/dev/archery/archery/utils/source.py
+++ b/dev/archery/archery/utils/source.py
@@ -45,13 +45,7 @@ class ArrowSources:
 --
 path : src
 """
-path

[arrow-rs] branch ci-fix updated (445fe62 -> cbf8ed2)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


 discard 445fe62  Fixed Linting.
 new cbf8ed2  Fixed Linting.

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (445fe62)
\
 N -- N -- N   refs/heads/ci-fix (cbf8ed2)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .github/workflows/dev.yml  |  2 +-
 .github/workflows/rust.yml | 14 ++
 2 files changed, 15 insertions(+), 1 deletion(-)


[arrow-rs] branch ci-fix updated: Fixed Linting.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/ci-fix by this push:
 new 445fe62  Fixed Linting.
445fe62 is described below

commit 445fe6262ab6ac304a68fdad06c7371f3c13c557
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 17:35:17 2021 +

Fixed Linting.
---
 .gitignore  |   3 +
 arrow/src/util/test_util.rs |   5 +-
 dev/.gitignore  |   2 +
 dev/archery/archery/cli.py  |   4 ++
 dev/archery/archery/utils/lint.py   |   4 --
 dev/archery/archery/utils/source.py |   8 +--
 dev/release/rat_exclude_files.txt   | 113 ++--
 7 files changed, 17 insertions(+), 122 deletions(-)

diff --git a/.gitignore b/.gitignore
index 389f4ab..5b3bf6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,6 @@ target
 rusty-tags.vi
 .history
 .flatbuffers/
+
+.vscode
+venv/*
diff --git a/arrow/src/util/test_util.rs b/arrow/src/util/test_util.rs
index cfbec03..261b245 100644
--- a/arrow/src/util/test_util.rs
+++ b/arrow/src/util/test_util.rs
@@ -98,10 +98,7 @@ pub fn arrow_test_data() -> String {
 /// assert!(std::path::PathBuf::from(filename).exists());
 /// ```
 pub fn parquet_test_data() -> String {
-match get_data_dir(
-"PARQUET_TEST_DATA",
-"../parquet-testing/data",
-) {
+match get_data_dir("PARQUET_TEST_DATA", "../parquet-testing/data") {
 Ok(pb) => pb.display().to_string(),
 Err(err) => panic!("failed to get parquet data dir: {}", err),
 }
diff --git a/dev/.gitignore b/dev/.gitignore
index b079293..c03a7c7 100644
--- a/dev/.gitignore
+++ b/dev/.gitignore
@@ -18,3 +18,5 @@
 # Python virtual environments for dev tools
 .venv*/
 
+__pycache__
+*.egg-info
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 1dfdaaf..4bbde75 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -289,6 +289,8 @@ def decorate_lint_command(cmd):
 
 
 @archery.command(short_help="Check Arrow source tree for errors")
+@click.option("--src", metavar="", default=".",
+  help="Specify Arrow source directory")
 @click.option("--fix", is_flag=True, type=BOOL, default=False,
   help="Toggle fixing the lint errors if the linter supports it.")
 @click.option("--iwyu_all", is_flag=True, type=BOOL, default=False,
@@ -298,6 +300,8 @@ def decorate_lint_command(cmd):
 @decorate_lint_command
 @click.pass_context
 def lint(ctx, src, fix, iwyu_all, **checks):
+src = ArrowSources(src)
+
 if checks.pop('all'):
 # "--all" is given => enable all non-selected checks
 for k, v in checks.items():
diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py
index 3b94d01..e81d6ac 100644
--- a/dev/archery/archery/utils/lint.py
+++ b/dev/archery/archery/utils/lint.py
@@ -265,10 +265,6 @@ def rat_linter(src, root):
 """Run apache-rat license linter."""
 logger.info("Running apache-rat linter")
 
-if src.git_dirty:
-logger.warn("Due to the usage of git-archive, uncommitted files will"
-" not be checked for rat violations. ")
-
 exclusion = exclusion_from_globs(
 os.path.join(src.dev, "release", "rat_exclude_files.txt"))
 
diff --git a/dev/archery/archery/utils/source.py b/dev/archery/archery/utils/source.py
index d30b4f1..1ae0fe0 100644
--- a/dev/archery/archery/utils/source.py
+++ b/dev/archery/archery/utils/source.py
@@ -45,13 +45,7 @@ class ArrowSources:
 --
 path : src
 """
-path = Path(path)
-# validate by checking a specific path in the arrow source tree
-if not (path / 'cpp' / 'CMakeLists.txt').exists():
-raise InvalidArrowSource(
-"No Arrow C++ sources found in {}.".format(path)
-)
-self.path = path
+self.path = Path(path)
 
 @property
 def archery(self):
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 68f5668..58f937f 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -12,37 +12,6 @@ ci/etc/rprofile
 ci/etc/*.patch
 ci/vcpkg/*.patch
 CHANGELOG.md
-cpp/CHANGELOG_PARQUET.md
-cpp/src/arrow/io/mman.h
-cpp/src/arrow/util/random.h
-cpp/src/arrow/status.cc
-cpp/src/arrow/status.h
-cpp/src/arrow/vendored/*
-cpp/build-support/asan_symbolize.py
-cpp/build-support/cpplint.py
-cpp/build-support/lint_exclusions.txt
-cpp/build-support/iwyu/*
-cpp/cmake_modules/FindPythonLibsNew.cmake
-cpp/cmake_modules/SnappyCMakeLists.txt
-cpp/cmake_

[arrow-rs] branch ci-fix updated: Trying dev fix.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/ci-fix by this push:
 new 18a3cf2  Trying dev fix.
18a3cf2 is described below

commit 18a3cf2279f5a72259d422dbeda1b5c9b4aee3b5
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 17:12:30 2021 +

Trying dev fix.
---
 dev/archery/archery/cli.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index bcaddf1..1dfdaaf 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -289,9 +289,6 @@ def decorate_lint_command(cmd):
 
 
 @archery.command(short_help="Check Arrow source tree for errors")
-@click.option("--src", metavar="", default=None,
-  callback=validate_arrow_sources,
-  help="Specify Arrow source directory")
 @click.option("--fix", is_flag=True, type=BOOL, default=False,
   help="Toggle fixing the lint errors if the linter supports it.")
 @click.option("--iwyu_all", is_flag=True, type=BOOL, default=False,


[arrow-rs] 02/05: Fiddle paths to submodules.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit ab13ebeefaf3bc2773ff68b3e14c10004bb94aae
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 15:48:43 2021 +

Fiddle paths to submodules.
---
 .github/workflows/rust.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index ab3b8c0..05ac302 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -87,8 +87,8 @@ jobs:
 # Disable full debug symbol generation to speed up CI build and keep memory down
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
-ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
-PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
+ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
+PARQUET_TEST_DATA: /__w/arrow-rs/arrow-rs/parquet-testing/data
 steps:
   - uses: actions/checkout@v2
 with:


[arrow-rs] 03/05: Fiddle with CI paths

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit a0ddd2995266cc0edeb3b9e0dc0e328b69e9ee56
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 15:58:46 2021 +

Fiddle with CI paths
---
 .github/workflows/rust.yml | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 05ac302..5b539f0 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -144,7 +144,7 @@ jobs:
 # Disable full debug symbol generation to speed up CI build and keep 
memory down
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
-ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
+ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
 steps:
   - uses: actions/checkout@v2
 with:
@@ -195,7 +195,7 @@ jobs:
 shell: bash
 run: |
   export ARROW_TEST_DATA=$(pwd)/testing/data
-  export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data
+  export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
   # do not produce debug symbols to keep memory usage down
   export RUSTFLAGS="-C debuginfo=0"
   cargo test
@@ -374,7 +374,7 @@ jobs:
 # Disable full debug symbol generation to speed up CI build and keep 
memory down
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
-ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
+ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
 PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
 steps:
   - uses: actions/checkout@v2
@@ -418,7 +418,7 @@ jobs:
 # Disable full debug symbol generation to speed up CI build and keep 
memory down
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
-ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
+ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
 PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
 steps:
   - uses: actions/checkout@v2


[arrow-rs] 04/05: Removed test from datafusion.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 878a8d8e9cd8ea301637424a38df98f9dbef1333
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 16:10:02 2021 +

Removed test from datafusion.
---
 .github/workflows/rust.yml | 6 --
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 5b539f0..fd44d2c 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -116,12 +116,6 @@ jobs:
   export CARGO_TARGET_DIR="/github/home/target"
   # run tests on all workspace members with default feature list
   cargo test
-  # test datafusion examples
-  cd datafusion-examples
-  cargo test --no-default-features
-  cargo run --example csv_sql
-  cargo run --example parquet_sql
-  cd ..
   cd arrow
   # re-run tests on arrow workspace with additional features
   cargo test --features=prettyprint


[arrow-rs] 01/05: Fixed CI.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 3f9e2d944f95b0254a55103a3d61132f53205b6e
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 15:06:20 2021 +

Fixed CI.
---
 .github/workflows/rust.yml | 30 --
 .pre-commit-config.yaml|  4 ++--
 2 files changed, 6 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 4bb17a2..ab3b8c0 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -70,16 +70,7 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust
   cargo build
-  # Ballista is currently not part of the main workspace so requires a separate build step
-  - name: Build Ballista
-run: |
-  export CARGO_HOME="/github/home/.cargo"
-  export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/ballista/rust
-  # snmalloc requires cmake so build without default features
-  cargo build --no-default-features
 
   # test the crate
   linux-test:
@@ -123,7 +114,6 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust
   # run tests on all workspace members with default feature list
   cargo test
   # test datafusion examples
@@ -139,14 +129,6 @@ jobs:
   cargo run --example dynamic_types
   cargo run --example read_csv
   cargo run --example read_csv_infer_schema
-  # Ballista is currently not part of the main workspace so requires a 
separate test step
-  - name: Run Ballista tests
-run: |
-  export CARGO_HOME="/github/home/.cargo"
-  export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/ballista/rust
-  # snmalloc requires cmake so build without default features
-  cargo test --no-default-features
 
   # test the --features "simd" of the arrow crate. This requires nightly.
   linux-test-simd:
@@ -188,7 +170,7 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/arrow
+  cd arrow
   cargo test --features "simd"
 
   windows-and-macos:
@@ -216,7 +198,6 @@ jobs:
   export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data
   # do not produce debug symbols to keep memory usage down
   export RUSTFLAGS="-C debuginfo=0"
-  cd rust
   cargo test
 
   clippy:
@@ -258,7 +239,6 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust
   cargo clippy --all-targets --workspace -- -D warnings -A clippy::redundant_field_names
 
   miri-checks:
@@ -290,7 +270,6 @@ jobs:
   RUST_LOG: 'trace'
 run: |
   export MIRIFLAGS="-Zmiri-disable-isolation"
-  cd rust
   cargo miri setup
   cargo clean
   # Ignore MIRI errors until we can get a clean run
@@ -330,7 +309,6 @@ jobs:
   # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0
   # see https://github.com/xd009642/tarpaulin/issues/618
   cargo install --version 0.16.0 cargo-tarpaulin
-  cd rust
   cargo tarpaulin --out Xml
   - name: Report coverage
 continue-on-error: true
@@ -373,7 +351,7 @@ jobs:
   export CARGO_HOME="/home/runner/.cargo"
   export CARGO_TARGET_DIR="/home/runner/target"
 
-  cd rust/arrow-pyarrow-integration-testing
+  cd arrow-pyarrow-integration-testing
 
   python -m venv venv
   source venv/bin/activate
@@ -423,7 +401,7 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/arrow
+  cd arrow
   cargo build --target wasm32-unknown-unknown
 
   # test the projects can build without default features
@@ -466,5 +444,5 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/arrow
+  cd arrow
   cargo check --all-targets --no-default-features
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9d2d2d8..5331a53 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -32,8 +32,8 @@ repos:
   - id: rustfmt
 name: Rust Format
 language: system
-

[arrow-rs] 05/05: Fiddle paths for testing.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 7b61b9c4e9ac9a4f6ed1dec63827cf1570d48257
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 16:29:47 2021 +

Fiddle paths for testing.
---
 .github/workflows/rust.yml  | 6 +++---
 README.md   | 4 ++--
 arrow/src/util/test_util.rs | 2 +-
 ci/scripts/python_sdist_test.sh | 2 +-
 ci/scripts/python_test.sh   | 2 +-
 ci/scripts/rust_build.sh| 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index fd44d2c..9ab74f5 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -298,7 +298,7 @@ jobs:
   export CARGO_TARGET_DIR="/home/runner/target"
 
   export ARROW_TEST_DATA=$(pwd)/testing/data
-  export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data
+  export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
 
   # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0
   # see https://github.com/xd009642/tarpaulin/issues/618
@@ -369,7 +369,7 @@ jobs:
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
 ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
-PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
+PARQUET_TEST_DATA: /__w/arrow/arrow/parquet-testing/data
 steps:
   - uses: actions/checkout@v2
 with:
@@ -413,7 +413,7 @@ jobs:
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
 ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
-PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
+PARQUET_TEST_DATA: /__w/arrow/arrow/parquet-testing/data
 steps:
   - uses: actions/checkout@v2
 with:
diff --git a/README.md b/README.md
index 7fdef29..574b60b 100644
--- a/README.md
+++ b/README.md
@@ -104,7 +104,7 @@ git submodule update --init
 
 This populates data in two git submodules:
 
-- `../cpp/submodules/parquet_testing/data` (sourced from https://github.com/apache/parquet-testing.git)
+- `../parquet_testing/data` (sourced from https://github.com/apache/parquet-testing.git)
 - `../testing` (sourced from https://github.com/apache/arrow-testing)
 
 By default, `cargo test` will look for these directories at their
@@ -112,7 +112,7 @@ standard location. The following environment variables can be used to override t
 
 ```bash
 # Optionaly specify a different location for test data
-export PARQUET_TEST_DATA=$(cd ../cpp/submodules/parquet-testing/data; pwd)
+export PARQUET_TEST_DATA=$(cd ../parquet-testing/data; pwd)
 export ARROW_TEST_DATA=$(cd ../testing/data; pwd)
 ```
 
diff --git a/arrow/src/util/test_util.rs b/arrow/src/util/test_util.rs
index 62e906d..cfbec03 100644
--- a/arrow/src/util/test_util.rs
+++ b/arrow/src/util/test_util.rs
@@ -84,7 +84,7 @@ pub fn arrow_test_data() -> String {
 
 /// Returns the parquest test data directory, which is by default
 /// stored in a git submodule rooted at
-/// `arrow/cpp/submodules/parquest-testing/data`.
+/// `arrow/parquest-testing/data`.
 ///
 /// The default can be overridden by the optional environment variable
 /// `PARQUET_TEST_DATA`
diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh
index 1388ca0..154c1b3 100755
--- a/ci/scripts/python_sdist_test.sh
+++ b/ci/scripts/python_sdist_test.sh
@@ -23,7 +23,7 @@ arrow_dir=${1}
 
 export ARROW_SOURCE_DIR=${arrow_dir}
 export ARROW_TEST_DATA=${arrow_dir}/testing/data
-export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/parquet-testing/data
 
 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
 export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh
index 80a9cde..5af3caf 100755
--- a/ci/scripts/python_test.sh
+++ b/ci/scripts/python_test.sh
@@ -23,7 +23,7 @@ arrow_dir=${1}
 
 export ARROW_SOURCE_DIR=${arrow_dir}
 export ARROW_TEST_DATA=${arrow_dir}/testing/data
-export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/parquet-testing/data
 export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
 
 # Enable some checks inside Python itself
diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh
index 726ecd8..8099d30 100755
--- a/ci/scripts/rust_build.sh
+++ b/ci/scripts/rust_build.sh
@@ -29,7 +29,7 @@ source_dir=${1}/rust
 export RUSTFLAGS="-C debuginfo=1"
 
 export ARROW_TEST_DATA=${arrow_dir}/testing/data
-export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/parquet-testing/data
 
 # show activated toolchain
 rustup show


[arrow-rs] branch master updated: Fixed RAT and Rust linting.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 212a001  Fixed RAT and Rust linting.
212a001 is described below

commit 212a0016b7b2353bcde9b40fef5404e947a479a5
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 17:00:52 2021 +

Fixed RAT and Rust linting.
---
 .github/workflows/archery.yml | 64 ---
 .github/workflows/dev.yml | 59 ++-
 2 files changed, 3 insertions(+), 120 deletions(-)

diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
deleted file mode 100644
index 761e045..000
--- a/.github/workflows/archery.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: Archery & Crossbow
-
-on:
-  push:
-paths:
-  - '.github/workflows/archery.yml'
-  - 'dev/archery/**'
-  - 'dev/tasks/**'
-  - 'docker-compose.yml'
-  pull_request:
-paths:
-  - '.github/workflows/archery.yml'
-  - 'dev/archery/**'
-  - 'dev/tasks/**'
-  - 'docker-compose.yml'
-
-jobs:
-
-  test:
-if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
-name: Archery Unittests and Crossbow Check Config
-runs-on: ubuntu-latest
-steps:
-  - name: Checkout Arrow
-uses: actions/checkout@v2
-with:
-  fetch-depth: 0
-  - name: Git Fixup
-if: ${{ github.event_name == 'pull_request' }}
-shell: bash
-run: git branch master origin/master
-  - name: Free Up Disk Space
-run: ci/scripts/util_cleanup.sh
-  - name: Setup Python
-uses: actions/setup-python@v1
-with:
-  python-version: '3.6'
-  - name: Install Archery, Crossbow- and Test Dependencies
-run: pip install pytest responses -e dev/archery[all]
-  - name: Archery Unittests
-working-directory: dev/archery
-run: pytest -v archery
-  - name: Archery Docker Validation
-run: archery docker
-  - name: Crossbow Check Config
-working-directory: dev/tasks
-run: archery crossbow check-config
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index d1b0184..12888a6 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -31,66 +31,13 @@ jobs:
   lint:
 name: Lint C++, Python, R, Rust, Docker, RAT
 runs-on: ubuntu-latest
-if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
 steps:
-  - name: Checkout Arrow
-uses: actions/checkout@v2
-with:
-  fetch-depth: 0
-  - name: Fetch Submodules and Tags
-run: ci/scripts/util_checkout.sh
-  - name: Free Up Disk Space
-run: ci/scripts/util_cleanup.sh
+  - uses: actions/checkout@v2
   - name: Setup Python
 uses: actions/setup-python@v1
 with:
   python-version: 3.8
   - name: Setup Archery
 run: pip install -e dev/archery[docker]
-  - name: Execute Docker Build
-run: |
-  sudo sysctl -w kernel.core_pattern="core.%e.%p"
-  ulimit -c unlimited
-  archery docker run ubuntu-lint
-  - name: Docker Push
-if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
-continue-on-error: true
-run: archery docker push ubuntu-lint
-
-  release:
-name: Source Release and Merge Script
-runs-on: ubuntu-20.04
-if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
-env:
-  GIT_AUTHOR_NAME: Github Actions
-  GIT_AUTHOR_EMAIL: github@actions
-  GIT_COMMITTER_NAME: Github Actions
-  GIT_COMMITTER_EMAIL: github@actions
-steps:
-  - name: Checkout Arrow
-uses: actions/checkout@v2
-with:
-  fetch-depth: 0
-  - name: Fetch Submodules and Tags
-shell: bash
-run: ci/scripts/util_checkout.sh
-  - name: Install Python
-uses: actions/setup-python@v1
-with:
-  python-version: '3.6'
-  

[arrow-rs] branch ci-fix updated: Fiddle paths for testing.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/ci-fix by this push:
 new 90c8ff7  Fiddle paths for testing.
90c8ff7 is described below

commit 90c8ff7f35ba77078bd2848144a5e6ab93ffc40f
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 16:29:47 2021 +

Fiddle paths for testing.
---
 .github/workflows/rust.yml  | 6 +++---
 README.md   | 4 ++--
 arrow/src/util/test_util.rs | 2 +-
 ci/scripts/python_sdist_test.sh | 2 +-
 ci/scripts/python_test.sh   | 2 +-
 ci/scripts/rust_build.sh| 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index fd44d2c..9ab74f5 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -298,7 +298,7 @@ jobs:
   export CARGO_TARGET_DIR="/home/runner/target"
 
   export ARROW_TEST_DATA=$(pwd)/testing/data
-  export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data
+  export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
 
   # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0
   # see https://github.com/xd009642/tarpaulin/issues/618
@@ -369,7 +369,7 @@ jobs:
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
 ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
-PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
+PARQUET_TEST_DATA: /__w/arrow/arrow/parquet-testing/data
 steps:
   - uses: actions/checkout@v2
 with:
@@ -413,7 +413,7 @@ jobs:
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
 ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
-PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
+PARQUET_TEST_DATA: /__w/arrow/arrow/parquet-testing/data
 steps:
   - uses: actions/checkout@v2
 with:
diff --git a/README.md b/README.md
index 7fdef29..574b60b 100644
--- a/README.md
+++ b/README.md
@@ -104,7 +104,7 @@ git submodule update --init
 
 This populates data in two git submodules:
 
-- `../cpp/submodules/parquet_testing/data` (sourced from 
https://github.com/apache/parquet-testing.git)
+- `../parquet_testing/data` (sourced from 
https://github.com/apache/parquet-testing.git)
 - `../testing` (sourced from https://github.com/apache/arrow-testing)
 
 By default, `cargo test` will look for these directories at their
@@ -112,7 +112,7 @@ standard location. The following environment variables can 
be used to override t
 
 ```bash
 # Optionaly specify a different location for test data
-export PARQUET_TEST_DATA=$(cd ../cpp/submodules/parquet-testing/data; pwd)
+export PARQUET_TEST_DATA=$(cd ../parquet-testing/data; pwd)
 export ARROW_TEST_DATA=$(cd ../testing/data; pwd)
 ```
 
diff --git a/arrow/src/util/test_util.rs b/arrow/src/util/test_util.rs
index 62e906d..cfbec03 100644
--- a/arrow/src/util/test_util.rs
+++ b/arrow/src/util/test_util.rs
@@ -84,7 +84,7 @@ pub fn arrow_test_data() -> String {
 
 /// Returns the parquest test data directory, which is by default
 /// stored in a git submodule rooted at
-/// `arrow/cpp/submodules/parquest-testing/data`.
+/// `arrow/parquest-testing/data`.
 ///
 /// The default can be overridden by the optional environment variable
 /// `PARQUET_TEST_DATA`
diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh
index 1388ca0..154c1b3 100755
--- a/ci/scripts/python_sdist_test.sh
+++ b/ci/scripts/python_sdist_test.sh
@@ -23,7 +23,7 @@ arrow_dir=${1}
 
 export ARROW_SOURCE_DIR=${arrow_dir}
 export ARROW_TEST_DATA=${arrow_dir}/testing/data
-export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/parquet-testing/data
 
 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
 export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh
index 80a9cde..5af3caf 100755
--- a/ci/scripts/python_test.sh
+++ b/ci/scripts/python_test.sh
@@ -23,7 +23,7 @@ arrow_dir=${1}
 
 export ARROW_SOURCE_DIR=${arrow_dir}
 export ARROW_TEST_DATA=${arrow_dir}/testing/data
-export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/parquet-testing/data
 export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
 
 # Enable some checks inside Python itself
diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh
index 726ecd8..8099d30 100755
--- a/ci/scripts/rust_build.sh
+++ b/ci/scripts/rust_build.sh
@@ -29,7 +29,7 @@ source_dir=${1}/rust
 export RUSTFLAGS="-C debuginfo=1"
 
 export ARRO

[arrow-rs] branch ci-fix updated: Removed test from datafusion.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/ci-fix by this push:
 new 7a6f479  Removed test from datafusion.
7a6f479 is described below

commit 7a6f479e2f6246674ac3a30fadebcb9249c32060
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 16:10:02 2021 +

Removed test from datafusion.
---
 .github/workflows/rust.yml | 6 --
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 5b539f0..fd44d2c 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -116,12 +116,6 @@ jobs:
   export CARGO_TARGET_DIR="/github/home/target"
   # run tests on all workspace members with default feature list
   cargo test
-  # test datafusion examples
-  cd datafusion-examples
-  cargo test --no-default-features
-  cargo run --example csv_sql
-  cargo run --example parquet_sql
-  cd ..
   cd arrow
   # re-run tests on arrow workspace with additional features
   cargo test --features=prettyprint


[arrow-datafusion] branch test created (now e023b4c)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch test
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.


  at e023b4c  Update paths to arrow and parquet test data

No new revisions were added by this update.


[arrow-rs] branch ci-fix updated: Fiddle with CI paths

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/ci-fix by this push:
 new f89c9bf  Fiddle with CI paths
f89c9bf is described below

commit f89c9bf2297846d2aeb63467135bd625f6b099fc
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 15:58:46 2021 +

Fiddle with CI paths
---
 .github/workflows/rust.yml | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 05ac302..5b539f0 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -144,7 +144,7 @@ jobs:
 # Disable full debug symbol generation to speed up CI build and keep 
memory down
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
-ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
+ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
 steps:
   - uses: actions/checkout@v2
 with:
@@ -195,7 +195,7 @@ jobs:
 shell: bash
 run: |
   export ARROW_TEST_DATA=$(pwd)/testing/data
-  export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data
+  export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
   # do not produce debug symbols to keep memory usage down
   export RUSTFLAGS="-C debuginfo=0"
   cargo test
@@ -374,7 +374,7 @@ jobs:
 # Disable full debug symbol generation to speed up CI build and keep 
memory down
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
-ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
+ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
 PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
 steps:
   - uses: actions/checkout@v2
@@ -418,7 +418,7 @@ jobs:
 # Disable full debug symbol generation to speed up CI build and keep 
memory down
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
-ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
+ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
 PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
 steps:
   - uses: actions/checkout@v2


[arrow-rs] branch ci-fix updated: Fiddle paths to submodules.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/ci-fix by this push:
 new 6555953  Fiddle paths to submodules.
6555953 is described below

commit 65559538db7e2ada35a2a13925961a17206c1065
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 15:48:43 2021 +

Fiddle paths to submodules.
---
 .github/workflows/rust.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index ab3b8c0..05ac302 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -87,8 +87,8 @@ jobs:
 # Disable full debug symbol generation to speed up CI build and keep 
memory down
 # "1" means line tables only, which is useful for panic tracebacks.
 RUSTFLAGS: "-C debuginfo=1"
-ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
-PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
+ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
+PARQUET_TEST_DATA: /__w/arrow-rs/arrow-rs/parquet-testing/data
 steps:
   - uses: actions/checkout@v2
 with:


[arrow-rs] branch old_master created (now a9189bd)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch old_master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


  at a9189bd  Fiddle path to flight protobuf generation.

No new revisions were added by this update.


[arrow-rs] branch ci-fix created (now bb31b83)

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a change to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


  at bb31b83  Fixed CI.

This branch includes the following new commits:

 new bb31b83  Fixed CI.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow-rs] 01/01: Fixed CI.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch ci-fix
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit bb31b832f536b835d345b0a1bbd0435f69206a1d
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 15:06:20 2021 +

Fixed CI.
---
 .github/workflows/rust.yml | 30 --
 .pre-commit-config.yaml|  4 ++--
 2 files changed, 6 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 4bb17a2..ab3b8c0 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -70,16 +70,7 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust
   cargo build
-  # Ballista is currently not part of the main workspace so requires a 
separate build step
-  - name: Build Ballista
-run: |
-  export CARGO_HOME="/github/home/.cargo"
-  export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/ballista/rust
-  # snmalloc requires cmake so build without default features
-  cargo build --no-default-features
 
   # test the crate
   linux-test:
@@ -123,7 +114,6 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust
   # run tests on all workspace members with default feature list
   cargo test
   # test datafusion examples
@@ -139,14 +129,6 @@ jobs:
   cargo run --example dynamic_types
   cargo run --example read_csv
   cargo run --example read_csv_infer_schema
-  # Ballista is currently not part of the main workspace so requires a separate test step
-  - name: Run Ballista tests
-run: |
-  export CARGO_HOME="/github/home/.cargo"
-  export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/ballista/rust
-  # snmalloc requires cmake so build without default features
-  cargo test --no-default-features
 
   # test the --features "simd" of the arrow crate. This requires nightly.
   linux-test-simd:
@@ -188,7 +170,7 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/arrow
+  cd arrow
   cargo test --features "simd"
 
   windows-and-macos:
@@ -216,7 +198,6 @@ jobs:
   export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data
   # do not produce debug symbols to keep memory usage down
   export RUSTFLAGS="-C debuginfo=0"
-  cd rust
   cargo test
 
   clippy:
@@ -258,7 +239,6 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust
   cargo clippy --all-targets --workspace -- -D warnings -A clippy::redundant_field_names
 
   miri-checks:
@@ -290,7 +270,6 @@ jobs:
   RUST_LOG: 'trace'
 run: |
   export MIRIFLAGS="-Zmiri-disable-isolation"
-  cd rust
   cargo miri setup
   cargo clean
   # Ignore MIRI errors until we can get a clean run
@@ -330,7 +309,6 @@ jobs:
   # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0
   # see https://github.com/xd009642/tarpaulin/issues/618
   cargo install --version 0.16.0 cargo-tarpaulin
-  cd rust
   cargo tarpaulin --out Xml
   - name: Report coverage
 continue-on-error: true
@@ -373,7 +351,7 @@ jobs:
   export CARGO_HOME="/home/runner/.cargo"
   export CARGO_TARGET_DIR="/home/runner/target"
 
-  cd rust/arrow-pyarrow-integration-testing
+  cd arrow-pyarrow-integration-testing
 
   python -m venv venv
   source venv/bin/activate
@@ -423,7 +401,7 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/arrow
+  cd arrow
   cargo build --target wasm32-unknown-unknown
 
   # test the projects can build without default features
@@ -466,5 +444,5 @@ jobs:
 run: |
   export CARGO_HOME="/github/home/.cargo"
   export CARGO_TARGET_DIR="/github/home/target"
-  cd rust/arrow
+  cd arrow
   cargo check --all-targets --no-default-features
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9d2d2d8..5331a53 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -32,8 +32,8 @@ repos:
   - id: rustfmt
 name: Rust Format
 language: system
-

[arrow-rs] 01/03: Removed unused files.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit ce36856381c097fcd70e8b1a1257cb98180862dc
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 14:46:08 2021 +

Removed unused files.
---
 .clang-format |  20 --
 .clang-tidy   |  31 ---
 .clang-tidy-ignore|  18 --
 .dir-locals.el|  22 ---
 .env  |  74 ---
 .github/workflows/cpp.yml | 395 --
 .github/workflows/cpp_cron.yml| 149 --
 .github/workflows/csharp.yml  | 121 
 .github/workflows/go.yml  | 125 
 .github/workflows/java.yml| 112 ---
 .github/workflows/java_jni.yml|  83 
 .github/workflows/js.yml  | 122 
 .github/workflows/julia.yml   |  53 -
 .github/workflows/python.yml  | 154 ---
 .github/workflows/python_cron.yml | 141 --
 .github/workflows/r.yml   | 255 
 .github/workflows/ruby.yml| 290 
 .hadolint.yaml|  24 ---
 .readthedocs.yml  |  19 --
 .travis.yml   | 165 
 appveyor.yml  |  79 
 cmake-format.py   |  59 --
 run-cmake-format.py   | 111 ---
 23 files changed, 2622 deletions(-)

diff --git a/.clang-format b/.clang-format
deleted file mode 100644
index 06453df..000
--- a/.clang-format
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.

-BasedOnStyle:  Google
-DerivePointerAlignment: false
-ColumnLimit: 90
diff --git a/.clang-tidy b/.clang-tidy
deleted file mode 100644
index 8b2c167..000
--- a/.clang-tidy
+++ /dev/null
@@ -1,31 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.

-Checks: 'clang-diagnostic-*,clang-analyzer-*,-clang-analyzer-alpha*,google-*,modernize-*,-modernize-use-trailing-return-type'
-# produce HeaderFilterRegex from cpp/build-support/lint_exclusions.txt with:
-# echo -n '^('; sed -e 's/*/\.*/g' cpp/build-support/lint_exclusions.txt | tr '\n' '|'; echo ')$'
-HeaderFilterRegex: '^(.*codegen.*|.*_generated.*|.*windows_compatibility.h|.*pyarrow_api.h|.*pyarrow_lib.h|.*python/config.h|.*python/platform.h|.*thirdparty/ae/.*|.*vendored/.*|.*RcppExports.cpp.*|)$'
-AnalyzeTemporaryDtors: true
-CheckOptions:
-  - key: google-readability-braces-around-statements.ShortStatementLines
-value:   '1'
-  - key: google-readability-function-size.StatementThreshold
-value:   '800'
-  - key: google-readability-namespace-comments.ShortNamespaceLines
-value:   '10'
-  - key: google-readability-namespace-comments.SpacesBeforeComments
-value:   '2'
diff --git a/.clang-tidy-ignore b/.clang-tidy-ignore
deleted file mode 100644
index 3270b97..000
--- a/.clang-tidy-ignore
+++ /dev/null
@@ -1,18 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apac

[arrow-rs] 02/03: Flatten directory.

2021-04-18 Thread jorgecarleitao
This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 77caed688512d71c074414e5c143edca63202379
Author: Jorge C. Leitao 
AuthorDate: Sun Apr 18 14:50:11 2021 +

Flatten directory.
---
 .gitignore |  87 +--
 rust/Cargo.toml => Cargo.toml  |   0
 README.md  | 254 ++---
 {rust/arrow-flight => arrow-flight}/Cargo.toml |   0
 {rust/arrow-flight => arrow-flight}/README.md  |   0
 {rust/arrow-flight => arrow-flight}/build.rs   |   0
 .../examples/server.rs |   0
 .../src/arrow.flight.protocol.rs   |   0
 {rust/arrow-flight => arrow-flight}/src/lib.rs |   0
 {rust/arrow-flight => arrow-flight}/src/utils.rs   |   0
 .../.cargo/config  |   0
 .../.gitignore |   0
 .../Cargo.toml |   0
 .../README.md  |   0
 .../pyproject.toml |   0
 .../src/lib.rs |   0
 .../tests/test_sql.py  |   0
 {rust/arrow => arrow}/Cargo.toml   |   0
 {rust/arrow => arrow}/README.md|   0
 {rust/arrow => arrow}/benches/aggregate_kernels.rs |   0
 .../arrow => arrow}/benches/arithmetic_kernels.rs  |   0
 {rust/arrow => arrow}/benches/array_from_vec.rs|   0
 {rust/arrow => arrow}/benches/array_slice.rs   |   0
 {rust/arrow => arrow}/benches/bit_length_kernel.rs |   0
 {rust/arrow => arrow}/benches/boolean_kernels.rs   |   0
 {rust/arrow => arrow}/benches/buffer_bit_ops.rs|   0
 {rust/arrow => arrow}/benches/buffer_create.rs |   0
 {rust/arrow => arrow}/benches/builder.rs   |   0
 {rust/arrow => arrow}/benches/cast_kernels.rs  |   0
 .../arrow => arrow}/benches/comparison_kernels.rs  |   0
 .../arrow => arrow}/benches/concatenate_kernel.rs  |   0
 {rust/arrow => arrow}/benches/csv_writer.rs|   0
 {rust/arrow => arrow}/benches/equal.rs |   0
 {rust/arrow => arrow}/benches/filter_kernels.rs|   0
 {rust/arrow => arrow}/benches/json_reader.rs   |   0
 {rust/arrow => arrow}/benches/length_kernel.rs |   0
 {rust/arrow => arrow}/benches/mutable_array.rs |   0
 {rust/arrow => arrow}/benches/sort_kernel.rs   |   0
 {rust/arrow => arrow}/benches/take_kernels.rs  |   0
 {rust/arrow => arrow}/build.rs |   0
 {rust/arrow => arrow}/examples/builders.rs |   0
 {rust/arrow => arrow}/examples/dynamic_types.rs|   0
 {rust/arrow => arrow}/examples/read_csv.rs |   0
 .../examples/read_csv_infer_schema.rs  |   0
 {rust/arrow => arrow}/examples/tensor_builder.rs   |   0
 {rust/arrow => arrow}/format-0ed34c83.patch|   0
 {rust/arrow => arrow}/regen.sh |   0
 {rust/arrow => arrow}/src/alloc/alignment.rs   |   0
 {rust/arrow => arrow}/src/alloc/mod.rs |   0
 {rust/arrow => arrow}/src/alloc/types.rs   |   0
 {rust/arrow => arrow}/src/arch/avx512.rs   |   0
 {rust/arrow => arrow}/src/arch/mod.rs  |   0
 {rust/arrow => arrow}/src/array/array.rs   |   0
 {rust/arrow => arrow}/src/array/array_binary.rs|   0
 {rust/arrow => arrow}/src/array/array_boolean.rs   |   0
 .../arrow => arrow}/src/array/array_dictionary.rs  |   0
 {rust/arrow => arrow}/src/array/array_list.rs  |   0
 {rust/arrow => arrow}/src/array/array_primitive.rs |   0
 {rust/arrow => arrow}/src/array/array_string.rs|   0
 {rust/arrow => arrow}/src/array/array_struct.rs|   0
 {rust/arrow => arrow}/src/array/array_union.rs |   0
 {rust/arrow => arrow}/src/array/builder.rs |   0
 {rust/arrow => arrow}/src/array/cast.rs|   0
 {rust/arrow => arrow}/src/array/data.rs|   0
 {rust/arrow => arrow}/src/array/equal/boolean.rs   |   0
 {rust/arrow => arrow}/src/array/equal/decimal.rs   |   0
 .../arrow => arrow}/src/array/equal/dictionary.rs  |   0
 .../src/array/equal/fixed_binary.rs|   0
 .../arrow => arrow}/src/array/equal/fixed_list.rs  |   0
 {rust/arrow => arrow}/src/array/equal/list.rs  |   0
 {rust/arrow => arrow}/src/array/equal/mod.rs   |   0
 {rust/arrow => arrow}/src/array/equal/null.rs  |   0
 {rust/arrow => arrow}/src/array/equal/primitive.rs |   0
 {rust/arrow => arrow}/src/array/equal/structure.rs |   0
 {rust/arrow => arrow}/src/array/equal/utils.rs |   0
 .../src/array/equal/variable_size.rs   |   0
 {rust/arrow => arrow}/src/array/equal_jso

  1   2   3   >