[arrow] branch master updated: ARROW-3572: [Crossbow] Raise more helpful exception if Crossbow queue has an SSH origin URL
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new ba4e0f3 ARROW-3572: [Crossbow] Raise more helpful exception if Crossbow queue has an SSH origin URL ba4e0f3 is described below commit ba4e0f3f1f0ce8fd846cc6fadf70ad4383809c4a Author: Wes McKinney AuthorDate: Sun Jun 23 21:05:11 2019 -0500 ARROW-3572: [Crossbow] Raise more helpful exception if Crossbow queue has an SSH origin URL Author: Wes McKinney Closes #4666 from wesm/ARROW-3572 and squashes the following commits: 867a1561e Raise more helpful exception if Crossbow queue has an SSH origin URL --- dev/tasks/crossbow.py | 27 --- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/dev/tasks/crossbow.py b/dev/tasks/crossbow.py index b58ae9d..f7518d0 100755 --- a/dev/tasks/crossbow.py +++ b/dev/tasks/crossbow.py @@ -232,12 +232,17 @@ class Repo: A high level wrapper used for both reading revision information from arrow's repository and pushing continuous integration tasks to the queue repository. 
-""" -def __init__(self, path, github_token=None): +Parameters +-- +require_https : boolean, default False +Raise exception for SSH origin URLs +""" +def __init__(self, path, github_token=None, require_https=False): self.path = Path(path) self.repo = pygit2.Repository(str(self.path)) self.github_token = github_token +self.require_https = require_https self._updated_refs = [] def __str__(self): @@ -253,7 +258,11 @@ class Repo: @property def origin(self): -return self.repo.remotes['origin'] +remote = self.repo.remotes['origin'] +if self.require_https and remote.url.startswith('git@github.com'): +raise ValueError("Change SSH origin URL to HTTPS to use " + "Crossbow: {}".format(remote.url)) +return remote def fetch(self): refspec = '+refs/heads/*:refs/remotes/origin/*' @@ -297,8 +306,7 @@ class Repo: If an SSH github url is set, it will be replaced by the https equivalent usable with Github OAuth token. """ -return self.remote.url.replace('git@github.com:', - 'https://github.com/') +return _git_ssh_to_https(self.remote.url) @property def user_name(self): @@ -385,6 +393,10 @@ class Repo: return gh.repository(username, reponame) +def _git_ssh_to_https(url): +return url.replace('git@github.com:', 'https://github.com/') + + class Queue(Repo): def _next_job_id(self, prefix): @@ -631,8 +643,9 @@ def crossbow(ctx, github_token, arrow_path, queue_path): 'valid GitHub access token or pass one to --github-token.' ) -ctx.obj['arrow'] = Repo(Path(arrow_path)) -ctx.obj['queue'] = Queue(Path(queue_path), github_token=github_token) +ctx.obj['arrow'] = Repo(arrow_path) +ctx.obj['queue'] = Queue(queue_path, github_token=github_token, + require_https=True) @crossbow.command()
[arrow] branch master updated: ARROW-1012: [C++] Configurable batch size for parquet RecordBatchReader
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 532d4ba ARROW-1012: [C++] Configurable batch size for parquet RecordBatchReader 532d4ba is described below commit 532d4ba05a87e64a23e3a7b44bfbf34fa0c9a90b Author: Hatem Helal AuthorDate: Sun Jun 23 20:33:53 2019 -0500 ARROW-1012: [C++] Configurable batch size for parquet RecordBatchReader This patch adds support for configuring the record batch size when reading a parquet file by adding a `batch_size` to `ArrowReaderProperties`. Author: Hatem Helal Closes #4304 from hatemhelal/arrow-1012 and squashes the following commits: 9ed935374 update todo comment to be a bit more precise 9f93da7e0 rework existing RecordBatchReader test to cover the case where batch size is smaller than the row group 0e2162849 use deque instead of list 108a5d775 Change default bactch size to 64K and comment 159b03041 fix appveyor windows failure: must cast size_t to int b45782e56 Initial attempt at supporting a configurable batch size for parquet RecordBatchReader --- cpp/src/parquet/arrow/arrow-reader-writer-test.cc | 18 +-- cpp/src/parquet/arrow/reader.cc | 151 ++ cpp/src/parquet/arrow/reader.h| 12 +- 3 files changed, 86 insertions(+), 95 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc index f59db1f..5781ad5 100644 --- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc @@ -1940,23 +1940,23 @@ TEST(TestArrowReadWrite, GetRecordBatchReader) { ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, num_rows / 2, default_arrow_writer_properties(), )); + ArrowReaderProperties properties = default_arrow_reader_properties(); + properties.set_batch_size(100); + std::unique_ptr reader; 
ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), - ::parquet::default_reader_properties(), nullptr, )); + ::arrow::default_memory_pool(), properties, )); std::shared_ptr<::arrow::RecordBatchReader> rb_reader; ASSERT_OK_NO_THROW(reader->GetRecordBatchReader({0, 1}, _reader)); std::shared_ptr<::arrow::RecordBatch> batch; - ASSERT_OK(rb_reader->ReadNext()); - ASSERT_EQ(500, batch->num_rows()); - ASSERT_EQ(20, batch->num_columns()); - - ASSERT_OK(rb_reader->ReadNext()); - ASSERT_EQ(500, batch->num_rows()); - ASSERT_EQ(20, batch->num_columns()); + for (int i = 0; i < 10; ++i) { +ASSERT_OK(rb_reader->ReadNext()); +ASSERT_EQ(100, batch->num_rows()); +ASSERT_EQ(20, batch->num_columns()); + } ASSERT_OK(rb_reader->ReadNext()); ASSERT_EQ(nullptr, batch); diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index 5665603..484719e 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -110,14 +112,37 @@ ArrowReaderProperties default_arrow_reader_properties() { // so we can read only a single row group if we want class FileColumnIterator { public: - explicit FileColumnIterator(int column_index, ParquetFileReader* reader) + explicit FileColumnIterator(int column_index, ParquetFileReader* reader, + std::vector row_groups) : column_index_(column_index), reader_(reader), -schema_(reader->metadata()->schema()) {} +schema_(reader->metadata()->schema()), +row_groups_(row_groups.begin(), row_groups.end()) {} virtual ~FileColumnIterator() {} - virtual std::unique_ptr<::parquet::PageReader> NextChunk() = 0; + std::unique_ptr<::parquet::PageReader> NextChunk() { +if (row_groups_.empty()) { + return nullptr; +} + +auto row_group_reader = reader_->RowGroup(row_groups_.front()); +row_groups_.pop_front(); +return row_group_reader->GetColumnPageReader(column_index_); + } + + static FileColumnIterator* 
MakeAllRowGroupsIterator(int column_index, + ParquetFileReader* reader) { +std::vector row_groups(reader->metadata()->num_row_groups()); +std::iota(row_groups.begin(), row_groups.end(), 0); +return new FileColumnIterator(column_index, reader, row_groups); + } + + static FileColumnIterator* MakeSingleRowGroupIterator(int column_index, +ParquetFileReader* reader, +int row_group) { +return new
[arrow] branch master updated: ARROW-5698: [R] Fix docker-compose build
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 1cb762a ARROW-5698: [R] Fix docker-compose build 1cb762a is described below commit 1cb762aec27a62684eec6fce4d6b962361904223 Author: Wes McKinney AuthorDate: Sun Jun 23 20:29:56 2019 -0500 ARROW-5698: [R] Fix docker-compose build Author: Wes McKinney Closes #4665 from wesm/docker-fix-r and squashes the following commits: d8172a4d0 Remove extraneous continuation --- r/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/Dockerfile b/r/Dockerfile index 541bc19..a43ac20 100644 --- a/r/Dockerfile +++ b/r/Dockerfile @@ -51,7 +51,7 @@ RUN apt-get update -y && \ apt-get install -y \ texlive-latex-base && \ apt-get clean && \ -rm -rf /var/lib/apt/lists/* && \ +rm -rf /var/lib/apt/lists/* # So that arrowExports.* files are generated ENV ARROW_R_DEV=TRUE
[arrow] branch master updated (5d3668f -> 1e123e8)
This is an automated email from the ASF dual-hosted git repository. wesm pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from 5d3668f ARROW-5637: [Java][C++][Gandiva] Complete In Expression Support add 1e123e8 ARROW-5694: [Python] Support list of Decimals in conversion to pandas No new revisions were added by this update. Summary of changes: cpp/src/arrow/python/arrow_to_pandas.cc | 2 ++ python/pyarrow/tests/test_pandas.py | 12 2 files changed, 14 insertions(+)
[arrow] branch master updated: ARROW-5637: [Java][C++][Gandiva] Complete In Expression Support
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 5d3668f ARROW-5637: [Java][C++][Gandiva] Complete In Expression Support 5d3668f is described below commit 5d3668f6700fcc15e4f6e4428bd0dc7ef6ef91d2 Author: Praveen AuthorDate: Sun Jun 23 16:07:26 2019 -0500 ARROW-5637: [Java][C++][Gandiva] Complete In Expression Support Complete the java and jni bindings for In expression. Author: Praveen Closes #4662 from praveenbingo/in-expr and squashes the following commits: 82e1d3456 Fix lint issues. ce51188a6 Fix review comments. d204f0d57 Address Review comments. 7722fa59a Fixed lint issues. d2c448cc0 Complete In Expression Support. --- cpp/src/gandiva/jni/jni_common.cc | 43 + cpp/src/gandiva/proto/Types.proto | 27 ++ .../apache/arrow/gandiva/expression/InNode.java| 102 + .../arrow/gandiva/expression/TreeBuilder.java | 21 + .../arrow/gandiva/evaluator/ProjectorTest.java | 91 ++ 5 files changed, 284 insertions(+) diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc index 72061c0..b4b9ffe 100644 --- a/cpp/src/gandiva/jni/jni_common.cc +++ b/cpp/src/gandiva/jni/jni_common.cc @@ -313,6 +313,45 @@ NodePtr ProtoTypeToOrNode(const types::OrNode& node) { return TreeExprBuilder::MakeOr(children); } +NodePtr ProtoTypeToInNode(const types::InNode& node) { + NodePtr field = ProtoTypeToFieldNode(node.field()); + + if (node.has_intvalues()) { +std::unordered_set int_values; +for (int i = 0; i < node.intvalues().intvalues_size(); i++) { + int_values.insert(node.intvalues().intvalues(i).value()); +} +return TreeExprBuilder::MakeInExpressionInt32(field, int_values); + } + + if (node.has_longvalues()) { +std::unordered_set long_values; +for (int i = 0; i < node.longvalues().longvalues_size(); i++) { + long_values.insert(node.longvalues().longvalues(i).value()); +} +return 
TreeExprBuilder::MakeInExpressionInt64(field, long_values); + } + + if (node.has_stringvalues()) { +std::unordered_set stringvalues; +for (int i = 0; i < node.stringvalues().stringvalues_size(); i++) { + stringvalues.insert(node.stringvalues().stringvalues(i).value()); +} +return TreeExprBuilder::MakeInExpressionString(field, stringvalues); + } + + if (node.has_binaryvalues()) { +std::unordered_set stringvalues; +for (int i = 0; i < node.binaryvalues().binaryvalues_size(); i++) { + stringvalues.insert(node.binaryvalues().binaryvalues(i).value()); +} +return TreeExprBuilder::MakeInExpressionBinary(field, stringvalues); + } + // not supported yet. + std::cerr << "Unknown constant type for in expression.\n"; + return nullptr; +} + NodePtr ProtoTypeToNullNode(const types::NullNode& node) { DataTypePtr data_type = ProtoTypeToDataType(node.type()); if (data_type == nullptr) { @@ -344,6 +383,10 @@ NodePtr ProtoTypeToNode(const types::TreeNode& node) { return ProtoTypeToOrNode(node.ornode()); } + if (node.has_innode()) { +return ProtoTypeToInNode(node.innode()); + } + if (node.has_nullnode()) { return ProtoTypeToNullNode(node.nullnode()); } diff --git a/cpp/src/gandiva/proto/Types.proto b/cpp/src/gandiva/proto/Types.proto index 9efa80f..d264450 100644 --- a/cpp/src/gandiva/proto/Types.proto +++ b/cpp/src/gandiva/proto/Types.proto @@ -173,6 +173,9 @@ message TreeNode { optional StringNode stringNode = 17; optional BinaryNode binaryNode = 18; optional DecimalNode decimalNode = 19; + + // in expr + optional InNode inNode = 21; } message ExpressionRoot { @@ -205,3 +208,27 @@ message FunctionSignature { optional ExtGandivaType returnType = 2; repeated ExtGandivaType paramTypes = 3; } + +message InNode { + optional FieldNode field = 1; + optional IntConstants intValues = 2; + optional LongConstants longValues = 3; + optional StringConstants stringValues = 4; + optional BinaryConstants binaryValues = 5; +} + +message IntConstants { + repeated IntNode intValues = 1; +} + +message 
LongConstants { + repeated LongNode longValues = 1; +} + +message StringConstants { + repeated StringNode stringValues = 1; +} + +message BinaryConstants { + repeated BinaryNode binaryValues = 1; +} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java new file mode 100644 index 000..0420ffb --- /dev/null +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE
[arrow] branch master updated (3533213 -> f01b17b)
This is an automated email from the ASF dual-hosted git repository. shiro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git. from 3533213 ARROW-5693: [Go] skip IPC integration tests for Decimal128 add f01b17b ARROW-5092: [C#] Create a dummy .git directory to download the source files from GitHub with Source Link No new revisions were added by this update. Summary of changes: ci/travis_release_test.sh | 10 ++ csharp/src/Apache.Arrow/Apache.Arrow.csproj | 2 +- dev/release/00-prepare-test.rb | 9 +- dev/release/02-source-test.rb | 103 + dev/release/02-source.sh| 218 dev/release/source/build.sh | 2 +- dev/release/test-helper.rb | 13 ++ python/setup.py | 2 +- 8 files changed, 254 insertions(+), 105 deletions(-) create mode 100644 dev/release/02-source-test.rb