[arrow] branch main updated: PARQUET-2225: [C++][Parquet] Allow reading dense with RecordReader (#17877)

2023-03-03 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 54f8fe3770 PARQUET-2225: [C++][Parquet] Allow reading dense with 
RecordReader (#17877)
54f8fe3770 is described below

commit 54f8fe377099ab9904e9858aa26c63caa199e3df
Author: Fatemah Panahi 
AuthorDate: Fri Mar 3 11:28:39 2023 -0800

PARQUET-2225: [C++][Parquet] Allow reading dense with RecordReader (#17877)

Currently ReadRecords reads spaced by default. Some readers may need to 
read the values dense, and reading spaced is less efficient than reading dense. 
We need an option for reading dense.

Lead-authored-by: Fatemah Panahi 
Co-authored-by: Fatemah Panahi 
Signed-off-by: Micah Kornfield 
---
 cpp/src/parquet/column_reader.cc   | 233 ++---
 cpp/src/parquet/column_reader.h|  37 +-
 cpp/src/parquet/column_reader_benchmark.cc | 112 -
 cpp/src/parquet/column_reader_test.cc  | 734 ++---
 4 files changed, 855 insertions(+), 261 deletions(-)

diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 7e650596a6..cf1316bceb 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -340,13 +340,13 @@ class SerializedPageReader : public PageReader {
 void SerializedPageReader::InitDecryption() {
   // Prepare the AAD for quick update later.
   if (crypto_ctx_.data_decryptor != nullptr) {
-DCHECK(!crypto_ctx_.data_decryptor->file_aad().empty());
+ARROW_DCHECK(!crypto_ctx_.data_decryptor->file_aad().empty());
 data_page_aad_ = encryption::CreateModuleAad(
 crypto_ctx_.data_decryptor->file_aad(), encryption::kDataPage,
 crypto_ctx_.row_group_ordinal, crypto_ctx_.column_ordinal, 
kNonPageOrdinal);
   }
   if (crypto_ctx_.meta_decryptor != nullptr) {
-DCHECK(!crypto_ctx_.meta_decryptor->file_aad().empty());
+ARROW_DCHECK(!crypto_ctx_.meta_decryptor->file_aad().empty());
 data_page_header_aad_ = encryption::CreateModuleAad(
 crypto_ctx_.meta_decryptor->file_aad(), encryption::kDataPageHeader,
 crypto_ctx_.row_group_ordinal, crypto_ctx_.column_ordinal, 
kNonPageOrdinal);
@@ -355,7 +355,7 @@ void SerializedPageReader::InitDecryption() {
 
 void SerializedPageReader::UpdateDecryption(const std::shared_ptr& 
decryptor,
 int8_t module_type, std::string* 
page_aad) {
-  DCHECK(decryptor != nullptr);
+  ARROW_DCHECK(decryptor != nullptr);
   if (crypto_ctx_.start_decrypt_with_dictionary_page) {
 std::string aad = encryption::CreateModuleAad(
 decryptor->file_aad(), module_type, crypto_ctx_.row_group_ordinal,
@@ -768,7 +768,7 @@ class ColumnReaderImplBase {
 
 new_dictionary_ = true;
 current_decoder_ = decoders_[encoding].get();
-DCHECK(current_decoder_);
+ARROW_DCHECK(current_decoder_);
   }
 
   // Initialize repetition and definition level decoders on the next data page.
@@ -868,7 +868,7 @@ class ColumnReaderImplBase {
 
 auto it = decoders_.find(static_cast(encoding));
 if (it != decoders_.end()) {
-  DCHECK(it->second.get() != nullptr);
+  ARROW_DCHECK(it->second.get() != nullptr);
   current_decoder_ = it->second.get();
 } else {
   switch (encoding) {
@@ -1326,11 +1326,12 @@ class TypedRecordReader : public 
TypedColumnReaderImpl,
  public:
   using T = typename DType::c_type;
   using BASE = TypedColumnReaderImpl;
-  TypedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, 
MemoryPool* pool)
+  TypedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, 
MemoryPool* pool,
+bool read_dense_for_nullable)
   // Pager must be set using SetPageReader.
   : BASE(descr, /* pager = */ nullptr, pool) {
 leaf_info_ = leaf_info;
-nullable_values_ = leaf_info.HasNullableValues();
+nullable_values_ = leaf_info_.HasNullableValues();
 at_record_start_ = true;
 values_written_ = 0;
 null_count_ = 0;
@@ -1338,6 +1339,7 @@ class TypedRecordReader : public 
TypedColumnReaderImpl,
 levels_written_ = 0;
 levels_position_ = 0;
 levels_capacity_ = 0;
+read_dense_for_nullable_ = read_dense_for_nullable;
 uses_values_ = !(descr->physical_type() == Type::BYTE_ARRAY);
 
 if (uses_values_) {
@@ -1666,7 +1668,7 @@ class TypedRecordReader : public 
TypedColumnReaderImpl,
   }
 
   std::shared_ptr ReleaseIsValid() override {
-if (leaf_info_.HasNullableValues()) {
+if (nullable_values()) {
   auto result = valid_bits_;
   
PARQUET_THROW_NOT_OK(result->Resize(bit_util::BytesForBits(values_written_),
   /*shrink_to_fit=*/true));
@@ -1690,7 +1692,7 @@ class TypedRecordReader : public 
TypedColumnReaderImpl,
 cons

[arrow] branch main updated: PARQUET-2201: [parquet-cpp] Add stress test for RecordReader ReadRecords and SkipRecords. (#14879)

2023-02-23 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 8e09f8c5dd PARQUET-2201: [parquet-cpp] Add stress test for 
RecordReader ReadRecords and SkipRecords. (#14879)
8e09f8c5dd is described below

commit 8e09f8c5dd8935eed830127b7d73aa034092ca4c
Author: Fatemah Panahi 
AuthorDate: Thu Feb 23 20:58:23 2023 -0800

PARQUET-2201: [parquet-cpp] Add stress test for RecordReader ReadRecords 
and SkipRecords. (#14879)

This pull request adds a stress test for testing ReadRecords and 
SkipRecords for required, optional, and repeated fields. It will create random 
pages and run a random set of skip and read calls to consume all the pages. It 
will then compare the expected output with the actual output from the record 
reader.

Lead-authored-by: Fatemah Panahi 
Co-authored-by: Fatemah Panahi 
Co-authored-by: Antoine Pitrou 
Co-authored-by: fatemehp 
Signed-off-by: Micah Kornfield 
---
 cpp/src/parquet/column_reader.cc  |   3 +
 cpp/src/parquet/column_reader_test.cc | 200 +-
 cpp/src/parquet/test_util.h   |  20 +++-
 3 files changed, 218 insertions(+), 5 deletions(-)

diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 422016e176..f19079b902 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -1360,6 +1360,7 @@ class TypedRecordReader : public 
TypedColumnReaderImpl,
   }
 
   int64_t ReadRecords(int64_t num_records) override {
+if (num_records == 0) return 0;
 // Delimit records, then read values at the end
 int64_t records_read = 0;
 
@@ -1621,6 +1622,8 @@ class TypedRecordReader : public 
TypedColumnReaderImpl,
   }
 
   int64_t SkipRecords(int64_t num_records) override {
+if (num_records == 0) return 0;
+
 // Top level required field. Number of records equals to number of levels,
 // and there is not read-ahead for levels.
 if (this->max_rep_level_ == 0 && this->max_def_level_ == 0) {
diff --git a/cpp/src/parquet/column_reader_test.cc 
b/cpp/src/parquet/column_reader_test.cc
index 29414b6b4d..32fb09f21b 100644
--- a/cpp/src/parquet/column_reader_test.cc
+++ b/cpp/src/parquet/column_reader_test.cc
@@ -723,8 +723,12 @@ TEST_F(RecordReaderTest, BasicReadRepeatedField) {
   auto pager = std::make_unique(pages);
   record_reader_->SetPageReader(std::move(pager));
 
+  // Test reading 0 records.
+  int64_t records_read = record_reader_->ReadRecords(/*num_records=*/0);
+  ASSERT_EQ(records_read, 0);
+
   // Read [10], null
-  int64_t records_read = record_reader_->ReadRecords(/*num_records=*/2);
+  records_read = record_reader_->ReadRecords(/*num_records=*/2);
   ASSERT_EQ(records_read, 2);
   CheckState(/*values_written=*/2, /*null_count=*/1, /*levels_written=*/9,
  /*levels_position=*/2);
@@ -744,6 +748,11 @@ TEST_F(RecordReaderTest, BasicReadRepeatedField) {
   record_reader_->Reset();
   CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/1,
  /*levels_position=*/0);
+
+  // Test reading 0 records.
+  records_read = record_reader_->ReadRecords(/*num_records=*/0);
+  ASSERT_EQ(records_read, 0);
+
   // Read the last null value and read past the end.
   records_read = record_reader_->ReadRecords(/*num_records=*/3);
   ASSERT_EQ(records_read, 1);
@@ -887,6 +896,12 @@ TEST_F(RecordReaderTest, SkipRepeated) {
   auto pager = std::make_unique(pages);
   record_reader_->SetPageReader(std::move(pager));
 
+  {
+// Skip 0 records.
+int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/0);
+ASSERT_EQ(records_skipped, 0);
+  }
+
   {
 // This should skip the first null record.
 int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/1);
@@ -915,6 +930,12 @@ TEST_F(RecordReaderTest, SkipRepeated) {
 /*expected_reps=*/{0, 1, 1});
   }
 
+  {
+// Skip 0 records.
+int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/0);
+ASSERT_EQ(records_skipped, 0);
+  }
+
   {
 // Skip the null record and also skip [30, 30]
 int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/2);
@@ -1211,5 +1232,182 @@ TEST(RecordReaderByteArrayTest, SkipByteArray) {
   }
 }
 
+// Test random combination of ReadRecords and SkipRecords.
+class RecordReaderStressTest : public 
::testing::TestWithParam {};
+
+TEST_P(RecordReaderStressTest, StressTest) {
+  internal::LevelInfo level_info;
+  // Define these boolean variables for improving readability below.
+  bool repeated = false, required = false;
+  if (GetParam() == Repetition::REQUIRED) {
+level_info.def_level = 0;
+level_info.rep_level = 0;
+required = true;
+  } else if (GetParam() == Rep

[arrow] branch master updated: PARQUET-2210: [C++][Parquet] Skip pages based on header metadata using a callback (#14603)

2023-01-12 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 97998d835f PARQUET-2210: [C++][Parquet] Skip pages based on header 
metadata using a callback (#14603)
97998d835f is described below

commit 97998d835f404dd4876a2691a93b973fc022ffd3
Author: Fatemah Panahi 
AuthorDate: Thu Jan 12 15:11:12 2023 -0800

PARQUET-2210: [C++][Parquet] Skip pages based on header metadata using a 
callback (#14603)

Currently, we do not expose the page header metadata, so it cannot be 
used for skipping pages. I propose exposing the metadata through a callback 
that would allow the caller to decide if they want to read or skip the page 
based on the metadata.

Authored-by: Fatemah Panahi 
Signed-off-by: Micah Kornfield 
---
 cpp/src/parquet/column_reader.cc |  99 ++---
 cpp/src/parquet/column_reader.h  |  39 
 cpp/src/parquet/file_deserialize_test.cc | 333 ++-
 cpp/src/parquet/metadata.cc  |   1 -
 cpp/src/parquet/statistics.cc|  11 +
 cpp/src/parquet/statistics.h |   8 +
 6 files changed, 457 insertions(+), 34 deletions(-)

diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index f881651737..3670af49fb 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -226,6 +226,12 @@ EncodedStatistics ExtractStatsFromHeader(const H& header) {
   return page_statistics;
 }
 
+void CheckNumValuesInHeader(int num_values) {
+  if (num_values < 0) {
+throw ParquetException("Invalid page header (negative number of values)");
+  }
+}
+
 // --
 // SerializedPageReader deserializes Thrift metadata and pages that have been
 // assembled in a serialized stream for storing in a Parquet files
@@ -269,6 +275,11 @@ class SerializedPageReader : public PageReader {
  int compressed_len, int 
uncompressed_len,
  int levels_byte_len = 0);
 
+  // Returns true for non-data pages, and if we should skip based on
+  // data_page_filter_. Performs basic checks on values in the page header.
+  // Fills in data_page_statistics.
+  bool ShouldSkipPage(EncodedStatistics* data_page_statistics);
+
   const ReaderProperties properties_;
   std::shared_ptr stream_;
 
@@ -342,6 +353,55 @@ void SerializedPageReader::UpdateDecryption(const 
std::shared_ptr& de
   }
 }
 
+bool SerializedPageReader::ShouldSkipPage(EncodedStatistics* 
data_page_statistics) {
+  const PageType::type page_type = LoadEnumSafe(_page_header_.type);
+  if (page_type == PageType::DATA_PAGE) {
+const format::DataPageHeader& header = 
current_page_header_.data_page_header;
+CheckNumValuesInHeader(header.num_values);
+*data_page_statistics = ExtractStatsFromHeader(header);
+seen_num_values_ += header.num_values;
+if (data_page_filter_) {
+  const EncodedStatistics* filter_statistics =
+  data_page_statistics->is_set() ? data_page_statistics : nullptr;
+  DataPageStats data_page_stats(filter_statistics, header.num_values,
+/*num_rows=*/std::nullopt);
+  if (data_page_filter_(data_page_stats)) {
+return true;
+  }
+}
+  } else if (page_type == PageType::DATA_PAGE_V2) {
+const format::DataPageHeaderV2& header = 
current_page_header_.data_page_header_v2;
+CheckNumValuesInHeader(header.num_values);
+if (header.num_rows < 0) {
+  throw ParquetException("Invalid page header (negative number of rows)");
+}
+if (header.definition_levels_byte_length < 0 ||
+header.repetition_levels_byte_length < 0) {
+  throw ParquetException("Invalid page header (negative levels byte 
length)");
+}
+*data_page_statistics = ExtractStatsFromHeader(header);
+seen_num_values_ += header.num_values;
+if (data_page_filter_) {
+  const EncodedStatistics* filter_statistics =
+  data_page_statistics->is_set() ? data_page_statistics : nullptr;
+  DataPageStats data_page_stats(filter_statistics, header.num_values,
+header.num_rows);
+  if (data_page_filter_(data_page_stats)) {
+return true;
+  }
+}
+  } else if (page_type == PageType::DICTIONARY_PAGE) {
+const format::DictionaryPageHeader& dict_header =
+current_page_header_.dictionary_page_header;
+CheckNumValuesInHeader(dict_header.num_values);
+  } else {
+// We don't know what this page type is. We're allowed to skip non-data
+// pages.
+return true;
+  }
+  return false;
+}
+
 std::shared_ptr SerializedPageReader::NextPage() {
   ThriftDeserializ

[arrow] branch master updated: PARQUET-2163: Handle decimal schemas with large fixed_len_byte_arrays

2022-07-06 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 423ca163a2 PARQUET-2163:  Handle decimal schemas with large 
fixed_len_byte_arrays
423ca163a2 is described below

commit 423ca163a26781ef6a8229af22b7e6e2d7423a54
Author: William Butler 
AuthorDate: Wed Jul 6 09:02:00 2022 -0700

PARQUET-2163:  Handle decimal schemas with large fixed_len_byte_arrays

The precision calculation had been overflowing to infinity when the
length of the fixed_len_byte_array > 128, triggering an error when
trying to convert infinity to an int32. We can actually simplify the
logic by noting that log_b(a^(x)) = log_b(a)*x. This avoids the
intermediate infinity. We also added a check for extremely large value
sizes implying a max precision that cannot fit in int32. Even a 129-byte
decimal seems extreme.

The formula Parquet C++ was using is technically incorrect vs the
Parquet specification. The specification says that the max precision is
floor(log_10(2^(B*8 -1) - 1)), where the C++ implementation was omitting the
outer -1. However, this is okay as it is easy to prove that these values
will always be the same (ignoring the realities of FP arithmetic) & in
practice all three formulas agree through 128 when using FP.

Bug found through fuzzing.

Closes #13456 from tachyonwill/float_overflow

Authored-by: William Butler 
Signed-off-by: Micah Kornfield 
---
 cpp/src/parquet/schema_test.cc | 28 
 cpp/src/parquet/types.cc   |  8 +++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc
index 703bac8108..603d9ed8e2 100644
--- a/cpp/src/parquet/schema_test.cc
+++ b/cpp/src/parquet/schema_test.cc
@@ -1688,9 +1688,26 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) {
   ASSERT_ANY_THROW(PrimitiveNode::Make("interval", Repetition::REQUIRED,
IntervalLogicalType::Make(),
Type::FIXED_LEN_BYTE_ARRAY, 11));
+  // Scale is greater than precision.
+  ASSERT_ANY_THROW(PrimitiveNode::Make("decimal", Repetition::REQUIRED,
+   DecimalLogicalType::Make(10, 11), 
Type::INT64));
+  ASSERT_ANY_THROW(PrimitiveNode::Make("decimal", Repetition::REQUIRED,
+   DecimalLogicalType::Make(17, 18), 
Type::INT64));
   // Primitive too small for given precision ...
   ASSERT_ANY_THROW(PrimitiveNode::Make("decimal", Repetition::REQUIRED,
DecimalLogicalType::Make(16, 6), 
Type::INT32));
+  ASSERT_ANY_THROW(PrimitiveNode::Make("decimal", Repetition::REQUIRED,
+   DecimalLogicalType::Make(10, 9), 
Type::INT32));
+  ASSERT_ANY_THROW(PrimitiveNode::Make("decimal", Repetition::REQUIRED,
+   DecimalLogicalType::Make(19, 17), 
Type::INT64));
+  ASSERT_ANY_THROW(PrimitiveNode::Make("decimal", Repetition::REQUIRED,
+   DecimalLogicalType::Make(308, 6),
+   Type::FIXED_LEN_BYTE_ARRAY, 128));
+  // Length is too long
+  ASSERT_ANY_THROW(PrimitiveNode::Make("decimal", Repetition::REQUIRED,
+   DecimalLogicalType::Make(10, 6),
+   Type::FIXED_LEN_BYTE_ARRAY, 891723283));
+
   // Incompatible primitive length ...
   ASSERT_ANY_THROW(PrimitiveNode::Make("uuid", Repetition::REQUIRED,
UUIDLogicalType::Make(),
@@ -1942,6 +1959,17 @@ TEST_F(TestDecimalSchemaElementConstruction, 
DecimalCases) {
true, check_DECIMAL},
   {"decimal", LogicalType::Decimal(11, 11), Type::INT64, -1, true,
ConvertedType::DECIMAL, true, check_DECIMAL},
+  {"decimal", LogicalType::Decimal(9, 9), Type::INT32, -1, true,
+   ConvertedType::DECIMAL, true, check_DECIMAL},
+  {"decimal", LogicalType::Decimal(18, 18), Type::INT64, -1, true,
+   ConvertedType::DECIMAL, true, check_DECIMAL},
+  {"decimal", LogicalType::Decimal(307, 7), Type::FIXED_LEN_BYTE_ARRAY, 
128, true,
+   ConvertedType::DECIMAL, true, check_DECIMAL},
+  {"decimal", LogicalType::Decimal(310, 32), Type::FIXED_LEN_BYTE_ARRAY, 
129, true,
+   ConvertedType::DECIMAL, true, check_DECIMAL},
+  {"decimal", LogicalType::Decimal(2147483645, 2147483645),
+   Type::FIXED_LEN_BYTE_ARRAY, 891723282, true, ConvertedType::DECIMAL, 
true,
+   check_DECIMAL},
   };
 
   for (const SchemaElementConstructi

[arrow-testing] branch master updated: PARQUET-2124: Add fuzzer testcase

2022-03-04 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-testing.git


The following commit(s) were added to refs/heads/master by this push:
 new d315f79  PARQUET-2124:  Add fuzzer testcase
d315f79 is described below

commit d315f7985207d2d67fc2c8e41053e9d97d573f4b
Author: William Butler 
AuthorDate: Mon Feb 14 23:58:19 2022 +

PARQUET-2124:  Add fuzzer testcase
---
 .../parquet/fuzzing/clusterfuzz-testcase-6606237035003904 | Bin 0 -> 300 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/data/parquet/fuzzing/clusterfuzz-testcase-6606237035003904 
b/data/parquet/fuzzing/clusterfuzz-testcase-6606237035003904
new file mode 100644
index 000..cd82a82
Binary files /dev/null and 
b/data/parquet/fuzzing/clusterfuzz-testcase-6606237035003904 differ


[arrow-testing] branch master updated: PARQUET-2130: Add fuzzer found DCHECKing file.

2022-03-04 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-testing.git


The following commit(s) were added to refs/heads/master by this push:
 new 8992888  PARQUET-2130:  Add fuzzer found DCHECKing file.
8992888 is described below

commit 8992888f758ce4ed126d70e87c3ca03b354210f1
Author: William Butler 
AuthorDate: Tue Feb 22 20:19:20 2022 +

PARQUET-2130:  Add fuzzer found DCHECKing file.
---
 .../crash-649c71a618ae2fd80cec177a9676eb3e280fc1fa   | Bin 0 -> 3896 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git 
a/data/parquet/fuzzing/crash-649c71a618ae2fd80cec177a9676eb3e280fc1fa 
b/data/parquet/fuzzing/crash-649c71a618ae2fd80cec177a9676eb3e280fc1fa
new file mode 100644
index 000..20fd42d
Binary files /dev/null and 
b/data/parquet/fuzzing/crash-649c71a618ae2fd80cec177a9676eb3e280fc1fa differ


[arrow] branch master updated (6734d0f -> 4ef95eb)

2022-03-04 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 6734d0f  ARROW-15795: [Java] Add a getter for the timeZone in 
timestamp with timezone vectors
 add 4ef95eb  PARQUET-2131:  Number values decoded DCHECKs should be 
exceptions

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/column_reader.cc | 21 +++--
 testing  |  2 +-
 2 files changed, 16 insertions(+), 7 deletions(-)


[arrow] branch master updated (762bb3d -> 348057a)

2022-03-04 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 762bb3d  ARROW-15845: [Python][Packaging] Fix macOS wheel builds
 add 348057a  PARQUET-2130:  Fix crash in debug with non-standard key names.

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/arrow/arrow_schema_test.cc | 40 +++---
 cpp/src/parquet/arrow/schema.cc|  2 +-
 2 files changed, 38 insertions(+), 4 deletions(-)


[arrow] branch master updated (da1868b -> e73821d)

2021-11-05 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from da1868b  ARROW-14616: [C++] Fix build errors on master
 add e73821d  ARROW-14601: [JAVA] fix the comment for timestamp sec

No new revisions were added by this update.

Summary of changes:
 java/vector/src/main/java/org/apache/arrow/vector/types/Types.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


[arrow] branch master updated (4ac62d5 -> a8e1c81)

2021-10-20 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 4ac62d5  ARROW-14393: [C++] GTest linking errors during the source 
release verification
 add a8e1c81  ARROW-14345: [C++] Implement streaming reads

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/filesystem/gcsfs.cc  |  63 +++--
 cpp/src/arrow/filesystem/gcsfs_internal.cc |   4 +-
 cpp/src/arrow/filesystem/gcsfs_test.cc | 109 +++--
 3 files changed, 162 insertions(+), 14 deletions(-)


[arrow] branch master updated (63b8b7e -> defcf7d)

2021-10-12 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 63b8b7e  ARROW-13800 [R] Use divide instead of divide_checked
 add defcf7d  ARROW-13151: [C++][Parquet] Propagate schema changes from 
selection all the way up the stack

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 143 ++
 cpp/src/parquet/arrow/reader.cc   |  63 --
 2 files changed, 199 insertions(+), 7 deletions(-)


[arrow] branch master updated (f857553 -> eee13b0)

2021-10-07 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from f857553  ARROW-13257: [Java][Dataset] Allow passing empty columns for 
projection
 add eee13b0  ARROW-13604 [Java]: Remove deprecation annotations for APIs 
representing unsupported operations

No new revisions were added by this update.

Summary of changes:
 .../src/main/codegen/templates/DenseUnionVector.java |  7 +++
 java/vector/src/main/codegen/templates/UnionVector.java  |  9 -
 java/vector/src/main/codegen/templates/ValueHolders.java | 14 --
 .../org/apache/arrow/vector/BaseFixedWidthVector.java|  9 -
 .../arrow/vector/BaseLargeVariableWidthVector.java   |  7 +++
 .../org/apache/arrow/vector/BaseVariableWidthVector.java |  9 -
 .../org/apache/arrow/vector/ExtensionTypeVector.java |  8 
 .../main/java/org/apache/arrow/vector/FieldVector.java   |  3 +++
 .../main/java/org/apache/arrow/vector/NullVector.java|  8 
 .../arrow/vector/complex/BaseRepeatedValueVector.java|  6 ++
 .../apache/arrow/vector/complex/FixedSizeListVector.java |  9 -
 .../org/apache/arrow/vector/complex/LargeListVector.java | 16 +++-
 .../java/org/apache/arrow/vector/complex/ListVector.java |  9 -
 .../apache/arrow/vector/complex/RepeatedValueVector.java |  3 +++
 .../org/apache/arrow/vector/complex/StructVector.java|  9 -
 15 files changed, 113 insertions(+), 13 deletions(-)


[arrow] branch master updated (37ae220 -> f857553)

2021-10-07 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 37ae220  ARROW-13975: [C++] Implement decimal round
 add f857553  ARROW-13257: [Java][Dataset] Allow passing empty columns for 
projection

No new revisions were added by this update.

Summary of changes:
 cpp/src/jni/dataset/jni_wrapper.cc |  5 +-
 .../org/apache/arrow/dataset/jni/JniWrapper.java   |  4 +-
 .../apache/arrow/dataset/jni/NativeDataset.java|  4 +-
 .../apache/arrow/dataset/scanner/ScanOptions.java  | 34 -
 .../arrow/dataset/file/TestFileSystemDataset.java  | 80 +-
 .../arrow/dataset/jni/TestReservationListener.java |  4 +-
 6 files changed, 103 insertions(+), 28 deletions(-)


[arrow] branch master updated (adb0190 -> a74dfde)

2021-09-25 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from adb0190  MINOR: [R] Fix quantile() test failure in test-r-versions CI 
task
 add a74dfde  ARROW-13847: [Java] Avoid unnecessary collection copies

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/arrow/util/AutoCloseables.java | 23 --
 .../java/org/apache/arrow/util/Collections2.java   | 36 --
 .../org/apache/arrow/util/TestCollections2.java| 83 ++
 .../main/codegen/templates/DenseUnionVector.java   |  3 +-
 .../src/main/codegen/templates/UnionVector.java|  3 +-
 .../java/org/apache/arrow/vector/VectorLoader.java |  2 +-
 .../org/apache/arrow/vector/types/pojo/Field.java  |  3 +-
 .../apache/arrow/vector/types/pojo/FieldType.java  |  7 +-
 .../org/apache/arrow/vector/types/pojo/Schema.java | 31 
 9 files changed, 157 insertions(+), 34 deletions(-)
 create mode 100644 
java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java


[arrow] branch master updated: ARROW-13544 [Java]: Remove APIs that have been deprecated for long (Changes to ArrowBuf)

2021-09-12 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 1049dde  ARROW-13544 [Java]: Remove APIs that have been deprecated for 
long (Changes to ArrowBuf)
1049dde is described below

commit 1049dde95ed09671eba1453b9984d3a1ac43f82e
Author: liyafan82 
AuthorDate: Sun Sep 12 14:25:29 2021 -0700

ARROW-13544 [Java]: Remove APIs that have been deprecated for long (Changes 
to ArrowBuf)

See https://issues.apache.org/jira/browse/ARROW-13544

According to the discussion in 
https://github.com/apache/arrow/pull/10864#issuecomment-895707729, we want to 
split the task into multiple parts.

This PR is for the changes related to `ArrowBuf`

Closes #10901 from liyafan82/fly_0810_dep

Authored-by: liyafan82 
Signed-off-by: Micah Kornfield 
---
 .../java/org/apache/arrow/memory/ArrowBuf.java | 43 --
 .../apache/arrow/vector/ipc/JsonFileWriter.java|  2 +-
 2 files changed, 1 insertion(+), 44 deletions(-)

diff --git 
a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java 
b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java
index ea5e29f..d782707 100644
--- 
a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java
+++ 
b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java
@@ -1195,51 +1195,8 @@ public final class ArrowBuf implements AutoCloseable {
 }
   }
 
-  /**
-   * Following are wrapper methods to keep this backward compatible.
-   */
-  @Deprecated
-  public void release() {
-referenceManager.release();
-  }
-
-  @Deprecated
-  public void release(int decrement) {
-referenceManager.release(decrement);
-  }
-
-  @Deprecated
-  public void retain() {
-referenceManager.retain();
-  }
-
-  @Deprecated
-  public void retain(int increment) {
-referenceManager.retain(increment);
-  }
-
-  @Deprecated
   public ArrowBuf clear() {
 this.readerIndex = this.writerIndex = 0;
 return this;
   }
-
-  /**
-   * Initialize the reader and writer index.
-   * @param readerIndex index to read from
-   * @param writerIndex index to write to
-   * @return this
-   */
-  @Deprecated
-  public ArrowBuf setIndex(int readerIndex, int writerIndex) {
-if (readerIndex >= 0 && readerIndex <= writerIndex && writerIndex <= 
this.capacity()) {
-  this.readerIndex = readerIndex;
-  this.writerIndex = writerIndex;
-  return this;
-} else {
-  throw new IndexOutOfBoundsException(String.format("readerIndex: %d, 
writerIndex: %d " +
-   "(expected:0 <= readerIndex <= writerIndex <= capacity(%d))", 
readerIndex, writerIndex, this.capacity()));
-}
-  }
-
 }
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java 
b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
index f6cd3bc..58760c1 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
@@ -232,7 +232,7 @@ public class JsonFileWriter implements AutoCloseable {
 ArrowBuf vectorBufferTmp = vector.getAllocator().buffer(4);
 vectorBufferTmp.setInt(0, 0);
 writeValueToGenerator(bufferType, vectorBufferTmp, null, vector, 
i);
-vectorBufferTmp.release();
+vectorBufferTmp.close();
   } else {
 writeValueToGenerator(bufferType, vectorBuffer, null, vector, i);
   }


[arrow] branch master updated: ARROW-13733 [Java]: Allow JDBC adapters to reuse vector schema roots

2021-09-12 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new e8ab3ae  ARROW-13733 [Java]: Allow JDBC adapters to reuse vector 
schema roots
e8ab3ae is described below

commit e8ab3ae5ccd1c3979dfe22d8fdf204fcedb8b215
Author: liyafan82 
AuthorDate: Sun Sep 12 14:23:52 2021 -0700

ARROW-13733 [Java]: Allow JDBC adapters to reuse vector schema roots

According to the current design of the JDBC adapter, it is not possible to 
reuse the vector schema roots. That is, a new vector schema root is created and 
released for each batch.

This can cause performance problems, because in many scenarios, the client 
code only reads data in vector schema root. So the vector schema roots can be 
reused in the following cycle: populate data -> client use data -> populate 
data -> ...

The current design has another problem. Most of the time, it keeps two 
alternating vector schema roots in memory, causing a large waste of memory, 
especially for large batches.

We solve both problems by providing a flag in the config, which allows the 
user to reuse the vector schema roots.

Closes #10983 from liyafan82/fly_0824_jd

Authored-by: liyafan82 
Signed-off-by: Micah Kornfield 
---
 .../arrow/adapter/jdbc/ArrowVectorIterator.java| 50 +++---
 .../arrow/adapter/jdbc/JdbcToArrowConfig.java  | 18 +++-
 .../adapter/jdbc/JdbcToArrowConfigBuilder.java |  9 
 .../adapter/jdbc/AbstractJdbcToArrowTest.java  |  1 +
 .../arrow/adapter/jdbc/JdbcToArrowConfigTest.java  | 10 +++--
 .../arrow/adapter/jdbc/h2/JdbcToArrowTest.java | 27 
 .../jdbc/h2/JdbcToArrowVectorIteratorTest.java | 50 +++---
 7 files changed, 122 insertions(+), 43 deletions(-)

diff --git 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java
 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java
index e445056..0e833bc 100644
--- 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java
+++ 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java
@@ -46,6 +46,7 @@ public class ArrowVectorIterator implements 
Iterator, AutoClos
   private final JdbcConsumer[] consumers;
   final CompositeJdbcConsumer compositeConsumer;
 
+  // this is used only if resuing vector schema root is enabled.
   private VectorSchemaRoot nextBatch;
 
   private final int targetBatchSize;
@@ -73,7 +74,7 @@ public class ArrowVectorIterator implements 
Iterator, AutoClos
   arrowType, i, isColumnNullable(resultSet, i), null, config);
 }
 
-load(createVectorSchemaRoot());
+this.nextBatch = config.isReuseVectorSchemaRoot() ? 
createVectorSchemaRoot() : null;
   }
 
   /**
@@ -83,15 +84,17 @@ public class ArrowVectorIterator implements 
Iterator, AutoClos
   ResultSet resultSet,
   JdbcToArrowConfig config)
   throws SQLException {
-
-ArrowVectorIterator iterator = new ArrowVectorIterator(resultSet, config);
+ArrowVectorIterator iterator = null;
 try {
+  iterator = new ArrowVectorIterator(resultSet, config);
   iterator.initialize();
-  return iterator;
-} catch (Exception e) {
-  iterator.close();
+} catch (Throwable e) {
+  if (iterator != null) {
+iterator.close();
+  }
   throw new RuntimeException("Error occurred while creating iterator.", e);
 }
+return iterator;
   }
 
   private void consumeData(VectorSchemaRoot root) {
@@ -111,9 +114,8 @@ public class ArrowVectorIterator implements 
Iterator, AutoClos
 }
   }
 
-
   root.setRowCount(readRowCount);
-} catch (Exception e) {
+} catch (Throwable e) {
   compositeConsumer.close();
   throw new RuntimeException("Error occurred while consuming data.", e);
 }
@@ -126,7 +128,7 @@ public class ArrowVectorIterator implements 
Iterator, AutoClos
   if (config.getTargetBatchSize() != 
JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
 ValueVectorUtility.preAllocate(root, config.getTargetBatchSize());
   }
-} catch (Exception e) {
+} catch (Throwable e) {
   if (root != null) {
 root.close();
   }
@@ -137,40 +139,38 @@ public class ArrowVectorIterator implements 
Iterator, AutoClos
 
   // Loads the next schema root or null if no more rows are available.
   private void load(VectorSchemaRoot root) throws SQLException {
-
-for (int i = 1; i <= consumers.length; i++) {
-  consumers[i - 
1].resetValueVector(root.getVector(rsmd.getColumnLabel(i)));
+for (int i = 0; i < consumers.length; i++) {
+  consumers[i].resetValueVector(root.getVector(i));
 }
 
 consumeData(root);
-
-if (root.getR

[arrow] branch master updated: ARROW-13859: [Java] Add code coverage support

2021-09-12 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new c091e6d  ARROW-13859: [Java] Add code coverage support
c091e6d is described below

commit c091e6da9c92882cc68152ceb0588104dd0bc55d
Author: Laurent Goujon 
AuthorDate: Sun Sep 12 14:08:08 2021 -0700

ARROW-13859: [Java] Add code coverage support

Add optional code coverage support to the Maven build configuration
using the JaCoCo plugin.

Code coverage reporting is enabled when using the `code-coverage`
profile.

Closes #11069 from laurentgo/laurentgo/code-coverage

Authored-by: Laurent Goujon 
Signed-off-by: Micah Kornfield 
---
 java/adapter/jdbc/pom.xml | 13 ---
 java/adapter/orc/pom.xml  | 11 --
 java/memory/memory-core/pom.xml   | 18 --
 java/memory/memory-netty/pom.xml  | 29 ---
 java/memory/memory-unsafe/pom.xml | 19 --
 java/pom.xml  | 76 ---
 java/vector/pom.xml   | 19 +-
 7 files changed, 72 insertions(+), 113 deletions(-)

diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml
index 76a5f8f..774676c 100644
--- a/java/adapter/jdbc/pom.xml
+++ b/java/adapter/jdbc/pom.xml
@@ -93,17 +93,4 @@
 
 
 
-
-
-
-org.apache.maven.plugins
-maven-surefire-plugin
-
-
-UTC
-
-
-
-
-
 
diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml
index 7928d2c..2b20330 100644
--- a/java/adapter/orc/pom.xml
+++ b/java/adapter/orc/pom.xml
@@ -109,16 +109,5 @@
 
 
 
-
-
-org.apache.maven.plugins
-maven-surefire-plugin
-
-
-UTC
-
-
-
-
 
 
diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml
index 65abe8e..c6cbe4a 100644
--- a/java/memory/memory-core/pom.xml
+++ b/java/memory/memory-core/pom.xml
@@ -37,22 +37,4 @@
 
   
 
-  
-
-  
-maven-surefire-plugin
-3.0.0-M3
-
-  true
-  true
-  ${forkCount}
-  true
-  
-${project.build.directory}
-UTC
-  
-
-  
-
-  
 
diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml
index b5f256f..d94f4da 100644
--- a/java/memory/memory-netty/pom.xml
+++ b/java/memory/memory-netty/pom.xml
@@ -45,27 +45,6 @@
 
   
 
-  
-
-  
-maven-surefire-plugin
-3.0.0-M3
-
-  true
-  true
-  ${forkCount}
-  true
-  
-${project.build.directory}
-
true
-
1048576
-UTC
-  
-
-  
-
-  
-
   
 
   
+  
1048576
 
-
--Darrow.vector.max_allocation_bytes=1048576
   
 
-
+
+  maven-failsafe-plugin
+  3.0.0-M3
+  
+
+  ${project.build.directory}
+  
true
+  UTC
+
+  
+
+
+  org.jacoco
+  jacoco-maven-plugin
+  0.8.7
+
 
 
@@ -760,6 +775,57 @@
   
 
 
+
+  code-coverage
+  
+  
+
+  
+org.jacoco
+jacoco-maven-plugin
+
+  
+default-prepare-agent
+
+  prepare-agent
+
+  
+  
+default-prepare-agent-integration
+
+  prepare-agent-integration
+
+  
+
+  
+
+  
+  
+
+  
+org.jacoco
+jacoco-maven-plugin
+0.8.7
+
+  
+false
+
+  
+  report
+
+  
+
+  
+
+  
+
+
   
 
 
diff --git a/java/vector/pom.xml b/java/vector/pom.xml
index 1336f0d..4661a13 100644
--- a/java/vector/pom.xml
+++ b/java/vector/pom.xml
@@ -105,20 +105,6 @@
   
 org.apache.maven.plugins
 maven-surefire-plugin
-
-  true
-  true
-  ${forkCount}
-  true
-  
-${project.build.directory}
-
true
-UTC
-  
-  
-  -Darrow.vector.max_allocation_bytes=1048576

[arrow] branch master updated (bae7e2b -> db5b848)

2021-09-12 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from bae7e2b  MINOR: [Doc][Python] Fix typo ParquetFileForma (#11137)
 add db5b848  ARROW-13979: [Go] Enable -race for go tests

No new revisions were added by this update.

Summary of changes:
 ci/scripts/go_test.sh| 12 ++--
 go/parquet/internal/hashing/xxh3_memo_table.go   |  8 +++-
 go/parquet/internal/utils/bit_set_run_reader_test.go |  3 ---
 3 files changed, 17 insertions(+), 6 deletions(-)


[arrow] branch master updated (111f0c7 -> 09497a9)

2021-09-01 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 111f0c7  ARROW-13823 [Java]: Exclude .factorypath
 add 09497a9  ARROW-13544 [Java]: Remove APIs that have been deprecated for 
long (Changes to JDBC)

No new revisions were added by this update.

Summary of changes:
 .../org/apache/arrow/adapter/jdbc/JdbcToArrow.java | 170 
 .../adapter/jdbc/AbstractJdbcToArrowTest.java  | 178 +
 .../adapter/jdbc/h2/JdbcAliasToArrowTest.java  |   4 +-
 .../adapter/jdbc/h2/JdbcToArrowArrayTest.java  |  10 +-
 .../adapter/jdbc/h2/JdbcToArrowCharSetTest.java|  17 +-
 .../adapter/jdbc/h2/JdbcToArrowDataTypesTest.java  |  17 +-
 .../arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java |  17 +-
 .../jdbc/h2/JdbcToArrowOptionalColumnsTest.java|   3 +-
 .../arrow/adapter/jdbc/h2/JdbcToArrowTest.java |  16 +-
 .../adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java   |  11 +-
 10 files changed, 224 insertions(+), 219 deletions(-)


[arrow] branch master updated (b76caf4 -> 111f0c7)

2021-09-01 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from b76caf4  ARROW-13544 [Java]: Remove APIs that have been deprecated for 
long (Changes to Vectors)
 add 111f0c7  ARROW-13823 [Java]: Exclude .factorypath

No new revisions were added by this update.

Summary of changes:
 java/.gitignore | 1 +
 java/pom.xml| 1 +
 2 files changed, 2 insertions(+)


[arrow] branch master updated (69972dd -> b76caf4)

2021-09-01 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 69972dd  ARROW-13792 [Java]: The toString representation is incorrect 
for unsigned integer vectors
 add b76caf4  ARROW-13544 [Java]: Remove APIs that have been deprecated for 
long (Changes to Vectors)

No new revisions were added by this update.

Summary of changes:
 .../src/main/codegen/templates/UnionVector.java|   7 +-
 .../arrow/vector/BaseVariableWidthVector.java  |  11 --
 .../org/apache/arrow/vector/BitVectorHelper.java   |  10 --
 .../arrow/vector/complex/FixedSizeListVector.java  |  17 +---
 .../arrow/vector/complex/LargeListVector.java  |   2 +-
 .../apache/arrow/vector/complex/ListVector.java|  23 +
 .../org/apache/arrow/vector/types/pojo/Field.java  |  20 
 .../arrow/vector/util/ByteFunctionHelpers.java | 112 -
 .../apache/arrow/vector/TestBitVectorHelper.java   |   8 +-
 .../apache/arrow/vector/TestDictionaryVector.java  |  10 +-
 .../arrow/vector/TestFixedSizeListVector.java  |   6 +-
 .../org/apache/arrow/vector/TestUnionVector.java   |  30 --
 .../org/apache/arrow/vector/TestValueVector.java   |   7 +-
 .../org/apache/arrow/vector/TestVectorReAlloc.java |   2 +-
 .../org/apache/arrow/vector/TestVectorReset.java   |   2 +-
 .../vector/compare/TestRangeEqualsVisitor.java |  11 +-
 .../vector/compare/TestTypeEqualsVisitor.java  |   6 +-
 .../vector/complex/writer/TestComplexWriter.java   |   2 +-
 .../org/apache/arrow/vector/pojo/TestConvert.java  |   2 +-
 19 files changed, 55 insertions(+), 233 deletions(-)
 delete mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/util/ByteFunctionHelpers.java


[arrow] branch master updated (e9eeff1 -> 69972dd)

2021-09-01 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from e9eeff1  ARROW-12714: [C++] String title case kernel
 add 69972dd  ARROW-13792 [Java]: The toString representation is incorrect 
for unsigned integer vectors

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/arrow/vector/UInt1Vector.java  |  6 +++-
 .../java/org/apache/arrow/vector/UInt2Vector.java  |  7 +
 .../java/org/apache/arrow/vector/UInt4Vector.java  |  6 
 .../java/org/apache/arrow/vector/UInt8Vector.java  |  6 
 .../arrow/vector/util/ValueVectorUtility.java  | 21 --
 .../org/apache/arrow/vector/TestValueVector.java   | 32 ++
 6 files changed, 74 insertions(+), 4 deletions(-)


[arrow] branch master updated (f849a26 -> 820e506)

2021-08-16 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from f849a26  ARROW-13614: [C++] Add decimal support to min_max/hash_min_max
 add 820e506  ARROW-13330: [Go][Parquet] Add the rest of the Encoding 
package

No new revisions were added by this update.

Summary of changes:
 go/parquet/internal/encoding/boolean_encoder.go|4 +-
 go/parquet/internal/encoding/delta_bit_packing.go  |5 +-
 go/parquet/internal/encoding/delta_byte_array.go   |   15 +-
 .../internal/encoding/delta_length_byte_array.go   |   12 +-
 go/parquet/internal/encoding/encoder.go|   23 +-
 .../internal/encoding/encoding_benchmarks_test.go  |  466 +
 go/parquet/internal/encoding/encoding_test.go  |  684 +
 go/parquet/internal/encoding/levels.go |  288 ++
 go/parquet/internal/encoding/levels_test.go|  292 ++
 go/parquet/internal/encoding/memo_table.go |  380 
 go/parquet/internal/encoding/memo_table_test.go|  291 ++
 .../internal/encoding/memo_table_types.gen.go  |  366 +++
 .../internal/encoding/memo_table_types.gen.go.tmpl |  115 +++
 go/parquet/internal/encoding/typed_encoder.gen.go  |   24 +
 .../internal/encoding/typed_encoder.gen.go.tmpl|4 +-
 go/parquet/internal/encoding/types.go  |   65 +-
 go/parquet/internal/hashing/hashing_test.go|  114 +++
 go/parquet/internal/hashing/types.tmpldata |   18 +
 go/parquet/internal/hashing/xxh3_memo_table.gen.go | 1013 
 .../internal/hashing/xxh3_memo_table.gen.go.tmpl   |  304 ++
 go/parquet/internal/hashing/xxh3_memo_table.go |  386 
 go/parquet/internal/utils/bit_reader_test.go   |   32 +-
 go/parquet/internal/utils/rle.go   |   39 +-
 23 files changed, 4832 insertions(+), 108 deletions(-)
 create mode 100644 go/parquet/internal/encoding/encoding_benchmarks_test.go
 create mode 100644 go/parquet/internal/encoding/encoding_test.go
 create mode 100644 go/parquet/internal/encoding/levels.go
 create mode 100644 go/parquet/internal/encoding/levels_test.go
 create mode 100644 go/parquet/internal/encoding/memo_table.go
 create mode 100644 go/parquet/internal/encoding/memo_table_test.go
 create mode 100644 go/parquet/internal/encoding/memo_table_types.gen.go
 create mode 100644 go/parquet/internal/encoding/memo_table_types.gen.go.tmpl
 create mode 100644 go/parquet/internal/hashing/hashing_test.go
 create mode 100644 go/parquet/internal/hashing/types.tmpldata
 create mode 100644 go/parquet/internal/hashing/xxh3_memo_table.gen.go
 create mode 100644 go/parquet/internal/hashing/xxh3_memo_table.gen.go.tmpl
 create mode 100644 go/parquet/internal/hashing/xxh3_memo_table.go


[arrow] branch master updated: ARROW-13172: [Java] Make TYPE_WIDTH publicly accessible

2021-08-09 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 822d6d8  ARROW-13172: [Java] Make TYPE_WIDTH publicly accessible
822d6d8 is described below

commit 822d6d8593999a937ccb06d6c89acf44bea2dc54
Author: Eduard Tudenhoefner 
AuthorDate: Mon Aug 9 21:32:32 2021 -0700

ARROW-13172: [Java] Make TYPE_WIDTH publicly accessible

Some Vector classes were already making `TYPE_WIDTH` publicly accessible.
This PR just makes sure that this is done across all Vector classes.

Closes #10600 from nastra/ARROW-13172

Authored-by: Eduard Tudenhoefner 
Signed-off-by: Micah Kornfield 
---
 java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java| 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java  | 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java   | 2 +-
 .../src/main/java/org/apache/arrow/vector/IntervalYearVector.java   | 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java  | 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java  | 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java   | 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java| 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java  | 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java  | 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java  | 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java  | 2 +-
 java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java  | 2 +-
 13 files changed, 13 insertions(+), 13 deletions(-)

diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java
index e849a85..3e88268 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java
@@ -37,7 +37,7 @@ import org.apache.arrow.vector.util.TransferPair;
  */
 public final class DateDayVector extends BaseFixedWidthVector {
 
-  private static final byte TYPE_WIDTH = 4;
+  public static final byte TYPE_WIDTH = 4;
   private final FieldReader reader;
 
   /**
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java
index 80f2156..73738d7 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java
@@ -39,7 +39,7 @@ import org.apache.arrow.vector.util.TransferPair;
  * maintained to track which elements in the vector are null.
  */
 public final class DateMilliVector extends BaseFixedWidthVector {
-  private static final byte TYPE_WIDTH = 8;
+  public static final byte TYPE_WIDTH = 8;
   private final FieldReader reader;
 
   /**
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java
index 9f65c56..9671b34 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java
@@ -42,7 +42,7 @@ import org.apache.arrow.vector.util.TransferPair;
  * vector are null.
  */
 public final class DurationVector extends BaseFixedWidthVector {
-  private static final byte TYPE_WIDTH = 8;
+  public static final byte TYPE_WIDTH = 8;
   private final FieldReader reader;
 
   private final TimeUnit unit;
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java
index c2eb8cf..7ddfe6b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java
@@ -38,7 +38,7 @@ import org.apache.arrow.vector.util.TransferPair;
  * (bit vector) is maintained to track which elements in the vector are null.
  */
 public final class IntervalYearVector extends BaseFixedWidthVector {
-  private static final byte TYPE_WIDTH = 4;
+  public static final byte TYPE_WIDTH = 4;
   private final FieldReader reader;
 
   /**
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java
index 7923612..cf12885 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java
@@ -37,7 +37,7 @@ import org.apache.arrow.vector.util.TransferPair;
  * vector are null

[arrow] branch master updated (2d921dc -> 73f73d9)

2021-07-25 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 2d921dc  ARROW-13434: [R] group_by() with an unnamed expression
 add 73f73d9  ARROW-13430: [Go] fix handling of zero value for FromBigInt

No new revisions were added by this update.

Summary of changes:
 go/arrow/decimal128/decimal128.go  | 9 +++--
 go/arrow/decimal128/decimal128_test.go | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)


[arrow] branch master updated (75a6475 -> afdb8da)

2021-07-13 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 75a6475  ARROW-12994: [R] Fix tests that assume UTC local tz
 add afdb8da  ARROW-12851: [Go][Parquet] Add Golang Parquet encoding package

No new revisions were added by this update.

Summary of changes:
 go/parquet/doc.go  |4 +-
 go/parquet/go.mod  |6 +-
 go/parquet/go.sum  |   18 +-
 go/parquet/internal/encoding/boolean_decoder.go|  101 +
 go/parquet/internal/encoding/boolean_encoder.go|   84 +
 go/parquet/internal/encoding/byte_array_decoder.go |   88 +
 go/parquet/internal/encoding/byte_array_encoder.go |  123 +
 go/parquet/internal/encoding/decoder.go|  186 +
 go/parquet/internal/encoding/delta_bit_packing.go  |  520 ++
 go/parquet/internal/encoding/delta_byte_array.go   |  216 +
 .../internal/encoding/delta_length_byte_array.go   |  144 +
 go/parquet/internal/encoding/encoder.go|  311 +
 .../encoding/fixed_len_byte_array_decoder.go   |   66 +
 .../encoding/fixed_len_byte_array_encoder.go   |  104 +
 .../{utils => encoding}/physical_types.tmpldata|0
 .../internal/encoding/plain_encoder_types.gen.go   |  639 ++
 .../encoding/plain_encoder_types.gen.go.tmpl   |  182 +
 go/parquet/internal/encoding/typed_encoder.gen.go  | 1443 
 .../internal/encoding/typed_encoder.gen.go.tmpl|  341 +
 go/parquet/internal/encoding/types.go  |  497 ++
 go/parquet/internal/testutils/utils.go |   42 +
 go/parquet/internal/utils/Makefile |4 +-
 go/parquet/internal/utils/_lib/bit_packing_avx2.s  |2 +-
 go/parquet/internal/utils/_lib/min_max_avx2.s  | 1519 +
 go/parquet/internal/utils/_lib/min_max_sse4.s  |2 +-
 go/parquet/internal/utils/_lib/unpack_bool_avx2.s  | 6361 +-
 go/parquet/internal/utils/_lib/unpack_bool_sse4.s  |2 +-
 go/parquet/internal/utils/bitmap_writer.go |   33 +-
 go/parquet/internal/utils/min_max_avx2_amd64.s | 1567 +
 go/parquet/internal/utils/unpack_bool_avx2_amd64.s | 7023 +---
 30 files changed, 5946 insertions(+), 15682 deletions(-)
 create mode 100644 go/parquet/internal/encoding/boolean_decoder.go
 create mode 100644 go/parquet/internal/encoding/boolean_encoder.go
 create mode 100644 go/parquet/internal/encoding/byte_array_decoder.go
 create mode 100644 go/parquet/internal/encoding/byte_array_encoder.go
 create mode 100644 go/parquet/internal/encoding/decoder.go
 create mode 100644 go/parquet/internal/encoding/delta_bit_packing.go
 create mode 100644 go/parquet/internal/encoding/delta_byte_array.go
 create mode 100644 go/parquet/internal/encoding/delta_length_byte_array.go
 create mode 100644 go/parquet/internal/encoding/encoder.go
 create mode 100644 go/parquet/internal/encoding/fixed_len_byte_array_decoder.go
 create mode 100644 go/parquet/internal/encoding/fixed_len_byte_array_encoder.go
 copy go/parquet/internal/{utils => encoding}/physical_types.tmpldata (100%)
 create mode 100644 go/parquet/internal/encoding/plain_encoder_types.gen.go
 create mode 100644 go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl
 create mode 100644 go/parquet/internal/encoding/typed_encoder.gen.go
 create mode 100644 go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
 create mode 100644 go/parquet/internal/encoding/types.go
 create mode 100644 go/parquet/internal/testutils/utils.go


[arrow] branch master updated (dbcd0d9 -> d5a2aa2)

2021-06-17 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from dbcd0d9  ARROW-11705: [R] Support scalar value recycling in 
RecordBatch/Table$create()
 add d5a2aa2  PARQUET-2056: [C++] Add ability for retrieving dictionary and 
indices separately for ColumnReader

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/column_reader.cc  | 132 ++
 cpp/src/parquet/column_reader.h   |  43 +++
 cpp/src/parquet/column_reader_test.cc |  86 ++
 cpp/src/parquet/encoding.cc   |  13 
 cpp/src/parquet/encoding.h|  18 +
 cpp/src/parquet/file_reader.cc|  39 ++
 cpp/src/parquet/file_reader.h |  14 
 cpp/src/parquet/reader_test.cc|  78 
 cpp/src/parquet/types.h   |   9 +++
 9 files changed, 404 insertions(+), 28 deletions(-)


[arrow] branch master updated: ARROW-12310: [Java] ValueVector#getObject should support covariance for complex types

2021-05-19 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new fa47050  ARROW-12310: [Java] ValueVector#getObject should support 
covariance for complex types
fa47050 is described below

commit fa47050e497c946800d324f222e32f814fc87785
Author: liyafan82 
AuthorDate: Wed May 19 22:52:12 2021 -0700

ARROW-12310: [Java] ValueVector#getObject should support covariance for 
complex types

Currently, the `ValueVector#getObject` API supports covariance for 
primitive types.
For example, `IntVector#getObject` returns `Integer` while 
`BitVector#getObject` returns `Boolean`.

For complex types, we should also support covariance. For example, 
`ListVector#getObject` should return a List

This will help reduce unnecessary casts, and enforce type safety.

Closes #9964 from liyafan82/fly_0408_cv

Authored-by: liyafan82 
Signed-off-by: Micah Kornfield 
---
 .../test/java/org/apache/arrow/AvroTestBase.java   |  5 +-
 .../dictionary/TestHashTableDictionaryEncoder.java |  2 +-
 .../arrow/vector/complex/FixedSizeListVector.java  |  2 +-
 .../arrow/vector/complex/LargeListVector.java  |  2 +-
 .../apache/arrow/vector/complex/ListVector.java|  2 +-
 .../vector/complex/NonNullableStructVector.java|  2 +-
 .../apache/arrow/vector/complex/StructVector.java  |  3 +-
 .../apache/arrow/vector/TestDictionaryVector.java  | 47 +--
 .../arrow/vector/TestFixedSizeListVector.java  | 31 ++---
 .../apache/arrow/vector/TestLargeListVector.java   | 54 +++---
 .../org/apache/arrow/vector/TestListVector.java| 48 +--
 .../org/apache/arrow/vector/TestMapVector.java | 28 +--
 .../org/apache/arrow/vector/ipc/BaseFileTest.java  |  3 +-
 13 files changed, 113 insertions(+), 116 deletions(-)

diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java 
b/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java
index f24f0f1..a00cd77 100644
--- a/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java
+++ b/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java
@@ -36,7 +36,6 @@ import org.apache.arrow.vector.FieldVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.complex.StructVector;
-import org.apache.arrow.vector.util.JsonStringArrayList;
 import org.apache.arrow.vector.util.Text;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericDatumWriter;
@@ -88,7 +87,7 @@ public class AvroTestBase {
   protected void checkArrayResult(List> expected, ListVector vector) {
 assertEquals(expected.size(), vector.getValueCount());
 for (int i = 0; i < expected.size(); i++) {
-  checkArrayElement(expected.get(i), (JsonStringArrayList) 
vector.getObject(i));
+  checkArrayElement(expected.get(i), vector.getObject(i));
 }
   }
 
@@ -177,7 +176,7 @@ public class AvroTestBase {
 int index = 0;
 for (ListVector vector : vectors) {
   for (int i = 0; i < vector.getValueCount(); i++) {
-checkArrayElement(expected.get(index++), (JsonStringArrayList) 
vector.getObject(i));
+checkArrayElement(expected.get(index++), vector.getObject(i));
   }
 }
   }
diff --git 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
index 56fdfe9..dd22ac9 100644
--- 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
+++ 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
@@ -250,7 +250,7 @@ public class TestHashTableDictionaryEncoder {
 
 assertEquals(vector.getValueCount(), decoded.getValueCount());
 for (int i = 0; i < 5; i++) {
-  assertEquals(vector.getObject(i), ((VarCharVector) 
decoded).getObject(i));
+  assertEquals(vector.getObject(i), decoded.getObject(i));
 }
   }
 }
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
index 6767305..c22cba4 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
@@ -445,7 +445,7 @@ public class FixedSizeListVector extends BaseValueVector 
implements BaseListVect
   }
 
   @Override
-  public Object getObject(int index) {
+  public List getObject(int index) {
 if (isSet(index) == 0) {
   return null;
 }
diff --git 
a

[arrow] branch master updated (f1a7c50 -> 325eb07)

2021-05-13 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from f1a7c50  ARROW-5385: [Go] Implement EXTENSION datatype
 add 325eb07  ARROW-12746: [Go][Flight] append instead of overwriting 
outgoing metadata

No new revisions were added by this update.

Summary of changes:
 go/arrow/flight/client_auth.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)


[arrow] branch master updated (9347731 -> f1a7c50)

2021-05-13 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 9347731  ARROW-12731: [R] Use InMemoryDataset for Table/RecordBatch in 
dplyr code
 add f1a7c50  ARROW-5385: [Go] Implement EXTENSION datatype

No new revisions were added by this update.

Summary of changes:
 dev/archery/archery/integration/datagen.py |   3 +-
 docs/source/status.rst |   4 +-
 go/arrow/array/array.go|   2 +-
 go/arrow/array/array_test.go   |   5 +-
 go/arrow/array/builder.go  |   2 +
 go/arrow/array/compare.go  |   6 +
 go/arrow/array/extension.go| 236 ++
 go/arrow/array/extension_test.go   |  96 
 go/arrow/compare.go|  53 ++-
 go/arrow/compare_test.go   |  20 +-
 go/arrow/datatype_extension.go | 163 +++
 go/arrow/datatype_extension_test.go|  86 
 go/arrow/datatype_nested.go|  14 +-
 go/arrow/internal/arrdata/arrdata.go   |  99 
 go/arrow/internal/arrjson/arrjson.go   | 180 +++-
 go/arrow/internal/arrjson/arrjson_test.go  | 497 -
 go/arrow/internal/arrjson/reader.go|   5 +-
 go/arrow/internal/arrjson/writer.go|  77 +---
 go/arrow/internal/testing/types/extension_types.go | 247 ++
 .../ipc/cmd/arrow-json-integration-test/main.go|   4 +
 go/arrow/ipc/file_reader.go|   5 +
 go/arrow/ipc/metadata.go   |  68 ++-
 go/arrow/ipc/metadata_test.go  |  63 +++
 go/arrow/ipc/reader.go |   9 +-
 go/arrow/ipc/writer.go |   9 +
 go/arrow/schema.go |  29 ++
 26 files changed, 1856 insertions(+), 126 deletions(-)
 create mode 100644 go/arrow/array/extension.go
 create mode 100644 go/arrow/array/extension_test.go
 create mode 100644 go/arrow/datatype_extension.go
 create mode 100644 go/arrow/datatype_extension_test.go
 create mode 100644 go/arrow/internal/testing/types/extension_types.go


[arrow] branch master updated (4a121b6 -> f06c50f)

2021-05-11 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 4a121b6  ARROW-11173: [Java] Add map type in complex reader / writer
 add f06c50f  ARROW-12684: [Go][Flight] fix nil pointer dereference, add 
test.

No new revisions were added by this update.

Summary of changes:
 go/arrow/flight/flight_test.go | 32 
 go/arrow/flight/record_batch_reader.go | 10 ++
 2 files changed, 38 insertions(+), 4 deletions(-)


[arrow] branch master updated (e8a9615 -> 4a121b6)

2021-05-11 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from e8a9615  MINOR: Fix pyarrow.parquet.read_table docstring (#10293)
 add 4a121b6  ARROW-11173: [Java] Add map type in complex reader / writer

No new revisions were added by this update.

Summary of changes:
 .../src/main/codegen/includes/vv_imports.ftl   |   1 +
 .../codegen/templates/AbstractFieldReader.java |   4 +
 .../codegen/templates/AbstractFieldWriter.java |  57 ++-
 .../templates/AbstractPromotableFieldWriter.java   |  61 ++-
 .../src/main/codegen/templates/BaseReader.java |  10 +
 .../src/main/codegen/templates/BaseWriter.java |  15 +
 .../main/codegen/templates/DenseUnionReader.java   |  12 +
 .../main/codegen/templates/DenseUnionVector.java   |  16 +
 .../main/codegen/templates/DenseUnionWriter.java   |  40 ++
 .../src/main/codegen/templates/StructWriters.java  |  40 ++
 .../templates/UnionFixedSizeListWriter.java|  23 ++
 .../main/codegen/templates/UnionListWriter.java|  23 ++
 .../src/main/codegen/templates/UnionMapWriter.java |  12 +
 .../src/main/codegen/templates/UnionReader.java|  13 +
 .../src/main/codegen/templates/UnionVector.java|  28 ++
 .../src/main/codegen/templates/UnionWriter.java|  86 +
 .../vector/complex/impl/AbstractBaseReader.java|   6 +
 .../vector/complex/impl/PromotableWriter.java  |  13 +-
 .../arrow/vector/complex/reader/FieldReader.java   |   5 +-
 .../arrow/vector/complex/writer/FieldWriter.java   |   3 +-
 .../org/apache/arrow/vector/TestMapVector.java | 411 +
 .../org/apache/arrow/vector/TestUnionVector.java   |  63 
 .../vector/complex/writer/TestComplexWriter.java   |  77 
 23 files changed, 1003 insertions(+), 16 deletions(-)


[arrow] branch master updated (553f3d8 -> e8a9615)

2021-05-11 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 553f3d8  ARROW-12721: [CI] Fix path for uploading aarch64 conda artifacts from the nightly builds
 add e8a9615  MINOR: Fix pyarrow.parquet.read_table docstring (#10293)

No new revisions were added by this update.

Summary of changes:
 python/pyarrow/parquet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)


[arrow] branch master updated (9218fe4 -> 0d11014)

2021-04-28 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 9218fe4  ARROW-12517: [Go][Flight] Expose app metadata in flight client and server
 add 0d11014  ARROW-5640: [Go] Implement Arrow Map Array

No new revisions were added by this update.

Summary of changes:
 dev/archery/archery/integration/datagen.py |  12 +-
 docs/source/status.rst |   2 +-
 go/arrow/array/array.go|   2 +-
 go/arrow/array/array_test.go   |  20 +-
 go/arrow/array/builder.go  |   2 +
 go/arrow/array/compare.go  |   8 +-
 go/arrow/array/map.go  | 272 
 go/arrow/array/map_test.go | 151 +++
 go/arrow/datatype_nested.go|  35 ++
 go/arrow/datatype_nested_test.go   |  70 
 go/arrow/example_test.go   |  64 +++
 go/arrow/internal/arrdata/arrdata.go   | 150 +++
 go/arrow/internal/arrjson/arrjson.go   |  48 +++
 go/arrow/internal/arrjson/arrjson_test.go  | 637 +
 go/arrow/ipc/file_reader.go|  16 +
 go/arrow/ipc/metadata.go   |  34 +-
 go/arrow/ipc/writer.go |  37 ++
 17 files changed, 1535 insertions(+), 25 deletions(-)
 create mode 100644 go/arrow/array/map.go
 create mode 100644 go/arrow/array/map_test.go


[arrow] branch master updated (8c3363e -> 9218fe4)

2021-04-28 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 8c3363e  ARROW-12407: [Python][Dataset] Remove ScanTask bindings
 add 9218fe4  ARROW-12517: [Go][Flight] Expose app metadata in flight client and server

No new revisions were added by this update.

Summary of changes:
 go/arrow/flight/flight_test.go | 63 ++
 go/arrow/flight/record_batch_reader.go | 54 ++---
 go/arrow/flight/record_batch_writer.go | 23 +++--
 3 files changed, 134 insertions(+), 6 deletions(-)


[arrow] branch master updated (d920695 -> 5de2fe4)

2021-04-27 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from d920695  ARROW-12392: [C++] Restore asynchronous streaming CSV reader
 add 5de2fe4  ARROW-7948: [Go] Decimal128 Integration fix

No new revisions were added by this update.

Summary of changes:
 dev/archery/archery/integration/datagen.py |   1 -
 go/arrow/decimal128/decimal128.go  |  51 
 go/arrow/decimal128/decimal128_test.go |  93 +--
 go/arrow/go.mod|   5 +-
 go/arrow/go.sum|   9 -
 go/arrow/internal/arrjson/arrjson.go   | 410 +++--
 go/arrow/internal/arrjson/arrjson_test.go  |  93 ++-
 7 files changed, 488 insertions(+), 174 deletions(-)


[arrow] branch master updated (b2ceb8f -> 715cb57)

2021-04-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from b2ceb8f  ARROW-12104: [Go][Parquet] Second chunk of Ported Go Parquet code
 add 715cb57  ARROW-11999: [Java] Support parallel vector element search with user-specified comparator

No new revisions were added by this update.

Summary of changes:
 .../arrow/algorithm/search/ParallelSearcher.java   | 77 --
 .../algorithm/search/TestParallelSearcher.java | 52 +--
 2 files changed, 116 insertions(+), 13 deletions(-)


[arrow] branch master updated (3e5895d -> b2ceb8f)

2021-04-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 3e5895d  ARROW-12111: [Java] Generate flatbuffer files using flatc 1.12.0
 add b2ceb8f  ARROW-12104: [Go][Parquet] Second chunk of Ported Go Parquet code

No new revisions were added by this update.

Summary of changes:
 dev/release/rat_exclude_files.txt  | 3 +
 go/parquet/compress/brotli.go  |   115 +
 go/parquet/compress/compress.go|   156 +
 go/parquet/compress/compress_test.go   |   138 +
 go/parquet/compress/gzip.go|98 +
 go/parquet/compress/snappy.go  |62 +
 go/parquet/compress/zstd.go|   112 +
 go/parquet/encryption_properties.go|   711 ++
 go/parquet/encryption_properties_test.go   |   217 +
 go/parquet/go.mod  | 4 +
 go/parquet/go.sum  | 8 +
 go/parquet/internal/bmi/Makefile   | 9 +-
 go/parquet/internal/bmi/bitmap_bmi2.s  | 2 +-
 go/{arrow => parquet}/internal/debug/assert_off.go | 0
 go/{arrow => parquet}/internal/debug/assert_on.go  | 4 +-
 .../internal/debug/doc.go} |13 +-
 go/parquet/internal/encryption/aes.go  |   264 +
 go/parquet/internal/encryption/decryptor.go|   261 +
 go/parquet/internal/encryption/encryptor.go|   237 +
 go/parquet/internal/encryption/key_handling.go |62 +
 .../gen-go/parquet/GoUnusedProtection__.go | 6 +
 .../internal/gen-go/parquet/parquet-consts.go  |23 +
 go/parquet/internal/gen-go/parquet/parquet.go  | 10961 +++
 .../internal/gen-go/parquet/staticcheck.conf}  | 3 +-
 go/parquet/internal/thrift/helpers.go  |87 +
 go/parquet/internal/utils/Makefile | 4 +
 go/parquet/reader_properties.go|79 +
 go/parquet/reader_writer_properties_test.go|69 +
 go/parquet/types.go|   187 +
 go/parquet/writer_properties.go|   510 +
 30 files changed, 14392 insertions(+), 13 deletions(-)
 create mode 100644 go/parquet/compress/brotli.go
 create mode 100644 go/parquet/compress/compress.go
 create mode 100644 go/parquet/compress/compress_test.go
 create mode 100644 go/parquet/compress/gzip.go
 create mode 100644 go/parquet/compress/snappy.go
 create mode 100644 go/parquet/compress/zstd.go
 create mode 100644 go/parquet/encryption_properties.go
 create mode 100644 go/parquet/encryption_properties_test.go
 copy go/{arrow => parquet}/internal/debug/assert_off.go (100%)
 copy go/{arrow => parquet}/internal/debug/assert_on.go (91%)
 copy go/{arrow/internal/debug/assert_off.go => parquet/internal/debug/doc.go} 
(75%)
 create mode 100644 go/parquet/internal/encryption/aes.go
 create mode 100644 go/parquet/internal/encryption/decryptor.go
 create mode 100644 go/parquet/internal/encryption/encryptor.go
 create mode 100644 go/parquet/internal/encryption/key_handling.go
 create mode 100644 go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go
 create mode 100644 go/parquet/internal/gen-go/parquet/parquet-consts.go
 create mode 100644 go/parquet/internal/gen-go/parquet/parquet.go
 copy go/{arrow/memory/_lib/.gitignore => 
parquet/internal/gen-go/parquet/staticcheck.conf} (95%)
 create mode 100644 go/parquet/internal/thrift/helpers.go
 create mode 100644 go/parquet/reader_properties.go
 create mode 100644 go/parquet/reader_writer_properties_test.go
 create mode 100644 go/parquet/writer_properties.go


[arrow] branch master updated (d7f90ca -> 3e5895d)

2021-04-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from d7f90ca  ARROW-12400: [Rust] Re-enable tests in arrow::array::transform
 add 3e5895d  ARROW-12111: [Java] Generate flatbuffer files using flatc 1.12.0

No new revisions were added by this update.

Summary of changes:
 java/README.md |  52 +--
 java/format/pom.xml| 120 --
 .../main/java/org/apache/arrow/flatbuf/Binary.java |  51 ++
 .../main/java/org/apache/arrow/flatbuf/Block.java  |  61 
 .../org/apache/arrow/flatbuf/BodyCompression.java  |  72 +
 .../arrow/flatbuf/BodyCompressionMethod.java   |  43 +
 .../main/java/org/apache/arrow/flatbuf/Bool.java   |  48 ++
 .../main/java/org/apache/arrow/flatbuf/Buffer.java |  63 
 .../org/apache/arrow/flatbuf/CompressionType.java  |  30 
 .../main/java/org/apache/arrow/flatbuf/Date.java   |  65 
 .../java/org/apache/arrow/flatbuf/DateUnit.java|  30 
 .../java/org/apache/arrow/flatbuf/Decimal.java |  81 ++
 .../org/apache/arrow/flatbuf/DictionaryBatch.java  |  79 ++
 .../apache/arrow/flatbuf/DictionaryEncoding.java   |  88 +++
 .../org/apache/arrow/flatbuf/DictionaryKind.java   |  36 +
 .../java/org/apache/arrow/flatbuf/Duration.java|  57 +++
 .../java/org/apache/arrow/flatbuf/Endianness.java  |  34 
 .../java/org/apache/arrow/flatbuf/Feature.java |  62 
 .../main/java/org/apache/arrow/flatbuf/Field.java  | 120 ++
 .../java/org/apache/arrow/flatbuf/FieldNode.java   |  68 
 .../org/apache/arrow/flatbuf/FixedSizeBinary.java  |  60 +++
 .../org/apache/arrow/flatbuf/FixedSizeList.java|  60 +++
 .../org/apache/arrow/flatbuf/FloatingPoint.java|  57 +++
 .../main/java/org/apache/arrow/flatbuf/Footer.java | 100 
 .../main/java/org/apache/arrow/flatbuf/Int.java|  61 
 .../java/org/apache/arrow/flatbuf/Interval.java|  57 +++
 .../org/apache/arrow/flatbuf/IntervalUnit.java |  30 
 .../java/org/apache/arrow/flatbuf/KeyValue.java|  70 +
 .../java/org/apache/arrow/flatbuf/LargeBinary.java |  52 +++
 .../java/org/apache/arrow/flatbuf/LargeList.java   |  52 +++
 .../java/org/apache/arrow/flatbuf/LargeUtf8.java   |  52 +++
 .../main/java/org/apache/arrow/flatbuf/List.java   |  48 ++
 .../main/java/org/apache/arrow/flatbuf/Map.java|  87 +++
 .../java/org/apache/arrow/flatbuf/Message.java |  81 ++
 .../org/apache/arrow/flatbuf/MessageHeader.java|  44 ++
 .../org/apache/arrow/flatbuf/MetadataVersion.java  |  54 +++
 .../main/java/org/apache/arrow/flatbuf/Null.java   |  51 ++
 .../java/org/apache/arrow/flatbuf/Precision.java   |  31 
 .../java/org/apache/arrow/flatbuf/RecordBatch.java | 103 
 .../main/java/org/apache/arrow/flatbuf/Schema.java | 102 
 .../arrow/flatbuf/SparseMatrixCompressedAxis.java  |  30 
 .../apache/arrow/flatbuf/SparseMatrixIndexCSX.java | 114 ++
 .../org/apache/arrow/flatbuf/SparseTensor.java |  92 +++
 .../apache/arrow/flatbuf/SparseTensorIndex.java|  32 
 .../apache/arrow/flatbuf/SparseTensorIndexCOO.java | 118 ++
 .../apache/arrow/flatbuf/SparseTensorIndexCSF.java | 173 +
 .../java/org/apache/arrow/flatbuf/Struct_.java |  53 +++
 .../main/java/org/apache/arrow/flatbuf/Tensor.java |  91 +++
 .../java/org/apache/arrow/flatbuf/TensorDim.java   |  74 +
 .../main/java/org/apache/arrow/flatbuf/Time.java   |  66 
 .../java/org/apache/arrow/flatbuf/TimeUnit.java|  32 
 .../java/org/apache/arrow/flatbuf/Timestamp.java   |  93 +++
 .../main/java/org/apache/arrow/flatbuf/Type.java   |  55 +++
 .../main/java/org/apache/arrow/flatbuf/Union.java  |  74 +
 .../java/org/apache/arrow/flatbuf/UnionMode.java   |  30 
 .../main/java/org/apache/arrow/flatbuf/Utf8.java   |  51 ++
 java/pom.xml   |   5 +-
 57 files changed, 3561 insertions(+), 134 deletions(-)
 create mode 100644 
java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java
 create mode 100644 
java/format/src/main/java/org/apache/arrow/flatbuf/Block.java
 create mode 100644 
java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
 create mode 100644 
java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java
 create mode 100644 
java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java
 create mode 100644 
java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
 create mode

[arrow] branch fix_typo updated (1c11b9f -> 63c67d4)

2021-04-07 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch fix_typo
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 1c11b9f  MINOR: Fix "Parametrized" typo in docs
 add 63c67d4  Update docs/source/python/extending_types.rst

No new revisions were added by this update.

Summary of changes:
 docs/source/python/extending_types.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


[arrow] branch fix_typo updated: MINOR: Fix "Parametrized" typo in docs

2021-04-07 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch fix_typo
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/fix_typo by this push:
 new 1c11b9f  MINOR: Fix "Parametrized" typo in docs
1c11b9f is described below

commit 1c11b9f65692c5a86125b8d06e66bf6ae80448dc
Author: emkornfield 
AuthorDate: Wed Apr 7 08:41:40 2021 -0700

MINOR: Fix "Parametrized" typo in docs
---
 docs/source/python/extending_types.rst | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/source/python/extending_types.rst 
b/docs/source/python/extending_types.rst
index 6613dec..d3ab561 100644
--- a/docs/source/python/extending_types.rst
+++ b/docs/source/python/extending_types.rst
@@ -131,7 +131,7 @@ We can define the same type using the other option::
 pa.ExtensionType.__init__(self, pa.binary(16), "my_package.uuid")
 
 def __arrow_ext_serialize__(self):
-# since we don't have a parametrized type, we don't need extra
+# since we don't have a parameterized type, we don't need extra
 # metadata to be deserialized
 return b''
 
@@ -159,11 +159,11 @@ type to receive it.
 If the type is not registered in the receiving application, it will fall back
 to the storage type.
 
-Parametrized extension type
+Parameterized extension type
 ~~~
 
 The above example used a fixed storage type with no further metadata. But
-more flexible, parametrized extension types are also possible.
+more flexible, parameterized extension types are also possible.
 
 The example given here implements an extension type for the `pandas "period"
 data type 
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-span-representation>`__,
@@ -207,7 +207,7 @@ the reconstruction of the type instance after IPC will be 
incorrect.
 In the example above, the ``freq`` parameter is therefore stored in a private
 attribute with a public read-only property to access it.
 
-Parametrized extension types are also possible using the pickle-based type
+Parameterized extension types are also possible using the pickle-based type
 subclassing :class:`PyExtensionType`. The equivalent example for the period
 data type from above would look like::
 
@@ -224,7 +224,7 @@ data type from above would look like::
 def __reduce__(self):
 return PeriodType, (self.freq,)
 
-Also the storage type does not need to be fixed but can be parametrized.
+Also the storage type does not need to be fixed but can be parameterized.
 
 Custom extension array class
 


[arrow] branch fix_typo created (now 53696db)

2021-04-07 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch fix_typo
in repository https://gitbox.apache.org/repos/asf/arrow.git.


  at 53696db  ARROW-12168: [Go][IPC] Implement Compression handling for Arrow IPC

No new revisions were added by this update.


[arrow] branch master updated (4db1b05 -> 53696db)

2021-04-07 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 4db1b05  ARROW-12154: [C++][Gandiva] Fix gandiva crash in certain OS/CPU combinations
 add 53696db  ARROW-12168: [Go][IPC] Implement Compression handling for Arrow IPC

No new revisions were added by this update.

Summary of changes:
 dev/archery/archery/integration/runner.py |   1 -
 docs/source/status.rst|  10 +--
 go/arrow/go.mod   |   5 +-
 go/arrow/go.sum   |  24 +++---
 go/arrow/internal/arrdata/ioutil.go   |  83 ++-
 go/arrow/ipc/cmd/arrow-cat/main_test.go   |  14 ++--
 go/arrow/ipc/cmd/arrow-ls/main_test.go|   8 +-
 go/arrow/ipc/compression.go   | 109 +
 go/arrow/ipc/file_reader.go   |  53 ++---
 go/arrow/ipc/file_test.go |  32 
 go/arrow/ipc/file_writer.go   |  16 ++--
 go/arrow/ipc/ipc.go   |  30 +++
 go/arrow/ipc/message.go   |   1 +
 go/arrow/ipc/metadata.go  |  23 +-
 go/arrow/ipc/stream_test.go   |  49 
 go/arrow/ipc/writer.go| 127 ++
 16 files changed, 521 insertions(+), 64 deletions(-)
 create mode 100644 go/arrow/ipc/compression.go


[arrow] branch master updated (beb1c1b -> 5cabd31)

2021-04-04 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from beb1c1b  ARROW-12186: [Rust][DataFusion] Fix regexp_match test
 add 5cabd31  ARROW-12034: [Developer Tools] Formalize Minor PRs

No new revisions were added by this update.

Summary of changes:
 .github/workflows/dev_pr/title_check.js |  3 +++
 .github/workflows/dev_pr/title_check.md |  9 +++--
 CONTRIBUTING.md | 10 ++
 dev/merge_arrow_pr.py   | 11 +--
 4 files changed, 29 insertions(+), 4 deletions(-)


[arrow] branch master updated (5216e04 -> 971a9d3)

2021-03-31 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 5216e04  PARQUET-1990: [C++] Refuse to write ConvertedType::NA
 add 971a9d3  ARROW-12110: [Java] Implement ZSTD compression

No new revisions were added by this update.

Summary of changes:
 dev/archery/archery/integration/runner.py  |  7 +-
 docs/source/status.rst |  8 ++-
 java/compression/pom.xml   |  7 +-
 .../compression/CommonsCompressionFactory.java |  6 +-
 .../arrow/compression/Lz4CompressionCodec.java | 13 ++--
 .../arrow/compression/ZstdCompressionCodec.java| 74 ++
 .../arrow/compression/TestCompressionCodec.java|  4 ++
 7 files changed, 102 insertions(+), 17 deletions(-)
 create mode 100644 
java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java


[arrow] branch master updated (ab435da -> 5216e04)

2021-03-31 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from ab435da  ARROW-12143: [CI] R builds should timeout and fail after some threshold and dump the output.
 add 5216e04  PARQUET-1990: [C++] Refuse to write ConvertedType::NA

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/printer.cc| 12 
 cpp/src/parquet/reader_test.cc|  2 +-
 cpp/src/parquet/schema.cc | 13 +++--
 cpp/src/parquet/schema_test.cc| 14 +-
 cpp/src/parquet/thrift_internal.h |  3 +++
 cpp/src/parquet/types.cc  | 37 ++---
 cpp/src/parquet/types.h   | 29 +++--
 python/pyarrow/_parquet.pxd   |  2 +-
 python/pyarrow/_parquet.pyx   |  2 +-
 9 files changed, 67 insertions(+), 47 deletions(-)


[arrow] branch master updated: ARROW-12138: [Go][IPC] Update flatbuffers definitions

2021-03-30 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 8de898d  ARROW-12138: [Go][IPC] Update flatbuffers definitions
8de898d is described below

commit 8de898d3ce5b9dd40bd03acb046a8199fed4f38b
Author: Matthew Topol 
AuthorDate: Tue Mar 30 21:30:23 2021 -0700

ARROW-12138: [Go][IPC] Update flatbuffers definitions

Updating the generated flatbuffer code so that newer features like 
compression in IPC can get implemented. Doing the updating of the flatbuffer 
generated code first as a separate change.

@sbinet @emkornfield

Closes #9836 from zeroshade/arrow-12138

Authored-by: Matthew Topol 
Signed-off-by: Micah Kornfield 
---
 go/arrow/internal/flatbuf/Block.go |   4 +-
 go/arrow/internal/flatbuf/BodyCompression.go   |  87 ++
 go/arrow/internal/flatbuf/BodyCompressionMethod.go |  52 
 .../{MetadataVersion.go => CompressionType.go} |  34 ++-
 go/arrow/internal/flatbuf/Date.go  |   8 +-
 go/arrow/internal/flatbuf/DateUnit.go  |  22 +-
 go/arrow/internal/flatbuf/Decimal.go   |  25 +-
 go/arrow/internal/flatbuf/DictionaryBatch.go   |   6 +-
 go/arrow/internal/flatbuf/DictionaryEncoding.go|  33 ++-
 .../flatbuf/{Endianness.go => DictionaryKind.go}   |  28 +-
 go/arrow/internal/flatbuf/Duration.go  |   8 +-
 go/arrow/internal/flatbuf/Endianness.go|  22 +-
 go/arrow/internal/flatbuf/Feature.go   |  71 +
 go/arrow/internal/flatbuf/Field.go |  12 +-
 go/arrow/internal/flatbuf/FieldNode.go |   2 +-
 go/arrow/internal/flatbuf/FloatingPoint.go |   8 +-
 go/arrow/internal/flatbuf/Footer.go|  38 ++-
 go/arrow/internal/flatbuf/Interval.go  |   8 +-
 go/arrow/internal/flatbuf/IntervalUnit.go  |  22 +-
 .../flatbuf/{Duration.go => LargeBinary.go}|  33 +--
 .../internal/flatbuf/{Duration.go => LargeList.go} |  33 +--
 .../internal/flatbuf/{Duration.go => LargeUtf8.go} |  33 +--
 go/arrow/internal/flatbuf/Map.go   |   3 +-
 go/arrow/internal/flatbuf/Message.go   |  20 +-
 go/arrow/internal/flatbuf/MessageHeader.go |  42 ++-
 go/arrow/internal/flatbuf/MetadataVersion.go   |  44 +++-
 go/arrow/internal/flatbuf/Precision.go |  25 +-
 go/arrow/internal/flatbuf/RecordBatch.go   |  20 +-
 go/arrow/internal/flatbuf/Schema.go|  44 +++-
 ...ataVersion.go => SparseMatrixCompressedAxis.go} |  34 ++-
 go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go  | 200 ++
 go/arrow/internal/flatbuf/SparseTensor.go  |  24 +-
 go/arrow/internal/flatbuf/SparseTensorIndex.go |  30 ++-
 go/arrow/internal/flatbuf/SparseTensorIndexCOO.go  |  43 ++-
 go/arrow/internal/flatbuf/SparseTensorIndexCSF.go  | 291 +
 go/arrow/internal/flatbuf/Tensor.go|  14 +-
 go/arrow/internal/flatbuf/Time.go  |   8 +-
 go/arrow/internal/flatbuf/TimeUnit.go  |  30 ++-
 go/arrow/internal/flatbuf/Timestamp.go |   8 +-
 go/arrow/internal/flatbuf/Type.go  | 122 +
 go/arrow/internal/flatbuf/Union.go |  10 +-
 go/arrow/internal/flatbuf/UnionMode.go |  22 +-
 go/arrow/ipc/file_writer.go|   2 +-
 go/arrow/ipc/message.go|   4 +-
 go/arrow/ipc/metadata.go   |   4 +-
 45 files changed, 1330 insertions(+), 303 deletions(-)

diff --git a/go/arrow/internal/flatbuf/Block.go 
b/go/arrow/internal/flatbuf/Block.go
index 9172778..57a697b 100644
--- a/go/arrow/internal/flatbuf/Block.go
+++ b/go/arrow/internal/flatbuf/Block.go
@@ -54,12 +54,12 @@ func (rcv *Block) MutateMetaDataLength(n int32) bool {
 }
 
 /// Length of the data (this is aligned so there can be a gap between this and
-/// the metatdata).
+/// the metadata).
 func (rcv *Block) BodyLength() int64 {
return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(16))
 }
 /// Length of the data (this is aligned so there can be a gap between this and
-/// the metatdata).
+/// the metadata).
 func (rcv *Block) MutateBodyLength(n int64) bool {
return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(16), n)
 }
diff --git a/go/arrow/internal/flatbuf/BodyCompression.go 
b/go/arrow/internal/flatbuf/BodyCompression.go
new file mode 100644
index 000..a0efeb1
--- /dev/null
+++ b/go/arrow/internal/flatbuf/BodyCompression.go
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses th

[arrow] branch master updated: ARROW-12006: [Java] Fix checkstyle config to work on Windows

2021-03-26 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 5be6978  ARROW-12006: [Java] Fix checkstyle config to work on Windows
5be6978 is described below

commit 5be69789eeac0f2c357cfcd0d329c518848adebc
Author: Bob Tinsman 
AuthorDate: Fri Mar 26 21:21:49 2021 -0700

ARROW-12006: [Java] Fix checkstyle config to work on Windows

On Windows checkstyle will fail if you preserve LF endings but this change 
looks for LF explicitly.

Closes #9819 from bobtins/java-win-ez

Authored-by: Bob Tinsman 
Signed-off-by: Micah Kornfield 
---
 java/dev/checkstyle/checkstyle.xml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/java/dev/checkstyle/checkstyle.xml 
b/java/dev/checkstyle/checkstyle.xml
index 40c0455..c27f382 100644
--- a/java/dev/checkstyle/checkstyle.xml
+++ b/java/dev/checkstyle/checkstyle.xml
@@ -48,7 +48,9 @@
   
 
 
-
+
+
+
 
 
 


[arrow] branch master updated (b2fa55d -> 1d4856f)

2021-03-26 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from b2fa55d  ARROW-12045: [Go][Parquet] Initial Chunk of Parquet port to Go
 add 1d4856f  ARROW-11899: [Java] Refactor the compression codec implementation into core/Arrow specific parts

No new revisions were added by this update.

Summary of changes:
 .../arrow/compression/Lz4CompressionCodec.java |  85 ++-
 .../compression/AbstractCompressionCodec.java  | 116 +
 2 files changed, 127 insertions(+), 74 deletions(-)
 create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java


[arrow] branch master updated (2c5e264 -> b2fa55d)

2021-03-26 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 2c5e264  ARROW-11365: [Rust] [Parquet] Logical type printer and parser
 add b2fa55d  ARROW-12045: [Go][Parquet] Initial Chunk of Parquet port to Go

No new revisions were added by this update.

Summary of changes:
 ci/scripts/go_build.sh |7 +
 ci/scripts/go_test.sh  |8 +
 dev/release/rat_exclude_files.txt  |1 +
 go/arrow/bitutil/bitutil.go|3 +
 go/parquet/.gitignore  |   31 +
 go/{arrow => parquet}/LICENSE.txt  |0
 go/parquet/doc.go  |   68 +
 go/parquet/go.mod  |   31 +
 go/parquet/go.sum  |  147 +
 go/parquet/internal/bmi/Makefile   |   40 +
 go/parquet/internal/bmi/_lib/bitmap_bmi2.c |   30 +
 go/parquet/internal/bmi/_lib/bitmap_bmi2.s |  140 +
 go/parquet/internal/bmi/bitmap_bmi2.go |   48 +
 go/parquet/internal/bmi/bitmap_bmi2.s  |  117 +
 go/parquet/internal/bmi/bmi_init.go|   60 +
 go/parquet/internal/bmi/bmi_noasm.go   |  249 +
 go/parquet/internal/testutils/random.go|  452 ++
 go/parquet/internal/testutils/random_arrow.go  |  488 ++
 go/parquet/internal/utils/Makefile |   68 +
 go/parquet/internal/utils/_lib/arch.h  |   27 +
 go/parquet/internal/utils/_lib/bit_packing_avx2.c  | 1879 ++
 go/parquet/internal/utils/_lib/bit_packing_avx2.s  | 4012 +++
 go/parquet/internal/utils/_lib/min_max.c   |   73 +
 go/parquet/internal/utils/_lib/min_max_avx2.s  | 1366 
 go/parquet/internal/utils/_lib/min_max_sse4.s  |  613 ++
 go/parquet/internal/utils/_lib/unpack_bool.c   |   30 +
 go/parquet/internal/utils/_lib/unpack_bool_avx2.s  | 6293 ++
 go/parquet/internal/utils/_lib/unpack_bool_sse4.s  |  104 +
 go/parquet/internal/utils/bit_benchmark_test.go|  220 +
 go/parquet/internal/utils/bit_block_counter.go |  263 +
 .../internal/utils/bit_block_counter_test.go   |  201 +
 go/parquet/internal/utils/bit_packing.go   |   35 +
 go/parquet/internal/utils/bit_packing_avx2.go  |   53 +
 go/parquet/internal/utils/bit_packing_avx2.s   | 3439 ++
 go/parquet/internal/utils/bit_packing_default.go   | 1941 ++
 go/parquet/internal/utils/bit_packing_noasm.go |   23 +
 go/parquet/internal/utils/bit_reader.go|  348 +
 go/parquet/internal/utils/bit_reader_test.go   |  619 ++
 go/parquet/internal/utils/bit_run_reader.go|  148 +
 go/parquet/internal/utils/bit_run_reader_test.go   |  158 +
 go/parquet/internal/utils/bit_set_run_reader.go|  345 +
 .../internal/utils/bit_set_run_reader_test.go  |  276 +
 go/parquet/internal/utils/bit_writer.go|  182 +
 go/parquet/internal/utils/bitmap_reader.go |   72 +
 go/parquet/internal/utils/bitmap_reader_test.go|   75 +
 go/parquet/internal/utils/bitmap_writer.go |  277 +
 go/parquet/internal/utils/bitmap_writer_test.go|  304 +
 go/parquet/internal/utils/clib_amd64.s |   87 +
 go/parquet/internal/utils/dictionary.go|   87 +
 go/parquet/internal/utils/math.go  |   49 +
 go/parquet/internal/utils/min_max.go   |  120 +
 go/parquet/internal/utils/min_max_amd64.go |   43 +
 go/parquet/internal/utils/min_max_avx2.go  |   58 +
 go/parquet/internal/utils/min_max_avx2.s   | 1352 
 go/parquet/internal/utils/min_max_noasm.go |   27 +
 go/parquet/internal/utils/min_max_sse4.go  |   56 +
 go/parquet/internal/utils/min_max_sse4.s   |  592 ++
 go/parquet/internal/utils/physical_types.tmpldata  |   52 +
 go/parquet/internal/utils/rle.go   |  583 ++
 go/parquet/internal/utils/typed_rle_dict.gen.go| 1375 
 .../internal/utils/typed_rle_dict.gen.go.tmpl  |  218 +
 go/parquet/internal/utils/unpack_bool.go   |   26 +
 go/parquet/internal/utils/unpack_bool_amd64.go |   41 +
 go/parquet/internal/utils/unpack_bool_avx2.go  |   29 +
 go/parquet/internal/utils/unpack_bool_avx2.s   | 6961 
 go/parquet/internal/utils/unpack_bool_noasm.go |   25 +
 go/parquet/internal/utils/unpack_bool_sse4.go  |   29 +
 go/parquet/internal/utils/unpack_bool_sse4.s   |   88 +
 go/parquet/internal/utils/write_utils.go   |   57 +
 go/parquet/tools.go|   25 +
 go/parquet/types.go|  167 +
 71 files changed, 37511 insertions(+)
 create mode 100644 go/parquet/.gitignore
 copy go/{arrow => parquet}/LICENSE.txt (100%)
 create mode 100644 go/parquet/doc.go
 create mode 

[arrow] branch master updated (d005075 -> 8e43f23)

2021-03-23 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from d005075  ARROW-12012: [Java][JDBC] Fix BinaryConsumer reallocation
 add 8e43f23  ARROW-11497: [Python] Provide parquet enable compliant nested type flag for python binding

No new revisions were added by this update.

Summary of changes:
 python/pyarrow/_dataset.pyx|   4 +
 python/pyarrow/_parquet.pxd|   5 +-
 python/pyarrow/_parquet.pyx|  17 +++-
 python/pyarrow/parquet.py  |  29 ++
 .../tests/parquet/test_compliant_nested_type.py| 113 +
 5 files changed, 164 insertions(+), 4 deletions(-)
 create mode 100644 python/pyarrow/tests/parquet/test_compliant_nested_type.py


[arrow] branch master updated (946bfd9 -> 7e711c9)

2021-03-17 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 946bfd9  ARROW-11066: [FlightRPC][Java] Make zero-copy writes a 
configurable option
 add 7e711c9  ARROW-10880: [Java] Support compressing RecordBatch IPC 
buffers by LZ4

No new revisions were added by this update.

Summary of changes:
 dev/archery/archery/integration/runner.py  |   3 +-
 java/{algorithm => compression}/pom.xml|  22 +--
 .../compression/CommonsCompressionFactory.java}|  23 ++-
 .../arrow/compression/Lz4CompressionCodec.java | 157 
 .../arrow/compression/TestCompressionCodec.java| 209 +
 .../org/apache/arrow/memory/util/MemoryUtil.java   |   8 +-
 java/pom.xml   |   1 +
 java/tools/pom.xml |   5 +
 .../java/org/apache/arrow/tools/Integration.java   |   3 +-
 .../java/org/apache/arrow/tools/StreamToFile.java  |   3 +-
 .../java/org/apache/arrow/vector/VectorLoader.java |  39 +++-
 .../arrow/vector/compression/CompressionCodec.java |  23 ++-
 .../arrow/vector/compression/CompressionUtil.java  |  81 ++--
 .../vector/compression/NoCompressionCodec.java |  21 ++-
 .../apache/arrow/vector/ipc/ArrowFileReader.java   |  16 +-
 .../org/apache/arrow/vector/ipc/ArrowReader.java   |  11 +-
 .../apache/arrow/vector/ipc/ArrowStreamReader.java |  42 -
 .../arrow/vector/ipc/message/ArrowRecordBatch.java |   4 +-
 18 files changed, 605 insertions(+), 66 deletions(-)
 copy java/{algorithm => compression}/pom.xml (82%)
 copy 
java/{flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientBearerHeaderHandler.java
 => 
compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java}
 (55%)
 create mode 100644 
java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
 create mode 100644 
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java



[arrow] branch master updated (3decc46 -> 946bfd9)

2021-03-17 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 3decc46  ARROW-11997: [Python] concat_tables crashes python interpreter
 add 946bfd9  ARROW-11066: [FlightRPC][Java] Make zero-copy writes a 
configurable option

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/arrow/flight/ArrowMessage.java | 114 +
 .../arrow/flight/OutboundStreamListener.java   |  16 +++
 .../arrow/flight/OutboundStreamListenerImpl.java   |   8 +-
 .../arrow/flight/grpc/AddWritableBuffer.java   |  18 +++-
 .../apache/arrow/flight/TestBasicOperation.java|   9 +-
 .../org/apache/arrow/flight/TestDoExchange.java|  57 +++
 .../arrow/flight/perf/PerformanceTestServer.java   |   3 +-
 7 files changed, 199 insertions(+), 26 deletions(-)



[arrow] branch master updated: ARROW-11559: [C++] Add regression file

2021-02-10 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new b227038  ARROW-11559: [C++] Add regression file
b227038 is described below

commit b227038b26706c61bdae5b696d2e5b2f9427cf88
Author: Antoine Pitrou 
AuthorDate: Wed Feb 10 20:37:19 2021 -0800

ARROW-11559: [C++] Add regression file

Exercise that the following OSS-Fuzz issue is fixed:
- https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=30311

Closes #9463 from pitrou/ARROW-11559-regression-file

Authored-by: Antoine Pitrou 
Signed-off-by: Micah Kornfield 
---
 testing | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testing b/testing
index 039ca8f..e8ce323 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 039ca8f480bcbd87d0fea63b36cfb16ec85dccc7
+Subproject commit e8ce32338f2dfeca3a5126f7677bdee159604000



[arrow] branch master updated (dcc56cc -> 0d4af19)

2021-02-09 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from dcc56cc  ARROW-11542: [Rust] fix validity bitmap buffer length count 
in json reader
 add 0d4af19  ARROW-11559: [C++] Use smarter Flatbuffers verification 
parameters

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/ipc/metadata_internal.h | 16 ++--
 cpp/src/arrow/ipc/reader.cc   |  8 +++-
 2 files changed, 17 insertions(+), 7 deletions(-)



[arrow] branch master updated (599a63e -> d25660e)

2021-02-09 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 599a63e  ARROW-11428: [Rust] Add power_scalar kernel
 add d25660e  ARROW-7808: [Java][Dataset] Implement Dataset Java API by JNI 
to C++

No new revisions were added by this update.

Summary of changes:
 .github/workflows/java_jni.yml |   2 +-
 ci/docker/linux-apt-jni.dockerfile |   3 +-
 ci/scripts/java_test.sh|   4 +-
 cpp/.gitignore |   3 +
 cpp/src/arrow/dataset/discovery.cc |  11 +
 cpp/src/arrow/dataset/discovery.h  |  10 +
 cpp/src/jni/CMakeLists.txt |   5 +-
 cpp/src/jni/dataset/CMakeLists.txt |  65 +++
 cpp/src/jni/dataset/jni_util.cc| 242 +
 cpp/src/jni/dataset/jni_util.h | 135 +
 cpp/src/jni/dataset/jni_util_test.cc   | 134 +
 cpp/src/jni/dataset/jni_wrapper.cc | 571 +
 {cpp/src/jni => java/dataset}/CMakeLists.txt   |  27 +-
 java/dataset/pom.xml   | 134 +
 .../org/apache/arrow/dataset/file/FileFormat.java  |  36 ++
 .../dataset/file/FileSystemDatasetFactory.java |  38 ++
 .../org/apache/arrow/dataset/file/JniWrapper.java  |  47 ++
 .../dataset/jni/DirectReservationListener.java |  97 
 .../org/apache/arrow/dataset/jni/JniLoader.java|  94 
 .../org/apache/arrow/dataset/jni/JniWrapper.java   | 105 
 .../apache/arrow/dataset/jni/NativeContext.java|  53 ++
 .../apache/arrow/dataset/jni/NativeDataset.java|  56 ++
 .../arrow/dataset/jni/NativeDatasetFactory.java| 104 
 .../jni/NativeInstanceReleasedException.java   |  31 ++
 .../apache/arrow/dataset/jni/NativeMemoryPool.java |  76 +++
 .../arrow/dataset/jni/NativeRecordBatchHandle.java | 106 
 .../apache/arrow/dataset/jni/NativeScanTask.java   |  46 ++
 .../apache/arrow/dataset/jni/NativeScanner.java| 170 ++
 .../arrow/dataset/jni/ReservationListener.java |  36 ++
 .../apache/arrow/dataset/scanner/ScanOptions.java  |  44 ++
 .../org/apache/arrow/dataset/scanner/ScanTask.java |  42 ++
 .../org/apache/arrow/dataset/scanner/Scanner.java  |  41 ++
 .../org/apache/arrow/dataset/source/Dataset.java   |  35 ++
 .../arrow/dataset/source/DatasetFactory.java   |  51 ++
 .../arrow/memory/NativeUnderlyingMemory.java   |  81 +++
 .../apache/arrow/dataset/ParquetWriteSupport.java  | 123 +
 .../java/org/apache/arrow/dataset/TestDataset.java |  97 
 .../arrow/dataset/file/TestFileSystemDataset.java  | 292 +++
 .../dataset/file/TestFileSystemDatasetFactory.java |  48 ++
 .../arrow/dataset/jni/TestNativeDataset.java   |  33 ++
 .../arrow/dataset/jni/TestReservationListener.java |  95 
 .../arrow/memory/TestNativeUnderlyingMemory.java   | 110 
 .../src/test/resources/avroschema/user.avsc|  26 +
 .../org/apache/arrow/memory/AllocationManager.java |   4 +-
 java/pom.xml   |   1 +
 .../apache/arrow/vector/util/SchemaUtility.java|  63 +++
 .../java/org/apache/arrow/util/TestSchemaUtil.java |  51 ++
 47 files changed, 3667 insertions(+), 11 deletions(-)
 create mode 100644 cpp/src/jni/dataset/CMakeLists.txt
 create mode 100644 cpp/src/jni/dataset/jni_util.cc
 create mode 100644 cpp/src/jni/dataset/jni_util.h
 create mode 100644 cpp/src/jni/dataset/jni_util_test.cc
 create mode 100644 cpp/src/jni/dataset/jni_wrapper.cc
 copy {cpp/src/jni => java/dataset}/CMakeLists.txt (51%)
 create mode 100644 java/dataset/pom.xml
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/jni/DirectReservationListener.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniLoader.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeContext.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDatasetFactory.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeInstanceReleasedException.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java
 create mode 100644 
java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeRecordBatchHandle.java
 create mode 100644 
java/dataset/sr

[arrow] branch master updated: ARROW-11081: [Java] Make IPC option immutable

2021-02-07 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 6609270  ARROW-11081: [Java] Make IPC option immutable
6609270 is described below

commit 66092708abadf616ff8d8edf099b07e1228cb96e
Author: liyafan82 
AuthorDate: Sun Feb 7 15:28:43 2021 -0800

ARROW-11081: [Java] Make IPC option immutable

By making it immutable, the following benefits can be obtained:

1. It makes the code easier to reason about.
2. It allows JIT to make more optimizations.
3. Immutable objects can be shared, so many object allocations can be 
avoided.

Closes #9053 from liyafan82/fly_1231_opt

Authored-by: liyafan82 
Signed-off-by: Micah Kornfield 
---
 .../main/java/org/apache/arrow/flight/ArrowMessage.java   | 14 +++---
 .../src/main/java/org/apache/arrow/flight/FlightInfo.java |  4 ++--
 .../org/apache/arrow/flight/OutboundStreamListener.java   |  4 ++--
 .../main/java/org/apache/arrow/flight/SchemaResult.java   |  2 +-
 .../java/org/apache/arrow/flight/TestBasicOperation.java  |  4 ++--
 .../java/org/apache/arrow/flight/TestMetadataVersion.java |  7 ---
 .../org/apache/arrow/vector/ipc/ArrowStreamWriter.java|  2 +-
 .../java/org/apache/arrow/vector/ipc/ArrowWriter.java |  2 +-
 .../org/apache/arrow/vector/ipc/message/IpcOption.java| 15 +--
 .../arrow/vector/ipc/message/MessageSerializer.java   | 14 +++---
 .../apache/arrow/vector/ipc/MessageSerializerTest.java|  8 
 .../apache/arrow/vector/ipc/TestArrowReaderWriter.java|  6 +++---
 .../java/org/apache/arrow/vector/ipc/TestRoundTrip.java   |  9 +++--
 13 files changed, 50 insertions(+), 41 deletions(-)

diff --git 
a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java
 
b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java
index 06d3bd3..9681fa8 100644
--- 
a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java
+++ 
b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java
@@ -155,7 +155,7 @@ class ArrowMessage implements AutoCloseable {
   }
 
   public ArrowMessage(ArrowDictionaryBatch batch, IpcOption option) {
-this.writeOption = new IpcOption();
+this.writeOption = option;
 ByteBuffer serializedMessage = MessageSerializer.serializeMetadata(batch, 
writeOption);
 serializedMessage = serializedMessage.slice();
 this.message = MessageMetadataResult.create(serializedMessage, 
serializedMessage.remaining());
@@ -173,7 +173,7 @@ class ArrowMessage implements AutoCloseable {
*/
   public ArrowMessage(ArrowBuf appMetadata) {
 // No need to take IpcOption as it's not used to serialize this kind of 
message.
-this.writeOption = new IpcOption();
+this.writeOption = IpcOption.DEFAULT;
 this.message = null;
 this.bufs = ImmutableList.of();
 this.descriptor = null;
@@ -183,7 +183,7 @@ class ArrowMessage implements AutoCloseable {
 
   public ArrowMessage(FlightDescriptor descriptor) {
 // No need to take IpcOption as it's not used to serialize this kind of 
message.
-this.writeOption = new IpcOption();
+this.writeOption = IpcOption.DEFAULT;
 this.message = null;
 this.bufs = ImmutableList.of();
 this.descriptor = descriptor;
@@ -194,10 +194,10 @@ class ArrowMessage implements AutoCloseable {
   private ArrowMessage(FlightDescriptor descriptor, MessageMetadataResult 
message, ArrowBuf appMetadata,
ArrowBuf buf) {
 // No need to take IpcOption as this is used for deserialized ArrowMessage 
coming from the wire.
-this.writeOption = new IpcOption();
-if (message != null) {
-  this.writeOption.metadataVersion = 
MetadataVersion.fromFlatbufID(message.getMessage().version());
-}
+this.writeOption = message != null ?
+// avoid writing legacy ipc format by default
+new IpcOption(false, 
MetadataVersion.fromFlatbufID(message.getMessage().version())) :
+IpcOption.DEFAULT;
 this.message = message;
 this.descriptor = descriptor;
 this.appMetadata = appMetadata;
diff --git 
a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java 
b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java
index 8eb456b..e57b311 100644
--- 
a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java
+++ 
b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java
@@ -61,7 +61,7 @@ public class FlightInfo {
*/
   public FlightInfo(Schema schema, FlightDescriptor descriptor, 
List<FlightEndpoint> endpoints, long bytes,
   long records) {
-this(schema, descriptor, endpoints, bytes, records, new IpcOption());
+this(schema, descriptor, endpoints, bytes, records

[arrow] branch master updated (c3b972c -> 3121a8d)

2021-02-07 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from c3b972c  ARROW-11545: [Rust] [DataFusion] SendableRecordBatchStream 
should implement Sync
 add 3121a8d  ARROW-11483: [C++] Write integration JSON files compatible 
with the Java reader

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/testing/json_integration_test.cc | 85 ++
 cpp/src/arrow/testing/json_internal.cc | 42 -
 2 files changed, 113 insertions(+), 14 deletions(-)



[arrow] branch master updated (3c03531 -> ee4c5c3)

2021-01-31 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 3c03531  ARROW-11448: [C++] Fix tdigest build failure on Windows with 
Visual Studio
 add ee4c5c3  ARROW-11066: Revert "ARROW-11066: [Java][FlightRPC] fix 
zero-copy opt…

No new revisions were added by this update.

Summary of changes:
 .../main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java   | 6 +-
 .../java/org/apache/arrow/flight/perf/PerformanceTestServer.java| 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)



[arrow] branch master updated (e0b3c9a -> c3e3073)

2021-01-31 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from e0b3c9a  ARROW-10297: [Rust] Parameter for parquet-read to output data 
in json format, add "cli" feature to parquet crate
 add c3e3073  ARROW-11066: [Java][FlightRPC] fix zero-copy optimization

No new revisions were added by this update.

Summary of changes:
 .../main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java   | 6 +-
 .../java/org/apache/arrow/flight/perf/PerformanceTestServer.java| 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)



[arrow] branch master updated (f58f29d -> dfaa215)

2021-01-29 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from f58f29d  ARROW-11423: [R] value_counts and some StructArray methods
 add dfaa215  ARROW-11179: [Format] Make FB comments friendly to rust

No new revisions were added by this update.

Summary of changes:
 format/Message.fbs  |  2 +-
 format/Schema.fbs   |  5 +++--
 format/SparseTensor.fbs | 53 +
 3 files changed, 31 insertions(+), 29 deletions(-)



[arrow] branch master updated (e642354 -> 32ce8e4)

2021-01-04 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from e642354  ARROW-11096: [Rust] C data interface for [Large]binary
 add 32ce8e4  ARROW-10492: [Java][JDBC] Allow users to config the mapping 
between SQL types and Arrow types

No new revisions were added by this update.

Summary of changes:
 .../arrow/adapter/jdbc/ArrowVectorIterator.java|   9 +-
 .../arrow/adapter/jdbc/JdbcToArrowConfig.java  | 136 +++--
 .../adapter/jdbc/JdbcToArrowConfigBuilder.java |  12 +-
 .../arrow/adapter/jdbc/JdbcToArrowUtils.java   | 220 +
 .../arrow/adapter/jdbc/JdbcToArrowConfigTest.java  |  10 +-
 .../jdbc/h2/JdbcToArrowVectorIteratorTest.java |  54 +
 6 files changed, 250 insertions(+), 191 deletions(-)



[arrow] branch master updated (3453943 -> 57829f5)

2020-12-06 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 3453943  ARROW-10746: [C++] Bump gtest version + use GTEST_SKIP in 
tests
 add 57829f5  ARROW-10748: [Java][JDBC] Support consuming timestamp data 
when time zone is not available

No new revisions were added by this update.

Summary of changes:
 .../arrow/adapter/jdbc/JdbcToArrowUtils.java   |  8 +++-
 .../adapter/jdbc/consumer/TimestampConsumer.java   | 49 ++
 ...stampConsumer.java => TimestampTZConsumer.java} | 41 ++
 .../jdbc/h2/JdbcToArrowVectorIteratorTest.java | 46 +++-
 4 files changed, 78 insertions(+), 66 deletions(-)
 copy 
java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/{TimestampConsumer.java
 => TimestampTZConsumer.java} (65%)



[arrow] branch master updated (1e9ed5f -> 219b0dc)

2020-11-10 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 1e9ed5f  ARROW-10412: [C++] Improve grpc_cpp_plugin detection
 add 219b0dc  ARROW-10493: [C++][Parquet] Fix offset lost in 
MaybeReplaceValidity

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 35 +
 cpp/src/parquet/column_writer.cc  | 93 +--
 2 files changed, 121 insertions(+), 7 deletions(-)



[arrow] branch master updated (3ab3b4b -> 765f20c)

2020-10-27 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 3ab3b4b  ARROW-10397: [C++] Update comment to match change made in 
b1a7a73ff2
 add 765f20c  ARROW-10408: [Java] Bump Avro to 1.10.0

No new revisions were added by this update.

Summary of changes:
 java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)



[arrow] branch master updated (c3091dd -> 3ab3b4b)

2020-10-27 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from c3091dd  ARROW-10399: [R] Fix performance regression from 
cpp11::r_string
 add 3ab3b4b  ARROW-10397: [C++] Update comment to match change made in 
b1a7a73ff2

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/ipc/metadata_internal.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)



[arrow] branch master updated (c3091dd -> 3ab3b4b)

2020-10-27 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from c3091dd  ARROW-10399: [R] Fix performance regression from 
cpp11::r_string
 add 3ab3b4b  ARROW-10397: [C++] Update comment to match change made in 
b1a7a73ff2

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/ipc/metadata_internal.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)



[arrow] branch master updated (18495e0 -> 7189b91)

2020-10-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 18495e0  ARROW-10294: [Java] Resolve problems of DecimalVector APIs on 
ArrowBufs
 add 7189b91  ARROW-9475: [Java] Clean up usages of BaseAllocator, use 
BufferAllocator in…

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/arrow/memory/Accountant.java   |  3 +-
 .../org/apache/arrow/memory/AllocationManager.java | 39 --
 .../org/apache/arrow/memory/BaseAllocator.java | 16 ++---
 .../org/apache/arrow/memory/BufferAllocator.java   | 32 ++
 .../java/org/apache/arrow/memory/BufferLedger.java | 22 ++--
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/NettyAllocationManager.java   |  6 ++--
 .../org/apache/arrow/memory/TestBaseAllocator.java |  2 +-
 .../arrow/memory/TestNettyAllocationManager.java   |  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/UnsafeAllocationManager.java  |  4 +--
 12 files changed, 87 insertions(+), 45 deletions(-)



[arrow] branch master updated (18495e0 -> 7189b91)

2020-10-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 18495e0  ARROW-10294: [Java] Resolve problems of DecimalVector APIs on 
ArrowBufs
 add 7189b91  ARROW-9475: [Java] Clean up usages of BaseAllocator, use 
BufferAllocator in…

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/arrow/memory/Accountant.java   |  3 +-
 .../org/apache/arrow/memory/AllocationManager.java | 39 --
 .../org/apache/arrow/memory/BaseAllocator.java | 16 ++---
 .../org/apache/arrow/memory/BufferAllocator.java   | 32 ++
 .../java/org/apache/arrow/memory/BufferLedger.java | 22 ++--
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/NettyAllocationManager.java   |  6 ++--
 .../org/apache/arrow/memory/TestBaseAllocator.java |  2 +-
 .../arrow/memory/TestNettyAllocationManager.java   |  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/UnsafeAllocationManager.java  |  4 +--
 12 files changed, 87 insertions(+), 45 deletions(-)



[arrow] branch master updated (1d10f22 -> 18495e0)

2020-10-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 1d10f22  ARROW-10236: [Rust] Add can_cast_types to arrow cast kernel, 
use in DataFusion
 add 18495e0  ARROW-10294: [Java] Resolve problems of DecimalVector APIs on 
ArrowBufs

No new revisions were added by this update.

Summary of changes:
 .../src/main/codegen/data/ValueVectorTypes.tdd  |  2 +-
 .../src/main/codegen/templates/ComplexWriters.java  |  4 ++--
 .../codegen/templates/UnionFixedSizeListWriter.java |  2 +-
 .../src/main/codegen/templates/UnionListWriter.java |  4 ++--
 .../java/org/apache/arrow/vector/DecimalVector.java | 12 ++--
 .../arrow/vector/complex/impl/PromotableWriter.java |  2 +-
 .../apache/arrow/vector/util/DecimalUtility.java|  2 +-
 .../org/apache/arrow/vector/ITTestLargeVector.java  | 21 -
 8 files changed, 34 insertions(+), 15 deletions(-)



[arrow] branch master updated (18495e0 -> 7189b91)

2020-10-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 18495e0  ARROW-10294: [Java] Resolve problems of DecimalVector APIs on 
ArrowBufs
 add 7189b91  ARROW-9475: [Java] Clean up usages of BaseAllocator, use 
BufferAllocator in…

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/arrow/memory/Accountant.java   |  3 +-
 .../org/apache/arrow/memory/AllocationManager.java | 39 --
 .../org/apache/arrow/memory/BaseAllocator.java | 16 ++---
 .../org/apache/arrow/memory/BufferAllocator.java   | 32 ++
 .../java/org/apache/arrow/memory/BufferLedger.java | 22 ++--
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/NettyAllocationManager.java   |  6 ++--
 .../org/apache/arrow/memory/TestBaseAllocator.java |  2 +-
 .../arrow/memory/TestNettyAllocationManager.java   |  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/UnsafeAllocationManager.java  |  4 +--
 12 files changed, 87 insertions(+), 45 deletions(-)



[arrow] branch master updated (1d10f22 -> 18495e0)

2020-10-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 1d10f22  ARROW-10236: [Rust] Add can_cast_types to arrow cast kernel, 
use in DataFusion
 add 18495e0  ARROW-10294: [Java] Resolve problems of DecimalVector APIs on 
ArrowBufs

No new revisions were added by this update.

Summary of changes:
 .../src/main/codegen/data/ValueVectorTypes.tdd  |  2 +-
 .../src/main/codegen/templates/ComplexWriters.java  |  4 ++--
 .../codegen/templates/UnionFixedSizeListWriter.java |  2 +-
 .../src/main/codegen/templates/UnionListWriter.java |  4 ++--
 .../java/org/apache/arrow/vector/DecimalVector.java | 12 ++--
 .../arrow/vector/complex/impl/PromotableWriter.java |  2 +-
 .../apache/arrow/vector/util/DecimalUtility.java|  2 +-
 .../org/apache/arrow/vector/ITTestLargeVector.java  | 21 -
 8 files changed, 34 insertions(+), 15 deletions(-)



[arrow] branch master updated (1d10f22 -> 18495e0)

2020-10-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 1d10f22  ARROW-10236: [Rust] Add can_cast_types to arrow cast kernel, 
use in DataFusion
 add 18495e0  ARROW-10294: [Java] Resolve problems of DecimalVector APIs on 
ArrowBufs

No new revisions were added by this update.

Summary of changes:
 .../src/main/codegen/data/ValueVectorTypes.tdd  |  2 +-
 .../src/main/codegen/templates/ComplexWriters.java  |  4 ++--
 .../codegen/templates/UnionFixedSizeListWriter.java |  2 +-
 .../src/main/codegen/templates/UnionListWriter.java |  4 ++--
 .../java/org/apache/arrow/vector/DecimalVector.java | 12 ++--
 .../arrow/vector/complex/impl/PromotableWriter.java |  2 +-
 .../apache/arrow/vector/util/DecimalUtility.java|  2 +-
 .../org/apache/arrow/vector/ITTestLargeVector.java  | 21 -
 8 files changed, 34 insertions(+), 15 deletions(-)



[arrow] branch master updated (18495e0 -> 7189b91)

2020-10-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 18495e0  ARROW-10294: [Java] Resolve problems of DecimalVector APIs on 
ArrowBufs
 add 7189b91  ARROW-9475: [Java] Clean up usages of BaseAllocator, use 
BufferAllocator in…

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/arrow/memory/Accountant.java   |  3 +-
 .../org/apache/arrow/memory/AllocationManager.java | 39 --
 .../org/apache/arrow/memory/BaseAllocator.java | 16 ++---
 .../org/apache/arrow/memory/BufferAllocator.java   | 32 ++
 .../java/org/apache/arrow/memory/BufferLedger.java | 22 ++--
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/NettyAllocationManager.java   |  6 ++--
 .../org/apache/arrow/memory/TestBaseAllocator.java |  2 +-
 .../arrow/memory/TestNettyAllocationManager.java   |  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/UnsafeAllocationManager.java  |  4 +--
 12 files changed, 87 insertions(+), 45 deletions(-)



[arrow] branch master updated (18495e0 -> 7189b91)

2020-10-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 18495e0  ARROW-10294: [Java] Resolve problems of DecimalVector APIs on 
ArrowBufs
 add 7189b91  ARROW-9475: [Java] Clean up usages of BaseAllocator, use 
BufferAllocator in…

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/arrow/memory/Accountant.java   |  3 +-
 .../org/apache/arrow/memory/AllocationManager.java | 39 --
 .../org/apache/arrow/memory/BaseAllocator.java | 16 ++---
 .../org/apache/arrow/memory/BufferAllocator.java   | 32 ++
 .../java/org/apache/arrow/memory/BufferLedger.java | 22 ++--
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/NettyAllocationManager.java   |  6 ++--
 .../org/apache/arrow/memory/TestBaseAllocator.java |  2 +-
 .../arrow/memory/TestNettyAllocationManager.java   |  2 +-
 .../memory/DefaultAllocationManagerFactory.java|  2 +-
 .../arrow/memory/UnsafeAllocationManager.java  |  4 +--
 12 files changed, 87 insertions(+), 45 deletions(-)



[arrow] branch master updated (1d10f22 -> 18495e0)

2020-10-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 1d10f22  ARROW-10236: [Rust] Add can_cast_types to arrow cast kernel, 
use in DataFusion
 add 18495e0  ARROW-10294: [Java] Resolve problems of DecimalVector APIs on 
ArrowBufs

No new revisions were added by this update.

Summary of changes:
 .../src/main/codegen/data/ValueVectorTypes.tdd  |  2 +-
 .../src/main/codegen/templates/ComplexWriters.java  |  4 ++--
 .../codegen/templates/UnionFixedSizeListWriter.java |  2 +-
 .../src/main/codegen/templates/UnionListWriter.java |  4 ++--
 .../java/org/apache/arrow/vector/DecimalVector.java | 12 ++--
 .../arrow/vector/complex/impl/PromotableWriter.java |  2 +-
 .../apache/arrow/vector/util/DecimalUtility.java|  2 +-
 .../org/apache/arrow/vector/ITTestLargeVector.java  | 21 -
 8 files changed, 34 insertions(+), 15 deletions(-)



[arrow] branch decimal256 updated: Add BasicDecimal256 Multiplication Support (PR for decimal256 branch, not master) (#8344)

2020-10-12 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch decimal256
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/decimal256 by this push:
 new ccd88e2  Add BasicDecimal256 Multiplication Support (PR for decimal256 
branch, not master) (#8344)
ccd88e2 is described below

commit ccd88e2edeb0d40e5c0ae1261fe9f8f53f4a2a47
Author: Ezra 
AuthorDate: Mon Oct 12 14:44:28 2020 -0700

Add BasicDecimal256 Multiplication Support (PR for decimal256 branch, not 
master) (#8344)
---
 cpp/src/arrow/util/basic_decimal.cc | 167 
 cpp/src/arrow/util/basic_decimal.h  |  36 +--
 cpp/src/arrow/util/decimal.cc   |   5 +
 cpp/src/arrow/util/decimal.h|   3 +
 cpp/src/arrow/util/decimal_benchmark.cc |  16 +++
 cpp/src/arrow/util/decimal_test.cc  |  63 
 6 files changed, 245 insertions(+), 45 deletions(-)

diff --git a/cpp/src/arrow/util/basic_decimal.cc 
b/cpp/src/arrow/util/basic_decimal.cc
index ac85bd0..ea247b7 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -123,7 +123,7 @@ static const BasicDecimal128 ScaleMultipliersHalf[] = {
 #ifdef ARROW_USE_NATIVE_INT128
 static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
 #else
-static constexpr uint64_t kIntMask = 0xFFFFFFFF;
+static constexpr uint64_t kInt32Mask = 0xFFFFFFFF;
 #endif
 
 // same as ScaleMultipliers[38] - 1
@@ -254,67 +254,125 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
 
 namespace {
 
-// TODO: Remove this guard once it's used by BasicDecimal256
-#ifndef ARROW_USE_NATIVE_INT128
-// This method losslessly multiplies x and y into a 128 bit unsigned integer
-// whose high bits will be stored in hi and low bits in lo.
-void ExtendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+// Convenience wrapper type over 128 bit unsigned integers. We opt not to
+// replace the uint128_t type in int128_internal.h because it would require
+// significantly more implementation work to be done. This class merely
+// provides the minimum necessary set of functions to perform 128+ bit
+// multiplication operations when there may or may not be native support.
 #ifdef ARROW_USE_NATIVE_INT128
-  const __uint128_t r = static_cast<__uint128_t>(x) * y;
-  *lo = r & kInt64Mask;
-  *hi = r >> 64;
+struct uint128_t {
+  uint128_t() {}
+  uint128_t(uint64_t hi, uint64_t lo) : val_((static_cast<__uint128_t>(hi) << 
64) | lo) {}
+  uint128_t(const BasicDecimal128& decimal) {
+val_ = (static_cast<__uint128_t>(decimal.high_bits()) << 64) | 
decimal.low_bits();
+  }
+
+  uint64_t hi() { return val_ >> 64; }
+  uint64_t lo() { return val_ & kInt64Mask; }
+
+  uint128_t& operator+=(const uint128_t& other) {
+val_ += other.val_;
+return *this;
+  }
+
+  uint128_t& operator*=(const uint128_t& other) {
+val_ *= other.val_;
+return *this;
+  }
+
+  __uint128_t val_;
+};
+
 #else
-  // If we can't use a native fallback, perform multiplication
+// Multiply two 64 bit word components into a 128 bit result, with high bits
+// stored in hi and low bits in lo.
+inline void ExtendAndMultiply(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+  // Perform multiplication on two 64 bit words x and y into a 128 bit result
   // by splitting up x and y into 32 bit high/low bit components,
   // allowing us to represent the multiplication as
   // x * y = x_lo * y_lo + x_hi * y_lo * 2^32 + y_hi * x_lo * 2^32
-  // + x_hi * y_hi * 2^64.
+  // + x_hi * y_hi * 2^64
   //
-  // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi.
+  // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi
   // Therefore,
   // lo_lo is (x_lo * y_lo)_lo,
   // lo_hi is ((x_lo * y_lo)_hi + (x_hi * y_lo)_lo + (x_lo * y_hi)_lo)_lo,
   // hi_lo is ((x_hi * y_hi)_lo + (x_hi * y_lo)_hi + (x_lo * y_hi)_hi)_hi,
   // hi_hi is (x_hi * y_hi)_hi
-  const uint64_t x_lo = x & kIntMask;
-  const uint64_t y_lo = y & kIntMask;
+  const uint64_t x_lo = x & kInt32Mask;
+  const uint64_t y_lo = y & kInt32Mask;
   const uint64_t x_hi = x >> 32;
   const uint64_t y_hi = y >> 32;
 
   const uint64_t t = x_lo * y_lo;
-  const uint64_t t_lo = t & kIntMask;
+  const uint64_t t_lo = t & kInt32Mask;
   const uint64_t t_hi = t >> 32;
 
   const uint64_t u = x_hi * y_lo + t_hi;
-  const uint64_t u_lo = u & kIntMask;
+  const uint64_t u_lo = u & kInt32Mask;
   const uint64_t u_hi = u >> 32;
 
   const uint64_t v = x_lo * y_hi + u_lo;
   const uint64_t v_hi = v >> 32;
 
   *hi = x_hi * y_hi + u_hi + v_hi;
-  *lo = (v << 32) | t_lo;
-#endif
+  *lo = (v << 32) + t_lo;
 }
-#endif
 
-void MultiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t 
y_lo,
- 

[arrow] branch master updated (8a03003 -> e9a12fa)

2020-10-06 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 8a03003  ARROW-10201: [C++][CI] Disable S3 in arm64 job on Travis CI
 add e9a12fa  ARROW-10058: [C++] Improve repeated levels conversion without 
BMI2

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/level_conversion.cc  |  57 +---
 cpp/src/parquet/level_conversion.h   |   2 +-
 cpp/src/parquet/level_conversion_inc.h   | 242 +++
 cpp/src/parquet/level_conversion_test.cc |  24 ++-
 4 files changed, 235 insertions(+), 90 deletions(-)



[arrow] branch master updated (238a949 -> 51a3c88)

2020-10-02 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 238a949  ARROW-10160: [Rust] Improve DictionaryType documentation 
(clarify which type is which)
 add 51a3c88  ARROW-10127: Update specification for Decimal to allow for 
256-bits

No new revisions were added by this update.

Summary of changes:
 docs/source/format/CDataInterface.rst | 32 +---
 format/Schema.fbs | 10 --
 2 files changed, 21 insertions(+), 21 deletions(-)



[arrow] branch master updated (238a949 -> 51a3c88)

2020-10-02 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 238a949  ARROW-10160: [Rust] Improve DictionaryType documentation 
(clarify which type is which)
 add 51a3c88  ARROW-10127: Update specification for Decimal to allow for 
256-bits

No new revisions were added by this update.

Summary of changes:
 docs/source/format/CDataInterface.rst | 32 +---
 format/Schema.fbs | 10 --
 2 files changed, 21 insertions(+), 21 deletions(-)



[arrow] branch master updated (ad712e5 -> c68a76c)

2020-10-01 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from ad712e5  ARROW-10103: [Rust] Add contains kernel
 add c68a76c  ARROW-10057: [C++] Add hand-written Parquet nested tests

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 140 ++
 cpp/src/parquet/arrow/test_util.h |   6 +
 2 files changed, 146 insertions(+)



[arrow] branch decimal256 updated: ARROW-10102: [C++] Refactor BasicDecimal128 Multiplication to use unsigned helper

2020-10-01 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch decimal256
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/decimal256 by this push:
 new e6dc833  ARROW-10102: [C++] Refactor BasicDecimal128 Multiplication to 
use unsigned helper
e6dc833 is described below

commit e6dc83343d47c0c7c4ecc0a547359652defd69e2
Author: Ezra 
AuthorDate: Thu Oct 1 09:00:28 2020 -0700

ARROW-10102: [C++] Refactor BasicDecimal128 Multiplication to use unsigned 
helper

Closes #8279 from Luminarys/master

Authored-by: Ezra 
Signed-off-by: Micah Kornfield 
---
 cpp/src/arrow/util/basic_decimal.cc | 110 ++--
 cpp/src/arrow/util/decimal_test.cc  |   8 +++
 2 files changed, 88 insertions(+), 30 deletions(-)

diff --git a/cpp/src/arrow/util/basic_decimal.cc 
b/cpp/src/arrow/util/basic_decimal.cc
index 3e7daa3..ac85bd0 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -28,6 +28,7 @@
 #include 
 
 #include "arrow/util/bit_util.h"
+#include "arrow/util/int128_internal.h"
 #include "arrow/util/int_util_internal.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
@@ -119,8 +120,11 @@ static const BasicDecimal128 ScaleMultipliersHalf[] = {
 BasicDecimal128(271050543121376108LL, 9257742014424809472ULL),
 BasicDecimal128(2710505431213761085LL, 343699775700336640ULL)};
 
+#ifdef ARROW_USE_NATIVE_INT128
+static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
+#else
 static constexpr uint64_t kIntMask = 0xFFFFFFFF;
-static constexpr auto kCarryBit = static_cast<uint64_t>(1) << 
static_cast<uint64_t>(32);
+#endif
 
 // same as ScaleMultipliers[38] - 1
 static constexpr BasicDecimal128 kMaxValue =
@@ -248,40 +252,86 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
-
-  const uint64_t R0 = static_cast<uint64_t>(right.high_bits_) >> 32;
-  const uint64_t R1 = static_cast<uint64_t>(right.high_bits_) & kIntMask;
-  const uint64_t R2 = right.low_bits_ >> 32;
-  const uint64_t R3 = right.low_bits_ & kIntMask;
+namespace {
 
-  uint64_t product = L3 * R3;
-  low_bits_ = product & kIntMask;
-
-  uint64_t sum = product >> 32;
-
-  product = L2 * R3;
-  sum += product;
-  high_bits_ = static_cast<int64_t>(sum < product ? kCarryBit : 0);
+// TODO: Remove this guard once it's used by BasicDecimal256
+#ifndef ARROW_USE_NATIVE_INT128
+// This method losslessly multiplies x and y into a 128 bit unsigned integer
+// whose high bits will be stored in hi and low bits in lo.
+void ExtendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+#ifdef ARROW_USE_NATIVE_INT128
+  const __uint128_t r = static_cast<__uint128_t>(x) * y;
+  *lo = r & kInt64Mask;
+  *hi = r >> 64;
+#else
+  // If we can't use a native fallback, perform multiplication
+  // by splitting up x and y into 32 bit high/low bit components,
+  // allowing us to represent the multiplication as
+  // x * y = x_lo * y_lo + x_hi * y_lo * 2^32 + y_hi * x_lo * 2^32
+  // + x_hi * y_hi * 2^64.
+  //
+  // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi.
+  // Therefore,
+  // lo_lo is (x_lo * y_lo)_lo,
+  // lo_hi is ((x_lo * y_lo)_hi + (x_hi * y_lo)_lo + (x_lo * y_hi)_lo)_lo,
+  // hi_lo is ((x_hi * y_hi)_lo + (x_hi * y_lo)_hi + (x_lo * y_hi)_hi)_hi,
+  // hi_hi is (x_hi * y_hi)_hi
+  const uint64_t x_lo = x & kIntMask;
+  const uint64_t y_lo = y & kIntMask;
+  const uint64_t x_hi = x >> 32;
+  const uint64_t y_hi = y >> 32;
+
+  const uint64_t t = x_lo * y_lo;
+  const uint64_t t_lo = t & kIntMask;
+  const uint64_t t_hi = t >> 32;
+
+  const uint64_t u = x_hi * y_lo + t_hi;
+  const uint64_t u_lo = u & kIntMask;
+  const uint64_t u_hi = u >> 32;
+
+  const uint64_t v = x_lo * y_hi + u_lo;
+  const uint64_t v_hi = v >> 32;
+
+  *hi = x_hi * y_hi + u_hi + v_hi;
+  *lo = (v << 32) | t_lo;
+#endif
+}
+#endif
 
-  product = L3 * R2;
-  sum += product;
+void MultiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t 
y_lo,
+ uint64_t* hi, uint64_t* lo) {
+#ifdef ARROW_USE_NATIVE_INT128
+  const __uint128_t x = (static_cast<__uint128_t>(x_hi) << 64) | x_lo;
+  const __uint128_t y = (static_cast<__uint128_t>(y_hi) << 64) | y_lo;
+  const __uint128_t r = x * y;
+  *lo = r & kInt64Mask;
+  *hi = r >> 64;
+#

[arrow] branch master updated (424bcc6 -> fa44134)

2020-10-01 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 424bcc6  ARROW-10102: [C++] Refactor BasicDecimal128 Multiplication to 
use unsigned helper
 add fa44134  ARROW-10150: [C++] Fix crashes on invalid Parquet file

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/arrow/schema.cc  | 10 ++
 cpp/src/parquet/column_reader.cc |  4 
 testing  |  2 +-
 3 files changed, 11 insertions(+), 5 deletions(-)



[arrow] branch master updated (4e563bf -> 424bcc6)

2020-10-01 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 4e563bf  ARROW-7372: [C++] Allow creating dictionary array from simple 
JSON
 add 424bcc6  ARROW-10102: [C++] Refactor BasicDecimal128 Multiplication to 
use unsigned helper

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/util/basic_decimal.cc | 110 ++--
 cpp/src/arrow/util/decimal_test.cc  |   8 +++
 2 files changed, 88 insertions(+), 30 deletions(-)



[arrow] branch decimal256 updated: Decimal256 java implementation with working integration tests. (#8281)

2020-09-25 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch decimal256
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/decimal256 by this push:
 new 9965614  Decimal256 java implementation with working integration 
tests. (#8281)
9965614 is described below

commit 99656142ad139dd6f16903cec50af15877b10de6
Author: emkornfield 
AuthorDate: Fri Sep 25 20:50:45 2020 -0700

Decimal256 java implementation with working integration tests. (#8281)

This PR completes round trip between C++ and Java integration tests.
---
 cpp/src/arrow/testing/json_internal.cc |  14 ++-
 dev/archery/archery/integration/datagen.py |  30 --
 .../java/org/apache/arrow/AvroToArrowUtils.java|   2 +-
 .../arrow/adapter/jdbc/JdbcToArrowUtils.java   |   2 +-
 java/vector/src/main/codegen/data/ArrowTypes.tdd   |   2 +-
 .../src/main/codegen/data/ValueVectorTypes.tdd |  17 +++
 .../codegen/templates/AbstractFieldWriter.java |   8 +-
 .../templates/AbstractPromotableFieldWriter.java   |  26 -
 .../src/main/codegen/templates/ArrowType.java  |   5 +-
 .../src/main/codegen/templates/ComplexCopier.java  |   9 +-
 .../src/main/codegen/templates/ComplexWriters.java |  27 ++---
 .../main/codegen/templates/DenseUnionReader.java   |   4 +-
 .../main/codegen/templates/DenseUnionVector.java   |  12 +--
 .../main/codegen/templates/DenseUnionWriter.java   |  12 +--
 .../main/codegen/templates/HolderReaderImpl.java   |   5 +
 .../src/main/codegen/templates/StructWriters.java  |   4 +-
 .../templates/UnionFixedSizeListWriter.java|  54 ++
 .../main/codegen/templates/UnionListWriter.java|  46 
 .../src/main/codegen/templates/UnionMapWriter.java |  15 +++
 .../src/main/codegen/templates/UnionReader.java|   8 +-
 .../src/main/codegen/templates/UnionVector.java|  28 ++---
 .../src/main/codegen/templates/UnionWriter.java|  50 -
 .../{DecimalVector.java => BigDecimalVector.java}  |  84 +++
 .../java/org/apache/arrow/vector/BufferLayout.java |   5 +-
 .../org/apache/arrow/vector/DecimalVector.java |   6 +-
 .../java/org/apache/arrow/vector/TypeLayout.java   |   2 +-
 .../vector/complex/impl/PromotableWriter.java  |  38 ++-
 .../apache/arrow/vector/ipc/JsonFileReader.java|  24 -
 .../apache/arrow/vector/ipc/JsonFileWriter.java|  13 ++-
 .../java/org/apache/arrow/vector/types/Types.java  |  19 
 .../apache/arrow/vector/util/DecimalUtility.java   |  65 ++-
 .../vector/validate/ValidateVectorTypeVisitor.java |   3 +-
 ...ecimalVector.java => TestBigDecimalVector.java} |  76 ++---
 .../org/apache/arrow/vector/TestDecimalVector.java |  20 ++--
 .../org/apache/arrow/vector/TestTypeLayout.java|   6 +-
 .../org/apache/arrow/vector/TestVectorAlloc.java   |   2 +-
 .../vector/complex/impl/TestComplexCopier.java |  18 ++--
 .../vector/complex/writer/TestComplexWriter.java   |   6 +-
 .../apache/arrow/vector/types/pojo/TestSchema.java |   2 +-
 .../arrow/vector/util/DecimalUtilityTest.java  | 119 +++--
 40 files changed, 592 insertions(+), 296 deletions(-)

diff --git a/cpp/src/arrow/testing/json_internal.cc 
b/cpp/src/arrow/testing/json_internal.cc
index 4a191a6..fae0e35 100644
--- a/cpp/src/arrow/testing/json_internal.cc
+++ b/cpp/src/arrow/testing/json_internal.cc
@@ -839,8 +839,20 @@ Status GetDecimal(const RjObject& json_type, 
std::shared_ptr* type) {
   ARROW_ASSIGN_OR_RAISE(const int32_t precision,
 GetMemberInt(json_type, "precision"));
   ARROW_ASSIGN_OR_RAISE(const int32_t scale, GetMemberInt(json_type, 
"scale"));
+  int32_t bit_width = 128;
+  Result maybe_bit_width = GetMemberInt(json_type, 
"bitWidth");
+  if (maybe_bit_width.ok()) {
+bit_width = maybe_bit_width.ValueOrDie();
+  }
 
-  *type = decimal(precision, scale);
+  if (bit_width == 128) {
+*type = decimal128(precision, scale);
+  } else if (bit_width == 256) {
+*type = decimal256(precision, scale);
+  } else {
+return Status::Invalid("Only 128 bit and 256 Decimals are supported. 
Received",
+   bit_width);
+  }
   return Status::OK();
 }
 
diff --git a/dev/archery/archery/integration/datagen.py 
b/dev/archery/archery/integration/datagen.py
index b740198..5d62835 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -401,7 +401,7 @@ class FloatingPointField(PrimitiveField):
 DECIMAL_PRECISION_TO_VALUE = {
 key: (1 << (8 * i - 1)) - 1 for i, key in enumerate(
 [1, 3, 5, 7, 10, 12, 15, 17, 19, 22, 24, 27, 29, 32, 34, 36,
- 38, 40, 42, 44, 50, 60, 70],
+ 40, 42, 44, 50, 60, 70],
 start=1,
 )
 }
@@ -1274,20 +1274,29 @@ def generate_null_trivial_case(batch_sizes):
 return _generate_file('null_trivi

[arrow] branch decimal256 updated: Archery C++ round trip working. Java disabled. Fix c-bridge (#8268)

2020-09-25 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch decimal256
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/decimal256 by this push:
 new 50c956b  Archery C++ round trip working.  Java disabled.  Fix c-bridge 
(#8268)
50c956b is described below

commit 50c956bce0f38566e79ff2b7318f3da3b837d917
Author: emkornfield 
AuthorDate: Fri Sep 25 01:00:58 2020 -0700

Archery C++ round trip working.  Java disabled.  Fix c-bridge (#8268)

Archery lint issue needs to be fixed, i'll do that in a follow-up
---
 cpp/src/arrow/c/bridge.cc  | 22 ++
 cpp/src/arrow/c/bridge_test.cc |  2 ++
 cpp/src/arrow/ipc/metadata_internal.cc | 28 +---
 dev/archery/archery/integration/datagen.py | 19 ++-
 4 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 1585b50..b5af364 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -304,8 +304,15 @@ struct SchemaExporter {
   }
 
   Status Visit(const DecimalType& type) {
-return SetFormat("d:" + std::to_string(type.precision()) + "," +
- std::to_string(type.scale()));
+if (type.bit_width() == 128) {
+  // 128 is the default bit-width
+  return SetFormat("d:" + std::to_string(type.precision()) + "," +
+   std::to_string(type.scale()));
+} else {
+  return SetFormat("d:" + std::to_string(type.precision()) + "," +
+   std::to_string(type.scale()) + "," +
+   std::to_string(type.bit_width()));
+}
   }
 
   Status Visit(const BinaryType& type) { return SetFormat("z"); }
@@ -972,13 +979,20 @@ struct SchemaImporter {
   Status ProcessDecimal() {
 RETURN_NOT_OK(f_parser_.CheckNext(':'));
 ARROW_ASSIGN_OR_RAISE(auto prec_scale, 
f_parser_.ParseInts(f_parser_.Rest()));
-if (prec_scale.size() != 2) {
+// 3 elements indicates bit width was communicated as well.
+if (prec_scale.size() != 2 && prec_scale.size() != 3) {
   return f_parser_.Invalid();
 }
 if (prec_scale[0] <= 0 || prec_scale[1] <= 0) {
   return f_parser_.Invalid();
 }
-type_ = decimal(prec_scale[0], prec_scale[1]);
+if (prec_scale.size() == 2 || prec_scale[2] == 128) {
+  type_ = decimal(prec_scale[0], prec_scale[1]);
+} else if (prec_scale[2] == 256) {
+  type_ = decimal256(prec_scale[0], prec_scale[1]);
+} else {
+  return f_parser_.Invalid();
+}
 return Status::OK();
   }
 
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 6695d6e..ecb5655 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -277,6 +277,7 @@ TEST_F(TestSchemaExport, Primitive) {
   TestPrimitive(large_utf8(), "U");
 
   TestPrimitive(decimal(16, 4), "d:16,4");
+  TestPrimitive(decimal256(16, 4), "d:16,4,256");
 }
 
 TEST_F(TestSchemaExport, Temporal) {
@@ -736,6 +737,7 @@ TEST_F(TestArrayExport, Primitive) {
   TestPrimitive(large_utf8(), R"(["foo", "bar", null])");
 
   TestPrimitive(decimal(16, 4), R"(["1234.5670", null])");
+  TestPrimitive(decimal256(16, 4), R"(["1234.5670", null])");
 }
 
 TEST_F(TestArrayExport, PrimitiveSliced) {
diff --git a/cpp/src/arrow/ipc/metadata_internal.cc 
b/cpp/src/arrow/ipc/metadata_internal.cc
index fe43149..cb26a15 100644
--- a/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/cpp/src/arrow/ipc/metadata_internal.cc
@@ -236,7 +236,8 @@ static inline TimeUnit::type 
FromFlatbufferUnit(flatbuf::TimeUnit unit) {
   return TimeUnit::SECOND;
 }
 
-constexpr int32_t kDecimalBitWidth = 128;
+constexpr int32_t kDecimalBitWidth128 = 128;
+constexpr int32_t kDecimalBitWidth256 = 256;
 
 Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
   const std::vector>& 
children,
@@ -273,10 +274,13 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, 
const void* type_data,
   return Status::OK();
 case flatbuf::Type::Decimal: {
   auto dec_type = static_cast(type_data);
-  if (dec_type->bitWidth() != kDecimalBitWidth) {
-return Status::Invalid("Library only supports 128-bit decimal values");
+  if (dec_type->bitWidth() == kDecimalBitWidth128) {
+return Decimal128Type::Make(dec_type->precision(), 
dec_type->scale()).Value(out);
+  } else if (dec_type->bitWidth() == kDecimalBitWidth256) {
+return Decimal256Type::Make(dec_type->precision(), 
dec_type->scale()).Value(out);
+  } else {
+return Status::Invalid("Library only supports 128-bit

[arrow] branch master updated (8563b42 -> 66aad9d)

2020-09-22 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 8563b42  PARQUET-1878: [C++] lz4 codec is not compatible with Hadoop 
Lz4Codec
 add 66aad9d  ARROW-9010: [Java] Framework and interface changes for 
RecordBatch IPC buffer compression

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/arrow/flight/ArrowMessage.java |  9 
 .../java/org/apache/arrow/vector/VectorLoader.java | 13 +++--
 .../org/apache/arrow/vector/VectorUnloader.java| 30 +--
 .../arrow/vector/compression/CompressionCodec.java | 51 ++
 .../arrow/vector/compression/CompressionUtil.java  | 60 ++
 .../vector/compression/NoCompressionCodec.java | 54 +++
 ...Serializable.java => ArrowBodyCompression.java} | 37 ++---
 .../arrow/vector/ipc/message/ArrowRecordBatch.java | 41 +--
 .../vector/ipc/message/MessageSerializer.java  |  8 ++-
 9 files changed, 283 insertions(+), 20 deletions(-)
 create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java
 create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java
 create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java
 copy 
java/vector/src/main/java/org/apache/arrow/vector/ipc/message/{FBSerializable.java
 => ArrowBodyCompression.java} (56%)



[arrow] branch master updated (ca12cd1 -> 7a532ed)

2020-09-17 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from ca12cd1  ARROW-10024: [C++][Parquet] Create nested reading benchmarks
 add 7a532ed  ARROW-8678: [C++/Python][Parquet] Remove old writer code path

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/arrow/writer.cc  | 356 +--
 cpp/src/parquet/arrow/writer.h   |   2 -
 python/pyarrow/_dataset.pyx  |   3 +-
 python/pyarrow/_parquet.pyx  |   1 +
 python/pyarrow/parquet.py|   7 +-
 python/pyarrow/tests/test_parquet.py |   8 -
 6 files changed, 5 insertions(+), 372 deletions(-)



[arrow] branch master updated (ca12cd1 -> 7a532ed)

2020-09-17 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from ca12cd1  ARROW-10024: [C++][Parquet] Create nested reading benchmarks
 add 7a532ed  ARROW-8678: [C++/Python][Parquet] Remove old writer code path

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/arrow/writer.cc  | 356 +--
 cpp/src/parquet/arrow/writer.h   |   2 -
 python/pyarrow/_dataset.pyx  |   3 +-
 python/pyarrow/_parquet.pyx  |   1 +
 python/pyarrow/parquet.py|   7 +-
 python/pyarrow/tests/test_parquet.py |   8 -
 6 files changed, 5 insertions(+), 372 deletions(-)



[arrow] branch master updated (0b83c92 -> ca12cd1)

2020-09-17 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 0b83c92  ARROW-7302: [C++] CSV: allow dictionary types in explicit 
column types
 add ca12cd1  ARROW-10024: [C++][Parquet] Create nested reading benchmarks

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/testing/random.cc  |  35 +-
 cpp/src/arrow/testing/random.h   |  49 +---
 cpp/src/parquet/arrow/reader_writer_benchmark.cc | 142 +++
 docs/source/developers/cpp/conventions.rst   |  21 
 4 files changed, 204 insertions(+), 43 deletions(-)



[arrow] branch decimal256 updated: Basic support of Decimal256 (PR for merging into decimal256 branch NOT master) (#8190)

2020-09-16 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch decimal256
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/decimal256 by this push:
 new 12e81e6  Basic support of Decimal256 (PR for merging into decimal256 
branch NOT master) (#8190)
12e81e6 is described below

commit 12e81e666ffbadf6d817921ec052b23f2d269dad
Author: Mingyu Zhong <69326943+mingyuzh...@users.noreply.github.com>
AuthorDate: Tue Sep 15 23:02:39 2020 -0700

Basic support of Decimal256 (PR for merging into decimal256 branch NOT 
master) (#8190)

Initial merge of C++ Decimal256 functionality into decimal256 branch.
---
 c_glib/test/test-decimal128.rb   |   2 +-
 cpp/src/arrow/array/array_base.cc|   4 +
 cpp/src/arrow/array/array_decimal.cc |  18 +++-
 cpp/src/arrow/array/array_decimal.h  |  16 
 cpp/src/arrow/array/array_dict_test.cc   |  66 +-
 cpp/src/arrow/array/array_test.cc|  59 ++--
 cpp/src/arrow/array/builder_decimal.cc   |  35 +++
 cpp/src/arrow/array/builder_decimal.h|  29 ++
 cpp/src/arrow/array/concatenate.cc   |   2 +-
 cpp/src/arrow/array/validate.cc  |   7 ++
 cpp/src/arrow/builder.cc |   1 +
 cpp/src/arrow/c/bridge.cc|   2 +-
 cpp/src/arrow/compare.cc |  16 
 cpp/src/arrow/dataset/filter.cc  |   1 +
 cpp/src/arrow/ipc/json_simple.cc |  24 +++--
 cpp/src/arrow/ipc/json_simple_test.cc|  41 +
 cpp/src/arrow/pretty_print.cc|   5 +
 cpp/src/arrow/pretty_print_test.cc   |  11 ++-
 cpp/src/arrow/python/arrow_to_pandas.cc  |  25 +
 cpp/src/arrow/python/decimal.cc  |  42 +++--
 cpp/src/arrow/python/decimal.h   |  18 
 cpp/src/arrow/python/inference.cc|  13 ++-
 cpp/src/arrow/python/python_test.cc  |  67 --
 cpp/src/arrow/python/python_to_arrow.cc  |  19 +++-
 cpp/src/arrow/scalar.cc  |   8 ++
 cpp/src/arrow/scalar.h   |  11 +++
 cpp/src/arrow/scalar_test.cc |  21 -
 cpp/src/arrow/testing/gtest_util.cc  |   3 +-
 cpp/src/arrow/testing/json_internal.cc   |  25 -
 cpp/src/arrow/type.cc|  36 +++-
 cpp/src/arrow/type.h |  29 +-
 cpp/src/arrow/type_fwd.h |  26 +-
 cpp/src/arrow/type_test.cc   |  90 ++
 cpp/src/arrow/type_traits.h  |  14 ++-
 cpp/src/arrow/util/basic_decimal.cc  |  81 
 cpp/src/arrow/util/basic_decimal.h   |  56 
 cpp/src/arrow/util/decimal.cc| 132 +++
 cpp/src/arrow/util/decimal.h |  60 
 cpp/src/arrow/util/decimal_test.cc   |  91 ++
 cpp/src/arrow/visitor.cc |   3 +
 cpp/src/arrow/visitor.h  |   3 +
 cpp/src/arrow/visitor_inline.h   |   1 +
 python/pyarrow/__init__.py   |   9 +-
 python/pyarrow/array.pxi |   9 +-
 python/pyarrow/includes/libarrow.pxd |  22 -
 python/pyarrow/lib.pxd   |   9 ++
 python/pyarrow/lib.pyx   |   3 +-
 python/pyarrow/public-api.pxi|   4 +-
 python/pyarrow/scalar.pxi|  23 -
 python/pyarrow/tests/strategies.py   |  10 +-
 python/pyarrow/tests/test_array.py   |   6 +-
 python/pyarrow/tests/test_convert_builtin.py |  41 +
 python/pyarrow/tests/test_scalars.py |  23 -
 python/pyarrow/tests/test_schema.py  |   1 +
 python/pyarrow/tests/test_types.py   |  31 ++-
 python/pyarrow/types.pxi |  49 +-
 python/pyarrow/types.py  |  17 +++-
 r/R/enums.R  |  25 ++---
 58 files changed, 1280 insertions(+), 215 deletions(-)

diff --git a/c_glib/test/test-decimal128.rb b/c_glib/test/test-decimal128.rb
index 0e4bc82..98789d3 100644
--- a/c_glib/test/test-decimal128.rb
+++ b/c_glib/test/test-decimal128.rb
@@ -214,7 +214,7 @@ class TestDecimal128 < Test::Unit::TestCase
 decimal = Arrow::Decimal128.new(10)
 message =
   "[decimal128][rescale]: Invalid: " +
-  "Rescaling decimal value would cause data loss"
+  "Rescaling Decimal128 value would cause data loss"
 assert_raise(Arrow::Error::Invalid.new(message)) do
   decimal.rescale(1, -1)
 end
diff --git a/cpp/src/arrow/array/array_base.cc 
b/cpp/src/arrow/array/array_base.cc
index 0781dd4..c62d20e 100644
--- a/cpp/src/arrow/array/array_base.cc
+++ b/cpp/src/arrow/array/array_base.cc
@@ -73,6 +73,10 @@ struc

[arrow] branch decimal256 updated (4e06c1e -> d201b13)

2020-09-14 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch decimal256
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 4e06c1e  ARROW-9711: [Rust] Add new benchmark derived from TPC-H
 add e553b73  ARROW-9743: [R] Sanitize paths in open_dataset
 add 2dcc9a1  ARROW-9654: [Rust][DataFusion] Add `EXPLAIN <SQL>` statement
 add 5677f9e  ARROW-8581: [C#] Accept and return DateTime from DateXXArray
 add 3941b66  ARROW-9739: [CI][Ruby] Don't install gem documents
 add 222859d  ARROW-9358: [Integration] remove generated_large_batch.json
 add 0d0a0cf  ARROW-9377: [Java] Support unsigned dictionary indices
 add 5d88f10  ARROW-8402: [Java] Support ValidateFull methods in Java
 add afa3eed  ARROW-9729: [Java] Disable Error Prone when project is 
imported into …
 add 597ad62  ARROW-9617: [Rust] [DataFusion] Add length of string array
 add 613ab4a  ARROW-9742: [Rust] [DataFusion] Improved DataFrame trait 
(formerly known as the Table trait)
 add 2c58141  ARROW-9758: [Rust] [DataFusion] Allow physical planner to be 
replaced
 add a94f2b3  ARROW-9673: [Rust] [DataFusion] Add a param "dialect" for 
DFParser::parse_sql
 add 58b38a6  ARROW-9618: [Rust] [DataFusion] Made it easier to write 
optimizers
 add 2e3d7ec  ARROW-9528: [Python] Honor tzinfo when converting from 
datetime
 add 9bd3d50  ARROW-9759: [Rust] [DataFusion] Implement DataFrame.sort()
 add 51e574f  ARROW-9764: [CI][Java] Fix wrong image name for push
 add 4d836ef  ARROW-9757: [Rust] [DataFusion] Add prelude.rs
 add 7593c9a  ARROW-9556: [Python][C++] Segfaults in UnionArray with null 
values
 add 1018a4f  ARROW-9517: [C++/Python] Add support for temporary 
credentials to S3Options
 add 18181fe  ARROW-9768 [Rust] [DataFusion] Rename PhysicalPlannerImpl to 
DefaultPhysicalPlanner
 add c4f8436  ARROW-9495: [C++] Equality assertions don't handle Inf / -Inf 
properly
 add 2f98d1e  ARROW-9710: [C++] Improve performance of Decimal128::ToString 
by 10x, and make the implementation reusable for Decimal256.
 add 8a0db9e  ARROW-9783: [Rust] [DataFusion] Remove aggregate expression 
data type
 add 59dbe54  ARROW-9785: [Python] Fix excessively slow S3 options test
 add d61c8a6  ARROW-9744: [Python] Fix build failure on aarch64
 add ae60bad  ARROW-9789: [C++] Don't install jemalloc in parallel
 add 197f903  ARROW-9619: [Rust] [DataFusion] Add predicate push-down
 add fa4b8d4  ARROW-9781: [C++] Fix valgrind uninitialized value warnings
 add 4db4859  ARROW-9670: [C++][FlightRPC] don't hang if Close and Read 
called simultaneously
 add 0cced8f  ARROW-9793: [Rust] [DataFusion] Fixed unit tests
 add 41fa221  ARROW-9792: [Rust] [DataFusion] Aggregate expression 
functions should not return result
 add 5abe72f  ARROW-9788: [Rust] [DataFusion] Rename SelectionExec to 
FilterExec
 add 2ebde1c  ARROW-9800: [Rust][Parquet] Remove println! when writing 
column statistics
 add 01f06cf  ARROW-9778: [Rust] [DataFusion] Implement Expr.nullable() and 
make consistent between logical and physical plans
 add 3cb0bd8  ARROW-9760: [Rust] [DataFusion] Added DataFrame::explain
 add f0f02c6  ARROW-9784: [Rust][DataFusion] Make running TPCH benchmark 
repeatable
 add 9e73081  ARROW-9733: [Rust] [DataFusion] Added support for 
COUNT/MIN/MAX on string columns
 add 25b0b1b  ARROW-9790: [Rust][Parquet] Fix PrimitiveArrayReader boundary 
conditions
 add c90ad63  ARROW-9532: [Python][Doc] Use Python3_EXECUTABLE instead of 
PYTHON_EXECUTABLE for finding Python executable
 add de8bfdd  ARROW-9808: [Python] Update read_table doc string
 add 60987f5  ARROW-8773: [Python] Preserve nullability of fields in 
schema.empty_table()
 add cb7d1c1  ARROW-9388: [C++] Division kernels
 add 0576da6  ARROW-9768: [Python] Check overflow in conversion of datetime 
objects to nanosecond timestamps
 add 5d9ccb7  ARROW-6437: [R] Add AWS SDK to system dependencies for macOS 
and Windows
 add 36d267b  [MINOR] Fix typo and use more concise word in README.md
 add 597a26e  ARROW-9807: [R] News update/version bump post-1.0.1
 add 5e7be07  ARROW-9678: [Rust] [DataFusion] Improve projection push down 
to remove unused columns
 add f98de24  ARROW-9815 [Rust] [DataFusion] Fixed deadlock caused by 
accessing the scalar functions' registry.
 add 085b44d  ARROW-9490: [Python][C++] Bug in pa.array when input mixes 
int8 with float
 add 0a698c0  ARROW-9831: [Rust][DataFusion] Fixed compilation error
 add 2e8fcd4  ARROW-9762: [Rust] [DataFusion] ExecutionContext::sql now 
returns DataFrame
 add 85f4324  ARROW-9819: [C++] Bump mimalloc to 1.6.4
 add 735c870  ARROW-9809: [Rust][DataFusion] Fixed type coercion, 
supertypes and type checking.
 add 657b3d3  ARROW-9833: [Rust] [DataFusion] TableProvider.scan now 
returns ExecutionPlan
 add d1d85db  ARROW-9

[arrow] branch decimal256 updated (4e06c1e -> d201b13)

2020-09-14 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch decimal256
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 4e06c1e  ARROW-9711: [Rust] Add new benchmark derived from TPC-H
 add e553b73  ARROW-9743: [R] Sanitize paths in open_dataset
 add 2dcc9a1  ARROW-9654: [Rust][DataFusion] Add `EXPLAIN <SQL>` statement
 add 5677f9e  ARROW-8581: [C#] Accept and return DateTime from DateXXArray
 add 3941b66  ARROW-9739: [CI][Ruby] Don't install gem documents
 add 222859d  ARROW-9358: [Integration] remove generated_large_batch.json
 add 0d0a0cf  ARROW-9377: [Java] Support unsigned dictionary indices
 add 5d88f10  ARROW-8402: [Java] Support ValidateFull methods in Java
 add afa3eed  ARROW-9729: [Java] Disable Error Prone when project is 
imported into …
 add 597ad62  ARROW-9617: [Rust] [DataFusion] Add length of string array
 add 613ab4a  ARROW-9742: [Rust] [DataFusion] Improved DataFrame trait 
(formerly known as the Table trait)
 add 2c58141  ARROW-9758: [Rust] [DataFusion] Allow physical planner to be 
replaced
 add a94f2b3  ARROW-9673: [Rust] [DataFusion] Add a param "dialect" for 
DFParser::parse_sql
 add 58b38a6  ARROW-9618: [Rust] [DataFusion] Made it easier to write 
optimizers
 add 2e3d7ec  ARROW-9528: [Python] Honor tzinfo when converting from 
datetime
 add 9bd3d50  ARROW-9759: [Rust] [DataFusion] Implement DataFrame.sort()
 add 51e574f  ARROW-9764: [CI][Java] Fix wrong image name for push
 add 4d836ef  ARROW-9757: [Rust] [DataFusion] Add prelude.rs
 add 7593c9a  ARROW-9556: [Python][C++] Segfaults in UnionArray with null 
values
 add 1018a4f  ARROW-9517: [C++/Python] Add support for temporary 
credentials to S3Options
 add 18181fe  ARROW-9768 [Rust] [DataFusion] Rename PhysicalPlannerImpl to 
DefaultPhysicalPlanner
 add c4f8436  ARROW-9495: [C++] Equality assertions don't handle Inf / -Inf 
properly
 add 2f98d1e  ARROW-9710: [C++] Improve performance of Decimal128::ToString 
by 10x, and make the implementation reusable for Decimal256.
 add 8a0db9e  ARROW-9783: [Rust] [DataFusion] Remove aggregate expression 
data type
 add 59dbe54  ARROW-9785: [Python] Fix excessively slow S3 options test
 add d61c8a6  ARROW-9744: [Python] Fix build failure on aarch64
 add ae60bad  ARROW-9789: [C++] Don't install jemalloc in parallel
 add 197f903  ARROW-9619: [Rust] [DataFusion] Add predicate push-down
 add fa4b8d4  ARROW-9781: [C++] Fix valgrind uninitialized value warnings
 add 4db4859  ARROW-9670: [C++][FlightRPC] don't hang if Close and Read 
called simultaneously
 add 0cced8f  ARROW-9793: [Rust] [DataFusion] Fixed unit tests
 add 41fa221  ARROW-9792: [Rust] [DataFusion] Aggregate expression 
functions should not return result
 add 5abe72f  ARROW-9788: [Rust] [DataFusion] Rename SelectionExec to 
FilterExec
 add 2ebde1c  ARROW-9800: [Rust][Parquet] Remove println! when writing 
column statistics
 add 01f06cf  ARROW-9778: [Rust] [DataFusion] Implement Expr.nullable() and 
make consistent between logical and physical plans
 add 3cb0bd8  ARROW-9760: [Rust] [DataFusion] Added DataFrame::explain
 add f0f02c6  ARROW-9784: [Rust][DataFusion] Make running TPCH benchmark 
repeatable
 add 9e73081  ARROW-9733: [Rust] [DataFusion] Added support for 
COUNT/MIN/MAX on string columns
 add 25b0b1b  ARROW-9790: [Rust][Parquet] Fix PrimitiveArrayReader boundary 
conditions
 add c90ad63  ARROW-9532: [Python][Doc] Use Python3_EXECUTABLE instead of 
PYTHON_EXECUTABLE for finding Python executable
 add de8bfdd  ARROW-9808: [Python] Update read_table doc string
 add 60987f5  ARROW-8773: [Python] Preserve nullability of fields in 
schema.empty_table()
 add cb7d1c1  ARROW-9388: [C++] Division kernels
 add 0576da6  ARROW-9768: [Python] Check overflow in conversion of datetime 
objects to nanosecond timestamps
 add 5d9ccb7  ARROW-6437: [R] Add AWS SDK to system dependencies for macOS 
and Windows
 add 36d267b  [MINOR] Fix typo and use more concise word in README.md
 add 597a26e  ARROW-9807: [R] News update/version bump post-1.0.1
 add 5e7be07  ARROW-9678: [Rust] [DataFusion] Improve projection push down 
to remove unused columns
 add f98de24  ARROW-9815 [Rust] [DataFusion] Fixed deadlock caused by 
accessing the scalar functions' registry.
 add 085b44d  ARROW-9490: [Python][C++] Bug in pa.array when input mixes 
int8 with float
 add 0a698c0  ARROW-9831: [Rust][DataFusion] Fixed compilation error
 add 2e8fcd4  ARROW-9762: [Rust] [DataFusion] ExecutionContext::sql now 
returns DataFrame
 add 85f4324  ARROW-9819: [C++] Bump mimalloc to 1.6.4
 add 735c870  ARROW-9809: [Rust][DataFusion] Fixed type coercion, 
supertypes and type checking.
 add 657b3d3  ARROW-9833: [Rust] [DataFusion] TableProvider.scan now 
returns ExecutionPlan
 add d1d85db  ARROW-9

[arrow] branch master updated (c6994f1 -> cfa2363)

2020-09-13 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from c6994f1  ARROW-9980: [Rust] [Parquet] Fix clippy lints
 add cfa2363  ARROW-9737: [C++][Gandiva] Add bitwise_xor() for integers

No new revisions were added by this update.

Summary of changes:
 cpp/src/gandiva/function_registry_arithmetic.cc| 2 ++
 cpp/src/gandiva/precompiled/arithmetic_ops.cc  | 2 ++
 cpp/src/gandiva/precompiled/arithmetic_ops_test.cc | 9 +
 cpp/src/gandiva/precompiled/types.h| 2 ++
 4 files changed, 15 insertions(+)



[arrow] branch master updated (b4063cc -> 7ce498e)

2020-08-26 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from b4063cc  ARROW-9853: [RUST] Implement take kernel for dictionary arrays
 add 7ce498e  PARQUET-1904: [C++] Export file_offset in RowGroupMetaData

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/metadata.cc | 2 ++
 cpp/src/parquet/metadata.h  | 7 +++
 2 files changed, 9 insertions(+)



[arrow] branch master updated (5d88f10 -> afa3eed)

2020-08-15 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 5d88f10  ARROW-8402: [Java] Support ValidateFull methods in Java
 add afa3eed  ARROW-9729: [Java] Disable Error Prone when project is 
imported into …

No new revisions were added by this update.

Summary of changes:
 java/pom.xml | 51 ---
 1 file changed, 40 insertions(+), 11 deletions(-)



[arrow] branch master updated (0d0a0cf -> 5d88f10)

2020-08-14 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 0d0a0cf  ARROW-9377: [Java] Support unsigned dictionary indices
 add 5d88f10  ARROW-8402: [Java] Support ValidateFull methods in Java

No new revisions were added by this update.

Summary of changes:
 .../main/codegen/templates/DenseUnionVector.java   |   2 +-
 .../src/main/codegen/templates/UnionVector.java|   2 +-
 .../org/apache/arrow/vector/DurationVector.java|   8 +
 .../arrow/vector/util/ValueVectorUtility.java  |  84 +++--
 .../apache/arrow/vector/validate/ValidateUtil.java |  61 
 .../validate/ValidateVectorBufferVisitor.java  | 239 ++
 .../vector/validate/ValidateVectorDataVisitor.java | 173 ++
 .../vector/validate/ValidateVectorTypeVisitor.java | 355 +
 .../vector/testing/ValueVectorDataPopulator.java   |  32 ++
 ...eVectorVisitor.java => TestValidateVector.java} |  73 +++--
 .../vector/validate/TestValidateVectorFull.java| 234 ++
 .../validate/TestValidateVectorSchemaRoot.java | 101 ++
 .../validate/TestValidateVectorTypeVisitor.java| 301 +
 13 files changed, 1604 insertions(+), 61 deletions(-)
 create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java
 create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
 create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
 create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
 rename 
java/vector/src/test/java/org/apache/arrow/vector/validate/{TestValidateVectorVisitor.java
 => TestValidateVector.java} (71%)
 create mode 100644 
java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java
 create mode 100644 
java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java
 create mode 100644 
java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java



[arrow] branch master updated: ARROW-9377: [Java] Support unsigned dictionary indices

2020-08-14 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 0d0a0cf  ARROW-9377: [Java] Support unsigned dictionary indices
0d0a0cf is described below

commit 0d0a0cfef367d2aefc0596b0f627ba2b594f7306
Author: liyafan82 
AuthorDate: Fri Aug 14 21:26:47 2020 -0700

ARROW-9377: [Java] Support unsigned dictionary indices

See https://issues.apache.org/jira/browse/ARROW-9377

Closes #7817 from liyafan82/fly_0721_uns

Authored-by: liyafan82 
Signed-off-by: Micah Kornfield 
---
 .../java/org/apache/arrow/vector/UInt1Vector.java  |  14 +-
 .../java/org/apache/arrow/vector/UInt2Vector.java  |   6 +
 .../java/org/apache/arrow/vector/UInt4Vector.java  |  15 +-
 .../java/org/apache/arrow/vector/UInt8Vector.java  |   6 +
 .../apache/arrow/vector/TestDictionaryVector.java  | 104 +
 .../org/apache/arrow/vector/TestValueVector.java   |  43 
 .../vector/ipc/TestUIntDictionaryRoundTrip.java| 246 +
 7 files changed, 430 insertions(+), 4 deletions(-)

diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
index f91d171..4a2e5b1 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
@@ -36,6 +36,16 @@ import org.apache.arrow.vector.util.TransferPair;
  * maintained to track which elements in the vector are null.
  */
 public final class UInt1Vector extends BaseFixedWidthVector implements 
BaseIntVector {
+  /**
+   * The mask to use when promoting the unsigned byte value to an integer.
+   */
+  public static final int PROMOTION_MASK = 0xFF;
+
+  /**
+   * The maximum 8-bit unsigned integer.
+   */
+  public static final byte MAX_UINT1 = (byte) 0XFF;
+
   private static final byte TYPE_WIDTH = 1;
   private final FieldReader reader;
 
@@ -83,7 +93,7 @@ public final class UInt1Vector extends BaseFixedWidthVector 
implements BaseIntVe
*/
   public static short getNoOverflow(final ArrowBuf buffer, final int index) {
 byte b = buffer.getByte(index * TYPE_WIDTH);
-return (short) (0xFF & b);
+return (short) (PROMOTION_MASK & b);
   }
 
 
@@ -315,7 +325,7 @@ public final class UInt1Vector extends BaseFixedWidthVector 
implements BaseIntVe
 
   @Override
   public long getValueAsLong(int index) {
-return this.get(index);
+return this.get(index) & PROMOTION_MASK;
   }
 
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
index de024ff..660194b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
@@ -36,6 +36,12 @@ import org.apache.arrow.vector.util.TransferPair;
  * maintained to track which elements in the vector are null.
  */
 public final class UInt2Vector extends BaseFixedWidthVector implements 
BaseIntVector {
+
+  /**
+   * The maximum 16-bit unsigned integer.
+   */
+  public static final char MAX_UINT2 = (char) 0XFFFF;
+
   private static final byte TYPE_WIDTH = 2;
   private final FieldReader reader;
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
index d749478..8166dfd 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
@@ -36,6 +36,17 @@ import org.apache.arrow.vector.util.TransferPair;
  * maintained to track which elements in the vector are null.
  */
 public final class UInt4Vector extends BaseFixedWidthVector implements 
BaseIntVector {
+
+  /**
+   * The mask to use when promoting the unsigned int value to a long int.
+   */
+  public static final long PROMOTION_MASK = 0x00000000FFFFFFFFL;
+
+  /**
+   * The maximum 32-bit unsigned integer.
+   */
+  public static final int MAX_UINT4 = 0XFFFFFFFF;
+
   private static final byte TYPE_WIDTH = 4;
   private final FieldReader reader;
 
@@ -83,7 +94,7 @@ public final class UInt4Vector extends BaseFixedWidthVector 
implements BaseIntVe
*/
   public static long getNoOverflow(final ArrowBuf buffer, final int index) {
 long l = buffer.getInt((long) index * TYPE_WIDTH);
-return (0x00000000FFFFFFFFL) & l;
+return PROMOTION_MASK & l;
   }
 
   /**
@@ -286,7 +297,7 @@ public final class UInt4Vector extends BaseFixedWidthVector 
implements BaseIntVe
 
   @Override
   public long getValueAsLong(int index) {
-return this.get(index);
+return this.get(index) & PROMOTION_MASK;
   }
 
   private class TransferImpl implements TransferPair {
diff --git a/java/vector/src/ma

[arrow] branch master updated: ARROW-9358: [Integration] remove generated_large_batch.json

2020-08-14 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 222859d  ARROW-9358: [Integration] remove generated_large_batch.json
222859d is described below

commit 222859de38fb0b286b1c44fbd873ca2eeb335858
Author: David Li 
AuthorDate: Fri Aug 14 21:22:40 2020 -0700

ARROW-9358: [Integration] remove generated_large_batch.json

This should speed up integration tests by moving the expensive large batch 
test to the individual Flight implementations.

Closes #7908 from lidavidm/arrow-9358

Authored-by: David Li 
Signed-off-by: Micah Kornfield 
---
 cpp/src/arrow/flight/flight_test.cc| 28 +++-
 cpp/src/arrow/flight/test_util.cc  | 29 +
 cpp/src/arrow/flight/test_util.h   |  6 ++
 dev/archery/archery/integration/datagen.py |  7 +-
 dev/archery/archery/integration/runner.py  |  5 +-
 .../apache/arrow/flight/TestBasicOperation.java| 74 +-
 6 files changed, 135 insertions(+), 14 deletions(-)

diff --git a/cpp/src/arrow/flight/flight_test.cc 
b/cpp/src/arrow/flight/flight_test.cc
index 3808699..cb88d85 100644
--- a/cpp/src/arrow/flight/flight_test.cc
+++ b/cpp/src/arrow/flight/flight_test.cc
@@ -354,8 +354,6 @@ class TestFlightClient : public ::testing::Test {
   template <typename EndpointCheckFunc>
   void CheckDoGet(const FlightDescriptor& descr, const BatchVector& 
expected_batches,
   EndpointCheckFunc&& check_endpoints) {
-auto num_batches = static_cast(expected_batches.size());
-ASSERT_GE(num_batches, 2);
 auto expected_schema = expected_batches[0]->schema();
 
 std::unique_ptr info;
@@ -369,6 +367,13 @@ class TestFlightClient : public ::testing::Test {
 
 // By convention, fetch the first endpoint
 Ticket ticket = info->endpoints()[0].ticket;
+CheckDoGet(ticket, expected_batches);
+  }
+
+  void CheckDoGet(const Ticket& ticket, const BatchVector& expected_batches) {
+auto num_batches = static_cast(expected_batches.size());
+ASSERT_GE(num_batches, 2);
+
 std::unique_ptr stream;
 ASSERT_OK(client_->DoGet(ticket, &stream));
 
@@ -1105,6 +1110,15 @@ TEST_F(TestFlightClient, DoGetDicts) {
   CheckDoGet(descr, expected_batches, check_endpoints);
 }
 
+// Ensure the gRPC client is configured to allow large messages
+// Tests a 32 MiB batch
+TEST_F(TestFlightClient, DoGetLargeBatch) {
+  BatchVector expected_batches;
+  ASSERT_OK(ExampleLargeBatches(&expected_batches));
+  Ticket ticket{"ticket-large-batch-1"};
+  CheckDoGet(ticket, expected_batches);
+}
+
 TEST_F(TestFlightClient, DoExchange) {
   auto descr = FlightDescriptor::Command("counter");
   BatchVector batches;
@@ -1515,6 +1529,16 @@ TEST_F(TestDoPut, DoPutDicts) {
   CheckDoPut(descr, schema, batches);
 }
 
+// Ensure the gRPC server is configured to allow large messages
+// Tests a 32 MiB batch
+TEST_F(TestDoPut, DoPutLargeBatch) {
+  auto descr = FlightDescriptor::Path({"large-batches"});
+  auto schema = ExampleLargeSchema();
+  BatchVector batches;
+  ASSERT_OK(ExampleLargeBatches(&batches));
+  CheckDoPut(descr, schema, batches);
+}
+
 TEST_F(TestDoPut, DoPutSizeLimit) {
   const int64_t size_limit = 4096;
   Location location;
diff --git a/cpp/src/arrow/flight/test_util.cc 
b/cpp/src/arrow/flight/test_util.cc
index 302fda1..f398a1d 100644
--- a/cpp/src/arrow/flight/test_util.cc
+++ b/cpp/src/arrow/flight/test_util.cc
@@ -38,6 +38,7 @@
 #include 
 
 #include "arrow/ipc/test_common.h"
+#include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/logging.h"
@@ -156,6 +157,11 @@ Status GetBatchForFlight(const Ticket& ticket, 
std::shared_ptr(batches[0]->schema(), batches);
 return Status::OK();
+  } else if (ticket.ticket == "ticket-large-batch-1") {
+BatchVector batches;
+RETURN_NOT_OK(ExampleLargeBatches(&batches));
+*out = std::make_shared(batches[0]->schema(), batches);
+return Status::OK();
   } else {
 return Status::NotImplemented("no stream implemented for ticket: " + 
ticket.ticket);
   }
@@ -504,6 +510,15 @@ std::shared_ptr ExampleDictSchema() {
   return batch->schema();
 }
 
+std::shared_ptr ExampleLargeSchema() {
+  std::vector> fields;
+  for (int i = 0; i < 128; i++) {
+const auto field_name = "f" + std::to_string(i);
+fields.push_back(arrow::field(field_name, arrow::float64()));
+  }
+  return arrow::schema(fields);
+}
+
 std::vector ExampleFlightInfo() {
   Location location1;
   Location location2;
@@ -582,6 +597,20 @@ Status ExampleNestedBatches(BatchVector* out) {
   return Status::OK();
 }
 
+Status ExampleLargeBatches(BatchVector* out) {

[arrow] branch decimal256 updated (7525286 -> 4e06c1e)

2020-08-14 Thread emkornfield
This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a change to branch decimal256
in repository https://gitbox.apache.org/repos/asf/arrow.git.


from 7525286  ARROW-9573: [Python][Dataset] Provide 
read_table(ignore_prefixes=)
 add fd6f545  ARROW-9666: [Python][wheel][Windows] Fix wheel build for 
Windows
 add 634fcd1  ARROW-9548: [Go] Test output files are not removed correctly
 add a3596c9  ARROW-9546: [Python] Clean up Pandas Metadata Conversion test
 add 4489cb7  ARROW-9462:[Go] The Indentation after the first Record in 
arrjson writer is incorrect
 add 9c04867  ARROW-9643: [C++] Only register the SIMD variants when it's 
supported.
 add f40e287  ARROW-9536: [Java] Miss parameters in 
PlasmaOutOfMemoryException.java
 add 3d0a9d5  ARROW-9671: [C++] Fix a bug in BasicDecimal128 constructor 
that interprets uint64_t integers with highest bit set as negative.
 add 40ac6e3  ARROW-9205: [Documentation] Fix typos
 add 52d0fe6  ARROW-9429: [Python] ChunkedArray.to_numpy
 add 811d8f6  ARROW-9402: [C++] Rework portable wrappers for checked 
integer arithmetic
 add 1c375b5  ARROW-9631: [Rust] Make arrow not depend on flight
 add a84e7b0  ARROW-9652: [Rust][DataFusion] Error message rather than 
panic for external csv tables with no column defs
 add 1e48d6a  ARROW-7218: [Python] Conversion from boolean numpy scalars 
not working
 add 858059f  ARROW-9606: [C++][Dataset] Support 
`"a"_.In(<>).Assume()`
 add 37ee600  ARROW-9628: [Rust] Disable artifact caching for Mac OSX builds
 add 1b0aebe  ARROW-9598: [C++][Parquet] Fix writing nullable structs
 add e31e5d4  ARROW-9683: [Rust][DataFusion] Add debug printing to physical 
plans and associated types
 add 0e15d26  ARROW-9521: [Rust][DataFusion] Handle custom CSV file 
extensions
 add a0ec4f2  ARROW-9638: [C++][Compute] Implement mode kernel
 add 66a8f04  ARROW-9692: [Python] Fix distutils-related warning
 add e934a8a  ARROW-9602: [R] Improve cmake detection in Linux build
 add b1a30e6  ARROW-9653: [Rust][DataFusion] Do not error in planner with 
SQL has multiple group by expressions
 add aec21b2  ARROW-9696: [Rust] [DataFusion] fix nested binary expressions
 add 3fc7fe4  ARROW-9691: [Rust] [DataFusion] Make sql_statement_to_plan 
method public
 add dd98a9f  ARROW-9577: [C++] Ignore EBADF error in posix_madvise()
 add ebbe60c  ARROW-9684: [C++] Fix undefined behaviour on invalid IPC / 
Parquet input
 add e1e3188  ARROW-9659: [C++] Fix RecordBatchStreamReader when source is 
CudaBufferReader
 add faee652  ARROW-9604: [C++] Add aggregate min/max benchmark
 add 042998c  ARROW-9679: [Rust] [DataFusion] More efficient creation of 
final batch from HashAggregateExec
 add db9185e  ARROW-9695: [Rust] Improve comments on LogicalPlan enum 
variants
 add 12e31a5  ARROW-9715: [R] changelog/doc updates for 1.0.1
 add 796b050  ARROW-9700: [Python] fix create_library_symlinks for macos
 add 90d1ab7  ARROW-9721: [Packaging][Python] Update wheel dependency files
 add 69d7b2b  ARROW-9698: [C++] Remove -DNDEBUG flag leak in .pc file
 add b5955d8  ARROW-9713: [Rust] [DataFusion] Remove explicit panics
 add 9919e3e  ARROW-9712: [Rust] [DataFusion] Fix parquet error handling 
and general code improvements
 add aaf467a  ARROW-9644: [C++][Dataset] Don't apply ignore_prefixes to 
partition base_dir
 add 898bef8  ARROW-9665: [R] head/tail/take for Datasets
 add 8150008  ARROW-9722: [Rust] Shorten key lifetime for dict lookup key
 add 586c060  ARROW-9615: [Rust] Added kernel to compute length of a string.
 add 525a5e9  ARROW-9693: [CI][Docs] Nightly docs build fails
 add 570184b  ARROW-9727: [C++] Fix crashes on invalid IPC input (OSS-Fuzz)
 add 7efc4f3  ARROW-9714: [Rust] [DataFusion] Implement type coercion rule 
for limit and sort
 add b2788c5  ARROW-9725: [Rust] [DataFusion] SortExec and LimitExec re-use 
MergeExec
 add d23f0a6  ARROW-9706: [Java] Tests of TestLargeListVector correctly 
read offset
 add cf1c749  ARROW-9681: [Java] Fix test failures of Arrow Memory - Core 
on big-endian platform
 add 3368159  ARROW-9734: [Rust] [DataFusion] TableProvider.scan now 
returns partitions instead of iterators
 add ecba35c  ARROW-9726: [Rust] [DataFusion] Do not create parquet reader 
thread until execute is called
 add 2f36cc4  ARROW-9716: [Rust] [DataFusion] Implement limit on concurrent 
threads in MergeExec
 add 4e06c1e  ARROW-9711: [Rust] Add new benchmark derived from TPC-H

No new revisions were added by this update.

Summary of changes:
 .github/workflows/rust.yml |8 +-
 LICENSE.txt|   16 +
 ci/conda_env_sphinx.yml|4 +-
 ci/docker/linux-apt-docs.dockerfile|3 +-
 cpp/cmake_modules/DefineOptions.cmake

  1   2   3   4   >