This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 72c7f5d  ARROW-2454: [C++] Allow zero-array chunked arrays
72c7f5d is described below

commit 72c7f5d3bcacc6d36e309a3731deca48c3c5345d
Author: Antoine Pitrou <anto...@python.org>
AuthorDate: Mon Apr 16 17:35:20 2018 +0200

    ARROW-2454: [C++] Allow zero-array chunked arrays
    
    This allows code to be more regular and less fragile.
    
    Also fix the chunked array slicing logic.
    
    Author: Antoine Pitrou <anto...@python.org>
    
    Closes #1897 from pitrou/ARROW-2454-zero-length-chunked-arrays and squashes the following commits:
    
    4ad2c6f <Antoine Pitrou> ARROW-2454:  Allow zero-array chunked arrays
---
 cpp/src/arrow/table-test.cc        | 21 ++++++++++++++++++++-
 cpp/src/arrow/table.cc             | 26 +++++++++++++++++++-------
 cpp/src/arrow/table.h              |  4 +++-
 cpp/src/arrow/type.h               |  4 ++--
 python/pyarrow/tests/test_table.py |  4 ++++
 5 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
index b1cf6e5..0b9f75d 100644
--- a/cpp/src/arrow/table-test.cc
+++ b/cpp/src/arrow/table-test.cc
@@ -43,7 +43,9 @@ class TestChunkedArray : public TestBase {
  protected:
   virtual void Construct() {
     one_ = std::make_shared<ChunkedArray>(arrays_one_);
-    another_ = std::make_shared<ChunkedArray>(arrays_another_);
+    if (!arrays_another_.empty()) {
+      another_ = std::make_shared<ChunkedArray>(arrays_another_);
+    }
   }
 
   ArrayVector arrays_one_;
@@ -121,6 +123,23 @@ TEST_F(TestChunkedArray, SliceEquals) {
   std::shared_ptr<ChunkedArray> slice2 = one_->Slice(75)->Slice(25)->Slice(25, 50);
   ASSERT_EQ(slice2->length(), 50);
   test::AssertChunkedEqual(*slice, *slice2);
+
+  // Making empty slices of a ChunkedArray
+  std::shared_ptr<ChunkedArray> slice3 = one_->Slice(one_->length(), 99);
+  ASSERT_EQ(slice3->length(), 0);
+  ASSERT_EQ(slice3->num_chunks(), 0);
+  ASSERT_TRUE(slice3->type()->Equals(one_->type()));
+
+  std::shared_ptr<ChunkedArray> slice4 = one_->Slice(10, 0);
+  ASSERT_EQ(slice4->length(), 0);
+  ASSERT_EQ(slice4->num_chunks(), 0);
+  ASSERT_TRUE(slice4->type()->Equals(one_->type()));
+
+  // Slicing an empty ChunkedArray
+  std::shared_ptr<ChunkedArray> slice5 = slice4->Slice(0, 10);
+  ASSERT_EQ(slice5->length(), 0);
+  ASSERT_EQ(slice5->num_chunks(), 0);
+  ASSERT_TRUE(slice5->type()->Equals(one_->type()));
 }
 
 class TestColumn : public TestChunkedArray {
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index f6ac6dd..8af47ea 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -39,13 +39,25 @@ namespace arrow {
 ChunkedArray::ChunkedArray(const ArrayVector& chunks) : chunks_(chunks) {
   length_ = 0;
   null_count_ = 0;
+  DCHECK_GT(chunks.size(), 0)
+      << "cannot construct ChunkedArray from empty vector and omitted type";
+  type_ = chunks[0]->type();
   for (const std::shared_ptr<Array>& chunk : chunks) {
     length_ += chunk->length();
     null_count_ += chunk->null_count();
   }
 }
 
-std::shared_ptr<DataType> ChunkedArray::type() const { return chunks_[0]->type(); }
+ChunkedArray::ChunkedArray(const ArrayVector& chunks,
+                           const std::shared_ptr<DataType>& type)
+    : chunks_(chunks), type_(type) {
+  length_ = 0;
+  null_count_ = 0;
+  for (const std::shared_ptr<Array>& chunk : chunks) {
+    length_ += chunk->length();
+    null_count_ += chunk->null_count();
+  }
+}
 
 bool ChunkedArray::Equals(const ChunkedArray& other) const {
   if (length_ != other.length()) {
@@ -107,20 +119,20 @@ std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length
   DCHECK_LE(offset, length_);
 
   int curr_chunk = 0;
-  while (offset >= chunk(curr_chunk)->length()) {
+  while (curr_chunk < num_chunks() && offset >= chunk(curr_chunk)->length()) {
     offset -= chunk(curr_chunk)->length();
     curr_chunk++;
   }
 
   ArrayVector new_chunks;
-  while (length > 0 && curr_chunk < num_chunks()) {
+  while (curr_chunk < num_chunks() && length > 0) {
     new_chunks.push_back(chunk(curr_chunk)->Slice(offset, length));
     length -= chunk(curr_chunk)->length() - offset;
     offset = 0;
     curr_chunk++;
   }
 
-  return std::make_shared<ChunkedArray>(new_chunks);
+  return std::make_shared<ChunkedArray>(new_chunks, type_);
 }
 
 std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
@@ -129,15 +141,15 @@ std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
 
 Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks)
     : field_(field) {
-  data_ = std::make_shared<ChunkedArray>(chunks);
+  data_ = std::make_shared<ChunkedArray>(chunks, field->type());
 }
 
 Column::Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data)
     : field_(field) {
   if (!data) {
-    data_ = std::make_shared<ChunkedArray>(ArrayVector({}));
+    data_ = std::make_shared<ChunkedArray>(ArrayVector({}), field->type());
   } else {
-    data_ = std::make_shared<ChunkedArray>(ArrayVector({data}));
+    data_ = std::make_shared<ChunkedArray>(ArrayVector({data}), field->type());
   }
 }
 
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index 20d027d..32af224 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -40,6 +40,7 @@ class Status;
 class ARROW_EXPORT ChunkedArray {
  public:
   explicit ChunkedArray(const ArrayVector& chunks);
+  ChunkedArray(const ArrayVector& chunks, const std::shared_ptr<DataType>& type);
 
   /// \return the total length of the chunked array; computed on construction
   int64_t length() const { return length_; }
@@ -68,7 +69,7 @@ class ARROW_EXPORT ChunkedArray {
   /// \brief Slice from offset until end of the chunked array
   std::shared_ptr<ChunkedArray> Slice(int64_t offset) const;
 
-  std::shared_ptr<DataType> type() const;
+  std::shared_ptr<DataType> type() const { return type_; }
 
   bool Equals(const ChunkedArray& other) const;
   bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
@@ -77,6 +78,7 @@ class ARROW_EXPORT ChunkedArray {
   ArrayVector chunks_;
   int64_t length_;
   int64_t null_count_;
+  std::shared_ptr<DataType> type_;
 
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index ce213b9..e50760b 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -140,8 +140,8 @@ class ARROW_EXPORT DataType {
 
   // Return whether the types are equal
   //
-  // Types that are logically convertable from one to another e.g. List<UInt8>
-  // and Binary are NOT equal).
+  // Types that are logically convertible from one to another (e.g. List<UInt8>
+  // and Binary) are NOT equal.
   virtual bool Equals(const DataType& other) const;
   bool Equals(const std::shared_ptr<DataType>& other) const;
 
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 8156435..5303cb2 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -44,6 +44,10 @@ def test_chunked_array_getitem():
     data_slice = data[4:-1]
     assert data_slice.to_pylist() == [5]
 
+    data_slice = data[99:99]
+    assert data_slice.type == data.type
+    assert data_slice.to_pylist() == []
+
 
 def test_column_basics():
     data = [

-- 
To stop receiving notification emails like this one, please contact
apit...@apache.org.

Reply via email to