This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 884474c  ARROW-2699: [C++/Python] Add Table method that replaces a column with a new supplied column
884474c is described below

commit 884474ca5ca1b8da55c0b23eb7cb784c2cd9bdb4
Author: Korn, Uwe <[email protected]>
AuthorDate: Thu Jun 21 07:28:20 2018 -0400

    ARROW-2699: [C++/Python] Add Table method that replaces a column with a new supplied column
    
    Author: Korn, Uwe <[email protected]>
    
    Closes #2140 from xhochy/ARROW-2699 and squashes the following commits:
    
    95f264db <Korn, Uwe> Incorporate review comments
    8290c07f <Korn, Uwe> ARROW-2699: Add Table method that replaces a column with a new supplied column
---
 cpp/src/arrow/table-test.cc          | 19 +++++++++++++++++++
 cpp/src/arrow/table.cc               | 18 ++++++++++++++++++
 cpp/src/arrow/table.h                |  4 ++++
 cpp/src/arrow/type.cc                | 11 +++++++++++
 cpp/src/arrow/type.h                 |  2 ++
 cpp/src/arrow/util/stl.h             | 16 ++++++++++++++++
 python/pyarrow/includes/libarrow.pxd |  4 ++++
 python/pyarrow/table.pxi             | 11 +++++++++++
 python/pyarrow/tests/test_table.py   | 18 ++++++++++++++++++
 python/pyarrow/types.pxi             | 24 ++++++++++++++++++++++++
 10 files changed, 127 insertions(+)

diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
index 0b9f75d..f178e61 100644
--- a/cpp/src/arrow/table-test.cc
+++ b/cpp/src/arrow/table-test.cc
@@ -465,6 +465,25 @@ TEST_F(TestTable, RemoveColumn) {
   ASSERT_TRUE(result->Equals(*expected));
 }
 
+TEST_F(TestTable, SetColumn) {
+  // Build the fixture table from the example schema and columns.
+  const int64_t num_rows = 10;
+  MakeExample1(num_rows);
+  const std::shared_ptr<Table> source = Table::Make(schema_, columns_);
+
+  // Overwrite column 0 with a second reference to column 1.
+  std::shared_ptr<Table> replaced;
+  ASSERT_OK(source->SetColumn(0, source->column(1), &replaced));
+
+  // Expected layout: field/column 1 now appears at positions 0 and 1,
+  // followed by the untouched field/column 2.
+  const std::vector<std::shared_ptr<Column>> want_columns = {
+      source->column(1), source->column(1), source->column(2)};
+  const auto want_schema =
+      ::arrow::schema({schema_->field(1), schema_->field(1), schema_->field(2)});
+  const auto want = Table::Make(want_schema, want_columns);
+
+  ASSERT_TRUE(replaced->Equals(*want));
+}
+
 TEST_F(TestTable, RemoveColumnEmpty) {
   // ARROW-1865
   const int64_t length = 10;
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 0926ba8..87196e7 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -308,6 +308,24 @@ class SimpleTable : public Table {
     return Status::OK();
   }
 
+  // Replace the column at position i with `col`, producing a new table in
+  // `out`. This table is left unmodified.
+  //
+  // Returns Status::Invalid when i does not refer to an existing column or
+  // when the length of `col` differs from the table's row count; otherwise
+  // propagates the status of the schema update.
+  Status SetColumn(int i, const std::shared_ptr<Column>& col,
+                   std::shared_ptr<Table>* out) const override {
+    DCHECK(col != nullptr);
+
+    // Validate the index here: for i == columns_.size() the vector helper
+    // below would append `col` instead of replacing an existing column.
+    if (i < 0 || static_cast<size_t>(i) >= columns_.size()) {
+      std::stringstream ss;
+      ss << "Invalid column index " << i << " to set column. Table has "
+         << columns_.size() << " columns";
+      return Status::Invalid(ss.str());
+    }
+
+    if (col->length() != num_rows_) {
+      std::stringstream ss;
+      ss << "Added column's length must match table's length. Expected length "
+         << num_rows_ << " but got length " << col->length();
+      return Status::Invalid(ss.str());
+    }
+
+    // Swap the field in the schema first so schema and columns stay aligned.
+    std::shared_ptr<Schema> new_schema;
+    RETURN_NOT_OK(schema_->SetField(i, col->field(), &new_schema));
+
+    *out = Table::Make(new_schema, internal::ReplaceVectorElement(columns_, i, col));
+    return Status::OK();
+  }
+
   std::shared_ptr<Table> ReplaceSchemaMetadata(
       const std::shared_ptr<const KeyValueMetadata>& metadata) const override {
     auto new_schema = schema_->AddMetadata(metadata);
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index 7fa207f..43c51a7 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -220,6 +220,10 @@ class ARROW_EXPORT Table {
   virtual Status AddColumn(int i, const std::shared_ptr<Column>& column,
                            std::shared_ptr<Table>* out) const = 0;
 
+  /// \brief Replace a column in the table, producing a new Table
+  /// \param[in] i index of the column to replace
+  /// \param[in] column the column that takes the place of column i
+  /// \param[out] out the resulting table
+  /// \return Status
+  virtual Status SetColumn(int i, const std::shared_ptr<Column>& column,
+                           std::shared_ptr<Table>* out) const = 0;
+
   /// \brief Replace schema key-value metadata with new metadata (EXPERIMENTAL)
   /// \since 0.5.0
   ///
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 695a72e..51c4bbf 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -346,6 +346,17 @@ Status Schema::AddField(int i, const std::shared_ptr<Field>& field,
   return Status::OK();
 }
 
+// Produce, through `out`, a copy of this schema in which the field at
+// position i is replaced by `field`. The metadata is carried over as-is.
+//
+// Returns Status::Invalid when i does not refer to an existing field.
+Status Schema::SetField(int i, const std::shared_ptr<Field>& field,
+                        std::shared_ptr<Schema>* out) const {
+  // Valid positions are [0, num_fields()). Unlike insertion, i == num_fields()
+  // is not allowed: ReplaceVectorElement would append rather than replace.
+  if (i < 0 || i >= this->num_fields()) {
+    return Status::Invalid("Invalid column index to set field.");
+  }
+
+  *out = std::make_shared<Schema>(internal::ReplaceVectorElement(fields_, i, field),
+                                  metadata_);
+  return Status::OK();
+}
+
 bool Schema::HasMetadata() const {
   return (metadata_ != nullptr) && (metadata_->size() > 0);
 }
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 889f389..8e12880 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -776,6 +776,8 @@ class ARROW_EXPORT Schema {
   Status AddField(int i, const std::shared_ptr<Field>& field,
                   std::shared_ptr<Schema>* out) const;
   Status RemoveField(int i, std::shared_ptr<Schema>* out) const;
+  /// \brief Replace the field at position i, returning the new schema in out
+  Status SetField(int i, const std::shared_ptr<Field>& field,
+                  std::shared_ptr<Schema>* out) const;
 
   /// \brief Replace key-value metadata with new metadata
   ///
diff --git a/cpp/src/arrow/util/stl.h b/cpp/src/arrow/util/stl.h
index 27c1778..163ed40 100644
--- a/cpp/src/arrow/util/stl.h
+++ b/cpp/src/arrow/util/stl.h
@@ -56,6 +56,22 @@ inline std::vector<T> AddVectorElement(const std::vector<T>& values, size_t inde
   return out;
 }
 
+/// \brief Return a copy of values with the element at index replaced
+///
+/// \param[in] values the source vector; it is not modified
+/// \param[in] index position of the element to replace, must be < values.size()
+/// \param[in] new_element the value stored at index in the returned vector
+/// \return a new vector of the same size as values
+template <typename T>
+inline std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index,
+                                           const T& new_element) {
+  // Replacement needs an existing slot; index == values.size() is only
+  // meaningful for insertion and would make this function append.
+  DCHECK_LT(index, values.size());
+  std::vector<T> out;
+  out.reserve(values.size());
+  // Elements before the replaced position.
+  for (size_t i = 0; i < index; ++i) {
+    out.push_back(values[i]);
+  }
+  out.push_back(new_element);
+  // Elements after the replaced position; values[index] itself is skipped.
+  for (size_t i = index + 1; i < values.size(); ++i) {
+    out.push_back(values[i]);
+  }
+  return out;
+}
+
 }  // namespace internal
 }  // namespace arrow
 
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index a6188b6..8180276 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -302,6 +302,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         CStatus AddField(int i, const shared_ptr[CField]& field,
                          shared_ptr[CSchema]* out)
         CStatus RemoveField(int i, shared_ptr[CSchema]* out)
+        # Binding for the C++ SetField method: the schema with field i
+        # replaced by `field` is returned through `out`.
+        CStatus SetField(int i, const shared_ptr[CField]& field,
+                         shared_ptr[CSchema]* out)
 
         # Removed const in Cython so don't have to cast to get code to generate
         shared_ptr[CSchema] AddMetadata(
@@ -509,6 +511,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         CStatus AddColumn(int i, const shared_ptr[CColumn]& column,
                           shared_ptr[CTable]* out)
         CStatus RemoveColumn(int i, shared_ptr[CTable]* out)
+        # Binding for the C++ SetColumn method: the table with column i
+        # replaced by `column` is returned through `out`.
+        CStatus SetColumn(int i, const shared_ptr[CColumn]& column,
+                          shared_ptr[CTable]* out)
 
         CStatus Flatten(CMemoryPool* pool, shared_ptr[CTable]* out)
 
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 6e64d32..fe2def1 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -1331,6 +1331,17 @@ cdef class Table:
 
         return pyarrow_wrap_table(c_table)
 
+    def set_column(self, int i, Column column not None):
+        """
+        Replace column in Table at position. Returns new table
+
+        Parameters
+        ----------
+        i : int
+            Position of the column to replace.
+        column : Column
+            The replacement column. Must not be None.
+
+        Returns
+        -------
+        table : Table
+            New table with the column at position i replaced.
+        """
+        cdef shared_ptr[CTable] c_table
+
+        # `not None` in the signature rejects None with a TypeError before
+        # column.sp_column is read below.
+        with nogil:
+            check_status(self.table.SetColumn(i, column.sp_column, &c_table))
+
+        return pyarrow_wrap_table(c_table)
+
     def drop(self, columns):
         """
         Drop one or more columns and return a new table.
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 1df57bd..3d1e3d3 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -382,6 +382,24 @@ def test_table_add_column():
     assert t4.equals(expected)
 
 
+def test_table_set_column():
+    data = [
+        pa.array(range(5)),
+        pa.array([-10, -5, 0, 5, 10]),
+        pa.array(range(5, 10))
+    ]
+    table = pa.Table.from_arrays(data, names=('a', 'b', 'c'))
+
+    col = pa.Column.from_array('d', data[1])
+    t2 = table.set_column(0, col)
+
+    expected_data = list(data)
+    expected_data[0] = data[1]
+    expected = pa.Table.from_arrays(expected_data,
+                                    names=('d', 'b', 'c'))
+    assert t2.equals(expected)
+
+
 def test_table_drop():
     """ drop one or more columns given labels"""
     a = pa.array(range(5))
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 9be2ef6..4860fe5 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -635,6 +635,30 @@ cdef class Schema:
 
         return pyarrow_wrap_schema(new_schema)
 
+    def set(self, int i, Field field not None):
+        """
+        Replace a field at position i in the schema.
+
+        Parameters
+        ----------
+        i: int
+            Position of the field to replace.
+        field: Field
+            The replacement field. Must not be None.
+
+        Returns
+        -------
+        schema: Schema
+            New schema with the field at position i replaced.
+        """
+        cdef:
+            shared_ptr[CSchema] new_schema
+            shared_ptr[CField] c_field
+
+        # `not None` in the signature rejects None with a TypeError before
+        # field.sp_field is read. The handle is copied while the GIL is held.
+        c_field = field.sp_field
+
+        with nogil:
+            check_status(self.schema.SetField(i, c_field, &new_schema))
+
+        return pyarrow_wrap_schema(new_schema)
+
     def add_metadata(self, dict metadata):
         """
         Add metadata as dict of string keys and values to Schema

Reply via email to