mathyingzhou commented on a change in pull request #8648:
URL: https://github.com/apache/arrow/pull/8648#discussion_r614556421



##########
File path: cpp/src/arrow/adapters/orc/adapter_util.cc
##########
@@ -315,13 +344,662 @@ Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch,
       return Status::NotImplemented("Not implemented type kind: ", kind);
   }
 }
+}  // namespace orc
+}  // namespace adapters
+}  // namespace arrow
+
+namespace {
+
+using arrow::internal::checked_cast;
+
+arrow::Status WriteBatch(const arrow::Array& parray, int64_t orc_offset,
+                         liborc::ColumnVectorBatch* column_vector_batch,
+                         bool normalized = false);  // Forward declaration only; the definition is not part of this excerpt.
+
+/* Make sure children of StructArray have appropriate nulls.
+ *
+ * For a STRUCT array whose null_count() is non-zero, every child is rebuilt so
+ * that slot 0 of its buffers holds the struct's null bitmap, or the bitwise
+ * AND of the struct's and the child's bitmaps when the child has one of its
+ * own; the rebuilt child is then normalized recursively. LIST, LARGE_LIST,
+ * FIXED_SIZE_LIST and MAP arrays are rebuilt around normalized values (keys
+ * and items for MAP). Every other type, and a STRUCT with no nulls, is
+ * returned as-is.
+ *
+ * NOTE(review): the bitmaps are combined starting at bit 0 and the rebuilt
+ * arrays are constructed without an explicit offset, so this looks like it
+ * assumes zero-offset (unsliced) inputs -- TODO confirm against callers.
+ * NOTE(review): ValueOrDie() is applied to the BitmapAnd result, so a failure
+ * there is not handed back to the caller -- confirm this is intended.
+ */
+std::shared_ptr<arrow::Array> NormalizeArray(const 
std::shared_ptr<arrow::Array>& array) {
+  arrow::Type::type kind = array->type_id();
+  switch (kind) {
+    case arrow::Type::type::BOOL:
+    case arrow::Type::type::INT8:
+    case arrow::Type::type::INT16:
+    case arrow::Type::type::INT32:
+    case arrow::Type::type::INT64:
+    case arrow::Type::type::FLOAT:
+    case arrow::Type::type::DOUBLE:
+    case arrow::Type::type::STRING:
+    case arrow::Type::type::LARGE_STRING:
+    case arrow::Type::type::BINARY:
+    case arrow::Type::type::LARGE_BINARY:
+    case arrow::Type::type::FIXED_SIZE_BINARY:
+    case arrow::Type::type::DATE32:
+    case arrow::Type::type::DATE64:
+    case arrow::Type::type::TIMESTAMP:
+    case arrow::Type::type::DECIMAL128: {
+      return array;  // these types have no children to fix up
+    }
+    case arrow::Type::type::STRUCT: {
+      if (array->null_count() == 0) {  // no struct-level nulls to push down
+        return array;
+      } else {
+        auto struct_array = 
std::static_pointer_cast<arrow::StructArray>(array);
+        const std::shared_ptr<arrow::Buffer> bitmap = 
struct_array->null_bitmap();
+        std::shared_ptr<arrow::DataType> struct_type = struct_array->type();
+        std::size_t size = struct_type->fields().size();
+        std::vector<std::shared_ptr<arrow::Array>> new_children(size, nullptr);
+        for (std::size_t i = 0; i < size; i++) {
+          std::shared_ptr<arrow::Array> child = struct_array->field(i);
+          const std::shared_ptr<arrow::Buffer> child_bitmap = 
child->null_bitmap();
+          std::shared_ptr<arrow::Buffer> final_child_bitmap;
+          if (child_bitmap == nullptr) {  // child has no bitmap of its own: reuse the struct's
+            final_child_bitmap = bitmap;
+          } else {  // otherwise combine the struct's bitmap with the child's
+            final_child_bitmap = arrow::internal::BitmapAnd(
+                                     arrow::default_memory_pool(), 
bitmap->data(), 0,
+                                     child_bitmap->data(), 0, 
struct_array->length(), 0)
+                                     .ValueOrDie();
+          }
+          std::shared_ptr<arrow::ArrayData> child_array_data = child->data();
+          std::vector<std::shared_ptr<arrow::Buffer>> child_buffers =
+              child_array_data->buffers;  // copy of the buffer list, so the child's own list is not modified
+          child_buffers[0] = final_child_bitmap;  // slot 0 receives the combined bitmap
+          std::shared_ptr<arrow::ArrayData> new_child_array_data = 
arrow::ArrayData::Make(
+              child->type(), child->length(), child_buffers, 
child_array_data->child_data,
+              child_array_data->dictionary);
+          new_children[i] = 
NormalizeArray(arrow::MakeArray(new_child_array_data));
+        }
+        return std::make_shared<arrow::StructArray>(struct_type, 
struct_array->length(),
+                                                    new_children, bitmap);
+      }
+    }
+    case arrow::Type::type::LIST: {
+      auto list_array = std::static_pointer_cast<arrow::ListArray>(array);
+      return std::make_shared<arrow::ListArray>(
+          list_array->type(), list_array->length(), 
list_array->value_offsets(),
+          NormalizeArray(list_array->values()), list_array->null_bitmap());
+    }
+    case arrow::Type::type::LARGE_LIST: {
+      auto list_array = std::static_pointer_cast<arrow::LargeListArray>(array);
+      return std::make_shared<arrow::LargeListArray>(
+          list_array->type(), list_array->length(), 
list_array->value_offsets(),
+          NormalizeArray(list_array->values()), list_array->null_bitmap());
+    }
+    case arrow::Type::type::FIXED_SIZE_LIST: {
+      auto list_array = 
std::static_pointer_cast<arrow::FixedSizeListArray>(array);
+      return std::make_shared<arrow::FixedSizeListArray>(
+          list_array->type(), list_array->length(), 
NormalizeArray(list_array->values()),
+          list_array->null_bitmap());
+    }
+    case arrow::Type::type::MAP: {
+      auto map_array = std::static_pointer_cast<arrow::MapArray>(array);
+      return std::make_shared<arrow::MapArray>(
+          map_array->type(), map_array->length(), map_array->value_offsets(),
+          NormalizeArray(map_array->keys()), 
NormalizeArray(map_array->items()),
+          map_array->null_bitmap());
+    }
+    default: {  // any type not listed above is passed through unchanged
+      return array;
+    }
+  }
+}
+
+template <class DataType, class BatchType, typename Enable = void>
+struct Appender {};  // Primary template is empty; the specializations that follow supply VisitNull/VisitValue.
+
+/* Types for long/double-like Appender, that is, numeric, boolean or date32.
+ * is_generic_type<T>::value is true when arrow::is_number_type<T> or
+ * arrow::is_boolean_type<T> holds, or when T is exactly arrow::Date32Type.
+ */
+template <typename T>
+using is_generic_type =
+    std::integral_constant<bool, arrow::is_number_type<T>::value ||
+                                     std::is_same<arrow::Date32Type, T>::value 
||
+                                     arrow::is_boolean_type<T>::value>;
+template <typename T, typename R = void>
+using enable_if_generic = arrow::enable_if_t<is_generic_type<T>::value, R>;  // presumably like std::enable_if_t: R when T is generic -- confirm
+
+/* Number-like: Appender specialization selected for the generic types
+ * (numeric, boolean, date32). Each Visit* call handles one element: it writes
+ * notNull[] (and data[] for a value) at running_orc_offset in `batch`, then
+ * advances both running offsets by one.
+ */
+template <class DataType, class BatchType>
+struct Appender<DataType, BatchType, enable_if_generic<DataType>> {
+  using ArrayType = typename arrow::TypeTraits<DataType>::ArrayType;
+  using ValueType = typename arrow::TypeTraits<DataType>::CType;
+  arrow::Status VisitNull() {
+    batch->notNull[running_orc_offset] = false;  // only the null flag is written; data[] is left untouched
+    running_orc_offset++;
+    running_arrow_offset++;
+    return arrow::Status::OK();
+  }
+  arrow::Status VisitValue(ValueType v) {  // NOTE(review): `v` is unused; the value is re-read from `array`
+    batch->data[running_orc_offset] = array.Value(running_arrow_offset);
+    batch->notNull[running_orc_offset] = true;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return arrow::Status::OK();
+  }
+  const ArrayType& array;  // Arrow array the values are read from
+  BatchType* batch;  // ORC batch the values are written to
+  int64_t running_orc_offset, running_arrow_offset;  // next write index in `batch` / next read index in `array`
+};
+
+/* Binary: Appender specialization targeting liborc::StringVectorBatch.
+ * For a non-null element it stores the pointer returned by array.GetValue()
+ * (no bytes are copied here) in data[] and the element length in length[];
+ * for a null only notNull[] is written. Both running offsets advance by one
+ * per call.
+ *
+ * NOTE(review): since data[] holds pointers obtained from `array`, the Arrow
+ * buffers presumably must stay alive while the batch is in use -- confirm.
+ */
+template <class DataType>
+struct Appender<DataType, liborc::StringVectorBatch> {
+  using ArrayType = typename arrow::TypeTraits<DataType>::ArrayType;
+  using COffsetType = typename arrow::TypeTraits<DataType>::OffsetType::c_type;
+  arrow::Status VisitNull() {
+    batch->notNull[running_orc_offset] = false;  // data[] and length[] are left untouched for nulls
+    running_orc_offset++;
+    running_arrow_offset++;
+    return arrow::Status::OK();
+  }
+  arrow::Status VisitValue(arrow::util::string_view v) {  // NOTE(review): `v` is unused; the bytes are looked up via `array`
+    batch->notNull[running_orc_offset] = true;
+    COffsetType data_length = 0;  // filled in by GetValue() below
+    batch->data[running_orc_offset] = reinterpret_cast<char*>(
+        const_cast<uint8_t*>(array.GetValue(running_arrow_offset, 
&data_length)));
+    batch->length[running_orc_offset] = data_length;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return arrow::Status::OK();
+  }
+  const ArrayType& array;  // Arrow array the values are read from
+  liborc::StringVectorBatch* batch;  // ORC batch the pointers and lengths are written to
+  int64_t running_orc_offset, running_arrow_offset;  // next write index in `batch` / next read index in `array`
+};
+
+// Decimal
+template <>
+struct Appender<arrow::Decimal128Type, liborc::Decimal64VectorBatch> {
+  arrow::Status VisitNull() {
+    batch->notNull[running_orc_offset] = false;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return arrow::Status::OK();
+  }
+  arrow::Status VisitValue(arrow::util::string_view v) {
+    batch->notNull[running_orc_offset] = true;
+    uint8_t* raw_int128 = 
const_cast<uint8_t*>(array.GetValue(running_arrow_offset));
+    int64_t* lower_bits = reinterpret_cast<int64_t*>(raw_int128);
+    batch->values[running_orc_offset] = *lower_bits;

Review comment:
       Fixed!




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to