kou commented on issue #33701:
URL: https://github.com/apache/arrow/issues/33701#issuecomment-1396632780
Ah, it seems that we have more places that construct `arrow::DataType`
directly instead of using type construct function such as `arrow::int32()`. The
following patch uses type construct function but there are still some places
that construct `arrow::DataType` directly:
```diff
diff --git a/cpp/src/arrow/array/builder_nested.h
b/cpp/src/arrow/array/builder_nested.h
index 3e9328bfdf..6204aafdcb 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -185,7 +185,8 @@ class BaseListBuilder : public ArrayBuilder {
}
std::shared_ptr<DataType> type() const override {
- return
std::make_shared<TYPE>(value_field_->WithType(value_builder_->type()));
+ return TypeTraits<TYPE>::type_instance(
+ value_field_->WithType(value_builder_->type()));
}
protected:
diff --git a/cpp/src/arrow/compute/exec/hash_join_dict.cc
b/cpp/src/arrow/compute/exec/hash_join_dict.cc
index 4ce89446d3..aef6244e90 100644
--- a/cpp/src/arrow/compute/exec/hash_join_dict.cc
+++ b/cpp/src/arrow/compute/exec/hash_join_dict.cc
@@ -359,7 +359,7 @@ Result<std::shared_ptr<ArrayData>>
HashJoinDictBuild::RemapOutput(
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> indices,
HashJoinDictUtil::ConvertFromInt32(
index_type_, Datum(indices32Bit),
indices32Bit.length, ctx));
- auto type = std::make_shared<DictionaryType>(index_type_, value_type_);
+ auto type = dictionary(index_type_, value_type_);
return ArrayData::Make(type, indices->length, indices->buffers, {},
unified_dictionary_);
}
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
index c0dc747e49..ce54473c38 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
@@ -1299,7 +1299,7 @@ struct Strptime {
return Status::OK();
} else {
return Status::Invalid("Failed to parse string: '", s, "' as a
scalar of type ",
- TimestampType(self.unit).ToString());
+ timestamp(self.unit)->ToString());
}
};
RETURN_NOT_OK(VisitArraySpanInline<InType>(in, visit_value,
visit_null));
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index 5873969066..e98816a418 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -415,6 +415,14 @@ struct TypeTraits<ListType> {
using OffsetBuilderType = Int32Builder;
using OffsetScalarType = Int32Scalar;
constexpr static bool is_parameter_free = false;
+ static inline std::shared_ptr<DataType> type_instance(
+ const std::shared_ptr<Field>& value_type) {
+ return list(value_type);
+ }
+ static inline std::shared_ptr<DataType> type_instance(
+ const std::shared_ptr<DataType>& value_type) {
+ return list(value_type);
+ }
};
template <>
@@ -427,6 +435,14 @@ struct TypeTraits<LargeListType> {
using OffsetBuilderType = Int64Builder;
using OffsetScalarType = Int64Scalar;
constexpr static bool is_parameter_free = false;
+ static inline std::shared_ptr<DataType> type_instance(
+ const std::shared_ptr<Field>& value_type) {
+ return large_list(value_type);
+ }
+ static inline std::shared_ptr<DataType> type_instance(
+ const std::shared_ptr<DataType>& value_type) {
+ return large_list(value_type);
+ }
};
template <>
diff --git a/cpp/src/arrow/util/byte_size.cc
b/cpp/src/arrow/util/byte_size.cc
index fe232c9acc..e43d4316c3 100644
--- a/cpp/src/arrow/util/byte_size.cc
+++ b/cpp/src/arrow/util/byte_size.cc
@@ -131,13 +131,13 @@ struct GetByteRangesArray {
return Status::OK();
}
- Status VisitFixedWidthArray(const Buffer& buffer, const FixedWidthType&
type) const {
+ Status VisitFixedWidthArray(const Buffer& buffer, const FixedWidthType*
type) const {
uint64_t data_start = reinterpret_cast<uint64_t>(buffer.data());
- uint64_t offset_bits = offset * type.bit_width();
+ uint64_t offset_bits = offset * type->bit_width();
uint64_t offset_bytes =
bit_util::RoundDown(static_cast<int64_t>(offset_bits), 8) / 8;
uint64_t end_byte =
- bit_util::RoundUp(static_cast<int64_t>(offset_bits + (length *
type.bit_width())),
- 8) /
+ bit_util::RoundUp(
+ static_cast<int64_t>(offset_bits + (length *
type->bit_width())), 8) /
8;
uint64_t length_bytes = (end_byte - offset_bytes);
RETURN_NOT_OK(range_starts->Append(data_start));
@@ -149,7 +149,7 @@ struct GetByteRangesArray {
static_assert(sizeof(uint8_t*) <= sizeof(uint64_t),
"Undefined behavior if pointer larger than uint64_t");
RETURN_NOT_OK(VisitBitmap(input.buffers[0]));
- RETURN_NOT_OK(VisitFixedWidthArray(*input.buffers[1], type));
+ RETURN_NOT_OK(VisitFixedWidthArray(*input.buffers[1], &type));
if (input.dictionary) {
// This is slightly imprecise because we always assume the entire
dictionary is
// referenced. If this array has an offset it may only be
referencing a portion of
@@ -241,11 +241,11 @@ struct GetByteRangesArray {
Status Visit(const DenseUnionType& type) const {
// Skip validity map for DenseUnionType
// Types buffer is always int8
- RETURN_NOT_OK(VisitFixedWidthArray(
- *input.buffers[1],
*std::dynamic_pointer_cast<FixedWidthType>(int8())));
+ RETURN_NOT_OK(VisitFixedWidthArray(*input.buffers[1],
+
static_cast<FixedWidthType*>(int8().get())));
// Offsets buffer is always int32
- RETURN_NOT_OK(VisitFixedWidthArray(
- *input.buffers[2],
*std::dynamic_pointer_cast<FixedWidthType>(int32())));
+ RETURN_NOT_OK(VisitFixedWidthArray(*input.buffers[2],
+
static_cast<FixedWidthType*>(int32().get())));
// We have to loop through the types buffer to figure out the correct
// offset / length being referenced in the child arrays
@@ -278,8 +278,8 @@ struct GetByteRangesArray {
Status Visit(const SparseUnionType& type) const {
// Skip validity map for SparseUnionType
// Types buffer is always int8
- RETURN_NOT_OK(VisitFixedWidthArray(
- *input.buffers[1],
*std::dynamic_pointer_cast<FixedWidthType>(int8())));
+ RETURN_NOT_OK(VisitFixedWidthArray(*input.buffers[1],
+
static_cast<FixedWidthType*>(int8().get())));
for (int i = 0; i < type.num_fields(); i++) {
GetByteRangesArray child{*input.child_data[i],
diff --git a/cpp/src/arrow/util/value_parsing.h
b/cpp/src/arrow/util/value_parsing.h
index 5193f0af75..d444bf68ec 100644
--- a/cpp/src/arrow/util/value_parsing.h
+++ b/cpp/src/arrow/util/value_parsing.h
@@ -920,8 +920,8 @@ bool ParseValue(const T& type, const char* s, size_t
length,
template <typename T>
enable_if_parameter_free<T, bool> ParseValue(
const char* s, size_t length, typename StringConverter<T>::value_type*
out) {
- static T type;
- return StringConverter<T>{}.Convert(type, s, length, out);
+ auto type = std::static_pointer_cast<T>(TypeTraits<T>::type_singleton());
+ return StringConverter<T>{}.Convert(*type, s, length, out);
}
} // namespace internal
diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp
index 89b4ba2e05..75b23db631 100644
--- a/r/src/r_to_arrow.cpp
+++ b/r/src/r_to_arrow.cpp
@@ -1262,7 +1262,7 @@ std::shared_ptr<Array> MakeSimpleArray(SEXP x) {
buffers[0] = std::move(null_bitmap);
}
- auto data = ArrayData::Make(std::make_shared<Type>(), LENGTH(x),
std::move(buffers),
+ auto data = ArrayData::Make(TypeTraits<Type>::type_singleton(),
LENGTH(x), std::move(buffers),
null_count, 0 /*offset*/);
// return the right Array class
@@ -1387,7 +1387,7 @@ bool vector_from_r_memory_impl(SEXP x, const
std::shared_ptr<DataType>& type,
buffers[0] = std::move(null_bitmap);
}
- auto data = ArrayData::Make(std::make_shared<Type>(), n,
std::move(buffers),
+ auto data = ArrayData::Make(TypeTraits<Type>::type_singleton(), n,
std::move(buffers),
null_count, 0 /*offset*/);
auto array = std::make_shared<typename
TypeTraits<Type>::ArrayType>(data);
columns[j] = std::make_shared<arrow::ChunkedArray>(array);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]