bkietz commented on a change in pull request #10550:
URL: https://github.com/apache/arrow/pull/10550#discussion_r654509706
##########
File path: cpp/src/arrow/buffer_test.cc
##########
@@ -653,18 +653,78 @@ TEST(TestBufferBuilder, ResizeReserve) {
ASSERT_OK(builder.Resize(128));
ASSERT_EQ(128, builder.capacity());
+ ASSERT_EQ(9, builder.length());
// Do not shrink to fit
ASSERT_OK(builder.Resize(64, false));
ASSERT_EQ(128, builder.capacity());
+ ASSERT_EQ(9, builder.length());
// Shrink to fit
ASSERT_OK(builder.Resize(64));
ASSERT_EQ(64, builder.capacity());
+ ASSERT_EQ(9, builder.length());
// Reserve elements
ASSERT_OK(builder.Reserve(60));
ASSERT_EQ(128, builder.capacity());
+ ASSERT_EQ(9, builder.length());
+}
+
+TEST(TestBufferBuilder, Finish) {
+ const std::string data = "some data";
+ auto data_ptr = data.c_str();
+
+ for (const bool shrink_to_fit : {true, false}) {
+ SCOPED_TRACE(shrink_to_fit);
+ BufferBuilder builder;
+ ASSERT_OK(builder.Append(data_ptr, 9));
+ ASSERT_OK(builder.Append(data_ptr, 9));
+ ASSERT_EQ(18, builder.length());
+ ASSERT_EQ(64, builder.capacity());
+
+ ASSERT_OK_AND_ASSIGN(auto buf, builder.Finish(shrink_to_fit));
+ ASSERT_EQ(buf->size(), 18);
+ ASSERT_EQ(buf->capacity(), 64);
+ }
+ for (const bool shrink_to_fit : {true, false}) {
+ // XXX would be nice to be able to write TEST_TRACE("shrink_to_fit =",
shrink_to_fit)
Review comment:
Agreed.
```c++
#define ARROW_SCOPED_TRACE(...) \
SCOPED_TRACE(StringBuilder(__VA_ARGS__))
```
##########
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##########
@@ -1816,7 +1817,8 @@ class TestPrimitiveQuantileKernel : public
::testing::Test {
for (auto interpolation : this->interpolations_) {
options.interpolation = interpolation;
ASSERT_OK_AND_ASSIGN(Datum out, Quantile(array, options));
- ASSERT_OK(out.make_array()->ValidateFull());
+ auto out_array = out.make_array();
+ ValidateOutput(*out_array);
Review comment:
And actually: it would be simpler and not too expensive to only make the
Datum overload public.
##########
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##########
@@ -1816,7 +1817,8 @@ class TestPrimitiveQuantileKernel : public
::testing::Test {
for (auto interpolation : this->interpolations_) {
options.interpolation = interpolation;
ASSERT_OK_AND_ASSIGN(Datum out, Quantile(array, options));
- ASSERT_OK(out.make_array()->ValidateFull());
+ auto out_array = out.make_array();
+ ValidateOutput(*out_array);
Review comment:
```suggestion
ValidateOutput(out);
```
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
##########
@@ -255,7 +255,12 @@ static bool CanCastFromDictionary(Type::type type_id) {
void AddCommonCasts(Type::type out_type_id, OutputType out_ty, CastFunction*
func) {
// From null to this type
- DCHECK_OK(func->AddKernel(Type::NA, {null()}, out_ty, CastFromNull));
+ ScalarKernel kernel;
+ kernel.exec = CastFromNull;
+ kernel.signature = KernelSignature::Make({null()}, out_ty);
+ kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
Review comment:
Not necessary for this PR, but: the correct fix here is to modify
CastFromNull to write 0s into a preallocated validity bitmap
(NullHandling::COMPUTED_PREALLOCATE) and write the bare minimum to data
buffers. Note we can even preallocate for binary since all the strings will be
empty
##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -442,6 +442,9 @@ struct GrouperImpl : Grouper {
};
struct GrouperFastImpl : Grouper {
+ static constexpr int kBitmapPaddingForSIMD = 64; // bits
+ static constexpr int kPaddingForSIMD = 32; // bytes
Review comment:
Could you add a comment explaining why these are 64 and 32 respectively?
##########
File path: cpp/src/arrow/buffer_builder.h
##########
@@ -64,23 +64,17 @@ class ARROW_EXPORT BufferBuilder {
/// \brief Resize the buffer to the nearest multiple of 64 bytes
///
/// \param new_capacity the new capacity of the of the builder. Will be
- /// rounded up to a multiple of 64 bytes for padding \param shrink_to_fit if
- /// new capacity is smaller than the existing size, reallocate internal
- /// buffer. Set to false to avoid reallocations when shrinking the builder.
+ /// rounded up to a multiple of 64 bytes for padding
+ /// \param shrink_to_fit if new capacity is smaller than the existing,
+ /// reallocate internal buffer. Set to false to avoid reallocations when
+ /// shrinking the builder.
/// \return Status
Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
- // Resize(0) is a no-op
+ // Resize(0) is a no-op (XXX: why??)
Review comment:
Note: this comment predates the `shrink_to_fit` parameter
https://github.com/apache/arrow/commit/c8d15d467f7a1950cf08bfcc1ead2e7ab828be00#diff-2edce6fb9f08ce495de42b07497d9d43d57b12051ef5aa997ad6056eb207b70dR159-R161
`Resize(0)` causes poolbuffer to free its buffer, which at that time would
incur `std::free(nullptr)`
https://github.com/apache/arrow/blob/c8d15d467f7a1950cf08bfcc1ead2e7ab828be00/cpp/src/arrow/buffer.cc#L117
so I'd guess the intent was to avert that.
Suffice to say: I think we can delete this comment and the special case. If
anything breaks, it needs to be fixed in PoolBuffer or MemoryPool and not here
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]