mathyingzhou commented on a change in pull request #8648:
URL: https://github.com/apache/arrow/pull/8648#discussion_r614561787



##########
File path: cpp/src/arrow/adapters/orc/adapter_test.cc
##########
@@ -58,6 +82,189 @@ class MemoryOutputStream : public liborc::OutputStream {
   uint64_t length_, natural_write_size_;
 };
 
+std::shared_ptr<Buffer> GenerateFixedDifferenceBuffer(int32_t fixed_length,
+                                                      int64_t length) {
+  BufferBuilder builder;
+  int32_t offsets[length];
+  ARROW_EXPECT_OK(builder.Resize(4 * length));
+  for (int32_t i = 0; i < length; i++) {
+    offsets[i] = fixed_length * i;
+  }
+  ARROW_EXPECT_OK(builder.Append(offsets, 4 * length));
+  std::shared_ptr<Buffer> buffer;
+  ARROW_EXPECT_OK(builder.Finish(&buffer));
+  return buffer;
+}
+
+std::shared_ptr<Array> CastFixedSizeBinaryArrayToBinaryArray(
+    std::shared_ptr<Array> array) {
+  auto fixed_size_binary_array = 
std::static_pointer_cast<FixedSizeBinaryArray>(array);
+  std::shared_ptr<Buffer> value_offsets = GenerateFixedDifferenceBuffer(
+      fixed_size_binary_array->byte_width(), array->length() + 1);
+  return std::make_shared<BinaryArray>(array->length(), value_offsets,
+                                       array->data()->buffers[1],
+                                       array->data()->buffers[0]);
+}
+
+template <typename TargetArrayType>
+std::shared_ptr<Array> CastInt64ArrayToTemporalArray(
+    const std::shared_ptr<DataType>& type, std::shared_ptr<Array> array) {
+  std::shared_ptr<ArrayData> new_array_data =
+      ArrayData::Make(type, array->length(), array->data()->buffers);
+  return std::make_shared<TargetArrayType>(new_array_data);
+}
+
+/// \brief Construct a random weak composition of a nonnegative integer
+/// i.e. a way of writing it as the sum of a sequence of n non-negative
+/// integers.
+///
+/// \param[in] n the number of integers in the weak composition
+/// \param[in] sum the integer of which a random weak composition is generated
+/// \param[out] out The generated weak composition
+template <typename T, typename U>
+void RandWeakComposition(int64_t n, T sum, std::vector<U>* out) {
+  const int random_seed = 0;
+  std::default_random_engine gen(random_seed);
+  out->resize(n, static_cast<T>(0));
+  T remaining_sum = sum;
+  std::generate(out->begin(), out->end() - 1, [&gen, &remaining_sum] {
+    std::uniform_int_distribution<T> d(static_cast<T>(0), remaining_sum);
+    auto res = d(gen);
+    remaining_sum -= res;
+    return static_cast<U>(res);
+  });
+  (*out)[n - 1] += remaining_sum;
+  std::random_shuffle(out->begin(), out->end());
+}
+
+Result<std::shared_ptr<Array>> GenerateRandomDate64Array(int64_t size,
+                                                         double 
null_probability) {
+  arrow::random::RandomArrayGenerator rand(kRandomSeed);
+  return CastInt64ArrayToTemporalArray<Date64Array>(
+      date64(), rand.Int64(size, kMilliMin, kMilliMax, null_probability));
+}
+
+Result<std::shared_ptr<Array>> GenerateRandomTimestampArray(int64_t size,
+                                                            
arrow::TimeUnit::type type,
+                                                            double 
null_probability) {
+  arrow::random::RandomArrayGenerator rand(kRandomSeed);
+  switch (type) {
+    case arrow::TimeUnit::type::SECOND: {
+      return CastInt64ArrayToTemporalArray<TimestampArray>(
+          timestamp(TimeUnit::SECOND),
+          rand.Int64(size, kSecondMin, kSecondMax, null_probability));
+    }
+    case arrow::TimeUnit::type::MILLI: {
+      return CastInt64ArrayToTemporalArray<TimestampArray>(
+          timestamp(TimeUnit::MILLI),
+          rand.Int64(size, kMilliMin, kMilliMax, null_probability));
+    }
+    case arrow::TimeUnit::type::MICRO: {
+      return CastInt64ArrayToTemporalArray<TimestampArray>(
+          timestamp(TimeUnit::MICRO),
+          rand.Int64(size, kMicroMin, kMicroMax, null_probability));
+    }
+    case arrow::TimeUnit::type::NANO: {
+      return CastInt64ArrayToTemporalArray<TimestampArray>(
+          timestamp(TimeUnit::NANO),
+          rand.Int64(size, kNanoMin, kNanoMax, null_probability));
+    }
+    default: {
+      return arrow::Status::Invalid("Unknown or unsupported Arrow TimeUnit: ", 
type);
+    }
+  }
+}
+
+std::shared_ptr<ChunkedArray> GenerateRandomChunkedArray(
+    const std::shared_ptr<DataType>& data_type, int64_t size, int64_t 
min_num_chunks,
+    int64_t max_num_chunks, double null_probability) {

Review comment:
       Wait. This function itself actually contains ORC-specific code such as 
the requirement that Date64 and Timestamp scalars must not overflow when cast 
to Timestamp NANO. Unless this requirement is actually universal in which case 
we should change how random arrays are canonically generated for these types we 
shouldn't really leave some function so ORC-specific in `testing/random.h`.
   
   However `void RandWeakComposition(int64_t n, T sum, std::vector<U>* out)` 
indeed belongs to `testing/random.h`. I have moved that one.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to