felipecrv commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1389970408
##########
cpp/src/arrow/testing/random.cc:
##########
@@ -608,6 +609,218 @@ std::shared_ptr<Array>
OffsetsFromLengthsArray(OffsetArrayType* lengths,
std::make_shared<typename OffsetArrayType::TypeClass>(), size, buffers,
null_count);
return std::make_shared<OffsetArrayType>(array_data);
}
+
+// Helper for RandomArrayGenerator::ArrayOf: extract some C value from
+// a given metadata key.
+template <typename T, typename ArrowType = typename CTypeTraits<T>::ArrowType>
+enable_if_parameter_free<ArrowType, T> GetMetadata(const KeyValueMetadata*
metadata,
+ const std::string& key,
+ T default_value) {
+ if (!metadata) return default_value;
+ const auto index = metadata->FindKey(key);
+ if (index < 0) return default_value;
+ const auto& value = metadata->value(index);
+ T output{};
+ if (!internal::ParseValue<ArrowType>(value.data(), value.length(), &output))
{
+ ABORT_NOT_OK(Status::Invalid("Could not parse ", key, " = ", value, " as ",
+ ArrowType::type_name()));
+ }
+ return output;
+}
+
+/// \brief Shuffle a list-view array in place using the Fisher–Yates algorithm [1].
+///
+/// [1] https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
+///
+/// \param[in] seed The seed for the random number generator
+/// \param[in,out] data The array to shuffle
+template <typename ListViewType>
+void ShuffleListViewDataInPlace(SeedType seed, ArrayData& data) {
+ DCHECK_EQ(data.type->id(), ListViewType::type_id);
+ using offset_type = typename ListViewType::offset_type;
+
+ auto* validity = data.GetMutableValues<uint8_t>(0, 0);
+ auto* offsets = data.GetMutableValues<offset_type>(1);
+ auto* sizes = data.GetMutableValues<offset_type>(2);
+
+ pcg32_fast rng(seed);
+ using UniformDist = std::uniform_int_distribution<int64_t>;
+ UniformDist dist;
+ for (int64_t i = data.length - 1; i > 0; --i) {
+ const auto j = dist(rng, UniformDist::param_type(0, i));
Review Comment:
I got this pattern (passing a `param_type` to the distribution's call
operator) from cppreference, and it seems to be The Right Way :tm: to change
the sampling range instead of creating a new distribution every time.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]