pitrou commented on code in PR #48729:
URL: https://github.com/apache/arrow/pull/48729#discussion_r2683182201
##########
cpp/src/arrow/compute/kernels/scalar_string_ascii.cc:
##########
@@ -1220,7 +1223,62 @@ void StringBoolTransform(KernelContext* ctx, const
ExecSpan& batch,
}
}
-using MatchSubstringState = OptionsWrapper<MatchSubstringOptions>;
+// Similar to OptionsWrapper, but caches a compiled object to avoid
recompiling on each
+// invocation (e.g., regex matchers). Follows the same pattern as
OptionsWrapper.
+template <typename OptionsType>
+struct CachedOptionsWrapper : public KernelState {
Review Comment:
Can this perhaps inherit from `OptionsWrapper` if it can reduce the amount
of additional code?
##########
cpp/src/arrow/compute/kernels/scalar_string_ascii.cc:
##########
@@ -1220,7 +1223,62 @@ void StringBoolTransform(KernelContext* ctx, const
ExecSpan& batch,
}
}
-using MatchSubstringState = OptionsWrapper<MatchSubstringOptions>;
+// Similar to OptionsWrapper, but caches a compiled object to avoid
recompiling on each
+// invocation (e.g., regex matchers). Follows the same pattern as
OptionsWrapper.
+template <typename OptionsType>
+struct CachedOptionsWrapper : public KernelState {
+ explicit CachedOptionsWrapper(OptionsType options) :
options(std::move(options)) {}
+
+ static Result<std::unique_ptr<KernelState>> Init(KernelContext*,
+ const KernelInitArgs& args)
{
+ if (auto options_ptr = static_cast<const OptionsType*>(args.options)) {
+ return std::make_unique<CachedOptionsWrapper>(*options_ptr);
+ }
+ return Status::Invalid(
+ "Attempted to initialize KernelState from null FunctionOptions");
+ }
+
+ static const OptionsType& Get(const KernelState& state) {
+ return checked_cast<const CachedOptionsWrapper&>(state).options;
+ }
+
+ static const OptionsType& Get(KernelContext* ctx) { return
Get(*ctx->state()); }
+
+ static CachedOptionsWrapper& GetMutable(KernelContext* ctx) {
+ return checked_cast<CachedOptionsWrapper&>(*ctx->state());
+ }
+
+ // Get or create cached object of a specific type
+ template <typename ObjectType, typename... Args>
+ Result<const ObjectType*> GetOrCreate(Args&&... args) {
Review Comment:
Since this is meant to be independent of `ObjectType`, do you think it's
worth to make it take a callable to avoid relying on the existence of
`ObjectType::Make`?
Something like this perhaps:
```suggestion
// Get or create cached object of a specific type
template <typename ObjectType, typename... Args>
auto GetOrCreate(Factory&& factory, Args&&... args)
-> Result<const std::decay_t<decltype(factory(args...))>*> {
```
##########
cpp/src/arrow/compute/kernels/scalar_string_ascii.cc:
##########
@@ -1220,7 +1223,62 @@ void StringBoolTransform(KernelContext* ctx, const
ExecSpan& batch,
}
}
-using MatchSubstringState = OptionsWrapper<MatchSubstringOptions>;
+// Similar to OptionsWrapper, but caches a compiled object to avoid
recompiling on each
+// invocation (e.g., regex matchers). Follows the same pattern as
OptionsWrapper.
+template <typename OptionsType>
+struct CachedOptionsWrapper : public KernelState {
+ explicit CachedOptionsWrapper(OptionsType options) :
options(std::move(options)) {}
+
+ static Result<std::unique_ptr<KernelState>> Init(KernelContext*,
+ const KernelInitArgs& args)
{
+ if (auto options_ptr = static_cast<const OptionsType*>(args.options)) {
+ return std::make_unique<CachedOptionsWrapper>(*options_ptr);
+ }
+ return Status::Invalid(
+ "Attempted to initialize KernelState from null FunctionOptions");
+ }
+
+ static const OptionsType& Get(const KernelState& state) {
+ return checked_cast<const CachedOptionsWrapper&>(state).options;
+ }
+
+ static const OptionsType& Get(KernelContext* ctx) { return
Get(*ctx->state()); }
+
+ static CachedOptionsWrapper& GetMutable(KernelContext* ctx) {
+ return checked_cast<CachedOptionsWrapper&>(*ctx->state());
+ }
+
+ // Get or create cached object of a specific type
+ template <typename ObjectType, typename... Args>
+ Result<const ObjectType*> GetOrCreate(Args&&... args) {
+ if (!cached_object) {
+ ARROW_ASSIGN_OR_RAISE(auto object,
ObjectType::Make(std::forward<Args>(args)...));
+ // Convert to shared_ptr (handles both unique_ptr and value returns)
+ cached_object = MakeCachedObject<ObjectType>(std::move(object));
+ }
+ return static_cast<const ObjectType*>(cached_object.get());
+ }
+
+ OptionsType options;
+ // Type-erased cache for compiled objects (can store any object type)
+ std::shared_ptr<void> cached_object;
Review Comment:
Why not `std::any`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]