pitrou commented on code in PR #45577: URL: https://github.com/apache/arrow/pull/45577#discussion_r1969150543
########## cpp/src/arrow/compute/kernels/scalar_string_ascii.cc: ########## @@ -2185,51 +2186,61 @@ void AddAsciiStringReplaceSubstring(FunctionRegistry* registry) { using ExtractRegexState = OptionsWrapper<ExtractRegexOptions>; // TODO cache this once per ExtractRegexOptions -struct ExtractRegexData { - // Use unique_ptr<> because RE2 is non-movable (for ARROW_ASSIGN_OR_RAISE) - std::unique_ptr<RE2> regex; - std::vector<std::string> group_names; - +class ExtractRegexData { + public: static Result<ExtractRegexData> Make(const ExtractRegexOptions& options, bool is_utf8 = true) { ExtractRegexData data(options.pattern, is_utf8); - RETURN_NOT_OK(RegexStatus(*data.regex)); - - const int group_count = data.regex->NumberOfCapturingGroups(); - const auto& name_map = data.regex->CapturingGroupNames(); - data.group_names.reserve(group_count); - - for (int i = 0; i < group_count; i++) { - auto item = name_map.find(i + 1); // re2 starts counting from 1 - if (item == name_map.end()) { - // XXX should we instead just create fields with an empty name? - return Status::Invalid("Regular expression contains unnamed groups"); - } - data.group_names.emplace_back(item->second); - } + ARROW_RETURN_NOT_OK(data.Init()); return data; } Result<TypeHolder> ResolveOutputType(const std::vector<TypeHolder>& types) const { const DataType* input_type = types[0].type; - if (input_type == nullptr) { + // as mentioned here + // https://arrow.apache.org/docs/developers/cpp/development.html#code-style-linting-and-ci + // nullptr should not be used + if (input_type == NULLPTR) { // No input type specified - return nullptr; + return NULLPTR; } // Input type is either [Large]Binary or [Large]String and is also the type // of each field in the output struct type. DCHECK(is_base_binary_like(input_type->id())); FieldVector fields; - fields.reserve(group_names.size()); + fields.reserve(group_names_.size()); std::shared_ptr<DataType> owned_type = input_type->GetSharedPtr(); - std::transform(group_names.begin(), group_names.end(), std::back_inserter(fields), + std::transform(group_names_.begin(), group_names_.end(), std::back_inserter(fields), [&](const std::string& name) { return field(name, owned_type); }); - return struct_(std::move(fields)); + return struct_(fields); } + int64_t num_group() const { return group_names_.size(); } + std::shared_ptr<RE2> regex() const { return regex_; } - private: + protected: Review Comment: Oops, sorry, I hadn't seen that those were `private` before. Nevermind. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org