sanjibansg commented on code in PR #12852:
URL: https://github.com/apache/arrow/pull/12852#discussion_r875294573
##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -40,125 +39,86 @@ struct TypePtrHashEq {
}
};
-struct IdHashEq {
- using Id = ExtensionSet::Id;
-
- size_t operator()(Id id) const {
- constexpr ::arrow::internal::StringViewHash hash = {};
- auto out = static_cast<size_t>(hash(id.uri));
- ::arrow::internal::hash_combine(out, hash(id.name));
- return out;
- }
-
- bool operator()(Id l, Id r) const { return l.uri == r.uri && l.name ==
r.name; }
-};
-
} // namespace
// A builder used when creating a Substrait plan from an Arrow execution plan.
In
// that situation we do not have a set of anchor values already defined so we
keep
// a map of what Ids we have seen.
-struct ExtensionSet::Impl {
- void AddUri(util::string_view uri, ExtensionSet* self) {
- if (uris_.find(uri) != uris_.end()) return;
-
- self->uris_.push_back(uri);
- uris_.insert(self->uris_.back()); // lookup helper's keys should
reference memory
- // owned by this ExtensionSet
- }
-
- Status CheckHasUri(util::string_view uri) {
- if (uris_.find(uri) != uris_.end()) return Status::OK();
-
- return Status::Invalid(
- "Uri ", uri,
- " was referenced by an extension but was not declared in the
ExtensionSet.");
- }
-
- uint32_t EncodeType(ExtensionIdRegistry::TypeRecord type_record,
ExtensionSet* self) {
- // note: at this point we're guaranteed to have an Id which points to
memory owned by
- // the set's registry.
- AddUri(type_record.id.uri, self);
- auto it_success =
- types_.emplace(type_record.id, static_cast<uint32_t>(types_.size()));
-
- if (it_success.second) {
- self->types_.push_back(
- {type_record.id, type_record.type, type_record.is_variation});
- }
-
- return it_success.first->second;
- }
-
- uint32_t EncodeFunction(Id id, util::string_view function_name,
ExtensionSet* self) {
- // note: at this point we're guaranteed to have an Id which points to
memory owned by
- // the set's registry.
- AddUri(id.uri, self);
- auto it_success = functions_.emplace(id,
static_cast<uint32_t>(functions_.size()));
+ExtensionSet::ExtensionSet(ExtensionIdRegistry* registry) :
registry_(registry) {}
+
+Status ExtensionSet::CheckHasUri(util::string_view uri) {
+ auto it =
+ std::find_if(uris_.begin(), uris_.end(),
+ [&uri](const std::pair<uint32_t, util::string_view>&
anchor_uri_pair) {
+ return anchor_uri_pair.second == uri;
+ });
+ if (it != uris_.end()) return Status::OK();
+
+ return Status::Invalid(
+ "Uri ", uri,
+ " was referenced by an extension but was not declared in the
ExtensionSet.");
+}
- if (it_success.second) {
- self->functions_.push_back({id, function_name});
- }
+void ExtensionSet::AddUri(std::pair<uint32_t, util::string_view> uri) {
+ auto it =
+ std::find_if(uris_.begin(), uris_.end(),
+ [&uri](const std::pair<uint32_t, util::string_view>&
anchor_uri_pair) {
+ return anchor_uri_pair.second == uri.second;
+ });
+ if (it != uris_.end()) return;
+ uris_[uri.first] = uri.second;
+}
- return it_success.first->second;
+Status ExtensionSet::AddUri(Id id) {
+ auto uris_size = static_cast<unsigned int>(uris_.size());
+ if (uris_.find(uris_size) != uris_.end()) {
+ return Status::Invalid("Key already exist in the uris map");
}
+ uris_[uris_size] = id.uri;
+ return Status::OK();
+}
- std::unordered_set<util::string_view, ::arrow::internal::StringViewHash>
uris_;
- std::unordered_map<Id, uint32_t, IdHashEq, IdHashEq> types_, functions_;
-};
-
-ExtensionSet::ExtensionSet(ExtensionIdRegistry* registry)
- : registry_(registry), impl_(new Impl(), [](Impl* impl) { delete impl; })
{}
-
-Result<ExtensionSet> ExtensionSet::Make(std::vector<util::string_view> uris,
- std::vector<Id> type_ids,
- std::vector<bool> type_is_variation,
- std::vector<Id> function_ids,
- ExtensionIdRegistry* registry) {
+Result<ExtensionSet> ExtensionSet::Make(
+ std::unordered_map<uint32_t, util::string_view> uris,
+ std::unordered_map<uint32_t, Id> type_ids,
+ std::unordered_map<uint32_t, Id> function_ids, ExtensionIdRegistry*
registry) {
ExtensionSet set;
set.registry_ = registry;
// TODO(bkietz) move this into the registry as registry->OwnUris(&uris) or so
std::unordered_set<util::string_view, ::arrow::internal::StringViewHash>
uris_owned_by_registry;
- for (util::string_view uri : registry->Uris()) {
+ for (auto uri : registry->Uris()) {
Review Comment:
Using `util::string_view` instead of auto here now
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]