wgtmac commented on code in PR #589: URL: https://github.com/apache/iceberg-cpp/pull/589#discussion_r3315074050
########## src/iceberg/metrics/metrics_context.h: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/metrics/metrics_context.h +/// \brief Factory interface for creating named Counter and Timer instances. + +#include <memory> +#include <string_view> +#include <unordered_map> + +#include "iceberg/iceberg_export.h" +#include "iceberg/metrics/counter.h" +#include "iceberg/metrics/timer.h" + +namespace iceberg { + +/// \brief Factory for creating named Counter and Timer instances. +/// +/// A MetricsContext owns all metrics it creates. Asking for the same name +/// twice returns the same object (identity by name). The null context returns +/// noop singletons without allocating. +class ICEBERG_EXPORT MetricsContext { + public: + virtual ~MetricsContext() = default; + + /// \brief Get or create a named Counter with an explicit unit. + virtual std::shared_ptr<Counter> GetCounter(std::string_view name, + CounterUnit unit) = 0; + + /// \brief Get or create a count-unit Counter by name. + /// + /// Convenience overload defaulting to CounterUnit::kCount. + std::shared_ptr<Counter> GetCounter(std::string_view name) { + return GetCounter(name, CounterUnit::kCount); + } + + /// \brief Get or create a named Timer (nanosecond precision). + virtual std::shared_ptr<Timer> GetTimer(std::string_view name) = 0; + + /// \brief Return the null (no-op) MetricsContext singleton. + /// + /// All metrics returned by the null context are noop; nothing is allocated. + static MetricsContext& Null(); + + /// \brief Create a new DefaultMetricsContext. + static std::unique_ptr<MetricsContext> Default(); +}; + +/// \brief MetricsContext backed by DefaultCounter and DefaultTimer instances. +/// +/// Thread-safe for metric *increments*; the unordered_map lookup/insert is NOT +/// protected. Concurrent GetCounter/GetTimer calls with the same name may create +/// duplicate Counter/Timer instances, breaking identity-by-name semantics. +/// Register all metrics during single-threaded setup, then pass the returned +/// shared_ptrs freely across threads. +class ICEBERG_EXPORT DefaultMetricsContext : public MetricsContext { + public: + using MetricsContext::GetCounter; // expose the one-arg base overload + std::shared_ptr<Counter> GetCounter(std::string_view name, CounterUnit unit) override; + + std::shared_ptr<Timer> GetTimer(std::string_view name) override; + + private: + std::unordered_map<std::string, std::shared_ptr<Counter>> counters_; Review Comment: Should we use `StringHash` and `StringEqual` defined in string_util.h to enable heterougenous lookup to avoid repeated string allocation? ########## src/iceberg/metrics/metrics_reporters.h: ########## @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/metrics/metrics_reporters.h +/// \brief Factory for creating MetricsReporter instances. + +#include <functional> +#include <memory> +#include <string> +#include <string_view> +#include <unordered_map> +#include <unordered_set> + +#include "iceberg/iceberg_export.h" +#include "iceberg/metrics/metrics_reporter.h" +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief Property key for configuring the metrics reporter implementation. +/// +/// Set this property in catalog properties to specify which metrics reporter +/// implementation to use. The value should match a registered reporter type. +constexpr std::string_view kMetricsReporterImpl = "metrics-reporter-impl"; + +/// \brief Property value for the noop metrics reporter. +constexpr std::string_view kMetricsReporterTypeNoop = "noop"; + +/// \brief Function type for creating MetricsReporter instances. +/// +/// \param properties Configuration properties for the reporter. +/// \return A new MetricsReporter instance or an error. +using MetricsReporterFactory = std::function<Result<std::unique_ptr<MetricsReporter>>( + const std::unordered_map<std::string, std::string>& properties)>; + +/// \brief A MetricsReporter that delegates to multiple reporters. +/// +/// Combines several reporters so that every report is delivered to each of them. +/// Any exception thrown by an individual reporter is caught and swallowed; +/// the remaining reporters still receive the report. +/// +/// Use MetricsReporters::Combine() to create instances — that helper flattens +/// nested composites and deduplicates reporters by identity. +class ICEBERG_EXPORT CompositeMetricsReporter : public MetricsReporter { + public: + explicit CompositeMetricsReporter( + std::unordered_set<std::shared_ptr<MetricsReporter>> reporters); + + void Report(const MetricsReport& report) override; + + /// \brief The reporters contained in this composite. + /// + /// Used by MetricsReporters::Combine() for flattening. + const std::unordered_set<std::shared_ptr<MetricsReporter>>& Reporters() const; + + private: + std::unordered_set<std::shared_ptr<MetricsReporter>> reporters_; +}; + +/// \brief Factory class for creating and managing MetricsReporter instances. +/// +/// This class provides a registry-based factory for creating MetricsReporter +/// implementations. Custom reporter implementations can be registered using +/// the Register() method. +class ICEBERG_EXPORT MetricsReporters { + public: + /// \brief Load a metrics reporter based on properties. + /// + /// This method looks up the "metrics-reporter-impl" property to determine + /// which reporter implementation to create. If not specified, returns a + /// NoopMetricsReporter. + /// + /// \param properties Configuration properties containing reporter type. + /// \return A new MetricsReporter instance or an error. + static Result<std::unique_ptr<MetricsReporter>> Load( + const std::unordered_map<std::string, std::string>& properties); + + /// \brief Register a factory for a metrics reporter type. + /// + /// This method is not thread-safe. All registrations should be done during Review Comment: How about making it thread safe? ########## src/iceberg/metrics/metrics_context.h: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/metrics/metrics_context.h +/// \brief Factory interface for creating named Counter and Timer instances. + +#include <memory> +#include <string_view> +#include <unordered_map> + +#include "iceberg/iceberg_export.h" +#include "iceberg/metrics/counter.h" +#include "iceberg/metrics/timer.h" + +namespace iceberg { + +/// \brief Factory for creating named Counter and Timer instances. +/// +/// A MetricsContext owns all metrics it creates. Asking for the same name +/// twice returns the same object (identity by name). The null context returns +/// noop singletons without allocating. +class ICEBERG_EXPORT MetricsContext { + public: + virtual ~MetricsContext() = default; + + /// \brief Get or create a named Counter with an explicit unit. + virtual std::shared_ptr<Counter> GetCounter(std::string_view name, + CounterUnit unit) = 0; + + /// \brief Get or create a count-unit Counter by name. + /// + /// Convenience overload defaulting to CounterUnit::kCount. + std::shared_ptr<Counter> GetCounter(std::string_view name) { + return GetCounter(name, CounterUnit::kCount); + } + + /// \brief Get or create a named Timer (nanosecond precision). + virtual std::shared_ptr<Timer> GetTimer(std::string_view name) = 0; + + /// \brief Return the null (no-op) MetricsContext singleton. + /// + /// All metrics returned by the null context are noop; nothing is allocated. + static MetricsContext& Null(); Review Comment: Should we change the return type to `std::shared_ptr<MetricsContext>`? It really depends on the downstream use case. It will not be helpful if any place that uses it expects a shared_ptr. ########## src/iceberg/metrics/metrics_context.h: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/metrics/metrics_context.h +/// \brief Factory interface for creating named Counter and Timer instances. + +#include <memory> +#include <string_view> +#include <unordered_map> + +#include "iceberg/iceberg_export.h" +#include "iceberg/metrics/counter.h" +#include "iceberg/metrics/timer.h" + +namespace iceberg { + +/// \brief Factory for creating named Counter and Timer instances. +/// +/// A MetricsContext owns all metrics it creates. Asking for the same name +/// twice returns the same object (identity by name). The null context returns +/// noop singletons without allocating. +class ICEBERG_EXPORT MetricsContext { + public: + virtual ~MetricsContext() = default; + + /// \brief Get or create a named Counter with an explicit unit. + virtual std::shared_ptr<Counter> GetCounter(std::string_view name, + CounterUnit unit) = 0; + + /// \brief Get or create a count-unit Counter by name. + /// + /// Convenience overload defaulting to CounterUnit::kCount. + std::shared_ptr<Counter> GetCounter(std::string_view name) { + return GetCounter(name, CounterUnit::kCount); + } + + /// \brief Get or create a named Timer (nanosecond precision). + virtual std::shared_ptr<Timer> GetTimer(std::string_view name) = 0; + + /// \brief Return the null (no-op) MetricsContext singleton. + /// + /// All metrics returned by the null context are noop; nothing is allocated. + static MetricsContext& Null(); + + /// \brief Create a new DefaultMetricsContext. + static std::unique_ptr<MetricsContext> Default(); +}; + +/// \brief MetricsContext backed by DefaultCounter and DefaultTimer instances. +/// +/// Thread-safe for metric *increments*; the unordered_map lookup/insert is NOT Review Comment: Should we use a mutex to make it thread safe? It should not hurt too much. ########## src/iceberg/metrics/metrics_reporters.h: ########## @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/metrics/metrics_reporters.h +/// \brief Factory for creating MetricsReporter instances. + +#include <functional> +#include <memory> +#include <string> +#include <string_view> +#include <unordered_map> +#include <unordered_set> + +#include "iceberg/iceberg_export.h" +#include "iceberg/metrics/metrics_reporter.h" +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief Property key for configuring the metrics reporter implementation. +/// +/// Set this property in catalog properties to specify which metrics reporter +/// implementation to use. The value should match a registered reporter type. +constexpr std::string_view kMetricsReporterImpl = "metrics-reporter-impl"; + +/// \brief Property value for the noop metrics reporter. +constexpr std::string_view kMetricsReporterTypeNoop = "noop"; + +/// \brief Function type for creating MetricsReporter instances. +/// +/// \param properties Configuration properties for the reporter. +/// \return A new MetricsReporter instance or an error. +using MetricsReporterFactory = std::function<Result<std::unique_ptr<MetricsReporter>>( + const std::unordered_map<std::string, std::string>& properties)>; + +/// \brief A MetricsReporter that delegates to multiple reporters. +/// +/// Combines several reporters so that every report is delivered to each of them. +/// Any exception thrown by an individual reporter is caught and swallowed; Review Comment: Should we make it a contract that a MetricsReporter should not throw but return an error status instead? ########## src/iceberg/metrics/metrics_reporter.h: ########## @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <string> +#include <string_view> +#include <unordered_map> +#include <utility> +#include <variant> + +#include "iceberg/iceberg_export.h" +#include "iceberg/metrics/commit_report.h" +#include "iceberg/metrics/scan_report.h" + +namespace iceberg { + +/// \brief The type of a metrics report. +enum class MetricsReportType { + kScanReport, + kCommitReport, +}; + +/// \brief Get the string representation of a metrics report type. +ICEBERG_EXPORT constexpr std::string_view ToString(MetricsReportType type) noexcept { + switch (type) { + case MetricsReportType::kScanReport: + return "scan"; + case MetricsReportType::kCommitReport: + return "commit"; + } + std::unreachable(); +} + +/// \brief A metrics report, which can be either a ScanReport or CommitReport. +/// +/// This variant type allows handling both report types uniformly through +/// the MetricsReporter interface. +using MetricsReport = std::variant<ScanReport, CommitReport>; + +/// \brief Get the type of a metrics report. +/// +/// \param report The metrics report to get the type of. +/// \return The type of the metrics report. +ICEBERG_EXPORT inline MetricsReportType GetReportType(const MetricsReport& report) { + return std::visit( + [](const auto& r) -> MetricsReportType { + using T = std::decay_t<decltype(r)>; + if constexpr (std::is_same_v<T, ScanReport>) { + return MetricsReportType::kScanReport; + } else { + return MetricsReportType::kCommitReport; + } + }, + report); +} + +/// \brief Interface for reporting metrics from Iceberg operations. +/// +/// Implementations of this interface can be used to collect and report +/// metrics about scan and commit operations. Common implementations include +/// logging reporters, metrics collectors, and the noop reporter for testing. +class ICEBERG_EXPORT MetricsReporter { + public: + virtual ~MetricsReporter() = default; + + /// \brief Initialize the reporter with catalog properties after construction. + /// + /// Called by MetricsReporters::Load() before the first Report() invocation. + /// The default implementation is a no-op. Override to perform property-based + /// setup (e.g., configure endpoints, credentials, sampling rates). + virtual void Initialize( Review Comment: Do we want to return `Status` for both `Initialize` and `Report` just in case a report impl will actually return any error? ########## src/iceberg/metrics/metrics_reporters.h: ########## @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/metrics/metrics_reporters.h +/// \brief Factory for creating MetricsReporter instances. + +#include <functional> +#include <memory> +#include <string> +#include <string_view> +#include <unordered_map> +#include <unordered_set> + +#include "iceberg/iceberg_export.h" +#include "iceberg/metrics/metrics_reporter.h" +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief Property key for configuring the metrics reporter implementation. +/// +/// Set this property in catalog properties to specify which metrics reporter +/// implementation to use. The value should match a registered reporter type. +constexpr std::string_view kMetricsReporterImpl = "metrics-reporter-impl"; + +/// \brief Property value for the noop metrics reporter. +constexpr std::string_view kMetricsReporterTypeNoop = "noop"; + +/// \brief Function type for creating MetricsReporter instances. +/// +/// \param properties Configuration properties for the reporter. +/// \return A new MetricsReporter instance or an error. +using MetricsReporterFactory = std::function<Result<std::unique_ptr<MetricsReporter>>( + const std::unordered_map<std::string, std::string>& properties)>; + +/// \brief A MetricsReporter that delegates to multiple reporters. +/// +/// Combines several reporters so that every report is delivered to each of them. +/// Any exception thrown by an individual reporter is caught and swallowed; +/// the remaining reporters still receive the report. +/// +/// Use MetricsReporters::Combine() to create instances — that helper flattens +/// nested composites and deduplicates reporters by identity. +class ICEBERG_EXPORT CompositeMetricsReporter : public MetricsReporter { + public: + explicit CompositeMetricsReporter( + std::unordered_set<std::shared_ptr<MetricsReporter>> reporters); + + void Report(const MetricsReport& report) override; + + /// \brief The reporters contained in this composite. + /// + /// Used by MetricsReporters::Combine() for flattening. + const std::unordered_set<std::shared_ptr<MetricsReporter>>& Reporters() const; + + private: + std::unordered_set<std::shared_ptr<MetricsReporter>> reporters_; Review Comment: Usually we won't have too much reporters. Perhaps std::vector is sufficient and more performant -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
