rdblue commented on a change in pull request #4050:
URL: https://github.com/apache/iceberg/pull/4050#discussion_r806005965
##########
File path: aws/src/main/java/org/apache/iceberg/aws/s3/S3OutputStream.java
##########
@@ -113,18 +120,21 @@
this.location = location;
this.awsProperties = awsProperties;
- createStack = Thread.currentThread().getStackTrace();
+ this.createStack = Thread.currentThread().getStackTrace();
- multiPartSize = awsProperties.s3FileIoMultiPartSize();
- multiPartThresholdSize = (int) (multiPartSize * awsProperties.s3FileIOMultipartThresholdFactor());
- stagingDirectory = new File(awsProperties.s3fileIoStagingDirectory());
- isChecksumEnabled = awsProperties.isS3ChecksumEnabled();
+ this.multiPartSize = awsProperties.s3FileIoMultiPartSize();
+ this.multiPartThresholdSize = (int) (multiPartSize * awsProperties.s3FileIOMultipartThresholdFactor());
+ this.stagingDirectory = new File(awsProperties.s3fileIoStagingDirectory());
+ this.isChecksumEnabled = awsProperties.isS3ChecksumEnabled();
Review comment:
Thanks for fixing these.
##########
File path: core/src/main/java/org/apache/iceberg/hadoop/HadoopMetricsContext.java
##########
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.hadoop;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.Map;
+import java.util.function.Consumer;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.io.FileIOMetricsContext;
+
+/**
+ * FileIO Metrics implementation that delegates to the Hadoop FileSystem
+ * statistics implementation using the provided scheme.
+ */
+public class HadoopMetricsContext implements FileIOMetricsContext {
+ public static final String SCHEME = "fileio.scheme";
+
+ private String scheme;
+ private transient FileSystem.Statistics statistics;
+
+ @Override
+ public void initialize(Map<String, String> properties) {
+ ValidationException.check(properties.containsKey(SCHEME),
+ "Scheme is required for Hadoop FileSystem metrics reporting");
+
+ // FileIO has no specific implementation class, but Hadoop will
+ // still track and report for the provided scheme.
+ this.scheme = properties.get(SCHEME);
+ this.statistics = FileSystem.getStatistics(scheme, null);
+ }
+
+ /**
+ * The Hadoop implementation delegates to the
+ * FileSystem.Statistics implementation and therefore does not require
+ * support for operations like unit() and count() as the counter
+ * values are not directly consumed.
+ *
+ * @param name name of the metric
+ * @param type numeric type of the counter value
+ * @param unit ignored
+ * @param <T> Counter numeric type
+ * @return counter
+ */
+ @Override
+ @SuppressWarnings("unchecked")
+  public <T extends Number> Counter<T> counter(String name, Class<T> type, Unit unit) {
+    switch (name) {
+      case READ_BYTES:
+        ValidationException.check(type == Long.class, "'%s' requires Long type", READ_BYTES);
+        return (Counter<T>) longCounter(statistics::incrementBytesRead);
+      case READ_OPERATIONS:
+        ValidationException.check(type == Integer.class, "'%s' requires Integer type", READ_OPERATIONS);
+        return (Counter<T>) integerCounter(statistics::incrementReadOps);
+      case WRITE_BYTES:
+        ValidationException.check(type == Long.class, "'%s' requires Long type", WRITE_BYTES);
+        return (Counter<T>) longCounter(statistics::incrementBytesWritten);
+      case WRITE_OPERATIONS:
+        ValidationException.check(type == Integer.class, "'%s' requires Integer type", WRITE_OPERATIONS);
+        return (Counter<T>) integerCounter(statistics::incrementWriteOps);
+      default:
+        throw new IllegalArgumentException(String.format("Unsupported counter: '%s'", name));
+    }
+  }
+
+ private Counter<Long> longCounter(Consumer<Long> consumer) {
+ return new Counter<Long>() {
+ @Override
+ public void increment() {
+ increment(1L);
+ }
+ @Override
Review comment:
Style: missing newline between methods.
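For reference, a minimal sketch of the formatting being suggested, with a blank line between the anonymous class methods (the body of the second `increment` is an assumption, since the excerpt above ends before it):
```java
private Counter<Long> longCounter(Consumer<Long> consumer) {
  return new Counter<Long>() {
    @Override
    public void increment() {
      increment(1L);
    }

    @Override
    public void increment(Long amount) {
      consumer.accept(amount);
    }
  };
}
```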
##########
File path: core/src/main/java/org/apache/iceberg/hadoop/HadoopMetricsContext.java
##########
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.hadoop;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.Map;
+import java.util.function.Consumer;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.io.FileIOMetricsContext;
+
+/**
+ * FileIO Metrics implementation that delegates to the Hadoop FileSystem
+ * statistics implementation using the provided scheme.
+ */
+public class HadoopMetricsContext implements FileIOMetricsContext {
+ public static final String SCHEME = "fileio.scheme";
+
+ private String scheme;
+ private transient FileSystem.Statistics statistics;
+
+ @Override
+ public void initialize(Map<String, String> properties) {
+ ValidationException.check(properties.containsKey(SCHEME),
+ "Scheme is required for Hadoop FileSystem metrics reporting");
+
+ // FileIO has no specific implementation class, but Hadoop will
+ // still track and report for the provided scheme.
+ this.scheme = properties.get(SCHEME);
+ this.statistics = FileSystem.getStatistics(scheme, null);
+ }
+
+ /**
+ * The Hadoop implementation delegates to the
+ * FileSystem.Statistics implementation and therefore does not require
+ * support for operations like unit() and count() as the counter
+ * values are not directly consumed.
+ *
+ * @param name name of the metric
+ * @param type numeric type of the counter value
+ * @param unit ignored
Review comment:
Why include unit if it is ignored? Is this to supply information for
other metrics collection libraries that require it?
##########
File path: api/src/main/java/org/apache/iceberg/metrics/MetricsContext.java
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.metrics;
+
+import java.io.Serializable;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Generalized interface for creating telemetry related instances for tracking
+ * operations. Implementations must take into account usage considerations
+ * like thread safety and serialization.
+ */
+public interface MetricsContext extends Serializable {
+ enum Unit {
+ UNDEFINED("undefined"),
+ BYTES("bytes"),
+ SCALAR("scalar");
+
+ private final String displayName;
+
+ Unit(String displayName) {
+ this.displayName = displayName;
+ }
+
+ public String displayName() {
+ return displayName;
+ }
+ }
+
+ default void initialize(Map<String, String> properties) {
+ }
+
+ interface Counter<T extends Number> {
+ /**
+ * Increment the counter by a single whole number value (i.e. 1).
+ */
+ void increment();
+
+ /**
+ * Increment the counter by the provided amount.
+ *
+ * @param amount to be incremented
+ */
+ void increment(T amount);
+
+ /**
+ * Reporting count is optional if the counter is reporting externally.
+ *
+ * @return current count if available
+ */
+ default Optional<T> count() {
Review comment:
It is a little odd that these are defined but not implemented in the
initial use of `MetricsContext`. How do you intend to use `count` and
`unit` later on? Since they have default implementations, can we add them
later, when we have a use for them, instead of now?
##########
File path: api/src/main/java/org/apache/iceberg/metrics/MetricsContext.java
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.metrics;
+
+import java.io.Serializable;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Generalized interface for creating telemetry related instances for tracking
+ * operations. Implementations must take into account usage considerations
+ * like thread safety and serialization.
+ */
+public interface MetricsContext extends Serializable {
+ enum Unit {
+ UNDEFINED("undefined"),
+ BYTES("bytes"),
+ SCALAR("scalar");
+
+ private final String displayName;
+
+ Unit(String displayName) {
+ this.displayName = displayName;
+ }
+
+ public String displayName() {
+ return displayName;
+ }
+ }
+
+ default void initialize(Map<String, String> properties) {
+ }
+
+ interface Counter<T extends Number> {
+ /**
+ * Increment the counter by a single whole number value (i.e. 1).
+ */
+ void increment();
+
+ /**
+ * Increment the counter by the provided amount.
+ *
+ * @param amount to be incremented
+ */
+ void increment(T amount);
+
+ /**
+ * Reporting count is optional if the counter is reporting externally.
+ *
+ * @return current count if available
+ */
+ default Optional<T> count() {
+ return Optional.empty();
+ }
+
+ default Unit unit() {
+ return Unit.UNDEFINED;
+ }
+ }
+
+ /**
+ * Get a named counter of a specific type. Metric implementations may impose
+ * restrictions on what types are supported for specific counters.
+ *
+ * @param name name of the metric
+ * @param type numeric type of the counter value
+ * @param unit the unit designation of the metric
+ * @return a counter implementation
+ */
+  default <T extends Number> Counter<T> counter(String name, Class<T> type, Unit unit) {
Review comment:
I think it would be better to leave this abstract so that implementations are
required to provide it. Then put the default no-op counter implementation in
`nullMetrics`. That signals what people need to implement.
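For illustration, a rough sketch of that shape, eliding the `Unit` and `Counter` definitions shown above (the static `nullMetrics()` factory and the no-op bodies are assumptions about how this could look, not code from the PR):
```java
public interface MetricsContext extends Serializable {
  // ... Unit, Counter, and initialize(Map<String, String>) as in the diff above ...

  // No default body: every implementation must provide its counters.
  <T extends Number> Counter<T> counter(String name, Class<T> type, Unit unit);

  // No-op context for callers that do not report metrics.
  static MetricsContext nullMetrics() {
    return new MetricsContext() {
      @Override
      public <T extends Number> Counter<T> counter(String name, Class<T> type, Unit unit) {
        return new Counter<T>() {
          @Override
          public void increment() {
          }

          @Override
          public void increment(T amount) {
          }
        };
      }
    };
  }
}
```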
##########
File path: api/src/main/java/org/apache/iceberg/metrics/MetricsContext.java
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.metrics;
+
+import java.io.Serializable;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Generalized interface for creating telemetry related instances for tracking
+ * operations. Implementations must take into account usage considerations
+ * like thread safety and serialization.
+ */
+public interface MetricsContext extends Serializable {
+ enum Unit {
+ UNDEFINED("undefined"),
+ BYTES("bytes"),
+ SCALAR("scalar");
Review comment:
Rather than "scalar", what about using "operations" as a unit?
##########
File path: aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java
##########
@@ -104,8 +112,23 @@ private S3Client client() {
@Override
public void initialize(Map<String, String> properties) {
this.awsProperties = new AwsProperties(properties);
- this.awsClientFactory = AwsClientFactories.from(properties);
- this.s3 = awsClientFactory::s3;
+
+ // Do not override s3 client if it was provided
+ if (s3 == null) {
+ this.s3 = AwsClientFactories.from(properties)::s3;
+ }
+
+ // Report Hadoop metrics if Hadoop is available
+ try {
+      Class<? extends MetricsContext> clazz = DynClasses.builder().impl(DEFAULT_METRICS_IMPL).buildChecked();
+
+ this.metrics = clazz.getDeclaredConstructor().newInstance();
+ metrics.initialize(ImmutableMap.of("fileio.scheme", "s3"));
Review comment:
Do you think that other metrics objects will also be loaded dynamically?
If not, I'd probably just pass `"s3"` to the constructor:
```java
this.metrics = DynConstructors.builder()
    .hiddenImpl("org.apache.iceberg.hadoop.HadoopMetricsContext", String.class)
.buildChecked()
.newInstance("s3");
```
##########
File path: aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java
##########
@@ -104,8 +112,23 @@ private S3Client client() {
@Override
public void initialize(Map<String, String> properties) {
this.awsProperties = new AwsProperties(properties);
- this.awsClientFactory = AwsClientFactories.from(properties);
- this.s3 = awsClientFactory::s3;
+
+ // Do not override s3 client if it was provided
+ if (s3 == null) {
+ this.s3 = AwsClientFactories.from(properties)::s3;
+ }
+
+ // Report Hadoop metrics if Hadoop is available
+ try {
+      Class<? extends MetricsContext> clazz = DynClasses.builder().impl(DEFAULT_METRICS_IMPL).buildChecked();
+
+ this.metrics = clazz.getDeclaredConstructor().newInstance();
+ metrics.initialize(ImmutableMap.of("fileio.scheme", "s3"));
+ } catch (ClassNotFoundException e) {
Review comment:
I'd recommend using `DynConstructors` rather than `DynClasses` because
`DynConstructors` will catch and handle `NoClassDefFoundError` as well as
`ClassNotFoundException` and will always throw `ClassNotFoundException`. That
handles cases where dependencies of the class aren't found, like when Hadoop is
missing.
Otherwise, this may leak the `NoClassDefFoundError` and fail when Hadoop is
missing.
I'd also update the error message here to `Unable to load metrics class`.
And since this is a warning, consider suppressing the exception because we
expect it. Exception traces that are expected tend to confuse people looking at
logs.
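For illustration, a rough sketch combining those suggestions (the exact exception types surfaced by `buildChecked()` and the `LOG` field are assumptions, not code from the PR):
```java
// Report Hadoop metrics if Hadoop is available; otherwise fall back quietly.
try {
  DynConstructors.Ctor<MetricsContext> ctor = DynConstructors.builder(MetricsContext.class)
      .hiddenImpl(DEFAULT_METRICS_IMPL, String.class)
      .buildChecked();
  MetricsContext context = ctor.newInstance("s3");
  context.initialize(properties);
  this.metrics = context;
} catch (ReflectiveOperationException | NoClassDefFoundError e) {
  // expected when Hadoop is not on the classpath, so warn without the stack trace
  LOG.warn("Unable to load metrics class: '{}', falling back to null metrics", DEFAULT_METRICS_IMPL);
}
```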
##########
File path: api/src/main/java/org/apache/iceberg/io/FileIOMetricsContext.java
##########
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.io;
+
+import org.apache.iceberg.metrics.MetricsContext;
+
+/**
+ * Extension of MetricsContext for use with FileIO to define standard metrics
+ * that should be reported.
+ */
+public interface FileIOMetricsContext extends MetricsContext {
Review comment:
I like having a place for the constants, but what is the value of making
an identical subinterface of `MetricsContext` rather than using the parent
directly?
Also, instead of just names, should we have some metric definition that
includes unit (assuming we move forward with units)?
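As one possible shape for the second question, a hypothetical metric definition that carries its unit (the names and string values here are illustrative only, assuming `org.apache.iceberg.metrics.MetricsContext` is imported):
```java
public enum FileIOMetric {
  READ_BYTES("read.bytes", MetricsContext.Unit.BYTES),
  READ_OPERATIONS("read.operations", MetricsContext.Unit.SCALAR),
  WRITE_BYTES("write.bytes", MetricsContext.Unit.BYTES),
  WRITE_OPERATIONS("write.operations", MetricsContext.Unit.SCALAR);

  private final String metricName;
  private final MetricsContext.Unit unit;

  FileIOMetric(String metricName, MetricsContext.Unit unit) {
    this.metricName = metricName;
    this.unit = unit;
  }

  public String metricName() {
    return metricName;
  }

  public MetricsContext.Unit unit() {
    return unit;
  }
}
```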
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]