This is an automated email from the ASF dual-hosted git repository.
stevenwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 6d6fd0b5e9 [core] fix #9997 - Handle s3a file upload interrupt which
results in table metadata pointing to files that doesn't exist (#9998)
6d6fd0b5e9 is described below
commit 6d6fd0b5e967327967142758c999483ae50f95c5
Author: Abid Mohammed <[email protected]>
AuthorDate: Fri Mar 29 10:32:01 2024 -0700
[core] fix #9997 - Handle s3a file upload interrupt which results in table
metadata pointing to files that doesn't exist (#9998)
Co-authored-by: Abid Mohammed <[email protected]>
---
.../org/apache/iceberg/hadoop/HadoopStreams.java | 11 ++++
.../apache/hadoop/fs/s3a/S3ABlockOutputStream.java | 67 ++++++++++++++++++++++
.../apache/iceberg/hadoop/TestHadoopStreams.java | 53 +++++++++++++++++
3 files changed, 131 insertions(+)
diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopStreams.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopStreams.java
index 44023326a0..f9b43b6846 100644
--- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopStreams.java
+++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopStreams.java
@@ -187,6 +187,17 @@ public class HadoopStreams {
public void close() throws IOException {
stream.close();
this.closed = true;
+ // {@link org.apache.hadoop.fs.s3a.S3ABlockOutputStream#close()} calls {@link
+ // org.apache.hadoop.fs.s3a.S3ABlockOutputStream#putObject()}
+ // which doesn't throw an exception when interrupted.
+ // For S3A streams only, consume the interrupted flag to detect the failed upload
+ // and propagate the error up; other streams keep their interrupt status untouched.
+ if ("org.apache.hadoop.fs.s3a.S3ABlockOutputStream"
+ .equals(stream.getWrappedStream().getClass().getName())
+ && Thread.interrupted()) {
+ throw new IOException(
+ "S3ABlockOutputStream failed to upload object after stream was closed");
+ }
}
@SuppressWarnings("checkstyle:NoFinalizer")
diff --git a/core/src/test/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java b/core/src/test/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java
new file mode 100644
index 0000000000..63bddf7b79
--- /dev/null
+++ b/core/src/test/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.fs.s3a;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.concurrent.CancellationException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+/** Test mock for the Hadoop s3a writer; close() simulates an upload that can be interrupted. */
+public class S3ABlockOutputStream extends OutputStream {
+ public ExecutorService mockCloseService; // runs the simulated upload; NOTE(review): never shut down here
+ public Future<?> mockUploadOnClose; // assigned in close(); null until then; NOTE(review): read cross-thread
+
+ public S3ABlockOutputStream() {
+ mockCloseService = Executors.newSingleThreadExecutor();
+ }
+
+ @Override
+ public void write(int b) throws IOException {
+ throw new IOException("mocked class, do not use"); // writing is not supported by this mock
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ mockUploadOnClose =
+ mockCloseService.submit(
+ () -> {
+ try {
+ Thread.sleep(30 * 1000); // stands in for a long-running upload
+ } catch (InterruptedException e) {
+ // ignore: an interrupt (e.g. from interruptClose) simply ends the simulated upload
+ }
+ });
+ mockUploadOnClose.get(); // block the closing thread until the simulated upload ends
+ } catch (CancellationException | InterruptedException e) {
+ // mock interrupt in S3ABlockOutputStream#putObject: set the flag, throw nothing
+ Thread.currentThread().interrupt();
+ } catch (Exception e) {
+ throw new RuntimeException(e); // any other failure of the simulated upload
+ }
+ super.close();
+ }
+
+ public void interruptClose() {
+ mockUploadOnClose.cancel(true); // makes a close() blocked in get() see CancellationException
+ }
+}
diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopStreams.java b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopStreams.java
new file mode 100644
index 0000000000..09b478e4a6
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopStreams.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.hadoop;
+
+import java.io.IOException;
+import java.util.concurrent.Executors;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.s3a.S3ABlockOutputStream;
+import org.apache.iceberg.io.PositionOutputStream;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+class TestHadoopStreams {
+ // Checks that closing a wrapped S3A stream reports an interrupted upload as an IOException.
+ @Test
+ void closeShouldThrowIOExceptionWhenInterrupted() throws Exception {
+ // The mock stream's close() blocks on a simulated upload until interruptClose() cancels it.
+ S3ABlockOutputStream s3ABlockOutputStream = new S3ABlockOutputStream();
+ FSDataOutputStream fsDataOutputStream = new FSDataOutputStream(s3ABlockOutputStream, null);
+ PositionOutputStream wrap = HadoopStreams.wrap(fsDataOutputStream);
+ // interrupt mock upload on close after a delay
+ Executors.newSingleThreadExecutor()
+ .execute(
+ () -> {
+ try {
+ Thread.sleep(1000); // give wrap.close() time to start waiting on the upload
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ s3ABlockOutputStream.interruptClose();
+ });
+
+ Assertions.assertThatThrownBy(wrap::close)
+ .isInstanceOf(IOException.class)
+ .hasMessage("S3ABlockOutputStream failed to upload object after stream was closed");
+ }
+}