This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 0475d917972b5a905bc94d65411275a09d557321
Author: Peeyush Gupta <[email protected]>
AuthorDate: Wed May 21 07:32:56 2025 -0700

    [ASTERIXDB-3612][EXT] Reading gzip file with multiple gzip streams
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Ext-ref: MB-66818
    
    Change-Id: I47c2eb600dddba1198a92f5ff7dfc7f2da652c3e
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19807
    Reviewed-by: Hussain Towaileb <[email protected]>
    Tested-by: Jenkins <[email protected]>
---
 .../input/record/reader/aws/AwsS3InputStream.java  |  3 +-
 .../reader/azure/blob/AzureBlobInputStream.java    |  3 +-
 .../azure/datalake/AzureDataLakeInputStream.java   |  3 +-
 .../input/record/reader/gcs/GCSInputStream.java    |  3 +-
 .../record/reader/stream/AvailableInputStream.java | 57 ++++++++++++++++++++++
 5 files changed, 65 insertions(+), 4 deletions(-)

diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
index e7b7b293e0..45a40b0940 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
@@ -31,6 +31,7 @@ import 
org.apache.asterix.common.exceptions.CompilationException;
 import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.common.exceptions.RuntimeDataException;
 import 
org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStream;
+import 
org.apache.asterix.external.input.record.reader.stream.AvailableInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -71,7 +72,7 @@ public class AwsS3InputStream extends 
AbstractExternalInputStream {
         }
         // Use gzip stream if needed
         if (StringUtils.endsWithIgnoreCase(fileName, ".gz") || 
StringUtils.endsWithIgnoreCase(fileName, ".gzip")) {
-            in = new GZIPInputStream(in, 
ExternalDataConstants.DEFAULT_BUFFER_SIZE);
+            in = new GZIPInputStream(new AvailableInputStream(in), 
ExternalDataConstants.DEFAULT_BUFFER_SIZE);
         }
         return true;
     }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/blob/AzureBlobInputStream.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/blob/AzureBlobInputStream.java
index cdb3834a6c..567ceb78d9 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/blob/AzureBlobInputStream.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/blob/AzureBlobInputStream.java
@@ -30,6 +30,7 @@ import 
org.apache.asterix.common.exceptions.CompilationException;
 import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.common.exceptions.RuntimeDataException;
 import 
org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStream;
+import 
org.apache.asterix.external.input.record.reader.stream.AvailableInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -66,7 +67,7 @@ public class AzureBlobInputStream extends 
AbstractExternalInputStream {
             // Use gzip stream if needed
             String lowerCaseFileName = fileName.toLowerCase();
             if (lowerCaseFileName.endsWith(".gz") || 
lowerCaseFileName.endsWith(".gzip")) {
-                in = new GZIPInputStream(in, 
ExternalDataConstants.DEFAULT_BUFFER_SIZE);
+                in = new GZIPInputStream(new AvailableInputStream(in), 
ExternalDataConstants.DEFAULT_BUFFER_SIZE);
             }
         } catch (BlobStorageException ex) {
             if (ex.getErrorCode().equals(BlobErrorCode.BLOB_NOT_FOUND)) {
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/datalake/AzureDataLakeInputStream.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/datalake/AzureDataLakeInputStream.java
index e34d1885e0..5fa8fd8ab2 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/datalake/AzureDataLakeInputStream.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/datalake/AzureDataLakeInputStream.java
@@ -30,6 +30,7 @@ import 
org.apache.asterix.common.exceptions.CompilationException;
 import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.common.exceptions.RuntimeDataException;
 import 
org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStream;
+import 
org.apache.asterix.external.input.record.reader.stream.AvailableInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -66,7 +67,7 @@ public class AzureDataLakeInputStream extends 
AbstractExternalInputStream {
             // Use gzip stream if needed
             String lowerCaseFileName = fileName.toLowerCase();
             if (lowerCaseFileName.endsWith(".gz") || 
lowerCaseFileName.endsWith(".gzip")) {
-                in = new GZIPInputStream(in, 
ExternalDataConstants.DEFAULT_BUFFER_SIZE);
+                in = new GZIPInputStream(new AvailableInputStream(in), 
ExternalDataConstants.DEFAULT_BUFFER_SIZE);
             }
         } catch (BlobStorageException ex) {
             if (ex.getErrorCode().equals(BlobErrorCode.BLOB_NOT_FOUND)) {
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/gcs/GCSInputStream.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/gcs/GCSInputStream.java
index 5da4583935..f154ee8f9c 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/gcs/GCSInputStream.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/gcs/GCSInputStream.java
@@ -31,6 +31,7 @@ import 
org.apache.asterix.common.exceptions.CompilationException;
 import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.common.exceptions.RuntimeDataException;
 import 
org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStream;
+import 
org.apache.asterix.external.input.record.reader.stream.AvailableInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -67,7 +68,7 @@ public class GCSInputStream extends 
AbstractExternalInputStream {
 
         // Use gzip stream if needed
         if (StringUtils.endsWithIgnoreCase(fileName, ".gz") || 
StringUtils.endsWithIgnoreCase(fileName, ".gzip")) {
-            in = new GZIPInputStream(in, 
ExternalDataConstants.DEFAULT_BUFFER_SIZE);
+            in = new GZIPInputStream(new AvailableInputStream(in), 
ExternalDataConstants.DEFAULT_BUFFER_SIZE);
         }
         return true;
     }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/AvailableInputStream.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/AvailableInputStream.java
new file mode 100644
index 0000000000..22f8df4470
--- /dev/null
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/AvailableInputStream.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.stream;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class AvailableInputStream extends InputStream { // Wrapper whose available() never reports 0.
+    private final InputStream is; // Wrapped stream; every call below is forwarded to it.
+
+    public AvailableInputStream(InputStream inputstream) { // Stores the stream to wrap; no null check is done.
+        is = inputstream;
+    }
+
+    public int read() throws IOException { // Forwards to the wrapped stream unchanged.
+        return (is.read());
+    }
+
+    public int read(byte[] b) throws IOException { // Forwards to the wrapped stream unchanged.
+        return (is.read(b));
+    }
+
+    public int read(byte[] b, int off, int len) throws IOException { // Forwards unchanged.
+        return (is.read(b, off, len));
+    }
+
+    public void close() throws IOException { // Closes the wrapped stream.
+        is.close();
+    }
+
+    public int available() throws IOException { // Wrapped stream's count, with 0 replaced by 1.
+        // Report 1 instead of 0 so callers never see "nothing available", even when that is wrong.
+        // NOTE(review): presumably so GZIPInputStream keeps reading after a gzip member ends - confirm.
+        int a = is.available();
+        if (a == 0) {
+            return (1);
+        } else {
+            return (a);
+        }
+    }
+}

Reply via email to