This is an automated email from the ASF dual-hosted git repository.

htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 7b0c6f0ba99683bb4c7159e24765a55416b512dc
Author: Hussain Towaileb <[email protected]>
AuthorDate: Sat Jul 2 17:42:31 2022 +0300

    [ASTERIXDB-3049][EXT]: Handle external failures for Parquet at compilation stage
    
    Change-Id: I1581c193de73fb220f05aa994c0d8b66bfff60fa
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16803
    Reviewed-by: Hussain Towaileb <[email protected]>
    Tested-by: Hussain Towaileb <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Integration-Tests: Hussain Towaileb <[email protected]>
    Integration-Tests: Jenkins <[email protected]>
---
 .../external/input/HDFSDataSourceFactory.java      |  5 +++--
 .../aws/parquet/AwsS3ParquetReaderFactory.java     | 26 ++++++++++++++++++----
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
index 8ea9ed43ac..419782fd9e 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
@@ -25,7 +25,8 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.asterix.common.api.IApplicationContext;
-import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.api.IExternalIndexer;
 import org.apache.asterix.external.api.IIndexibleExternalDataSource;
@@ -131,7 +132,7 @@ public class HDFSDataSourceFactory implements 
IRecordReaderFactory<Object>, IInd
                 this.recordClass = char[].class;
             }
         } catch (IOException e) {
-            throw new AsterixException(e);
+            throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, e);
         }
     }
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
index 803e657dd8..4e3d1ec044 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
@@ -18,12 +18,16 @@
  */
 package org.apache.asterix.external.input.record.reader.aws.parquet;
 
+import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
+
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.exceptions.RuntimeDataException;
 import org.apache.asterix.external.input.HDFSDataSourceFactory;
 import 
org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory.IncludeExcludeMatcher;
 import org.apache.asterix.external.util.ExternalDataConstants;
@@ -33,7 +37,11 @@ import 
org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.api.application.IServiceContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.util.ExceptionUtils;
+
+import com.amazonaws.SdkBaseException;
 
+import software.amazon.awssdk.core.exception.SdkException;
 import software.amazon.awssdk.services.s3.model.S3Object;
 
 public class AwsS3ParquetReaderFactory extends HDFSDataSourceFactory {
@@ -50,10 +58,20 @@ public class AwsS3ParquetReaderFactory extends 
HDFSDataSourceFactory {
         putS3ConfToHadoopConf(configuration, path);
 
         //Configure Hadoop S3 input splits
-        JobConf conf = createHdfsConf(serviceCtx, configuration);
-        int numberOfPartitions = 
getPartitionConstraint().getLocations().length;
-        ExternalDataUtils.AwsS3.configureAwsS3HdfsJobConf(conf, configuration, 
numberOfPartitions);
-        configureHdfsConf(conf, configuration);
+        try {
+            JobConf conf = createHdfsConf(serviceCtx, configuration);
+            int numberOfPartitions = 
getPartitionConstraint().getLocations().length;
+            ExternalDataUtils.AwsS3.configureAwsS3HdfsJobConf(conf, 
configuration, numberOfPartitions);
+            configureHdfsConf(conf, configuration);
+        } catch (SdkException | SdkBaseException ex) {
+            throw new RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, 
getMessageOrToString(ex));
+        } catch (AlgebricksException ex) {
+            Throwable root = ExceptionUtils.getRootCause(ex);
+            if (root instanceof SdkException || root instanceof 
SdkBaseException) {
+                throw new 
RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, 
getMessageOrToString(root));
+            }
+            throw ex;
+        }
     }
 
     @Override

Reply via email to