This is an automated email from the ASF dual-hosted git repository. htowaileb pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 7b0c6f0ba99683bb4c7159e24765a55416b512dc Author: Hussain Towaileb <[email protected]> AuthorDate: Sat Jul 2 17:42:31 2022 +0300 [ASTERIXDB-3049][EXT]: Handle external failures for Parquet at compilation stage Change-Id: I1581c193de73fb220f05aa994c0d8b66bfff60fa Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16803 Reviewed-by: Hussain Towaileb <[email protected]> Tested-by: Hussain Towaileb <[email protected]> Tested-by: Jenkins <[email protected]> Integration-Tests: Hussain Towaileb <[email protected]> Integration-Tests: Jenkins <[email protected]> --- .../external/input/HDFSDataSourceFactory.java | 5 +++-- .../aws/parquet/AwsS3ParquetReaderFactory.java | 26 ++++++++++++++++++---- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java index 8ea9ed43ac..419782fd9e 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java @@ -25,7 +25,8 @@ import java.util.List; import java.util.Map; import org.apache.asterix.common.api.IApplicationContext; -import org.apache.asterix.common.exceptions.AsterixException; +import org.apache.asterix.common.exceptions.CompilationException; +import org.apache.asterix.common.exceptions.ErrorCode; import org.apache.asterix.external.api.AsterixInputStream; import org.apache.asterix.external.api.IExternalIndexer; import org.apache.asterix.external.api.IIndexibleExternalDataSource; @@ -131,7 +132,7 @@ public class HDFSDataSourceFactory implements IRecordReaderFactory<Object>, IInd this.recordClass = char[].class; } } catch (IOException e) { - throw new AsterixException(e); + throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, e); } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java index 803e657dd8..4e3d1ec044 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java @@ -18,12 +18,16 @@ */ package org.apache.asterix.external.input.record.reader.aws.parquet; +import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString; + import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.asterix.common.exceptions.CompilationException; +import org.apache.asterix.common.exceptions.ErrorCode; +import org.apache.asterix.common.exceptions.RuntimeDataException; import org.apache.asterix.external.input.HDFSDataSourceFactory; import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory.IncludeExcludeMatcher; import org.apache.asterix.external.util.ExternalDataConstants; @@ -33,7 +37,11 @@ import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.api.application.IServiceContext; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.exceptions.IWarningCollector; +import org.apache.hyracks.api.util.ExceptionUtils; + +import com.amazonaws.SdkBaseException; +import software.amazon.awssdk.core.exception.SdkException; import software.amazon.awssdk.services.s3.model.S3Object; public class AwsS3ParquetReaderFactory extends HDFSDataSourceFactory { @@ -50,10 +58,20 @@ public class AwsS3ParquetReaderFactory extends HDFSDataSourceFactory { putS3ConfToHadoopConf(configuration, path); //Configure Hadoop S3 input splits - JobConf conf = createHdfsConf(serviceCtx, configuration); - int numberOfPartitions = getPartitionConstraint().getLocations().length; - ExternalDataUtils.AwsS3.configureAwsS3HdfsJobConf(conf, configuration, numberOfPartitions); - configureHdfsConf(conf, configuration); + try { + JobConf conf = createHdfsConf(serviceCtx, configuration); + int numberOfPartitions = getPartitionConstraint().getLocations().length; + ExternalDataUtils.AwsS3.configureAwsS3HdfsJobConf(conf, configuration, numberOfPartitions); + configureHdfsConf(conf, configuration); + } catch (SdkException | SdkBaseException ex) { + throw new RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex)); + } catch (AlgebricksException ex) { + Throwable root = ExceptionUtils.getRootCause(ex); + if (root instanceof SdkException || root instanceof SdkBaseException) { + throw new RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(root)); + } + throw ex; + } } @Override
