This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit a694220b7ea2a86909c8bc0e447138219349b88f
Merge: 313b9c2f9e 8ba8273518
Author: Michael Blow <[email protected]>
AuthorDate: Tue Nov 19 00:01:13 2024 -0500

    Merge branch 'gerrit/goldfish' into 'master'
    
    Change-Id: Ic8b85f85eb80fcfd046ebfca721de09c0ad5c753

 .../deltalake/DeltaTableGenerator.java             |   2 +-
 .../deltalake-empty/deltalake-empty.00.ddl.sqlpp   |   2 +-
 asterixdb/asterix-external-data/pom.xml            |  16 +
 .../reader/aws/delta/AwsS3DeltaReaderFactory.java  | 116 ++++-
 .../record/reader/aws/delta/DataTypeJsonSerDe.java | 511 +++++++++++++++++++++
 .../reader/aws/delta/DeltaFileRecordReader.java    | 171 +++++++
 .../input/record/reader/aws/delta/RowSerDe.java    | 138 ++++++
 .../aws/delta/converter/DeltaConverterContext.java |  99 ++++
 .../asterix/external/parser/DeltaDataParser.java   | 330 +++++++++++++
 .../factory/DeltaTableDataParserFactory.java       |  65 +++
 .../provider/StreamRecordReaderProvider.java       |   6 +-
 .../external/util/ExternalDataConstants.java       |  16 +-
 .../asterix/external/util/ExternalDataUtils.java   |  34 +-
 .../asterix/external/util/google/gcs/GCSUtils.java |   7 +-
 ....apache.asterix.external.api.IDataParserFactory |   3 +-
 15 files changed, 1474 insertions(+), 42 deletions(-)

diff --cc asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 2bc65f2941,ffe75cb68b..202e1315f1
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@@ -18,9 -18,7 +18,8 @@@
   */
  package org.apache.asterix.external.util;
  
 +import java.util.List;
  import java.util.Set;
- import java.util.TimeZone;
  import java.util.function.LongSupplier;
  import java.util.function.Supplier;
  import java.util.regex.Pattern;
@@@ -366,30 -318,25 +365,40 @@@ public class ExternalDataConstants 
      public static final int WRITER_MAX_RESULT_MINIMUM = 1000;
      public static final Set<String> WRITER_SUPPORTED_FORMATS;
      public static final Set<String> WRITER_SUPPORTED_ADAPTERS;
 -    public static final Set<String> WRITER_SUPPORTED_COMPRESSION;
 +    public static final Set<String> TEXTUAL_WRITER_SUPPORTED_COMPRESSION;
 +    public static final Set<String> PARQUET_WRITER_SUPPORTED_COMPRESSION;
 +    public static final Set<String> PARQUET_WRITER_SUPPORTED_VERSION;
 +    public static final int PARQUET_DICTIONARY_PAGE_SIZE = 1048576;
 +    public static final List<String> WRITER_SUPPORTED_QUOTES;
 +    public static final List<ATypeTag> CSV_WRITER_SUPPORTED_DATA_TYPES =
 +            List.of(ATypeTag.TINYINT, ATypeTag.SMALLINT, ATypeTag.INTEGER, ATypeTag.BIGINT, ATypeTag.UINT8,
 +                    ATypeTag.UINT16, ATypeTag.UINT64, ATypeTag.FLOAT, ATypeTag.DOUBLE, ATypeTag.STRING,
 +                    ATypeTag.BOOLEAN, ATypeTag.DATETIME, ATypeTag.UINT32, ATypeTag.DATE, ATypeTag.TIME);
 +    public static final String PARQUET_MAX_SCHEMAS_KEY = "max-schemas";
 +    public static final int PARQUET_MAX_SCHEMAS_DEFAULT_VALUE = 5;
 +    public static final int PARQUET_MAX_SCHEMAS_MAX_VALUE = 10;
  
      static {
 -        WRITER_SUPPORTED_FORMATS = Set.of(FORMAT_JSON_LOWER_CASE);
 +        WRITER_SUPPORTED_FORMATS = Set.of(FORMAT_JSON_LOWER_CASE, FORMAT_PARQUET, FORMAT_CSV_LOWER_CASE);
          WRITER_SUPPORTED_ADAPTERS = Set.of(ALIAS_LOCALFS_ADAPTER.toLowerCase(), KEY_ADAPTER_NAME_AWS_S3.toLowerCase(),
 -                KEY_ADAPTER_NAME_GCS.toLowerCase());
 -        WRITER_SUPPORTED_COMPRESSION = Set.of(KEY_COMPRESSION_GZIP);
 +                KEY_ADAPTER_NAME_GCS.toLowerCase(), KEY_ADAPTER_NAME_HDFS.toLowerCase());
 +        TEXTUAL_WRITER_SUPPORTED_COMPRESSION = Set.of(KEY_COMPRESSION_GZIP);
 +        PARQUET_WRITER_SUPPORTED_COMPRESSION =
 +                Set.of(KEY_COMPRESSION_GZIP, KEY_COMPRESSION_SNAPPY, KEY_COMPRESSION_ZSTD);
 +        PARQUET_WRITER_SUPPORTED_VERSION = Set.of(PARQUET_WRITER_VERSION_VALUE_1, PARQUET_WRITER_VERSION_VALUE_2);
 +        WRITER_SUPPORTED_QUOTES = List.of(DEFAULT_QUOTE, DEFAULT_SINGLE_QUOTE, NONE);
      }
  
+     public static class DeltaOptions {
+         private DeltaOptions() {
+         }
+ 
+         public static final String DECIMAL_TO_DOUBLE = "decimal-to-double";
+         public static final String TIMESTAMP_AS_LONG = "timestamp-to-long";
+         public static final String DATE_AS_INT = "date-to-int";
+         public static final String TIMEZONE = "timezone";
+     }
+ 
      public static class ParquetOptions {
          private ParquetOptions() {
          }

Reply via email to