This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit c5851fed33e421f197c709f578c1c7a50a727917 Merge: 5ff8ab05cd f9bb94e4b2 Author: Michael Blow <[email protected]> AuthorDate: Mon Dec 16 11:41:18 2024 -0500 Merge branch 'gerrit/goldfish' into 'master' Change-Id: I5a7b060a02b083a92419397b4e6e9e79cfe95ee1 asterixdb/asterix-app/pom.xml | 2 +- .../app/resource/PlanStagesGeneratorTest.java | 10 +- .../external_dataset/ExternalDatasetTestUtils.java | 2 + .../avro/AvroFileExampleGeneratorUtil.java | 55 +- .../ASTERIXDB-3534/ASTERIXDB-3534.001.ddl.sqlpp} | 27 +- .../ASTERIXDB-3534/ASTERIXDB-3534.002.update.sqlpp | 33 + .../ASTERIXDB-3534/ASTERIXDB-3534.003.query.sqlpp} | 23 +- .../avro-types/avro-union/avro-union.01.ddl.sqlpp | 11 +- ...nion.01.ddl.sqlpp => avro-union.03.query.sqlpp} | 25 +- .../heterogeneous-access.1.ddl.sqlpp | 2 +- .../assembly/ASTERIXDB-3534/ASTERIXDB-3534.003.adm | 3 + .../avro/avro-types/avro-map/avro-map.02.adm | 2 + .../avro/avro-types/avro-map/avro-map.03.adm | 4 +- .../avro-nested-records/avro-nested-records.02.adm | 4 +- .../avro-nested-records/avro-nested-records.03.adm | 4 +- .../avro-primitives/avro-primitives.02.adm | 4 +- .../avro-primitives/avro-primitives.03.adm | 4 +- .../avro/avro-types/avro-union/avro-union.02.adm | 2 + .../avro/avro-types/avro-union/avro-union.03.adm | 100 +++ .../org/apache/asterix/column/ColumnManager.java | 4 +- .../operation/lsm/flush/FlushColumnMetadata.java | 47 +- .../lsm/flush/FlushColumnTupleWriter.java | 48 +- .../lsm/flush/NoWriteColumnTransformer.java | 141 ++++ .../lsm/flush/NoWriteFlushColumnMetadata.java | 202 ++++++ .../lsm/merge/MergeColumnTupleWriter.java | 7 +- .../values/reader/AbstractColumnValuesReader.java | 59 +- .../values/writer/NoOpColumnValuesWriter.java | 124 ++++ .../column/test/bytes/AbstractBytesTest.java | 2 +- asterixdb/asterix-external-data/pom.xml | 12 +- .../asterix/external/parser/AvroDataParser.java | 5 + asterixdb/asterix-server/pom.xml | 4 + asterixdb/pom.xml | 109 +++- .../appended-resources/supplemental-models.xml | 714 ++++++++++++++------- ...-library-for-java_refs_tags_v1.17.1_LICENSE.txt | 34 +- ..._tags_azure-core-http-netty_1.13.11_LICENSE.txt | 21 + ..._tags_azure-core-http-netty_1.13.11_NOTICE.txt} | 22 +- ...or-java_refs_tags_azure-core_1.45.1_LICENSE.txt | 21 + ...or-java_refs_tags_azure-core_1.45.1_NOTICE.txt} | 22 +- ...ava_refs_tags_azure-identity_1.13.3_LICENSE.txt | 21 + ...java_refs_tags_azure-identity_1.13.3_NOTICE.txt | 2 +- ...for-java_refs_tags_azure-json_1.3.0_LICENSE.txt | 21 + ...for-java_refs_tags_azure-json_1.3.0_NOTICE.txt} | 2 +- ...efs_tags_azure-storage-blob_12.25.1_LICENSE.txt | 21 + ...efs_tags_azure-storage-blob_12.25.1_NOTICE.txt} | 22 +- ...s_tags_azure-storage-common_12.24.1_LICENSE.txt | 21 + ...s_tags_azure-storage-common_12.24.1_NOTICE.txt} | 20 - ...azure-storage-file-datalake_12.18.1_LICENSE.txt | 21 + ...azure-storage-file-datalake_12.18.1_NOTICE.txt} | 20 - ...azure-storage-internal-avro_12.10.1_LICENSE.txt | 21 + ...azure-storage-internal-avro_12.10.1_NOTICE.txt} | 20 - ...it_bc-java_refs_tags_r1rv78v1_LICENSE.html.txt} | 2 +- ...rcontent.com_grpc_grpc-java_v1.68.1_NOTICE.txt} | 0 ...com_netty_netty_netty-4.1.115.Final_NOTICE.txt} | 2 +- ...ntent.com_perfmark_perfmark_v0.27.0_NOTICE.txt} | 1 + ...streams_reactive-streams-jvm_v1.0.3_COPYING.txt | 121 ---- ...streams_reactive-streams-jvm_v1.0.3_LICENSE.txt | 8 - .../column/api/AbstractColumnTupleWriter.java | 8 +- .../column/impls/btree/ColumnBTreeBulkloader.java | 3 +- .../impls/btree/ColumnBTreeWriteLeafFrame.java | 2 +- hyracks-fullstack/pom.xml | 17 +- 60 files changed, 1613 insertions(+), 678 deletions(-) diff --cc asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp index ca20804bac,8c00ded11a..4159f0e75e --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp @@@ -34,8 -34,17 +34,17 @@@ CREATE TYPE AvroType as CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter% ( %template%, - ("container"="playground"), - ("definition"="avro-data/reviews"), + %additional_Properties%, + ("definition"="%path_prefix%avro-data/reviews"), ("include"="*avro_type.avro"), ("format" = "avro") - ); + ); + + CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter% + ( + %template%, + ("container"="playground"), + ("definition"="avro-data/reviews"), + ("include"="*partition_heterogeneous.avro"), + ("format" = "avro") + ); diff --cc asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp index 9445a2c05e,c7dcf7a330..0ea0cc2904 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp @@@ -34,8 -34,8 +34,8 @@@ CREATE TYPE AvroType as CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter% ( %template%, - ("container"="playground"), - ("definition"="avro-data/reviews"), + %additional_Properties%, + ("definition"="%path_prefix%avro-data/reviews"), - ("include"="*heterogeneous*"), + ("include"="*heterogeneous_*"), ("format" = "avro") ); diff --cc asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/NoWriteFlushColumnMetadata.java index 0000000000,c988a1136f..88e6cc2063 mode 000000,100644..100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/NoWriteFlushColumnMetadata.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/NoWriteFlushColumnMetadata.java @@@ -1,0 -1,202 +1,202 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.asterix.column.operation.lsm.flush; + + import static org.apache.asterix.column.util.ColumnValuesUtil.getNormalizedTypeTag; + + import java.io.ByteArrayInputStream; + import java.io.DataInput; + import java.io.DataInputStream; + import java.io.IOException; + import java.util.ArrayList; + import java.util.HashMap; + import java.util.List; + import java.util.Map; + -import org.apache.asterix.column.metadata.IFieldNamesDictionary; -import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary; + import org.apache.asterix.column.metadata.schema.AbstractSchemaNestedNode; + import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; + import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; + import org.apache.asterix.column.metadata.schema.UnionSchemaNode; + import org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode; + import org.apache.asterix.column.metadata.schema.collection.ArraySchemaNode; + import org.apache.asterix.column.metadata.schema.collection.MultisetSchemaNode; + import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode; + import org.apache.asterix.column.util.RunLengthIntArray; + import org.apache.asterix.column.values.IColumnValuesWriter; + import org.apache.asterix.column.values.IColumnValuesWriterFactory; + import org.apache.asterix.column.values.writer.NoOpColumnValuesWriter; ++import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary; ++import org.apache.asterix.om.dictionary.IFieldNamesDictionary; + import org.apache.asterix.om.types.ARecordType; + import org.apache.asterix.om.types.ATypeTag; + import org.apache.commons.lang3.mutable.Mutable; + import org.apache.hyracks.api.exceptions.HyracksDataException; + import org.apache.hyracks.data.std.api.IValueReference; + import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; + import org.apache.hyracks.storage.am.lsm.btree.column.api.IColumnWriteMultiPageOp; + import org.apache.hyracks.storage.am.lsm.common.api.ILSMMemoryComponent; + + /** + * Flush column metadata belongs to a flushing {@link ILSMMemoryComponent} + * The schema here is mutable and can change according to the flushed records + */ + public final class NoWriteFlushColumnMetadata extends FlushColumnMetadata { + + private int numColumns; + + public NoWriteFlushColumnMetadata(ARecordType datasetType, ARecordType metaType, int numPrimaryKeys, + boolean metaContainsKeys, IColumnValuesWriterFactory columnWriterFactory, + Mutable<IColumnWriteMultiPageOp> multiPageOpRef, List<IColumnValuesWriter> writers, + IFieldNamesDictionary fieldNamesDictionary, ObjectSchemaNode root, ObjectSchemaNode metaRoot, + Map<AbstractSchemaNestedNode, RunLengthIntArray> definitionLevels, ArrayBackedValueStorage schemaStorage) { + super(datasetType, metaType, numPrimaryKeys, metaContainsKeys, columnWriterFactory, multiPageOpRef, writers, + fieldNamesDictionary, root, metaRoot, definitionLevels, schemaStorage); + numColumns = 0; + } + + public static NoWriteFlushColumnMetadata createMutableMetadata(ARecordType datasetType, ARecordType metaType, + int numPrimaryKeys, boolean metaContainsKeys, IColumnValuesWriterFactory columnWriterFactory, + Mutable<IColumnWriteMultiPageOp> multiPageOpRef, IValueReference serializedMetadata) throws IOException { + DataInput input = new DataInputStream(new ByteArrayInputStream(serializedMetadata.getByteArray(), + serializedMetadata.getStartOffset(), serializedMetadata.getLength())); + //Skip offsets + input.skipBytes(OFFSETS_SIZE); + + //ColumnWriter + List<IColumnValuesWriter> writers = new ArrayList<>(); + deserializeWriters(input, writers, columnWriterFactory); + + //FieldNames + IFieldNamesDictionary fieldNamesDictionary = AbstractFieldNamesDictionary.deserialize(input); + + //Schema + Map<AbstractSchemaNestedNode, RunLengthIntArray> definitionLevels = new HashMap<>(); + ObjectSchemaNode root = (ObjectSchemaNode) AbstractSchemaNode.deserialize(input, definitionLevels); + ObjectSchemaNode metaRoot = null; + if (metaType != null) { + metaRoot = (ObjectSchemaNode) AbstractSchemaNode.deserialize(input, definitionLevels); + } + + ArrayBackedValueStorage schemaStorage = new ArrayBackedValueStorage(serializedMetadata.getLength()); + schemaStorage.append(serializedMetadata); + return new NoWriteFlushColumnMetadata(datasetType, metaType, numPrimaryKeys, metaContainsKeys, + columnWriterFactory, multiPageOpRef, writers, fieldNamesDictionary, root, metaRoot, definitionLevels, + schemaStorage); + } + + public void close() { + } + + @Override + public void flushDefinitionLevels(int level, AbstractSchemaNestedNode parent, AbstractSchemaNode node) + throws HyracksDataException { + //NoOp + } + + @Override + protected void flushDefinitionLevels(int parentMask, int childMask, RunLengthIntArray parentDefLevels, + AbstractSchemaNode node) throws HyracksDataException { + //NoOp + } + + @Override + public void enterLevel(AbstractSchemaNestedNode node) { + //NoOp + } + + @Override + public void exitNode(AbstractSchemaNode node) { + //NoOp + } + + @Override + public void exitLevel(AbstractSchemaNestedNode node) { + //NoOp + } + + @Override + public void exitCollectionNode(AbstractCollectionSchemaNode collectionNode, int numberOfItems) { + //NoOp + } + + @Override + public void addNestedNull(AbstractSchemaNestedNode parent, AbstractSchemaNestedNode node) + throws HyracksDataException { + //NoOp + } + + @Override + protected AbstractSchemaNode createChild(AbstractSchemaNode child, ATypeTag childTypeTag) + throws HyracksDataException { + AbstractSchemaNode createdChild; + ATypeTag normalizedTypeTag = getNormalizedTypeTag(childTypeTag); + if (child != null) { + if (child.getTypeTag() == ATypeTag.NULL) { + int columnIndex = ((PrimitiveSchemaNode) child).getColumnIndex(); + nullWriterIndexes.add(columnIndex); + createdChild = createChild(normalizedTypeTag); + } else { + createdChild = addDefinitionLevelsAndGet(new UnionSchemaNode(child, createChild(normalizedTypeTag))); + } + } else { + createdChild = createChild(normalizedTypeTag); + } + return createdChild; + } + + @Override + protected AbstractSchemaNode createChild(ATypeTag childTypeTag) throws HyracksDataException { + switch (childTypeTag) { + case OBJECT: + return addDefinitionLevelsAndGet(new ObjectSchemaNode()); + case ARRAY: + return addDefinitionLevelsAndGet(new ArraySchemaNode()); + case MULTISET: + return addDefinitionLevelsAndGet(new MultisetSchemaNode()); + case NULL: + case MISSING: + case BOOLEAN: + case FLOAT: + case DOUBLE: + case TINYINT: + case SMALLINT: + case INTEGER: + case BIGINT: + case STRING: + case UUID: + int columnIndex = nullWriterIndexes.isEmpty() ? columnWriters.size() : nullWriterIndexes.removeInt(0); + boolean primaryKey = columnIndex < getNumberOfPrimaryKeys(); + ATypeTag normalizedTypeTag = primaryKey ? childTypeTag : getNormalizedTypeTag(childTypeTag); + if (columnIndex == numColumns) { + numColumns++; + } + IColumnValuesWriter writer = NoOpColumnValuesWriter.INSTANCE; + addColumn(columnIndex, writer); + return new PrimitiveSchemaNode(columnIndex, normalizedTypeTag, primaryKey); + default: + throw new IllegalStateException("Unsupported type " + childTypeTag); + + } + } + + @Override + protected AbstractSchemaNode addDefinitionLevelsAndGet(AbstractSchemaNestedNode nestedNode) { + return nestedNode; + } -} ++} diff --cc asterixdb/pom.xml index 495d73ff34,c0f97927fe..649906aa8c --- a/asterixdb/pom.xml +++ b/asterixdb/pom.xml @@@ -91,13 -91,13 +91,13 @@@ <!-- Versions under dependencymanagement or used in many projects via properties --> <algebricks.version>0.3.10-SNAPSHOT</algebricks.version> <hyracks.version>0.3.10-SNAPSHOT</hyracks.version> - <hadoop.version>3.3.6</hadoop.version> + <hadoop.version>3.4.1</hadoop.version> <jacoco.version>0.7.6.201602180812</jacoco.version> <log4j.version>2.22.1</log4j.version> - <awsjavasdk.version>2.24.9</awsjavasdk.version> + <awsjavasdk.version>2.29.27</awsjavasdk.version> <awsjavasdk.crt.version>0.29.10</awsjavasdk.crt.version> - <parquet.version>1.14.1</parquet.version> + <parquet.version>1.14.3</parquet.version> - <hadoop-awsjavasdk.version>1.12.637</hadoop-awsjavasdk.version> + <hadoop-awsjavasdk.version>1.12.779</hadoop-awsjavasdk.version> <azureblobjavasdk.version>12.25.1</azureblobjavasdk.version> <azurecommonjavasdk.version>12.24.1</azurecommonjavasdk.version> <azureidentity.version>1.13.3</azureidentity.version>
