From Wail Alkowaileet <[email protected]>: Wail Alkowaileet has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18663 )
Change subject: [ASTERIXDB-3392] Add new FieldNamesDictionary ...................................................................... [ASTERIXDB-3392] Add new FieldNamesDictionary Change-Id: I798fcb4a02f017ad1b31e0cdb556e0dda1403b5e Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18663 Integration-Tests: Jenkins <[email protected]> Reviewed-by: Wail Alkowaileet <[email protected]> Tested-by: Wail Alkowaileet <[email protected]> --- M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java R asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java M hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java R asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java R asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java R asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java M asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java R asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java M asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java R asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java R asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java R asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java 24 files changed, 85 insertions(+), 76 deletions(-) Approvals: Wail Alkowaileet: Looks good to me, approved; Verified Anon. E. 
Moose #1000171: Jenkins: Verified diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java index 6480c30..cb447c8 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java @@ -18,7 +18,7 @@ */ package org.apache.asterix.column.assembler; -import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX; +import static org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX; import java.util.ArrayList; import java.util.BitSet; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java index 0bea188..1f74fb3 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java @@ -18,7 +18,7 @@ */ package org.apache.asterix.column.metadata.schema; -import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX; +import static org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX; import java.io.DataInput; import java.io.DataInputStream; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java index 05c4eda..c7d3df1 100644 --- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java @@ -22,12 +22,12 @@ import java.util.List; -import org.apache.asterix.column.metadata.IFieldNamesDictionary; import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; import org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode; import org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode; import org.apache.asterix.column.operation.lsm.flush.FlushColumnMetadata; +import org.apache.asterix.om.dictionary.IFieldNamesDictionary; import org.apache.asterix.om.types.ARecordType; import org.apache.asterix.om.types.AUnionType; import org.apache.asterix.om.types.AbstractCollectionType; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java index afe8368..ff05568 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java @@ -21,13 +21,13 @@ import java.io.IOException; import java.util.Map; -import org.apache.asterix.column.metadata.IFieldNamesDictionary; import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; import org.apache.asterix.column.metadata.schema.UnionSchemaNode; import org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode; import org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode; 
import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode; +import org.apache.asterix.om.dictionary.IFieldNamesDictionary; import org.apache.asterix.om.types.ARecordType; import org.apache.asterix.om.types.ATypeTag; import org.apache.asterix.om.types.AUnionType; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java index f514638..87f9ff3 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java @@ -33,9 +33,7 @@ import java.util.Map; import org.apache.asterix.column.metadata.AbstractColumnMetadata; -import org.apache.asterix.column.metadata.IFieldNamesDictionary; import org.apache.asterix.column.metadata.PathInfoSerializer; -import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary; import org.apache.asterix.column.metadata.schema.AbstractSchemaNestedNode; import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; @@ -51,6 +49,8 @@ import org.apache.asterix.column.values.IColumnValuesWriter; import org.apache.asterix.column.values.IColumnValuesWriterFactory; import org.apache.asterix.column.values.writer.AbstractColumnValuesWriter; +import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary; +import org.apache.asterix.om.dictionary.IFieldNamesDictionary; import org.apache.asterix.om.types.ARecordType; import org.apache.asterix.om.types.ATypeTag; import org.apache.commons.lang3.mutable.Mutable; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java index e507d53..15a6277 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java @@ -41,8 +41,6 @@ import org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory; import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor; import org.apache.asterix.column.metadata.AbstractColumnImmutableReadMetadata; -import org.apache.asterix.column.metadata.IFieldNamesDictionary; -import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary; import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor; @@ -50,6 +48,8 @@ import org.apache.asterix.column.values.IColumnValuesReader; import org.apache.asterix.column.values.IColumnValuesReaderFactory; import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader; +import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary; +import org.apache.asterix.om.dictionary.IFieldNamesDictionary; import org.apache.asterix.om.types.ARecordType; import org.apache.asterix.runtime.projection.FunctionCallInformation; import org.apache.hyracks.api.context.IHyracksTaskContext; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java index 356ddaa..d931242 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java +++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java @@ -38,14 +38,14 @@ import org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorFactory; import org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory; import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor; -import org.apache.asterix.column.metadata.IFieldNamesDictionary; -import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary; import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor; import org.apache.asterix.column.values.IColumnValuesReader; import org.apache.asterix.column.values.IColumnValuesReaderFactory; import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader; +import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary; +import org.apache.asterix.om.dictionary.IFieldNamesDictionary; import org.apache.asterix.om.types.ARecordType; import org.apache.asterix.runtime.projection.FunctionCallInformation; import org.apache.hyracks.api.context.IHyracksTaskContext; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java index b425a26..a4b9240 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.List; -import org.apache.asterix.column.metadata.IFieldNamesDictionary; import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor; 
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; @@ -30,6 +29,7 @@ import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode; import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer; import org.apache.asterix.om.base.AString; +import org.apache.asterix.om.dictionary.IFieldNamesDictionary; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.api.IValueReference; import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java index 6d991cf..ad0e460 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.List; -import org.apache.asterix.column.metadata.IFieldNamesDictionary; import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor; import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; @@ -30,6 +29,7 @@ import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode; import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer; import org.apache.asterix.om.base.AString; +import org.apache.asterix.om.dictionary.IFieldNamesDictionary; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.api.IValueReference; import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream; diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java index 63c2b22..4b33e19 100644 --- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java +++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java @@ -20,11 +20,11 @@ import java.util.concurrent.TimeUnit; -import org.apache.asterix.column.metadata.IFieldNamesDictionary; -import org.apache.asterix.column.metadata.dictionary.FieldNamesHashDictionary; -import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary; import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer; import org.apache.asterix.om.base.AMutableString; +import org.apache.asterix.om.dictionary.FieldNamesHashDictionary; +import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary; +import org.apache.asterix.om.dictionary.IFieldNamesDictionary; import org.apache.commons.lang3.RandomStringUtils; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.api.IValueReference; diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java index c9f58d5..1ec468c 100644 --- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java +++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java @@ -28,8 +28,8 @@ import java.util.Collections; import java.util.List; -import org.apache.asterix.column.metadata.dictionary.FieldNameTrie; -import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary; +import org.apache.asterix.om.dictionary.FieldNameTrie; +import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary; import 
org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.api.IValueReference; import org.apache.hyracks.data.std.primitive.UTF8StringPointable; diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java index b07e857..ca2ad55 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java @@ -79,7 +79,7 @@ .withType(schema).withTypeInfo(typeInfo).withRowGroupSize(rowGroupSize).withPageSize(pageSize) .withDictionaryPageSize(ExternalDataConstants.PARQUET_DICTIONARY_PAGE_SIZE) .enableDictionaryEncoding().withValidation(false) - .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0).withConf(conf).build(); + .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0).withConf(conf).build(); } catch (IOException e) { throw HyracksDataException.create(e); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java index cc54676..5db600f 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java @@ -31,6 +31,14 @@ public class ParquetOutputFile implements OutputFile { private final FSDataOutputStream fs; + /* + This class wraps OutputStream as a file that Parquet SDK supports writing to. 
+ By default, this assumes output stream doesn't support block size which distributed file systems use. + Hadoop File System Library use this as a default block size + Ref : https://github.com/apache/hadoop/blob/74ff00705cf67911f1ff8320c6c97354350d6952/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java#L2756 + */ + private static final long DEFAULT_BLOCK_SIZE = 33554432L; + public ParquetOutputFile(OutputStream os) { this.fs = new FSDataOutputStream(os, new FileSystem.Statistics("test")); } @@ -52,6 +60,6 @@ @Override public long defaultBlockSize() { - return 33554432L; + return DEFAULT_BLOCK_SIZE; } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java index f36df1a..7058bf6 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java @@ -18,42 +18,35 @@ */ package org.apache.asterix.external.writer.printer.parquet; -import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction; +import java.util.ArrayList; +import java.util.List; + +import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary; import org.apache.hyracks.api.exceptions.HyracksDataException; -import org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory; import org.apache.hyracks.data.std.api.IValueReference; -import org.apache.hyracks.data.std.primitive.UTF8StringPointable; import org.apache.hyracks.util.string.UTF8StringUtil; -import it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; - public class FieldNamesDictionary { - private final IBinaryHashFunction fieldNameHashFunction; - 
private final Int2ObjectMap<String> hashToFieldNameIndexMap; + private final FieldNamesTrieDictionary trie; + private final List<String> fieldNames; + private final StringBuilder builder; public FieldNamesDictionary() { - fieldNameHashFunction = - new PointableBinaryHashFunctionFactory(UTF8StringPointable.FACTORY).createBinaryHashFunction(); - hashToFieldNameIndexMap = new Int2ObjectOpenHashMap<>(); + trie = new FieldNamesTrieDictionary(); + fieldNames = new ArrayList<>(); + builder = new StringBuilder(); } - //TODO solve collision (they're so rare that I haven't seen any) public String getOrCreateFieldNameIndex(IValueReference pointable) throws HyracksDataException { - - int hash = getHash(pointable); - if (!hashToFieldNameIndexMap.containsKey(hash)) { - String fieldName = UTF8StringUtil.toString(pointable.getByteArray(), pointable.getStartOffset()); - hashToFieldNameIndexMap.put(hash, fieldName); - return fieldName; + int index = trie.getOrCreateFieldNameIndex(pointable); + if (index < fieldNames.size()) { + return fieldNames.get(index); } - return hashToFieldNameIndexMap.get(hash); + + builder.setLength(0); + String fieldName = UTF8StringUtil.toString(pointable.getByteArray(), pointable.getStartOffset(), builder); + fieldNames.add(fieldName); + return fieldName; } - private int getHash(IValueReference fieldName) throws HyracksDataException { - byte[] object = fieldName.getByteArray(); - int start = fieldName.getStartOffset(); - int length = fieldName.getLength(); - return fieldNameHashFunction.hash(object, start, length); - } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java index 6ad9608..cf46cc6 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java +++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java @@ -18,10 +18,10 @@ */ package org.apache.asterix.external.writer.printer.parquet; -import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.ELEMENT_FIELD; -import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.GROUP_TYPE_ERROR_FIELD; -import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.LIST_FIELD; -import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.PRIMITIVE_TYPE_ERROR_FIELD; +import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.ELEMENT_FIELD; +import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.GROUP_TYPE_ERROR_FIELD; +import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.LIST_FIELD; +import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.PRIMITIVE_TYPE_ERROR_FIELD; import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable; import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable; @@ -48,7 +48,7 @@ private RecordConsumer recordConsumer; private FieldNamesDictionary fieldNamesDictionary; - private final ParquetRecordVisitorUtils parquetRecordVisitorUtils; + private final ParquetValueWriter parquetValueWriter; public ParquetRecordLazyVisitor(MessageType schema, IAType typeInfo) { this.schema = schema; @@ -61,7 +61,7 @@ throw new RuntimeException("Type Unsupported for parquet printing"); } this.fieldNamesDictionary = new FieldNamesDictionary(); - this.parquetRecordVisitorUtils = new ParquetRecordVisitorUtils(); + this.parquetValueWriter = new ParquetValueWriter(); } public MessageType getSchema() { @@ -150,7 +150,7 @@ throw new HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, PRIMITIVE_TYPE_ERROR_FIELD, GROUP_TYPE_ERROR_FIELD, 
type.getName()); } - parquetRecordVisitorUtils.addValueToColumn(recordConsumer, pointable, type.asPrimitiveType()); + parquetValueWriter.addValueToColumn(recordConsumer, pointable, type.asPrimitiveType()); return null; } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java similarity index 95% rename from asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java rename to asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java index 54978c3..206a3c9 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java @@ -43,24 +43,19 @@ import org.apache.parquet.io.api.RecordConsumer; import org.apache.parquet.schema.PrimitiveType; -public class ParquetRecordVisitorUtils { - +public class ParquetValueWriter { public static final String LIST_FIELD = "list"; public static final String ELEMENT_FIELD = "element"; public static final String GROUP_TYPE_ERROR_FIELD = "group"; public static final String PRIMITIVE_TYPE_ERROR_FIELD = "primitive"; - private VoidPointable voidPointable; - private ATypeTag typeTag; + private final VoidPointable voidPointable; + private final ResettableByteArrayOutputStream byteArrayOutputStream; - private byte[] b; - int s, l; - private ResettableByteArrayOutputStream byteArrayOutputStream; - - public ParquetRecordVisitorUtils() { + ParquetValueWriter() { this.voidPointable = VoidPointable.FACTORY.createPointable(); - byteArrayOutputStream = new ResettableByteArrayOutputStream(); + this.byteArrayOutputStream = 
new ResettableByteArrayOutputStream(); } private void addIntegerType(long value, PrimitiveType.PrimitiveTypeName primitiveTypeName, ATypeTag typeTag, @@ -86,8 +81,9 @@ public void addValueToColumn(RecordConsumer recordConsumer, FlatLazyVisitablePointable pointable, PrimitiveType type) throws HyracksDataException { - typeTag = pointable.getTypeTag(); - b = pointable.getByteArray(); + ATypeTag typeTag = pointable.getTypeTag(); + byte[] b = pointable.getByteArray(); + int s, l; if (pointable.isTagged()) { s = pointable.getStartOffset() + 1; @@ -196,7 +192,5 @@ default: throw RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, primitiveTypeName); } - } - } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java similarity index 96% rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java index bffdb33..b2cd223 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java @@ -16,14 +16,12 @@ * specific language governing permissions and limitations * under the License. 
*/ - -package org.apache.asterix.column.metadata.dictionary; +package org.apache.asterix.om.dictionary; import java.io.DataInput; import java.io.IOException; import java.util.List; -import org.apache.asterix.column.metadata.IFieldNamesDictionary; import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer; import org.apache.asterix.om.base.AMutableString; import org.apache.hyracks.api.exceptions.HyracksDataException; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java similarity index 97% rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java index 73c034b..7bfae0b 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.apache.asterix.column.metadata.dictionary; +package org.apache.asterix.om.dictionary; import java.io.DataInput; import java.io.DataOutput; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java similarity index 98% rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java index 4a19cd6..5333af5 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java @@ -16,9 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.asterix.column.metadata.dictionary; +package org.apache.asterix.om.dictionary; -import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames; +import static org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames; import java.io.DataInput; import java.io.DataOutput; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java similarity index 98% rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java index 73c9a73..591f322 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java +++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.asterix.column.metadata.dictionary; +package org.apache.asterix.om.dictionary; import java.io.DataInput; import java.io.DataInputStream; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java similarity index 97% rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java index 10de829..30e10d6 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.apache.asterix.column.metadata.dictionary; +package org.apache.asterix.om.dictionary; import java.io.DataInput; import java.io.DataInputStream; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java similarity index 97% rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java index 8aa0e88..ec494fa 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.asterix.column.metadata; +package org.apache.asterix.om.dictionary; import java.io.DataInputStream; import java.io.DataOutput; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java similarity index 98% rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java index 32e902b..9ea2978 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java @@ -16,8 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ - -package org.apache.asterix.column.metadata.dictionary; +package org.apache.asterix.om.dictionary; import java.io.DataInput; import java.io.DataOutput; diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java index 5045c86..4022961 100644 --- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java +++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java @@ -332,6 +332,10 @@ public static String toString(byte[] bytes, int start) { StringBuilder builder = new StringBuilder(); + return toString(bytes, start, builder); + } + + public static String toString(byte[] bytes, int start, StringBuilder builder) { return toString(builder, bytes, start).toString(); } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18663 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: I798fcb4a02f017ad1b31e0cdb556e0dda1403b5e Gerrit-Change-Number: 18663 Gerrit-PatchSet: 8 Gerrit-Owner: [email protected] Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Wail Alkowaileet <[email protected]> Gerrit-Reviewer: [email protected] Gerrit-MessageType: merged
