This is an automated email from the ASF dual-hosted git repository.
wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new c0cdec64ea [ASTERIXDB-3392] Add new FieldNamesDictionary
c0cdec64ea is described below
commit c0cdec64ea5d472a9f643371ad5bc3fc56ade83b
Author: preetham.poluparthi <[email protected]>
AuthorDate: Mon Aug 12 13:08:13 2024 +0000
[ASTERIXDB-3392] Add new FieldNamesDictionary
Change-Id: I798fcb4a02f017ad1b31e0cdb556e0dda1403b5e
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18663
Integration-Tests: Jenkins <[email protected]>
Reviewed-by: Wail Alkowaileet <[email protected]>
Tested-by: Wail Alkowaileet <[email protected]>
---
.../column/assembler/AssemblerBuilderVisitor.java | 2 +-
.../column/metadata/schema/ObjectSchemaNode.java | 2 +-
.../visitor/SchemaBuilderFromIATypeVisitor.java | 2 +-
.../schema/visitor/SchemaClipperVisitor.java | 2 +-
.../operation/lsm/flush/FlushColumnMetadata.java | 4 +-
.../operation/query/QueryColumnMetadata.java | 4 +-
.../query/QueryColumnWithMetaMetadata.java | 4 +-
.../column/util/SchemaJSONBuilderVisitor.java | 2 +-
.../column/util/SchemaStringBuilderVisitor.java | 2 +-
.../metadata/trie/FieldNameDictionaryPerfTest.java | 6 +--
.../column/metadata/trie/FieldNameTrieTest.java | 4 +-
.../writer/printer/ParquetExternalFilePrinter.java | 2 +-
.../external/writer/printer/ParquetOutputFile.java | 10 ++++-
.../printer/parquet/FieldNamesDictionary.java | 43 +++++++++-------------
.../printer/parquet/ParquetRecordLazyVisitor.java | 14 +++----
...rdVisitorUtils.java => ParquetValueWriter.java} | 22 ++++-------
.../dictionary/AbstractFieldNamesDictionary.java | 4 +-
.../asterix/om}/dictionary/ByteToNodeMap.java | 2 +-
.../asterix/om}/dictionary/FieldNameTrie.java | 4 +-
.../om}/dictionary/FieldNamesHashDictionary.java | 2 +-
.../om}/dictionary/FieldNamesTrieDictionary.java | 2 +-
.../om/dictionary}/IFieldNamesDictionary.java | 2 +-
.../apache/asterix/om}/dictionary/TrieNode.java | 3 +-
.../apache/hyracks/util/string/UTF8StringUtil.java | 4 ++
24 files changed, 72 insertions(+), 76 deletions(-)
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
index 6480c30492..cb447c85e8 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
@@ -18,7 +18,7 @@
*/
package org.apache.asterix.column.assembler;
-import static
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static
org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
import java.util.ArrayList;
import java.util.BitSet;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
index 0bea1884fe..1f74fb3c0a 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
@@ -18,7 +18,7 @@
*/
package org.apache.asterix.column.metadata.schema;
-import static
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static
org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
import java.io.DataInput;
import java.io.DataInputStream;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
index 05c4eda884..c7d3df165f 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
@@ -22,12 +22,12 @@ import static
org.apache.asterix.om.typecomputer.impl.TypeComputeUtils.getActual
import java.util.List;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
import
org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode;
import org.apache.asterix.column.operation.lsm.flush.FlushColumnMetadata;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.AUnionType;
import org.apache.asterix.om.types.AbstractCollectionType;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
index afe8368205..ff05568d8b 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
@@ -21,13 +21,13 @@ package org.apache.asterix.column.metadata.schema.visitor;
import java.io.IOException;
import java.util.Map;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.UnionSchemaNode;
import
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
import
org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode;
import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.AUnionType;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
index f5146386d0..87f9ff3ae1 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
@@ -33,9 +33,7 @@ import java.util.List;
import java.util.Map;
import org.apache.asterix.column.metadata.AbstractColumnMetadata;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.PathInfoSerializer;
-import
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNestedNode;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -51,6 +49,8 @@ import
org.apache.asterix.column.util.SchemaJSONBuilderVisitor;
import org.apache.asterix.column.values.IColumnValuesWriter;
import org.apache.asterix.column.values.IColumnValuesWriterFactory;
import org.apache.asterix.column.values.writer.AbstractColumnValuesWriter;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.commons.lang3.mutable.Mutable;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
index e507d530a1..15a6277e49 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
@@ -41,8 +41,6 @@ import
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorF
import
org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
import org.apache.asterix.column.metadata.AbstractColumnImmutableReadMetadata;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
@@ -50,6 +48,8 @@ import
org.apache.asterix.column.util.SchemaJSONBuilderVisitor;
import org.apache.asterix.column.values.IColumnValuesReader;
import org.apache.asterix.column.values.IColumnValuesReaderFactory;
import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.runtime.projection.FunctionCallInformation;
import org.apache.hyracks.api.context.IHyracksTaskContext;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
index 356ddaa843..d931242972 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
@@ -38,14 +38,14 @@ import
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluator;
import
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorFactory;
import
org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
import org.apache.asterix.column.values.IColumnValuesReader;
import org.apache.asterix.column.values.IColumnValuesReaderFactory;
import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.runtime.projection.FunctionCallInformation;
import org.apache.hyracks.api.context.IHyracksTaskContext;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
index b425a26453..a4b92405e2 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
@@ -21,7 +21,6 @@ package org.apache.asterix.column.util;
import java.util.ArrayList;
import java.util.List;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -30,6 +29,7 @@ import
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSc
import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
import
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
index 6d991cf803..ad0e460c6b 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
@@ -21,7 +21,6 @@ package org.apache.asterix.column.util;
import java.util.ArrayList;
import java.util.List;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -30,6 +29,7 @@ import
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSc
import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
import
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
diff --git
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
index 63c2b22804..4b33e19e58 100644
---
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
+++
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
@@ -20,11 +20,11 @@ package org.apache.asterix.column.metadata.trie;
import java.util.concurrent.TimeUnit;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesHashDictionary;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
import
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.dictionary.FieldNamesHashDictionary;
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
diff --git
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
index c9f58d5394..1ec468c13e 100644
---
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
+++
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
@@ -28,8 +28,8 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import org.apache.asterix.column.metadata.dictionary.FieldNameTrie;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
+import org.apache.asterix.om.dictionary.FieldNameTrie;
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
index b07e857d5d..ca2ad55c03 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
@@ -79,7 +79,7 @@ public class ParquetExternalFilePrinter implements
IExternalPrinter {
.withType(schema).withTypeInfo(typeInfo).withRowGroupSize(rowGroupSize).withPageSize(pageSize)
.withDictionaryPageSize(ExternalDataConstants.PARQUET_DICTIONARY_PAGE_SIZE)
.enableDictionaryEncoding().withValidation(false)
-
.withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0).withConf(conf).build();
+
.withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0).withConf(conf).build();
} catch (IOException e) {
throw HyracksDataException.create(e);
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
index cc54676b56..5db600f83a 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
@@ -31,6 +31,14 @@ import org.apache.parquet.io.PositionOutputStream;
public class ParquetOutputFile implements OutputFile {
private final FSDataOutputStream fs;
+ /*
+ This class wraps an OutputStream as a file that the Parquet SDK supports
writing to.
+ By default, this assumes the output stream doesn't support a block size,
which distributed file systems use.
+ The Hadoop File System Library uses this as a default block size
+ Ref :
https://github.com/apache/hadoop/blob/74ff00705cf67911f1ff8320c6c97354350d6952/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java#L2756
+ */
+ private static final long DEFAULT_BLOCK_SIZE = 33554432L;
+
public ParquetOutputFile(OutputStream os) {
this.fs = new FSDataOutputStream(os, new
FileSystem.Statistics("test"));
}
@@ -52,6 +60,6 @@ public class ParquetOutputFile implements OutputFile {
@Override
public long defaultBlockSize() {
- return 33554432L;
+ return DEFAULT_BLOCK_SIZE;
}
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
index f36df1a9c6..7058bf6287 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
@@ -18,42 +18,35 @@
*/
package org.apache.asterix.external.writer.printer.parquet;
-import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
-import
org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
import org.apache.hyracks.data.std.api.IValueReference;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.util.string.UTF8StringUtil;
-import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
-import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
-
public class FieldNamesDictionary {
- private final IBinaryHashFunction fieldNameHashFunction;
- private final Int2ObjectMap<String> hashToFieldNameIndexMap;
+ private final FieldNamesTrieDictionary trie;
+ private final List<String> fieldNames;
+ private final StringBuilder builder;
public FieldNamesDictionary() {
- fieldNameHashFunction =
- new
PointableBinaryHashFunctionFactory(UTF8StringPointable.FACTORY).createBinaryHashFunction();
- hashToFieldNameIndexMap = new Int2ObjectOpenHashMap<>();
+ trie = new FieldNamesTrieDictionary();
+ fieldNames = new ArrayList<>();
+ builder = new StringBuilder();
}
- //TODO solve collision (they're so rare that I haven't seen any)
public String getOrCreateFieldNameIndex(IValueReference pointable) throws
HyracksDataException {
-
- int hash = getHash(pointable);
- if (!hashToFieldNameIndexMap.containsKey(hash)) {
- String fieldName =
UTF8StringUtil.toString(pointable.getByteArray(), pointable.getStartOffset());
- hashToFieldNameIndexMap.put(hash, fieldName);
- return fieldName;
+ int index = trie.getOrCreateFieldNameIndex(pointable);
+ if (index < fieldNames.size()) {
+ return fieldNames.get(index);
}
- return hashToFieldNameIndexMap.get(hash);
- }
- private int getHash(IValueReference fieldName) throws HyracksDataException
{
- byte[] object = fieldName.getByteArray();
- int start = fieldName.getStartOffset();
- int length = fieldName.getLength();
- return fieldNameHashFunction.hash(object, start, length);
+ builder.setLength(0);
+ String fieldName = UTF8StringUtil.toString(pointable.getByteArray(),
pointable.getStartOffset(), builder);
+ fieldNames.add(fieldName);
+ return fieldName;
}
+
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
index 6ad9608106..cf46cc61b6 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
@@ -18,10 +18,10 @@
*/
package org.apache.asterix.external.writer.printer.parquet;
-import static
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.ELEMENT_FIELD;
-import static
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.GROUP_TYPE_ERROR_FIELD;
-import static
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.LIST_FIELD;
-import static
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.PRIMITIVE_TYPE_ERROR_FIELD;
+import static
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.ELEMENT_FIELD;
+import static
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.GROUP_TYPE_ERROR_FIELD;
+import static
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.LIST_FIELD;
+import static
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.PRIMITIVE_TYPE_ERROR_FIELD;
import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable;
import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable;
@@ -48,7 +48,7 @@ public class ParquetRecordLazyVisitor implements
ILazyVisitablePointableVisitor<
private RecordConsumer recordConsumer;
private FieldNamesDictionary fieldNamesDictionary;
- private final ParquetRecordVisitorUtils parquetRecordVisitorUtils;
+ private final ParquetValueWriter parquetValueWriter;
public ParquetRecordLazyVisitor(MessageType schema, IAType typeInfo) {
this.schema = schema;
@@ -61,7 +61,7 @@ public class ParquetRecordLazyVisitor implements
ILazyVisitablePointableVisitor<
throw new RuntimeException("Type Unsupported for parquet
printing");
}
this.fieldNamesDictionary = new FieldNamesDictionary();
- this.parquetRecordVisitorUtils = new ParquetRecordVisitorUtils();
+ this.parquetValueWriter = new ParquetValueWriter();
}
public MessageType getSchema() {
@@ -150,7 +150,7 @@ public class ParquetRecordLazyVisitor implements
ILazyVisitablePointableVisitor<
throw new
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA,
PRIMITIVE_TYPE_ERROR_FIELD,
GROUP_TYPE_ERROR_FIELD, type.getName());
}
- parquetRecordVisitorUtils.addValueToColumn(recordConsumer, pointable,
type.asPrimitiveType());
+ parquetValueWriter.addValueToColumn(recordConsumer, pointable,
type.asPrimitiveType());
return null;
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
similarity index 95%
rename from
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
rename to
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
index 54978c340d..206a3c9af0 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
@@ -43,24 +43,19 @@ import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.PrimitiveType;
-public class ParquetRecordVisitorUtils {
-
+public class ParquetValueWriter {
public static final String LIST_FIELD = "list";
public static final String ELEMENT_FIELD = "element";
public static final String GROUP_TYPE_ERROR_FIELD = "group";
public static final String PRIMITIVE_TYPE_ERROR_FIELD = "primitive";
- private VoidPointable voidPointable;
- private ATypeTag typeTag;
-
- private byte[] b;
- int s, l;
- private ResettableByteArrayOutputStream byteArrayOutputStream;
+ private final VoidPointable voidPointable;
+ private final ResettableByteArrayOutputStream byteArrayOutputStream;
- public ParquetRecordVisitorUtils() {
+ ParquetValueWriter() {
this.voidPointable = VoidPointable.FACTORY.createPointable();
- byteArrayOutputStream = new ResettableByteArrayOutputStream();
+ this.byteArrayOutputStream = new ResettableByteArrayOutputStream();
}
private void addIntegerType(long value, PrimitiveType.PrimitiveTypeName
primitiveTypeName, ATypeTag typeTag,
@@ -86,8 +81,9 @@ public class ParquetRecordVisitorUtils {
public void addValueToColumn(RecordConsumer recordConsumer,
FlatLazyVisitablePointable pointable,
PrimitiveType type) throws HyracksDataException {
- typeTag = pointable.getTypeTag();
- b = pointable.getByteArray();
+ ATypeTag typeTag = pointable.getTypeTag();
+ byte[] b = pointable.getByteArray();
+ int s, l;
if (pointable.isTagged()) {
s = pointable.getStartOffset() + 1;
@@ -196,7 +192,5 @@ public class ParquetRecordVisitorUtils {
default:
throw
RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag,
primitiveTypeName);
}
-
}
-
}
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
similarity index 96%
rename from
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
rename to
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
index bffdb33002..b2cd2230cd 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
@@ -16,14 +16,12 @@
* specific language governing permissions and limitations
* under the License.
*/
-
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.IOException;
import java.util.List;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AMutableString;
import org.apache.hyracks.api.exceptions.HyracksDataException;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
similarity index 98%
rename from
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
rename to
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
index 73c034b1da..7bfae0b00b 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.DataOutput;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
similarity index 98%
rename from
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
rename to
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
index 4a19cd6a5a..5333af5c4d 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
@@ -16,9 +16,9 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
-import static
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;
+import static
org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;
import java.io.DataInput;
import java.io.DataOutput;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
similarity index 99%
rename from
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
rename to
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
index 73c9a738ba..591f322772 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.DataInputStream;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
similarity index 98%
rename from
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
rename to
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
index 10de8298dd..30e10d61e7 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.DataInputStream;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
similarity index 98%
rename from
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
rename to
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
index 8aa0e88a30..ec494fac7d 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.om.dictionary;
import java.io.DataInputStream;
import java.io.DataOutput;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
similarity index 98%
rename from
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
rename to
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
index 32e902b31e..9ea29785b4 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
@@ -16,8 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.DataOutput;
diff --git
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index 5045c8698c..4022961ad6 100644
---
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -332,6 +332,10 @@ public class UTF8StringUtil {
public static String toString(byte[] bytes, int start) {
StringBuilder builder = new StringBuilder();
+ return toString(bytes, start, builder);
+ }
+
+ public static String toString(byte[] bytes, int start, StringBuilder
builder) {
return toString(builder, bytes, start).toString();
}