This is an automated email from the ASF dual-hosted git repository.

wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new c0cdec64ea [ASTERIXDB-3392] Add new FieldNamesDictionary
c0cdec64ea is described below

commit c0cdec64ea5d472a9f643371ad5bc3fc56ade83b
Author: preetham.poluparthi <[email protected]>
AuthorDate: Mon Aug 12 13:08:13 2024 +0000

    [ASTERIXDB-3392] Add new FieldNamesDictionary
    
    Change-Id: I798fcb4a02f017ad1b31e0cdb556e0dda1403b5e
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18663
    Integration-Tests: Jenkins <[email protected]>
    Reviewed-by: Wail Alkowaileet <[email protected]>
    Tested-by: Wail Alkowaileet <[email protected]>
---
 .../column/assembler/AssemblerBuilderVisitor.java  |  2 +-
 .../column/metadata/schema/ObjectSchemaNode.java   |  2 +-
 .../visitor/SchemaBuilderFromIATypeVisitor.java    |  2 +-
 .../schema/visitor/SchemaClipperVisitor.java       |  2 +-
 .../operation/lsm/flush/FlushColumnMetadata.java   |  4 +-
 .../operation/query/QueryColumnMetadata.java       |  4 +-
 .../query/QueryColumnWithMetaMetadata.java         |  4 +-
 .../column/util/SchemaJSONBuilderVisitor.java      |  2 +-
 .../column/util/SchemaStringBuilderVisitor.java    |  2 +-
 .../metadata/trie/FieldNameDictionaryPerfTest.java |  6 +--
 .../column/metadata/trie/FieldNameTrieTest.java    |  4 +-
 .../writer/printer/ParquetExternalFilePrinter.java |  2 +-
 .../external/writer/printer/ParquetOutputFile.java | 10 ++++-
 .../printer/parquet/FieldNamesDictionary.java      | 43 +++++++++-------------
 .../printer/parquet/ParquetRecordLazyVisitor.java  | 14 +++----
 ...rdVisitorUtils.java => ParquetValueWriter.java} | 22 ++++-------
 .../dictionary/AbstractFieldNamesDictionary.java   |  4 +-
 .../asterix/om}/dictionary/ByteToNodeMap.java      |  2 +-
 .../asterix/om}/dictionary/FieldNameTrie.java      |  4 +-
 .../om}/dictionary/FieldNamesHashDictionary.java   |  2 +-
 .../om}/dictionary/FieldNamesTrieDictionary.java   |  2 +-
 .../om/dictionary}/IFieldNamesDictionary.java      |  2 +-
 .../apache/asterix/om}/dictionary/TrieNode.java    |  3 +-
 .../apache/hyracks/util/string/UTF8StringUtil.java |  4 ++
 24 files changed, 72 insertions(+), 76 deletions(-)

diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
index 6480c30492..cb447c85e8 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
@@ -18,7 +18,7 @@
  */
 package org.apache.asterix.column.assembler;
 
-import static 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static 
org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
 
 import java.util.ArrayList;
 import java.util.BitSet;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
index 0bea1884fe..1f74fb3c0a 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
@@ -18,7 +18,7 @@
  */
 package org.apache.asterix.column.metadata.schema;
 
-import static 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static 
org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
 
 import java.io.DataInput;
 import java.io.DataInputStream;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
index 05c4eda884..c7d3df165f 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
@@ -22,12 +22,12 @@ import static 
org.apache.asterix.om.typecomputer.impl.TypeComputeUtils.getActual
 
 import java.util.List;
 
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import 
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
 import 
org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode;
 import org.apache.asterix.column.operation.lsm.flush.FlushColumnMetadata;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.om.types.AUnionType;
 import org.apache.asterix.om.types.AbstractCollectionType;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
index afe8368205..ff05568d8b 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
@@ -21,13 +21,13 @@ package org.apache.asterix.column.metadata.schema.visitor;
 import java.io.IOException;
 import java.util.Map;
 
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import org.apache.asterix.column.metadata.schema.UnionSchemaNode;
 import 
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
 import 
org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode;
 import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.AUnionType;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
index f5146386d0..87f9ff3ae1 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
@@ -33,9 +33,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.asterix.column.metadata.AbstractColumnMetadata;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.PathInfoSerializer;
-import 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNestedNode;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -51,6 +49,8 @@ import 
org.apache.asterix.column.util.SchemaJSONBuilderVisitor;
 import org.apache.asterix.column.values.IColumnValuesWriter;
 import org.apache.asterix.column.values.IColumnValuesWriterFactory;
 import org.apache.asterix.column.values.writer.AbstractColumnValuesWriter;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.commons.lang3.mutable.Mutable;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
index e507d530a1..15a6277e49 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
@@ -41,8 +41,6 @@ import 
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorF
 import 
org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
 import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
 import org.apache.asterix.column.metadata.AbstractColumnImmutableReadMetadata;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
@@ -50,6 +48,8 @@ import 
org.apache.asterix.column.util.SchemaJSONBuilderVisitor;
 import org.apache.asterix.column.values.IColumnValuesReader;
 import org.apache.asterix.column.values.IColumnValuesReaderFactory;
 import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.runtime.projection.FunctionCallInformation;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
index 356ddaa843..d931242972 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
@@ -38,14 +38,14 @@ import 
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluator;
 import 
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorFactory;
 import 
org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
 import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
 import org.apache.asterix.column.values.IColumnValuesReader;
 import org.apache.asterix.column.values.IColumnValuesReaderFactory;
 import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.runtime.projection.FunctionCallInformation;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
index b425a26453..a4b92405e2 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
@@ -21,7 +21,6 @@ package org.apache.asterix.column.util;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -30,6 +29,7 @@ import 
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSc
 import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
 import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
 import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
index 6d991cf803..ad0e460c6b 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
@@ -21,7 +21,6 @@ package org.apache.asterix.column.util;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -30,6 +29,7 @@ import 
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSc
 import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
 import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
 import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
diff --git 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
index 63c2b22804..4b33e19e58 100644
--- 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
+++ 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
@@ -20,11 +20,11 @@ package org.apache.asterix.column.metadata.trie;
 
 import java.util.concurrent.TimeUnit;
 
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesHashDictionary;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
 import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.dictionary.FieldNamesHashDictionary;
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
diff --git 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
index c9f58d5394..1ec468c13e 100644
--- 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
+++ 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
@@ -28,8 +28,8 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.asterix.column.metadata.dictionary.FieldNameTrie;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
+import org.apache.asterix.om.dictionary.FieldNameTrie;
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
 import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
index b07e857d5d..ca2ad55c03 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
@@ -79,7 +79,7 @@ public class ParquetExternalFilePrinter implements 
IExternalPrinter {
                     
.withType(schema).withTypeInfo(typeInfo).withRowGroupSize(rowGroupSize).withPageSize(pageSize)
                     
.withDictionaryPageSize(ExternalDataConstants.PARQUET_DICTIONARY_PAGE_SIZE)
                     .enableDictionaryEncoding().withValidation(false)
-                    
.withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0).withConf(conf).build();
+                    
.withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0).withConf(conf).build();
         } catch (IOException e) {
             throw HyracksDataException.create(e);
         }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
index cc54676b56..5db600f83a 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
@@ -31,6 +31,14 @@ import org.apache.parquet.io.PositionOutputStream;
 public class ParquetOutputFile implements OutputFile {
     private final FSDataOutputStream fs;
 
+    /*
+     This class wraps OutputStream as a file that Parquet SDK supports writing 
to.
+     By default, this assumes output stream doesn't support block size which 
distributed file systems use.
+     The Hadoop FileSystem library uses this value (32 MiB) as its default block size.
+     Ref : 
https://github.com/apache/hadoop/blob/74ff00705cf67911f1ff8320c6c97354350d6952/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java#L2756
+     */
+    private static final long DEFAULT_BLOCK_SIZE = 33554432L;
+
     public ParquetOutputFile(OutputStream os) {
         this.fs = new FSDataOutputStream(os, new 
FileSystem.Statistics("test"));
     }
@@ -52,6 +60,6 @@ public class ParquetOutputFile implements OutputFile {
 
     @Override
     public long defaultBlockSize() {
-        return 33554432L;
+        return DEFAULT_BLOCK_SIZE;
     }
 }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
index f36df1a9c6..7058bf6287 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
@@ -18,42 +18,35 @@
  */
 package org.apache.asterix.external.writer.printer.parquet;
 
-import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
-import 
org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
 import org.apache.hyracks.data.std.api.IValueReference;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.util.string.UTF8StringUtil;
 
-import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
-import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
-
 public class FieldNamesDictionary {
-    private final IBinaryHashFunction fieldNameHashFunction;
-    private final Int2ObjectMap<String> hashToFieldNameIndexMap;
+    private final FieldNamesTrieDictionary trie;
+    private final List<String> fieldNames;
+    private final StringBuilder builder;
 
     public FieldNamesDictionary() {
-        fieldNameHashFunction =
-                new 
PointableBinaryHashFunctionFactory(UTF8StringPointable.FACTORY).createBinaryHashFunction();
-        hashToFieldNameIndexMap = new Int2ObjectOpenHashMap<>();
+        trie = new FieldNamesTrieDictionary();
+        fieldNames = new ArrayList<>();
+        builder = new StringBuilder();
     }
 
-    //TODO solve collision (they're so rare that I haven't seen any)
     public String getOrCreateFieldNameIndex(IValueReference pointable) throws 
HyracksDataException {
-
-        int hash = getHash(pointable);
-        if (!hashToFieldNameIndexMap.containsKey(hash)) {
-            String fieldName = 
UTF8StringUtil.toString(pointable.getByteArray(), pointable.getStartOffset());
-            hashToFieldNameIndexMap.put(hash, fieldName);
-            return fieldName;
+        int index = trie.getOrCreateFieldNameIndex(pointable);
+        if (index < fieldNames.size()) {
+            return fieldNames.get(index);
         }
-        return hashToFieldNameIndexMap.get(hash);
-    }
 
-    private int getHash(IValueReference fieldName) throws HyracksDataException 
{
-        byte[] object = fieldName.getByteArray();
-        int start = fieldName.getStartOffset();
-        int length = fieldName.getLength();
-        return fieldNameHashFunction.hash(object, start, length);
+        builder.setLength(0);
+        String fieldName = UTF8StringUtil.toString(pointable.getByteArray(), 
pointable.getStartOffset(), builder);
+        fieldNames.add(fieldName);
+        return fieldName;
     }
+
 }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
index 6ad9608106..cf46cc61b6 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
@@ -18,10 +18,10 @@
  */
 package org.apache.asterix.external.writer.printer.parquet;
 
-import static 
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.ELEMENT_FIELD;
-import static 
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.GROUP_TYPE_ERROR_FIELD;
-import static 
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.LIST_FIELD;
-import static 
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.PRIMITIVE_TYPE_ERROR_FIELD;
+import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.ELEMENT_FIELD;
+import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.GROUP_TYPE_ERROR_FIELD;
+import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.LIST_FIELD;
+import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.PRIMITIVE_TYPE_ERROR_FIELD;
 
 import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable;
 import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable;
@@ -48,7 +48,7 @@ public class ParquetRecordLazyVisitor implements 
ILazyVisitablePointableVisitor<
     private RecordConsumer recordConsumer;
     private FieldNamesDictionary fieldNamesDictionary;
 
-    private final ParquetRecordVisitorUtils parquetRecordVisitorUtils;
+    private final ParquetValueWriter parquetValueWriter;
 
     public ParquetRecordLazyVisitor(MessageType schema, IAType typeInfo) {
         this.schema = schema;
@@ -61,7 +61,7 @@ public class ParquetRecordLazyVisitor implements 
ILazyVisitablePointableVisitor<
             throw new RuntimeException("Type Unsupported for parquet 
printing");
         }
         this.fieldNamesDictionary = new FieldNamesDictionary();
-        this.parquetRecordVisitorUtils = new ParquetRecordVisitorUtils();
+        this.parquetValueWriter = new ParquetValueWriter();
     }
 
     public MessageType getSchema() {
@@ -150,7 +150,7 @@ public class ParquetRecordLazyVisitor implements 
ILazyVisitablePointableVisitor<
             throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, 
PRIMITIVE_TYPE_ERROR_FIELD,
                     GROUP_TYPE_ERROR_FIELD, type.getName());
         }
-        parquetRecordVisitorUtils.addValueToColumn(recordConsumer, pointable, 
type.asPrimitiveType());
+        parquetValueWriter.addValueToColumn(recordConsumer, pointable, 
type.asPrimitiveType());
         return null;
     }
 
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
similarity index 95%
rename from 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
rename to 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
index 54978c340d..206a3c9af0 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
@@ -43,24 +43,19 @@ import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.io.api.RecordConsumer;
 import org.apache.parquet.schema.PrimitiveType;
 
-public class ParquetRecordVisitorUtils {
-
+public class ParquetValueWriter {
     public static final String LIST_FIELD = "list";
     public static final String ELEMENT_FIELD = "element";
 
     public static final String GROUP_TYPE_ERROR_FIELD = "group";
     public static final String PRIMITIVE_TYPE_ERROR_FIELD = "primitive";
 
-    private VoidPointable voidPointable;
-    private ATypeTag typeTag;
-
-    private byte[] b;
-    int s, l;
-    private ResettableByteArrayOutputStream byteArrayOutputStream;
+    private final VoidPointable voidPointable;
+    private final ResettableByteArrayOutputStream byteArrayOutputStream;
 
-    public ParquetRecordVisitorUtils() {
+    ParquetValueWriter() {
         this.voidPointable = VoidPointable.FACTORY.createPointable();
-        byteArrayOutputStream = new ResettableByteArrayOutputStream();
+        this.byteArrayOutputStream = new ResettableByteArrayOutputStream();
     }
 
     private void addIntegerType(long value, PrimitiveType.PrimitiveTypeName 
primitiveTypeName, ATypeTag typeTag,
@@ -86,8 +81,9 @@ public class ParquetRecordVisitorUtils {
     public void addValueToColumn(RecordConsumer recordConsumer, 
FlatLazyVisitablePointable pointable,
             PrimitiveType type) throws HyracksDataException {
 
-        typeTag = pointable.getTypeTag();
-        b = pointable.getByteArray();
+        ATypeTag typeTag = pointable.getTypeTag();
+        byte[] b = pointable.getByteArray();
+        int s, l;
 
         if (pointable.isTagged()) {
             s = pointable.getStartOffset() + 1;
@@ -196,7 +192,5 @@ public class ParquetRecordVisitorUtils {
             default:
                 throw 
RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, 
primitiveTypeName);
         }
-
     }
-
 }
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
similarity index 96%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
index bffdb33002..b2cd2230cd 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
@@ -16,14 +16,12 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
 
 import java.io.DataInput;
 import java.io.IOException;
 import java.util.List;
 
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.om.base.AMutableString;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
similarity index 98%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
index 73c034b1da..7bfae0b00b 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
 
 import java.io.DataInput;
 import java.io.DataOutput;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
similarity index 98%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
index 4a19cd6a5a..5333af5c4d 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
@@ -16,9 +16,9 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
 
-import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;
+import static org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;
 
 import java.io.DataInput;
 import java.io.DataOutput;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
similarity index 99%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
index 73c9a738ba..591f322772 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
 
 import java.io.DataInput;
 import java.io.DataInputStream;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
similarity index 98%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
index 10de8298dd..30e10d61e7 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
 
 import java.io.DataInput;
 import java.io.DataInputStream;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
similarity index 98%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
index 8aa0e88a30..ec494fac7d 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.om.dictionary;
 
 import java.io.DataInputStream;
 import java.io.DataOutput;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
similarity index 98%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
index 32e902b31e..9ea29785b4 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
@@ -16,8 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
 
 import java.io.DataInput;
 import java.io.DataOutput;
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index 5045c8698c..4022961ad6 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -332,6 +332,10 @@ public class UTF8StringUtil {
 
     public static String toString(byte[] bytes, int start) {
         StringBuilder builder = new StringBuilder();
+        return toString(bytes, start, builder);
+    }
+
+    public static String toString(byte[] bytes, int start, StringBuilder builder) {
         return toString(builder, bytes, start).toString();
     }
 


Reply via email to