>From Wail Alkowaileet <[email protected]>:

Wail Alkowaileet has submitted this change. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18663 )

Change subject: [ASTERIXDB-3392] Add new FieldNamesDictionary
......................................................................

[ASTERIXDB-3392] Add new FieldNamesDictionary

Change-Id: I798fcb4a02f017ad1b31e0cdb556e0dda1403b5e
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18663
Integration-Tests: Jenkins <[email protected]>
Reviewed-by: Wail Alkowaileet <[email protected]>
Tested-by: Wail Alkowaileet <[email protected]>
---
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
R 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
M 
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
R 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
R 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
R 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
M 
asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
R 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
M 
asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
R 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
R 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
R 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
24 files changed, 85 insertions(+), 76 deletions(-)

Approvals:
  Wail Alkowaileet: Looks good to me, approved; Verified
  Anon. E. Moose #1000171:
  Jenkins: Verified




diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
index 6480c30..cb447c8 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
@@ -18,7 +18,7 @@
  */
 package org.apache.asterix.column.assembler;

-import static 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static 
org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;

 import java.util.ArrayList;
 import java.util.BitSet;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
index 0bea188..1f74fb3 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
@@ -18,7 +18,7 @@
  */
 package org.apache.asterix.column.metadata.schema;

-import static 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static 
org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;

 import java.io.DataInput;
 import java.io.DataInputStream;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
index 05c4eda..c7d3df1 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
@@ -22,12 +22,12 @@

 import java.util.List;

-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import 
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
 import 
org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode;
 import org.apache.asterix.column.operation.lsm.flush.FlushColumnMetadata;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.om.types.AUnionType;
 import org.apache.asterix.om.types.AbstractCollectionType;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
index afe8368..ff05568 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
@@ -21,13 +21,13 @@
 import java.io.IOException;
 import java.util.Map;

-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import org.apache.asterix.column.metadata.schema.UnionSchemaNode;
 import 
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
 import 
org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode;
 import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.AUnionType;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
index f514638..87f9ff3 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
@@ -33,9 +33,7 @@
 import java.util.Map;

 import org.apache.asterix.column.metadata.AbstractColumnMetadata;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.PathInfoSerializer;
-import 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNestedNode;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -51,6 +49,8 @@
 import org.apache.asterix.column.values.IColumnValuesWriter;
 import org.apache.asterix.column.values.IColumnValuesWriterFactory;
 import org.apache.asterix.column.values.writer.AbstractColumnValuesWriter;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.commons.lang3.mutable.Mutable;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
index e507d53..15a6277 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
@@ -41,8 +41,6 @@
 import 
org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
 import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
 import org.apache.asterix.column.metadata.AbstractColumnImmutableReadMetadata;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
@@ -50,6 +48,8 @@
 import org.apache.asterix.column.values.IColumnValuesReader;
 import org.apache.asterix.column.values.IColumnValuesReaderFactory;
 import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.runtime.projection.FunctionCallInformation;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
index 356ddaa..d931242 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
@@ -38,14 +38,14 @@
 import 
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorFactory;
 import 
org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
 import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
 import org.apache.asterix.column.values.IColumnValuesReader;
 import org.apache.asterix.column.values.IColumnValuesReaderFactory;
 import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.runtime.projection.FunctionCallInformation;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
index b425a26..a4b9240 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
@@ -21,7 +21,6 @@
 import java.util.ArrayList;
 import java.util.List;

-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -30,6 +29,7 @@
 import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
 import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
 import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
index 6d991cf..ad0e460 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
@@ -21,7 +21,6 @@
 import java.util.ArrayList;
 import java.util.List;

-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -30,6 +29,7 @@
 import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
 import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
 import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
diff --git 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
index 63c2b22..4b33e19 100644
--- 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
+++ 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
@@ -20,11 +20,11 @@

 import java.util.concurrent.TimeUnit;

-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesHashDictionary;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
 import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.dictionary.FieldNamesHashDictionary;
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
 import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
diff --git 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
index c9f58d5..1ec468c 100644
--- 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
+++ 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
@@ -28,8 +28,8 @@
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.asterix.column.metadata.dictionary.FieldNameTrie;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
+import org.apache.asterix.om.dictionary.FieldNameTrie;
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
 import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
index b07e857..ca2ad55 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
@@ -79,7 +79,7 @@
                     
.withType(schema).withTypeInfo(typeInfo).withRowGroupSize(rowGroupSize).withPageSize(pageSize)
                     
.withDictionaryPageSize(ExternalDataConstants.PARQUET_DICTIONARY_PAGE_SIZE)
                     .enableDictionaryEncoding().withValidation(false)
-                    
.withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0).withConf(conf).build();
+                    
.withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0).withConf(conf).build();
         } catch (IOException e) {
             throw HyracksDataException.create(e);
         }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
index cc54676..5db600f 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
@@ -31,6 +31,14 @@
 public class ParquetOutputFile implements OutputFile {
     private final FSDataOutputStream fs;

+    /*
+     This class wraps OutputStream as a file that Parquet SDK supports writing 
to.
+     By default, this assumes output stream doesn't support block size which 
distributed file systems use.
+     Hadoop File System Library use this as a default block size
+     Ref : 
https://github.com/apache/hadoop/blob/74ff00705cf67911f1ff8320c6c97354350d6952/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java#L2756
+     */
+    private static final long DEFAULT_BLOCK_SIZE = 33554432L;
+
     public ParquetOutputFile(OutputStream os) {
         this.fs = new FSDataOutputStream(os, new 
FileSystem.Statistics("test"));
     }
@@ -52,6 +60,6 @@

     @Override
     public long defaultBlockSize() {
-        return 33554432L;
+        return DEFAULT_BLOCK_SIZE;
     }
 }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
index f36df1a..7058bf6 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
@@ -18,42 +18,35 @@
  */
 package org.apache.asterix.external.writer.printer.parquet;

-import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
-import 
org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
 import org.apache.hyracks.data.std.api.IValueReference;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.util.string.UTF8StringUtil;

-import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
-import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
-
 public class FieldNamesDictionary {
-    private final IBinaryHashFunction fieldNameHashFunction;
-    private final Int2ObjectMap<String> hashToFieldNameIndexMap;
+    private final FieldNamesTrieDictionary trie;
+    private final List<String> fieldNames;
+    private final StringBuilder builder;

     public FieldNamesDictionary() {
-        fieldNameHashFunction =
-                new 
PointableBinaryHashFunctionFactory(UTF8StringPointable.FACTORY).createBinaryHashFunction();
-        hashToFieldNameIndexMap = new Int2ObjectOpenHashMap<>();
+        trie = new FieldNamesTrieDictionary();
+        fieldNames = new ArrayList<>();
+        builder = new StringBuilder();
     }

-    //TODO solve collision (they're so rare that I haven't seen any)
     public String getOrCreateFieldNameIndex(IValueReference pointable) throws 
HyracksDataException {
-
-        int hash = getHash(pointable);
-        if (!hashToFieldNameIndexMap.containsKey(hash)) {
-            String fieldName = 
UTF8StringUtil.toString(pointable.getByteArray(), pointable.getStartOffset());
-            hashToFieldNameIndexMap.put(hash, fieldName);
-            return fieldName;
+        int index = trie.getOrCreateFieldNameIndex(pointable);
+        if (index < fieldNames.size()) {
+            return fieldNames.get(index);
         }
-        return hashToFieldNameIndexMap.get(hash);
+
+        builder.setLength(0);
+        String fieldName = UTF8StringUtil.toString(pointable.getByteArray(), 
pointable.getStartOffset(), builder);
+        fieldNames.add(fieldName);
+        return fieldName;
     }

-    private int getHash(IValueReference fieldName) throws HyracksDataException 
{
-        byte[] object = fieldName.getByteArray();
-        int start = fieldName.getStartOffset();
-        int length = fieldName.getLength();
-        return fieldNameHashFunction.hash(object, start, length);
-    }
 }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
index 6ad9608..cf46cc6 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
@@ -18,10 +18,10 @@
  */
 package org.apache.asterix.external.writer.printer.parquet;

-import static 
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.ELEMENT_FIELD;
-import static 
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.GROUP_TYPE_ERROR_FIELD;
-import static 
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.LIST_FIELD;
-import static 
org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.PRIMITIVE_TYPE_ERROR_FIELD;
+import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.ELEMENT_FIELD;
+import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.GROUP_TYPE_ERROR_FIELD;
+import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.LIST_FIELD;
+import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.PRIMITIVE_TYPE_ERROR_FIELD;

 import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable;
 import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable;
@@ -48,7 +48,7 @@
     private RecordConsumer recordConsumer;
     private FieldNamesDictionary fieldNamesDictionary;

-    private final ParquetRecordVisitorUtils parquetRecordVisitorUtils;
+    private final ParquetValueWriter parquetValueWriter;

     public ParquetRecordLazyVisitor(MessageType schema, IAType typeInfo) {
         this.schema = schema;
@@ -61,7 +61,7 @@
             throw new RuntimeException("Type Unsupported for parquet 
printing");
         }
         this.fieldNamesDictionary = new FieldNamesDictionary();
-        this.parquetRecordVisitorUtils = new ParquetRecordVisitorUtils();
+        this.parquetValueWriter = new ParquetValueWriter();
     }

     public MessageType getSchema() {
@@ -150,7 +150,7 @@
             throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, 
PRIMITIVE_TYPE_ERROR_FIELD,
                     GROUP_TYPE_ERROR_FIELD, type.getName());
         }
-        parquetRecordVisitorUtils.addValueToColumn(recordConsumer, pointable, 
type.asPrimitiveType());
+        parquetValueWriter.addValueToColumn(recordConsumer, pointable, 
type.asPrimitiveType());
         return null;
     }

diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
similarity index 95%
rename from 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
rename to 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
index 54978c3..206a3c9 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
@@ -43,24 +43,19 @@
 import org.apache.parquet.io.api.RecordConsumer;
 import org.apache.parquet.schema.PrimitiveType;

-public class ParquetRecordVisitorUtils {
-
+public class ParquetValueWriter {
     public static final String LIST_FIELD = "list";
     public static final String ELEMENT_FIELD = "element";

     public static final String GROUP_TYPE_ERROR_FIELD = "group";
     public static final String PRIMITIVE_TYPE_ERROR_FIELD = "primitive";

-    private VoidPointable voidPointable;
-    private ATypeTag typeTag;
+    private final VoidPointable voidPointable;
+    private final ResettableByteArrayOutputStream byteArrayOutputStream;

-    private byte[] b;
-    int s, l;
-    private ResettableByteArrayOutputStream byteArrayOutputStream;
-
-    public ParquetRecordVisitorUtils() {
+    ParquetValueWriter() {
         this.voidPointable = VoidPointable.FACTORY.createPointable();
-        byteArrayOutputStream = new ResettableByteArrayOutputStream();
+        this.byteArrayOutputStream = new ResettableByteArrayOutputStream();
     }

     private void addIntegerType(long value, PrimitiveType.PrimitiveTypeName 
primitiveTypeName, ATypeTag typeTag,
@@ -86,8 +81,9 @@
     public void addValueToColumn(RecordConsumer recordConsumer, 
FlatLazyVisitablePointable pointable,
             PrimitiveType type) throws HyracksDataException {

-        typeTag = pointable.getTypeTag();
-        b = pointable.getByteArray();
+        ATypeTag typeTag = pointable.getTypeTag();
+        byte[] b = pointable.getByteArray();
+        int s, l;

         if (pointable.isTagged()) {
             s = pointable.getStartOffset() + 1;
@@ -196,7 +192,5 @@
             default:
                 throw 
RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, 
primitiveTypeName);
         }
-
     }
-
 }
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
similarity index 96%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
index bffdb33..b2cd223 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
@@ -16,14 +16,12 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;

 import java.io.DataInput;
 import java.io.IOException;
 import java.util.List;

-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
 import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.om.base.AMutableString;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
similarity index 97%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
index 73c034b..7bfae0b 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;

 import java.io.DataInput;
 import java.io.DataOutput;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
similarity index 98%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
index 4a19cd6..5333af5 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
@@ -16,9 +16,9 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;

-import static 
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;
+import static 
org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;

 import java.io.DataInput;
 import java.io.DataOutput;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
similarity index 98%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
index 73c9a73..591f322 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;

 import java.io.DataInput;
 import java.io.DataInputStream;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
similarity index 97%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
index 10de829..30e10d6 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;

 import java.io.DataInput;
 import java.io.DataInputStream;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
similarity index 97%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
index 8aa0e88..ec494fa 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.om.dictionary;

 import java.io.DataInputStream;
 import java.io.DataOutput;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
similarity index 98%
rename from 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
index 32e902b..9ea2978 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
@@ -16,8 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;

 import java.io.DataInput;
 import java.io.DataOutput;
diff --git 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index 5045c86..4022961 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -332,6 +332,10 @@

     public static String toString(byte[] bytes, int start) {
         StringBuilder builder = new StringBuilder();
+        return toString(bytes, start, builder);
+    }
+
+    public static String toString(byte[] bytes, int start, StringBuilder 
builder) {
         return toString(builder, bytes, start).toString();
     }


--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18663
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I798fcb4a02f017ad1b31e0cdb556e0dda1403b5e
Gerrit-Change-Number: 18663
Gerrit-PatchSet: 8
Gerrit-Owner: [email protected]
Gerrit-Reviewer: Anon. E. Moose #1000171
Gerrit-Reviewer: Jenkins <[email protected]>
Gerrit-Reviewer: Wail Alkowaileet <[email protected]>
Gerrit-Reviewer: [email protected]
Gerrit-MessageType: merged

Reply via email to