This is an automated email from the ASF dual-hosted git repository.
wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 00f84b0c4f [ASTERIXDB-3432][STO] Improve Trie-based field name dictionary
00f84b0c4f is described below
commit 00f84b0c4f66ff61978173a45378bfe21eb4281a
Author: Wail Alkowaileet <[email protected]>
AuthorDate: Thu Jun 13 14:46:21 2024 -0700
[ASTERIXDB-3432][STO] Improve Trie-based field name dictionary
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
Avoid using Byte2ObjectArrayMap
Change-Id: If0a47d7f140f367f59560e695b1b93416adab8a1
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18376
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Wail Alkowaileet <[email protected]>
Reviewed-by: Murtadha Hubail <[email protected]>
---
.../column/assembler/AssemblerBuilderVisitor.java | 2 +-
.../AbstractFieldNamesDictionary.java | 26 ++++-
.../column/metadata/dictionary/ByteToNodeMap.java | 101 ++++++++++++++++++
.../metadata/{ => dictionary}/FieldNameTrie.java | 69 +++++--------
.../{ => dictionary}/FieldNamesHashDictionary.java | 18 +---
.../{ => dictionary}/FieldNamesTrieDictionary.java | 4 +-
.../column/metadata/{ => dictionary}/TrieNode.java | 37 ++-----
.../column/metadata/schema/ObjectSchemaNode.java | 2 +-
.../operation/lsm/flush/FlushColumnMetadata.java | 6 +-
.../operation/query/QueryColumnMetadata.java | 4 +-
.../query/QueryColumnWithMetaMetadata.java | 4 +-
.../metadata/trie/FieldNameDictionaryPerfTest.java | 115 +++++++++++++++++++++
.../column/metadata/trie/FieldNameTrieTest.java | 4 +-
13 files changed, 296 insertions(+), 96 deletions(-)
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
index 764b1b91cf..6480c30492 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
@@ -18,7 +18,7 @@
*/
package org.apache.asterix.column.assembler;
-import static
org.apache.asterix.column.metadata.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
import java.util.ArrayList;
import java.util.BitSet;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/AbstractFieldNamesDictionary.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
similarity index 75%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/AbstractFieldNamesDictionary.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
index f22631bb15..bffdb33002 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/AbstractFieldNamesDictionary.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
@@ -17,8 +17,13 @@
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
+import java.io.DataInput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AMutableString;
import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -50,6 +55,14 @@ public abstract class AbstractFieldNamesDictionary
implements IFieldNamesDiction
stringSerDer = new AStringSerializerDeserializer(new
UTF8StringWriter(), new UTF8StringReader());
}
+ /**
+ * Creates a new field-names dictionary. The implementation instantiated here is the
+ * trie-based {@link FieldNamesTrieDictionary}.
+ *
+ * @return a newly constructed dictionary
+ */
+ public static IFieldNamesDictionary create() {
+ return new FieldNamesTrieDictionary();
+ }
+
+ /**
+ * Reads a field-names dictionary from {@code input} by delegating to
+ * {@link FieldNamesTrieDictionary#deserialize(DataInput)}.
+ *
+ * @param input source holding the serialized dictionary
+ * @return the dictionary returned by the trie-based deserializer
+ * @throws IOException if the delegate fails while reading {@code input}
+ */
+ public static IFieldNamesDictionary deserialize(DataInput input) throws
IOException {
+ return FieldNamesTrieDictionary.deserialize(input);
+ }
+
static ArrayBackedValueStorage creatFieldName(IValueReference fieldName)
throws HyracksDataException {
ArrayBackedValueStorage copy = new
ArrayBackedValueStorage(fieldName.getLength());
copy.append(fieldName);
@@ -66,4 +79,15 @@ public abstract class AbstractFieldNamesDictionary
implements IFieldNamesDiction
mutableString.setValue(fieldName);
stringSerDer.serialize(mutableString, storage.getDataOutput());
}
+
+    /**
+     * Reads {@code numberOfFieldNames} length-prefixed field names from {@code input} and appends them to
+     * {@code fieldNames} in the order they are read.
+     * <p>
+     * Each entry is an {@code int} byte count followed by exactly that many bytes, which are copied verbatim
+     * into a freshly allocated {@link ArrayBackedValueStorage}.
+     *
+     * @param input              source to read the entries from
+     * @param fieldNames         list that receives one storage per entry
+     * @param numberOfFieldNames number of entries to read; nothing is read when it is not positive
+     * @throws IOException if reading from {@code input} fails
+     */
+    static void deserializeFieldNames(DataInput input, List<IValueReference> fieldNames, int numberOfFieldNames)
+            throws IOException {
+        for (int remaining = numberOfFieldNames; remaining > 0; remaining--) {
+            final int byteCount = input.readInt();
+            final ArrayBackedValueStorage storage = new ArrayBackedValueStorage(byteCount);
+            // Declare the full size up front so the backing array can be filled in place.
+            storage.setSize(byteCount);
+            input.readFully(storage.getByteArray(), 0, byteCount);
+            fieldNames.add(storage);
+        }
+    }
}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
new file mode 100644
index 0000000000..73c034b1da
--- /dev/null
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.column.metadata.dictionary;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Collection;
+
+import it.unimi.dsi.fastutil.objects.ObjectArrays;
+
+/**
+ * Sparse, array-backed map from one byte to the {@link TrieNode} stored under that byte.
+ * <p>
+ * Keys are interpreted as unsigned values and used directly as array indexes. The backing array is grown
+ * lazily, so a map only holds slots up to (at least) the largest key it has stored.
+ */
+final class ByteToNodeMap {
+    /** Shared zero-length array used while a map has no slots; it is never written to. */
+    private static final TrieNode[] EMPTY = new TrieNode[0];
+    /** Children indexed by unsigned key; {@code null} marks an unused slot. */
+    private TrieNode[] children;
+    /** Number of non-null slots in {@link #children}. */
+    private int numberOfChildren;
+
+    /** Creates a map with no children and no allocated slots. */
+    ByteToNodeMap() {
+        children = EMPTY;
+        numberOfChildren = 0;
+    }
+
+    /** Wraps an already populated slot array; used by {@link #deserialize(DataInput)}. */
+    private ByteToNodeMap(TrieNode[] children, int numberOfChildren) {
+        this.children = children;
+        this.numberOfChildren = numberOfChildren;
+    }
+
+    /**
+     * Stores {@code node} under {@code key}, replacing whatever was stored there before.
+     * <p>
+     * The child count follows the slot's transition between empty and occupied, so overwriting an existing
+     * child does not inflate it. {@link #serialize(DataOutput)} writes that count as the number of entries,
+     * so an inflated count would yield a stream that cannot be read back.
+     *
+     * @param key  byte under which the node is stored, read as an unsigned value
+     * @param node node to store; {@code null} empties the slot
+     */
+    void put(byte key, TrieNode node) {
+        int index = Byte.toUnsignedInt(key);
+        ensure(index);
+        TrieNode previous = children[index];
+        children[index] = node;
+        if (previous == null && node != null) {
+            numberOfChildren++;
+        } else if (previous != null && node == null) {
+            numberOfChildren--;
+        }
+    }
+
+    /**
+     * Looks up the node stored under {@code key}.
+     *
+     * @param key byte to look up, read as an unsigned value
+     * @return the stored node, or {@code null} when the slot is empty or beyond the allocated slots
+     */
+    TrieNode get(byte key) {
+        int index = Byte.toUnsignedInt(key);
+        if (index < children.length) {
+            return children[index];
+        }
+
+        return null;
+    }
+
+    /** Grows the slot array, if needed, so that {@code index} is a valid position. */
+    private void ensure(int index) {
+        if (index >= children.length) {
+            children = ObjectArrays.grow(children, index + 1, children.length);
+        }
+    }
+
+    /**
+     * Appends every stored child to {@code collection} in ascending key order.
+     *
+     * @param collection receiver of the children
+     */
+    void addAllChildren(Collection<TrieNode> collection) {
+        int addedChildren = 0;
+        // Stop as soon as every child has been visited instead of scanning the trailing empty slots.
+        for (int i = 0; i < children.length && addedChildren < numberOfChildren; i++) {
+            TrieNode child = children[i];
+            if (child != null) {
+                collection.add(child);
+                addedChildren++;
+            }
+        }
+    }
+
+    /**
+     * Writes this map to {@code out}: the child count, the slot-array length, and then for each child in
+     * ascending key order its slot index followed by the child's own serialization.
+     *
+     * @param out destination stream
+     * @throws IOException if writing fails
+     */
+    void serialize(DataOutput out) throws IOException {
+        out.writeInt(numberOfChildren);
+        out.writeInt(children.length);
+        int addedChildren = 0;
+        for (int i = 0; i < children.length && addedChildren < numberOfChildren; i++) {
+            TrieNode child = children[i];
+            if (child != null) {
+                out.writeInt(i);
+                child.serialize(out);
+                addedChildren++;
+            }
+        }
+    }
+
+    /**
+     * Reads a map previously written by {@link #serialize(DataOutput)}.
+     * <p>
+     * The header and every slot index are validated so that corrupted input is reported as an
+     * {@link IOException} rather than as an array-size or array-index failure.
+     *
+     * @param in source stream
+     * @return the reconstructed map
+     * @throws IOException if reading fails or the stream content is inconsistent
+     */
+    static ByteToNodeMap deserialize(DataInput in) throws IOException {
+        int numberOfChildren = in.readInt();
+        int length = in.readInt();
+        if (length < 0 || numberOfChildren < 0 || numberOfChildren > length) {
+            throw new IOException(
+                    "Corrupted children map: numberOfChildren=" + numberOfChildren + ", length=" + length);
+        }
+        TrieNode[] children = length == 0 ? EMPTY : new TrieNode[length];
+        for (int i = 0; i < numberOfChildren; i++) {
+            int index = in.readInt();
+            if (index < 0 || index >= length) {
+                throw new IOException("Corrupted children map: index=" + index + ", length=" + length);
+            }
+            children[index] = TrieNode.deserialize(in);
+        }
+
+        return new ByteToNodeMap(children, numberOfChildren);
+    }
+}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNameTrie.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
similarity index 89%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNameTrie.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
index e31026ec95..4a19cd6a5a 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNameTrie.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
@@ -16,19 +16,20 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
+
+import static
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayDeque;
import java.util.ArrayList;
-import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
import org.apache.hyracks.util.string.UTF8StringUtil;
public class FieldNameTrie {
@@ -129,11 +130,10 @@ public class FieldNameTrie {
// resume from the stored node.
int bytesToStoreLength = UTF8StringUtil.getNumBytesToStoreLength(len);
- int start = bytesToStoreLength;
int byteIndex = lookupState.getRelativeOffsetFromStart() +
bytesToStoreLength;
byte[] bytes = fieldName.getByteArray();
- int lastIndex = (start + len - 1);
+ int lastIndex = (bytesToStoreLength + len - 1);
while (byteIndex <= lastIndex) {
byte b = bytes[byteIndex];
TrieNode nextNode = searchNode.getChild(b);
@@ -191,7 +191,7 @@ public class FieldNameTrie {
// find absolute starting point in the current fieldName
int diff = searchNode.getStart() - searchNode.getBytesToStoreLength();
// since hookup happens on a new fieldName, hence start will be
bytesToStoreLength
- searchNode.setIndex(fieldNames.size(), start + diff,
searchNode.getLength(), bytesToStoreLength);
+ searchNode.setIndex(fieldNames.size(), bytesToStoreLength + diff,
searchNode.getLength(), bytesToStoreLength);
searchNode.setIsEndOfField(true);
fieldNames.add(fieldName);
return searchNode.getIndex();
@@ -210,14 +210,25 @@ public class FieldNameTrie {
rootNode.serialize(out);
}
+ /**
+ * Returns the list of field names held by this trie. The list object itself is returned,
+ * not a copy.
+ *
+ * @return this trie's field-name list
+ */
+ public List<IValueReference> getFieldNames() {
+ return fieldNames;
+ }
+
+ /**
+ * Returns the field name stored at position {@code fieldIndex} of the field-name list.
+ *
+ * @param fieldIndex position in the field-name list
+ * @return the field name at that position
+ */
+ public IValueReference getFieldName(int fieldIndex) {
+ return fieldNames.get(fieldIndex);
+ }
+
+ /**
+ * Drops the reference to the root node and empties the field-name list.
+ * NOTE(review): rootNode is left null here, so the trie is presumably not meant to be
+ * used again after this call - confirm against callers.
+ */
+ public void clear() {
+ rootNode = null;
+ fieldNames.clear();
+ }
+
public static FieldNameTrie deserialize(DataInput in) throws IOException {
int version = in.readInt();
- switch (version) {
- case VERSION:
- return deserializeV1(in);
- default:
- throw new IllegalStateException("Unsupported version: " +
version);
+ if (version == VERSION) {
+ return deserializeV1(in);
}
+ throw new IllegalStateException("Unsupported version: " + version);
}
private static FieldNameTrie deserializeV1(DataInput in) throws
IOException {
@@ -232,37 +243,11 @@ public class FieldNameTrie {
return newTrie;
}
- private static void deserializeFieldNames(DataInput input,
List<IValueReference> fieldNames, int numberOfFieldNames)
- throws IOException {
- for (int i = 0; i < numberOfFieldNames; i++) {
- int length = input.readInt();
- ArrayBackedValueStorage fieldName = new
ArrayBackedValueStorage(length);
- fieldName.setSize(length);
- input.readFully(fieldName.getByteArray(), 0, length);
- fieldNames.add(fieldName);
- }
- }
-
- public List<IValueReference> getFieldNames() {
- return fieldNames;
- }
-
- public IValueReference getFieldName(int fieldIndex) {
- return fieldNames.get(fieldIndex);
- }
-
- public void clear() {
- rootNode = null;
- fieldNames.clear();
- }
-
@Override
public String toString() {
TrieNode currentNode = rootNode;
- Queue<TrieNode> queue = new LinkedList<>();
- for (TrieNode node : currentNode.getChildren().values()) {
- queue.offer(node);
- }
+ Queue<TrieNode> queue = new ArrayDeque<>();
+ currentNode.getChildren().addAllChildren(queue);
StringBuilder treeBuilder = new StringBuilder();
while (!queue.isEmpty()) {
int len = queue.size();
@@ -278,16 +263,14 @@ public class FieldNameTrie {
treeBuilder.append(" | ");
}
- for (TrieNode child : node.getChildren().values()) {
- queue.offer(child);
- }
+ node.getChildren().addAllChildren(queue);
}
treeBuilder.append("\n");
}
return treeBuilder.toString();
}
- class LookupState {
+ private static class LookupState {
private TrieNode lastNode;
private int relativeOffsetFromStart;
private int fieldLength;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesHashDictionary.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
similarity index 93%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesHashDictionary.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
index c83b2892da..73c9a738ba 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesHashDictionary.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
import java.io.DataInput;
import java.io.DataInputStream;
@@ -37,6 +37,10 @@ import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
+/**
+ * @deprecated Use {@link FieldNamesTrieDictionary}
+ */
+@Deprecated
public class FieldNamesHashDictionary extends AbstractFieldNamesDictionary {
//For both declared and inferred fields
private final List<IValueReference> fieldNames;
@@ -174,18 +178,6 @@ public class FieldNamesHashDictionary extends
AbstractFieldNamesDictionary {
deserializeHashToFieldNameIndex(input, hashToFieldNameIndexMap,
numberOfFieldNames);
}
- private static void deserializeFieldNames(DataInput input,
List<IValueReference> fieldNames, int numberOfFieldNames)
- throws IOException {
-
- for (int i = 0; i < numberOfFieldNames; i++) {
- int length = input.readInt();
- ArrayBackedValueStorage fieldName = new
ArrayBackedValueStorage(length);
- fieldName.setSize(length);
- input.readFully(fieldName.getByteArray(), 0, length);
- fieldNames.add(fieldName);
- }
- }
-
private static void deserializeDeclaredFieldNames(DataInput input,
Object2IntMap<String> declaredFieldNamesToIndexMap) throws
IOException {
int numberOfDeclaredFieldNames = input.readInt();
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesTrieDictionary.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
similarity index 96%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesTrieDictionary.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
index 8b2d54852b..10de8298dd 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesTrieDictionary.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
import java.io.DataInput;
import java.io.DataInputStream;
@@ -37,7 +37,7 @@ public class FieldNamesTrieDictionary extends
AbstractFieldNamesDictionary {
this(new FieldNameTrie());
}
- public FieldNamesTrieDictionary(FieldNameTrie dictionary) {
+ private FieldNamesTrieDictionary(FieldNameTrie dictionary) {
super();
this.dictionary = dictionary;
lookupStorage = new ArrayBackedValueStorage();
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/TrieNode.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
similarity index 79%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/TrieNode.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
index 18e645b2af..32e902b31e 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/TrieNode.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
@@ -17,22 +17,18 @@
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.util.Map;
import org.apache.hyracks.data.std.api.IValueReference;
-import it.unimi.dsi.fastutil.bytes.Byte2ObjectArrayMap;
-import it.unimi.dsi.fastutil.bytes.Byte2ObjectMap;
-
class TrieNode {
public static final int NOT_FOUND_INDEX = -1;
- private Byte2ObjectMap<TrieNode> children;
+ private ByteToNodeMap children;
private boolean isEndOfField;
private int index;
private int start; // includes the edges' byte
@@ -40,11 +36,11 @@ class TrieNode {
private int bytesToStoreLength;
TrieNode() {
- children = new Byte2ObjectArrayMap<>();
+ this.children = new ByteToNodeMap();
index = NOT_FOUND_INDEX;
}
- TrieNode(Byte2ObjectMap<TrieNode> children) {
+ TrieNode(ByteToNodeMap children) {
this.children = children;
index = NOT_FOUND_INDEX;
}
@@ -60,10 +56,6 @@ class TrieNode {
this.isEndOfField = isEndOfField;
}
- public boolean containsKey(byte key) {
- return children.containsKey(key);
- }
-
public TrieNode getChild(byte key) {
return children.get(key);
}
@@ -72,7 +64,7 @@ class TrieNode {
children.put(key, child);
}
- public Byte2ObjectMap<TrieNode> getChildren() {
+ public ByteToNodeMap getChildren() {
return children;
}
@@ -98,7 +90,7 @@ class TrieNode {
public void reset() {
// since this object went to the new node.
- children = new Byte2ObjectArrayMap<>();
+ children = new ByteToNodeMap();
}
public void split(IValueReference fieldName, int splitIndex) {
@@ -107,6 +99,7 @@ class TrieNode {
// something to be split, have to create a new node
// and do the linking.
TrieNode childNode = new TrieNode(children);
+
int leftToSplit = length - splitIndex;
childNode.setIndex(index, start + splitIndex, leftToSplit,
bytesToStoreLength);
childNode.setIsEndOfField(isEndOfField);
@@ -120,33 +113,25 @@ class TrieNode {
}
public void serialize(DataOutput out) throws IOException {
+ // Serialize child first
+ children.serialize(out);
// serialize fields
out.writeBoolean(isEndOfField);
out.writeInt(index);
out.writeInt(start);
out.writeInt(length);
out.writeInt(bytesToStoreLength);
-
- out.writeInt(children.size());
- for (Map.Entry<Byte, TrieNode> entry : children.byte2ObjectEntrySet())
{
- out.writeByte(entry.getKey());
- entry.getValue().serialize(out);
- }
}
public static TrieNode deserialize(DataInput in) throws IOException {
- TrieNode node = new TrieNode();
+ ByteToNodeMap children = ByteToNodeMap.deserialize(in);
+ TrieNode node = new TrieNode(children);
node.isEndOfField = in.readBoolean();
node.index = in.readInt();
node.start = in.readInt();
node.length = in.readInt();
node.bytesToStoreLength = in.readInt();
- int childrenSize = in.readInt();
- for (int i = 0; i < childrenSize; i++) {
- byte b = in.readByte();
- node.children.put(b, TrieNode.deserialize(in));
- }
return node;
}
}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
index 6014bf6d4f..0bea1884fe 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
@@ -18,7 +18,7 @@
*/
package org.apache.asterix.column.metadata.schema;
-import static
org.apache.asterix.column.metadata.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
import java.io.DataInput;
import java.io.DataInputStream;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
index 04334a3739..a966d61d16 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
@@ -31,9 +31,9 @@ import java.util.List;
import java.util.Map;
import org.apache.asterix.column.metadata.AbstractColumnMetadata;
-import org.apache.asterix.column.metadata.FieldNamesTrieDictionary;
import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.PathInfoSerializer;
+import
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNestedNode;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -95,7 +95,7 @@ public final class FlushColumnMetadata extends
AbstractColumnMetadata {
columnWriters = new ArrayList<>();
level = -1;
repeated = 0;
- fieldNamesDictionary = new FieldNamesTrieDictionary();
+ fieldNamesDictionary = AbstractFieldNamesDictionary.create();
root = new ObjectSchemaNode();
metaRoot = metaType != null ? new ObjectSchemaNode() : null;
pathInfoSerializer = new PathInfoSerializer();
@@ -250,7 +250,7 @@ public final class FlushColumnMetadata extends
AbstractColumnMetadata {
deserializeWriters(input, writers, columnWriterFactory);
//FieldNames
- IFieldNamesDictionary fieldNamesDictionary =
FieldNamesTrieDictionary.deserialize(input);
+ IFieldNamesDictionary fieldNamesDictionary =
AbstractFieldNamesDictionary.deserialize(input);
//Schema
Map<AbstractSchemaNestedNode, RunLengthIntArray> definitionLevels =
new HashMap<>();
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
index 967369ff8b..67631cddde 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
@@ -39,8 +39,8 @@ import
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorF
import
org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
import org.apache.asterix.column.metadata.AbstractColumnImmutableReadMetadata;
-import org.apache.asterix.column.metadata.FieldNamesTrieDictionary;
import org.apache.asterix.column.metadata.IFieldNamesDictionary;
+import
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
@@ -189,7 +189,7 @@ public class QueryColumnMetadata extends
AbstractColumnImmutableReadMetadata {
DataInput input = new DataInputStream(new ByteArrayInputStream(bytes,
fieldNamesStart, length));
//FieldNames
- IFieldNamesDictionary fieldNamesDictionary =
FieldNamesTrieDictionary.deserialize(input);
+ IFieldNamesDictionary fieldNamesDictionary =
AbstractFieldNamesDictionary.deserialize(input);
//Schema
ObjectSchemaNode root = (ObjectSchemaNode)
AbstractSchemaNode.deserialize(input, null);
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
index cda492c09a..1869415328 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
@@ -35,8 +35,8 @@ import
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluator;
import
org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorFactory;
import
org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
-import org.apache.asterix.column.metadata.FieldNamesTrieDictionary;
import org.apache.asterix.column.metadata.IFieldNamesDictionary;
+import
org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
@@ -133,7 +133,7 @@ public final class QueryColumnWithMetaMetadata extends
QueryColumnMetadata {
DataInput input = new DataInputStream(new ByteArrayInputStream(bytes,
fieldNamesStart, length));
//FieldNames
- IFieldNamesDictionary fieldNamesDictionary =
FieldNamesTrieDictionary.deserialize(input);
+ IFieldNamesDictionary fieldNamesDictionary =
AbstractFieldNamesDictionary.deserialize(input);
//Schema
ObjectSchemaNode root = (ObjectSchemaNode)
AbstractSchemaNode.deserialize(input, null);
diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
new file mode 100644
index 0000000000..63c2b22804
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.column.metadata.trie;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.asterix.column.metadata.IFieldNamesDictionary;
+import org.apache.asterix.column.metadata.dictionary.FieldNamesHashDictionary;
+import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
+import
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
+import org.apache.asterix.om.base.AMutableString;
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Manual micro-benchmark that times repeated {@code getOrCreateFieldNameIndex} calls on
+ * {@link FieldNamesHashDictionary} and on {@link FieldNamesTrieDictionary} and prints the elapsed
+ * milliseconds for each. It asserts nothing and is ignored by default.
+ */
+@Ignore
+public class FieldNameDictionaryPerfTest {
+    private static final int NUM_RECORDS = 1000000;
+    private static final int NUMBER_OF_RANDOM_FIELD_NAMES = 1000;
+    private static final int NUM_ITER = 5;
+    // Several names appear more than once in this list.
+    private static final String[] FIELD_NAMES = { "country", "address", "free_parking", "city", "type", "url",
+            "reviews", "date", "author", "ratings", "Value", "Cleanliness", "Overall", "Check in / front desk",
+            "Rooms", "date", "author", "ratings", "Value", "Cleanliness", "Overall", "Check in / front desk",
+            "Rooms", "date", "author", "ratings", "Value", "Cleanliness", "Overall", "Check in / front desk",
+            "Rooms", "phone", "price", "avg_rating", "free_breakfast", "name", "public_likes", "email" };
+    private static final FieldNameDictionaryFactory HASH = FieldNamesHashDictionary::new;
+    private static final FieldNameDictionaryFactory TRIE = FieldNamesTrieDictionary::new;
+
+    private final AStringSerializerDeserializer stringSerDer =
+            new AStringSerializerDeserializer(new UTF8StringWriter(), new UTF8StringReader());
+    private final AMutableString string = new AMutableString("");
+
+    /** Benchmarks both dictionaries against randomly generated alphanumeric field names. */
+    @Test
+    public void benchmarkRandom() throws HyracksDataException {
+        IValueReference[] randomNames = new IValueReference[NUMBER_OF_RANDOM_FIELD_NAMES];
+        for (int slot = 0; slot < randomNames.length; slot++) {
+            randomNames[slot] = getRandomString();
+        }
+        runAndReportTime(randomNames);
+    }
+
+    /** Benchmarks both dictionaries against the fixed {@link #FIELD_NAMES} list. */
+    @Test
+    public void benchmarkRepeated() throws HyracksDataException {
+        IValueReference[] serializedNames = new IValueReference[FIELD_NAMES.length];
+        int slot = 0;
+        for (String name : FIELD_NAMES) {
+            serializedNames[slot++] = serialize(name);
+        }
+
+        runAndReportTime(serializedNames);
+    }
+
+    /** Times the hash dictionary first, then the trie dictionary, printing one line per implementation. */
+    private void runAndReportTime(IValueReference[] fieldNames) throws HyracksDataException {
+        System.out.println("HASH: " + elapsedMillis(HASH, fieldNames));
+        System.out.println("TRIE: " + elapsedMillis(TRIE, fieldNames));
+    }
+
+    /** Runs {@link #NUM_ITER} benchmark rounds with {@code factory} and returns the total wall time in ms. */
+    private long elapsedMillis(FieldNameDictionaryFactory factory, IValueReference[] fieldNames)
+            throws HyracksDataException {
+        final long begin = System.nanoTime();
+        for (int round = 0; round < NUM_ITER; round++) {
+            createAndRun(factory, fieldNames);
+        }
+        return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - begin);
+    }
+
+    /** Builds one dictionary and requests the index of every field name once per simulated record. */
+    private void createAndRun(FieldNameDictionaryFactory factory, IValueReference[] fieldNames)
+            throws HyracksDataException {
+        final IFieldNamesDictionary dictionary = factory.create();
+        for (int record = 0; record < NUM_RECORDS; record++) {
+            for (IValueReference fieldName : fieldNames) {
+                dictionary.getOrCreateFieldNameIndex(fieldName);
+            }
+        }
+    }
+
+    /** Serializes a random alphanumeric string produced by {@code randomAlphanumeric(5, 20)}. */
+    private IValueReference getRandomString() throws HyracksDataException {
+        final String random = RandomStringUtils.randomAlphanumeric(5, 20);
+        return serialize(random);
+    }
+
+    /** Writes {@code value} through the string serializer into a new storage and returns that storage. */
+    private IValueReference serialize(String value) throws HyracksDataException {
+        final ArrayBackedValueStorage buffer = new ArrayBackedValueStorage();
+        buffer.reset();
+        string.setValue(value);
+        stringSerDer.serialize(string, buffer.getDataOutput());
+        return buffer;
+    }
+
+    /** Supplies a fresh dictionary instance for one benchmark round. */
+    @FunctionalInterface
+    private interface FieldNameDictionaryFactory {
+        IFieldNamesDictionary create();
+    }
+}
diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
index d99696327e..c9f58d5394 100644
--- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
+++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
@@ -28,8 +28,8 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import org.apache.asterix.column.metadata.FieldNameTrie;
-import org.apache.asterix.column.metadata.FieldNamesTrieDictionary;
+import org.apache.asterix.column.metadata.dictionary.FieldNameTrie;
+import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.data.std.primitive.UTF8StringPointable;