Jianfeng Jia has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/449
Change subject: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray ...................................................................... ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray This patch is to change the encoding format that stores the length value of the variable length type (e.g. String, ByteArray) from fix-size encoding (2bytes) to variable-size encoding ( 1 to 5bytes) It will solve the issue 1102 to enable us to store a String that longer than 64K. Also for the common case of storing the short string ( <= 127), it will save one byte per string. Some important changes include: 1. Add one hyracks-util package to consolidate all the hyracks independent utility functions. It will reduce the chances of having duplicate utils in different packages. 2. Move parts of Asterix string functions down to Hyracks UTF8StringPointable object, which will benefit the other dependencies, such as VXQuery. Change-Id: I7e95df0f06984b784ebac2c84b97e56a50207d27 --- M algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java M algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java M algebricks/algebricks-examples/piglet-example/pom.xml M algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java M algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java M algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java M algebricks/algebricks-examples/pom.xml M algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java M algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java M hyracks/hyracks-data/hyracks-data-std/pom.xml M hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java M hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java M hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java A hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercasePointable.java M hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java D hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringWriter.java A hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java M hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java A hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayBuilder.java M hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java A hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ICharIterator.java A hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java A hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java A hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java A hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java M hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java A hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java A hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java A hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java M hyracks/hyracks-dataflow-common/pom.xml M hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java M hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java M hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java M hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java M hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java M hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java M hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java D hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java M hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java M hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java M hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java M hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java M hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java M hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java M hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java M hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java M hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java M hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java M hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java M hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java M hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java M hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java M hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java M hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java M hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java M hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java M hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java M hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java M hyracks/hyracks-storage-am-common/pom.xml M hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java M hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java M hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java M hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java D hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java M hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java M hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java M hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java M hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java M hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java M hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java M hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java M hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java M hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java M hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java M hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml M hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java M hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java M hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java A hyracks/hyracks-util/pom.xml A hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Parser.java A hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Printer.java A hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexParser.java A hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexPrinter.java A hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoder.java A hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java A hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java A hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java A hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoderTest.java A hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java A hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringSample.java A hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java M hyracks/pom.xml M pom.xml 123 files changed, 4,213 insertions(+), 1,880 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/hyracks refs/changes/49/449/1 -- To view, visit https://asterix-gerrit.ics.uci.edu/449 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I7e95df0f06984b784ebac2c84b97e56a50207d27 Gerrit-PatchSet: 1 Gerrit-Project: hyracks Gerrit-Branch: master Gerrit-Owner: Jianfeng Jia <[email protected]>
