Repository: parquet-cpp Updated Branches: refs/heads/master 7abb9c476 -> 176b08c30
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/types.h ---------------------------------------------------------------------- diff --git a/src/parquet/types.h b/src/parquet/types.h index a4285be..520326b 100644 --- a/src/parquet/types.h +++ b/src/parquet/types.h @@ -117,7 +117,7 @@ struct PageType { // ---------------------------------------------------------------------- struct ByteArray { - ByteArray() {} + ByteArray() : len(0), ptr(nullptr) {} ByteArray(uint32_t len, const uint8_t* ptr) : len(len), ptr(ptr) {} uint32_t len; const uint8_t* ptr; @@ -132,7 +132,7 @@ struct ByteArray { }; struct FixedLenByteArray { - FixedLenByteArray() {} + FixedLenByteArray() : ptr(nullptr) {} explicit FixedLenByteArray(const uint8_t* ptr) : ptr(ptr) {} const uint8_t* ptr; }; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/util/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/parquet/util/CMakeLists.txt b/src/parquet/util/CMakeLists.txt index 52c4811..3a4b1c9 100644 --- a/src/parquet/util/CMakeLists.txt +++ b/src/parquet/util/CMakeLists.txt @@ -71,6 +71,7 @@ endif() ADD_PARQUET_TEST(bit-util-test) ADD_PARQUET_TEST(buffer-test) +ADD_PARQUET_TEST(comparison-test) ADD_PARQUET_TEST(input-output-test) ADD_PARQUET_TEST(mem-allocator-test) ADD_PARQUET_TEST(mem-pool-test) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/util/comparison-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/util/comparison-test.cc b/src/parquet/util/comparison-test.cc new file mode 100644 index 0000000..d2689ff --- /dev/null +++ b/src/parquet/util/comparison-test.cc @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> + +#include <cstdint> +#include <iostream> +#include <vector> + +#include "parquet/schema/descriptor.h" +#include "parquet/types.h" +#include "parquet/util/comparison.h" + +namespace parquet { + +namespace test { + +using parquet::schema::NodePtr; +using parquet::schema::PrimitiveNode; + +static ByteArray ByteArrayFromString(const std::string& s) { + auto ptr = reinterpret_cast<const uint8_t*>(s.data()); + return ByteArray(s.size(), ptr); +} + +static FLBA FLBAFromString(const std::string& s) { + auto ptr = reinterpret_cast<const uint8_t*>(s.data()); + return FLBA(ptr); +} + +TEST(Comparison, ByteArray) { + NodePtr node = PrimitiveNode::Make("bytearray", Repetition::REQUIRED, Type::BYTE_ARRAY); + ColumnDescriptor descr(node, 0, 0); + Compare<parquet::ByteArray> less(&descr); + + std::string a = "arrange"; + std::string b = "arrangement"; + auto arr1 = ByteArrayFromString(a); + auto arr2 = ByteArrayFromString(b); + ASSERT_TRUE(less(arr1, arr2)); + + a = u8"braten"; + b = u8"bügeln"; + auto arr3 = ByteArrayFromString(a); + auto arr4 = ByteArrayFromString(b); + // see PARQUET-686 discussion about binary comparison + ASSERT_TRUE(!less(arr3, arr4)); +} + +TEST(Comparison, FLBA) { + std::string a = "Antidisestablishmentarianism"; + std::string b = "Bundesgesundheitsministerium"; + auto arr1 = FLBAFromString(a); + auto arr2 = FLBAFromString(b); + + NodePtr node = PrimitiveNode::Make("FLBA", Repetition::REQUIRED, + Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, a.size()); + ColumnDescriptor descr(node, 0, 0); + Compare<parquet::FixedLenByteArray> less(&descr); + ASSERT_TRUE(less(arr1, arr2)); +} + +TEST(Comparison, Int96) { + parquet::Int96 a{{1, 41, 14}}, b{{1, 41, 42}}; + + NodePtr node = PrimitiveNode::Make("int96", Repetition::REQUIRED, Type::INT96); + ColumnDescriptor descr(node, 0, 0); + Compare<parquet::Int96> less(&descr); + ASSERT_TRUE(less(a, b)); + b.value[2] = 14; + ASSERT_TRUE(!less(a, b) && !less(b, a)); +} + +} // namespace test + +} // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/util/comparison.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/comparison.h b/src/parquet/util/comparison.h new file mode 100644 index 0000000..9d44e7e --- /dev/null +++ b/src/parquet/util/comparison.h @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef PARQUET_UTIL_COMPARISON_H +#define PARQUET_UTIL_COMPARISON_H + +#include <algorithm> + +#include "parquet/types.h" +#include "parquet/schema/descriptor.h" + +namespace parquet { + +template <typename T> +struct Compare { + explicit Compare(const ColumnDescriptor* descr) : type_length_(descr->type_length()) {} + + inline bool operator()(const T& a, const T& b) { return a < b; } + + private: + int32_t type_length_; +}; + +template <> +inline bool Compare<Int96>::operator()(const Int96& a, const Int96& b) { + return std::lexicographical_compare(a.value, a.value + 3, b.value, b.value + 3); +} + +template <> +inline bool Compare<ByteArray>::operator()(const ByteArray& a, const ByteArray& b) { + auto aptr = reinterpret_cast<const int8_t*>(a.ptr); + auto bptr = reinterpret_cast<const int8_t*>(b.ptr); + return std::lexicographical_compare(aptr, aptr + a.len, bptr, bptr + b.len); +} + +template <> +inline bool Compare<FLBA>::operator()(const FLBA& a, const FLBA& b) { + auto aptr = reinterpret_cast<const int8_t*>(a.ptr); + auto bptr = reinterpret_cast<const int8_t*>(b.ptr); + return std::lexicographical_compare( + aptr, aptr + type_length_, bptr, bptr + type_length_); +} + +} // namespace parquet + +#endif // PARQUET_UTIL_COMPARISON_H http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/util/test-common.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/test-common.h b/src/parquet/util/test-common.h index 2327aeb..edadb53 100644 --- a/src/parquet/util/test-common.h +++ b/src/parquet/util/test-common.h @@ -32,8 +32,7 @@ namespace parquet { namespace test { typedef ::testing::Types<BooleanType, Int32Type, Int64Type, Int96Type, FloatType, - DoubleType, ByteArrayType, FLBAType> - ParquetTypes; + DoubleType, ByteArrayType, FLBAType> ParquetTypes; template <typename T> static inline void assert_vector_equal(const vector<T>& left, const vector<T>& right) {
