This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 4e563bf ARROW-7372: [C++] Allow creating dictionary array from simple JSON
4e563bf is described below
commit 4e563bf1374f9d10fb7d66e292359b9999975318
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Oct 1 12:03:55 2020 +0200
ARROW-7372: [C++] Allow creating dictionary array from simple JSON
Only simple dictionary value types are supported: integers, string-like types
(string, binary, their large variants, and fixed-size binary), and decimal.
Closes #8309 from pitrou/ARROW-7372-json-simple-dict
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/array/builder_dict.h | 6 +
cpp/src/arrow/builder.cc | 1 +
cpp/src/arrow/builder.h | 4 +-
cpp/src/arrow/ipc/json_simple.cc | 149 +++++++++++++++++-------
cpp/src/arrow/ipc/json_simple_test.cc | 205 ++++++++++++++++++++++++----------
5 files changed, 261 insertions(+), 104 deletions(-)
diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h
index db3db0f..c5db0d1 100644
--- a/cpp/src/arrow/array/builder_dict.h
+++ b/cpp/src/arrow/array/builder_dict.h
@@ -190,6 +190,12 @@ class DictionaryBuilderBase : public ArrayBuilder {
/// \brief The current number of entries in the dictionary
int64_t dictionary_length() const { return memo_table_->size(); }
+ /// \brief The value byte width (for FixedSizeBinaryType)
+ template <typename T1 = T>
+ enable_if_fixed_size_binary<T1, int32_t> byte_width() const {
+ return byte_width_;
+ }
+
/// \brief Append a scalar value
Status Append(Value value) {
ARROW_RETURN_NOT_OK(Reserve(1));
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index e07e9ad..1dcbf78 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -50,6 +50,7 @@ struct DictionaryBuilderCase {
return Create<DictionaryBuilder<LargeStringType>>();
}
Status Visit(const FixedSizeBinaryType&) { return
CreateFor<FixedSizeBinaryType>(); }
+ Status Visit(const Decimal128Type&) { return CreateFor<Decimal128Type>(); }
Status Visit(const DataType& value_type) { return
NotImplemented(value_type); }
Status Visit(const HalfFloatType& value_type) { return
NotImplemented(value_type); }
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 3202312..54ff290 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -39,7 +39,7 @@ class MemoryPool;
/// \brief Construct an empty ArrayBuilder corresponding to the data
/// type
/// \param[in] pool the MemoryPool to use for allocations
-/// \param[in] type an instance of DictionaryType
+/// \param[in] type the data type to create the builder for
/// \param[out] out the created ArrayBuilder
ARROW_EXPORT
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
@@ -48,7 +48,7 @@ Status MakeBuilder(MemoryPool* pool, const
std::shared_ptr<DataType>& type,
/// \brief Construct an empty DictionaryBuilder initialized optionally
/// with a pre-existing dictionary
/// \param[in] pool the MemoryPool to use for allocations
-/// \param[in] type an instance of DictionaryType
+/// \param[in] type the dictionary type to create the builder for
/// \param[in] dictionary the initial dictionary, if any. May be nullptr
/// \param[out] out the created ArrayBuilder
ARROW_EXPORT
diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc
index d307373..39d19d6 100644
--- a/cpp/src/arrow/ipc/json_simple.cc
+++ b/cpp/src/arrow/ipc/json_simple.cc
@@ -27,6 +27,7 @@
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
#include "arrow/util/string_view.h"
#include "arrow/util/value_parsing.h"
@@ -50,9 +51,11 @@ namespace json {
using ::arrow::internal::checked_cast;
using ::arrow::internal::checked_pointer_cast;
-static constexpr auto kParseFlags = rj::kParseFullPrecisionFlag |
rj::kParseNanAndInfFlag;
+namespace {
-static Status JSONTypeError(const char* expected_type, rj::Type json_type) {
+constexpr auto kParseFlags = rj::kParseFullPrecisionFlag |
rj::kParseNanAndInfFlag;
+
+Status JSONTypeError(const char* expected_type, rj::Type json_type) {
return Status::Invalid("Expected ", expected_type, " or null, got JSON type
",
json_type);
}
@@ -101,6 +104,22 @@ class ConcreteConverter : public Converter {
}
return Status::OK();
}
+
+ const std::shared_ptr<DataType>& value_type() {
+ if (type_->id() != Type::DICTIONARY) {
+ return type_;
+ }
+ return checked_cast<const DictionaryType&>(*type_).value_type();
+ }
+
+ template <typename BuilderType>
+ Status MakeConcreteBuilder(std::shared_ptr<BuilderType>* out) {
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder));
+ *out = checked_pointer_cast<BuilderType>(std::move(builder));
+ DCHECK(*out);
+ return Status::OK();
+ }
};
// ------------------------------------------------------------------------
@@ -213,20 +232,17 @@ enable_if_physical_floating_point<T, Status>
ConvertNumber(const rj::Value& json
// ------------------------------------------------------------------------
// Converter for int arrays
-template <typename Type>
-class IntegerConverter final : public
ConcreteConverter<IntegerConverter<Type>> {
+template <typename Type, typename BuilderType = typename
TypeTraits<Type>::BuilderType>
+class IntegerConverter final
+ : public ConcreteConverter<IntegerConverter<Type, BuilderType>> {
using c_type = typename Type::c_type;
+
static constexpr auto is_signed = std::is_signed<c_type>::value;
public:
explicit IntegerConverter(const std::shared_ptr<DataType>& type) {
this->type_ = type; }
- Status Init() override {
- std::unique_ptr<ArrayBuilder> builder;
- RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder));
- builder_ = checked_pointer_cast<NumericBuilder<Type>>(std::move(builder));
- return Status::OK();
- }
+ Status Init() override { return this->MakeConcreteBuilder(&builder_); }
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
@@ -240,21 +256,20 @@ class IntegerConverter final : public
ConcreteConverter<IntegerConverter<Type>>
std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
private:
- std::shared_ptr<NumericBuilder<Type>> builder_;
+ std::shared_ptr<BuilderType> builder_;
};
// ------------------------------------------------------------------------
// Converter for float arrays
-template <typename Type>
-class FloatConverter final : public ConcreteConverter<FloatConverter<Type>> {
+template <typename Type, typename BuilderType = typename
TypeTraits<Type>::BuilderType>
+class FloatConverter final : public ConcreteConverter<FloatConverter<Type,
BuilderType>> {
using c_type = typename Type::c_type;
public:
- explicit FloatConverter(const std::shared_ptr<DataType>& type) {
- this->type_ = type;
- builder_ = std::make_shared<NumericBuilder<Type>>();
- }
+ explicit FloatConverter(const std::shared_ptr<DataType>& type) { this->type_
= type; }
+
+ Status Init() override { return this->MakeConcreteBuilder(&builder_); }
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
@@ -268,20 +283,22 @@ class FloatConverter final : public
ConcreteConverter<FloatConverter<Type>> {
std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
private:
- std::shared_ptr<NumericBuilder<Type>> builder_;
+ std::shared_ptr<BuilderType> builder_;
};
// ------------------------------------------------------------------------
// Converter for decimal arrays
-class DecimalConverter final : public ConcreteConverter<DecimalConverter> {
+template <typename BuilderType = typename
TypeTraits<Decimal128Type>::BuilderType>
+class DecimalConverter final : public
ConcreteConverter<DecimalConverter<BuilderType>> {
public:
explicit DecimalConverter(const std::shared_ptr<DataType>& type) {
this->type_ = type;
- decimal_type_ = checked_cast<Decimal128Type*>(type.get());
- builder_ = std::make_shared<DecimalBuilder>(type);
+ decimal_type_ = &checked_cast<const Decimal128Type&>(*this->value_type());
}
+ Status Init() override { return this->MakeConcreteBuilder(&builder_); }
+
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
return this->AppendNull();
@@ -303,8 +320,8 @@ class DecimalConverter final : public
ConcreteConverter<DecimalConverter> {
std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
private:
- std::shared_ptr<DecimalBuilder> builder_;
- Decimal128Type* decimal_type_;
+ std::shared_ptr<BuilderType> builder_;
+ const Decimal128Type* decimal_type_;
};
// ------------------------------------------------------------------------
@@ -381,15 +398,13 @@ class DayTimeIntervalConverter final
// ------------------------------------------------------------------------
// Converter for binary and string arrays
-template <typename TYPE>
-class StringConverter final : public ConcreteConverter<StringConverter<TYPE>> {
+template <typename Type, typename BuilderType = typename
TypeTraits<Type>::BuilderType>
+class StringConverter final
+ : public ConcreteConverter<StringConverter<Type, BuilderType>> {
public:
- using BuilderType = typename TypeTraits<TYPE>::BuilderType;
+ explicit StringConverter(const std::shared_ptr<DataType>& type) {
this->type_ = type; }
- explicit StringConverter(const std::shared_ptr<DataType>& type) {
- this->type_ = type;
- builder_ = std::make_shared<BuilderType>(type, default_memory_pool());
- }
+ Status Init() override { return this->MakeConcreteBuilder(&builder_); }
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
@@ -412,14 +427,16 @@ class StringConverter final : public
ConcreteConverter<StringConverter<TYPE>> {
// ------------------------------------------------------------------------
// Converter for fixed-size binary arrays
+template <typename BuilderType = typename
TypeTraits<FixedSizeBinaryType>::BuilderType>
class FixedSizeBinaryConverter final
- : public ConcreteConverter<FixedSizeBinaryConverter> {
+ : public ConcreteConverter<FixedSizeBinaryConverter<BuilderType>> {
public:
explicit FixedSizeBinaryConverter(const std::shared_ptr<DataType>& type) {
this->type_ = type;
- builder_ = std::make_shared<FixedSizeBinaryBuilder>(type,
default_memory_pool());
}
+ Status Init() override { return this->MakeConcreteBuilder(&builder_); }
+
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
return this->AppendNull();
@@ -441,7 +458,7 @@ class FixedSizeBinaryConverter final
std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
private:
- std::shared_ptr<FixedSizeBinaryBuilder> builder_;
+ std::shared_ptr<BuilderType> builder_;
};
// ------------------------------------------------------------------------
@@ -720,14 +737,62 @@ class UnionConverter final : public
ConcreteConverter<UnionConverter> {
// ------------------------------------------------------------------------
// General conversion functions
+Status ConversionNotImplemented(const std::shared_ptr<DataType>& type) {
+ return Status::NotImplemented("JSON conversion to ", type->ToString(),
+ " not implemented");
+}
+
+Status GetDictConverter(const std::shared_ptr<DataType>& type,
+ std::shared_ptr<Converter>* out) {
+ std::shared_ptr<Converter> res;
+
+ const auto value_type = checked_cast<const
DictionaryType&>(*type).value_type();
+
+#define SIMPLE_CONVERTER_CASE(ID, CLASS, TYPE) \
+ case ID: \
+ res = std::make_shared<CLASS<DictionaryBuilder<TYPE>>>(type); \
+ break;
+
+#define PARAM_CONVERTER_CASE(ID, CLASS, TYPE) \
+ case ID: \
+ res = std::make_shared<CLASS<TYPE, DictionaryBuilder<TYPE>>>(type); \
+ break;
+
+ switch (value_type->id()) {
+ PARAM_CONVERTER_CASE(Type::INT8, IntegerConverter, Int8Type)
+ PARAM_CONVERTER_CASE(Type::INT16, IntegerConverter, Int16Type)
+ PARAM_CONVERTER_CASE(Type::INT32, IntegerConverter, Int32Type)
+ PARAM_CONVERTER_CASE(Type::INT64, IntegerConverter, Int64Type)
+ PARAM_CONVERTER_CASE(Type::UINT8, IntegerConverter, UInt8Type)
+ PARAM_CONVERTER_CASE(Type::UINT16, IntegerConverter, UInt16Type)
+ PARAM_CONVERTER_CASE(Type::UINT32, IntegerConverter, UInt32Type)
+ PARAM_CONVERTER_CASE(Type::UINT64, IntegerConverter, UInt64Type)
+ PARAM_CONVERTER_CASE(Type::STRING, StringConverter, StringType)
+ PARAM_CONVERTER_CASE(Type::BINARY, StringConverter, BinaryType)
+ PARAM_CONVERTER_CASE(Type::LARGE_STRING, StringConverter, LargeStringType)
+ PARAM_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter, LargeBinaryType)
+ SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter,
+ FixedSizeBinaryType)
+ SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter, Decimal128Type)
+ default:
+ return ConversionNotImplemented(type);
+ }
+
+#undef SIMPLE_CONVERTER_CASE
+#undef PARAM_CONVERTER_CASE
+
+ RETURN_NOT_OK(res->Init());
+ *out = res;
+ return Status::OK();
+}
+
Status GetConverter(const std::shared_ptr<DataType>& type,
std::shared_ptr<Converter>* out) {
- std::shared_ptr<Converter> res;
+ if (type->id() == Type::DICTIONARY) {
+ return GetDictConverter(type, out);
+ }
- auto not_implemented = [&]() -> Status {
- return Status::NotImplemented("JSON conversion to ", type->ToString(),
- " not implemented");
- };
+ std::shared_ptr<Converter> res;
#define SIMPLE_CONVERTER_CASE(ID, CLASS) \
case ID: \
@@ -763,14 +828,14 @@ Status GetConverter(const std::shared_ptr<DataType>& type,
SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter<BinaryType>)
SIMPLE_CONVERTER_CASE(Type::LARGE_STRING, StringConverter<LargeStringType>)
SIMPLE_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter<LargeBinaryType>)
- SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter)
- SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter)
+ SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter<>)
+ SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter<>)
SIMPLE_CONVERTER_CASE(Type::SPARSE_UNION, UnionConverter)
SIMPLE_CONVERTER_CASE(Type::DENSE_UNION, UnionConverter)
SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTHS,
IntegerConverter<MonthIntervalType>)
SIMPLE_CONVERTER_CASE(Type::INTERVAL_DAY_TIME, DayTimeIntervalConverter)
default:
- return not_implemented();
+ return ConversionNotImplemented(type);
}
#undef SIMPLE_CONVERTER_CASE
@@ -780,6 +845,8 @@ Status GetConverter(const std::shared_ptr<DataType>& type,
return Status::OK();
}
+} // namespace
+
Status ArrayFromJSON(const std::shared_ptr<DataType>& type, util::string_view
json_string,
std::shared_ptr<Array>* out) {
std::shared_ptr<Converter> converter;
diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc
index fe1b027..f6a6a92 100644
--- a/cpp/src/arrow/ipc/json_simple_test.cc
+++ b/cpp/src/arrow/ipc/json_simple_test.cc
@@ -113,6 +113,25 @@ void AssertJSONArray(const std::shared_ptr<DataType>&
type, const std::string& j
AssertArraysEqual(*expected, *actual);
}
+void AssertJSONDictArray(const std::shared_ptr<DataType>& index_type,
+ const std::shared_ptr<DataType>& value_type,
+ const std::string& json,
+ const std::string& expected_indices_json,
+ const std::string& expected_values_json) {
+ auto type = dictionary(index_type, value_type);
+ std::shared_ptr<Array> actual, expected_indices, expected_values;
+
+ ASSERT_OK(ArrayFromJSON(index_type, expected_indices_json,
&expected_indices));
+ ASSERT_OK(ArrayFromJSON(value_type, expected_values_json, &expected_values));
+
+ ASSERT_OK(ArrayFromJSON(type, json, &actual));
+ ASSERT_OK(actual->ValidateFull());
+
+ const auto& dict_array = checked_cast<const DictionaryArray&>(*actual);
+ AssertArraysEqual(*expected_indices, *dict_array.indices());
+ AssertArraysEqual(*expected_values, *dict_array.dictionary());
+}
+
TEST(TestHelper, JSONArray) {
// Test the JSONArray helper func
std::string s =
@@ -133,7 +152,10 @@ TEST(TestHelper, SafeSignedAdd) {
}
template <typename T>
-class TestIntegers : public ::testing::Test {};
+class TestIntegers : public ::testing::Test {
+ public:
+ std::shared_ptr<DataType> type() { return TypeTraits<T>::type_singleton(); }
+};
TYPED_TEST_SUITE_P(TestIntegers);
@@ -142,7 +164,7 @@ TYPED_TEST_P(TestIntegers, Basics) {
using c_type = typename T::c_type;
std::shared_ptr<Array> expected, actual;
- std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();
+ auto type = this->type();
AssertJSONArray<T>(type, "[]", {});
AssertJSONArray<T>(type, "[4, 0, 5]", {4, 0, 5});
@@ -158,10 +180,8 @@ TYPED_TEST_P(TestIntegers, Basics) {
}
TYPED_TEST_P(TestIntegers, Errors) {
- using T = TypeParam;
-
std::shared_ptr<Array> array;
- std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();
+ auto type = this->type();
ASSERT_RAISES(Invalid, ArrayFromJSON(type, "", &array));
ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[", &array));
@@ -177,7 +197,7 @@ TYPED_TEST_P(TestIntegers, OutOfBounds) {
using c_type = typename T::c_type;
std::shared_ptr<Array> array;
- std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();
+ auto type = this->type();
if (type->id() == Type::UINT64) {
ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[18446744073709551616]",
&array));
@@ -200,7 +220,21 @@ TYPED_TEST_P(TestIntegers, OutOfBounds) {
}
}
-REGISTER_TYPED_TEST_SUITE_P(TestIntegers, Basics, Errors, OutOfBounds);
+TYPED_TEST_P(TestIntegers, Dictionary) {
+ std::shared_ptr<Array> array;
+ std::shared_ptr<DataType> value_type = this->type();
+
+ if (value_type->id() == Type::HALF_FLOAT) {
+ // Unsupported, skip
+ return;
+ }
+
+ AssertJSONDictArray(int8(), value_type, "[1, 2, 3, null, 3, 1]",
+ /*indices=*/"[0, 1, 2, null, 2, 0]",
+ /*values=*/"[1, 2, 3]");
+}
+
+REGISTER_TYPED_TEST_SUITE_P(TestIntegers, Basics, Errors, OutOfBounds,
Dictionary);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt8, TestIntegers, Int8Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt16, TestIntegers, Int16Type);
@@ -212,6 +246,66 @@ INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers,
UInt32Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType);
+template <typename T>
+class TestStrings : public ::testing::Test {
+ public:
+ std::shared_ptr<DataType> type() { return TypeTraits<T>::type_singleton(); }
+};
+
+TYPED_TEST_SUITE_P(TestStrings);
+
+TYPED_TEST_P(TestStrings, Basics) {
+ using T = TypeParam;
+ auto type = this->type();
+
+ std::shared_ptr<Array> expected, actual;
+
+ AssertJSONArray<T, std::string>(type, "[]", {});
+ AssertJSONArray<T, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
+ AssertJSONArray<T, std::string>(type, "[\"\", null]", {true, false}, {"",
""});
+ // NUL character in string
+ std::string s = "some";
+ s += '\x00';
+ s += "char";
+ AssertJSONArray<T, std::string>(type, "[\"\", \"some\\u0000char\"]", {"",
s});
+ // UTF8 sequence in string
+ AssertJSONArray<T, std::string>(type, "[\"\xc3\xa9\"]", {"\xc3\xa9"});
+
+ if (!T::is_utf8) {
+ // Arbitrary binary (non-UTF8) sequence in string
+ s = "\xff\x9f";
+ AssertJSONArray<T, std::string>(type, "[\"" + s + "\"]", {s});
+ }
+
+ // Bytes < 0x20 can be represented as JSON unicode escapes
+ s = '\x00';
+ s += "\x1f";
+ AssertJSONArray<T, std::string>(type, "[\"\\u0000\\u001f\"]", {s});
+}
+
+TYPED_TEST_P(TestStrings, Errors) {
+ auto type = this->type();
+ std::shared_ptr<Array> array;
+
+ ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array));
+ ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]", &array));
+}
+
+TYPED_TEST_P(TestStrings, Dictionary) {
+ auto value_type = this->type();
+
+ AssertJSONDictArray(int16(), value_type, R"(["foo", "bar", null, "bar",
"foo"])",
+ /*indices=*/"[0, 1, null, 1, 0]",
+ /*values=*/R"(["foo", "bar"])");
+}
+
+REGISTER_TYPED_TEST_SUITE_P(TestStrings, Basics, Errors, Dictionary);
+
+INSTANTIATE_TYPED_TEST_SUITE_P(TestString, TestStrings, StringType);
+INSTANTIATE_TYPED_TEST_SUITE_P(TestBinary, TestStrings, BinaryType);
+INSTANTIATE_TYPED_TEST_SUITE_P(TestLargeString, TestStrings, LargeStringType);
+INSTANTIATE_TYPED_TEST_SUITE_P(TestLargeBinary, TestStrings, LargeBinaryType);
+
TEST(TestNull, Basics) {
std::shared_ptr<DataType> type = null();
std::shared_ptr<Array> expected, actual;
@@ -297,50 +391,6 @@ TEST(TestDouble, Errors) {
ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]", &array));
}
-TEST(TestString, Basics) {
- // String type
- std::shared_ptr<DataType> type = utf8();
- std::shared_ptr<Array> expected, actual;
-
- AssertJSONArray<StringType, std::string>(type, "[]", {});
- AssertJSONArray<StringType, std::string>(type, "[\"\", \"foo\"]", {"",
"foo"});
- AssertJSONArray<StringType, std::string>(type, "[\"\", null]", {true,
false}, {"", ""});
- // NUL character in string
- std::string s = "some";
- s += '\x00';
- s += "char";
- AssertJSONArray<StringType, std::string>(type, "[\"\",
\"some\\u0000char\"]", {"", s});
- // UTF8 sequence in string
- AssertJSONArray<StringType, std::string>(type, "[\"\xc3\xa9\"]",
{"\xc3\xa9"});
-
- // Binary type
- type = binary();
- AssertJSONArray<BinaryType, std::string>(type, "[\"\", \"foo\", null]",
- {true, true, false}, {"", "foo",
""});
- // Arbitrary binary (non-UTF8) sequence in string
- s = "\xff\x9f";
- AssertJSONArray<BinaryType, std::string>(type, "[\"" + s + "\"]", {s});
- // Bytes < 0x20 can be represented as JSON unicode escapes
- s = '\x00';
- s += "\x1f";
- AssertJSONArray<BinaryType, std::string>(type, "[\"\\u0000\\u001f\"]", {s});
-}
-
-TEST(TestLargeString, Basics) {
- // Similar as TestString above, only testing the basics
- std::shared_ptr<DataType> type = large_utf8();
- std::shared_ptr<Array> expected, actual;
-
- AssertJSONArray<LargeStringType, std::string>(type, "[\"\", \"foo\"]", {"",
"foo"});
- AssertJSONArray<LargeStringType, std::string>(type, "[\"\", null]", {true,
false},
- {"", ""});
-
- // Large binary type
- type = large_binary();
- AssertJSONArray<LargeBinaryType, std::string>(type, "[\"\", \"foo\", null]",
- {true, true, false}, {"",
"foo", ""});
-}
-
TEST(TestTimestamp, Basics) {
// Timestamp type
auto type = timestamp(TimeUnit::SECOND);
@@ -407,14 +457,6 @@ TEST(TestDayTimeInterval, Basics) {
{{1, -600}, {}});
}
-TEST(TestString, Errors) {
- std::shared_ptr<DataType> type = utf8();
- std::shared_ptr<Array> array;
-
- ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array));
- ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]", &array));
-}
-
TEST(TestFixedSizeBinary, Basics) {
std::shared_ptr<DataType> type = fixed_size_binary(3);
std::shared_ptr<Array> expected, actual;
@@ -440,6 +482,18 @@ TEST(TestFixedSizeBinary, Errors) {
ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"abcd\"]", &array));
}
+TEST(TestFixedSizeBinary, Dictionary) {
+ std::shared_ptr<DataType> type = fixed_size_binary(3);
+
+ AssertJSONDictArray(int8(), type, R"(["foo", "bar", "foo", null])",
+ /*indices=*/"[0, 1, 0, null]",
+ /*values=*/R"(["foo", "bar"])");
+
+ // Invalid length
+ std::shared_ptr<Array> array;
+ ASSERT_RAISES(Invalid, ArrayFromJSON(dictionary(int8(), type), R"(["x"])",
&array));
+}
+
TEST(TestDecimal, Basics) {
std::shared_ptr<DataType> type = decimal(10, 4);
std::shared_ptr<Array> expected, actual;
@@ -484,6 +538,14 @@ TEST(TestDecimal, Errors) {
ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"12.34560\"]", &array));
}
+TEST(TestDecimal, Dictionary) {
+ std::shared_ptr<DataType> type = decimal(10, 2);
+
+ AssertJSONDictArray(int32(), type, R"(["123.45", "-78.90", "-78.90", null,
"123.45"])",
+ /*indices=*/"[0, 1, 1, null, 0]",
+ /*values=*/R"(["123.45", "-78.90"])");
+}
+
TEST(TestList, IntegerList) {
auto pool = default_memory_pool();
std::shared_ptr<DataType> type = list(int64());
@@ -1208,7 +1270,28 @@ TEST(TestSparseUnion, Errors) {
ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[8, true, 1]]", &array));
}
-TEST(TestDictionary, Basics) {
+TEST(TestNestedDictionary, ListOfDict) {
+ auto index_type = int8();
+ auto value_type = utf8();
+ auto dict_type = dictionary(index_type, value_type);
+ auto type = list(dict_type);
+
+ std::shared_ptr<Array> array, expected, indices, values, dicts, offsets;
+
+ ASSERT_OK(ArrayFromJSON(type, R"([["ab", "cd", null], null, ["cd", "cd"]])",
&array));
+ ASSERT_OK(array->ValidateFull());
+
+ // Build expected array
+ ASSERT_OK(ArrayFromJSON(index_type, "[0, 1, null, 1, 1]", &indices));
+ ASSERT_OK(ArrayFromJSON(value_type, R"(["ab", "cd"])", &values));
+ ASSERT_OK_AND_ASSIGN(dicts, DictionaryArray::FromArrays(dict_type, indices,
values));
+ ASSERT_OK(ArrayFromJSON(int32(), "[0, null, 3, 5]", &offsets));
+ ASSERT_OK_AND_ASSIGN(expected, ListArray::FromArrays(*offsets, *dicts));
+
+ AssertArraysEqual(*expected, *array, /*verbose=*/true);
+}
+
+TEST(TestDictArrayFromJSON, Basics) {
auto type = dictionary(int32(), utf8());
auto array =
DictArrayFromJSON(type, "[null, 2, 1, 0]", R"(["whiskey", "tango",
"foxtrot"])");
@@ -1220,7 +1303,7 @@ TEST(TestDictionary, Basics) {
*array);
}
-TEST(TestDictionary, Errors) {
+TEST(TestDictArrayFromJSON, Errors) {
auto type = dictionary(int32(), utf8());
std::shared_ptr<Array> array;