This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4e563bf  ARROW-7372: [C++] Allow creating dictionary array from simple JSON
4e563bf is described below

commit 4e563bf1374f9d10fb7d66e292359b9999975318
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Oct 1 12:03:55 2020 +0200

    ARROW-7372: [C++] Allow creating dictionary array from simple JSON
    
    Simple value types are supported: integers, string-like, decimal
    
    Closes #8309 from pitrou/ARROW-7372-json-simple-dict
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/array/builder_dict.h    |   6 +
 cpp/src/arrow/builder.cc              |   1 +
 cpp/src/arrow/builder.h               |   4 +-
 cpp/src/arrow/ipc/json_simple.cc      | 149 +++++++++++++++++-------
 cpp/src/arrow/ipc/json_simple_test.cc | 205 ++++++++++++++++++++++++----------
 5 files changed, 261 insertions(+), 104 deletions(-)

diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h
index db3db0f..c5db0d1 100644
--- a/cpp/src/arrow/array/builder_dict.h
+++ b/cpp/src/arrow/array/builder_dict.h
@@ -190,6 +190,12 @@ class DictionaryBuilderBase : public ArrayBuilder {
   /// \brief The current number of entries in the dictionary
   int64_t dictionary_length() const { return memo_table_->size(); }
 
+  /// \brief The value byte width (for FixedSizeBinaryType)
+  template <typename T1 = T>
+  enable_if_fixed_size_binary<T1, int32_t> byte_width() const {
+    return byte_width_;
+  }
+
   /// \brief Append a scalar value
   Status Append(Value value) {
     ARROW_RETURN_NOT_OK(Reserve(1));
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index e07e9ad..1dcbf78 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -50,6 +50,7 @@ struct DictionaryBuilderCase {
     return Create<DictionaryBuilder<LargeStringType>>();
   }
   Status Visit(const FixedSizeBinaryType&) { return CreateFor<FixedSizeBinaryType>(); }
+  Status Visit(const Decimal128Type&) { return CreateFor<Decimal128Type>(); }
 
   Status Visit(const DataType& value_type) { return NotImplemented(value_type); }
   Status Visit(const HalfFloatType& value_type) { return NotImplemented(value_type); }
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 3202312..54ff290 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -39,7 +39,7 @@ class MemoryPool;
 /// \brief Construct an empty ArrayBuilder corresponding to the data
 /// type
 /// \param[in] pool the MemoryPool to use for allocations
-/// \param[in] type an instance of DictionaryType
+/// \param[in] type the data type to create the builder for
 /// \param[out] out the created ArrayBuilder
 ARROW_EXPORT
 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
@@ -48,7 +48,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
 /// \brief Construct an empty DictionaryBuilder initialized optionally
 /// with a pre-existing dictionary
 /// \param[in] pool the MemoryPool to use for allocations
-/// \param[in] type an instance of DictionaryType
+/// \param[in] type the dictionary type to create the builder for
 /// \param[in] dictionary the initial dictionary, if any. May be nullptr
 /// \param[out] out the created ArrayBuilder
 ARROW_EXPORT
diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc
index d307373..39d19d6 100644
--- a/cpp/src/arrow/ipc/json_simple.cc
+++ b/cpp/src/arrow/ipc/json_simple.cc
@@ -27,6 +27,7 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
 #include "arrow/util/string_view.h"
 #include "arrow/util/value_parsing.h"
 
@@ -50,9 +51,11 @@ namespace json {
 using ::arrow::internal::checked_cast;
 using ::arrow::internal::checked_pointer_cast;
 
-static constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag;
+namespace {
 
-static Status JSONTypeError(const char* expected_type, rj::Type json_type) {
+constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag;
+
+Status JSONTypeError(const char* expected_type, rj::Type json_type) {
   return Status::Invalid("Expected ", expected_type, " or null, got JSON type ",
                          json_type);
 }
@@ -101,6 +104,22 @@ class ConcreteConverter : public Converter {
     }
     return Status::OK();
   }
+
+  const std::shared_ptr<DataType>& value_type() {
+    if (type_->id() != Type::DICTIONARY) {
+      return type_;
+    }
+    return checked_cast<const DictionaryType&>(*type_).value_type();
+  }
+
+  template <typename BuilderType>
+  Status MakeConcreteBuilder(std::shared_ptr<BuilderType>* out) {
+    std::unique_ptr<ArrayBuilder> builder;
+    RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder));
+    *out = checked_pointer_cast<BuilderType>(std::move(builder));
+    DCHECK(*out);
+    return Status::OK();
+  }
 };
 
 // ------------------------------------------------------------------------
@@ -213,20 +232,17 @@ enable_if_physical_floating_point<T, Status> ConvertNumber(const rj::Value& json
 // ------------------------------------------------------------------------
 // Converter for int arrays
 
-template <typename Type>
-class IntegerConverter final : public ConcreteConverter<IntegerConverter<Type>> {
+template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
+class IntegerConverter final
+    : public ConcreteConverter<IntegerConverter<Type, BuilderType>> {
   using c_type = typename Type::c_type;
+
   static constexpr auto is_signed = std::is_signed<c_type>::value;
 
  public:
   explicit IntegerConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
 
-  Status Init() override {
-    std::unique_ptr<ArrayBuilder> builder;
-    RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder));
-    builder_ = checked_pointer_cast<NumericBuilder<Type>>(std::move(builder));
-    return Status::OK();
-  }
+  Status Init() override { return this->MakeConcreteBuilder(&builder_); }
 
   Status AppendValue(const rj::Value& json_obj) override {
     if (json_obj.IsNull()) {
@@ -240,21 +256,20 @@ class IntegerConverter final : public ConcreteConverter<IntegerConverter<Type>>
   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
 
  private:
-  std::shared_ptr<NumericBuilder<Type>> builder_;
+  std::shared_ptr<BuilderType> builder_;
 };
 
 // ------------------------------------------------------------------------
 // Converter for float arrays
 
-template <typename Type>
-class FloatConverter final : public ConcreteConverter<FloatConverter<Type>> {
+template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
+class FloatConverter final : public ConcreteConverter<FloatConverter<Type, BuilderType>> {
   using c_type = typename Type::c_type;
 
  public:
-  explicit FloatConverter(const std::shared_ptr<DataType>& type) {
-    this->type_ = type;
-    builder_ = std::make_shared<NumericBuilder<Type>>();
-  }
+  explicit FloatConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
+
+  Status Init() override { return this->MakeConcreteBuilder(&builder_); }
 
   Status AppendValue(const rj::Value& json_obj) override {
     if (json_obj.IsNull()) {
@@ -268,20 +283,22 @@ class FloatConverter final : public ConcreteConverter<FloatConverter<Type>> {
   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
 
  private:
-  std::shared_ptr<NumericBuilder<Type>> builder_;
+  std::shared_ptr<BuilderType> builder_;
 };
 
 // ------------------------------------------------------------------------
 // Converter for decimal arrays
 
-class DecimalConverter final : public ConcreteConverter<DecimalConverter> {
+template <typename BuilderType = typename TypeTraits<Decimal128Type>::BuilderType>
+class DecimalConverter final : public ConcreteConverter<DecimalConverter<BuilderType>> {
  public:
   explicit DecimalConverter(const std::shared_ptr<DataType>& type) {
     this->type_ = type;
-    decimal_type_ = checked_cast<Decimal128Type*>(type.get());
-    builder_ = std::make_shared<DecimalBuilder>(type);
+    decimal_type_ = &checked_cast<const Decimal128Type&>(*this->value_type());
   }
 
+  Status Init() override { return this->MakeConcreteBuilder(&builder_); }
+
   Status AppendValue(const rj::Value& json_obj) override {
     if (json_obj.IsNull()) {
       return this->AppendNull();
@@ -303,8 +320,8 @@ class DecimalConverter final : public ConcreteConverter<DecimalConverter> {
   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
 
  private:
-  std::shared_ptr<DecimalBuilder> builder_;
-  Decimal128Type* decimal_type_;
+  std::shared_ptr<BuilderType> builder_;
+  const Decimal128Type* decimal_type_;
 };
 
 // ------------------------------------------------------------------------
@@ -381,15 +398,13 @@ class DayTimeIntervalConverter final
 // ------------------------------------------------------------------------
 // Converter for binary and string arrays
 
-template <typename TYPE>
-class StringConverter final : public ConcreteConverter<StringConverter<TYPE>> {
+template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
+class StringConverter final
+    : public ConcreteConverter<StringConverter<Type, BuilderType>> {
  public:
-  using BuilderType = typename TypeTraits<TYPE>::BuilderType;
+  explicit StringConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
 
-  explicit StringConverter(const std::shared_ptr<DataType>& type) {
-    this->type_ = type;
-    builder_ = std::make_shared<BuilderType>(type, default_memory_pool());
-  }
+  Status Init() override { return this->MakeConcreteBuilder(&builder_); }
 
   Status AppendValue(const rj::Value& json_obj) override {
     if (json_obj.IsNull()) {
@@ -412,14 +427,16 @@ class StringConverter final : public ConcreteConverter<StringConverter<TYPE>> {
 // ------------------------------------------------------------------------
 // Converter for fixed-size binary arrays
 
+template <typename BuilderType = typename TypeTraits<FixedSizeBinaryType>::BuilderType>
 class FixedSizeBinaryConverter final
-    : public ConcreteConverter<FixedSizeBinaryConverter> {
+    : public ConcreteConverter<FixedSizeBinaryConverter<BuilderType>> {
  public:
   explicit FixedSizeBinaryConverter(const std::shared_ptr<DataType>& type) {
     this->type_ = type;
-    builder_ = std::make_shared<FixedSizeBinaryBuilder>(type, default_memory_pool());
   }
 
+  Status Init() override { return this->MakeConcreteBuilder(&builder_); }
+
   Status AppendValue(const rj::Value& json_obj) override {
     if (json_obj.IsNull()) {
       return this->AppendNull();
@@ -441,7 +458,7 @@ class FixedSizeBinaryConverter final
   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
 
  private:
-  std::shared_ptr<FixedSizeBinaryBuilder> builder_;
+  std::shared_ptr<BuilderType> builder_;
 };
 
 // ------------------------------------------------------------------------
@@ -720,14 +737,62 @@ class UnionConverter final : public ConcreteConverter<UnionConverter> {
 // ------------------------------------------------------------------------
 // General conversion functions
 
+Status ConversionNotImplemented(const std::shared_ptr<DataType>& type) {
+  return Status::NotImplemented("JSON conversion to ", type->ToString(),
+                                " not implemented");
+}
+
+Status GetDictConverter(const std::shared_ptr<DataType>& type,
+                        std::shared_ptr<Converter>* out) {
+  std::shared_ptr<Converter> res;
+
+  const auto value_type = checked_cast<const DictionaryType&>(*type).value_type();
+
+#define SIMPLE_CONVERTER_CASE(ID, CLASS, TYPE)                    \
+  case ID:                                                        \
+    res = std::make_shared<CLASS<DictionaryBuilder<TYPE>>>(type); \
+    break;
+
+#define PARAM_CONVERTER_CASE(ID, CLASS, TYPE)                           \
+  case ID:                                                              \
+    res = std::make_shared<CLASS<TYPE, DictionaryBuilder<TYPE>>>(type); \
+    break;
+
+  switch (value_type->id()) {
+    PARAM_CONVERTER_CASE(Type::INT8, IntegerConverter, Int8Type)
+    PARAM_CONVERTER_CASE(Type::INT16, IntegerConverter, Int16Type)
+    PARAM_CONVERTER_CASE(Type::INT32, IntegerConverter, Int32Type)
+    PARAM_CONVERTER_CASE(Type::INT64, IntegerConverter, Int64Type)
+    PARAM_CONVERTER_CASE(Type::UINT8, IntegerConverter, UInt8Type)
+    PARAM_CONVERTER_CASE(Type::UINT16, IntegerConverter, UInt16Type)
+    PARAM_CONVERTER_CASE(Type::UINT32, IntegerConverter, UInt32Type)
+    PARAM_CONVERTER_CASE(Type::UINT64, IntegerConverter, UInt64Type)
+    PARAM_CONVERTER_CASE(Type::STRING, StringConverter, StringType)
+    PARAM_CONVERTER_CASE(Type::BINARY, StringConverter, BinaryType)
+    PARAM_CONVERTER_CASE(Type::LARGE_STRING, StringConverter, LargeStringType)
+    PARAM_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter, LargeBinaryType)
+    SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter,
+                          FixedSizeBinaryType)
+    SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter, Decimal128Type)
+    default:
+      return ConversionNotImplemented(type);
+  }
+
+#undef SIMPLE_CONVERTER_CASE
+#undef PARAM_CONVERTER_CASE
+
+  RETURN_NOT_OK(res->Init());
+  *out = res;
+  return Status::OK();
+}
+
 Status GetConverter(const std::shared_ptr<DataType>& type,
                     std::shared_ptr<Converter>* out) {
-  std::shared_ptr<Converter> res;
+  if (type->id() == Type::DICTIONARY) {
+    return GetDictConverter(type, out);
+  }
 
-  auto not_implemented = [&]() -> Status {
-    return Status::NotImplemented("JSON conversion to ", type->ToString(),
-                                  " not implemented");
-  };
+  std::shared_ptr<Converter> res;
 
 #define SIMPLE_CONVERTER_CASE(ID, CLASS) \
   case ID:                               \
@@ -763,14 +828,14 @@ Status GetConverter(const std::shared_ptr<DataType>& type,
     SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter<BinaryType>)
     SIMPLE_CONVERTER_CASE(Type::LARGE_STRING, StringConverter<LargeStringType>)
     SIMPLE_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter<LargeBinaryType>)
-    SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter)
-    SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter)
+    SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter<>)
+    SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter<>)
     SIMPLE_CONVERTER_CASE(Type::SPARSE_UNION, UnionConverter)
     SIMPLE_CONVERTER_CASE(Type::DENSE_UNION, UnionConverter)
     SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTHS, IntegerConverter<MonthIntervalType>)
     SIMPLE_CONVERTER_CASE(Type::INTERVAL_DAY_TIME, DayTimeIntervalConverter)
     default:
-      return not_implemented();
+      return ConversionNotImplemented(type);
   }
 
 #undef SIMPLE_CONVERTER_CASE
@@ -780,6 +845,8 @@ Status GetConverter(const std::shared_ptr<DataType>& type,
   return Status::OK();
 }
 
+}  // namespace
+
 Status ArrayFromJSON(const std::shared_ptr<DataType>& type, util::string_view json_string,
                      std::shared_ptr<Array>* out) {
   std::shared_ptr<Converter> converter;
diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc
index fe1b027..f6a6a92 100644
--- a/cpp/src/arrow/ipc/json_simple_test.cc
+++ b/cpp/src/arrow/ipc/json_simple_test.cc
@@ -113,6 +113,25 @@ void AssertJSONArray(const std::shared_ptr<DataType>& type, const std::string& j
   AssertArraysEqual(*expected, *actual);
 }
 
+void AssertJSONDictArray(const std::shared_ptr<DataType>& index_type,
+                         const std::shared_ptr<DataType>& value_type,
+                         const std::string& json,
+                         const std::string& expected_indices_json,
+                         const std::string& expected_values_json) {
+  auto type = dictionary(index_type, value_type);
+  std::shared_ptr<Array> actual, expected_indices, expected_values;
+
+  ASSERT_OK(ArrayFromJSON(index_type, expected_indices_json, &expected_indices));
+  ASSERT_OK(ArrayFromJSON(value_type, expected_values_json, &expected_values));
+
+  ASSERT_OK(ArrayFromJSON(type, json, &actual));
+  ASSERT_OK(actual->ValidateFull());
+
+  const auto& dict_array = checked_cast<const DictionaryArray&>(*actual);
+  AssertArraysEqual(*expected_indices, *dict_array.indices());
+  AssertArraysEqual(*expected_values, *dict_array.dictionary());
+}
+
 TEST(TestHelper, JSONArray) {
   // Test the JSONArray helper func
   std::string s =
@@ -133,7 +152,10 @@ TEST(TestHelper, SafeSignedAdd) {
 }
 
 template <typename T>
-class TestIntegers : public ::testing::Test {};
+class TestIntegers : public ::testing::Test {
+ public:
+  std::shared_ptr<DataType> type() { return TypeTraits<T>::type_singleton(); }
+};
 
 TYPED_TEST_SUITE_P(TestIntegers);
 
@@ -142,7 +164,7 @@ TYPED_TEST_P(TestIntegers, Basics) {
   using c_type = typename T::c_type;
 
   std::shared_ptr<Array> expected, actual;
-  std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();
+  auto type = this->type();
 
   AssertJSONArray<T>(type, "[]", {});
   AssertJSONArray<T>(type, "[4, 0, 5]", {4, 0, 5});
@@ -158,10 +180,8 @@ TYPED_TEST_P(TestIntegers, Basics) {
 }
 
 TYPED_TEST_P(TestIntegers, Errors) {
-  using T = TypeParam;
-
   std::shared_ptr<Array> array;
-  std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();
+  auto type = this->type();
 
   ASSERT_RAISES(Invalid, ArrayFromJSON(type, "", &array));
   ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[", &array));
@@ -177,7 +197,7 @@ TYPED_TEST_P(TestIntegers, OutOfBounds) {
   using c_type = typename T::c_type;
 
   std::shared_ptr<Array> array;
-  std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();
+  auto type = this->type();
 
   if (type->id() == Type::UINT64) {
     ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[18446744073709551616]", &array));
@@ -200,7 +220,21 @@ TYPED_TEST_P(TestIntegers, OutOfBounds) {
   }
 }
 
-REGISTER_TYPED_TEST_SUITE_P(TestIntegers, Basics, Errors, OutOfBounds);
+TYPED_TEST_P(TestIntegers, Dictionary) {
+  std::shared_ptr<Array> array;
+  std::shared_ptr<DataType> value_type = this->type();
+
+  if (value_type->id() == Type::HALF_FLOAT) {
+    // Unsupported, skip
+    return;
+  }
+
+  AssertJSONDictArray(int8(), value_type, "[1, 2, 3, null, 3, 1]",
+                      /*indices=*/"[0, 1, 2, null, 2, 0]",
+                      /*values=*/"[1, 2, 3]");
+}
+
+REGISTER_TYPED_TEST_SUITE_P(TestIntegers, Basics, Errors, OutOfBounds, Dictionary);
 
 INSTANTIATE_TYPED_TEST_SUITE_P(TestInt8, TestIntegers, Int8Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestInt16, TestIntegers, Int16Type);
@@ -212,6 +246,66 @@ INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers, UInt32Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType);
 
+template <typename T>
+class TestStrings : public ::testing::Test {
+ public:
+  std::shared_ptr<DataType> type() { return TypeTraits<T>::type_singleton(); }
+};
+
+TYPED_TEST_SUITE_P(TestStrings);
+
+TYPED_TEST_P(TestStrings, Basics) {
+  using T = TypeParam;
+  auto type = this->type();
+
+  std::shared_ptr<Array> expected, actual;
+
+  AssertJSONArray<T, std::string>(type, "[]", {});
+  AssertJSONArray<T, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
+  AssertJSONArray<T, std::string>(type, "[\"\", null]", {true, false}, {"", ""});
+  // NUL character in string
+  std::string s = "some";
+  s += '\x00';
+  s += "char";
+  AssertJSONArray<T, std::string>(type, "[\"\", \"some\\u0000char\"]", {"", s});
+  // UTF8 sequence in string
+  AssertJSONArray<T, std::string>(type, "[\"\xc3\xa9\"]", {"\xc3\xa9"});
+
+  if (!T::is_utf8) {
+    // Arbitrary binary (non-UTF8) sequence in string
+    s = "\xff\x9f";
+    AssertJSONArray<T, std::string>(type, "[\"" + s + "\"]", {s});
+  }
+
+  // Bytes < 0x20 can be represented as JSON unicode escapes
+  s = '\x00';
+  s += "\x1f";
+  AssertJSONArray<T, std::string>(type, "[\"\\u0000\\u001f\"]", {s});
+}
+
+TYPED_TEST_P(TestStrings, Errors) {
+  auto type = this->type();
+  std::shared_ptr<Array> array;
+
+  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array));
+  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]", &array));
+}
+
+TYPED_TEST_P(TestStrings, Dictionary) {
+  auto value_type = this->type();
+
+  AssertJSONDictArray(int16(), value_type, R"(["foo", "bar", null, "bar", "foo"])",
+                      /*indices=*/"[0, 1, null, 1, 0]",
+                      /*values=*/R"(["foo", "bar"])");
+}
+
+REGISTER_TYPED_TEST_SUITE_P(TestStrings, Basics, Errors, Dictionary);
+
+INSTANTIATE_TYPED_TEST_SUITE_P(TestString, TestStrings, StringType);
+INSTANTIATE_TYPED_TEST_SUITE_P(TestBinary, TestStrings, BinaryType);
+INSTANTIATE_TYPED_TEST_SUITE_P(TestLargeString, TestStrings, LargeStringType);
+INSTANTIATE_TYPED_TEST_SUITE_P(TestLargeBinary, TestStrings, LargeBinaryType);
+
 TEST(TestNull, Basics) {
   std::shared_ptr<DataType> type = null();
   std::shared_ptr<Array> expected, actual;
@@ -297,50 +391,6 @@ TEST(TestDouble, Errors) {
   ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]", &array));
 }
 
-TEST(TestString, Basics) {
-  // String type
-  std::shared_ptr<DataType> type = utf8();
-  std::shared_ptr<Array> expected, actual;
-
-  AssertJSONArray<StringType, std::string>(type, "[]", {});
-  AssertJSONArray<StringType, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
-  AssertJSONArray<StringType, std::string>(type, "[\"\", null]", {true, false}, {"", ""});
-  // NUL character in string
-  std::string s = "some";
-  s += '\x00';
-  s += "char";
-  AssertJSONArray<StringType, std::string>(type, "[\"\", \"some\\u0000char\"]", {"", s});
-  // UTF8 sequence in string
-  AssertJSONArray<StringType, std::string>(type, "[\"\xc3\xa9\"]", {"\xc3\xa9"});
-
-  // Binary type
-  type = binary();
-  AssertJSONArray<BinaryType, std::string>(type, "[\"\", \"foo\", null]",
-                                           {true, true, false}, {"", "foo", ""});
-  // Arbitrary binary (non-UTF8) sequence in string
-  s = "\xff\x9f";
-  AssertJSONArray<BinaryType, std::string>(type, "[\"" + s + "\"]", {s});
-  // Bytes < 0x20 can be represented as JSON unicode escapes
-  s = '\x00';
-  s += "\x1f";
-  AssertJSONArray<BinaryType, std::string>(type, "[\"\\u0000\\u001f\"]", {s});
-}
-
-TEST(TestLargeString, Basics) {
-  // Similar as TestString above, only testing the basics
-  std::shared_ptr<DataType> type = large_utf8();
-  std::shared_ptr<Array> expected, actual;
-
-  AssertJSONArray<LargeStringType, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
-  AssertJSONArray<LargeStringType, std::string>(type, "[\"\", null]", {true, false},
-                                                {"", ""});
-
-  // Large binary type
-  type = large_binary();
-  AssertJSONArray<LargeBinaryType, std::string>(type, "[\"\", \"foo\", null]",
-                                                {true, true, false}, {"", "foo", ""});
-}
-
 TEST(TestTimestamp, Basics) {
   // Timestamp type
   auto type = timestamp(TimeUnit::SECOND);
@@ -407,14 +457,6 @@ TEST(TestDayTimeInterval, Basics) {
                                        {{1, -600}, {}});
 }
 
-TEST(TestString, Errors) {
-  std::shared_ptr<DataType> type = utf8();
-  std::shared_ptr<Array> array;
-
-  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array));
-  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]", &array));
-}
-
 TEST(TestFixedSizeBinary, Basics) {
   std::shared_ptr<DataType> type = fixed_size_binary(3);
   std::shared_ptr<Array> expected, actual;
@@ -440,6 +482,18 @@ TEST(TestFixedSizeBinary, Errors) {
   ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"abcd\"]", &array));
 }
 
+TEST(TestFixedSizeBinary, Dictionary) {
+  std::shared_ptr<DataType> type = fixed_size_binary(3);
+
+  AssertJSONDictArray(int8(), type, R"(["foo", "bar", "foo", null])",
+                      /*indices=*/"[0, 1, 0, null]",
+                      /*values=*/R"(["foo", "bar"])");
+
+  // Invalid length
+  std::shared_ptr<Array> array;
+  ASSERT_RAISES(Invalid, ArrayFromJSON(dictionary(int8(), type), R"(["x"])", &array));
+}
+
 TEST(TestDecimal, Basics) {
   std::shared_ptr<DataType> type = decimal(10, 4);
   std::shared_ptr<Array> expected, actual;
@@ -484,6 +538,14 @@ TEST(TestDecimal, Errors) {
   ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"12.34560\"]", &array));
 }
 
+TEST(TestDecimal, Dictionary) {
+  std::shared_ptr<DataType> type = decimal(10, 2);
+
+  AssertJSONDictArray(int32(), type, R"(["123.45", "-78.90", "-78.90", null, "123.45"])",
+                      /*indices=*/"[0, 1, 1, null, 0]",
+                      /*values=*/R"(["123.45", "-78.90"])");
+}
+
 TEST(TestList, IntegerList) {
   auto pool = default_memory_pool();
   std::shared_ptr<DataType> type = list(int64());
@@ -1208,7 +1270,28 @@ TEST(TestSparseUnion, Errors) {
   ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[8, true, 1]]", &array));
 }
 
-TEST(TestDictionary, Basics) {
+TEST(TestNestedDictionary, ListOfDict) {
+  auto index_type = int8();
+  auto value_type = utf8();
+  auto dict_type = dictionary(index_type, value_type);
+  auto type = list(dict_type);
+
+  std::shared_ptr<Array> array, expected, indices, values, dicts, offsets;
+
+  ASSERT_OK(ArrayFromJSON(type, R"([["ab", "cd", null], null, ["cd", "cd"]])", &array));
+  ASSERT_OK(array->ValidateFull());
+
+  // Build expected array
+  ASSERT_OK(ArrayFromJSON(index_type, "[0, 1, null, 1, 1]", &indices));
+  ASSERT_OK(ArrayFromJSON(value_type, R"(["ab", "cd"])", &values));
+  ASSERT_OK_AND_ASSIGN(dicts, DictionaryArray::FromArrays(dict_type, indices, values));
+  ASSERT_OK(ArrayFromJSON(int32(), "[0, null, 3, 5]", &offsets));
+  ASSERT_OK_AND_ASSIGN(expected, ListArray::FromArrays(*offsets, *dicts));
+
+  AssertArraysEqual(*expected, *array, /*verbose=*/true);
+}
+
+TEST(TestDictArrayFromJSON, Basics) {
   auto type = dictionary(int32(), utf8());
   auto array =
       DictArrayFromJSON(type, "[null, 2, 1, 0]", R"(["whiskey", "tango", "foxtrot"])");
@@ -1220,7 +1303,7 @@ TEST(TestDictionary, Basics) {
                       *array);
 }
 
-TEST(TestDictionary, Errors) {
+TEST(TestDictArrayFromJSON, Errors) {
   auto type = dictionary(int32(), utf8());
   std::shared_ptr<Array> array;
 

Reply via email to