pitrou commented on code in PR #14242:
URL: https://github.com/apache/arrow/pull/14242#discussion_r982526049


##########
cpp/src/arrow/json/parser.cc:
##########
@@ -536,6 +549,9 @@ class RawBuilderSet {
       case Kind::kObject:
         return 
Cast<Kind::kObject>(builder)->Finish(std::move(finish_children), out);
 
+      case Kind::kNumberOrString:
+        return FinishScalar(scalar_values, 
Cast<Kind::kNumberOrString>(builder), out);

Review Comment:
   Also here.



##########
cpp/src/arrow/json/parser.cc:
##########
@@ -102,7 +104,7 @@ Status Kind::ForType(const DataType& type, Kind::type* 
kind) {
     Status Visit(const BinaryType&) { return SetKind(Kind::kString); }
     Status Visit(const LargeBinaryType&) { return SetKind(Kind::kString); }
     Status Visit(const TimestampType&) { return SetKind(Kind::kString); }
-    Status Visit(const FixedSizeBinaryType&) { return SetKind(Kind::kString); }
+    Status Visit(const FixedSizeBinaryType&) { return 
SetKind(Kind::kNumberOrString); }

Review Comment:
   Hmm, that doesn't seem right to me. Ideally we would only allow numbers for 
decimal, not for all fixed-size binary types.



##########
cpp/src/arrow/json/reader_test.cc:
##########
@@ -274,5 +275,37 @@ TEST(ReaderTest, ListArrayWithFewValues) {
   AssertTablesEqual(*actual_table, *expected_table);
 }
 
+TEST_P(ReaderTest, UnquotedDecimal) {

Review Comment:
   Can you move these tests near the other parameterized (TEST_P) tests?



##########
cpp/src/arrow/json/parser.cc:
##########
@@ -454,6 +462,8 @@ class RawBuilderSet {
         }
         return Status::OK();
       }
+      case Kind::kNumberOrString:
+        return MakeBuilder<Kind::kNumberOrString>(leading_nulls, builder);

Review Comment:
   For clarity, move this up just below the `kString` case?



##########
cpp/src/arrow/json/parser.cc:
##########
@@ -610,12 +627,22 @@ class HandlerBase : public BlockParser,
   }
 
   bool RawNumber(const char* data, rj::SizeType size, ...) {
-    status_ = AppendScalar<Kind::kNumber>(builder_, std::string_view(data, 
size));
+    if (builder_.kind == Kind::kNumberOrString) {
+      status_ =
+          AppendScalar<Kind::kNumberOrString>(builder_, std::string_view(data, 
size));
+    } else {

Review Comment:
   Instead of doing this check here, how about doing it in `AppendScalar`, e.g.:
   ```c++
     template <Kind::type kind>
     Status AppendScalar(BuilderPtr builder, std::string_view scalar) {
       bool kind_changed;
       if (kind == Kind::kNumber || kind == Kind::kString) {
         kind_changed = builder.kind != kind && builder.kind != 
Kind::kNumberOrString;
       } else {
         kind_changed = builder.kind != kind;
       }
       if (ARROW_PREDICT_FALSE(kind_changed)) {
         return IllegallyChangedTo(kind);
       }
   ```



##########
cpp/src/arrow/json/reader_test.cc:
##########
@@ -274,5 +275,37 @@ TEST(ReaderTest, ListArrayWithFewValues) {
   AssertTablesEqual(*actual_table, *expected_table);
 }
 
+TEST_P(ReaderTest, UnquotedDecimal) {
+  parse_options_.unexpected_field_behavior = 
UnexpectedFieldBehavior::InferType;

Review Comment:
   I don't think this line is useful since we're passing the entire expected 
schema?



##########
cpp/src/arrow/json/parser.cc:
##########
@@ -504,6 +514,9 @@ class RawBuilderSet {
         }
         return Status::OK();
       }
+      case Kind::kNumberOrString: {
+        return Cast<Kind::kNumberOrString>(builder)->AppendNull();

Review Comment:
   Same here: move this up below the `kString` case?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to