[ 
https://issues.apache.org/jira/browse/ARROW-1716?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16242907#comment-16242907
 ] 

ASF GitHub Bot commented on ARROW-1716:
---------------------------------------

wesm closed pull request #1267: ARROW-1716: [Format/JSON] Use string integer 
value for Decimals in JSON
URL: https://github.com/apache/arrow/pull/1267
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/ipc/ipc-read-write-test.cc 
b/cpp/src/arrow/ipc/ipc-read-write-test.cc
index 6f2f5cf85..40cd3f0ee 100644
--- a/cpp/src/arrow/ipc/ipc-read-write-test.cc
+++ b/cpp/src/arrow/ipc/ipc-read-write-test.cc
@@ -727,7 +727,7 @@ TEST_F(TestTensorRoundTrip, BasicRoundtrip) {
   int64_t size = 24;
 
   std::vector<int64_t> values;
-  test::randint<int64_t>(size, 0, 100, &values);
+  test::randint(size, 0, 100, &values);
 
   auto data = test::GetBufferFromVector(values);
 
@@ -748,7 +748,7 @@ TEST_F(TestTensorRoundTrip, NonContiguous) {
   ASSERT_OK(io::MemoryMapFixture::InitMemoryMap(kBufferSize, path, &mmap_));
 
   std::vector<int64_t> values;
-  test::randint<int64_t>(24, 0, 100, &values);
+  test::randint(24, 0, 100, &values);
 
   auto data = test::GetBufferFromVector(values);
   Tensor tensor(int64(), data, {4, 3}, {48, 16});
diff --git a/cpp/src/arrow/ipc/json-internal.cc 
b/cpp/src/arrow/ipc/json-internal.cc
index 025f6c276..c1c0661d6 100644
--- a/cpp/src/arrow/ipc/json-internal.cc
+++ b/cpp/src/arrow/ipc/json-internal.cc
@@ -33,6 +33,7 @@
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
+#include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 #include "arrow/visitor_inline.h"
@@ -448,7 +449,8 @@ class ArrayWriter {
   }
 
   void WriteDataValues(const FixedSizeBinaryArray& arr) {
-    int32_t width = arr.byte_width();
+    const int32_t width = arr.byte_width();
+
     for (int64_t i = 0; i < arr.length(); ++i) {
       const uint8_t* buf = arr.GetValue(i);
       std::string encoded = HexEncode(buf, width);
@@ -456,6 +458,13 @@ class ArrayWriter {
     }
   }
 
+  void WriteDataValues(const DecimalArray& arr) {
+    for (int64_t i = 0; i < arr.length(); ++i) {
+      const Decimal128 value(arr.GetValue(i));
+      writer_->String(value.ToIntegerString());
+    }
+  }
+
   void WriteDataValues(const BooleanArray& arr) {
     for (int i = 0; i < arr.length(); ++i) {
       writer_->Bool(arr.Value(i));
@@ -1053,7 +1062,9 @@ class ArrayReader {
   }
 
   template <typename T>
-  typename std::enable_if<std::is_base_of<FixedSizeBinaryType, T>::value, 
Status>::type
+  typename std::enable_if<std::is_base_of<FixedSizeBinaryType, T>::value &&
+                              !std::is_base_of<DecimalType, T>::value,
+                          Status>::type
   Visit(const T& type) {
     typename TypeTraits<T>::BuilderType builder(type_, pool_);
 
@@ -1073,22 +1084,52 @@ class ArrayReader {
     for (int i = 0; i < length_; ++i) {
       if (!is_valid_[i]) {
         RETURN_NOT_OK(builder.AppendNull());
-        continue;
-      }
+      } else {
+        const rj::Value& val = json_data_arr[i];
+        DCHECK(val.IsString())
+            << "Found non-string JSON value when parsing FixedSizeBinary 
value";
+        std::string hex_string = val.GetString();
+        if (static_cast<int32_t>(hex_string.size()) != byte_width * 2) {
+          DCHECK(false) << "Expected size: " << byte_width * 2
+                        << " got: " << hex_string.size();
+        }
+        const char* hex_data = hex_string.c_str();
 
-      const rj::Value& val = json_data_arr[i];
-      DCHECK(val.IsString());
-      std::string hex_string = val.GetString();
-      if (static_cast<int32_t>(hex_string.size()) != byte_width * 2) {
-        DCHECK(false) << "Expected size: " << byte_width * 2
-                      << " got: " << hex_string.size();
+        for (int32_t j = 0; j < byte_width; ++j) {
+          RETURN_NOT_OK(ParseHexValue(hex_data + j * 2, &byte_buffer_data[j]));
+        }
+        RETURN_NOT_OK(builder.Append(byte_buffer_data));
       }
-      const char* hex_data = hex_string.c_str();
+    }
+    return builder.Finish(&result_);
+  }
+
+  template <typename T>
+  typename std::enable_if<std::is_base_of<DecimalType, T>::value, 
Status>::type Visit(
+      const T& type) {
+    typename TypeTraits<T>::BuilderType builder(type_, pool_);
+
+    const auto& json_data = obj_->FindMember("DATA");
+    RETURN_NOT_ARRAY("DATA", json_data, *obj_);
 
-      for (int32_t j = 0; j < byte_width; ++j) {
-        RETURN_NOT_OK(ParseHexValue(hex_data + j * 2, &byte_buffer_data[j]));
+    const auto& json_data_arr = json_data->value.GetArray();
+
+    DCHECK_EQ(static_cast<int32_t>(json_data_arr.Size()), length_);
+
+    for (int i = 0; i < length_; ++i) {
+      if (!is_valid_[i]) {
+        RETURN_NOT_OK(builder.AppendNull());
+      } else {
+        const rj::Value& val = json_data_arr[i];
+        DCHECK(val.IsString())
+            << "Found non-string JSON value when parsing Decimal128 value";
+        DCHECK_GT(val.GetStringLength(), 0)
+            << "Empty string found when parsing Decimal128 value";
+
+        Decimal128 value;
+        RETURN_NOT_OK(Decimal128::FromString(val.GetString(), &value));
+        RETURN_NOT_OK(builder.Append(value));
       }
-      RETURN_NOT_OK(builder.Append(byte_buffer_data));
     }
     return builder.Finish(&result_);
   }
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
index b2137b7db..91023db48 100644
--- a/cpp/src/arrow/ipc/test-common.h
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -671,8 +671,11 @@ Status MakeFWBinary(std::shared_ptr<RecordBatch>* out) {
 }
 
 Status MakeDecimal(std::shared_ptr<RecordBatch>* out) {
-  auto f0 = field("f0", decimal(19, 4));
-  auto schema = ::arrow::schema({f0, f0});
+  constexpr int kDecimalPrecision = 38;
+  auto type = decimal(kDecimalPrecision, 4);
+  auto f0 = field("f0", type);
+  auto f1 = field("f1", type);
+  auto schema = ::arrow::schema({f0, f1});
 
   constexpr int kDecimalSize = 16;
   constexpr int length = 10;
@@ -682,7 +685,7 @@ Status MakeDecimal(std::shared_ptr<RecordBatch>* out) {
 
   RETURN_NOT_OK(AllocateBuffer(default_memory_pool(), kDecimalSize * length, 
&data));
 
-  test::random_bytes(kDecimalSize * length, 0, data->mutable_data());
+  test::random_decimals(length, 1, kDecimalPrecision, data->mutable_data());
   test::random_null_bytes(length, 0.1, is_valid_bytes.data());
 
   RETURN_NOT_OK(BitUtil::BytesToBits(is_valid_bytes, default_memory_pool(), 
&is_valid));
@@ -690,10 +693,10 @@ Status MakeDecimal(std::shared_ptr<RecordBatch>* out) {
   auto a1 = std::make_shared<DecimalArray>(f0->type(), length, data, is_valid,
                                            kUnknownNullCount);
 
-  auto a2 = std::make_shared<DecimalArray>(f0->type(), length, data);
+  auto a2 = std::make_shared<DecimalArray>(f1->type(), length, data);
 
   ArrayVector arrays = {a1, a2};
-  *out = std::make_shared<RecordBatch>(schema, a1->length(), arrays);
+  *out = std::make_shared<RecordBatch>(schema, length, arrays);
   return Status::OK();
 }
 
diff --git a/integration/integration_test.py b/integration/integration_test.py
index 59a1de5a4..205176ecc 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -65,24 +65,16 @@ def rands(nchars):
     return ''.join(np.random.choice(RANDS_CHARS, nchars))
 
 
-if six.PY2:
-    def frombytes(o):
-        return o
+def tobytes(o):
+    if isinstance(o, six.text_type):
+        return o.encode('utf8')
+    return o
 
-    def tobytes(o):
-        if isinstance(o, unicode):
-            return o.encode('utf8')
-        else:
-            return o
-else:
-    def tobytes(o):
-        if isinstance(o, str):
-            return o.encode('utf8')
-        else:
-            return o
 
-    def frombytes(o):
+def frombytes(o):
+    if isinstance(o, six.binary_type):
         return o.decode('utf8')
+    return o
 
 
 # from the merge_arrow_pr.py script
@@ -177,7 +169,7 @@ def _get_type_layout(self):
 class PrimitiveColumn(Column):
 
     def __init__(self, name, count, is_valid, values):
-        Column.__init__(self, name, count)
+        super(PrimitiveColumn, self).__init__(name, count)
         self.is_valid = is_valid
         self.values = values
 
@@ -191,15 +183,16 @@ def _get_buffers(self):
         ]
 
 
-TEST_INT_MIN = - 2**31 + 1
-TEST_INT_MAX = 2**31 - 1
+TEST_INT_MAX = 2 ** 31 - 1
+TEST_INT_MIN = ~TEST_INT_MAX
+
 
 class IntegerType(PrimitiveType):
 
     def __init__(self, name, is_signed, bit_width, nullable=True,
                  min_value=TEST_INT_MIN,
                  max_value=TEST_INT_MAX):
-        PrimitiveType.__init__(self, name, nullable=nullable)
+        super(IntegerType, self).__init__(name, nullable=nullable)
         self.is_signed = is_signed
         self.bit_width = bit_width
         self.min_value = min_value
@@ -239,9 +232,11 @@ class DateType(IntegerType):
     MILLISECOND = 1
 
     def __init__(self, name, unit, nullable=True):
-        self.unit = unit
         bit_width = 32 if unit == self.DAY else 64
-        IntegerType.__init__(self, name, True, bit_width, nullable=nullable)
+        super(DateType, self).__init__(
+            name, True, bit_width, nullable=nullable
+        )
+        self.unit = unit
 
     def _get_type(self):
         return OrderedDict([
@@ -268,9 +263,10 @@ class TimeType(IntegerType):
     }
 
     def __init__(self, name, unit='s', nullable=True):
+        super(TimeType, self).__init__(
+            name, True, self.BIT_WIDTHS[unit], nullable=nullable
+        )
         self.unit = unit
-        IntegerType.__init__(self, name, True, self.BIT_WIDTHS[unit],
-                             nullable=nullable)
 
     def _get_type(self):
         return OrderedDict([
@@ -283,9 +279,9 @@ def _get_type(self):
 class TimestampType(IntegerType):
 
     def __init__(self, name, unit='s', tz=None, nullable=True):
+        super(TimestampType, self).__init__(name, True, 64, nullable=nullable)
         self.unit = unit
         self.tz = tz
-        IntegerType.__init__(self, name, True, 64, nullable=nullable)
 
     def _get_type(self):
         fields = [
@@ -302,7 +298,7 @@ def _get_type(self):
 class FloatingPointType(PrimitiveType):
 
     def __init__(self, name, bit_width, nullable=True):
-        PrimitiveType.__init__(self, name, nullable=nullable)
+        super(FloatingPointType, self).__init__(name, nullable=nullable)
 
         self.bit_width = bit_width
         self.precision = {
@@ -331,13 +327,30 @@ def generate_column(self, size, name=None):
         return PrimitiveColumn(name, size, is_valid, values)
 
 
-class DecimalType(PrimitiveType):
-    def __init__(self, name, bit_width, precision, scale, nullable=True):
-        PrimitiveType.__init__(self, name, nullable=True)
+DECIMAL_PRECISION_TO_VALUE = {
+    key: (1 << (8 * i - 1)) - 1 for i, key in enumerate(
+        [1, 3, 5, 7, 10, 12, 15, 17, 19, 22, 24, 27, 29, 32, 34, 36],
+        start=1,
+    )
+}
 
-        self.bit_width = bit_width
+
+def decimal_range_from_precision(precision):
+    assert 1 <= precision <= 38
+    try:
+        max_value = DECIMAL_PRECISION_TO_VALUE[precision]
+    except KeyError:
+        return decimal_range_from_precision(precision - 1)
+    else:
+        return ~max_value, max_value
+
+
+class DecimalType(PrimitiveType):
+    def __init__(self, name, precision, scale, bit_width=128, nullable=True):
+        super(DecimalType, self).__init__(name, nullable=True)
         self.precision = precision
         self.scale = scale
+        self.bit_width = bit_width
 
     @property
     def numpy_type(self):
@@ -359,7 +372,8 @@ def _get_type_layout(self):
                            ('typeBitWidth', self.bit_width)])])])
 
     def generate_column(self, size, name=None):
-        values = [random.randint(0, 2**self.bit_width - 1) for x in 
range(size)]
+        min_value, max_value = decimal_range_from_precision(self.precision)
+        values = [random.randint(min_value, max_value) for _ in range(size)]
 
         is_valid = self._make_is_valid(size)
         if name is None:
@@ -369,14 +383,12 @@ def generate_column(self, size, name=None):
 
 class DecimalColumn(PrimitiveColumn):
 
-    def __init__(self, name, count, is_valid, values, bit_width):
-        PrimitiveColumn.__init__(self, name, count, is_valid, values)
+    def __init__(self, name, count, is_valid, values, bit_width=128):
+        super(DecimalColumn, self).__init__(name, count, is_valid, values)
         self.bit_width = bit_width
-        self.hex_width = bit_width / 4
 
     def _encode_value(self, x):
-        hex_format_str = '%%0%dx' % self.hex_width
-        return (hex_format_str % x).upper()
+        return str(x)
 
 
 class BooleanType(PrimitiveType):
@@ -510,7 +522,7 @@ def _encode_value(self, x):
 class ListType(DataType):
 
     def __init__(self, name, value_type, nullable=True):
-        DataType.__init__(self, name, nullable=nullable)
+        super(ListType, self).__init__(name, nullable=nullable)
         self.value_type = value_type
 
     def _get_type(self):
@@ -553,7 +565,7 @@ def generate_column(self, size, name=None):
 class ListColumn(Column):
 
     def __init__(self, name, count, is_valid, offsets, values):
-        Column.__init__(self, name, count)
+        super(ListColumn, self).__init__(name, count)
         self.is_valid = is_valid
         self.offsets = offsets
         self.values = values
@@ -571,7 +583,7 @@ def _get_children(self):
 class StructType(DataType):
 
     def __init__(self, name, field_types, nullable=True):
-        DataType.__init__(self, name, nullable=nullable)
+        super(StructType, self).__init__(name, nullable=nullable)
         self.field_types = field_types
 
     def _get_type(self):
@@ -620,7 +632,7 @@ def get_json(self):
 class DictionaryType(DataType):
 
     def __init__(self, name, index_type, dictionary, nullable=True):
-        DataType.__init__(self, name, nullable=nullable)
+        super(DictionaryType, self).__init__(name, nullable=nullable)
         assert isinstance(index_type, IntegerType)
         assert isinstance(dictionary, Dictionary)
 
@@ -655,7 +667,7 @@ def generate_column(self, size, name=None):
 class StructColumn(Column):
 
     def __init__(self, name, count, is_valid, field_values):
-        Column.__init__(self, name, count)
+        super(StructColumn, self).__init__(name, count)
         self.is_valid = is_valid
         self.field_values = field_values
 
@@ -758,11 +770,12 @@ def generate_primitive_case(batch_sizes):
 
 def generate_decimal_case():
     fields = [
-        DecimalType('f1', 128, 24, 10, True),
-        DecimalType('f2', 128, 32, -10, True)
+        DecimalType(name='f{}'.format(i), precision=precision, scale=2)
+        for i, precision in enumerate(range(3, 39))
     ]
 
-    batch_sizes = [7, 10]
+    possible_batch_sizes = 7, 10
+    batch_sizes = [possible_batch_sizes[i % 2] for i in range(len(fields))]
 
     return _generate_file('decimal', fields, batch_sizes)
 
@@ -867,8 +880,9 @@ def run(self):
 
     def _compare_implementations(self, producer, consumer):
         print('##########################################################')
-        print('{0} producing, {1} consuming'.format(producer.name,
-                                                       consumer.name))
+        print(
+            '{0} producing, {1} consuming'.format(producer.name, consumer.name)
+        )
         print('##########################################################')
 
         for json_path in self.json_files:
@@ -1033,6 +1047,7 @@ def run_all_tests(debug=False):
     runner.run()
     print('-- All tests passed!')
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Arrow integration test CLI')
     parser.add_argument('--debug', dest='debug', action='store_true',
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
index c6ebd61aa..e1c7c909f 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
@@ -27,6 +27,8 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -332,9 +334,10 @@ private void setValueFromParser(ValueVector valueVector, 
int i) throws IOExcepti
         ((Float8Vector) valueVector).getMutator().set(i, 
parser.readValueAs(Double.class));
         break;
       case DECIMAL: {
-          DecimalVector decimalVector = ((DecimalVector) valueVector);
-          byte[] value = decodeHexSafe(parser.readValueAs(String.class));
-          DecimalUtility.writeByteArrayToArrowBuf(value, 
decimalVector.getBuffer(), i);
+          DecimalVector decimalVector = (DecimalVector) valueVector;
+          // Here we assume the decimal value is the unscaled integer value as 
a string
+          BigDecimal decimalValue = new 
BigDecimal(parser.readValueAs(String.class));
+          DecimalUtility.writeBigDecimalToArrowBuf(decimalValue, 
decimalVector.getBuffer(), i);
         }
         break;
       case VARBINARY:
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
index 04e44379e..05341bec4 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
@@ -20,6 +20,7 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
@@ -48,6 +49,7 @@
 import org.apache.arrow.vector.dictionary.Dictionary;
 import org.apache.arrow.vector.dictionary.DictionaryProvider;
 import org.apache.arrow.vector.schema.ArrowVectorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
 
@@ -242,9 +244,9 @@ private void writeValueToGenerator(ValueVector valueVector, 
int i) throws IOExce
         }
         break;
       case DECIMAL: {
-          ArrowBuf bytebuf = valueVector.getDataBuffer();
-          String hexString = 
Hex.encodeHexString(DecimalUtility.getByteArrayFromArrowBuf(bytebuf, i));
-          generator.writeString(hexString);
+          BigDecimal decimalValue = ((DecimalVector) 
valueVector).getAccessor().getObject(i);
+          // We write the unscaled value, because the scale is stored in the 
type metadata.
+          generator.writeString(decimalValue.unscaledValue().toString());
         }
         break;
       default:
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java 
b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
index 033ae6c09..acf7c58a1 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
@@ -142,8 +142,18 @@ public static StringBuilder toStringWithZeroes(long 
number, int desiredLength) {
    */
   public static BigDecimal getBigDecimalFromArrowBuf(ArrowBuf bytebuf, int 
index, int scale) {
     byte[] value = new byte[DECIMAL_BYTE_LENGTH];
+    byte temp;
     final int startIndex = index * DECIMAL_BYTE_LENGTH;
+
+    // Decimal stored as little endian, need to swap bytes to make BigDecimal
     bytebuf.getBytes(startIndex, value, 0, DECIMAL_BYTE_LENGTH);
+    int stop = DECIMAL_BYTE_LENGTH / 2;
+    for (int i = 0, j; i < stop; i++) {
+      temp = value[i];
+      j = (DECIMAL_BYTE_LENGTH - 1) - i;
+      value[i] = value[j];
+      value[j] = temp;
+    }
     BigInteger unscaledValue = new BigInteger(value);
     return new BigDecimal(unscaledValue, scale);
   }
@@ -212,10 +222,26 @@ private static void writeByteArrayToArrowBuf(byte[] 
bytes, ArrowBuf bytebuf, int
     if (bytes.length > DECIMAL_BYTE_LENGTH) {
       throw new UnsupportedOperationException("Decimal size greater than 16 
bytes");
     }
-    final int padLength = DECIMAL_BYTE_LENGTH - bytes.length;
-    for (int i = 0; i < padLength; i++) {
+
+    // Decimal stored as little endian, need to swap data bytes before writing 
to ArrowBuf
+    byte[] bytesLE = new byte[bytes.length];
+    int stop = bytes.length / 2;
+    for (int i = 0, j; i < stop; i++) {
+      j = (bytes.length - 1) - i;
+      bytesLE[i] = bytes[j];
+      bytesLE[j] = bytes[i];
+    }
+    if (bytes.length % 2 != 0) {
+      int i = (bytes.length / 2);
+      bytesLE[i] = bytes[i];
+    }
+
+    // Write LE data
+    bytebuf.setBytes(startIndex, bytesLE, 0, bytes.length);
+
+    // Write padding after data
+    for (int i = bytes.length; i < DECIMAL_BYTE_LENGTH; i++) {
       bytebuf.setByte(startIndex + i, padValue);
     }
-    bytebuf.setBytes(startIndex + padLength, bytes, 0, bytes.length);
   }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> [Format/JSON] Use string integer value for Decimals in JSON
> -----------------------------------------------------------
>
>                 Key: ARROW-1716
>                 URL: https://issues.apache.org/jira/browse/ARROW-1716
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: C++, Java - Vectors
>    Affects Versions: 0.7.1
>            Reporter: Phillip Cloud
>            Assignee: Phillip Cloud
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>
> Suprisingly, Java and C++ integration tests pass after ARROW-1588. This hides 
> a bug, because we're writing decimal values as hex encoded bytes.
> C++ and Java compare that the bytes are the same, but because C++ is 
> interpreting everything as little endian after ARROW-1588 and Java is big 
> endian the numbers these bytes represent will be different in their 
> respective systems.
> I propose that instead of encoding DecimaArray/DecimalVector values as hex 
> encoded bytes, we store the integer as a string when writing Arrow 
> DecimalArray/DecimalVector data to JSON. This will allow us to compare that 
> the bytes have the same meaning in both systems.
> This requires a change to the way Arrow writes JSON.
> [~icexelloss] was extremely helpful in helping me get to the bottom of this.
> cc [~icexelloss] [~wesmckinn] [~jnadeau]



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to