This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 053973a06 Support decoding decimals in raw decoder (#3820)
053973a06 is described below
commit 053973a06a7194a1d10f5b206375957a2fcaa049
Author: bold <[email protected]>
AuthorDate: Thu Mar 9 10:28:17 2023 +0100
Support decoding decimals in raw decoder (#3820)
---
arrow-json/src/raw/decimal_array.rs | 76 +++++++++++++++++++++++++++++++++++++
arrow-json/src/raw/mod.rs | 61 +++++++++++++++++++++++++++++
2 files changed, 137 insertions(+)
diff --git a/arrow-json/src/raw/decimal_array.rs b/arrow-json/src/raw/decimal_array.rs
new file mode 100644
index 000000000..0518b4cef
--- /dev/null
+++ b/arrow-json/src/raw/decimal_array.rs
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::marker::PhantomData;
+
+use arrow_array::builder::PrimitiveBuilder;
+use arrow_array::types::DecimalType;
+use arrow_array::Array;
+use arrow_cast::parse::parse_decimal;
+use arrow_data::ArrayData;
+use arrow_schema::ArrowError;
+
+use crate::raw::tape::{Tape, TapeElement};
+use crate::raw::{tape_error, ArrayDecoder};
+
+pub struct DecimalArrayDecoder<D: DecimalType> { // decodes tape elements into a decimal array of type `D`
+ precision: u8, // forwarded to `parse_decimal` and `with_precision_and_scale` by `decode`
+ scale: i8, // forwarded together with `precision`
+ // Invariant in `D` and always `Send`: a fn pointer holds no value of type `D`
+ phantom: PhantomData<fn(D) -> D>,
+}
+
+impl<D: DecimalType> DecimalArrayDecoder<D> {
+ pub fn new(precision: u8, scale: i8) -> Self { // stores the arguments as given; nothing is validated here
+ Self {
+ precision,
+ scale,
+ phantom: PhantomData, // zero-sized marker, carries no data
+ }
+ }
+}
+
+impl<D> ArrayDecoder for DecimalArrayDecoder<D>
+where
+ D: DecimalType,
+{
+ fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData,
ArrowError> { // each entry of `pos` is looked up on the tape via `tape.get`
+ let mut builder = PrimitiveBuilder::<D>::with_capacity(pos.len()); // one slot per requested position
+ // Null, String and Number elements are accepted; anything else is a tape error.
+ for p in pos {
+ match tape.get(*p) {
+ TapeElement::Null => builder.append_null(),
+ TapeElement::String(idx) => { // quoted value: its text is parsed as a decimal
+ let s = tape.get_string(idx);
+ let value = parse_decimal::<D>(s, self.precision,
self.scale)?;
+ builder.append_value(value)
+ }
+ TapeElement::Number(idx) => { // bare number: its text is fetched and parsed exactly like the String arm
+ let s = tape.get_string(idx);
+ let value = parse_decimal::<D>(s, self.precision,
self.scale)?;
+ builder.append_value(value)
+ }
+ d => return Err(tape_error(d, "decimal")), // reject every other tape element
+ }
+ }
+ // Attach the configured precision and scale to the finished array; `?` propagates a failure.
+ Ok(builder
+ .finish()
+ .with_precision_and_scale(self.precision, self.scale)?
+ .into_data())
+ }
+}
diff --git a/arrow-json/src/raw/mod.rs b/arrow-json/src/raw/mod.rs
index a0dbcbd53..5b699b1d5 100644
--- a/arrow-json/src/raw/mod.rs
+++ b/arrow-json/src/raw/mod.rs
@@ -20,6 +20,7 @@
//! [`Reader`]: crate::reader::Reader
use crate::raw::boolean_array::BooleanArrayDecoder;
+use crate::raw::decimal_array::DecimalArrayDecoder;
use crate::raw::list_array::ListArrayDecoder;
use crate::raw::map_array::MapArrayDecoder;
use crate::raw::primitive_array::PrimitiveArrayDecoder;
@@ -33,6 +34,7 @@ use arrow_schema::{ArrowError, DataType, SchemaRef};
use std::io::BufRead;
mod boolean_array;
+mod decimal_array;
mod list_array;
mod map_array;
mod primitive_array;
@@ -291,6 +293,8 @@ fn make_decoder(
data_type => (primitive_decoder, data_type),
DataType::Float32 => primitive_decoder!(Float32Type, data_type),
DataType::Float64 => primitive_decoder!(Float64Type, data_type),
+ DataType::Decimal128(p, s) =>
Ok(Box::new(DecimalArrayDecoder::<Decimal128Type>::new(p, s))),
+ DataType::Decimal256(p, s) =>
Ok(Box::new(DecimalArrayDecoder::<Decimal256Type>::new(p, s))),
DataType::Boolean => Ok(Box::<BooleanArrayDecoder>::default()),
DataType::Utf8 =>
Ok(Box::new(StringArrayDecoder::<i32>::new(coerce_primitive))),
DataType::LargeUtf8 =>
Ok(Box::new(StringArrayDecoder::<i64>::new(coerce_primitive))),
@@ -321,6 +325,7 @@ mod tests {
};
use arrow_array::types::Int32Type;
use arrow_array::Array;
+ use arrow_buffer::ArrowNativeType;
use arrow_cast::display::{ArrayFormatter, FormatOptions};
use arrow_schema::{DataType, Field, Schema};
use std::fs::File;
@@ -721,4 +726,60 @@ mod tests {
assert!(col3.is_null(4));
assert!(col3.is_null(5));
}
+
+ fn test_decimal<T: DecimalType>(data_type: DataType) { // shared body; test_decimals calls it with precision 10, scale 2
+ let buf = r#"
+ {"a": 1, "b": 2, "c": 38.30}
+ {"a": 2, "b": 4, "c": 123.456}
+
+ {"b": 1337, "a": "2.0452"}
+ {"b": "5", "a": "11034.2"}
+ {"b": 40}
+ {"b": 1234, "a": null}
+ "#;
+ // All three columns use the same `data_type` and are declared nullable.
+ let schema = Arc::new(Schema::new(vec![
+ Field::new("a", data_type.clone(), true),
+ Field::new("b", data_type.clone(), true),
+ Field::new("c", data_type, true),
+ ]));
+ // Six rows are expected in a single batch; the blank line in the input yields no row.
+ let batches = do_read(buf, 1024, true, schema);
+ assert_eq!(batches.len(), 1);
+ // Column "a": absent in row 4 and explicitly null in row 5 (0-based).
+ let col1 = as_primitive_array::<T>(batches[0].column(0));
+ assert_eq!(col1.null_count(), 2);
+ assert!(col1.is_null(4));
+ assert!(col1.is_null(5));
+ assert_eq!(
+ col1.values(),
+ &[100, 200, 204, 1103420, 0, 0].map(T::Native::usize_as) // "2.0452" is expected as 204 at scale 2; null slots hold 0
+ );
+ // Column "b": present in every row, either as a JSON number or as a quoted string.
+ let col2 = as_primitive_array::<T>(batches[0].column(1));
+ assert_eq!(col2.null_count(), 0);
+ assert_eq!(
+ col2.values(),
+ &[200, 400, 133700, 500, 4000, 123400].map(T::Native::usize_as)
+ );
+ // Column "c": only the first two rows provide a value.
+ let col3 = as_primitive_array::<T>(batches[0].column(2));
+ assert_eq!(col3.null_count(), 4);
+ assert!(!col3.is_null(0));
+ assert!(!col3.is_null(1));
+ assert!(col3.is_null(2));
+ assert!(col3.is_null(3));
+ assert!(col3.is_null(4));
+ assert!(col3.is_null(5));
+ assert_eq!(
+ col3.values(),
+ &[3830, 12345, 0, 0, 0, 0].map(T::Native::usize_as) // 123.456 is expected as 12345: the third fractional digit is dropped, not rounded
+ );
+ }
+
+ #[test]
+ fn test_decimals() { // runs the shared decimal test for both decimal widths with precision 10 and scale 2
+ test_decimal::<Decimal128Type>(DataType::Decimal128(10, 2));
+ test_decimal::<Decimal256Type>(DataType::Decimal256(10, 2));
+ }
}