rguerreiromsft commented on code in PR #3736:
URL: https://github.com/apache/arrow-rs/pull/3736#discussion_r1112224044


##########
arrow-json/src/raw/converter.rs:
##########
@@ -0,0 +1,388 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Defines trait to convert data while decoding json and make them conform to the Schema
+
+use crate::raw::tape::{Tape, TapeElement};
+use crate::raw::tape_error;
+use arrow_array::ArrowPrimitiveType;
+use arrow_cast::parse::Parser;
+use arrow_schema::{ArrowError, DataType};
+use num::NumCast;
+use std::borrow::Cow;
+
+pub trait TapeConverter: Send {
+    /// Converts this tape entry to bool
+    fn to_bool(
+        &self,
+        tape: &Tape<'_, Self>,
+        idx: u32,
+    ) -> Result<Option<bool>, ArrowError>
+    where
+        Self: Sized;
+
+    /// Converts this tape entry to primitive number
+    fn to_primitive<P>(
+        &self,
+        tape: &Tape<'_, Self>,
+        data_type: &DataType,
+        idx: u32,
+    ) -> Result<Option<P::Native>, ArrowError>
+    where
+        Self: Sized,
+        P: ArrowPrimitiveType + Parser,
+        P::Native: NumCast;
+
+    /// Converts this tape entry to string
+    fn to_string<'a>(
+        &self,
+        tape: &'a Tape<'_, Self>,
+        idx: u32,
+    ) -> Result<Option<Cow<'a, str>>, ArrowError>
+    where
+        Self: Sized;
+}
+
+/// A Strict version of the TapeConverter, which is useful when the json entry must match the schema
+#[derive(Copy, Clone, Default, Debug)]
+pub struct StrictTapeConverter;
+
+impl TapeConverter for StrictTapeConverter {
+    #[inline]
+    fn to_bool(&self, tape: &Tape<'_, Self>, idx: u32) -> Result<Option<bool>, 
ArrowError>
+    where
+        Self: Sized,
+    {
+        match tape.get(idx) {
+            TapeElement::Null => Ok(None),
+            TapeElement::True => Ok(Some(true)),
+            TapeElement::False => Ok(Some(false)),
+            d => Err(tape_error(d, "boolean")),
+        }
+    }
+
+    #[inline]
+    fn to_primitive<P>(
+        &self,
+        tape: &Tape<'_, Self>,
+        data_type: &DataType,
+        idx: u32,
+    ) -> Result<Option<P::Native>, ArrowError>
+    where
+        Self: Sized,
+        P: ArrowPrimitiveType + Parser,
+        P::Native: NumCast,
+    {
+        match tape.get(idx) {
+            TapeElement::Null => Ok(None),
+            TapeElement::String(idx) => {
+                let s = tape.get_string(idx);
+                let value = P::parse(s).ok_or_else(|| {
+                    ArrowError::JsonError(format!(
+                        "failed to parse \"{s}\" as {}",
+                        data_type
+                    ))
+                })?;
+
+                Ok(Some(value))
+            }
+            TapeElement::Number(idx) => {
+                parse_number::<P>(tape.get_string(idx), data_type)
+            }
+            d => Err(tape_error(d, "primitive")),
+        }
+    }
+
+    #[inline]
+    fn to_string<'a>(
+        &self,
+        tape: &'a Tape<'_, Self>,
+        idx: u32,
+    ) -> Result<Option<Cow<'a, str>>, ArrowError>
+    where
+        Self: Sized,
+    {
+        match tape.get(idx) {
+            TapeElement::String(idx) => Ok(Some(tape.get_string(idx).into())),
+            TapeElement::Null => Ok(None),
+            d => Err(tape_error(d, "string")),
+        }
+    }
+}
+
+/// A Loose version of the TapeConverter, which is useful when the json entry doesn't match the schema.
+/// It will try its best to convert the data:
+/// - Any number can be converted into strings or bools. For string it will just read as is, but for bool it will be false when equals zero, true otherwise.
+/// - A bool can be converted into string or number. For string it will just read as is, but for number it will consider 1 for true and 0 for false.
+/// - A string can be converted into number like the Strict version, but it can be converted to bool if it's "true" or "1" it will be considered true, otherwise it will be considered false.
+/// - Lists and Structs can be converted into strings, and their json representation will be used.
+#[derive(Copy, Clone, Default, Debug)]
+pub struct LooseTapeConverter;
+
+impl TapeConverter for LooseTapeConverter {
+    #[inline]
+    fn to_bool(&self, tape: &Tape<'_, Self>, idx: u32) -> Result<Option<bool>, 
ArrowError>
+    where
+        Self: Sized,
+    {
+        match tape.get(idx) {
+            TapeElement::Null => Ok(None),
+            TapeElement::True => Ok(Some(true)),
+            TapeElement::False => Ok(Some(false)),
+            TapeElement::Number(idx) => {
+                let s = tape.get_string(idx);
+                let value = number_as_f64(s)? as i64;
+                Ok(Some(value != 0))
+            }
+            TapeElement::String(idx) => {
+                let s = tape.get_string(idx).trim().to_lowercase();
+                Ok(Some(matches!(s.as_str(), "true" | "1")))
+            }
+            d => Err(tape_error(d, "boolean")),
+        }
+    }
+
+    #[inline]
+    fn to_primitive<P>(
+        &self,
+        tape: &Tape<'_, Self>,
+        data_type: &DataType,
+        idx: u32,
+    ) -> Result<Option<P::Native>, ArrowError>
+    where
+        Self: Sized,
+        P: ArrowPrimitiveType + Parser,
+        P::Native: NumCast,
+    {
+        match tape.get(idx) {
+            TapeElement::Null => Ok(None),
+            TapeElement::True => Ok(NumCast::from(1)),
+            TapeElement::False => Ok(NumCast::from(0)),
+            TapeElement::String(idx) => {
+                let s = tape.get_string(idx);
+                let value = P::parse(s).ok_or_else(|| {
+                    ArrowError::JsonError(format!(
+                        "failed to parse \"{s}\" as {}",
+                        data_type
+                    ))
+                })?;
+
+                Ok(Some(value))
+            }
+            TapeElement::Number(idx) => {
+                parse_number::<P>(tape.get_string(idx), data_type)
+            }
+            d => Err(tape_error(d, "primitive")),
+        }
+    }
+
+    #[inline]
+    fn to_string<'a>(
+        &self,
+        tape: &'a Tape<'_, Self>,
+        idx: u32,
+    ) -> Result<Option<Cow<'a, str>>, ArrowError>
+    where
+        Self: Sized,
+    {
+        match tape.get(idx) {
+            TapeElement::Null => Ok(None),
+            TapeElement::True => Ok(Some("true".into())),
+            TapeElement::False => Ok(Some("false".into())),
+            TapeElement::String(idx) | TapeElement::Number(idx) => {
+                Ok(Some(tape.get_string(idx).into()))
+            }
+            TapeElement::StartList(_) => Ok(Some(obj_to_json_string(tape, 
idx)?)),
+            TapeElement::StartObject(_) => Ok(Some(obj_to_json_string(tape, 
idx)?)),
+            d => Err(tape_error(d, "string")),
+        }
+    }
+}
+
+fn obj_to_json_string<'a, C: TapeConverter>(
+    tape: &'a Tape<'_, C>,
+    idx: u32,
+) -> Result<Cow<'a, str>, ArrowError> {
+    match tape.get(idx) {
+        TapeElement::Null => Ok("null".into()),
+        TapeElement::True => Ok("true".into()),
+        TapeElement::False => Ok("false".into()),
+        TapeElement::Number(idx) => Ok(tape.get_string(idx).into()),
+        TapeElement::String(idx) => Ok(format!("\"{}\"", 
tape.get_string(idx)).into()),

Review Comment:
   Then I'll keep it very simple. Just a bool in a struct for now.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to