tustvold commented on code in PR #3979:
URL: https://github.com/apache/arrow-rs/pull/3979#discussion_r1153035655
##########
arrow-json/src/raw/serializer.rs:
##########
@@ -0,0 +1,395 @@
+use crate::raw::tape::TapeElement;
+use lexical_core::FormattedSize;
+use serde::ser::{
+ Impossible, SerializeMap, SerializeSeq, SerializeStruct, SerializeTuple,
+ SerializeTupleStruct,
+};
+use serde::{Serialize, Serializer};
+
+/// Error type produced by [`TapeSerializer`].
+///
+/// Carries a single human-readable message as a [`String`].
+#[derive(Debug)]
+pub struct SerializerError(String);
+
+// Empty impl: the default `Error` methods are used unchanged.
+impl std::error::Error for SerializerError {}
+
+/// Displays the wrapped message verbatim, with no prefix or decoration.
+impl std::fmt::Display for SerializerError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", self.0)
+ }
+}
+
+/// Allows serde and `Serialize` implementations to raise a [`SerializerError`]
+/// from an arbitrary displayable message.
+impl serde::ser::Error for SerializerError {
+ /// Builds the error by rendering `msg` to a `String` via its `Display` impl.
+ fn custom<T>(msg: T) -> Self
+ where
+ T: std::fmt::Display,
+ {
+ Self(msg.to_string())
+ }
+}
+
+/// [`Serializer`] for [`TapeElement`]
+///
+/// Mutably borrows three caller-owned buffers for the lifetime `'a` and writes
+/// its output into them.
+///
+/// Heavily based on <https://serde.rs/impl-serializer.html>
+pub struct TapeSerializer<'a> {
+ /// The tape elements produced so far (e.g. `serialize_bool` pushes onto this)
+ elements: &'a mut Vec<TapeElement>,
+
+ /// A buffer of parsed string data
+ bytes: &'a mut Vec<u8>,
+
+ /// Offsets into `bytes`
+ ///
+ /// NOTE(review): this comment previously said `data`, but there is no field
+ /// of that name on this struct; presumably `bytes` is meant — confirm.
+ offsets: &'a mut Vec<usize>,
+}
+
+impl<'a> TapeSerializer<'a> {
+ /// Creates a serializer that writes into the provided buffers.
+ ///
+ /// The three references are stored as given; nothing is cleared, reserved
+ /// or otherwise modified at construction time.
+ pub fn new(
+ elements: &'a mut Vec<TapeElement>,
+ bytes: &'a mut Vec<u8>,
+ offsets: &'a mut Vec<usize>,
+ ) -> Self {
+ Self {
+ elements,
+ bytes,
+ offsets,
+ }
+ }
+}
+
+/// Formats the value `$v` of type `$t` into a stack-allocated byte buffer using
+/// `lexical_core::write`, then passes the written bytes to `$s.serialize_bytes`.
+///
+/// The buffer length is `<$t>::FORMATTED_SIZE`, so the concrete type must be
+/// named at each call site.
+///
+/// Need to use macro as const generic expressions are unstable
+/// <https://github.com/rust-lang/rust/issues/76560>
+macro_rules! serialize_lexical {
+ ($s:ident, $t:ty, $v:ident) => {{
+ // Scratch space sized for this type by `FormattedSize`
+ let mut buffer = [0_u8; <$t>::FORMATTED_SIZE];
+ // `write` returns the sub-slice of `buffer` that was actually filled
+ let s = lexical_core::write($v, &mut buffer);
+ $s.serialize_bytes(s)
+ }};
+}
+
+impl<'a, 'b> Serializer for &'a mut TapeSerializer<'b> {
+ type Ok = ();
+
+ type Error = SerializerError;
+
+ type SerializeSeq = ListSerializer<'a, 'b>;
+ type SerializeTuple = ListSerializer<'a, 'b>;
+ type SerializeTupleStruct = ListSerializer<'a, 'b>;
+ type SerializeTupleVariant = Impossible<(), SerializerError>;
+ type SerializeMap = ObjectSerializer<'a, 'b>;
+ type SerializeStruct = ObjectSerializer<'a, 'b>;
+ type SerializeStructVariant = Impossible<(), SerializerError>;
+
+ /// Pushes [`TapeElement::True`] or [`TapeElement::False`] onto `elements`.
+ ///
+ /// Always returns `Ok(())`.
+ fn serialize_bool(self, v: bool) -> Result<(), SerializerError> {
+ self.elements.push(match v {
+ true => TapeElement::True,
+ false => TapeElement::False,
+ });
+ Ok(())
+ }
+
+ /// Formats the `i8` with `serialize_lexical!` and forwards the bytes to `serialize_bytes`.
+ fn serialize_i8(self, v: i8) -> Result<(), SerializerError> {
+ serialize_lexical!(self, i8, v)
+ }
+
+ /// Formats the `i16` with `serialize_lexical!` and forwards the bytes to `serialize_bytes`.
+ fn serialize_i16(self, v: i16) -> Result<(), SerializerError> {
+ serialize_lexical!(self, i16, v)
+ }
+
+ /// Formats the `i32` with `serialize_lexical!` and forwards the bytes to `serialize_bytes`.
+ fn serialize_i32(self, v: i32) -> Result<(), SerializerError> {
+ serialize_lexical!(self, i32, v)
+ }
+
+ /// Formats the `i64` with `serialize_lexical!` and forwards the bytes to `serialize_bytes`.
+ fn serialize_i64(self, v: i64) -> Result<(), SerializerError> {
+ serialize_lexical!(self, i64, v)
+ }
+
+ /// Formats the `u8` with `serialize_lexical!` and forwards the bytes to `serialize_bytes`.
+ fn serialize_u8(self, v: u8) -> Result<(), SerializerError> {
+ serialize_lexical!(self, u8, v)
+ }
+
+ /// Formats the `u16` with `serialize_lexical!` and forwards the bytes to `serialize_bytes`.
+ fn serialize_u16(self, v: u16) -> Result<(), SerializerError> {
+ serialize_lexical!(self, u16, v)
+ }
+
+ /// Formats the `u32` with `serialize_lexical!` and forwards the bytes to `serialize_bytes`.
+ fn serialize_u32(self, v: u32) -> Result<(), SerializerError> {
+ serialize_lexical!(self, u32, v)
+ }
+
+ /// Formats the `u64` with `serialize_lexical!` and forwards the bytes to `serialize_bytes`.
+ fn serialize_u64(self, v: u64) -> Result<(), SerializerError> {
+ serialize_lexical!(self, u64, v)
+ }
+
+ /// Formats the `f32` with `serialize_lexical!` and forwards the bytes to `serialize_bytes`.
+ fn serialize_f32(self, v: f32) -> Result<(), SerializerError> {
+ serialize_lexical!(self, f32, v)
+ }
+
+ fn serialize_f64(self, v: f64) -> Result<(), SerializerError> {
+ serialize_lexical!(self, f64, v)
Review Comment:
Formatting to a string only to parse it back again is rather wasteful; a
future PR can likely tweak the tape representation to allow storing raw bytes.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]