alamb commented on code in PR #4861:
URL: https://github.com/apache/arrow-rs/pull/4861#discussion_r1337332146
##########
arrow-json/src/reader/tape.rs:
##########
@@ -54,6 +55,25 @@ pub enum TapeElement {
///
/// Contains the offset into the [`Tape`] string data
Number(u32),
+
+ /// The high bits of a i64
Review Comment:
👍
##########
arrow-json/src/reader/serializer.rs:
##########
@@ -77,22 +77,6 @@ impl<'a> TapeSerializer<'a> {
}
}
-/// The tape stores all values as strings, and so must serialize numeric types
-///
-/// Formatting to a string only to parse it back again is rather wasteful,
-/// it may be possible to tweak the tape representation to avoid this
-///
-/// Need to use macro as const generic expressions are unstable
Review Comment:
🎉
##########
arrow-json/src/reader/serializer.rs:
##########
@@ -77,22 +77,6 @@ impl<'a> TapeSerializer<'a> {
}
}
-/// The tape stores all values as strings, and so must serialize numeric types
-///
-/// Formatting to a string only to parse it back again is rather wasteful,
-/// it may be possible to tweak the tape representation to avoid this
-///
-/// Need to use macro as const generic expressions are unstable
Review Comment:
🎉
##########
arrow-json/src/reader/tape.rs:
##########
@@ -54,6 +55,25 @@ pub enum TapeElement {
///
/// Contains the offset into the [`Tape`] string data
Number(u32),
+
+ /// The high bits of a i64
Review Comment:
👍
##########
arrow-json/src/reader/tape.rs:
##########
@@ -104,10 +124,15 @@ impl<'a> Tape<'a> {
| TapeElement::Number(_)
| TapeElement::True
| TapeElement::False
- | TapeElement::Null => Ok(cur_idx + 1),
+ | TapeElement::Null
+ | TapeElement::I32(_)
+ | TapeElement::F32(_) => Ok(cur_idx + 1),
+ TapeElement::I64(_) | TapeElement::F64(_) => Ok(cur_idx + 2),
TapeElement::StartList(end_idx) => Ok(end_idx + 1),
TapeElement::StartObject(end_idx) => Ok(end_idx + 1),
- _ => Err(self.error(cur_idx, expected)),
+ TapeElement::EndObject(_) | TapeElement::EndList(_) => {
Review Comment:
+1 for removing the catch all
##########
arrow-json/src/reader/serializer.rs:
##########
@@ -115,43 +99,63 @@ impl<'a, 'b> Serializer for &'a mut TapeSerializer<'b> {
}
fn serialize_i8(self, v: i8) -> Result<(), SerializerError> {
- serialize_numeric!(self, i8, v)
+ self.serialize_i32(v as _)
}
fn serialize_i16(self, v: i16) -> Result<(), SerializerError> {
- serialize_numeric!(self, i16, v)
+ self.serialize_i32(v as _)
}
fn serialize_i32(self, v: i32) -> Result<(), SerializerError> {
- serialize_numeric!(self, i32, v)
+ self.elements.push(TapeElement::I32(v));
+ Ok(())
}
fn serialize_i64(self, v: i64) -> Result<(), SerializerError> {
- serialize_numeric!(self, i64, v)
+ let low = v as i32;
+ let high = (v >> 32) as i32;
+ self.elements.push(TapeElement::I64(high));
+ self.elements.push(TapeElement::I32(low));
+ Ok(())
}
fn serialize_u8(self, v: u8) -> Result<(), SerializerError> {
- serialize_numeric!(self, u8, v)
+ self.serialize_i32(v as _)
}
fn serialize_u16(self, v: u16) -> Result<(), SerializerError> {
- serialize_numeric!(self, u16, v)
+ self.serialize_i32(v as _)
}
fn serialize_u32(self, v: u32) -> Result<(), SerializerError> {
- serialize_numeric!(self, u32, v)
+ match i32::try_from(v) {
+ Ok(v) => self.serialize_i32(v),
+ Err(_) => self.serialize_i64(v as _),
+ }
}
fn serialize_u64(self, v: u64) -> Result<(), SerializerError> {
- serialize_numeric!(self, u64, v)
+ match i64::try_from(v) {
+ Ok(v) => self.serialize_i64(v),
+ Err(_) => {
+ let mut buffer = [0_u8; u64::FORMATTED_SIZE];
Review Comment:
Would you imagine doing it via `i128` or something?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]