chaokunyang commented on code in PR #2585:
URL: https://github.com/apache/fory/pull/2585#discussion_r2341139697
##########
rust/fory-core/src/buffer.rs:
##########
@@ -88,70 +88,209 @@ impl Writer {
}
pub fn var_int32(&mut self, value: i32) {
- if value >> 7 == 0 {
+ let zigzag = ((value as i64) << 1) ^ ((value as i64) >> 31);
+ self.internal_var_uint32(zigzag as u32)
+ }
+
+ pub fn var_uint32(&mut self, value: u32) {
+ self.internal_var_uint32(value)
+ }
+
+ fn internal_var_uint32(&mut self, value: u32) {
+ if value < 0x80 {
self.u8(value as u8);
- } else if value >> 14 == 0 {
- let u1 = (value & 0x7F) | 0x80;
- let u2 = value >> 7;
- self.u16(((u2 << 8) | u1) as u16);
- } else if value >> 21 == 0 {
- let u1 = (value & 0x7F) | 0x80;
- let u2 = ((value >> 7) & 0x7F) | 0x80;
- self.u16(((u2 << 8) | u1) as u16);
- self.u8((value >> 14) as u8);
- } else if value >> 28 == 0 {
- let u1 = (value & 0x7F) | 0x80;
- let u2 = ((value >> 7) & 0x7F) | 0x80;
- let u3 = ((value >> 14) & 0x7F) | 0x80;
- let u4 = value >> 21;
- self.u32(((u4 << 24) | (u3 << 16) | (u2 << 8) | u1) as u32);
+ } else if value < 0x4000 {
+ // 2 bytes
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (value >> 7) as u8;
+ self.u16(((u2 as u16) << 8) | u1 as u16);
+ } else if value < 0x200000 {
+ // 3 bytes
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (value >> 14) as u8;
+ self.u16(((u2 as u16) << 8) | u1 as u16);
+ self.u8(u3);
+ } else if value < 0x10000000 {
+ // 4 bytes
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
+ let u4 = (value >> 21) as u8;
+ self.u32(((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32)
<< 8) | u1 as u32);
} else {
- let u1 = (value & 0x7F) | 0x80;
- let u2 = ((value >> 7) & 0x7F) | 0x80;
- let u3 = ((value >> 14) & 0x7F) | 0x80;
- let u4 = ((value >> 21) & 0x7F) | 0x80;
- self.u32(((u4 << 24) | (u3 << 16) | (u2 << 8) | u1) as u32);
- self.u8((value >> 28) as u8);
+ // 5 bytes
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
+ let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
+ let u5 = (value >> 28) as u8;
+ self.u32(((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32)
<< 8) | u1 as u32);
+ self.u8(u5);
}
}
- pub fn var_uint32(&mut self, value: u32) {
- if value >> 7 == 0 {
+ pub fn var_int64(&mut self, value: i64) {
+ let zigzag = ((value << 1) ^ (value >> 63)) as u64;
+ self.internal_var_uint64(zigzag)
+ }
+
+ pub fn var_uint64(&mut self, value: u64) {
+ self.internal_var_uint64(value)
+ }
+
+ fn internal_var_uint64(&mut self, value: u64) {
+ if value < 0x80 {
self.u8(value as u8);
- } else if value >> 14 == 0 {
- let u1 = (value & 0x7F) | 0x80;
- let u2 = value >> 7;
- self.u16(((u2 << 8) | u1) as u16);
- } else if value >> 21 == 0 {
- let u1 = (value & 0x7F) | 0x80;
- let u2 = ((value >> 7) & 0x7F) | 0x80;
- let u3 = value >> 14;
- self.u8(u1 as u8);
- self.u8(u2 as u8);
- self.u8(u3 as u8);
- } else if value >> 28 == 0 {
- let u1 = (value & 0x7F) | 0x80;
- let u2 = ((value >> 7) & 0x7F) | 0x80;
- let u3 = ((value >> 14) & 0x7F) | 0x80;
- let u4 = value >> 21;
- self.u32((u4 << 24) | (u3 << 16) | (u2 << 8) | u1);
+ } else if value < 0x4000 {
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (value >> 7) as u8;
+ self.u16(((u2 as u16) << 8) | u1 as u16);
+ } else if value < 0x200000 {
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (value >> 14) as u8;
+ self.u16(((u2 as u16) << 8) | u1 as u16);
+ self.u8(u3);
+ } else if value < 0x10000000 {
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
+ let u4 = (value >> 21) as u8;
+ self.u32(((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32)
<< 8) | u1 as u32);
+ } else if value < 0x800000000 {
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
+ let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
+ let u5 = (value >> 28) as u8;
+ self.u32(((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32)
<< 8) | u1 as u32);
+ self.u8(u5);
+ } else if value < 0x40000000000 {
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
+ let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
+ let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
+ let u6 = (value >> 35) as u8;
+ self.u32(((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32)
<< 8) | u1 as u32);
+ self.u16(((u6 as u16) << 8) | u5 as u16);
+ } else if value < 0x2000000000000 {
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
+ let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
+ let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
+ let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
+ let u7 = (value >> 42) as u8;
+ self.u32(((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32)
<< 8) | u1 as u32);
+ self.u16(((u6 as u16) << 8) | u5 as u16);
+ self.u8(u7);
+ } else if value < 0x100000000000000 {
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
+ let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
+ let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
+ let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
+ let u7 = (((value >> 42) as u8) & 0x7F) | 0x80;
+ let u8 = (value >> 49) as u8;
+ self.u64(
+ (u8 as u64) << 56
+ | (u7 as u64) << 48
+ | (u6 as u64) << 40
+ | (u5 as u64) << 32
+ | (u4 as u64) << 24
+ | (u3 as u64) << 16
+ | (u2 as u64) << 8
+ | (u1 as u64),
+ );
} else {
- let u1 = (value & 0x7F) | 0x80;
- let u2 = ((value >> 7) & 0x7F) | 0x80;
- let u3 = ((value >> 14) & 0x7F) | 0x80;
- let u4 = ((value >> 21) & 0x7F) | 0x80;
- let u5 = value >> 28;
- self.u8(u1 as u8);
- self.u8(u2 as u8);
- self.u8(u3 as u8);
- self.u8(u4 as u8);
- self.u8(u5 as u8);
+ let u1 = ((value as u8) & 0x7F) | 0x80;
+ let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
+ let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
+ let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
+ let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
+ let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
+ let u7 = (((value >> 42) as u8) & 0x7F) | 0x80;
+ let u8 = (((value >> 49) as u8) & 0x7F) | 0x80;
+ let u9 = (value >> 56) as u8;
+ self.u64(
+ (u8 as u64) << 56
+ | (u7 as u64) << 48
+ | (u6 as u64) << 40
+ | (u5 as u64) << 32
+ | (u4 as u64) << 24
+ | (u3 as u64) << 16
+ | (u2 as u64) << 8
+ | (u1 as u64),
+ );
+ self.u8(u9);
}
}
- pub fn bytes(&mut self, v: &[u8]) {
+ pub fn var_uint36_small(&mut self, value: u64) {
+ assert!(value < (1u64 << 36), "value too large for 36-bit varint");
+ if value < 0x80 {
+ self.u8(value as u8);
+ } else if value < 0x4000 {
+ let b0 = ((value & 0x7F) as u8) | 0x80;
+ let b1 = (value >> 7) as u8;
+ let combined = ((b1 as u16) << 8) | (b0 as u16);
+ self.u16(combined);
+ } else if value < 0x200000 {
+ let b0 = (value & 0x7F) | 0x80;
+ let b1 = ((value >> 7) & 0x7F) | 0x80;
+ let b2 = value >> 14;
+ let combined = b0 | (b1 << 8) | (b2 << 16);
+ self.u32(combined as u32);
+ } else if value < 0x10000000 {
+ let b0 = (value & 0x7F) | 0x80;
+ let b1 = ((value >> 7) & 0x7F) | 0x80;
+ let b2 = ((value >> 14) & 0x7F) | 0x80;
+ let b3 = value >> 21;
+ let combined = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+ self.u32(combined as u32);
+ } else {
+ let b0 = (value & 0x7F) | 0x80;
+ let b1 = ((value >> 7) & 0x7F) | 0x80;
+ let b2 = ((value >> 14) & 0x7F) | 0x80;
+ let b3 = ((value >> 21) & 0x7F) | 0x80;
+ let b4 = value >> 28;
+ let combined = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) | (b4 <<
32);
+ self.u64(combined);
+ }
+ }
+
+ pub fn latin1_string(&mut self, s: &str) -> usize {
Review Comment:
This is pretty costly, and it looks strange that the buffer has a
`latin1_string` method.
We should add a `convert_to_latin1` method to the string serializer and write the
converted string to the buffer. We can then also get the length of the encoded Latin-1 bytes.
To reduce memory allocation, we can also cache a buffer in the string serializer.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]