This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to tag v0.13.2-rc1
in repository https://gitbox.apache.org/repos/asf/fory.git

commit c3f7c3e444e18d3e4123649ff96075d3e767c1bd
Author: Shawn Yang <[email protected]>
AuthorDate: Wed Nov 5 02:09:27 2025 +0800

    perf(rust): optimize buffer write read perf (#2892)
    
    ## Why?
    
    <!-- Describe the purpose of this PR. -->
    
    ## What does this PR do?
    
    optimize buffer write read perf
    
    ## Related issues
    
    <!--
    Is there any related issue? If this PR closes them you say say
    fix/closes:
    
    - #xxxx0
    - #xxxx1
    - Fixes #xxxx2
    -->
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/fory/issues/new/choose) describing the
    need to do so and update the document if necessary.
    
    Delete section if not applicable.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    <!--
    When the PR has an impact on performance (if you don't know whether the
    PR will have an impact on performance, you can submit the PR first, and
    if it will have an impact on performance, the code reviewer will explain
    it), be sure to attach benchmark data here.
    
    Delete section if not applicable.
    -->
---
 rust/benches/Cargo.toml                    |  11 ++
 rust/benches/benches/buffer_read_bench.rs  | 288 +++++++++++++++++++++++++++++
 rust/benches/benches/buffer_write_bench.rs | 266 ++++++++++++++++++++++++++
 rust/fory-core/src/buffer.rs               |  99 ++++++----
 rust/fory-core/src/error.rs                |  69 ++++---
 rust/fory-core/src/serializer/struct_.rs   |  23 +--
 rust/fory-core/src/util.rs                 |   4 +
 rust/fory-derive/src/object/util.rs        |  17 +-
 8 files changed, 689 insertions(+), 88 deletions(-)

diff --git a/rust/benches/Cargo.toml b/rust/benches/Cargo.toml
index 3a23c12f6..cc6b203c8 100644
--- a/rust/benches/Cargo.toml
+++ b/rust/benches/Cargo.toml
@@ -29,6 +29,16 @@ name = "serialization_bench"
 path = "benches/serialization_bench.rs"
 harness = false
 
+[[bench]]
+name = "buffer_write_bench"
+path = "benches/buffer_write_bench.rs"
+harness = false
+
+[[bench]]
+name = "buffer_read_bench"
+path = "benches/buffer_read_bench.rs"
+harness = false
+
 [dependencies]
 fory = { path = "../fory" }
 fory-core = { path = "../fory-core" }
@@ -38,6 +48,7 @@ serde_json = "1.0"
 chrono = { version = "0.4", features = ["serde"] }
 prost = "0.12"
 prost-types = "0.12"
+byteorder = "1.5"
 rand = "0.8"
 criterion = "0.5"
 clap = { version = "4.0", features = ["derive"] }
diff --git a/rust/benches/benches/buffer_read_bench.rs b/rust/benches/benches/buffer_read_bench.rs
new file mode 100644
index 000000000..58c623d58
--- /dev/null
+++ b/rust/benches/benches/buffer_read_bench.rs
@@ -0,0 +1,288 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
+use fory_core::buffer::{Reader, Writer};
+
+fn prepare_i32_buffer() -> Vec<u8> {
+    let mut buf = Vec::new();
+    let mut writer = Writer::from_buffer(&mut buf);
+    for i in 0..1000 {
+        writer.write_i32(i * 12345);
+    }
+    buf
+}
+
+fn prepare_i64_buffer() -> Vec<u8> {
+    let mut buf = Vec::new();
+    let mut writer = Writer::from_buffer(&mut buf);
+    for i in 0..1000 {
+        writer.write_i64(i * 123456789);
+    }
+    buf
+}
+
+fn prepare_f32_buffer() -> Vec<u8> {
+    let mut buf = Vec::new();
+    let mut writer = Writer::from_buffer(&mut buf);
+    for i in 0..1000 {
+        writer.write_f32(i as f32 * 1.23);
+    }
+    buf
+}
+
+fn prepare_f64_buffer() -> Vec<u8> {
+    let mut buf = Vec::new();
+    let mut writer = Writer::from_buffer(&mut buf);
+    for i in 0..1000 {
+        writer.write_f64(i as f64 * 1.23456);
+    }
+    buf
+}
+
+fn prepare_varint32_buffer(multiplier: i32) -> Vec<u8> {
+    let mut buf = Vec::new();
+    let mut writer = Writer::from_buffer(&mut buf);
+    for i in 0..1000 {
+        writer.write_varint32((i % 1000) * multiplier);
+    }
+    buf
+}
+
+fn prepare_varint64_buffer(multiplier: i64) -> Vec<u8> {
+    let mut buf = Vec::new();
+    let mut writer = Writer::from_buffer(&mut buf);
+    for i in 0..1000 {
+        writer.write_varint64((i % 1000) * multiplier);
+    }
+    buf
+}
+
+fn bench_read_i32(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_i32");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_i32_buffer();
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0i64;
+            for _ in 0..1000 {
+                sum += reader.read_i32().unwrap() as i64;
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_read_i64(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_i64");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_i64_buffer();
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0i64;
+            for _ in 0..1000 {
+                sum = sum.wrapping_add(reader.read_i64().unwrap());
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_read_f32(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_f32");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_f32_buffer();
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0.0f32;
+            for _ in 0..1000 {
+                sum += reader.read_f32().unwrap();
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_read_f64(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_f64");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_f64_buffer();
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0.0f64;
+            for _ in 0..1000 {
+                sum += reader.read_f64().unwrap();
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_read_varint32_small(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_varint32_small");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_varint32_buffer(1); // Small values (1 byte)
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0i64;
+            for _ in 0..1000 {
+                sum += reader.read_varint32().unwrap() as i64;
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_read_varint32_medium(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_varint32_medium");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_varint32_buffer(1000); // Medium values (2-3 bytes)
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0i64;
+            for _ in 0..1000 {
+                sum += reader.read_varint32().unwrap() as i64;
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_read_varint32_large(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_varint32_large");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_varint32_buffer(1000000); // Large values (4-5 bytes)
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0i64;
+            for _ in 0..1000 {
+                sum += reader.read_varint32().unwrap() as i64;
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_read_varint64_small(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_varint64_small");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_varint64_buffer(1); // Small values (1 byte)
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0i64;
+            for _ in 0..1000 {
+                sum = sum.wrapping_add(reader.read_varint64().unwrap());
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_read_varint64_medium(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_varint64_medium");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_varint64_buffer(1000000); // Medium values (3-4 bytes)
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0i64;
+            for _ in 0..1000 {
+                sum = sum.wrapping_add(reader.read_varint64().unwrap());
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_read_varint64_large(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_varint64_large");
+    group.throughput(Throughput::Elements(1000));
+
+    let buf = prepare_varint64_buffer(1000000000000); // Large values (6-9 bytes)
+
+    group.bench_function("current", |b| {
+        b.iter(|| {
+            let mut reader = Reader::new(&buf);
+            let mut sum = 0i64;
+            for _ in 0..1000 {
+                sum = sum.wrapping_add(reader.read_varint64().unwrap());
+            }
+            black_box(sum);
+        })
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_read_i32,
+    bench_read_i64,
+    bench_read_f32,
+    bench_read_f64,
+    bench_read_varint32_small,
+    bench_read_varint32_medium,
+    bench_read_varint32_large,
+    bench_read_varint64_small,
+    bench_read_varint64_medium,
+    bench_read_varint64_large
+);
+criterion_main!(benches);
diff --git a/rust/benches/benches/buffer_write_bench.rs b/rust/benches/benches/buffer_write_bench.rs
new file mode 100644
index 000000000..fd48e6083
--- /dev/null
+++ b/rust/benches/benches/buffer_write_bench.rs
@@ -0,0 +1,266 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
+use fory_core::buffer::Writer;
+
+fn bench_write_u8(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_u8");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<u8> = (0..10000).map(|i| (i % 256) as u8).collect();
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(10000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_u8(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_i32(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_i32");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<i32> = (0..1000).map(|i| i * 12345).collect();
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(4000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_i32(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_i64(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_i64");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<i64> = (0..1000).map(|i| i * 123456789).collect();
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(8000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_i64(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_f32(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_f32");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<f32> = (0..1000).map(|i| i as f32 * 1.23).collect();
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(4000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_f32(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_f64(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_f64");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<f64> = (0..1000).map(|i| i as f64 * 1.23456).collect();
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(8000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_f64(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_varint32_small(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_varint32_small");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<i32> = (0..1000).map(|i| i % 128).collect(); // Small values (1 byte)
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(2000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_varint32(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_varint32_medium(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_varint32_medium");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<i32> = (0..1000).map(|i| i * 1000).collect(); // Medium values (2-3 bytes)
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(3000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_varint32(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_varint32_large(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_varint32_large");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<i32> = (0..1000).map(|i| i * 1000000).collect(); // Large values (4-5 bytes)
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(5000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_varint32(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_varint64_small(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_varint64_small");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<i64> = (0..1000).map(|i| i % 128).collect(); // Small values (1 byte)
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(2000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_varint64(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_varint64_medium(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_varint64_medium");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<i64> = (0..1000).map(|i| i * 1000000).collect(); // Medium values (3-4 bytes)
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(5000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_varint64(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_write_varint64_large(c: &mut Criterion) {
+    let mut group = c.benchmark_group("write_varint64_large");
+    group.throughput(Throughput::Elements(1000));
+
+    let values: Vec<i64> = (0..1000).map(|i| i as i64 * 1000000000000).collect(); // Large values (6-9 bytes)
+
+    group.bench_function("current", |b| {
+        let mut buf = Vec::with_capacity(10000);
+        b.iter(|| {
+            buf.clear();
+            let mut writer = Writer::from_buffer(&mut buf);
+            for &val in &values {
+                writer.write_varint64(black_box(val));
+            }
+            black_box(&buf);
+        })
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_write_u8,
+    bench_write_i32,
+    bench_write_i64,
+    bench_write_f32,
+    bench_write_f64,
+    bench_write_varint32_small,
+    bench_write_varint32_medium,
+    bench_write_varint32_large,
+    bench_write_varint64_small,
+    bench_write_varint64_medium,
+    bench_write_varint64_large
+);
+criterion_main!(benches);
diff --git a/rust/fory-core/src/buffer.rs b/rust/fory-core/src/buffer.rs
index 6d214710b..1141d035c 100644
--- a/rust/fory-core/src/buffer.rs
+++ b/rust/fory-core/src/buffer.rs
@@ -17,7 +17,7 @@
 
 use crate::error::Error;
 use crate::meta::buffer_rw_string::read_latin1_simd;
-use byteorder::{ByteOrder, LittleEndian, WriteBytesExt};
+use byteorder::{ByteOrder, LittleEndian};
 use std::cmp::max;
 
 /// Threshold for using SIMD optimizations in string operations.
@@ -81,62 +81,102 @@ impl<'a> Writer<'a> {
 
     #[inline(always)]
     pub fn write_bool(&mut self, value: bool) {
-        self.bf.write_u8(if value { 1 } else { 0 }).unwrap();
+        self.bf.push(if value { 1 } else { 0 });
     }
 
     #[inline(always)]
     pub fn write_u8(&mut self, value: u8) {
-        self.bf.write_u8(value).unwrap();
+        self.bf.push(value);
     }
 
     #[inline(always)]
     pub fn write_i8(&mut self, value: i8) {
-        self.bf.write_i8(value).unwrap();
+        self.bf.push(value as u8);
     }
 
     #[inline(always)]
     pub fn write_u16(&mut self, value: u16) {
-        self.bf.write_u16::<LittleEndian>(value).unwrap();
+        #[cfg(target_endian = "little")]
+        {
+            let bytes = unsafe { &*(&value as *const u16 as *const [u8; 2]) };
+            self.bf.extend_from_slice(bytes);
+        }
+        #[cfg(target_endian = "big")]
+        {
+            self.bf.extend_from_slice(&value.to_le_bytes());
+        }
     }
 
     #[inline(always)]
     pub fn write_i16(&mut self, value: i16) {
-        self.bf.write_i16::<LittleEndian>(value).unwrap();
+        self.write_u16(value as u16);
     }
 
     #[inline(always)]
     pub fn write_u32(&mut self, value: u32) {
-        self.bf.write_u32::<LittleEndian>(value).unwrap();
+        #[cfg(target_endian = "little")]
+        {
+            let bytes = unsafe { &*(&value as *const u32 as *const [u8; 4]) };
+            self.bf.extend_from_slice(bytes);
+        }
+        #[cfg(target_endian = "big")]
+        {
+            self.bf.extend_from_slice(&value.to_le_bytes());
+        }
     }
 
     #[inline(always)]
     pub fn write_i32(&mut self, value: i32) {
-        self.bf.write_i32::<LittleEndian>(value).unwrap();
+        self.write_u32(value as u32);
     }
 
     #[inline(always)]
     pub fn write_f32(&mut self, value: f32) {
-        self.bf.write_f32::<LittleEndian>(value).unwrap();
+        #[cfg(target_endian = "little")]
+        {
+            let bytes = unsafe { &*(&value as *const f32 as *const [u8; 4]) };
+            self.bf.extend_from_slice(bytes);
+        }
+        #[cfg(target_endian = "big")]
+        {
+            self.bf.extend_from_slice(&value.to_bits().to_le_bytes());
+        }
     }
 
     #[inline(always)]
     pub fn write_i64(&mut self, value: i64) {
-        self.bf.write_i64::<LittleEndian>(value).unwrap();
+        self.write_u64(value as u64);
     }
 
     #[inline(always)]
     pub fn write_f64(&mut self, value: f64) {
-        self.bf.write_f64::<LittleEndian>(value).unwrap();
+        #[cfg(target_endian = "little")]
+        {
+            let bytes = unsafe { &*(&value as *const f64 as *const [u8; 8]) };
+            self.bf.extend_from_slice(bytes);
+        }
+        #[cfg(target_endian = "big")]
+        {
+            self.bf.extend_from_slice(&value.to_bits().to_le_bytes());
+        }
     }
 
     #[inline(always)]
     pub fn write_u64(&mut self, value: u64) {
-        self.bf.write_u64::<LittleEndian>(value).unwrap();
+        #[cfg(target_endian = "little")]
+        {
+            let bytes = unsafe { &*(&value as *const u64 as *const [u8; 8]) };
+            self.bf.extend_from_slice(bytes);
+        }
+        #[cfg(target_endian = "big")]
+        {
+            self.bf.extend_from_slice(&value.to_le_bytes());
+        }
     }
 
     #[inline(always)]
     pub fn write_usize(&mut self, value: usize) {
-        self.bf.write_u64::<LittleEndian>(value as u64).unwrap();
+        self.write_u64(value as u64);
     }
 
     #[inline(always)]
@@ -153,7 +193,7 @@ impl<'a> Writer<'a> {
     #[inline(always)]
     fn _write_varuint32(&mut self, value: u32) {
         if value < 0x80 {
-            self.write_u8(value as u8);
+            self.bf.push(value as u8);
         } else if value < 0x4000 {
             // 2 bytes
             let u1 = ((value as u8) & 0x7F) | 0x80;
@@ -165,7 +205,7 @@ impl<'a> Writer<'a> {
             let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
             let u3 = (value >> 14) as u8;
             self.write_u16(((u2 as u16) << 8) | u1 as u16);
-            self.write_u8(u3);
+            self.bf.push(u3);
         } else if value < 0x10000000 {
             // 4 bytes
             let u1 = ((value as u8) & 0x7F) | 0x80;
@@ -185,7 +225,7 @@ impl<'a> Writer<'a> {
             self.write_u32(
                 ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
             );
-            self.write_u8(u5);
+            self.bf.push(u5);
         }
     }
 
@@ -203,7 +243,7 @@ impl<'a> Writer<'a> {
     #[inline(always)]
     fn _write_varuint64(&mut self, value: u64) {
         if value < 0x80 {
-            self.write_u8(value as u8);
+            self.bf.push(value as u8);
         } else if value < 0x4000 {
             let u1 = ((value as u8) & 0x7F) | 0x80;
             let u2 = (value >> 7) as u8;
@@ -213,7 +253,7 @@ impl<'a> Writer<'a> {
             let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
             let u3 = (value >> 14) as u8;
             self.write_u16(((u2 as u16) << 8) | u1 as u16);
-            self.write_u8(u3);
+            self.bf.push(u3);
         } else if value < 0x10000000 {
             let u1 = ((value as u8) & 0x7F) | 0x80;
             let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
@@ -231,7 +271,7 @@ impl<'a> Writer<'a> {
             self.write_u32(
                 ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
             );
-            self.write_u8(u5);
+            self.bf.push(u5);
         } else if value < 0x40000000000 {
             let u1 = ((value as u8) & 0x7F) | 0x80;
             let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
@@ -255,7 +295,7 @@ impl<'a> Writer<'a> {
                 ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
             );
             self.write_u16(((u6 as u16) << 8) | u5 as u16);
-            self.write_u8(u7);
+            self.bf.push(u7);
         } else if value < 0x100000000000000 {
             let u1 = ((value as u8) & 0x7F) | 0x80;
             let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
@@ -295,7 +335,7 @@ impl<'a> Writer<'a> {
                     | (u2 as u64) << 8
                     | (u1 as u64),
             );
-            self.write_u8(u9);
+            self.bf.push(u9);
         }
     }
 
@@ -303,7 +343,7 @@ impl<'a> Writer<'a> {
     pub fn write_varuint36_small(&mut self, value: u64) {
         assert!(value < (1u64 << 36), "value too large for 36-bit varint");
         if value < 0x80 {
-            self.write_u8(value as u8);
+            self.bf.push(value as u8);
         } else if value < 0x4000 {
             let b0 = ((value & 0x7F) as u8) | 0x80;
             let b1 = (value >> 7) as u8;
@@ -448,7 +488,7 @@ impl<'a> Reader<'a> {
     pub fn read_u16(&mut self) -> Result<u16, Error> {
         let slice = self.slice_after_cursor();
         let result = LittleEndian::read_u16(slice);
-        self.move_next(2);
+        self.cursor += 2;
         Ok(result)
     }
 
@@ -461,7 +501,7 @@ impl<'a> Reader<'a> {
     pub fn read_u32(&mut self) -> Result<u32, Error> {
         let slice = self.slice_after_cursor();
         let result = LittleEndian::read_u32(slice);
-        self.move_next(4);
+        self.cursor += 4;
         Ok(result)
     }
 
@@ -474,16 +514,13 @@ impl<'a> Reader<'a> {
     pub fn read_u64(&mut self) -> Result<u64, Error> {
         let slice = self.slice_after_cursor();
         let result = LittleEndian::read_u64(slice);
-        self.move_next(8);
+        self.cursor += 8;
         Ok(result)
     }
 
     #[inline(always)]
     pub fn read_usize(&mut self) -> Result<usize, Error> {
-        let slice = self.slice_after_cursor();
-        let result = LittleEndian::read_u64(slice);
-        self.move_next(8);
-        Ok(result as usize)
+        Ok(self.read_u64()? as usize)
     }
 
     #[inline(always)]
@@ -495,7 +532,7 @@ impl<'a> Reader<'a> {
     pub fn read_f32(&mut self) -> Result<f32, Error> {
         let slice = self.slice_after_cursor();
         let result = LittleEndian::read_f32(slice);
-        self.move_next(4);
+        self.cursor += 4;
         Ok(result)
     }
 
@@ -503,7 +540,7 @@ impl<'a> Reader<'a> {
     pub fn read_f64(&mut self) -> Result<f64, Error> {
         let slice = self.slice_after_cursor();
         let result = LittleEndian::read_f64(slice);
-        self.move_next(8);
+        self.cursor += 8;
         Ok(result)
     }
 
diff --git a/rust/fory-core/src/error.rs b/rust/fory-core/src/error.rs
index 0218b9047..12c119364 100644
--- a/rust/fory-core/src/error.rs
+++ b/rust/fory-core/src/error.rs
@@ -15,22 +15,30 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! # PERFORMANCE CRITICAL MODULE
+//!
+//! **WARNING**: This module is highly performance-sensitive. Changes to error
+//! constructor attributes (`#[inline]`, `#[cold]`, `#[track_caller]`) can
+//! impact serialization/deserialization performance throughout the entire codebase.
+//!
+//! ## Why This Module Is Performance Critical
+//!
+//! Error constructors are called in **every** buffer read/write operation and type check.
+//! Even though these functions are rarely executed (error paths), their mere presence and
+//! inlining behavior affects how LLVM optimizes the **hot paths** (successful operations).
+
 use std::borrow::Cow;
-use std::sync::OnceLock;
 
 use thiserror::Error;
 
-/// Global flag to check if FORY_PANIC_ON_ERROR environment variable is set.
-static PANIC_ON_ERROR: OnceLock<bool> = OnceLock::new();
+/// Global flag to check if FORY_PANIC_ON_ERROR environment variable is set at compile time.
+/// Set FORY_PANIC_ON_ERROR=1 at compile time to enable panic on error.
+pub const PANIC_ON_ERROR: bool = option_env!("FORY_PANIC_ON_ERROR").is_some();
 
 /// Check if FORY_PANIC_ON_ERROR environment variable is set.
-#[inline]
-pub fn should_panic_on_error() -> bool {
-    *PANIC_ON_ERROR.get_or_init(|| {
-        std::env::var("FORY_PANIC_ON_ERROR")
-            .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
-            .unwrap_or(false)
-    })
+#[inline(always)]
+pub const fn should_panic_on_error() -> bool {
+    PANIC_ON_ERROR
 }
 
 /// Error type for Fory serialization and deserialization operations.
@@ -188,10 +196,11 @@ impl Error {
     /// let err = Error::type_mismatch(1, 2);
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn type_mismatch(type_a: u32, type_b: u32) -> Self {
         let err = Error::TypeMismatch(type_a, type_b);
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -208,10 +217,11 @@ impl Error {
     /// let err = Error::buffer_out_of_bound(10, 20, 25);
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn buffer_out_of_bound(offset: usize, length: usize, capacity: usize) -> Self {
         let err = Error::BufferOutOfBound(offset, length, capacity);
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -229,10 +239,11 @@ impl Error {
     /// let err = Error::encode_error(format!("Failed to encode field {}", "name"));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn encode_error<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::EncodeError(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -250,10 +261,11 @@ impl Error {
     /// let err = Error::invalid_data(format!("Invalid data at position {}", 42));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn invalid_data<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::InvalidData(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -271,10 +283,11 @@ impl Error {
     /// let err = Error::invalid_ref(format!("Invalid ref id {}", 123));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn invalid_ref<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::InvalidRef(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -292,10 +305,11 @@ impl Error {
     /// let err = Error::unknown_enum(format!("Unknown variant {}", 5));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn unknown_enum<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::UnknownEnum(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -313,10 +327,11 @@ impl Error {
     /// let err = Error::type_error(format!("Expected type {}", "String"));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn type_error<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::TypeError(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -334,10 +349,11 @@ impl Error {
     /// let err = Error::encoding_error(format!("Failed to encode as {}", "UTF-8"));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn encoding_error<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::EncodingError(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -355,10 +371,11 @@ impl Error {
     /// let err = Error::depth_exceed(format!("Depth {} exceeds max {}", 100, 64));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn depth_exceed<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::DepthExceed(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -376,10 +393,11 @@ impl Error {
     /// let err = Error::unsupported(format!("Type {} not supported", "MyType"));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn unsupported<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::Uunsupported(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -397,10 +415,11 @@ impl Error {
     /// let err = Error::not_allowed(format!("Cannot perform {}", "delete"));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn not_allowed<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::NotAllowed(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -418,10 +437,11 @@ impl Error {
     /// let err = Error::struct_version_mismatch(format!("Class {} version mismatch", "Foo"));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn struct_version_mismatch<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::StructVersionMismatch(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -443,10 +463,11 @@ impl Error {
     /// let err = Error::unknown(format!("ID:{} not found", 1));
     /// ```
     #[inline(always)]
+    #[cold]
     #[track_caller]
     pub fn unknown<S: Into<Cow<'static, str>>>(s: S) -> Self {
         let err = Error::Unknown(s.into());
-        if should_panic_on_error() {
+        if PANIC_ON_ERROR {
             panic!("FORY_PANIC_ON_ERROR: {}", err);
         }
         err
@@ -465,7 +486,7 @@ impl Error {
     /// let enhanced = Error::enhance_type_error::<String>(err);
     /// // Result: "Type not registered (type: alloc::string::String)"
     /// ```
-    #[inline(always)]
+    #[inline(never)]
     pub fn enhance_type_error<T: ?Sized + 'static>(err: Error) -> Error {
         if let Error::TypeError(s) = err {
             let mut msg = s.to_string();
diff --git a/rust/fory-core/src/serializer/struct_.rs b/rust/fory-core/src/serializer/struct_.rs
index cebeb39ad..68289d48c 100644
--- a/rust/fory-core/src/serializer/struct_.rs
+++ b/rust/fory-core/src/serializer/struct_.rs
@@ -20,8 +20,8 @@ use crate::error::Error;
 use crate::resolver::context::{ReadContext, WriteContext};
 use crate::serializer::Serializer;
 use crate::types::{RefFlag, TypeId};
+use crate::util::ENABLE_FORY_DEBUG_OUTPUT;
 use std::any::Any;
-use std::sync::OnceLock;
 
 #[inline(always)]
 pub fn actual_type_id(type_id: u32, register_by_name: bool, compatible: bool) -> u32 {
@@ -104,19 +104,6 @@ pub fn write<T: Serializer>(
     this.fory_write_data(context)
 }
 
-/// Global flag to check if ENABLE_FORY_DEBUG_OUTPUT environment variable is set.
-static ENABLE_FORY_DEBUG_OUTPUT: OnceLock<bool> = OnceLock::new();
-
-/// Check if ENABLE_FORY_DEBUG_OUTPUT environment variable is set.
-#[inline]
-fn enable_debug_output() -> bool {
-    *ENABLE_FORY_DEBUG_OUTPUT.get_or_init(|| {
-        std::env::var("ENABLE_FORY_DEBUG_OUTPUT")
-            .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
-            .unwrap_or(false)
-    })
-}
-
 pub type BeforeWriteFieldFunc =
     fn(struct_name: &str, field_name: &str, field_value: &dyn Any, context: &mut WriteContext);
 pub type AfterWriteFieldFunc =
@@ -131,7 +118,7 @@ fn default_before_write_field(
     _field_value: &dyn Any,
     context: &mut WriteContext,
 ) {
-    if enable_debug_output() {
+    if ENABLE_FORY_DEBUG_OUTPUT {
         println!(
             "before_write_field:\tstruct={struct_name},\tfield={field_name},\twriter_len={}",
             context.writer.len()
@@ -145,7 +132,7 @@ fn default_after_write_field(
     _field_value: &dyn Any,
     context: &mut WriteContext,
 ) {
-    if enable_debug_output() {
+    if ENABLE_FORY_DEBUG_OUTPUT {
         println!(
             "after_write_field:\tstruct={struct_name},\tfield={field_name},\twriter_len={}",
             context.writer.len()
@@ -154,7 +141,7 @@ fn default_after_write_field(
 }
 
 fn default_before_read_field(struct_name: &str, field_name: &str, context: &mut ReadContext) {
-    if enable_debug_output() {
+    if ENABLE_FORY_DEBUG_OUTPUT {
         println!(
             "before_read_field:\tstruct={struct_name},\tfield={field_name},\treader_cursor={}",
             context.reader.get_cursor()
@@ -168,7 +155,7 @@ fn default_after_read_field(
     _field_value: &dyn Any,
     context: &mut ReadContext,
 ) {
-    if enable_debug_output() {
+    if ENABLE_FORY_DEBUG_OUTPUT {
         println!(
             "after_read_field:\tstruct={struct_name},\tfield={field_name},\treader_cursor={}",
             context.reader.get_cursor()
diff --git a/rust/fory-core/src/util.rs b/rust/fory-core/src/util.rs
index 589a2f77d..1851e859a 100644
--- a/rust/fory-core/src/util.rs
+++ b/rust/fory-core/src/util.rs
@@ -198,3 +198,7 @@ impl<'a, T> DerefMut for SpinlockGuard<'a, T> {
         unsafe { &mut *self.lock.data.get() }
     }
 }
+
+/// Global flag to check if ENABLE_FORY_DEBUG_OUTPUT environment variable is set at compile time.
+/// Set ENABLE_FORY_DEBUG_OUTPUT=1 at compile time to enable debug output.
+pub const ENABLE_FORY_DEBUG_OUTPUT: bool = option_env!("ENABLE_FORY_DEBUG_OUTPUT").is_some();
diff --git a/rust/fory-derive/src/object/util.rs b/rust/fory-derive/src/object/util.rs
index b9d8bd2e8..8d567adc0 100644
--- a/rust/fory-derive/src/object/util.rs
+++ b/rust/fory-derive/src/object/util.rs
@@ -20,12 +20,12 @@ use crate::util::{
     CollectionTraitInfo,
 };
 use fory_core::types::{TypeId, PRIMITIVE_ARRAY_TYPE_MAP};
+use fory_core::util::ENABLE_FORY_DEBUG_OUTPUT;
 use proc_macro2::{Ident, TokenStream};
 use quote::{format_ident, quote, ToTokens};
 use std::cell::RefCell;
 use std::collections::HashMap;
 use std::fmt;
-use std::sync::OnceLock;
 use syn::{Field, GenericArgument, PathArguments, Type};
 
 thread_local! {
@@ -57,19 +57,6 @@ pub(super) fn get_struct_name() -> Option<String> {
     MACRO_CONTEXT.with(|ctx| ctx.borrow().as_ref().map(|c| c.struct_name.clone()))
 }
 
-/// Global flag to check if ENABLE_FORY_DEBUG_OUTPUT environment variable is set.
-static ENABLE_FORY_DEBUG_OUTPUT: OnceLock<bool> = OnceLock::new();
-
-/// Check if ENABLE_FORY_DEBUG_OUTPUT environment variable is set.
-#[inline]
-fn enable_debug_output() -> bool {
-    *ENABLE_FORY_DEBUG_OUTPUT.get_or_init(|| {
-        std::env::var("ENABLE_FORY_DEBUG_OUTPUT")
-            .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
-            .unwrap_or(false)
-    })
-}
-
 pub(super) fn is_debug_enabled() -> bool {
     MACRO_CONTEXT.with(|ctx| {
         ctx.borrow()
@@ -994,7 +981,7 @@ pub(crate) fn compute_struct_version_hash(fields: &[&Field]) -> i32 {
     let version = (hash & 0xFFFF_FFFF) as u32;
     let version = version as i32;
 
-    if enable_debug_output() {
+    if ENABLE_FORY_DEBUG_OUTPUT {
         if let Some(struct_name) = get_struct_name() {
             println!(
                 "[fory-debug] struct {struct_name} version fingerprint=\"{fingerprint}\" hash={version}"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to