This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new 456aec726 feat(xlang): use little endian when serializing array of 
multiple byte element size (#3140)
456aec726 is described below

commit 456aec7261d7471cdf2d770faabbc18767ea5bd7
Author: Shawn Yang <[email protected]>
AuthorDate: Tue Jan 13 16:51:44 2026 +0800

    feat(xlang): use little endian when serializing array of multiple byte 
element size (#3140)
    
    ## Why?
    
    
    
    ## What does this PR do?
    
    - use little endian when serializing arrays whose element size is more than one byte
    - remove endian flag from protocol header
    
    
    ## Related issues
    
    
    
    ## Does this PR introduce any user-facing change?
    
    
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
---
 .github/workflows/ci.yml                           |   6 +-
 cpp/fory/serialization/array_serializer.h          |  41 +++-
 cpp/fory/serialization/fory.h                      |  13 +-
 cpp/fory/serialization/serializer.h                |   8 +-
 dart/packages/fory/lib/src/config/fory_config.dart |   4 -
 .../fory/lib/src/const/fory_header_const.dart      |   5 +-
 dart/packages/fory/lib/src/fory_impl.dart          |   2 -
 .../fory/lib/src/manager/fory_config_manager.dart  |   2 -
 .../fory/lib/src/serializer/array_serializer.dart  |  38 +++-
 .../lib/src/serializer/fory_header_serializer.dart |  10 +-
 .../serializer/typed_data_array_serializer.dart    | 111 ++++++++--
 docs/specification/xlang_serialization_spec.md     |  20 +-
 go/fory/fory.go                                    |  11 +-
 .../src/main/java/org/apache/fory/Fory.java        |  13 +-
 .../java/org/apache/fory/memory/MemoryBuffer.java  | 115 ++++++----
 .../apache/fory/serializer/ArraySerializers.java   | 234 +++++++++++++++++++--
 .../org/apache/fory/memory/MemoryBufferTest.java   |   4 -
 .../org/apache/fory/xlang/PyCrossLanguageTest.java |  24 +--
 javascript/packages/fory/lib/fory.ts               |   5 -
 javascript/packages/fory/lib/type.ts               |   5 +-
 javascript/test/fory.test.ts                       |  20 +-
 python/pyfory/_fory.py                             |  21 +-
 python/pyfory/buffer.pyx                           |   4 +
 python/pyfory/serialization.pyx                    |  23 +-
 python/pyfory/serializer.py                        |  58 ++++-
 rust/fory-core/src/fory.rs                         |  19 +-
 rust/fory-core/src/serializer/primitive_list.rs    |  43 +++-
 rust/fory-core/src/types.rs                        |   5 +-
 28 files changed, 609 insertions(+), 255 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 42f8607e2..cc37b856c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -72,8 +72,10 @@ jobs:
         uses: actions/upload-artifact@v4
         if: ${{ !cancelled() }}
         with:
-          name: surefire-reports-${{ matrix.java-version }}
-          path: "**/target/surefire-reports/**"
+          name: test-reports-${{ matrix.java-version }}
+          path: |
+            **/target/surefire-reports/**
+            **/hs_err*.log
 
   openj9:
     name: Openj9 Java CI
diff --git a/cpp/fory/serialization/array_serializer.h 
b/cpp/fory/serialization/array_serializer.h
index a5be9925a..1d54bc56e 100644
--- a/cpp/fory/serialization/array_serializer.h
+++ b/cpp/fory/serialization/array_serializer.h
@@ -20,6 +20,7 @@
 #pragma once
 
 #include "fory/serialization/serializer.h"
+#include "fory/util/bit_util.h"
 #include <array>
 
 namespace fory {
@@ -91,12 +92,23 @@ struct Serializer<
     constexpr size_t max_size = 8 + N * sizeof(T);
     buffer.Grow(static_cast<uint32_t>(max_size));
     uint32_t writer_index = buffer.writer_index();
-    // Write array length
-    writer_index += buffer.PutVarUint32(writer_index, 
static_cast<uint32_t>(N));
+    // Write array length in bytes
+    writer_index +=
+        buffer.PutVarUint32(writer_index, static_cast<uint32_t>(N * 
sizeof(T)));
 
-    // Write raw binary data
+    // Write data
     if constexpr (N > 0) {
-      buffer.UnsafePut(writer_index, arr.data(), N * sizeof(T));
+      if constexpr (FORY_LITTLE_ENDIAN || sizeof(T) == 1) {
+        // Fast path: direct memory copy on little-endian or for single-byte
+        // types
+        buffer.UnsafePut(writer_index, arr.data(), N * sizeof(T));
+      } else {
+        // Slow path: element-by-element write on big-endian machines
+        for (size_t i = 0; i < N; ++i) {
+          T value = util::ToLittleEndian(arr[i]);
+          buffer.UnsafePut(writer_index + i * sizeof(T), &value, sizeof(T));
+        }
+      }
     }
     buffer.WriterIndex(writer_index + N * sizeof(T));
   }
@@ -127,12 +139,13 @@ struct Serializer<
   }
 
   static inline std::array<T, N> read_data(ReadContext &ctx) {
-    // Read array length
-    uint32_t length = ctx.read_varuint32(ctx.error());
+    // Read array length in bytes
+    uint32_t size_bytes = ctx.read_varuint32(ctx.error());
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::array<T, N>();
     }
 
+    uint32_t length = size_bytes / sizeof(T);
     if (length != N) {
       ctx.set_error(Error::invalid_data("Array size mismatch: expected " +
                                         std::to_string(N) + " but got " +
@@ -142,7 +155,21 @@ struct Serializer<
 
     std::array<T, N> arr;
     if constexpr (N > 0) {
-      ctx.read_bytes(arr.data(), N * sizeof(T), ctx.error());
+      if constexpr (FORY_LITTLE_ENDIAN || sizeof(T) == 1) {
+        // Fast path: direct memory copy on little-endian or for single-byte
+        // types
+        ctx.read_bytes(arr.data(), N * sizeof(T), ctx.error());
+      } else {
+        // Slow path: element-by-element read on big-endian machines
+        for (size_t i = 0; i < N; ++i) {
+          T value;
+          ctx.read_bytes(&value, sizeof(T), ctx.error());
+          if (FORY_PREDICT_FALSE(ctx.has_error())) {
+            return arr;
+          }
+          arr[i] = util::ToLittleEndian(value); // ToLittleEndian swaps on BE
+        }
+      }
     }
     return arr;
   }
diff --git a/cpp/fory/serialization/fory.h b/cpp/fory/serialization/fory.h
index 0637c9697..6f6855508 100644
--- a/cpp/fory/serialization/fory.h
+++ b/cpp/fory/serialization/fory.h
@@ -485,10 +485,6 @@ public:
     if (header.is_null) {
       return Unexpected(Error::invalid_data("Cannot deserialize null object"));
     }
-    if (header.is_little_endian != is_little_endian_system()) {
-      return Unexpected(
-          Error::unsupported("Cross-endian deserialization not yet 
supported"));
-    }
 
     read_ctx_->attach(buffer);
     ReadContextGuard guard(*read_ctx_);
@@ -526,10 +522,6 @@ public:
     if (header.is_null) {
       return Unexpected(Error::invalid_data("Cannot deserialize null object"));
     }
-    if (header.is_little_endian != is_little_endian_system()) {
-      return Unexpected(
-          Error::unsupported("Cross-endian deserialization not yet 
supported"));
-    }
 
     read_ctx_->attach(buffer);
     ReadContextGuard guard(*read_ctx_);
@@ -593,11 +585,8 @@ private:
     uint16_t header = 0;
     // Flags byte at position 0
     uint8_t flags = 0;
-    if (is_little_endian_system()) {
-      flags |= (1 << 1); // bit 1: endian flag
-    }
     if (xlang) {
-      flags |= (1 << 2); // bit 2: xlang flag
+      flags |= (1 << 1); // bit 1: xlang flag
     }
     header |= flags;
     // Language byte at position 1 (only used if xlang)
diff --git a/cpp/fory/serialization/serializer.h 
b/cpp/fory/serialization/serializer.h
index ae6c4f24e..16b9c67b2 100644
--- a/cpp/fory/serialization/serializer.h
+++ b/cpp/fory/serialization/serializer.h
@@ -88,7 +88,6 @@ inline bool is_little_endian_system() {
 /// Fory header information
 struct HeaderInfo {
   bool is_null;
-  bool is_little_endian;
   bool is_xlang;
   bool is_oob;
   Language language;
@@ -112,14 +111,13 @@ inline Result<HeaderInfo, Error> read_header(Buffer 
&buffer) {
   // Read flags byte
   uint8_t flags = buffer.GetByteAs<uint8_t>(start_pos);
   info.is_null = (flags & (1 << 0)) != 0;
-  info.is_little_endian = (flags & (1 << 1)) != 0;
-  info.is_xlang = (flags & (1 << 2)) != 0;
-  info.is_oob = (flags & (1 << 3)) != 0;
+  info.is_xlang = (flags & (1 << 1)) != 0;
+  info.is_oob = (flags & (1 << 2)) != 0;
 
   // Update reader index (1 byte consumed: flags)
   buffer.IncreaseReaderIndex(1);
 
-  // Java writes a language byte after header in xlang mode - read and ignore 
it
+  // Language byte after header in xlang mode
   if (info.is_xlang) {
     Error error;
     uint8_t lang_byte = buffer.ReadUint8(error);
diff --git a/dart/packages/fory/lib/src/config/fory_config.dart 
b/dart/packages/fory/lib/src/config/fory_config.dart
index 5a319f1f8..2c1385fc8 100644
--- a/dart/packages/fory/lib/src/config/fory_config.dart
+++ b/dart/packages/fory/lib/src/config/fory_config.dart
@@ -21,7 +21,6 @@ import 'package:fory/src/config/config.dart';
 
 class ForyConfig extends Config{
   final int  _configId;
-  //final bool _isLittleEndian;
   final bool _refTracking;
   final bool _basicTypesRefIgnored;
   final bool _timeRefIgnored;
@@ -29,7 +28,6 @@ class ForyConfig extends Config{
 
   ForyConfig.onlyForManager(
     this._configId, {
-    bool isLittleEndian = true,
     bool refTracking = true,
     bool basicTypesRefIgnored = true,
     bool timeRefIgnored = true,
@@ -42,11 +40,9 @@ class ForyConfig extends Config{
   {
     // some checking works
     // assert(_xlangMode == true, 'currently only support xlang mode');
-    //assert(_isLittleEndian == true, 'Non-Little-Endian format detected. Only 
Little-Endian is supported.');
   }
 
   //getters
-  //bool get isLittleEndian => _isLittleEndian;
   bool get refTracking => _refTracking;
   int get configId => _configId;
   bool get basicTypesRefIgnored => _basicTypesRefIgnored;
diff --git a/dart/packages/fory/lib/src/const/fory_header_const.dart 
b/dart/packages/fory/lib/src/const/fory_header_const.dart
index 36758952d..60729799e 100644
--- a/dart/packages/fory/lib/src/const/fory_header_const.dart
+++ b/dart/packages/fory/lib/src/const/fory_header_const.dart
@@ -19,7 +19,6 @@
 
 class ForyHeaderConst{
   static const int nullFlag = 1;
-  static const int littleEndianFlag = 1 << 1;
-  static const int crossLanguageFlag = 1 << 2;
-  static const int outOfBandFlag = 1 << 3;
+  static const int crossLanguageFlag = 1 << 1;
+  static const int outOfBandFlag = 1 << 2;
 }
\ No newline at end of file
diff --git a/dart/packages/fory/lib/src/fory_impl.dart 
b/dart/packages/fory/lib/src/fory_impl.dart
index 41617731d..016d3f214 100644
--- a/dart/packages/fory/lib/src/fory_impl.dart
+++ b/dart/packages/fory/lib/src/fory_impl.dart
@@ -40,13 +40,11 @@ final class Fory implements BaseFory{
   late final XtypeResolver _xtypeResolver;
 
   Fory({
-    bool isLittleEndian = true,
     bool refTracking = true,
     bool basicTypesRefIgnored = true,
     bool timeRefIgnored = true,
     // bool stringRefIgnored = true,
   }) : _conf = ForyConfigManager.inst.createConfig(
-    isLittleEndian: isLittleEndian,
     refTracking: refTracking,
     basicTypesRefIgnored: basicTypesRefIgnored,
     timeRefIgnored: timeRefIgnored,
diff --git a/dart/packages/fory/lib/src/manager/fory_config_manager.dart 
b/dart/packages/fory/lib/src/manager/fory_config_manager.dart
index 58eaaccbf..5166677fa 100644
--- a/dart/packages/fory/lib/src/manager/fory_config_manager.dart
+++ b/dart/packages/fory/lib/src/manager/fory_config_manager.dart
@@ -29,7 +29,6 @@ class ForyConfigManager{
   int get nextConfigId => configId++;
 
   ForyConfig createConfig({
-    bool isLittleEndian = true,
     bool refTracking = true,
     bool basicTypesRefIgnored = true,
     bool timeRefIgnored = true,
@@ -37,7 +36,6 @@ class ForyConfigManager{
   }) {
     return ForyConfig.onlyForManager(
       nextConfigId,
-      isLittleEndian: isLittleEndian,
       refTracking: refTracking,
       basicTypesRefIgnored: basicTypesRefIgnored,
       timeRefIgnored: timeRefIgnored,
diff --git a/dart/packages/fory/lib/src/serializer/array_serializer.dart 
b/dart/packages/fory/lib/src/serializer/array_serializer.dart
index 4e6e4bf41..4ae42f747 100644
--- a/dart/packages/fory/lib/src/serializer/array_serializer.dart
+++ b/dart/packages/fory/lib/src/serializer/array_serializer.dart
@@ -27,6 +27,9 @@ import 'package:fory/src/config/fory_config.dart';
 import 'package:fory/src/serializer/serializer.dart';
 import 'package:fory/src/serializer/serializer_cache.dart';
 
+/// Whether the host machine is little-endian
+const bool isLittleEndian = Endian.host == Endian.little;
+
 abstract base class ArraySerializerCache extends SerializerCache{
   const ArraySerializerCache();
 
@@ -44,14 +47,29 @@ abstract base class ArraySerializer<T> extends 
Serializer<List<T>> {
 abstract base class NumericArraySerializer<T extends num> extends 
ArraySerializer<T> {
   const NumericArraySerializer(super.type, super.writeRef);
 
-  TypedDataList<T> readToList(Uint8List copiedMem);
+  /// Reads bytes and converts to a typed list.
+  /// Subclasses must handle endianness for multi-byte types.
+  TypedDataList<T> readToList(Uint8List copiedMem, ByteReader br);
 
   int get bytesPerNum;
 
   @override
   TypedDataList<T> read(ByteReader br, int refId, DeserializerPack pack) {
-    int num = br.readVarUint32Small7();
-    return readToList(br.copyBytes(num));
+    int numBytes = br.readVarUint32Small7();
+    int length = numBytes ~/ bytesPerNum;
+    if (isLittleEndian || bytesPerNum == 1) {
+      // Fast path: direct memory copy on little-endian or for single-byte 
types
+      return readToList(br.copyBytes(numBytes), br);
+    } else {
+      // Slow path: element-by-element read on big-endian machines
+      return readToListBigEndian(length, br);
+    }
+  }
+
+  /// Read elements one by one on big-endian machines.
+  /// Default implementation; subclasses should override for multi-byte types.
+  TypedDataList<T> readToListBigEndian(int length, ByteReader br) {
+    throw UnsupportedError('readToListBigEndian not implemented for 
$runtimeType');
   }
 
   @override
@@ -60,6 +78,18 @@ abstract base class NumericArraySerializer<T extends num> 
extends ArraySerialize
       throw ArgumentError('NumArray lengthInBytes is not valid int32: 
${v.lengthInBytes}');
     }
     bw.writeVarUint32(v.lengthInBytes);
-    bw.writeBytes(v.buffer.asUint8List(v.offsetInBytes, v.lengthInBytes));
+    if (isLittleEndian || bytesPerNum == 1) {
+      // Fast path: direct memory copy on little-endian or for single-byte 
types
+      bw.writeBytes(v.buffer.asUint8List(v.offsetInBytes, v.lengthInBytes));
+    } else {
+      // Slow path: element-by-element write on big-endian machines
+      writeListBigEndian(bw, v);
+    }
+  }
+
+  /// Write elements one by one on big-endian machines.
+  /// Default implementation; subclasses should override for multi-byte types.
+  void writeListBigEndian(ByteWriter bw, TypedDataList<T> v) {
+    throw UnsupportedError('writeListBigEndian not implemented for 
$runtimeType');
   }
 }
\ No newline at end of file
diff --git a/dart/packages/fory/lib/src/serializer/fory_header_serializer.dart 
b/dart/packages/fory/lib/src/serializer/fory_header_serializer.dart
index eeacd3947..bc8931090 100644
--- a/dart/packages/fory/lib/src/serializer/fory_header_serializer.dart
+++ b/dart/packages/fory/lib/src/serializer/fory_header_serializer.dart
@@ -25,7 +25,6 @@ import 'package:fory/src/memory/byte_reader.dart';
 import 'package:fory/src/memory/byte_writer.dart';
 
 typedef HeaderBrief = ({
-  bool isLittleEndian,
   bool isXLang,
   Language peerLang,
   bool oobEnabled,
@@ -43,11 +42,6 @@ final class ForyHeaderSerializer {
     if ((bitmap & ForyHeaderConst.nullFlag) != 0){
       return null;
     }
-    // header: endian
-    bool isLittleEndian = (bitmap & ForyHeaderConst.littleEndianFlag) != 0;
-    if (!isLittleEndian){
-      throw ArgumentError('Non-Little-Endian format detected. Only 
Little-Endian is supported.');
-    }
     // header: xlang
     bool isXLang = (bitmap & ForyHeaderConst.crossLanguageFlag) != 0;
     assert (isXLang, 'Now Fory Dart only supports xlang mode');
@@ -66,7 +60,6 @@ final class ForyHeaderSerializer {
     //   );
     // }
     return (
-      isLittleEndian: isLittleEndian,
       isXLang: isXLang,
       peerLang: Language.values[peerLangInd],
       oobEnabled: oobEnabled,
@@ -74,8 +67,7 @@ final class ForyHeaderSerializer {
   }
 
   void write(ByteWriter bd, bool objNull, ForyConfig conf) {
-    int bitmap = ForyHeaderConst.littleEndianFlag;
-    bitmap |= ForyHeaderConst.crossLanguageFlag;
+    int bitmap = ForyHeaderConst.crossLanguageFlag;
     if (objNull){
       bitmap |= ForyHeaderConst.nullFlag;
     }
diff --git 
a/dart/packages/fory/lib/src/serializer/typed_data_array_serializer.dart 
b/dart/packages/fory/lib/src/serializer/typed_data_array_serializer.dart
index abf22c4f9..cefa513c7 100644
--- a/dart/packages/fory/lib/src/serializer/typed_data_array_serializer.dart
+++ b/dart/packages/fory/lib/src/serializer/typed_data_array_serializer.dart
@@ -20,6 +20,8 @@
 import 'dart:typed_data';
 import 'package:fory/src/const/obj_type.dart';
 import 'package:fory/src/dev_annotation/optimize.dart';
+import 'package:fory/src/memory/byte_reader.dart';
+import 'package:fory/src/memory/byte_writer.dart';
 import 'package:fory/src/serializer/array_serializer.dart';
 import 'package:fory/src/serializer/serializer_cache.dart';
 
@@ -49,8 +51,8 @@ final class Uint8ListSerializer extends 
NumericArraySerializer<int> {
 
   @inline
   @override
-  TypedDataList<int> readToList(Uint8List copiedMem) {
-    // need copy
+  TypedDataList<int> readToList(Uint8List copiedMem, ByteReader br) {
+    // Single-byte type, no endian conversion needed
     return copiedMem;
   }
 
@@ -81,8 +83,8 @@ final class Int8ListSerializer extends 
NumericArraySerializer<int> {
   const Int8ListSerializer(bool writeRef) : super(ObjType.INT8_ARRAY, 
writeRef);
 
   @override
-  TypedDataList<int> readToList(Uint8List copiedMem) {
-    // need copy
+  TypedDataList<int> readToList(Uint8List copiedMem, ByteReader br) {
+    // Single-byte type, no endian conversion needed
     Int8List list = copiedMem.buffer.asInt8List();
     return list;
   }
@@ -114,12 +116,28 @@ final class Int16ListSerializer extends 
NumericArraySerializer<int> {
   const Int16ListSerializer(bool writeRef) : super(ObjType.INT16_ARRAY, 
writeRef);
 
   @override
-  TypedDataList<int> readToList(Uint8List copiedMem) {
-    // need copy
+  TypedDataList<int> readToList(Uint8List copiedMem, ByteReader br) {
+    // Fast path: direct memory interpretation (only called on little-endian)
     Int16List list = copiedMem.buffer.asInt16List();
     return list;
   }
 
+  @override
+  TypedDataList<int> readToListBigEndian(int length, ByteReader br) {
+    Int16List list = Int16List(length);
+    for (int i = 0; i < length; i++) {
+      list[i] = br.readInt16();
+    }
+    return list;
+  }
+
+  @override
+  void writeListBigEndian(ByteWriter bw, TypedDataList<int> v) {
+    for (int i = 0; i < v.length; i++) {
+      bw.writeInt16(v[i]);
+    }
+  }
+
   @override
   int get bytesPerNum => 2;
 }
@@ -147,12 +165,28 @@ final class Int32ListSerializer extends 
NumericArraySerializer<int> {
   const Int32ListSerializer(bool writeRef) : super(ObjType.INT32_ARRAY, 
writeRef);
 
   @override
-  TypedDataList<int> readToList(Uint8List copiedMem) {
-    // need copy
+  TypedDataList<int> readToList(Uint8List copiedMem, ByteReader br) {
+    // Fast path: direct memory interpretation (only called on little-endian)
     Int32List list = copiedMem.buffer.asInt32List();
     return list;
   }
 
+  @override
+  TypedDataList<int> readToListBigEndian(int length, ByteReader br) {
+    Int32List list = Int32List(length);
+    for (int i = 0; i < length; i++) {
+      list[i] = br.readInt32();
+    }
+    return list;
+  }
+
+  @override
+  void writeListBigEndian(ByteWriter bw, TypedDataList<int> v) {
+    for (int i = 0; i < v.length; i++) {
+      bw.writeInt32(v[i]);
+    }
+  }
+
   @override
   int get bytesPerNum => 4;
 }
@@ -198,12 +232,28 @@ final class Int64ListSerializer extends 
NumericArraySerializer<int> {
   const Int64ListSerializer(bool writeRef) : super(ObjType.INT64_ARRAY, 
writeRef);
 
   @override
-  TypedDataList<int> readToList(Uint8List copiedMem) {
-    // need copy
+  TypedDataList<int> readToList(Uint8List copiedMem, ByteReader br) {
+    // Fast path: direct memory interpretation (only called on little-endian)
     Int64List list = copiedMem.buffer.asInt64List();
     return list;
   }
 
+  @override
+  TypedDataList<int> readToListBigEndian(int length, ByteReader br) {
+    Int64List list = Int64List(length);
+    for (int i = 0; i < length; i++) {
+      list[i] = br.readInt64();
+    }
+    return list;
+  }
+
+  @override
+  void writeListBigEndian(ByteWriter bw, TypedDataList<int> v) {
+    for (int i = 0; i < v.length; i++) {
+      bw.writeInt64(v[i]);
+    }
+  }
+
   @override
   int get bytesPerNum => 8;
 }
@@ -214,11 +264,28 @@ final class Float32ListSerializer extends 
NumericArraySerializer<double> {
   const Float32ListSerializer(bool writeRef) : super(ObjType.FLOAT32_ARRAY, 
writeRef);
 
   @override
-  TypedDataList<double> readToList(Uint8List copiedMem) {
-    // need copy
+  TypedDataList<double> readToList(Uint8List copiedMem, ByteReader br) {
+    // Fast path: direct memory interpretation (only called on little-endian)
     Float32List list = copiedMem.buffer.asFloat32List();
     return list;
   }
+
+  @override
+  TypedDataList<double> readToListBigEndian(int length, ByteReader br) {
+    Float32List list = Float32List(length);
+    for (int i = 0; i < length; i++) {
+      list[i] = br.readFloat32();
+    }
+    return list;
+  }
+
+  @override
+  void writeListBigEndian(ByteWriter bw, TypedDataList<double> v) {
+    for (int i = 0; i < v.length; i++) {
+      bw.writeFloat32(v[i]);
+    }
+  }
+
   @override
   int get bytesPerNum => 4;
 }
@@ -246,12 +313,28 @@ final class Float64ListSerializer extends 
NumericArraySerializer<double> {
   const Float64ListSerializer(bool writeRef) : super(ObjType.FLOAT64_ARRAY, 
writeRef);
 
   @override
-  TypedDataList<double> readToList(Uint8List copiedMem) {
-    // need copy
+  TypedDataList<double> readToList(Uint8List copiedMem, ByteReader br) {
+    // Fast path: direct memory interpretation (only called on little-endian)
     Float64List list = copiedMem.buffer.asFloat64List();
     return list;
   }
 
+  @override
+  TypedDataList<double> readToListBigEndian(int length, ByteReader br) {
+    Float64List list = Float64List(length);
+    for (int i = 0; i < length; i++) {
+      list[i] = br.readFloat64();
+    }
+    return list;
+  }
+
+  @override
+  void writeListBigEndian(ByteWriter bw, TypedDataList<double> v) {
+    for (int i = 0; i < v.length; i++) {
+      bw.writeFloat64(v[i]);
+    }
+  }
+
   @override
   int get bytesPerNum => 8;
 }
\ No newline at end of file
diff --git a/docs/specification/xlang_serialization_spec.md 
b/docs/specification/xlang_serialization_spec.md
index ef653cd96..49084975d 100644
--- a/docs/specification/xlang_serialization_spec.md
+++ b/docs/specification/xlang_serialization_spec.md
@@ -243,8 +243,7 @@ Here is the overall format:
 | fory header | object ref meta | object type meta | object value data |
 ```
 
-The data are serialized using little endian byte order overall. If bytes swap 
is costly for some object,
-Fory will write the byte order for that object into the data instead of 
converting it to little endian.
+The data are serialized using little endian byte order for all types.
 
 ## Fory header
 
@@ -261,18 +260,19 @@ Detailed byte layout:
 ```
 Byte 0:   Bitmap flags
           - Bit 0: null flag (0x01)
-          - Bit 1: endian flag (0x02)
-          - Bit 2: xlang flag (0x04)
-          - Bit 3: oob flag (0x08)
-          - Bits 4-7: reserved
+          - Bit 1: xlang flag (0x02)
+          - Bit 2: oob flag (0x04)
+          - Bits 3-7: reserved
 Byte 1:   Language ID (only present when xlang flag is set)
 Byte 2-5: Meta start offset (only present when meta share mode is enabled)
 ```
 
-- **null flag** (bit 0): 1 when object is null, 0 otherwise. If an object is 
null, only this flag and endian flag are set.
-- **endian flag** (bit 1): 1 when data is encoded by little endian, 0 for big 
endian. Modern implementations always use little endian.
-- **xlang flag** (bit 2): 1 when serialization uses Fory xlang format, 0 when 
serialization uses Fory language-native format.
-- **oob flag** (bit 3): 1 when out-of-band serialization is enabled 
(BufferCallback is not null), 0 otherwise.
+- **null flag** (bit 0): 1 when object is null, 0 otherwise. If an object is 
null, only this flag is set.
+- **xlang flag** (bit 1): 1 when serialization uses Fory xlang format, 0 when 
serialization uses Fory language-native format.
+- **oob flag** (bit 2): 1 when out-of-band serialization is enabled 
(BufferCallback is not null), 0 otherwise.
+
+All data is encoded in little-endian format.
+
 - **language**: 1 byte indicating the source language. This allows 
deserializers to optimize for specific language characteristics.
 
 ### Language IDs
diff --git a/go/fory/fory.go b/go/fory/fory.go
index 970374e70..49e3ca2c1 100644
--- a/go/fory/fory.go
+++ b/go/fory/fory.go
@@ -18,7 +18,6 @@
 package fory
 
 import (
-       "encoding/binary"
        "errors"
        "fmt"
        "reflect"
@@ -49,10 +48,9 @@ const (
 
 // Bitmap flags for protocol header
 const (
-       IsNilFlag        = 1
-       LittleEndianFlag = 2
-       XLangFlag        = 4
-       OutOfBandFlag    = 8
+       IsNilFlag     = 1 << 0
+       XLangFlag     = 1 << 1
+       OutOfBandFlag = 1 << 2
 )
 
 // ============================================================================
@@ -847,9 +845,6 @@ func (f *Fory) serializeReflectValue(value reflect.Value) 
([]byte, error) {
 // writeHeader writes the Fory protocol header
 func writeHeader(ctx *WriteContext, config Config) {
        var bitmap byte = 0
-       if nativeEndian == binary.LittleEndian {
-               bitmap |= LittleEndianFlag
-       }
        if config.IsXlang {
                bitmap |= XLangFlag
        }
diff --git a/java/fory-core/src/main/java/org/apache/fory/Fory.java 
b/java/fory-core/src/main/java/org/apache/fory/Fory.java
index dd9c11e30..ada96d291 100644
--- a/java/fory-core/src/main/java/org/apache/fory/Fory.java
+++ b/java/fory-core/src/main/java/org/apache/fory/Fory.java
@@ -22,7 +22,6 @@ package org.apache.fory;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.nio.ByteOrder;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -99,11 +98,8 @@ public final class Fory implements BaseFory {
   public static final byte REF_VALUE_FLAG = 0;
   public static final byte NOT_SUPPORT_XLANG = 0;
   private static final byte isNilFlag = 1;
-  private static final byte isLittleEndianFlag = 1 << 1;
-  private static final byte isCrossLanguageFlag = 1 << 2;
-  private static final byte isOutOfBandFlag = 1 << 3;
-  private static final boolean isLittleEndian = ByteOrder.nativeOrder() == 
ByteOrder.LITTLE_ENDIAN;
-  private static final byte BITMAP = isLittleEndian ? isLittleEndianFlag : 0;
+  private static final byte isCrossLanguageFlag = 1 << 1;
+  private static final byte isOutOfBandFlag = 1 << 2;
 
   private final Config config;
   private final boolean refTracking;
@@ -297,7 +293,7 @@ public final class Fory implements BaseFory {
 
   @Override
   public MemoryBuffer serialize(MemoryBuffer buffer, Object obj, 
BufferCallback callback) {
-    byte bitmap = BITMAP;
+    byte bitmap = 0;
     if (crossLanguage) {
       bitmap |= isCrossLanguageFlag;
     }
@@ -831,9 +827,6 @@ public final class Fory implements BaseFory {
       if ((bitmap & isNilFlag) == isNilFlag) {
         return null;
       }
-      Preconditions.checkArgument(
-          Fory.isLittleEndian,
-          "Non-Little-Endian format detected. Only Little-Endian is 
supported.");
       boolean isTargetXLang = (bitmap & isCrossLanguageFlag) == 
isCrossLanguageFlag;
       if (isTargetXLang) {
         peerLanguage = Language.values()[buffer.readByte()];
diff --git 
a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java 
b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java
index 9ce16158e..41fbc2b48 100644
--- a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java
+++ b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java
@@ -468,6 +468,22 @@ public final class MemoryBuffer {
     UNSAFE.putShort(heapMemory, pos, value);
   }
 
+  // CHECKSTYLE.OFF:MethodName
+  public short _unsafeGetInt16(int index) {
+    // CHECKSTYLE.ON:MethodName
+    short v = UNSAFE.getShort(heapMemory, address + index);
+    return LITTLE_ENDIAN ? v : Short.reverseBytes(v);
+  }
+
+  // CHECKSTYLE.OFF:MethodName
+  public void _unsafePutInt16(int index, short value) {
+    // CHECKSTYLE.ON:MethodName
+    if (!LITTLE_ENDIAN) {
+      value = Short.reverseBytes(value);
+    }
+    UNSAFE.putShort(heapMemory, address + index, value);
+  }
+
   public int getInt32(int index) {
     final long pos = address + index;
     checkPosition(index, pos, 4);
@@ -485,7 +501,7 @@ public final class MemoryBuffer {
   }
 
   // CHECKSTYLE.OFF:MethodName
-  private int _unsafeGetInt32(int index) {
+  public int _unsafeGetInt32(int index) {
     // CHECKSTYLE.ON:MethodName
     int v = UNSAFE.getInt(heapMemory, address + index);
     return LITTLE_ENDIAN ? v : Integer.reverseBytes(v);
@@ -517,14 +533,14 @@ public final class MemoryBuffer {
   }
 
   // CHECKSTYLE.OFF:MethodName
-  long _unsafeGetInt64(int index) {
+  public long _unsafeGetInt64(int index) {
     // CHECKSTYLE.ON:MethodName
     long v = UNSAFE.getLong(heapMemory, address + index);
     return LITTLE_ENDIAN ? v : Long.reverseBytes(v);
   }
 
   // CHECKSTYLE.OFF:MethodName
-  private void _unsafePutInt64(int index, long value) {
+  public void _unsafePutInt64(int index, long value) {
     // CHECKSTYLE.ON:MethodName
     if (!LITTLE_ENDIAN) {
       value = Long.reverseBytes(value);
@@ -745,7 +761,9 @@ public final class MemoryBuffer {
    */
   public int writeVarInt32(int v) {
     ensure(writerIndex + 8);
-    int varintBytes = _unsafePutVarUint36Small(writerIndex, ((long) v << 1) ^ 
(v >> 31));
+    // Zigzag encoding: maps negative values to positive values
+    // This works entirely in int without conversion to long
+    int varintBytes = _unsafePutVarUint32(writerIndex, (v << 1) ^ (v >> 31));
     writerIndex += varintBytes;
     return varintBytes;
   }
@@ -758,8 +776,8 @@ public final class MemoryBuffer {
   // CHECKSTYLE.OFF:MethodName
   public int _unsafeWriteVarInt32(int v) {
     // CHECKSTYLE.ON:MethodName
-    // Ensure negatives close to zero is encode in little bytes.
-    int varintBytes = _unsafePutVarUint36Small(writerIndex, ((long) v << 1) ^ 
(v >> 31));
+    // Zigzag encoding ensures negatives close to zero are encoded in few bytes
+    int varintBytes = _unsafePutVarUint32(writerIndex, (v << 1) ^ (v >> 31));
     writerIndex += varintBytes;
     return varintBytes;
   }
@@ -774,10 +792,7 @@ public final class MemoryBuffer {
     // generated code is smaller. Otherwise, `MapRefResolver.writeRefOrNull`
     // may be `callee is too large`/`already compiled into a big method`
     ensure(writerIndex + 8);
-    // Use Integer.toUnsignedLong to handle values > INT32_MAX correctly
-    // Without this, negative int values would be sign-extended to long,
-    // causing incorrect varint encoding (9+ bytes instead of 5)
-    int varintBytes = _unsafePutVarUint36Small(writerIndex, 
Integer.toUnsignedLong(v));
+    int varintBytes = _unsafePutVarUint32(writerIndex, v);
     writerIndex += varintBytes;
     return varintBytes;
   }
@@ -789,8 +804,7 @@ public final class MemoryBuffer {
   // CHECKSTYLE.OFF:MethodName
   public int _unsafeWriteVarUint32(int v) {
     // CHECKSTYLE.ON:MethodName
-    // Use Integer.toUnsignedLong to handle values > INT32_MAX correctly
-    int varintBytes = _unsafePutVarUint36Small(writerIndex, 
Integer.toUnsignedLong(v));
+    int varintBytes = _unsafePutVarUint32(writerIndex, v);
     writerIndex += varintBytes;
     return varintBytes;
   }
@@ -822,6 +836,54 @@ public final class MemoryBuffer {
     return diff;
   }
 
+  /**
+   * Writes an unsigned 32-bit varint at the given index using int operations. 
Caller must ensure
+   * there are at least 8 bytes available for writing. This method avoids 
int-to-long conversion
+   * overhead for the common cases (1-4 bytes).
+   *
+   * @param index the position to write at
+   * @param value the unsigned 32-bit value (high bit may be set)
+   * @return the number of bytes written (1-5)
+   */
+  // CHECKSTYLE.OFF:MethodName
+  public int _unsafePutVarUint32(int index, int value) {
+    // CHECKSTYLE.ON:MethodName
+    int encoded = (value & 0x7F);
+    if (value >>> 7 == 0) {
+      UNSAFE.putByte(heapMemory, address + index, (byte) value);
+      return 1;
+    }
+    // bit 8 `set` indicates have next data bytes.
+    // 0x3f80: 0b1111111 << 7
+    encoded |= (((value & 0x3f80) << 1) | 0x80);
+    if (value >>> 14 == 0) {
+      _unsafePutInt32(index, encoded);
+      return 2;
+    }
+    return continuePutVarUint32(index, encoded, value);
+  }
+
+  private int continuePutVarUint32(int index, int encoded, int value) {
+    // 0x1fc000: 0b1111111 << 14
+    encoded |= (((value & 0x1fc000) << 2) | 0x8000);
+    if (value >>> 21 == 0) {
+      _unsafePutInt32(index, encoded);
+      return 3;
+    }
+    // 0xfe00000: 0b1111111 << 21
+    encoded |= ((value & 0xfe00000) << 3) | 0x800000;
+    if (value >>> 28 == 0) {
+      _unsafePutInt32(index, encoded);
+      return 4;
+    }
+    // 5-byte case: bits 28-31 go to the 5th byte
+    // Need long for the final write to include the 5th byte
+    long encodedLong = Integer.toUnsignedLong(encoded) | 0x80000000L;
+    encodedLong |= (long) (value >>> 28) << 32;
+    _unsafePutInt64(index, encodedLong);
+    return 5;
+  }
+
   /**
    * Caller must ensure there must be at least 8 bytes for writing, otherwise 
the crash may occur.
    * Don't pass int value to avoid sign extension.
@@ -1256,15 +1318,6 @@ public final class MemoryBuffer {
     writerIndex = idx + numBytes;
   }
 
-  public void writePrimitiveArrayAlignedSize(Object arr, int offset, int 
numBytes) {
-    writeVarUint32Aligned(numBytes);
-    final int writerIdx = writerIndex;
-    final int newIdx = writerIdx + numBytes;
-    ensure(newIdx);
-    Platform.copyMemory(arr, offset, heapMemory, address + writerIdx, 
numBytes);
-    writerIndex = newIdx;
-  }
-
   public void writePrimitiveArray(Object arr, int offset, int numBytes) {
     final int writerIdx = writerIndex;
     final int newIdx = writerIdx + numBytes;
@@ -2428,21 +2481,6 @@ public final class MemoryBuffer {
     return arr;
   }
 
-  public byte[] readBytesWithAlignedSize() {
-    final int numBytes = readAlignedVarUint32();
-    int readerIdx = readerIndex;
-    final byte[] arr = new byte[numBytes];
-    // use subtract to avoid overflow
-    if (readerIdx > size - numBytes) {
-      streamReader.readTo(arr, 0, numBytes);
-      return arr;
-    }
-    Platform.UNSAFE.copyMemory(
-        this.heapMemory, this.address + readerIdx, arr, 
Platform.BYTE_ARRAY_OFFSET, numBytes);
-    readerIndex = readerIdx + numBytes;
-    return arr;
-  }
-
   /** This method should be used to read data written by {@link 
#writePrimitiveArrayWithSize}. */
   public char[] readChars(int numBytes) {
     int readerIdx = readerIndex;
@@ -2485,11 +2523,6 @@ public final class MemoryBuffer {
     return arr;
   }
 
-  public char[] readCharsWithAlignedSize() {
-    final int numBytes = readAlignedVarUint32();
-    return readChars(numBytes);
-  }
-
   public long[] readLongs(int numBytes) {
     int readerIdx = readerIndex;
     int numElements = numBytes >> 3;
diff --git 
a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java 
b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
index 946062cdc..fe3ffc77b 100644
--- 
a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
+++ 
b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
@@ -357,7 +357,11 @@ public class ArraySerializers {
     public void write(MemoryBuffer buffer, char[] value) {
       if (fory.getBufferCallback() == null) {
         int size = Math.multiplyExact(value.length, 2);
-        buffer.writePrimitiveArrayWithSize(value, Platform.CHAR_ARRAY_OFFSET, 
size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.writePrimitiveArrayWithSize(value, 
Platform.CHAR_ARRAY_OFFSET, size);
+        } else {
+          writeCharBySwapEndian(buffer, value);
+        }
       } else {
         fory.writeBufferObject(
             buffer,
@@ -365,6 +369,17 @@ public class ArraySerializers {
       }
     }
 
+    private void writeCharBySwapEndian(MemoryBuffer buffer, char[] value) {
+      int idx = buffer.writerIndex();
+      int length = value.length;
+      buffer.ensure(idx + 5 + length * 2);
+      idx += buffer._unsafeWriteVarUint32(length * 2);
+      for (int i = 0; i < length; i++) {
+        buffer._unsafePutInt16(idx + i * 2, (short) value[i]);
+      }
+      buffer._unsafeWriterIndex(idx + length * 2);
+    }
+
     @Override
     public char[] copy(char[] originArray) {
       return Arrays.copyOf(originArray, originArray.length);
@@ -377,17 +392,35 @@ public class ArraySerializers {
         int size = buf.remaining();
         int numElements = size / 2;
         char[] values = new char[numElements];
-        buf.copyToUnsafe(0, values, Platform.CHAR_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buf.copyToUnsafe(0, values, Platform.CHAR_ARRAY_OFFSET, size);
+        } else {
+          readCharBySwapEndian(buf, values, numElements);
+        }
         return values;
       } else {
         int size = buffer.readVarUint32Small7();
         int numElements = size / 2;
         char[] values = new char[numElements];
-        buffer.readToUnsafe(values, Platform.CHAR_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.readToUnsafe(values, Platform.CHAR_ARRAY_OFFSET, size);
+        } else {
+          readCharBySwapEndian(buffer, values, numElements);
+        }
         return values;
       }
     }
 
+    private void readCharBySwapEndian(MemoryBuffer buffer, char[] values, int 
numElements) {
+      int idx = buffer.readerIndex();
+      int size = numElements * 2;
+      buffer.checkReadableBytes(size);
+      for (int i = 0; i < numElements; i++) {
+        values[i] = (char) buffer._unsafeGetInt16(idx + i * 2);
+      }
+      buffer._increaseReaderIndexUnsafe(size);
+    }
+
     @Override
     public void xwrite(MemoryBuffer buffer, char[] value) {
       throw new UnsupportedOperationException();
@@ -409,7 +442,11 @@ public class ArraySerializers {
     public void write(MemoryBuffer buffer, short[] value) {
       if (fory.getBufferCallback() == null) {
         int size = Math.multiplyExact(value.length, 2);
-        buffer.writePrimitiveArrayWithSize(value, Platform.SHORT_ARRAY_OFFSET, 
size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.writePrimitiveArrayWithSize(value, 
Platform.SHORT_ARRAY_OFFSET, size);
+        } else {
+          writeInt16BySwapEndian(buffer, value);
+        }
       } else {
         fory.writeBufferObject(
             buffer,
@@ -417,6 +454,17 @@ public class ArraySerializers {
       }
     }
 
+    private void writeInt16BySwapEndian(MemoryBuffer buffer, short[] value) {
+      int idx = buffer.writerIndex();
+      int length = value.length;
+      buffer.ensure(idx + 5 + length * 2);
+      idx += buffer._unsafeWriteVarUint32(length * 2);
+      for (int i = 0; i < length; i++) {
+        buffer._unsafePutInt16(idx + i * 2, value[i]);
+      }
+      buffer._unsafeWriterIndex(idx + length * 2);
+    }
+
     @Override
     public short[] copy(short[] originArray) {
       return Arrays.copyOf(originArray, originArray.length);
@@ -429,16 +477,34 @@ public class ArraySerializers {
         int size = buf.remaining();
         int numElements = size / 2;
         short[] values = new short[numElements];
-        buf.copyToUnsafe(0, values, Platform.SHORT_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buf.copyToUnsafe(0, values, Platform.SHORT_ARRAY_OFFSET, size);
+        } else {
+          readInt16BySwapEndian(buf, values, numElements);
+        }
         return values;
       } else {
         int size = buffer.readVarUint32Small7();
         int numElements = size / 2;
         short[] values = new short[numElements];
-        buffer.readToUnsafe(values, Platform.SHORT_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.readToUnsafe(values, Platform.SHORT_ARRAY_OFFSET, size);
+        } else {
+          readInt16BySwapEndian(buffer, values, numElements);
+        }
         return values;
       }
     }
+
+    private void readInt16BySwapEndian(MemoryBuffer buffer, short[] values, 
int numElements) {
+      int idx = buffer.readerIndex();
+      int size = numElements * 2;
+      buffer.checkReadableBytes(size);
+      for (int i = 0; i < numElements; i++) {
+        values[i] = buffer._unsafeGetInt16(idx + i * 2);
+      }
+      buffer._increaseReaderIndexUnsafe(size);
+    }
   }
 
   public static final class IntArraySerializer extends 
PrimitiveArraySerializer<int[]> {
@@ -455,7 +521,11 @@ public class ArraySerializers {
           return;
         }
         int size = Math.multiplyExact(value.length, 4);
-        buffer.writePrimitiveArrayWithSize(value, Platform.INT_ARRAY_OFFSET, 
size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.writePrimitiveArrayWithSize(value, Platform.INT_ARRAY_OFFSET, 
size);
+        } else {
+          writeInt32BySwapEndian(buffer, value);
+        }
       } else {
         fory.writeBufferObject(
             buffer,
@@ -463,6 +533,17 @@ public class ArraySerializers {
       }
     }
 
+    private void writeInt32BySwapEndian(MemoryBuffer buffer, int[] value) {
+      int idx = buffer.writerIndex();
+      int length = value.length;
+      buffer.ensure(idx + 5 + length * 4);
+      idx += buffer._unsafeWriteVarUint32(length * 4);
+      for (int i = 0; i < length; i++) {
+        buffer._unsafePutInt32(idx + i * 4, value[i]);
+      }
+      buffer._unsafeWriterIndex(idx + length * 4);
+    }
+
     @Override
     public int[] copy(int[] originArray) {
       return Arrays.copyOf(originArray, originArray.length);
@@ -476,7 +557,11 @@ public class ArraySerializers {
         int numElements = size / 4;
         int[] values = new int[numElements];
         if (size > 0) {
-          buf.copyToUnsafe(0, values, Platform.INT_ARRAY_OFFSET, size);
+          if (Platform.IS_LITTLE_ENDIAN) {
+            buf.copyToUnsafe(0, values, Platform.INT_ARRAY_OFFSET, size);
+          } else {
+            readInt32BySwapEndian(buf, values, numElements);
+          }
         }
         return values;
       }
@@ -487,11 +572,25 @@ public class ArraySerializers {
       int numElements = size / 4;
       int[] values = new int[numElements];
       if (size > 0) {
-        buffer.readToUnsafe(values, Platform.INT_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.readToUnsafe(values, Platform.INT_ARRAY_OFFSET, size);
+        } else {
+          readInt32BySwapEndian(buffer, values, numElements);
+        }
       }
       return values;
     }
 
+    private void readInt32BySwapEndian(MemoryBuffer buffer, int[] values, int 
numElements) {
+      int idx = buffer.readerIndex();
+      int size = numElements * 4;
+      buffer.checkReadableBytes(size);
+      for (int i = 0; i < numElements; i++) {
+        values[i] = buffer._unsafeGetInt32(idx + i * 4);
+      }
+      buffer._increaseReaderIndexUnsafe(size);
+    }
+
     private void writeInt32Compressed(MemoryBuffer buffer, int[] value) {
       buffer.writeVarUint32Small7(value.length);
       for (int i : value) {
@@ -528,7 +627,11 @@ public class ArraySerializers {
           return;
         }
         int size = Math.multiplyExact(value.length, 8);
-        buffer.writePrimitiveArrayWithSize(value, Platform.LONG_ARRAY_OFFSET, 
size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.writePrimitiveArrayWithSize(value, 
Platform.LONG_ARRAY_OFFSET, size);
+        } else {
+          writeInt64BySwapEndian(buffer, value);
+        }
       } else {
         fory.writeBufferObject(
             buffer,
@@ -536,6 +639,17 @@ public class ArraySerializers {
       }
     }
 
+    private void writeInt64BySwapEndian(MemoryBuffer buffer, long[] value) {
+      int idx = buffer.writerIndex();
+      int length = value.length;
+      buffer.ensure(idx + 5 + length * 8);
+      idx += buffer._unsafeWriteVarUint32(length * 8);
+      for (int i = 0; i < length; i++) {
+        buffer._unsafePutInt64(idx + i * 8, value[i]);
+      }
+      buffer._unsafeWriterIndex(idx + length * 8);
+    }
+
     @Override
     public long[] copy(long[] originArray) {
       return Arrays.copyOf(originArray, originArray.length);
@@ -549,7 +663,11 @@ public class ArraySerializers {
         int numElements = size / 8;
         long[] values = new long[numElements];
         if (size > 0) {
-          buf.copyToUnsafe(0, values, Platform.LONG_ARRAY_OFFSET, size);
+          if (Platform.IS_LITTLE_ENDIAN) {
+            buf.copyToUnsafe(0, values, Platform.LONG_ARRAY_OFFSET, size);
+          } else {
+            readInt64BySwapEndian(buf, values, numElements);
+          }
         }
         return values;
       }
@@ -560,11 +678,25 @@ public class ArraySerializers {
       int numElements = size / 8;
       long[] values = new long[numElements];
       if (size > 0) {
-        buffer.readToUnsafe(values, Platform.LONG_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.readToUnsafe(values, Platform.LONG_ARRAY_OFFSET, size);
+        } else {
+          readInt64BySwapEndian(buffer, values, numElements);
+        }
       }
       return values;
     }
 
+    private void readInt64BySwapEndian(MemoryBuffer buffer, long[] values, int 
numElements) {
+      int idx = buffer.readerIndex();
+      int size = numElements * 8;
+      buffer.checkReadableBytes(size);
+      for (int i = 0; i < numElements; i++) {
+        values[i] = buffer._unsafeGetInt64(idx + i * 8);
+      }
+      buffer._increaseReaderIndexUnsafe(size);
+    }
+
     private void writeInt64Compressed(
         MemoryBuffer buffer, long[] value, LongEncoding longEncoding) {
       int length = value.length;
@@ -608,7 +740,11 @@ public class ArraySerializers {
     public void write(MemoryBuffer buffer, float[] value) {
       if (fory.getBufferCallback() == null) {
         int size = Math.multiplyExact(value.length, 4);
-        buffer.writePrimitiveArrayWithSize(value, Platform.FLOAT_ARRAY_OFFSET, 
size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.writePrimitiveArrayWithSize(value, 
Platform.FLOAT_ARRAY_OFFSET, size);
+        } else {
+          writeFloat32BySwapEndian(buffer, value);
+        }
       } else {
         fory.writeBufferObject(
             buffer,
@@ -616,6 +752,17 @@ public class ArraySerializers {
       }
     }
 
+    private void writeFloat32BySwapEndian(MemoryBuffer buffer, float[] value) {
+      int idx = buffer.writerIndex();
+      int length = value.length;
+      buffer.ensure(idx + 5 + length * 4);
+      idx += buffer._unsafeWriteVarUint32(length * 4);
+      for (int i = 0; i < length; i++) {
+        buffer._unsafePutInt32(idx + i * 4, Float.floatToRawIntBits(value[i]));
+      }
+      buffer._unsafeWriterIndex(idx + length * 4);
+    }
+
     @Override
     public float[] copy(float[] originArray) {
       return Arrays.copyOf(originArray, originArray.length);
@@ -628,16 +775,34 @@ public class ArraySerializers {
         int size = buf.remaining();
         int numElements = size / 4;
         float[] values = new float[numElements];
-        buf.copyToUnsafe(0, values, Platform.FLOAT_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buf.copyToUnsafe(0, values, Platform.FLOAT_ARRAY_OFFSET, size);
+        } else {
+          readFloat32BySwapEndian(buf, values, numElements);
+        }
         return values;
       } else {
         int size = buffer.readVarUint32Small7();
         int numElements = size / 4;
         float[] values = new float[numElements];
-        buffer.readToUnsafe(values, Platform.FLOAT_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.readToUnsafe(values, Platform.FLOAT_ARRAY_OFFSET, size);
+        } else {
+          readFloat32BySwapEndian(buffer, values, numElements);
+        }
         return values;
       }
     }
+
+    private void readFloat32BySwapEndian(MemoryBuffer buffer, float[] values, 
int numElements) {
+      int idx = buffer.readerIndex();
+      int size = numElements * 4;
+      buffer.checkReadableBytes(size);
+      for (int i = 0; i < numElements; i++) {
+        values[i] = Float.intBitsToFloat(buffer._unsafeGetInt32(idx + i * 4));
+      }
+      buffer._increaseReaderIndexUnsafe(size);
+    }
   }
 
   public static final class DoubleArraySerializer extends 
PrimitiveArraySerializer<double[]> {
@@ -650,7 +815,11 @@ public class ArraySerializers {
     public void write(MemoryBuffer buffer, double[] value) {
       if (fory.getBufferCallback() == null) {
         int size = Math.multiplyExact(value.length, 8);
-        buffer.writePrimitiveArrayWithSize(value, 
Platform.DOUBLE_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.writePrimitiveArrayWithSize(value, 
Platform.DOUBLE_ARRAY_OFFSET, size);
+        } else {
+          writeFloat64BySwapEndian(buffer, value);
+        }
       } else {
         fory.writeBufferObject(
             buffer,
@@ -658,6 +827,17 @@ public class ArraySerializers {
       }
     }
 
+    private void writeFloat64BySwapEndian(MemoryBuffer buffer, double[] value) 
{
+      int idx = buffer.writerIndex();
+      int length = value.length;
+      buffer.ensure(idx + 5 + length * 8);
+      idx += buffer._unsafeWriteVarUint32(length * 8);
+      for (int i = 0; i < length; i++) {
+        buffer._unsafePutInt64(idx + i * 8, 
Double.doubleToRawLongBits(value[i]));
+      }
+      buffer._unsafeWriterIndex(idx + length * 8);
+    }
+
     @Override
     public double[] copy(double[] originArray) {
       return Arrays.copyOf(originArray, originArray.length);
@@ -670,16 +850,34 @@ public class ArraySerializers {
         int size = buf.remaining();
         int numElements = size / 8;
         double[] values = new double[numElements];
-        buf.copyToUnsafe(0, values, Platform.DOUBLE_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buf.copyToUnsafe(0, values, Platform.DOUBLE_ARRAY_OFFSET, size);
+        } else {
+          readFloat64BySwapEndian(buf, values, numElements);
+        }
         return values;
       } else {
         int size = buffer.readVarUint32Small7();
         int numElements = size / 8;
         double[] values = new double[numElements];
-        buffer.readToUnsafe(values, Platform.DOUBLE_ARRAY_OFFSET, size);
+        if (Platform.IS_LITTLE_ENDIAN) {
+          buffer.readToUnsafe(values, Platform.DOUBLE_ARRAY_OFFSET, size);
+        } else {
+          readFloat64BySwapEndian(buffer, values, numElements);
+        }
         return values;
       }
     }
+
+    private void readFloat64BySwapEndian(MemoryBuffer buffer, double[] values, 
int numElements) {
+      int idx = buffer.readerIndex();
+      int size = numElements * 8;
+      buffer.checkReadableBytes(size);
+      for (int i = 0; i < numElements; i++) {
+        values[i] = Double.longBitsToDouble(buffer._unsafeGetInt64(idx + i * 
8));
+      }
+      buffer._increaseReaderIndexUnsafe(size);
+    }
   }
 
   public static final class StringArraySerializer extends Serializer<String[]> 
{
diff --git 
a/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java 
b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java
index 46248b20f..927c4ca2f 100644
--- a/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java
+++ b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java
@@ -239,10 +239,6 @@ public class MemoryBufferTest {
     buf.writePrimitiveArrayWithSize(chars, Platform.CHAR_ARRAY_OFFSET, 
chars.length * 2);
     assertEquals(bytes, buf.readBytesAndSize());
     assertEquals(chars, buf.readChars(buf.readVarUint32()));
-    buf.writePrimitiveArrayAlignedSize(bytes, Platform.BYTE_ARRAY_OFFSET, 
bytes.length);
-    buf.writePrimitiveArrayAlignedSize(chars, Platform.CHAR_ARRAY_OFFSET, 
chars.length * 2);
-    assertEquals(bytes, buf.readBytesWithAlignedSize());
-    assertEquals(chars, buf.readCharsWithAlignedSize());
   }
 
   @Test
diff --git 
a/java/fory-core/src/test/java/org/apache/fory/xlang/PyCrossLanguageTest.java 
b/java/fory-core/src/test/java/org/apache/fory/xlang/PyCrossLanguageTest.java
index 6d22bbae3..ac9222389 100644
--- 
a/java/fory-core/src/test/java/org/apache/fory/xlang/PyCrossLanguageTest.java
+++ 
b/java/fory-core/src/test/java/org/apache/fory/xlang/PyCrossLanguageTest.java
@@ -103,9 +103,9 @@ public class PyCrossLanguageTest extends ForyTestBase {
    *
    * @return Whether the command succeeded.
    */
-  private boolean executeCommand(List<String> command, int waitTimeoutSeconds) 
{
+  private boolean executeCommand(List<String> command) {
     return TestUtils.executeCommand(
-        command, waitTimeoutSeconds, 
ImmutableMap.of("ENABLE_CROSS_LANGUAGE_TESTS", "true"));
+        command, 60, ImmutableMap.of("ENABLE_CROSS_LANGUAGE_TESTS", "true"));
   }
 
   @Data
@@ -147,7 +147,7 @@ public class PyCrossLanguageTest extends ForyTestBase {
             PYTHON_MODULE,
             "test_buffer",
             dataFile.toAbsolutePath().toString());
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
     buffer = MemoryUtils.wrap(Files.readAllBytes(dataFile));
     Assert.assertTrue(buffer.readBoolean());
     Assert.assertEquals(buffer.readByte(), Byte.MAX_VALUE);
@@ -179,14 +179,14 @@ public class PyCrossLanguageTest extends ForyTestBase {
             PYTHON_MODULE,
             "test_murmurhash3",
             dataFile.toAbsolutePath().toString());
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
     long[] longs = MurmurHash3.murmurhash3_x64_128(new byte[] {1, 2, 8}, 0, 3, 
47);
     buffer.writerIndex(0);
     buffer.writeInt64(longs[0]);
     buffer.writeInt64(longs[1]);
     Files.write(
         dataFile, buffer.getBytes(0, buffer.writerIndex()), 
StandardOpenOption.TRUNCATE_EXISTING);
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
   }
 
   /** Keep this in sync with `foo_schema` in test_cross_language.py */
@@ -320,7 +320,7 @@ public class PyCrossLanguageTest extends ForyTestBase {
             PYTHON_MODULE,
             "test_cross_language_serializer",
             dataFile.toAbsolutePath().toString());
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
     MemoryBuffer buffer2 = MemoryUtils.wrap(Files.readAllBytes(dataFile));
     function.accept(buffer2, true);
   }
@@ -443,7 +443,7 @@ public class PyCrossLanguageTest extends ForyTestBase {
             PYTHON_MODULE,
             "test_cross_language_reference",
             dataFile.toAbsolutePath().toString());
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
     MemoryBuffer buffer2 = MemoryUtils.wrap(Files.readAllBytes(dataFile));
     function.accept(buffer2);
   }
@@ -607,7 +607,7 @@ public class PyCrossLanguageTest extends ForyTestBase {
     ImmutableList<String> command =
         ImmutableList.of(
             PYTHON_EXECUTABLE, "-m", PYTHON_MODULE, testName, 
dataFile.toAbsolutePath().toString());
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
     Assert.assertEquals(fory.deserialize(Files.readAllBytes(dataFile)), obj);
   }
 
@@ -624,7 +624,7 @@ public class PyCrossLanguageTest extends ForyTestBase {
         ImmutableList.of(
             PYTHON_EXECUTABLE, "-m", PYTHON_MODULE, testName, 
dataFile.toAbsolutePath().toString());
     // Just test that Python can read the data - don't check round-trip
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
   }
 
   private static class ComplexObject1Serializer extends 
Serializer<ComplexObject1> {
@@ -687,7 +687,7 @@ public class PyCrossLanguageTest extends ForyTestBase {
             PYTHON_MODULE,
             "test_register_serializer",
             dataFile.toAbsolutePath().toString());
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
     Assert.assertEquals(fory.deserialize(Files.readAllBytes(dataFile)), obj);
   }
 
@@ -700,7 +700,7 @@ public class PyCrossLanguageTest extends ForyTestBase {
     ImmutableList<String> command =
         ImmutableList.of(
             PYTHON_EXECUTABLE, "-m", PYTHON_MODULE, testName, 
dataFile.toAbsolutePath().toString());
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
     return Files.readAllBytes(dataFile);
   }
 
@@ -754,7 +754,7 @@ public class PyCrossLanguageTest extends ForyTestBase {
             "test_oob_buffer",
             intBandDataFile.toAbsolutePath().toString(),
             outOfBandDataFile.toAbsolutePath().toString());
-    Assert.assertTrue(executeCommand(command, 30));
+    Assert.assertTrue(executeCommand(command));
 
     MemoryBuffer inBandBuffer = 
MemoryUtils.wrap(Files.readAllBytes(intBandDataFile));
     outOfBandBuffer = MemoryUtils.wrap(Files.readAllBytes(outOfBandDataFile));
diff --git a/javascript/packages/fory/lib/fory.ts 
b/javascript/packages/fory/lib/fory.ts
index 21f1ed117..2adea0214 100644
--- a/javascript/packages/fory/lib/fory.ts
+++ b/javascript/packages/fory/lib/fory.ts
@@ -114,10 +114,6 @@ export default class {
     if ((bitmap & ConfigFlags.isNullFlag) === ConfigFlags.isNullFlag) {
       return null;
     }
-    const isLittleEndian = (bitmap & ConfigFlags.isLittleEndianFlag) === 
ConfigFlags.isLittleEndianFlag;
-    if (!isLittleEndian) {
-      throw new Error("big endian is not supported now");
-    }
     const isCrossLanguage = (bitmap & ConfigFlags.isCrossLanguageFlag) == 
ConfigFlags.isCrossLanguageFlag;
     if (!isCrossLanguage) {
       throw new Error("support crosslanguage mode only");
@@ -147,7 +143,6 @@ export default class {
     if (data === null) {
       bitmap |= ConfigFlags.isNullFlag;
     }
-    bitmap |= ConfigFlags.isLittleEndianFlag;
     bitmap |= ConfigFlags.isCrossLanguageFlag;
     this.binaryWriter.uint8(bitmap);
     this.binaryWriter.uint8(Language.JAVASCRIPT);
diff --git a/javascript/packages/fory/lib/type.ts 
b/javascript/packages/fory/lib/type.ts
index cf06f9cbe..aafb9c47c 100644
--- a/javascript/packages/fory/lib/type.ts
+++ b/javascript/packages/fory/lib/type.ts
@@ -173,9 +173,8 @@ export enum InternalSerializerType {
 
 export enum ConfigFlags {
   isNullFlag = 1 << 0,
-  isLittleEndianFlag = 2,
-  isCrossLanguageFlag = 4,
-  isOutOfBandFlag = 8,
+  isCrossLanguageFlag = 1 << 1,
+  isOutOfBandFlag = 1 << 2,
 }
 
 // read, write
diff --git a/javascript/test/fory.test.ts b/javascript/test/fory.test.ts
index 86bc93f22..c8c4fb79f 100644
--- a/javascript/test/fory.test.ts
+++ b/javascript/test/fory.test.ts
@@ -28,30 +28,24 @@ describe('fory', () => {
         expect(fory.deserialize(new Uint8Array([1]))).toBe(null)
     });
 
-    test('should deserialize big endian work', () => {
-        const fory = new Fory();
-        try {
-            fory.deserialize(new Uint8Array([0]))
-            throw new Error('unreachable code')
-        } catch (error) {
-            expect(error.message).toBe('big endian is not supported now');
-        }
-    });
-
     test('should deserialize xlang disable work', () => {
         const fory = new Fory();
         try {
-            fory.deserialize(new Uint8Array([2]))
+            // bit 0 = null flag, bit 1 = xlang flag, bit 2 = oob flag
+            // value 0 means xlang is disabled
+            fory.deserialize(new Uint8Array([0]))
             throw new Error('unreachable code')
         } catch (error) {
             expect(error.message).toBe('support crosslanguage mode only');
         }
     });
 
-    test('should deserialize xlang disable work', () => {
+    test('should deserialize oob mode work', () => {
         const fory = new Fory();
         try {
-            fory.deserialize(new Uint8Array([14]))
+            // bit 0 = null flag, bit 1 = xlang flag, bit 2 = oob flag
+            // value 6 = xlang (2) + oob (4) = 6
+            fory.deserialize(new Uint8Array([6]))
             throw new Error('unreachable code')
         } catch (error) {
             expect(error.message).toBe('outofband mode is not supported now');
diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py
index b11028e82..6b091ce81 100644
--- a/python/pyfory/_fory.py
+++ b/python/pyfory/_fory.py
@@ -28,7 +28,7 @@ from pyfory.resolver import (
     NULL_FLAG,
     NOT_NULL_VALUE_FLAG,
 )
-from pyfory.utils import is_little_endian, set_bit, get_bit, clear_bit
+from pyfory.utils import set_bit, get_bit, clear_bit
 from pyfory.types import TypeId
 from pyfory.policy import DeserializationPolicy, DEFAULT_POLICY
 
@@ -460,24 +460,19 @@ class Fory:
             set_bit(buffer, mask_index, 0)
         else:
             clear_bit(buffer, mask_index, 0)
-        # set endian
-        if is_little_endian:
-            set_bit(buffer, mask_index, 1)
-        else:
-            clear_bit(buffer, mask_index, 1)
 
         if self.language == Language.XLANG:
             # set reader as x_lang.
-            set_bit(buffer, mask_index, 2)
+            set_bit(buffer, mask_index, 1)
             # set writer language.
             buffer.write_int8(Language.PYTHON.value)
         else:
             # set reader as native.
-            clear_bit(buffer, mask_index, 2)
+            clear_bit(buffer, mask_index, 1)
         if self._buffer_callback is not None:
-            set_bit(buffer, mask_index, 3)
+            set_bit(buffer, mask_index, 2)
         else:
-            clear_bit(buffer, mask_index, 3)
+            clear_bit(buffer, mask_index, 2)
         # Reserve space for type definitions offset, similar to Java 
implementation
         type_defs_offset_pos = None
         if self.serialization_context.scoped_meta_share_enabled:
@@ -611,14 +606,12 @@ class Fory:
         buffer.reader_index = reader_index + 1
         if get_bit(buffer, reader_index, 0):
             return None
-        is_little_endian_ = get_bit(buffer, reader_index, 1)
-        assert is_little_endian_, "Big endian is not supported for now, please ensure peer machine is little endian."
-        is_target_x_lang = get_bit(buffer, reader_index, 2)
+        is_target_x_lang = get_bit(buffer, reader_index, 1)
         if is_target_x_lang:
             self._peer_language = Language(buffer.read_int8())
         else:
             self._peer_language = Language.PYTHON
-        is_out_of_band_serialization_enabled = get_bit(buffer, reader_index, 3)
+        is_out_of_band_serialization_enabled = get_bit(buffer, reader_index, 2)
         if is_out_of_band_serialization_enabled:
             assert buffers is not None, "buffers shouldn't be null when the serialized stream is produced with buffer_callback not null."
             self._buffers = iter(buffers)
diff --git a/python/pyfory/buffer.pyx b/python/pyfory/buffer.pyx
index b4e97a15d..33f2bd619 100644
--- a/python/pyfory/buffer.pyx
+++ b/python/pyfory/buffer.pyx
@@ -822,6 +822,10 @@ cdef inline uint8_t* get_address(v):
         return <uint8_t*>(PyBytes_AsString(v))
     view = memoryview(v)
     cdef str dtype = view.format
+    # Handle little-endian format codes (e.g., "<h", "<i", etc.)
+    # Strip the endian prefix since we only care about the type
+    if len(dtype) > 1 and dtype[0] in ('<', '>', '=', '@', '!'):
+        dtype = dtype[1:]
     cdef:
         const char[:] signed_char_data
         const unsigned char[:] unsigned_data
diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index f73c809f3..3d3adb7e7 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -35,7 +35,6 @@ from pyfory.lib import mmh3
 from pyfory.meta.metastring import Encoding
 from pyfory.types import is_primitive_type
 from pyfory.policy import DeserializationPolicy, DEFAULT_POLICY
-from pyfory.utils import is_little_endian
 from pyfory.includes.libserialization cimport \
     (TypeId, IsNamespacedType, IsTypeShareMeta, Fory_PyBooleanSequenceWriteToBuffer, Fory_PyFloatSequenceWriteToBuffer)
 
@@ -1195,24 +1194,19 @@ cdef class Fory:
             set_bit(buffer, mask_index, 0)
         else:
             clear_bit(buffer, mask_index, 0)
-        # set endian
-        if is_little_endian:
-            set_bit(buffer, mask_index, 1)
-        else:
-            clear_bit(buffer, mask_index, 1)
 
         if self.language == Language.XLANG:
             # set reader as x_lang.
-            set_bit(buffer, mask_index, 2)
+            set_bit(buffer, mask_index, 1)
             # set writer language.
             buffer.write_int8(Language.PYTHON.value)
         else:
             # set reader as native.
-            clear_bit(buffer, mask_index, 2)
+            clear_bit(buffer, mask_index, 1)
         if self.buffer_callback is not None:
-            set_bit(buffer, mask_index, 3)
+            set_bit(buffer, mask_index, 2)
         else:
-            clear_bit(buffer, mask_index, 3)
+            clear_bit(buffer, mask_index, 2)
         # Reserve space for type definitions offset, similar to Java implementation
         cdef int32_t type_defs_offset_pos = -1
         if self.serialization_context.scoped_meta_share_enabled:
@@ -1356,18 +1350,13 @@ cdef class Fory:
         buffer.reader_index = reader_index + 1
         if get_bit(buffer, reader_index, 0):
             return None
-        cdef c_bool is_little_endian_ = get_bit(buffer, reader_index, 1)
-        assert is_little_endian_, (
-            "Big endian is not supported for now, "
-            "please ensure peer machine is little endian."
-        )
-        cdef c_bool is_target_x_lang = get_bit(buffer, reader_index, 2)
+        cdef c_bool is_target_x_lang = get_bit(buffer, reader_index, 1)
         if is_target_x_lang:
             self._peer_language = Language(buffer.read_int8())
         else:
             self._peer_language = Language.PYTHON
         cdef c_bool is_out_of_band_serialization_enabled = \
-            get_bit(buffer, reader_index, 3)
+            get_bit(buffer, reader_index, 2)
         if is_out_of_band_serialization_enabled:
             assert buffers is not None, (
                 "buffers shouldn't be null when the serialized stream is "
diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py
index d68c5736f..4142fb67d 100644
--- a/python/pyfory/serializer.py
+++ b/python/pyfory/serializer.py
@@ -131,6 +131,7 @@ from pyfory.types import (
     Float64NDArrayType,
     TypeId,
 )
+from pyfory.utils import is_little_endian
 
 
 class NoneSerializer(Serializer):
@@ -290,25 +291,43 @@ class PyArraySerializer(XlangCompatibleSerializer):
         assert view.c_contiguous  # TODO handle contiguous
         nbytes = len(value) * self.itemsize
         buffer.write_varuint32(nbytes)
-        buffer.write_buffer(value)
+        if is_little_endian or self.itemsize == 1:
+            buffer.write_buffer(value)
+        else:
+            # Swap bytes on big-endian machines for multi-byte types
+            swapped = array.array(self.typecode, value)
+            swapped.byteswap()
+            buffer.write_buffer(swapped)
 
     def xread(self, buffer):
         data = buffer.read_bytes_and_size()
         arr = array.array(self.typecode, [])
         arr.frombytes(data)
+        if not is_little_endian and self.itemsize > 1:
+            # Swap bytes on big-endian machines for multi-byte types
+            arr.byteswap()
         return arr
 
     def write(self, buffer, value: array.array):
         nbytes = len(value) * value.itemsize
         buffer.write_string(value.typecode)
         buffer.write_varuint32(nbytes)
-        buffer.write_buffer(value)
+        if is_little_endian or value.itemsize == 1:
+            buffer.write_buffer(value)
+        else:
+            # Swap bytes on big-endian machines for multi-byte types
+            swapped = array.array(value.typecode, value)
+            swapped.byteswap()
+            buffer.write_buffer(swapped)
 
     def read(self, buffer):
         typecode = buffer.read_string()
         data = buffer.read_bytes_and_size()
         arr = array.array(typecode[0], [])  # Take first character
         arr.frombytes(data)
+        if not is_little_endian and arr.itemsize > 1:
+            # Swap bytes on big-endian machines for multi-byte types
+            arr.byteswap()
         return arr
 
 
@@ -326,16 +345,30 @@ class DynamicPyArraySerializer(Serializer):
         buffer.write_varuint32(type_id)
         buffer.write_varuint32(nbytes)
         if not view.c_contiguous:
-            buffer.write_bytes(value.tobytes())
-        else:
+            data = value.tobytes()
+            if not is_little_endian and itemsize > 1:
+                swapped = array.array(value.typecode, [])
+                swapped.frombytes(data)
+                swapped.byteswap()
+                data = swapped.tobytes()
+            buffer.write_bytes(data)
+        elif is_little_endian or itemsize == 1:
             buffer.write_buffer(value)
+        else:
+            # Swap bytes on big-endian machines for multi-byte types
+            swapped = array.array(value.typecode, value)
+            swapped.byteswap()
+            buffer.write_buffer(swapped)
 
     def xread(self, buffer):
         type_id = buffer.read_varint32()
         typecode = typeid_code[type_id]
+        itemsize = typecode_dict[typecode][0]
         data = buffer.read_bytes_and_size()
         arr = array.array(typecode, [])
         arr.frombytes(data)
+        if not is_little_endian and itemsize > 1:
+            arr.byteswap()
         return arr
 
     def write(self, buffer, value):
@@ -390,13 +423,24 @@ class Numpy1DArraySerializer(Serializer):
         nbytes = len(value) * self.itemsize
         buffer.write_varuint32(nbytes)
         if self.dtype == np.dtype("bool") or not view.c_contiguous:
-            buffer.write_bytes(value.tobytes())
-        else:
+            if not is_little_endian and self.itemsize > 1:
+                # Swap bytes on big-endian machines for multi-byte types
+                buffer.write_bytes(value.astype(value.dtype.newbyteorder("<")).tobytes())
+            else:
+                buffer.write_bytes(value.tobytes())
+        elif is_little_endian or self.itemsize == 1:
             buffer.write_buffer(value)
+        else:
+            # Swap bytes on big-endian machines for multi-byte types
+            buffer.write_bytes(value.astype(value.dtype.newbyteorder("<")).tobytes())
 
     def xread(self, buffer):
         data = buffer.read_bytes_and_size()
-        return np.frombuffer(data, dtype=self.dtype)
+        arr = np.frombuffer(data, dtype=self.dtype.newbyteorder("<"))
+        if not is_little_endian and self.itemsize > 1:
+            # Convert from little-endian to native byte order
+            arr = arr.astype(self.dtype)
+        return arr
 
     def write(self, buffer, value):
         self._serializer.write(buffer, value)
diff --git a/rust/fory-core/src/fory.rs b/rust/fory-core/src/fory.rs
index 59d37ca30..2f1318309 100644
--- a/rust/fory-core/src/fory.rs
+++ b/rust/fory-core/src/fory.rs
@@ -23,11 +23,8 @@ use crate::resolver::context::{ContextCache, ReadContext, WriteContext};
 use crate::resolver::type_resolver::TypeResolver;
 use crate::serializer::ForyDefault;
 use crate::serializer::{Serializer, StructSerializer};
-use crate::types::config_flags::IS_NULL_FLAG;
-use crate::types::{
-    config_flags::{IS_CROSS_LANGUAGE_FLAG, IS_LITTLE_ENDIAN_FLAG},
-    Language, RefMode, SIZE_OF_REF_AND_TYPE,
-};
+use crate::types::config_flags::{IS_CROSS_LANGUAGE_FLAG, IS_NULL_FLAG};
+use crate::types::{Language, RefMode, SIZE_OF_REF_AND_TYPE};
 use std::cell::UnsafeCell;
 use std::mem;
 use std::sync::atomic::{AtomicU64, Ordering};
@@ -833,10 +830,7 @@ impl Fory {
     pub fn write_head<T: Serializer>(&self, is_none: bool, writer: &mut Writer) {
         const HEAD_SIZE: usize = 10;
         writer.reserve(T::fory_reserved_space() + SIZE_OF_REF_AND_TYPE + HEAD_SIZE);
-        #[cfg(target_endian = "big")]
-        let mut bitmap = 0;
-        #[cfg(target_endian = "little")]
-        let mut bitmap = IS_LITTLE_ENDIAN_FLAG;
+        let mut bitmap: u8 = 0;
         if self.config.xlang {
             bitmap |= IS_CROSS_LANGUAGE_FLAG;
         }
@@ -1034,13 +1028,6 @@ impl Fory {
             self.config.xlang == peer_is_xlang,
             Error::invalid_data("header bitmap mismatch at xlang bit")
         );
-        let is_little_endian = (bitmap & IS_LITTLE_ENDIAN_FLAG) != 0;
-        ensure!(
-            is_little_endian,
-            Error::invalid_data(
-                "Big endian is not supported for now, please ensure peer machine is little endian."
-            )
-        );
         let is_none = (bitmap & IS_NULL_FLAG) != 0;
         if is_none {
             return Ok(true);
diff --git a/rust/fory-core/src/serializer/primitive_list.rs b/rust/fory-core/src/serializer/primitive_list.rs
index 3d63a87f5..824f6a660 100644
--- a/rust/fory-core/src/serializer/primitive_list.rs
+++ b/rust/fory-core/src/serializer/primitive_list.rs
@@ -53,10 +53,21 @@ pub fn fory_write_data<T: Serializer>(this: &[T], context: &mut WriteContext) ->
     context.writer.write_varuint32(len_bytes as u32);
 
     if !this.is_empty() {
-        unsafe {
-            let ptr = this.as_ptr() as *const u8;
-            let slice = std::slice::from_raw_parts(ptr, len_bytes);
-            context.writer.write_bytes(slice);
+        #[cfg(target_endian = "little")]
+        {
+            // Fast path: direct memory copy on little-endian machines
+            unsafe {
+                let ptr = this.as_ptr() as *const u8;
+                let slice = std::slice::from_raw_parts(ptr, len_bytes);
+                context.writer.write_bytes(slice);
+            }
+        }
+        #[cfg(target_endian = "big")]
+        {
+            // Slow path: element-by-element write on big-endian machines
+            for item in this {
+                item.write(context)?;
+            }
         }
     }
     Ok(())
@@ -67,18 +78,30 @@ pub fn fory_write_type_info(context: &mut WriteContext, type_id: TypeId) -> Resu
     Ok(())
 }
 
-pub fn fory_read_data<T>(context: &mut ReadContext) -> Result<Vec<T>, Error> {
+pub fn fory_read_data<T: Serializer>(context: &mut ReadContext) -> Result<Vec<T>, Error> {
     let size_bytes = context.reader.read_varuint32()? as usize;
     if size_bytes % std::mem::size_of::<T>() != 0 {
         return Err(Error::invalid_data("Invalid data length"));
     }
     let len = size_bytes / std::mem::size_of::<T>();
     let mut vec: Vec<T> = Vec::with_capacity(len);
-    unsafe {
-        let dst_ptr = vec.as_mut_ptr() as *mut u8;
-        let src = context.reader.read_bytes(size_bytes)?;
-        std::ptr::copy_nonoverlapping(src.as_ptr(), dst_ptr, size_bytes);
-        vec.set_len(len);
+
+    #[cfg(target_endian = "little")]
+    {
+        // Fast path: direct memory copy on little-endian machines
+        unsafe {
+            let dst_ptr = vec.as_mut_ptr() as *mut u8;
+            let src = context.reader.read_bytes(size_bytes)?;
+            std::ptr::copy_nonoverlapping(src.as_ptr(), dst_ptr, size_bytes);
+            vec.set_len(len);
+        }
+    }
+    #[cfg(target_endian = "big")]
+    {
+        // Slow path: element-by-element read on big-endian machines
+        for _ in 0..len {
+            vec.push(T::read(context)?);
+        }
     }
     Ok(vec)
 }
diff --git a/rust/fory-core/src/types.rs b/rust/fory-core/src/types.rs
index de54603ac..94d13fa22 100644
--- a/rust/fory-core/src/types.rs
+++ b/rust/fory-core/src/types.rs
@@ -474,9 +474,8 @@ pub fn compute_field_hash(hash: u32, id: i16) -> u32 {
 
 pub mod config_flags {
     pub const IS_NULL_FLAG: u8 = 1 << 0;
-    pub const IS_LITTLE_ENDIAN_FLAG: u8 = 2;
-    pub const IS_CROSS_LANGUAGE_FLAG: u8 = 4;
-    pub const IS_OUT_OF_BAND_FLAG: u8 = 8;
+    pub const IS_CROSS_LANGUAGE_FLAG: u8 = 1 << 1;
+    pub const IS_OUT_OF_BAND_FLAG: u8 = 1 << 2;
 }
 
 #[derive(Debug, PartialEq)]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to