This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new 668bb32c9 feat(go): update codegen field sorting to generate smaller 
and faster code (#2779)
668bb32c9 is described below

commit 668bb32c9c393394555a733f43c3156fd10d651e
Author: thisingl <[email protected]>
AuthorDate: Mon Oct 20 23:42:14 2025 +0800

    feat(go): update codegen field sorting to generate smaller and faster code 
(#2779)
    
    <!--
    **Thanks for contributing to Apache Fory™.**
    
    **If this is your first time opening a PR on fory, you can refer to
    
[CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).**
    
    Contribution Checklist
    
    - The **Apache Fory™** community has requirements on the naming of PR
    titles. You can also find instructions in
    [CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).
    
    - Apache Fory™ has a strong focus on performance. If the PR you submit
    will have an impact on performance, please benchmark it first and
    provide the benchmark result here.
    -->
    
    ## Why?
    
    The previous field ordering algorithm mixed different field types
    (strings, lists, maps, etc.) into broad groups that were difficult to
    maintain and reason about at compile time. The new specification
    provides a clearer categorization that:
    1. Improves compile-time predictability: Separates fields into distinct
    groups (primitives, other internal types, lists, sets, maps, others)
    with clear sorting rules for each group
    2. Enhances cross-language compatibility: Aligns with the updated xlang
    serialization specification for consistent behavior across all Fory
    language implementations
    3. Simplifies code generation: Each field group has its own simple
    sorting rule, making the generated code more maintainable and easier to
    verify
    
    ## What does this PR do?
    Modified `fory/codegen/utils.go`:
    * Rewrote sortFields() function to implement the new 6-group
    categorization
    * Added getFieldGroup() function to categorize fields into their
    respective sorting groups
    * Added field group constants (groupPrimitive, groupOtherInternalType,
    groupList, groupSet, groupMap, groupOther)
    * Each group now has its own clear sorting logic as specified in the
    xlang serialization spec
    
    ## Related issues
    For more details, see PR https://github.com/apache/fory/pull/2749
    <!--
    Is there any related issue? If this PR closes them, you can say
    fix/closes:
    
    - #xxxx0
    - #xxxx1
    - Fixes #xxxx2
    -->
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/fory/issues/new/choose) describing the
    need to do so and update the document if necessary.
    
    Delete section if not applicable.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    <!--
    When the PR has an impact on performance (if you don't know whether the
    PR will have an impact on performance, you can submit the PR first, and
    if it will have an impact on performance, the code reviewer will explain
    it), be sure to attach benchmark data here.
    
    Delete section if not applicable.
    -->
    
    Co-authored-by: Shawn Yang <[email protected]>
    Co-authored-by: Pan Li <[email protected]>
---
 ci/run_ci.sh             |  10 ++---
 go/fory/codegen/utils.go | 115 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 102 insertions(+), 23 deletions(-)

diff --git a/ci/run_ci.sh b/ci/run_ci.sh
index 5a3e01ed1..f27835341 100755
--- a/ci/run_ci.sh
+++ b/ci/run_ci.sh
@@ -372,11 +372,11 @@ case $1 in
     ;;
     go)
       echo "Executing fory go tests for go"
-#      cd "$ROOT/go/fory"
-#      go install ./cmd/fory
-#      cd "$ROOT/go/fory/tests"
-#      go generate
-#      go test -v
+      cd "$ROOT/go/fory"
+      go install ./cmd/fory
+      cd "$ROOT/go/fory/tests"
+      go generate
+      go test -v
       cd "$ROOT/go/fory"
       go test -v
       echo "Executing fory go tests succeeds"
diff --git a/go/fory/codegen/utils.go b/go/fory/codegen/utils.go
index 18c5de055..06915952d 100644
--- a/go/fory/codegen/utils.go
+++ b/go/fory/codegen/utils.go
@@ -273,22 +273,28 @@ func getTypeIDValue(typeID string) int {
 }
 
 // sortFields sorts fields according to Fory protocol specification
-// This matches the reflection-based sorting exactly for cross-language 
compatibility
+// This matches the new field ordering specification for cross-language 
compatibility
 func sortFields(fields []*FieldInfo) {
        sort.Slice(fields, func(i, j int) bool {
                f1, f2 := fields[i], fields[j]
 
-               // Group primitives first (matching reflection's boxed group)
-               if f1.IsPrimitive && !f2.IsPrimitive {
-                       return true
-               }
-               if !f1.IsPrimitive && f2.IsPrimitive {
-                       return false
+               // Categorize fields into groups
+               group1 := getFieldGroup(f1)
+               group2 := getFieldGroup(f2)
+
+               // Sort by group first
+               if group1 != group2 {
+                       return group1 < group2
                }
 
-               if f1.IsPrimitive && f2.IsPrimitive {
-                       // Match reflection's boxed sorting logic exactly
-                       // First: handle compression types 
(INT32/INT64/VAR_INT32/VAR_INT64)
+               // Within same group, apply group-specific sorting
+               switch group1 {
+               case groupPrimitive:
+                       // Primitive fields: larger size first, smaller later, 
variable size last
+                       // When same size, sort by type id
+                       // When same size and type id, sort by snake case field 
name
+
+                       // Handle compression types 
(INT32/INT64/VAR_INT32/VAR_INT64)
                        compressI := f1.TypeID == "INT32" || f1.TypeID == 
"INT64" ||
                                f1.TypeID == "VAR_INT32" || f1.TypeID == 
"VAR_INT64"
                        compressJ := f2.TypeID == "INT32" || f2.TypeID == 
"INT64" ||
@@ -298,24 +304,97 @@ func sortFields(fields []*FieldInfo) {
                                return !compressI && compressJ // non-compress 
comes first
                        }
 
-                       // Then: by size (descending)
+                       // Sort by size (descending)
                        if f1.PrimitiveSize != f2.PrimitiveSize {
                                return f1.PrimitiveSize > f2.PrimitiveSize
                        }
 
-                       // Finally: by name (ascending)
+                       // Sort by type ID
+                       if f1.TypeID != f2.TypeID {
+                               return getTypeIDValue(f1.TypeID) < 
getTypeIDValue(f2.TypeID)
+                       }
+
+                       // Finally by name
                        return f1.SnakeName < f2.SnakeName
-               }
 
-               // For non-primitives: STRING comes in final group, others in 
others group
-               // All sorted by type ID, then by name (matching reflection)
-               if f1.TypeID != f2.TypeID {
-                       return getTypeIDValue(f1.TypeID) < 
getTypeIDValue(f2.TypeID)
+               case groupOtherInternalType:
+                       // Other internal type fields: sort by type id then 
snake case field name
+                       if f1.TypeID != f2.TypeID {
+                               return getTypeIDValue(f1.TypeID) < 
getTypeIDValue(f2.TypeID)
+                       }
+                       return f1.SnakeName < f2.SnakeName
+
+               case groupList, groupSet, groupMap, groupOther:
+                       // List/Set/Map/Other fields: sort by snake case field 
name only
+                       return f1.SnakeName < f2.SnakeName
+
+               default:
+                       // Fallback: sort by name
+                       return f1.SnakeName < f2.SnakeName
                }
-               return f1.SnakeName < f2.SnakeName
        })
 }
 
+// Field group constants for sorting
+const (
+       groupPrimitive         = 0 // primitive and nullable primitive fields
+       groupOtherInternalType = 1 // other internal type fields (string, 
timestamp, etc.)
+       groupList              = 2 // list fields
+       groupSet               = 3 // set fields
+       groupMap               = 4 // map fields
+       groupOther             = 5 // other fields
+)
+
+// getFieldGroup categorizes a field into its sorting group
+func getFieldGroup(field *FieldInfo) int {
+       typeID := field.TypeID
+
+       // Primitive fields (including nullable primitives)
+       // types: 
bool/int8/int16/int32/varint32/int64/varint64/sliint64/float16/float32/float64
+       if field.IsPrimitive {
+               return groupPrimitive
+       }
+
+       // List fields
+       if typeID == "LIST" {
+               return groupList
+       }
+
+       // Set fields
+       if typeID == "SET" {
+               return groupSet
+       }
+
+       // Map fields
+       if typeID == "MAP" {
+               return groupMap
+       }
+
+       // Other internal type fields
+       // These are fory internal types that are not primitives/lists/sets/maps
+       // Examples: STRING, TIMESTAMP, LOCAL_DATE, NAMED_STRUCT, etc.
+       internalTypes := map[string]bool{
+               "STRING":       true,
+               "TIMESTAMP":    true,
+               "LOCAL_DATE":   true,
+               "NAMED_STRUCT": true,
+               "STRUCT":       true,
+               "BINARY":       true,
+               "ENUM":         true,
+               "NAMED_ENUM":   true,
+               "EXT":          true,
+               "NAMED_EXT":    true,
+               "INTERFACE":    true, // for interface{} types
+       }
+
+       if internalTypes[typeID] {
+               return groupOtherInternalType
+       }
+
+       // Everything else goes to "other fields"
+       return groupOther
+}
+
 // computeStructHash computes a hash for struct schema compatibility
 // This implementation aligns with the reflection-based hash calculation
 func computeStructHash(s *StructInfo) int32 {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to