This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new 668bb32c9 feat(go): update codegen field sorting to generate smaller
and faster code (#2779)
668bb32c9 is described below
commit 668bb32c9c393394555a733f43c3156fd10d651e
Author: thisingl <[email protected]>
AuthorDate: Mon Oct 20 23:42:14 2025 +0800
feat(go): update codegen field sorting to generate smaller and faster code
(#2779)
<!--
**Thanks for contributing to Apache Fory™.**
**If this is your first time opening a PR on fory, you can refer to
[CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).**
Contribution Checklist
- The **Apache Fory™** community has requirements on the naming of PR
titles. You can also find instructions in
[CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).
- Apache Fory™ has a strong focus on performance. If the PR you submit
will have an impact on performance, please benchmark it first and
provide the benchmark result here.
-->
## Why?
The previous field ordering algorithm mixed different field types
(strings, lists, maps, etc.) into broad groups that were difficult to
maintain and reason about at compile time. The new specification
provides a clearer categorization that:
1. Improves compile-time predictability: Separates fields into distinct
groups (primitives, other internal types, lists, sets, maps, others)
with clear sorting rules for each group
2. Enhances cross-language compatibility: Aligns with the updated xlang
serialization specification for consistent behavior across all Fory
language implementations
3. Simplifies code generation: Each field group has its own simple
sorting rule, making the generated code more maintainable and easier to
verify
## What does this PR do?
Modified `fory/codegen/utils.go`:
* Rewrote sortFields() function to implement the new 6-group
categorization
* Added getFieldGroup() function to categorize fields into their
respective sorting groups
* Added field group constants (groupPrimitive, groupOtherInternalType,
groupList, groupSet, groupMap, groupOther)
* Each group now has its own clear sorting logic as specified in the
xlang serialization spec
## Related issues
For more details, see PR https://github.com/apache/fory/pull/2749
<!--
Is there any related issue? If this PR closes them, you can say
fix/closes:
- #xxxx0
- #xxxx1
- Fixes #xxxx2
-->
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fory/issues/new/choose) describing the
need to do so and update the document if necessary.
Delete section if not applicable.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have an impact on performance, the code reviewer will explain
it), be sure to attach benchmark data here.
Delete section if not applicable.
-->
Co-authored-by: Shawn Yang <[email protected]>
Co-authored-by: Pan Li <[email protected]>
---
ci/run_ci.sh | 10 ++---
go/fory/codegen/utils.go | 115 +++++++++++++++++++++++++++++++++++++++--------
2 files changed, 102 insertions(+), 23 deletions(-)
diff --git a/ci/run_ci.sh b/ci/run_ci.sh
index 5a3e01ed1..f27835341 100755
--- a/ci/run_ci.sh
+++ b/ci/run_ci.sh
@@ -372,11 +372,11 @@ case $1 in
;;
go)
echo "Executing fory go tests for go"
-# cd "$ROOT/go/fory"
-# go install ./cmd/fory
-# cd "$ROOT/go/fory/tests"
-# go generate
-# go test -v
+ cd "$ROOT/go/fory"
+ go install ./cmd/fory
+ cd "$ROOT/go/fory/tests"
+ go generate
+ go test -v
cd "$ROOT/go/fory"
go test -v
echo "Executing fory go tests succeeds"
diff --git a/go/fory/codegen/utils.go b/go/fory/codegen/utils.go
index 18c5de055..06915952d 100644
--- a/go/fory/codegen/utils.go
+++ b/go/fory/codegen/utils.go
@@ -273,22 +273,28 @@ func getTypeIDValue(typeID string) int {
}
// sortFields sorts fields according to Fory protocol specification
-// This matches the reflection-based sorting exactly for cross-language
compatibility
+// This matches the new field ordering specification for cross-language
compatibility
func sortFields(fields []*FieldInfo) {
sort.Slice(fields, func(i, j int) bool {
f1, f2 := fields[i], fields[j]
- // Group primitives first (matching reflection's boxed group)
- if f1.IsPrimitive && !f2.IsPrimitive {
- return true
- }
- if !f1.IsPrimitive && f2.IsPrimitive {
- return false
+ // Categorize fields into groups
+ group1 := getFieldGroup(f1)
+ group2 := getFieldGroup(f2)
+
+ // Sort by group first
+ if group1 != group2 {
+ return group1 < group2
}
- if f1.IsPrimitive && f2.IsPrimitive {
- // Match reflection's boxed sorting logic exactly
- // First: handle compression types
(INT32/INT64/VAR_INT32/VAR_INT64)
+ // Within same group, apply group-specific sorting
+ switch group1 {
+ case groupPrimitive:
+ // Primitive fields: larger size first, smaller later,
variable size last
+ // When same size, sort by type id
+ // When same size and type id, sort by snake case field
name
+
+ // Handle compression types
(INT32/INT64/VAR_INT32/VAR_INT64)
compressI := f1.TypeID == "INT32" || f1.TypeID ==
"INT64" ||
f1.TypeID == "VAR_INT32" || f1.TypeID ==
"VAR_INT64"
compressJ := f2.TypeID == "INT32" || f2.TypeID ==
"INT64" ||
@@ -298,24 +304,97 @@ func sortFields(fields []*FieldInfo) {
return !compressI && compressJ // non-compress
comes first
}
- // Then: by size (descending)
+ // Sort by size (descending)
if f1.PrimitiveSize != f2.PrimitiveSize {
return f1.PrimitiveSize > f2.PrimitiveSize
}
- // Finally: by name (ascending)
+ // Sort by type ID
+ if f1.TypeID != f2.TypeID {
+ return getTypeIDValue(f1.TypeID) <
getTypeIDValue(f2.TypeID)
+ }
+
+ // Finally by name
return f1.SnakeName < f2.SnakeName
- }
- // For non-primitives: STRING comes in final group, others in
others group
- // All sorted by type ID, then by name (matching reflection)
- if f1.TypeID != f2.TypeID {
- return getTypeIDValue(f1.TypeID) <
getTypeIDValue(f2.TypeID)
+ case groupOtherInternalType:
+ // Other internal type fields: sort by type id then
snake case field name
+ if f1.TypeID != f2.TypeID {
+ return getTypeIDValue(f1.TypeID) <
getTypeIDValue(f2.TypeID)
+ }
+ return f1.SnakeName < f2.SnakeName
+
+ case groupList, groupSet, groupMap, groupOther:
+ // List/Set/Map/Other fields: sort by snake case field
name only
+ return f1.SnakeName < f2.SnakeName
+
+ default:
+ // Fallback: sort by name
+ return f1.SnakeName < f2.SnakeName
}
- return f1.SnakeName < f2.SnakeName
})
}
+// Field group constants for sorting
+const (
+ groupPrimitive = 0 // primitive and nullable primitive fields
+ groupOtherInternalType = 1 // other internal type fields (string,
timestamp, etc.)
+ groupList = 2 // list fields
+ groupSet = 3 // set fields
+ groupMap = 4 // map fields
+ groupOther = 5 // other fields
+)
+
+// getFieldGroup categorizes a field into its sorting group
+func getFieldGroup(field *FieldInfo) int {
+ typeID := field.TypeID
+
+ // Primitive fields (including nullable primitives)
+ // types:
bool/int8/int16/int32/varint32/int64/varint64/sliint64/float16/float32/float64
+ if field.IsPrimitive {
+ return groupPrimitive
+ }
+
+ // List fields
+ if typeID == "LIST" {
+ return groupList
+ }
+
+ // Set fields
+ if typeID == "SET" {
+ return groupSet
+ }
+
+ // Map fields
+ if typeID == "MAP" {
+ return groupMap
+ }
+
+ // Other internal type fields
+ // These are fory internal types that are not primitives/lists/sets/maps
+ // Examples: STRING, TIMESTAMP, LOCAL_DATE, NAMED_STRUCT, etc.
+ internalTypes := map[string]bool{
+ "STRING": true,
+ "TIMESTAMP": true,
+ "LOCAL_DATE": true,
+ "NAMED_STRUCT": true,
+ "STRUCT": true,
+ "BINARY": true,
+ "ENUM": true,
+ "NAMED_ENUM": true,
+ "EXT": true,
+ "NAMED_EXT": true,
+ "INTERFACE": true, // for interface{} types
+ }
+
+ if internalTypes[typeID] {
+ return groupOtherInternalType
+ }
+
+ // Everything else goes to "other fields"
+ return groupOther
+}
+
// computeStructHash computes a hash for struct schema compatibility
// This implementation aligns with the reflection-based hash calculation
func computeStructHash(s *StructInfo) int32 {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]