This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new a0723d07e feat(go): implement new field ordering and type hash
algorithm (#2868)
a0723d07e is described below
commit a0723d07e4cfab8f8bc89d9912660acb22d7dd5f
Author: thisingl <[email protected]>
AuthorDate: Sat Nov 1 21:33:33 2025 +0800
feat(go): implement new field ordering and type hash algorithm (#2868)
<!--
**Thanks for contributing to Apache Fory™.**
**If this is your first time opening a PR on fory, you can refer to
[CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).**
Contribution Checklist
- The **Apache Fory™** community has requirements on the naming of pr
titles. You can also find instructions in
[CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).
- Apache Fory™ has a strong focus on performance. If the PR you submit
will have an impact on performance, please benchmark it first and
provide the benchmark result here.
-->
## Why?
The xlang serialization specification has been updated with significant
improvements for compile-time code generation and cross-language
compatibility. Key motivations:
1. Clearer field categorization: The new 6-group field ordering provides
better structure and makes compile-time implementation more
straightforward
2. Consistent hashing: New murmurhash3-based algorithm ensures
deterministic type hash calculation across all language implementations
3. Simplified serialization logic: Each field type group now has
explicit serialization rules, reducing ambiguity
<!-- Describe the purpose of this PR. -->
## What does this PR do?
Updates the Go codegen implementation to align with the xlang
serialization specification, ensuring full compatibility with
reflection-based serialization and other language implementations.
<!-- Describe the details of this PR. -->
## Related issues
More details can be found in PR #2793.
<!--
Are there any related issues? If this PR closes them, you can say
fix/closes:
- #xxxx0
- #xxxx1
- Fixes #xxxx2
-->
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fory/issues/new/choose) describing the
need to do so and update the document if necessary.
Delete section if not applicable.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have impact on performance, the code reviewer will explain
it), be sure to attach a benchmark data here.
Delete section if not applicable.
-->
---
ci/run_ci.sh | 10 +--
go/fory/codegen/decoder.go | 46 +++++---------
go/fory/codegen/encoder.go | 40 ++++--------
go/fory/codegen/utils.go | 148 +++++++++++++++++++++++++++++++--------------
4 files changed, 136 insertions(+), 108 deletions(-)
diff --git a/ci/run_ci.sh b/ci/run_ci.sh
index d20359653..f27835341 100755
--- a/ci/run_ci.sh
+++ b/ci/run_ci.sh
@@ -372,11 +372,11 @@ case $1 in
;;
go)
echo "Executing fory go tests for go"
- # cd "$ROOT/go/fory"
- # go install ./cmd/fory
- # cd "$ROOT/go/fory/tests"
- # go generate
- # go test -v
+ cd "$ROOT/go/fory"
+ go install ./cmd/fory
+ cd "$ROOT/go/fory/tests"
+ go generate
+ go test -v
cd "$ROOT/go/fory"
go test -v
echo "Executing fory go tests succeeds"
diff --git a/go/fory/codegen/decoder.go b/go/fory/codegen/decoder.go
index aea956e3d..f3ad94c69 100644
--- a/go/fory/codegen/decoder.go
+++ b/go/fory/codegen/decoder.go
@@ -80,23 +80,14 @@ func generateFieldReadTyped(buf *bytes.Buffer, field
*FieldInfo) error {
fieldAccess := fmt.Sprintf("v.%s", field.GoName)
// Handle special named types first
+ // According to new spec, time types are "other internal types" and use
ReadReferencable
if named, ok := field.Type.(*types.Named); ok {
typeStr := named.String()
switch typeStr {
- case "time.Time":
- fmt.Fprintf(buf, "\tusec := buf.ReadInt64()\n")
- fmt.Fprintf(buf, "\t%s =
fory.CreateTimeFromUnixMicro(usec)\n", fieldAccess)
- return nil
- case "github.com/apache/fory/go/fory.Date":
- fmt.Fprintf(buf, "\tdays := buf.ReadInt32()\n")
- fmt.Fprintf(buf, "\t// Handle zero date marker\n")
- fmt.Fprintf(buf, "\tif days == int32(-2147483648) {\n")
- fmt.Fprintf(buf, "\t\t%s = fory.Date{Year: 0, Month: 0,
Day: 0}\n", fieldAccess)
- fmt.Fprintf(buf, "\t} else {\n")
- fmt.Fprintf(buf, "\t\tdiff := time.Duration(days) * 24
* time.Hour\n")
- fmt.Fprintf(buf, "\t\tt := time.Date(1970, 1, 1, 0, 0,
0, 0, time.Local).Add(diff)\n")
- fmt.Fprintf(buf, "\t\t%s = fory.Date{Year: t.Year(),
Month: t.Month(), Day: t.Day()}\n", fieldAccess)
- fmt.Fprintf(buf, "\t}\n")
+ case "time.Time", "github.com/apache/fory/go/fory.Date":
+ // These types are "other internal types" in the new
spec
+ // They use: | null flag | value data | format
+ fmt.Fprintf(buf, "\tf.ReadReferencable(buf,
reflect.ValueOf(&%s).Elem())\n", fieldAccess)
return nil
}
}
@@ -109,26 +100,18 @@ func generateFieldReadTyped(buf *bytes.Buffer, field
*FieldInfo) error {
}
// Handle basic types
+ // Note: primitive serializers read values directly without
NotNullValueFlag check
if basic, ok := field.Type.Underlying().(*types.Basic); ok {
switch basic.Kind() {
case types.Bool:
- fmt.Fprintf(buf, "\tif flag := buf.ReadInt8(); flag !=
-1 {\n")
- fmt.Fprintf(buf, "\t\treturn fmt.Errorf(\"expected
NotNullValueFlag for field %s, got %%d\", flag)\n", field.GoName)
- fmt.Fprintf(buf, "\t}\n")
fmt.Fprintf(buf, "\t%s = buf.ReadBool()\n", fieldAccess)
case types.Int8:
- fmt.Fprintf(buf, "\t%s = buf.ReadInt8()\n", fieldAccess)
+ fmt.Fprintf(buf, "\t%s = int8(buf.ReadByte_())\n",
fieldAccess)
case types.Int16:
fmt.Fprintf(buf, "\t%s = buf.ReadInt16()\n",
fieldAccess)
case types.Int32:
- fmt.Fprintf(buf, "\tif flag := buf.ReadInt8(); flag !=
-1 {\n")
- fmt.Fprintf(buf, "\t\treturn fmt.Errorf(\"expected
NotNullValueFlag for field %s, got %%d\", flag)\n", field.GoName)
- fmt.Fprintf(buf, "\t}\n")
fmt.Fprintf(buf, "\t%s = buf.ReadVarint32()\n",
fieldAccess)
case types.Int, types.Int64:
- fmt.Fprintf(buf, "\tif flag := buf.ReadInt8(); flag !=
-1 {\n")
- fmt.Fprintf(buf, "\t\treturn fmt.Errorf(\"expected
NotNullValueFlag for field %s, got %%d\", flag)\n", field.GoName)
- fmt.Fprintf(buf, "\t}\n")
fmt.Fprintf(buf, "\t%s = buf.ReadVarint64()\n",
fieldAccess)
case types.Uint8:
fmt.Fprintf(buf, "\t%s = buf.ReadByte_()\n",
fieldAccess)
@@ -141,13 +124,12 @@ func generateFieldReadTyped(buf *bytes.Buffer, field
*FieldInfo) error {
case types.Float32:
fmt.Fprintf(buf, "\t%s = buf.ReadFloat32()\n",
fieldAccess)
case types.Float64:
- fmt.Fprintf(buf, "\tif flag := buf.ReadInt8(); flag !=
-1 {\n")
- fmt.Fprintf(buf, "\t\treturn fmt.Errorf(\"expected
NotNullValueFlag for field %s, got %%d\", flag)\n", field.GoName)
- fmt.Fprintf(buf, "\t}\n")
fmt.Fprintf(buf, "\t%s = buf.ReadFloat64()\n",
fieldAccess)
case types.String:
+ // String is referencable but NeedWriteRef()=false
+ // In struct deserialization, it reads NotNullValueFlag
then value
fmt.Fprintf(buf, "\tif flag := buf.ReadInt8(); flag !=
-1 {\n")
- fmt.Fprintf(buf, "\t\treturn fmt.Errorf(\"expected
NotNullValueFlag for field %s, got %%d\", flag)\n", field.GoName)
+ fmt.Fprintf(buf, "\t\treturn fmt.Errorf(\"expected
NotNullValueFlag for string field %s, got %%d\", flag)\n", field.GoName)
fmt.Fprintf(buf, "\t}\n")
fmt.Fprintf(buf, "\t%s = fory.ReadString(buf)\n",
fieldAccess)
default:
@@ -634,8 +616,8 @@ func generateMapKeyRead(buf *bytes.Buffer, keyType
types.Type, varName string) e
// intSerializer uses ReadInt64, not ReadVarint64
fmt.Fprintf(buf, "\t\t\t\t\t%s =
int(buf.ReadInt64())\n", varName)
case types.String:
- // stringSerializer is referencable, need to use
ReadReferencable
- fmt.Fprintf(buf, "\t\t\t\t\tf.ReadReferencable(buf,
reflect.ValueOf(&%s).Elem())\n", varName)
+ // stringSerializer.NeedWriteRef() = false, read
directly
+ fmt.Fprintf(buf, "\t\t\t\t\t%s =
fory.ReadString(buf)\n", varName)
default:
return fmt.Errorf("unsupported map key type: %v",
keyType)
}
@@ -656,8 +638,8 @@ func generateMapValueRead(buf *bytes.Buffer, valueType
types.Type, varName strin
// intSerializer uses ReadInt64, not ReadVarint64
fmt.Fprintf(buf, "\t\t\t\t\t%s =
int(buf.ReadInt64())\n", varName)
case types.String:
- // stringSerializer is referencable, need to use
ReadReferencable
- fmt.Fprintf(buf, "\t\t\t\t\tf.ReadReferencable(buf,
reflect.ValueOf(&%s).Elem())\n", varName)
+ // stringSerializer.NeedWriteRef() = false, read
directly
+ fmt.Fprintf(buf, "\t\t\t\t\t%s =
fory.ReadString(buf)\n", varName)
default:
return fmt.Errorf("unsupported map value type: %v",
valueType)
}
diff --git a/go/fory/codegen/encoder.go b/go/fory/codegen/encoder.go
index 87f002439..17b5e86c7 100644
--- a/go/fory/codegen/encoder.go
+++ b/go/fory/codegen/encoder.go
@@ -75,20 +75,14 @@ func generateFieldWriteTyped(buf *bytes.Buffer, field
*FieldInfo) error {
fieldAccess := fmt.Sprintf("v.%s", field.GoName)
// Handle special named types first
+ // According to new spec, time types are "other internal types" and
need WriteReferencable
if named, ok := field.Type.(*types.Named); ok {
typeStr := named.String()
switch typeStr {
- case "time.Time":
- fmt.Fprintf(buf,
"\tbuf.WriteInt64(fory.GetUnixMicro(%s))\n", fieldAccess)
- return nil
- case "github.com/apache/fory/go/fory.Date":
- fmt.Fprintf(buf, "\t// Handle zero date specially\n")
- fmt.Fprintf(buf, "\tif %s.Year == 0 && %s.Month == 0 &&
%s.Day == 0 {\n", fieldAccess, fieldAccess, fieldAccess)
- fmt.Fprintf(buf,
"\t\tbuf.WriteInt32(int32(-2147483648)) // Special marker for zero date\n")
- fmt.Fprintf(buf, "\t} else {\n")
- fmt.Fprintf(buf, "\t\tdiff := time.Date(%s.Year,
%s.Month, %s.Day, 0, 0, 0, 0, time.Local).Sub(time.Date(1970, 1, 1, 0, 0, 0, 0,
time.Local))\n", fieldAccess, fieldAccess, fieldAccess)
- fmt.Fprintf(buf, "\t\tbuf.WriteInt32(int32(diff.Hours()
/ 24))\n")
- fmt.Fprintf(buf, "\t}\n")
+ case "time.Time", "github.com/apache/fory/go/fory.Date":
+ // These types are "other internal types" in the new
spec
+ // They use: | null flag | value data | format
+ fmt.Fprintf(buf, "\tf.WriteReferencable(buf,
reflect.ValueOf(%s))\n", fieldAccess)
return nil
}
}
@@ -101,42 +95,34 @@ func generateFieldWriteTyped(buf *bytes.Buffer, field
*FieldInfo) error {
}
// Handle basic types
+ // Note: primitive serializers write values directly without
NotNullValueFlag
if basic, ok := field.Type.Underlying().(*types.Basic); ok {
switch basic.Kind() {
case types.Bool:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteBool(%s)\n", fieldAccess)
case types.Int8:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
- fmt.Fprintf(buf, "\tbuf.WriteInt8(%s)\n", fieldAccess)
+ fmt.Fprintf(buf, "\tbuf.WriteByte_(byte(%s))\n",
fieldAccess)
case types.Int16:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteInt16(%s)\n", fieldAccess)
case types.Int32:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteVarint32(%s)\n",
fieldAccess)
case types.Int, types.Int64:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteVarint64(%s)\n",
fieldAccess)
case types.Uint8:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteByte_(%s)\n", fieldAccess)
case types.Uint16:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteInt16(int16(%s))\n",
fieldAccess)
case types.Uint32:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteInt32(int32(%s))\n",
fieldAccess)
case types.Uint, types.Uint64:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteInt64(int64(%s))\n",
fieldAccess)
case types.Float32:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteFloat32(%s)\n",
fieldAccess)
case types.Float64:
- fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tbuf.WriteFloat64(%s)\n",
fieldAccess)
case types.String:
+ // String is referencable but NeedWriteRef()=false
+ // In struct serialization, it writes NotNullValueFlag
then value
fmt.Fprintf(buf, "\tbuf.WriteInt8(-1) //
NotNullValueFlag\n")
fmt.Fprintf(buf, "\tfory.WriteString(buf, %s)\n",
fieldAccess)
default:
@@ -461,8 +447,8 @@ func generateMapKeyWrite(buf *bytes.Buffer, keyType
types.Type, varName string)
// intSerializer uses WriteInt64, not WriteVarint64
fmt.Fprintf(buf, "\t\t\t\tbuf.WriteInt64(int64(%s))\n",
varName)
case types.String:
- // stringSerializer is referencable, need to use
WriteReferencable
- fmt.Fprintf(buf, "\t\t\t\tf.WriteReferencable(buf,
reflect.ValueOf(%s))\n", varName)
+ // stringSerializer.NeedWriteRef() = false, write
directly
+ fmt.Fprintf(buf, "\t\t\t\tfory.WriteString(buf, %s)\n",
varName)
default:
return fmt.Errorf("unsupported map key type: %v",
keyType)
}
@@ -483,8 +469,8 @@ func generateMapValueWrite(buf *bytes.Buffer, valueType
types.Type, varName stri
// intSerializer uses WriteInt64, not WriteVarint64
fmt.Fprintf(buf, "\t\t\t\tbuf.WriteInt64(int64(%s))\n",
varName)
case types.String:
- // stringSerializer is referencable, need to use
WriteReferencable
- fmt.Fprintf(buf, "\t\t\t\tf.WriteReferencable(buf,
reflect.ValueOf(%s))\n", varName)
+ // stringSerializer.NeedWriteRef() = false, write
directly
+ fmt.Fprintf(buf, "\t\t\t\tfory.WriteString(buf, %s)\n",
varName)
default:
return fmt.Errorf("unsupported map value type: %v",
valueType)
}
diff --git a/go/fory/codegen/utils.go b/go/fory/codegen/utils.go
index 06915952d..f39de85d0 100644
--- a/go/fory/codegen/utils.go
+++ b/go/fory/codegen/utils.go
@@ -21,9 +21,11 @@ import (
"fmt"
"go/types"
"sort"
+ "strings"
"unicode"
"github.com/apache/fory/go/fory"
+ "github.com/spaolacci/murmur3"
)
// FieldInfo contains metadata about a struct field
@@ -396,83 +398,141 @@ func getFieldGroup(field *FieldInfo) int {
}
// computeStructHash computes a hash for struct schema compatibility
-// This implementation aligns with the reflection-based hash calculation
+// This implementation follows the new xlang serialization spec:
+// 1. Sort fields by fields sort algorithm (already done in s.Fields)
+// 2. Build string: snake_case(field_name),$type_id,$nullable;
+// 3. For "other fields", use TypeId::UNKNOWN
+// 4. Convert to UTF8 bytes
+// 5. Compute murmurhash3_x64_128, use first 32 bits
func computeStructHash(s *StructInfo) int32 {
- // Use the same iterative algorithm as reflection
- var hash int32 = 17
+ var hashString strings.Builder
- // Process fields in the same order as reflection
+ // Iterate through sorted fields
for _, field := range s.Fields {
- id := getFieldHashID(field)
-
- // Same algorithm as reflection: hash = hash * 31 + id
- newHash := int64(hash)*31 + int64(id)
-
- // Same overflow handling as reflection
- const MaxInt32 = 2147483647
- for newHash >= MaxInt32 {
- newHash /= 7
+ // Append snake_case field name
+ hashString.WriteString(field.SnakeName)
+ hashString.WriteString(",")
+
+ // Append type_id
+ typeID := getTypeIDForHash(field)
+ hashString.WriteString(fmt.Sprintf("%d", typeID))
+ hashString.WriteString(",")
+
+ // Append nullable (1 if nullable, 0 otherwise)
+ // nullable is determined by field type (matching reflection's
nullable() function)
+ nullable := 0
+ if isNullableType(field.Type) {
+ nullable = 1
}
- hash = int32(newHash)
+ hashString.WriteString(fmt.Sprintf("%d", nullable))
+ hashString.WriteString(";")
}
+ // Convert to UTF8 bytes
+ hashBytes := []byte(hashString.String())
+
+ // Compute murmurhash3_x64_128 with seed 47, and use first 32 bits
+ // This matches the reflection implementation
+ h1, _ := murmur3.Sum128WithSeed(hashBytes, 47)
+ hash := int32(h1 & 0xFFFFFFFF)
+
if hash == 0 {
- // Same panic condition as reflection
panic(fmt.Errorf("hash for type %v is 0", s.Name))
}
return hash
}
-// getFieldHashID computes the field ID for hash calculation, matching
reflection logic exactly
-func getFieldHashID(field *FieldInfo) int32 {
- // Map Go types to Fory TypeIds (exactly matching reflection)
- var tid int16
+// isNullableType checks if a type is nullable (referencable)
+// This matches the reflection implementation's nullable() function
+func isNullableType(t types.Type) bool {
+ // Check pointer, slice, map, interface directly
+ switch t.(type) {
+ case *types.Pointer, *types.Slice, *types.Map, *types.Interface,
*types.Array:
+ return true
+ }
+
+ // Check basic types (String is nullable)
+ if basic, ok := t.Underlying().(*types.Basic); ok {
+ return basic.Kind() == types.String
+ }
+ // For named types (e.g., time.Time, fory.Date), check underlying type
+ // Struct types are not nullable unless they're pointers
+ return false
+}
+
+// getTypeIDForHash returns the TypeId for hash calculation according to new
spec
+// For "other fields" (groupOther), returns UNKNOWN (63)
+func getTypeIDForHash(field *FieldInfo) int16 {
+ // Determine field group
+ group := getFieldGroup(field)
+
+ // For "other fields", use UNKNOWN
+ if group == groupOther {
+ return fory.UNKNOWN
+ }
+
+ // For struct fields declared with concrete slice types,
+ // use typeID = LIST uniformly for hash calculation to align
cross-language behavior
+ // This matches the reflection implementation
+ if field.TypeID == "LIST" {
+ return fory.LIST
+ }
+
+ // Map field TypeID string to Fory TypeId value
switch field.TypeID {
case "BOOL":
- tid = fory.BOOL
+ return fory.BOOL
case "INT8":
- tid = fory.INT8
+ return fory.INT8
case "INT16":
- tid = fory.INT16
+ return fory.INT16
case "INT32":
- tid = fory.INT32
+ return fory.INT32
case "INT64":
- tid = fory.INT64
+ return fory.INT64
case "UINT8":
- tid = fory.UINT8
+ return fory.UINT8
case "UINT16":
- tid = fory.UINT16
+ return fory.UINT16
case "UINT32":
- tid = fory.UINT32
+ return fory.UINT32
case "UINT64":
- tid = fory.UINT64
+ return fory.UINT64
case "FLOAT32":
- tid = fory.FLOAT
+ return fory.FLOAT
case "FLOAT64":
- tid = fory.DOUBLE
+ return fory.DOUBLE
case "STRING":
- tid = fory.STRING
+ return fory.STRING
case "TIMESTAMP":
- tid = fory.TIMESTAMP
+ return fory.TIMESTAMP
case "LOCAL_DATE":
- tid = fory.LOCAL_DATE
+ return fory.LOCAL_DATE
case "NAMED_STRUCT":
- tid = fory.NAMED_STRUCT
- case "LIST":
- tid = fory.LIST
+ return fory.NAMED_STRUCT
+ case "STRUCT":
+ return fory.STRUCT
+ case "SET":
+ return fory.SET
case "MAP":
- tid = fory.MAP
+ return fory.MAP
+ case "BINARY":
+ return fory.BINARY
+ case "ENUM":
+ return fory.ENUM
+ case "NAMED_ENUM":
+ return fory.NAMED_ENUM
+ case "EXT":
+ return fory.EXT
+ case "NAMED_EXT":
+ return fory.NAMED_EXT
+ case "INTERFACE":
+ return fory.UNKNOWN // interface{} treated as UNKNOWN
default:
- tid = 0 // Unknown type
- }
-
- // Same logic as reflection: handle negative TypeIds
- if tid < 0 {
- return -int32(tid)
+ return fory.UNKNOWN
}
- return int32(tid)
}
// getStructNames extracts struct names from StructInfo slice
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]