This is an automated email from the ASF dual-hosted git repository.
mgrund pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark-connect-go.git
The following commit(s) were added to refs/heads/master by this push:
new 20be766 [FEAT] Converting a Row to a JSON string
20be766 is described below
commit 20be766d7549dc9db423c1a5ba8ba1d4324ed383
Author: Martin Grund <[email protected]>
AuthorDate: Wed Aug 20 10:24:17 2025 +0200
[FEAT] Converting a Row to a JSON string
### What changes were proposed in this pull request?
This PR adds JSON serialization capability to the Row interface by
implementing a `ToJsonString()` method. The
implementation includes:
- Added `ToJsonString()` method to the Row interface that converts Row
data to a JSON string representation
- Implemented comprehensive `convertToJsonValue()` function that handles
all Spark data types including:
- Basic types (bool, string, integers, floats)
- Binary data (base64 encoded)
- Decimal types (decimal128/256 as string representations)
- Arrow timestamp and date types (RFC3339 and ISO date formats)
- Arrays and nested structures with recursive conversion
- Maps with string keys (validates non-string keys return errors)
- Custom types using reflection fallback for underlying basic types
- Added extensive test suite with 238 lines covering all supported data
types and error scenarios
### Why are the changes needed?
This feature enables users to easily serialize Row data to JSON format,
which is essential for:
- Data export and interoperability with other systems
- Debugging and logging Row contents in human-readable format
- Integration with REST APIs and web services that expect JSON
- Data analysis workflows that require JSON output
Currently, there's no built-in way to convert Row data to JSON, forcing
users to manually iterate through fields
and handle type conversions.
### Does this PR introduce _any_ user-facing change?
Yes. This PR adds a new public method `ToJsonString() (string, error)` to
the Row interface. Users can now call
this method on any Row instance to get a JSON string representation:
```go
row := // ... get a Row from DataFrame operations
jsonStr, err := row.ToJsonString()
if err != nil {
// handle conversion error
}
// jsonStr contains: {"field1": "value1", "field2": 42, ...}
```
### How was this patch tested?
Added comprehensive unit tests in spark/sql/types/row_json_test.go
covering:
- Basic data types: strings, integers, floats, booleans, nil values
- Binary data: byte arrays converted to base64 encoding
- Decimal types: decimal128 and decimal256 number representations
- Temporal types: Arrow timestamps, Date32, Date64, and Go time.Time
- Collection types: arrays with recursive element conversion
- Map types: both map[string]any and map[any]any with string key
validation
- Nested structures: complex combinations of arrays and maps
- Error cases: invalid map keys and conversion failures
- JSON validity: all outputs are verified to be valid JSON through
round-trip parsing
Tests ensure both successful conversions produce expected JSON and error
cases properly return meaningful error
messages.
Closes #162 from grundprinzip/json_value.
Authored-by: Martin Grund <[email protected]>
Signed-off-by: Martin Grund <[email protected]>
---
spark/sql/types/row.go | 165 +++++++++++++++++++++++++++
spark/sql/types/row_json_test.go | 238 +++++++++++++++++++++++++++++++++++++++
2 files changed, 403 insertions(+)
diff --git a/spark/sql/types/row.go b/spark/sql/types/row.go
index cdcdeea..aacd4e5 100644
--- a/spark/sql/types/row.go
+++ b/spark/sql/types/row.go
@@ -17,7 +17,16 @@
package types
import (
+ "encoding/base64"
+ "encoding/json"
+ "fmt"
"maps"
+ "reflect"
+ "time"
+
+ "github.com/apache/arrow-go/v18/arrow"
+ "github.com/apache/arrow-go/v18/arrow/decimal128"
+ "github.com/apache/arrow-go/v18/arrow/decimal256"
)
type Row interface {
@@ -32,6 +41,9 @@ type Row interface {
// Len returns the number of fields within a [Row].
Len() int
FieldNames() []string
+ // ToJsonString converts the Row to a JSON string representation.
+ // Returns an error if the row contains data that cannot be properly
represented in JSON.
+ ToJsonString() (string, error)
}
type rowImpl struct {
@@ -70,3 +82,156 @@ func (r *rowImpl) FieldNames() []string {
}
return names
}
+
+func (r *rowImpl) ToJsonString() (string, error) {
+ jsonMap := make(map[string]any)
+ fieldNames := r.FieldNames()
+
+ for i, fieldName := range fieldNames {
+ value := r.values[i]
+ convertedValue, err := convertToJsonValue(value)
+ if err != nil {
+ return "", fmt.Errorf("failed to convert field '%s':
%w", fieldName, err)
+ }
+ jsonMap[fieldName] = convertedValue
+ }
+
+ jsonBytes, err := json.Marshal(jsonMap)
+ if err != nil {
+ return "", fmt.Errorf("failed to marshal JSON: %w", err)
+ }
+
+ return string(jsonBytes), nil
+}
+
+func convertToJsonValue(value any) (any, error) {
+ if value == nil {
+ return nil, nil
+ }
+
+ switch v := value.(type) {
+ case bool, string, int8, int16, int32, int64, float32, float64:
+ return v, nil
+
+ case []byte:
+ return base64.StdEncoding.EncodeToString(v), nil
+
+ case decimal128.Num:
+ return v.BigInt().String(), nil
+
+ case decimal256.Num:
+ return v.BigInt().String(), nil
+
+ case arrow.Timestamp:
+ epochUs := int64(v)
+ t := time.Unix(epochUs/1000000, (epochUs%1000000)*1000).UTC()
+ return t.Format(time.RFC3339), nil
+
+ case arrow.Date32:
+ epochDays := int64(v)
+ epochTime := time.Date(1970, 1, 1, 0, 0, 0, 0,
time.UTC).AddDate(0, 0, int(epochDays))
+ return epochTime.Format("2006-01-02"), nil
+
+ case arrow.Date64:
+ epochMs := int64(v)
+ t := time.Unix(epochMs/1000, (epochMs%1000)*1000000).UTC()
+ return t.Format("2006-01-02"), nil
+
+ case time.Time:
+ if v.IsZero() {
+ return nil, nil
+ }
+ return v.Format(time.RFC3339), nil
+
+ case []any:
+ result := make([]any, len(v))
+ for i, item := range v {
+ convertedItem, err := convertToJsonValue(item)
+ if err != nil {
+ return nil, fmt.Errorf("failed to convert array
element at index %d: %w", i, err)
+ }
+ result[i] = convertedItem
+ }
+ return result, nil
+
+ case map[any]any:
+ result := make(map[string]any)
+ for key, val := range v {
+ keyStr, ok := key.(string)
+ if !ok {
+ return nil, fmt.Errorf("map key must be string
for JSON conversion, got %T", key)
+ }
+ convertedVal, err := convertToJsonValue(val)
+ if err != nil {
+ return nil, fmt.Errorf("failed to convert map
value for key '%s': %w", keyStr, err)
+ }
+ result[keyStr] = convertedVal
+ }
+ return result, nil
+
+ case map[string]any:
+ result := make(map[string]any)
+ for key, val := range v {
+ convertedVal, err := convertToJsonValue(val)
+ if err != nil {
+ return nil, fmt.Errorf("failed to convert map
value for key '%s': %w", key, err)
+ }
+ result[key] = convertedVal
+ }
+ return result, nil
+
+ default:
+ // Use reflection to handle custom types that have basic types
as their underlying type.
+ // For example, a custom type like "type MyInt int32" would not
match the explicit
+ // int32 case above, but would match reflect.Int32 here. This
ensures we can still
+ // convert custom integer, float, bool, and string types to
their JSON representations.
+ rv := reflect.ValueOf(value)
+ switch rv.Kind() {
+ case reflect.Slice, reflect.Array:
+ length := rv.Len()
+ result := make([]any, length)
+ for i := 0; i < length; i++ {
+ convertedItem, err :=
convertToJsonValue(rv.Index(i).Interface())
+ if err != nil {
+ return nil, fmt.Errorf("failed to
convert array element at index %d: %w", i, err)
+ }
+ result[i] = convertedItem
+ }
+ return result, nil
+
+ case reflect.Map:
+ if rv.Type().Key().Kind() != reflect.String {
+ return nil, fmt.Errorf("map key must be string
for JSON conversion, got %s", rv.Type().Key().Kind())
+ }
+ result := make(map[string]any)
+ for _, key := range rv.MapKeys() {
+ keyStr := key.String()
+ val := rv.MapIndex(key)
+ convertedVal, err :=
convertToJsonValue(val.Interface())
+ if err != nil {
+ return nil, fmt.Errorf("failed to
convert map value for key '%s': %w", keyStr, err)
+ }
+ result[keyStr] = convertedVal
+ }
+ return result, nil
+
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
reflect.Int64:
+ return rv.Int(), nil
+
+ case reflect.Uint, reflect.Uint8, reflect.Uint16,
reflect.Uint32, reflect.Uint64:
+ return rv.Uint(), nil
+
+ case reflect.Float32, reflect.Float64:
+ return rv.Float(), nil
+
+ case reflect.Bool:
+ return rv.Bool(), nil
+
+ case reflect.String:
+ return rv.String(), nil
+
+ default:
+ return fmt.Sprintf("%v", value), nil
+ }
+ }
+}
diff --git a/spark/sql/types/row_json_test.go b/spark/sql/types/row_json_test.go
new file mode 100644
index 0000000..496ad11
--- /dev/null
+++ b/spark/sql/types/row_json_test.go
@@ -0,0 +1,238 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package types
+
+import (
+ "encoding/json"
+ "testing"
+ "time"
+
+ "github.com/apache/arrow-go/v18/arrow"
+ "github.com/apache/arrow-go/v18/arrow/decimal128"
+ "github.com/apache/arrow-go/v18/arrow/decimal256"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
// TestRowToJsonString runs table-driven cases through Row.ToJsonString,
// covering basic types, binary, decimals, temporal types, arrays, maps,
// nested structures, and the non-string-map-key error path. Expected JSON
// is compared after a round-trip parse so map key ordering does not matter.
func TestRowToJsonString(t *testing.T) {
	tests := []struct {
		name     string
		row      Row
		expected string
		hasError bool
	}{
		{
			name: "basic types",
			row: &rowImpl{
				values: []any{
					"hello",
					int32(42),
					int64(123),
					float64(3.14),
					true,
					nil,
				},
				offsets: map[string]int{
					"str_col":    0,
					"int_col":    1,
					"long_col":   2,
					"double_col": 3,
					"bool_col":   4,
					"null_col":   5,
				},
			},
			expected: `{"bool_col":true,"double_col":3.14,"int_col":42,"long_col":123,"null_col":null,"str_col":"hello"}`,
			hasError: false,
		},
		{
			// []byte values are expected to come back base64-encoded.
			name: "binary data",
			row: &rowImpl{
				values: []any{
					[]byte("hello world"),
				},
				offsets: map[string]int{
					"binary_col": 0,
				},
			},
			expected: `{"binary_col":"aGVsbG8gd29ybGQ="}`,
			hasError: false,
		},
		{
			// Decimals serialize as their exact string representation.
			name: "decimal types",
			row: &rowImpl{
				values: []any{
					decimal128.FromI64(123456),
					decimal256.FromI64(789012),
				},
				offsets: map[string]int{
					"decimal128_col": 0,
					"decimal256_col": 1,
				},
			},
			expected: `{"decimal128_col":"123456","decimal256_col":"789012"}`,
			hasError: false,
		},
		{
			name: "timestamp and date",
			row: &rowImpl{
				values: []any{
					arrow.Timestamp(1686981953115000), // microseconds
					arrow.Date32(19521),               // days since epoch (2023-06-13)
					arrow.Date64(1686981953115),       // milliseconds
				},
				offsets: map[string]int{
					"timestamp_col": 0,
					"date32_col":    1,
					"date64_col":    2,
				},
			},
			expected: `{"date32_col":"2023-06-13","date64_col":"2023-06-17","timestamp_col":"2023-06-17T06:05:53Z"}`,
			hasError: false,
		},
		{
			// Array elements are converted recursively.
			name: "arrays",
			row: &rowImpl{
				values: []any{
					[]any{1, 2, 3},
					[]any{"a", "b", "c"},
				},
				offsets: map[string]int{
					"int_array": 0,
					"str_array": 1,
				},
			},
			expected: `{"int_array":[1,2,3],"str_array":["a","b","c"]}`,
			hasError: false,
		},
		{
			name: "valid string map",
			row: &rowImpl{
				values: []any{
					map[string]any{
						"key1": "value1",
						"key2": 42,
					},
				},
				offsets: map[string]int{
					"map_col": 0,
				},
			},
			expected: `{"map_col":{"key1":"value1","key2":42}}`,
			hasError: false,
		},
		{
			// A non-string map key (42) must make the conversion fail.
			name: "invalid map with non-string keys",
			row: &rowImpl{
				values: []any{
					map[any]any{
						42:     "value1",
						"key2": "value2",
					},
				},
				offsets: map[string]int{
					"map_col": 0,
				},
			},
			expected: "",
			hasError: true,
		},
		{
			// Arrays of maps exercise the recursive conversion path.
			name: "nested structures",
			row: &rowImpl{
				values: []any{
					[]any{
						map[string]any{
							"nested_key": "nested_value",
							"nested_num": 123,
						},
					},
				},
				offsets: map[string]int{
					"nested_col": 0,
				},
			},
			expected: `{"nested_col":[{"nested_key":"nested_value","nested_num":123}]}`,
			hasError: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := tt.row.ToJsonString()

			if tt.hasError {
				assert.Error(t, err)
				assert.Empty(t, result)
			} else {
				require.NoError(t, err)

				// Verify the result is valid JSON
				var parsed map[string]any
				err = json.Unmarshal([]byte(result), &parsed)
				require.NoError(t, err)

				// Verify the expected content (comparing parsed JSON to avoid key ordering issues)
				var expected map[string]any
				err = json.Unmarshal([]byte(tt.expected), &expected)
				require.NoError(t, err)

				assert.Equal(t, expected, parsed)
			}
		})
	}
}
+
// TestConvertToJsonValue unit-tests convertToJsonValue directly for the
// cases that are awkward to drive through a full Row: nil input, a concrete
// time.Time, and the zero time.Time (which converts to nil, i.e. JSON null).
func TestConvertToJsonValue(t *testing.T) {
	tests := []struct {
		name     string
		input    any
		expected any
		hasError bool
	}{
		{
			name:     "nil value",
			input:    nil,
			expected: nil,
			hasError: false,
		},
		{
			// Non-zero times format as RFC3339.
			name:     "time.Time",
			input:    time.Date(2023, 6, 17, 10, 5, 53, 0, time.UTC),
			expected: "2023-06-17T10:05:53Z",
			hasError: false,
		},
		{
			// The zero time maps to nil rather than a bogus date string.
			name:     "zero time.Time",
			input:    time.Time{},
			expected: nil,
			hasError: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := convertToJsonValue(tt.input)

			if tt.hasError {
				assert.Error(t, err)
			} else {
				require.NoError(t, err)
				assert.Equal(t, tt.expected, result)
			}
		})
	}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]