This is an automated email from the ASF dual-hosted git repository.

mgrund pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark-connect-go.git


The following commit(s) were added to refs/heads/master by this push:
     new 20be766  [FEAT] Converting a Row to a JSON string
20be766 is described below

commit 20be766d7549dc9db423c1a5ba8ba1d4324ed383
Author: Martin Grund <[email protected]>
AuthorDate: Wed Aug 20 10:24:17 2025 +0200

    [FEAT] Converting a Row to a JSON string
    
    ### What changes were proposed in this pull request?
    
      This PR adds JSON serialization capability to the Row interface by 
implementing a `ToJsonString()` method. The
      implementation includes:
    
      - Added `ToJsonString()` method to the Row interface that converts Row 
data to a JSON string representation
      - Implemented comprehensive `convertToJsonValue()` function that handles 
all Spark data types including:
        - Basic types (bool, string, integers, floats)
        - Binary data (base64 encoded)
        - Decimal types (decimal128/256 as string representations)
        - Arrow timestamp and date types (RFC3339 and ISO date formats)
        - Arrays and nested structures with recursive conversion
        - Maps with string keys (validates non-string keys return errors)
        - Custom types using reflection fallback for underlying basic types
      - Added extensive test suite with 238 lines covering all supported data 
types and error scenarios
    
      ### Why are the changes needed?
    
      This feature enables users to easily serialize Row data to JSON format, 
which is essential for:
      - Data export and interoperability with other systems
      - Debugging and logging Row contents in human-readable format
      - Integration with REST APIs and web services that expect JSON
      - Data analysis workflows that require JSON output
    
      Currently, there's no built-in way to convert Row data to JSON, forcing 
users to manually iterate through fields
      and handle type conversions.
    
      ### Does this PR introduce _any_ user-facing change?
    
      Yes. This PR adds a new public method `ToJsonString() (string, error)` to 
the Row interface. Users can now call
      this method on any Row instance to get a JSON string representation:
    
      ```go
      row := // ... get a Row from DataFrame operations
      jsonStr, err := row.ToJsonString()
      if err != nil {
          // handle conversion error
      }
      // jsonStr contains: {"field1": "value1", "field2": 42, ...}
      ```
    
      ### How was this patch tested?
    
      Added comprehensive unit tests in spark/sql/types/row_json_test.go 
covering:
    
      - Basic data types: strings, integers, floats, booleans, nil values
      - Binary data: byte arrays converted to base64 encoding
      - Decimal types: decimal128 and decimal256 number representations
      - Temporal types: Arrow timestamps, Date32, Date64, and Go time.Time
      - Collection types: arrays with recursive element conversion
      - Map types: both map[string]any and map[any]any with string key 
validation
      - Nested structures: complex combinations of arrays and maps
      - Error cases: invalid map keys and conversion failures
      - JSON validity: all outputs are verified to be valid JSON through 
round-trip parsing
    
      Tests ensure both successful conversions produce expected JSON and error 
cases properly return meaningful error
      messages.
    
    Closes #162 from grundprinzip/json_value.
    
    Authored-by: Martin Grund <[email protected]>
    Signed-off-by: Martin Grund <[email protected]>
---
 spark/sql/types/row.go           | 165 +++++++++++++++++++++++++++
 spark/sql/types/row_json_test.go | 238 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 403 insertions(+)

diff --git a/spark/sql/types/row.go b/spark/sql/types/row.go
index cdcdeea..aacd4e5 100644
--- a/spark/sql/types/row.go
+++ b/spark/sql/types/row.go
@@ -17,7 +17,16 @@
 package types
 
 import (
+       "encoding/base64"
+       "encoding/json"
+       "fmt"
        "maps"
+       "reflect"
+       "time"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/decimal128"
+       "github.com/apache/arrow-go/v18/arrow/decimal256"
 )
 
 type Row interface {
@@ -32,6 +41,9 @@ type Row interface {
        // Len returns the number of fields within a [Row].
        Len() int
        FieldNames() []string
+       // ToJsonString converts the Row to a JSON string representation.
+       // Returns an error if the row contains data that cannot be properly 
represented in JSON.
+       ToJsonString() (string, error)
 }
 
 type rowImpl struct {
@@ -70,3 +82,156 @@ func (r *rowImpl) FieldNames() []string {
        }
        return names
 }
+
+func (r *rowImpl) ToJsonString() (string, error) {
+       jsonMap := make(map[string]any)
+       fieldNames := r.FieldNames()
+
+       for i, fieldName := range fieldNames {
+               value := r.values[i]
+               convertedValue, err := convertToJsonValue(value)
+               if err != nil {
+                       return "", fmt.Errorf("failed to convert field '%s': 
%w", fieldName, err)
+               }
+               jsonMap[fieldName] = convertedValue
+       }
+
+       jsonBytes, err := json.Marshal(jsonMap)
+       if err != nil {
+               return "", fmt.Errorf("failed to marshal JSON: %w", err)
+       }
+
+       return string(jsonBytes), nil
+}
+
+func convertToJsonValue(value any) (any, error) {
+       if value == nil {
+               return nil, nil
+       }
+
+       switch v := value.(type) {
+       case bool, string, int8, int16, int32, int64, float32, float64:
+               return v, nil
+
+       case []byte:
+               return base64.StdEncoding.EncodeToString(v), nil
+
+       case decimal128.Num:
+               return v.BigInt().String(), nil
+
+       case decimal256.Num:
+               return v.BigInt().String(), nil
+
+       case arrow.Timestamp:
+               epochUs := int64(v)
+               t := time.Unix(epochUs/1000000, (epochUs%1000000)*1000).UTC()
+               return t.Format(time.RFC3339), nil
+
+       case arrow.Date32:
+               epochDays := int64(v)
+               epochTime := time.Date(1970, 1, 1, 0, 0, 0, 0, 
time.UTC).AddDate(0, 0, int(epochDays))
+               return epochTime.Format("2006-01-02"), nil
+
+       case arrow.Date64:
+               epochMs := int64(v)
+               t := time.Unix(epochMs/1000, (epochMs%1000)*1000000).UTC()
+               return t.Format("2006-01-02"), nil
+
+       case time.Time:
+               if v.IsZero() {
+                       return nil, nil
+               }
+               return v.Format(time.RFC3339), nil
+
+       case []any:
+               result := make([]any, len(v))
+               for i, item := range v {
+                       convertedItem, err := convertToJsonValue(item)
+                       if err != nil {
+                               return nil, fmt.Errorf("failed to convert array 
element at index %d: %w", i, err)
+                       }
+                       result[i] = convertedItem
+               }
+               return result, nil
+
+       case map[any]any:
+               result := make(map[string]any)
+               for key, val := range v {
+                       keyStr, ok := key.(string)
+                       if !ok {
+                               return nil, fmt.Errorf("map key must be string 
for JSON conversion, got %T", key)
+                       }
+                       convertedVal, err := convertToJsonValue(val)
+                       if err != nil {
+                               return nil, fmt.Errorf("failed to convert map 
value for key '%s': %w", keyStr, err)
+                       }
+                       result[keyStr] = convertedVal
+               }
+               return result, nil
+
+       case map[string]any:
+               result := make(map[string]any)
+               for key, val := range v {
+                       convertedVal, err := convertToJsonValue(val)
+                       if err != nil {
+                               return nil, fmt.Errorf("failed to convert map 
value for key '%s': %w", key, err)
+                       }
+                       result[key] = convertedVal
+               }
+               return result, nil
+
+       default:
+               // Use reflection to handle custom types that have basic types 
as their underlying type.
+               // For example, a custom type like "type MyInt int32" would not 
match the explicit
+               // int32 case above, but would match reflect.Int32 here. This 
ensures we can still
+               // convert custom integer, float, bool, and string types to 
their JSON representations.
+               rv := reflect.ValueOf(value)
+               switch rv.Kind() {
+               case reflect.Slice, reflect.Array:
+                       length := rv.Len()
+                       result := make([]any, length)
+                       for i := 0; i < length; i++ {
+                               convertedItem, err := 
convertToJsonValue(rv.Index(i).Interface())
+                               if err != nil {
+                                       return nil, fmt.Errorf("failed to 
convert array element at index %d: %w", i, err)
+                               }
+                               result[i] = convertedItem
+                       }
+                       return result, nil
+
+               case reflect.Map:
+                       if rv.Type().Key().Kind() != reflect.String {
+                               return nil, fmt.Errorf("map key must be string 
for JSON conversion, got %s", rv.Type().Key().Kind())
+                       }
+                       result := make(map[string]any)
+                       for _, key := range rv.MapKeys() {
+                               keyStr := key.String()
+                               val := rv.MapIndex(key)
+                               convertedVal, err := 
convertToJsonValue(val.Interface())
+                               if err != nil {
+                                       return nil, fmt.Errorf("failed to 
convert map value for key '%s': %w", keyStr, err)
+                               }
+                               result[keyStr] = convertedVal
+                       }
+                       return result, nil
+
+               case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, 
reflect.Int64:
+                       return rv.Int(), nil
+
+               case reflect.Uint, reflect.Uint8, reflect.Uint16, 
reflect.Uint32, reflect.Uint64:
+                       return rv.Uint(), nil
+
+               case reflect.Float32, reflect.Float64:
+                       return rv.Float(), nil
+
+               case reflect.Bool:
+                       return rv.Bool(), nil
+
+               case reflect.String:
+                       return rv.String(), nil
+
+               default:
+                       return fmt.Sprintf("%v", value), nil
+               }
+       }
+}
diff --git a/spark/sql/types/row_json_test.go b/spark/sql/types/row_json_test.go
new file mode 100644
index 0000000..496ad11
--- /dev/null
+++ b/spark/sql/types/row_json_test.go
@@ -0,0 +1,238 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package types
+
+import (
+       "encoding/json"
+       "testing"
+       "time"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/decimal128"
+       "github.com/apache/arrow-go/v18/arrow/decimal256"
+       "github.com/stretchr/testify/assert"
+       "github.com/stretchr/testify/require"
+)
+
+func TestRowToJsonString(t *testing.T) {
+       tests := []struct {
+               name     string
+               row      Row
+               expected string
+               hasError bool
+       }{
+               {
+                       name: "basic types",
+                       row: &rowImpl{
+                               values: []any{
+                                       "hello",
+                                       int32(42),
+                                       int64(123),
+                                       float64(3.14),
+                                       true,
+                                       nil,
+                               },
+                               offsets: map[string]int{
+                                       "str_col":    0,
+                                       "int_col":    1,
+                                       "long_col":   2,
+                                       "double_col": 3,
+                                       "bool_col":   4,
+                                       "null_col":   5,
+                               },
+                       },
+                       expected: 
`{"bool_col":true,"double_col":3.14,"int_col":42,"long_col":123,"null_col":null,"str_col":"hello"}`,
+                       hasError: false,
+               },
+               {
+                       name: "binary data",
+                       row: &rowImpl{
+                               values: []any{
+                                       []byte("hello world"),
+                               },
+                               offsets: map[string]int{
+                                       "binary_col": 0,
+                               },
+                       },
+                       expected: `{"binary_col":"aGVsbG8gd29ybGQ="}`,
+                       hasError: false,
+               },
+               {
+                       name: "decimal types",
+                       row: &rowImpl{
+                               values: []any{
+                                       decimal128.FromI64(123456),
+                                       decimal256.FromI64(789012),
+                               },
+                               offsets: map[string]int{
+                                       "decimal128_col": 0,
+                                       "decimal256_col": 1,
+                               },
+                       },
+                       expected: 
`{"decimal128_col":"123456","decimal256_col":"789012"}`,
+                       hasError: false,
+               },
+               {
+                       name: "timestamp and date",
+                       row: &rowImpl{
+                               values: []any{
+                                       arrow.Timestamp(1686981953115000), // 
microseconds
+                                       arrow.Date32(19521),               // 
days since epoch (2023-06-13)
+                                       arrow.Date64(1686981953115),       // 
milliseconds
+                               },
+                               offsets: map[string]int{
+                                       "timestamp_col": 0,
+                                       "date32_col":    1,
+                                       "date64_col":    2,
+                               },
+                       },
+                       expected: 
`{"date32_col":"2023-06-13","date64_col":"2023-06-17","timestamp_col":"2023-06-17T06:05:53Z"}`,
+                       hasError: false,
+               },
+               {
+                       name: "arrays",
+                       row: &rowImpl{
+                               values: []any{
+                                       []any{1, 2, 3},
+                                       []any{"a", "b", "c"},
+                               },
+                               offsets: map[string]int{
+                                       "int_array": 0,
+                                       "str_array": 1,
+                               },
+                       },
+                       expected: 
`{"int_array":[1,2,3],"str_array":["a","b","c"]}`,
+                       hasError: false,
+               },
+               {
+                       name: "valid string map",
+                       row: &rowImpl{
+                               values: []any{
+                                       map[string]any{
+                                               "key1": "value1",
+                                               "key2": 42,
+                                       },
+                               },
+                               offsets: map[string]int{
+                                       "map_col": 0,
+                               },
+                       },
+                       expected: `{"map_col":{"key1":"value1","key2":42}}`,
+                       hasError: false,
+               },
+               {
+                       name: "invalid map with non-string keys",
+                       row: &rowImpl{
+                               values: []any{
+                                       map[any]any{
+                                               42:     "value1",
+                                               "key2": "value2",
+                                       },
+                               },
+                               offsets: map[string]int{
+                                       "map_col": 0,
+                               },
+                       },
+                       expected: "",
+                       hasError: true,
+               },
+               {
+                       name: "nested structures",
+                       row: &rowImpl{
+                               values: []any{
+                                       []any{
+                                               map[string]any{
+                                                       "nested_key": 
"nested_value",
+                                                       "nested_num": 123,
+                                               },
+                                       },
+                               },
+                               offsets: map[string]int{
+                                       "nested_col": 0,
+                               },
+                       },
+                       expected: 
`{"nested_col":[{"nested_key":"nested_value","nested_num":123}]}`,
+                       hasError: false,
+               },
+       }
+
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       result, err := tt.row.ToJsonString()
+
+                       if tt.hasError {
+                               assert.Error(t, err)
+                               assert.Empty(t, result)
+                       } else {
+                               require.NoError(t, err)
+
+                               // Verify the result is valid JSON
+                               var parsed map[string]any
+                               err = json.Unmarshal([]byte(result), &parsed)
+                               require.NoError(t, err)
+
+                               // Verify the expected content (comparing 
parsed JSON to avoid key ordering issues)
+                               var expected map[string]any
+                               err = json.Unmarshal([]byte(tt.expected), 
&expected)
+                               require.NoError(t, err)
+
+                               assert.Equal(t, expected, parsed)
+                       }
+               })
+       }
+}
+
+func TestConvertToJsonValue(t *testing.T) {
+       tests := []struct {
+               name     string
+               input    any
+               expected any
+               hasError bool
+       }{
+               {
+                       name:     "nil value",
+                       input:    nil,
+                       expected: nil,
+                       hasError: false,
+               },
+               {
+                       name:     "time.Time",
+                       input:    time.Date(2023, 6, 17, 10, 5, 53, 0, 
time.UTC),
+                       expected: "2023-06-17T10:05:53Z",
+                       hasError: false,
+               },
+               {
+                       name:     "zero time.Time",
+                       input:    time.Time{},
+                       expected: nil,
+                       hasError: false,
+               },
+       }
+
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       result, err := convertToJsonValue(tt.input)
+
+                       if tt.hasError {
+                               assert.Error(t, err)
+                       } else {
+                               require.NoError(t, err)
+                               assert.Equal(t, tt.expected, result)
+                       }
+               })
+       }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to