This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 6a82d7b55 feat(go/adbc/driver/bigquery): add `BIGQUERY:type` field 
metadata (#3604)
6a82d7b55 is described below

commit 6a82d7b55aba773219adadc3ebe436de86c4e9f6
Author: Lucas Valente <[email protected]>
AuthorDate: Thu Oct 30 03:09:22 2025 +0100

    feat(go/adbc/driver/bigquery): add `BIGQUERY:type` field metadata (#3604)
    
    ## Motivation
    The `Type` metadata key has two limitations which stem from BigQuery's
    API:
    1. it says fields of type `ARRAY<T>` are just `T` with `Repeated=true`
    2. it says `STRUCT<...>` fields are simply `RECORD`, and erases any
    information about the inner fields.
    
    These limitations can cause problems when trying to parse the `Type` key
    or when using it verbatim against the warehouse in a statement, e.g. a
    `CREATE TABLE` statement or an `AS T` cast.
    
    ## Summary
    This PR adds a new `BIGQUERY:type` key that formats the original SQL
    string as [specified by BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type).
    
    Most types remain unchanged as they come from `gobigquery`, and in those
    cases this key will contain the same value as `Type`.
    
    However, arrays and structs get transformed to match the richer type
    string.
    
    ## Testing
    I ran a `CREATE TABLE AS` query against BigQuery. Here are the results for
    fields of different types:
    
    [1] Regular non-nested types are simply copied over from the value of
    `Type`
    <details>
    <summary>1</summary>
    <img width="331" height="1071" alt="image"
    src="https://github.com/user-attachments/assets/ccd2ce17-37d8-4630-bef5-a503ed450c2a"
    />
    </details>
    
    [2] An array of integers becomes `ARRAY<INTEGER>`, while `Type` remains
    `INTEGER`
    <details>
    <summary>2</summary>
    <img width="319" height="369" alt="image"
    src="https://github.com/user-attachments/assets/e588d7ac-c7ca-40fb-ab51-9795e566d240"
    />
    </details>
    
    [3] An array of structs becomes `ARRAY<STRUCT<...>>`
    <details>
    <summary>3</summary>
    <img width="551" height="816" alt="image"
    src="https://github.com/user-attachments/assets/bb946ebc-747a-4529-88a8-68636f94e44e"
    />
    </details>
    
    [4] The inner types of a struct of arrays are `ARRAY<...>`
    <details>
    <summary>4</summary>
    <img width="610" height="922" alt="image"
    src="https://github.com/user-attachments/assets/932a3554-ea56-4b1f-8642-801ee91c4f63"
    />
    </details>
    
    [5] A deeply nested struct also has the correct inner types
    <details>
    <summary>5</summary>
    <img width="1327" height="1307" alt="image"
    src="https://github.com/user-attachments/assets/3185651b-8809-42b0-adc4-ec956eaf9e87"
    />
    </details>
    
    ## Related issues
    - https://github.com/apache/arrow-adbc/issues/3449
---
 go/adbc/driver/bigquery/connection.go | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/go/adbc/driver/bigquery/connection.go 
b/go/adbc/driver/bigquery/connection.go
index 17ab6b43d..1fe26818b 100644
--- a/go/adbc/driver/bigquery/connection.go
+++ b/go/adbc/driver/bigquery/connection.go
@@ -795,6 +795,8 @@ func buildField(schema *bigquery.FieldSchema, level uint) 
(arrow.Field, error) {
        field.Nullable = !schema.Required
        metadata["Type"] = string(schema.Type)
 
+       richSqlType := string(schema.Type)
+
        if schema.PolicyTags != nil {
                policyTagList, err := json.Marshal(schema.PolicyTags)
                if err != nil {
@@ -823,14 +825,21 @@ func buildField(schema *bigquery.FieldSchema, level uint) 
(arrow.Field, error) {
        case bigquery.RecordFieldType:
                // create an Arrow struct for BigQuery Record fields
                nestedFields := make([]arrow.Field, len(schema.Schema))
+               nestedRichSqlTypes := make([]string, len(schema.Schema))
                for i, nestedFieldSchema := range schema.Schema {
                        f, err := buildField(nestedFieldSchema, level+1)
                        if err != nil {
                                return arrow.Field{}, err
                        }
                        nestedFields[i] = f
+
+                       fieldRichSqlType, found := 
f.Metadata.GetValue("BIGQUERY:type")
+                       if found {
+                               nestedRichSqlTypes[i] = fmt.Sprintf("`%s` %s", 
f.Name, fieldRichSqlType)
+                       }
                }
                structType := arrow.StructOf(nestedFields...)
+               richSqlType = fmt.Sprintf("STRUCT<%s>", 
strings.Join(nestedRichSqlTypes, ", "))
                if structType == nil {
                        return arrow.Field{}, adbc.Error{
                                Code: adbc.StatusInvalidArgument,
@@ -888,8 +897,12 @@ func buildField(schema *bigquery.FieldSchema, level uint) 
(arrow.Field, error) {
        // if the field is repeated, then it's a list of the type we just built
        if schema.Repeated {
                field.Type = arrow.ListOf(field.Type)
+               richSqlType = fmt.Sprintf("ARRAY<%s>", richSqlType)
        }
 
+       // derive the standard type string from the field
+       metadata["BIGQUERY:type"] = richSqlType
+
        if level == 0 {
                metadata["DefaultValueExpression"] = 
schema.DefaultValueExpression
        }

Reply via email to