tanmayrauth commented on code in PR #1379:
URL: https://github.com/apache/iceberg-go/pull/1379#discussion_r3523587439


##########
cmd/iceberg/utils_test.go:
##########
@@ -81,40 +81,90 @@ func TestParseProperties(t *testing.T) {
 }
 
 func TestParsePartitionSpec(t *testing.T) {
+       schema := iceberg.NewSchema(0,
+               iceberg.NestedField{
+                       ID:       10,
+                       Name:     "customer_id",
+                       Type:     iceberg.PrimitiveTypes.String,
+                       Required: false,
+               },
+               iceberg.NestedField{
+                       ID:       20,
+                       Name:     "event_time",
+                       Type:     iceberg.PrimitiveTypes.TimestampNs,
+                       Required: false,
+               },
+       )
+
        tests := []struct {
-               name  string
-               input string
-               isErr bool
+               name          string
+               input         string
+               wantSourceIDs [][]int
+               errContains   string
+               isErr         bool
        }{
                {
                        name:  "empty string",
-                       input: "",
+                       input: "", // keeps backward compatibility for no 
partitioning
                },
                {
                        name:  "single field",
-                       input: "field1",
+                       input: "customer_id",
+                       wantSourceIDs: [][]int{
+                               {10},
+                       },
                },
                {
                        name:  "multiple fields",
-                       input: "field1,field2,field3",
+                       input: "customer_id,event_time",
+                       wantSourceIDs: [][]int{
+                               {10},
+                               {20},
+                       },
                },
                {
                        name:  "with spaces",
-                       input: " field1 , field2 , field3 ",
+                       input: " customer_id , event_time ",
+                       wantSourceIDs: [][]int{
+                               {10},
+                               {20},
+                       },
+               },
+               {
+                       name:        "unknown field",
+                       input:       "no_such_field",
+                       isErr:       true,
+                       errContains: "cannot find source column with name: 
no_such_field in schema",
                },
        }
 
        for _, tt := range tests {
                t.Run(tt.name, func(t *testing.T) {
-                       got, err := parsePartitionSpec(tt.input)
+                       got, err := parsePartitionSpec(tt.input, schema)
                        if (err != nil) != tt.isErr {
                                t.Errorf("parsePartitionSpec() error = %v, 
isErr %v", err, tt.isErr)
 
                                return
                        }
+                       if tt.isErr {
+                               if tt.errContains != "" {
+                                       require.ErrorContains(t, err, 
tt.errContains)
+                               }
+
+                               return
+                       }
                        if !tt.isErr && got == nil {
                                t.Errorf("parsePartitionSpec() returned nil for 
valid input")
                        }
+                       if len(tt.wantSourceIDs) == 0 {
+                               require.Equal(t, iceberg.UnpartitionedSpec, got)
+
+                               return
+                       }
+                       require.Equal(t, len(tt.wantSourceIDs), got.NumFields())
+                       for i, wantIDs := range tt.wantSourceIDs {
+                               require.Equal(t, wantIDs, 
got.Field(i).SourceIDs)

Review Comment:
   `require.Equal(t, wantIDs, got.Field(i).SourceIDs)` — the tests only assert 
SourceIDs, never FieldID, which is why they pass with field-id: 0 still going 
out. Could you add a FieldID assertion (1000, 1001, …) and a REST 
create-payload regression test that marshals the spec and checks `field-id` 
isn't 0? That locks in the fix above.



##########
cmd/iceberg/utils.go:
##########
@@ -48,31 +49,33 @@ func parseProperties(propStr string) (iceberg.Properties, 
error) {
        return props, nil
 }
 
-func parsePartitionSpec(specStr string) (*iceberg.PartitionSpec, error) {
+func parsePartitionSpec(specStr string, schema *iceberg.Schema) 
(*iceberg.PartitionSpec, error) {
+       if schema == nil {
+               return nil, errors.New("schema is required for partition spec 
parsing")
+       }
+
        if specStr == "" {
                return iceberg.UnpartitionedSpec, nil
        }
 
        fields := strings.Split(specStr, ",")
-       var partitionFields []iceberg.PartitionField
+       opts := make([]iceberg.PartitionOption, 0)
 
-       for i, field := range fields {
+       for _, field := range fields {
                field = strings.TrimSpace(field)
                if field == "" {
                        continue
                }
 
-               partitionFields = append(partitionFields, 
iceberg.PartitionField{
-                       SourceIDs: []int{i + 1},
-                       FieldID:   i + iceberg.PartitionDataIDStart,
-                       Name:      field,
-                       Transform: iceberg.IdentityTransform{},
-               })
+               opts = append(opts, iceberg.AddPartitionFieldByName(field, 
field, iceberg.IdentityTransform{}, schema, nil))

Review Comment:
     `iceberg.AddPartitionFieldByName(field, field, 
iceberg.IdentityTransform{}, schema, nil)` — the nil field ID is the problem. 
`NewPartitionSpecOpts` only runs `initialize()`, never 
`assignPartitionFieldIds`, so every field keeps the
     unassigned value (0). rest.go serializes `cfg.PartitionSpec` directly into 
the create-table payload, so the server gets `field-id: 0` for all of them — 
which is also a duplicate and invalid.
     
     Assign a stable ID here instead, e.g.:
     
         id := iceberg.PartitionDataIDStart + len(opts)
         opts = append(opts, iceberg.AddPartitionFieldByName(field, field, 
iceberg.IdentityTransform{}, schema, &id))
         
     Since empty entries are `continue`d before the append, `len(opts)` gives 
dense 1000, 1001, … even for input like `a,,b` (the old `i + 
PartitionDataIDStart` would've left a gap there — small bonus).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to