This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 6e679fc6 fix: remove unnecessary safe-guard (#775)
6e679fc6 is described below

commit 6e679fc689a81b439da751b54a1f7718ddede388
Author: Tobias Pütz <[email protected]>
AuthorDate: Thu Mar 19 17:30:38 2026 +0100

    fix: remove unnecessary safe-guard (#775)
    
    `ArrowSchemaToIceberg` has a `hasIDs` branch which means it supports
    add-files with field_ids now
---
 table/arrow_utils.go | 14 ++++----
 table/table_test.go  | 99 +++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 100 insertions(+), 13 deletions(-)

diff --git a/table/arrow_utils.go b/table/arrow_utils.go
index d7f0ee89..16bbc008 100644
--- a/table/arrow_utils.go
+++ b/table/arrow_utils.go
@@ -1373,21 +1373,19 @@ func filesToDataFiles(ctx context.Context, fileIO iceio.IO, meta *MetadataBuilde
 
                        arrSchema := must(rdr.Schema())
 
-                       if hasIDs := must(VisitArrowSchema(arrSchema, hasIDs{})); hasIDs {
-                               yield(nil, fmt.Errorf("%w: cannot add file %s because it has field-ids. add-files only supports the addition of files without field_ids",
-                                       iceberg.ErrNotImplemented, filePath))
-
-                               return
-                       }
-
                        if err := checkArrowSchemaCompat(currentSchema, arrSchema, false); err != nil {
                                yield(nil, err)
 
                                return
                        }
 
+                       pathToIDSchema := currentSchema
+                       if fileHasIDs := must(VisitArrowSchema(arrSchema, hasIDs{})); fileHasIDs {
+                               pathToIDSchema = must(ArrowSchemaToIceberg(arrSchema, false, nil))
+                       }
+
                        statistics := format.DataFileStatsFromMeta(rdr.Metadata(), must(computeStatsPlan(currentSchema, meta.props)),
-                               must(format.PathToIDMapping(currentSchema)))
+                               must(format.PathToIDMapping(pathToIDSchema)))
 
                        partitionValues := make(map[int]any)
                        if !currentSpec.Equals(*iceberg.UnpartitionedSpec) {
diff --git a/table/table_test.go b/table/table_test.go
index 02e34482..d9949449 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -374,6 +374,14 @@ func (t *TableWritingTestSuite) writeParquet(fio iceio.WriteFileIO, filePath str
                nil, pqarrow.DefaultWriterProps()))
 }
 
+func (t *TableWritingTestSuite) writeParquetWithStoredSchema(fio iceio.WriteFileIO, filePath string, arrTbl arrow.Table) {
+       fo, err := fio.Create(filePath)
+       t.Require().NoError(err)
+
+       t.Require().NoError(pqarrow.WriteTable(arrTbl, fo, arrTbl.NumRows(),
+               nil, pqarrow.NewArrowWriterProperties(pqarrow.WithStoreSchema())))
+}
+
 func (t *TableWritingTestSuite) createTable(identifier table.Identifier, formatVersion int, spec iceberg.PartitionSpec, sc *iceberg.Schema) *table.Table {
        meta, err := table.NewMetadata(sc, &spec, table.UnsortedSortOrder,
                t.location, iceberg.Properties{table.PropertyFormatVersion: strconv.Itoa(formatVersion)})
@@ -462,8 +470,7 @@ func (t *TableWritingTestSuite) TestAddFilesUnpartitionedHasFieldIDs() {
        ident := table.Identifier{"default", "unpartitioned_table_with_ids_v" + strconv.Itoa(t.formatVersion)}
        tbl := t.createTable(ident, t.formatVersion,
                *iceberg.UnpartitionedSpec, t.tableSchema)
        tbl := t.createTable(ident, t.formatVersion,
                *iceberg.UnpartitionedSpec, t.tableSchema)
-
-       t.NotNil(tbl)
+       t.Require().NotNil(tbl)
 
        files := make([]string, 0)
        for i := range 5 {
@@ -473,9 +480,91 @@ func (t *TableWritingTestSuite) TestAddFilesUnpartitionedHasFieldIDs() {
        }
 
        tx := tbl.NewTransaction()
-       err := tx.AddFiles(t.ctx, files, nil, false)
-       t.Error(err)
-       t.ErrorIs(err, iceberg.ErrNotImplemented)
+       t.Require().NoError(tx.AddFiles(t.ctx, files, nil, false))
+
+       stagedTbl, err := tx.StagedTable()
+       t.Require().NoError(err)
+       t.NotNil(stagedTbl.NameMapping())
+
+       scan, err := tx.Scan()
+       t.Require().NoError(err)
+
+       contents, err := scan.ToArrowTable(context.Background())
+       t.Require().NoError(err)
+       defer contents.Release()
+
+       t.EqualValues(5, contents.NumRows())
+}
+
+func (t *TableWritingTestSuite) TestAddFilesFieldIDsWithDifferentNames() {
+       ident := table.Identifier{"default", "unpartitioned_table_ids_diff_names_v" + strconv.Itoa(t.formatVersion)}
+       tbl := t.createTable(ident, t.formatVersion,
+               *iceberg.UnpartitionedSpec, t.tableSchema)
+       t.Require().NotNil(tbl)
+
+       renamedSchema := arrow.NewSchema([]arrow.Field{
+               {
+                       Name: "alpha", Type: arrow.FixedWidthTypes.Boolean,
+                       Metadata: arrow.MetadataFrom(map[string]string{"PARQUET:field_id": "1"}),
+               },
+               {
+                       Name: "beta", Type: arrow.BinaryTypes.String,
+                       Metadata: arrow.MetadataFrom(map[string]string{"PARQUET:field_id": "2"}),
+               },
+               {
+                       Name: "gamma", Type: arrow.PrimitiveTypes.Int32,
+                       Metadata: arrow.MetadataFrom(map[string]string{"PARQUET:field_id": "3"}),
+               },
+               {
+                       Name: "delta", Type: arrow.PrimitiveTypes.Date32,
+                       Metadata: arrow.MetadataFrom(map[string]string{"PARQUET:field_id": "4"}),
+               },
+       }, nil)
+
+       renamedTbl, err := array.TableFromJSON(memory.DefaultAllocator, renamedSchema, []string{
+               `[{"alpha": true, "beta": "bar_string", "gamma": 123, "delta": "2024-03-07"}]`,
+       })
+       t.Require().NoError(err)
+       defer renamedTbl.Release()
+
+       files := make([]string, 0, 5)
+       for i := range 5 {
+               filePath := fmt.Sprintf("%s/unpartitioned_ids_diff_names/test-%d.parquet", t.location, i)
+               t.writeParquetWithStoredSchema(mustFS(t.T(), tbl).(iceio.WriteFileIO), filePath, renamedTbl)
+               files = append(files, filePath)
+       }
+
+       tx := tbl.NewTransaction()
+       t.Require().NoError(tx.AddFiles(t.ctx, files, nil, false))
+
+       stagedTbl, err := tx.StagedTable()
+       t.Require().NoError(err)
+       t.NotNil(stagedTbl.NameMapping())
+
+       scan, err := tx.Scan()
+       t.Require().NoError(err)
+
+       contents, err := scan.ToArrowTable(context.Background())
+       t.Require().NoError(err)
+       defer contents.Release()
+
+       t.EqualValues(5, contents.NumRows())
+
+       expectedSchema := arrow.NewSchema([]arrow.Field{
+               {Name: "foo", Type: arrow.FixedWidthTypes.Boolean, Nullable: true},
+               {Name: "bar", Type: arrow.BinaryTypes.String, Nullable: true},
+               {Name: "baz", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
+               {Name: "qux", Type: arrow.PrimitiveTypes.Date32, Nullable: true},
+       }, nil)
+       t.Truef(expectedSchema.Equal(contents.Schema()),
+               "expected table schema names, got: %s", contents.Schema())
+
+       for i := range 5 {
+               t.Equal(true, contents.Column(0).Data().Chunk(i).(*array.Boolean).Value(0))
+               t.Equal("bar_string", contents.Column(1).Data().Chunk(i).(*array.String).Value(0))
+               t.Equal(int32(123), contents.Column(2).Data().Chunk(i).(*array.Int32).Value(0))
+               t.Equal(arrow.Date32(19789), contents.Column(3).Data().Chunk(i).(*array.Date32).Value(0))
+       }
 }
 
 func (t *TableWritingTestSuite) TestAddFilesFailsSchemaMismatch() {

Reply via email to