This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new d32d89f refactor(catalog/glue): remove database_type parameter in
Glue catalog operations (#535)
d32d89f is described below
commit d32d89faaa7b3269bab3dca406d8b4d1f1468400
Author: Blue Li <[email protected]>
AuthorDate: Tue Aug 19 00:13:17 2025 +0800
refactor(catalog/glue): remove database_type parameter in Glue catalog
operations (#535)
### Description
This PR removes the requirement for the `database_type` parameter in
both `ListNamespaces` and `CreateNamespace` operations for the Glue
catalog to align with the Java and Python versions of Apache Iceberg.
### Background
After reviewing the Java and Python implementations of Apache Iceberg,
we found that neither version uses the `database_type` parameter in
their Glue catalog operations. The Go implementation was unnecessarily
handling this parameter, which was inconsistent with the reference
implementations.
This inconsistency caused a practical issue: namespaces created using
pyiceberg could not be listed in the Go implementation because pyiceberg
doesn't set the `database_type` parameter, while the Go version required
it for filtering.
### Changes
- **ListNamespaces**: Removed filtering logic that checked
`database_type` in database parameters
- **CreateNamespace**: Removed the `database_type` parameter from
database creation
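- **ListNamespaces**: Updated to use the AWS SDK paginator
(`glue.NewGetDatabasesPaginator`) instead of a manual `NextToken` loop,
for improved reliability
- **getDatabase**: Removed the check that rejected databases whose
`database_type` parameter is not `ICEBERG`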
---
catalog/glue/glue.go | 43 +++++++++----------------------------------
catalog/glue/glue_test.go | 9 +++------
2 files changed, 12 insertions(+), 40 deletions(-)
diff --git a/catalog/glue/glue.go b/catalog/glue/glue.go
index 866cf09..535eeee 100644
--- a/catalog/glue/glue.go
+++ b/catalog/glue/glue.go
@@ -43,10 +43,9 @@ import (
const (
// Use the same conventions as in the pyiceberg project.
// See:
https://github.com/apache/iceberg-python/blob/main/pyiceberg/catalog/__init__.py#L82-L96
- glueTypeIceberg = "ICEBERG"
- databaseTypePropsKey = "database_type"
- tableTypePropsKey = "table_type"
- descriptionPropsKey = "Description"
+ glueTypeIceberg = "ICEBERG"
+ tableTypePropsKey = "table_type"
+ descriptionPropsKey = "Description"
// Database location.
locationPropsKey = "Location"
@@ -519,10 +518,7 @@ func (c *Catalog) CreateNamespace(ctx context.Context,
namespace table.Identifie
return err
}
- databaseParameters := map[string]string{
- databaseTypePropsKey: glueTypeIceberg,
- }
-
+ databaseParameters := map[string]string{}
description := props[descriptionPropsKey]
locationURI := props[locationPropsKey]
@@ -658,20 +654,16 @@ func (c *Catalog) ListNamespaces(ctx context.Context,
parent table.Identifier) (
var icebergNamespaces []table.Identifier
- for {
- databasesResp, err := c.glueSvc.GetDatabases(ctx, params)
+ paginator := glue.NewGetDatabasesPaginator(c.glueSvc, params)
+ for paginator.HasMorePages() {
+ rsp, err := paginator.NextPage(ctx)
if err != nil {
return nil, fmt.Errorf("failed to list databases: %w",
err)
}
- icebergNamespaces = append(icebergNamespaces,
- filterDatabaseListByType(databasesResp.DatabaseList,
glueTypeIceberg)...)
-
- if databasesResp.NextToken == nil {
- break
+ for _, database := range rsp.DatabaseList {
+ icebergNamespaces = append(icebergNamespaces,
DatabaseIdentifier(aws.ToString(database.Name)))
}
-
- params.NextToken = databasesResp.NextToken
}
return icebergNamespaces, nil
@@ -714,10 +706,6 @@ func (c *Catalog) getDatabase(ctx context.Context,
databaseName string) (*types.
return nil, fmt.Errorf("failed to get namespace %s: %w",
databaseName, err)
}
- if database.Database.Parameters[databaseTypePropsKey] !=
glueTypeIceberg {
- return nil, fmt.Errorf("namespace %s is not an iceberg
namespace", databaseName)
- }
-
return database.Database, nil
}
@@ -759,19 +747,6 @@ func filterTableListByType(database string, tableList
[]types.Table, tableType s
return filtered
}
-func filterDatabaseListByType(databases []types.Database, databaseType string)
[]table.Identifier {
- var filtered []table.Identifier
-
- for _, database := range databases {
- if database.Parameters[databaseTypePropsKey] != databaseType {
- continue
- }
- filtered = append(filtered,
DatabaseIdentifier(aws.ToString(database.Name)))
- }
-
- return filtered
-}
-
func buildGlueTableInput(ctx context.Context, database string, tableName
string, staged *table.StagedTable, cat *Catalog) (*types.TableInput, error) {
glueTable, err := cat.getTable(ctx, database, tableName)
if err != nil {
diff --git a/catalog/glue/glue_test.go b/catalog/glue/glue_test.go
index 33d0dda..19cce5e 100644
--- a/catalog/glue/glue_test.go
+++ b/catalog/glue/glue_test.go
@@ -411,7 +411,7 @@ func TestGlueListNamespaces(t *testing.T) {
databases, err := glueCatalog.ListNamespaces(context.TODO(), nil)
assert.NoError(err)
- assert.Len(databases, 1)
+ assert.Len(databases, 2)
assert.Equal([]string{"test_database"}, databases[0])
}
@@ -449,9 +449,8 @@ func TestGlueCreateNamespace(t *testing.T) {
DatabaseInput: &types.DatabaseInput{
Name: aws.String("test_namespace"),
Parameters: map[string]string{
- databaseTypePropsKey: glueTypeIceberg,
- descriptionPropsKey: "Test Description",
- locationPropsKey: "s3://test-location",
+ descriptionPropsKey: "Test Description",
+ locationPropsKey: "s3://test-location",
},
},
}, mock.Anything).Return(&glue.CreateDatabaseOutput{}, nil).Once()
@@ -652,8 +651,6 @@ func TestGlueUpdateNamespaceProperties(t *testing.T) {
mockGlueSvc := &mockGlueClient{}
- tt.initial[databaseTypePropsKey] = glueTypeIceberg
-
mockGlueSvc.On("GetDatabase", mock.Anything,
&glue.GetDatabaseInput{
Name: aws.String("test_namespace"),
}, mock.Anything).Return(&glue.GetDatabaseOutput{