paleolimbot commented on code in PR #1138:
URL: https://github.com/apache/iceberg-go/pull/1138#discussion_r3392864964
##########
table/arrow_utils.go:
##########
@@ -658,20 +672,26 @@ func (c convertToArrow) VisitVariant() arrow.Field {
return arrow.Field{Type: extensions.NewDefaultVariantType()}
}
-func (c convertToArrow) VisitGeometry(iceberg.GeometryType) arrow.Field {
+func (c convertToArrow) VisitGeometry(g iceberg.GeometryType) arrow.Field {
+ meta := icebergCRSToGeoArrowMetadata(g.CRS())
if c.useLargeTypes {
- return arrow.Field{Type:
geoarrow.NewWKBType(geoarrow.WKBWithLargeBinaryStorage())}
+ return arrow.Field{Type:
geoarrow.NewWKBType(geoarrow.WKBWithLargeBinaryStorage(),
geoarrow.WKBWithMetadata(meta))}
}
- return arrow.Field{Type:
geoarrow.NewWKBType(geoarrow.WKBWithBinaryStorage())}
+ return arrow.Field{Type:
geoarrow.NewWKBType(geoarrow.WKBWithBinaryStorage(),
geoarrow.WKBWithMetadata(meta))}
}
-func (c convertToArrow) VisitGeography(iceberg.GeographyType) arrow.Field {
+func (c convertToArrow) VisitGeography(g iceberg.GeographyType) arrow.Field {
+ meta := icebergCRSToGeoArrowMetadata(g.CRS())
+ // Always add an edge to differentiate between Geography and Geometry
arrow fields.
+ // Note that the edge convention is a best-effort hint and planar
geography from other clients won't round-trip through Arrow alone.
+ meta.Edges = geoarrow.EdgeInterpolation(g.Algorithm())
Review Comment:
Planar "geography" doesn't exist in the iceberg or Parquet specs to my
knowledge. I believe all the other strings are the same in both specs on
purpose (if this is a string).
##########
table/arrow_utils.go:
##########
@@ -1828,3 +1848,154 @@ func positionDeleteRecordsToDataFilesDV(ctx
context.Context, rootLocation string
}
}
}
+
+func checkCRSString(rawCrs json.RawMessage) bool {
+ b := bytes.TrimSpace(rawCrs)
+
+ return len(b) > 0 && b[0] == '"'
+}
+
+func checkCRSSJSON(rawCrs json.RawMessage) bool {
+ b := bytes.TrimSpace(rawCrs)
+
+ return len(b) > 0 && b[0] == '{'
+}
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+ if len(meta.CRS) == 0 {
+ return "srid:0", nil
+ }
+
+ switch {
+ case checkCRSString(meta.CRS):
+ var crs string
+ if len(meta.CRS) > 0 {
+ if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+ return "", fmt.Errorf("invalid geoarrow CRS
metadata: %w", err)
+ }
+ }
+
+ if strings.EqualFold(crs, "OGC:CRS84") ||
strings.EqualFold(crs, "EPSG:4326") {
+ return "OGC:CRS84", nil
+ }
+
+ switch meta.CRSType {
+ case geoarrow.CRSTypeSRID:
+ return "srid:" + crs, nil
+ case geoarrow.CRSTypePROJJSON:
+ return "", errors.New("JSON object written to crs as
string")
+ case geoarrow.CRSTypeWKT22019:
+ return "", errors.New("CRS type wkt2:2019 not
supported")
+ default:
+ if len(crs) <= 32 {
+ return crs, nil
+ }
+
+ return "", errors.New("crs length too long")
+ }
+ case checkCRSSJSON(meta.CRS):
+ var crs map[string]json.RawMessage
+ if len(meta.CRS) > 0 {
+ if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+ return "", fmt.Errorf("invalid geoarrow CRS
metadata: %w", err)
+ }
+ }
+
+ idRaw, ok := crs["id"]
+ if !ok || len(idRaw) == 0 {
+ return "", errors.New("unsupported CRS")
+ }
+
+ var id map[string]json.RawMessage
+ if err := json.Unmarshal(idRaw, &id); err != nil {
+ return "", errors.New("unsupported CRS")
+ }
+
+ var authority string
+ if err := json.Unmarshal(id["authority"], &authority); err !=
nil || authority == "" {
+ return "", errors.New("unsupported CRS")
+ }
+
+ codeRaw := id["code"]
+ if len(codeRaw) == 0 {
+ return "", errors.New("unsupported CRS")
+ }
+
+ var code string
+ if err := json.Unmarshal(codeRaw, &code); err != nil {
+ var codeNum json.Number
+ if err := json.Unmarshal(codeRaw, &codeNum); err != nil
|| codeNum.String() == "" {
+ return "", errors.New("unsupported CRS")
+ }
+ code = codeNum.String()
+ }
+ if code == "" {
+ return "", errors.New("unsupported CRS")
+ }
+
+ authorityCode := authority + ":" + code
+ if strings.EqualFold(authorityCode, "OGC:CRS84") ||
strings.EqualFold(authorityCode, "EPSG:4326") {
+ return "OGC:CRS84", nil
+ }
+
+ return authorityCode, nil
+ default:
+ return "", errors.New("unsupported CRS: CRS must either be
omitted, a string, or a JSON object")
+ }
+}
+
+func geoArrowMetadataToIcebergType(meta geoarrow.Metadata) (iceberg.Type,
error) {
+ crs, err := geoArrowCRSToIcebergCRS(meta)
+ if err != nil {
+ return nil, err
+ }
+
+ switch meta.Edges {
+ case geoarrow.EdgePlanar:
+ return iceberg.GeometryTypeOf(crs)
+ case geoarrow.EdgeVincenty, geoarrow.EdgeKarney, geoarrow.EdgeThomas,
geoarrow.EdgeAndoyer, geoarrow.EdgeSpherical:
+ return iceberg.GeographyTypeOf(crs, string(meta.Edges))
+ default:
+ return nil, fmt.Errorf("unsupported geoarrow edges %q",
meta.Edges)
+ }
+}
+
+var authorityCodeCRS = regexp.MustCompile(`^[^:]+:.+$`)
Review Comment:
Maybe limit to `"^[A-Za-z][A-Za-z0-9_-]*:[A-Za-z0-9_.-]+$"`? That will match
any string with a colon in it (which includes JSON, I think)
##########
table/arrow_utils_test.go:
##########
@@ -430,6 +431,455 @@ func TestVariantArrowConversion(t *testing.T) {
})
}
+func assertGeoArrowWKB(t *testing.T, dt arrow.DataType, storage
arrow.DataType, want geoarrow.Metadata) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+ assert.Equal(t, want.CRS, wkb.Metadata().CRS)
+ assert.Equal(t, want.Edges, wkb.Metadata().Edges)
+}
+
+func assertGeoArrowWKBMetadataJSON(t *testing.T, dt arrow.DataType, storage
arrow.DataType, wantJSON string) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+
+ var want map[string]json.RawMessage
+ require.NoError(t, json.Unmarshal([]byte(wantJSON), &want))
+
+ meta := wkb.Metadata()
+
+ if wantCRS, ok := want["crs"]; ok {
+ assert.JSONEq(t, string(wantCRS), string(meta.CRS), "crs
mismatch")
+ } else {
+ assert.Empty(t, meta.CRS, "expected omitted crs")
+ }
+
+ if wantEdges, ok := want["edges"]; ok {
+ var edges string
+ require.NoError(t, json.Unmarshal(wantEdges, &edges))
+ assert.Equal(t, geoarrow.EdgeInterpolation(edges), meta.Edges,
"edges mismatch")
+ } else {
+ assert.Empty(t, meta.Edges, "expected omitted edges")
+ }
+}
+
+func jsonCRS(s string) json.RawMessage {
+ raw, _ := json.Marshal(s)
+
+ return raw
+}
+
+func TestIcebergGeoTypesToArrowSchema(t *testing.T) {
+ geomSRID, err := iceberg.GeometryTypeOf("srid:4326")
+ require.NoError(t, err)
+ geogKarney, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ require.NoError(t, err)
+
+ // Note that these tests below are based on arrow-rs tests
(https://github.com/apache/arrow-rs/pull/10065)
+ geomSRID0, err := iceberg.GeometryTypeOf("srid:0")
+ require.NoError(t, err)
+ geomEPSG4267, err := iceberg.GeometryTypeOf("EPSG:4267")
+ require.NoError(t, err)
+ geogSpherical, err := iceberg.GeographyTypeOf("OGC:CRS84", "spherical")
+ require.NoError(t, err)
+ geogKarneyDefaultCRS, err := iceberg.GeographyTypeOf("OGC:CRS84",
"karney")
+ require.NoError(t, err)
+ geogVincenty, err := iceberg.GeographyTypeOf("OGC:CRS84", "vincenty")
+ require.NoError(t, err)
+ geogAndoyer, err := iceberg.GeographyTypeOf("OGC:CRS84", "andoyer")
+ require.NoError(t, err)
+ geogThomas, err := iceberg.GeographyTypeOf("OGC:CRS84", "thomas")
+ require.NoError(t, err)
+ geogSRID0, err := iceberg.GeographyTypeOf("srid:0", "spherical")
+ require.NoError(t, err)
+ geogEPSG4267, err := iceberg.GeographyTypeOf("EPSG:4267", "spherical")
+ require.NoError(t, err)
+
+ defaultGeometry, err := iceberg.GeometryTypeOf("OGC:CRS84")
+ require.NoError(t, err)
+ geomPROJJSON3857, err := iceberg.GeometryTypeOf("EPSG:3857")
+ require.NoError(t, err)
+ geogEPSG4267Karney, err := iceberg.GeographyTypeOf("EPSG:4267",
"karney")
+ require.NoError(t, err)
+ geogPROJJSON4267, err := iceberg.GeographyTypeOf("EPSG:4267",
"spherical")
+ require.NoError(t, err)
+
+ typeCases := []struct {
+ name string
+ ice iceberg.Type
+ wantMetaJSON string
+ }{
+ // Geometry with default CRS (defaults to OGC:CRS84 per Parquet
spec)
+ {
+ name: "geometry_default_crs",
+ ice: iceberg.GeometryType{},
+ wantMetaJSON: `{"crs":"OGC:CRS84"}`,
+ },
+ // Geometry with srid:0 should result in an unset (omitted) CRS
+ {
+ name: "geometry_srid_0",
+ ice: geomSRID0,
+ wantMetaJSON: `{}`,
+ },
+ // Geometry with custom CRSes (authority:code and partial
projjson)
+ {
+ name: "geometry_epsg_4267",
+ ice: geomEPSG4267,
+ wantMetaJSON: `{"crs":"EPSG:4267"}`,
+ },
+ // Geography with default CRS (default OGC:CRS84, spherical
edges)
+ {
+ name: "geography_default_crs",
+ ice: iceberg.GeographyType{},
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"spherical"}`,
+ },
+ // Geography with explicit edges
+ {
+ name: "geography_explicit_spherical",
+ ice: geogSpherical,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"spherical"}`,
+ },
+ {
+ name: "geography_karney",
+ ice: geogKarneyDefaultCRS,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"karney"}`,
+ },
+ {
+ name: "geography_vincenty",
+ ice: geogVincenty,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"vincenty"}`,
+ },
+ {
+ name: "geography_andoyer",
+ ice: geogAndoyer,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"andoyer"}`,
+ },
+ {
+ name: "geography_thomas",
+ ice: geogThomas,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"thomas"}`,
+ },
+ // Geography with srid:0 should result in an unset (omitted)
CRS and spherical edges
+ {
+ name: "geography_srid_0",
+ ice: geogSRID0,
+ wantMetaJSON: `{"edges":"spherical"}`,
+ },
+ // Geography with custom CRSes (authority:code and partial
projjson)
+ {
+ name: "geography_epsg_4267",
+ ice: geogEPSG4267,
+ wantMetaJSON: `{"crs":"EPSG:4267","edges":"spherical"}`,
+ },
+ }
+
+ // The following tests focus on edge cases and pinning specific
behavior for read case
+ readOnlyCases := []struct {
+ name string
+ ice iceberg.Type
+ wantMetaJSON string
+ }{
+ {
+ name: "geometry_srid_0_with_crs_type",
+ ice: geomSRID0,
+ wantMetaJSON: `{"crs_type":"authority_code"}`,
+ },
+ {
+ name: "case_insensitive_default_geometry",
+ ice: defaultGeometry,
+ wantMetaJSON: `{"crs":"OgC:cRs84"}`,
+ },
+ {
+ name:
"case_insensitive_default_geometry_epsg_4326",
+ ice: defaultGeometry,
+ wantMetaJSON: `{"crs":"EpSg:4326"}`,
+ },
+ {
+ name: "geometry_epsg_4326",
+ ice: defaultGeometry,
+ wantMetaJSON: `{"crs":"epsg:4326"}`,
+ },
+
+ // Translated from arrow-rs geo logical type read tests
(https://github.com/apache/arrow-rs/pull/10065)
+ // Geometry with no CRS should be GEOMETRY(srid:0)
+ {
+ name: "geometry_no_crs",
+ ice: geomSRID0,
+ wantMetaJSON: `{}`,
+ },
+ // Geometry with string CRS
+ {
+ name: "geometry_epsg_4267_from_crs",
+ ice: geomEPSG4267,
+ wantMetaJSON: `{"crs":"EPSG:4267"}`,
+ },
+ // Geometry with PROJJSON CRS
+ {
+ name: "geometry_projjson_epsg_3857",
+ ice: geomPROJJSON3857,
+ wantMetaJSON:
`{"crs":{"id":{"authority":"EPSG","code":3857}}}`,
+ },
Review Comment:
Or this one. The Iceberg CRSes we're dealing with at the Iceberg level are
all `"auth:code"`, so they can write `"crs":"auth:code"` to GeoArrow.
(Apologies if I'm missing something here)
##########
table/arrow_utils_test.go:
##########
@@ -430,6 +431,455 @@ func TestVariantArrowConversion(t *testing.T) {
})
}
+func assertGeoArrowWKB(t *testing.T, dt arrow.DataType, storage
arrow.DataType, want geoarrow.Metadata) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+ assert.Equal(t, want.CRS, wkb.Metadata().CRS)
+ assert.Equal(t, want.Edges, wkb.Metadata().Edges)
+}
+
+func assertGeoArrowWKBMetadataJSON(t *testing.T, dt arrow.DataType, storage
arrow.DataType, wantJSON string) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+
+ var want map[string]json.RawMessage
+ require.NoError(t, json.Unmarshal([]byte(wantJSON), &want))
+
+ meta := wkb.Metadata()
+
+ if wantCRS, ok := want["crs"]; ok {
+ assert.JSONEq(t, string(wantCRS), string(meta.CRS), "crs
mismatch")
+ } else {
+ assert.Empty(t, meta.CRS, "expected omitted crs")
+ }
+
+ if wantEdges, ok := want["edges"]; ok {
+ var edges string
+ require.NoError(t, json.Unmarshal(wantEdges, &edges))
+ assert.Equal(t, geoarrow.EdgeInterpolation(edges), meta.Edges,
"edges mismatch")
+ } else {
+ assert.Empty(t, meta.Edges, "expected omitted edges")
+ }
+}
+
+func jsonCRS(s string) json.RawMessage {
+ raw, _ := json.Marshal(s)
+
+ return raw
+}
+
+func TestIcebergGeoTypesToArrowSchema(t *testing.T) {
+ geomSRID, err := iceberg.GeometryTypeOf("srid:4326")
+ require.NoError(t, err)
+ geogKarney, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ require.NoError(t, err)
+
+ // Note that these tests below are based on arrow-rs tests
(https://github.com/apache/arrow-rs/pull/10065)
+ geomSRID0, err := iceberg.GeometryTypeOf("srid:0")
+ require.NoError(t, err)
+ geomEPSG4267, err := iceberg.GeometryTypeOf("EPSG:4267")
+ require.NoError(t, err)
+ geogSpherical, err := iceberg.GeographyTypeOf("OGC:CRS84", "spherical")
+ require.NoError(t, err)
+ geogKarneyDefaultCRS, err := iceberg.GeographyTypeOf("OGC:CRS84",
"karney")
+ require.NoError(t, err)
+ geogVincenty, err := iceberg.GeographyTypeOf("OGC:CRS84", "vincenty")
+ require.NoError(t, err)
+ geogAndoyer, err := iceberg.GeographyTypeOf("OGC:CRS84", "andoyer")
+ require.NoError(t, err)
+ geogThomas, err := iceberg.GeographyTypeOf("OGC:CRS84", "thomas")
+ require.NoError(t, err)
+ geogSRID0, err := iceberg.GeographyTypeOf("srid:0", "spherical")
+ require.NoError(t, err)
+ geogEPSG4267, err := iceberg.GeographyTypeOf("EPSG:4267", "spherical")
+ require.NoError(t, err)
+
+ defaultGeometry, err := iceberg.GeometryTypeOf("OGC:CRS84")
+ require.NoError(t, err)
+ geomPROJJSON3857, err := iceberg.GeometryTypeOf("EPSG:3857")
+ require.NoError(t, err)
+ geogEPSG4267Karney, err := iceberg.GeographyTypeOf("EPSG:4267",
"karney")
+ require.NoError(t, err)
+ geogPROJJSON4267, err := iceberg.GeographyTypeOf("EPSG:4267",
"spherical")
+ require.NoError(t, err)
Review Comment:
I am not sure I understand why this one labelled PROJJSON
##########
table/arrow_utils.go:
##########
@@ -1828,3 +1848,154 @@ func positionDeleteRecordsToDataFilesDV(ctx
context.Context, rootLocation string
}
}
}
+
+func checkCRSString(rawCrs json.RawMessage) bool {
+ b := bytes.TrimSpace(rawCrs)
+
+ return len(b) > 0 && b[0] == '"'
+}
+
+func checkCRSSJSON(rawCrs json.RawMessage) bool {
+ b := bytes.TrimSpace(rawCrs)
+
+ return len(b) > 0 && b[0] == '{'
+}
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+ if len(meta.CRS) == 0 {
+ return "srid:0", nil
+ }
+
+ switch {
+ case checkCRSString(meta.CRS):
+ var crs string
+ if len(meta.CRS) > 0 {
+ if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+ return "", fmt.Errorf("invalid geoarrow CRS
metadata: %w", err)
+ }
+ }
+
+ if strings.EqualFold(crs, "OGC:CRS84") ||
strings.EqualFold(crs, "EPSG:4326") {
+ return "OGC:CRS84", nil
+ }
+
+ switch meta.CRSType {
+ case geoarrow.CRSTypeSRID:
+ return "srid:" + crs, nil
+ case geoarrow.CRSTypePROJJSON:
+ return "", errors.New("JSON object written to crs as
string")
Review Comment:
Should these two lines be dropped so that if the CRS type is explicit for
some reason that it's handled by the branches below?
##########
table/arrow_utils_test.go:
##########
@@ -430,6 +431,455 @@ func TestVariantArrowConversion(t *testing.T) {
})
}
+func assertGeoArrowWKB(t *testing.T, dt arrow.DataType, storage
arrow.DataType, want geoarrow.Metadata) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+ assert.Equal(t, want.CRS, wkb.Metadata().CRS)
+ assert.Equal(t, want.Edges, wkb.Metadata().Edges)
+}
+
+func assertGeoArrowWKBMetadataJSON(t *testing.T, dt arrow.DataType, storage
arrow.DataType, wantJSON string) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+
+ var want map[string]json.RawMessage
+ require.NoError(t, json.Unmarshal([]byte(wantJSON), &want))
+
+ meta := wkb.Metadata()
+
+ if wantCRS, ok := want["crs"]; ok {
+ assert.JSONEq(t, string(wantCRS), string(meta.CRS), "crs
mismatch")
+ } else {
+ assert.Empty(t, meta.CRS, "expected omitted crs")
+ }
+
+ if wantEdges, ok := want["edges"]; ok {
+ var edges string
+ require.NoError(t, json.Unmarshal(wantEdges, &edges))
+ assert.Equal(t, geoarrow.EdgeInterpolation(edges), meta.Edges,
"edges mismatch")
+ } else {
+ assert.Empty(t, meta.Edges, "expected omitted edges")
+ }
+}
+
+func jsonCRS(s string) json.RawMessage {
+ raw, _ := json.Marshal(s)
+
+ return raw
+}
+
+func TestIcebergGeoTypesToArrowSchema(t *testing.T) {
+ geomSRID, err := iceberg.GeometryTypeOf("srid:4326")
+ require.NoError(t, err)
+ geogKarney, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ require.NoError(t, err)
+
+ // Note that these tests below are based on arrow-rs tests
(https://github.com/apache/arrow-rs/pull/10065)
+ geomSRID0, err := iceberg.GeometryTypeOf("srid:0")
+ require.NoError(t, err)
+ geomEPSG4267, err := iceberg.GeometryTypeOf("EPSG:4267")
+ require.NoError(t, err)
+ geogSpherical, err := iceberg.GeographyTypeOf("OGC:CRS84", "spherical")
+ require.NoError(t, err)
+ geogKarneyDefaultCRS, err := iceberg.GeographyTypeOf("OGC:CRS84",
"karney")
+ require.NoError(t, err)
+ geogVincenty, err := iceberg.GeographyTypeOf("OGC:CRS84", "vincenty")
+ require.NoError(t, err)
+ geogAndoyer, err := iceberg.GeographyTypeOf("OGC:CRS84", "andoyer")
+ require.NoError(t, err)
+ geogThomas, err := iceberg.GeographyTypeOf("OGC:CRS84", "thomas")
+ require.NoError(t, err)
+ geogSRID0, err := iceberg.GeographyTypeOf("srid:0", "spherical")
+ require.NoError(t, err)
+ geogEPSG4267, err := iceberg.GeographyTypeOf("EPSG:4267", "spherical")
+ require.NoError(t, err)
+
+ defaultGeometry, err := iceberg.GeometryTypeOf("OGC:CRS84")
+ require.NoError(t, err)
+ geomPROJJSON3857, err := iceberg.GeometryTypeOf("EPSG:3857")
+ require.NoError(t, err)
+ geogEPSG4267Karney, err := iceberg.GeographyTypeOf("EPSG:4267",
"karney")
+ require.NoError(t, err)
+ geogPROJJSON4267, err := iceberg.GeographyTypeOf("EPSG:4267",
"spherical")
+ require.NoError(t, err)
+
+ typeCases := []struct {
+ name string
+ ice iceberg.Type
+ wantMetaJSON string
+ }{
+ // Geometry with default CRS (defaults to OGC:CRS84 per Parquet
spec)
+ {
+ name: "geometry_default_crs",
+ ice: iceberg.GeometryType{},
+ wantMetaJSON: `{"crs":"OGC:CRS84"}`,
+ },
+ // Geometry with srid:0 should result in an unset (omitted) CRS
+ {
+ name: "geometry_srid_0",
+ ice: geomSRID0,
+ wantMetaJSON: `{}`,
+ },
+ // Geometry with custom CRSes (authority:code and partial
projjson)
+ {
+ name: "geometry_epsg_4267",
+ ice: geomEPSG4267,
+ wantMetaJSON: `{"crs":"EPSG:4267"}`,
+ },
+ // Geography with default CRS (default OGC:CRS84, spherical
edges)
+ {
+ name: "geography_default_crs",
+ ice: iceberg.GeographyType{},
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"spherical"}`,
+ },
+ // Geography with explicit edges
+ {
+ name: "geography_explicit_spherical",
+ ice: geogSpherical,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"spherical"}`,
+ },
+ {
+ name: "geography_karney",
+ ice: geogKarneyDefaultCRS,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"karney"}`,
+ },
+ {
+ name: "geography_vincenty",
+ ice: geogVincenty,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"vincenty"}`,
+ },
+ {
+ name: "geography_andoyer",
+ ice: geogAndoyer,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"andoyer"}`,
+ },
+ {
+ name: "geography_thomas",
+ ice: geogThomas,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"thomas"}`,
+ },
+ // Geography with srid:0 should result in an unset (omitted)
CRS and spherical edges
+ {
+ name: "geography_srid_0",
+ ice: geogSRID0,
+ wantMetaJSON: `{"edges":"spherical"}`,
+ },
+ // Geography with custom CRSes (authority:code and partial
projjson)
+ {
+ name: "geography_epsg_4267",
+ ice: geogEPSG4267,
+ wantMetaJSON: `{"crs":"EPSG:4267","edges":"spherical"}`,
+ },
+ }
+
+ // The following tests focus on edge cases and pinning specific
behavior for read case
+ readOnlyCases := []struct {
+ name string
+ ice iceberg.Type
+ wantMetaJSON string
+ }{
+ {
+ name: "geometry_srid_0_with_crs_type",
+ ice: geomSRID0,
+ wantMetaJSON: `{"crs_type":"authority_code"}`,
+ },
+ {
+ name: "case_insensitive_default_geometry",
+ ice: defaultGeometry,
+ wantMetaJSON: `{"crs":"OgC:cRs84"}`,
+ },
+ {
+ name:
"case_insensitive_default_geometry_epsg_4326",
+ ice: defaultGeometry,
+ wantMetaJSON: `{"crs":"EpSg:4326"}`,
+ },
+ {
+ name: "geometry_epsg_4326",
+ ice: defaultGeometry,
+ wantMetaJSON: `{"crs":"epsg:4326"}`,
+ },
+
+ // Translated from arrow-rs geo logical type read tests
(https://github.com/apache/arrow-rs/pull/10065)
+ // Geometry with no CRS should be GEOMETRY(srid:0)
+ {
+ name: "geometry_no_crs",
+ ice: geomSRID0,
+ wantMetaJSON: `{}`,
+ },
+ // Geometry with string CRS
+ {
+ name: "geometry_epsg_4267_from_crs",
+ ice: geomEPSG4267,
+ wantMetaJSON: `{"crs":"EPSG:4267"}`,
+ },
+ // Geometry with PROJJSON CRS
+ {
+ name: "geometry_projjson_epsg_3857",
+ ice: geomPROJJSON3857,
+ wantMetaJSON:
`{"crs":{"id":{"authority":"EPSG","code":3857}}}`,
+ },
+ // Geometry with lon/lat CRSes (canonically removed because
lon/lat is the
+ // default Parquet CRS)
Review Comment:
```suggestion
// default Iceberg CRS)
```
##########
table/arrow_utils_test.go:
##########
@@ -430,6 +431,455 @@ func TestVariantArrowConversion(t *testing.T) {
})
}
+func assertGeoArrowWKB(t *testing.T, dt arrow.DataType, storage
arrow.DataType, want geoarrow.Metadata) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+ assert.Equal(t, want.CRS, wkb.Metadata().CRS)
+ assert.Equal(t, want.Edges, wkb.Metadata().Edges)
+}
+
+func assertGeoArrowWKBMetadataJSON(t *testing.T, dt arrow.DataType, storage
arrow.DataType, wantJSON string) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+
+ var want map[string]json.RawMessage
+ require.NoError(t, json.Unmarshal([]byte(wantJSON), &want))
+
+ meta := wkb.Metadata()
+
+ if wantCRS, ok := want["crs"]; ok {
+ assert.JSONEq(t, string(wantCRS), string(meta.CRS), "crs
mismatch")
+ } else {
+ assert.Empty(t, meta.CRS, "expected omitted crs")
+ }
+
+ if wantEdges, ok := want["edges"]; ok {
+ var edges string
+ require.NoError(t, json.Unmarshal(wantEdges, &edges))
+ assert.Equal(t, geoarrow.EdgeInterpolation(edges), meta.Edges,
"edges mismatch")
+ } else {
+ assert.Empty(t, meta.Edges, "expected omitted edges")
+ }
+}
+
+func jsonCRS(s string) json.RawMessage {
+ raw, _ := json.Marshal(s)
+
+ return raw
+}
+
+func TestIcebergGeoTypesToArrowSchema(t *testing.T) {
+ geomSRID, err := iceberg.GeometryTypeOf("srid:4326")
+ require.NoError(t, err)
+ geogKarney, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ require.NoError(t, err)
+
+ // Note that these tests below are based on arrow-rs tests
(https://github.com/apache/arrow-rs/pull/10065)
+ geomSRID0, err := iceberg.GeometryTypeOf("srid:0")
+ require.NoError(t, err)
+ geomEPSG4267, err := iceberg.GeometryTypeOf("EPSG:4267")
+ require.NoError(t, err)
+ geogSpherical, err := iceberg.GeographyTypeOf("OGC:CRS84", "spherical")
+ require.NoError(t, err)
+ geogKarneyDefaultCRS, err := iceberg.GeographyTypeOf("OGC:CRS84",
"karney")
+ require.NoError(t, err)
+ geogVincenty, err := iceberg.GeographyTypeOf("OGC:CRS84", "vincenty")
+ require.NoError(t, err)
+ geogAndoyer, err := iceberg.GeographyTypeOf("OGC:CRS84", "andoyer")
+ require.NoError(t, err)
+ geogThomas, err := iceberg.GeographyTypeOf("OGC:CRS84", "thomas")
+ require.NoError(t, err)
+ geogSRID0, err := iceberg.GeographyTypeOf("srid:0", "spherical")
+ require.NoError(t, err)
+ geogEPSG4267, err := iceberg.GeographyTypeOf("EPSG:4267", "spherical")
+ require.NoError(t, err)
+
+ defaultGeometry, err := iceberg.GeometryTypeOf("OGC:CRS84")
+ require.NoError(t, err)
+ geomPROJJSON3857, err := iceberg.GeometryTypeOf("EPSG:3857")
+ require.NoError(t, err)
+ geogEPSG4267Karney, err := iceberg.GeographyTypeOf("EPSG:4267",
"karney")
+ require.NoError(t, err)
+ geogPROJJSON4267, err := iceberg.GeographyTypeOf("EPSG:4267",
"spherical")
+ require.NoError(t, err)
+
+ typeCases := []struct {
+ name string
+ ice iceberg.Type
+ wantMetaJSON string
+ }{
+ // Geometry with default CRS (defaults to OGC:CRS84 per Parquet
spec)
+ {
+ name: "geometry_default_crs",
+ ice: iceberg.GeometryType{},
+ wantMetaJSON: `{"crs":"OGC:CRS84"}`,
+ },
+ // Geometry with srid:0 should result in an unset (omitted) CRS
+ {
+ name: "geometry_srid_0",
+ ice: geomSRID0,
+ wantMetaJSON: `{}`,
+ },
+ // Geometry with custom CRSes (authority:code and partial
projjson)
+ {
+ name: "geometry_epsg_4267",
+ ice: geomEPSG4267,
+ wantMetaJSON: `{"crs":"EPSG:4267"}`,
+ },
+ // Geography with default CRS (default OGC:CRS84, spherical
edges)
+ {
+ name: "geography_default_crs",
+ ice: iceberg.GeographyType{},
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"spherical"}`,
+ },
+ // Geography with explicit edges
+ {
+ name: "geography_explicit_spherical",
+ ice: geogSpherical,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"spherical"}`,
+ },
+ {
+ name: "geography_karney",
+ ice: geogKarneyDefaultCRS,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"karney"}`,
+ },
+ {
+ name: "geography_vincenty",
+ ice: geogVincenty,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"vincenty"}`,
+ },
+ {
+ name: "geography_andoyer",
+ ice: geogAndoyer,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"andoyer"}`,
+ },
+ {
+ name: "geography_thomas",
+ ice: geogThomas,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"thomas"}`,
+ },
+ // Geography with srid:0 should result in an unset (omitted)
CRS and spherical edges
+ {
+ name: "geography_srid_0",
+ ice: geogSRID0,
+ wantMetaJSON: `{"edges":"spherical"}`,
+ },
+ // Geography with custom CRSes (authority:code and partial
projjson)
+ {
+ name: "geography_epsg_4267",
+ ice: geogEPSG4267,
+ wantMetaJSON: `{"crs":"EPSG:4267","edges":"spherical"}`,
+ },
+ }
+
+ // The following tests focus on edge cases and pinning specific
behavior for read case
+ readOnlyCases := []struct {
+ name string
+ ice iceberg.Type
+ wantMetaJSON string
+ }{
+ {
+ name: "geometry_srid_0_with_crs_type",
+ ice: geomSRID0,
+ wantMetaJSON: `{"crs_type":"authority_code"}`,
+ },
+ {
+ name: "case_insensitive_default_geometry",
+ ice: defaultGeometry,
+ wantMetaJSON: `{"crs":"OgC:cRs84"}`,
+ },
+ {
+ name:
"case_insensitive_default_geometry_epsg_4326",
+ ice: defaultGeometry,
+ wantMetaJSON: `{"crs":"EpSg:4326"}`,
+ },
+ {
+ name: "geometry_epsg_4326",
+ ice: defaultGeometry,
+ wantMetaJSON: `{"crs":"epsg:4326"}`,
+ },
+
+ // Translated from arrow-rs geo logical type read tests
(https://github.com/apache/arrow-rs/pull/10065)
+ // Geometry with no CRS should be GEOMETRY(srid:0)
+ {
+ name: "geometry_no_crs",
+ ice: geomSRID0,
+ wantMetaJSON: `{}`,
+ },
+ // Geometry with string CRS
+ {
+ name: "geometry_epsg_4267_from_crs",
+ ice: geomEPSG4267,
+ wantMetaJSON: `{"crs":"EPSG:4267"}`,
+ },
+ // Geometry with PROJJSON CRS
+ {
+ name: "geometry_projjson_epsg_3857",
+ ice: geomPROJJSON3857,
+ wantMetaJSON:
`{"crs":{"id":{"authority":"EPSG","code":3857}}}`,
+ },
+ // Geometry with lon/lat CRSes (canonically removed because
lon/lat is the
+ // default Parquet CRS)
+ {
+ name: "geometry_ogc_crs84_canonical",
+ ice: iceberg.GeometryType{},
+ wantMetaJSON: `{"crs":"OGC:CRS84"}`,
+ },
+ {
+ name: "geometry_epsg_4326_canonical",
+ ice: iceberg.GeometryType{},
+ wantMetaJSON: `{"crs":"EPSG:4326"}`,
+ },
+ {
+ name: "geometry_projjson_epsg_4326_canonical",
+ ice: iceberg.GeometryType{},
+ wantMetaJSON:
`{"crs":{"id":{"authority":"EPSG","code":4326}}}`,
+ },
+ {
+ name:
"geometry_projjson_epsg_4326_string_code_canonical",
+ ice: iceberg.GeometryType{},
+ wantMetaJSON:
`{"crs":{"id":{"authority":"EPSG","code":"4326"}}}`,
+ },
+ {
+ name: "geometry_projjson_ogc_crs84_canonical",
+ ice: iceberg.GeometryType{},
+ wantMetaJSON:
`{"crs":{"id":{"authority":"OGC","code":"CRS84"}}}`,
+ },
+ // Geography with no CRS, spherical edges
+ {
+ name: "geography_no_crs_spherical",
+ ice: geogSRID0,
+ wantMetaJSON: `{"edges":"spherical"}`,
+ },
+ // Geography with OGC:CRS84 and spherical edges
+ {
+ name: "geography_ogc_crs84_spherical_canonical",
+ ice: iceberg.GeographyType{},
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"spherical"}`,
+ },
+ // Geography with different edge algorithms
+ {
+ name: "geography_ogc_crs84_karney",
+ ice: geogKarneyDefaultCRS,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"karney"}`,
+ },
+ {
+ name: "geography_ogc_crs84_vincenty",
+ ice: geogVincenty,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"vincenty"}`,
+ },
+ {
+ name: "geography_ogc_crs84_andoyer",
+ ice: geogAndoyer,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"andoyer"}`,
+ },
+ {
+ name: "geography_ogc_crs84_thomas",
+ ice: geogThomas,
+ wantMetaJSON: `{"crs":"OGC:CRS84","edges":"thomas"}`,
+ },
+ // Geography with custom CRS and edges
+ {
+ name: "geography_epsg_4267_karney",
+ ice: geogEPSG4267Karney,
+ wantMetaJSON: `{"crs":"EPSG:4267","edges":"karney"}`,
+ },
+ // Geography with PROJJSON CRS
+ {
+ name: "geography_projjson_epsg_4267_spherical",
+ ice: geogPROJJSON4267,
+ wantMetaJSON:
`{"crs":{"id":{"authority":"EPSG","code":4267}},"edges":"spherical"}`,
+ },
Review Comment:
I am also not sure I understand this output. Shouldn't this just be
`"crs":"EPSG:4267"`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]