laskoviymishka commented on code in PR #1138:
URL: https://github.com/apache/iceberg-go/pull/1138#discussion_r3402581162


##########
table/internal/geo_codec.go:
##########
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package internal

Review Comment:
   This is `package internal`, and `WKTToWKB`'s only callers are `_test.go` 
files, but go-geom is now a direct require in go.mod — so every consumer of 
`table/internal` (scanner, DV writer) transitively pulls it into the production 
module graph.
   
   I'd move this into a `_test.go` file in `package internal_test` (the test 
already lives there) or a small testutil package, so go-geom stays test-only. 
Non-blocking from my side, and I haven't seen it raised on the threads — if the 
maintainers are fine carrying it that's a reasonable call — but I'd lean toward 
keeping it out of the production deps.



##########
table/arrow_utils.go:
##########
@@ -1826,3 +1846,152 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func checkCRSString(rawCrs json.RawMessage) bool {
+       b := bytes.TrimSpace(rawCrs)
+
+       return len(b) > 0 && b[0] == '"'
+}
+
+func checkCRSSJSON(rawCrs json.RawMessage) bool {
+       b := bytes.TrimSpace(rawCrs)
+
+       return len(b) > 0 && b[0] == '{'
+}
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 {
+               return "srid:0", nil
+       }
+
+       switch {
+       case checkCRSString(meta.CRS):
+               var crs string
+               if len(meta.CRS) > 0 {
+                       if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+                               return "", fmt.Errorf("invalid geoarrow CRS 
metadata: %w", err)
+                       }
+               }
+
+               if strings.EqualFold(crs, "OGC:CRS84") || 
strings.EqualFold(crs, "EPSG:4326") {
+                       return "OGC:CRS84", nil
+               }
+
+               switch meta.CRSType {
+               case geoarrow.CRSTypeSRID:
+                       return "srid:" + crs, nil
+               case geoarrow.CRSTypeWKT22019:
+                       return "", errors.New("CRS type wkt2:2019 not 
supported")
+               default:
+                       if len(crs) <= 32 {
+                               return crs, nil
+                       }
+
+                       return "", errors.New("crs length too long")
+               }
+       case checkCRSSJSON(meta.CRS):
+               var crs map[string]json.RawMessage
+               if len(meta.CRS) > 0 {
+                       if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+                               return "", fmt.Errorf("invalid geoarrow CRS 
metadata: %w", err)
+                       }
+               }
+
+               idRaw, ok := crs["id"]
+               if !ok || len(idRaw) == 0 {
+                       return "", errors.New("unsupported CRS")
+               }
+
+               var id map[string]json.RawMessage
+               if err := json.Unmarshal(idRaw, &id); err != nil {
+                       return "", errors.New("unsupported CRS")
+               }
+
+               var authority string
+               if err := json.Unmarshal(id["authority"], &authority); err != 
nil || authority == "" {
+                       return "", errors.New("unsupported CRS")
+               }
+
+               codeRaw := id["code"]
+               if len(codeRaw) == 0 {
+                       return "", errors.New("unsupported CRS")
+               }
+
+               var code string
+               if err := json.Unmarshal(codeRaw, &code); err != nil {
+                       var codeNum json.Number
+                       if err := json.Unmarshal(codeRaw, &codeNum); err != nil 
|| codeNum.String() == "" {
+                               return "", errors.New("unsupported CRS")
+                       }
+                       code = codeNum.String()
+               }
+               if code == "" {
+                       return "", errors.New("unsupported CRS")
+               }
+
+               authorityCode := authority + ":" + code
+               if strings.EqualFold(authorityCode, "OGC:CRS84") || 
strings.EqualFold(authorityCode, "EPSG:4326") {
+                       return "OGC:CRS84", nil
+               }
+
+               return authorityCode, nil
+       default:
+               return "", errors.New("unsupported CRS: CRS must either be 
omitted, a string, or a JSON object")
+       }
+}
+
+func geoArrowMetadataToIcebergType(meta geoarrow.Metadata) (iceberg.Type, 
error) {
+       crs, err := geoArrowCRSToIcebergCRS(meta)
+       if err != nil {
+               return nil, err
+       }
+
+       switch meta.Edges {
+       case geoarrow.EdgePlanar:
+               return iceberg.GeometryTypeOf(crs)
+       case geoarrow.EdgeVincenty, geoarrow.EdgeKarney, geoarrow.EdgeThomas, 
geoarrow.EdgeAndoyer, geoarrow.EdgeSpherical:
+               return iceberg.GeographyTypeOf(crs, string(meta.Edges))
+       default:
+               return nil, fmt.Errorf("unsupported geoarrow edges %q", 
meta.Edges)
+       }
+}
+
+var authorityCodeCRS = 
regexp.MustCompile(`^[A-Za-z][A-Za-z0-9_-]*:[A-Za-z0-9_.-]+$`)
+
+func icebergCRSToGeoArrowMetadata(crs string) geoarrow.Metadata {
+       lowerCRS := strings.ToLower(crs)
+       if strings.HasPrefix(lowerCRS, "srid:") {
+               id := lowerCRS[len("srid:"):]

Review Comment:
   The id is sliced out of the lowercased string, so a non-numeric SRID gets 
silently case-folded: `srid:MyDB` goes in, `mydb` gets marshalled, and it 
round-trips back as `srid:mydb`. Numeric SRIDs are fine, which is why it's 
latent.
   
   I'd slice the original `crs` instead — `id := crs[len("srid:"):]` — and keep 
the `HasPrefix` check on `lowerCRS`.
   
   The `iceberg_to_arrow_round_trip` loop only iterates `typeCases`, which has 
`geomSRID0` but no non-zero SRID, so the common path isn't exercised either. 
Adding a `srid:4326` case to `typeCases` covers the happy path and would have 
caught this — worth pairing the fix with that test case.



##########
table/arrow_utils.go:
##########
@@ -1826,3 +1846,152 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func checkCRSString(rawCrs json.RawMessage) bool {
+       b := bytes.TrimSpace(rawCrs)
+
+       return len(b) > 0 && b[0] == '"'
+}
+
+func checkCRSSJSON(rawCrs json.RawMessage) bool {

Review Comment:
   Minor: `checkCRSSJSON` has a double S — rename to `checkCRSJSON` at the def 
and the call site.



##########
table/arrow_utils.go:
##########
@@ -1826,3 +1846,152 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func checkCRSString(rawCrs json.RawMessage) bool {
+       b := bytes.TrimSpace(rawCrs)
+
+       return len(b) > 0 && b[0] == '"'
+}
+
+func checkCRSSJSON(rawCrs json.RawMessage) bool {
+       b := bytes.TrimSpace(rawCrs)
+
+       return len(b) > 0 && b[0] == '{'
+}
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 {
+               return "srid:0", nil
+       }
+
+       switch {
+       case checkCRSString(meta.CRS):

Review Comment:
   A small cleanup cluster in this function:
   
   The `if len(meta.CRS) > 0` wrappers inside both this `checkCRSString` arm 
and the `checkCRSSJSON` arm below are dead — the guard at the top of the 
function already returns on empty, and the `check*` funcs only return true for 
non-empty input (staticcheck SA9003). I'd drop both wrappers.
   
   On the write path (line 1984), `strings.EqualFold(lowerCRS, "EPSG:4326")` 
runs a case-fold against an already-lowercased string — `lowerCRS == 
"epsg:4326"` is enough.
   
   And in the JSON-object code parsing (line 1923), `|| codeNum.String() == ""` 
is unreachable — a successful `json.Number` unmarshal is never empty. I'd drop 
just that clause; the trailing `if code == ""` is reachable for `{"code":""}`, 
so leave that one.



##########
table/internal/geo_codec.go:
##########
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package internal
+
+import (
+       "github.com/geoarrow/geoarrow-go"
+
+       "github.com/twpayne/go-geom/encoding/wkb"
+       "github.com/twpayne/go-geom/encoding/wkt"
+)
+
+// WKTToWKB is a helper which converts Well Known Text (WKT) to Well Known 
Bytes (WKB).
+// Note that return bytes are little endian.
+func WKTToWKB(s string) (geoarrow.WKBBytes, error) {
+       geometry, err := wkt.Unmarshal(s)

Review Comment:
   `geo_codec_test.go` is all happy-path — no invalid-WKT case. I'd add a 
`wantErr` row (e.g. `"not wkt"`) so the error path is exercised. While we're 
here, the `wkt.Unmarshal` / `wkb.Marshal` errors return bare; wrapping them 
(`fmt.Errorf("parse WKT: %w", err)`) makes failures legible at the call site.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to