szehon-ho commented on code in PR #54227:
URL: https://github.com/apache/spark/pull/54227#discussion_r2796092280


##########
sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/geo/WkbReader.java:
##########
@@ -25,29 +25,61 @@
 import java.util.List;
 
 /**
- * Reader for parsing Well-Known Binary (WKB) format geometries.
+ * Reader for parsing Well-Known Binary (WKB) format geometries and 
geographies.
  * This class implements the OGC Simple Features specification for WKB parsing.
+ * For geographies, coordinate bounds validation is enforced:
+ *   - X (longitude) must be between -180 and 180 (inclusive),
+ *   - Y (latitude) must be between -90 and 90 (inclusive).
  * This class is not thread-safe. Create a new instance for each thread.
  * This class should be catalyst-internal.
  */
 public class WkbReader {
   private ByteBuffer buffer;
   private final int validationLevel;
+  private final boolean geographyMode;
   private byte[] currentWkb;
 
+  // Geography coordinate bounds.
+  private static final double MIN_LONGITUDE = -180.0;
+  private static final double MAX_LONGITUDE = 180.0;
+  private static final double MIN_LATITUDE = -90.0;
+  private static final double MAX_LATITUDE = 90.0;
+  // Default WKB reader settings.
+  private static final int DEFAULT_VALIDATION_LEVEL = 1; // basic validation
+  private static final boolean DEFAULT_GEOGRAPHY_MODE = false; // no geography 
bounds checking

Review Comment:
   style (optional):  Calling it a mode is a bit confusing , i think just 
inline false and not having a constant, and rename the variable isGeography 
makes it easier to read.  



##########
sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/util/geo/WkbGeographyTest.java:
##########
@@ -0,0 +1,635 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util.geo;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * Test suite for WKB geography parsing with coordinate bounds validation. In 
geography mode,
+ * X must be between -180 and 180 (inclusive), and Y must be between -90 and 
90 (inclusive).
+ */
+public class WkbGeographyTest extends WkbTestBase {
+
+  /** Creates a geography-mode WkbReader with default validation (level 1). */
+  private WkbReader geographyReader1() {
+    return new WkbReader(1, true);
+  }
+
+  /** Constructs WKB for a 2D point in little endian format. */
+  private byte[] makePointWkb2D(double x, double y) {
+    ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 8 + 8);
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    buf.put((byte) 1); // Little endian.
+    buf.putInt(1); // Point type (2D).
+    buf.putDouble(x);
+    buf.putDouble(y);
+    return buf.array();
+  }
+
+  /** Constructs WKB for a 3DZ point in little endian format. */
+  private byte[] makePointWkb3DZ(double x, double y, double z) {
+    ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 8 + 8 + 8);
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    buf.put((byte) 1); // Little endian.
+    buf.putInt(1001); // Point type (3DZ).
+    buf.putDouble(x);
+    buf.putDouble(y);
+    buf.putDouble(z);
+    return buf.array();
+  }
+
+  /** Constructs WKB for a 3DM point in little endian format. */
+  private byte[] makePointWkb3DM(double x, double y, double m) {
+    ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 8 + 8 + 8);
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    buf.put((byte) 1); // Little endian.
+    buf.putInt(2001); // Point type (3DM).
+    buf.putDouble(x);
+    buf.putDouble(y);
+    buf.putDouble(m);
+    return buf.array();
+  }
+
+  /** Constructs WKB for a 4D (ZM) point in little endian format. */
+  private byte[] makePointWkb4D(double x, double y, double z, double m) {
+    ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 8 + 8 + 8 + 8);
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    buf.put((byte) 1); // Little endian.
+    buf.putInt(3001); // Point type (4D).
+    buf.putDouble(x);
+    buf.putDouble(y);
+    buf.putDouble(z);
+    buf.putDouble(m);
+    return buf.array();
+  }
+
+  /** Constructs WKB for a 2D linestring in little endian format. */
+  private byte[] makeLineStringWkb2D(double[][] points) {
+    ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 4 + points.length * 16);
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    buf.put((byte) 1); // Little endian.
+    buf.putInt(2); // LineString type (2D).
+    buf.putInt(points.length);
+    for (double[] point : points) {
+      buf.putDouble(point[0]);
+      buf.putDouble(point[1]);
+    }
+    return buf.array();
+  }
+
+  /** Constructs WKB for a 2D polygon in little endian format. */
+  private byte[] makePolygonWkb2D(double[][][] rings) {
+    int size = 1 + 4 + 4;
+    for (double[][] ring : rings) {
+      size += 4 + ring.length * 16;
+    }
+    ByteBuffer buf = ByteBuffer.allocate(size);
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    buf.put((byte) 1); // Little endian.
+    buf.putInt(3); // Polygon type (2D).
+    buf.putInt(rings.length);
+    for (double[][] ring : rings) {
+      buf.putInt(ring.length);
+      for (double[] point : ring) {
+        buf.putDouble(point[0]);
+        buf.putDouble(point[1]);
+      }
+    }
+    return buf.array();
+  }
+
+  /** Constructs WKB for a 2D multipoint in little endian format. */
+  private byte[] makeMultiPointWkb2D(double[][] points) {
+    // Each nested point: 1 (endian) + 4 (type) + 16 (coords) = 21 bytes
+    ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 4 + points.length * 21);
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    buf.put((byte) 1); // Little endian.
+    buf.putInt(4); // MultiPoint type (2D).
+    buf.putInt(points.length);
+    for (double[] point : points) {
+      buf.put((byte) 1); // Little endian.
+      buf.putInt(1); // Point type (2D).
+      buf.putDouble(point[0]);
+      buf.putDouble(point[1]);
+    }
+    return buf.array();
+  }
+
+  // ========== Valid Geography 2D Point Tests ==========
+
+  @Test
+  public void testGeographyPointOrigin() {
+    // POINT(0 0) - valid geography
+    byte[] wkb = makePointWkb2D(0.0, 0.0);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Assertions.assertTrue(geom.isPoint(), "Should be a Point");
+    Point point = geom.asPoint();
+    Assertions.assertEquals(0.0, point.getX(), 1e-10, "X coordinate");
+    Assertions.assertEquals(0.0, point.getY(), 1e-10, "Y coordinate");
+  }
+
+  @Test
+  public void testGeographyPointBoundaryMinLonMinLat() {
+    // POINT(-180 -90) - valid at minimum boundary
+    byte[] wkb = makePointWkb2D(-180.0, -90.0);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Point point = geom.asPoint();
+    Assertions.assertEquals(-180.0, point.getX(), 1e-10, "X at min longitude");
+    Assertions.assertEquals(-90.0, point.getY(), 1e-10, "Y at min latitude");
+  }
+
+  @Test
+  public void testGeographyPointBoundaryMaxLonMaxLat() {
+    // POINT(180 90) - valid at maximum boundary
+    byte[] wkb = makePointWkb2D(180.0, 90.0);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Point point = geom.asPoint();
+    Assertions.assertEquals(180.0, point.getX(), 1e-10, "X at max longitude");
+    Assertions.assertEquals(90.0, point.getY(), 1e-10, "Y at max latitude");
+  }
+
+  @Test
+  public void testGeographyPointBoundaryMinLon() {
+    // POINT(-180 0) - valid at min longitude
+    byte[] wkb = makePointWkb2D(-180.0, 0.0);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Assertions.assertEquals(-180.0, geom.asPoint().getX(), 1e-10, "X at min 
longitude");
+  }
+
+  @Test
+  public void testGeographyPointBoundaryMaxLon() {
+    // POINT(180 0) - valid at max longitude
+    byte[] wkb = makePointWkb2D(180.0, 0.0);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Assertions.assertEquals(180.0, geom.asPoint().getX(), 1e-10, "X at max 
longitude");
+  }
+
+  @Test
+  public void testGeographyPointBoundaryMinLat() {
+    // POINT(0 -90) - valid at min latitude
+    byte[] wkb = makePointWkb2D(0.0, -90.0);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Assertions.assertEquals(-90.0, geom.asPoint().getY(), 1e-10, "Y at min 
latitude");
+  }
+
+  @Test
+  public void testGeographyPointBoundaryMaxLat() {
+    // POINT(0 90) - valid at max latitude
+    byte[] wkb = makePointWkb2D(0.0, 90.0);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Assertions.assertEquals(90.0, geom.asPoint().getY(), 1e-10, "Y at max 
latitude");
+  }
+
+  @Test
+  public void testGeographyPointTypicalCoordinates() {
+    // POINT(-73.9857 40.7484) - New York City
+    byte[] wkb = makePointWkb2D(-73.9857, 40.7484);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Point point = geom.asPoint();
+    Assertions.assertEquals(-73.9857, point.getX(), 1e-10, "X longitude");
+    Assertions.assertEquals(40.7484, point.getY(), 1e-10, "Y latitude");
+  }
+
+  // ========== Empty Point in Geography Mode ==========
+
+  @Test
+  public void testGeographyEmptyPoint2D() {
+    // POINT EMPTY - valid in geography mode (NaN coordinates skip bounds 
check)
+    String wkbHex = "0101000000000000000000f87f000000000000f87f";
+    byte[] wkb = hexToBytes(wkbHex);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Assertions.assertTrue(geom.isPoint(), "Should be a Point");
+    Assertions.assertTrue(geom.asPoint().isEmpty(), "Point should be empty");
+  }
+
+  @Test
+  public void testGeographyEmptyPoint3DZ() {
+    // POINT Z EMPTY
+    String wkbHex = 
"01e9030000000000000000f87f000000000000f87f000000000000f87f";
+    byte[] wkb = hexToBytes(wkbHex);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Assertions.assertTrue(geom.asPoint().isEmpty(), "Point Z should be empty");
+  }
+
+  @Test
+  public void testGeographyEmptyPoint4D() {
+    // POINT ZM EMPTY
+    String wkbHex =
+        
"01b90b0000000000000000f87f000000000000f87f000000000000f87f000000000000f87f";
+    byte[] wkb = hexToBytes(wkbHex);
+    GeometryModel geom = geographyReader1().read(wkb);
+    Assertions.assertTrue(geom.asPoint().isEmpty(), "Point ZM should be 
empty");
+  }
+
+  // ========== Invalid Geography Point - Longitude Out of Bounds ==========
+
+  @Test
+  public void testGeographyPointLongitudeJustOverMax() {
+    // POINT(180.0001 0) - longitude just over 180
+    byte[] wkb = makePointWkb2D(180.0001, 0.0);
+    Assertions.assertThrows(WkbParseException.class, () -> 
geographyReader1().read(wkb));
+  }
+
+  @Test
+  public void testGeographyPointLongitudeJustUnderMin() {

Review Comment:
   nice test



##########
sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/geo/WkbReader.java:
##########
@@ -69,6 +101,32 @@ private static boolean isValidCoordinateAllowEmpty(double 
value) {
     return Double.isFinite(value) || Double.isNaN(value);
   }
 
+  /**
+   * Returns true if the longitude value is within valid geography bounds 
[-180, 180].
+   */
+  private static boolean isValidLongitude(double value) {
+    return value >= MIN_LONGITUDE && value <= MAX_LONGITUDE;
+  }
+
+  /**
+   * Returns true if the latitude value is within valid geography bounds [-90, 
90].
+   */
+  private static boolean isValidLatitude(double value) {
+    return value >= MIN_LATITUDE && value <= MAX_LATITUDE;
+  }
+
+  /**
+   * Validates geography coordinate bounds for a point. In geography mode with 
validation
+   * level > 0, longitude must be between -180 and 180, and latitude must be 
between -90 and 90.
+   */
+  private void validateGeographyBounds(Point point, long pos) {
+    if (geographyMode && validationLevel > 0 && !point.isEmpty()) {
+      if (!isValidLongitude(point.getX()) || !isValidLatitude(point.getY())) {
+        throw new WkbParseException("Invalid coordinate value found", pos, 
currentWkb);

Review Comment:
   optional: be nice to print out the offending coordinate for debuggability



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to