uros-db commented on code in PR #54227: URL: https://github.com/apache/spark/pull/54227#discussion_r2797028174
########## sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/util/geo/WkbGeographyTest.java: ########## @@ -0,0 +1,635 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util.geo; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Test suite for WKB geography parsing with coordinate bounds validation. In geography mode, + * X must be between -180 and 180 (inclusive), and Y must be between -90 and 90 (inclusive). + */ +public class WkbGeographyTest extends WkbTestBase { + + /** Creates a geography-mode WkbReader with default validation (level 1). */ + private WkbReader geographyReader1() { + return new WkbReader(1, true); + } + + /** Constructs WKB for a 2D point in little endian format. */ + private byte[] makePointWkb2D(double x, double y) { + ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 8 + 8); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.put((byte) 1); // Little endian. + buf.putInt(1); // Point type (2D). + buf.putDouble(x); + buf.putDouble(y); + return buf.array(); + } + + /** Constructs WKB for a 3DZ point in little endian format. */ + private byte[] makePointWkb3DZ(double x, double y, double z) { + ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 8 + 8 + 8); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.put((byte) 1); // Little endian. + buf.putInt(1001); // Point type (3DZ). + buf.putDouble(x); + buf.putDouble(y); + buf.putDouble(z); + return buf.array(); + } + + /** Constructs WKB for a 3DM point in little endian format. */ + private byte[] makePointWkb3DM(double x, double y, double m) { + ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 8 + 8 + 8); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.put((byte) 1); // Little endian. + buf.putInt(2001); // Point type (3DM). + buf.putDouble(x); + buf.putDouble(y); + buf.putDouble(m); + return buf.array(); + } + + /** Constructs WKB for a 4D (ZM) point in little endian format. */ + private byte[] makePointWkb4D(double x, double y, double z, double m) { + ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 8 + 8 + 8 + 8); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.put((byte) 1); // Little endian. + buf.putInt(3001); // Point type (4D). + buf.putDouble(x); + buf.putDouble(y); + buf.putDouble(z); + buf.putDouble(m); + return buf.array(); + } + + /** Constructs WKB for a 2D linestring in little endian format. */ + private byte[] makeLineStringWkb2D(double[][] points) { + ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 4 + points.length * 16); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.put((byte) 1); // Little endian. + buf.putInt(2); // LineString type (2D). + buf.putInt(points.length); + for (double[] point : points) { + buf.putDouble(point[0]); + buf.putDouble(point[1]); + } + return buf.array(); + } + + /** Constructs WKB for a 2D polygon in little endian format. */ + private byte[] makePolygonWkb2D(double[][][] rings) { + int size = 1 + 4 + 4; + for (double[][] ring : rings) { + size += 4 + ring.length * 16; + } + ByteBuffer buf = ByteBuffer.allocate(size); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.put((byte) 1); // Little endian. + buf.putInt(3); // Polygon type (2D). + buf.putInt(rings.length); + for (double[][] ring : rings) { + buf.putInt(ring.length); + for (double[] point : ring) { + buf.putDouble(point[0]); + buf.putDouble(point[1]); + } + } + return buf.array(); + } + + /** Constructs WKB for a 2D multipoint in little endian format. */ + private byte[] makeMultiPointWkb2D(double[][] points) { + // Each nested point: 1 (endian) + 4 (type) + 16 (coords) = 21 bytes + ByteBuffer buf = ByteBuffer.allocate(1 + 4 + 4 + points.length * 21); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.put((byte) 1); // Little endian. + buf.putInt(4); // MultiPoint type (2D). + buf.putInt(points.length); + for (double[] point : points) { + buf.put((byte) 1); // Little endian. + buf.putInt(1); // Point type (2D). + buf.putDouble(point[0]); + buf.putDouble(point[1]); + } + return buf.array(); + } + + // ========== Valid Geography 2D Point Tests ========== + + @Test + public void testGeographyPointOrigin() { + // POINT(0 0) - valid geography + byte[] wkb = makePointWkb2D(0.0, 0.0); + GeometryModel geom = geographyReader1().read(wkb); + Assertions.assertTrue(geom.isPoint(), "Should be a Point"); + Point point = geom.asPoint(); + Assertions.assertEquals(0.0, point.getX(), 1e-10, "X coordinate"); + Assertions.assertEquals(0.0, point.getY(), 1e-10, "Y coordinate"); + } + + @Test + public void testGeographyPointBoundaryMinLonMinLat() { + // POINT(-180 -90) - valid at minimum boundary + byte[] wkb = makePointWkb2D(-180.0, -90.0); + GeometryModel geom = geographyReader1().read(wkb); + Point point = geom.asPoint(); + Assertions.assertEquals(-180.0, point.getX(), 1e-10, "X at min longitude"); + Assertions.assertEquals(-90.0, point.getY(), 1e-10, "Y at min latitude"); + } + + @Test + public void testGeographyPointBoundaryMaxLonMaxLat() { + // POINT(180 90) - valid at maximum boundary + byte[] wkb = makePointWkb2D(180.0, 90.0); + GeometryModel geom = geographyReader1().read(wkb); + Point point = geom.asPoint(); + Assertions.assertEquals(180.0, point.getX(), 1e-10, "X at max longitude"); + Assertions.assertEquals(90.0, point.getY(), 1e-10, "Y at max latitude"); + } + + @Test + public void testGeographyPointBoundaryMinLon() { + // POINT(-180 0) - valid at min longitude + byte[] wkb = makePointWkb2D(-180.0, 0.0); + GeometryModel geom = geographyReader1().read(wkb); + Assertions.assertEquals(-180.0, geom.asPoint().getX(), 1e-10, "X at min longitude"); + } + + @Test + public void testGeographyPointBoundaryMaxLon() { + // POINT(180 0) - valid at max longitude + byte[] wkb = makePointWkb2D(180.0, 0.0); + GeometryModel geom = geographyReader1().read(wkb); + Assertions.assertEquals(180.0, geom.asPoint().getX(), 1e-10, "X at max longitude"); + } + + @Test + public void testGeographyPointBoundaryMinLat() { + // POINT(0 -90) - valid at min latitude + byte[] wkb = makePointWkb2D(0.0, -90.0); + GeometryModel geom = geographyReader1().read(wkb); + Assertions.assertEquals(-90.0, geom.asPoint().getY(), 1e-10, "Y at min latitude"); + } + + @Test + public void testGeographyPointBoundaryMaxLat() { + // POINT(0 90) - valid at max latitude + byte[] wkb = makePointWkb2D(0.0, 90.0); + GeometryModel geom = geographyReader1().read(wkb); + Assertions.assertEquals(90.0, geom.asPoint().getY(), 1e-10, "Y at max latitude"); + } + + @Test + public void testGeographyPointTypicalCoordinates() { + // POINT(-73.9857 40.7484) - New York City + byte[] wkb = makePointWkb2D(-73.9857, 40.7484); + GeometryModel geom = geographyReader1().read(wkb); + Point point = geom.asPoint(); + Assertions.assertEquals(-73.9857, point.getX(), 1e-10, "X longitude"); + Assertions.assertEquals(40.7484, point.getY(), 1e-10, "Y latitude"); + } + + // ========== Empty Point in Geography Mode ========== + + @Test + public void testGeographyEmptyPoint2D() { + // POINT EMPTY - valid in geography mode (NaN coordinates skip bounds check) + String wkbHex = "0101000000000000000000f87f000000000000f87f"; + byte[] wkb = hexToBytes(wkbHex); + GeometryModel geom = geographyReader1().read(wkb); + Assertions.assertTrue(geom.isPoint(), "Should be a Point"); + Assertions.assertTrue(geom.asPoint().isEmpty(), "Point should be empty"); + } + + @Test + public void testGeographyEmptyPoint3DZ() { + // POINT Z EMPTY + String wkbHex = "01e9030000000000000000f87f000000000000f87f000000000000f87f"; + byte[] wkb = hexToBytes(wkbHex); + GeometryModel geom = geographyReader1().read(wkb); + Assertions.assertTrue(geom.asPoint().isEmpty(), "Point Z should be empty"); + } + + @Test + public void testGeographyEmptyPoint4D() { + // POINT ZM EMPTY + String wkbHex = + "01b90b0000000000000000f87f000000000000f87f000000000000f87f000000000000f87f"; + byte[] wkb = hexToBytes(wkbHex); + GeometryModel geom = geographyReader1().read(wkb); + Assertions.assertTrue(geom.asPoint().isEmpty(), "Point ZM should be empty"); + } + + // ========== Invalid Geography Point - Longitude Out of Bounds ========== + + @Test + public void testGeographyPointLongitudeJustOverMax() { + // POINT(180.0001 0) - longitude just over 180 + byte[] wkb = makePointWkb2D(180.0001, 0.0); + Assertions.assertThrows(WkbParseException.class, () -> geographyReader1().read(wkb)); + } + + @Test + public void testGeographyPointLongitudeJustUnderMin() { Review Comment: Thanks! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
