uros-db commented on code in PR #53227:
URL: https://github.com/apache/spark/pull/53227#discussion_r2728616981


##########
sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/geo/WkbReader.java:
##########
@@ -0,0 +1,460 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.util.geo;
+
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.spark.sql.catalyst.util.Geometry.DEFAULT_SRID;
+
+/**
+ * Reader for parsing Well-Known Binary (WKB) format geometries.
+ * This class implements the OGC Simple Features specification for WKB parsing.
+ * This class is not thread-safe. Create a new instance for each thread.
+ */
+public class WkbReader {
+  private ByteBuffer buffer;
+  private final int validationLevel;
+  private byte[] currentWkb;
+
+  /**
+   * Constructor for WkbReader with default validation level (1 = basic validation).
+   */
+  public WkbReader() {
+    this(1);
+  }
+
+  /**
+   * Constructor for WkbReader with specified validation level.
+   * @param validationLevel validation level (0 = no validation, 1 = basic validation)
+   */
+  public WkbReader(int validationLevel) {
+    this.validationLevel = validationLevel;
+  }
+
+  // ========== Coordinate Validation Helpers ==========
+
+  /**
+   * Returns true if the coordinate value is valid for a non-empty point.
+   * A valid coordinate is finite (not NaN and not Infinity).
+   */
+  private static boolean isValidCoordinate(double value) {
+    return Double.isFinite(value);
+  }
+
+  /**
+   * Returns true if the coordinate value is valid for a point that may be empty.
+   * A valid coordinate is either finite or NaN (for empty points).
+   * Infinity values are not allowed.
+   */
+  private static boolean isValidCoordinateAllowEmpty(double value) {
+    return Double.isFinite(value) || Double.isNaN(value);
+  }
+
+  /**
+   * Reads a geometry from WKB bytes.
+   */
+  public GeometryModel read(byte[] wkb) {
+    try {
+      currentWkb = wkb;
+      return readGeometry(DEFAULT_SRID);
+    } finally {
+      // Clear references to allow garbage collection
+      buffer = null;
+      currentWkb = null;
+    }
+  }
+
+  /**
+   * Reads a geometry from WKB bytes with a specified SRID.
+   */
+  public GeometryModel read(byte[] wkb, int srid) {
+    try {
+      currentWkb = wkb;
+      return readGeometry(srid);
+    } finally {
+      // Clear references to allow garbage collection
+      buffer = null;
+      currentWkb = null;
+    }
+  }
+
+  private void checkNotAtEnd(long pos) {
+    if (buffer.position() >= buffer.limit()) {
+      throw new WkbParseException("Unexpected end of WKB buffer", pos, currentWkb);
+    }
+  }
+
+  private ByteOrder readEndianness() {
+    checkNotAtEnd(buffer.position());
+    byte endianValue = buffer.get();
+    if (endianValue != WkbUtil.BIG_ENDIAN && endianValue != WkbUtil.LITTLE_ENDIAN) {
+      throw new WkbParseException("Invalid byte order " + endianValue, buffer.position() - 1,
+        currentWkb);
+    }
+    return endianValue == WkbUtil.LITTLE_ENDIAN ?
+      ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN;
+  }
+
+  private int readInt() {
+    if (buffer.remaining() < WkbUtil.INT_SIZE) {
+      throw new WkbParseException("Unexpected end of WKB buffer", buffer.position(), currentWkb);
+    }
+    return buffer.getInt();
+  }
+
+  /**
+   * Reads a double coordinate value, allowing NaN for empty points.
+   */
+  private double readDoubleAllowEmpty() {
+    if (buffer.remaining() < WkbUtil.DOUBLE_SIZE) {
+      throw new WkbParseException("Unexpected end of WKB buffer", buffer.position(), currentWkb);
+    }
+    double value = buffer.getDouble();
+    if (!isValidCoordinateAllowEmpty(value)) {
+      throw new WkbParseException("Invalid coordinate value found", buffer.position() - 8,
+        currentWkb);
+    }
+    return value;
+  }
+
+  /**
+   * Reads a double coordinate value, not allowing NaN (for non-point coordinates like rings).
+   */
+  private double readDoubleNoEmpty() {
+    if (buffer.remaining() < WkbUtil.DOUBLE_SIZE) {
+      throw new WkbParseException("Unexpected end of WKB buffer", buffer.position(), currentWkb);
+    }
+    double value = buffer.getDouble();
+    if (!isValidCoordinate(value)) {
+      throw new WkbParseException("Invalid coordinate value found", buffer.position() - 8,
+        currentWkb);
+    }
+    return value;
+  }
+
+  /**
+   * Reads a geometry from WKB bytes with a specified SRID.
+   *
+   * @param defaultSrid srid to use if not specified in WKB
+   * @return Geometry object
+   */
+  private GeometryModel readGeometry(int defaultSrid) {
+    // Check that we have at least one byte for endianness
+    if (currentWkb == null || currentWkb.length < 1) {
+      throw new WkbParseException("WKB data is empty or null", 0, currentWkb);
+    }
+
+    // Read endianness directly from the first byte
+    byte endianValue = currentWkb[0];
+    if (endianValue > 1) {

Review Comment:
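   We perform a similar check above in `private ByteOrder readEndianness`,
   but using a slightly different condition:
   ```
       if (endianValue != WkbUtil.BIG_ENDIAN && endianValue != WkbUtil.LITTLE_ENDIAN) {
         throw new WkbParseException(...);
       }
   ```
   
   Let's stay consistent and use the same approach here in
   `private GeometryModel readGeometry` too. This is more than a style point:
   Java's `byte` is signed, so the `endianValue > 1` check does not reject a
   first byte in the range 0x80-0xFF (those read as negative values), whereas
   the explicit comparison against the two constants rejects every other value.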



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to