This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 96093bdacba1 [SPARK-53922][GEO][SQL] Introduce physical Geometry and 
Geography types
96093bdacba1 is described below

commit 96093bdacba15854b06d279ee3ea73374bec1ea3
Author: Uros Bojanic <[email protected]>
AuthorDate: Thu Oct 23 15:59:26 2025 +0800

    [SPARK-53922][GEO][SQL] Introduce physical Geometry and Geography types
    
    ### What changes were proposed in this pull request?
    Introduce two new physical types to Spark:
    - `PhysicalGeographyType`
    - `PhysicalGeometryType`
    
    This PR also adds appropriate mapping from the logical geospatial types 
(introduced in: https://github.com/apache/spark/pull/52491) to the new physical 
types.
    
    ### Why are the changes needed?
    Extending the implementation of GEOMETRY and GEOGRAPHY types in Spark, 
laying the groundwork for full geospatial data type support.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Added new tests to:
    - `GeographyValSuite`
    - `GeometryValSuite`
    
    Also, added appropriate test cases to:
    - `GeographyTypeSuite`
    - `GeometryTypeSuite`
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #52629 from uros-db/geo-physical-types.
    
    Authored-by: Uros Bojanic <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../apache/spark/unsafe/types/GeographyVal.java    | 50 +++++++++++++++++++
 .../org/apache/spark/unsafe/types/GeometryVal.java | 50 +++++++++++++++++++
 .../spark/unsafe/types/GeographyValSuite.java      | 57 ++++++++++++++++++++++
 .../spark/unsafe/types/GeometryValSuite.java       | 57 ++++++++++++++++++++++
 .../sql/catalyst/types/PhysicalDataType.scala      | 22 ++++++++-
 .../spark/sql/types/GeographyTypeSuite.scala       | 12 +++++
 .../apache/spark/sql/types/GeometryTypeSuite.scala | 14 ++++++
 7 files changed, 260 insertions(+), 2 deletions(-)

diff --git 
a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java 
b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java
new file mode 100644
index 000000000000..48dc6f896e91
--- /dev/null
+++ 
b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import java.io.Serializable;
+
+// This class represents the physical type for the GEOGRAPHY data type.
+public final class GeographyVal implements Comparable<GeographyVal>, 
Serializable {
+
+  // The GEOGRAPHY type is implemented as a byte array. We provide `getBytes` 
and `fromBytes`
+  // methods for readers and writers to access this underlying array of bytes.
+  private final byte[] value;
+
+  // We make the constructor private. We should use `fromBytes` to create new 
instances.
+  private GeographyVal(byte[] value) {
+    this.value = value;
+  }
+
+  public byte[] getBytes() {
+    return value;
+  }
+
+  public static GeographyVal fromBytes(byte[] bytes) {
+    if (bytes == null) {
+      return null;
+    } else {
+      return new GeographyVal(bytes);
+    }
+  }
+
+  // Comparison is not yet supported for GEOGRAPHY.
+  public int compareTo(GeographyVal g) {
+    throw new UnsupportedOperationException();
+  }
+}
diff --git 
a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java 
b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java
new file mode 100644
index 000000000000..2bb7f194c940
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import java.io.Serializable;
+
+// This class represents the physical type for the GEOMETRY data type.
+public final class GeometryVal implements Comparable<GeometryVal>, 
Serializable {
+
+  // The GEOMETRY type is implemented as a byte array. We provide `getBytes` 
and `fromBytes`
+  // methods for readers and writers to access this underlying array of bytes.
+  private final byte[] value;
+
+  // We make the constructor private. We should use `fromBytes` to create new 
instances.
+  private GeometryVal(byte[] value) {
+    this.value = value;
+  }
+
+  public byte[] getBytes() {
+    return value;
+  }
+
+  public static GeometryVal fromBytes(byte[] bytes) {
+    if (bytes == null) {
+      return null;
+    } else {
+      return new GeometryVal(bytes);
+    }
+  }
+
+  // Comparison is not yet supported for GEOMETRY.
+  public int compareTo(GeometryVal g) {
+    throw new UnsupportedOperationException();
+  }
+}
diff --git 
a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java
 
b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java
new file mode 100644
index 000000000000..639a8b2f7782
--- /dev/null
+++ 
b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+public class GeographyValSuite {
+
+  @Test
+  public void roundTripBytes() {
+    // A simple byte array to test the round trip (`fromBytes` -> `getBytes`).
+    byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 };
+    GeographyVal geographyVal = GeographyVal.fromBytes(bytes);
+    assertNotNull(geographyVal);
+    assertArrayEquals(bytes, geographyVal.getBytes());
+  }
+
+  @Test
+  public void roundNullHandling() {
+    // A simple null byte array to test null handling for GEOGRAPHY.
+    byte[] bytes = null;
+    GeographyVal geographyVal = GeographyVal.fromBytes(bytes);
+    assertNull(geographyVal);
+  }
+
+  @Test
+  public void testCompareTo() {
+    // Comparison is not yet supported for GEOGRAPHY.
+    byte[] bytes1 = new byte[] { 1, 2, 3 };
+    byte[] bytes2 = new byte[] { 4, 5, 6 };
+    GeographyVal geographyVal1 = GeographyVal.fromBytes(bytes1);
+    GeographyVal geographyVal2 = GeographyVal.fromBytes(bytes2);
+    try {
+      geographyVal1.compareTo(geographyVal2);
+    } catch (UnsupportedOperationException e) {
+      assert(e.toString().equals("java.lang.UnsupportedOperationException"));
+    }
+  }
+}
diff --git 
a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java
 
b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java
new file mode 100644
index 000000000000..e38c6903e6dd
--- /dev/null
+++ 
b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+public class GeometryValSuite {
+
+  @Test
+  public void roundTripBytes() {
+    // A simple byte array to test the round trip (`fromBytes` -> `getBytes`).
+    byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 };
+    GeometryVal geometryVal = GeometryVal.fromBytes(bytes);
+    assertNotNull(geometryVal);
+    assertArrayEquals(bytes, geometryVal.getBytes());
+  }
+
+  @Test
+  public void roundNullHandling() {
+    // A simple null byte array to test null handling for GEOMETRY.
+    byte[] bytes = null;
+    GeometryVal geometryVal = GeometryVal.fromBytes(bytes);
+    assertNull(geometryVal);
+  }
+
+  @Test
+  public void testCompareTo() {
+    // Comparison is not yet supported for GEOMETRY.
+    byte[] bytes1 = new byte[] { 1, 2, 3 };
+    byte[] bytes2 = new byte[] { 4, 5, 6 };
+    GeometryVal geometryVal1 = GeometryVal.fromBytes(bytes1);
+    GeometryVal geometryVal2 = GeometryVal.fromBytes(bytes2);
+    try {
+      geometryVal1.compareTo(geometryVal2);
+    } catch (UnsupportedOperationException e) {
+      assert(e.toString().equals("java.lang.UnsupportedOperationException"));
+    }
+  }
+}
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/PhysicalDataType.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/PhysicalDataType.scala
index 1084e9973151..466fa2c0c8c7 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/PhysicalDataType.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/PhysicalDataType.scala
@@ -23,8 +23,8 @@ import scala.reflect.runtime.universe.typeTag
 import org.apache.spark.sql.catalyst.expressions.{Ascending, BoundReference, 
InterpretedOrdering, SortOrder}
 import org.apache.spark.sql.catalyst.util.{ArrayData, CollationFactory, 
MapData, SQLOrderingUtil}
 import org.apache.spark.sql.errors.QueryExecutionErrors
-import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, 
ByteExactNumeric, ByteType, CalendarIntervalType, CharType, DataType, DateType, 
DayTimeIntervalType, Decimal, DecimalExactNumeric, DecimalType, 
DoubleExactNumeric, DoubleType, FloatExactNumeric, FloatType, FractionalType, 
IntegerExactNumeric, IntegerType, IntegralType, LongExactNumeric, LongType, 
MapType, NullType, NumericType, ShortExactNumeric, ShortType, StringType, 
StructField, StructType, TimestampNTZType, Timest [...]
-import org.apache.spark.unsafe.types.{ByteArray, UTF8String, VariantVal}
+import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, 
ByteExactNumeric, ByteType, CalendarIntervalType, CharType, DataType, DateType, 
DayTimeIntervalType, Decimal, DecimalExactNumeric, DecimalType, 
DoubleExactNumeric, DoubleType, FloatExactNumeric, FloatType, FractionalType, 
GeographyType, GeometryType, IntegerExactNumeric, IntegerType, IntegralType, 
LongExactNumeric, LongType, MapType, NullType, NumericType, ShortExactNumeric, 
ShortType, StringType, StructField, StructT [...]
+import org.apache.spark.unsafe.types.{ByteArray, GeographyVal, GeometryVal, 
UTF8String, VariantVal}
 import org.apache.spark.util.ArrayImplicits._
 
 sealed abstract class PhysicalDataType {
@@ -59,6 +59,8 @@ object PhysicalDataType {
     case StructType(fields) => PhysicalStructType(fields)
     case MapType(keyType, valueType, valueContainsNull) =>
       PhysicalMapType(keyType, valueType, valueContainsNull)
+    case _: GeometryType => PhysicalGeometryType
+    case _: GeographyType => PhysicalGeographyType
     case VariantType => PhysicalVariantType
     case _ => UninitializedPhysicalType
   }
@@ -411,3 +413,19 @@ object UninitializedPhysicalType extends PhysicalDataType {
   override private[sql] type InternalType = Any
   @transient private[sql] lazy val tag = typeTag[InternalType]
 }
+
+case class PhysicalGeographyType() extends PhysicalDataType {
+  private[sql] type InternalType = GeographyVal
+  @transient private[sql] lazy val tag = typeTag[InternalType]
+  private[sql] val ordering = implicitly[Ordering[InternalType]]
+}
+
+object PhysicalGeographyType extends PhysicalGeographyType
+
+case class PhysicalGeometryType() extends PhysicalDataType {
+  private[sql] type InternalType = GeometryVal
+  @transient private[sql] lazy val tag = typeTag[InternalType]
+  private[sql] val ordering = implicitly[Ordering[InternalType]]
+}
+
+object PhysicalGeometryType extends PhysicalGeometryType
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/types/GeographyTypeSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/types/GeographyTypeSuite.scala
index 51de95826f81..ca377e739a0b 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/types/GeographyTypeSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/types/GeographyTypeSuite.scala
@@ -23,6 +23,7 @@ import org.json4s.JsonAST.JString
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.SparkIllegalArgumentException
+import org.apache.spark.sql.catalyst.types.{PhysicalDataType, 
PhysicalGeographyType}
 
 class GeographyTypeSuite extends SparkFunSuite {
 
@@ -216,4 +217,15 @@ class GeographyTypeSuite extends SparkFunSuite {
       }
     }
   }
+
+  test("PhysicalDataType maps GeographyType to PhysicalGeographyType") {
+    val geometryTypes: Seq[DataType] = Seq(
+      GeographyType(4326),
+      GeographyType("ANY")
+    )
+    geometryTypes.foreach { geometryType =>
+      val pdt = PhysicalDataType(geometryType)
+      assert(pdt.isInstanceOf[PhysicalGeographyType])
+    }
+  }
 }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/types/GeometryTypeSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/types/GeometryTypeSuite.scala
index a6961f0c0343..9ae96eedef41 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/types/GeometryTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/types/GeometryTypeSuite.scala
@@ -23,6 +23,7 @@ import org.json4s.JsonAST.JString
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.SparkIllegalArgumentException
+import org.apache.spark.sql.catalyst.types.{PhysicalDataType, 
PhysicalGeometryType}
 
 class GeometryTypeSuite extends SparkFunSuite {
 
@@ -200,4 +201,17 @@ class GeometryTypeSuite extends SparkFunSuite {
       }
     }
   }
+
+  test("PhysicalDataType maps GeometryType to PhysicalGeometryType") {
+    val geometryTypes: Seq[DataType] = Seq(
+      GeometryType(0),
+      GeometryType(3857),
+      GeometryType(4326),
+      GeometryType("ANY")
+    )
+    geometryTypes.foreach { geometryType =>
+      val pdt = PhysicalDataType(geometryType)
+      assert(pdt.isInstanceOf[PhysicalGeometryType])
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to