This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 3b06a6580a34 [SPARK-55696][SQL] Add explicit error to Encoders.bean 
for interface class
3b06a6580a34 is described below

commit 3b06a6580a3484c71444b2229c1292c2b800eb20
Author: Szehon Ho <[email protected]>
AuthorDate: Fri Feb 27 16:49:33 2026 +0800

    [SPARK-55696][SQL] Add explicit error to Encoders.bean for interface class
    
    ### What changes were proposed in this pull request?
    Add an explicit error message to Encoders.bean when it is called with an interface class.
    
    ### Why are the changes needed?
    
    For bean Encoders, the de-serializer uses the constructor inferred from the 
bean Encoder.  I.e., when we get the bytes back, the Dataset deserialization 
will use the Encoder's interface class to try to construct an instance, and fail.
    
    Code ref:
    
https://github.com/apache/spark/blob/b3703755d80585297367d539de9fa8c5783b1c6b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala#L482
    
    ### Does this PR introduce _any_ user-facing change?
    No, it already fails today with another message:
    ```
    java.lang.IllegalStateException: found an unhandled type: null
          at 
org.apache.commons.lang3.reflect.TypeUtils.getTypeArguments(TypeUtils.java:915)
          at 
org.apache.commons.lang3.reflect.TypeUtils.getTypeArguments(TypeUtils.java:791)
          at 
org.apache.commons.lang3.reflect.TypeUtils.getTypeArguments(TypeUtils.java:886)
          at 
org.apache.commons.lang3.reflect.TypeUtils.getTypeArguments(TypeUtils.java:873)
          at 
org.apache.spark.sql.catalyst.JavaTypeInference$.encoderFor(JavaTypeInference.scala:159)
          at 
org.apache.spark.sql.catalyst.JavaTypeInference$.encoderFor(JavaTypeInference.scala:63)
          at 
org.apache.spark.sql.catalyst.JavaTypeInference$.encoderFor(JavaTypeInference.scala:56)
          at org.apache.spark.sql.Encoders$.bean(Encoders.scala:211)
          at org.apache.spark.sql.Encoders.bean(Encoders.scala)
    
    ```
    It will now fail more explicitly.
    
    ### How was this patch tested?
    Added unit test in JavaDatasetSuite
    
    ### Was this patch authored or co-authored using generative AI tooling?
    Yes, cursor
    
    Closes #54494 from szehon-ho/encoder_interface.
    
    Authored-by: Szehon Ho <[email protected]>
    Signed-off-by: Cheng Pan <[email protected]>
---
 .../src/main/resources/error/error-conditions.json |   6 ++
 .../main/scala/org/apache/spark/sql/Encoders.scala |   2 +-
 .../spark/sql/catalyst/JavaTypeInference.scala     |   8 ++
 .../org/apache/spark/sql/JavaDatasetSuite.java     | 115 +++++++++++++++++++++
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json
index 94111b8e9ee0..52a21e43ea9e 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -206,6 +206,12 @@
     ],
     "sqlState" : "42K03"
   },
+  "BEAN_ENCODER_INTERFACE_NOT_SUPPORTED" : {
+    "message" : [
+      "Bean encoder does not support interface type <className>."
+    ],
+    "sqlState" : "0A000"
+  },
   "BINARY_ARITHMETIC_OVERFLOW" : {
     "message" : [
       "<value1> <symbol> <value2> caused overflow. Use <functionName> to 
ignore overflow problem and return NULL."
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala
index 7e698e58321e..72cd1190ba40 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala
@@ -195,7 +195,7 @@ object Encoders {
   /**
    * Creates an encoder for Java Bean of type T.
    *
-   * T must be publicly accessible.
+   * T must be a concrete class (not an interface), and must be publicly 
accessible.
    *
    * supported types for java bean field:
    *   - primitive types: boolean, int, double, etc.
diff --git 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index 91947cf416fb..c7d3e4a47c7f 100644
--- 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++ 
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -26,6 +26,7 @@ import scala.reflect.ClassTag
 
 import org.apache.commons.lang3.reflect.{TypeUtils => JavaTypeUtils}
 
+import org.apache.spark.SparkUnsupportedOperationException
 import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder
 import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder, 
BinaryEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, 
BoxedFloatEncoder, BoxedIntEncoder, BoxedLongEncoder, BoxedShortEncoder, 
DayTimeIntervalEncoder, DEFAULT_GEOGRAPHY_ENCODER, DEFAULT_GEOMETRY_ENCODER, 
DEFAULT_JAVA_DECIMAL_ENCODER, EncoderField, IterableEncoder, JavaBeanEncoder, 
JavaBigIntEncoder, JavaEnumEncoder, LocalDateTimeEncoder, LocalTimeEncoder, 
MapEncoder, PrimitiveBooleanEncoder, [...]
 import org.apache.spark.sql.errors.ExecutionErrors
@@ -151,6 +152,13 @@ object JavaTypeInference {
       if (seenTypeSet.contains(c)) {
         throw ExecutionErrors.cannotHaveCircularReferencesInBeanClassError(c)
       }
+      // Encoders for interfaces are not supported because de-serialization 
uses its
+      // Deserializer to instantiate the class, which will not work for 
interfaces.
+      if (c.isInterface) {
+        throw new SparkUnsupportedOperationException(
+          errorClass = "BEAN_ENCODER_INTERFACE_NOT_SUPPORTED",
+          messageParameters = Map("className" -> c.getName))
+      }
 
       // TODO: we should only collect properties that have getter and setter. 
However, some tests
       //   pass in scala case class as java bean class which doesn't have 
getter and setter.
diff --git 
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java 
b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
index 5b1f98475d51..62d44e0af8b0 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
@@ -38,6 +38,7 @@ import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
+import org.apache.spark.SparkUnsupportedOperationException;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.*;
@@ -1919,6 +1920,120 @@ public class JavaDatasetSuite implements Serializable {
 
   }
 
+  /**
+   * Interface with JavaBean-style getters/setters for testing encoder with 
interface type.
+   */
+  public interface BeanInterface extends Serializable {
+    String getValue();
+    void setValue(String value);
+    int getId();
+    void setId(int id);
+  }
+
+  public static class BeanImplA implements BeanInterface {
+    private String value;
+    private int id;
+
+    @Override
+    public String getValue() { return value; }
+    @Override
+    public void setValue(String value) { this.value = value; }
+    @Override
+    public int getId() { return id; }
+    @Override
+    public void setId(int id) { this.id = id; }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) return true;
+      if (!(o instanceof BeanImplA)) return false;
+      BeanImplA that = (BeanImplA) o;
+      return id == that.id && Objects.equals(value, that.value);
+    }
+    @Override
+    public int hashCode() { return Objects.hash(value, id); }
+  }
+
+  public static class BeanImplB implements BeanInterface {
+    private String value;
+    private int id;
+
+    @Override
+    public String getValue() { return value; }
+    @Override
+    public void setValue(String value) { this.value = value; }
+    @Override
+    public int getId() { return id; }
+    @Override
+    public void setId(int id) { this.id = id; }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) return true;
+      if (!(o instanceof BeanImplB)) return false;
+      BeanImplB that = (BeanImplB) o;
+      return id == that.id && Objects.equals(value, that.value);
+    }
+    @Override
+    public int hashCode() { return Objects.hash(value, id); }
+  }
+
+  @Test
+  public void testBeanEncoderRejectsInterface() {
+    SparkUnsupportedOperationException e = Assertions.assertThrows(
+        SparkUnsupportedOperationException.class,
+        () -> Encoders.bean(BeanInterface.class));
+    Assertions.assertEquals("BEAN_ENCODER_INTERFACE_NOT_SUPPORTED", 
e.getCondition());
+    Assertions.assertEquals("0A000", e.getSqlState());
+    Assertions.assertEquals(
+        Collections.singletonMap("className", BeanInterface.class.getName()),
+        e.getMessageParameters());
+  }
+
+  @Test
+  public void testKryoEncoderWithInterface() {
+    BeanImplA a = new BeanImplA();
+    a.setValue("a");
+    a.setId(1);
+    BeanImplB b = new BeanImplB();
+    b.setValue("b");
+    b.setId(2);
+    List<BeanInterface> data = Arrays.asList(a, b);
+
+    Encoder<BeanInterface> kryoEncoder = Encoders.kryo(BeanInterface.class);
+    Dataset<BeanInterface> ds = spark.createDataset(data, kryoEncoder);
+    List<BeanInterface> collected = ds.collectAsList();
+    Assertions.assertEquals(2, collected.size());
+    Assertions.assertEquals("a", collected.get(0).getValue());
+    Assertions.assertEquals(1, collected.get(0).getId());
+    Assertions.assertEquals("b", collected.get(1).getValue());
+    Assertions.assertEquals(2, collected.get(1).getId());
+    Assertions.assertInstanceOf(BeanImplA.class, collected.get(0));
+    Assertions.assertInstanceOf(BeanImplB.class, collected.get(1));
+  }
+
+  @Test
+  public void testJavaSerializationEncoderWithInterface() {
+    BeanImplA a = new BeanImplA();
+    a.setValue("a");
+    a.setId(1);
+    BeanImplB b = new BeanImplB();
+    b.setValue("b");
+    b.setId(2);
+    List<BeanInterface> data = Arrays.asList(a, b);
+
+    Encoder<BeanInterface> javaEncoder = 
Encoders.javaSerialization(BeanInterface.class);
+    Dataset<BeanInterface> ds = spark.createDataset(data, javaEncoder);
+    List<BeanInterface> collected = ds.collectAsList();
+    Assertions.assertEquals(2, collected.size());
+    Assertions.assertEquals("a", collected.get(0).getValue());
+    Assertions.assertEquals(1, collected.get(0).getId());
+    Assertions.assertEquals("b", collected.get(1).getValue());
+    Assertions.assertEquals(2, collected.get(1).getId());
+    Assertions.assertInstanceOf(BeanImplA.class, collected.get(0));
+    Assertions.assertInstanceOf(BeanImplB.class, collected.get(1));
+  }
+
   public class CircularReference1Bean implements Serializable {
     private CircularReference2Bean child;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to