This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d8298bffd91 [SPARK-45081][SQL] Encoders.bean does no longer work with read-only properties d8298bffd91 is described below commit d8298bffd91de01299f9456b37e4454e8b4a6ae8 Author: Giambattista Bloisi <gblo...@gmail.com> AuthorDate: Tue Sep 12 16:16:04 2023 +0200 [SPARK-45081][SQL] Encoders.bean does no longer work with read-only properties ### What changes were proposed in this pull request? This PR re-enables Encoders.bean to be called against beans having read-only properties, that is properties that have only getters and no setter method. Beans with read only properties are even used in internal tests. Setter methods of a Java bean encoder are stored within an Option wrapper because they are missing in case of read-only properties. When a java bean has to be initialized, setter methods for the bean properties have to be called: this PR filters out read-only properties from that process. ### Why are the changes needed? The changes are required to avoid an exception being thrown by getting the value of a None option object. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? An additional regression test has been added ### Was this patch authored or co-authored using generative AI tooling? No Closes #42829 from gbloisi-openaire/SPARK-45081. 
Authored-by: Giambattista Bloisi <gblo...@gmail.com> Signed-off-by: Herman van Hovell <her...@databricks.com> --- .../sql/connect/client/arrow/ArrowDeserializer.scala | 20 +++++++++++--------- .../spark/sql/catalyst/DeserializerBuildHelper.scala | 4 +++- .../test/org/apache/spark/sql/JavaDatasetSuite.java | 17 +++++++++++++++++ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala index cd54966ccf5..94295785987 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala @@ -332,15 +332,17 @@ object ArrowDeserializers { val constructor = methodLookup.findConstructor(tag.runtimeClass, MethodType.methodType(classOf[Unit])) val lookup = createFieldLookup(vectors) - val setters = fields.map { field => - val vector = lookup(field.name) - val deserializer = deserializerFor(field.enc, vector, timeZoneId) - val setter = methodLookup.findVirtual( - tag.runtimeClass, - field.writeMethod.get, - MethodType.methodType(classOf[Unit], field.enc.clsTag.runtimeClass)) - (bean: Any, i: Int) => setter.invoke(bean, deserializer.get(i)) - } + val setters = fields + .filter(_.writeMethod.isDefined) + .map { field => + val vector = lookup(field.name) + val deserializer = deserializerFor(field.enc, vector, timeZoneId) + val setter = methodLookup.findVirtual( + tag.runtimeClass, + field.writeMethod.get, + MethodType.methodType(classOf[Unit], field.enc.clsTag.runtimeClass)) + (bean: Any, i: Int) => setter.invoke(bean, deserializer.get(i)) + } new StructFieldSerializer[Any](struct) { def value(i: Int): Any = { val instance = constructor.invoke() diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala index 16a7d7ff065..0b88d5a4130 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala @@ -390,7 +390,9 @@ object DeserializerBuildHelper { CreateExternalRow(convertedFields, enc.schema)) case JavaBeanEncoder(tag, fields) => - val setters = fields.map { f => + val setters = fields + .filter(_.writeMethod.isDefined) + .map { f => val newTypePath = walkedTypePath.recordField( f.enc.clsTag.runtimeClass.getName, f.name) diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java index 4f7cf8da787..f416d411322 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java @@ -1783,6 +1783,23 @@ public class JavaDatasetSuite implements Serializable { Assert.assertEquals(1, df.collectAsList().size()); } + public static class ReadOnlyPropertyBean implements Serializable { + public boolean isEmpty() { + return true; + } + } + + @Test + public void testReadOnlyPropertyBean() { + ReadOnlyPropertyBean bean = new ReadOnlyPropertyBean(); + List<ReadOnlyPropertyBean> data = Arrays.asList(bean); + Dataset<ReadOnlyPropertyBean> df = spark.createDataset(data, + Encoders.bean(ReadOnlyPropertyBean.class)); + Assert.assertEquals(1, df.schema().length()); + Assert.assertEquals(1, df.collectAsList().size()); + + } + public class CircularReference1Bean implements Serializable { private CircularReference2Bean child; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: 
commits-h...@spark.apache.org