alexeykudinkin commented on code in PR #7702:
URL: https://github.com/apache/hudi/pull/7702#discussion_r1081601845
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/HoodieSparkKryoRegistrar.scala:
##########
@@ -42,22 +43,31 @@ import org.apache.spark.serializer.KryoRegistrator
* or renamed (w/o correspondingly updating such usages)</li>
* </ol>
*/
-class HoodieSparkKryoProvider extends HoodieCommonKryoProvider {
- override def registerClasses(): Array[Class[_]] = {
+class HoodieSparkKryoRegistrar extends HoodieCommonKryoRegistrar with
KryoRegistrator {
+ override def registerClasses(kryo: Kryo): Unit = {
///////////////////////////////////////////////////////////////////////////
// NOTE: DO NOT REORDER REGISTRATIONS
///////////////////////////////////////////////////////////////////////////
- val classes = super[HoodieCommonKryoProvider].registerClasses()
- classes ++ Array(
- classOf[HoodieWriteConfig],
- classOf[HoodieSparkRecord],
- classOf[HoodieInternalRow]
- )
+ super[HoodieCommonKryoRegistrar].registerClasses(kryo)
+
+ kryo.register(classOf[HoodieWriteConfig])
+
+ kryo.register(classOf[HoodieSparkRecord])
+ kryo.register(classOf[HoodieInternalRow])
+
+ // NOTE: Hadoop's configuration is not a serializable object by itself,
and hence
+ // we're relying on [[SerializableConfiguration]] wrapper to work it
around
+ kryo.register(classOf[SerializableConfiguration], new JavaSerializer())
}
}
-object HoodieSparkKryoProvider {
+object HoodieSparkKryoRegistrar {
+
+ // NOTE: We're copying definition of the config introduced in Spark 3.0
+ // (to stay compatible w/ Spark 2.4)
+ private val KRYO_USER_REGISTRATORS = "spark.kryo.registrator"
+
def register(conf: SparkConf): SparkConf = {
- conf.registerKryoClasses(new HoodieSparkKryoProvider().registerClasses())
+ conf.set(KRYO_USER_REGISTRATORS,
Seq(classOf[HoodieSparkKryoRegistrar].getName).mkString(","))
}
Review Comment:
We need to convert it to a string, so I kept it generic so that we can drop
in one more class. It's not strictly necessary, though.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]