anishshri-db commented on code in PR #47104:
URL: https://github.com/apache/spark/pull/47104#discussion_r1662982098
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImpl.scala:
##########
@@ -313,3 +295,174 @@ class StatefulProcessorHandleImpl(
}
}
}
+
+/**
+ * This DriverStatefulProcessorHandleImpl is used within TransformWithExec
+ * on the driver side to collect the columnFamilySchemas before any processing is
+ * actually done. We need this class because we can only collect the schemas after
+ * the StatefulProcessor is initialized.
+ */
+class DriverStatefulProcessorHandleImpl(timeMode: TimeMode)
+ extends StatefulProcessorHandleImplBase(timeMode) {
+
+ private[sql] val columnFamilySchemaFactory = ColumnFamilySchemaFactory.
+ getFactory(StateSchemaV3File.COLUMN_FAMILY_SCHEMA_VERSION)
+
+ private[sql] val columnFamilySchemas: util.List[ColumnFamilySchema] =
+ new util.ArrayList[ColumnFamilySchema]()
+
+ private def verifyStateVarOperations(operationType: String): Unit = {
+ if (currState != PRE_INIT) {
+ throw
StateStoreErrors.cannotPerformOperationWithInvalidHandleState(operationType,
+ currState.toString)
+ }
+ }
+
+ /**
+ * Function to add the ValueState schema to the list of column family schemas.
+ * The user must ensure to call this function only within the `init()` method of the
+ * StatefulProcessor.
+ *
+ * @param stateName - name of the state variable
+ * @param valEncoder - SQL encoder for state variable
+ * @tparam T - type of state variable
+ * @return - instance of ValueState of type T that can be used to store state persistently
+ */
+ override def getValueState[T](stateName: String, valEncoder: Encoder[T]):
ValueState[T] = {
+ verifyStateVarOperations("get_value_state")
+ val colFamilySchema =
columnFamilySchemaFactory.getValueStateSchema(stateName, valEncoder)
+ columnFamilySchemas.add(colFamilySchema)
+ null
+ }
+
+ /**
+ * Function to add the ValueStateWithTTL schema to the list of column family schemas.
+ * The user must ensure to call this function only within the `init()` method of the
+ * StatefulProcessor.
+ *
+ * @param stateName - name of the state variable
+ * @param valEncoder - SQL encoder for state variable
+ * @param ttlConfig - the ttl configuration (time to live duration etc.)
+ * @tparam T - type of state variable
+ * @return - instance of ValueState of type T that can be used to store state persistently
+ */
+ override def getValueState[T](
+ stateName: String,
+ valEncoder: Encoder[T],
+ ttlConfig: TTLConfig): ValueState[T] = {
+ verifyStateVarOperations("get_value_state")
+ val colFamilySchema =
columnFamilySchemaFactory.getValueStateTtlSchema(stateName, valEncoder)
+ columnFamilySchemas.add(colFamilySchema)
+ null
Review Comment:
Could we do something like `null.asInstanceOf[ValueState[T]]`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]