ericm-db commented on code in PR #53872:
URL: https://github.com/apache/spark/pull/53872#discussion_r2722325001
##########
sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamReader.scala:
##########
@@ -18,14 +18,43 @@
package org.apache.spark.sql.connect
import scala.jdk.CollectionConverters._
+import scala.util.matching.Regex
-import org.apache.spark.annotation.Evolving
+import org.apache.spark.annotation.{Evolving, Experimental}
import org.apache.spark.connect.proto.Read.DataSource
import org.apache.spark.sql.connect.ConnectConversions._
import org.apache.spark.sql.errors.DataTypeErrors
import org.apache.spark.sql.streaming
import org.apache.spark.sql.types.StructType
+/**
+ * Companion object for DataStreamReader with validation utilities.
+ */
+private[sql] object DataStreamReader {
+
+ /**
+ * Pattern for valid source names. Names must only contain ASCII letters, digits, and
+ * underscores.
+ */
+ private val VALID_NAME_PATTERN: Regex = "^[a-zA-Z0-9_]+$".r
+
+ /**
+ * Validates that a streaming source name only contains alphanumeric characters and underscores.
+ *
+ * @param sourceName
+ * the source name to validate
+ * @throws IllegalArgumentException
+ * if the source name contains invalid characters
+ */
+ def validateSourceName(sourceName: String): Unit = {
Review Comment:
I would prefer that the query fail fast, as opposed to the plan having to be sent
to the server side for validation.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]