[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-25 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r756654435



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   @tdg5 Thanks for lending a helping hand! For such scenarios I've created 
a readme 
[here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/jdbc/README.md).
   Please don't forget to update this doc accordingly.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-25 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r756654435



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   @tdg5 Thanks for lending a helping hand! For such scenarios I've created 
a readme 
[here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/jdbc/README.md).
   Please don't forget to update this doc accordingly.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-25 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r756654435



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   @tdg5 Thanks for lending a helping hand! For such scenarios I've created 
a readme 
[here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/jdbc/README.md).




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-24 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r755792043



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   Unless somebody has a groundbreaking idea, providers must be synced 
where security is enabled; we can free up some time when no authentication is in 
place.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-24 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r755792043



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   Unless somebody has a groundbreaking idea, the secure providers must be 
synced; we can free up some time when no authentication is in place.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-24 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r755788709



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   @srowen @HyukjinKwon I would like to help here either with code or 
review but I'm going to have surgery either this or next week so I'm going to 
be fully off for quite some time.
   
   If it's urgent maybe @maropu can help since he's reviewed this code.
   All in all I would implement something like this:
   ```
   if (filteredProviders.head.wantsToModifySecCtx()) {
 SecurityConfigurationLock.synchronized {
   filteredProviders.head.getConnection(driver, options)
 }
   } else {
 filteredProviders.head.getConnection(driver, options)
   }
   ```
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: 

[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-24 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r755788709



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   @srowen @HyukjinKwon I would like to help here either with code or 
review but I'm going to have surgery either this or next week so I'm going to 
be fully off for quite some time.
   
   If it's urgent maybe @maropu can help since he's reviewed this code.
   All in all I would implement something like this:
   ```
   if (provider.wantsToModifySecCtx()) {
 SecurityConfigurationLock.synchronized {
   filteredProviders.head.getConnection(driver, options)
 }
   } else {
 filteredProviders.head.getConnection(driver, options)
   }
   ```
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-24 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r755782469



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   >  allowing the synchronization to be optional could be one path forward.
   
   Yes, considering the following:
   * In case the JVM security context is NOT changed, then it's just a useless 
waste of time.
   * In case of the JVM security context changed then it's a must!
   
   I think there are multiple clean solutions:
   * Right before calling `getConnection`, the provider could be asked whether 
it intends to change the security context, and based on that either sync or 
not.
   * We can remove the `SecurityConfigurationLock.synchronized` here, and each 
and every provider can do its own sync, but only if needed
   
   I would vote on the first solution because it would be less error prone and 
more clear to provider developers.
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this 

[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-22 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r754487136



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   What I can imagine is to pre-set the JVM JAAS config for databases from 
file like this: `java -Djava.security.auth.login.config=jaas.conf`.
   And then a new flag can be introduced like: `spark.jdbc.doNotSynchronize` 
which is default `true`.
   In that case security credentials MUSTN'T be provided via Spark's JDBC 
properties but only from the JAAS file.
   However this would be super risky and for advanced users only. That said, 
I've spent at least a month debugging what's going on in the JVM security context...
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: 

[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2021-11-22 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r754476290



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+// Seems duplicate but it's needed for Scala 2.13
+providers.toSeq
+  }
+
+  def create(driver: Driver, options: Map[String, String]): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle " +
+s"it. Found active providers: ${filteredProviders.mkString(", ")}")
+SecurityConfigurationLock.synchronized {

Review comment:
   > is getConnection() not thread-safe?
   
   Simply no.
   
   This is well thought initially even if it introduces bottlenecks. The main 
problem is that JVM has a single security context. In order to make a 
connection one MUST modify the security context, otherwise the JDBC connector 
is not able to provide the security credentials (TGT, keytab or whatever). 
Simply put, JDBC connectors are able to get credentials only from the single 
security context.
   
   Since multiple connections are made concurrently (sometimes same DB type w/ 
different credentials) this must be synchronized not to have a malformed 
security context (we made a load test with 10+ DBs added and corrupted the 
security context pretty fast w/o sync, and it was a horror to find out what 
was going on).
   Workloads come and go; nobody can tell in advance what kind of 
security context needs to be created at the very beginning of JVM creation. If 
we would pre-bake the security context (not sure how?!) then the following 

[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-10-07 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r501049935



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)

Review comment:
   > At least, until Spark v3.0, creating basic JDBC connections does not 
fail because of the loading failure.
   
   Here it's the same since `BasicConnectionProvider` is built-in and no 
pre-load inside. The main issue would come when one adds a new provider which 
is unable to be loaded. That failure would make all the remaining workloads fail 
if we don't load them independently.
   





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-10-07 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r500985450



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)

Review comment:
   If the suggestion is to let the exception fire, then I would say it's a bad 
idea. If a provider is not able to be loaded then the rest must go. We've had a 
similar issue and expectation in the Hadoop delegation token area.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-10-07 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r500983444



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)

Review comment:
   What do you mean by ignore? Providers must be loaded independently, so we 
need to catch and ignore the exception.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-10-07 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r500955830



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)

Review comment:
   We can decrease it to warning. The main message is to notify the user.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-29 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r496537361



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala
##
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc
+
+import java.sql.{Connection, Driver}
+
+import org.apache.spark.annotation.{DeveloperApi, Since, Unstable}
+
+/**
+ * ::DeveloperApi::
+ * Connection provider which opens connection toward various databases 
(database specific instance
+ * needed). If any authentication required then it's the provider's 
responsibility to set all
+ * the parameters.
+ * Important to mention connection providers within a JVM used from multiple 
threads so adding
+ * internal state is not advised. If any state added then it must be 
synchronized properly.
+ */
+@DeveloperApi
+@Unstable
+@Since("3.1.0")

Review comment:
   Moved into doc.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-28 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r496113026



##
File path: 
core/src/main/scala/org/apache/spark/security/SecurityConfigurationLock.scala
##
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.security
+
+import org.apache.spark.annotation.DeveloperApi
+
+/**
+ * ::DeveloperApi::
+ * There are cases when global JVM security configuration must be modified.
+ * In order to avoid race the modification must be synchronized with this.
+ */
+@DeveloperApi
+object SecurityConfigurationLock

Review comment:
   Moved into a central place; now Spark makes sure that this happens and 
3rd-party developers don't have to care.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-28 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r495795712



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/JdbcConnectionProvider.scala
##
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.jdbc.connection
+
+import java.sql.{Connection, Driver}
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
+
+/**
+ * ::DeveloperApi::
+ * Connection provider which opens connection toward various databases 
(database specific instance
+ * needed). If any authentication required then it's the provider's 
responsibility to set all
+ * the parameters. If global JVM security configuration is changed then
+ * SecurityConfigurationLock must be used as lock to avoid race.
+ * Important to mention connection providers within a JVM used from multiple 
threads so adding
+ * internal state is not advised. If any state added then it must be 
synchronized properly.

Review comment:
   Added `since` and `unstable`.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-28 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r495862098



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
##
@@ -23,12 +23,15 @@ import java.util.{Locale, Properties}
 import org.apache.commons.io.FilenameUtils
 
 import org.apache.spark.SparkFiles
+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 
 /**
+ * ::DeveloperApi::
  * Options for the JDBC data source.
  */
+@DeveloperApi

Review comment:
   I've removed `JDBCOptions` from the API but keeping this open if further 
discussion is needed.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-28 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r495817692



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,41 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
+
+import scala.collection.mutable
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
-
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+providers
+  }
+
+  def create(driver: Driver, options: JDBCOptions): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+logDebug(s"Filtered providers: $filteredProviders")
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle it")

Review comment:
   Changed the debug message to error message.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-28 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r495809335



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/JdbcConnectionProvider.scala
##
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.jdbc.connection

Review comment:
   Since several developer API classes are in `execution` I thought it's a 
feature and not a bug. I've moved to `org.apache.spark.sql.jdbc`.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-28 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r495808018



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/JdbcConnectionProvider.scala
##
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.jdbc.connection
+
+import java.sql.{Connection, Driver}
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
+
+/**
+ * ::DeveloperApi::
+ * Connection provider which opens connection toward various databases 
(database specific instance
+ * needed). If any authentication required then it's the provider's 
responsibility to set all
+ * the parameters. If global JVM security configuration is changed then
+ * SecurityConfigurationLock must be used as lock to avoid race.
+ * Important to mention connection providers within a JVM used from multiple 
threads so adding
+ * internal state is not advised. If any state added then it must be 
synchronized properly.
+ */
+@DeveloperApi

Review comment:
   Thanks, I was not aware of this. Changed to abstract class.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-28 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r495795612



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/JdbcConnectionProvider.scala
##
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.jdbc.connection
+
+import java.sql.{Connection, Driver}
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
+
+/**
+ * ::DeveloperApi::
+ * Connection provider which opens connection toward various databases 
(database specific instance
+ * needed). If any authentication required then it's the provider's 
responsibility to set all
+ * the parameters. If global JVM security configuration is changed then
+ * SecurityConfigurationLock must be used as lock to avoid race.

Review comment:
   Changed to backquote.

##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/JdbcConnectionProvider.scala
##
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.jdbc.connection
+
+import java.sql.{Connection, Driver}
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
+
+/**
+ * ::DeveloperApi::
+ * Connection provider which opens connection toward various databases 
(database specific instance
+ * needed). If any authentication required then it's the provider's 
responsibility to set all
+ * the parameters. If global JVM security configuration is changed then
+ * SecurityConfigurationLock must be used as lock to avoid race.
+ * Important to mention connection providers within a JVM used from multiple 
threads so adding
+ * internal state is not advised. If any state added then it must be 
synchronized properly.

Review comment:
   Added since and unstable.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-28 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r495725351



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##
@@ -18,60 +18,41 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
+
+import scala.collection.mutable
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases 
(database specific instance
- * needed). If kerberos authentication required then it's the provider's 
responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes 
precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
-
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-if (options.keytab == null || options.principal == null) {
-  logDebug("No authentication configuration found, using basic connection 
provider")
-  new BasicConnectionProvider(driver, options)
-} else {
-  logDebug("Authentication configuration found, using database specific 
connection provider")
-  options.driverClass match {
-case PostgresConnectionProvider.driverClass =>
-  logDebug("Postgres connection provider found")
-  new PostgresConnectionProvider(driver, options)
-
-case MariaDBConnectionProvider.driverClass =>
-  logDebug("MariaDB connection provider found")
-  new MariaDBConnectionProvider(driver, options)
-
-case DB2ConnectionProvider.driverClass =>
-  logDebug("DB2 connection provider found")
-  new DB2ConnectionProvider(driver, options)
-
-case MSSQLConnectionProvider.driverClass =>
-  logDebug("MS SQL connection provider found")
-  new MSSQLConnectionProvider(driver, options)
-
-case OracleConnectionProvider.driverClass =>
-  logDebug("Oracle connection provider found")
-  new OracleConnectionProvider(driver, options)
-
-case _ =>
-  throw new IllegalArgumentException(s"Driver ${options.driverClass} 
does not support " +
-"Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+  Utils.getContextOrSparkClassLoader)
+val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+val iterator = loader.iterator
+while (iterator.hasNext) {
+  try {
+val provider = iterator.next
+logDebug(s"Loaded built in provider: $provider")
+providers += provider
+  } catch {
+case t: Throwable =>
+  logError(s"Failed to load built in provider.", t)
   }
 }
+providers
+  }
+
+  def create(driver: Driver, options: JDBCOptions): Connection = {
+val filteredProviders = providers.filter(_.canHandle(driver, options))
+logDebug(s"Filtered providers: $filteredProviders")
+require(filteredProviders.size == 1,
+  "JDBC connection initiated but not exactly one connection provider found 
which can handle it")

Review comment:
   Good point, adding in the next commit.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-25 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r494903467



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
##
@@ -23,12 +23,15 @@ import java.util.{Locale, Properties}
 import org.apache.commons.io.FilenameUtils
 
 import org.apache.spark.SparkFiles
+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 
 /**
+ * ::DeveloperApi::
  * Options for the JDBC data source.
  */
+@DeveloperApi

Review comment:
   @HyukjinKwon thanks for having a look!
   
   I agree that `JDBCOptions` mustn't be exposed. Let me change the code to 
show `option 1`. As said passing only `keytab: String, principal: String` is 
not enough because not all but some of the providers need further 
configurations. I've started to work on this this change (unless anybody has 
better option).





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #29024: [SPARK-32001][SQL]Create JDBC authentication provider developer API

2020-09-25 Thread GitBox


gaborgsomogyi commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r494903467



##
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
##
@@ -23,12 +23,15 @@ import java.util.{Locale, Properties}
 import org.apache.commons.io.FilenameUtils
 
 import org.apache.spark.SparkFiles
+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 
 /**
+ * ::DeveloperApi::
  * Options for the JDBC data source.
  */
+@DeveloperApi

Review comment:
   @HyukjinKwon thanks for having a look!
   
   I agree that `JDBCOptions` mustn't be exposed. Let me change the code to 
show `option 1`. As said passing only `keytab: String, principal: String` is 
not enough because not all but some of the providers need further 
configurations. I've started to work on this change (unless anybody has 
a better option).





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org