spark git commit: [SPARK-18141][SQL] Fix to quote column names in the predicate clause of the JDBC RDD generated sql statement
Repository: spark Updated Branches: refs/heads/branch-2.1 2d2e80180 -> 2f91b0154 [SPARK-18141][SQL] Fix to quote column names in the predicate clause of the JDBC RDD generated sql statement ## What changes were proposed in this pull request? SQL query generated for the JDBC data source is not quoting columns in the predicate clause. When the source table has quoted column names, spark jdbc read fails with column not found error incorrectly. Error: org.h2.jdbc.JdbcSQLException: Column "ID" not found; Source SQL statement: SELECT "Name","Id" FROM TEST."mixedCaseCols" WHERE (Id < 1) This PR fixes by quoting column names in the generated SQL for predicate clause when filters are pushed down to the data source. Source SQL statement after the fix: SELECT "Name","Id" FROM TEST."mixedCaseCols" WHERE ("Id" < 1) ## How was this patch tested? Added new test case to the JdbcSuite Author: sureshthalamati Closes #15662 from sureshthalamati/filter_quoted_cols-SPARK-18141. (cherry picked from commit 70c5549ee9588228d18a7b405c977cf591e2efd4) Signed-off-by: gatorsmile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f91b015 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f91b015 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f91b015 Branch: refs/heads/branch-2.1 Commit: 2f91b0154ee0674b65e80f81f6498b94666c4b46 Parents: 2d2e801 Author: sureshthalamati Authored: Thu Dec 1 19:13:38 2016 -0800 Committer: gatorsmile Committed: Thu Dec 1 19:13:54 2016 -0800 -- .../execution/datasources/jdbc/JDBCRDD.scala| 45 ++-- .../datasources/jdbc/JDBCRelation.scala | 3 +- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 73 +++- 3 files changed, 82 insertions(+), 39 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2f91b015/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala -- diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index a1e5dfd..37df283 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -27,7 +27,7 @@ import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.jdbc.JdbcDialects +import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects} import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.util.CompletionIterator @@ -105,37 +105,40 @@ object JDBCRDD extends Logging { * Turns a single Filter into a String representing a SQL expression. * Returns None for an unhandled filter. */ - def compileFilter(f: Filter): Option[String] = { + def compileFilter(f: Filter, dialect: JdbcDialect): Option[String] = { +def quote(colName: String): String = dialect.quoteIdentifier(colName) + Option(f match { - case EqualTo(attr, value) => s"$attr = ${compileValue(value)}" + case EqualTo(attr, value) => s"${quote(attr)} = ${compileValue(value)}" case EqualNullSafe(attr, value) => -s"(NOT ($attr != ${compileValue(value)} OR $attr IS NULL OR " + - s"${compileValue(value)} IS NULL) OR ($attr IS NULL AND ${compileValue(value)} IS NULL))" - case LessThan(attr, value) => s"$attr < ${compileValue(value)}" - case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}" - case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}" - case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}" - case IsNull(attr) => s"$attr IS NULL" - case IsNotNull(attr) => s"$attr IS NOT NULL" - case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'" - case 
StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'" - case StringContains(attr, value) => s"${attr} LIKE '%${value}%'" +val col = quote(attr) +s"(NOT ($col != ${compileValue(value)} OR $col IS NULL OR " + + s"${compileValue(value)} IS NULL) OR ($col IS NULL AND ${compileValue(value)} IS NULL))" + case LessThan(attr, value) => s"${quote(attr)} < ${compileValue(value)}" + case GreaterThan(attr, value) => s"${quote(attr)} > ${compileValue(value)}" + case LessThanOrEqual(attr, value) => s"${quote(attr)} <= ${compileValue(value)}" + case
spark git commit: [SPARK-18141][SQL] Fix to quote column names in the predicate clause of the JDBC RDD generated sql statement
Repository: spark Updated Branches: refs/heads/master 37e52f879 -> 70c5549ee [SPARK-18141][SQL] Fix to quote column names in the predicate clause of the JDBC RDD generated sql statement ## What changes were proposed in this pull request? SQL query generated for the JDBC data source is not quoting columns in the predicate clause. When the source table has quoted column names, spark jdbc read fails with column not found error incorrectly. Error: org.h2.jdbc.JdbcSQLException: Column "ID" not found; Source SQL statement: SELECT "Name","Id" FROM TEST."mixedCaseCols" WHERE (Id < 1) This PR fixes by quoting column names in the generated SQL for predicate clause when filters are pushed down to the data source. Source SQL statement after the fix: SELECT "Name","Id" FROM TEST."mixedCaseCols" WHERE ("Id" < 1) ## How was this patch tested? Added new test case to the JdbcSuite Author: sureshthalamati Closes #15662 from sureshthalamati/filter_quoted_cols-SPARK-18141. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/70c5549e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/70c5549e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/70c5549e Branch: refs/heads/master Commit: 70c5549ee9588228d18a7b405c977cf591e2efd4 Parents: 37e52f8 Author: sureshthalamati Authored: Thu Dec 1 19:13:38 2016 -0800 Committer: gatorsmile Committed: Thu Dec 1 19:13:38 2016 -0800 -- .../execution/datasources/jdbc/JDBCRDD.scala| 45 ++-- .../datasources/jdbc/JDBCRelation.scala | 3 +- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 73 +++- 3 files changed, 82 insertions(+), 39 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/70c5549e/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 
a1e5dfd..37df283 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -27,7 +27,7 @@ import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.jdbc.JdbcDialects +import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects} import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.util.CompletionIterator @@ -105,37 +105,40 @@ object JDBCRDD extends Logging { * Turns a single Filter into a String representing a SQL expression. * Returns None for an unhandled filter. */ - def compileFilter(f: Filter): Option[String] = { + def compileFilter(f: Filter, dialect: JdbcDialect): Option[String] = { +def quote(colName: String): String = dialect.quoteIdentifier(colName) + Option(f match { - case EqualTo(attr, value) => s"$attr = ${compileValue(value)}" + case EqualTo(attr, value) => s"${quote(attr)} = ${compileValue(value)}" case EqualNullSafe(attr, value) => -s"(NOT ($attr != ${compileValue(value)} OR $attr IS NULL OR " + - s"${compileValue(value)} IS NULL) OR ($attr IS NULL AND ${compileValue(value)} IS NULL))" - case LessThan(attr, value) => s"$attr < ${compileValue(value)}" - case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}" - case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}" - case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}" - case IsNull(attr) => s"$attr IS NULL" - case IsNotNull(attr) => s"$attr IS NOT NULL" - case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'" - case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'" - case StringContains(attr, value) => s"${attr} LIKE '%${value}%'" +val col = quote(attr) +s"(NOT ($col != ${compileValue(value)} OR $col 
IS NULL OR " + + s"${compileValue(value)} IS NULL) OR ($col IS NULL AND ${compileValue(value)} IS NULL))" + case LessThan(attr, value) => s"${quote(attr)} < ${compileValue(value)}" + case GreaterThan(attr, value) => s"${quote(attr)} > ${compileValue(value)}" + case LessThanOrEqual(attr, value) => s"${quote(attr)} <= ${compileValue(value)}" + case GreaterThanOrEqual(attr, value) => s"${quote(attr)} >= ${compileValue(value)}" + case IsNull(attr) => s"${quote(attr)} IS