This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new 5fa4160 [CALCITE-3771] TRIM Support for HIVE/SPARK Dialect (Dhirenda
Gautam)
5fa4160 is described below
commit 5fa41609cb0fe310a0a11d86319d861423850a36
Author: dhirenda.gautam <[email protected]>
AuthorDate: Wed Feb 5 19:17:00 2020 +0530
[CALCITE-3771] TRIM Support for HIVE/SPARK Dialect (Dhirenda Gautam)
Fix-up (by Danny):
* Move all the common logic for Hive and Spark TRIM unparsing to
RelToSqlConverterUtil
* Refactor the tests to split Hive and Spark from BigQuery for some TRIM
cases
close apache/calcite#1785
---
.../apache/calcite/sql/dialect/HiveSqlDialect.java | 30 +----
.../calcite/sql/dialect/SparkSqlDialect.java | 6 +-
.../apache/calcite/util/RelToSqlConverterUtil.java | 138 +++++++++++++++++++++
.../calcite/rel/rel2sql/RelToSqlConverterTest.java | 91 ++++++++++++--
4 files changed, 229 insertions(+), 36 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java
b/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java
index 472559c..aab8748 100644
--- a/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java
+++ b/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java
@@ -22,16 +22,15 @@ import org.apache.calcite.sql.SqlAlienSystemTypeNameSpec;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlDataTypeSpec;
import org.apache.calcite.sql.SqlDialect;
-import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlSyntax;
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.fun.SqlSubstringFunction;
-import org.apache.calcite.sql.fun.SqlTrimFunction;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.BasicSqlType;
+import org.apache.calcite.util.RelToSqlConverterUtil;
/**
* A <code>SqlDialect</code> implementation for the Apache Hive database.
@@ -92,7 +91,7 @@ public class HiveSqlDialect extends SqlDialect {
SqlSyntax.BINARY.unparse(writer, op, call, leftPrec, rightPrec);
break;
case TRIM:
- unparseTrim(writer, call, leftPrec, rightPrec);
+ RelToSqlConverterUtil.unparseHiveTrim(writer, call, leftPrec, rightPrec);
break;
case OTHER_FUNCTION:
if (call.getOperator() instanceof SqlSubstringFunction) {
@@ -114,31 +113,6 @@ public class HiveSqlDialect extends SqlDialect {
}
}
- /**
- * For usage of TRIM, LTRIM and RTRIM in Hive, see
- * <a
href="https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF">Hive
UDF usage</a>.
- */
- private void unparseTrim(SqlWriter writer, SqlCall call, int leftPrec,
- int rightPrec) {
- assert call.operand(0) instanceof SqlLiteral : call.operand(0);
- SqlLiteral flag = call.operand(0);
- final String operatorName;
- switch (flag.getValueAs(SqlTrimFunction.Flag.class)) {
- case LEADING:
- operatorName = "LTRIM";
- break;
- case TRAILING:
- operatorName = "RTRIM";
- break;
- default:
- operatorName = call.getOperator().getName();
- break;
- }
- final SqlWriter.Frame frame = writer.startFunCall(operatorName);
- call.operand(2).unparse(writer, leftPrec, rightPrec);
- writer.endFunCall(frame);
- }
-
@Override public boolean supportsCharSet() {
return false;
}
diff --git
a/core/src/main/java/org/apache/calcite/sql/dialect/SparkSqlDialect.java
b/core/src/main/java/org/apache/calcite/sql/dialect/SparkSqlDialect.java
index 037e4ef..9423e9c 100644
--- a/core/src/main/java/org/apache/calcite/sql/dialect/SparkSqlDialect.java
+++ b/core/src/main/java/org/apache/calcite/sql/dialect/SparkSqlDialect.java
@@ -32,6 +32,8 @@ import org.apache.calcite.sql.fun.SqlFloorFunction;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.ReturnTypes;
+import static org.apache.calcite.util.RelToSqlConverterUtil.unparseHiveTrim;
+
/**
* A <code>SqlDialect</code> implementation for the APACHE SPARK database.
*/
@@ -102,7 +104,9 @@ public class SparkSqlDialect extends SqlDialect {
timeUnitNode.getParserPosition());
SqlFloorFunction.unparseDatetimeFunction(writer, call2, "DATE_TRUNC",
false);
break;
-
+ case TRIM:
+ unparseHiveTrim(writer, call, leftPrec, rightPrec);
+ break;
default:
super.unparseCall(writer, call, leftPrec, rightPrec);
}
diff --git
a/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java
b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java
new file mode 100644
index 0000000..bf4a53d
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlCharStringLiteral;
+import org.apache.calcite.sql.SqlLiteral;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlWriter;
+import org.apache.calcite.sql.fun.SqlTrimFunction;
+import org.apache.calcite.sql.parser.SqlParserPos;
+
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE;
+
+/**
+ * Utilities used by multiple dialect for RelToSql conversion.
+ */
+public abstract class RelToSqlConverterUtil {
+
+ /**
+ * For usage of TRIM, LTRIM and RTRIM in Hive, see
+ * <a
href="https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF">Hive
UDF usage</a>.
+ */
+ public static void unparseHiveTrim(
+ SqlWriter writer,
+ SqlCall call,
+ int leftPrec,
+ int rightPrec) {
+ final SqlLiteral valueToTrim = call.operand(1);
+ if (valueToTrim.toValue().matches("\\s+")) {
+ unparseTrimWithSpace(writer, call, leftPrec, rightPrec);
+ } else {
+ // SELECT TRIM(both 'A' from "ABC") -> SELECT REGEXP_REPLACE("ABC",
'^(A)*', '')
+ final SqlLiteral trimFlag = call.operand(0);
+ final SqlCharStringLiteral regexNode =
+ createRegexPatternLiteral(call.operand(1), trimFlag);
+ final SqlCharStringLiteral blankLiteral =
+ SqlLiteral.createCharString("", call.getParserPosition());
+ final SqlNode[] trimOperands = new SqlNode[] { call.operand(2),
regexNode, blankLiteral };
+ final SqlCall regexReplaceCall =
REGEXP_REPLACE.createCall(SqlParserPos.ZERO, trimOperands);
+ regexReplaceCall.unparse(writer, leftPrec, rightPrec);
+ }
+ }
+
+ /**
+ * Unparses TRIM function with value as space.
+ *
+ * <p>For example :
+ *
+ * <blockquote><pre>
+ * SELECT TRIM(both ' ' from "ABC") → SELECT TRIM(ABC)
+ * </pre></blockquote>
+ *
+ * @param writer writer
+ * @param call the call
+ */
+ private static void unparseTrimWithSpace(
+ SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) {
+ final String operatorName;
+ final SqlLiteral trimFlag = call.operand(0);
+ switch (trimFlag.getValueAs(SqlTrimFunction.Flag.class)) {
+ case LEADING:
+ operatorName = "LTRIM";
+ break;
+ case TRAILING:
+ operatorName = "RTRIM";
+ break;
+ default:
+ operatorName = call.getOperator().getName();
+ break;
+ }
+ final SqlWriter.Frame trimFrame = writer.startFunCall(operatorName);
+ call.operand(2).unparse(writer, leftPrec, rightPrec);
+ writer.endFunCall(trimFrame);
+ }
+
+ /**
+ * Creates regex pattern based on the TRIM flag.
+ *
+ * @param call SqlCall contains the values that need to be trimmed
+ * @param trimFlag the trimFlag, either BOTH, LEADING or TRAILING
+ * @return the regex pattern of the character to be trimmed
+ */
+ public static SqlCharStringLiteral createRegexPatternLiteral(SqlNode call,
SqlLiteral trimFlag) {
+ final String regexPattern = ((SqlCharStringLiteral) call).toValue();
+ String escaped = escapeSpecialChar(regexPattern);
+ final StringBuilder builder = new StringBuilder();
+ switch (trimFlag.getValueAs(SqlTrimFunction.Flag.class)) {
+ case LEADING:
+ builder.append("^(").append(escaped).append(")*");
+ break;
+ case TRAILING:
+ builder.append("(").append(escaped).append(")*$");
+ break;
+ default:
+ builder.append("^(")
+ .append(escaped)
+ .append(")*|(")
+ .append(escaped)
+ .append(")*$");
+ break;
+ }
+ return SqlLiteral.createCharString(builder.toString(),
+ call.getParserPosition());
+ }
+
+ /**
+ * Escapes the special character.
+ *
+ * @param inputString the string
+ * @return escape character if any special character is present in the string
+ */
+ private static String escapeSpecialChar(String inputString) {
+ final String[] specialCharacters = {"\\", "^", "$", "{", "}", "[", "]",
"(", ")", ".",
+ "*", "+", "?", "|", "<", ">", "-", "&", "%", "@"};
+
+ for (String specialCharacter : specialCharacters) {
+ if (inputString.contains(specialCharacter)) {
+ inputString = inputString.replace(specialCharacter, "\\" +
specialCharacter);
+ }
+ }
+ return inputString;
+ }
+}
diff --git
a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
index c06f7ea..f7ba327 100644
---
a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
+++
b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
@@ -1009,7 +1009,11 @@ public class RelToSqlConverterTest {
* <a
href="https://issues.apache.org/jira/browse/CALCITE-3663">[CALCITE-3663]
* Support for TRIM function in BigQuery dialect</a>. */
- @Test public void testHiveAndBqTrim() {
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-3771">[CALCITE-3771]
+ * Support of TRIM function for SPARK dialect and improvement in HIVE
Dialect</a>. */
+
+ @Test public void testHiveSparkAndBqTrim() {
final String query = "SELECT TRIM(' str ')\n"
+ "from \"foodmart\".\"reserve_employee\"";
final String expected = "SELECT TRIM(' str ')\n"
@@ -1017,11 +1021,13 @@ public class RelToSqlConverterTest {
sql(query)
.withHive()
.ok(expected)
+ .withSpark()
+ .ok(expected)
.withBigQuery()
.ok(expected);
}
- @Test public void testHiveAndBqTrimWithBoth() {
+ @Test public void testHiveSparkAndBqTrimWithBoth() {
final String query = "SELECT TRIM(both ' ' from ' str ')\n"
+ "from \"foodmart\".\"reserve_employee\"";
final String expected = "SELECT TRIM(' str ')\n"
@@ -1029,11 +1035,13 @@ public class RelToSqlConverterTest {
sql(query)
.withHive()
.ok(expected)
+ .withSpark()
+ .ok(expected)
.withBigQuery()
.ok(expected);
}
- @Test public void testHiveAndBqTrimWithLeading() {
+ @Test public void testHiveSparkAndBqTrimWithLeading() {
final String query = "SELECT TRIM(LEADING ' ' from ' str ')\n"
+ "from \"foodmart\".\"reserve_employee\"";
final String expected = "SELECT LTRIM(' str ')\n"
@@ -1041,12 +1049,14 @@ public class RelToSqlConverterTest {
sql(query)
.withHive()
.ok(expected)
+ .withSpark()
+ .ok(expected)
.withBigQuery()
.ok(expected);
}
- @Test public void testHiveAndBqTrimWithTailing() {
+ @Test public void testHiveSparkAndBqTrimWithTailing() {
final String query = "SELECT TRIM(TRAILING ' ' from ' str ')\n"
+ "from \"foodmart\".\"reserve_employee\"";
final String expected = "SELECT RTRIM(' str ')\n"
@@ -1054,6 +1064,8 @@ public class RelToSqlConverterTest {
sql(query)
.withHive()
.ok(expected)
+ .withSpark()
+ .ok(expected)
.withBigQuery()
.ok(expected);
}
@@ -1067,11 +1079,29 @@ public class RelToSqlConverterTest {
+ "from \"foodmart\".\"reserve_employee\"";
final String expected = "SELECT LTRIM('abcd', 'a')\n"
+ "FROM foodmart.reserve_employee";
+ final String expectedHS = "SELECT REGEXP_REPLACE('abcd', '^(a)*', '')\n"
+ + "FROM foodmart.reserve_employee";
sql(query)
.withBigQuery()
.ok(expected);
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-3771">[CALCITE-3771]
+ * Support of TRIM function for SPARK dialect and improvement in HIVE
Dialect</a>. */
+
+ @Test public void testHiveAndSparkTrimWithLeadingChar() {
+ final String query = "SELECT TRIM(LEADING 'a' from 'abcd')\n"
+ + "from \"foodmart\".\"reserve_employee\"";
+ final String expected = "SELECT REGEXP_REPLACE('abcd', '^(a)*', '')\n"
+ + "FROM foodmart.reserve_employee";
+ sql(query)
+ .withHive()
+ .ok(expected)
+ .withSpark()
+ .ok(expected);
+ }
+
@Test public void testBqTrimWithBothChar() {
final String query = "SELECT TRIM(both 'a' from 'abcda')\n"
+ "from \"foodmart\".\"reserve_employee\"";
@@ -1082,16 +1112,63 @@ public class RelToSqlConverterTest {
.ok(expected);
}
- @Test public void testBqTrimWithTailingChar() {
+ @Test public void testHiveAndSparkTrimWithBothChar() {
+ final String query = "SELECT TRIM(both 'a' from 'abcda')\n"
+ + "from \"foodmart\".\"reserve_employee\"";
+ final String expected = "SELECT REGEXP_REPLACE('abcda', '^(a)*|(a)*$',
'')\n"
+ + "FROM foodmart.reserve_employee";
+ sql(query)
+ .withHive()
+ .ok(expected)
+ .withSpark()
+ .ok(expected);
+ }
+
+ @Test public void testHiveBqTrimWithTailingChar() {
final String query = "SELECT TRIM(TRAILING 'a' from 'abcd')\n"
- + "from \"foodmart\".\"reserve_employee\"";
+ + "from \"foodmart\".\"reserve_employee\"";
final String expected = "SELECT RTRIM('abcd', 'a')\n"
- + "FROM foodmart.reserve_employee";
+ + "FROM foodmart.reserve_employee";
+ sql(query)
+ .withBigQuery()
+ .ok(expected);
+ }
+
+ @Test public void testHiveAndSparkTrimWithTailingChar() {
+ final String query = "SELECT TRIM(TRAILING 'a' from 'abcd')\n"
+ + "from \"foodmart\".\"reserve_employee\"";
+ final String expected = "SELECT REGEXP_REPLACE('abcd', '(a)*$', '')\n"
+ + "FROM foodmart.reserve_employee";
+ sql(query)
+ .withHive()
+ .ok(expected)
+ .withSpark()
+ .ok(expected);
+ }
+
+ @Test public void testBqTrimWithBothSpecialCharacter() {
+ final String query = "SELECT TRIM(BOTH '$@*A' from
'$@*AABC$@*AADCAA$@*A')\n"
+ + "from \"foodmart\".\"reserve_employee\"";
+ final String expected = "SELECT TRIM('$@*AABC$@*AADCAA$@*A', '$@*A')\n"
+ + "FROM foodmart.reserve_employee";
sql(query)
.withBigQuery()
.ok(expected);
}
+ @Test public void testHiveAndSparkTrimWithBothSpecialCharacter() {
+ final String query = "SELECT TRIM(BOTH '$@*A' from
'$@*AABC$@*AADCAA$@*A')\n"
+ + "from \"foodmart\".\"reserve_employee\"";
+ final String expected = "SELECT REGEXP_REPLACE('$@*AABC$@*AADCAA$@*A',"
+ + " '^(\\$\\@\\*A)*|(\\$\\@\\*A)*$', '')\n"
+ + "FROM foodmart.reserve_employee";
+ sql(query)
+ .withHive()
+ .ok(expected)
+ .withSpark()
+ .ok(expected);
+ }
+
/** Test case for
* <a
href="https://issues.apache.org/jira/browse/CALCITE-2715">[CALCITE-2715]
* MS SQL Server does not support character set as part of data type</a>. */