This is an automated email from the ASF dual-hosted git repository.
tanner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 68d8d70e86 [CALCITE-5820] Add PARSE_URL function (enabled in Hive and
Spark library)
68d8d70e86 is described below
commit 68d8d70e8600830769c9209a222585a29e885022
Author: Runkang He <[email protected]>
AuthorDate: Sun Jul 9 10:42:47 2023 +0800
[CALCITE-5820] Add PARSE_URL function (enabled in Hive and Spark library)
---
.../calcite/adapter/enumerable/RexImpTable.java | 2 +
.../org/apache/calcite/runtime/SqlFunctions.java | 89 +++++++++++++++++++++
.../calcite/sql/fun/SqlLibraryOperators.java | 8 ++
.../org/apache/calcite/util/BuiltInMethod.java | 1 +
site/_docs/reference.md | 1 +
.../org/apache/calcite/test/SqlOperatorTest.java | 91 ++++++++++++++++++++++
6 files changed, 192 insertions(+)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index ec3e881262..4dfba29852 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -211,6 +211,7 @@ import static
org.apache.calcite.sql.fun.SqlLibraryOperators.PARSE_DATE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.PARSE_DATETIME;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.PARSE_TIME;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.PARSE_TIMESTAMP;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.PARSE_URL;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.POW;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REPEAT;
@@ -545,6 +546,7 @@ public class RexImpTable {
defineMethod(REVERSE, BuiltInMethod.REVERSE.method, NullPolicy.STRICT);
defineMethod(LEVENSHTEIN, BuiltInMethod.LEVENSHTEIN.method,
NullPolicy.STRICT);
defineMethod(SPLIT, "split", NullPolicy.STRICT);
+ defineMethod(PARSE_URL, BuiltInMethod.PARSE_URL.method,
NullPolicy.STRICT);
map.put(TRIM, new TrimImplementor());
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 4c0d257f1f..1bf6e855f9 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -64,6 +64,8 @@ import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.MathContext;
import java.math.RoundingMode;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
@@ -106,6 +108,7 @@ import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BinaryOperator;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.calcite.linq4j.Nullness.castNonNull;
@@ -911,6 +914,80 @@ public class SqlFunctions {
}
}
+ /** SQL {@code PARSE_URL(urlStr, partToExtract, keyToExtract)} function. */
+ public static @Nullable String parseUrl(@Nullable String urlStr,
+ @Nullable String partToExtract, @Nullable String keyToExtract) {
+ if (partToExtract == null || !partToExtract.equals("QUERY")) {
+ return null;
+ }
+
+ String query = parseUrl(urlStr, partToExtract);
+ if (query == null) {
+ return null;
+ }
+
+ Pattern p = Pattern.compile("(&|^)" + keyToExtract + "=([^&]*)");
+ Matcher m = p.matcher(query);
+ return m.find() ? m.group(2) : null;
+ }
+
+ /** SQL {@code PARSE_URL(urlStr, partToExtract)} function. */
+ public static @Nullable String parseUrl(@Nullable String urlStr,
+ @Nullable String partToExtract) {
+ if (urlStr == null || partToExtract == null) {
+ return null;
+ }
+
+ URI uri;
+ try {
+ uri = new URI(urlStr);
+ } catch (URISyntaxException e) {
+ return null;
+ }
+
+ String extractValue;
+ PartToExtract part;
+ try {
+ part = PartToExtract.valueOf(partToExtract);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+
+ switch (part) {
+ case HOST:
+ extractValue = uri.getHost();
+ break;
+ case PATH:
+ extractValue = uri.getRawPath();
+ break;
+ case QUERY:
+ extractValue = uri.getRawQuery();
+ break;
+ case REF:
+ extractValue = uri.getRawFragment();
+ break;
+ case PROTOCOL:
+ extractValue = uri.getScheme();
+ break;
+ case FILE:
+ if (uri.getRawQuery() != null) {
+ extractValue = uri.getRawPath() + "?" + uri.getRawQuery();
+ } else {
+ extractValue = uri.getRawPath();
+ }
+ break;
+ case AUTHORITY:
+ extractValue = uri.getRawAuthority();
+ break;
+ case USERINFO:
+ extractValue = uri.getRawUserInfo();
+ break;
+ default:
+ extractValue = null;
+ }
+ return extractValue;
+ }
+
/** SQL {@code RTRIM} function applied to string. */
public static String rtrim(String s) {
return trim(false, true, " ", s);
@@ -4601,4 +4678,16 @@ public class SqlFunctions {
SCALAR, LIST, MAP
}
+ /** Type of part to extract passed into {@link #parseUrl}. */
+ public enum PartToExtract {
+ HOST,
+ PATH,
+ QUERY,
+ REF,
+ PROTOCOL,
+ FILE,
+ AUTHORITY,
+ USERINFO;
+ }
+
}
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 520c34ac01..4bff7cb103 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -394,6 +394,14 @@ public abstract class SqlLibraryOperators {
public static final SqlFunction SUBSTR_POSTGRESQL =
SUBSTR.withKind(SqlKind.SUBSTR_POSTGRESQL);
+ /** The "PARSE_URL(urlString, partToExtract [, keyToExtract] )" function. */
+ @LibraryOperator(libraries = {HIVE, SPARK})
+ public static final SqlFunction PARSE_URL =
+ SqlBasicFunction.create("PARSE_URL",
+ ReturnTypes.VARCHAR_NULLABLE,
+ OperandTypes.STRING_STRING_OPTIONAL_STRING,
+ SqlFunctionCategory.STRING);
+
/** The "GREATEST(value, value)" function. */
@LibraryOperator(libraries = {BIG_QUERY, ORACLE})
public static final SqlFunction GREATEST =
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index e7ec9124b9..c7361bd472 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -430,6 +430,7 @@ public enum BuiltInMethod {
STRING_CONCAT(SqlFunctions.class, "concat", String.class, String.class),
STRING_CONCAT_WITH_NULL(SqlFunctions.class, "concatWithNull", String.class,
String.class),
+ PARSE_URL(SqlFunctions.class, "parseUrl", String.class, String.class,
String.class),
MULTI_STRING_CONCAT(SqlFunctions.class, "concatMulti", String[].class),
MULTI_STRING_CONCAT_WITH_NULL(SqlFunctions.class, "concatMultiWithNull",
String[].class),
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 75060150df..a9a147a0ca 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2774,6 +2774,7 @@ BigQuery's type system uses confusingly different names
for types and functions:
| b | PARSE_DATETIME(format, string) | Uses format specified
by *format* to convert *string* representation of datetime to a TIMESTAMP value
| b | PARSE_TIME(format, string) | Uses format specified
by *format* to convert *string* representation of time to a TIME value
| b | PARSE_TIMESTAMP(format, string[, timeZone]) | Uses format specified
by *format* to convert *string* representation of timestamp to a TIMESTAMP WITH
LOCAL TIME ZONE value in *timeZone*
+| h s | PARSE_URL(urlString, partToExtract [, keyToExtract] ) | Returns the
specified *partToExtract* from the *urlString*. Valid values for
*partToExtract* include HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, and
USERINFO. *keyToExtract* specifies which query to extract
| b | POW(numeric1, numeric2) | Returns *numeric1*
raised to the power *numeric2*
| m o | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence [,
matchType]]]) | Replaces all substrings of *string* that match *regexp* with
*rep* at the starting *pos* in expr (if omitted, the default is 1),
*occurrence* means which occurrence of a match to search for (if omitted, the
default is 1), *matchType* specifies how to perform matching
| b m p | REPEAT(string, integer) | Returns a string
consisting of *string* repeated of *integer* times; returns an empty string if
*integer* is less than 1
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 96d44ab2c7..82ba08dbe5 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -3961,6 +3961,97 @@ public class SqlOperatorTest {
f.checkNull("ASCII(cast(null as varchar(1)))");
}
+ @Test void testParseUrl() {
+ final SqlOperatorFixture f0 = fixture()
+ .setFor(SqlLibraryOperators.PARSE_URL);
+ f0.checkFails("^parse_url('http://calcite.apache.org/path1', 'HOST')^",
+ "No match found for function signature PARSE_URL\\(<CHARACTER>,
<CHARACTER>\\)",
+ false);
+ final Consumer<SqlOperatorFixture> consumer = f -> {
+ // test with each valid partToExtract
+
f.checkString("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'HOST')",
+ "calcite.apache.org",
+ "VARCHAR NOT NULL");
+
f.checkString("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'PATH')",
+ "/path1/p.php",
+ "VARCHAR NOT NULL");
+
f.checkString("parse_url('http://calcite.apache.org/path1/%20p.php?k1=v1&k2=v2#Ref1',"
+ + " 'PATH')",
+ "/path1/%20p.php",
+ "VARCHAR NOT NULL");
+
f.checkString("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'QUERY')",
+ "k1=v1&k2=v2",
+ "VARCHAR NOT NULL");
+
f.checkString("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'REF')",
+ "Ref1",
+ "VARCHAR NOT NULL");
+
f.checkString("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'QUERY', 'k2')",
+ "v2",
+ "VARCHAR NOT NULL");
+
f.checkString("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'QUERY', 'k1')",
+ "v1",
+ "VARCHAR NOT NULL");
+
f.checkNull("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'QUERY', 'k3')");
+
f.checkString("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'FILE')",
+ "/path1/p.php?k1=v1&k2=v2",
+ "VARCHAR NOT NULL");
+ f.checkString("parse_url('http://calcite.apache.org/path1/p.php',"
+ + " 'FILE')",
+ "/path1/p.php",
+ "VARCHAR NOT NULL");
+
f.checkString("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'PROTOCOL')",
+ "http",
+ "VARCHAR NOT NULL");
+
f.checkString("parse_url('https://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'PROTOCOL')",
+ "https",
+ "VARCHAR NOT NULL");
+
f.checkString("parse_url('http://[email protected]/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'USERINFO')",
+ "bob",
+ "VARCHAR NOT NULL");
+
f.checkNull("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'USERINFO')");
+
f.checkString("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'AUTHORITY')",
+ "calcite.apache.org",
+ "VARCHAR NOT NULL");
+
+ // test with invalid partToExtract
+
f.checkNull("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'INVALID_PART_TO_EXTRACT')");
+
f.checkNull("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'HOST', 'k1')");
+
+ // test with invalid urlString
+
f.checkNull("parse_url('http:calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'HOST')");
+
f.checkNull("parse_url('calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'HOST')");
+ f.checkNull("parse_url('/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " 'HOST')");
+
+ // test with operands with null values
+
f.checkNull("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " cast(null as varchar))");
+
f.checkNull("parse_url('http://calcite.apache.org/path1/p.php?k1=v1&k2=v2#Ref1',"
+ + " cast(null as varchar), cast(null as varchar))");
+ f.checkNull("parse_url(cast(null as varchar), cast(null as varchar))");
+ f.checkNull("parse_url(cast(null as varchar), cast(null as varchar),"
+ + " cast(null as varchar))");
+ };
+ f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
+ }
+
@Test void testToBase64() {
final SqlOperatorFixture f = fixture().withLibrary(SqlLibrary.MYSQL);
f.setFor(SqlLibraryOperators.TO_BASE64);