This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 290914c4f2 Normalize excessive whitespaces in sql to avoid regex
performance issues (#15498)
290914c4f2 is described below
commit 290914c4f2b907bcd52a3527d28a99787651da70
Author: Jitendra Kumar <[email protected]>
AuthorDate: Mon Apr 21 22:42:21 2025 +0530
Normalize excessive whitespaces in sql to avoid regex performance issues
(#15498)
---
.../apache/pinot/sql/parsers/CalciteSqlParser.java | 2 +
.../org/apache/pinot/sql/parsers/ParserUtils.java | 21 +++++++
.../apache/pinot/sql/parsers/ParserUtilsTest.java | 67 ++++++++++++++++++++++
3 files changed, 90 insertions(+)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
index e28708faea..1742b406f7 100644
---
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
+++
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
@@ -100,6 +100,8 @@ public class CalciteSqlParser {
throws SqlCompilationException {
long parseStartTimeNs = System.nanoTime();
+ sql = ParserUtils.sanitizeSql(sql);
+
// extract and remove OPTIONS string
List<String> options = extractOptionsFromSql(sql);
if (!options.isEmpty()) {
diff --git
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java
index efaf195756..df8527ee4c 100644
--- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java
@@ -39,6 +39,27 @@ public class ParserUtils {
}
}
+ /**
+ * Strips trailing whitespace from a SQL string before it is parsed.
+ *
+ * <p>Only the end of the string is touched: characters are dropped from the
+ * tail for as long as {@link Character#isWhitespace(char)} returns true.
+ * Leading and interior whitespace is returned unchanged, and an empty or
+ * all-whitespace input yields the empty string.
+ *
+ * <p>The intent is to avoid the performance issues that such whitespace is
+ * likely to cause with regex parsing.
+ *
+ * @param sql string to sanitize; must not be null, since it is dereferenced
+ * without a null check
+ * @return the input with its trailing whitespace removed
+ */
+ public static String sanitizeSql(String sql) {
+
+ // 1. Remove trailing whitespaces: scan backwards to the last non-whitespace character (-1 if there is none)
+
+ int endIndex = sql.length() - 1;
+ while (endIndex >= 0 && Character.isWhitespace(sql.charAt(endIndex))) {
+ endIndex--;
+ }
+ sql = sql.substring(0, endIndex + 1);
+
+ // Likewise extend for other improvements, i.e. further sanitization steps would be added here
+
+ return sql;
+ }
+
private static void validateJsonExtractScalarFunction(List<Expression>
operands) {
// Check that there are 3 or 4 arguments
int numOperands = operands.size();
diff --git
a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/ParserUtilsTest.java
b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/ParserUtilsTest.java
new file mode 100644
index 0000000000..7a3393c9b2
--- /dev/null
+++
b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/ParserUtilsTest.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.sql.parsers;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for {@link ParserUtils#sanitizeSql(String)}.
+ */
+public class ParserUtilsTest {
+
+ /**
+ * Checks that only trailing whitespace is stripped. Whitespace runs between
+ * tokens, and whitespace that is followed by a trailing comment, are expected
+ * to come back unchanged.
+ */
+ @Test
+ public void testRemoveExcessiveWhiteSpace() {
+
+ // Trailing run after the last token is dropped
+ testRemoveExcessiveWhiteSpace(
+ "SELECT * FROM mytable " + " ".repeat(20000),
+ "SELECT * FROM mytable"
+ );
+
+ // Interior run is not trailing, so the string is returned as-is
+ testRemoveExcessiveWhiteSpace(
+ "SELECT * FROM " + " ".repeat(20000) + " mytable",
+ "SELECT * FROM " + " ".repeat(20000) + " mytable"
+ );
+
+ // Interior run is kept, trailing run is dropped
+ testRemoveExcessiveWhiteSpace(
+ "SELECT * " + " ".repeat(20000) + "FROM mytable " + " ".repeat(20000),
+ "SELECT * " + " ".repeat(20000) + "FROM mytable"
+ );
+
+ // Trailing run after an options clause is dropped
+ testRemoveExcessiveWhiteSpace(
+ "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + " ".repeat(20000),
+ "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)"
+ );
+
+ // Trailing run after a closing comment is dropped
+ testRemoveExcessiveWhiteSpace(
+ "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b) /* comment */" + " ".repeat(20000),
+ "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b) /* comment */"
+ );
+
+ // A run followed by a comment is not trailing, so nothing is removed
+ testRemoveExcessiveWhiteSpace(
+ "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + " ".repeat(20000) + " /* comment */",
+ "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + " ".repeat(20000) + " /* comment */"
+ );
+ }
+
+ /**
+ * Runs {@link ParserUtils#sanitizeSql(String)} on the input and asserts the result.
+ *
+ * @param sqlWithExcessiveWhitespace SQL string passed to the sanitizer
+ * @param expectedSqlAfterSanitization exact string the sanitizer must return
+ */
+ private void testRemoveExcessiveWhiteSpace(
+ String sqlWithExcessiveWhitespace,
+ String expectedSqlAfterSanitization
+ ) {
+ String sanitizedSql = ParserUtils.sanitizeSql(sqlWithExcessiveWhitespace);
+ Assert.assertEquals(sanitizedSql, expectedSqlAfterSanitization);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]