This is an automated email from the ASF dual-hosted git repository.
sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new aa7a903 HIVE-25659: Metastore direct sql queries with IN/(NOT IN)
should be split based on max parameters allowed by SQL DB (Nikhil Gupta,
reviewed by Adesh Rao, Sankar Hariappan)
aa7a903 is described below
commit aa7a9030ee4d457dd6da45db63a12ce7d972362a
Author: guptanikhil007 <[email protected]>
AuthorDate: Mon Nov 8 11:21:35 2021 +0530
HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split
based on max parameters allowed by SQL DB (Nikhil Gupta, reviewed by Adesh Rao,
Sankar Hariappan)
Signed-off-by: Sankar Hariappan <[email protected]>
Closes (#2758)
---
.../hadoop/hive/metastore/conf/MetastoreConf.java | 3 +++
.../apache/hadoop/hive/metastore/txn/TxnUtils.java | 6 ++---
.../hadoop/hive/metastore/txn/TestTxnUtils.java | 29 +++++++++++++++++++---
3 files changed, 32 insertions(+), 6 deletions(-)
diff --git
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 0e05ad3..21ea1f8 100644
---
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -680,6 +680,9 @@ public class MetastoreConf {
DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("metastore.direct.sql.max.elements.values.clause",
"hive.direct.sql.max.elements.values.clause",
1000, "The maximum number of values in a VALUES clause for INSERT
statement."),
+ DIRECT_SQL_MAX_PARAMETERS("metastore.direct.sql.max.parameters",
+ "hive.direct.sql.max.parameters", 1000, "The maximum query parameters
\n" +
+ "backend sql engine can support."),
DIRECT_SQL_MAX_QUERY_LENGTH("metastore.direct.sql.max.query.length",
"hive.direct.sql.max.query.length", 100, "The maximum\n" +
" size of a query string (in KB)."),
diff --git
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
index f2c881a..13d45d1 100644
---
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
+++
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
@@ -265,6 +265,7 @@ public class TxnUtils {
// Get configuration parameters
int maxQueryLength = MetastoreConf.getIntVar(conf,
ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH);
int batchSize = MetastoreConf.getIntVar(conf,
ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE);
+ int maxParameters = MetastoreConf.getIntVar(conf,
ConfVars.DIRECT_SQL_MAX_PARAMETERS);
// Check parameter set validity as a public method.
if (inList == null || inList.size() == 0 || maxQueryLength <= 0 ||
batchSize <= 0) {
@@ -316,7 +317,7 @@ public class TxnUtils {
// Compute the size of a query when the 'nextValue' is added to the
current query.
int querySize = querySizeExpected(buf.length(), nextValue.length(),
suffix.length(), addParens);
- if (querySize > maxQueryLength * 1024) {
+ if ((querySize > maxQueryLength * 1024) || (currentCount >=
maxParameters)) {
// Check an edge case where the DIRECT_SQL_MAX_QUERY_LENGTH does not
allow one 'IN' clause with single value.
if (cursor4queryOfInClauses == 1 && cursor4InClauseElements == 0) {
throw new IllegalArgumentException("The current " +
ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH.getVarname() + " is set too small to have
one IN clause with single value!");
@@ -351,9 +352,8 @@ public class TxnUtils {
continue;
} else if (cursor4InClauseElements >= batchSize-1 &&
cursor4InClauseElements != 0) {
// Finish the current 'IN'/'NOT IN' clause and start a new clause.
- buf.setCharAt(buf.length() - 1, ')'); // replace the "commar".
+ buf.setCharAt(buf.length() - 1, ')'); // replace the "comma".
buf.append(newInclausePrefix.toString());
-
newInclausePrefixJustAppended = true;
// increment cursor for per-query IN-clause list
diff --git
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
index 811a6ac..42f1ca4 100644
---
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
+++
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
@@ -63,6 +63,7 @@ public class TestTxnUtils {
// The first query happens to have 2 full batches.
MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH, 1);
MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE,
10);
+ MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS, 2000);
List<Long> inList = new ArrayList<>();
for (long i = 1; i <= 189; i++) {
inList.add(i);
@@ -150,12 +151,34 @@ public class TestTxnUtils {
ret = TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix,
inList, "TXN_ID", false, false);
Assert.assertEquals(3, queries.size());
Assert.assertEquals(queries.size(), ret.size());
- Assert.assertEquals(2255L, ret.get(0).longValue());
- Assert.assertEquals(2033L, ret.get(1).longValue());
- Assert.assertEquals(33L, ret.get(2).longValue());
+ Assert.assertEquals(2000L, ret.get(0).longValue());
+ Assert.assertEquals(2000L, ret.get(1).longValue());
+ Assert.assertEquals(321L, ret.get(2).longValue());
runAgainstDerby(queries);
}
+ @Test(expected = IllegalArgumentException.class)
+ public void testBuildQueryWithNOTINClauseFailure() throws Exception {
+ MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH, 10);
+ MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE,
100);
+ MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS, 1000);
+ List<String> queries = new ArrayList<>();
+ List<Long> deleteSet = new ArrayList<>();
+ for (long i=0; i < 2000; i++) {
+ deleteSet.add(i+1);
+ }
+ StringBuilder prefix = new StringBuilder();
+ StringBuilder suffix = new StringBuilder();
+
+ prefix.append("select count(*) from TXNS where ");
+
+ List<String> questions = new ArrayList<>(deleteSet.size());
+ for (int i = 0; i < deleteSet.size(); i++) {
+ questions.add("?");
+ }
+ TxnUtils.buildQueryWithINClauseStrings(conf, queries, prefix, suffix,
questions, "cc_id", false, true);
+ }
+
/** Verify queries can run against Derby DB.
* As long as Derby doesn't complain, we assume the query is
syntactically/semantically correct.
*/