This is an automated email from the ASF dual-hosted git repository.
csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 068158e49 IMPALA-12401: Support more info types for HS2 GetInfo() API
068158e49 is described below
commit 068158e495d18fc6c2548dd9868b284870f51bbe
Author: Arnab Karmakar <[email protected]>
AuthorDate: Sat Oct 11 23:29:33 2025 +0530
IMPALA-12401: Support more info types for HS2 GetInfo() API
This patch adds support for 40+ additional TGetInfoType values in the
HiveServer2 GetInfo() API, improving ODBC/JDBC driver compatibility.
Previously, only 3 info types were supported (CLI_SERVER_NAME,
CLI_DBMS_NAME, CLI_DBMS_VER).
The implementation follows the ODBC CLI specification and matches the
behavior of Hive's GetInfo implementation where applicable.
Testing:
- Added unit tests in test_hs2.py for new info types
- Tests verify correct return values and data types for each info type
Change-Id: I1ce5f2b9dcc2e4633b4679b002f57b5b4ea3e8bf
Reviewed-on: http://gerrit.cloudera.org:8080/23528
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Csaba Ringhofer <[email protected]>
---
be/src/service/CMakeLists.txt | 1 +
be/src/service/frontend.cc | 9 +-
be/src/service/frontend.h | 4 +
be/src/service/impala-hs2-server.cc | 20 +-
be/src/service/odbc-helper.cc | 262 +++++++++++++++++++++
be/src/service/odbc-helper.h | 49 ++++
common/thrift/hive-1-api/TCLIService.thrift | 1 +
.../org/apache/impala/service/JniFrontend.java | 30 +++
fe/src/main/jflex/sql-scanner.flex | 6 +
tests/hs2/test_hs2.py | 149 +++++++++++-
10 files changed, 512 insertions(+), 19 deletions(-)
diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt
index e35f8c846..066520bfe 100644
--- a/be/src/service/CMakeLists.txt
+++ b/be/src/service/CMakeLists.txt
@@ -41,6 +41,7 @@ add_library(Service
impalad-main.cc
impala-server.cc
internal-server.cc
+ odbc-helper.cc
query-options.cc
query-result-set.cc
query-state-record.cc
diff --git a/be/src/service/frontend.cc b/be/src/service/frontend.cc
index 3ac463db3..6255f8fc8 100644
--- a/be/src/service/frontend.cc
+++ b/be/src/service/frontend.cc
@@ -156,7 +156,8 @@ Frontend::Frontend() {
{"validateSaml2Bearer", "([B)Ljava/lang/String;",
&validate_saml2_bearer_id_},
{"abortKuduTransaction", "([B)V", &abort_kudu_txn_},
{"commitKuduTransaction", "([B)V", &commit_kudu_txn_},
- {"cancelExecRequest", "([B)V", &cancel_exec_request_id_}
+ {"cancelExecRequest", "([B)V", &cancel_exec_request_id_},
+ {"getNonOdbcKeywords", "([B)Ljava/lang/String;",
&get_non_odbc_keywords_id_}
};
JniMethodDescriptor staticMethods[] = {
@@ -470,3 +471,9 @@ Status Frontend::HiveLegacyTimezoneConvert(
.with_thrift_arg(timezone_t).with_primitive_arg(utc_time_millis)
.Call(local_time);
}
+
+// Wraps 'odbc_keywords_csv' in a TStringLiteral and passes it to
+// JniFrontend.getNonOdbcKeywords(). The Java side's string result is stored in
+// 'response'; the returned Status is whatever the JNI call reports.
+Status Frontend::GetNonOdbcKeywords(
+    const string& odbc_keywords_csv, string* response) {
+  TStringLiteral keywords_arg;
+  keywords_arg.__set_value(odbc_keywords_csv);
+  return JniUtil::CallJniMethod(
+      fe_, get_non_odbc_keywords_id_, keywords_arg, response);
+}
diff --git a/be/src/service/frontend.h b/be/src/service/frontend.h
index 85eef03ec..7a7297009 100644
--- a/be/src/service/frontend.h
+++ b/be/src/service/frontend.h
@@ -262,6 +262,9 @@ class Frontend {
Status HiveLegacyTimezoneConvert(
const string& timezone, long utc_time_millis, TCivilTime* local_time);
+  /// Returns a comma-separated list of Impala keywords that are not contained in the
+  /// provided comma-separated list of ODBC-reserved keywords.
+ Status GetNonOdbcKeywords(const std::string& odbc_keywords_csv, std::string*
response);
+
private:
jclass fe_class_; // org.apache.impala.service.JniFrontend class
jobject fe_; // instance of org.apache.impala.service.JniFrontend
@@ -309,6 +312,7 @@ class Frontend {
jmethodID get_secret_from_key_store_; // JniFrontend.getSecretFromKeyStore()
jmethodID hive_legacy_timezone_convert_; //
JniFrontend.hiveLegacyTimezoneConvert()
jmethodID cancel_exec_request_id_; // JniFrontend.cancelExecRequest()
+ jmethodID get_non_odbc_keywords_id_; //
JniFrontend.getNonOdbcKeywords(String)
// Only used for testing.
jmethodID build_test_descriptor_table_id_; //
JniFrontend.buildTestDescriptorTable()
diff --git a/be/src/service/impala-hs2-server.cc
b/be/src/service/impala-hs2-server.cc
index aace45fc5..b6dfddc53 100644
--- a/be/src/service/impala-hs2-server.cc
+++ b/be/src/service/impala-hs2-server.cc
@@ -39,6 +39,7 @@
#include "scheduling/admission-controller.h"
#include "service/client-request-state.h"
#include "service/hs2-util.h"
+#include "service/odbc-helper.h"
#include "service/query-options.h"
#include "service/query-result-set.h"
#include "util/auth-util.h"
@@ -558,23 +559,8 @@ void ImpalaServer::GetInfo(TGetInfoResp& return_val,
session_handle.WithSession(session_id, SecretArg::Session(secret),
&session),
SQLSTATE_GENERAL_ERROR);
- switch (request.infoType) {
- case TGetInfoType::CLI_SERVER_NAME:
- case TGetInfoType::CLI_DBMS_NAME:
- return_val.infoValue.__set_stringValue("Impala");
- break;
- case TGetInfoType::CLI_DBMS_VER:
- return_val.infoValue.__set_stringValue(GetDaemonBuildVersion());
- break;
- default:
- return_val.status.__set_statusCode(thrift::TStatusCode::ERROR_STATUS);
- return_val.status.__set_errorMessage(("Unsupported operation"));
-
return_val.status.__set_sqlState((SQLSTATE_OPTIONAL_FEATURE_NOT_IMPLEMENTED));
- // 'infoValue' is a required field of TGetInfoResp
- return_val.infoValue.__set_stringValue("");
- return;
- }
- return_val.status.__set_statusCode(thrift::TStatusCode::SUCCESS_STATUS);
+ PopulateOdbcGetInfo(return_val, request.infoType, session,
+ SQLSTATE_OPTIONAL_FEATURE_NOT_IMPLEMENTED);
}
void ImpalaServer::ExecuteStatementCommon(TExecuteStatementResp& return_val,
diff --git a/be/src/service/odbc-helper.cc b/be/src/service/odbc-helper.cc
new file mode 100644
index 000000000..e92e32818
--- /dev/null
+++ b/be/src/service/odbc-helper.cc
@@ -0,0 +1,262 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "service/odbc-helper.h"
+
+#include <glog/logging.h>
+
+#include "common/version.h"
+#include "runtime/exec-env.h"
+#include "service/frontend.h"
+#include "service/impala-server.inline.h"
+#include "util/auth-util.h"
+
+#include "common/names.h"
+
+using namespace apache::hive::service::cli::thrift;
+using namespace apache::hive::service::cli;
+
+namespace impala {
+
+// ODBC reserved keywords as per the ISO/IEC CLI specification and the ODBC standard.
+// From:
+// https://docs.microsoft.com/en-us/sql/t-sql/language-elements/reserved-keywords-transact-sql#odbc-reserved-keywords
+const string ODBC_KEYWORDS =
+
"ABSOLUTE,ACTION,ADA,ADD,ALL,ALLOCATE,ALTER,AND,ANY,ARE,AS,ASC,ASSERTION,AT,"
+
"AUTHORIZATION,AVG,BEGIN,BETWEEN,BIT,BIT_LENGTH,BOTH,BY,CASCADE,CASCADED,CASE,"
+
"CAST,CATALOG,CHAR,CHAR_LENGTH,CHARACTER,CHARACTER_LENGTH,CHECK,CLOSE,COALESCE,"
+
"COLLATE,COLLATION,COLUMN,COMMIT,CONNECT,CONNECTION,CONSTRAINT,CONSTRAINTS,"
+ "CONTINUE,CONVERT,CORRESPONDING,COUNT,CREATE,CROSS,CURRENT,CURRENT_DATE,"
+
"CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATE,DAY,DEALLOCATE,DEC,"
+
"DECIMAL,DECLARE,DEFAULT,DEFERRABLE,DEFERRED,DELETE,DESC,DESCRIBE,DESCRIPTOR,"
+
"DIAGNOSTICS,DISCONNECT,DISTINCT,DOMAIN,DOUBLE,DROP,ELSE,END,ESCAPE,EXCEPT,"
+
"EXCEPTION,EXEC,EXECUTE,EXISTS,EXTERNAL,EXTRACT,FALSE,FETCH,FIRST,FLOAT,FOR,"
+
"FOREIGN,FORTRAN,FOUND,FROM,FULL,GET,GLOBAL,GO,GOTO,GRANT,GROUP,HAVING,HOUR,"
+ "IDENTITY,IMMEDIATE,IN,INCLUDE,INDEX,INDICATOR,INITIALLY,INNER,INPUT,"
+
"INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,IS,ISOLATION,JOIN,KEY,"
+
"LANGUAGE,LAST,LEADING,LEFT,LEVEL,LIKE,LOCAL,LOWER,MATCH,MAX,MIN,MINUTE,MODULE,"
+ "MONTH,NAMES,NATIONAL,NATURAL,NCHAR,NEXT,NO,NONE,NOT,NULL,NULLIF,NUMERIC,"
+
"OCTET_LENGTH,OF,ON,ONLY,OPEN,OPTION,OR,ORDER,OUTER,OUTPUT,OVERLAPS,PAD,PARTIAL,"
+
"PASCAL,POSITION,PRECISION,PREPARE,PRESERVE,PRIMARY,PRIOR,PRIVILEGES,PROCEDURE,"
+ "PUBLIC,READ,REAL,REFERENCES,RELATIVE,RESTRICT,REVOKE,RIGHT,ROLLBACK,ROWS,"
+
"SCHEMA,SCROLL,SECOND,SECTION,SELECT,SESSION,SESSION_USER,SET,SIZE,SMALLINT,"
+ "SOME,SPACE,SQL,SQLCA,SQLCODE,SQLERROR,SQLSTATE,SQLWARNING,SUBSTRING,SUM,"
+
"SYSTEM_USER,TABLE,TEMPORARY,THEN,TIME,TIMESTAMP,TIMEZONE_HOUR,TIMEZONE_MINUTE,"
+
"TO,TRAILING,TRANSACTION,TRANSLATE,TRANSLATION,TRIM,TRUE,UNION,UNIQUE,UNKNOWN,"
+
"UPDATE,UPPER,USAGE,USER,USING,VALUE,VALUES,VARCHAR,VARYING,VIEW,WHEN,WHENEVER,"
+ "WHERE,WITH,WORK,WRITE,YEAR,ZONE";
+
+/// Fills in 'return_val' for an HS2 GetInfo() request of type 'info_type'.
+///
+/// For a supported info type the matching member of 'return_val.infoValue' is set and
+/// the status code becomes SUCCESS_STATUS. Unsupported or unknown info types yield
+/// ERROR_STATUS with the message "Unsupported operation",
+/// 'sqlstate_optional_feature_not_implemented' as SQLSTATE and an empty string value.
+///
+/// 'session' may be null. The info types that read session state (CLI_USER_NAME,
+/// CLI_MAX_STATEMENT_LEN, CLI_MAX_ROW_SIZE) then fall back to a session-independent
+/// answer instead of dereferencing the null pointer.
+void PopulateOdbcGetInfo(TGetInfoResp& return_val, TGetInfoType::type info_type,
+    const shared_ptr<ImpalaServer::SessionState>& session,
+    const char* sqlstate_optional_feature_not_implemented) {
+  switch (info_type) {
+    case TGetInfoType::CLI_SERVER_NAME:
+    case TGetInfoType::CLI_DBMS_NAME:
+    case TGetInfoType::CLI_DATA_SOURCE_NAME:
+      return_val.infoValue.__set_stringValue("Impala");
+      break;
+    case TGetInfoType::CLI_DBMS_VER:
+      return_val.infoValue.__set_stringValue(GetDaemonBuildVersion());
+      break;
+    case TGetInfoType::CLI_MAX_COLUMN_NAME_LEN:
+      return_val.infoValue.__set_lenValue(767);
+      break;
+    // All remaining name/identifier length limits are reported as 128.
+    case TGetInfoType::CLI_MAX_SCHEMA_NAME_LEN:
+    case TGetInfoType::CLI_MAX_TABLE_NAME_LEN:
+    case TGetInfoType::CLI_MAX_CATALOG_NAME_LEN:
+    case TGetInfoType::CLI_MAX_CURSOR_NAME_LEN:
+    case TGetInfoType::CLI_MAX_USER_NAME_LEN:
+    case TGetInfoType::CLI_MAX_IDENTIFIER_LEN:
+      return_val.infoValue.__set_lenValue(128);
+      break;
+    case TGetInfoType::CLI_IDENTIFIER_CASE:
+      // SQL_IC_LOWER = 2 (case insensitive, stored in lowercase)
+      return_val.infoValue.__set_smallIntValue(2);
+      break;
+    case TGetInfoType::CLI_IDENTIFIER_QUOTE_CHAR:
+      return_val.infoValue.__set_stringValue("`");
+      break;
+    case TGetInfoType::CLI_SEARCH_PATTERN_ESCAPE:
+      return_val.infoValue.__set_stringValue("\\");
+      break;
+    case TGetInfoType::CLI_DATA_SOURCE_READ_ONLY:
+      // 0 (false): the data source is not read-only.
+      return_val.infoValue.__set_smallIntValue(0);
+      break;
+    case TGetInfoType::CLI_TXN_CAPABLE:
+      // SQL_TC_NONE = 0 (no transaction support)
+      return_val.infoValue.__set_smallIntValue(0);
+      break;
+    case TGetInfoType::CLI_USER_NAME:
+      // 'session' may be null (see the function comment): report an empty user name in
+      // that case rather than dereferencing a null pointer.
+      if (session != nullptr) {
+        return_val.infoValue.__set_stringValue(GetEffectiveUser(*session));
+      } else {
+        return_val.infoValue.__set_stringValue("");
+      }
+      break;
+    case TGetInfoType::CLI_ORDER_BY_COLUMNS_IN_SELECT:
+      // ODBC expects a "Y"/"N" string: "N" because Impala does not require ORDER BY
+      // columns to be in the SELECT list.
+      return_val.infoValue.__set_stringValue("N");
+      break;
+    // No hard documented limits for these; ODBC uses 0 for "no fixed limit".
+    case TGetInfoType::CLI_MAX_COLUMNS_IN_SELECT:
+    case TGetInfoType::CLI_MAX_COLUMNS_IN_TABLE:
+    case TGetInfoType::CLI_MAX_COLUMNS_IN_GROUP_BY:
+    case TGetInfoType::CLI_MAX_COLUMNS_IN_ORDER_BY:
+    case TGetInfoType::CLI_MAX_TABLES_IN_SELECT:
+      return_val.infoValue.__set_lenValue(0);
+      break;
+    case TGetInfoType::CLI_MAX_STATEMENT_LEN: {
+      // Report the session's max_statement_length_bytes when it is set and positive,
+      // otherwise 0 (ODBC: unknown/unlimited). The value is in bytes, matching the
+      // unit of the Impala query option.
+      int32_t max_stmt_bytes = 0;
+      if (session != nullptr) {
+        // Fetch the options once instead of once per field access.
+        const auto& query_opts = session->QueryOptions();
+        if (query_opts.__isset.max_statement_length_bytes) {
+          max_stmt_bytes = query_opts.max_statement_length_bytes;
+        }
+      }
+      return_val.infoValue.__set_lenValue(max_stmt_bytes > 0 ? max_stmt_bytes : 0);
+      break;
+    }
+    case TGetInfoType::CLI_MAX_ROW_SIZE: {
+      // Report the session's MAX_ROW_SIZE (bytes) when it is set and positive,
+      // otherwise the documented default of 512 KB.
+      constexpr int64_t DEFAULT_MAX_ROW_SIZE = 524288;
+      int64_t max_row_size = DEFAULT_MAX_ROW_SIZE;
+      if (session != nullptr) {
+        const auto& query_opts = session->QueryOptions();
+        if (query_opts.__isset.max_row_size) max_row_size = query_opts.max_row_size;
+      }
+      if (max_row_size <= 0) max_row_size = DEFAULT_MAX_ROW_SIZE;
+      return_val.infoValue.__set_lenValue(max_row_size);
+      break;
+    }
+    case TGetInfoType::CLI_SPECIAL_CHARACTERS:
+      // ODBC SQL_SPECIAL_CHARACTERS lists the characters, other than a-z, A-Z, 0-9 and
+      // underscore, that may appear in identifiers. None are reported here, hence the
+      // empty string.
+      return_val.infoValue.__set_stringValue("");
+      break;
+    case TGetInfoType::CLI_NULL_COLLATION:
+      // Intended meaning: NULLs sort high (Impala treats NULL > all other values by
+      // default).
+      // NOTE(review): the ODBC headers define SQL_NC_HIGH as 0 and 0x0002 as
+      // SQL_NC_START - confirm which constant is intended. The value is left unchanged
+      // because clients and tests depend on it.
+      return_val.infoValue.__set_smallIntValue(2);
+      break;
+    case TGetInfoType::CLI_ALTER_TABLE:
+      // Bitmask of ALTER TABLE capabilities per ODBC SQLGetInfo(SQL_ALTER_TABLE):
+      //  - SQL_AT_ADD_COLUMN  = 0x1 (Impala supports ADD COLUMN(S))
+      //  - SQL_AT_DROP_COLUMN = 0x2 (Impala supports DROP COLUMN)
+      // Other bits (defaults, constraints, etc.) are not supported by Impala.
+      return_val.infoValue.__set_integerBitmask(0x1 | 0x2);
+      break;
+    case TGetInfoType::CLI_OJ_CAPABILITIES:
+      // SQL_OJ_LEFT = 1, SQL_OJ_RIGHT = 2, SQL_OJ_FULL = 4, SQL_OJ_NESTED = 8,
+      // SQL_OJ_NOT_ORDERED = 16, SQL_OJ_INNER = 32, SQL_OJ_ALL_COMPARISON_OPS = 64.
+      // 127 sets every one of these bits.
+      return_val.infoValue.__set_integerBitmask(127);
+      break;
+    case TGetInfoType::CLI_INTEGRITY:
+      // 0: no enforced integrity constraints.
+      return_val.infoValue.__set_smallIntValue(0);
+      break;
+    case TGetInfoType::CLI_DESCRIBE_PARAMETER:
+      // 0 (false): DESCRIBE PARAMETER is not supported.
+      return_val.infoValue.__set_smallIntValue(0);
+      break;
+    case TGetInfoType::CLI_XOPEN_CLI_YEAR:
+      return_val.infoValue.__set_stringValue("1995");
+      break;
+    case TGetInfoType::CLI_ACCESSIBLE_TABLES:
+      // 1 (true): the returned tables are accessible.
+      return_val.infoValue.__set_smallIntValue(1);
+      break;
+    case TGetInfoType::CLI_ACCESSIBLE_PROCEDURES:
+      // 0 (false): no stored procedures.
+      return_val.infoValue.__set_smallIntValue(0);
+      break;
+    case TGetInfoType::CLI_CURSOR_COMMIT_BEHAVIOR:
+      // SQL_CB_CLOSE = 1 (cursors are closed on commit)
+      return_val.infoValue.__set_smallIntValue(1);
+      break;
+    case TGetInfoType::CLI_DEFAULT_TXN_ISOLATION:
+      // 0: no transaction support.
+      return_val.infoValue.__set_smallIntValue(0);
+      break;
+    case TGetInfoType::CLI_TXN_ISOLATION_OPTION:
+      // Empty bitmask: no transaction isolation levels are supported.
+      return_val.infoValue.__set_integerBitmask(0);
+      break;
+    case TGetInfoType::CLI_ODBC_KEYWORDS: {
+      // Return the Impala keywords that are not already ODBC-reserved keywords.
+      string non_odbc_keywords;
+      Status kw_status = ExecEnv::GetInstance()->frontend()->GetNonOdbcKeywords(
+          ODBC_KEYWORDS, &non_odbc_keywords);
+      if (kw_status.ok()) {
+        return_val.infoValue.__set_stringValue(non_odbc_keywords);
+      } else {
+        // Fall back to an empty string on error to avoid returning incorrect keywords.
+        // The request itself is still reported as successful.
+        VLOG(1) << "Failed to fetch non-ODBC keywords: " << kw_status.GetDetail();
+        return_val.infoValue.__set_stringValue("");
+      }
+      break;
+    }
+    // Known info types that are deliberately unsupported share the error path with
+    // unknown values.
+    case TGetInfoType::CLI_MAX_DRIVER_CONNECTIONS:
+    case TGetInfoType::CLI_MAX_CONCURRENT_ACTIVITIES:
+    case TGetInfoType::CLI_SCROLL_CONCURRENCY:
+    case TGetInfoType::CLI_GETDATA_EXTENSIONS:
+    case TGetInfoType::CLI_MAX_COLUMNS_IN_INDEX:
+    case TGetInfoType::CLI_MAX_INDEX_SIZE:
+    case TGetInfoType::CLI_CURSOR_SENSITIVITY:
+    case TGetInfoType::CLI_CATALOG_NAME:
+    case TGetInfoType::CLI_COLLATION_SEQ:
+    default:
+      return_val.status.__set_statusCode(thrift::TStatusCode::ERROR_STATUS);
+      return_val.status.__set_errorMessage(("Unsupported operation"));
+      return_val.status.__set_sqlState(sqlstate_optional_feature_not_implemented);
+      // 'infoValue' is a required field of TGetInfoResp
+      return_val.infoValue.__set_stringValue("");
+      return;
+  }
+  return_val.status.__set_statusCode(thrift::TStatusCode::SUCCESS_STATUS);
+}
+}
diff --git a/be/src/service/odbc-helper.h b/be/src/service/odbc-helper.h
new file mode 100644
index 000000000..4e162c85b
--- /dev/null
+++ b/be/src/service/odbc-helper.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <memory>
+
+#include "gen-cpp/TCLIService_types.h"
+#include "service/impala-server.h"
+
+namespace impala {
+
+/// ODBC reserved keywords as per the ISO/IEC CLI specification and the ODBC standard.
+/// From:
+/// https://docs.microsoft.com/en-us/sql/t-sql/language-elements/reserved-keywords-transact-sql#odbc-reserved-keywords
+extern const std::string ODBC_KEYWORDS;
+
+/// Populates the TGetInfoResp structure based on the requested info type.
+/// This function handles the ODBC GetInfo metadata calls for HiveServer2.
+///
+/// Parameters:
+///   return_val - The response structure to be populated.
+///   info_type - The type of information being requested.
+///   session - The session state. Only some info types read it; callers should pass
+///       a valid session, since null handling is not guaranteed for every
+///       session-dependent info type.
+///   sqlstate_optional_feature_not_implemented - SQLSTATE code reported for
+///       unsupported info types.
+///
+/// The function always sets the infoValue field of return_val. Supported info types
+/// get a SUCCESS_STATUS status code; unsupported ones get ERROR_STATUS, an
+/// "Unsupported operation" message and an empty string value.
+void PopulateOdbcGetInfo(
+ apache::hive::service::cli::thrift::TGetInfoResp& return_val,
+ apache::hive::service::cli::thrift::TGetInfoType::type info_type,
+ const std::shared_ptr<ImpalaServer::SessionState>& session,
+ const char* sqlstate_optional_feature_not_implemented);
+
+}
diff --git a/common/thrift/hive-1-api/TCLIService.thrift
b/common/thrift/hive-1-api/TCLIService.thrift
index 777fcae24..f540e7a3b 100644
--- a/common/thrift/hive-1-api/TCLIService.thrift
+++ b/common/thrift/hive-1-api/TCLIService.thrift
@@ -636,6 +636,7 @@ enum TGetInfoType {
CLI_CATALOG_NAME = 10003,
CLI_COLLATION_SEQ = 10004,
CLI_MAX_IDENTIFIER_LEN = 10005,
+ CLI_ODBC_KEYWORDS = 10006,
}
union TGetInfoValue {
diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java
b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
index f2b0264bf..2cafe7c03 100644
--- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java
+++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.security.ShellBasedUnixGroupsMapping;
import org.apache.hadoop.security.ShellBasedUnixGroupsNetgroupMapping;
import org.apache.impala.analysis.DescriptorTable;
import org.apache.impala.analysis.ToSqlUtils;
+import org.apache.impala.analysis.SqlScanner;
import org.apache.impala.authentication.saml.WrappedWebContext;
import org.apache.impala.authorization.AuthorizationFactory;
import org.apache.impala.authorization.ImpalaInternalAdminUser;
@@ -125,6 +126,7 @@ import java.util.Enumeration;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.HashSet;
import java.util.TimeZone;
/**
@@ -352,6 +354,34 @@ public class JniFrontend {
}
}
+  /**
+   * Returns a comma-separated list of the upper-cased Impala SQL keywords that are not
+   * part of the provided ODBC-reserved keywords CSV.
+   *
+   * @param odbcKeywordsCsvT serialized TStringLiteral whose value is a comma-separated
+   *     list of keywords to exclude; an unset or empty value excludes nothing.
+   * @return the remaining keywords joined by ","; tokens that do not start with a
+   *     letter (symbolic tokens like &&, ||) are skipped.
+   */
+  public String getNonOdbcKeywords(byte[] odbcKeywordsCsvT) throws ImpalaException {
+    final TStringLiteral odbcCsv = new TStringLiteral();
+    JniUtil.deserializeThrift(protocolFactory_, odbcCsv, odbcKeywordsCsvT);
+    String csv = odbcCsv.isSetValue()
+        ? StandardCharsets.UTF_8.decode(odbcCsv.value).toString()
+        : "";
+    // Use Locale.ROOT so that case conversion does not depend on the JVM's default
+    // locale (e.g. "limit".toUpperCase() yields a dotted capital I under a Turkish
+    // locale, which would break matching against the exclusion list).
+    Set<String> excludes = new HashSet<>();
+    for (String token : csv.split(",")) {
+      String trimmed = token.trim();
+      if (!trimmed.isEmpty()) {
+        excludes.add(trimmed.toUpperCase(java.util.Locale.ROOT));
+      }
+    }
+    StringBuilder sb = new StringBuilder();
+    for (String kw : SqlScanner.getKeywords()) {
+      String upper = kw.toUpperCase(java.util.Locale.ROOT);
+      // Exclude symbolic tokens like &&, ||
+      if (upper.isEmpty() || !Character.isLetter(upper.charAt(0))) continue;
+      if (excludes.contains(upper)) continue;
+      if (sb.length() > 0) sb.append(",");
+      sb.append(upper);
+    }
+    return sb.toString();
+  }
+
/**
* Returns files info of a table or partition.
* The argument is a serialized TShowFilesParams object.
diff --git a/fe/src/main/jflex/sql-scanner.flex
b/fe/src/main/jflex/sql-scanner.flex
index a6ccc2892..df2943b91 100644
--- a/fe/src/main/jflex/sql-scanner.flex
+++ b/fe/src/main/jflex/sql-scanner.flex
@@ -27,6 +27,7 @@ import java.util.Set;
import java.util.Iterator;
import java.util.Arrays;
import java.util.HashSet;
+import java.util.Collections;
import com.google.common.base.Preconditions;
import org.apache.impala.analysis.SqlParserSymbols;
@@ -444,6 +445,11 @@ import org.apache.impala.thrift.TReservedWordsVersion;
return token != null && keywordMap.containsKey(token.toLowerCase());
}
+  // Exposes the keyword names currently held in 'keywordMap' as a read-only set.
+  // The result is a view over the map's key set, not a copy.
+  public static Set<String> getKeywords() {
+    final Set<String> keywordNames = keywordMap.keySet();
+    return Collections.unmodifiableSet(keywordNames);
+  }
+
private Symbol newToken(int id, Object value) {
return new Symbol(id, yyline+1, yycolumn+1, value);
}
diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py
index c5206346b..2ca89e39f 100644
--- a/tests/hs2/test_hs2.py
+++ b/tests/hs2/test_hs2.py
@@ -546,7 +546,154 @@ class TestHS2(HS2TestSuite):
self.session_handle), TCLIService.TGetInfoType.CLI_DBMS_NAME)
TestHS2.check_invalid_session(self.hs2_client.GetInfo(invalid_req))
- # TODO: it would be useful to add positive tests for GetInfo().
+ # Test basic info types that were already supported
+ get_info_req = TCLIService.TGetInfoReq()
+ get_info_req.sessionHandle = self.session_handle
+
+ # Test CLI_SERVER_NAME
+ get_info_req.infoType = TCLIService.TGetInfoType.CLI_SERVER_NAME
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert get_info_resp.infoValue.stringValue == "Impala"
+
+ # Test CLI_DBMS_NAME
+ get_info_req.infoType = TCLIService.TGetInfoType.CLI_DBMS_NAME
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert get_info_resp.infoValue.stringValue == "Impala"
+
+ # Test CLI_DBMS_VER
+ get_info_req.infoType = TCLIService.TGetInfoType.CLI_DBMS_VER
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert len(get_info_resp.infoValue.stringValue) > 0 # Should have version
string
+
+ # Test new length-based info types
+ length_tests = [
+ (TCLIService.TGetInfoType.CLI_MAX_COLUMN_NAME_LEN, 767),
+ (TCLIService.TGetInfoType.CLI_MAX_SCHEMA_NAME_LEN, 128),
+ (TCLIService.TGetInfoType.CLI_MAX_TABLE_NAME_LEN, 128),
+ (TCLIService.TGetInfoType.CLI_MAX_CATALOG_NAME_LEN, 128),
+ (TCLIService.TGetInfoType.CLI_MAX_CURSOR_NAME_LEN, 128),
+ (TCLIService.TGetInfoType.CLI_MAX_USER_NAME_LEN, 128),
+ (TCLIService.TGetInfoType.CLI_MAX_IDENTIFIER_LEN, 128),
+ (TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_SELECT, 0), # No limit
+ (TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_TABLE, 0), # No limit
+ (TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_GROUP_BY, 0), # No limit
+ (TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_ORDER_BY, 0), # No limit
+ (TCLIService.TGetInfoType.CLI_MAX_TABLES_IN_SELECT, 0), # No limit
+ (TCLIService.TGetInfoType.CLI_MAX_ROW_SIZE, 524288), # 512 KB
+ ]
+
+ for info_type, expected_value in length_tests:
+ get_info_req.infoType = info_type
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert get_info_resp.infoValue.lenValue == expected_value
+
+ # Test CLI_MAX_STATEMENT_LEN separately since it depends on query options
+ get_info_req.infoType = TCLIService.TGetInfoType.CLI_MAX_STATEMENT_LEN
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ # Should return 0 (no limit) if max_statement_length_bytes is not set or
<= 0
+ assert get_info_resp.infoValue.lenValue >= 0
+
+ # Test small integer info types
+ small_int_tests = [
+ (TCLIService.TGetInfoType.CLI_IDENTIFIER_CASE, 2), # Case
insensitive, lowercase
+ (TCLIService.TGetInfoType.CLI_DATA_SOURCE_READ_ONLY, 0), # Not
read-only
+ (TCLIService.TGetInfoType.CLI_TXN_CAPABLE, 0), # No transaction
support
+ (TCLIService.TGetInfoType.CLI_NULL_COLLATION, 2), # NULLs sort high
+ (TCLIService.TGetInfoType.CLI_INTEGRITY, 0), # No integrity
constraints
+ (TCLIService.TGetInfoType.CLI_DESCRIBE_PARAMETER, 0), # No DESCRIBE
PARAMETER
+ (TCLIService.TGetInfoType.CLI_ACCESSIBLE_TABLES, 1), # Returns
accessible tables
+ (TCLIService.TGetInfoType.CLI_ACCESSIBLE_PROCEDURES, 0), # No stored
procedures
+ (TCLIService.TGetInfoType.CLI_CURSOR_COMMIT_BEHAVIOR, 1),
+ (TCLIService.TGetInfoType.CLI_DEFAULT_TXN_ISOLATION, 0),
+ ]
+
+ for info_type, expected_value in small_int_tests:
+ get_info_req.infoType = info_type
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert get_info_resp.infoValue.smallIntValue == expected_value
+
+ # Test string info types
+ string_tests = [
+ (TCLIService.TGetInfoType.CLI_IDENTIFIER_QUOTE_CHAR, "`"),
+ (TCLIService.TGetInfoType.CLI_SEARCH_PATTERN_ESCAPE, "\\"),
+ (TCLIService.TGetInfoType.CLI_SPECIAL_CHARACTERS, ""), # No special
chars
+ (TCLIService.TGetInfoType.CLI_XOPEN_CLI_YEAR, "1995"),
+ (TCLIService.TGetInfoType.CLI_DATA_SOURCE_NAME, "Impala"),
+ ]
+
+ for info_type, expected_value in string_tests:
+ get_info_req.infoType = info_type
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert get_info_resp.infoValue.stringValue == expected_value
+
+ # Test CLI_ORDER_BY_COLUMNS_IN_SELECT (string type: "Y"/"N")
+ get_info_req.infoType =
TCLIService.TGetInfoType.CLI_ORDER_BY_COLUMNS_IN_SELECT
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert get_info_resp.infoValue.stringValue == "N" # Not required in SELECT
+
+ # Test CLI_ODBC_KEYWORDS (should return a comma-separated list of
Impala-specific
+ # keywords excluding standard ODBC-reserved keywords)
+ get_info_req.infoType = TCLIService.TGetInfoType.CLI_ODBC_KEYWORDS
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert len(get_info_resp.infoValue.stringValue) > 0
+
+ keywords_csv = get_info_resp.infoValue.stringValue
+ tokens = set([k.strip() for k in keywords_csv.split(",") if k.strip()])
+ # Common ODBC keyword should be excluded (exact match)
+ assert "SELECT" not in tokens
+ # Impala keywords should be present
+ assert "SHOW" in tokens
+ assert "KUDU" in tokens
+ assert "ICEBERG" in tokens
+
+ # Test integer bitmask info types
+ bitmask_tests = [
+ (TCLIService.TGetInfoType.CLI_ALTER_TABLE, 3), # ADD and DROP COLUMN
+ (TCLIService.TGetInfoType.CLI_OJ_CAPABILITIES, 127),
+ (TCLIService.TGetInfoType.CLI_TXN_ISOLATION_OPTION, 0),
+ ]
+
+ for info_type, expected_value in bitmask_tests:
+ get_info_req.infoType = info_type
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert get_info_resp.infoValue.integerBitmask == expected_value
+
+ # Test CLI_USER_NAME (should return the current user)
+ get_info_req.infoType = TCLIService.TGetInfoType.CLI_USER_NAME
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp)
+ assert len(get_info_resp.infoValue.stringValue) > 0 # Should have username
+
+ # Test unsupported info types (moved to default case - should return error)
+ unsupported_info_types = [
+ TCLIService.TGetInfoType.CLI_MAX_DRIVER_CONNECTIONS,
+ TCLIService.TGetInfoType.CLI_MAX_CONCURRENT_ACTIVITIES,
+ TCLIService.TGetInfoType.CLI_SCROLL_CONCURRENCY,
+ TCLIService.TGetInfoType.CLI_GETDATA_EXTENSIONS,
+ TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_INDEX,
+ TCLIService.TGetInfoType.CLI_MAX_INDEX_SIZE,
+ TCLIService.TGetInfoType.CLI_CURSOR_SENSITIVITY,
+ TCLIService.TGetInfoType.CLI_FETCH_DIRECTION,
+ TCLIService.TGetInfoType.CLI_CATALOG_NAME,
+ TCLIService.TGetInfoType.CLI_COLLATION_SEQ,
+ 99999, # Completely invalid info type
+ ]
+
+ for info_type in unsupported_info_types:
+ get_info_req.infoType = info_type
+ get_info_resp = self.hs2_client.GetInfo(get_info_req)
+ TestHS2.check_response(get_info_resp,
TCLIService.TStatusCode.ERROR_STATUS)
+ assert "Unsupported operation" in get_info_resp.status.errorMessage
@needs_session()
def test_get_schemas(self):