This is an automated email from the ASF dual-hosted git repository. dbecker pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4c549d79f2648c5ed906deef42c8d05bb747d8b0 Author: Pranav Lodha <[email protected]> AuthorDate: Mon Jun 24 15:24:11 2024 -0700 IMPALA-12992: Support for Hive JDBC Storage handler tables This is an enhancement request to support JDBC tables created by the Hive JDBC Storage handler. This is essentially done by making JDBC table properties compatible with Impala. It is done by translating the Hive JDBC table properties when loading the table, and maintaining that translation only in the Impala cluster, i.e. it's not written back to HMS. Impala includes JDBC drivers for PostgreSQL and MySQL, making 'driver.url' not mandatory in such cases. The Impala JDBC driver is still required for Impala-to-Impala JDBC connections. Additionally, Hive allows adding database driver JARs at runtime via Beeline, enabling users to dynamically include JDBC driver JARs. However, Impala does not support adding database driver JARs at runtime, making the driver.url field still useful in cases where additional drivers are needed. The 'hive.sql.query' property is not handled in this patch. It'll be covered in a separate JIRA. Testing: End-to-end tests are included in test_ext_data_sources.py. 
Change-Id: I1674b93a02f43df8c1a449cdc54053cc80d9c458 Reviewed-on: http://gerrit.cloudera.org:8080/22134 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- bin/impala-config.sh | 2 + fe/pom.xml | 6 + .../org/apache/impala/catalog/TableLoader.java | 67 +++++- .../extdatasource/jdbc/conf/JdbcStorageConfig.java | 2 +- .../jdbc/dao/DataSourceObjectCache.java | 39 ++-- .../jdbc/dao/GenericJdbcDatabaseAccessor.java | 6 +- .../org/apache/impala/analysis/AnalyzeDDLTest.java | 6 +- testdata/bin/load-ext-data-sources.sh | 33 ++- testdata/bin/setup-mysql-env.sh | 32 +++ .../queries/QueryTest/hive-jdbc-mysql-tables.test | 234 +++++++++++++++++++++ .../QueryTest/hive-jdbc-postgres-tables.test | 234 +++++++++++++++++++++ tests/custom_cluster/test_ext_data_sources.py | 167 ++++++++++++++- 12 files changed, 805 insertions(+), 23 deletions(-) diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 8853f2d46..b4784c4ae 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -177,6 +177,8 @@ export IMPALA_PROTOBUF_CLANG_VERSION=3.14.0-clangcompat-p2 unset IMPALA_PROTOBUF_CLANG_URL export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=42.5.6 unset IMPALA_POSTGRES_JDBC_DRIVER_URL +export IMPALA_MYSQL_JDBC_DRIVER_VERSION=8.2.0 +unset IMPALA_MYSQL_JDBC_DRIVER_URL export IMPALA_PYTHON_VERSION=2.7.16 unset IMPALA_PYTHON_URL export IMPALA_PYTHON3_VERSION=3.8.18 diff --git a/fe/pom.xml b/fe/pom.xml index 9b0f5ebb7..39970eb89 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -310,6 +310,12 @@ under the License. 
<version>${postgres.jdbc.version}</version> </dependency> + <dependency> + <groupId>com.mysql</groupId> + <artifactId>mysql-connector-j</artifactId> + <version>8.2.0</version> + </dependency> + <dependency> <groupId>org.antlr</groupId> <artifactId>antlr-runtime</artifactId> diff --git a/fe/src/main/java/org/apache/impala/catalog/TableLoader.java b/fe/src/main/java/org/apache/impala/catalog/TableLoader.java index a30313f15..f625ca100 100644 --- a/fe/src/main/java/org/apache/impala/catalog/TableLoader.java +++ b/fe/src/main/java/org/apache/impala/catalog/TableLoader.java @@ -20,6 +20,8 @@ package org.apache.impala.catalog; import com.google.common.base.Preconditions; import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.HashMap; +import java.util.Map; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; @@ -117,6 +119,14 @@ public class TableLoader { "Unsupported table type '%s' for: %s", tableType, fullTblName)); } + // Support for Hive JDBC storage handler + String val = msTbl.getParameters().get("storage_handler"); + if (val != null && val.equals("org.apache.hive.storage.jdbc.JdbcStorageHandler")) { + Map<String, String> impalaTblProps = setHiveJdbcProperties(msTbl); + msTbl.unsetParameters(); + msTbl.setParameters(impalaTblProps); + } + // Create a table of appropriate type and have it load itself table = Table.fromMetastoreTable(db, msTbl); if (table == null) { @@ -179,6 +189,61 @@ public class TableLoader { return table; } + private Map<String, String> setHiveJdbcProperties( + org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException { + Map<String, String> impala_tbl_props = new HashMap<>(); + // Insert Impala specific JDBC storage handler properties + impala_tbl_props.put("__IMPALA_DATA_SOURCE_NAME", "impalajdbcdatasource"); + String val = msTbl.getParameters().get("hive.sql.database.type"); + if (val == null) { + throw new 
TableLoadingException("Required parameter: hive.sql.database.type" + + "is missing."); + } else { + impala_tbl_props.put("database.type", val); + } + val = msTbl.getParameters().get("hive.sql.dbcp.password"); + if (val != null) { + impala_tbl_props.put("dbcp.password", val); + } + val = msTbl.getParameters().get("hive.sql.dbcp.password.keystore"); + if (val != null) { + impala_tbl_props.put("dbcp.password.keystore", val); + } + val = msTbl.getParameters().get("hive.sql.dbcp.password.key"); + if (val != null) { + impala_tbl_props.put("dbcp.password.key", val); + } + val = msTbl.getParameters().get("hive.sql.jdbc.url"); + if (val == null) { + throw new TableLoadingException("Required parameter: hive.sql.jdbc.url" + + "is missing."); + } else { + impala_tbl_props.put("jdbc.url", val); + } + val = msTbl.getParameters().get("hive.sql.dbcp.username"); + if (val == null) { + throw new TableLoadingException("Required parameter: hive.sql.dbcp.username" + + "is missing."); + } else { + impala_tbl_props.put("dbcp.username", val); + } + val = msTbl.getParameters().get("hive.sql.table"); + if (val == null) { + throw new TableLoadingException("Required parameter: hive.sql.table" + + "is missing."); + } else { + impala_tbl_props.put("table", val); + } + val = msTbl.getParameters().get("hive.sql.jdbc.driver"); + if (val == null) { + throw new TableLoadingException("Required parameter: hive.sql.jdbc.driver" + + "is missing."); + } else { + impala_tbl_props.put("jdbc.driver", val); + } + return impala_tbl_props; + } + private void initMetrics(Metrics metrics) { metrics.addTimer( MetastoreEventsProcessor.EVENTS_FETCH_DURATION_METRIC); @@ -205,4 +270,4 @@ public class TableLoader { metrics.addCounter( MetastoreEventsProcessor.NUMBER_OF_PARTITIONS_REMOVED); } -} +} \ No newline at end of file diff --git a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java index 
2611a591b..36d00a5bc 100644 --- a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java +++ b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java @@ -32,7 +32,7 @@ public enum JdbcStorageConfig { JDBC_DRIVER_CLASS("jdbc.driver", true), // Driver URL for downloading the Jar file package that is used to access the external // database. - JDBC_DRIVER_URL("driver.url", true), + JDBC_DRIVER_URL("driver.url", false), // Username for accessing the external database. DBCP_USERNAME("dbcp.username", false), // Password of the user. diff --git a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DataSourceObjectCache.java b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DataSourceObjectCache.java index 5937a0757..c64ee9e34 100644 --- a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DataSourceObjectCache.java +++ b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DataSourceObjectCache.java @@ -18,6 +18,8 @@ package org.apache.impala.extdatasource.jdbc.dao; import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; import java.net.URL; import java.net.URLClassLoader; import java.sql.SQLException; @@ -25,7 +27,6 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Properties; - import javax.sql.DataSource; import org.apache.commons.dbcp2.BasicDataSource; @@ -106,21 +107,35 @@ public class DataSourceObjectCache { String driverUrl = props.getProperty("driverUrl"); try { BasicDataSource dbcpDs = BasicDataSourceFactory.createDataSource(props); - // Copy jdbc driver to local file system. - String driverLocalPath = FileSystemUtil.copyFileFromUriToLocal(driverUrl); - // Create class loader for jdbc driver and set it for the - // BasicDataSource object so that the driver class could be loaded - // from jar file without searching classpath. 
- URL driverJarUrl = new File(driverLocalPath).toURI().toURL(); - URLClassLoader driverLoader = URLClassLoader.newInstance( - new URL[] { driverJarUrl }, getClass().getClassLoader()); - dbcpDs.setDriverClassLoader(driverLoader); + String driverLocalPath = null; + if (!Strings.isNullOrEmpty(driverUrl)) { + // Copy jdbc driver to local file system. + driverLocalPath = FileSystemUtil.copyFileFromUriToLocal(driverUrl); + // Create class loader for jdbc driver and set it for the + // BasicDataSource object so that the driver class could be loaded + // from jar file without searching classpath. + URL driverJarUrl = new File(driverLocalPath).toURI().toURL(); + URLClassLoader driverLoader = URLClassLoader.newInstance( + new URL[] { driverJarUrl }, getClass().getClassLoader()); + dbcpDs.setDriverClassLoader(driverLoader); + } + // Cache the datasource (no need to store driver path since it's in classpath) entry = new Entry(dbcpDs, driverLocalPath); cacheMap_.put(cacheKey, entry); return dbcpDs; + } catch (MalformedURLException e) { + throw new JdbcDatabaseAccessException(String.format( + "Invalid JDBC driver URL: '%s'.", driverUrl), e); + } catch (IOException e) { + throw new JdbcDatabaseAccessException(String.format( + "Failed to copy JDBC driver from '%s' to local filesystem.", driverUrl), e); + } catch (SQLException e) { + throw new JdbcDatabaseAccessException(String.format( + "Unable to fetch jdbc driver jar from location '%s'. ", driverUrl), e); } catch (Exception e) { - throw new JdbcDatabaseAccessException(String.format( - "Unable to fetch jdbc driver jar from location '%s'. ", driverUrl)); + // createDataSource() in commons-dbcp 2.9 throws Exception. 
+ throw new JdbcDatabaseAccessException( + "Error while creating datasource.", e); } } } diff --git a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java index 75f26ad92..ddcf972a8 100644 --- a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java +++ b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java @@ -319,8 +319,10 @@ public class GenericJdbcDatabaseAccessor implements DatabaseAccessor { dbProperties.put("url", jdbcUrl); dbProperties.put("driverClassName", conf.get(JdbcStorageConfig.JDBC_DRIVER_CLASS.getPropertyName())); - dbProperties.put("driverUrl", - conf.get(JdbcStorageConfig.JDBC_DRIVER_URL.getPropertyName())); + if (conf.get(JdbcStorageConfig.JDBC_DRIVER_URL.getPropertyName()) != null) { + dbProperties.put("driverUrl", + conf.get(JdbcStorageConfig.JDBC_DRIVER_URL.getPropertyName())); + } dbProperties.put("type", "javax.sql.DataSource"); return dbProperties; } diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java index 6c96dfdd8..2ca9159ca 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java @@ -851,10 +851,8 @@ public class AnalyzeDDLTest extends FrontendTestBase { "tblproperties ('__IMPALA_DATA_SOURCE_NAME')", "Unsetting the '__IMPALA_DATA_SOURCE_NAME' table property is not supported " + "for DataSource table."); - AnalysisError("alter table functional.alltypes_jdbc_datasource unset " + - "tblproperties ('driver.url')", - "Unsetting the 'driver.url' table property is not supported for JDBC " + - "DataSource table."); + AnalyzesOk("alter table functional.alltypes_jdbc_datasource unset " + + "tblproperties ('driver.url')"); AnalyzesOk("alter table functional.alltypes 
set tblproperties('sort.columns'='id')"); AnalyzesOk("alter table functional.alltypes set tblproperties(" + diff --git a/testdata/bin/load-ext-data-sources.sh b/testdata/bin/load-ext-data-sources.sh index 3855fd9d3..82bfc6abe 100755 --- a/testdata/bin/load-ext-data-sources.sh +++ b/testdata/bin/load-ext-data-sources.sh @@ -108,6 +108,36 @@ INSERT INTO test_strategy (strategy_id, name, referrer, landing, priority, __EOT__ sudo -u postgres psql -U hiveuser -d functional -f /tmp/jdbc_test_strategy.sql +# Create country table +cat > /tmp/jdbc_country.sql << __EOT__ +DROP TABLE IF EXISTS country; +CREATE TABLE country +( + id int, + name varchar(20), + bool_col BOOLEAN, + tinyint_col SMALLINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE PRECISION, + date_col DATE, + string_col VARCHAR(10), + timestamp_col TIMESTAMP +); +INSERT INTO country (id, name, bool_col, tinyint_col, smallint_col, int_col, +bigint_col, float_col, double_col, date_col, string_col, timestamp_col) +VALUES +(1, 'India', TRUE, 10, 100, 1000, 10000, 1.1, 1.11, '2024-01-01', + 'IN', '2024-01-01 10:00:00'), +(2, 'Russia', FALSE, 20, 200, 2000, 20000, 2.2, 2.22, '2024-02-01', + 'RU', '2024-02-01 11:00:00'), +(3, 'USA', TRUE, 30, 300, 3000, 30000, 3.3, 3.33, '2024-03-01', + 'US', '2024-03-01 12:00:00'); +__EOT__ +sudo -u postgres psql -U hiveuser -d functional -f /tmp/jdbc_country.sql + # Load data to jdbc table cat ${IMPALA_HOME}/testdata/target/AllTypes/* > /tmp/jdbc_alltypes.csv loadCmd="\COPY alltypes FROM '/tmp/jdbc_alltypes.csv' DELIMITER ',' CSV" @@ -154,4 +184,5 @@ rm /tmp/jdbc_alltypes.* rm /tmp/jdbc_alltypes_with_quote.* rm /tmp/jdbc_decimal_tbl.* rm /tmp/jdbc_test_strategy.* -rm /tmp/impala_jdbc_alltypes.sql \ No newline at end of file +rm /tmp/jdbc_country.* +rm /tmp/impala_jdbc_alltypes.sql diff --git a/testdata/bin/setup-mysql-env.sh b/testdata/bin/setup-mysql-env.sh index e1bd612ff..4b5f666c3 100755 --- a/testdata/bin/setup-mysql-env.sh 
+++ b/testdata/bin/setup-mysql-env.sh @@ -141,6 +141,38 @@ __EOT__ docker exec -i mysql mysql -uroot -psecret functional < \ /tmp/mysql_jdbc_decimal_tbl.sql +# Create country table +cat > /tmp/jdbc_country.sql << __EOT__ +DROP TABLE IF EXISTS country; +CREATE TABLE country +( + id int, + name varchar(20), + bool_col BOOLEAN, + tinyint_col SMALLINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + string_col VARCHAR(10), + timestamp_col TIMESTAMP +); +INSERT INTO country (id, name, bool_col, tinyint_col, smallint_col, int_col, +bigint_col, float_col, double_col, date_col, string_col, timestamp_col) +VALUES +(1, 'India', TRUE, 10, 100, 1000, 10000, 1.1, 1.11, '2024-01-01', + 'IN', '2024-01-01 10:00:00'), +(2, 'Russia', FALSE, 20, 200, 2000, 20000, 2.2, 2.22, '2024-02-01', + 'RU', '2024-02-01 11:00:00'), +(3, 'USA', TRUE, 30, 300, 3000, 30000, 3.3, 3.33, '2024-03-01', + 'US', '2024-03-01 12:00:00'); +__EOT__ + +docker exec -i mysql mysql -uroot -psecret functional < \ + /tmp/jdbc_country.sql + # Load data to jdbc table cat ${IMPALA_HOME}/testdata/target/AllTypes/* > /tmp/mysql_jdbc_alltypes.csv docker cp /tmp/mysql_jdbc_alltypes.csv mysql:/tmp diff --git a/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-mysql-tables.test b/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-mysql-tables.test new file mode 100644 index 000000000..7dc5a5dae --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-mysql-tables.test @@ -0,0 +1,234 @@ +==== +---- QUERY +select * from country_mysql; +---- RESULTS +1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00 +2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00 +3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00 +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, 
DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +select * from country_keystore_mysql; +---- RESULTS +1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00 +2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00 +3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00 +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +select id, name, date_col from country_mysql; +---- RESULTS +1,'India',2024-01-01 +2,'Russia',2024-02-01 +3,'USA',2024-03-01 +---- TYPES +INT, STRING, DATE +==== +---- QUERY +select id, name, date_col from country_keystore_mysql; +---- RESULTS +1,'India',2024-01-01 +2,'Russia',2024-02-01 +3,'USA',2024-03-01 +---- TYPES +INT, STRING, DATE +==== +---- QUERY +select count(*) from country_mysql; +---- RESULTS +3 +---- TYPES +BIGINT +==== +---- QUERY +select count(*) from country_keystore_mysql; +---- RESULTS +3 +---- TYPES +BIGINT +==== +---- QUERY +select distinct name from country_mysql; +---- RESULTS +'India' +'Russia' +'USA' +---- TYPES +STRING +==== +---- QUERY +select distinct name from country_keystore_mysql; +---- RESULTS +'India' +'Russia' +'USA' +---- TYPES +STRING +==== +---- QUERY +# Check for null values +select * from country_mysql where string_col IS NULL; +---- RESULTS +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +select * from country_keystore_mysql where string_col IS NULL; +---- RESULTS +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +# Order by integer column +select id, name, int_col from country_mysql order by int_col DESC; +---- RESULTS +3,'USA',3000 +2,'Russia',2000 +1,'India',1000 +---- TYPES +INT, STRING, INT +==== +---- QUERY +select id, name, int_col from country_keystore_mysql order by 
int_col DESC; +---- RESULTS +3,'USA',3000 +2,'Russia',2000 +1,'India',1000 +---- TYPES +INT, STRING, INT +==== +---- QUERY +# Select using case statement +select id, name, case when bool_col then 'Active' else 'Inactive' end as status from country_mysql; +---- RESULTS +1,'India','Active' +2,'Russia','Inactive' +3,'USA','Active' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select id, name, case when bool_col then 'Active' else 'Inactive' end as status from country_keystore_mysql; +---- RESULTS +1,'India','Active' +2,'Russia','Inactive' +3,'USA','Active' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +# Check for minimum date +select min(date_col) from country_mysql; +---- RESULTS +2024-01-01 +---- TYPES +DATE +==== +---- QUERY +select min(date_col) from country_keystore_mysql; +---- RESULTS +2024-01-01 +---- TYPES +DATE +==== +---- QUERY +# Join with a self table alias +select a.id, a.name, b.name from country_mysql a join country_mysql b on a.id <> b.id; +---- RESULTS +1,'India','Russia' +1,'India','USA' +2,'Russia','India' +2,'Russia','USA' +3,'USA','India' +3,'USA','Russia' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select a.id, a.name, b.name from country_keystore_mysql a join country_keystore_mysql b on a.id <> b.id; +---- RESULTS +1,'India','Russia' +1,'India','USA' +2,'Russia','India' +2,'Russia','USA' +3,'USA','India' +3,'USA','Russia' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +# Left join with another table +select a.id, a.name, b.string_col from country_mysql a left join country_keystore_mysql b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select a.id, a.name, b.string_col from country_keystore_mysql a left join country_mysql b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +# Right join with another table +select a.id, a.name, b.string_col from country_mysql a 
right join country_keystore_mysql b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select a.id, a.name, b.string_col from country_keystore_mysql a right join country_mysql b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +# Full outer join +select a.id, a.name, b.string_col from country_mysql a full outer join country_keystore_mysql b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select a.id, a.name, b.string_col from country_keystore_mysql a full outer join country_mysql b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +# Using IN clause +select * from country_mysql where name IN ('India', 'USA'); +---- RESULTS +1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00 +3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00 +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +select * from country_keystore_mysql where name IN ('India', 'USA'); +---- RESULTS +1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00 +3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00 +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-postgres-tables.test b/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-postgres-tables.test new file mode 100644 index 000000000..544a73b4c --- /dev/null +++ 
b/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-postgres-tables.test @@ -0,0 +1,234 @@ +==== +---- QUERY +select * from country_postgres; +---- RESULTS +1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00 +2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00 +3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00 +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +select * from country_keystore_postgres; +---- RESULTS +1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00 +2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00 +3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00 +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +select id, name, date_col from country_postgres; +---- RESULTS +1,'India',2024-01-01 +2,'Russia',2024-02-01 +3,'USA',2024-03-01 +---- TYPES +INT, STRING, DATE +==== +---- QUERY +select id, name, date_col from country_keystore_postgres; +---- RESULTS +1,'India',2024-01-01 +2,'Russia',2024-02-01 +3,'USA',2024-03-01 +---- TYPES +INT, STRING, DATE +==== +---- QUERY +select count(*) from country_postgres; +---- RESULTS +3 +---- TYPES +BIGINT +==== +---- QUERY +select count(*) from country_keystore_postgres; +---- RESULTS +3 +---- TYPES +BIGINT +==== +---- QUERY +select distinct name from country_postgres; +---- RESULTS +'India' +'Russia' +'USA' +---- TYPES +STRING +==== +---- QUERY +select distinct name from country_keystore_postgres; +---- RESULTS +'India' +'Russia' +'USA' +---- TYPES +STRING +==== +---- QUERY +# Check for null values +select * from country_postgres where string_col IS NULL; +---- RESULTS +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, 
SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +select * from country_keystore_postgres where string_col IS NULL; +---- RESULTS +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +# Order by integer column +select id, name, int_col from country_postgres order by int_col DESC; +---- RESULTS +3,'USA',3000 +2,'Russia',2000 +1,'India',1000 +---- TYPES +INT, STRING, INT +==== +---- QUERY +select id, name, int_col from country_keystore_postgres order by int_col DESC; +---- RESULTS +3,'USA',3000 +2,'Russia',2000 +1,'India',1000 +---- TYPES +INT, STRING, INT +==== +---- QUERY +# Select using case statement +select id, name, case when bool_col then 'Active' else 'Inactive' end as status from country_postgres; +---- RESULTS +1,'India','Active' +2,'Russia','Inactive' +3,'USA','Active' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select id, name, case when bool_col then 'Active' else 'Inactive' end as status from country_keystore_postgres; +---- RESULTS +1,'India','Active' +2,'Russia','Inactive' +3,'USA','Active' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +# Check for minimum date +select min(date_col) from country_postgres; +---- RESULTS +2024-01-01 +---- TYPES +DATE +==== +---- QUERY +select min(date_col) from country_keystore_postgres; +---- RESULTS +2024-01-01 +---- TYPES +DATE +==== +---- QUERY +# Join with a self table alias +select a.id, a.name, b.name from country_postgres a join country_postgres b on a.id <> b.id; +---- RESULTS +1,'India','Russia' +1,'India','USA' +2,'Russia','India' +2,'Russia','USA' +3,'USA','India' +3,'USA','Russia' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select a.id, a.name, b.name from country_keystore_postgres a join country_keystore_postgres b on a.id <> b.id; +---- RESULTS +1,'India','Russia' +1,'India','USA' +2,'Russia','India' +2,'Russia','USA' +3,'USA','India' +3,'USA','Russia' +---- TYPES +INT, STRING, 
STRING +==== +---- QUERY +# Left join with another table +select a.id, a.name, b.string_col from country_postgres a left join country_keystore_postgres b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select a.id, a.name, b.string_col from country_keystore_postgres a left join country_postgres b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +# Right join with another table +select a.id, a.name, b.string_col from country_postgres a right join country_keystore_postgres b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select a.id, a.name, b.string_col from country_keystore_postgres a right join country_postgres b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +# Full outer join +select a.id, a.name, b.string_col from country_postgres a full outer join country_keystore_postgres b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +select a.id, a.name, b.string_col from country_keystore_postgres a full outer join country_postgres b on a.id = b.id; +---- RESULTS +1,'India','IN' +2,'Russia','RU' +3,'USA','US' +---- TYPES +INT, STRING, STRING +==== +---- QUERY +# Using IN clause +select * from country_postgres where name IN ('India', 'USA'); +---- RESULTS +1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00 +3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00 +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== +---- QUERY +select * from country_keystore_postgres where name IN ('India', 'USA'); +---- RESULTS 
+1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00 +3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00 +---- TYPES +INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP +==== \ No newline at end of file diff --git a/tests/custom_cluster/test_ext_data_sources.py b/tests/custom_cluster/test_ext_data_sources.py index b3dbde78d..53a42886a 100644 --- a/tests/custom_cluster/test_ext_data_sources.py +++ b/tests/custom_cluster/test_ext_data_sources.py @@ -176,8 +176,95 @@ class TestExtDataSources(CustomClusterTestSuite): assert "FailoverInSyncJdbcDataSource" not in result.get_data() +class TestHivePostgresJdbcTables(CustomClusterTestSuite): + """Tests for hive jdbc postgres tables. """ + + @classmethod + def get_workload(cls): + return 'functional-query' + + @classmethod + def setup_class(cls): + super(TestHivePostgresJdbcTables, cls).setup_class() + + @pytest.mark.execute_serially + def test_postgres_hive_jdbc_tables(self, vector, unique_database): + """Run tests for external hive jdbc tables.""" + hive_sql = """ + DROP TABLE IF EXISTS {0}.country_postgres; + CREATE EXTERNAL TABLE {0}.country_postgres + ( + id INT, + name STRING, + bool_col BOOLEAN, + tinyint_col SMALLINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + string_col STRING, + timestamp_col TIMESTAMP + ) + STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' + TBLPROPERTIES ( + "hive.sql.database.type" = "POSTGRES", + "hive.sql.jdbc.driver" = "org.postgresql.Driver", + "hive.sql.jdbc.url" = "jdbc:postgresql://localhost:5432/functional", + "hive.sql.dbcp.username" = "hiveuser", + "hive.sql.dbcp.password" = "password", + "hive.sql.table" = "country" + ); + + DROP TABLE IF EXISTS {0}.country_keystore_postgres; + CREATE EXTERNAL TABLE {0}.country_keystore_postgres + ( + id INT, + name STRING, + bool_col BOOLEAN, + 
tinyint_col SMALLINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + string_col STRING, + timestamp_col TIMESTAMP + ) + STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' + TBLPROPERTIES ( + "hive.sql.database.type" = "POSTGRES", + "hive.sql.jdbc.driver" = "org.postgresql.Driver", + "hive.sql.jdbc.url" = "jdbc:postgresql://localhost:5432/functional", + "hive.sql.dbcp.username" = "hiveuser", + "hive.sql.dbcp.password.keystore" = + "jceks://hdfs/test-warehouse/data-sources/test.jceks", + "hive.sql.dbcp.password.key" = "hiveuser", + "hive.sql.table" = "country" + ); + """.format(unique_database) + try: + self.run_stmt_in_hive(hive_sql) + except Exception: + pytest.xfail(reason="Can't create hive jdbc table.") + self.client.execute("INVALIDATE METADATA {0}.country_postgres". + format(unique_database)) + self.client.execute("INVALIDATE METADATA {0}.country_keystore_postgres". + format(unique_database)) + # Describing postgres hive jdbc table in Impala. + self.client.execute("DESCRIBE {0}.country_postgres".format(unique_database)) + self.client.execute("DESCRIBE {0}.country_keystore_postgres".format(unique_database)) + + # Select statements are verified in hive-jdbc-postgres-tables.test. + self.run_test_case('QueryTest/hive-jdbc-postgres-tables', vector, + use_db=unique_database) + + class TestMySqlExtJdbcTables(CustomClusterTestSuite): - """Impala query tests for external jdbc tables on MySQL server.""" + """Impala query tests for external jdbc tables on MySQL server. 
+ It also includes tests for external hive jdbc tables on mysql.""" @classmethod def get_workload(cls): @@ -193,7 +280,7 @@ class TestMySqlExtJdbcTables(CustomClusterTestSuite): subprocess.check_call(run_cmd, close_fds=True) except subprocess.CalledProcessError as e: if e.returncode == 10: - pytest.skip("These tests requireadd the docker to be added to sudoer's group") + pytest.skip("These tests required the docker to be added to sudoer's group") elif e.returncode == 20: pytest.skip("Can't connect to local MySQL server") elif e.returncode == 30: @@ -227,6 +314,82 @@ class TestMySqlExtJdbcTables(CustomClusterTestSuite): """Run tests for external jdbc tables on MySQL""" self.run_test_case('QueryTest/mysql-ext-jdbc-tables', vector, use_db=unique_database) + @pytest.mark.execute_serially + def test_mysql_hive_jdbc_tables(self, vector, unique_database): + """ Run tests for external hive jdbc tables on mysql""" + hive_sql = """ + ADD JAR hdfs:///test-warehouse/data-sources/jdbc-drivers/mysql-jdbc.jar; + + DROP TABLE IF EXISTS {0}.country_mysql; + CREATE EXTERNAL TABLE {0}.country_mysql + ( + id INT, + name STRING, + bool_col BOOLEAN, + tinyint_col SMALLINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + string_col STRING, + timestamp_col TIMESTAMP + ) + STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' + TBLPROPERTIES ( + "hive.sql.database.type" = "MYSQL", + "hive.sql.jdbc.driver" = "com.mysql.cj.jdbc.Driver", + "hive.sql.jdbc.url" = "jdbc:mysql://localhost:3306/functional", + "hive.sql.dbcp.username" = "hiveuser", + "hive.sql.dbcp.password" = "password", + "hive.sql.table" = "country" + ); + + DROP TABLE IF EXISTS {0}.country_keystore_mysql; + CREATE EXTERNAL TABLE {0}.country_keystore_mysql + ( + id INT, + name STRING, + bool_col BOOLEAN, + tinyint_col SMALLINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + 
string_col STRING, + timestamp_col TIMESTAMP + ) + STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' + TBLPROPERTIES ( + "hive.sql.database.type" = "MYSQL", + "hive.sql.jdbc.driver" = "com.mysql.cj.jdbc.Driver", + "hive.sql.jdbc.url" = "jdbc:mysql://localhost:3306/functional", + "hive.sql.dbcp.username" = "hiveuser", + "hive.sql.dbcp.password.keystore" = + "jceks://hdfs/test-warehouse/data-sources/test.jceks", + "hive.sql.dbcp.password.key" = "hiveuser", + "hive.sql.table" = "country" + ); + """.format(unique_database) + try: + self.run_stmt_in_hive(hive_sql) + except Exception: + pytest.xfail(reason="Can't create hive jdbc table.") + self.client.execute("INVALIDATE METADATA {0}.country_mysql" + .format(unique_database)) + self.client.execute("INVALIDATE METADATA {0}.country_keystore_mysql" + .format(unique_database)) + # Describing mysql hive jdbc table in Impala. + self.client.execute("DESCRIBE {0}.country_mysql".format(unique_database)) + self.client.execute("DESCRIBE {0}.country_keystore_mysql".format(unique_database)) + + # Select statements are verified in hive-jdbc-mysql-tables.test. + self.run_test_case('QueryTest/hive-jdbc-mysql-tables', vector, + use_db=unique_database) + class TestImpalaExtJdbcTables(CustomClusterTestSuite): """Impala query tests for external jdbc tables in Impala cluster."""
