This is an automated email from the ASF dual-hosted git repository. wzhou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit f7a43b18aaa0585bcd7b4e608a3da887c0844c38 Author: gaurav1086 <[email protected]> AuthorDate: Wed Jan 10 11:15:21 2024 -0800 IMPALA-12503: Support date data type for predicates for external data source table This patch adds support for datatype date as predicates for external data sources. Testing: - Added tests for date predicates with operators: '=', '>', '<', '>=', '<=', '!=', 'BETWEEN'. Change-Id: Ibf13cbefaad812a0f78755c5791d82b24a3395e4 Reviewed-on: http://gerrit.cloudera.org:8080/20915 Reviewed-by: Wenzhe Zhou <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../apache/impala/planner/DataSourceScanNode.java | 5 +- .../org/apache/impala/service/FrontendTest.java | 4 +- .../impala/extdatasource/jdbc/JdbcDataSource.java | 2 +- .../extdatasource/jdbc/dao/DatabaseAccessor.java | 2 + .../jdbc/dao/GenericJdbcDatabaseAccessor.java | 17 ++ .../jdbc/util/QueryConditionUtil.java | 10 +- testdata/bin/create-ext-data-source-table.sql | 4 +- testdata/bin/load-ext-data-sources.sh | 34 +++- testdata/bin/setup-mysql-env.sh | 14 +- .../impala-ext-jdbc-tables-predicates.test | 182 +++++++++++++++++++++ .../queries/QueryTest/jdbc-data-source.test | 168 ++++++++++++++++--- .../queries/QueryTest/mysql-ext-jdbc-tables.test | 168 ++++++++++++++++--- tests/custom_cluster/test_ext_data_sources.py | 6 + 13 files changed, 556 insertions(+), 60 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/planner/DataSourceScanNode.java b/fe/src/main/java/org/apache/impala/planner/DataSourceScanNode.java index 4c45b0187..408789cb7 100644 --- a/fe/src/main/java/org/apache/impala/planner/DataSourceScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/DataSourceScanNode.java @@ -29,6 +29,7 @@ import org.apache.impala.analysis.Analyzer; import org.apache.impala.analysis.BinaryPredicate; import org.apache.impala.analysis.BoolLiteral; import org.apache.impala.analysis.CompoundPredicate; +import org.apache.impala.analysis.DateLiteral; import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.analysis.NumericLiteral; @@ -135,8 +136,10 @@ public class DataSourceScanNode extends ScanNode { case STRING: return new TColumnValue().setString_val( ((StringLiteral) expr).getUnescapedValue()); - case DECIMAL: case DATE: + return new TColumnValue().setDate_val( + (int) ((DateLiteral) expr).getValue()); + case DECIMAL: case DATETIME: case TIMESTAMP: // TODO: we support DECIMAL, TIMESTAMP and DATE but no way to specify it in SQL. diff --git a/fe/src/test/java/org/apache/impala/service/FrontendTest.java b/fe/src/test/java/org/apache/impala/service/FrontendTest.java index a62ba3ad1..9bfcce0b2 100644 --- a/fe/src/test/java/org/apache/impala/service/FrontendTest.java +++ b/fe/src/test/java/org/apache/impala/service/FrontendTest.java @@ -143,7 +143,7 @@ public class FrontendTest extends FrontendTestBase { // HiveServer2 GetTables has 5 columns. assertEquals(5, resp.schema.columns.size()); assertEquals(5, resp.rows.get(0).colVals.size()); - assertEquals(5, resp.rows.size()); + assertEquals(6, resp.rows.size()); assertEquals("alltypes_datasource", resp.rows.get(0).colVals.get(2).string_val.toLowerCase()); assertEquals("alltypes_date_partition", @@ -154,6 +154,8 @@ public class FrontendTest extends FrontendTestBase { resp.rows.get(3).colVals.get(2).string_val.toLowerCase()); assertEquals("alltypes_jdbc_datasource_2", resp.rows.get(4).colVals.get(2).string_val.toLowerCase()); + assertEquals("alltypes_with_date", + resp.rows.get(5).colVals.get(2).string_val.toLowerCase()); } @Test diff --git a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/JdbcDataSource.java b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/JdbcDataSource.java index 10df71dd5..303f8dc6c 100644 --- a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/JdbcDataSource.java +++ b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/JdbcDataSource.java @@ -291,7 +291,7 @@ public class JdbcDataSource implements ExternalDataSource { } sb.append(jdbcTableName); String condition = QueryConditionUtil - .buildCondition(params.getPredicates(), columnMapping); + .buildCondition(params.getPredicates(), columnMapping, dbAccessor_); if (StringUtils.isNotBlank(condition)) { sb.append(" WHERE ").append(condition); } diff --git a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DatabaseAccessor.java b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DatabaseAccessor.java index 6eab838fc..941d2745d 100644 --- a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DatabaseAccessor.java +++ b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DatabaseAccessor.java @@ -32,4 +32,6 @@ public interface DatabaseAccessor { void close(boolean cleanCache); String getCaseSensitiveName(String name); + + String getDateString(int dateVal); } diff --git a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java index b05ef01aa..407135157 100644 --- a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java +++ b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java @@ -25,9 +25,12 @@ import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; +import java.text.SimpleDateFormat; +import java.util.Date; import java.util.Map; import java.util.Map.Entry; import java.util.Properties; +import java.util.TimeZone; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; @@ -69,6 +72,7 @@ public class GenericJdbcDatabaseAccessor implements DatabaseAccessor { protected static final int CACHE_EXPIRE_TIMEOUT_S = 1800; protected static final int CACHE_SIZE = 100; protected String jdbcDriverLocalPath = null; + protected static final long MILLI_SECONDS_PER_DAY = 86400000; protected DataSource dbcpDataSource = null; // Cache datasource for sharing @@ -206,6 +210,19 @@ public class GenericJdbcDatabaseAccessor implements DatabaseAccessor { return sb.toString(); } + /** + * This function converts the date represented in epoch days to + * a string format of "yyyy-MM-dd" + */ + @Override + public String getDateString(int dateVal) { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + String dateToString = formatter.format(new Date(((long)dateVal) * + MILLI_SECONDS_PER_DAY)); + return dateToString; + } + /** * Uses generic JDBC escape functions to add a limit and offset clause to a query * string diff --git a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/util/QueryConditionUtil.java b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/util/QueryConditionUtil.java index 4f7d58156..5f1695ffc 100644 --- a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/util/QueryConditionUtil.java +++ b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/util/QueryConditionUtil.java @@ -23,6 +23,7 @@ import java.util.StringJoiner; import org.apache.impala.analysis.BinaryPredicate; import org.apache.impala.analysis.BinaryPredicate.Operator; +import org.apache.impala.extdatasource.jdbc.dao.DatabaseAccessor; import org.apache.impala.extdatasource.thrift.TBinaryPredicate; import org.apache.impala.extdatasource.thrift.TComparisonOp; import org.apache.impala.thrift.TColumnValue; @@ -42,7 +43,7 @@ public class QueryConditionUtil { private final static Logger LOG = LoggerFactory.getLogger(QueryConditionUtil.class); public static String buildCondition(List<List<TBinaryPredicate>> predicates, - Map<String, String> columnMapping) { + Map<String, String> columnMapping, DatabaseAccessor dbAccessor_) { List<String> condition = Lists.newArrayList(); for (List<TBinaryPredicate> tBinaryPredicates : predicates) { StringJoiner joiner = new StringJoiner(" OR ", "(", ")"); @@ -50,7 +51,7 @@ public class QueryConditionUtil { String name = predicate.getCol().getName(); name = columnMapping.getOrDefault(name, name); String op = converse(predicate.getOp()); - String value = getTColumnValueAsString(predicate.getValue()); + String value = getTColumnValueAsString(predicate.getValue(), dbAccessor_); joiner.add(String.format("%s %s %s", name, op, value)); } condition.add(joiner.toString()); @@ -64,7 +65,8 @@ public class QueryConditionUtil { * * @see org.apache.impala.planner.DataSourceScanNode#literalToColumnValue */ - public static String getTColumnValueAsString(TColumnValue value) { + public static String getTColumnValueAsString(TColumnValue value, + DatabaseAccessor dbAccessor_) { Preconditions.checkState(value != null); StringBuilder sb = new StringBuilder(); if (value.isSetBool_val()) { @@ -81,6 +83,8 @@ public class QueryConditionUtil { sb.append(value.double_val); } else if (value.isSetString_val()) { sb.append(String.format("'%s'", value.string_val)); + } else if (value.isSetDate_val()) { + sb.append(String.format("'%s'", dbAccessor_.getDateString(value.date_val))); } else { // TODO: Support data types of DECIMAL, TIMESTAMP, DATE and binary for predicates. // Keep in-sync with DataSourceScanNode.literalToColumnValue(). diff --git a/testdata/bin/create-ext-data-source-table.sql b/testdata/bin/create-ext-data-source-table.sql index 84bd02e17..803e4660d 100644 --- a/testdata/bin/create-ext-data-source-table.sql +++ b/testdata/bin/create-ext-data-source-table.sql @@ -62,7 +62,7 @@ CREATE TABLE alltypes_jdbc_datasource ( bigint_col BIGINT, float_col FLOAT, double_col DOUBLE, - date_string_col STRING, + date_col DATE, string_col STRING, timestamp_col TIMESTAMP) PRODUCED BY DATA SOURCE JdbcDataSource( @@ -84,7 +84,7 @@ CREATE TABLE alltypes_jdbc_datasource_2 ( bigint_col BIGINT, float_col FLOAT, double_col DOUBLE, - date_string_col STRING, + date_col DATE, string_col STRING, timestamp_col TIMESTAMP) PRODUCED BY DATA SOURCE JdbcDataSource( diff --git a/testdata/bin/load-ext-data-sources.sh b/testdata/bin/load-ext-data-sources.sh index 6d8541fdf..8b8541565 100755 --- a/testdata/bin/load-ext-data-sources.sh +++ b/testdata/bin/load-ext-data-sources.sh @@ -42,7 +42,7 @@ CREATE TABLE alltypes bigint_col BIGINT, float_col FLOAT, double_col DOUBLE PRECISION, - date_string_col VARCHAR(8), + date_col DATE, string_col VARCHAR(10), timestamp_col TIMESTAMP ); @@ -62,7 +62,7 @@ CREATE TABLE "AllTypesWithQuote" "Bigint_col" BIGINT, "Float_col" FLOAT, "Double_col" DOUBLE PRECISION, - "Date_string_col" VARCHAR(8), + "date_col" DATE, "String_col" VARCHAR(10), "Timestamp_col" TIMESTAMP ); @@ -77,6 +77,36 @@ sudo -u postgres psql -d functional -c "$loadCmd" loadCmd="COPY \"AllTypesWithQuote\" FROM '/tmp/jdbc_alltypes.csv' DELIMITER ',' CSV" sudo -u postgres psql -d functional -c "$loadCmd" +# Create impala tables and load data +cat > /tmp/impala_jdbc_alltypes.sql <<__EOT__ +USE FUNCTIONAL; +DROP TABLE IF EXISTS alltypes_with_date; +CREATE TABLE alltypes_with_date +( + id INT, + bool_col BOOLEAN, + tinyint_col SMALLINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + string_col STRING, + timestamp_col TIMESTAMP +) STORED as PARQUET; + +INSERT INTO alltypes_with_date +SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, + double_col, CAST(to_timestamp(date_string_col, 'MM/dd/yy') as DATE), string_col, + timestamp_col +FROM FUNCTIONAL.alltypes; +__EOT__ + +IMPALAD=${IMPALAD:-localhost} +${IMPALA_HOME}/bin/impala-shell.sh -i ${IMPALAD} -f /tmp/impala_jdbc_alltypes.sql + # Clean tmp files rm /tmp/jdbc_alltypes.* rm /tmp/jdbc_alltypes_with_quote.* +rm /tmp/impala_jdbc_alltypes.sql diff --git a/testdata/bin/setup-mysql-env.sh b/testdata/bin/setup-mysql-env.sh index d78f8a6e2..fed3e309e 100755 --- a/testdata/bin/setup-mysql-env.sh +++ b/testdata/bin/setup-mysql-env.sh @@ -93,7 +93,7 @@ CREATE TABLE alltypes bigint_col BIGINT, float_col FLOAT, double_col DOUBLE PRECISION, - date_string_col VARCHAR(8), + date_col DATE, string_col VARCHAR(10), timestamp_col TIMESTAMP ); @@ -114,7 +114,7 @@ CREATE TABLE AllTypesCaseSensitiveNames Bigint_col BIGINT, Float_col FLOAT, Double_col DOUBLE PRECISION, - Date_string_col VARCHAR(8), + Date_col DATE, String_col VARCHAR(10), Timestamp_col TIMESTAMP ); @@ -128,11 +128,17 @@ cat ${IMPALA_HOME}/testdata/target/AllTypes/* > /tmp/mysql_jdbc_alltypes.csv docker cp /tmp/mysql_jdbc_alltypes.csv mysql:/tmp loadCmd="LOAD DATA LOCAL INFILE '/tmp/mysql_jdbc_alltypes.csv' INTO TABLE alltypes \ - COLUMNS TERMINATED BY ','" + COLUMNS TERMINATED BY ',' (id, bool_col, tinyint_col, smallint_col, int_col, \ + bigint_col, float_col, double_col, @date_col, string_col, timestamp_col) \ + set date_col = STR_TO_DATE(@date_col, '%m/%d/%Y')" + docker exec -i mysql mysql -uroot -psecret functional --local-infile=1 <<< "$loadCmd" loadCmd="LOAD DATA LOCAL INFILE '/tmp/mysql_jdbc_alltypes.csv' INTO TABLE \ - AllTypesCaseSensitiveNames COLUMNS TERMINATED BY ','" + AllTypesCaseSensitiveNames COLUMNS TERMINATED BY ',' (id, bool_col, tinyint_col, \ + smallint_col, int_col, bigint_col, float_col, double_col, @date_col, string_col, \ + timestamp_col) set date_col = STR_TO_DATE(@date_col, '%m/%d/%Y')" + docker exec -i mysql mysql -uroot -psecret functional --local-infile=1 <<< "$loadCmd" EXT_DATA_SOURCE_SRC_PATH=${IMPALA_HOME}/java/ext-data-source diff --git a/testdata/workloads/functional-query/queries/QueryTest/impala-ext-jdbc-tables-predicates.test b/testdata/workloads/functional-query/queries/QueryTest/impala-ext-jdbc-tables-predicates.test new file mode 100644 index 000000000..f46b6a75f --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/impala-ext-jdbc-tables-predicates.test @@ -0,0 +1,182 @@ +==== +---- QUERY +# Create DataSource +DROP DATA SOURCE IF EXISTS TestJdbcDataSource; +CREATE DATA SOURCE TestJdbcDataSource +LOCATION '$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-data-source.jar' +CLASS 'org.apache.impala.extdatasource.jdbc.JdbcDataSource' +API_VERSION 'V1'; +---- RESULTS +'Data source has been created.' +==== +---- QUERY +# Show created DataSource +SHOW DATA SOURCES LIKE 'testjdbcdatasource'; +---- LABELS +NAME,LOCATION,CLASS NAME,API VERSION +---- RESULTS +'testjdbcdatasource',regex:'.*/test-warehouse/data-sources/jdbc-data-source.jar','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1' +---- TYPES +STRING,STRING,STRING,STRING +==== +---- QUERY +# Create external JDBC DataSource table +DROP TABLE IF EXISTS alltypes_jdbc_datasource; +CREATE TABLE alltypes_jdbc_datasource ( + id INT, + bool_col BOOLEAN, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + string_col STRING, + timestamp_col TIMESTAMP) +PRODUCED BY DATA SOURCE TestJdbcDataSource( +'{"database.type":"IMPALA", +"jdbc.url":"jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional", +"jdbc.auth":"AuthMech=0", +"jdbc.driver":"com.cloudera.impala.jdbc.Driver", +"driver.url":"$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar", +"dbcp.username":"impala", +"dbcp.password":"cloudera", +"table":"alltypes_with_date"}'); +---- RESULTS +'Table has been created.' +==== +---- QUERY +# Gets specified columns based on date predicate with operator '='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col = DATE '2009-01-02' order by id limit 5; +---- RESULTS +10,true,0,0,0,2009-01-02 +11,false,1,1.100000023841858,10.1,2009-01-02 +12,true,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,true,4,4.400000095367432,40.4,2009-01-02 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '='. +# with empty result +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col = DATE '1990-01-01' order by id limit 5; +---- RESULTS +==== +---- QUERY +# Gets specified columns based on date predicate with operator '>'. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col > DATE '2009-01-02' order by id limit 5; +---- RESULTS +20,true,0,0,0,2009-01-03 +21,false,1,1.100000023841858,10.1,2009-01-03 +22,true,2,2.200000047683716,20.2,2009-01-03 +23,false,3,3.299999952316284,30.3,2009-01-03 +24,true,4,4.400000095367432,40.4,2009-01-03 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '>'. +# with empty result +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col > DATE '2990-01-01' order by id limit 5; +---- RESULTS +==== +---- QUERY +# Gets specified columns based on date predicate with operator '<'. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col < DATE '2009-01-02' order by id limit 5; +---- RESULTS +0,true,0,0,0,2009-01-01 +1,false,1,1.100000023841858,10.1,2009-01-01 +2,true,2,2.200000047683716,20.2,2009-01-01 +3,false,3,3.299999952316284,30.3,2009-01-01 +4,true,4,4.400000095367432,40.4,2009-01-01 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '<'. +# with empty result +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col < DATE '1990-01-01' order by id limit 5; +---- RESULTS +==== +---- QUERY +# Gets specified columns based on date predicate with operator '>='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col >= DATE '2009-01-02' order by id limit 5; +---- RESULTS +10,true,0,0,0,2009-01-02 +11,false,1,1.100000023841858,10.1,2009-01-02 +12,true,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,true,4,4.400000095367432,40.4,2009-01-02 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '<='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col <= DATE '2009-01-02' order by id limit 5; +---- RESULTS +0,true,0,0,0,2009-01-01 +1,false,1,1.100000023841858,10.1,2009-01-01 +2,true,2,2.200000047683716,20.2,2009-01-01 +3,false,3,3.299999952316284,30.3,2009-01-01 +4,true,4,4.400000095367432,40.4,2009-01-01 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '!='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col != DATE '2009-01-01' order by id limit 5; +---- RESULTS +10,true,0,0,0,2009-01-02 +11,false,1,1.100000023841858,10.1,2009-01-02 +12,true,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,true,4,4.400000095367432,40.4,2009-01-02 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator 'between'. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col between DATE '2009-03-01' and DATE '2009-04-01' order by id limit 5; +---- RESULTS +590,true,0,0,0,2009-03-01 +591,false,1,1.100000023841858,10.1,2009-03-01 +592,true,2,2.200000047683716,20.2,2009-03-01 +593,false,3,3.299999952316284,30.3,2009-03-01 +594,true,4,4.400000095367432,40.4,2009-03-01 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Drop table +DROP TABLE alltypes_jdbc_datasource; +---- RESULTS +'Table has been dropped.' +==== +---- QUERY +# Drop DataSource +DROP DATA SOURCE TestJdbcDataSource; +---- RESULTS +'Data source has been dropped.' +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/jdbc-data-source.test b/testdata/workloads/functional-query/queries/QueryTest/jdbc-data-source.test index f5b73d568..9df510fc3 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/jdbc-data-source.test +++ b/testdata/workloads/functional-query/queries/QueryTest/jdbc-data-source.test @@ -31,7 +31,7 @@ CREATE TABLE alltypes_jdbc_datasource ( bigint_col BIGINT, float_col FLOAT, double_col DOUBLE, - date_string_col STRING, + date_col DATE, string_col STRING, timestamp_col TIMESTAMP) PRODUCED BY DATA SOURCE TestJdbcDataSource( @@ -58,7 +58,7 @@ CREATE TABLE alltypes_jdbc_datasource_2 ( bigint_col BIGINT, float_col FLOAT, double_col DOUBLE, - date_string_col STRING, + date_col DATE, string_col STRING, timestamp_col TIMESTAMP) PRODUCED BY DATA SOURCE TestJdbcDataSource( @@ -69,7 +69,7 @@ PRODUCED BY DATA SOURCE TestJdbcDataSource( "dbcp.username":"hiveuser", "dbcp.password":"password", "table":"AllTypesWithQuote", -"column.mapping":"id=id, bool_col=Bool_col, tinyint_col=Tinyint_col, smallint_col=Smallint_col, int_col=Int_col, bigint_col=Bigint_col, float_col=Float_col, double_col=Double_col, date_string_col=Date_string_col, string_col=String_col, timestamp=Timestamp"}'); +"column.mapping":"id=id, bool_col=Bool_col, tinyint_col=Tinyint_col, smallint_col=Smallint_col, int_col=Int_col, bigint_col=Bigint_col, float_col=Float_col, double_col=Double_col, date_col=date_col, string_col=String_col, timestamp=Timestamp"}'); ---- RESULTS 'Table has been created.' ==== @@ -98,42 +98,42 @@ select * from alltypes_jdbc_datasource where id > 10 and int_col< 5 limit 5 ---- RESULTS -11,false,1,1,1,10,1.100000023841858,10.1,'01/02/09','1',2009-01-02 00:11:00.450000000 -12,true,2,2,2,20,2.200000047683716,20.2,'01/02/09','2',2009-01-02 00:12:00.460000000 -13,false,3,3,3,30,3.299999952316284,30.3,'01/02/09','3',2009-01-02 00:13:00.480000000 -14,true,4,4,4,40,4.400000095367432,40.4,'01/02/09','4',2009-01-02 00:14:00.510000000 -20,true,0,0,0,0,0,0,'01/03/09','0',2009-01-03 00:20:00.900000000 +11,false,1,1,1,10,1.100000023841858,10.1,2009-01-02,'1',2009-01-02 00:11:00.450000000 +12,true,2,2,2,20,2.200000047683716,20.2,2009-01-02,'2',2009-01-02 00:12:00.460000000 +13,false,3,3,3,30,3.299999952316284,30.3,2009-01-02,'3',2009-01-02 00:13:00.480000000 +14,true,4,4,4,40,4.400000095367432,40.4,2009-01-02,'4',2009-01-02 00:14:00.510000000 +20,true,0,0,0,0,0,0,2009-01-03,'0',2009-01-03 00:20:00.900000000 ---- TYPES -INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP +INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP ==== ---- QUERY # Gets specified columns. -select id, bool_col, smallint_col, float_col, double_col, date_string_col +select id, bool_col, smallint_col, float_col, double_col, date_col from alltypes_jdbc_datasource where id > 10 and int_col< 5 limit 5 ---- RESULTS -11,false,1,1.100000023841858,10.1,'01/02/09' -12,true,2,2.200000047683716,20.2,'01/02/09' -13,false,3,3.299999952316284,30.3,'01/02/09' -14,true,4,4.400000095367432,40.4,'01/02/09' -20,true,0,0,0,'01/03/09' +11,false,1,1.100000023841858,10.1,2009-01-02 +12,true,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,true,4,4.400000095367432,40.4,2009-01-02 +20,true,0,0,0,2009-01-03 ---- TYPES -INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, STRING +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE ==== ---- QUERY # Gets specified columns from external jdbc table with case sensitive column names # and table name. -select id, bool_col, smallint_col, float_col, double_col, date_string_col +select id, bool_col, smallint_col, float_col, double_col, date_col from alltypes_jdbc_datasource_2 where id > 10 and int_col< 5 limit 5 ---- RESULTS -11,false,1,1.100000023841858,10.1,'01/02/09' -12,true,2,2.200000047683716,20.2,'01/02/09' -13,false,3,3.299999952316284,30.3,'01/02/09' -14,true,4,4.400000095367432,40.4,'01/02/09' -20,true,0,0,0,'01/03/09' +11,false,1,1.100000023841858,10.1,2009-01-02 +12,true,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,true,4,4.400000095367432,40.4,2009-01-02 +20,true,0,0,0,2009-01-03 ---- TYPES -INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, STRING +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE ==== ---- QUERY # Inner join with a non jdbc table @@ -177,6 +177,128 @@ order by a.id, b.id limit 10 INT, INT ==== ---- QUERY +# Gets specified columns based on date predicate with operator '='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col = DATE '2009-01-02' order by id limit 5; +---- RESULTS +10,true,0,0,0,2009-01-02 +11,false,1,1.100000023841858,10.1,2009-01-02 +12,true,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,true,4,4.400000095367432,40.4,2009-01-02 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '=' +# with empty result. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col = DATE '1990-01-01' order by id limit 5; +---- RESULTS +==== +---- QUERY +# Gets specified columns based on date predicate with operator '>'. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col > DATE '2009-01-02' order by id limit 5; +---- RESULTS +20,true,0,0,0,2009-01-03 +21,false,1,1.100000023841858,10.1,2009-01-03 +22,true,2,2.200000047683716,20.2,2009-01-03 +23,false,3,3.299999952316284,30.3,2009-01-03 +24,true,4,4.400000095367432,40.4,2009-01-03 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '>' +# with empty result. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col > DATE '2990-01-01' order by id limit 5; +---- RESULTS +==== +---- QUERY +# Gets specified columns based on date predicate with operator '<'. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col < DATE '2009-01-02' order by id limit 5; +---- RESULTS +0,true,0,0,0,2009-01-01 +1,false,1,1.100000023841858,10.1,2009-01-01 +2,true,2,2.200000047683716,20.2,2009-01-01 +3,false,3,3.299999952316284,30.3,2009-01-01 +4,true,4,4.400000095367432,40.4,2009-01-01 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '<' +# with empty result. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col < DATE '1990-01-01' order by id limit 5; +---- RESULTS +==== +---- QUERY +# Gets specified columns based on date predicate with operator '>='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col >= DATE '2009-01-02' order by id limit 5; +---- RESULTS +10,true,0,0,0,2009-01-02 +11,false,1,1.100000023841858,10.1,2009-01-02 +12,true,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,true,4,4.400000095367432,40.4,2009-01-02 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '<='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col <= DATE '2009-01-02' order by id limit 5; +---- RESULTS +0,true,0,0,0,2009-01-01 +1,false,1,1.100000023841858,10.1,2009-01-01 +2,true,2,2.200000047683716,20.2,2009-01-01 +3,false,3,3.299999952316284,30.3,2009-01-01 +4,true,4,4.400000095367432,40.4,2009-01-01 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '!='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col != DATE '2009-01-01' order by id limit 5; +---- RESULTS +10,true,0,0,0,2009-01-02 +11,false,1,1.100000023841858,10.1,2009-01-02 +12,true,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,true,4,4.400000095367432,40.4,2009-01-02 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator 'between'. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col between DATE '2009-03-01' and DATE '2009-04-01' order by id limit 5; +---- RESULTS +590,true,0,0,0,2009-03-01 +591,false,1,1.100000023841858,10.1,2009-03-01 +592,true,2,2.200000047683716,20.2,2009-03-01 +593,false,3,3.299999952316284,30.3,2009-03-01 +594,true,4,4.400000095367432,40.4,2009-03-01 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY # Drop table DROP TABLE alltypes_jdbc_datasource; ---- RESULTS diff --git a/testdata/workloads/functional-query/queries/QueryTest/mysql-ext-jdbc-tables.test b/testdata/workloads/functional-query/queries/QueryTest/mysql-ext-jdbc-tables.test index 4d42bbc3c..0d96ee5e7 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/mysql-ext-jdbc-tables.test +++ b/testdata/workloads/functional-query/queries/QueryTest/mysql-ext-jdbc-tables.test @@ -31,7 +31,7 @@ CREATE TABLE alltypes_jdbc_datasource ( bigint_col BIGINT, float_col FLOAT, double_col DOUBLE, - date_string_col STRING, + date_col DATE, string_col STRING, timestamp_col TIMESTAMP) PRODUCED BY DATA SOURCE TestJdbcDataSource( @@ -58,7 +58,7 @@ CREATE TABLE alltypes_jdbc_datasource_2 ( bigint_col BIGINT, float_col FLOAT, double_col DOUBLE, - date_string_col STRING, + date_col DATE, string_col STRING, timestamp_col TIMESTAMP) PRODUCED BY DATA SOURCE TestJdbcDataSource( @@ -69,7 +69,7 @@ PRODUCED BY DATA SOURCE TestJdbcDataSource( "dbcp.username":"hiveuser", "dbcp.password":"password", "table":"AllTypesCaseSensitiveNames", -"column.mapping":"id=id, bool_col=Bool_col, tinyint_col=Tinyint_col, smallint_col=Smallint_col, int_col=Int_col, bigint_col=Bigint_col, float_col=Float_col, double_col=Double_col, date_string_col=Date_string_col, string_col=String_col, timestamp=Timestamp"}'); +"column.mapping":"id=id, bool_col=Bool_col, tinyint_col=Tinyint_col, smallint_col=Smallint_col, int_col=Int_col, bigint_col=Bigint_col, float_col=Float_col, double_col=Double_col, date_col=Date_col, string_col=String_col, timestamp=Timestamp"}'); ---- RESULTS 'Table has been created.' ==== @@ -98,42 +98,42 @@ select * from alltypes_jdbc_datasource where id > 10 and int_col< 5 limit 5 ---- RESULTS -11,false,1,1,1,10,1.100000023841858,10.1,'01/02/09','1',2009-01-02 00:11:00 -12,false,2,2,2,20,2.200000047683716,20.2,'01/02/09','2',2009-01-02 00:12:00 -13,false,3,3,3,30,3.299999952316284,30.3,'01/02/09','3',2009-01-02 00:13:00 -14,false,4,4,4,40,4.400000095367432,40.4,'01/02/09','4',2009-01-02 00:14:01 -20,false,0,0,0,0,0,0,'01/03/09','0',2009-01-03 00:20:01 +11,false,1,1,1,10,1.100000023841858,10.1,2009-01-02,'1',2009-01-02 00:11:00 +12,false,2,2,2,20,2.200000047683716,20.2,2009-01-02,'2',2009-01-02 00:12:00 +13,false,3,3,3,30,3.299999952316284,30.3,2009-01-02,'3',2009-01-02 00:13:00 +14,false,4,4,4,40,4.400000095367432,40.4,2009-01-02,'4',2009-01-02 00:14:01 +20,false,0,0,0,0,0,0,2009-01-03,'0',2009-01-03 00:20:01 ---- TYPES -INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP +INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP ==== ---- QUERY # Gets specified columns. -select id, bool_col, smallint_col, float_col, double_col, date_string_col +select id, bool_col, smallint_col, float_col, double_col, date_col from alltypes_jdbc_datasource where id > 10 and int_col< 5 limit 5 ---- RESULTS -11,false,1,1.100000023841858,10.1,'01/02/09' -12,false,2,2.200000047683716,20.2,'01/02/09' -13,false,3,3.299999952316284,30.3,'01/02/09' -14,false,4,4.400000095367432,40.4,'01/02/09' -20,false,0,0,0,'01/03/09' +11,false,1,1.100000023841858,10.1,2009-01-02 +12,false,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,false,4,4.400000095367432,40.4,2009-01-02 +20,false,0,0,0,2009-01-03 ---- TYPES -INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, STRING +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE ==== ---- QUERY # Gets specified columns from external jdbc table with case sensitive column names # and table name. -select id, bool_col, smallint_col, float_col, double_col, date_string_col +select id, bool_col, smallint_col, float_col, double_col, date_col from alltypes_jdbc_datasource_2 where id > 10 and int_col< 5 limit 5 ---- RESULTS -11,false,1,1.100000023841858,10.1,'01/02/09' -12,false,2,2.200000047683716,20.2,'01/02/09' -13,false,3,3.299999952316284,30.3,'01/02/09' -14,false,4,4.400000095367432,40.4,'01/02/09' -20,false,0,0,0,'01/03/09' +11,false,1,1.100000023841858,10.1,2009-01-02 +12,false,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,false,4,4.400000095367432,40.4,2009-01-02 +20,false,0,0,0,2009-01-03 ---- TYPES -INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, STRING +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE ==== ---- QUERY # Inner join with a non jdbc table @@ -177,6 +177,128 @@ order by a.id, b.id limit 10 INT, INT ==== ---- QUERY +# Gets specified columns based on date predicate with operator '='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col = DATE '2009-01-09' order by id limit 5; +---- RESULTS +80,false,0,0,0,2009-01-09 +81,false,1,1.100000023841858,10.1,2009-01-09 +82,false,2,2.200000047683716,20.2,2009-01-09 +83,false,3,3.299999952316284,30.3,2009-01-09 +84,false,4,4.400000095367432,40.4,2009-01-09 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '=' +# with empty results. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col = DATE '1990-01-01' order by id limit 5; +---- RESULTS +==== +---- QUERY +# Gets specified columns based on date predicate with operator '>'. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col > DATE '2009-01-02' order by id limit 5; +---- RESULTS +20,false,0,0,0,2009-01-03 +21,false,1,1.100000023841858,10.1,2009-01-03 +22,false,2,2.200000047683716,20.2,2009-01-03 +23,false,3,3.299999952316284,30.3,2009-01-03 +24,false,4,4.400000095367432,40.4,2009-01-03 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '>' +# with empty results. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col > DATE '2990-01-01' order by id limit 5; +---- RESULTS +==== +---- QUERY +# Gets specified columns based on date predicate with operator '<'. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col < DATE '2009-01-02' order by id limit 5; +---- RESULTS +0,false,0,0,0,2009-01-01 +1,false,1,1.100000023841858,10.1,2009-01-01 +2,false,2,2.200000047683716,20.2,2009-01-01 +3,false,3,3.299999952316284,30.3,2009-01-01 +4,false,4,4.400000095367432,40.4,2009-01-01 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '<' +# with empty results. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col < DATE '1990-01-01' order by id limit 5; +---- RESULTS +==== +---- QUERY +# Gets specified columns based on date predicate with operator '>='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col >= DATE '2009-01-02' order by id limit 5; +---- RESULTS +10,false,0,0,0,2009-01-02 +11,false,1,1.100000023841858,10.1,2009-01-02 +12,false,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,false,4,4.400000095367432,40.4,2009-01-02 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '<='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col <= DATE '2009-01-02' order by id limit 5; +---- RESULTS +0,false,0,0,0,2009-01-01 +1,false,1,1.100000023841858,10.1,2009-01-01 +2,false,2,2.200000047683716,20.2,2009-01-01 +3,false,3,3.299999952316284,30.3,2009-01-01 +4,false,4,4.400000095367432,40.4,2009-01-01 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator '!='. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col != DATE '2009-01-01' order by id limit 5; +---- RESULTS +10,false,0,0,0,2009-01-02 +11,false,1,1.100000023841858,10.1,2009-01-02 +12,false,2,2.200000047683716,20.2,2009-01-02 +13,false,3,3.299999952316284,30.3,2009-01-02 +14,false,4,4.400000095367432,40.4,2009-01-02 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY +# Gets specified columns based on date predicate with operator 'between'. +select id, bool_col, smallint_col, float_col, double_col, date_col +from alltypes_jdbc_datasource +where date_col between DATE '2009-03-01' and DATE '2009-04-01' order by id limit 5; +---- RESULTS +590,false,0,0,0,2009-03-01 +591,false,1,1.100000023841858,10.1,2009-03-01 +592,false,2,2.200000047683716,20.2,2009-03-01 +593,false,3,3.299999952316284,30.3,2009-03-01 +594,false,4,4.400000095367432,40.4,2009-03-01 +---- TYPES +INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, DATE +==== +---- QUERY # Drop table DROP TABLE alltypes_jdbc_datasource; ---- RESULTS diff --git a/tests/custom_cluster/test_ext_data_sources.py b/tests/custom_cluster/test_ext_data_sources.py index 7ebffa8b0..765f1bb26 100644 --- a/tests/custom_cluster/test_ext_data_sources.py +++ b/tests/custom_cluster/test_ext_data_sources.py @@ -247,3 +247,9 @@ class TestImpalaExtJdbcTables(CustomClusterTestSuite): "No matching option REQUEST_POOL found in the queries site." assert "SET DEBUG_ACTION" not in response_json, \ "Matching option DEBUG_ACTION found in the queries site." + + @pytest.mark.execute_serially + def test_impala_ext_jdbc_tables_predicates(self, vector, unique_database): + """Run tests for external jdbc tables in Impala cluster for new predicates""" + self.run_test_case( + 'QueryTest/impala-ext-jdbc-tables-predicates', vector, use_db=unique_database)
