zabetak commented on a change in pull request #2514:
URL: https://github.com/apache/hive/pull/2514#discussion_r676453357
##########
File path: ql/src/test/queries/clientpositive/external_jdbc_rowcount.q
##########
@@ -0,0 +1,200 @@
+--! qt:dataset:src
+
+set hive.strict.checks.cartesian.product= false;
Review comment:
Is it necessary to set `hive.strict.checks.cartesian.procuct` property
to `false`?
##########
File path: ql/src/test/queries/clientpositive/external_jdbc_rowcount.q
##########
@@ -0,0 +1,200 @@
+--! qt:dataset:src
+
+set hive.strict.checks.cartesian.product= false;
+
+CREATE TEMPORARY FUNCTION dboutput AS
'org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput';
+
+
+FROM src
Review comment:
Is it necessary to create a query over `src` table? If you simply write
`SELECT dboutput (...)` it doesn't work?
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
##########
@@ -93,6 +89,16 @@ public Double getDistinctRowCount(HiveJoin rel,
RelMetadataQuery mq, ImmutableBi
groupKey, predicate, true);
}
+ public Double getDistinctRowCount(RelNode r, RelMetadataQuery mq,
ImmutableBitSet groupKey,
+ RexNode predicate) {
+ if (r instanceof SingleRel) {
+ return mq.getDistinctRowCount(r.getInput(0), groupKey, predicate);
+ } else if (r instanceof JdbcHiveTableScan) {
+ return getDistinctRowCount(((JdbcHiveTableScan) r).getHiveTableScan(),
mq, groupKey, predicate);
+ }
+
+ return super.getDistinctRowCount(r, mq, groupKey, predicate);
+ }
Review comment:
Can you explain a bit what's the general idea behind this change cause
the logic looks a bit brittle.
`SingleRel` has many sub-classes and some of them can greatly affect the row
count so saying that you should get the row count by considering only its input
doesn't look right.
Regarding the `r istanceof JdbcHiveTableScan` check I think it would be
better to add a separate method `getDistictRowCount(JdbcHiveTableScan...)` and
do exactly what you did here.
##########
File path: ql/src/test/queries/clientpositive/external_jdbc_rowcount.q
##########
@@ -0,0 +1,200 @@
+--! qt:dataset:src
+
+set hive.strict.checks.cartesian.product= false;
+
+CREATE TEMPORARY FUNCTION dboutput AS
'org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput';
+
+
+FROM src
+
+SELECT
+
+dboutput (
'jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'CREATE TABLE SIMPLE_DERBY_TABLE1 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL,
"dkey" DOUBLE)' ),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES
(?,?,?,?)','20','20','20.0','20.0'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES
(?,?,?,?)','-20','-20','-20.0','-20.0'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES
(?,?,?,?)','100','-15','65.0','-74.0'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES
(?,?,?,?)','44','53','-455.454','330.76')
Review comment:
Do you need to insert data to the tables in order to reproduce the
problem?
##########
File path: ql/src/test/queries/clientpositive/external_jdbc_rowcount.q
##########
@@ -0,0 +1,200 @@
+--! qt:dataset:src
Review comment:
Is it necessary to use the `src` dataset?
##########
File path: ql/src/test/queries/clientpositive/external_jdbc_rowcount.q
##########
@@ -0,0 +1,200 @@
+--! qt:dataset:src
+
+set hive.strict.checks.cartesian.product= false;
+
+CREATE TEMPORARY FUNCTION dboutput AS
'org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput';
+
+
+FROM src
+
+SELECT
+
+dboutput (
'jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'CREATE TABLE SIMPLE_DERBY_TABLE1 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL,
"dkey" DOUBLE)' ),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES
(?,?,?,?)','20','20','20.0','20.0'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES
(?,?,?,?)','-20','-20','-20.0','-20.0'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES
(?,?,?,?)','100','-15','65.0','-74.0'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES
(?,?,?,?)','44','53','-455.454','330.76')
+
+limit 1;
+
+FROM src
+
+SELECT
+
+dboutput (
'jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'CREATE TABLE SIMPLE_DERBY_TABLE2 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL,
"dkey" DOUBLE, "datekey" DATE)' ),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE2 ("ikey","bkey","fkey","dkey","datekey")
VALUES (?,?,?,?,?)','20','20','20.0','20.0','1999-02-22'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE2 ("ikey","bkey","fkey","dkey","datekey")
VALUES (?,?,?,?,?)','-20','8','9.0','11.0','2000-03-15'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE2 ("ikey","bkey","fkey","dkey","datekey")
VALUES (?,?,?,?,?)','101','-16','66.0','-75.0','2010-04-01'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE2 ("ikey","bkey","fkey","dkey","datekey")
VALUES (?,?,?,?,?)','40','50','-455.4543','330.767','2010-04-02')
+
+limit 1;
+
+FROM src
+
+SELECT
+
+dboutput (
'jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'CREATE TABLE SIMPLE_DERBY_TABLE3 ("ikey2" INTEGER, "bkey2" BIGINT, "fkey2"
REAL, "dkey2" DOUBLE)' ),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE3 ("ikey2","bkey2","fkey2","dkey2") VALUES
(?,?,?,?)','10','10','10.0','10.0'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE3 ("ikey2","bkey2","fkey2","dkey2") VALUES
(?,?,?,?)','-10','-10','-10.0','-10.0'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE3 ("ikey2","bkey2","fkey2","dkey2") VALUES
(?,?,?,?)','200','-25','55.0','-84.0'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE3 ("ikey2","bkey2","fkey2","dkey2") VALUES
(?,?,?,?)','54','53','-455.454','330.76')
+
+limit 1;
+
+FROM src
+
+SELECT
+
+dboutput (
'jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'CREATE TABLE SIMPLE_DERBY_TABLE4 ("ikey2" INTEGER, "bkey2" BIGINT, "fkey2"
REAL, "dkey2" DOUBLE, "datekey2" DATE)' ),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE4 ("ikey2","bkey2","fkey2","dkey2","datekey2")
VALUES (?,?,?,?,?)','10','10','10.0','10.0','1999-02-22'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE4 ("ikey2","bkey2","fkey2","dkey2","datekey2")
VALUES (?,?,?,?,?)','-10','7','9.0','12.0','2000-03-15'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE4 ("ikey2","bkey2","fkey2","dkey2","datekey2")
VALUES (?,?,?,?,?)','102','-16','66.0','-75.0','2010-04-01'),
+
+dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'INSERT INTO SIMPLE_DERBY_TABLE4 ("ikey2","bkey2","fkey2","dkey2","datekey2")
VALUES (?,?,?,?,?)','40','50','-455.4543','330.767','2010-04-02')
+
+limit 1;
+
+CREATE EXTERNAL TABLE ext_simple_derby_table1
+(
+ ikey int,
+ bkey bigint,
+ fkey float,
+ dkey double
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+ "hive.sql.database.type" = "DERBY",
+ "hive.sql.jdbc.driver" =
"org.apache.derby.jdbc.EmbeddedDriver",
+ "hive.sql.jdbc.url" =
"jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true;collation=TERRITORY_BASED:PRIMARY",
+ "hive.sql.dbcp.username" = "APP",
+ "hive.sql.dbcp.password" = "mine",
+ "hive.sql.table" = "SIMPLE_DERBY_TABLE1",
+ "hive.sql.dbcp.maxActive" = "1"
+);
+
+
+CREATE EXTERNAL TABLE ext_simple_derby_table2
+(
+ ikey int,
+ bkey bigint,
+ fkey float,
+ dkey double,
+ datekey string
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+ "hive.sql.database.type" = "DERBY",
+ "hive.sql.jdbc.driver" =
"org.apache.derby.jdbc.EmbeddedDriver",
+ "hive.sql.jdbc.url" =
"jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true;collation=TERRITORY_BASED:PRIMARY",
+ "hive.sql.dbcp.username" = "APP",
+ "hive.sql.dbcp.password" = "mine",
+ "hive.sql.table" = "SIMPLE_DERBY_TABLE2",
+ "hive.sql.dbcp.maxActive" = "1"
+);
+
+CREATE EXTERNAL TABLE ext_simple_derby_table3
+(
+ ikey2 int,
+ bkey2 bigint,
+ fkey2 float,
+ dkey2 double
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+ "hive.sql.database.type" = "DERBY",
+ "hive.sql.jdbc.driver" =
"org.apache.derby.jdbc.EmbeddedDriver",
+ "hive.sql.jdbc.url" =
"jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true;collation=TERRITORY_BASED:PRIMARY",
+ "hive.sql.dbcp.username" = "APP",
+ "hive.sql.dbcp.password" = "mine",
+ "hive.sql.table" = "SIMPLE_DERBY_TABLE3",
+ "hive.sql.dbcp.maxActive" = "1"
+);
+
+
+CREATE EXTERNAL TABLE ext_simple_derby_table4
+(
+ ikey2 int,
+ bkey2 bigint,
+ fkey2 float,
+ dkey2 double,
+ datekey2 string
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+ "hive.sql.database.type" = "DERBY",
+ "hive.sql.jdbc.driver" =
"org.apache.derby.jdbc.EmbeddedDriver",
+ "hive.sql.jdbc.url" =
"jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true;collation=TERRITORY_BASED:PRIMARY",
+ "hive.sql.dbcp.username" = "APP",
+ "hive.sql.dbcp.password" = "mine",
+ "hive.sql.table" = "SIMPLE_DERBY_TABLE4",
+ "hive.sql.dbcp.maxActive" = "1"
+);
+
+explain cbo
+with t1 as (select fkey, ikey, sum(dkey) as dk_sum, sum(dkey2) as dk2_sum
+ from ext_simple_derby_table1 left join ext_simple_derby_table3
+ on ikey = ikey2
+ where fkey2 is null
+ group by fkey, ikey),
+t2 as (select datekey, fkey, ikey, sum(dkey) as dk_sum2, sum(dkey2) as dk2_sum2
+ from ext_simple_derby_table2 left join ext_simple_derby_table4
+ on ikey = ikey2
+ where fkey2 is null
+ group by datekey, fkey, ikey)
+select t1.fkey, t2.ikey, sum(t1.ikey)
+from t1 left join t2
+on t1.ikey = t2.ikey AND t1.fkey = t2.fkey
+where t2.fkey is null
+group by t2.datekey, t1.fkey, t2.ikey;
+
+
+with t1 as (select fkey, ikey, sum(dkey) as dk_sum, sum(dkey2) as dk2_sum
+ from ext_simple_derby_table1 left join ext_simple_derby_table3
+ on ikey = ikey2
+ where fkey2 is null
+ group by fkey, ikey),
+t2 as (select datekey, fkey, ikey, sum(dkey) as dk_sum2, sum(dkey2) as dk2_sum2
+ from ext_simple_derby_table2 left join ext_simple_derby_table4
+ on ikey = ikey2
+ where fkey2 is null
+ group by datekey, fkey, ikey)
+select t1.fkey, t2.ikey, sum(t1.ikey)
+from t1 left join t2
+on t1.ikey = t2.ikey AND t1.fkey = t2.fkey
+where t2.fkey is null
+group by t2.datekey, t1.fkey, t2.ikey;
Review comment:
Does the problem affect the query execution? If not then I guess you can
remove this part.
##########
File path:
ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out
##########
@@ -5155,15 +5155,15 @@ STAGE PLANS:
FROM (SELECT "ss_sold_time_sk", "ss_hdemo_sk", "ss_store_sk"
FROM "STORE_SALES"
WHERE "ss_hdemo_sk" IS NOT NULL AND ("ss_sold_time_sk" IS NOT NULL AND
"ss_store_sk" IS NOT NULL)) AS "t0"
-INNER JOIN (SELECT "s_store_sk"
-FROM "STORE"
-WHERE "s_store_name" = 'ese' AND "s_store_sk" IS NOT NULL) AS "t2" ON
"t0"."ss_store_sk" = "t2"."s_store_sk"
+INNER JOIN (SELECT "hd_demo_sk"
+FROM "HOUSEHOLD_DEMOGRAPHICS"
+WHERE ("hd_vehicle_count" <= 5 OR ("hd_vehicle_count" <= 2 OR
"hd_vehicle_count" <= 3)) AND ("hd_dep_count" = 4 AND "hd_vehicle_count" <= 5
OR ("hd_dep_count" = 2 AND "hd_vehicle_count" <= 2 OR "hd_dep_count" = 0 AND
"hd_vehicle_count" <= 3)) AND ("hd_dep_count" IN (4, 2, 0) AND "hd_demo_sk" IS
NOT NULL)) AS "t2" ON "t0"."ss_hdemo_sk" = "t2"."hd_demo_sk"
INNER JOIN (SELECT "t_time_sk"
FROM "TIME_DIM"
WHERE "t_minute" >= 30 AND ("t_hour" = 8 AND "t_time_sk" IS NOT NULL)) AS "t4"
ON "t0"."ss_sold_time_sk" = "t4"."t_time_sk"
-INNER JOIN (SELECT "hd_demo_sk"
-FROM "HOUSEHOLD_DEMOGRAPHICS"
-WHERE ("hd_vehicle_count" <= 5 OR ("hd_vehicle_count" <= 2 OR
"hd_vehicle_count" <= 3)) AND ("hd_dep_count" = 4 AND "hd_vehicle_count" <= 5
OR ("hd_dep_count" = 2 AND "hd_vehicle_count" <= 2 OR "hd_dep_count" = 0 AND
"hd_vehicle_count" <= 3)) AND ("hd_dep_count" IN (4, 2, 0) AND "hd_demo_sk" IS
NOT NULL)) AS "t6" ON "t0"."ss_hdemo_sk" = "t6"."hd_demo_sk"
+INNER JOIN (SELECT "s_store_sk"
+FROM "STORE"
+WHERE "s_store_name" = 'ese' AND "s_store_sk" IS NOT NULL) AS "t6" ON
"t0"."ss_store_sk" = "t6"."s_store_sk"
Review comment:
It seems that join order has changed in this and the following queries?
Is this expected?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]