HAWQ-1122. Fix bug in index dxl translators that can't translatte ScalarArrayOpExpr [#126158185]
Orca couldn't pickup plan that uses index scan for the following cases: select * from btree_tbl where a in (1,2); --> Orca generated table scan instead of index scan select * from bitmap_tbl where a in (1,2); --> Orca generated table scan instead of bitmap scan Orca failed to consider the case that uses ArrayComp when trying to pick up index. The issue has been fixed in this patch. Closes #993 This is cherry-picked from greenplum-db/gpdb@cfafef0005191a63973e53076c5e08b0face68a0 Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/15fa66e7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/15fa66e7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/15fa66e7 Branch: refs/heads/master Commit: 15fa66e76820ff5f5aff97812016790ea2ff72bb Parents: 71caebc Author: Haisheng Yuan <[email protected]> Authored: Mon Aug 8 22:23:07 2016 -0700 Committer: rlei <[email protected]> Committed: Fri Oct 28 11:52:20 2016 +0800 ---------------------------------------------------------------------- .../gpopt/translate/CTranslatorDXLToPlStmt.cpp | 47 ++++- src/include/gpopt/translate/CIndexQualInfo.h | 17 +- src/test/regress/expected/gp_optimizer.out | 180 +++++++++++++++---- src/test/regress/sql/gp_optimizer.sql | 21 +++ 4 files changed, 218 insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/15fa66e7/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp ---------------------------------------------------------------------- diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index dabfeb9..5cbae05 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -1524,7 +1524,8 @@ CTranslatorDXLToPlStmt::TranslateIndexConditions Expr *pexprOrigIndexCond = m_pdxlsctranslator->PexprFromDXLNodeScalar(pdxlnIndexCond, &mapcidvarplstmt); Expr *pexprIndexCond = m_pdxlsctranslator->PexprFromDXLNodeScalar(pdxlnIndexCond, &mapcidvarplstmt); - GPOS_ASSERT(IsA(pexprIndexCond, OpExpr) && "expected OpExpr in index qual"); + GPOS_ASSERT((IsA(pexprIndexCond, OpExpr) || IsA(pexprIndexCond, ScalarArrayOpExpr)) + && "expected OpExpr or ScalarArrayOpExpr in index qual"); // for indexonlyscan, we already have the attno referring to the index if (!fIndexOnlyScan) @@ -1535,9 +1536,45 @@ CTranslatorDXLToPlStmt::TranslateIndexConditions } // find index key's attno - List *plistArgs = ((OpExpr *) pexprIndexCond)->args; + List *plistArgs = NULL; + if (IsA(pexprIndexCond, OpExpr)) + { + plistArgs = ((OpExpr *) pexprIndexCond)->args; + } + else + { + plistArgs = ((ScalarArrayOpExpr *) pexprIndexCond)->args; + } + Node *pnodeFst = (Node *) lfirst(gpdb::PlcListHead(plistArgs)); Node *pnodeSnd = (Node *) lfirst(gpdb::PlcListTail(plistArgs)); + + BOOL fRelabel = false; + if (IsA(pnodeFst, RelabelType) && IsA(((RelabelType *) pnodeFst)->arg, Var)) + { + pnodeFst = (Node *) ((RelabelType *) pnodeFst)->arg; + fRelabel = true; + } + else if (IsA(pnodeSnd, RelabelType) && IsA(((RelabelType *) pnodeSnd)->arg, Var)) + { + pnodeSnd = (Node *) ((RelabelType *) pnodeSnd)->arg; + fRelabel = true; + } + + if (fRelabel) + { + List *plNewArgs = ListMake2(pnodeFst, pnodeSnd); + gpdb::GPDBFree(plistArgs); + if (IsA(pexprIndexCond, OpExpr)) + { + ((OpExpr *) pexprIndexCond)->args = plNewArgs; + } + else + { + ((ScalarArrayOpExpr *) pexprIndexCond)->args = plNewArgs; + } + } + GPOS_ASSERT(IsA(pnodeFst, Var) || IsA(pnodeSnd, Var) && "expected index key in index qual"); INT iAttno = 0; @@ -1566,7 +1603,7 @@ CTranslatorDXLToPlStmt::TranslateIndexConditions GPOS_ASSERT(!fRecheck); // create index qual - pdrgpindexqualinfo->Append(GPOS_NEW(m_pmp) CIndexQualInfo(iAttno, (OpExpr *)pexprIndexCond, (OpExpr *)pexprOrigIndexCond, (StrategyNumber) iSN, oidIndexSubtype)); + pdrgpindexqualinfo->Append(GPOS_NEW(m_pmp) CIndexQualInfo(iAttno, pexprIndexCond, pexprOrigIndexCond, (StrategyNumber) iSN, oidIndexSubtype)); } // the index quals much be ordered by attribute number @@ -1576,8 +1613,8 @@ CTranslatorDXLToPlStmt::TranslateIndexConditions for (ULONG ul = 0; ul < ulLen; ul++) { CIndexQualInfo *pindexqualinfo = (*pdrgpindexqualinfo)[ul]; - *pplIndexConditions = gpdb::PlAppendElement(*pplIndexConditions, pindexqualinfo->m_popExpr); - *pplIndexOrigConditions = gpdb::PlAppendElement(*pplIndexOrigConditions, pindexqualinfo->m_popOriginalExpr); + *pplIndexConditions = gpdb::PlAppendElement(*pplIndexConditions, pindexqualinfo->m_pexpr); + *pplIndexOrigConditions = gpdb::PlAppendElement(*pplIndexOrigConditions, pindexqualinfo->m_pexprOriginal); *pplIndexStratgey = gpdb::PlAppendInt(*pplIndexStratgey, pindexqualinfo->m_sn); *pplIndexSubtype = gpdb::PlAppendOid(*pplIndexSubtype, pindexqualinfo->m_oidIndexSubtype); } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/15fa66e7/src/include/gpopt/translate/CIndexQualInfo.h ---------------------------------------------------------------------- diff --git a/src/include/gpopt/translate/CIndexQualInfo.h b/src/include/gpopt/translate/CIndexQualInfo.h index b5076b3..73d9052 100644 --- a/src/include/gpopt/translate/CIndexQualInfo.h +++ b/src/include/gpopt/translate/CIndexQualInfo.h @@ -58,10 +58,10 @@ namespace gpdxl AttrNumber m_attno; // index qual expression tailored for GPDB - OpExpr *m_popExpr; + Expr *m_pexpr; // original index qual expression - OpExpr *m_popOriginalExpr; + Expr *m_pexprOriginal; // index strategy information StrategyNumber m_sn; @@ -73,18 +73,21 @@ namespace gpdxl CIndexQualInfo ( AttrNumber attno, - OpExpr *popExpr, - OpExpr *popOriginalExpr, + Expr *pexpr, + Expr *pexprOriginal, StrategyNumber sn, OID oidIndexSubtype ) : m_attno(attno), - m_popExpr(popExpr), - m_popOriginalExpr(popOriginalExpr), + m_pexpr(pexpr), + m_pexprOriginal(pexprOriginal), m_sn(sn), m_oidIndexSubtype(oidIndexSubtype) - {} + { + GPOS_ASSERT((IsA(m_pexpr, OpExpr) && IsA(m_pexprOriginal, OpExpr)) || + (IsA(m_pexpr, ScalarArrayOpExpr) && IsA(m_pexprOriginal, ScalarArrayOpExpr))); + } // dtor ~CIndexQualInfo() http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/15fa66e7/src/test/regress/expected/gp_optimizer.out ---------------------------------------------------------------------- diff --git a/src/test/regress/expected/gp_optimizer.out b/src/test/regress/expected/gp_optimizer.out index 4da8197..8444eb3 100644 --- a/src/test/regress/expected/gp_optimizer.out +++ b/src/test/regress/expected/gp_optimizer.out @@ -8729,42 +8729,152 @@ revoke all on can_set_tag_audit from unpriv; drop role unpriv; drop table can_set_tag_target; drop table can_set_tag_audit; +-- start_ignore +create language plpythonu; +-- end_ignore +-- Checking if ORCA uses parser's canSetTag for CREATE TABLE AS SELECT +create or replace function canSetTag_Func(x int) returns int as $$ + if (x is None): + return 0 + else: + return x * 3 +$$ language plpythonu; +create table canSetTag_input_data (domain integer, class integer, attr text, value integer) + distributed by (domain); +insert into canSetTag_input_data values(1, 1, 'A', 1); +insert into canSetTag_input_data values(2, 1, 'A', 0); +insert into canSetTag_input_data values(3, 0, 'B', 1); +create table canSetTag_bug_table as +SELECT attr, class, (select canSetTag_Func(count(distinct class)::int) from canSetTag_input_data) + as dclass FROM canSetTag_input_data GROUP BY attr, class distributed by (attr); +drop function canSetTag_Func(x int); +drop table canSetTag_bug_table; +drop table canSetTag_input_data; +-- Test B-Tree index scan with in list +CREATE TABLE btree_test as SELECT * FROM generate_series(1,100) as a distributed randomly; +CREATE INDEX btree_test_index ON btree_test(a); +EXPLAIN SELECT * FROM btree_test WHERE a in (select 1); + QUERY PLAN +----------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.00 rows=1 width=4) + -> Nested Loop (cost=0.00..2.00 rows=1 width=4) + Join Filter: true + -> Result (cost=0.00..0.00 rows=1 width=4) + -> Result (cost=0.00..0.00 rows=1 width=1) + -> Index Scan using btree_test_index on btree_test (cost=0.00..2.00 rows=1 width=4) + Index Cond: btree_test.a = (1) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: PQO version 1.647 +(9 rows) + +EXPLAIN SELECT * FROM btree_test WHERE a in (1, 47); + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.00 rows=3 width=4) + -> Index Scan using btree_test_index on btree_test (cost=0.00..6.00 rows=1 width=4) + Index Cond: a = ANY ('{1,47}'::integer[]) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: PQO version 1.647 +(5 rows) + +EXPLAIN SELECT * FROM btree_test WHERE a in ('2', 47); + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.00 rows=3 width=4) + -> Index Scan using btree_test_index on btree_test (cost=0.00..6.00 rows=1 width=4) + Index Cond: a = ANY ('{2,47}'::integer[]) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: PQO version 1.647 +(5 rows) + +EXPLAIN SELECT * FROM btree_test WHERE a in ('1', '2'); + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.00 rows=3 width=4) + -> Index Scan using btree_test_index on btree_test (cost=0.00..6.00 rows=1 width=4) + Index Cond: a = ANY ('{1,2}'::integer[]) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: PQO version 1.647 +(5 rows) + +EXPLAIN SELECT * FROM btree_test WHERE a in ('1', '2', 47); + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..8.00 rows=4 width=4) + -> Index Scan using btree_test_index on btree_test (cost=0.00..8.00 rows=2 width=4) + Index Cond: a = ANY ('{1,2,47}'::integer[]) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: PQO version 1.647 +(5 rows) + +-- Test Bitmap index scan with in list +CREATE TABLE bitmap_test as SELECT * FROM generate_series(1,100) as a distributed randomly; +CREATE INDEX bitmap_index ON bitmap_test USING BITMAP(a); +-- The following query should fall back to planner. +-- Update the result file when this has been fixed in ORCA. +EXPLAIN SELECT * FROM bitmap_test WHERE a in (select 1); + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) (cost=0.07..4.38 rows=7 width=4) + -> Hash Join (cost=0.07..4.38 rows=3 width=4) + Hash Cond: bitmap_test.a = (1) + -> Seq Scan on bitmap_test (cost=0.00..4.00 rows=34 width=4) + -> Hash (cost=0.05..0.05 rows=1 width=4) + -> Broadcast Motion 1:3 (slice1; segments: 1) (cost=0.02..0.05 rows=1 width=4) + -> HashAggregate (cost=0.02..0.03 rows=1 width=4) + Group By: 1 + -> Result (cost=0.00..0.01 rows=1 width=0) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: legacy query optimizer +(11 rows) + +-- The following queries should work without falling back to planner. +EXPLAIN SELECT * FROM bitmap_test WHERE a in (1, 47); + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..408.76 rows=3 width=4) + -> Bitmap Table Scan on bitmap_test (cost=0.00..408.76 rows=1 width=4) + Recheck Cond: a = ANY ('{1,47}'::integer[]) + -> Bitmap Index Scan on bitmap_index (cost=0.00..0.00 rows=0 width=0) + Index Cond: a = ANY ('{1,47}'::integer[]) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: PQO version 1.647 +(7 rows) + +EXPLAIN SELECT * FROM bitmap_test WHERE a in ('2', 47); + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..408.76 rows=3 width=4) + -> Bitmap Table Scan on bitmap_test (cost=0.00..408.76 rows=1 width=4) + Recheck Cond: a = ANY ('{2,47}'::integer[]) + -> Bitmap Index Scan on bitmap_index (cost=0.00..0.00 rows=0 width=0) + Index Cond: a = ANY ('{2,47}'::integer[]) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: PQO version 1.647 +(7 rows) + +EXPLAIN SELECT * FROM bitmap_test WHERE a in ('1', '2'); + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..408.76 rows=3 width=4) + -> Bitmap Table Scan on bitmap_test (cost=0.00..408.76 rows=1 width=4) + Recheck Cond: a = ANY ('{1,2}'::integer[]) + -> Bitmap Index Scan on bitmap_index (cost=0.00..0.00 rows=0 width=0) + Index Cond: a = ANY ('{1,2}'::integer[]) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: PQO version 1.647 +(7 rows) + +EXPLAIN SELECT * FROM bitmap_test WHERE a in ('1', '2', 47); + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=4 width=4) + -> Table Scan on bitmap_test (cost=0.00..431.00 rows=2 width=4) + Filter: a = ANY ('{1,2,47}'::integer[]) + Settings: optimizer=on; optimizer_metadata_caching=on + Optimizer status: PQO version 1.647 +(5 rows) -reset optimizer_segments; --- Check if ORCA can handle GPDB's error properly -drop table if exists orca_exc_handle; -NOTICE: table "orca_exc_handle" does not exist, skipping -create table orca_exc_handle( - a int primary key, - b char -); -NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "orca_exc_handle_pkey" for table "orca_exc_handle" -insert into orca_exc_handle select i, i from generate_Series(1,4) as i; --- enable the fault injector ---start_ignore -\! gpfaultinjector -f opt_relcache_translator_catalog_access -y error --seg_dbid 1 -20160425:14:59:22:001357 gpfaultinjector:krajaraman:krajaraman-[INFO]:-Starting gpfaultinjector with args: -f opt_relcache_translator_catalog_access -y error --seg_dbid 1 -20160425:14:59:22:001357 gpfaultinjector:krajaraman:krajaraman-[INFO]:-local Greenplum Version: 'postgres (Greenplum Database) 4.3.99.00 build dev' -20160425:14:59:22:001357 gpfaultinjector:krajaraman:krajaraman-[INFO]:-Obtaining Segment details from master... -20160425:14:59:22:001357 gpfaultinjector:krajaraman:krajaraman-[INFO]:-Injecting fault on 1 segment(s) -20160425:14:59:22:001357 gpfaultinjector:krajaraman:krajaraman-[INFO]:-Injecting fault on krajaraman:/Users/krajaraman/gitdev/gpdb64/gpAux/gpdemo/datadirs/qddir/demoDataDir-1:content=-1:dbid=1:mode=s:status=u -20160425:14:59:22:001357 gpfaultinjector:krajaraman:krajaraman-[INFO]:-DONE ---end_ignore -select a from orca_exc_handle; -ERROR: fault triggered, fault name:'opt_relcache_translator_catalog_access' fault type:'error' (faultinjector.c:671) -ERROR: GPDB exception. Aborting GPORCA plan generation. (CGPOptimizer.cpp:66) --- reset the fault injector ---start_ignore -\! gpfaultinjector -f opt_relcache_translator_catalog_access -y reset --seg_dbid 1 -20160425:14:59:23:001374 gpfaultinjector:krajaraman:krajaraman-[INFO]:-Starting gpfaultinjector with args: -f opt_relcache_translator_catalog_access -y reset --seg_dbid 1 -20160425:14:59:23:001374 gpfaultinjector:krajaraman:krajaraman-[INFO]:-local Greenplum Version: 'postgres (Greenplum Database) 4.3.99.00 build dev' -20160425:14:59:23:001374 gpfaultinjector:krajaraman:krajaraman-[INFO]:-Obtaining Segment details from master... -20160425:14:59:23:001374 gpfaultinjector:krajaraman:krajaraman-[INFO]:-Injecting fault on 1 segment(s) -20160425:14:59:23:001374 gpfaultinjector:krajaraman:krajaraman-[INFO]:-Injecting fault on krajaraman:/Users/krajaraman/gitdev/gpdb64/gpAux/gpdemo/datadirs/qddir/demoDataDir-1:content=-1:dbid=1:mode=s:status=u -20160425:14:59:23:001374 gpfaultinjector:krajaraman:krajaraman-[INFO]:-DONE ---end_ignore -drop table orca_exc_handle; --- End of Check if ORCA can handle GPDB's error properly -- clean up drop schema orca cascade; NOTICE: drop cascades to table orca.bm_dyn_test_onepart_1_prt_part5 http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/15fa66e7/src/test/regress/sql/gp_optimizer.sql ---------------------------------------------------------------------- diff --git a/src/test/regress/sql/gp_optimizer.sql b/src/test/regress/sql/gp_optimizer.sql index faf93f1..d873e2a 100644 --- a/src/test/regress/sql/gp_optimizer.sql +++ b/src/test/regress/sql/gp_optimizer.sql @@ -841,5 +841,26 @@ select a from orca_exc_handle; drop table orca_exc_handle; -- End of Check if ORCA can handle GPDB's error properly +-- Test B-Tree index scan with in list +CREATE TABLE btree_test as SELECT * FROM generate_series(1,100) as a distributed randomly; +CREATE INDEX btree_test_index ON btree_test(a); +EXPLAIN SELECT * FROM btree_test WHERE a in (select 1); +EXPLAIN SELECT * FROM btree_test WHERE a in (1, 47); +EXPLAIN SELECT * FROM btree_test WHERE a in ('2', 47); +EXPLAIN SELECT * FROM btree_test WHERE a in ('1', '2'); +EXPLAIN SELECT * FROM btree_test WHERE a in ('1', '2', 47); + +-- Test Bitmap index scan with in list +CREATE TABLE bitmap_test as SELECT * FROM generate_series(1,100) as a distributed randomly; +CREATE INDEX bitmap_index ON bitmap_test USING BITMAP(a); +-- The following query should fall back to planner. +-- Update the result file when this has been fixed in ORCA. +EXPLAIN SELECT * FROM bitmap_test WHERE a in (select 1); +-- The following queries should work without falling back to planner. +EXPLAIN SELECT * FROM bitmap_test WHERE a in (1, 47); +EXPLAIN SELECT * FROM bitmap_test WHERE a in ('2', 47); +EXPLAIN SELECT * FROM bitmap_test WHERE a in ('1', '2'); +EXPLAIN SELECT * FROM bitmap_test WHERE a in ('1', '2', 47); + -- clean up drop schema orca cascade;
