This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 0c1bd9eff37b9a2df2849e6bc8edc8035bb8aefe Author: stiga-huang <[email protected]> AuthorDate: Tue Jan 17 16:27:44 2023 +0800 IMPALA-11845: Fix incorrect check of struct STAR path in resolvePathWithMasking resolvePathWithMasking() is a wrapper on resolvePath() to further resolve nested columns inside the table masking view. When it was added, complex types in the select list hadn't been supported yet. So the table masking view can't expose complex type columns directly in the select list. Any paths in nested types will be further resolved inside the table masking view in resolvePathWithMasking(). Take the following query as an example: select id, nested_struct.* from complextypestbl; If Ranger column-masking/row-filter policies applied on the table, the query is rewritten as select id, nested_struct.* from ( select mask(id) from complextypestbl where row-filtering-condition ) t; Table masking view "t" can't expose the nested column "nested_struct". So we further resolve "nested_struct" inside the inlineView to use the masked table "complextypestbl". The underlying TableRef is expected to be a BaseTableRef. Paths that don't reference nested columns should be resolved and returned directly (just like the original resolvePath() does). E.g. select v.* from masked_view v is rewritten to select v.* from ( select mask(c1), mask(c2), ..., mask(cn) from masked_view where row-filtering-condition ) v; The STAR path "v.*" should be resolved directly. However, it's treated as a nested column unexpectedly. The code then tries to resolve it inside the table "masked_view" and found "masked_view" is not a table so throws the IllegalStateException. These are the current conditions for identifying nested STAR paths: - The destType is STRUCT - And the resolved path is rooted at a valid tuple descriptor They don't really recognize the nested struct columns because STAR paths on table/view also match these conditions. When the STAR path is an expansion on a catalog table/view, the root tuple descriptor is exactly the output tuple of the table/view. The destType is the type of the tuple descriptor which is always a StructType. Note that STAR paths on other nested types, i.e. array/map, are invalid. So the first condition matches for all valid cases. The second condition also matches all valid cases since both the table/view and struct STAR expansion have the path rooted at a valid tuple descriptor. This patch fixes the check for nested struct STAR path by checking the matched types instead. Note that if "v.*" is a table/view expansion, the matched type list is empty. If "v.*" is a struct column expansion, the matched type list contains the STRUCT column type. Tests: - Add missing coverage on STAR paths (v.*) on masked views. Change-Id: I8f1e78e325baafbe23101909d47e82bf140a2d77 Reviewed-on: http://gerrit.cloudera.org:8080/19429 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/analysis/Analyzer.java | 8 +- .../main/java/org/apache/impala/analysis/Path.java | 5 +- .../queries/QueryTest/ranger_column_masking.test | 46 +++++++++ .../ranger_column_masking_complex_types.test | 112 ++++++++++++++++++++- 4 files changed, 166 insertions(+), 5 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 44d4dcb4c..c2e9c174a 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -1190,9 +1190,11 @@ public class Analyzer { return resolvedPath; } } else if (pathType == PathType.STAR) { - if (!resolvedPath.destType().isStructType() || !resolvedPath.isRootedAtTuple()) { - return resolvedPath; - } + // For "v.*", return directly if "v" is a table/view. + // If it's a struct column, we need to further resolve it if it's now resolved on a + // table masking view. + if (resolvedPath.getMatchedTypes().isEmpty()) return resolvedPath; + Preconditions.checkState(resolvedPath.destType().isStructType()); } // In this case, resolvedPath is resolved on a nested column. Check if it's resolved // on a table masking view. The root TableRef(table/view) could be at a parent query diff --git a/fe/src/main/java/org/apache/impala/analysis/Path.java b/fe/src/main/java/org/apache/impala/analysis/Path.java index 2dfe01032..044a42be4 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Path.java +++ b/fe/src/main/java/org/apache/impala/analysis/Path.java @@ -133,6 +133,7 @@ public class Path { // Registered table alias that this path is rooted at, if any. // Null if the path is rooted at a catalog table/view. + // Note that this is also set for STAR path of "v.*" when "v" is a catalog table/view. private final TupleDescriptor rootDesc_; // Catalog table that this resolved path is rooted at, if any. @@ -144,7 +145,9 @@ public class Path { private final Path rootPath_; // List of matched types and field positions set during resolution. The matched - // types/positions describe the physical path through the schema tree. + // types/positions describe the physical path through the schema tree of a catalog + // table/view. Empty if the path corresponds to a catalog table/view, e.g. when it's a + // TABLE_REF or when it's a STAR on a table/view. private final List<Type> matchedTypes_ = new ArrayList<>(); private final List<Integer> matchedPositions_ = new ArrayList<>(); diff --git a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test index d23961010..9f76fc778 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test +++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test @@ -446,6 +446,52 @@ order by id limit 10 STRING,INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT,INT,INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT,INT ==== ---- QUERY +# Test on star select with table ref +select v.* from functional.alltypes_view v +order by id limit 10 +---- RESULTS +0,true,0,0,0,0,0.0,0.0,'01/01/09','vvv0ttt',2009-01-01 00:00:00,2009,1 +100,false,1,1,1,10,1.10000002384,10.1,'01/01/09','vvv1ttt',2009-01-01 00:01:00,2009,1 +200,true,2,2,2,20,2.20000004768,20.2,'01/01/09','vvv2ttt',2009-01-01 00:02:00.100000000,2009,1 +300,false,3,3,3,30,3.29999995232,30.3,'01/01/09','vvv3ttt',2009-01-01 00:03:00.300000000,2009,1 +400,true,4,4,4,40,4.40000009537,40.4,'01/01/09','vvv4ttt',2009-01-01 00:04:00.600000000,2009,1 +500,false,5,5,5,50,5.5,50.5,'01/01/09','vvv5ttt',2009-01-01 00:05:00.100000000,2009,1 +600,true,6,6,6,60,6.59999990463,60.6,'01/01/09','vvv6ttt',2009-01-01 00:06:00.150000000,2009,1 +700,false,7,7,7,70,7.69999980927,70.7,'01/01/09','vvv7ttt',2009-01-01 00:07:00.210000000,2009,1 +800,true,8,8,8,80,8.80000019073,80.8,'01/01/09','vvv8ttt',2009-01-01 00:08:00.280000000,2009,1 +900,false,9,9,9,90,9.89999961853,90.9,'01/01/09','vvv9ttt',2009-01-01 00:09:00.360000000,2009,1 +---- TYPES +INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT +==== +---- QUERY +select v.* from functional.alltypes_view v, functional.alltypestiny t where v.id = t.id +---- RESULTS +0,true,0,0,0,0,0.0,0.0,'01/01/09','vvv0ttt',2009-01-01 00:00:00,2009,1 +100,false,1,1,1,10,1.10000002384,10.1,'01/01/09','vvv1ttt',2009-01-01 00:01:00,2009,1 +200,true,2,2,2,20,2.20000004768,20.2,'01/01/09','vvv2ttt',2009-01-01 00:02:00.100000000,2009,1 +300,false,3,3,3,30,3.29999995232,30.3,'01/01/09','vvv3ttt',2009-01-01 00:03:00.300000000,2009,1 +400,true,4,4,4,40,4.40000009537,40.4,'01/01/09','vvv4ttt',2009-01-01 00:04:00.600000000,2009,1 +500,false,5,5,5,50,5.5,50.5,'01/01/09','vvv5ttt',2009-01-01 00:05:00.100000000,2009,1 +600,true,6,6,6,60,6.59999990463,60.6,'01/01/09','vvv6ttt',2009-01-01 00:06:00.150000000,2009,1 +700,false,7,7,7,70,7.69999980927,70.7,'01/01/09','vvv7ttt',2009-01-01 00:07:00.210000000,2009,1 +---- TYPES +INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT +==== +---- QUERY +select v.id, v.*, t.id from functional.alltypes_view v, functional.alltypestiny t where v.id = t.id +---- RESULTS +0,0,true,0,0,0,0,0.0,0.0,'01/01/09','vvv0ttt',2009-01-01 00:00:00,2009,1,0 +100,100,false,1,1,1,10,1.10000002384,10.1,'01/01/09','vvv1ttt',2009-01-01 00:01:00,2009,1,100 +200,200,true,2,2,2,20,2.20000004768,20.2,'01/01/09','vvv2ttt',2009-01-01 00:02:00.100000000,2009,1,200 +300,300,false,3,3,3,30,3.29999995232,30.3,'01/01/09','vvv3ttt',2009-01-01 00:03:00.300000000,2009,1,300 +400,400,true,4,4,4,40,4.40000009537,40.4,'01/01/09','vvv4ttt',2009-01-01 00:04:00.600000000,2009,1,400 +500,500,false,5,5,5,50,5.5,50.5,'01/01/09','vvv5ttt',2009-01-01 00:05:00.100000000,2009,1,500 +600,600,true,6,6,6,60,6.59999990463,60.6,'01/01/09','vvv6ttt',2009-01-01 00:06:00.150000000,2009,1,600 +700,700,false,7,7,7,70,7.69999980927,70.7,'01/01/09','vvv7ttt',2009-01-01 00:07:00.210000000,2009,1,700 +---- TYPES +INT,INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT,INT +==== +---- QUERY # Test on local view (CTE). Correctly ignore masking on local view names so the result # won't be 100 (affected by policy id => id * 100). use functional; diff --git a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test index 316f573b4..563df801b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test +++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test @@ -45,7 +45,7 @@ select id, nested_struct.a from complextypestbl BIGINT,INT ==== ---- QUERY -# Test resolving nested columns in expending star expression. +# Test resolving nested columns in expanding star expression. select id, nested_struct.* from complextypestbl ---- RESULTS 100,1 @@ -60,6 +60,116 @@ select id, nested_struct.* from complextypestbl BIGINT,INT ==== ---- QUERY +# Test resolving nested columns in expanding star expression. +select nested_struct.* from complextypestbl +---- RESULTS +1 +NULL +NULL +NULL +NULL +NULL +7 +-1 +---- TYPES +INT +==== +---- QUERY +# Test resolving nested columns in expanding star expression. +set EXPAND_COMPLEX_TYPES=1; +select nested_struct.* from complextypestbl +---- CATCH +AnalysisException: Struct containing a collection type is not allowed in the select list. +==== +---- QUERY +# Test resolving explicit STAR path on a nested struct column inside array +select id, nested_arr.item.* +from functional_parquet.complextypestbl t, + t.nested_struct.c.d arr, + arr.item nested_arr; +---- RESULTS +100,10,'aaa' +100,-10,'bbb' +100,11,'c' +200,NULL,'NULL' +200,10,'aaa' +200,NULL,'NULL' +200,-10,'bbb' +200,NULL,'NULL' +200,11,'c' +200,NULL,'NULL' +700,NULL,'NULL' +800,-1,'nonnullable' +---- TYPES +BIGINT,INT,STRING +==== +---- QUERY +# Test resolving explicit STAR path on a nested struct column inside array +select nested_arr.item.* +from functional_parquet.complextypestbl t, + t.nested_struct.c.d arr, + arr.item nested_arr; +---- RESULTS +10,'aaa' +-10,'bbb' +11,'c' +NULL,'NULL' +10,'aaa' +NULL,'NULL' +-10,'bbb' +NULL,'NULL' +11,'c' +NULL,'NULL' +NULL,'NULL' +-1,'nonnullable' +---- TYPES +INT,STRING +==== +---- QUERY +# Test resolving implicit STAR path on a nested struct column inside array +select id, nested_arr.* +from functional_parquet.complextypestbl t, + t.nested_struct.c.d arr, + arr.item nested_arr; +---- RESULTS +100,10,'aaa' +100,-10,'bbb' +100,11,'c' +200,NULL,'NULL' +200,10,'aaa' +200,NULL,'NULL' +200,-10,'bbb' +200,NULL,'NULL' +200,11,'c' +200,NULL,'NULL' +700,NULL,'NULL' +800,-1,'nonnullable' +---- TYPES +BIGINT,INT,STRING +==== +---- QUERY +# Test resolving explicit STAR path on a nested struct column inside array +select nested_arr.* +from functional_parquet.complextypestbl t, + t.nested_struct.c.d arr, + arr.item nested_arr; +---- RESULTS +10,'aaa' +-10,'bbb' +11,'c' +NULL,'NULL' +10,'aaa' +NULL,'NULL' +-10,'bbb' +NULL,'NULL' +11,'c' +NULL,'NULL' +NULL,'NULL' +-1,'nonnullable' +---- TYPES +INT,STRING +==== +---- QUERY # Test resolving nested column in function. select count(id), count(nested_struct.a) from complextypestbl ---- RESULTS
