This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch branch-4.1.2 in repository https://gitbox.apache.org/repos/asf/impala.git
commit 555370e0caf89cf9c78fb962b0532161603a55c9 Author: stiga-huang <[email protected]> AuthorDate: Tue Jan 17 16:27:44 2023 +0800 IMPALA-11845: Fix incorrect check of struct STAR path in resolvePathWithMasking resolvePathWithMasking() is a wrapper on resolvePath() to further resolve nested columns inside the table masking view. When it was added, complex types in the select list hadn't been supported yet. So the table masking view can't expose complex type columns directly in the select list. Any paths in nested types will be further resolved inside the table masking view in resolvePathWithMasking(). Take the following query as an example: select id, nested_struct.* from complextypestbl; If Ranger column-masking/row-filter policies applied on the table, the query is rewritten as select id, nested_struct.* from ( select mask(id) from complextypestbl where row-filtering-condition ) t; Table masking view "t" can't expose the nested column "nested_struct". So we further resolve "nested_struct" inside the inlineView to use the masked table "complextypestbl". The underlying TableRef is expected to be a BaseTableRef. Paths that don't reference nested columns should be resolved and returned directly (just like the original resolvePath() does). E.g. select v.* from masked_view v is rewritten to select v.* from ( select mask(c1), mask(c2), ..., mask(cn) from masked_view where row-filtering-condition ) v; The STAR path "v.*" should be resolved directly. However, it's treated as a nested column unexpectedly. The code then tries to resolve it inside the table "masked_view" and found "masked_view" is not a table so throws the IllegalStateException. These are the current conditions for identifying nested STAR paths: - The destType is STRUCT - And the resolved path is rooted at a valid tuple descriptor They don't really recognize the nested struct columns because STAR paths on table/view also match these conditions. When the STAR path is an expansion on a catalog table/view, the root tuple descriptor is exactly the output tuple of the table/view. The destType is the type of the tuple descriptor which is always a StructType. Note that STAR paths on other nested types, i.e. array/map, are invalid. So the first condition matches for all valid cases. The second condition also matches all valid cases since both the table/view and struct STAR expansion have the path rooted at a valid tuple descriptor. This patch fixes the check for nested struct STAR path by checking the matched types instead. Note that if "v.*" is a table/view expansion, the matched type list is empty. If "v.*" is a struct column expansion, the matched type list contains the STRUCT column type. Tests: - Add missing coverage on STAR paths (v.*) on masked views. Backport Note for 4.1.2: - Removed the test on EXPAND_COMPLEX_TYPES which is not supported. Change-Id: I8f1e78e325baafbe23101909d47e82bf140a2d77 Reviewed-on: http://gerrit.cloudera.org:8080/19429 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/analysis/Analyzer.java | 8 +- .../main/java/org/apache/impala/analysis/Path.java | 5 +- .../queries/QueryTest/ranger_column_masking.test | 46 +++++++++ .../ranger_column_masking_complex_types.test | 105 ++++++++++++++++++++- 4 files changed, 159 insertions(+), 5 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index cfb5f2a05..a76c2e8be 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -1165,9 +1165,11 @@ public class Analyzer { return resolvedPath; } } else if (pathType == PathType.STAR) { - if (!resolvedPath.destType().isStructType() || !resolvedPath.isRootedAtTuple()) { - return resolvedPath; - } + // For "v.*", return directly if "v" is a table/view. + // If it's a struct column, we need to further resolve it if it's now resolved on a + // table masking view. + if (resolvedPath.getMatchedTypes().isEmpty()) return resolvedPath; + Preconditions.checkState(resolvedPath.destType().isStructType()); } // In this case, resolvedPath is resolved on a nested column. Check if it's resolved // on a table masking view. The root TableRef(table/view) could be at a parent query diff --git a/fe/src/main/java/org/apache/impala/analysis/Path.java b/fe/src/main/java/org/apache/impala/analysis/Path.java index 6e3fd2d3b..53a2bc939 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Path.java +++ b/fe/src/main/java/org/apache/impala/analysis/Path.java @@ -131,6 +131,7 @@ public class Path { // Registered table alias that this path is rooted at, if any. // Null if the path is rooted at a catalog table/view. + // Note that this is also set for STAR path of "v.*" when "v" is a catalog table/view. private final TupleDescriptor rootDesc_; // Catalog table that this resolved path is rooted at, if any. @@ -142,7 +143,9 @@ public class Path { private final Path rootPath_; // List of matched types and field positions set during resolution. The matched - // types/positions describe the physical path through the schema tree. + // types/positions describe the physical path through the schema tree of a catalog + // table/view. Empty if the path corresponds to a catalog table/view, e.g. when it's a + // TABLE_REF or when it's a STAR on a table/view. private final List<Type> matchedTypes_ = new ArrayList<>(); private final List<Integer> matchedPositions_ = new ArrayList<>(); diff --git a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test index 1f0dac113..0c2671892 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test +++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test @@ -446,6 +446,52 @@ order by id limit 10 STRING,INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT,INT,INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT,INT ==== ---- QUERY +# Test on star select with table ref +select v.* from functional.alltypes_view v +order by id limit 10 +---- RESULTS +0,true,0,0,0,0,0.0,0.0,'01/01/09','vvv0ttt',2009-01-01 00:00:00,2009,1 +100,false,1,1,1,10,1.10000002384,10.1,'01/01/09','vvv1ttt',2009-01-01 00:01:00,2009,1 +200,true,2,2,2,20,2.20000004768,20.2,'01/01/09','vvv2ttt',2009-01-01 00:02:00.100000000,2009,1 +300,false,3,3,3,30,3.29999995232,30.3,'01/01/09','vvv3ttt',2009-01-01 00:03:00.300000000,2009,1 +400,true,4,4,4,40,4.40000009537,40.4,'01/01/09','vvv4ttt',2009-01-01 00:04:00.600000000,2009,1 +500,false,5,5,5,50,5.5,50.5,'01/01/09','vvv5ttt',2009-01-01 00:05:00.100000000,2009,1 +600,true,6,6,6,60,6.59999990463,60.6,'01/01/09','vvv6ttt',2009-01-01 00:06:00.150000000,2009,1 +700,false,7,7,7,70,7.69999980927,70.7,'01/01/09','vvv7ttt',2009-01-01 00:07:00.210000000,2009,1 +800,true,8,8,8,80,8.80000019073,80.8,'01/01/09','vvv8ttt',2009-01-01 00:08:00.280000000,2009,1 +900,false,9,9,9,90,9.89999961853,90.9,'01/01/09','vvv9ttt',2009-01-01 00:09:00.360000000,2009,1 +---- TYPES +INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT +==== +---- QUERY +select v.* from functional.alltypes_view v, functional.alltypestiny t where v.id = t.id +---- RESULTS +0,true,0,0,0,0,0.0,0.0,'01/01/09','vvv0ttt',2009-01-01 00:00:00,2009,1 +100,false,1,1,1,10,1.10000002384,10.1,'01/01/09','vvv1ttt',2009-01-01 00:01:00,2009,1 +200,true,2,2,2,20,2.20000004768,20.2,'01/01/09','vvv2ttt',2009-01-01 00:02:00.100000000,2009,1 +300,false,3,3,3,30,3.29999995232,30.3,'01/01/09','vvv3ttt',2009-01-01 00:03:00.300000000,2009,1 +400,true,4,4,4,40,4.40000009537,40.4,'01/01/09','vvv4ttt',2009-01-01 00:04:00.600000000,2009,1 +500,false,5,5,5,50,5.5,50.5,'01/01/09','vvv5ttt',2009-01-01 00:05:00.100000000,2009,1 +600,true,6,6,6,60,6.59999990463,60.6,'01/01/09','vvv6ttt',2009-01-01 00:06:00.150000000,2009,1 +700,false,7,7,7,70,7.69999980927,70.7,'01/01/09','vvv7ttt',2009-01-01 00:07:00.210000000,2009,1 +---- TYPES +INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT +==== +---- QUERY +select v.id, v.*, t.id from functional.alltypes_view v, functional.alltypestiny t where v.id = t.id +---- RESULTS +0,0,true,0,0,0,0,0.0,0.0,'01/01/09','vvv0ttt',2009-01-01 00:00:00,2009,1,0 +100,100,false,1,1,1,10,1.10000002384,10.1,'01/01/09','vvv1ttt',2009-01-01 00:01:00,2009,1,100 +200,200,true,2,2,2,20,2.20000004768,20.2,'01/01/09','vvv2ttt',2009-01-01 00:02:00.100000000,2009,1,200 +300,300,false,3,3,3,30,3.29999995232,30.3,'01/01/09','vvv3ttt',2009-01-01 00:03:00.300000000,2009,1,300 +400,400,true,4,4,4,40,4.40000009537,40.4,'01/01/09','vvv4ttt',2009-01-01 00:04:00.600000000,2009,1,400 +500,500,false,5,5,5,50,5.5,50.5,'01/01/09','vvv5ttt',2009-01-01 00:05:00.100000000,2009,1,500 +600,600,true,6,6,6,60,6.59999990463,60.6,'01/01/09','vvv6ttt',2009-01-01 00:06:00.150000000,2009,1,600 +700,700,false,7,7,7,70,7.69999980927,70.7,'01/01/09','vvv7ttt',2009-01-01 00:07:00.210000000,2009,1,700 +---- TYPES +INT,INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT,INT +==== +---- QUERY # Test on local view (CTE). Correctly ignore masking on local view names so the result # won't be 100 (affected by policy id => id * 100). use functional; diff --git a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test index 3f9fd0606..e088aeb12 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test +++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test @@ -45,7 +45,7 @@ select id, nested_struct.a from complextypestbl BIGINT,INT ==== ---- QUERY -# Test resolving nested columns in expending star expression. +# Test resolving nested columns in expanding star expression. select id, nested_struct.* from complextypestbl ---- RESULTS 100,1 @@ -60,6 +60,109 @@ select id, nested_struct.* from complextypestbl BIGINT,INT ==== ---- QUERY +# Test resolving nested columns in expanding star expression. +select nested_struct.* from complextypestbl +---- RESULTS +1 +NULL +NULL +NULL +NULL +NULL +7 +-1 +---- TYPES +INT +==== +---- QUERY +# Test resolving explicit STAR path on a nested struct column inside array +select id, nested_arr.item.* +from functional_parquet.complextypestbl t, + t.nested_struct.c.d arr, + arr.item nested_arr; +---- RESULTS +100,10,'aaa' +100,-10,'bbb' +100,11,'c' +200,NULL,'NULL' +200,10,'aaa' +200,NULL,'NULL' +200,-10,'bbb' +200,NULL,'NULL' +200,11,'c' +200,NULL,'NULL' +700,NULL,'NULL' +800,-1,'nonnullable' +---- TYPES +BIGINT,INT,STRING +==== +---- QUERY +# Test resolving explicit STAR path on a nested struct column inside array +select nested_arr.item.* +from functional_parquet.complextypestbl t, + t.nested_struct.c.d arr, + arr.item nested_arr; +---- RESULTS +10,'aaa' +-10,'bbb' +11,'c' +NULL,'NULL' +10,'aaa' +NULL,'NULL' +-10,'bbb' +NULL,'NULL' +11,'c' +NULL,'NULL' +NULL,'NULL' +-1,'nonnullable' +---- TYPES +INT,STRING +==== +---- QUERY +# Test resolving implicit STAR path on a nested struct column inside array +select id, nested_arr.* +from functional_parquet.complextypestbl t, + t.nested_struct.c.d arr, + arr.item nested_arr; +---- RESULTS +100,10,'aaa' +100,-10,'bbb' +100,11,'c' +200,NULL,'NULL' +200,10,'aaa' +200,NULL,'NULL' +200,-10,'bbb' +200,NULL,'NULL' +200,11,'c' +200,NULL,'NULL' +700,NULL,'NULL' +800,-1,'nonnullable' +---- TYPES +BIGINT,INT,STRING +==== +---- QUERY +# Test resolving explicit STAR path on a nested struct column inside array +select nested_arr.* +from functional_parquet.complextypestbl t, + t.nested_struct.c.d arr, + arr.item nested_arr; +---- RESULTS +10,'aaa' +-10,'bbb' +11,'c' +NULL,'NULL' +10,'aaa' +NULL,'NULL' +-10,'bbb' +NULL,'NULL' +11,'c' +NULL,'NULL' +NULL,'NULL' +-1,'nonnullable' +---- TYPES +INT,STRING +==== +---- QUERY # Test resolving nested column in function. select count(id), count(nested_struct.a) from complextypestbl ---- RESULTS
