julianhyde commented on code in PR #3984:
URL: https://github.com/apache/calcite/pull/3984#discussion_r1801870649
##########
core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java:
##########
@@ -12379,6 +12379,370 @@ private void checkCustomColumnResolving(String table)
{
+ "from cte\n"
+ "where empno = 1")
.fails(missingFilters("JOB"));
+ fixture.withSql("WITH cte AS (\n"
+ + " select * from emp)\n"
+ + "SELECT *\n"
+ + "from cte\n"
+ + "where empno = 1\n"
+ + "and job = 'doctor'")
+ .ok();
+ fixture.withSql("WITH cte AS (\n"
+ + " select * from emp where empno = 1)\n"
+ + "SELECT *\n"
+ + "from cte\n"
+ + "where job = 'doctor'")
+ .ok();
+ fixture.withSql("WITH cte AS (\n"
+ + " select empno, job from emp)\n"
+ + "SELECT *\n"
+ + "from cte\n"
+ + "where empno = 1\n"
+ + "and job = 'doctor'")
+ .ok();
+
+ // Filters are missing on EMPNO and JOB, but the error message only
+ // complains about JOB because EMPNO is in the SELECT clause, and could
+ // theoretically be filtered by an enclosing query.
+ fixture.withSql("^select empno\n"
+ + "from emp^")
+ .fails(missingFilters("JOB"));
+ fixture.withSql("^select empno,\n"
+ + " sum(sal) over (order by mgr)\n"
+ + "from emp^")
+ .fails(missingFilters("JOB"));
+ }
+
+
+ /**
+ * Tests validation of must-filter columns with the inclusion of bypass
fields.
+ *
+ * <p>If a table that implements
+ * {@link org.apache.calcite.sql.validate.SemanticTable} tags fields as
+ * 'must-filter', and the SQL query does not contain a WHERE or HAVING clause
+ * on each of the tagged columns, the validator should throw an error.
+ * If any bypass field for a table is in a WHERE/HAVING clause for that
SELECT statement,
+ * the must-filter requirements for that table are disabled.
+ */
+ @Test void testMustFilterColumnsWithBypass() {
+ final SqlValidatorFixture fixture = fixture()
+ .withParserConfig(c -> c.withQuoting(Quoting.BACK_TICK))
+ .withOperatorTable(operatorTableFor(SqlLibrary.BIG_QUERY))
+ .withCatalogReader(MustFilterMockCatalogReader::create);
+ // Basic query
+ fixture.withSql("select empno\n"
+ + "from emp\n"
+ + "where job = 'doctor'\n"
+ + "and empno = 1")
+ .ok();
+ fixture.withSql("^select *\n"
+ + "from emp\n"
+ + "where concat(emp.empno, ' ') = 'abc'^")
+ .fails(missingFilters("JOB"));
+
+ fixture.withSql("select *\n"
+ + "from emp\n"
+ + "where concat(emp.ename, ' ') = 'abc'^")
+ .ok();
+ // ENAME is a bypass field
+
+ // SUBQUERIES
+ fixture.withSql("select * from (\n"
+ + " select * from emp where empno = 1)\n"
+ + "where job = 'doctor'")
+ .ok();
+ fixture.withSql("^select * from (\n"
+ + " select ename from emp where empno = 1)^")
+ .fails(missingFilters("JOB"));
+
+ fixture.withSql("select * from (\n"
+ + " select job, ename from emp where empno = 1)"
+ + "where ename = '1'")
+ .ok();
+
+ fixture.withSql("select * from (\n"
+ + " select empno, job from emp)\n"
+ + "where job = 'doctor' and empno = 1")
+ .ok();
+
+ // Deceitful alias #1. Filter on 'j' is a filter on the underlying 'job'.
+ fixture.withSql("select * from (\n"
+ + " select job as j, ename as job\n"
+ + " from emp\n"
+ + " where empno = 1)\n"
+ + "where j = 'doctor'")
+ .ok();
+ // Deceitful alias #2. Filter on 'job' is a filter on the underlying
+ // 'slacker', so the underlying 'job' is missing a filter.
+ fixture.withSql("^select * from (\n"
+ + " select job as j, slacker as job\n"
+ + " from emp\n"
+ + " where empno = 1)\n"
+ + "where job = 'doctor'^")
+ .fails(missingFilters("J"));
+
+ // Deceitful alias #3. Filter on 'job' is a filter on the underlying
+ // 'ename', which is a bypass field thus no exception.
+ fixture.withSql("select * from (\n"
+ + " select job as j, ename as job\n"
+ + " from emp\n"
+ + " where empno = 1)\n"
+ + "where job = 'doctor'^")
+ .ok();
+ fixture.withSql("select * from (\n"
+ + " select * from emp where job = 'doctor')\n"
+ + "where empno = 1")
+ .ok();
+ fixture.withSql("select * from (\n"
+ + " select empno from emp where job = 'doctor')\n"
+ + "where empno = 1")
+ .ok();
+ fixture.withSql("^select * from (\n"
+ + " select * from emp where empno = 1)^")
+ .fails(missingFilters("JOB"));
+ fixture.withSql("select * from (\n"
+ + " select * from emp where ename = 1)^")
+ .ok();
+ // ENAME is a bypass field
+
+ fixture.withSql("^select * from (select * from `SALES`.`EMP`) as a1^ ")
+ .fails(missingFilters("EMPNO", "JOB"));
+
+ fixture.withSql("select * from (select * from `SALES`.`EMP`) as a1 where
ename = '1'^ ")
+ .ok();
+ // JOINs
+ fixture.withSql("^select *\n"
+ + "from emp\n"
+ + "join dept on emp.deptno = dept.deptno^")
+ .fails(missingFilters("EMPNO", "JOB", "NAME"));
+
+ fixture.withSql("^select *\n"
+ + "from emp\n"
+ + "join dept on emp.deptno = dept.deptno where ename = '1'^")
+ .fails(missingFilters("NAME"));
+ // ENAME is a bypass field for EMP table.
+
+ fixture.withSql("^select *\n"
+ + "from emp\n"
+ + "join dept on emp.deptno = dept.deptno\n"
+ + "where emp.empno = 1^")
+ .fails(missingFilters("JOB", "NAME"));
+ fixture.withSql("select *\n"
+ + "from emp\n"
+ + "join dept on emp.deptno = dept.deptno\n"
+ + "where emp.empno = 1\n"
+ + "and emp.job = 'doctor'\n"
+ + "and dept.name = 'ACCOUNTING'")
+ .ok();
+ fixture.withSql("select *\n"
+ + "from emp\n"
+ + "join dept on emp.deptno = dept.deptno\n"
+ + "where empno = 1\n"
+ + "and job = 'doctor'\n"
+ + "and dept.name = 'ACCOUNTING'")
+ .ok();
+
+ // Self-join
+ fixture.withSql("^select *\n"
+ + "from `SALES`.emp a1\n"
+ + "join `SALES`.emp a2 on a1.empno = a2.empno^")
+ .fails(missingFilters("EMPNO", "EMPNO0", "JOB", "JOB0"));
+
+ fixture.withSql("^select *\n"
+ + "from `SALES`.emp a1\n"
+ + "join `SALES`.emp a2 on a1.empno = a2.empno where a1.ename =
'1'^")
+ .fails(missingFilters("EMPNO0", "JOB0"));
+ // Filtering on a bypass field in a1 disables must-filter for a1, but a2
must-filters
+ // are still required.
+
+ fixture.withSql("^select *\n"
+ + "from emp a1\n"
+ + "join emp a2 on a1.empno = a2.empno\n"
+ + "where a2.empno = 1\n"
+ + "and a1.empno = 1\n"
+ + "and a2.job = 'doctor'^")
+ .fails(missingFilters("JOB"));
+ // There are two JOB columns but only one is filtered
Review Comment:
> having the comments below the cases is a little confusing
Agree. Comments after cases is a neat idea (because it puts the comment
close to the error message), but I think it will confuse people, and the
comments will become detached from the test cases as the code is maintained. I
think you should move the comments above the test case (i.e. 7 lines earlier in
this case).
Expand 'There are two JOB columns but only one is filtered' to 'Query is
invalid because there are two JOB columns but only one is filtered'. It is a
little redundant but it ties the 'what' to the 'why'.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]