[
https://issues.apache.org/jira/browse/PHOENIX-5265?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Viraj Jasani updated PHOENIX-5265:
----------------------------------
Summary: [UMBRELLA] Phoenix Test should use object-based Plan for result
comparison instead of using hard-coded comparison (was: [UMBRELLA] Phoenix
Test should use gold files for result comparison instead of using hard-coded
comparison.)
> [UMBRELLA] Phoenix Test should use object-based Plan for result comparison
> instead of using hard-coded comparison
> ------------------------------------------------------------------------------------------------------------------
>
> Key: PHOENIX-5265
> URL: https://issues.apache.org/jira/browse/PHOENIX-5265
> Project: Phoenix
> Issue Type: Improvement
> Reporter: Bin Shi
> Assignee: Viraj Jasani
> Priority: Major
> Labels: hardening, phoenix-hardening
>
> Currently, in Phoenix tests, the comparison of the returned result with the
> expected result is always hard-coded. This is especially widespread in E2E and
> integration tests that compare query results, including the output of the
> EXPLAIN query plan. It has significantly impaired the productivity and
> efficiency of working on Phoenix.
> The right approach is for each test case to write the generated result to a
> file under a new directory and then compare that file with a gold file under
> the "gold" directory. After we make some code changes and confirm they are
> correct by manually verifying a few specific test cases, we can safely replace
> the whole "gold" directory with the directory containing all the newly
> generated result files from the test run. In this way, we can easily rebase
> all the tests.
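> A minimal sketch of what such a gold-file assertion helper could look like is
> below. The directory layout, file names, and the "regenerate.gold" system
> property are assumptions made for illustration only; they are not existing
> Phoenix utilities.
> {code:java}
> import static org.junit.Assert.assertEquals;
>
> import java.io.IOException;
> import java.nio.charset.StandardCharsets;
> import java.nio.file.Files;
> import java.nio.file.Path;
> import java.nio.file.Paths;
>
> public class GoldFileAssert {
>
>     // Hypothetical helper: compares a test's actual output against a checked-in gold file.
>     // Running with -Dregenerate.gold=true rewrites the gold file instead of asserting,
>     // which is how the whole suite could be "rebased" after an intentional behaviour change.
>     public static void assertMatchesGoldFile(String testName, String actualOutput) throws IOException {
>         Path goldFile = Paths.get("src/it/resources/gold", testName + ".txt");   // assumed layout
>         Path actualFile = Paths.get("target/actual-results", testName + ".txt"); // assumed layout
>
>         // Always write the actual output, so a fresh "gold" directory can be harvested from a run.
>         Files.createDirectories(actualFile.getParent());
>         Files.write(actualFile, actualOutput.getBytes(StandardCharsets.UTF_8));
>
>         if (Boolean.getBoolean("regenerate.gold")) {
>             Files.createDirectories(goldFile.getParent());
>             Files.write(goldFile, actualOutput.getBytes(StandardCharsets.UTF_8));
>             return;
>         }
>
>         String expected = new String(Files.readAllBytes(goldFile), StandardCharsets.UTF_8);
>         assertEquals("Output differs from gold file " + goldFile, expected, actualOutput);
>     }
> }
> {code}
> A test like testWithMultiCF could then end with
> GoldFileAssert.assertMatchesGoldFile("testWithMultiCF", actualExplainPlan)
> instead of the hard-coded string assert.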
> For example, in BaseStatsCollectorIT.java we verify the estimated row size in
> the returned result of the EXPLAIN query plan. The row size is decided by many
> factors: whether the columns are encoded, whether the table is mutable, whether
> a transaction provider is used, and whether that provider is TEPHRA or OMID.
> The code snippet "testWithMultiCF" below shows a typical test case; the
> comparisons of the returned result with the expected result are hard-coded in
> those asserts. Now imagine we change the way stats are collected, the column
> encoding scheme, or the cell storage format for TEPHRA or OMID, all of which
> are very likely to happen. Then we need to manually change all those hard-coded
> comparisons everywhere, and it isn't trivial to re-calculate all the expected
> row sizes under the different conditions. Today it might take one or two weeks
> to rebase all the tests, which is a huge waste. We should use "gold" files
> here, so that we can rebase the tests very easily.
> BTW, the newly generated test result files and the gold files should be in XML
> or JSON. The result of the "EXPLAIN" query should be in XML or JSON too,
> because those formats match the tree structure of a query plan.
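> As a rough illustration of the object-based/JSON idea, the EXPLAIN output could
> be captured in a small plan object and serialized with a JSON library such as
> Jackson, so a gold-file diff points at the exact plan attribute that changed.
> The PlanSummary class and its fields below are hypothetical; they are not
> Phoenix's actual plan objects.
> {code:java}
> import com.fasterxml.jackson.databind.ObjectMapper;
>
> // Hypothetical value object describing the interesting parts of an EXPLAIN plan.
> // Asserts (or a gold-file diff) can then target individual fields instead of one
> // string that mixes chunk counts, byte estimates and the table name together.
> public class PlanSummary {
>     public int chunks;
>     public long estimatedRows;
>     public long estimatedBytes;
>     public String scanType;   // e.g. "FULL SCAN"
>     public String tableName;
>
>     // Serialize to pretty-printed JSON so it can be written to and compared with a gold file.
>     public String toJson() throws Exception {
>         return new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
>     }
> }
> {code}
> Such an object could be written and compared with the same gold-file helper
> sketched above, or asserted on field by field (for example only estimatedBytes)
> when a test cares about a single attribute.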
> {code:java}
> @Test
> public void testWithMultiCF() throws Exception {
>     int nRows = 20;
>     Connection conn = getConnection(0);
>     PreparedStatement stmt;
>     conn.createStatement().execute(
>             "CREATE TABLE " + fullTableName
>                     + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, c.v INTEGER NULL, d.v INTEGER NULL) "
>                     + tableDDLOptions);
>     stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + " VALUES(?,?, ?, ?, ?)");
>     byte[] val = new byte[250];
>     for (int i = 0; i < nRows; i++) {
>         stmt.setString(1, Character.toString((char)('a' + i)) + Bytes.toString(val));
>         stmt.setInt(2, i);
>         stmt.setInt(3, i);
>         stmt.setInt(4, i);
>         stmt.setInt(5, i);
>         stmt.executeUpdate();
>     }
>     conn.commit();
>     stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, c.v, d.v) VALUES(?,?,?)");
>     for (int i = 0; i < 5; i++) {
>         stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + Bytes.toString(val));
>         stmt.setInt(2, i);
>         stmt.setInt(3, i);
>         stmt.executeUpdate();
>     }
>     conn.commit();
>     ResultSet rs;
>     String actualExplainPlan;
>     collectStatistics(conn, fullTableName);
>     List<KeyRange> keyRanges = getAllSplits(conn, fullTableName);
>     assertEquals(26, keyRanges.size());
>     rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName);
>     actualExplainPlan = QueryUtil.getExplainPlan(rs);
>     assertEquals(
>             "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? (mutable ? "12530" : "14190")
>                     : (TransactionFactory.Provider.OMID.name().equals(transactionProvider)) ? "25320" : "12420")
>                     + " BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName,
>             actualExplainPlan);
>     ConnectionQueryServices services = conn.unwrap(PhoenixConnection.class).getQueryServices();
>     List<HRegionLocation> regions = services.getAllTableRegions(Bytes.toBytes(physicalTableName));
>     assertEquals(1, regions.size());
>     collectStatistics(conn, fullTableName, Long.toString(1000));
>     keyRanges = getAllSplits(conn, fullTableName);
>     boolean oneCellPerColFamilyStorageScheme = !mutable && columnEncoded;
>     boolean hasShadowCells = TransactionFactory.Provider.OMID.name().equals(transactionProvider);
>     assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ? 24 : 13, keyRanges.size());
>     rs = conn.createStatement().executeQuery(
>             "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH),COUNT(*) from \"SYSTEM\".STATS where PHYSICAL_NAME = '"
>                     + physicalTableName + "' GROUP BY COLUMN_FAMILY ORDER BY COLUMN_FAMILY");
>     assertTrue(rs.next());
>     assertEquals("A", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? (mutable ? 12530 : 14190) : hasShadowCells ? 25320 : 12420, rs.getInt(3));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ? 23 : 12, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("B", rs.getString(1));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2));
>     assertEquals(columnEncoded ? (mutable ? 5600 : 7260) : hasShadowCells ? 11260 : 5540, rs.getInt(3));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? 10 : 5, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("C", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? (mutable ? 7005 : 7280) : hasShadowCells ? 14085 : 6930, rs.getInt(3));
>     assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("D", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? (mutable ? 7005 : 7280) : hasShadowCells ? 14085 : 6930, rs.getInt(3));
>     assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4));
>     assertFalse(rs.next());
>     // Disable stats
>     conn.createStatement().execute("ALTER TABLE " + fullTableName
>             + " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=0");
>     collectStatistics(conn, fullTableName);
>     // Assert that there are no more guideposts
>     rs = conn.createStatement().executeQuery("SELECT count(1) FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME
>             + " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "='" + physicalTableName
>             + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS NOT NULL");
>     assertTrue(rs.next());
>     assertEquals(0, rs.getLong(1));
>     assertFalse(rs.next());
>     rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName);
>     actualExplainPlan = QueryUtil.getExplainPlan(rs);
>     assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName, actualExplainPlan);
> }
> {code}
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)