[
https://issues.apache.org/jira/browse/PHOENIX-5265?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Viraj Jasani updated PHOENIX-5265:
----------------------------------
Summary: [UMBRELLA] Phoenix Test should use object-based Plan for result
comparison instead of using hard-coded comparison (was: [UMBRELLA] Phoenix
Test should use gold files for result comparison instead of using hard-coded
comparison.)
> [UMBRELLA] Phoenix Test should use object-based Plan for result comparison
> instead of using hard-coded comparison
> ------------------------------------------------------------------------------------------------------------------
>
> Key: PHOENIX-5265
> URL: https://issues.apache.org/jira/browse/PHOENIX-5265
> Project: Phoenix
> Issue Type: Improvement
> Reporter: Bin Shi
> Assignee: Viraj Jasani
> Priority: Major
> Labels: hardening, phoenix-hardening
>
> Currently, in Phoenix tests, the comparison of the returned result with the
> expected result is always hard-coded. This is especially widespread in E2E and
> integration tests that compare query results, including the output of the
> EXPLAIN query plan. It has significantly impaired the productivity and
> efficiency of working on Phoenix.
> The right approach is for each test case to write the generated result to a
> file under a new directory and then compare that file with a gold file under
> the "gold" directory. After we make some code changes and confirm they are
> correct by manually verifying a few specific test cases, we can safely replace
> the whole "gold" directory with the directory containing all the newly
> generated result files from the test run. In this way, we can easily rebase
> all the tests.
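> A minimal sketch of what such a gold-file assertion helper could look like is
> below. The directory layout, file names, and the "regenerate.gold" system
> property are assumptions made for illustration only; they are not existing
> Phoenix utilities.
> {code:java}
> import static org.junit.Assert.assertEquals;
>
> import java.io.IOException;
> import java.nio.charset.StandardCharsets;
> import java.nio.file.Files;
> import java.nio.file.Path;
> import java.nio.file.Paths;
>
> public class GoldFileAssert {
>
>     // Hypothetical helper: compares a test's actual output against a checked-in gold file.
>     // Running with -Dregenerate.gold=true rewrites the gold file instead of asserting,
>     // which is how the whole suite could be "rebased" after an intentional behaviour change.
>     public static void assertMatchesGoldFile(String testName, String actualOutput) throws IOException {
>         Path goldFile = Paths.get("src/it/resources/gold", testName + ".txt");   // assumed layout
>         Path actualFile = Paths.get("target/actual-results", testName + ".txt"); // assumed layout
>
>         // Always write the actual output, so a fresh "gold" directory can be harvested from a run.
>         Files.createDirectories(actualFile.getParent());
>         Files.write(actualFile, actualOutput.getBytes(StandardCharsets.UTF_8));
>
>         if (Boolean.getBoolean("regenerate.gold")) {
>             Files.createDirectories(goldFile.getParent());
>             Files.write(goldFile, actualOutput.getBytes(StandardCharsets.UTF_8));
>             return;
>         }
>
>         String expected = new String(Files.readAllBytes(goldFile), StandardCharsets.UTF_8);
>         assertEquals("Output differs from gold file " + goldFile, expected, actualOutput);
>     }
> }
> {code}
> A test like testWithMultiCF could then end with
> GoldFileAssert.assertMatchesGoldFile("testWithMultiCF", actualExplainPlan)
> instead of the hard-coded string assert.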
> For example, in BaseStatsCollectorIT.java we verify the estimated row size in
> the returned result of the EXPLAIN query plan. The row size is decided by many
> factors: whether the columns are encoded, whether the table is mutable, whether
> a transaction provider is used, and whether that provider is TEPHRA or OMID.
> The code snippet "testWithMultiCF" below shows a typical test case; the
> comparisons of the returned result with the expected result are hard-coded in
> those asserts. Now imagine we change the way stats are collected, the column
> encoding scheme, or the cell storage format for TEPHRA or OMID, all of which
> are very likely to happen. Then we need to manually change all those hard-coded
> comparisons everywhere, and it isn't trivial to re-calculate all the expected
> row sizes under the different conditions. Today it might take one or two weeks
> to rebase all the tests, which is a huge waste. We should use "gold" files
> here, so that we can rebase the tests very easily.
> BTW, the newly generated test result files and the gold files should be in XML
> or JSON. The result of the "EXPLAIN" query should be in XML or JSON too,
> because those formats match the tree structure of a query plan.
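> As a rough illustration of the object-based/JSON idea, the EXPLAIN output could
> be captured in a small plan object and serialized with a JSON library such as
> Jackson, so a gold-file diff points at the exact plan attribute that changed.
> The PlanSummary class and its fields below are hypothetical; they are not
> Phoenix's actual plan objects.
> {code:java}
> import com.fasterxml.jackson.databind.ObjectMapper;
>
> // Hypothetical value object describing the interesting parts of an EXPLAIN plan.
> // Asserts (or a gold-file diff) can then target individual fields instead of one
> // string that mixes chunk counts, byte estimates and the table name together.
> public class PlanSummary {
>     public int chunks;
>     public long estimatedRows;
>     public long estimatedBytes;
>     public String scanType;   // e.g. "FULL SCAN"
>     public String tableName;
>
>     // Serialize to pretty-printed JSON so it can be written to and compared with a gold file.
>     public String toJson() throws Exception {
>         return new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
>     }
> }
> {code}
> Such an object could be written and compared with the same gold-file helper
> sketched above, or asserted on field by field (for example only estimatedBytes)
> when a test cares about a single attribute.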
> {code:java}
> @Test
> public void testWithMultiCF() throws Exception {
>     int nRows = 20;
>     Connection conn = getConnection(0);
>     PreparedStatement stmt;
>     conn.createStatement().execute(
>             "CREATE TABLE " + fullTableName
>                     + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, c.v INTEGER NULL, d.v INTEGER NULL) "
>                     + tableDDLOptions);
>     stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + " VALUES(?,?, ?, ?, ?)");
>     byte[] val = new byte[250];
>     for (int i = 0; i < nRows; i++) {
>         stmt.setString(1, Character.toString((char)('a' + i)) + Bytes.toString(val));
>         stmt.setInt(2, i);
>         stmt.setInt(3, i);
>         stmt.setInt(4, i);
>         stmt.setInt(5, i);
>         stmt.executeUpdate();
>     }
>     conn.commit();
>     stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, c.v, d.v) VALUES(?,?,?)");
>     for (int i = 0; i < 5; i++) {
>         stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + Bytes.toString(val));
>         stmt.setInt(2, i);
>         stmt.setInt(3, i);
>         stmt.executeUpdate();
>     }
>     conn.commit();
>     ResultSet rs;
>     String actualExplainPlan;
>     collectStatistics(conn, fullTableName);
>     List<KeyRange> keyRanges = getAllSplits(conn, fullTableName);
>     assertEquals(26, keyRanges.size());
>     rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName);
>     actualExplainPlan = QueryUtil.getExplainPlan(rs);
>     assertEquals(
>             "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? (mutable ? "12530" : "14190")
>                     : (TransactionFactory.Provider.OMID.name().equals(transactionProvider)) ? "25320" : "12420")
>                     + " BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName,
>             actualExplainPlan);
>     ConnectionQueryServices services = conn.unwrap(PhoenixConnection.class).getQueryServices();
>     List<HRegionLocation> regions = services.getAllTableRegions(Bytes.toBytes(physicalTableName));
>     assertEquals(1, regions.size());
>     collectStatistics(conn, fullTableName, Long.toString(1000));
>     keyRanges = getAllSplits(conn, fullTableName);
>     boolean oneCellPerColFamilyStorageScheme = !mutable && columnEncoded;
>     boolean hasShadowCells = TransactionFactory.Provider.OMID.name().equals(transactionProvider);
>     assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ? 24 : 13, keyRanges.size());
>     rs = conn.createStatement().executeQuery(
>             "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH),COUNT(*) from \"SYSTEM\".STATS where PHYSICAL_NAME = '"
>                     + physicalTableName + "' GROUP BY COLUMN_FAMILY ORDER BY COLUMN_FAMILY");
>     assertTrue(rs.next());
>     assertEquals("A", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? (mutable ? 12530 : 14190) : hasShadowCells ? 25320 : 12420, rs.getInt(3));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ? 23 : 12, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("B", rs.getString(1));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2));
>     assertEquals(columnEncoded ? (mutable ? 5600 : 7260) : hasShadowCells ? 11260 : 5540, rs.getInt(3));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? 10 : 5, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("C", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? (mutable ? 7005 : 7280) : hasShadowCells ? 14085 : 6930, rs.getInt(3));
>     assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("D", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? (mutable ? 7005 : 7280) : hasShadowCells ? 14085 : 6930, rs.getInt(3));
>     assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4));
>     assertFalse(rs.next());
>     // Disable stats
>     conn.createStatement().execute("ALTER TABLE " + fullTableName
>             + " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=0");
>     collectStatistics(conn, fullTableName);
>     // Assert that there are no more guideposts
>     rs = conn.createStatement().executeQuery("SELECT count(1) FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME
>             + " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "='" + physicalTableName
>             + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS NOT NULL");
>     assertTrue(rs.next());
>     assertEquals(0, rs.getLong(1));
>     assertFalse(rs.next());
>     rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName);
>     actualExplainPlan = QueryUtil.getExplainPlan(rs);
>     assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName, actualExplainPlan);
> }
> {code}
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)