Github user paul-rogers commented on a diff in the pull request:

https://github.com/apache/drill/pull/984#discussion_r145298677

--- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/agg/TestHashAggrSpill.java ---
@@ -43,146 +44,122 @@ import static org.junit.Assert.assertTrue;
 /**
- * Test spilling for the Hash Aggr operator (using the mock reader)
+ * Test spilling for the Hash Aggr operator (using the mock reader)
  */
 @Category({SlowTest.class, OperatorTest.class})
 public class TestHashAggrSpill {
-  private void runAndDump(ClientFixture client, String sql, long expectedRows, long spillCycle, long fromSpilledPartitions, long toSpilledPartitions) throws Exception {
-    String plan = client.queryBuilder().sql(sql).explainJson();
-
-    QueryBuilder.QuerySummary summary = client.queryBuilder().sql(sql).run();
-    if ( expectedRows > 0 ) {
-      assertEquals(expectedRows, summary.recordCount());
-    }
-    // System.out.println(String.format("======== \n Results: %,d records, %d batches, %,d ms\n ========", summary.recordCount(), summary.batchCount(), summary.runTimeMs() ) );
-
-    //System.out.println("Query ID: " + summary.queryIdString());
-    ProfileParser profile = client.parseProfile(summary.queryIdString());
-    //profile.print();
-    List<ProfileParser.OperatorProfile> ops = profile.getOpsOfType(UserBitShared.CoreOperatorType.HASH_AGGREGATE_VALUE);
-
-    assertTrue( ! ops.isEmpty() );
-    // check for the first op only
-    ProfileParser.OperatorProfile hag0 = ops.get(0);
-    long opCycle = hag0.getMetric(HashAggTemplate.Metric.SPILL_CYCLE.ordinal());
-    assertEquals(spillCycle, opCycle);
-    long op_spilled_partitions = hag0.getMetric(HashAggTemplate.Metric.SPILLED_PARTITIONS.ordinal());
-    assertTrue( op_spilled_partitions >= fromSpilledPartitions && op_spilled_partitions <= toSpilledPartitions );
-    /* assertEquals(3, ops.size());
-    for ( int i = 0; i < ops.size(); i++ ) {
-      ProfileParser.OperatorProfile hag = ops.get(i);
-      long cycle = hag.getMetric(HashAggTemplate.Metric.SPILL_CYCLE.ordinal());
-      long num_partitions = hag.getMetric(HashAggTemplate.Metric.NUM_PARTITIONS.ordinal());
-      long spilled_partitions = hag.getMetric(HashAggTemplate.Metric.SPILLED_PARTITIONS.ordinal());
-      long mb_spilled = hag.getMetric(HashAggTemplate.Metric.SPILL_MB.ordinal());
-      System.out.println(String.format("(%d) Spill cycle: %d, num partitions: %d, spilled partitions: %d, MB spilled: %d", i,cycle, num_partitions, spilled_partitions,
-        mb_spilled));
-    } */
-  }
+  @Rule
+  public final DirTestWatcher dirTestWatcher = new DirTestWatcher();
-  /**
-   * A template for Hash Aggr spilling tests
-   *
-   * @throws Exception
-   */
-  private void testSpill(long maxMem, long numPartitions, long minBatches, int maxParallel, boolean fallback ,boolean predict,
-      String sql, long expectedRows, int cycle, int fromPart, int toPart) throws Exception {
-    LogFixture.LogFixtureBuilder logBuilder = LogFixture.builder()
-      .toConsole()
-      //.logger("org.apache.drill.exec.physical.impl.aggregate", Level.INFO)
-      .logger("org.apache.drill", Level.WARN)
-      ;
-
-    FixtureBuilder builder = ClusterFixture.builder()
-      .sessionOption(ExecConstants.HASHAGG_MAX_MEMORY_KEY,maxMem)
-      .sessionOption(ExecConstants.HASHAGG_NUM_PARTITIONS_KEY,numPartitions)
-      .sessionOption(ExecConstants.HASHAGG_MIN_BATCHES_PER_PARTITION_KEY,minBatches)
-      .configProperty(ExecConstants.SYS_STORE_PROVIDER_LOCAL_ENABLE_WRITE, false)
-      .sessionOption(PlannerSettings.FORCE_2PHASE_AGGR_KEY,true)
-      // .sessionOption(PlannerSettings.EXCHANGE.getOptionName(), true)
-      .sessionOption(ExecConstants.HASHAGG_FALLBACK_ENABLED_KEY, fallback)
-      .sessionOption(ExecConstants.HASHAGG_USE_MEMORY_PREDICTION_KEY,predict)
-
-      .maxParallelization(maxParallel)
-      .saveProfiles()
-      //.keepLocalFiles()
-      ;
-    String sqlStr = sql != null ? sql : // if null then use this default query
-      "SELECT empid_s17, dept_i, branch_i, AVG(salary_i) FROM `mock`.`employee_1200K` GROUP BY empid_s17, dept_i, branch_i";
-
-    try (LogFixture logs = logBuilder.build();
-         ClusterFixture cluster = builder.build();
-         ClientFixture client = cluster.clientFixture()) {
-      runAndDump(client, sqlStr, expectedRows, cycle, fromPart,toPart);
-    }
-  }
-  /**
-   * Test "normal" spilling: Only 2 (or 3) partitions (out of 4) would require spilling
-   * ("normal spill" means spill-cycle = 1 )
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testSimpleHashAggrSpill() throws Exception {
-    testSpill(68_000_000, 16, 2, 2, false, true, null,
-      1_200_000, 1,2, 3
-    );
-  }
-  /**
-   * Test with "needed memory" prediction turned off
-   * (i.e., do exercise code paths that catch OOMs from the Hash Table and recover)
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testNoPredictHashAggrSpill() throws Exception {
-    testSpill(58_000_000, 16, 2, 2, false,false /* no prediction */,
-      null,1_200_000, 1,1, 1
-    );
+  private void runAndDump(ClientFixture client, String sql, long expectedRows, long spillCycle, long fromSpilledPartitions, long toSpilledPartitions) throws Exception {
--- End diff --

Boaz inherited this from one of my tests, but I use the "run and dump" pattern only in ad-hoc tests when I need lots of detail for debugging. It would be nice to turn off the "dump" part when running the tests as part of the suite. Also, when running in a suite, Boaz is not there to verify the results. This test needs a way to ensure that the results are correct.
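For example, something along these lines inside this test class would keep the correctness check in suite runs while making the verbose output opt-in. This is only a sketch: the runAndVerify name, the DUMP_RESULTS flag, and the drill.test.dumpResults system property are made up for illustration, not anything in this PR; the ClientFixture, QueryBuilder, and ProfileParser calls are the ones already used above.

    // Sketch only: DUMP_RESULTS and the "drill.test.dumpResults" property are hypothetical names.
    private static final boolean DUMP_RESULTS = Boolean.getBoolean("drill.test.dumpResults");

    private void runAndVerify(ClientFixture client, String sql, long expectedRows) throws Exception {
      QueryBuilder.QuerySummary summary = client.queryBuilder().sql(sql).run();
      // The correctness check always runs, even as part of the suite.
      assertEquals(expectedRows, summary.recordCount());
      if (DUMP_RESULTS) {
        // Verbose profile output only for ad-hoc debugging runs.
        ProfileParser profile = client.parseProfile(summary.queryIdString());
        profile.print();
      }
    }

The spill-cycle and spilled-partition assertions on the profile could stay as they are; only the printing would move behind the flag.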
---