Github user paul-rogers commented on a diff in the pull request:
https://github.com/apache/drill/pull/984#discussion_r145298677
--- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/agg/TestHashAggrSpill.java ---
@@ -43,146 +44,122 @@
import static org.junit.Assert.assertTrue;
/**
- * Test spilling for the Hash Aggr operator (using the mock reader)
+ * Test spilling for the Hash Aggr operator (using the mock reader)
 */
@Category({SlowTest.class, OperatorTest.class})
public class TestHashAggrSpill {
-  private void runAndDump(ClientFixture client, String sql, long expectedRows, long spillCycle, long fromSpilledPartitions, long toSpilledPartitions) throws Exception {
-    String plan = client.queryBuilder().sql(sql).explainJson();
-
-    QueryBuilder.QuerySummary summary = client.queryBuilder().sql(sql).run();
-    if ( expectedRows > 0 ) {
-      assertEquals(expectedRows, summary.recordCount());
-    }
-    // System.out.println(String.format("======== \n Results: %,d records, %d batches, %,d ms\n ========", summary.recordCount(), summary.batchCount(), summary.runTimeMs() ) );
-
-    //System.out.println("Query ID: " + summary.queryIdString());
-    ProfileParser profile = client.parseProfile(summary.queryIdString());
-    //profile.print();
-    List<ProfileParser.OperatorProfile> ops = profile.getOpsOfType(UserBitShared.CoreOperatorType.HASH_AGGREGATE_VALUE);
-
-    assertTrue( ! ops.isEmpty() );
-    // check for the first op only
-    ProfileParser.OperatorProfile hag0 = ops.get(0);
-    long opCycle = hag0.getMetric(HashAggTemplate.Metric.SPILL_CYCLE.ordinal());
-    assertEquals(spillCycle, opCycle);
-    long op_spilled_partitions = hag0.getMetric(HashAggTemplate.Metric.SPILLED_PARTITIONS.ordinal());
-    assertTrue( op_spilled_partitions >= fromSpilledPartitions && op_spilled_partitions <= toSpilledPartitions );
-    /* assertEquals(3, ops.size());
-    for ( int i = 0; i < ops.size(); i++ ) {
-      ProfileParser.OperatorProfile hag = ops.get(i);
-      long cycle = hag.getMetric(HashAggTemplate.Metric.SPILL_CYCLE.ordinal());
-      long num_partitions = hag.getMetric(HashAggTemplate.Metric.NUM_PARTITIONS.ordinal());
-      long spilled_partitions = hag.getMetric(HashAggTemplate.Metric.SPILLED_PARTITIONS.ordinal());
-      long mb_spilled = hag.getMetric(HashAggTemplate.Metric.SPILL_MB.ordinal());
-      System.out.println(String.format("(%d) Spill cycle: %d, num partitions: %d, spilled partitions: %d, MB spilled: %d", i,cycle, num_partitions, spilled_partitions,
-          mb_spilled));
-    } */
-  }
+  @Rule
+  public final DirTestWatcher dirTestWatcher = new DirTestWatcher();
-  /**
-   * A template for Hash Aggr spilling tests
-   *
-   * @throws Exception
-   */
-  private void testSpill(long maxMem, long numPartitions, long minBatches, int maxParallel, boolean fallback ,boolean predict,
-                         String sql, long expectedRows, int cycle, int fromPart, int toPart) throws Exception {
-    LogFixture.LogFixtureBuilder logBuilder = LogFixture.builder()
-      .toConsole()
-      //.logger("org.apache.drill.exec.physical.impl.aggregate", Level.INFO)
-      .logger("org.apache.drill", Level.WARN)
-      ;
-
-    FixtureBuilder builder = ClusterFixture.builder()
-      .sessionOption(ExecConstants.HASHAGG_MAX_MEMORY_KEY,maxMem)
-      .sessionOption(ExecConstants.HASHAGG_NUM_PARTITIONS_KEY,numPartitions)
-      .sessionOption(ExecConstants.HASHAGG_MIN_BATCHES_PER_PARTITION_KEY,minBatches)
-      .configProperty(ExecConstants.SYS_STORE_PROVIDER_LOCAL_ENABLE_WRITE, false)
-      .sessionOption(PlannerSettings.FORCE_2PHASE_AGGR_KEY,true)
-      // .sessionOption(PlannerSettings.EXCHANGE.getOptionName(), true)
-      .sessionOption(ExecConstants.HASHAGG_FALLBACK_ENABLED_KEY, fallback)
-      .sessionOption(ExecConstants.HASHAGG_USE_MEMORY_PREDICTION_KEY,predict)
-
-      .maxParallelization(maxParallel)
-      .saveProfiles()
-      //.keepLocalFiles()
-      ;
-    String sqlStr = sql != null ? sql : // if null then use this default query
-      "SELECT empid_s17, dept_i, branch_i, AVG(salary_i) FROM `mock`.`employee_1200K` GROUP BY empid_s17, dept_i, branch_i";
-
-    try (LogFixture logs = logBuilder.build();
-         ClusterFixture cluster = builder.build();
-         ClientFixture client = cluster.clientFixture()) {
-      runAndDump(client, sqlStr, expectedRows, cycle, fromPart,toPart);
-    }
-  }
-  /**
-   * Test "normal" spilling: Only 2 (or 3) partitions (out of 4) would require spilling
-   * ("normal spill" means spill-cycle = 1 )
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testSimpleHashAggrSpill() throws Exception {
-    testSpill(68_000_000, 16, 2, 2, false, true, null,
-        1_200_000, 1,2, 3
-    );
-  }
-  /**
-   * Test with "needed memory" prediction turned off
-   * (i.e., do exercise code paths that catch OOMs from the Hash Table and recover)
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testNoPredictHashAggrSpill() throws Exception {
-    testSpill(58_000_000, 16, 2, 2, false,false /* no prediction */,
-        null,1_200_000, 1,1, 1
-    );
+  private void runAndDump(ClientFixture client, String sql, long expectedRows, long spillCycle, long fromSpilledPartitions, long toSpilledPartitions) throws Exception {
--- End diff --
Boaz inherited this from one of my tests, but I use the "run and dump" pattern only in ad-hoc tests, when I need lots of detail for debugging. It would be nice to turn off the "dump" part when the tests run as part of the suite; see the sketch below.
Also, when running in a suite, Boaz is not there to verify the results by eye, so this test needs a way to ensure that the results are actually correct.
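Just to illustrate, here is a rough sketch (not part of this PR) of one way to gate the dump. The `drill.test.verbose` property name and the `runAndValidate` method are made up; everything else reuses the fixture and profile calls already in this test:

```java
  // Hypothetical flag: only dump profile details when explicitly requested,
  // e.g. -Ddrill.test.verbose=true on an ad-hoc run.
  private static final boolean VERBOSE = Boolean.getBoolean("drill.test.verbose");

  private void runAndValidate(ClientFixture client, String sql, long expectedRows,
                              long spillCycle, long fromSpilledPartitions,
                              long toSpilledPartitions) throws Exception {
    QueryBuilder.QuerySummary summary = client.queryBuilder().sql(sql).run();
    if (expectedRows > 0) {
      assertEquals(expectedRows, summary.recordCount());
    }
    ProfileParser profile = client.parseProfile(summary.queryIdString());
    if (VERBOSE) {
      profile.print();   // the "dump" part, off by default in suite runs
    }
    List<ProfileParser.OperatorProfile> ops =
        profile.getOpsOfType(UserBitShared.CoreOperatorType.HASH_AGGREGATE_VALUE);
    assertTrue(!ops.isEmpty());
    // Check the spill metrics of the first hash-agg operator only.
    ProfileParser.OperatorProfile hag0 = ops.get(0);
    assertEquals(spillCycle, hag0.getMetric(HashAggTemplate.Metric.SPILL_CYCLE.ordinal()));
    long spilledPartitions = hag0.getMetric(HashAggTemplate.Metric.SPILLED_PARTITIONS.ordinal());
    assertTrue(spilledPartitions >= fromSpilledPartitions
        && spilledPartitions <= toSpilledPartitions);
  }
```

Something similar could cover correctness: in addition to the row count, the test could compare a few aggregate values against a baseline run of the same query that does not spill (e.g. with a larger memory limit), so a suite run catches wrong results and not only wrong spill metrics.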
---