Hi Charles,
Below is an example of using the mock storage; I am using this now for testing
my new code (Hash Aggregation spilling), so this specific test will not work
for you yet.
The query below - "SELECT empid_s17, dept_i, branch_i, AVG(salary_i) FROM
`mock`.`employee_1200K` GROUP BY empid_s17, dept_i, branch_i";
shows that you just make up the names for the table and the columns, followed
by the size (for the table) and the column type ( “i” for integer, “d” for
float, “s<size>” for a varchar + size).
Not sure whether all the imports used here are available in 1.10; if not, you’d need the latest code.
Boaz
package org.apache.drill.exec.physical.impl.agg;
import ch.qos.logback.classic.Level;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.physical.impl.aggregate.HashAggTemplate;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.proto.UserBitShared;
import org.apache.drill.test.ClientFixture;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.FixtureBuilder;
import org.apache.drill.test.LogFixture;
import org.apache.drill.test.ProfileParser;
import org.apache.drill.test.QueryBuilder;
import org.junit.Ignore;
import org.junit.Test;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
* Test spilling for the Hash Aggr operator (using the mock reader)
*/
public class TestHashAggrSpill extends BaseTestQuery {
private void runAndDump(ClientFixture client, String sql, long
expectedRows, long spillCycle, long spilledPartitions) throws Exception {
String plan = client.queryBuilder().sql(sql).explainJson();
QueryBuilder.QuerySummary summary =
client.queryBuilder().sql(sql).run();
if ( expectedRows > 0 ) {
assertEquals(expectedRows, summary.recordCount());
}
System.out.println(String.format("======== \n Results: %,d records, %d
batches, %,d ms\n ========", summary.recordCount(), summary.batchCount(),
summary.runTimeMs() ) );
System.out.println("Query ID: " + summary.queryIdString());
ProfileParser profile = client.parseProfile(summary.queryIdString());
profile.print();
List<ProfileParser.OperatorProfile> ops =
profile.getOpsOfType(UserBitShared.CoreOperatorType.HASH_AGGREGATE_VALUE);
assertTrue( ! ops.isEmpty() );
// check for the first op only
ProfileParser.OperatorProfile hag = ops.get(0);
long opCycle =
hag.getMetric(HashAggTemplate.Metric.SPILL_CYCLE.ordinal());
assertEquals(spillCycle, opCycle);
long op_spilled_partitions =
hag.getMetric(HashAggTemplate.Metric.SPILLED_PARTITIONS.ordinal());
assertEquals(spilledPartitions, op_spilled_partitions);
}
/**
* Test "normal" spilling: Only 2 partitions (out of 4) would require
spilling
* ("normal spill" means spill-cycle = 1 )
*
* @throws Exception
*/
@Test
public void testHashAggrSpill() throws Exception {
LogFixture.LogFixtureBuilder logBuilder = LogFixture.builder()
.toConsole()
.logger("org.apache.drill.exec.physical.impl.aggregate", Level.WARN)
;
FixtureBuilder builder = ClusterFixture.builder()
.configProperty(ExecConstants.HASHAGG_MAX_MEMORY_KEY,"46000kB")
.configProperty(ExecConstants.HASHAGG_NUM_PARTITIONS_KEY,16)
// .sessionOption(PlannerSettings.EXCHANGE.getOptionName(), true)
.maxParallelization(2)
.saveProfiles()
//.keepLocalFiles()
;
try (LogFixture logs = logBuilder.build();
ClusterFixture cluster = builder.build();
ClientFixture client = cluster.clientFixture()) {
String sql = "SELECT empid_s17, dept_i, branch_i, AVG(salary_i)
FROM `mock`.`employee_1200K` GROUP BY empid_s17, dept_i, branch_i";
runAndDump(client, sql, 1_200_000, 1, 2);
}
}
}
On 3/31/17, 7:59 AM, "Charles Givre" <[email protected]> wrote:
Hello there,
Is there any documentation for the new mock storage engine? It looks
really useful.
Thanks,
- Charles