Hi Charles,
Below is an example of using the mock storage; I am using this now for testing
my new code (Hash Aggregation spilling), so this specific test will not work
for you yet.
The query below - "SELECT empid_s17, dept_i, branch_i, AVG(salary_i) FROM
`mock`.`employee_1200K` GROUP BY empid_s17, dept_i, branch_i";
shows that you just make up the names for the table and the columns, followed
by the size (for the table) and the column type ( “i” for integer, “d” for
float, “s<size>” for a varchar + size).
Not sure whether all the imports used here are available in 1.10; if not, you’d need the latest code.
Boaz
package org.apache.drill.exec.physical.impl.agg;
import ch.qos.logback.classic.Level;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.physical.impl.aggregate.HashAggTemplate;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.proto.UserBitShared;
import org.apache.drill.test.ClientFixture;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.FixtureBuilder;
import org.apache.drill.test.LogFixture;
import org.apache.drill.test.ProfileParser;
import org.apache.drill.test.QueryBuilder;
import org.junit.Ignore;
import org.junit.Test;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
* Test spilling for the Hash Aggr operator (using the mock reader)
*/
public class TestHashAggrSpill extends BaseTestQuery {
private void runAndDump(ClientFixture client, String sql, long
expectedRows, long spillCycle, long spilledPartitions) throws Exception {
String plan = client.queryBuilder().sql(sql).explainJson();
QueryBuilder.QuerySummary summary =
client.queryBuilder().sql(sql).run();
if ( expectedRows > 0 ) {
assertEquals(expectedRows, summary.recordCount());
}
System.out.println(String.format("======== \n Results: %,d records, %d
batches, %,d ms\n ========", summary.recordCount(), summary.batchCount(),
summary.runTimeMs() ) );
System.out.println("Query ID: " + summary.queryIdString());
ProfileParser profile = client.parseProfile(summary.queryIdString());
profile.print();
List<ProfileParser.OperatorProfile> ops =
profile.getOpsOfType(UserBitShared.CoreOperatorType.HASH_AGGREGATE_VALUE);
assertTrue( ! ops.isEmpty() );
// check for the first op only
ProfileParser.OperatorProfile hag = ops.get(0);
long opCycle =
hag.getMetric(HashAggTemplate.Metric.SPILL_CYCLE.ordinal());
assertEquals(spillCycle, opCycle);
long op_spilled_partitions =
hag.getMetric(HashAggTemplate.Metric.SPILLED_PARTITIONS.ordinal());
assertEquals(spilledPartitions, op_spilled_partitions);
}
/**
* Test "normal" spilling: Only 2 partitions (out of 4) would require
spilling
* ("normal spill" means spill-cycle = 1 )
*
* @throws Exception
*/
@Test
public void testHashAggrSpill() throws Exception {
LogFixture.LogFixtureBuilder logBuilder = LogFixture.builder()
.toConsole()
.logger("org.apache.drill.exec.physical.impl.aggregate", Level.WARN)
;
FixtureBuilder builder = ClusterFixture.builder()
.configProperty(ExecConstants.HASHAGG_MAX_MEMORY_KEY,"46000kB")
.configProperty(ExecConstants.HASHAGG_NUM_PARTITIONS_KEY,16)
// .sessionOption(PlannerSettings.EXCHANGE.getOptionName(), true)
.maxParallelization(2)
.saveProfiles()
//.keepLocalFiles()
;
try (LogFixture logs = logBuilder.build();
ClusterFixture cluster = builder.build();
ClientFixture client = cluster.clientFixture()) {
String sql = "SELECT empid_s17, dept_i, branch_i, AVG(salary_i)
FROM `mock`.`employee_1200K` GROUP BY empid_s17, dept_i, branch_i";
runAndDump(client, sql, 1_200_000, 1, 2);
}
}
}
On 3/31/17, 7:59 AM, "Charles Givre" <[email protected]> wrote:
Hello there,
Is there any documentation for the new mock storage engine? It looks
really useful.
Thanks,
- Charles