Yes of course:
I add the SQL and the output of EXPLAIN PLAN FOR:
-----------------------------------------
jdbc:drill:schema=dfs> explain plan for SELECT columns[4] stichtag,
columns[10] geschlecht, count(columns[0]) anzahl FROM
dfs.`/mon_ew_xt_uni_bus_11.csv.gz` where 1 = 1 and columns[23] = 1 and
columns[5] = 'AA' group by columns[4], columns[10];
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 Project(stichtag=[$0], geschlecht=[$1], anzahl=[$2])
00-02 UnionExchange
01-01 HashAgg(group=[{0, 1}], anzahl=[$SUM0($2)])
01-02 Project(stichtag=[$0], geschlecht=[$1], anzahl=[$2])
01-03 HashToRandomExchange(dist0=[[$0]], dist1=[[$1]])
02-01 UnorderedMuxExchange
03-01 Project(stichtag=[$0], geschlecht=[$1],
anzahl=[$2], E_X_P_R_H_A_S_H_F_I_E_L_D=[castInt(hash64AsDouble($1,
hash64AsDouble($0)))])
03-02 HashAgg(group=[{0, 1}], anzahl=[COUNT($2)])
03-03 Project(stichtag=[$0], geschlecht=[$1],
$f2=[$2])
03-04 SelectionVectorRemover
03-05 Filter(condition=[AND(true, =($3, 1),
=($4, 'AA'))])
03-06 Project(ITEM=[ITEM($0, 4)],
ITEM1=[ITEM($0, 10)], ITEM2=[ITEM($0, 0)], ITEM3=[ITEM($0, 23)],
ITEM4=[ITEM($0, 5)])
03-07 Scan(groupscan=[EasyGroupScan
[selectionRoot=hdfs://master:8020/mon_ew_xt_uni_bus_11.csv.gz,
numFiles=1, columns=[`columns`[4], `columns`[10], `columns`[0],
`columns`[23], `columns`[5]],
files=[hdfs://master:8020/mon_ew_xt_uni_bus_11.csv.gz]]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ {
"name" : "planner.slice_target",
"kind" : "LONG",
"type" : "SESSION",
"num_val" : 10000
} ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "fs-scan",
"@id" : 196615,
"userName" : "drill",
"files" : [ "hdfs://master:8020/mon_ew_xt_uni_bus_11.csv.gz" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "hdfs://master:8020",
"workspaces" : {
"root" : {
"location" : "/srv/hadoop/dfs/data",
"writable" : true,
"defaultInputFormat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"defaultInputFormat" : null
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ";"
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
},
"avro" : {
"type" : "avro"
}
}
},
"format" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ";"
},
"columns" : [ "`columns`[4]", "`columns`[10]", "`columns`[0]",
"`columns`[23]", "`columns`[5]" ],
"selectionRoot" : "hdfs://master:8020/mon_ew_xt_uni_bus_11.csv.gz",
"cost" : 1.4826017E7
}, {
"pop" : "project",
"@id" : 196614,
"exprs" : [ {
"ref" : "`ITEM`",
"expr" : "`columns`[4]"
}, {
"ref" : "`ITEM1`",
"expr" : "`columns`[10]"
}, {
"ref" : "`ITEM2`",
"expr" : "`columns`[0]"
}, {
"ref" : "`ITEM3`",
"expr" : "`columns`[23]"
}, {
"ref" : "`ITEM4`",
"expr" : "`columns`[5]"
} ],
"child" : 196615,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1.4826017E7
}, {
"pop" : "filter",
"@id" : 196613,
"child" : 196614,
"expr" : "booleanAnd(true, equal(`ITEM3`, 1) , equal(`ITEM4`, 'AA')
) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 333585.3825
}, {
"pop" : "selection-vector-remover",
"@id" : 196612,
"child" : 196613,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 333585.3825
}, {
"pop" : "project",
"@id" : 196611,
"exprs" : [ {
"ref" : "`stichtag`",
"expr" : "`ITEM`"
}, {
"ref" : "`geschlecht`",
"expr" : "`ITEM1`"
}, {
"ref" : "`$f2`",
"expr" : "`ITEM2`"
} ],
"child" : 196612,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 333585.3825
}, {
"pop" : "hash-aggregate",
"@id" : 196610,
"child" : 196611,
"cardinality" : 1.0,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"groupByExprs" : [ {
"ref" : "`stichtag`",
"expr" : "`stichtag`"
}, {
"ref" : "`geschlecht`",
"expr" : "`geschlecht`"
} ],
"aggrExprs" : [ {
"ref" : "`anzahl`",
"expr" : "count(`$f2`) "
} ],
"cost" : 250189.036875
}, {
"pop" : "project",
"@id" : 196609,
"exprs" : [ {
"ref" : "`stichtag`",
"expr" : "`stichtag`"
}, {
"ref" : "`geschlecht`",
"expr" : "`geschlecht`"
}, {
"ref" : "`anzahl`",
"expr" : "`anzahl`"
}, {
"ref" : "`E_X_P_R_H_A_S_H_F_I_E_L_D`",
"expr" : "castint(hash64asdouble(`geschlecht`,
hash64asdouble(`stichtag`) ) ) "
} ],
"child" : 196610,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 33358.53825
}, {
"pop" : "unordered-mux-exchange",
"@id" : 131073,
"child" : 196609,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 33358.53825
}, {
"pop" : "hash-to-random-exchange",
"@id" : 65539,
"child" : 131073,
"expr" : "`E_X_P_R_H_A_S_H_F_I_E_L_D`",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 33358.53825
}, {
"pop" : "project",
"@id" : 65538,
"exprs" : [ {
"ref" : "`stichtag`",
"expr" : "`stichtag`"
}, {
"ref" : "`geschlecht`",
"expr" : "`geschlecht`"
}, {
"ref" : "`anzahl`",
"expr" : "`anzahl`"
} ],
"child" : 65539,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 33358.53825
}, {
"pop" : "hash-aggregate",
"@id" : 65537,
"child" : 65538,
"cardinality" : 1.0,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"groupByExprs" : [ {
"ref" : "`stichtag`",
"expr" : "`stichtag`"
}, {
"ref" : "`geschlecht`",
"expr" : "`geschlecht`"
} ],
"aggrExprs" : [ {
"ref" : "`anzahl`",
"expr" : "$sum0(`anzahl`) "
} ],
"cost" : 25018.9036875
}, {
"pop" : "union-exchange",
"@id" : 2,
"child" : 65537,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 3335.8538249999997
}, {
"pop" : "project",
"@id" : 1,
"exprs" : [ {
"ref" : "`stichtag`",
"expr" : "`stichtag`"
}, {
"ref" : "`geschlecht`",
"expr" : "`geschlecht`"
}, {
"ref" : "`anzahl`",
"expr" : "`anzahl`"
} ],
"child" : 2,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 3335.8538249999997
}, {
"pop" : "screen",
"@id" : 0,
"child" : 1,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 3335.8538249999997
} ]
} |
+------+------+
1 row selected (0,764 seconds)
On 07/23/15 18:10, Abdel Hakim Deneche wrote:
> Hi Juergen,
>
> can you share the query you tried to run ?
>
> Thanks
>
> On Thu, Jul 23, 2015 at 9:10 AM, Juergen Kneissl <[email protected]> wrote:
>
>> Hi everybody,
>>
>> I installed and configured a small cluster with two machines (gnu/linux)
>> with the following setup:
>>
>> zookeeper in version 3.4.6 , drill in version 1.1.0 and also using
>> hadoop (version 2.7.1) hdfs as dist. filesystem.
>>
>> So, I am playing around a bit, but what I am still not understanding is
>> why my drill Foreman bit1 (or whoever that is in the situation) is not
>> "really" parallelizing my request. (or do I expect something from the
>> architecture that is not intended?)
>>
>>
>> I select and aggregate on a 1,4 GB gzipped csv file, and I thought at
>> least part of the query would be processed on the other drillbit.
>> (bit 2)
>>
>> For instance, in the profiles I see that Major Fragment 01 was divided
>> into four Minor Fragments (of which two were forwarded to bit 2)
>>
>> If I check on the drillbit.log file of the bit2 (in the above
>> configuration) a debug message tells me that the incoming record count
>> is 0?
>>
>> The question is: What am I doing wrong in my configuration? Has it
>> something todo with using a csv file?
>>
>> The query is also set in a way that it is clear the whole file has to be
>> read in memory. That does not concern me that much, now I just wanted to
>> check how the Foreman does the "Parallelization"
>>
>> Best Regards & Thanks for any hint
>>
>>
>> Juergen
>>
>
>
>