The code formatting in my previous message was mangled, so here it is again:
private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, MappingSet
incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs,
TypedFieldId[] buildKeyFieldIds)
throws SchemaChangeException {
cg.setMappingSet(incomingMapping);
if (keyExprs == null || keyExprs.length == 0) {
cg.getEvalBlock()._return(JExpr.lit(0));
}
String seedValue = "seedValue";
String fieldId = "fieldId";
LogicalExpression seed = ValueExpressions.getParameterExpression(seedValue,
Types.required(TypeProtos.MinorType.INT));
LogicalExpression fieldIdParamExpr =
ValueExpressions.getParameterExpression(fieldId, Types.required(
TypeProtos.MinorType.INT) );
HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr);
int i = 0;
for (LogicalExpression expr : keyExprs) {
TypedFieldId targetTypeFieldId = buildKeyFieldIds[i];
ValueExpressions.IntExpression targetBuildFieldIdExp = new
ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0],
ExpressionPosition.UNKNOWN);
JFieldRef targetBuildSideFieldId = cg.addExpr(targetBuildFieldIdExp,
ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue();
JBlock ifBlock =
cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targetBuildSideFieldId))._then();
//specify a special JBlock which is a inner one of the eval block to
the ClassGenerator to substitute the returned JBlock of getEvalBlock()
cg.setCustomizedEvalInnerBlock(ifBlock);
LogicalExpression hashExpression = HashPrelUtil.getHashExpression(expr,
seed, incomingProbe != null);
LogicalExpression materializedExpr =
ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, batch,
context.getFunctionRegistry());
HoldingContainer hash = cg.addExpr(materializedExpr,
ClassGenerator.BlkCreateMode.TRUE_IF_BOUND);
ifBlock._return(hash.getValue());
//reset the customized block to null ,so the getEvalBlock() return the
truly eval JBlock
cg.setCustomizedEvalInnerBlock(null);
i++;
}
cg.getEvalBlock()._return(JExpr.lit(0));
}
// Generated method, reproduced as emitted by the code generator (formatting untouched).
// Returns the hash of the value read at incomingRowIdx when fieldId equals
// constant14.value, and 0 otherwise. This listing contains a single fieldId branch.
// constant14 and vv15 are referenced here but declared outside this listing.
public long getBuild64HashCodeInner(int incomingRowIdx, int seedValue, int
fieldId)
throws SchemaChangeException
{
{
// Wrap the fieldId parameter in a holder so it can be compared with the constant holder.
IntHolder fieldId12 = new IntHolder();
fieldId12 .value = fieldId;
// Everything needed to compute the hash lives inside this branch.
if (fieldId12 .value == constant14 .value) {
// Read the value at incomingRowIdx through vv15's accessor.
IntHolder out18 = new IntHolder();
{
out18 .value = vv15 .getAccessor().get((incomingRowIdx));
}
// Wrap the seedValue parameter in a holder for the hash function.
IntHolder seedValue19 = new IntHolder();
seedValue19 .value = seedValue;
//---- start of eval portion of hash32AsDouble function. ----//
IntHolder out20 = new IntHolder();
{
final IntHolder out = new IntHolder();
IntHolder in = out18;
IntHolder seed = seedValue19;
// The input value is cast to double before being hashed with the seed.
Hash32WithSeedAsDouble$IntHash_eval: {
out.value =
org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) in.value,
seed.value);
}
out20 = out;
}
//---- end of eval portion of hash32AsDouble function. ----//
return out20 .value;
}
// Default result when fieldId does not match the constant above.
return 0;
}
}
On Tue, May 29, 2018 at 1:47 PM weijie tong <[email protected]> wrote:
> HI Paul:
>
> Thanks for your enthusiasm. I have already picked up this technique, as you
> mentioned it to me in another mail thread. It's really helpful, thanks for your
> valuable work.
>
> Now I have solved this tough problem by adding a customized JBlock
> member field to the ClassGenerator. When you want the getEvalBlock() of
> the ClassGenerator to return an inner customized JBlock, you set this
> member; when you want the method to return the eval JBlock itself, you reset
> this member to null.
>
> Here is my changed setup method :
>
>
> private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, MappingSet
> incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs,
> TypedFieldId[] buildKeyFieldIds)
> throws SchemaChangeException {
> cg.setMappingSet(incomingMapping);
> if (keyExprs == null || keyExprs.length == 0) {
> cg.getEvalBlock()._return(JExpr.lit(0));
> }
> String seedValue = "seedValue";
> String fieldId = "fieldId";
> LogicalExpression seed = ValueExpressions.getParameterExpression(seedValue,
> Types.required(TypeProtos.MinorType.INT));
>
> LogicalExpression fieldIdParamExpr =
> ValueExpressions.getParameterExpression(fieldId,
> Types.required(TypeProtos.MinorType.INT) );
> HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr);
> int i = 0;
> for (LogicalExpression expr : keyExprs) {
> TypedFieldId targetTypeFieldId = buildKeyFieldIds[i];
> ValueExpressions.IntExpression targetBuildFieldIdExp = new
> ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0],
> ExpressionPosition.UNKNOWN);
>
> JFieldRef targetBuildSideFieldId = cg.addExpr(targetBuildFieldIdExp,
> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue();
> JBlock ifBlock =
> cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targetBuildSideFieldId))._then();
> //specify a special JBlock which is a inner one of the eval block to the
> ClassGenerator to substitute the returned JBlock of getEvalBlock()
> cg.setCustomizedEvalInnerBlock(ifBlock);
> LogicalExpression hashExpression = HashPrelUtil.getHashExpression(expr,
> seed, incomingProbe != null);
> LogicalExpression materializedExpr =
> ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, batch,
> context.getFunctionRegistry());
> HoldingContainer hash = cg.addExpr(materializedExpr,
> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND);
> ifBlock._return(hash.getValue());
> //reset the customized block to null ,so the getEvalBlock() return the
> truly eval JBlock
> cg.setCustomizedEvalInnerBlock(null);
> i++;
> }
> cg.getEvalBlock()._return(JExpr.lit(0));
> }
>
>
> The corresponding generated codes :
>
> public long getBuild64HashCodeInner(int incomingRowIdx, int seedValue,
> int fieldId)
> throws SchemaChangeException
> {
> {
> IntHolder fieldId12 = new IntHolder();
> fieldId12 .value = fieldId;
> if (fieldId12 .value == constant14 .value) {
> IntHolder out18 = new IntHolder();
> {
> out18 .value = vv15 .getAccessor().get((incomingRowIdx));
> }
> IntHolder seedValue19 = new IntHolder();
> seedValue19 .value = seedValue;
> //---- start of eval portion of hash32AsDouble function.
> ----//
> IntHolder out20 = new IntHolder();
> {
> final IntHolder out = new IntHolder();
> IntHolder in = out18;
> IntHolder seed = seedValue19;
>
> Hash32WithSeedAsDouble$IntHash_eval: {
> out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double)
> in.value, seed.value);
> }
>
> out20 = out;
> }
> //---- end of eval portion of hash32AsDouble function. ----//
> return out20 .value;
> }
> return 0;
> }
> }
>
>
>
> Some other explanation:
> 1st: The if check won't hurt performance, because I invoke this
> method column by column, so it is branch-prediction friendly.
> 2nd: I will use murmur3_64 rather than murmur3_32, since an efficient
> bloom filter algorithm needs a 64-bit hash code to avoid collisions.
>
>
>
>
>
>
>
>
>
>
>
>
>
> On Tue, May 29, 2018 at 12:37 PM Paul Rogers <[email protected]>
> wrote:
>
>> Hi Weijie,
>>
>> Seeing the discussion about the details of JCodeModel suggests you may be
>> trying to debug your generated code at the level of the code generator.
>>
>> Some time ago we added the ability to step through the generated code.
>> Look for the following line in the generator code:
>>
>>
>> // Uncomment out this line to debug the generated code.
>>
>> // cg.saveCodeForDebugging(true);
>>
>>
>> Uncomment the code line and Drill will save each generated file to a
>> configured location (which, if I recall correctly, is /tmp/drill/codegen,
>> though it may have changed after Tim's test directory changes.)
>>
>> Then, set a breakpoint in the template setup() method and you can step
>> directly into the generated doSetup() method. Same for the eval() method.
>>
>> This way, you can not only see the generated code, you can step through
>> it. I've found this to be a far easier way to understand the generated code
>> than the older techniques folks have used (look at byte codes, use print
>> statements, brute force reasoning, etc.)
>>
>> Tim, Boaz and others have used this technique more recently and can
>> probably give you additional pointers.
>>
>> Thanks,
>> - Paul
>>
>>
>>
>> On Monday, May 28, 2018, 8:52:19 PM PDT, weijie tong <
>> [email protected]> wrote:
>>
>> @aman thanks for your reply. "For the ifBlock, do you need an _else()
>> block
>> also ?" I give a default return logic at the method, so I don't need the
>> _else() block. I have noticed the IfExpression's evaluation method at
>> EvaluationVisitor which also uses the JConditional . But that also doesn't
>> match my requirement. I think the key point here is the
>> FunctionHolderExpression and ValueVectorReadExpression will put their
>> corresponding generated codes to the eval method's JBlock , not our
>> specific IfBlock which is a inner block of the eval method's JBlock .
>>
>> So it seems I should make some changes to the ClassGenerator to let the
>> getEvalBlock return the IfBlock (maybe accurately the JConditional's then
>> block) or implement some special FunctionHolderExpression
>> 、ValueVectorReadExpression and corresponding visiting methods at the
>> EvaluationVisitor to generate the special code blocks. Hope someone who
>> are
>> familiar with these part of codes to point out whether there are more easy
>> or different choices to achieve the target.
>>
>> To make discussion more accurate, I put the generated codes of the
>> previous
>> setupGetBuild64Hash method here:
>>
>> public long getBuild64HashCodeInner(int incomingRowIdx, int
>> seedValue, int fieldId)
>> throws SchemaChangeException
>> {
>> {
>> IntHolder fieldId16 = new IntHolder();
>> fieldId16 .value = fieldId;
>> if (fieldId16 .value == constant18 .value) {
>> return out24 .value;
>> }
>> IntHolder out22 = new IntHolder();
>> {
>> out22 .value = vv19 .getAccessor().get((incomingRowIdx));
>> }
>> IntHolder seedValue23 = new IntHolder();
>> seedValue23 .value = seedValue;
>> //---- start of eval portion of hash32AsDouble function.
>> ----//
>> IntHolder out24 = new IntHolder();
>> {
>> final IntHolder out = new IntHolder();
>> IntHolder in = out22;
>> IntHolder seed = seedValue23;
>>
>> Hash32WithSeedAsDouble$IntHash_eval: {
>> out.value =
>> org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double)
>> in.value, seed.value);
>> }
>>
>> out24 = out;
>> }
>> //---- end of eval portion of hash32AsDouble function. ----//
>> if (fieldId16 .value == constant18 .value) {
>> return out26 .value;
>> }
>> IntHolder seedValue25 = new IntHolder();
>> seedValue25 .value = seedValue;
>> //---- start of eval portion of hash32AsDouble function.
>> ----//
>> IntHolder out26 = new IntHolder();
>> {
>> final IntHolder out = new IntHolder();
>> IntHolder in = out22;
>> IntHolder seed = seedValue25;
>>
>> Hash32WithSeedAsDouble$IntHash_eval: {
>> out.value =
>> org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double)
>> in.value, seed.value);
>> }
>>
>> out26 = out;
>> }
>> //---- end of eval portion of hash32AsDouble function. ----//
>> return 0;
>> }
>> }
>>
>>
>>
>>
>>
>> On Tue, May 29, 2018 at 10:51 AM Aman Sinha <[email protected]> wrote:
>>
>> > sorry, the previous email is incomplete.
>> > For the ifBlock, do you need an _else() block also ?
>> >
>> > I have sometimes found that 'JConditional' is a good way to break down
>> the
>> > logic further. Please see example usages of JConditional here [1].
>> >
>> > -Aman
>> >
>> > [1]
>> >
>> >
>> https://www.programcreek.com/java-api-examples/?api=com.sun.codemodel.JBlock
>> >
>> > On Mon, May 28, 2018 at 7:46 PM, Aman Sinha <[email protected]>
>> wrote:
>> >
>> > > Hi Weijie,
>> > > It would be a little cumbersome to debug such issues over email since
>> one
>> > > has to look at the generated code output and iteratively debug.
>> > > Couple of thoughts I have that might help:
>> > >
>> > > For this particular if-then block, should you also
>> > > JBlock ifBlock =
>> > > cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targe
>> > > tBuildSideFieldId))._then();
>> > >
>> > >
>> > >
>> > > On Mon, May 28, 2018 at 4:17 AM, weijie tong <[email protected]
>> >
>> > > wrote:
>> > >
>> > >> HI All:
>> > >> Through implementing the JPPD feature (
>> > >> https://issues.apache.org/jira/browse/DRILL-6385) , I was blocked by
>> > the
>> > >> problem: how to get the hash code of each build side of the hash join
>> > >> columns through the dynamic generated java code. Hope someone can
>> give
>> > >> some
>> > >> advice.
>> > >>
>> > >> I supposed to add methods as below to the HashTableTemplate :
>> > >>
>> > >> public long getBuild64HashCode(int incomingRowIdx, int seedValue, int
>> > >> fieldId) throws SchemaChangeException{
>> > >> return getBuild64HashCodeInner(incomingRowIdx, seedValue,
>> fieldId);
>> > >> }
>> > >>
>> > >> protected abstract long
>> > >> getBuild64HashCodeInner(@Named("incomingRowIdx") int incomingRowIdx,
>> > >> @Named("seedValue") int seedValue, @Named("fieldId") int fieldId)
>> > >> throws SchemaChangeException;
>> > >>
>> > >>
>> > >> The high level code to invoke the getBuild64HashCode method is at
>> the
>> > >> HashJoinBatch's executeBuildPhase() :
>> > >>
>> > >> //create runtime filter
>> > >> if (cycleNum == 0 && enableRuntimeFilter) {
>> > >> //create runtime filter and send out async
>> > >> int condFieldIndex = 0;
>> > >> for (BloomFilter bloomFilter : bloomFilters) {
>> > >> //VV
>> > >> for (int ind = 0; ind < currentRecordCount; ind++) {
>> > >> long hashCode = partitions[0].getBuild64HashCode(ind,
>> > >> condFieldIndex);
>> > >> bloomFilter.insert(hashCode);
>> > >> }
>> > >> condFieldIndex++;
>> > >> }
>> > >> //TODO sered out async
>> > >> }
>> > >>
>> > >>
>> > >> As you know, the abstract method getBuild64HashCodeInner needs to
>> > >> calculate the hash codes of each build side column by the fieldId
>> input
>> > >> parameter. In order to achieve this target, I plan to have different
>> > >> solving parts corresponding to different column ValueVector , using
>> the
>> > if
>> > >> statement to distinguish different solving parts through the id of
>> the
>> > >> column. The corresponding method to generate the dynamic codes is
>> as
>> > >> below:
>> > >>
>> > >> private void setupGetBuild64Hash(ClassGenerator<HashTable> cg,
>> > >> MappingSet incomingMapping, VectorAccessible batch,
>> > >> LogicalExpression[] keyExprs, TypedFieldId[] buildKeyFieldIds)
>> > >> throws SchemaChangeException {
>> > >> cg.setMappingSet(incomingMapping);
>> > >> if (keyExprs == null || keyExprs.length == 0) {
>> > >> cg.getEvalBlock()._return(JExpr.lit(0));
>> > >> }
>> > >> String seedValue = "seedValue";
>> > >> String fieldId = "fieldId";
>> > >> LogicalExpression seed =
>> > >> ValueExpressions.getParameterExpression(seedValue,
>> > >> Types.required(TypeProtos.MinorType.INT));
>> > >>
>> > >> LogicalExpression fieldIdParamExpr =
>> > >> ValueExpressions.getParameterExpression(fieldId,
>> > >> Types.required(TypeProtos.MinorType.INT) );
>> > >> HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr);
>> > >> int i = 0;
>> > >> for (LogicalExpression expr : keyExprs) {
>> > >> TypedFieldId targetTypeFieldId = buildKeyFieldIds[i];
>> > >> ValueExpressions.IntExpression targetBuildFieldIdExp = new
>> > >> ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0],
>> > >> ExpressionPosition.UNKNOWN);
>> > >> JFieldRef targetBuildSideFieldId =
>> > >> cg.addExpr(targetBuildFieldIdExp,
>> > >> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue();
>> > >> JBlock ifBlock =
>> > >> cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targe
>> > >> tBuildSideFieldId))._then();
>> > >>
>> > >> LogicalExpression hashExpression =
>> > >> HashPrelUtil.getHashExpression(expr, seed, incomingProbe != null);
>> > >> LogicalExpression materializedExpr =
>> > >> ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression,
>> > >> batch, context.getFunctionRegistry());
>> > >> HoldingContainer hash = cg.addExpr(materializedExpr,
>> > >> ClassGenerator.BlkCreateMode.FALSE);
>> > >>
>> > >>
>> > >> ifBlock._return(hash.getValue());
>> > >> i++;
>> > >> }
>> > >> cg.getEvalBlock()._return(JExpr.lit(0));
>> > >>
>> > >> }
>> > >>
>> > >> But unfortunately, the generated codes are not what I expected. The
>> > codes
>> > >> to read ValueVector , calculate hash code of the read value do not
>> stay
>> > in
>> > >> the if block. So how can I let the related codes stay in the if
>> block ?
>> > >>
>> > >
>> > >
>> >
>
>