Repository: trafodion Updated Branches: refs/heads/master 4ef8055f0 -> 6d38e5809
[TRAFODION-2912] Better handling of non-deterministic scalar UDFs Fix some issues found by Andy Yang and others while writing a non-deterministic scalar UDF (a random generator in this case). This UDF was transformed into a hash join, which executes the UDF only once and not once per row. Another problem is the probe cache, which can also lead to a single execution instead of once per row. The fix records the non-deterministic UDF attribute in the group attributes and it adds checks in the normalizer to suppress the conversion from a TSJ to a non-TSJ when non-deterministic UDFs are present. The probe cache logic already had this check, so all that was needed was to set the attribute. Note that there may be some more complex queries where we still won't execute the UDF once per row. In general, there is no absolute guarantee that a non-deterministic scalar UDF is executed once per row (of the cartesian product of all the tables joined??). However, in simple cases like the added test we should try to call the UDF for every row that satisfies the join predicates. Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/a7256545 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/a7256545 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/a7256545 Branch: refs/heads/master Commit: a725654560fabfafc2b81568720f43a24a1b3007 Parents: dc94344 Author: Hans Zeller <[email protected]> Authored: Mon Jan 29 19:20:50 2018 +0000 Committer: Hans Zeller <[email protected]> Committed: Mon Jan 29 19:20:50 2018 +0000 ---------------------------------------------------------------------- core/sql/optimizer/NormItemExpr.cpp | 3 +- core/sql/optimizer/NormRelExpr.cpp | 19 ++-- core/sql/regress/udr/EXPECTED103 | 125 ++++++++++++++++++++++++ core/sql/regress/udr/TEST103 | 22 +++++ core/sql/regress/udr/TEST103_functions.cpp | 34 ++++++- 5 files changed, 194 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/optimizer/NormItemExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/NormItemExpr.cpp b/core/sql/optimizer/NormItemExpr.cpp index abd213a..fb4996a 100644 --- a/core/sql/optimizer/NormItemExpr.cpp +++ b/core/sql/optimizer/NormItemExpr.cpp @@ -949,7 +949,8 @@ void UDFunction::transformToRelExpr(NormWA & normWARef, // The routine desc points to both the NARoutine structs. isUdf->setRoutineDesc(udfDesc_); - + isUdf->getGroupAttr()->setHasNonDeterministicUDRs( + !getRoutineDesc()->getEffectiveNARoutine()->isDeterministic()); // Get the valueIds of the parameters isUdf->gatherParamValueIds(isUdf->getProcAllParamsTree(), http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/optimizer/NormRelExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/NormRelExpr.cpp b/core/sql/optimizer/NormRelExpr.cpp index 781ecc9..e53f954 100644 --- a/core/sql/optimizer/NormRelExpr.cpp +++ b/core/sql/optimizer/NormRelExpr.cpp @@ -1464,7 +1464,8 @@ void Join::transformNode(NormWA & normWARef, // reference the left subtree data. // // For RoutineJoins/Udfs we also want to convert it to a join if the UDF - // does not need any inputs from the left. + // does not need any inputs from the left and if the routine is + // deterministic. // --------------------------------------------------------------------- if (isTSJ()) { @@ -1597,18 +1598,21 @@ void Join::transformNode(NormWA & normWARef, if (crossReferences2.isEmpty() && !isTSJForWrite() && !getInliningInfo().isDrivingPipelinedActions() && - !getInliningInfo().isDrivingTempInsert() )// Triggers - + !getInliningInfo().isDrivingTempInsert() && // Triggers - + !(isRoutineJoin() && + child(1).getGroupAttr()->getHasNonDeterministicUDRs())) { // Remember we used to be a RoutineJoin. This is used to determine // what type of contexts for partitioning we will try in OptPhysRel. - if (isRoutineJoin()) setDerivedFromRoutineJoin(); + if (isRoutineJoin()) + setDerivedFromRoutineJoin(); convertToNotTsj(); } else { // We have a TSJ that will be changed to Nested join - // safe to change NOtIn here to non equi-predicate form (NE) + // safe to change NotIn here to non equi-predicate form (NE) // at this point only the case on single column NotIn can reach here // and the either the outer or inner column or both is nullable // and may have null values @@ -2319,17 +2323,20 @@ RelExpr * Join::normalizeNode(NormWA & normWARef) // and if a value that is produced by the left subtree is not // referenced in the right subtree, // --------------------------------------------------------------------- - if (isATSJ AND NOT isTSJForWrite() AND //NOT isRoutineJoin() AND + if (isATSJ AND NOT isTSJForWrite() AND NOT child(1)->getGroupAttr()-> getCharacteristicInputs().referencesOneValueFromTheSet (child(0)->getGroupAttr()->getCharacteristicOutputs()) && !getInliningInfo().isDrivingPipelinedActions() && !getInliningInfo().isDrivingTempInsert() // Triggers - + && !(isRoutineJoin() && + child(1).getGroupAttr()->getHasNonDeterministicUDRs()) ) { // Remember we used to be a RoutineJoin. This is used to determine // what type of contexts for partitioning we will try in OptPhysRel. - if (isRoutineJoin()) setDerivedFromRoutineJoin(); + if (isRoutineJoin()) + setDerivedFromRoutineJoin(); convertToNotTsj(); // --------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/regress/udr/EXPECTED103 ---------------------------------------------------------------------- diff --git a/core/sql/regress/udr/EXPECTED103 b/core/sql/regress/udr/EXPECTED103 index 197e0d1..aab044c 100644 --- a/core/sql/regress/udr/EXPECTED103 +++ b/core/sql/regress/udr/EXPECTED103 @@ -209,6 +209,68 @@ CREATE LIBRARY TRAFODION.UDR103SCH.FUNCTIONSFORTEST103 FILE '/mnt2/ansharma/ansh --- SQL operation complete. >> +>>create function nonDeterministicRandom ++> () returns (r integer) ++> language c parameter style sql external name 'nonDeterministicRandom' ++> library functionsForTest103 ++> not deterministic no sql final call allow any parallelism state area size 1024 ; + +--- SQL operation complete. +>> +>>cqd nested_joins 'off'; + +--- SQL operation complete. +>>cqd merge_joins 'off'; + +--- SQL operation complete. +>>cqd join_order_by_user 'on'; + +--- SQL operation complete. +>>prepare s from ++>select a, nonDeterministicRandom() r1, ++> generateRandomNumber(a, 4) r2, generateRandomNumber(123, 4) r3 ++>from (values (1), (2), (3)) T(a); + +--- SQL command prepared. +>>explain options 'f' s; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +8 . 9 root 3.00E+000 +7 1 8 hybrid_hash_join 3.00E+000 +4 6 7 nested_join 3.00E+000 +5 . 6 probe_cache 1.00E+000 +. . 5 isolated_scalar_udf GENERATERANDOMNUMBER 1.00E+000 +2 3 4 nested_join 3.00E+000 +. . 3 isolated_scalar_udf NONDETERMINISTICRAND 1.00E+000 +. . 2 tuplelist 3.00E+000 +. . 1 isolated_scalar_udf GENERATERANDOMNUMBER 1.00E+000 + +--- SQL operation complete. +>>-- r1: Nested join, no probe cache, because it's non-deterministic +>>-- r2: Nested join, probe cache, because it refers to the outer table +>>-- r3: Hash join, because it's deterministic, no refs to outer +>>execute s; + +A R1 R2 R3 +------ ----------- -------------- -------------- + + 1 555 3675 3330 + 2 316 1985 3330 + 3 209 6580 3330 + +--- 3 row(s) selected. +>>cqd nested_joins reset; + +--- SQL operation complete. +>>cqd merge_joins reset; + +--- SQL operation complete. +>>cqd join_order_by_user reset; + +--- SQL operation complete. +>> >>-- procedures >>create procedure updateSubscriptions( +> IN operation char(20), @@ -398,6 +460,68 @@ CREATE LIBRARY TRAFODION.UDR103SCH.FUNCTIONSFORTEST103 FILE '/mnt2/ansharma/ansh --- SQL operation complete. >> +>>create function nonDeterministicRandom ++> () returns (r integer) ++> language c parameter style sql external name 'nonDeterministicRandom' ++> library functionsForTest103 ++> not deterministic no sql final call allow any parallelism state area size 1024 ; + +--- SQL operation complete. +>> +>>cqd nested_joins 'off'; + +--- SQL operation complete. +>>cqd merge_joins 'off'; + +--- SQL operation complete. +>>cqd join_order_by_user 'on'; + +--- SQL operation complete. +>>prepare s from ++>select a, nonDeterministicRandom() r1, ++> generateRandomNumber(a, 4) r2, generateRandomNumber(123, 4) r3 ++>from (values (1), (2), (3)) T(a); + +--- SQL command prepared. +>>explain options 'f' s; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +8 . 9 root 3.00E+000 +7 1 8 hybrid_hash_join 3.00E+000 +4 6 7 nested_join 3.00E+000 +5 . 6 probe_cache 1.00E+000 +. . 5 isolated_scalar_udf GENERATERANDOMNUMBER 1.00E+000 +2 3 4 nested_join 3.00E+000 +. . 3 isolated_scalar_udf NONDETERMINISTICRAND 1.00E+000 +. . 2 tuplelist 3.00E+000 +. . 1 isolated_scalar_udf GENERATERANDOMNUMBER 1.00E+000 + +--- SQL operation complete. +>>-- r1: Nested join, no probe cache, because it's non-deterministic +>>-- r2: Nested join, probe cache, because it refers to the outer table +>>-- r3: Hash join, because it's deterministic, no refs to outer +>>execute s; + +A R1 R2 R3 +------ ----------- -------------- -------------- + + 1 555 3675 3330 + 2 316 1985 3330 + 3 209 6580 3330 + +--- 3 row(s) selected. +>>cqd nested_joins reset; + +--- SQL operation complete. +>>cqd merge_joins reset; + +--- SQL operation complete. +>>cqd join_order_by_user reset; + +--- SQL operation complete. +>> >>-- procedures >>create procedure updateSubscriptions( +> IN operation char(20), @@ -427,6 +551,7 @@ Functions in Schema TRAFODION.UDR103SCH CANACCESSVIEW GENERATEPHONENUMBER GENERATERANDOMNUMBER +NONDETERMINISTICRANDOM --- SQL operation complete. >>get procedures in schema udr103sch; http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/regress/udr/TEST103 ---------------------------------------------------------------------- diff --git a/core/sql/regress/udr/TEST103 b/core/sql/regress/udr/TEST103 index 4feb957..4446ff2 100644 --- a/core/sql/regress/udr/TEST103 +++ b/core/sql/regress/udr/TEST103 @@ -225,6 +225,28 @@ create function generateRandomNumber library functionsForTest103 deterministic no sql final call allow any parallelism state area size 1024 ; +create function nonDeterministicRandom + () returns (r integer) + language c parameter style sql external name 'nonDeterministicRandom' + library functionsForTest103 + not deterministic no sql final call allow any parallelism state area size 1024 ; + +cqd nested_joins 'off'; +cqd merge_joins 'off'; +cqd join_order_by_user 'on'; +prepare s from +select a, nonDeterministicRandom() r1, + generateRandomNumber(a, 4) r2, generateRandomNumber(123, 4) r3 +from (values (1), (2), (3)) T(a); +explain options 'f' s; +-- r1: Nested join, no probe cache, because it's non-deterministic +-- r2: Nested join, probe cache, because it refers to the outer table +-- r3: Hash join, because it's deterministic, no refs to outer +execute s; +cqd nested_joins reset; +cqd merge_joins reset; +cqd join_order_by_user reset; + -- procedures create procedure updateSubscriptions( IN operation char(20), http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/regress/udr/TEST103_functions.cpp ---------------------------------------------------------------------- diff --git a/core/sql/regress/udr/TEST103_functions.cpp b/core/sql/regress/udr/TEST103_functions.cpp index 1ee0531..b997935 100644 --- a/core/sql/regress/udr/TEST103_functions.cpp +++ b/core/sql/regress/udr/TEST103_functions.cpp @@ -69,7 +69,7 @@ SQLUDR_LIBFUNC SQLUDR_INT32 genPhoneNumber(SQLUDR_INT32 *in1, // seed } } - strcpy(out, result.c_str()); + memcpy(out, result.c_str(), result.length()); return SQLUDR_SUCCESS; } @@ -117,11 +117,41 @@ SQLUDR_LIBFUNC SQLUDR_INT32 genRandomNumber(SQLUDR_INT32 *in1, } } - strcpy(out, result.c_str()); + memcpy(out, result.c_str(), result.length()); return SQLUDR_SUCCESS; } +SQLUDR_LIBFUNC SQLUDR_INT32 nonDeterministicRandom(SQLUDR_INT32 *out1, + SQLUDR_INT16 *outInd1, + SQLUDR_TRAIL_ARGS) +{ + if (calltype == SQLUDR_CALLTYPE_FINAL) + return SQLUDR_SUCCESS; + + // pointer to the buffer in the state area that is + // available for the lifetime of this statement, + // this can be used by the UDF to maintain state + int *my_state = (int *) statearea->stmt_data.data; + + if (calltype == SQLUDR_CALLTYPE_INITIAL && *my_state == 0) + { + *my_state = 555; + } + else + // Use a simple linear congruential generator, we + // want deterministic regression results, despite + // the name of this function. Note that a + // this UDF is still "not deterministic", since it + // returns different results when called with the + // same (empty) inputs. + *my_state = (13 * (*my_state) + 101) % 1000; + + (*out1) = *my_state; + + return SQLUDR_SUCCESS; +} + SQLUDR_LIBFUNC SQLUDR_INT32 canAccessView(SQLUDR_CHAR *inZoneNeeded, SQLUDR_CHAR *inZoneHas, SQLUDR_INT32 *inPackageNeeded,
