[TRAFODION-2382] Support tinyint and boolean in tmudfs Adding support for these new data types in TMUDFs.
[TRAFODION-2392] Avoid costly sort in reducer TMUDFs Adding a new function type for TMUDFs that can avoid a costly sort for highly reducing UDFs that implement an internal hash table (or equivalent). Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/2faae457 Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/2faae457 Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/2faae457 Branch: refs/heads/master Commit: 2faae457be9928909a9e84aa5bf9979944324600 Parents: 1417baf Author: Hans Zeller <[email protected]> Authored: Tue Dec 20 22:39:58 2016 +0000 Committer: Hans Zeller <[email protected]> Committed: Tue Dec 20 22:39:58 2016 +0000 ---------------------------------------------------------------------- core/sql/generator/LmExpr.cpp | 5 +- core/sql/optimizer/OptPhysRelExpr.cpp | 13 +- core/sql/optimizer/UdfDllInteraction.cpp | 44 ++- core/sql/regress/compGeneral/EXPECTED071 | 4 +- core/sql/regress/compGeneral/TEST071 | 4 +- core/sql/regress/udr/EXPECTED001 | 98 ++++-- core/sql/regress/udr/TEST001 | 31 +- core/sql/sqludr/sqludr.cpp | 317 ++++++++++++++++++- core/sql/sqludr/sqludr.h | 20 +- .../java/org/trafodion/sql/udr/TupleInfo.java | 224 ++++++++++++- .../java/org/trafodion/sql/udr/TypeInfo.java | 89 +++++- .../main/java/org/trafodion/sql/udr/UDR.java | 36 ++- .../trafodion/sql/udr/UDRInvocationInfo.java | 13 +- 13 files changed, 796 insertions(+), 102 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2faae457/core/sql/generator/LmExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/generator/LmExpr.cpp b/core/sql/generator/LmExpr.cpp index 6791c34..9267841 100644 --- a/core/sql/generator/LmExpr.cpp +++ b/core/sql/generator/LmExpr.cpp @@ -362,7 +362,7 @@ LmExprResult CreateLmOutputExpr(const NAType &formalType, // specified type t to be converted to/from C strings. The only // SQL types that do not need to be converted to C strings are: // -// INT, SMALLINT, LARGEINT, FLOAT, REAL, DOUBLE PRECISION +// INT, SMALLINT, LARGEINT, FLOAT, REAL, DOUBLE PRECISION, BOOLEAN // // because these types map to Java primitive types: // @@ -372,6 +372,7 @@ LmExprResult CreateLmOutputExpr(const NAType &formalType, // FLOAT -> float or double // REAL -> float // DOUBLE PREC -> double +// BOOLEAN -> byte // // For the object-oriented Java and C++ parameter styles, we represent // intervals as a signed numeric of 2, 4, or 8 bytes, in the other @@ -444,6 +445,8 @@ NABoolean LmTypeIsString(const NAType &t, // interval representation as a number result = FALSE; } + else if (t.getTypeQualifier() == NA_BOOLEAN_TYPE) + result = FALSE; return result; } http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2faae457/core/sql/optimizer/OptPhysRelExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/OptPhysRelExpr.cpp b/core/sql/optimizer/OptPhysRelExpr.cpp index 5d87610..392ee8d 100644 --- a/core/sql/optimizer/OptPhysRelExpr.cpp +++ b/core/sql/optimizer/OptPhysRelExpr.cpp @@ -17627,8 +17627,17 @@ Context* PhysicalTableMappingUDF::createContextForAChild(Context* myContext, // that the TMUDF sees all values of a particular partition // together. We do that by requesting an arrangement by th // PARTITION BY columns, if any are specified. This applies - // to parallel and serial plans. - rg.addArrangement(childInfo->getPartitionBy(),ESP_SOT); + // to parallel and serial plans. Suppress this if the function + // type is REDUCER_NC. In this case, the UDF has specifically + // asked not to sort. This typically means that the UDF maintains + // a hash table (or similar) of keys that allow it to receive its + // input data in any order, with values for different PARTITION BY + // keys being intermingled. This can be much more efficient when + // there are many input rows and a much smaller number of unique + // PARTITION BY keys. So, a REDUCER_NC is similar to a user-defined + // hash groupby. + if (invocationInfo_->getFuncType() != tmudr::UDRInvocationInfo::REDUCER_NC) + rg.addArrangement(childInfo->getPartitionBy(),ESP_SOT); // add ORDER BY as a required order if (NOT childInfo->getOrderBy().isEmpty()) http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2faae457/core/sql/optimizer/UdfDllInteraction.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/UdfDllInteraction.cpp b/core/sql/optimizer/UdfDllInteraction.cpp index 14a2e10..c1bfdc6 100644 --- a/core/sql/optimizer/UdfDllInteraction.cpp +++ b/core/sql/optimizer/UdfDllInteraction.cpp @@ -37,6 +37,7 @@ #include "NumericType.h" #include "CharType.h" #include "DatetimeType.h" +#include "MiscType.h" #include "ItemLog.h" #include "ItemOther.h" #include "NARoutine.h" @@ -1213,27 +1214,17 @@ NABoolean TMUDFInternalSetup::setTypeInfoFromNAType( else if (isExact) switch (length) { - // SMALLINT, INT, LARGEINT, NUMERIC, signed and unsigned + // TINYINT, SMALLINT, INT, LARGEINT, NUMERIC, signed and unsigned case 1: - if (CmpCommon::getDefault(TRAF_TINYINT_SPJ_SUPPORT) == DF_ON) - { - *diags << DgSqlCode(-11151) - << DgString0("type") - << DgString1(src->getTypeSQLname()) - << DgString2("Tinyint datatype not yet supported"); - result = FALSE; - break; - } - if (isUnsigned) { if (!isDecimalPrecision) - sqlType = tmudr::TypeInfo::SMALLINT_UNSIGNED; + sqlType = tmudr::TypeInfo::TINYINT_UNSIGNED; } else { if (!isDecimalPrecision) - sqlType = tmudr::TypeInfo::SMALLINT; + sqlType = tmudr::TypeInfo::TINYINT; } break; @@ -1419,6 +1410,20 @@ NABoolean TMUDFInternalSetup::setTypeInfoFromNAType( } break; + case NA_BOOLEAN_TYPE: + { + sqlType = tmudr::TypeInfo::BOOLEAN; + if (length != 1) + { + *diags << DgSqlCode(-11151) + << DgString0("type") + << DgString1(src->getTypeSQLname()) + << DgString2("unsupported 4 byte boolean"); + result = FALSE; + } + } + break; + default: *diags << DgSqlCode(-11151) << DgString0("type") @@ -1843,6 +1848,14 @@ NAType *TMUDFInternalSetup::createNATypeFromTypeInfo( switch (typeCode) { + case tmudr::TypeInfo::TINYINT: + case tmudr::TypeInfo::TINYINT_UNSIGNED: + result = new(heap) + SQLTiny((typeCode == tmudr::TypeInfo::TINYINT), + src.getIsNullable(), + heap); + break; + case tmudr::TypeInfo::SMALLINT: case tmudr::TypeInfo::SMALLINT_UNSIGNED: result = new(heap) @@ -2074,6 +2087,11 @@ NAType *TMUDFInternalSetup::createNATypeFromTypeInfo( } break; + case tmudr::TypeInfo::BOOLEAN: + result = new(heap) SQLBooleanNative(src.getIsNullable(), + heap); + break; + default: *diags << DgSqlCode(-11152) << DgInt0(src.getSQLType()) http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2faae457/core/sql/regress/compGeneral/EXPECTED071 ---------------------------------------------------------------------- diff --git a/core/sql/regress/compGeneral/EXPECTED071 b/core/sql/regress/compGeneral/EXPECTED071 index 2e8f767..e07238f 100644 --- a/core/sql/regress/compGeneral/EXPECTED071 +++ b/core/sql/regress/compGeneral/EXPECTED071 @@ -729,9 +729,9 @@ LC RC OP OPERATOR OPT DESCRIPTION CARD +> mm_len +> from +> (select cast(description as varchar(3000 bytes) character set utf8) as description, -+> position(' min_max_hashj_cols: ' IN description) + 1 as mm_pos, ++> position(' min_max_cols: ' IN description) + 1 as mm_pos, +> position(' ' IN substring(description -+> from position(' min_max_hashj_cols: ' ++> from position(' min_max_cols: ' +> IN description) + 21)) + 19 mm_len +> from table(explain(null,'S')) +> ) X http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2faae457/core/sql/regress/compGeneral/TEST071 ---------------------------------------------------------------------- diff --git a/core/sql/regress/compGeneral/TEST071 b/core/sql/regress/compGeneral/TEST071 index 50fdbec..09ef16a 100644 --- a/core/sql/regress/compGeneral/TEST071 +++ b/core/sql/regress/compGeneral/TEST071 @@ -410,9 +410,9 @@ from mm_len from (select cast(description as varchar(3000 bytes) character set utf8) as description, - position(' min_max_hashj_cols: ' IN description) + 1 as mm_pos, + position(' min_max_cols: ' IN description) + 1 as mm_pos, position(' ' IN substring(description - from position(' min_max_hashj_cols: ' + from position(' min_max_cols: ' IN description) + 21)) + 19 mm_len from table(explain(null,'S')) ) X http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2faae457/core/sql/regress/udr/EXPECTED001 ---------------------------------------------------------------------- diff --git a/core/sql/regress/udr/EXPECTED001 b/core/sql/regress/udr/EXPECTED001 index 6283050..b978be3 100644 --- a/core/sql/regress/udr/EXPECTED001 +++ b/core/sql/regress/udr/EXPECTED001 @@ -70,6 +70,8 @@ +>c_largeint largeint, +>c_smallint smallint, +>c_smallint_unsigned smallint unsigned, ++>c_tinyint tinyint, ++>c_tinyint_unsigned tinyint unsigned, +>c_float float(10), +>c_real real, +>c_double_precision double precision, @@ -83,7 +85,8 @@ +>c_intervals86 interval second(8,6), +>c_intervald6s interval day(6) to second(6), +>c_blob blob (100), -+>c_clob clob (100) ++>c_clob clob (100), ++>c_boolean boolean +>); --- SQL operation complete. @@ -113,6 +116,8 @@ +>-1, +>-1, +>1, ++>-1, ++>1, +>-1.11, +>-1.11, +>-1.11, @@ -130,7 +135,8 @@ +>--stringtolob('CLOB_1') +>-- for now, use simple chars, see cqd TRAF_BLOB_AS_VARCHAR above +>'BLOB_1', -+>'CLOB_1' ++>'CLOB_1', ++>true +>); --- 1 row(s) inserted. @@ -291,6 +297,43 @@ CREATE TABLE_MAPPING FUNCTION TRAFODION.SCH.SESSIONIZE_JAVA --- 4 row(s) selected. >> +>>SELECT cast(CONVERTTIMESTAMP(ts) as TIME(6)), userid, session_id, ipAddr ++>FROM UDF(sessionize_dynamic(TABLE(SELECT userid, ++> JULIANTIMESTAMP(ts) as TS, ++> ipAddr ++> FROM clicks ++> WHERE userid='super-user' ++> PARTITION BY 1 ORDER BY 2), ++> 'USERID', ++> 'TS', ++> 60000000)); + +(EXPR) USERID SESSION_ID IPADDR +--------------- -------------------------------- -------------------- --------------- + +21:59:59.500000 super-user 1 12.345.567.345 +23:59:59.500000 super-user 2 12.345.567.345 + +--- 2 row(s) selected. +>>SELECT cast(CONVERTTIMESTAMP(ts) as TIME(6)), userid, session_id, ipAddr ++>FROM UDF(sessionize_dynamic(TABLE(SELECT userid, ++> JULIANTIMESTAMP(ts) as TS, ++> ipAddr ++> FROM clicks ++> WHERE userid='super-user' ++> PARTITION BY 1 ORDER BY 2), ++> 'USERID', ++> 'TS', ++> 60000000)); + +(EXPR) USERID SESSION_ID IPADDR +--------------- -------------------------------- -------------------- --------------- + +21:59:59.500000 super-user 1 12.345.567.345 +23:59:59.500000 super-user 2 12.345.567.345 + +--- 2 row(s) selected. +>> >>-- call sessionize_dynamic_shared, sharing the same DLL >>SELECT * +>FROM UDF(sessionize_dynamic_shared(TABLE(SELECT userid, @@ -477,10 +520,21 @@ SESSION_ID SEQUENCE_NO USERID TS +>from UDF(fibonacci(1,10)) XO +>order by 1; -*** ERROR[11151] Unable to use 'type' 'NUMERIC(1) SIGNED NO NULLS' in a user-defined routine. Details: Tinyint datatype not yet supported. +ORDINAL FIBONACCI_NUMBER +----------- -------------------- -*** ERROR[8822] The statement was not prepared. + 1 1 + 2 1 + 3 2 + 4 3 + 5 5 + 6 8 + 7 13 + 8 21 + 9 34 + 10 55 +--- 10 row(s) selected. >>cqd traf_tinyint_spj_support reset; --- SQL operation complete. @@ -598,18 +652,18 @@ ORDINAL FIBONACCI_NUMBER --- SQL command prepared. >>execute s; -SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB --------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- --------------------------------------------------------------- ------------------------------------- +SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_TINYINT C_TINYINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB C_BOOLEAN +-------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- --------- ------------------ ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- -------------------------------- -------------------------------------------------------------------- --------- - 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 + 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 TRUE --- 1 row(s) selected. >>execute s; -SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB --------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- --------------------------------------------------------------- ------------------------------------- +SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_TINYINT C_TINYINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB C_BOOLEAN +-------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- --------- ------------------ ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- -------------------------------- -------------------------------------------------------------------- --------- - 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 + 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 TRUE --- 1 row(s) selected. >> @@ -620,18 +674,18 @@ SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_ --- SQL command prepared. >>execute s; -SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB --------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- --------------------------------------------------------------- ------------------------------------- +SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_TINYINT C_TINYINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB C_BOOLEAN +-------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- --------- ------------------ ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- -------------------------------- -------------------------------------------------------------------- --------- - 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 + 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 TRUE --- 1 row(s) selected. >>execute s; -SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB --------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- --------------------------------------------------------------- ------------------------------------- +SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_TINYINT C_TINYINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB C_BOOLEAN +-------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- --------- ------------------ ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- -------------------------------- -------------------------------------------------------------------- --------- - 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 + 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 TRUE --- 1 row(s) selected. >> @@ -646,18 +700,18 @@ SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_ --- SQL command prepared. >>execute s; -SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB --------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- --------------------------------------------------------------- ------------------------------------- +SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_TINYINT C_TINYINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB C_BOOLEAN +-------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- --------- ------------------ ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- -------------------------------- -------------------------------------------------------------------- --------- - 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 + 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 TRUE --- 1 row(s) selected. >>execute s; -SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB --------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- --------------------------------------------------------------- ------------------------------------- +SESSION_ID SEQUENCE_NO C_CHAR C_CHAR_UPSHIFT C_CHAR_NOT_CASESPECIFIC C_CHAR_VARYING C_CHAR_VARYING_UPSHIFT C_CHAR_VARYING_NOT_CASESPECIFIC C_VARCHAR C_VARCHAR_UPSHIFT C_VARCHAR_NOT_CASESPECIFIC C_NCHAR C_NCHAR_UPSHIFT C_NCHAR_NOT_CASESPECIFIC C_NCHAR_VARYING C_NCHAR_VARYING_UPSHIFT C_NCHAR_VARYING_NOT_CASESPECIFIC C_NUMERIC C_NUMERIC_UNSIGNED C_DECIMAL C_DECIMAL_UNSIGNED C_INTEGER C_INTEGER_UNSIGNED C_LARGEINT C_SMALLINT C_SMALLINT_UNSIGNED C_TINYINT C_TINYINT_UNSIGNED C_FLOAT C_REAL C_DOUBLE_PRECISION C_DATE C_TIME C_TIME6 C_TIMESTAMP0 C_TIMESTAMP C_TIMESTAMP6 C_INTERVAL C_INTERVALS86 C_INTERVALD6S C_BLOB C_CLOB C_BOOLEAN +-------------------- -------------------- --------------- --------------- ----------------------- --------------- ---------------------- ------------------------------- --------------- ----------------- -------------------------- ------------------------------ ------------------------------ ------------------------------ ------------------------------ ------------------------------ -------------------------------- ------------ ------------------ ----------- ------------------ ----------- ------------------ -------------------- ---------- ------------------- --------- ------------------ ------------------------- --------------- ------------------------- ---------- -------- --------------- ------------------- -------------------------- -------------------------- ---------- ---------------- ----------------------- ---------------------------------------------------------------------------------------------------- -------------------------------- -------------------------------------------------------------------- --------- - 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 + 1 1 CHAR_1 CHAR_1 char_1 CHARVAR_1 CHARVAR_1 charvar_1 VARCHAR_1 VARCHAR_1 varchar_1 NCHAR_1 NCHAR_1 nchar_1 NCHARVAR_1 NCHARVAR_1 ncharvar_1 -1.00 1.00 -1.11 1.11 -1 1 -1 -1 1 -1 1 -1.11000000000000016E+000 -1.1100000E+000 -1.11000000000000016E+000 2001-01-01 01:01:01 01:01:01.111111 2001-01-01 01:01:01 2001-01-01 01:01:01.111111 2001-01-01 01:01:01.111111 1-01 88888888.666666 666666 23:59:59.999999 BLOB_1 CLOB_1 TRUE --- 1 row(s) selected. >> http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2faae457/core/sql/regress/udr/TEST001 ---------------------------------------------------------------------- diff --git a/core/sql/regress/udr/TEST001 b/core/sql/regress/udr/TEST001 index f0ab9b7..101c5f4 100644 --- a/core/sql/regress/udr/TEST001 +++ b/core/sql/regress/udr/TEST001 @@ -104,6 +104,8 @@ c_integer_unsigned integer unsigned, c_largeint largeint, c_smallint smallint, c_smallint_unsigned smallint unsigned, +c_tinyint tinyint, +c_tinyint_unsigned tinyint unsigned, c_float float(10), c_real real, c_double_precision double precision, @@ -117,7 +119,8 @@ c_interval interval year to month, c_intervals86 interval second(8,6), c_intervald6s interval day(6) to second(6), c_blob blob (100), -c_clob clob (100) +c_clob clob (100), +c_boolean boolean ); insert into t001_Datatypes values ( @@ -145,6 +148,8 @@ insert into t001_Datatypes values ( -1, -1, 1, +-1, +1, -1.11, -1.11, -1.11, @@ -162,7 +167,8 @@ interval '666666 23:59:59.999999' day(6) to second(6), --stringtolob('CLOB_1') -- for now, use simple chars, see cqd TRAF_BLOB_AS_VARCHAR above 'BLOB_1', -'CLOB_1' +'CLOB_1', +true ); ?section register_functions @@ -243,6 +249,27 @@ FROM UDF(sessionize_dynamic(TABLE(SELECT userid, 'TS', 60000000)); +SELECT cast(CONVERTTIMESTAMP(ts) as TIME(6)), userid, session_id, ipAddr +FROM UDF(sessionize_dynamic(TABLE(SELECT userid, + JULIANTIMESTAMP(ts) as TS, + ipAddr + FROM clicks + WHERE userid='super-user' + PARTITION BY 1 ORDER BY 2), + 'USERID', + 'TS', + 60000000)); +SELECT cast(CONVERTTIMESTAMP(ts) as TIME(6)), userid, session_id, ipAddr +FROM UDF(sessionize_dynamic(TABLE(SELECT userid, + JULIANTIMESTAMP(ts) as TS, + ipAddr + FROM clicks + WHERE userid='super-user' + PARTITION BY 1 ORDER BY 2), + 'USERID', + 'TS', + 60000000)); + -- call sessionize_dynamic_shared, sharing the same DLL SELECT * FROM UDF(sessionize_dynamic_shared(TABLE(SELECT userid, http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2faae457/core/sql/sqludr/sqludr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/sqludr/sqludr.cpp b/core/sql/sqludr/sqludr.cpp index 82825c8..91dedc9 100644 --- a/core/sql/sqludr/sqludr.cpp +++ b/core/sql/sqludr/sqludr.cpp @@ -610,6 +610,12 @@ TypeInfo::TypeInfo(SQLTypeCode sqlType, switch (sqlType) { + case TINYINT: + d_.length_ = 1; + d_.precision_ = 0; + d_.scale_ = 0; + break; + case SMALLINT: d_.length_ = 2; d_.precision_ = 0; @@ -649,6 +655,12 @@ TypeInfo::TypeInfo(SQLTypeCode sqlType, d_.length_ += 1; // for the decimal point break; + case TINYINT_UNSIGNED: + d_.length_ = 1; + d_.precision_ = 0; + d_.scale_ = 0; + break; + case SMALLINT_UNSIGNED: d_.length_ = 2; d_.precision_ = 0; @@ -833,6 +845,12 @@ TypeInfo::TypeInfo(SQLTypeCode sqlType, } break; + case BOOLEAN: + d_.length_ = 1; + d_.precision_ = 0; + d_.scale_ = 0; + break; + case UNDEFINED_SQL_TYPE: // this case is reached when we call the default constructor, // type and other fields still need to be defined @@ -893,11 +911,13 @@ TypeInfo::SQLTypeClassCode TypeInfo::getSQLTypeClass() const { switch (d_.sqlType_) { + case TINYINT: case SMALLINT: case INT: case LARGEINT: case NUMERIC: case DECIMAL_LSE: + case TINYINT_UNSIGNED: case SMALLINT_UNSIGNED: case INT_UNSIGNED: case NUMERIC_UNSIGNED: @@ -922,6 +942,9 @@ TypeInfo::SQLTypeClassCode TypeInfo::getSQLTypeClass() const case CLOB: return LOB_TYPE; + case BOOLEAN: + return BOOLEAN_TYPE; + default: break; } @@ -940,11 +963,13 @@ TypeInfo::SQLTypeSubClassCode TypeInfo::getSQLTypeSubClass() const { switch (d_.sqlType_) { + case TINYINT: case SMALLINT: case INT: case LARGEINT: case NUMERIC: case DECIMAL_LSE: + case TINYINT_UNSIGNED: case SMALLINT_UNSIGNED: case INT_UNSIGNED: case NUMERIC_UNSIGNED: @@ -994,6 +1019,9 @@ TypeInfo::SQLTypeSubClassCode TypeInfo::getSQLTypeSubClass() const case CLOB: return LOB_SUB_CLASS; + case BOOLEAN: + return BOOLEAN_SUB_CLASS; + default: break; } @@ -1133,6 +1161,8 @@ int TypeInfo::getMaxCharLength() const return d_.length_; case INTERVAL_TYPE: return 0; + case BOOLEAN_TYPE: + return 0; default: throw UDRException( 38900, @@ -1197,8 +1227,14 @@ long TypeInfo::getLong(const char *row, bool &wasNull) const // see also code in LmTypeIsString() in file ../generator/LmExpr.cpp if (d_.sqlType_ == NUMERIC || d_.sqlType_ == NUMERIC_UNSIGNED || - d_.sqlType_ == INTERVAL) + d_.sqlType_ == INTERVAL || + d_.sqlType_ == BOOLEAN) { + if (d_.length_ == 1) + if (d_.sqlType_ == NUMERIC_UNSIGNED) + tempSQLType = TINYINT_UNSIGNED; + else + tempSQLType = TINYINT; if (d_.length_ == 2) if (d_.sqlType_ == NUMERIC_UNSIGNED) tempSQLType = SMALLINT_UNSIGNED; @@ -1216,6 +1252,10 @@ long TypeInfo::getLong(const char *row, bool &wasNull) const switch (tempSQLType) { + case TINYINT: + result = *((char *) data); + break; + case SMALLINT: result = *((short *) data); break; @@ -1228,6 +1268,10 @@ long TypeInfo::getLong(const char *row, bool &wasNull) const result = *((long *) data); break; + case TINYINT_UNSIGNED: + result = *((unsigned char *) data); + break; + case SMALLINT_UNSIGNED: result = *((unsigned short *) data); break; @@ -1353,16 +1397,19 @@ double TypeInfo::getDouble(const char *row, bool &wasNull) const result = *((double *) data); break; + case TINYINT: case SMALLINT: case INT: case LARGEINT: case NUMERIC: case DECIMAL_LSE: + case TINYINT_UNSIGNED: case SMALLINT_UNSIGNED: case INT_UNSIGNED: case NUMERIC_UNSIGNED: case DECIMAL_UNSIGNED: case INTERVAL: + case BOOLEAN: { result = static_cast<double>(getLong(row, wasNull)); // for numbers with a scale, ensure that the decimal @@ -1505,6 +1552,62 @@ time_t TypeInfo::getTime(const char *row, bool &wasNull) const return result; } +bool TypeInfo::getBoolean(const char *row, bool &wasNull) const +{ + switch (getSQLTypeClass()) + { + case CHARACTER_TYPE: + { + int byteLen = 0; + const char *cval = getRaw(row, wasNull, byteLen); + + while (byteLen > 0 && cval[byteLen-1] == ' ') + byteLen--; + + // strings must have a value of "0" or "1" + if (byteLen == 1 && + (cval[0] == '0' || + cval[0] == '1')) + return (cval[0] == '1'); + else + { + std::string errval(cval, (byteLen > 10 ? 10 : byteLen)); + + throw UDRException( + 38900, + "getBoolean() encountered string value %s, booleans must be 0 or 1", + errval.c_str()); + } + } + break; + + case NUMERIC_TYPE: + case BOOLEAN_TYPE: + { + // numerics or booleans must have a value of 0 or 1 + long lval = getLong(row, wasNull); + if (lval <0 || lval > 1) + throw UDRException( + 38900, + "getBoolean() encountered value %ld, booleans must be 0 or 1", + lval); + return (lval != 0); + } + break; + + default: + { + std::string typeName; + + toString(typeName, false); + throw UDRException( + 38900, + "getBoolean() not supported for type %s", + typeName.c_str()); + } + } +} + const char * TypeInfo::getRaw(const char *row, bool &wasNull, int &byteLen) const @@ -1616,6 +1719,11 @@ void TypeInfo::setLong(long val, char *row) const d_.sqlType_ == NUMERIC_UNSIGNED || d_.sqlType_ == INTERVAL) { + if (d_.length_ == 1) + if (d_.sqlType_ == NUMERIC_UNSIGNED) + tempSQLType = TINYINT_UNSIGNED; + else + tempSQLType = TINYINT; if (d_.length_ == 2) if (d_.sqlType_ == NUMERIC_UNSIGNED) tempSQLType = SMALLINT_UNSIGNED; @@ -1633,6 +1741,10 @@ void TypeInfo::setLong(long val, char *row) const switch (tempSQLType) { + case TINYINT: + *((char *) data) = val; + break; + case SMALLINT: *((short *) data) = val; break; @@ -1645,6 +1757,14 @@ void TypeInfo::setLong(long val, char *row) const *((long *) data) = val; break; + case TINYINT_UNSIGNED: + if (val < 0) + throw UDRException( + 38900, + "Trying to assign a negative value to a TINYINT UNSIGNED type"); + *((unsigned char *) data) = val; + break; + case SMALLINT_UNSIGNED: if (val < 0) throw UDRException( @@ -1776,6 +1896,15 @@ void TypeInfo::setLong(long val, char *row) const setDouble(val, row); break; + case BOOLEAN: + *((char *) data) = val; + if (val != 0 && val != 1) + throw UDRException( + 38900, + "Got value %ld which cannot be converted to a BOOLEAN, only 0 or 1 are allowed", + val); + break; + default: throw UDRException(38900, "setLong(), setInt() or related is not supported for data type %d", @@ -2009,11 +2138,13 @@ void TypeInfo::setString(const char *val, int stringLen, char *row) const isApproxNumeric = true; // fall through to next case + case TINYINT: case SMALLINT: case INT: case LARGEINT: case NUMERIC: case DECIMAL_LSE: + case TINYINT_UNSIGNED: case SMALLINT_UNSIGNED: case INT_UNSIGNED: case NUMERIC_UNSIGNED: @@ -2308,6 +2439,39 @@ void TypeInfo::setString(const char *val, int stringLen, char *row) const } break; + case BOOLEAN: + { + long bval = -1; + // accept 0, 1, TRUE, true, FALSE, false as values + while (stringLen > 0 && val[stringLen-1] == ' ') + stringLen--; + if (stringLen == 1) + { + if (strcmp(val, "0") == 0) + bval = 0; + else if (strcmp(val, "1") == 0) + bval = 1; + } + if (stringLen == 4) + { + if (strcmp(val, "TRUE") == 0 || + strcmp(val, "true") == 0) + bval = 1; + } + else if (stringLen == 5) + if (strcmp(val, "FALSE") == 0 || + strcmp(val, "false") == 0) + bval = 0; + + if (bval >= 0) + setLong(bval, row); + else + throw UDRException(38900, + "Invalid value %.10s encountered in setString() for a boolean data type", + val); + } + break; + case UNDEFINED_SQL_TYPE: default: throw UDRException(38900, @@ -2316,6 +2480,33 @@ void TypeInfo::setString(const char *val, int stringLen, char *row) const } } +void TypeInfo::setBoolean(bool val, char *row) const +{ + switch (getSQLTypeClass()) + { + case CHARACTER_TYPE: + setString((val ? "1" : "0"), 1, row); + break; + + case NUMERIC_TYPE: + case BOOLEAN_TYPE: + setLong((val ? 1 : 0), row); + break; + + default: + { + std::string typeName; + + toString(typeName, false); + throw UDRException( + 38900, + "setBoolean() not supported for type %s", + typeName.c_str()); + } + + } +} + void TypeInfo::setNull(char *row) const { if (row == NULL || @@ -2354,7 +2545,9 @@ int TypeInfo::convertToBinaryPrecision(int decimalPrecision) const "Decimal precision %d is out of the allowed range of 1-18", decimalPrecision); - if (decimalPrecision < 5) + if (decimalPrecision < 3) + return 1; + else if (decimalPrecision < 5) return 2; else if (decimalPrecision < 10) return 4; @@ -2371,6 +2564,9 @@ void TypeInfo::toString(std::string &s, bool longForm) const case UNDEFINED_SQL_TYPE: s += "undefined_sql_type"; break; + case TINYINT: + s += "TINYINT"; + break; case SMALLINT: s += "SMALLINT"; break; @@ -2390,6 +2586,9 @@ void TypeInfo::toString(std::string &s, bool longForm) const getPrecision(), getScale()); s += buf; break; + case TINYINT_UNSIGNED: + s += "TINYINT UNSIGNED"; + break; case SMALLINT_UNSIGNED: s += "SMALLINT UNSIGNED"; break; @@ -2530,6 +2729,9 @@ void TypeInfo::toString(std::string &s, bool longForm) const case CLOB: s += "CLOB"; break; + case BOOLEAN: + s += "BOOLEAN"; + break; default: s += "invalid SQL type!"; break; @@ -4285,10 +4487,12 @@ std::string TupleInfo::getString(int colNum) const } + case TypeInfo::TINYINT: case TypeInfo::SMALLINT: case TypeInfo::INT: case TypeInfo::LARGEINT: case TypeInfo::NUMERIC: + case TypeInfo::TINYINT_UNSIGNED: case TypeInfo::SMALLINT_UNSIGNED: case TypeInfo::INT_UNSIGNED: case TypeInfo::NUMERIC_UNSIGNED: @@ -4419,6 +4623,13 @@ std::string TupleInfo::getString(int colNum) const return buf; } + case TypeInfo::BOOLEAN: + { + if (getBoolean(colNum)) + return "1"; + else + return "0"; + } default: throw UDRException( @@ -4451,6 +4662,54 @@ std::string TupleInfo::getString(const std::string &colName) const } /** + * Get a boolean value of a column or parameter + * + * This method is modeled after the JDBC interface. + * It can be used on boolean, numeric and character columns. + * Numeric columns need to have a value of 0 (false) or 1 (true), + * character columns need to have a value of "0" (false) or "1" (true). + * + * Use this method at runtime. It can also be used for + * actual parameters that are available at compile time. + * + * @param colNum Column number. + * @return Boolean value. + * If the value was a NULL value, false + * is returned. The wasNull() method can be used to + * determine whether a NULL value was returned. + * @throws UDRException + */ +bool TupleInfo::getBoolean(int colNum) const +{ + bool &nonConstWasNull = const_cast<TupleInfo *>(this)->wasNull_; + + nonConstWasNull = false; + + return getType(colNum).getBoolean(rowPtr_, nonConstWasNull); +} + +/** + * Get a boolean value of a column or parameter identified by name. + * + * This method is modeled after the JDBC interface. + * + * Use this method at runtime. It cannot be used for + * actual parameters that are available at compile time, use + * getString(int colNum) instead, since actual parameters are not named. + * + * @param colName Name of an existing column. + * @return bool value. + * If the value was a NULL value, false + * is returned. The wasNull() method can be used to + * determine whether a NULL value was returned. + * @throws UDRException + */ +bool TupleInfo::getBoolean(const std::string &colName) const +{ + return getBoolean(getColNum(colName)); +} + +/** * Check whether the last value returned from a getInt() etc. method was NULL. * * This method is modeled after the JDBC interface. @@ -4570,6 +4829,24 @@ void TupleInfo::setTime(int colNum, time_t val) const } /** + * Set a column to a value specified as bool + * + * This will set boolean, numeric and character columns. + * Numeric values will be 0 for false, 1 for true. + * String values will be "0" for false, "1" for true. + * + * Use this method at runtime. + * + * @param colNum Index/ordinal of the column to set. + * @param val The new boolean value for the column to set. + * @throws UDRException + */ +void TupleInfo::setBoolean(int colNum, bool val) const +{ + getType(colNum).setBoolean(val, rowPtr_); +} + +/** * Set the result row from a string with delimited field values. * * This method can be used to read delimited text files and @@ -6550,6 +6827,7 @@ void UDRInvocationInfo::copyPassThruData(int inputTableNum, case TypeInfo::EXACT_NUMERIC_TYPE: case TypeInfo::YEAR_MONTH_INTERVAL_TYPE: case TypeInfo::DAY_SECOND_INTERVAL_TYPE: + case TypeInfo::BOOLEAN_SUB_CLASS: { long l = in(it).getLong(ic); @@ -6638,7 +6916,8 @@ void UDRInvocationInfo::print() printf("Function type : %s\n", (funcType_ == GENERIC ? "GENERIC" : (funcType_ == MAPPER ? "MAPPER" : (funcType_ == REDUCER ? "REDUCER" : - "Invalid function type")))); + (funcType_ == REDUCER_NC ? "REDUCER_NC" : + "Invalid function type"))))); printf("User id : %s\n", getCurrentUser().c_str()); printf("Session user id : %s\n", getSessionUser().c_str()); printf("User role : %s\n", getCurrentRole().c_str()); @@ -7595,6 +7874,8 @@ void UDR::describeParamsAndColumns(UDRInvocationInfo &info) * pushed to table-valued inputs. These predicates may * eliminate entire groups of rows (partitions), and since * no state is carried between such groups that is valid. + * @li UDRInvocationInfo::REDUCER_NC: + * Same as REDUCER. * * NOTE: When eliminating columns from the table-valued inputs or * the table-valued result, column numbers may change in the @@ -7628,6 +7909,7 @@ void UDR::describeDataflowAndPredicates(UDRInvocationInfo &info) break; case UDRInvocationInfo::REDUCER: + case UDRInvocationInfo::REDUCER_NC: { int partitionedChild = -1; @@ -7673,7 +7955,7 @@ void UDR::describeDataflowAndPredicates(UDRInvocationInfo &info) } // column is from the partitioned input table } // is a comparison predicate } // found a partitioned child table - } // REDUCER + } // REDUCER(_NC) break; default: @@ -7724,8 +8006,9 @@ void UDR::describeConstraints(UDRInvocationInfo &info) * <ul> * <li>UDRs of type UDRInvocationInfo::MAPPER return one output row for * each row in their largest input table. - * <li>UDRs of type UDRInvocationInfo::REDUCER return one output row for - * every partition in their largest partitioned input table. + * <li>UDRs of type UDRInvocationInfo::REDUCER and REDUCER_NC return one + * output row for every partition in their largest partitioned input + * table. * <li>For output columns that are passthru columns, the estimated * unique entries are the same as for the underlying column in the * table-valued input. @@ -7758,20 +8041,21 @@ void UDR::describeStatistics(UDRInvocationInfo &info) * * The default behavior is to allow any degree of parallelism for * TMUDFs of function type UDRInvocationInfo::MAPPER or - * UDRInvocationInfo::REDUCER that have exactly one table-valued - * input. The default behavior forces serial execution - * in all other cases. The reason is that for a single table-valued - * input, there is a natural way to parallelize the function by - * parallelizing its input a la MapReduce. In all other cases, - * parallel execution requires active participation by the UDF, - * which is why the UDF needs to signal explicitly that it can - * handle such flavors of parallelism. + * UDRInvocationInfo::REDUCER (or REDUCER_NC) that have exactly + * one table-valued input. The default behavior forces serial + * execution in all other cases. The reason is that for a single + * table-valued input, there is a natural way to parallelize the + * function by parallelizing its input a la MapReduce. In all + * other cases, parallel execution requires active participation + * by the UDF, which is why the UDF needs to signal explicitly + * that it can handle such flavors of parallelism. * * Default implementation: * @code * if (info.getNumTableInputs() == 1 && * (info.getFuncType() == UDRInvocationInfo::MAPPER || - * info.getFuncType() == UDRInvocationInfo::REDUCER)) + * info.getFuncType() == UDRInvocationInfo::REDUCER || + * info.getFuncType() == UDRInvocationInfo::REDUCER_NC)) * plan.setDesiredDegreeOfParallelism(UDRPlanInfo::ANY_DEGREE_OF_PARALLELISM); * else * plan.setDesiredDegreeOfParallelism(1); // serial execution @@ -7789,7 +8073,8 @@ void UDR::describeDesiredDegreeOfParallelism(UDRInvocationInfo &info, { if (info.getNumTableInputs() == 1 && (info.getFuncType() == UDRInvocationInfo::MAPPER || - info.getFuncType() == UDRInvocationInfo::REDUCER)) + info.getFuncType() == UDRInvocationInfo::REDUCER || + info.getFuncType() == UDRInvocationInfo::REDUCER_NC)) plan.setDesiredDegreeOfParallelism(UDRPlanInfo::ANY_DEGREE_OF_PARALLELISM); else plan.setDesiredDegreeOfParallelism(1); // serial execution http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2faae457/core/sql/sqludr/sqludr.h ---------------------------------------------------------------------- diff --git a/core/sql/sqludr/sqludr.h b/core/sql/sqludr/sqludr.h index 2ab12f5..ddbb5ee 100644 --- a/core/sql/sqludr/sqludr.h +++ b/core/sql/sqludr/sqludr.h @@ -672,7 +672,10 @@ namespace tmudr TIMESTAMP, ///< timestamp INTERVAL, ///< interval BLOB, ///< Binary Large Object - CLOB ///< Character Large Object + CLOB, ///< Character Large Object + TINYINT, ///< 8 bit integer + TINYINT_UNSIGNED, ///< unsigned 8 bit integer + BOOLEAN ///< boolean, 1 byte 0 or 1 }; /** Classes of types defined in the SQL standard */ @@ -683,6 +686,7 @@ namespace tmudr DATETIME_TYPE, ///< date/time/timestamp INTERVAL_TYPE, ///< day/month or hour/second intervals LOB_TYPE, ///< BLOBs and CLOBs + BOOLEAN_TYPE, ///< Boolean UNDEFINED_TYPE_CLASS ///< undefined value }; @@ -701,6 +705,7 @@ namespace tmudr DAY_SECOND_INTERVAL_TYPE, ///< Intervals involving ///< days/hours/minutes/seconds LOB_SUB_CLASS, ///< LOBs + BOOLEAN_SUB_CLASS, ///< Boolean UNDEFINED_TYPE_SUB_CLASS ///< undefined value }; @@ -782,6 +787,7 @@ namespace tmudr long getLong(const char *row, bool &wasNull) const; double getDouble(const char *row, bool &wasNull) const; time_t getTime(const char *row, bool &wasNull) const; + bool getBoolean(const char *row, bool &wasNull) const; const char * getRaw(const char *row, bool &wasNull, int &byteLen) const; @@ -792,6 +798,7 @@ namespace tmudr void setDouble(double val, char *row) const; void setTime(time_t val, char *row) const; void setString(const char *val, int stringLen, char *row) const; + void setBoolean(bool val, char *row) const; void setNull(char *row) const; int minBytesPerChar() const; @@ -1271,6 +1278,8 @@ namespace tmudr double getDouble(const std::string &colName) const; std::string getString(int colNum) const; std::string getString(const std::string &colName) const; + bool getBoolean(int colNum) const; + bool getBoolean(const std::string &colName) const; const char * getRaw(int colNum, int &byteLen) const; time_t getTime(int colNum) const; bool isAvailable(int colNum) const; @@ -1290,6 +1299,7 @@ namespace tmudr void setString(int colNum, const char *val, int stringLen) const; void setString(int colNum, const std::string &val) const; void setTime(int colNum, time_t val) const; + void setBoolean(int colNum, bool val) const; const char * setFromDelimitedRow(const char *row, char delim='|', bool quote = false, @@ -1493,13 +1503,19 @@ namespace tmudr ///< rows, the Trafodion compiler can automatically ///< parallelize execution and push predicates down to ///< the table-valued inputs. - REDUCER ///< A reducer requires the data to be partitioned on + REDUCER, ///< A reducer requires the data to be partitioned on ///< a set of columns. The UDF does not carry any state ///< between groups of rows with the same partition column ///< values, but it may carry state within such groups. ///< This allows the compiler to parallelize execution and ///< to push predicates on the partitioning column(s) down ///< to table-valued inputs. + REDUCER_NC ///< Same as REDUCER, except that in this case the + ///< UDF does not require the rows belonging to a key + ///< to be grouped together, they can be non-contiguous + ///< (NC). This can avoid a costly sort of the input + ///< table in cases where a highly reducing UDF can keep + ///< a table of all the keys in memory. }; /**
