Repository: hive Updated Branches: refs/heads/master 01fd68443 -> 9350b6934
http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/ql/src/test/results/clientpositive/windowing_order_null.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/windowing_order_null.q.out b/ql/src/test/results/clientpositive/windowing_order_null.q.out new file mode 100644 index 0000000..ca18b42 --- /dev/null +++ b/ql/src/test/results/clientpositive/windowing_order_null.q.out @@ -0,0 +1,183 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal, + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal, + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over4_null' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over4_null' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: 
query: select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +NULL alice ichabod NULL NULL +NULL NULL NULL NULL +65534 calvin miller NULL NULL +65534 NULL NULL NULL +65536 alice ichabod 4294967441 4294967441 +65536 alice robinson 4294967476 8589934917 +65536 bob robinson 4294967349 12884902266 +65536 calvin thompson 4294967336 17179869602 +65536 david johnson 4294967490 21474837092 +65536 david laertes 4294967431 25769804523 +PREHOOK: query: select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +NULL alice ichabod NULL NULL +NULL calvin miller NULL NULL +0.01 NULL NULL NULL +0.01 NULL NULL NULL +0.01 calvin miller 8.39 8.390000343322754 +0.02 NULL NULL NULL +0.02 holly polk 5.29 5.289999961853027 +0.02 wendy quirinius 25.5 30.789999961853027 +0.02 yuri laertes 37.59 68.38000011444092 +0.03 nick steinbeck 79.24 79.23999786376953 +PREHOOK: query: select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and unbounded following) from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and 
unbounded following) from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 NULL NULL 1276.850001335144 +2013-03-01 09:11:58.70307 gabriella xylophone 3.17 1276.850001335144 +2013-03-01 09:11:58.70307 calvin brown 10.89 1273.68000125885 +2013-03-01 09:11:58.70307 jessica laertes 14.54 1262.7900009155273 +2013-03-01 09:11:58.70307 yuri allen 14.78 1248.2500009536743 +2013-03-01 09:11:58.70307 tom johnson 17.85 1233.4700012207031 +2013-03-01 09:11:58.70307 bob ovid 20.61 1215.6200008392334 +2013-03-01 09:11:58.70307 fred nixon 28.69 1195.0100002288818 +2013-03-01 09:11:58.70307 oscar brown 29.22 1166.3199996948242 +2013-03-01 09:11:58.70307 calvin laertes 31.17 1137.1000003814697 +PREHOOK: query: select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +-3 alice allen 29.44 33.20166666666666 +-3 alice davidson 31.52 30.741428571428568 +-3 alice falkner 49.8 27.742499999999996 +-3 alice king 41.5 26.706666666666663 +-3 alice king 30.76 26.306999999999995 +-3 alice xylophone 16.19 24.458181818181814 +-3 bob ellison 15.98 25.029090909090908 +-3 bob falkner 6.75 24.216363636363635 +-3 bob ichabod 18.42 20.173636363636362 +-3 bob johnson 22.71 16.431818181818176 +PREHOOK: query: select ts, s, sum(i) over(partition by ts order by s nulls last) from over10k limit 10 offset 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, s, sum(i) over(partition by ts order by s nulls last) from over10k limit 10 
offset 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 calvin steinbeck 262874 +2013-03-01 09:11:58.70307 david falkner 328506 +2013-03-01 09:11:58.70307 fred nixon 394118 +2013-03-01 09:11:58.70307 fred zipper 459719 +2013-03-01 09:11:58.70307 gabriella van buren 525334 +2013-03-01 09:11:58.70307 gabriella xylophone 591058 +2013-03-01 09:11:58.70307 jessica laertes 656771 +2013-03-01 09:11:58.70307 jessica polk 722558 +2013-03-01 09:11:58.70307 katie king 788310 +2013-03-01 09:11:58.70307 katie white 853920 +PREHOOK: query: select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +NULL 65536 0.02 +NULL 65534 0.03 +NULL NULL 0.04 +alice allen 65758 23.59 +alice allen 65720 43.98 +PREHOOK: query: select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +NULL 65536 0.002 +NULL 65534 0.002 +NULL NULL 0.001 +alice allen 65758 2.359 +alice allen 65720 2.199 +PREHOOK: query: select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) from over10k window w1 as (partition by s order by i nulls last) limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) 
from over10k window w1 as (partition by s order by i nulls last) limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +NULL 65534 20.0 +NULL 65536 20.0 +NULL NULL 20.0 +alice allen 65545 20.0 +alice allen 65557 20.0 http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/ql/src/test/results/clientpositive/windowing_streaming.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/windowing_streaming.q.out b/ql/src/test/results/clientpositive/windowing_streaming.q.out index d3226a1..27dd96f 100644 --- a/ql/src/test/results/clientpositive/windowing_streaming.q.out +++ b/ql/src/test/results/clientpositive/windowing_streaming.q.out @@ -84,7 +84,7 @@ STAGE PLANS: Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 + order by: _col1 ASC NULLS FIRST partition by: _col2 raw input shape: window functions: @@ -155,7 +155,7 @@ STAGE PLANS: Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 + order by: _col1 ASC NULLS FIRST partition by: _col2 raw input shape: window functions: @@ -324,7 +324,7 @@ STAGE PLANS: Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col5 + order by: _col5 ASC NULLS FIRST partition by: _col0 raw input shape: window functions: http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/if/serde.thrift ---------------------------------------------------------------------- diff --git a/serde/if/serde.thrift b/serde/if/serde.thrift index 40d5f47..0b3804d 100644 --- a/serde/if/serde.thrift +++ b/serde/if/serde.thrift @@ -30,6 +30,7 @@ const string SERIALIZATION_NULL_FORMAT = "serialization.null.format" const string SERIALIZATION_ESCAPE_CRLF = "serialization.escape.crlf" const string SERIALIZATION_LAST_COLUMN_TAKES_REST = "serialization.last.column.takes.rest" const string SERIALIZATION_SORT_ORDER = 
"serialization.sort.order" +const string SERIALIZATION_NULL_POSITION = "serialization.null.position"; const string SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object" const string SERIALIZATION_ENCODING = "serialization.encoding" http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/gen/thrift/gen-cpp/serde_constants.cpp ---------------------------------------------------------------------- diff --git a/serde/src/gen/thrift/gen-cpp/serde_constants.cpp b/serde/src/gen/thrift/gen-cpp/serde_constants.cpp index 243d3b8..75701e2 100644 --- a/serde/src/gen/thrift/gen-cpp/serde_constants.cpp +++ b/serde/src/gen/thrift/gen-cpp/serde_constants.cpp @@ -27,6 +27,8 @@ serdeConstants::serdeConstants() { SERIALIZATION_SORT_ORDER = "serialization.sort.order"; + SERIALIZATION_NULL_POSITION = "serialization.null.position"; + SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object"; SERIALIZATION_ENCODING = "serialization.encoding"; http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/gen/thrift/gen-cpp/serde_constants.h ---------------------------------------------------------------------- diff --git a/serde/src/gen/thrift/gen-cpp/serde_constants.h b/serde/src/gen/thrift/gen-cpp/serde_constants.h index 3566ead..6d85928 100644 --- a/serde/src/gen/thrift/gen-cpp/serde_constants.h +++ b/serde/src/gen/thrift/gen-cpp/serde_constants.h @@ -23,6 +23,7 @@ class serdeConstants { std::string SERIALIZATION_ESCAPE_CRLF; std::string SERIALIZATION_LAST_COLUMN_TAKES_REST; std::string SERIALIZATION_SORT_ORDER; + std::string SERIALIZATION_NULL_POSITION; std::string SERIALIZATION_USE_JSON_OBJECTS; std::string SERIALIZATION_ENCODING; std::string FIELD_DELIM; http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java ---------------------------------------------------------------------- diff --git 
a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java index 8b3eeb7..04ed8f5 100644 --- a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -52,6 +52,8 @@ public class serdeConstants { public static final String SERIALIZATION_SORT_ORDER = "serialization.sort.order"; + public static final String SERIALIZATION_NULL_SORT_ORDER = "serialization.sort.order.null"; + public static final String SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object"; public static final String SERIALIZATION_ENCODING = "serialization.encoding"; http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php ---------------------------------------------------------------------- diff --git a/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php b/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php index 8370698..0bc6dd7 100644 --- a/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php +++ b/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php @@ -26,6 +26,7 @@ final class Constant extends \Thrift\Type\TConstant { static protected $SERIALIZATION_ESCAPE_CRLF; static protected $SERIALIZATION_LAST_COLUMN_TAKES_REST; static protected $SERIALIZATION_SORT_ORDER; + static protected $SERIALIZATION_NULL_POSITION; static protected $SERIALIZATION_USE_JSON_OBJECTS; static protected $SERIALIZATION_ENCODING; static protected $FIELD_DELIM; @@ -97,6 +98,10 @@ final class Constant extends \Thrift\Type\TConstant { return "serialization.sort.order"; } + static protected function init_SERIALIZATION_NULL_POSITION() { + return "serialization.null.position"; + } + static protected function init_SERIALIZATION_USE_JSON_OBJECTS() { return 
"serialization.use.json.object"; } http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py ---------------------------------------------------------------------- diff --git a/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py b/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py index 6ef3bcf..7939791 100644 --- a/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py +++ b/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py @@ -17,6 +17,7 @@ SERIALIZATION_NULL_FORMAT = "serialization.null.format" SERIALIZATION_ESCAPE_CRLF = "serialization.escape.crlf" SERIALIZATION_LAST_COLUMN_TAKES_REST = "serialization.last.column.takes.rest" SERIALIZATION_SORT_ORDER = "serialization.sort.order" +SERIALIZATION_NULL_POSITION = "serialization.null.position" SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object" SERIALIZATION_ENCODING = "serialization.encoding" FIELD_DELIM = "field.delim" http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/gen/thrift/gen-rb/serde_constants.rb ---------------------------------------------------------------------- diff --git a/serde/src/gen/thrift/gen-rb/serde_constants.rb b/serde/src/gen/thrift/gen-rb/serde_constants.rb index f98441b..d09e3c2 100644 --- a/serde/src/gen/thrift/gen-rb/serde_constants.rb +++ b/serde/src/gen/thrift/gen-rb/serde_constants.rb @@ -23,6 +23,8 @@ SERIALIZATION_LAST_COLUMN_TAKES_REST = %q"serialization.last.column.takes.rest" SERIALIZATION_SORT_ORDER = %q"serialization.sort.order" +SERIALIZATION_NULL_POSITION = %q"serialization.null.position" + SERIALIZATION_USE_JSON_OBJECTS = %q"serialization.use.json.object" SERIALIZATION_ENCODING = %q"serialization.encoding" http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java 
---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java index 144ea5a..5e119d7 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java @@ -28,8 +28,6 @@ import java.util.List; import java.util.Map; import java.util.Properties; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -92,16 +90,18 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * BinarySortableSerDe can be used to write data in a way that the data can be * compared byte-by-byte with the same order. 
* - * The data format: NULL: a single byte \0 NON-NULL Primitives: ALWAYS prepend a - * single byte \1, and then: Boolean: FALSE = \1, TRUE = \2 Byte: flip the - * sign-bit to make sure negative comes before positive Short: flip the sign-bit - * to make sure negative comes before positive Int: flip the sign-bit to make - * sure negative comes before positive Long: flip the sign-bit to make sure + * The data format: NULL: a single byte (\0 or \1, check below) NON-NULL Primitives: + * ALWAYS prepend a single byte (\0 or \1), and then: Boolean: FALSE = \1, TRUE = \2 + * Byte: flip the sign-bit to make sure negative comes before positive Short: flip the + * sign-bit to make sure negative comes before positive Int: flip the sign-bit to + * make sure negative comes before positive Long: flip the sign-bit to make sure * negative comes before positive Double: flip the sign-bit for positive double, * and all bits for negative double values String: NULL-terminated UTF-8 string, * with NULL escaped to \1 \1, and \1 escaped to \1 \2 NON-NULL Complex Types: @@ -115,14 +115,23 @@ import org.apache.hadoop.io.Writable; * field should be sorted ascendingly, and "-" means descendingly. The sub * fields in the same top-level field will have the same sort order. * + * This SerDe takes an additional parameter SERIALIZATION_NULL_SORT_ORDER which is a + * string containing only "a" and "z". The length of the string should be equal to + * the number of fields in the top-level struct for serialization. "a" means that + * NULL should come first (thus, single byte is \0 for ascending order, \1 + * for descending order), while "z" means that NULL should come last (thus, single + * byte is \1 for ascending order, \0 for descending order). 
*/ @SerDeSpec(schemaProps = { serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES, - serdeConstants.SERIALIZATION_SORT_ORDER}) + serdeConstants.SERIALIZATION_SORT_ORDER, serdeConstants.SERIALIZATION_NULL_SORT_ORDER}) public class BinarySortableSerDe extends AbstractSerDe { public static final Logger LOG = LoggerFactory.getLogger(BinarySortableSerDe.class.getName()); + public static final byte ZERO = (byte) 0; + public static final byte ONE = (byte) 1; + List<String> columnNames; List<TypeInfo> columnTypes; @@ -130,6 +139,8 @@ public class BinarySortableSerDe extends AbstractSerDe { StructObjectInspector rowObjectInspector; boolean[] columnSortOrderIsDesc; + byte[] columnNullMarker; + byte[] columnNotNullMarker; public static Charset decimalCharSet = Charset.forName("US-ASCII"); @@ -170,6 +181,37 @@ public class BinarySortableSerDe extends AbstractSerDe { columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder .charAt(i) == '-'); } + + // Null first/last + String columnNullOrder = tbl + .getProperty(serdeConstants.SERIALIZATION_NULL_SORT_ORDER); + columnNullMarker = new byte[columnNames.size()]; + columnNotNullMarker = new byte[columnNames.size()]; + for (int i = 0; i < columnSortOrderIsDesc.length; i++) { + if (columnSortOrderIsDesc[i]) { + // Descending + if (columnNullOrder != null && columnNullOrder.charAt(i) == 'a') { + // Null first + columnNullMarker[i] = ONE; + columnNotNullMarker[i] = ZERO; + } else { + // Null last (default for descending order) + columnNullMarker[i] = ZERO; + columnNotNullMarker[i] = ONE; + } + } else { + // Ascending + if (columnNullOrder != null && columnNullOrder.charAt(i) == 'z') { + // Null last + columnNullMarker[i] = ONE; + columnNotNullMarker[i] = ZERO; + } else { + // Null first (default for ascending order) + columnNullMarker[i] = ZERO; + columnNotNullMarker[i] = ONE; + } + } + } } @Override @@ -193,7 +235,7 @@ public class BinarySortableSerDe extends AbstractSerDe { try { for (int i = 0; i < 
columnNames.size(); i++) { row.set(i, deserialize(inputByteBuffer, columnTypes.get(i), - columnSortOrderIsDesc[i], row.get(i))); + columnSortOrderIsDesc[i], columnNullMarker[i], columnNotNullMarker[i], row.get(i))); } } catch (IOException e) { throw new SerDeException(e); @@ -203,14 +245,14 @@ public class BinarySortableSerDe extends AbstractSerDe { } static Object deserialize(InputByteBuffer buffer, TypeInfo type, - boolean invert, Object reuse) throws IOException { + boolean invert, byte nullMarker, byte notNullMarker, Object reuse) throws IOException { // Is this field a null? byte isNull = buffer.read(invert); - if (isNull == 0) { + if (isNull == nullMarker) { return null; } - assert (isNull == 1); + assert (isNull == notNullMarker); switch (type.getCategory()) { case PRIMITIVE: { @@ -475,7 +517,7 @@ public class BinarySortableSerDe extends AbstractSerDe { if (size == r.size()) { r.add(null); } - r.set(size, deserialize(buffer, etype, invert, r.get(size))); + r.set(size, deserialize(buffer, etype, invert, nullMarker, notNullMarker, r.get(size))); size++; } // Remove additional elements if the list is reused @@ -506,8 +548,8 @@ public class BinarySortableSerDe extends AbstractSerDe { } // \1 followed by each key and then each value assert (more == 1); - Object k = deserialize(buffer, ktype, invert, null); - Object v = deserialize(buffer, vtype, invert, null); + Object k = deserialize(buffer, ktype, invert, nullMarker, notNullMarker, null); + Object v = deserialize(buffer, vtype, invert, nullMarker, notNullMarker, null); r.put(k, v); } return r; @@ -527,7 +569,7 @@ public class BinarySortableSerDe extends AbstractSerDe { // Read one field by one field for (int eid = 0; eid < size; eid++) { r - .set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r + .set(eid, deserialize(buffer, fieldTypes.get(eid), invert, nullMarker, notNullMarker, r .get(eid))); } return r; @@ -540,7 +582,7 @@ public class BinarySortableSerDe extends AbstractSerDe { byte tag = 
buffer.read(invert); r.setTag(tag); r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), - invert, null)); + invert, nullMarker, notNullMarker, null)); return r; } default: { @@ -626,7 +668,8 @@ public class BinarySortableSerDe extends AbstractSerDe { for (int i = 0; i < columnNames.size(); i++) { serialize(output, soi.getStructFieldData(obj, fields.get(i)), - fields.get(i).getFieldObjectInspector(), columnSortOrderIsDesc[i]); + fields.get(i).getFieldObjectInspector(), columnSortOrderIsDesc[i], + columnNullMarker[i], columnNotNullMarker[i]); } serializeBytesWritable.set(output.getData(), 0, output.getLength()); @@ -641,14 +684,14 @@ public class BinarySortableSerDe extends AbstractSerDe { } static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, - boolean invert) throws SerDeException { + boolean invert, byte nullMarker, byte notNullMarker) throws SerDeException { // Is this field a null? if (o == null) { - writeByte(buffer, (byte) 0, invert); + writeByte(buffer, nullMarker, invert); return; } // This field is not a null. 
- writeByte(buffer, (byte) 1, invert); + writeByte(buffer, notNullMarker, invert); switch (oi.getCategory()) { case PRIMITIVE: { @@ -786,7 +829,7 @@ public class BinarySortableSerDe extends AbstractSerDe { int size = loi.getListLength(o); for (int eid = 0; eid < size; eid++) { writeByte(buffer, (byte) 1, invert); - serialize(buffer, loi.getListElement(o, eid), eoi, invert); + serialize(buffer, loi.getListElement(o, eid), eoi, invert, nullMarker, notNullMarker); } // and \0 to terminate writeByte(buffer, (byte) 0, invert); @@ -801,8 +844,8 @@ public class BinarySortableSerDe extends AbstractSerDe { Map<?, ?> map = moi.getMap(o); for (Map.Entry<?, ?> entry : map.entrySet()) { writeByte(buffer, (byte) 1, invert); - serialize(buffer, entry.getKey(), koi, invert); - serialize(buffer, entry.getValue(), voi, invert); + serialize(buffer, entry.getKey(), koi, invert, nullMarker, notNullMarker); + serialize(buffer, entry.getValue(), voi, invert, nullMarker, notNullMarker); } // and \0 to terminate writeByte(buffer, (byte) 0, invert); @@ -814,7 +857,7 @@ public class BinarySortableSerDe extends AbstractSerDe { for (int i = 0; i < fields.size(); i++) { serialize(buffer, soi.getStructFieldData(o, fields.get(i)), fields.get( - i).getFieldObjectInspector(), invert); + i).getFieldObjectInspector(), invert, nullMarker, notNullMarker); } return; } @@ -823,7 +866,7 @@ public class BinarySortableSerDe extends AbstractSerDe { byte tag = uoi.getTag(o); writeByte(buffer, tag, invert); serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), - invert); + invert, nullMarker, notNullMarker); return; } default: { @@ -971,13 +1014,24 @@ public class BinarySortableSerDe extends AbstractSerDe { } public static void serializeStruct(Output byteStream, Object[] fieldData, - List<ObjectInspector> fieldOis, boolean[] sortableSortOrders) throws SerDeException { + List<ObjectInspector> fieldOis, boolean[] sortableSortOrders, + byte[] nullMarkers, byte[] notNullMarkers) throws 
SerDeException { for (int i = 0; i < fieldData.length; i++) { - serialize(byteStream, fieldData[i], fieldOis.get(i), sortableSortOrders[i]); + serialize(byteStream, fieldData[i], fieldOis.get(i), sortableSortOrders[i], + nullMarkers[i], notNullMarkers[i]); } } public boolean[] getSortOrders() { return columnSortOrderIsDesc; } + + public byte[] getNullMarkers() { + return columnNullMarker; + } + + public byte[] getNotNullMarkers() { + return columnNotNullMarker; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDeWithEndPrefix.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDeWithEndPrefix.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDeWithEndPrefix.java index ec43ae3..73e20a8 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDeWithEndPrefix.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDeWithEndPrefix.java @@ -28,7 +28,7 @@ public class BinarySortableSerDeWithEndPrefix extends BinarySortableSerDe { public static void serializeStruct(Output byteStream, Object[] fieldData, List<ObjectInspector> fieldOis, boolean endPrefix) throws SerDeException { for (int i = 0; i < fieldData.length; i++) { - serialize(byteStream, fieldData[i], fieldOis.get(i), false); + serialize(byteStream, fieldData[i], fieldOis.get(i), false, ZERO, ONE); } if (endPrefix) { if (fieldData[fieldData.length-1]!=null) { http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java 
b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java index 7456725..62bcaa5 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java @@ -23,8 +23,6 @@ import java.sql.Date; import java.sql.Timestamp; import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -32,10 +30,12 @@ import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.fast.SerializeWrite; import org.apache.hive.common.util.DateUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /* * Directly serialize, field-by-field, the BinarySortable format. @@ -49,6 +49,9 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { // The sort order (ascending/descending) for each field. Set to true when descending (invert). private boolean[] columnSortOrderIsDesc; + // Null first/last + private byte[] columnNullMarker; + private byte[] columnNotNullMarker; // Which field we are on. We start with -1 to be consistent in style with // BinarySortableDeserializeRead. 
@@ -58,20 +61,28 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { private TimestampWritable tempTimestampWritable; - public BinarySortableSerializeWrite(boolean[] columnSortOrderIsDesc) { + public BinarySortableSerializeWrite(boolean[] columnSortOrderIsDesc, + byte[] columnNullMarker, byte[] columnNotNullMarker) { this(); fieldCount = columnSortOrderIsDesc.length; this.columnSortOrderIsDesc = columnSortOrderIsDesc; + this.columnNullMarker = columnNullMarker; + this.columnNotNullMarker = columnNotNullMarker; } /* * Use this constructor when only ascending sort order is used. + * By default for ascending order, NULL first. */ public BinarySortableSerializeWrite(int fieldCount) { this(); this.fieldCount = fieldCount; columnSortOrderIsDesc = new boolean[fieldCount]; Arrays.fill(columnSortOrderIsDesc, false); + columnNullMarker = new byte[fieldCount]; + Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO); + columnNotNullMarker = new byte[fieldCount]; + Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE); } // Not public since we must have the field count or column sort order information. @@ -112,7 +123,8 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeNull() throws IOException { - BinarySortableSerDe.writeByte(output, (byte) 0, columnSortOrderIsDesc[++index]); + ++index; + BinarySortableSerDe.writeByte(output, columnNullMarker[index], columnSortOrderIsDesc[index]); } /* @@ -120,10 +132,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeBoolean(boolean v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.writeByte(output, (byte) (v ? 
2 : 1), invert); } @@ -133,10 +147,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeByte(byte v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.writeByte(output, (byte) (v ^ 0x80), invert); } @@ -146,10 +162,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeShort(short v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeShort(output, v, invert); } @@ -159,10 +177,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeInt(int v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeInt(output, v, invert); } @@ -172,10 +192,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeLong(long v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. 
- BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeLong(output, v, invert); } @@ -185,10 +207,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeFloat(float vf) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeFloat(output, vf, invert); } @@ -198,10 +222,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeDouble(double vd) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeDouble(output, vd, invert); } @@ -214,20 +240,24 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeString(byte[] v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeBytes(output, v, 0, v.length, invert); } @Override public void writeString(byte[] v, int start, int length) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. 
- BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeBytes(output, v, start, length, invert); } @@ -257,20 +287,24 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeBinary(byte[] v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeBytes(output, v, 0, v.length, invert); } @Override public void writeBinary(byte[] v, int start, int length) { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeBytes(output, v, start, length, invert); } @@ -280,10 +314,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeDate(Date date) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeInt(output, DateWritable.dateToDays(date), invert); } @@ -291,10 +327,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { // We provide a faster way to write a date without a Date object. 
@Override public void writeDate(int dateAsDays) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeInt(output, dateAsDays, invert); } @@ -304,10 +342,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeTimestamp(Timestamp vt) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); tempTimestampWritable.set(vt); BinarySortableSerDe.serializeTimestampWritable(output, tempTimestampWritable, invert); @@ -318,20 +358,24 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeHiveIntervalYearMonth(output, viyt, invert); } @Override public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. 
- BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeInt(output, totalMonths, invert); } @@ -341,10 +385,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeHiveIntervalDayTime(output, vidt, invert); } @@ -354,10 +400,12 @@ public final class BinarySortableSerializeWrite implements SerializeWrite { */ @Override public void writeHiveDecimal(HiveDecimal dec, int scale) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. 
- BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeHiveDecimal(output, dec, invert); } http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java index ae476ae..0be3213 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java @@ -23,8 +23,6 @@ import java.util.List; import java.util.Map; import java.util.Random; -import junit.framework.TestCase; - import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.SerDe; @@ -33,20 +31,23 @@ import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTy import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.io.BytesWritable; +import 
junit.framework.TestCase; + public class TestBinarySortableFast extends TestCase { private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, - boolean[] columnSortOrderIsDesc, SerDe serde, StructObjectInspector rowOI, boolean ascending, + boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, + SerDe serde, StructObjectInspector rowOI, boolean ascending, Map<Object, PrimitiveTypeInfo[]> primitiveTypeInfoMap) throws Throwable { - BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc); + BinarySortableSerializeWrite binarySortableSerializeWrite = + new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); // Try to serialize @@ -227,15 +228,24 @@ public class TestBinarySortableFast extends TestCase { String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); String order; order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '+'); - SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order); + String nullOrder; + nullOrder = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'a'); + SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder); order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '-'); - SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order); + nullOrder = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'z'); + SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder); boolean[] columnSortOrderIsDesc = new boolean[MyTestPrimitiveClass.primitiveCount]; Arrays.fill(columnSortOrderIsDesc, false); - testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, serde_ascending, rowOI, true, primitiveTypeInfoMap); + byte[] columnNullMarker = new byte[MyTestPrimitiveClass.primitiveCount]; + 
Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO); + byte[] columnNotNullMarker = new byte[MyTestPrimitiveClass.primitiveCount]; + Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE); + testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker, + columnNotNullMarker, serde_ascending, rowOI, true, primitiveTypeInfoMap); Arrays.fill(columnSortOrderIsDesc, true); - testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, serde_descending, rowOI, false, primitiveTypeInfoMap); + testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker, + columnNotNullMarker, serde_descending, rowOI, false, primitiveTypeInfoMap); } catch (Throwable e) { e.printStackTrace(); throw e; http://git-wip-us.apache.org/repos/asf/hive/blob/9350b693/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java index af47e6f..935313b 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java @@ -17,18 +17,12 @@ */ package org.apache.hadoop.hive.serde2.binarysortable; -import java.sql.Date; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Properties; import java.util.Random; -import junit.framework.TestCase; - import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -40,6 +34,8 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.BytesWritable; +import junit.framework.TestCase; + /** * TestBinarySortableSerDe. * @@ -66,12 +62,13 @@ public class TestBinarySortableSerDe extends TestCase { return sb.toString(); } - public static SerDe getSerDe(String fieldNames, String fieldTypes, String order) + public static SerDe getSerDe(String fieldNames, String fieldTypes, String order, String nullOrder) throws Throwable { Properties schema = new Properties(); schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames); schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); schema.setProperty(serdeConstants.SERIALIZATION_SORT_ORDER, order); + schema.setProperty(serdeConstants.SERIALIZATION_NULL_SORT_ORDER, nullOrder); BinarySortableSerDe serde = new BinarySortableSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), schema, null); @@ -172,11 +169,14 @@ public class TestBinarySortableSerDe extends TestCase { String order; order = StringUtils.leftPad("", MyTestClass.fieldCount, '+'); + String nullOrder; + nullOrder = StringUtils.leftPad("", MyTestClass.fieldCount, 'a'); testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes, - order), true); + order, nullOrder), true); order = StringUtils.leftPad("", MyTestClass.fieldCount, '-'); + nullOrder = StringUtils.leftPad("", MyTestClass.fieldCount, 'z'); testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes, - order), false); + order, nullOrder), false); System.out.println("Test testTBinarySortableProtocol passed!"); } catch (Throwable e) {
