Repository: hive Updated Branches: refs/heads/master 354358ee2 -> 0e5911a84
HIVE-12479 : Vectorization: Vectorized Date UDFs with up-stream Joins (Gopal V, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0e5911a8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0e5911a8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0e5911a8 Branch: refs/heads/master Commit: 0e5911a848246dabc956ca95547e43e195785597 Parents: 354358e Author: Sergey Shelukhin <[email protected]> Authored: Wed Dec 2 15:10:11 2015 -0800 Committer: Sergey Shelukhin <[email protected]> Committed: Wed Dec 2 15:10:11 2015 -0800 ---------------------------------------------------------------------- .../expressions/VectorUDFDateAddColScalar.java | 5 +- .../expressions/VectorUDFDateAddScalarCol.java | 7 +- .../expressions/VectorUDFDateDiffColScalar.java | 15 ++-- .../expressions/VectorUDFDateDiffScalarCol.java | 17 ++--- .../VectorUDFTimestampFieldLong.java | 9 +-- .../VectorUDFTimestampFieldString.java | 5 +- .../TestVectorTimestampExpressions.java | 72 ++++++++++++++++++++ 7 files changed, 104 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0e5911a8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index 9a9c928..f540994 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -66,6 +66,7 @@ public class VectorUDFDateAddColScalar extends VectorExpression { /* every line below this is identical for evaluateLong & evaluateString */ final int n = inputCol.isRepeating ? 1 : batch.size; int[] sel = batch.selected; + final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; if(batch.size == 0) { /* n != batch.size when isRepeating */ @@ -79,7 +80,7 @@ public class VectorUDFDateAddColScalar extends VectorExpression { case DATE: if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.vector[i] = evaluateDate(inputCol, i); @@ -97,7 +98,7 @@ public class VectorUDFDateAddColScalar extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; http://git-wip-us.apache.org/repos/asf/hive/blob/0e5911a8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index e0497a1..2b473ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -72,6 +72,7 @@ public class VectorUDFDateAddScalarCol extends VectorExpression { /* every line below this is identical for evaluateLong & evaluateString */ final int n = inputCol.isRepeating ? 1 : batch.size; int[] sel = batch.selected; + final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; switch (inputTypes[0]) { @@ -91,7 +92,7 @@ public class VectorUDFDateAddScalarCol extends VectorExpression { break; } catch (Exception e) { outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.isNull[i] = true; @@ -117,7 +118,7 @@ public class VectorUDFDateAddScalarCol extends VectorExpression { if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; evaluate(baseDate, inputCol.vector[i], outV, i); @@ -131,7 +132,7 @@ public class VectorUDFDateAddScalarCol extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; http://git-wip-us.apache.org/repos/asf/hive/blob/0e5911a8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java index 3df53a4..0dfe8d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java @@ -71,6 +71,7 @@ public class VectorUDFDateDiffColScalar extends VectorExpression { /* every line below this is identical for evaluateLong & evaluateString */ final int n = inputCol.isRepeating ? 1 : batch.size; int[] sel = batch.selected; + final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; if(batch.size == 0) { /* n != batch.size when isRepeating */ @@ -99,7 +100,7 @@ public class VectorUDFDateDiffColScalar extends VectorExpression { break; } catch (Exception e) { outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.isNull[i] = true; @@ -119,7 +120,7 @@ public class VectorUDFDateDiffColScalar extends VectorExpression { case DATE: if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.vector[i] = evaluateDate(inputCol, i); @@ -133,7 +134,7 @@ public class VectorUDFDateDiffColScalar extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; @@ -155,7 +156,7 @@ public class VectorUDFDateDiffColScalar extends VectorExpression { case TIMESTAMP: if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.vector[i] = evaluateTimestamp(inputCol, i); @@ -169,7 +170,7 @@ public class VectorUDFDateDiffColScalar extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; @@ -193,7 +194,7 @@ public class VectorUDFDateDiffColScalar extends VectorExpression { case VARCHAR: if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; evaluateString(inputCol, outV, i); @@ -207,7 +208,7 @@ public class VectorUDFDateDiffColScalar extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; http://git-wip-us.apache.org/repos/asf/hive/blob/0e5911a8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java index b16a21c..3ea82aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java @@ -70,6 +70,7 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression { /* every line below this is identical for evaluateLong & evaluateString */ final int n = inputCol.isRepeating ? 1 : batch.size; int[] sel = batch.selected; + final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; if(batch.size == 0) { /* n != batch.size when isRepeating */ @@ -98,7 +99,7 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression { break; } catch (Exception e) { outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.isNull[i] = true; @@ -118,7 +119,7 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression { case DATE: if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.vector[i] = evaluateDate(inputCol, i); @@ -132,7 +133,7 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; @@ -154,7 +155,7 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression { case TIMESTAMP: if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.vector[i] = evaluateTimestamp(inputCol, i); @@ -168,7 +169,7 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; @@ -192,7 +193,7 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression { case VARCHAR: if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; evaluateString(inputCol, outV, i); @@ -206,7 +207,7 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; @@ -302,4 +303,4 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression { VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/hive/blob/0e5911a8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java index 1cda0a9..3b9fffc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java @@ -98,6 +98,7 @@ public abstract class VectorUDFTimestampFieldLong extends VectorExpression { /* every line below this is identical for evaluateLong & evaluateString */ final int n = inputCol.isRepeating ? 1 : batch.size; int[] sel = batch.selected; + final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; if(batch.size == 0) { /* n != batch.size when isRepeating */ @@ -111,7 +112,7 @@ public abstract class VectorUDFTimestampFieldLong extends VectorExpression { case TIMESTAMP: if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.vector[i] = getTimestampField(inputCol.vector[i]); @@ -125,7 +126,7 @@ public abstract class VectorUDFTimestampFieldLong extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; @@ -147,7 +148,7 @@ public abstract class VectorUDFTimestampFieldLong extends VectorExpression { case DATE: if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.vector[i] = getDateField(inputCol.vector[i]); @@ -161,7 +162,7 @@ public abstract class VectorUDFTimestampFieldLong extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; http://git-wip-us.apache.org/repos/asf/hive/blob/0e5911a8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java index af96988..45e7a31 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -87,6 +87,7 @@ public abstract class VectorUDFTimestampFieldString extends VectorExpression { final int n = inputCol.isRepeating ? 1 : batch.size; int[] sel = batch.selected; + final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; if (batch.size == 0) { @@ -99,7 +100,7 @@ public abstract class VectorUDFTimestampFieldString extends VectorExpression { if (inputCol.noNulls) { outV.noNulls = true; - if (batch.selectedInUse) { + if (selectedInUse) { for (int j = 0; j < n; j++) { int i = sel[j]; try { @@ -126,7 +127,7 @@ public abstract class VectorUDFTimestampFieldString extends VectorExpression { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; - if (batch.selectedInUse) { + if (selectedInUse) { for (int j = 0; j < n; j++) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; http://git-wip-us.apache.org/repos/asf/hive/blob/0e5911a8/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java index 419254b..4a4ce27 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java @@ -284,6 +284,15 @@ public class TestVectorTimestampExpressions { batch.cols[0].isNull[0] = true; verifyUDFYear(batch, testType); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch.cols[0].isRepeating = true; + batch.selectedInUse = true; + batch.selected = new int[] {42}; + verifyUDFYear(batch, testType); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFYear(batch, testType); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); verifyUDFYear(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -367,6 +376,15 @@ public class TestVectorTimestampExpressions { batch.cols[0].isNull[0] = true; verifyUDFDayOfMonth(batch, testType); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch.cols[0].isRepeating = true; + batch.selectedInUse = true; + batch.selected = new int[] {42}; + verifyUDFDayOfMonth(batch, testType); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFDayOfMonth(batch, testType); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); verifyUDFDayOfMonth(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -443,6 +461,15 @@ public class TestVectorTimestampExpressions { batch.cols[0].isNull[0] = true; verifyUDFHour(batch, testType); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch.cols[0].isRepeating = true; + batch.selectedInUse = true; + batch.selected = new int[] {42}; + verifyUDFHour(batch, testType); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFHour(batch, testType); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); verifyUDFHour(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -519,6 +546,15 @@ public class TestVectorTimestampExpressions { batch.cols[0].isNull[0] = true; verifyUDFMinute(batch, testType); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch.cols[0].isRepeating = true; + batch.selectedInUse = true; + batch.selected = new int[] {42}; + verifyUDFMinute(batch, testType); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFMinute(batch, testType); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); verifyUDFMinute(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -595,6 +631,15 @@ public class TestVectorTimestampExpressions { batch.cols[0].isNull[0] = true; verifyUDFMonth(batch, testType); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch.cols[0].isRepeating = true; + batch.selectedInUse = true; + batch.selected = new int[] {42}; + verifyUDFMonth(batch, testType); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFMonth(batch, testType); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); verifyUDFMonth(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -671,6 +716,15 @@ public class TestVectorTimestampExpressions { batch.cols[0].isNull[0] = true; verifyUDFSecond(batch, testType); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch.cols[0].isRepeating = true; + batch.selectedInUse = true; + batch.selected = new int[] {42}; + verifyUDFSecond(batch, testType); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFSecond(batch, testType); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); verifyUDFSecond(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -761,6 +815,15 @@ public class TestVectorTimestampExpressions { batch.cols[0].isNull[0] = true; verifyUDFUnixTimeStamp(batch, testType); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch.cols[0].isRepeating = true; + batch.selectedInUse = true; + batch.selected = new int[] {42}; + verifyUDFUnixTimeStamp(batch, testType); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + + verifyUDFUnixTimeStamp(batch, testType); batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); verifyUDFUnixTimeStamp(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -834,6 +897,15 @@ public class TestVectorTimestampExpressions { batch.cols[0].isNull[0] = true; verifyUDFWeekOfYear(batch, testType); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch.cols[0].isRepeating = true; + batch.selectedInUse = true; + batch.selected = new int[] {42}; + verifyUDFWeekOfYear(batch, testType); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFWeekOfYear(batch, testType); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); verifyUDFWeekOfYear(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
