[
https://issues.apache.org/jira/browse/HIVE-25193?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
qiang.bi updated HIVE-25193:
----------------------------
Attachment: HIVE-25193.1.patch
> Vectorized Query Execution: ClassCastException when using the nvl() function
> with a default value of decimal type
> ----------------------------------------------------------------------------------------------------------
>
> Key: HIVE-25193
> URL: https://issues.apache.org/jira/browse/HIVE-25193
> Project: Hive
> Issue Type: Bug
> Components: Vectorization
> Affects Versions: 4.0.0
> Reporter: qiang.bi
> Assignee: qiang.bi
> Priority: Major
> Labels: pull-request-available
> Attachments: HIVE-25193.1.patch
>
> Time Spent: 10m
> Remaining Estimate: 0h
>
> Problem statement:
> {code:java}
> set hive.vectorized.execution.enabled = true;
> select nvl(get_json_object(attr_json,'$.correctedPrice'),0.88)
> corrected_price
> from dw_mdm_sync_asset;
> {code}
> The error log:
> {code:java}
> Caused by: java.lang.ClassCastException:
> org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector cannot be cast to
> org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector
> Caused by: java.lang.ClassCastException:
> org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector cannot be cast to
> org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector at
> org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setElement(BytesColumnVector.java:504)
> at
> org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce.evaluate(VectorCoalesce.java:124)
> at
> org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression.evaluateChildren(VectorExpression.java:271)
> at
> org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDouble.evaluate(CastStringToDouble.java:83)
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:146)
> ... 28 more{code}
> The problematic HiveQL:
> {code:java}
> nvl(get_json_object(attr_json,'$.correctedPrice'),0.88) corrected_price
> {code}
> The problematic expression:
> {code:java}
> CastStringToDouble(col 39:string)(children: VectorCoalesce(columns [37,
> 38])(children: VectorUDFAdaptor(get_json_object(_col14, '$.correctedPrice'))
> -> 37:string, ConstantVectorExpression(val 0.88) -> 38:decimal(2,2)) ->
> 39:string) -> 40:double
> {code}
> The problematic code:
> {code:java}
> public class VectorCoalesce extends VectorExpression {
> ...
> @Override
> public void evaluate(VectorizedRowBatch batch) throws HiveException { if
> (childExpressions != null) {
> super.evaluateChildren(batch);
> } int[] sel = batch.selected;
> int n = batch.size;
> ColumnVector outputColVector = batch.cols[outputColumnNum];
> boolean[] outputIsNull = outputColVector.isNull;
> if (n <= 0) {
> // Nothing to do
> return;
> } if (unassignedBatchIndices == null || n >
> unassignedBatchIndices.length) { // (Re)allocate larger to be a multiple
> of 1024 (DEFAULT_SIZE).
> final int roundUpSize =
> ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) /
> VectorizedRowBatch.DEFAULT_SIZE)
> * VectorizedRowBatch.DEFAULT_SIZE;
> unassignedBatchIndices = new int[roundUpSize];
> } // We do not need to do a column reset since we are carefully
> changing the output.
> outputColVector.isRepeating = false; // CONSIDER: Should be do this
> for all vector expressions that can
> // work on BytesColumnVector output columns???
> outputColVector.init();
> final int columnCount = inputColumns.length; /*
> * Process the input columns to find a non-NULL value for each row.
> *
> * We track the unassigned batchIndex of the rows that have not received
> * a non-NULL value yet. Similar to a selected array.
> */
> boolean isAllUnassigned = true;
> int unassignedColumnCount = 0;
> for (int k = 0; k < inputColumns.length; k++) {
> ColumnVector cv = batch.cols[inputColumns[k]];
> if (cv.isRepeating) { if (cv.noNulls || !cv.isNull[0]) {
> /*
> * With a repeating value we can finish all remaining rows.
> */
> if (isAllUnassigned) { // No other columns provided
> non-NULL values. We can return repeated output.
> outputIsNull[0] = false;
> outputColVector.setElement(0, 0, cv);
> outputColVector.isRepeating = true;
> return;
> } else { // Some rows have already been assigned values.
> Assign the remaining.
> // We cannot use copySelected method here.
> for (int i = 0; i < unassignedColumnCount; i++) {
> final int batchIndex = unassignedBatchIndices[i];
> outputIsNull[batchIndex] = false; // Our input is
> repeating (i.e. inputColNumber = 0).
> outputColVector.setElement(batchIndex, 0, cv);
> }
> return;
> }
> } else { // Repeated NULLs -- skip this input column.
> }
> } else { /*
> * Non-repeating input column. Use any non-NULL values for unassigned
> rows.
> */
> if (isAllUnassigned) { /*
> * No other columns provided non-NULL values. We *may* be able to
> finish all rows
> * with this input column...
> */
> if (cv.noNulls){ // Since no NULLs, we can provide
> values for all rows.
> if (batch.selectedInUse) {
> for (int i = 0; i < n; i++) {
> final int batchIndex = sel[i];
> outputIsNull[batchIndex] = false;
> outputColVector.setElement(batchIndex, batchIndex, cv);
> }
> } else {
> Arrays.fill(outputIsNull, 0, n, false);
> for (int batchIndex = 0; batchIndex < n; batchIndex++) {
> outputColVector.setElement(batchIndex, batchIndex, cv);
> }
> }
> return;
> } else { // We might not be able to assign all rows
> because of input NULLs. Start tracking any
> // unassigned rows.
> boolean[] inputIsNull = cv.isNull;
> if (batch.selectedInUse) {
> for (int i = 0; i < n; i++) {
> final int batchIndex = sel[i];
> if (!inputIsNull[batchIndex]) {
> outputIsNull[batchIndex] = false;
> outputColVector.setElement(batchIndex, batchIndex, cv);
> } else {
> unassignedBatchIndices[unassignedColumnCount++] =
> batchIndex;
> }
> }
> } else {
> for (int batchIndex = 0; batchIndex < n; batchIndex++) {
> if (!inputIsNull[batchIndex]) {
> outputIsNull[batchIndex] = false;
> outputColVector.setElement(batchIndex, batchIndex, cv);
> } else {
> unassignedBatchIndices[unassignedColumnCount++] =
> batchIndex;
> }
> }
> }
> if (unassignedColumnCount == 0) {
> return;
> }
> isAllUnassigned = false;
> }
> } else { /*
> * We previously assigned *some* rows with non-NULL values. The
> batch indices of
> * the unassigned row were tracked.
> */
> if (cv.noNulls) { // Assign all remaining rows.
> for (int i = 0; i < unassignedColumnCount; i++) {
> final int batchIndex = unassignedBatchIndices[i];
> outputIsNull[batchIndex] = false;
> outputColVector.setElement(batchIndex, batchIndex, cv);
> }
> return;
> } else { // Use any non-NULL values found; remember the
> remaining unassigned.
> boolean[] inputIsNull = cv.isNull;
> int newUnassignedColumnCount = 0;
> for (int i = 0; i < unassignedColumnCount; i++) {
> final int batchIndex = unassignedBatchIndices[i];
> if (!inputIsNull[batchIndex]) {
> outputIsNull[batchIndex] = false;
> outputColVector.setElement(batchIndex, batchIndex, cv);
> } else {
> unassignedBatchIndices[newUnassignedColumnCount++] =
> batchIndex;
> }
> }
> if (newUnassignedColumnCount == 0) {
> return;
> }
> unassignedColumnCount = newUnassignedColumnCount;
> }
> }
> }
> } // NULL out the remaining columns.
> outputColVector.noNulls = false;
> if (isAllUnassigned) {
> outputIsNull[0] = true;
> outputColVector.isRepeating = true;
> } else {
> for (int i = 0; i < unassignedColumnCount; i++) {
> final int batchIndex = unassignedBatchIndices[i];
> outputIsNull[batchIndex] = true;
> }
> }
> }
> ...
> }
> {code}
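> In the code above, outputColVector is a BytesColumnVector, but one of the
> input column vectors is a DecimalColumnVector, so the call to
> outputColVector.setElement() throws the ClassCastException shown above.
> For now, the problem can be worked around by wrapping the default value in
> single quotes ('0.88') so that it is a string literal. For example: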
> {code:java}
> nvl(get_json_object(attr_json,'$.correctedPrice'), '0.88') corrected_price
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)