deniskuzZ commented on code in PR #5624: URL: https://github.com/apache/hive/pull/5624#discussion_r2167516168
########## ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java: ########## @@ -172,32 +170,69 @@ public BytesWritable evaluate(BytesWritable bw, IntWritable pos, IntWritable len } private BytesWritable evaluateInternal(BytesWritable bw, int pos, int len) { - if (len <= 0) { return new BytesWritable(); } - int[] index = makeIndex(pos, len, bw.getLength()); - if (index == null) { + byte[] b = Arrays.copyOf(bw.getBytes(), bw.getLength()); + StringSubstrColStartLen.populateSubstrOffsets(b, 0, b.length, craetePos(pos), len, index); + if (index[0] == -1) { return new BytesWritable(); } - return new BytesWritable(Arrays.copyOfRange(bw.getBytes(), index[0], index[1])); + return new BytesWritable(arrayCopy(b, index[0], index[1])); + } + + private BytesWritable evaluateInternal(BytesWritable bw, int pos) { + byte[] b = Arrays.copyOf(bw.getBytes(), bw.getLength()); + int offset = StringSubstrColStart.getSubstrStartOffset(b, 0, b.length, craetePos(pos)); + if (offset == -1) { + return new BytesWritable(); + } + + return new BytesWritable(arrayCopy(b, offset, bw.getLength() - offset)); } public BytesWritable evaluate(BytesWritable bw, IntWritable pos){ - return evaluate(bw, pos, maxValue); + if ((bw == null) || (pos == null)) { + return null; + } + return evaluateInternal(bw, pos.get()); } public BytesWritable evaluate(BytesWritable bw, LongWritable pos){ - return evaluate(bw, pos, maxLongValue); + if ((bw == null) || (pos == null)) { + return null; + } + + return evaluateInternal(bw, (int) pos.get()); } @Override public StatEstimator getStatEstimator() { return new SubStrStatEstimator(); } + private byte[] arrayCopy(byte[] src, int pos, int len) { Review Comment: `udf_substr.q` passed with that change. 
please see the diff ```` Subject: [PATCH] refactor --- Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java (revision 8d1d81e10fc4ba400e6abbf2fcfc72699ad32ebb) +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java (date 1750879762534) @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.udf; -import java.util.Arrays; import java.util.List; import java.util.Optional; @@ -36,6 +35,8 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import static java.util.Arrays.copyOfRange; + /** * UDFSubstr. * @@ -181,7 +182,7 @@ return new BytesWritable(); } - return new BytesWritable(arrayCopy(bw.getBytes(), index[0], index[1])); + return new BytesWritable(copyOfRange(bw.getBytes(), index[0], index[0] + index[1])); } private BytesWritable evaluateInternal(BytesWritable bw, int pos) { @@ -192,7 +193,7 @@ return new BytesWritable(); } - return new BytesWritable(arrayCopy(bw.getBytes(), offset, bw.getLength() - offset)); + return new BytesWritable(copyOfRange(bw.getBytes(), offset, bw.getLength())); } public BytesWritable evaluate(BytesWritable bw, IntWritable pos){ @@ -215,17 +216,6 @@ return new SubStrStatEstimator(); } - private byte[] arrayCopy(byte[] src, int pos, int len) { - byte[] b = new byte[len]; - - int copyIdx = 0; - for (int srcIdx = pos; copyIdx < len; srcIdx++) { - b[copyIdx] = src[srcIdx]; - copyIdx++; - } - return b; - } - private int adjustStartPos(int pos) { if (pos <= 0) { return pos; @@ -244,7 +234,6 @@ // 99 rows with 0 length // orig avg is 10 // new avg is 5 (if substr(5)) ; but in reality it will stay ~10 - Optional<Double> start = 
getRangeWidth(csList.get(1).getRange()); Range startRange = csList.get(1).getRange(); if (startRange != null && startRange.minValue != null) { double newAvgColLen = cs.getAvgColLen() - startRange.minValue.doubleValue(); @@ -255,7 +244,7 @@ if (csList.size() > 2) { Range lengthRange = csList.get(2).getRange(); if (lengthRange != null && lengthRange.maxValue != null) { - Double w = lengthRange.maxValue.doubleValue(); + double w = lengthRange.maxValue.doubleValue(); if (cs.getAvgColLen() > w) { cs.setAvgColLen(w); } @@ -263,15 +252,5 @@ } return Optional.of(cs); } - - private Optional<Double> getRangeWidth(Range range) { - if (range != null) { - if (range.minValue != null && range.maxValue != null) { - return Optional.of(range.maxValue.doubleValue() - range.minValue.doubleValue()); - } - } - return Optional.empty(); - } - } } ```` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org