[ https://issues.apache.org/jira/browse/DRILL-6717?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16682214#comment-16682214 ]
ASF GitHub Bot commented on DRILL-6717: --------------------------------------- ilooner closed pull request #1450: DRILL-6717: lower and upper functions not works with national characters URL: https://github.com/apache/drill/pull/1450 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java index 677446adcf7..cd18180689e 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java @@ -148,24 +148,22 @@ public static int varTypesToInt(final int start, final int end, DrillBuf buffer) * Capitalizes first letter in each word. * Any symbol except digits and letters is considered as word delimiter. * - * @param start start position in input buffer - * @param end end position in input buffer - * @param inBuf buffer with input characters - * @param outBuf buffer with output characters + * @param source input characters */ - public static void initCap(int start, int end, DrillBuf inBuf, DrillBuf outBuf) { + public static String initCap(String source) { boolean capitalizeNext = true; - int out = 0; - for (int id = start; id < end; id++, out++) { - int currentByte = inBuf.getByte(id); - if (Character.isLetterOrDigit(currentByte)) { - currentByte = capitalizeNext ? Character.toUpperCase(currentByte) : Character.toLowerCase(currentByte); + StringBuilder str = new StringBuilder(source); + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (Character.isLetterOrDigit(c)) { + str.setCharAt(i, capitalizeNext ? 
Character.toUpperCase(c) : Character.toLowerCase(c)); capitalizeNext = false; } else { capitalizeNext = true; } - outBuf.setByte(out, currentByte); } + + return str.toString(); } /** diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java index c23a1a29846..6b1ba40022f 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java @@ -492,14 +492,13 @@ public void setup() { @Override public void eval() { - out.buffer = buffer = buffer.reallocIfNeeded(input.end- input.start); - out.start = 0; - out.end = input.end - input.start; + String str = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); + byte[] result = str.toLowerCase().getBytes(com.google.common.base.Charsets.UTF_8); - for (int id = input.start; id < input.end; id++) { - byte currentByte = input.buffer.getByte(id); - out.buffer.setByte(id - input.start, Character.toLowerCase(currentByte)); - } + out.buffer = buffer = buffer.reallocIfNeeded(result.length); + out.start = 0; + out.end = result.length; + out.buffer.setBytes(0, result); } } @@ -523,14 +522,13 @@ public void setup() { @Override public void eval() { - out.buffer = buffer = buffer.reallocIfNeeded(input.end- input.start); - out.start = 0; - out.end = input.end - input.start; + String str = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); + byte[] result = str.toUpperCase().getBytes(com.google.common.base.Charsets.UTF_8); - for (int id = input.start; id < input.end; id++) { - byte currentByte = input.buffer.getByte(id); - out.buffer.setByte(id - input.start, Character.toUpperCase(currentByte)); - } + out.buffer = buffer = buffer.reallocIfNeeded(result.length); + out.start = 
0; + out.end = result.length; + out.buffer.setBytes(0, result); } } @@ -786,10 +784,13 @@ public void setup() { @Override public void eval() { - out.buffer = buffer = buffer.reallocIfNeeded(input.end - input.start); + String source = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); + String result = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.initCap(source); + byte[] bytes = result.getBytes(com.google.common.base.Charsets.UTF_8); + out.buffer = buffer = buffer.reallocIfNeeded(bytes.length); out.start = 0; - out.end = input.end - input.start; - org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.initCap(input.start, input.end, input.buffer, out.buffer); + out.end = bytes.length; + out.buffer.setBytes(0, bytes); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java index 36e7ead50ce..5da8bb05d0a 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java @@ -23,7 +23,6 @@ import org.apache.drill.test.BaseTestQuery; import org.apache.drill.categories.SqlFunctionTest; import org.apache.drill.exec.util.Text; -import org.junit.Ignore; import org.junit.Test; import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; @@ -1468,7 +1467,6 @@ public void testInitcap() throws Exception { .run(); } - @Ignore("DRILL-5477") @Test public void testMultiByteEncoding() throws Exception { testBuilder() ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > lower and upper functions not works with national characters > ------------------------------------------------------------ > > Key: DRILL-6717 > URL: https://issues.apache.org/jira/browse/DRILL-6717 > Project: Apache Drill > Issue Type: Bug > Affects Versions: 1.14.0 > Reporter: Oleg Zinoviev > Assignee: Oleg Zinoviev > Priority: Major > Labels: ready-to-commit > Fix For: 1.15.0 > > Attachments: result.csv, sample.json > > > upper() and lower() functions work only with ASCII characters. > sample dataset: [^sample.json] > query: > {code:sql} > select > upper(ascii_l) as upper_ascii, > upper(national_l) as upper_national, > lower(ascii_u) as lower_ascii, > lower(national_u) as lower_national > from dfs.`sample.json` > {code} > drill response: [^result.csv] -- This message was sent by Atlassian JIRA (v7.6.3#76005)