[ 
https://issues.apache.org/jira/browse/DRILL-6717?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16682214#comment-16682214
 ] 

ASF GitHub Bot commented on DRILL-6717:
---------------------------------------

ilooner closed pull request #1450: DRILL-6717: lower and upper functions not 
works with national characters
URL: https://github.com/apache/drill/pull/1450
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
index 677446adcf7..cd18180689e 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
@@ -148,24 +148,22 @@ public static int varTypesToInt(final int start, final 
int end, DrillBuf buffer)
    * Capitalizes first letter in each word.
    * Any symbol except digits and letters is considered as word delimiter.
    *
-   * @param start start position in input buffer
-   * @param end end position in input buffer
-   * @param inBuf buffer with input characters
-   * @param outBuf buffer with output characters
+   * @param source input characters
    */
-  public static void initCap(int start, int end, DrillBuf inBuf, DrillBuf 
outBuf) {
+  public static String initCap(String source) {
     boolean capitalizeNext = true;
-    int out = 0;
-    for (int id = start; id < end; id++, out++) {
-      int currentByte = inBuf.getByte(id);
-      if (Character.isLetterOrDigit(currentByte)) {
-        currentByte = capitalizeNext ? Character.toUpperCase(currentByte) : 
Character.toLowerCase(currentByte);
+    StringBuilder str = new StringBuilder(source);
+    for (int i = 0; i < str.length(); i++) {
+      char c = str.charAt(i);
+      if (Character.isLetterOrDigit(c)) {
+        str.setCharAt(i, capitalizeNext ? Character.toUpperCase(c) : 
Character.toLowerCase(c));
         capitalizeNext = false;
       } else {
         capitalizeNext = true;
       }
-      outBuf.setByte(out, currentByte);
     }
+
+    return str.toString();
   }
 
   /**
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index c23a1a29846..6b1ba40022f 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -492,14 +492,13 @@ public void setup() {
 
     @Override
     public void eval() {
-      out.buffer = buffer = buffer.reallocIfNeeded(input.end- input.start);
-      out.start = 0;
-      out.end = input.end - input.start;
+      String str = 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start,
 input.end, input.buffer);
+      byte[] result = 
str.toLowerCase().getBytes(com.google.common.base.Charsets.UTF_8);
 
-      for (int id = input.start; id < input.end; id++) {
-        byte  currentByte = input.buffer.getByte(id);
-        out.buffer.setByte(id - input.start, 
Character.toLowerCase(currentByte));
-      }
+      out.buffer = buffer = buffer.reallocIfNeeded(result.length);
+      out.start = 0;
+      out.end = result.length;
+      out.buffer.setBytes(0, result);
     }
   }
 
@@ -523,14 +522,13 @@ public void setup() {
 
     @Override
     public void eval() {
-      out.buffer = buffer = buffer.reallocIfNeeded(input.end- input.start);
-      out.start = 0;
-      out.end = input.end - input.start;
+      String str = 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start,
 input.end, input.buffer);
+      byte[] result = 
str.toUpperCase().getBytes(com.google.common.base.Charsets.UTF_8);
 
-      for (int id = input.start; id < input.end; id++) {
-        byte currentByte = input.buffer.getByte(id);
-        out.buffer.setByte(id - input.start, 
Character.toUpperCase(currentByte));
-      }
+      out.buffer = buffer = buffer.reallocIfNeeded(result.length);
+      out.start = 0;
+      out.end = result.length;
+      out.buffer.setBytes(0, result);
     }
   }
 
@@ -786,10 +784,13 @@ public void setup() {
 
     @Override
     public void eval() {
-      out.buffer = buffer = buffer.reallocIfNeeded(input.end - input.start);
+      String source = 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start,
 input.end, input.buffer);
+      String result = 
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.initCap(source);
+      byte[] bytes = result.getBytes(com.google.common.base.Charsets.UTF_8);
+      out.buffer = buffer = buffer.reallocIfNeeded(bytes.length);
       out.start = 0;
-      out.end = input.end - input.start;
-      
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.initCap(input.start, 
input.end, input.buffer, out.buffer);
+      out.end = bytes.length;
+      out.buffer.setBytes(0, bytes);
     }
 
   }
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
index 36e7ead50ce..5da8bb05d0a 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
@@ -23,7 +23,6 @@
 import org.apache.drill.test.BaseTestQuery;
 import org.apache.drill.categories.SqlFunctionTest;
 import org.apache.drill.exec.util.Text;
-import org.junit.Ignore;
 import org.junit.Test;
 
 import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
@@ -1468,7 +1467,6 @@ public void testInitcap() throws Exception {
         .run();
   }
 
-  @Ignore("DRILL-5477")
   @Test
   public void testMultiByteEncoding() throws Exception {
     testBuilder()


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> lower and upper functions not works with national characters
> ------------------------------------------------------------
>
>                 Key: DRILL-6717
>                 URL: https://issues.apache.org/jira/browse/DRILL-6717
>             Project: Apache Drill
>          Issue Type: Bug
>    Affects Versions: 1.14.0
>            Reporter: Oleg Zinoviev
>            Assignee: Oleg Zinoviev
>            Priority: Major
>              Labels: ready-to-commit
>             Fix For: 1.15.0
>
>         Attachments: result.csv, sample.json
>
>
> upper() and lower() functions work only with ASCII characters.
> sample dataset:  [^sample.json] 
> query:
> {code:sql}
> select 
>   upper(ascii_l) as upper_ascii, 
>   upper(national_l) as upper_national, 
>   lower(ascii_u) as lower_ascii, 
>   lower(national_u) as lower_national 
> from dfs.`sample.json`
> {code}
> drill response:  [^result.csv] 



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to