This is an automated email from the ASF dual-hosted git repository.
abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 718df0a7e4f HIVE-26639: ConstantVectorExpression shouldn't rely on
default charset (#3675) (Laszlo Bodor reviewed by Ayush Saxena)
718df0a7e4f is described below
commit 718df0a7e4f5423e9ea4aeb4088e956635a1567a
Author: Bodor Laszlo <[email protected]>
AuthorDate: Tue Oct 18 18:07:15 2022 +0200
HIVE-26639: ConstantVectorExpression shouldn't rely on default charset
(#3675) (Laszlo Bodor reviewed by Ayush Saxena)
---
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 3 +-
.../expressions/ConstantVectorExpression.java | 2 +-
.../clientpositive/chinese_utf8_characters.q | 10 ++
.../llap/chinese_utf8_characters.q.out | 109 +++++++++++++++++++++
4 files changed, 122 insertions(+), 2 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 0949d0a5b21..8dd6faf4e25 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -28,6 +28,7 @@ import java.io.Serializable;
import java.lang.annotation.Annotation;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -549,7 +550,7 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
try {
Path resFile = work.getResFile();
OutputStream outS = resFile.getFileSystem(conf).create(resFile);
- out = new PrintStream(outS);
+ out = new PrintStream(outS, false, StandardCharsets.UTF_8.name());
if(work.isDDL()){
getDDLPlan(out);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
index a19d35ceb7c..3dca014d2f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
@@ -205,7 +205,7 @@ public class ConstantVectorExpression extends VectorExpression {
outputColumnNum, (HiveDecimal) constantValue, outputTypeInfo);
case STRING:
return new ConstantVectorExpression(
- outputColumnNum, ((String) constantValue).getBytes(), outputTypeInfo);
+ outputColumnNum, ((String) constantValue).getBytes(StandardCharsets.UTF_8), outputTypeInfo);
case VARCHAR:
return new ConstantVectorExpression(
outputColumnNum, ((HiveVarchar) constantValue), outputTypeInfo);
diff --git a/ql/src/test/queries/clientpositive/chinese_utf8_characters.q b/ql/src/test/queries/clientpositive/chinese_utf8_characters.q
new file mode 100644
index 00000000000..c4c3871c10c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/chinese_utf8_characters.q
@@ -0,0 +1,10 @@
+CREATE EXTERNAL TABLE tbl_chinese_chars(a int, b string, c string);
+INSERT INTO tbl_chinese_chars values(1,'上海','徐汇'),(2,'北京','海淀');
+
+set hive.fetch.task.conversion=more;
+EXPLAIN SELECT * FROM default.tbl_chinese_chars where b='北京';
+SELECT * FROM default.tbl_chinese_chars where b='北京';
+
+set hive.fetch.task.conversion=none;
+EXPLAIN SELECT * FROM default.tbl_chinese_chars where b='北京';
+SELECT * FROM default.tbl_chinese_chars where b='北京';
diff --git a/ql/src/test/results/clientpositive/llap/chinese_utf8_characters.q.out b/ql/src/test/results/clientpositive/llap/chinese_utf8_characters.q.out
new file mode 100644
index 00000000000..fd6bfb982f2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/chinese_utf8_characters.q.out
@@ -0,0 +1,109 @@
+PREHOOK: query: CREATE EXTERNAL TABLE tbl_chinese_chars(a int, b string, c string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_chinese_chars
+POSTHOOK: query: CREATE EXTERNAL TABLE tbl_chinese_chars(a int, b string, c string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_chinese_chars
+PREHOOK: query: INSERT INTO tbl_chinese_chars values(1,'上海','徐汇'),(2,'北京','海淀')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_chinese_chars
+POSTHOOK: query: INSERT INTO tbl_chinese_chars values(1,'上海','徐汇'),(2,'北京','海淀')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_chinese_chars
+POSTHOOK: Lineage: tbl_chinese_chars.a SCRIPT []
+POSTHOOK: Lineage: tbl_chinese_chars.b SCRIPT []
+POSTHOOK: Lineage: tbl_chinese_chars.c SCRIPT []
+PREHOOK: query: EXPLAIN SELECT * FROM default.tbl_chinese_chars where b='北京'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_chinese_chars
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT * FROM default.tbl_chinese_chars where b='北京'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_chinese_chars
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: tbl_chinese_chars
+ filterExpr: (b = '北京') (type: boolean)
+ Filter Operator
+ predicate: (b = '北京') (type: boolean)
+ Select Operator
+ expressions: a (type: int), '北京' (type: string), c (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ ListSink
+
+PREHOOK: query: SELECT * FROM default.tbl_chinese_chars where b='北京'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_chinese_chars
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM default.tbl_chinese_chars where b='北京'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_chinese_chars
+#### A masked pattern was here ####
+2 北京 海淀
+PREHOOK: query: EXPLAIN SELECT * FROM default.tbl_chinese_chars where b='北京'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_chinese_chars
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT * FROM default.tbl_chinese_chars where b='北京'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_chinese_chars
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl_chinese_chars
+ filterExpr: (b = '北京') (type: boolean)
+ Statistics: Num rows: 2 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (b = '北京') (type: boolean)
+ Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int), '北京' (type: string), c (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT * FROM default.tbl_chinese_chars where b='北京'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_chinese_chars
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM default.tbl_chinese_chars where b='北京'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_chinese_chars
+#### A masked pattern was here ####
+2 北京 海淀