Author: ekoifman
Date: Tue Jan 20 23:26:26 2015
New Revision: 1653406

URL: http://svn.apache.org/r1653406
Log:
HIVE-9272 Tests for utf-8 support (Aswathy Chellammal Sreekumar via Eugene 
Koifman)

Added:
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin䶴ㄩ鼾丄狜〇work.pig"
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof䶴ㄩ鼾丄狜〇war.txt"
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt
    hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf
Modified:
    hive/trunk/hcatalog/src/test/e2e/templeton/build.xml
    hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
    hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/build.xml
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/build.xml?rev=1653406&r1=1653405&r2=1653406&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/build.xml (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/build.xml Tue Jan 20 23:26:26 
2015
@@ -120,6 +120,7 @@
             <arg value="${basedir}/tests/ddl.conf"/>
             <arg value="${basedir}/tests/jobsubmission.conf"/>
             <arg value="${basedir}/tests/jobsubmission2.conf"/>
+            <arg value="${basedir}/tests/utf8.conf"/>
         </exec>
     </target>
 

Modified: 
hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh?rev=1653406&r1=1653405&r2=1653406&view=diff
==============================================================================
--- 
hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh 
(original)
+++ 
hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh 
Tue Jan 20 23:26:26 2015
@@ -32,6 +32,8 @@ ${HADOOP_HOME}/bin/hdfs dfs -put ${PROJ_
 
 #For hadoop2 there are 2 separate jars
 ${HADOOP_HOME}/bin/hdfs dfs -put 
${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar
  webhcate2e/hexamples.jar
+# For the UTF-8 test (MapReduce) we need a jar with UTF-8 characters in its name
+${HADOOP_HOME}/bin/hdfs dfs -put 
${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar
  webhcate2e/hadoop_examples_䶴ㄩ鼾丄狜〇_2_2_0.jar
 ${HADOOP_HOME}/bin/hdfs dfs -put 
${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-${HADOOP_VERSION}.jar
 webhcate2e/hclient.jar
 ${HADOOP_HOME}/bin/hdfs dfs -put 
${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-streaming-${HADOOP_VERSION}.jar  
/user/templeton/hadoop-streaming.jar
 

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm?rev=1653406&r1=1653405&r2=1653406&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm 
(original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm Tue 
Jan 20 23:26:26 2015
@@ -37,6 +37,7 @@ use English;
 use Storable qw(dclone);
 use File::Glob ':glob';
 use JSON::Path;
+use utf8;
 
 my $passedStr = 'passed';
 my $failedStr = 'failed';
@@ -922,12 +923,38 @@ sub compare
               $result = 0;
               next;
             }
-              
+            my $exp_userargsvalue;
+            my $r_userargsvalue;
+            if(ref($exp_userargs{$key}) eq "ARRAY"){
+              my @values = $exp_userargs{$key};
+              my $num_values = @values;
+
+              for(my $i=0;$i<=$num_values;$i++){
+                if (utf8::is_utf8($exp_userargs{$key}[$i])){
+                  $exp_userargs{$key}[$i] = 
utf8::decode($exp_userargs{$key}[$i]);
+                  $r_userargs{$key}[$i] = utf8::decode($r_userargs{$key}[$i]);
+                }
+              }
+              $exp_userargsvalue = $exp_userargs{$key};
+              $r_userargsvalue = $r_userargs{$key};
+            }
+            else {
+              if (utf8::is_utf8($exp_userargs{$key}))
+              {
+                $exp_userargsvalue = utf8::decode($exp_userargs{$key});
+                $r_userargsvalue = utf8::decode($r_userargs{$key});
+              } 
+              else 
+              {
+                $exp_userargsvalue = $exp_userargs{$key};
+                $r_userargsvalue = $r_userargs{$key};
+              }
+            }
             print $log "$0::$subName DEBUG comparing expected " 
                 . " $key ->" . dump($exp_userargs{$key})
                 . " With result $key ->" . dump($r_userargs{$key}) . "\n";
 
-            if (!Compare($exp_userargs{$key}, $r_userargs{$key})) {
+            if (!Compare($exp_userargsvalue, $r_userargsvalue)) {
               print $log "$0::$subName WARN check failed:" 
                   . " json compare failed. For field "
                   . "$key, regex <" . dump($r_userargs{$key})

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin䶴ㄩ鼾丄狜〇work.pig"
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin%E4%B6%B4%E3%84%A9%E9%BC%BE%E4%B8%84%E7%8B%9C%E3%80%87work.pig%22?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin䶴ㄩ鼾丄狜〇work.pig" 
(added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin䶴ㄩ鼾丄狜〇work.pig" 
Tue Jan 20 23:26:26 2015
@@ -0,0 +1,4 @@
+A = load '$INPDIR/table3.txt' using PigStorage('\t') AS (row:int, 
content:chararray);
+B = load '$INPDIR/table3ToJoin.txt' using PigStorage('\t') AS (row:int, 
content:chararray);
+C = JOIN A BY content, B BY content;
+store C into '$OUTDIR/PigJoin' USING PigStorage();
\ No newline at end of file

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof䶴ㄩ鼾丄狜〇war.txt"
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof%E4%B6%B4%E3%84%A9%E9%BC%BE%E4%B8%84%E7%8B%9C%E3%80%87war.txt%22?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof䶴ㄩ鼾丄狜〇war.txt" 
(added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof䶴ㄩ鼾丄狜〇war.txt" Tue 
Jan 20 23:26:26 2015
@@ -0,0 +1,21 @@
+孫 子 曰 : 兵 者 , 國 之 大 事 , 死 生 之 地 , 存 亡 
之 道 , 不 可 不 察 也 。 
+ 
+ 故 經 之 以 五 , 校 之 以 計 , 而 索 其 情 : 一 曰 道 
, 二 曰 天 , 三 曰 地 , 四 曰 
+ 將 , 五 曰 法 。 道 者 , 令 民 于 上 同 意 者 也 , 可 
與 之 死 , 可 與 之 生 , 民 不 
+ 詭 也 。 天 者 , 陰 陽 、 寒 暑 、 時 制 也 。 地 者 , 
高 下 、 遠 近 、 險 易 、 廣 狹 
+ 、 死 生 也 。 將 者 , 智 、 信 、 仁 、 勇 、 嚴 也 。 
法 者 , 曲 制 、 官 道 、 主 用 
+ 也 。 凡 此 五 者 , 將 莫 不 聞 , 知 之 者 勝 , 不 知 
之 者 不 勝 。 故 校 之 以 計 , 
+ 而 索 其 情 。 曰 : 主 孰 有 道 ? 將 孰 有 能 ? 天 地 
孰 得 ? 法 令 孰 行 ? 兵 眾 孰 
+ 強 ? 士 卒 孰 練 ? 賞 罰 孰 明 ? 吾 以 此 知 勝 負 矣 
。 
+ 
+ 將 聽 吾 計 , 用 之 必 勝 , 留 之 ﹔ 將 不 聽 吾 計 , 
用 之 必 敗 , 去 之 。 
+ 
+ 計 利 以 聽 , 乃 為 之 勢 , 以 佐 其 外 。 勢 者 , 因 
利 而 制 權 也 。 
+ 
+ 兵 者 , 詭 道 也 。 故 能 而 示 之 不 能 , 用 而 示 之 
不 用 , 近 而 示 之 遠 , 遠 而 
+ 示 之 近 。 利 而 誘 之 , 亂 而 取 之 , 實 而 備 之 , 
強 而 避 之 , 怒 而 撓 之 , 卑 
+ 而 驕 之 , 佚 而 勞 之 , 親 而 離 之 , 攻 其 不 備 , 
出 其 不 意 。 此 兵 家 之 勝 , 
+ 不 可 先 傳 也 。 
+ 
+ 夫 未 戰 而 廟 算 勝 者 , 得 算 多 也 ﹔ 未 戰 而 廟 算 
不 勝 者 , 得 算 少 也 。 多 算 
+ 勝 , 少 算 不 勝 , 而 況 無 算 乎 ! 吾 以 此 觀 之 , 
勝 負 見 矣 。
\ No newline at end of file

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt Tue Jan 20 
23:26:26 2015
@@ -0,0 +1,27 @@
+868    狚鼾䶵鼾c鼾㐀ee䶴﨨b
+423    be狝h狛狝狝㐁﨨狛a
+440    hc鼾㐀啊狝g狜c狜䶵㐁狛㐀丄g
+701    䶴䶵ecea狝䶵e狝䶴
+550    﨨狝狝䶴狛g﨩䶵狝a䶴﨩﨩g狛a狝
+123    阿狛鼾㐀鼾﨨dg䶴㐁d
+316    狜﨨﨩a䶵d㐀g狝c狝䶵de
+807    e狛h齄狝e䶴啊h䶴㐁狚阿狜g䶴aa
+870    狝a狛g丄fe䶵﨨狜狛䶵b䶵狛a狚齄
+669    dc丄a丄狜﨨f齄﨨b齄hd鼾
+622    a狚齄㐁f阿阿狜c﨩狛b狝e
+216    fh﨨阿dcf㐀b㐀h㐀狛c﨩g狚
+700    狜﨩㐀阿﨨齄鼾狝阿b﨨﨩狚f䶴阿
+528    啊狚﨩狛鼾丄啊丄丄㐁a䶴狝d狜fd丄
+173    h狚ed㐀d狛bc阿㐀啊﨩
+3      䶴狝﨩狚㐁啊䶵齄齄﨩狜ee㐀㐁h狛﨨㐀
+567    a啊g狜丄狚c䶵h狝﨨h
+435    b齄阿阿丄g㐁cf丄阿ed
+943    㐁g䶵狛cc㐀䶵h狝﨩䶵啊
+365    狛啊d狝䶵cb﨩䶵bd﨨狝阿b
+298    䶵䶵﨩h鼾㐁a狛鼾狝狚䶵f狝
+377    bd䶵﨩齄䶵㐀h㐁㐁狛鼾狝
+951    g狚ee﨨ach狝f﨩㐀㐀䶵ba
+399    啊﨨鼾d鼾ff齄h丄﨨狚a阿ga啊齄c
+199    阿狛㐀eg齄狝狝g齄ga啊d狜㐁齄
+816    齄狛狛啊狜af狚狛㐁狜
+758    狛gc㐀狜fa﨨㐀㐁齄ab㐀h
\ No newline at end of file

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt Tue Jan 20 
23:26:26 2015
@@ -0,0 +1,14 @@
+1      䶵
+2      䶵
+3      狚
+4      䶵
+5      鼾
+6      䶵
+112    﨩﨨狝狝
+12     﨩﨨狝狝
+7      鼾
+8      䶵
+9      狚䶵
+10     狚䶵齄
+11     﨩齄䶵
+12     﨩﨨狝狝
\ No newline at end of file

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt Tue Jan 
20 23:26:26 2015
@@ -0,0 +1,5 @@
+12     﨩﨨狝狝
+2      䶵shouldnotshowup
+3      狚
+4      䶵
+11     﨩齄䶵

Added: hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf Tue Jan 20 
23:26:26 2015
@@ -0,0 +1,134 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+###############################################################################
+# curl command tests for templeton
+#
+#
+use utf8;
+
+$cfg = 
+{
+ 'driver' => 'Curl',
+
+ 'groups' => 
+ [
+##=============================================================================================================
+  {
+   'name' => 'Hive_UTF8',
+   'tests' => 
+   [
+    {
+    # Submitting Hive job with Unicode content on Unicode data
+     'num' => 1,
+     'method' => 'POST',
+     'url' => ':TEMPLETON_URL:/templeton/v1/hive',
+     'post_options' => ['user.name=:UNAME:','execute=drop table if exists 
utf8_table;create table utf8_table(row int, content string) ROW FORMAT 
DELIMITED FIELDS TERMINATED BY \'\t\';LOAD DATA INPATH 
\':INPDIR_HDFS:/table1.txt\' OVERWRITE INTO TABLE 
utf8_table;','statusdir=:OUTDIR:/status/Hive_UTF8_:TNUM:'],
+     'json_field_substr_match' => { 'id' => '\d+'},
+                                #results
+     'status_code' => 200,
+     'check_job_created' => 1,
+     'check_job_complete' => 'SUCCESS',
+     'check_job_exit_value' => 0,
+     'check_call_back' => 1,
+    },
+   ]
+  },
+##=============================================================================================================
+  {
+   'name' => 'Pig_UTF8',
+   'tests' => 
+   [
+    {
+    # Submitting Pig job with Unicode content on Unicode data
+     'num' => 1,
+     'method' => 'POST',
+     'url' => ':TEMPLETON_URL:/templeton/v1/pig',
+     'post_options' => 
['user.name=:UNAME:','file=:INPDIR_HDFS:/PigJoin䶴ㄩ鼾丄狜〇work.pig','arg=-p',
 
'arg=INPDIR=:INPDIR_HDFS:','arg=-p','arg=OUTDIR=:OUTDIR:','statusdir=:OUTDIR:/status/Pig_UTF8_:TNUM:'],
+     'json_field_substr_match' => { 'id' => '\d+'},
+                                #results
+     'status_code' => 200,
+     'check_job_created' => 1,
+     'check_job_complete' => 'SUCCESS',
+     'check_call_back' => 1, 
+    },
+   ]
+  },
+##=============================================================================================================
+  {
+   'name' => 'MapReduce_UTF8',
+   'tests' => 
+   [
+    {
+         
+     'num' => 1,
+     'method' => 'POST',
+     'url' => ':TEMPLETON_URL:/templeton/v1/mapreduce/jar',
+     'post_options' => 
['user.name=:UNAME:','arg=:INPDIR_HDFS:/artof䶴ㄩ鼾丄狜〇war.txt', 'arg= 
:OUTDIR:/utf8_wc.txt', 
+                        
'jar=:INPDIR_HDFS:/hadoop_examples_䶴ㄩ鼾丄狜〇_2_2_0.jar', 
'class=wordcount','statusdir=:OUTDIR:/status/MapRed_UTF8_:TNUM:'],
+     'json_field_substr_match' => { 'id' => '\d+'},
+                                #results
+     'status_code' => 200,
+     'check_job_created' => 1,
+     'check_job_complete' => 'SUCCESS',
+     'check_job_percent_complete' => 'map 100% reduce 100%',
+     'check_job_exit_value' => 0,
+     'check_call_back' => 1,
+    }, 
+   ]
+  }, 
+##=============================================================================================================
+  {
+   'name' => 'MapRedStreaming_UTF8',
+   'tests' => 
+   [
+    {
+     'num' => 1,
+     'method' => 'POST',
+     'url' => ':TEMPLETON_URL:/templeton/v1/mapreduce/streaming',
+     'post_options' => 
['user.name=:UNAME:','input=:INPDIR_HDFS:/artof䶴ㄩ鼾丄狜〇war.txt', 
'input=:INPDIR_HDFS:/artof䶴ㄩ鼾丄狜〇war.txt', 
'output=:OUTDIR:/MapRedStreaming_UTF8_count', 'mapper=cat', 
'reducer=wc','statusdir=:OUTDIR:/status/MapRedStreaming_UTF8_:TNUM:'],
+     'json_field_substr_match' => { 'id' => '\d+'},
+                                #results
+     'status_code' => 200,
+     'check_job_created' => 1,
+     'check_job_complete' => 'SUCCESS',
+     'check_job_exit_value' => 0,
+     'check_call_back' => 1,
+    },
+   ]
+  },
+##=============================================================================================================
+
+  {
+   'name' => 'JobFiltering_UTF8',
+   'tests' => 
+   [
+    {
+     'num' => 1,
+     'depends_on' => 'Pig_UTF8',
+     'method' => 'GET',
+     'url' => 
':TEMPLETON_URL:/templeton/v1/jobs?user.name=:UNAME:&showall=true&fields=*',
+     'user_name' => ':UNAME:',
+     'format_header' => 'Content-Type: application/json',
+     'filter_job_names' => ['TempletonControllerJob', 
'PigLatin:PigJoin䶴ㄩ鼾丄狜〇work.pig'],
+     'status_code' => 200,
+    },
+   ]
+  },
+ ]
+},
+  ;
\ No newline at end of file


Reply via email to