Repository: spark
Updated Branches:
  refs/heads/branch-1.0 dd601bf92 -> d339b332c


[SQL] SPARK-1661 - Fix regex_serde test

The JIRA in question is actually reporting a bug with Shark, but I wanted to 
make sure Spark SQL did not have similar problems.  This fixes a bug in our 
parsing code that was preventing the test from executing, but it looks like the 
RegexSerDe is working in Spark SQL.

Author: Michael Armbrust <[email protected]>

Closes #595 from marmbrus/fixRegexSerdeTest and squashes the following commits:

a4dc612 [Michael Armbrust] Add files created by hive to gitignore.
efa6402 [Michael Armbrust] Fix Hive serde_regex test.

(cherry picked from commit a43d9c14f2083d6632f410d74db98476e0e4d986)
Signed-off-by: Reynold Xin <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d339b332
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d339b332
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d339b332

Branch: refs/heads/branch-1.0
Commit: d339b332cb72675b7d5431c5a01922e316f1cffc
Parents: dd601bf
Author: Michael Armbrust <[email protected]>
Authored: Thu May 1 21:32:43 2014 -0700
Committer: Reynold Xin <[email protected]>
Committed: Thu May 1 21:33:13 2014 -0700

----------------------------------------------------------------------
 .gitignore                                      |  5 +++
 .../org/apache/spark/sql/hive/HiveQl.scala      |  6 +++-
 ...rde_regex-0-60462d14b99bb445b02800e9fb22760e | 22 ++++++++++++
 ...rde_regex-1-dea03bd88cbaabcf438b398e23c139f6 |  0
 ...de_regex-10-c5b3ec90419a40660e5f83736241c429 | 38 ++++++++++++++++++++
 ...de_regex-11-b00710a6f7a3e5b92c2e17da54cd9ad1 |  0
 ...rde_regex-2-9d00484beaee46cf72b154a1351aeee9 |  0
 ...rde_regex-3-817190d8871b70611483cd2abe2e55dc |  0
 ...rde_regex-4-c3e345183543f40a14d2dd742ebd5346 |  2 ++
 ...rde_regex-5-e2d14a76a87a6014c6d2d3501a0c9129 |  2 ++
 ...rde_regex-6-f818821654f219d1f4e2482951fae4f1 |  0
 ...rde_regex-7-4db287576a17c0963219ca63ee0b20e0 | 22 ++++++++++++
 ...rde_regex-8-c429ee76b751e674992f61a29c95af77 |  0
 ...rde_regex-9-f0e8d394ad18dcbd381792fe9bd8894b |  0
 .../hive/execution/HiveCompatibilitySuite.scala |  1 +
 15 files changed, 97 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 857e9fe..a204456 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,3 +49,8 @@ unit-tests.log
 /lib/
 rat-results.txt
 scalastyle.txt
+
+# For Hive
+metastore_db/
+metastore/
+warehouse/

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 1777e96..1f688fe 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -347,7 +347,11 @@ private[hive] object HiveQl {
   protected def nodeToPlan(node: Node): LogicalPlan = node match {
     // Just fake explain for any of the native commands.
     case Token("TOK_EXPLAIN", explainArgs) if nativeCommands contains explainArgs.head.getText =>
-      NoRelation
+      ExplainCommand(NoRelation)
+    // Create tables aren't native commands due to CTAS queries, but we still don't need to
+    // explain them.
+    case Token("TOK_EXPLAIN", explainArgs) if explainArgs.head.getText == "TOK_CREATETABLE" =>
+      ExplainCommand(NoRelation)
     case Token("TOK_EXPLAIN", explainArgs) =>
       // Ignore FORMATTED if present.
       val Some(query) :: _ :: _ :: Nil =

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-0-60462d14b99bb445b02800e9fb22760e
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-0-60462d14b99bb445b02800e9fb22760e
 
b/sql/hive/src/test/resources/golden/serde_regex-0-60462d14b99bb445b02800e9fb22760e
new file mode 100644
index 0000000..d00ee77
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/serde_regex-0-60462d14b99bb445b02800e9fb22760e
@@ -0,0 +1,22 @@
+ABSTRACT SYNTAX TREE:
+  (TOK_CREATETABLE (TOK_TABNAME serde_regex) TOK_LIKETABLE (TOK_TABCOLLIST 
(TOK_TABCOL host TOK_STRING) (TOK_TABCOL identity TOK_STRING) (TOK_TABCOL user 
TOK_STRING) (TOK_TABCOL time TOK_STRING) (TOK_TABCOL request TOK_STRING) 
(TOK_TABCOL status TOK_STRING) (TOK_TABCOL size TOK_INT) (TOK_TABCOL referer 
TOK_STRING) (TOK_TABCOL agent TOK_STRING)) (TOK_TABLESERIALIZER (TOK_SERDENAME 
'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES 
(TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*) ([^ ]*) 
(-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ 
\"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?"))))) TOK_TBLTEXTFILE)
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+      Create Table Operator:
+        Create Table
+          columns: host string, identity string, user string, time string, 
request string, status string, size int, referer string, agent string
+          if not exists: false
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          # buckets: -1
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.RegexSerDe
+          serde properties:
+            input.regex ([^ ]*) ([^ ]*) ([^ ]*) (-|\[[^\]]*\]) ([^ 
"]*|"[^"]*") (-|[0-9]*) (-|[0-9]*)(?: ([^ "]*|"[^"]*") ([^ "]*|"[^"]*"))?
+          name: serde_regex
+          isExternal: false
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-1-dea03bd88cbaabcf438b398e23c139f6
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-1-dea03bd88cbaabcf438b398e23c139f6
 
b/sql/hive/src/test/resources/golden/serde_regex-1-dea03bd88cbaabcf438b398e23c139f6
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429
 
b/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429
new file mode 100644
index 0000000..3e29023
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429
@@ -0,0 +1,38 @@
+NULL   0
+NULL   0
+-1234567890.123456789  -1234567890
+-4400  4400
+-1255.49       -1255
+-1.122 -11
+-1.12  -1
+-1.12  -1
+-0.333 0
+-0.33  0
+-0.3   0
+0      0
+0      0
+0.01   0
+0.02   0
+0.1    0
+0.2    0
+0.3    0
+0.33   0
+0.333  0
+0.9999999999999999999999999    1
+1      1
+1      1
+1.12   1
+1.122  1
+2      2
+2      2
+3.14   3
+3.14   3
+3.14   3
+3.14   4
+10     10
+20     20
+100    100
+124    124
+125.2  125
+200    200
+1234567890.12345678    1234567890

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-11-b00710a6f7a3e5b92c2e17da54cd9ad1
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-11-b00710a6f7a3e5b92c2e17da54cd9ad1
 
b/sql/hive/src/test/resources/golden/serde_regex-11-b00710a6f7a3e5b92c2e17da54cd9ad1
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-2-9d00484beaee46cf72b154a1351aeee9
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-2-9d00484beaee46cf72b154a1351aeee9
 
b/sql/hive/src/test/resources/golden/serde_regex-2-9d00484beaee46cf72b154a1351aeee9
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-3-817190d8871b70611483cd2abe2e55dc
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-3-817190d8871b70611483cd2abe2e55dc
 
b/sql/hive/src/test/resources/golden/serde_regex-3-817190d8871b70611483cd2abe2e55dc
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-4-c3e345183543f40a14d2dd742ebd5346
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-4-c3e345183543f40a14d2dd742ebd5346
 
b/sql/hive/src/test/resources/golden/serde_regex-4-c3e345183543f40a14d2dd742ebd5346
new file mode 100644
index 0000000..c55f3dd
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/serde_regex-4-c3e345183543f40a14d2dd742ebd5346
@@ -0,0 +1,2 @@
+127.0.0.1      -       frank   [10/Oct/2000:13:55:36 -0700]    "GET 
/apache_pb.gif HTTP/1.0"   200     2326    NULL    NULL
+127.0.0.1      -       -       [26/May/2009:00:00:00 +0000]    "GET 
/someurl/?track=Blabla(Main) HTTP/1.1"     200     5864    -       "Mozilla/5.0 
(Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) 
Chrome/1.0.154.65 Safari/525.19"

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-5-e2d14a76a87a6014c6d2d3501a0c9129
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-5-e2d14a76a87a6014c6d2d3501a0c9129
 
b/sql/hive/src/test/resources/golden/serde_regex-5-e2d14a76a87a6014c6d2d3501a0c9129
new file mode 100644
index 0000000..8bd185b
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/serde_regex-5-e2d14a76a87a6014c6d2d3501a0c9129
@@ -0,0 +1,2 @@
+127.0.0.1      2326    200     [10/Oct/2000:13:55:36 -0700]
+127.0.0.1      5864    200     [26/May/2009:00:00:00 +0000]

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-6-f818821654f219d1f4e2482951fae4f1
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-6-f818821654f219d1f4e2482951fae4f1
 
b/sql/hive/src/test/resources/golden/serde_regex-6-f818821654f219d1f4e2482951fae4f1
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-7-4db287576a17c0963219ca63ee0b20e0
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-7-4db287576a17c0963219ca63ee0b20e0
 
b/sql/hive/src/test/resources/golden/serde_regex-7-4db287576a17c0963219ca63ee0b20e0
new file mode 100644
index 0000000..da61769
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/serde_regex-7-4db287576a17c0963219ca63ee0b20e0
@@ -0,0 +1,22 @@
+ABSTRACT SYNTAX TREE:
+  (TOK_CREATETABLE (TOK_TABNAME serde_regex1) TOK_LIKETABLE (TOK_TABCOLLIST 
(TOK_TABCOL key TOK_DECIMAL) (TOK_TABCOL value TOK_INT)) (TOK_TABLESERIALIZER 
(TOK_SERDENAME 'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES 
(TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*)"))))) 
TOK_TBLTEXTFILE)
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+      Create Table Operator:
+        Create Table
+          columns: key decimal, value int
+          if not exists: false
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          # buckets: -1
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.RegexSerDe
+          serde properties:
+            input.regex ([^ ]*) ([^ ]*)
+          name: serde_regex1
+          isExternal: false
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-8-c429ee76b751e674992f61a29c95af77
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-8-c429ee76b751e674992f61a29c95af77
 
b/sql/hive/src/test/resources/golden/serde_regex-8-c429ee76b751e674992f61a29c95af77
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/resources/golden/serde_regex-9-f0e8d394ad18dcbd381792fe9bd8894b
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/serde_regex-9-f0e8d394ad18dcbd381792fe9bd8894b
 
b/sql/hive/src/test/resources/golden/serde_regex-9-f0e8d394ad18dcbd381792fe9bd8894b
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/d339b332/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 0bb76f3..6c08e63 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -568,6 +568,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "select_unquote_and",
     "select_unquote_not",
     "select_unquote_or",
+    "serde_regex",
     "serde_reported_schema",
     "set_variable_sub",
     "show_describe_func_quotes",

Reply via email to