[
https://issues.apache.org/jira/browse/SPARK-1661?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
likunjian updated SPARK-1661:
-----------------------------
Description:
the result of querying table created with RegexSerDe is all null
when i query the table created with
org.apache.hadoop.hive.contrib.serde2.RegexSerDe by shark,the columns in the
result is all null
select * from access_log where logdate='2014-04-28' limit 10;
OK
ip host time method request protocol status size referer
cookieuid requesttime session httpxrequestedwith agent
upstreamresponsetime logdate
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
Time taken: 4.362 seconds
my regex is
^([^ ]*) [^ ]* ([^ ]*) \\[([^\]]*)\\] \"([^ ]*) ([^ ]*) ([^ ]*)\" (-|[0-9]*)
(-|[0-9]*) \"(\.\+\?|-)\" ([^ ]*) ([^ ]*) ([^ ]*) \"(\.\+\?|-)\" \"(\.\+\?|-)\"
\"(\.\+\?|-)\"$
nginx log example:
42.49.44.61 - www.xxxx.comm [20/Apr/2014:23:58:03 +0800] "GET /xxxxx/296837
HTTP/1.1" 200 3871 "http://www.xxxxx.com/xxxxx/296837" - 0.015
63hbb4om2cvtjs0f7d969n1uf4 "com.xxxxx.browser" "Mozilla/5.0 (Linux; U; xxxxx
4.1.2; zh-cn; ZTE N919 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko)
Version/4.0 Mobile Safari/534.30" "0.015"
111.121.176.149 - www.xxxx.comm [20/Apr/2014:23:58:03 +0800] "GET /xxxxx/264904
HTTP/1.1" 200 3827
"http://m.baidu.com/s?from=2001a&bd_page_type=1&word=%E8%8E%B2%E8%97%95%E6%80%8E%E6%A0%B7%E5%8D%A4%E6%89%8D%E5%A5%BD%E5%90%83"
- 0.015 ft7tr4b06b23ub9lnugdf4gcq3 "-" "Mozilla/5.0 (Linux; U; xxxxx 4.1.2;
zh-CN; 8190Q Build/JZO54K) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0
UCBrowser/9.5.2.394 U3/0.8.0 Mobile Safari/533.1" "0.015"
222.209.97.169 - www.xxxx.comm [20/Apr/2014:23:58:04 +0800] "GET / HTTP/1.1"
200 3188 "http://m.idea123.cn/food.html" - 0.014 - "-" "Lenovo S890/S100
Linux/3.0.13 xxxxx/4.0.3 Release/12.12.2011 Browser/AppleWebKit534.30
Profile/MIDP-2.0 Configuration/CLDC-1.1 Mobile Safari/534.30" "0.014"
59.36.84.241 - www.xxxx.comm [20/Apr/2014:23:58:05 +0800] "GET
/app/xxxxx/topic/view.php?id=138555 HTTP/1.1" 200 3151 "-" - 0.009 - "-"
"Mozilla/5.0 (Linux; U; xxxxx 2.3.7; zh-cn; TD500 Build/GWK74)
AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30" "0.009"
113.242.39.81 - www.xxxx.comm [20/Apr/2014:23:58:07 +0800] "GET /xxxxx/419691
HTTP/1.1" 200 4174 "http://www.xxxx.comm/xxxxx/all/308?p=3" - 0.013
1n579ukg1gho7i7mr3q8ic8j97 "-" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X
10_5_7; en-us) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0
Safari/530.17; 360browser(securitypay,securityinstalled);
360(xxxxx,uppayplugin); 360 Aphone Browser (5.3.1)" "0.013"
Very strange, I execute a query in Hive is normal. I really do not understand.
. . :-(
OK
ip host time method request protocol status size referer
cookieuid requesttime session httpxrequestedwith agent
upstreamresponsetime logdate
14.151.40.117 www.xxxx.com 10/Apr/2014:23:58:01 +0800 POST
/xxxx.jsp?appid=4&appkey=573bbd2fbd1a6bac082ff4727d952ba3&format=json&sessionid=1397145480&vc=24&vn=v3.5.1&loguid=&deviceid=0f607264fc6318a92b9e13c65db7cd3c%7C02C105A6-6DC8-43D5-879E-46AD603AC34E%7C2096145A-114C-4B6E-BE91-1AC740D9BD21&channel=appstore&method=Update.forceUpdate
HTTP/1.1 200 88 - 0.052 - - xxxxx xxxxx
iPhone Client API 0.005 2014-04-11
112.91.89.149 www.xxxx.com 10/Apr/2014:23:58:02 +0800 POST
/xxxx.jsp?appid=2&appkey=9ef269eec4f7a9d07c73952d06b5413f&format=json&sessionid=1397141523139&vc=53&vn=3.5.2&loguid=2888069&deviceid=xxxxx354244055617944&uuid=c7d71164-bae7-4d51-8032-ec9cfafb5e7e&channel=default&method=Info.getinfoV3&virtual=O1wq33EpQ%2Fa%2B8NxjEsy57ZezKBefR85F4L%2BXPZlcSETtw5Fl%2FdQFuLuNUg4Co9zHJyw2jPJipOR3%0Acrc59PUeFvM5hU82AdDQGjIkXa%2FLMWtxuJYz6fJBHLxkQRPWUVCdpeENwrnYlvgAY6DqM9G%2Fh5g1%0AbZamnAgUMERY1iZzPLk%3D%0A
HTTP/1.1 200 2008 xxxxx xxxxx xxxxx POST - 0.033
- - Mozilla/5.0 (xxxxx) xxxxx/20100101 xxxxx/1.0.0 0.033
2014-04-11
113.133.68.221 www.xxxx.com 10/Apr/2014:23:58:02 +0800 POST
/xxxx.jsp?appid=2&appkey=9ef269eec4f7a9d07c73952d06b5413f&format=json&sessionid=1397145345830&vc=53&vn=3.5.2&loguid=4377880&deviceid=xxxxx355369055653422&uuid=546e07f7-6439-48d4-8d6d-e5dc0001e569&channel=91_v352&method=Ad.getAd_iMocha&virtual=
HTTP/1.1 200 88 xxxxx xxxxx xxxxx POST - 0.127
- - Mozilla/5.0 (xxxxx) xxxxx/20100101 xxxxx/1.0.0 0.127
2014-04-11
this is my HQL:
CREATE external TABLE access_log (
ip STRING,
host STRING,
time STRING,
method STRING,
request STRING,
protocol STRING,
status STRING,
size STRING,
referer STRING,
cookieuid STRING,
requestTime STRING,
session STRING,
httpXRequestedWith STRING,
agent STRING,
upstreamResponseTime STRING
)
partitioned by (logdate string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = "^([^ ]*) - ([^ ]*) \\[([^\]]*)\\] \"([^ ]*) ([^ ]*) ([^ ]*)\"
(-|[0-9]*) (-|[0-9]*) \"(\.\+\?|-)\" ([^ ]*) ([^ ]*) ([^ ]*) \"(\.\+\?|-)\"
\"(\.\+\?|-)\" \"(\.\+\?|-)\"$",
"output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s
%11$s %12$s %13$s %14$s %15$s"
)
STORED AS TEXTFILE;
was:
when i query the table created with
org.apache.hadoop.hive.contrib.serde2.RegexSerDe by shark,the columns in the
result is all null
select * from access_log where logdate='2014-04-28' limit 10;
OK
ip host time method request protocol status size referer
cookieuid requesttime session httpxrequestedwith agent
upstreamresponsetime logdate
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL 2014-04-28
Time taken: 4.362 seconds
my regex is
^([^ ]*) [^ ]* ([^ ]*) \\[([^\]]*)\\] \"([^ ]*) ([^ ]*) ([^ ]*)\" (-|[0-9]*)
(-|[0-9]*) \"(\.\+\?|-)\" ([^ ]*) ([^ ]*) ([^ ]*) \"(\.\+\?|-)\" \"(\.\+\?|-)\"
\"(\.\+\?|-)\"$
nginx log example:
42.49.44.61 - www.xxxx.comm [20/Apr/2014:23:58:03 +0800] "GET /xxxxx/296837
HTTP/1.1" 200 3871 "http://www.xxxxx.com/xxxxx/296837" - 0.015
63hbb4om2cvtjs0f7d969n1uf4 "com.android.browser" "Mozilla/5.0 (Linux; U;
Android 4.1.2; zh-cn; ZTE N919 Build/JZO54K) AppleWebKit/534.30 (KHTML, like
Gecko) Version/4.0 Mobile Safari/534.30" "0.015"
111.121.176.149 - www.xxxx.comm [20/Apr/2014:23:58:03 +0800] "GET /xxxxx/264904
HTTP/1.1" 200 3827
"http://m.baidu.com/s?from=2001a&bd_page_type=1&word=%E8%8E%B2%E8%97%95%E6%80%8E%E6%A0%B7%E5%8D%A4%E6%89%8D%E5%A5%BD%E5%90%83"
- 0.015 ft7tr4b06b23ub9lnugdf4gcq3 "-" "Mozilla/5.0 (Linux; U; Android 4.1.2;
zh-CN; 8190Q Build/JZO54K) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0
UCBrowser/9.5.2.394 U3/0.8.0 Mobile Safari/533.1" "0.015"
222.209.97.169 - www.xxxx.comm [20/Apr/2014:23:58:04 +0800] "GET / HTTP/1.1"
200 3188 "http://m.idea123.cn/food.html" - 0.014 - "-" "Lenovo S890/S100
Linux/3.0.13 Android/4.0.3 Release/12.12.2011 Browser/AppleWebKit534.30
Profile/MIDP-2.0 Configuration/CLDC-1.1 Mobile Safari/534.30" "0.014"
59.36.84.241 - www.xxxx.comm [20/Apr/2014:23:58:05 +0800] "GET
/app/xxxxx/topic/view.php?id=138555 HTTP/1.1" 200 3151 "-" - 0.009 - "-"
"Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; TD500 Build/GWK74)
AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30" "0.009"
113.242.39.81 - www.xxxx.comm [20/Apr/2014:23:58:07 +0800] "GET /xxxxx/419691
HTTP/1.1" 200 4174 "http://www.xxxx.comm/xxxxx/all/308?p=3" - 0.013
1n579ukg1gho7i7mr3q8ic8j97 "-" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X
10_5_7; en-us) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0
Safari/530.17; 360browser(securitypay,securityinstalled);
360(android,uppayplugin); 360 Aphone Browser (5.3.1)" "0.013"
Very strange, I execute a query in Hive is normal. I really do not understand.
. . :-(
OK
ip host time method request protocol status size referer
cookieuid requesttime session httpxrequestedwith agent
upstreamresponsetime logdate
14.151.40.117 api.haodou.com 10/Apr/2014:23:58:01 +0800 POST
/index.php?appid=4&appkey=573bbd2fbd1a6bac082ff4727d952ba3&format=json&sessionid=1397145480&vc=24&vn=v3.5.1&loguid=&deviceid=0f607264fc6318a92b9e13c65db7cd3c%7C02C105A6-6DC8-43D5-879E-46AD603AC34E%7C2096145A-114C-4B6E-BE91-1AC740D9BD21&channel=appstore&method=Update.forceUpdate
HTTP/1.1 200 88 - 0.052 - - Haodou Recipe
iPhone Client API 0.005 2014-04-11
112.91.89.149 api.haodou.com 10/Apr/2014:23:58:02 +0800 POST
/index.php?appid=2&appkey=9ef269eec4f7a9d07c73952d06b5413f&format=json&sessionid=1397141523139&vc=53&vn=3.5.2&loguid=2888069&deviceid=haodou354244055617944&uuid=c7d71164-bae7-4d51-8032-ec9cfafb5e7e&channel=default&method=Info.getinfoV3&virtual=O1wq33EpQ%2Fa%2B8NxjEsy57ZezKBefR85F4L%2BXPZlcSETtw5Fl%2FdQFuLuNUg4Co9zHJyw2jPJipOR3%0Acrc59PUeFvM5hU82AdDQGjIkXa%2FLMWtxuJYz6fJBHLxkQRPWUVCdpeENwrnYlvgAY6DqM9G%2Fh5g1%0AbZamnAgUMERY1iZzPLk%3D%0A
HTTP/1.1 200 2008 HAODOU Recipe Android POST - 0.033
- - Mozilla/5.0 (haodou) haodou/20100101 haodou/1.0.0
0.033 2014-04-11
113.133.68.221 api.haodou.com 10/Apr/2014:23:58:02 +0800 POST
/index.php?appid=2&appkey=9ef269eec4f7a9d07c73952d06b5413f&format=json&sessionid=1397145345830&vc=53&vn=3.5.2&loguid=4377880&deviceid=haodou355369055653422&uuid=546e07f7-6439-48d4-8d6d-e5dc0001e569&channel=91_v352&method=Ad.getAd_iMocha&virtual=
HTTP/1.1 200 88 HAODOU Recipe Android POST - 0.127
- - Mozilla/5.0 (haodou) haodou/20100101 haodou/1.0.0
0.127 2014-04-11
this is my HQL:
CREATE external TABLE access_log (
ip STRING,
host STRING,
time STRING,
method STRING,
request STRING,
protocol STRING,
status STRING,
size STRING,
referer STRING,
cookieuid STRING,
requestTime STRING,
session STRING,
httpXRequestedWith STRING,
agent STRING,
upstreamResponseTime STRING
)
partitioned by (logdate string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = "^([^ ]*) - ([^ ]*) \\[([^\]]*)\\] \"([^ ]*) ([^ ]*) ([^ ]*)\"
(-|[0-9]*) (-|[0-9]*) \"(\.\+\?|-)\" ([^ ]*) ([^ ]*) ([^ ]*) \"(\.\+\?|-)\"
\"(\.\+\?|-)\" \"(\.\+\?|-)\"$",
"output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s
%11$s %12$s %13$s %14$s %15$s"
)
STORED AS TEXTFILE;
> the result of querying table created with RegexSerDe is all null
> ----------------------------------------------------------------
>
> Key: SPARK-1661
> URL: https://issues.apache.org/jira/browse/SPARK-1661
> Project: Spark
> Issue Type: Bug
> Components: Spark Core, SQL
> Affects Versions: 0.9.0
> Environment: linux 2.6.32-358.el6.x86_64,Hive 12.0,shark 0.9.0,Hadoop
> 2.2.0
> Reporter: likunjian
> Labels: HQL, hadoop, hive, regex, shark
> Original Estimate: 168h
> Remaining Estimate: 168h
>
> the result of querying table created with RegexSerDe is all null
> when i query the table created with
> org.apache.hadoop.hive.contrib.serde2.RegexSerDe by shark,the columns in the
> result is all null
> select * from access_log where logdate='2014-04-28' limit 10;
> OK
> ip host time method request protocol status size
> referer cookieuid requesttime session httpxrequestedwith agent
> upstreamresponsetime logdate
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL 2014-04-28
> Time taken: 4.362 seconds
> my regex is
> ^([^ ]*) [^ ]* ([^ ]*) \\[([^\]]*)\\] \"([^ ]*) ([^ ]*) ([^ ]*)\" (-|[0-9]*)
> (-|[0-9]*) \"(\.\+\?|-)\" ([^ ]*) ([^ ]*) ([^ ]*) \"(\.\+\?|-)\"
> \"(\.\+\?|-)\" \"(\.\+\?|-)\"$
> nginx log example:
> 42.49.44.61 - www.xxxx.comm [20/Apr/2014:23:58:03 +0800] "GET /xxxxx/296837
> HTTP/1.1" 200 3871 "http://www.xxxxx.com/xxxxx/296837" - 0.015
> 63hbb4om2cvtjs0f7d969n1uf4 "com.xxxxx.browser" "Mozilla/5.0 (Linux; U; xxxxx
> 4.1.2; zh-cn; ZTE N919 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko)
> Version/4.0 Mobile Safari/534.30" "0.015"
> 111.121.176.149 - www.xxxx.comm [20/Apr/2014:23:58:03 +0800] "GET
> /xxxxx/264904 HTTP/1.1" 200 3827
> "http://m.baidu.com/s?from=2001a&bd_page_type=1&word=%E8%8E%B2%E8%97%95%E6%80%8E%E6%A0%B7%E5%8D%A4%E6%89%8D%E5%A5%BD%E5%90%83"
> - 0.015 ft7tr4b06b23ub9lnugdf4gcq3 "-" "Mozilla/5.0 (Linux; U; xxxxx 4.1.2;
> zh-CN; 8190Q Build/JZO54K) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0
> UCBrowser/9.5.2.394 U3/0.8.0 Mobile Safari/533.1" "0.015"
> 222.209.97.169 - www.xxxx.comm [20/Apr/2014:23:58:04 +0800] "GET / HTTP/1.1"
> 200 3188 "http://m.idea123.cn/food.html" - 0.014 - "-" "Lenovo S890/S100
> Linux/3.0.13 xxxxx/4.0.3 Release/12.12.2011 Browser/AppleWebKit534.30
> Profile/MIDP-2.0 Configuration/CLDC-1.1 Mobile Safari/534.30" "0.014"
> 59.36.84.241 - www.xxxx.comm [20/Apr/2014:23:58:05 +0800] "GET
> /app/xxxxx/topic/view.php?id=138555 HTTP/1.1" 200 3151 "-" - 0.009 - "-"
> "Mozilla/5.0 (Linux; U; xxxxx 2.3.7; zh-cn; TD500 Build/GWK74)
> AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30"
> "0.009"
> 113.242.39.81 - www.xxxx.comm [20/Apr/2014:23:58:07 +0800] "GET /xxxxx/419691
> HTTP/1.1" 200 4174 "http://www.xxxx.comm/xxxxx/all/308?p=3" - 0.013
> 1n579ukg1gho7i7mr3q8ic8j97 "-" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X
> 10_5_7; en-us) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0
> Safari/530.17; 360browser(securitypay,securityinstalled);
> 360(xxxxx,uppayplugin); 360 Aphone Browser (5.3.1)" "0.013"
> Very strange, I execute a query in Hive is normal. I really do not
> understand. . . :-(
> OK
> ip host time method request protocol status size
> referer cookieuid requesttime session httpxrequestedwith agent
> upstreamresponsetime logdate
> 14.151.40.117 www.xxxx.com 10/Apr/2014:23:58:01 +0800 POST
> /xxxx.jsp?appid=4&appkey=573bbd2fbd1a6bac082ff4727d952ba3&format=json&sessionid=1397145480&vc=24&vn=v3.5.1&loguid=&deviceid=0f607264fc6318a92b9e13c65db7cd3c%7C02C105A6-6DC8-43D5-879E-46AD603AC34E%7C2096145A-114C-4B6E-BE91-1AC740D9BD21&channel=appstore&method=Update.forceUpdate
> HTTP/1.1 200 88 - 0.052 - - xxxxx xxxxx
> iPhone Client API 0.005 2014-04-11
> 112.91.89.149 www.xxxx.com 10/Apr/2014:23:58:02 +0800 POST
> /xxxx.jsp?appid=2&appkey=9ef269eec4f7a9d07c73952d06b5413f&format=json&sessionid=1397141523139&vc=53&vn=3.5.2&loguid=2888069&deviceid=xxxxx354244055617944&uuid=c7d71164-bae7-4d51-8032-ec9cfafb5e7e&channel=default&method=Info.getinfoV3&virtual=O1wq33EpQ%2Fa%2B8NxjEsy57ZezKBefR85F4L%2BXPZlcSETtw5Fl%2FdQFuLuNUg4Co9zHJyw2jPJipOR3%0Acrc59PUeFvM5hU82AdDQGjIkXa%2FLMWtxuJYz6fJBHLxkQRPWUVCdpeENwrnYlvgAY6DqM9G%2Fh5g1%0AbZamnAgUMERY1iZzPLk%3D%0A
> HTTP/1.1 200 2008 xxxxx xxxxx xxxxx POST - 0.033
> - - Mozilla/5.0 (xxxxx) xxxxx/20100101 xxxxx/1.0.0 0.033
> 2014-04-11
> 113.133.68.221 www.xxxx.com 10/Apr/2014:23:58:02 +0800 POST
> /xxxx.jsp?appid=2&appkey=9ef269eec4f7a9d07c73952d06b5413f&format=json&sessionid=1397145345830&vc=53&vn=3.5.2&loguid=4377880&deviceid=xxxxx355369055653422&uuid=546e07f7-6439-48d4-8d6d-e5dc0001e569&channel=91_v352&method=Ad.getAd_iMocha&virtual=
> HTTP/1.1 200 88 xxxxx xxxxx xxxxx POST - 0.127
> - - Mozilla/5.0 (xxxxx) xxxxx/20100101 xxxxx/1.0.0 0.127
> 2014-04-11
> this is my HQL:
> CREATE external TABLE access_log (
> ip STRING,
> host STRING,
> time STRING,
> method STRING,
> request STRING,
> protocol STRING,
> status STRING,
> size STRING,
> referer STRING,
> cookieuid STRING,
> requestTime STRING,
> session STRING,
> httpXRequestedWith STRING,
> agent STRING,
> upstreamResponseTime STRING
> )
> partitioned by (logdate string)
> ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
> WITH SERDEPROPERTIES (
> "input.regex" = "^([^ ]*) - ([^ ]*) \\[([^\]]*)\\] \"([^ ]*) ([^ ]*) ([^
> ]*)\" (-|[0-9]*) (-|[0-9]*) \"(\.\+\?|-)\" ([^ ]*) ([^ ]*) ([^ ]*)
> \"(\.\+\?|-)\" \"(\.\+\?|-)\" \"(\.\+\?|-)\"$",
> "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s
> %11$s %12$s %13$s %14$s %15$s"
> )
> STORED AS TEXTFILE;
--
This message was sent by Atlassian JIRA
(v6.2#6252)