Author: xuefu
Date: Mon Jul 14 23:07:28 2014
New Revision: 1610555
URL: http://svn.apache.org/r1610555
Log:
HIVE-6637: UDF in_file() doesn't take CHAR or VARCHAR as input (Ashish via Xuefu)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInFile.java
hive/trunk/ql/src/test/queries/clientpositive/udf_in_file.q
hive/trunk/ql/src/test/results/clientpositive/udf_in_file.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInFile.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInFile.java?rev=1610555&r1=1610554&r2=1610555&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInFile.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInFile.java Mon Jul 14 23:07:28 2014
@@ -33,8 +33,8 @@ import org.apache.hadoop.hive.ql.exec.UD
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
* IN_FILE(str, filename) returns true if 'str' appears in the file specified
@@ -59,29 +59,35 @@ public class GenericUDFInFile extends Ge
"IN_FILE() accepts exactly 2 arguments.");
}
- for (int i = 0; i < arguments.length; i++) {
- if (!String.class.equals(
- PrimitiveObjectInspectorUtils.
- getJavaPrimitiveClassFromObjectInspector(arguments[i]))) {
- throw new UDFArgumentTypeException(i, "The "
- + GenericUDFUtils.getOrdinal(i + 1)
- + " argument of function IN_FILE must be a string but "
- + arguments[i].toString() + " was given.");
- }
- }
-
strObjectInspector = arguments[0];
fileObjectInspector = arguments[1];
- if (!ObjectInspectorUtils.isConstantObjectInspector(fileObjectInspector)) {
- throw new UDFArgumentTypeException(1,
- "The second argument of IN_FILE() must be a constant string but " +
- fileObjectInspector.toString() + " was given.");
+ if (!isTypeCompatible(strObjectInspector)) {
+ throw new UDFArgumentTypeException(0, "The first " +
+ "argument of function IN_FILE must be a string, " +
+ "char or varchar but " +
+ strObjectInspector.toString() + " was given.");
+ }
+
+ if (((PrimitiveObjectInspector) fileObjectInspector).getPrimitiveCategory() !=
+ PrimitiveObjectInspector.PrimitiveCategory.STRING ||
+ !ObjectInspectorUtils.isConstantObjectInspector(fileObjectInspector)) {
+ throw new UDFArgumentTypeException(1, "The second " +
+ "argument of IN_FILE() must be a constant string but " +
+ fileObjectInspector.toString() + " was given.");
}
return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
}
+ private boolean isTypeCompatible(ObjectInspector argument) {
+ PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) argument);
+ return
+ poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.STRING ||
+ poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.CHAR ||
+ poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.VARCHAR;
+ }
+
@Override
public String[] getRequiredFiles() {
return new String[] {
@@ -96,12 +102,12 @@ public class GenericUDFInFile extends Ge
return null;
}
- String str = (String)ObjectInspectorUtils.copyToStandardJavaObject(
- arguments[0].get(), strObjectInspector);
+ String str = ObjectInspectorUtils.copyToStandardJavaObject(
+ arguments[0].get(), strObjectInspector).toString();
if (set == null) {
String fileName = (String)ObjectInspectorUtils.copyToStandardJavaObject(
- arguments[1].get(), fileObjectInspector);
+ arguments[1].get(), fileObjectInspector);
try {
load(new FileInputStream((new File(fileName)).getName()));
} catch (FileNotFoundException e) {
Modified: hive/trunk/ql/src/test/queries/clientpositive/udf_in_file.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_in_file.q?rev=1610555&r1=1610554&r2=1610555&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_in_file.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_in_file.q Mon Jul 14 23:07:28 2014
@@ -1,12 +1,30 @@
DESCRIBE FUNCTION in_file;
+CREATE TABLE value_src (str_val char(3), ch_val STRING, vch_val varchar(10),
+ str_val_neg char(3), ch_val_neg STRING, vch_val_neg varchar(10))
+ ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+
+LOAD DATA LOCAL INPATH '../../data/files/in_file.dat' INTO TABLE value_src;
+
EXPLAIN
-SELECT in_file("303", "../../data/files/test2.dat"),
+SELECT in_file(str_val, "../../data/files/test2.dat"),
+ in_file(ch_val, "../../data/files/test2.dat"),
+ in_file(vch_val, "../../data/files/test2.dat"),
+ in_file(str_val_neg, "../../data/files/test2.dat"),
+ in_file(ch_val_neg, "../../data/files/test2.dat"),
+ in_file(vch_val_neg, "../../data/files/test2.dat"),
+ in_file("303", "../../data/files/test2.dat"),
in_file("304", "../../data/files/test2.dat"),
in_file(CAST(NULL AS STRING), "../../data/files/test2.dat")
-FROM src LIMIT 1;
+FROM value_src LIMIT 1;
-SELECT in_file("303", "../../data/files/test2.dat"),
+SELECT in_file(str_val, "../../data/files/test2.dat"),
+ in_file(ch_val, "../../data/files/test2.dat"),
+ in_file(vch_val, "../../data/files/test2.dat"),
+ in_file(str_val_neg, "../../data/files/test2.dat"),
+ in_file(ch_val_neg, "../../data/files/test2.dat"),
+ in_file(vch_val_neg, "../../data/files/test2.dat"),
+ in_file("303", "../../data/files/test2.dat"),
in_file("304", "../../data/files/test2.dat"),
in_file(CAST(NULL AS STRING), "../../data/files/test2.dat")
-FROM src LIMIT 1;
+FROM value_src LIMIT 1;
\ No newline at end of file
Modified: hive/trunk/ql/src/test/results/clientpositive/udf_in_file.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_in_file.q.out?rev=1610555&r1=1610554&r2=1610555&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_in_file.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_in_file.q.out Mon Jul 14 23:07:28 2014
@@ -3,17 +3,48 @@ PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION in_file
POSTHOOK: type: DESCFUNCTION
in_file(str, filename) - Returns true if str appears in the file
+PREHOOK: query: CREATE TABLE value_src (str_val char(3), ch_val STRING, vch_val varchar(10),
+ str_val_neg char(3), ch_val_neg STRING, vch_val_neg varchar(10))
+ ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE value_src (str_val char(3), ch_val STRING, vch_val varchar(10),
+ str_val_neg char(3), ch_val_neg STRING, vch_val_neg varchar(10))
+ ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@value_src
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in_file.dat' INTO TABLE value_src
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@value_src
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in_file.dat' INTO TABLE value_src
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@value_src
PREHOOK: query: EXPLAIN
-SELECT in_file("303", "../../data/files/test2.dat"),
+SELECT in_file(str_val, "../../data/files/test2.dat"),
+ in_file(ch_val, "../../data/files/test2.dat"),
+ in_file(vch_val, "../../data/files/test2.dat"),
+ in_file(str_val_neg, "../../data/files/test2.dat"),
+ in_file(ch_val_neg, "../../data/files/test2.dat"),
+ in_file(vch_val_neg, "../../data/files/test2.dat"),
+ in_file("303", "../../data/files/test2.dat"),
in_file("304", "../../data/files/test2.dat"),
in_file(CAST(NULL AS STRING), "../../data/files/test2.dat")
-FROM src LIMIT 1
+FROM value_src LIMIT 1
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
-SELECT in_file("303", "../../data/files/test2.dat"),
+SELECT in_file(str_val, "../../data/files/test2.dat"),
+ in_file(ch_val, "../../data/files/test2.dat"),
+ in_file(vch_val, "../../data/files/test2.dat"),
+ in_file(str_val_neg, "../../data/files/test2.dat"),
+ in_file(ch_val_neg, "../../data/files/test2.dat"),
+ in_file(vch_val_neg, "../../data/files/test2.dat"),
+ in_file("303", "../../data/files/test2.dat"),
in_file("304", "../../data/files/test2.dat"),
in_file(CAST(NULL AS STRING), "../../data/files/test2.dat")
-FROM src LIMIT 1
+FROM value_src LIMIT 1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -24,18 +55,18 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: src
- Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ alias: value_src
+ Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(UDFToString(null), '../../data/files/test2.dat') (type: boolean)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ expressions: in_file(str_val, '../../data/files/test2.dat') (type: boolean), in_file(ch_val, '../../data/files/test2.dat') (type: boolean), in_file(vch_val, '../../data/files/test2.dat') (type: boolean), in_file(str_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(ch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(vch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(UDFToString(null), '../../data/files/test2.dat') (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE
Limit
Number of rows: 1
- Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -47,18 +78,30 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: SELECT in_file("303", "../../data/files/test2.dat"),
+PREHOOK: query: SELECT in_file(str_val, "../../data/files/test2.dat"),
+ in_file(ch_val, "../../data/files/test2.dat"),
+ in_file(vch_val, "../../data/files/test2.dat"),
+ in_file(str_val_neg, "../../data/files/test2.dat"),
+ in_file(ch_val_neg, "../../data/files/test2.dat"),
+ in_file(vch_val_neg, "../../data/files/test2.dat"),
+ in_file("303", "../../data/files/test2.dat"),
in_file("304", "../../data/files/test2.dat"),
in_file(CAST(NULL AS STRING), "../../data/files/test2.dat")
-FROM src LIMIT 1
+FROM value_src LIMIT 1
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@value_src
#### A masked pattern was here ####
-POSTHOOK: query: SELECT in_file("303", "../../data/files/test2.dat"),
+POSTHOOK: query: SELECT in_file(str_val, "../../data/files/test2.dat"),
+ in_file(ch_val, "../../data/files/test2.dat"),
+ in_file(vch_val, "../../data/files/test2.dat"),
+ in_file(str_val_neg, "../../data/files/test2.dat"),
+ in_file(ch_val_neg, "../../data/files/test2.dat"),
+ in_file(vch_val_neg, "../../data/files/test2.dat"),
+ in_file("303", "../../data/files/test2.dat"),
in_file("304", "../../data/files/test2.dat"),
in_file(CAST(NULL AS STRING), "../../data/files/test2.dat")
-FROM src LIMIT 1
+FROM value_src LIMIT 1
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@value_src
#### A masked pattern was here ####
-true false NULL
+true true true false false false true false NULL