Author: gates
Date: Wed Aug 12 06:30:15 2009
New Revision: 803373
URL: http://svn.apache.org/viewvc?rev=803373&view=rev
Log:
PIG-893: Added string -> integer, long, float, and double casts.
Added:
hadoop/pig/trunk/src/org/apache/pig/impl/util/CastUtils.java
hadoop/pig/trunk/test/org/apache/pig/test/TestCharArrayToNumeric.java
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java
hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java
hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=803373&r1=803372&r2=803373&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Wed Aug 12 06:30:15 2009
@@ -25,6 +25,8 @@
PIG-734: Changed maps to only take strings as keys (gates).
IMPROVEMENTS
+PIG-893: Added string -> integer, long, float, and double casts (zjffdu via
gates).
+
PIG-833: Added Zebra, new columnar storage mechanism for HDFS (rangadi plus
many others via gates)
PIG-697: Proposed improvements to pig's optimizer, Phase5 (daijy)
Modified:
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java?rev=803373&r1=803372&r2=803373&view=diff
==============================================================================
---
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
(original)
+++
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
Wed Aug 12 06:30:15 2009
@@ -39,6 +39,7 @@
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.VisitorException;
+import org.apache.pig.impl.util.CastUtils;
/**
* This is just a cast that converts DataByteArray into either String or
@@ -205,7 +206,7 @@
String str = null;
Result res = in.getNext(str);
if (res.returnStatus == POStatus.STATUS_OK && res.result != null) {
- res.result = new Integer(Integer.valueOf((String) res.result));
+ res.result = CastUtils.stringToInteger((String)res.result);
}
return res;
}
@@ -327,7 +328,7 @@
String str = null;
Result res = in.getNext(str);
if (res.returnStatus == POStatus.STATUS_OK && res.result != null) {
- res.result = new Long(Long.valueOf((String) res.result));
+ res.result = CastUtils.stringToLong((String)res.result);
}
return res;
}
@@ -448,7 +449,7 @@
String str = null;
Result res = in.getNext(str);
if (res.returnStatus == POStatus.STATUS_OK && res.result != null) {
- res.result = new Double(Double.valueOf((String) res.result));
+ res.result = CastUtils.stringToDouble((String)res.result);
}
return res;
}
@@ -571,7 +572,7 @@
String str = null;
Result res = in.getNext(str);
if (res.returnStatus == POStatus.STATUS_OK && res.result != null) {
- res.result = new Float(Float.valueOf((String) res.result));
+ res.result = CastUtils.stringToFloat((String)res.result);
}
return res;
}
Modified:
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java?rev=803373&r1=803372&r2=803373&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java
(original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java
Wed Aug 12 06:30:15 2009
@@ -303,7 +303,8 @@
inputType == DataType.CHARARRAY
) &&
( (castType == DataType.CHARARRAY) ||
- (castType == DataType.BYTEARRAY)
+ (castType == DataType.BYTEARRAY) ||
+ (DataType.isNumberType(castType))
)
) {
//good
Added: hadoop/pig/trunk/src/org/apache/pig/impl/util/CastUtils.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/util/CastUtils.java?rev=803373&view=auto
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/util/CastUtils.java (added)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/util/CastUtils.java Wed Aug 12
06:30:15 2009
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.impl.util;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pig.PigWarning;
+
+/**
+ * Helpers for casting chararray ({@code String}) values to numeric types.
+ *
+ * <p>Every method returns {@code null} for a {@code null} input. When the
+ * string cannot be converted, a warning is issued through
+ * {@code LogUtils.warn} and {@code null} is returned; the
+ * {@code NumberFormatException} is not propagated to the caller.
+ */
+public class CastUtils {
+
+    private static final Integer mMaxInt = Integer.valueOf(Integer.MAX_VALUE);
+
+    private static final Integer mMinInt = Integer.valueOf(Integer.MIN_VALUE);
+
+    private static final Long mMaxLong = Long.valueOf(Long.MAX_VALUE);
+
+    private static final Long mMinLong = Long.valueOf(Long.MIN_VALUE);
+
+    protected static final Log mLog = LogFactory.getLog(CastUtils.class);
+
+    /**
+     * Converts a string to a {@code Double}.
+     *
+     * @param str the text to convert, may be {@code null}
+     * @return the parsed value, or {@code null} if {@code str} is
+     *         {@code null} or is not a valid double literal
+     */
+    public static Double stringToDouble(String str) {
+        if (str == null) {
+            return null;
+        }
+        try {
+            return Double.parseDouble(str);
+        } catch (NumberFormatException e) {
+            warnDiscarded(str, "double", e);
+            return null;
+        }
+    }
+
+    /**
+     * Converts a string to a {@code Float}.
+     *
+     * @param str the text to convert, may be {@code null}
+     * @return the parsed value, or {@code null} if {@code str} is
+     *         {@code null} or is not a valid float literal
+     */
+    public static Float stringToFloat(String str) {
+        if (str == null) {
+            return null;
+        }
+        try {
+            return Float.parseFloat(str);
+        } catch (NumberFormatException e) {
+            warnDiscarded(str, "float", e);
+            return null;
+        }
+    }
+
+    /**
+     * Converts a string to an {@code Integer}.
+     *
+     * <p>The string is first parsed as an integer literal. If that fails it
+     * is parsed as a double and truncated toward zero, so {@code "12.8"}
+     * yields {@code 12}.
+     *
+     * @param str the text to convert, may be {@code null}
+     * @return the converted value, or {@code null} if {@code str} is
+     *         {@code null}, is not numeric, is NaN, or does not fit in an
+     *         {@code int} after truncation
+     */
+    public static Integer stringToInteger(String str) {
+        if (str == null) {
+            return null;
+        }
+        try {
+            return Integer.parseInt(str);
+        } catch (NumberFormatException e) {
+            // It's possible that this field can be interpreted as a double.
+            // Integer.parseInt does not accept that, so try to convert it to
+            // a double and, if that works, go to an int.
+            try {
+                Double d = Double.valueOf(str);
+                double value = d.doubleValue();
+                // Need to check for an overflow error in BOTH directions.
+                // A double-to-int cast saturates silently and maps NaN to 0,
+                // so anything that does not truncate to a real int value
+                // must be rejected here. Both bounds are exact as doubles.
+                if (d.isNaN()
+                        || value >= mMaxInt.doubleValue() + 1.0
+                        || value <= mMinInt.doubleValue() - 1.0) {
+                    LogUtils.warn(CastUtils.class,
+                            "Value " + d + " too large for integer",
+                            PigWarning.TOO_LARGE_FOR_INT, mLog);
+                    return null;
+                }
+                return Integer.valueOf(d.intValue());
+            } catch (NumberFormatException nfe2) {
+                warnDiscarded(str, "int", nfe2);
+                return null;
+            }
+        }
+    }
+
+    /**
+     * Converts a string to a {@code Long}.
+     *
+     * <p>The string is first parsed as a long literal. If that fails it is
+     * parsed as a double and truncated toward zero, so {@code "12.8"}
+     * yields {@code 12}.
+     *
+     * @param str the text to convert, may be {@code null}
+     * @return the converted value, or {@code null} if {@code str} is
+     *         {@code null}, is not numeric, is NaN, or lies outside the
+     *         range of a {@code long}
+     */
+    public static Long stringToLong(String str) {
+        if (str == null) {
+            return null;
+        }
+        try {
+            return Long.parseLong(str);
+        } catch (NumberFormatException e) {
+            // It's possible that this field can be interpreted as a double.
+            // Long.parseLong does not accept that, so try to convert it to
+            // a double and, if that works, go to a long.
+            try {
+                Double d = Double.valueOf(str);
+                double value = d.doubleValue();
+                // Need to check for an overflow error in BOTH directions.
+                // Long.MAX_VALUE and Long.MIN_VALUE round to +/-2^63 as
+                // doubles and adding or subtracting 1.0 does not change
+                // them, so the comparisons are strict: a value of exactly
+                // +/-2^63 is still accepted and saturates to the nearest
+                // long, anything beyond is rejected.
+                if (d.isNaN()
+                        || value > mMaxLong.doubleValue() + 1.0
+                        || value < mMinLong.doubleValue() - 1.0) {
+                    LogUtils.warn(CastUtils.class,
+                            "Value " + d + " too large for long",
+                            PigWarning.TOO_LARGE_FOR_INT, mLog);
+                    return null;
+                }
+                return Long.valueOf(d.longValue());
+            } catch (NumberFormatException nfe2) {
+                warnDiscarded(str, "long", nfe2);
+                return null;
+            }
+        }
+    }
+
+    /**
+     * Issues the "field discarded" warning shared by all conversions.
+     *
+     * @param str      the text that could not be converted
+     * @param typeName name of the target type as it appears in the message
+     * @param cause    the exception raised by the failed parse
+     */
+    private static void warnDiscarded(String str, String typeName,
+            NumberFormatException cause) {
+        LogUtils.warn(CastUtils.class,
+                "Unable to interpret value " + str + " in field being "
+                        + "converted to " + typeName
+                        + ", caught NumberFormatException <"
+                        + cause.getMessage() + "> field discarded",
+                PigWarning.FIELD_DISCARDED_TYPE_CONVERSION_FAILED, mLog);
+    }
+
+}
Added: hadoop/pig/trunk/test/org/apache/pig/test/TestCharArrayToNumeric.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestCharArrayToNumeric.java?rev=803373&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestCharArrayToNumeric.java
(added)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestCharArrayToNumeric.java Wed
Aug 12 06:30:15 2009
@@ -0,0 +1,298 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.test;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
+import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POCast;
+import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.plan.NodeIdGenerator;
+import org.apache.pig.impl.plan.OperatorKey;
+
+/**
+ * Tests chararray to numeric casts (PIG-893), both directly through
+ * {@link POCast} and through Pig Latin scripts run in local mode.
+ */
+public class TestCharArrayToNumeric extends TestCase {
+
+    // Typed nulls handed to POCast.getNext(...); only their static type
+    // matters, it selects which getNext overload is invoked.
+    private Double dummyDouble = null;
+
+    private Float dummyFloat = null;
+
+    private Long dummyLong = null;
+
+    private Integer dummyInteger = null;
+
+    // Boundary values fed through the casts as strings.
+    private Double MaxDouble = Double.MAX_VALUE;
+
+    private Double MinDouble = Double.MIN_VALUE;
+
+    private Float MaxFloat = Float.MAX_VALUE;
+
+    private Float MinFloat = Float.MIN_VALUE;
+
+    private Long MaxLong = Long.MAX_VALUE;
+
+    private Long MinLong = Long.MIN_VALUE;
+
+    private Integer MaxInteger = Integer.MAX_VALUE;
+
+    private Integer MinInteger = Integer.MIN_VALUE;
+
+    /**
+     * Creates a fresh operator key in the "scope" scope.
+     *
+     * @return a new key built from the next node id of the generator
+     */
+    public static OperatorKey newOperatorKey() {
+        long newId = NodeIdGenerator.getGenerator().getNextNodeId("scope");
+        return new OperatorKey("scope", newId);
+    }
+
+    /**
+     * Feeds chararray values through a POCast whose input is a chararray
+     * projection and checks the double, float, long and int results.
+     * Strings that are not numeric are expected to produce null.
+     */
+    public void testCast() throws ExecException {
+
+        POCast cast = new POCast(newOperatorKey(), -1);
+        POProject proj = new POProject(newOperatorKey(), -1, 0);
+        proj.setResultType(DataType.CHARARRAY);
+        List<PhysicalOperator> inputs = new ArrayList<PhysicalOperator>();
+        inputs.add(proj);
+        cast.setInputs(inputs);
+
+        // cast to double
+        String[] items = { "12.0", "-13.2", "0.1f", "1.3e2", "zjf",
+                MaxDouble.toString(), MinDouble.toString() };
+        Double[] doubleExpected = { 12.0, -13.2, 0.1, 1.3e2, null, MaxDouble,
+                MinDouble };
+        for (int i = 0; i < items.length; ++i) {
+            Tuple tuple = TupleFactory.getInstance().newTuple(1);
+            tuple.set(0, items[i]);
+            proj.attachInput(tuple);
+            Double actual = (Double) cast.getNext(dummyDouble).result;
+            if (doubleExpected[i] != null) {
+                assertEquals(doubleExpected[i], actual, 1e-6);
+            } else {
+                assertNull(actual);
+            }
+        }
+
+        // cast to float
+        items = new String[] { "12.0", "-13.2", "0.1f", "1.3e2",
+                MaxFloat.toString(), MinFloat.toString(), "zjf" };
+        Float[] floatExpected = { 12.0f, -13.2f, 0.1f, 1.3e2f, MaxFloat,
+                MinFloat, null };
+        for (int i = 0; i < items.length; ++i) {
+            Tuple tuple = TupleFactory.getInstance().newTuple(1);
+            tuple.set(0, items[i]);
+            proj.attachInput(tuple);
+            Float actual = (Float) cast.getNext(dummyFloat).result;
+            if (floatExpected[i] != null) {
+                assertEquals(floatExpected[i], actual, 1e-6);
+            } else {
+                assertNull(actual);
+            }
+        }
+
+        // cast to long; fractional input is truncated
+        items = new String[] { "1", "-1", "12.2", "12.8", MaxLong.toString(),
+                MinLong.toString(), "df1.2" };
+        Long[] longExpected = { 1L, -1L, 12L, 12L, MaxLong, MinLong, null };
+        for (int i = 0; i < items.length; ++i) {
+            Tuple tuple = TupleFactory.getInstance().newTuple(1);
+            tuple.set(0, items[i]);
+            proj.attachInput(tuple);
+            Long actual = (Long) cast.getNext(dummyLong).result;
+            if (longExpected[i] != null) {
+                assertEquals(longExpected[i], actual);
+            } else {
+                assertNull(actual);
+            }
+        }
+
+        // cast to int; fractional input is truncated
+        items = new String[] { "1", "-1", "12.2", "12.8",
+                MaxInteger.toString(), MinInteger.toString(), "ff4332" };
+        Integer[] intExpected = { 1, -1, 12, 12, MaxInteger, MinInteger, null };
+        for (int i = 0; i < items.length; ++i) {
+            Tuple tuple = TupleFactory.getInstance().newTuple(1);
+            tuple.set(0, items[i]);
+            proj.attachInput(tuple);
+            Integer actual = (Integer) cast.getNext(dummyInteger).result;
+            if (intExpected[i] != null) {
+                assertEquals(intExpected[i], actual);
+            } else {
+                assertNull(actual);
+            }
+        }
+    }
+
+    /**
+     * Writes a tab-separated (key, value) file of random doubles plus one
+     * non-numeric row, loads the value column as chararray in a local-mode
+     * script, casts it to double and to float, and compares each row with
+     * the value that was written.
+     */
+    public void testCharArray2FloatAndDoubleScript() {
+
+        // create an input file with format (key,value)
+        int size = 100;
+        Random rand = new Random();
+        Map<Integer, Double> map = new HashMap<Integer, Double>();
+        File inputFile = null;
+
+        try {
+            inputFile = File.createTempFile("pig_jira_893", ".txt");
+            PrintStream ps = new PrintStream(new FileOutputStream(inputFile));
+            try {
+                for (int i = 0; i < size; ++i) {
+                    int key = i;
+                    double value = rand.nextDouble() * 100;
+                    ps.println(key + "\t" + value);
+                    map.put(key, value);
+                }
+                // append a non-numeric string as the last line, to test a
+                // value which can not be cast
+                ps.println(size + "\t" + "null");
+                map.put(size, null);
+            } finally {
+                // release the file handle before the script reads the file
+                ps.close();
+            }
+
+            byte[] numericTypes = new byte[] { DataType.DOUBLE, DataType.FLOAT, };
+            for (byte type : numericTypes) {
+                PigServer pig = new PigServer(ExecType.LOCAL);
+                pig.registerQuery("A = Load '"
+                        + Util.generateURI(inputFile.getCanonicalPath())
+                        + "' AS (key:int,value:chararray);");
+                pig.registerQuery("B = FOREACH A GENERATE key,("
+                        + DataType.findTypeName(type) + ")value;");
+                Iterator<Tuple> iter = pig.openIterator("B");
+                while (iter.hasNext()) {
+                    Tuple tuple = iter.next();
+                    Integer key = (Integer) tuple.get(0);
+                    String value = null;
+                    if (tuple.get(1) != null) {
+                        value = tuple.get(1).toString();
+                    }
+
+                    if (type == DataType.DOUBLE) {
+                        Double expected = map.get(key);
+                        if (value != null) {
+                            assertEquals(expected, Double.parseDouble(value));
+                        } else {
+                            assertNull(expected);
+                        }
+                    }
+                    if (type == DataType.FLOAT) {
+                        Float expected = null;
+                        if (map.get(key) != null) {
+                            expected = map.get(key).floatValue();
+                        }
+                        if (value != null) {
+                            assertEquals(expected, Float.parseFloat(value));
+                        } else {
+                            assertNull(expected);
+                        }
+                    }
+                }
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+            fail("Unexpected IOException: " + e);
+        } finally {
+            if (inputFile != null) {
+                inputFile.delete();
+            }
+        }
+    }
+
+    /**
+     * Writes a tab-separated (key, value) file of random integers plus one
+     * non-numeric row, loads the value column as chararray in a local-mode
+     * script, casts it to int and to long, and compares each row with the
+     * value that was written.
+     */
+    public void testCharArrayToIntAndLongScript() {
+
+        // create an input file with format (key,value)
+        int size = 100;
+        Random rand = new Random();
+        Map<Integer, Long> map = new HashMap<Integer, Long>();
+        File inputFile = null;
+
+        try {
+            inputFile = File.createTempFile("pig_jira_893", ".txt");
+            PrintStream ps = new PrintStream(new FileOutputStream(inputFile));
+            try {
+                for (int i = 0; i < size; ++i) {
+                    int key = i;
+                    long value = rand.nextInt(100);
+                    ps.println(key + "\t" + value);
+                    map.put(key, value);
+                }
+                // append a non-numeric string as the last line, to test a
+                // value which can not be cast
+                ps.println(size + "\t" + "null");
+                map.put(size, null);
+            } finally {
+                // release the file handle before the script reads the file
+                ps.close();
+            }
+
+            byte[] numericTypes = new byte[] { DataType.INTEGER, DataType.LONG, };
+            for (byte type : numericTypes) {
+                PigServer pig = new PigServer(ExecType.LOCAL);
+                pig.registerQuery("A = Load '"
+                        + Util.generateURI(inputFile.getCanonicalPath())
+                        + "' AS (key:int,value:chararray);");
+                pig.registerQuery("B = FOREACH A GENERATE key,("
+                        + DataType.findTypeName(type) + ")value;");
+                Iterator<Tuple> iter = pig.openIterator("B");
+                while (iter.hasNext()) {
+                    Tuple tuple = iter.next();
+                    Integer key = (Integer) tuple.get(0);
+                    String value = null;
+                    if (tuple.get(1) != null) {
+                        value = tuple.get(1).toString();
+                    }
+
+                    if (type == DataType.LONG) {
+                        Long expected = map.get(key);
+                        if (value != null) {
+                            Long actual = Long.parseLong(value);
+                            assertEquals(expected, actual);
+                        } else {
+                            assertNull(expected);
+                        }
+                    }
+                    if (type == DataType.INTEGER) {
+                        Integer expected = null;
+                        if (map.get(key) != null) {
+                            expected = map.get(key).intValue();
+                        }
+                        if (value != null) {
+                            Integer actual = Integer.parseInt(value);
+                            assertEquals(expected, actual);
+                        } else {
+                            assertNull(expected);
+                        }
+                    }
+                }
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+            fail("Unexpected IOException: " + e);
+        } finally {
+            if (inputFile != null) {
+                inputFile.delete();
+            }
+        }
+    }
+}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java?rev=803373&r1=803372&r2=803373&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java Wed Aug 12
06:30:15 2009
@@ -626,4 +626,12 @@
Assert.assertFalse(Schema.equals(bagSchema1, bagSchema2, false,
false)) ;
}
+    /**
+     * Checks that a chararray field schema is castable to a double, float,
+     * long and integer field schema.
+     */
+    public void testCharArray2Numeric() {
+        final Schema.FieldSchema source =
+                new Schema.FieldSchema("", DataType.CHARARRAY);
+        final byte[] numericTargets = {
+            DataType.DOUBLE, DataType.FLOAT, DataType.LONG, DataType.INTEGER
+        };
+        for (int idx = 0; idx < numericTargets.length; idx++) {
+            Schema.FieldSchema target =
+                    new Schema.FieldSchema("", numericTargets[idx]);
+            boolean castable = Schema.FieldSchema.castable(target, source);
+            Assert.assertTrue(castable);
+        }
+    }
}
Modified:
hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java?rev=803373&r1=803372&r2=803373&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java
(original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java
Wed Aug 12 06:30:15 2009
@@ -568,8 +568,11 @@
assertEquals(DataType.TUPLE, equal1.getLhsOperand().getType()) ;
}
+ /*
+ * chararray can be cast to int now that PIG-893 has been resolved
+ */
@Test
- public void testExpressionTypeCheckingFail8() throws Throwable {
+ public void testExpressionTypeChecking9() throws Throwable {
LogicalPlan plan = new LogicalPlan() ;
TupleFactory tupleFactory = TupleFactory.getInstance();
@@ -644,15 +647,14 @@
try {
typeValidator.validate(plan, collector) ;
- fail("Exception expected") ;
} catch(PlanValidationException pve) {
- //good
+ fail("Exception expected") ;
}
printMessageCollector(collector) ;
printTypeGraph(plan) ;
- if (!collector.hasError()) {
+ if (collector.hasError()) {
throw new Exception("Error expected") ;
}
}