HIVE-12238: Vectorization: Thread-safety errors in VectorUDFDate (Gopal V, reviewed by Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d7c04859
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d7c04859
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d7c04859

Branch: refs/heads/master-fixed
Commit: d7c04859e1903cd6ed38678e8dae6b453c34b7bb
Parents: ad12765
Author: Gopal V <[email protected]>
Authored: Mon Nov 2 19:56:08 2015 -0800
Committer: Gopal V <[email protected]>
Committed: Mon Nov 2 19:56:08 2015 -0800

----------------------------------------------------------------------
 .../vector/expressions/VectorUDFDateString.java |  4 +-
 .../expressions/TestVectorDateExpressions.java  | 71 +++++++++++++++++++-
 2 files changed, 72 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/d7c04859/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
index f1a5b93..e27ac6a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
@@ -22,6 +22,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
 import org.apache.hadoop.io.Text;
+import org.apache.hive.common.util.DateUtils;
 
 import java.text.SimpleDateFormat;
 import java.util.Date;
@@ -30,14 +31,13 @@ import java.text.ParseException;
 public class VectorUDFDateString extends StringUnaryUDF {
   private static final long serialVersionUID = 1L;
 
-  private transient static SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
-
   private static final Logger LOG = LoggerFactory.getLogger(
       VectorUDFDateString.class.getName());
 
   public VectorUDFDateString(int colNum, int outputColumn) {
     super(colNum, outputColumn, new StringUnaryUDF.IUDFUnaryString() {
       Text t = new Text();
+      final transient SimpleDateFormat formatter = DateUtils.getDateFormat();
 
       @Override
       public Text evaluate(Text s) {

http://git-wip-us.apache.org/repos/asf/hive/blob/d7c04859/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
index 6bd4be1..9c4a751 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
@@ -18,8 +18,9 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
-import junit.framework.Assert;
+import org.junit.Assert;
 import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -31,15 +32,28 @@ import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
+import org.junit.internal.runners.statements.Fail;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.List;
 import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ThreadFactory;
 
 public class TestVectorDateExpressions {
+
+  private ExecutorService runner;
+
   /* copied over from VectorUDFTimestampFieldLong */
   private TimestampWritable toTimestampWritable(long daysSinceEpoch) {
     Timestamp ts = new Timestamp(DateWritable.daysToMillis((int) daysSinceEpoch));
@@ -412,6 +426,60 @@ public class TestVectorDateExpressions {
     verifyUDFWeekOfYear(batch);
   }
 
+  @Before
+  public void setUp() throws Exception {
+    runner =
+        Executors.newFixedThreadPool(3,
+            new ThreadFactoryBuilder().setNameFormat("date-tester-thread-%d").build());
+  }
+
+  private static final class MultiThreadedDateFormatTest implements Callable<Void> {
+    @Override
+    public Void call() throws Exception {
+      int batchSize = 1024;
+      VectorUDFDateString udf = new VectorUDFDateString(0, 1);
+      VectorizedRowBatch batch = new VectorizedRowBatch(2, batchSize);
+      BytesColumnVector in = new BytesColumnVector(batchSize);
+      BytesColumnVector out = new BytesColumnVector(batchSize);
+      batch.cols[0] = in;
+      batch.cols[1] = out;
+      for (int i = 0; i < batchSize; i++) {
+        byte[] data = String.format("1999-%02d-%02d", 1 + (i % 12), 1 + (i % 15)).getBytes("UTF-8");
+        in.setRef(i, data, 0, data.length);
+        in.isNull[i] = false;
+      }
+      udf.evaluate(batch);
+      // bug if it throws an exception
+      return (Void) null;
+    }
+  }
+
+  // 5s timeout
+  @Test(timeout = 5000)
+  public void testMultiThreadedVectorUDFDate() {
+    List<Callable<Void>> tasks = new ArrayList<Callable<Void>>();
+    for (int i = 0; i < 200; i++) {
+      tasks.add(new MultiThreadedDateFormatTest());
+    }
+    try {
+      List<Future<Void>> results = runner.invokeAll(tasks);
+      for (Future<Void> f : results) {
+        Assert.assertNull(f.get());
+      }
+    } catch (InterruptedException ioe) {
+      Assert.fail("Interrupted while running tests");
+    } catch (Exception e) {
+      Assert.fail("Multi threaded operations threw unexpected Exception: " + e.getMessage());
+    }
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (runner != null) {
+      runner.shutdownNow();
+    }
+  }
+
   public static void main(String[] args) {
     TestVectorDateExpressions self = new TestVectorDateExpressions();
     self.testVectorUDFYear();
@@ -419,5 +487,6 @@ public class TestVectorDateExpressions {
     self.testVectorUDFDayOfMonth();
     self.testVectorUDFWeekOfYear();
     self.testVectorUDFUnixTimeStamp();
+    self.testMultiThreadedVectorUDFDate();
   }
 }
