[ https://issues.apache.org/jira/browse/ASTERIXDB-1466?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dmitry Lychagin reassigned ASTERIXDB-1466: ------------------------------------------ Assignee: Dmitry Lychagin (was: Yingyi Bu) > For the multiple key join on Hyracks (needs better error message) > ----------------------------------------------------------------- > > Key: ASTERIXDB-1466 > URL: https://issues.apache.org/jira/browse/ASTERIXDB-1466 > Project: Apache AsterixDB > Issue Type: Bug > Components: RT - Runtime > Environment: Hyracks > Reporter: Mingda Li > Assignee: Dmitry Lychagin > Priority: Minor > Labels: test > Original Estimate: 4h > Remaining Estimate: 4h > > This is not an implementation bug but looks to be an error message issue. The > runtime operator should check that the number of hash > function/comparator factories matches the number of join key fields, and report a better error message when they differ. > The original issue text: > ---------------------------------------- > I want to test the efficiency of multiple tables' join with multiple keys on > Hyracks. But I get some errors. I changed the code in class > TPCHCustomerOrderHashJoinTest as follows to join: cust join order on( > C.custkey=O.custkey, C.Nationkey=O.Orderkey) > [the condition for Nationkey=Orderkey has no meaning, just a try for multiple > keys] > @Test > public void customerOrderCIDHybridHashJoin() throws Exception { > JobSpecification spec = new JobSpecification(); > FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new > FileReference(new File( > "data/tpch0.001/customer.tbl"))) }; > IFileSplitProvider custSplitsProvider = new > ConstantFileSplitProvider(custSplits); > RecordDescriptor custDesc = new RecordDescriptor(new > ISerializerDeserializer[] { > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer() }); > FileSplit[] ordersSplits = new 
FileSplit[] { new FileSplit(NC2_ID, > new FileReference(new File( > "data/tpch0.001/orders.tbl"))) }; > IFileSplitProvider ordersSplitsProvider = new > ConstantFileSplitProvider(ordersSplits); > RecordDescriptor ordersDesc = new RecordDescriptor(new > ISerializerDeserializer[] { > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer() }); > RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new > ISerializerDeserializer[] { > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer(), new > UTF8StringSerializerDeserializer(), > new UTF8StringSerializerDeserializer() }); > FileScanOperatorDescriptor ordScanner = new > FileScanOperatorDescriptor(spec, ordersSplitsProvider, > new DelimitedDataTupleParserFactory(new IValueParserFactory[] > { UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc); > 
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, > ordScanner, NC1_ID); > FileScanOperatorDescriptor custScanner = new > FileScanOperatorDescriptor(spec, custSplitsProvider, > new DelimitedDataTupleParserFactory(new IValueParserFactory[] > { UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE, > UTF8StringParserFactory.INSTANCE }, '|'), custDesc); > PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, > custScanner, NC1_ID); > HybridHashJoinOperatorDescriptor join = new > HybridHashJoinOperatorDescriptor( > spec, > 5, > 20, > 200, > 1.2, > new int[] { 1,0 }, > new int[] { 0,3 }, > new IBinaryHashFunctionFactory[] { > PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }, > new IBinaryComparatorFactory[] { > PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, > custOrderJoinDesc, null, false, null); > PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, > NC1_ID); > ResultSetId rsId = new ResultSetId(1); > spec.addResultSetId(rsId); > IOperatorDescriptor printer = new > ResultWriterOperatorDescriptor(spec, rsId, false, false, > > ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider()); > PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, > printer, NC1_ID); > IConnectorDescriptor ordJoinConn = new > OneToOneConnectorDescriptor(spec); > spec.connect(ordJoinConn, ordScanner, 0, join, 0); > IConnectorDescriptor custJoinConn = new > OneToOneConnectorDescriptor(spec); > spec.connect(custJoinConn, custScanner, 0, join, 1); > IConnectorDescriptor joinPrinterConn = new > OneToOneConnectorDescriptor(spec); > spec.connect(joinPrinterConn, join, 0, printer, 0); > spec.addRoot(printer); > runTest(spec); > } > and get the bug as following: > 
org.apache.hyracks.api.exceptions.HyracksDataException: > java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:218) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.initialize(SuperActivityOperatorNodePushable.java:83) > at org.apache.hyracks.control.nc.Task.run(Task.java:263) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at java.util.concurrent.FutureTask.report(FutureTask.java:122) > at java.util.concurrent.FutureTask.get(FutureTask.java:192) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:212) > ... 5 more > Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:71) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.lambda$initialize$0(SuperActivityOperatorNodePushable.java:83) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:205) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:202) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > ... 
3 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory$1.partition(FieldHashPartitionComputerFactory.java:55) > at > org.apache.hyracks.dataflow.std.join.HybridHashJoinOperatorDescriptor$BuildAndPartitionActivityNode$1.nextFrame(HybridHashJoinOperatorDescriptor.java:227) > at > org.apache.hyracks.dataflow.common.comm.io.AbstractFrameAppender.write(AbstractFrameAppender.java:93) > at > org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory$1.parse(DelimitedDataTupleParserFactory.java:90) > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:68) > ... 7 more > org.apache.hyracks.api.exceptions.HyracksDataException: > java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:218) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.initialize(SuperActivityOperatorNodePushable.java:83) > at org.apache.hyracks.control.nc.Task.run(Task.java:263) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at java.util.concurrent.FutureTask.report(FutureTask.java:122) > at java.util.concurrent.FutureTask.get(FutureTask.java:192) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:212) > ... 
5 more > Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:71) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.lambda$initialize$0(SuperActivityOperatorNodePushable.java:83) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:205) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:202) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > ... 3 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory$1.partition(FieldHashPartitionComputerFactory.java:55) > at > org.apache.hyracks.dataflow.std.join.HybridHashJoinOperatorDescriptor$BuildAndPartitionActivityNode$1.nextFrame(HybridHashJoinOperatorDescriptor.java:227) > at > org.apache.hyracks.dataflow.common.comm.io.AbstractFrameAppender.write(AbstractFrameAppender.java:93) > at > org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory$1.parse(DelimitedDataTupleParserFactory.java:90) > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:68) > ... 
7 more > org.apache.hyracks.api.exceptions.HyracksDataException: > java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:218) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.initialize(SuperActivityOperatorNodePushable.java:83) > at org.apache.hyracks.control.nc.Task.run(Task.java:263) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at java.util.concurrent.FutureTask.report(FutureTask.java:122) > at java.util.concurrent.FutureTask.get(FutureTask.java:192) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:212) > ... 5 more > Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:71) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.lambda$initialize$0(SuperActivityOperatorNodePushable.java:83) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:205) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:202) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > ... 
3 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory$1.partition(FieldHashPartitionComputerFactory.java:55) > at > org.apache.hyracks.dataflow.std.join.HybridHashJoinOperatorDescriptor$BuildAndPartitionActivityNode$1.nextFrame(HybridHashJoinOperatorDescriptor.java:227) > at > org.apache.hyracks.dataflow.common.comm.io.AbstractFrameAppender.write(AbstractFrameAppender.java:93) > at > org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory$1.parse(DelimitedDataTupleParserFactory.java:90) > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:68) > ... 7 more > org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212) > at > org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:45) > at org.apache.hyracks.control.nc.Task.run(Task.java:319) > ... 
3 more > Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: > java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:218) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.initialize(SuperActivityOperatorNodePushable.java:83) > at org.apache.hyracks.control.nc.Task.run(Task.java:263) > ... 3 more > Caused by: java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at java.util.concurrent.FutureTask.report(FutureTask.java:122) > at java.util.concurrent.FutureTask.get(FutureTask.java:192) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:212) > ... 5 more > Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:71) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.lambda$initialize$0(SuperActivityOperatorNodePushable.java:83) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:205) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:202) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > ... 
3 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory$1.partition(FieldHashPartitionComputerFactory.java:55) > at > org.apache.hyracks.dataflow.std.join.HybridHashJoinOperatorDescriptor$BuildAndPartitionActivityNode$1.nextFrame(HybridHashJoinOperatorDescriptor.java:227) > at > org.apache.hyracks.dataflow.common.comm.io.AbstractFrameAppender.write(AbstractFrameAppender.java:93) > at > org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory$1.parse(DelimitedDataTupleParserFactory.java:90) > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:68) > ... 7 more > org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212) > at > org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:45) > at org.apache.hyracks.control.nc.Task.run(Task.java:319) > ... 
3 more > Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: > java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:218) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.initialize(SuperActivityOperatorNodePushable.java:83) > at org.apache.hyracks.control.nc.Task.run(Task.java:263) > ... 3 more > Caused by: java.util.concurrent.ExecutionException: > org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at java.util.concurrent.FutureTask.report(FutureTask.java:122) > at java.util.concurrent.FutureTask.get(FutureTask.java:192) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:212) > ... 5 more > Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: > java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:71) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.lambda$initialize$0(SuperActivityOperatorNodePushable.java:83) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:205) > at > org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:202) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > ... 
3 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory$1.partition(FieldHashPartitionComputerFactory.java:55) > at > org.apache.hyracks.dataflow.std.join.HybridHashJoinOperatorDescriptor$BuildAndPartitionActivityNode$1.nextFrame(HybridHashJoinOperatorDescriptor.java:227) > at > org.apache.hyracks.dataflow.common.comm.io.AbstractFrameAppender.write(AbstractFrameAppender.java:93) > at > org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory$1.parse(DelimitedDataTupleParserFactory.java:90) > at > org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor$1.initialize(FileScanOperatorDescriptor.java:68) > ... 7 more > Process finished with exit code 255 -- This message was sent by Atlassian JIRA (v6.4.14#64029)