[
https://issues.apache.org/jira/browse/DRILL-6724?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16627782#comment-16627782
]
ASF GitHub Bot commented on DRILL-6724:
---------------------------------------
KazydubB commented on a change in pull request #1455: DRILL-6724: Dump operator
context to logs when error occurs during query execution
URL: https://github.com/apache/drill/pull/1455#discussion_r219799214
##########
File path: exec/java-exec/src/test/java/org/apache/drill/TestOperatorDump.java
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill;
+
+import ch.qos.logback.classic.spi.ILoggingEvent;
+import ch.qos.logback.core.ConsoleAppender;
+import org.apache.drill.common.exceptions.UserRemoteException;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.physical.impl.ScanBatch;
+import org.apache.drill.exec.physical.impl.join.HashJoinBatch;
+import org.apache.drill.exec.physical.impl.limit.LimitRecordBatch;
+import org.apache.drill.exec.testing.Controls;
+import org.apache.drill.exec.testing.ControlsInjectionUtil;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterFixtureBuilder;
+import org.apache.drill.test.ClusterTest;
+import org.apache.drill.test.LogFixture;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertTrue;
+
+public class TestOperatorDump extends ClusterTest {
+
+ private static final String ENTRY_DUMP_COMPLETED = "Operator dump completed";
+ private static final String ENTRY_DUMP_STARTED = "Operator dump started";
+
+ private LogFixture logFixture;
+ private TestAppender appender;
+
+ @BeforeClass
+ public static void setupFiles() {
+ dirTestWatcher.copyResourceToRoot(Paths.get("multilevel"));
+ }
+
+ @Before
+ public void setup() throws Exception {
+ ClusterFixtureBuilder builder = ClusterFixture.builder(dirTestWatcher);
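+    // Capture console log output in the test appender so tests can assert on logged entries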
+ appender = new TestAppender();
+ logFixture = LogFixture.builder()
+ .toConsole(appender, LogFixture.DEFAULT_CONSOLE_FORMAT)
+ .build();
+ startCluster(builder);
+ }
+
+ @After
+  public void tearDown() {
+ logFixture.close();
+ }
+
+ @Test(expected = UserRemoteException.class)
+ public void testScanBatchChecked() throws Exception {
+ String exceptionDesc = "next-allocate";
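+    // Inject an OutOfMemoryException at ScanBatch's "next-allocate" site (skip 0, fire once)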
+ final String controls = Controls.newBuilder()
+        .addException(ScanBatch.class, exceptionDesc, OutOfMemoryException.class, 0, 1)
+ .build();
+ ControlsInjectionUtil.setControls(client.client(), controls);
+ String query = "select * from dfs.`multilevel/parquet` limit 100";
+ try {
+ client.queryBuilder().sql(query).run();
+ } catch (UserRemoteException e) {
+ assertTrue(e.getMessage().contains(exceptionDesc));
+
+      String[] expectedEntries = new String[] {ENTRY_DUMP_STARTED, ENTRY_DUMP_COMPLETED};
+ validateContainsEntries(expectedEntries, ScanBatch.class.getName());
+ throw e;
+ }
+ }
+
+ @Test(expected = UserRemoteException.class)
+ public void testLimitRecordBatchUnchecked() throws Exception {
+ String exceptionDesc = "limit-do-work";
+ final String controls = Controls.newBuilder()
+        .addException(LimitRecordBatch.class, exceptionDesc, IndexOutOfBoundsException.class, 0, 1)
+ .build();
+ ControlsInjectionUtil.setControls(client.client(), controls);
+ String query = "select * from dfs.`multilevel/parquet` limit 5";
+ try {
+ client.queryBuilder().sql(query).run();
+ } catch (UserRemoteException e) {
+ assertTrue(e.getMessage().contains(exceptionDesc));
+
+      String[] expectedEntries = new String[] {ENTRY_DUMP_STARTED, ENTRY_DUMP_COMPLETED};
+      validateContainsEntries(expectedEntries, LimitRecordBatch.class.getName());
+ throw e;
+ }
+ }
+
+ @Test(expected = UserRemoteException.class)
+ public void testHashJoinBatchUnchecked() throws Exception {
+ String exceptionDesc = "hashjoin-innerNext";
+ final String controls = Controls.newBuilder()
+        .addException(HashJoinBatch.class, exceptionDesc, RuntimeException.class, 0, 1)
+ .build();
+ ControlsInjectionUtil.setControls(client.client(), controls);
+ String query = "select e.employee_id from cp.`employee.json` e " +
+ "left join cp.`employee.json` e1 on e1.position_id = e.employee_id";
+ try {
+ client.queryBuilder().sql(query).run();
+ } catch (UserRemoteException e) {
+ assertTrue(e.getMessage().contains(exceptionDesc));
+
+      String[] expectedEntries = new String[] {ENTRY_DUMP_STARTED, ENTRY_DUMP_COMPLETED};
+ validateContainsEntries(expectedEntries, HashJoinBatch.class.getName());
+ throw e;
+ }
+ }
+
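+  // Asserts that every expected entry appears in some captured log message
+  // and that the given class logged at least one event.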
+  private void validateContainsEntries(String[] entries, String expectedClassName) {
+ if (entries == null) {
+ entries = new String[0];
+ }
+ List<String> messages = appender.getMessages();
+ List<String> entryList = new ArrayList<>(entries.length);
+ Collections.addAll(entryList, entries);
+ Iterator<String> it = entryList.iterator();
+ while (it.hasNext()) {
+ String entry = it.next();
+ for (String message : messages) {
+ if (message.contains(entry)) {
+ it.remove();
+ break;
+ }
+ }
+ }
+    assertTrue(String.format("Entries %s were not found in logs.", entryList),
+        entryList.isEmpty());
+
+ Set<String> loggerNames = appender.getLoggerNames();
+    assertTrue(String.format("Entry for class %s was not found", expectedClassName),
+        loggerNames.contains(expectedClassName));
+ }
+
+ // ConsoleAppender which stores logged events
+ private static class TestAppender extends ConsoleAppender<ILoggingEvent> {
Review comment:
Done.
> Dump operator context to logs when error occurs during query execution
> ----------------------------------------------------------------------
>
> Key: DRILL-6724
> URL: https://issues.apache.org/jira/browse/DRILL-6724
> Project: Apache Drill
> Issue Type: Improvement
> Affects Versions: 1.14.0
> Reporter: Bohdan Kazydub
> Assignee: Bohdan Kazydub
> Priority: Major
> Fix For: 1.15.0
>
>
> * Sometimes when an error occurs, it may be helpful to have the operator stack
> with context data, along with the original stacktrace, to help developers track
> down the failure. To achieve this, each operator should provide a context which
> can be dumped to logs. At the end of FragmentExecutor's execution, if the state
> is FAILED, the operator stack will be dumped to the logs before the exception
> stacktrace, as sketched below.
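> 
> A minimal sketch of the shape of this mechanism, assuming hypothetical names
> ({{DumpableOperator}}, {{dumpOperators}}) rather than the actual classes
> touched by the PR:
> {code}
> import org.slf4j.Logger;
> import org.slf4j.LoggerFactory;
> 
> // Illustrative sketch only; the names here are hypothetical.
> interface DumpableOperator {
>   void dump(); // each operator logs its own state to its own logger
> }
> 
> class OperatorDumper {
>   private static final Logger logger = LoggerFactory.getLogger(OperatorDumper.class);
> 
>   // Called when the fragment transitions to FAILED, before the stacktrace is logged.
>   static void dumpOperators(DumpableOperator[] operatorStack) {
>     logger.error("Operator dump started.");
>     for (DumpableOperator op : operatorStack) {
>       op.dump(); // e.g. logs "ScanBatch[container=..., currentReader=...]"
>     }
>     logger.error("Operator dump completed.");
>   }
> }
> {code}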
>
> Dump example for the case in
> [DRILL-6670|https://issues.apache.org/jira/projects/DRILL/issues/DRILL-6670]
> (*Operator dump started* and *Operator dump completed* designate the start
> and end of the dump, respectively):
> {code}
> ...
> 2018-09-17 08:26:34,392 [24609ac4-80f6-b318-7296-ee53c406a9f6:frag:0:0] INFO
> o.a.d.e.w.fragment.FragmentExecutor -
> 24609ac4-80f6-b318-7296-ee53c406a9f6:0:0: State change requested RUNNING -->
> FAILED
> 2018-09-17 08:26:34,392 [24609ac4-80f6-b318-7296-ee53c406a9f6:frag:0:0] ERROR
> o.a.d.e.physical.impl.BaseRootExec - Operator dump started.
> 2018-09-17 08:26:34,392 [24609ac4-80f6-b318-7296-ee53c406a9f6:frag:0:0] ERROR
> o.a.d.e.p.i.p.ProjectRecordBatch - ProjectRecordBatch[projector=null,
> hasRemainder=false, remainderIndex=0, recordCount=0]
> 2018-09-17 08:26:34,392 [24609ac4-80f6-b318-7296-ee53c406a9f6:frag:0:0] ERROR
> o.a.d.exec.physical.impl.ScanBatch -
> ScanBatch[container=org.apache.drill.exec.record.VectorContainer@383d6f45[recordCount
> = 0, schemaChanged = true, schema = null, wrappers = [], ...],
> currentReader=ParquetRecordReader[File=/home/mapr/example.parquet, Row group
> index=0, Records in row group=1, Total records read=-1,
> MetadataParquetMetaData{FileMetaData{schema: message schema {
> optional binary name (UTF8);
> optional binary creation_parameters (UTF8);
> optional int64 creation_date (TIMESTAMP_MICROS);
> optional int32 data_version;
> optional int32 schema_version;
> }
> , metadata: {pandas={"index_columns": [], "column_indexes": [], "columns":
> [{"name": "name", "field_name": "name", "pandas_type": "unicode",
> "numpy_type": "object", "metadata": null}, {"name": "creation_parameters",
> "field_name": "creation_parameters", "pandas_type": "unicode", "numpy_type":
> "object", "metadata": null}, {"name": "creation_date", "field_name":
> "creation_date", "pandas_type": "datetime", "numpy_type": "datetime64[ns]",
> "metadata": null}, {"name": "data_version", "field_name": "data_version",
> "pandas_type": "int32", "numpy_type": "int32", "metadata": null}, {"name":
> "schema_version", "field_name": "schema_version", "pandas_type": "int32",
> "numpy_type": "int32", "metadata": null}], "pandas_version": "0.21.0"}}},
> blocks: [BlockMetaData{1, 331 [ColumnMetaData{SNAPPY [name] optional binary
> name (UTF8) [PLAIN, PLAIN_DICTIONARY, RLE], 27}, ColumnMetaData{SNAPPY
> [creation_parameters] optional binary creation_parameters (UTF8) [PLAIN,
> PLAIN_DICTIONARY, RLE], 140}, ColumnMetaData{SNAPPY [creation_date] optional
> int64 creation_date (TIMESTAMP_MICROS) [PLAIN, PLAIN_DICTIONARY, RLE], 267},
> ColumnMetaData{SNAPPY [data_version] optional int32 data_version [PLAIN,
> PLAIN_DICTIONARY, RLE], 430}, ColumnMetaData{SNAPPY [schema_version]
> optional int32 schema_version [PLAIN, PLAIN_DICTIONARY, RLE], 567}]}]}],
> schema=null]
> 2018-09-17 08:26:34,392 [24609ac4-80f6-b318-7296-ee53c406a9f6:frag:0:0] ERROR
> o.a.d.e.physical.impl.BaseRootExec - Operator dump completed.
> 2018-09-17 08:26:34,392 [24609ac4-80f6-b318-7296-ee53c406a9f6:frag:0:0] INFO
> o.a.d.e.w.fragment.FragmentExecutor -
> 24609ac4-80f6-b318-7296-ee53c406a9f6:0:0: State change requested FAILED -->
> FINISHED
> ...
> {code}
> Another example:
> {code}
> 13:57:27.603 [245b3168-3d98-1046-5f2b-6b4935b9012d:frag:0:0] ERROR
> o.a.d.e.physical.impl.BaseRootExec - Operator dump started: dumping last 2
> failed operators
> 13:57:27.604 [245b3168-3d98-1046-5f2b-6b4935b9012d:frag:0:0] ERROR
> o.a.d.e.p.i.v.IteratorValidatorBatchIterator -
> IteratorValidatorBatchIterator[container=org.apache.drill.exec.record.VectorContainer@1ab00dc8[recordCount
> = 0, schemaChanged = false, schema = BatchSchema [fields=[[`dir0`
> (VARCHAR:OPTIONAL)], [`dir1` (VARCHAR:OPTIONAL)], [`o_orderkey`
> (INT:REQUIRED)], [`o_custkey` (INT:REQUIRED)], [`o_orderstatus`
> (VARCHAR:REQUIRED)], [`o_totalprice` (FLOAT8:REQUIRED)], [`o_orderdate`
> (DATE:REQUIRED)], [`o_orderpriority` (VARCHAR:REQUIRED)], [`o_clerk`
> (VARCHAR:REQUIRED)], [`o_shippriority` (INT:REQUIRED)], [`o_comment`
> (VARCHAR:REQUIRED)]], selectionVector=TWO_BYTE], wrappers =
> [org.apache.drill.exec.vector.NullableVarCharVector@6e1bb101[field = [`dir0`
> (VARCHAR:OPTIONAL)], ...],
> org.apache.drill.exec.vector.NullableVarCharVector@1edc5831[field = [`dir1`
> (VARCHAR:OPTIONAL)], ...],
> org.apache.drill.exec.vector.IntVector@2c63d90c[field = [`o_orderkey`
> (INT:REQUIRED)], ...], org.apache.drill.exec.vector.IntVector@36275f05[field
> = [`o_custkey` (INT:REQUIRED)], ...],
> org.apache.drill.exec.vector.VarCharVector@5077379[field = [`o_orderstatus`
> (VARCHAR:REQUIRED)], ...],
> org.apache.drill.exec.vector.Float8Vector@79e6ae38[field = [`o_totalprice`
> (FLOAT8:REQUIRED)], ...],
> org.apache.drill.exec.vector.DateVector@4bffb3b1[field = [`o_orderdate`
> (DATE:REQUIRED)], ...],
> org.apache.drill.exec.vector.VarCharVector@472c02bc[field =
> [`o_orderpriority` (VARCHAR:REQUIRED)], ...],
> org.apache.drill.exec.vector.VarCharVector@790e3d9b[field = [`o_clerk`
> (VARCHAR:REQUIRED)], ...],
> org.apache.drill.exec.vector.IntVector@35d5cbec[field = [`o_shippriority`
> (INT:REQUIRED)], ...],
> org.apache.drill.exec.vector.VarCharVector@38194dc[field = [`o_comment`
> (VARCHAR:REQUIRED)], ...]], ...], instNum=17, batchTypeName=LimitRecordBatch,
> lastSchema=null, lastNewSchema=null]
> 13:57:27.604 [245b3168-3d98-1046-5f2b-6b4935b9012d:frag:0:0] ERROR
> o.a.d.e.p.i.limit.LimitRecordBatch -
> LimitRecordBatch[container=org.apache.drill.exec.record.VectorContainer@1ab00dc8[recordCount
> = 0, schemaChanged = false, schema = BatchSchema [fields=[[`dir0`
> (VARCHAR:OPTIONAL)], [`dir1` (VARCHAR:OPTIONAL)], [`o_orderkey`
> (INT:REQUIRED)], [`o_custkey` (INT:REQUIRED)], [`o_orderstatus`
> (VARCHAR:REQUIRED)], [`o_totalprice` (FLOAT8:REQUIRED)], [`o_orderdate`
> (DATE:REQUIRED)], [`o_orderpriority` (VARCHAR:REQUIRED)], [`o_clerk`
> (VARCHAR:REQUIRED)], [`o_shippriority` (INT:REQUIRED)], [`o_comment`
> (VARCHAR:REQUIRED)]], selectionVector=TWO_BYTE], wrappers =
> [org.apache.drill.exec.vector.NullableVarCharVector@6e1bb101[field = [`dir0`
> (VARCHAR:OPTIONAL)], ...],
> org.apache.drill.exec.vector.NullableVarCharVector@1edc5831[field = [`dir1`
> (VARCHAR:OPTIONAL)], ...],
> org.apache.drill.exec.vector.IntVector@2c63d90c[field = [`o_orderkey`
> (INT:REQUIRED)], ...], org.apache.drill.exec.vector.IntVector@36275f05[field
> = [`o_custkey` (INT:REQUIRED)], ...],
> org.apache.drill.exec.vector.VarCharVector@5077379[field = [`o_orderstatus`
> (VARCHAR:REQUIRED)], ...],
> org.apache.drill.exec.vector.Float8Vector@79e6ae38[field = [`o_totalprice`
> (FLOAT8:REQUIRED)], ...],
> org.apache.drill.exec.vector.DateVector@4bffb3b1[field = [`o_orderdate`
> (DATE:REQUIRED)], ...],
> org.apache.drill.exec.vector.VarCharVector@472c02bc[field =
> [`o_orderpriority` (VARCHAR:REQUIRED)], ...],
> org.apache.drill.exec.vector.VarCharVector@790e3d9b[field = [`o_clerk`
> (VARCHAR:REQUIRED)], ...],
> org.apache.drill.exec.vector.IntVector@35d5cbec[field = [`o_shippriority`
> (INT:REQUIRED)], ...],
> org.apache.drill.exec.vector.VarCharVector@38194dc[field = [`o_comment`
> (VARCHAR:REQUIRED)], ...]], ...], offset=0, numberOfRecords=5,
> incomingSV=null, outgoingSV=[SV2: recs=0 - ]]
> 13:57:27.604 [245b3168-3d98-1046-5f2b-6b4935b9012d:frag:0:0] ERROR
> o.a.d.e.physical.impl.BaseRootExec - Operator dump completed.
> {code}
> For UserException with ErrorType SYSTEM or INTERNAL_ERROR, some exceptions
> carry a message that is not useful, so a line was added for these error types
> suggesting that the user refer to the logs for more information:
> {code}
> org.apache.drill.common.exceptions.UserException: SYSTEM ERROR:
> java.lang.IndexOutOfBoundsException: DrillBuf[7], udle: [1 0..0], index: 0,
> length: 4 (expected: range(0, 0)) DrillBuf[7], udle: [1 0..0]
> Fragment 0:0
> Please, refer to logs for more information.
> [Error Id: 995bd741-eb39-4eaf-86d5-d3089f1041f0 on node4.cluster.com:31010]
> {code}
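> In effect, the message amendment is equivalent to something like the following
> (a hypothetical helper for illustration, not the actual Drill code):
> {code}
> // Hypothetical sketch: append the log hint only for SYSTEM / INTERNAL_ERROR types.
> class ErrorMessageHint {
>   static String appendLogHint(String message, String errorType) {
>     if ("SYSTEM".equals(errorType) || "INTERNAL_ERROR".equals(errorType)) {
>       return message + "\nPlease, refer to logs for more information.";
>     }
>     return message;
>   }
> }
> {code}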