Author: hashutosh
Date: Sun Jul 7 15:17:44 2013
New Revision: 1500449
URL: http://svn.apache.org/r1500449
Log:
HIVE-4805 : Enhance coverage of package org.apache.hadoop.hive.ql.exec.errors
(Ivan Veselovsky via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java?rev=1500449&r1=1500448&r2=1500449&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
Sun Jul 7 15:17:44 2013
@@ -36,7 +36,7 @@ import java.util.regex.Pattern;
public class DataCorruptErrorHeuristic extends RegexErrorHeuristic {
- private static final String SPLIT_REGEX = "split:.*";
+ private static final String SPLIT_REGEX = "split:\\s*([^\\s]+)";
private static final String EXCEPTION_REGEX = "EOFException";
public DataCorruptErrorHeuristic() {
@@ -55,14 +55,13 @@ public class DataCorruptErrorHeuristic e
rll.get(SPLIT_REGEX).size() > 0) {
// There should only be a single split line...
- assert(rll.get(SPLIT_REGEX).size()==1);
String splitLogLine = rll.get(SPLIT_REGEX).get(0);
// Extract only 'split: hdfs://...'
Pattern p = Pattern.compile(SPLIT_REGEX, Pattern.CASE_INSENSITIVE);
Matcher m = p.matcher(splitLogLine);
m.find();
- String splitStr = m.group();
+ String splitStr = m.group(1);
es = new ErrorAndSolution(
"Data file " + splitStr + " is corrupted.",
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java?rev=1500449&r1=1500448&r2=1500449&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
Sun Jul 7 15:17:44 2013
@@ -23,8 +23,8 @@ package org.apache.hadoop.hive.ql.exec.e
*/
public class ErrorAndSolution {
- private String error = null;
- private String solution = null;
+ private final String error;
+ private final String solution;
ErrorAndSolution(String error, String solution) {
this.error = error;
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java?rev=1500449&r1=1500448&r2=1500449&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
Sun Jul 7 15:17:44 2013
@@ -40,7 +40,6 @@ import org.apache.hadoop.mapred.JobConf;
*/
public abstract class RegexErrorHeuristic implements ErrorHeuristic {
- private String query = null;
private JobConf conf = null;
// Pattern to look for in the hive query and whether it matched
@@ -86,10 +85,9 @@ public abstract class RegexErrorHeuristi
* Before init is called, logRegexes and queryRegexes should be populated.
*/
public void init(String query, JobConf conf) {
- this.query = query;
this.conf = conf;
- assert((logRegexes!=null) && (queryRegex != null));
+ assert(queryRegex != null);
Pattern queryPattern = Pattern.compile(queryRegex,
Pattern.CASE_INSENSITIVE);
queryMatches = queryPattern.matcher(query).find();
@@ -98,7 +96,6 @@ public abstract class RegexErrorHeuristi
regexToPattern.put(regex, Pattern.compile(regex,
Pattern.CASE_INSENSITIVE));
regexToLogLines.put(regex, new ArrayList<String>());
}
-
}
@Override
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java?rev=1500449&r1=1500448&r2=1500449&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
Sun Jul 7 15:17:44 2013
@@ -47,13 +47,11 @@ public class TaskLogProcessor {
new HashMap<ErrorHeuristic, HeuristicStats>();
private final List<String> taskLogUrls = new ArrayList<String>();
- private JobConf conf = null;
// Query is the hive query string i.e. "SELECT * FROM src;" associated with
// this set of tasks logs
- private String query = null;
+ private final String query;
public TaskLogProcessor(JobConf conf) {
- this.conf = conf;
query = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYSTRING);
heuristics.put(new ScriptErrorHeuristic(), new HeuristicStats());
@@ -197,7 +195,6 @@ public class TaskLogProcessor {
try {
in = new BufferedReader(
new InputStreamReader(taskAttemptLogUrl.openStream()));
- String inputLine;
String lastLine = null;
boolean lastLineMatched = false;
List<String> stackTrace = null;
@@ -207,9 +204,20 @@ public class TaskLogProcessor {
Pattern endStackTracePattern =
Pattern.compile("^\t... [0-9]+ more.*", Pattern.CASE_INSENSITIVE);
- while ((inputLine =
- ShimLoader.getHadoopShims().unquoteHtmlChars(in.readLine())) !=
null) {
-
+ String inputLine;
+ while (true) {
+ inputLine = in.readLine();
+ if (inputLine == null) { // EOF:
+ if (stackTrace != null) {
+ stackTraces.add(stackTrace);
+ stackTrace = null;
+ }
+ break;
+ }
+
+ inputLine =
+ ShimLoader.getHadoopShims().unquoteHtmlChars(inputLine);
+
if (stackTracePattern.matcher(inputLine).matches() ||
endStackTracePattern.matcher(inputLine).matches()) {
// We are in a stack trace
Added:
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java?rev=1500449&view=auto
==============================================================================
---
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java
(added)
+++
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java
Sun Jul 7 15:17:44 2013
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.io.BufferedReader;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.After;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+public class TestTaskLogProcessor {
+
+ private final List<File> toBeDeletedList = new LinkedList<File>();
+
+ @After
+ public void after() {
+ for (File f: toBeDeletedList) {
+ f.delete();
+ }
+ toBeDeletedList.clear();
+ }
+
+ private File writeTestLog(String id, String content) throws IOException {
+ // Put the script content in a temp file
+ File scriptFile = File.createTempFile(getClass().getName() + "-" + id +
"-", ".log");
+ scriptFile.deleteOnExit();
+ toBeDeletedList.add(scriptFile);
+ PrintStream os = new PrintStream(new FileOutputStream(scriptFile));
+ try {
+ os.print(content);
+ } finally {
+ os.close();
+ }
+ return scriptFile;
+ }
+
+ private String toString(Throwable t) {
+ StringWriter sw = new StringWriter();
+ PrintWriter pw = new PrintWriter(sw, false);
+ t.printStackTrace(pw);
+ pw.close();
+ return sw.toString();
+ }
+
+ /*
+ * Writes the throwable's stack trace to a temp log file, adds that file to the given processor, and returns the printed stack trace string.
+ */
+ private String writeThrowableAsFile(String before, Throwable t, String
after,
+ String fileSuffix, TaskLogProcessor taskLogProcessor) throws IOException
{
+ // compose file text:
+ StringBuilder sb = new StringBuilder();
+ if (before != null) {
+ sb.append(before);
+ }
+ final String stackTraceStr = toString(t);
+ sb.append(stackTraceStr);
+ if (after != null) {
+ sb.append(after);
+ }
+
+ // write it to file:
+ File file = writeTestLog(fileSuffix, sb.toString());
+ // add it to the log processor:
+ taskLogProcessor.addTaskAttemptLogUrl(file.toURI().toURL().toString());
+ return stackTraceStr;
+ }
+
+ @Test
+ public void testGetStackTraces() throws Exception {
+ JobConf jobConf = new JobConf();
+ jobConf.set(HiveConf.ConfVars.HIVEQUERYSTRING.varname, "select * from foo
group by moo;");
+
+ final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);
+
+ Throwable oome = new OutOfMemoryError("java heap space");
+ String oomeStr = writeThrowableAsFile("Some line in the beginning\n",
oome, null, "1", taskLogProcessor);
+
+ Throwable compositeException = new InvocationTargetException(new
IOException(new NullPointerException()));
+ String compositeStr = writeThrowableAsFile(null, compositeException, "Some
line in the end.\n", "2", taskLogProcessor);
+
+ Throwable eofe = new EOFException();
+ String eofeStr = writeThrowableAsFile("line a\nlineb\n", eofe, " line
c\nlineD\n", "3", taskLogProcessor);
+
+ List<List<String>> stackTraces = taskLogProcessor.getStackTraces();
+ assertEquals(3, stackTraces.size());
+
+ // Assert the actual stack traces are exactly equal to the written ones,
+ // and are contained in "stackTraces" list in the submission order:
+ checkException(oomeStr, stackTraces.get(0));
+ checkException(compositeStr, stackTraces.get(1));
+ checkException(eofeStr, stackTraces.get(2));
+ }
+
+ private void checkException(String writenText, List<String> actualTrace)
throws IOException {
+ List<String> expectedLines = getLines(writenText);
+ String expected, actual;
+ for (int i=0; i<expectedLines.size(); i++) {
+ expected = expectedLines.get(i);
+ actual = actualTrace.get(i);
+ assertEquals(expected, actual);
+ }
+ }
+
+ private List<String> getLines(String text) throws IOException{
+ BufferedReader br = new BufferedReader(new StringReader(text));
+ List<String> list = new ArrayList<String>(48);
+ String string;
+ while (true) {
+ string = br.readLine();
+ if (string == null) {
+ break;
+ } else {
+ list.add(string);
+ }
+ }
+ br.close();
+ return list;
+ }
+
+ @Test
+ public void testScriptErrorHeuristic() throws Exception {
+ JobConf jobConf = new JobConf();
+ jobConf.set(HiveConf.ConfVars.HIVEQUERYSTRING.varname, "select * from foo
group by moo;");
+
+ final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);
+
+ String errorCode = "7874"; // example code
+ String content = "line a\nlineb\n" + "Script failed with code " +
errorCode + " line c\nlineD\n";
+ File log3File = writeTestLog("1", content);
+ taskLogProcessor.addTaskAttemptLogUrl(log3File.toURI().toURL().toString());
+
+ List<ErrorAndSolution> errList = taskLogProcessor.getErrors();
+ assertEquals(1, errList.size());
+
+ final ErrorAndSolution eas = errList.get(0);
+
+ String error = eas.getError();
+ assertNotNull(error);
+ // check that the error code is present in the error description:
+ assertTrue(error.indexOf(errorCode) >= 0);
+
+ String solution = eas.getSolution();
+ assertNotNull(solution);
+ assertTrue(solution.length() > 0);
+ }
+
+ @Test
+ public void testDataCorruptErrorHeuristic() throws Exception {
+ JobConf jobConf = new JobConf();
+ jobConf.set(HiveConf.ConfVars.HIVEQUERYSTRING.varname, "select * from foo
group by moo;");
+
+ final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);
+
+ String badFile1 = "hdfs://localhost/foo1/moo1/zoo1";
+ String badFile2 = "hdfs://localhost/foo2/moo2/zoo2";
+ String content = "line a\nlineb\n"
+ + "split: " + badFile1 + " is very bad.\n"
+ + " line c\nlineD\n"
+ + "split: " + badFile2 + " is also very bad.\n"
+ + " java.io.EOFException: null \n"
+ + "line E\n";
+ File log3File = writeTestLog("1", content);
+ taskLogProcessor.addTaskAttemptLogUrl(log3File.toURI().toURL().toString());
+
+ List<ErrorAndSolution> errList = taskLogProcessor.getErrors();
+ assertEquals(1, errList.size());
+
+ final ErrorAndSolution eas = errList.get(0);
+
+ String error = eas.getError();
+ assertNotNull(error);
+ // check that one of the corrupted split file names is present in the error description:
+ assertTrue(error.contains(badFile1) || error.contains(badFile2));
+
+ String solution = eas.getSolution();
+ assertNotNull(solution);
+ assertTrue(solution.length() > 0);
+ }
+
+ @Test
+ public void testMapAggrMemErrorHeuristic() throws Exception {
+ JobConf jobConf = new JobConf();
+ jobConf.set(HiveConf.ConfVars.HIVEQUERYSTRING.varname, "select * from foo
group by moo;");
+
+ final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);
+
+ Throwable oome = new OutOfMemoryError("java heap space");
+ File log1File = writeTestLog("1", toString(oome));
+ taskLogProcessor.addTaskAttemptLogUrl(log1File.toURI().toURL().toString());
+
+ List<ErrorAndSolution> errList = taskLogProcessor.getErrors();
+ assertEquals(1, errList.size());
+
+ final ErrorAndSolution eas = errList.get(0);
+
+ String error = eas.getError();
+ assertNotNull(error);
+ // check that the error description mentions memory:
+ assertTrue(error.contains("memory"));
+
+ String solution = eas.getSolution();
+ assertNotNull(solution);
+ assertTrue(solution.length() > 0);
+ String confName = HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY.toString();
+ assertTrue(solution.contains(confName));
+ }
+
+}