Author: hashutosh
Date: Sun Jul  7 15:17:44 2013
New Revision: 1500449

URL: http://svn.apache.org/r1500449
Log:
HIVE-4805 : Enhance coverage of package org.apache.hadoop.hive.ql.exec.errors 
(Ivan Veselovsky via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/
    
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java
Modified:
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java?rev=1500449&r1=1500448&r2=1500449&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
 Sun Jul  7 15:17:44 2013
@@ -36,7 +36,7 @@ import java.util.regex.Pattern;
 
 public class DataCorruptErrorHeuristic extends RegexErrorHeuristic {
 
-  private static final String SPLIT_REGEX = "split:.*";
+  private static final String SPLIT_REGEX = "split:\\s*([^\\s]+)";
   private static final String EXCEPTION_REGEX = "EOFException";
 
   public DataCorruptErrorHeuristic() {
@@ -55,14 +55,13 @@ public class DataCorruptErrorHeuristic e
           rll.get(SPLIT_REGEX).size() > 0) {
 
         // There should only be a single split line...
-        assert(rll.get(SPLIT_REGEX).size()==1);
         String splitLogLine = rll.get(SPLIT_REGEX).get(0);
 
         // Extract only 'split: hdfs://...'
         Pattern p = Pattern.compile(SPLIT_REGEX, Pattern.CASE_INSENSITIVE);
         Matcher m = p.matcher(splitLogLine);
         m.find();
-        String splitStr = m.group();
+        String splitStr = m.group(1);
 
         es = new ErrorAndSolution(
             "Data file " + splitStr + " is corrupted.",

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java?rev=1500449&r1=1500448&r2=1500449&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
 Sun Jul  7 15:17:44 2013
@@ -23,8 +23,8 @@ package org.apache.hadoop.hive.ql.exec.e
  */
 public class ErrorAndSolution {
 
-  private String error = null;
-  private String solution = null;
+  private final String error;
+  private final String solution;
 
   ErrorAndSolution(String error, String solution) {
     this.error = error;

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java?rev=1500449&r1=1500448&r2=1500449&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
 Sun Jul  7 15:17:44 2013
@@ -40,7 +40,6 @@ import org.apache.hadoop.mapred.JobConf;
  */
 public abstract class RegexErrorHeuristic implements ErrorHeuristic {
 
-  private String query = null;
   private JobConf conf = null;
 
   // Pattern to look for in the hive query and whether it matched
@@ -86,10 +85,9 @@ public abstract class RegexErrorHeuristi
    * Before init is called, logRegexes and queryRegexes should be populated.
    */
   public void init(String query, JobConf conf) {
-    this.query = query;
     this.conf = conf;
 
-    assert((logRegexes!=null) && (queryRegex != null));
+    assert(queryRegex != null);
 
     Pattern queryPattern = Pattern.compile(queryRegex, 
Pattern.CASE_INSENSITIVE);
     queryMatches = queryPattern.matcher(query).find();
@@ -98,7 +96,6 @@ public abstract class RegexErrorHeuristi
       regexToPattern.put(regex, Pattern.compile(regex, 
Pattern.CASE_INSENSITIVE));
       regexToLogLines.put(regex, new ArrayList<String>());
     }
-
   }
 
   @Override

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java?rev=1500449&r1=1500448&r2=1500449&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
 Sun Jul  7 15:17:44 2013
@@ -47,13 +47,11 @@ public class TaskLogProcessor {
     new HashMap<ErrorHeuristic, HeuristicStats>();
   private final List<String> taskLogUrls = new ArrayList<String>();
 
-  private JobConf conf = null;
   // Query is the hive query string i.e. "SELECT * FROM src;" associated with
   // this set of tasks logs
-  private String query = null;
+  private final String query;
 
   public TaskLogProcessor(JobConf conf) {
-    this.conf = conf;
     query = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYSTRING);
 
     heuristics.put(new ScriptErrorHeuristic(), new HeuristicStats());
@@ -197,7 +195,6 @@ public class TaskLogProcessor {
       try {
         in = new BufferedReader(
             new InputStreamReader(taskAttemptLogUrl.openStream()));
-        String inputLine;
         String lastLine = null;
         boolean lastLineMatched = false;
         List<String> stackTrace = null;
@@ -207,9 +204,20 @@ public class TaskLogProcessor {
         Pattern endStackTracePattern =
             Pattern.compile("^\t... [0-9]+ more.*", Pattern.CASE_INSENSITIVE);
 
-        while ((inputLine =
-          ShimLoader.getHadoopShims().unquoteHtmlChars(in.readLine())) != 
null) {
-
+        String inputLine;
+        while (true) {
+          inputLine = in.readLine();
+          if (inputLine == null) { // EOF:
+            if (stackTrace != null) {
+              stackTraces.add(stackTrace);
+              stackTrace = null;
+            }
+            break;
+          }
+          
+          inputLine =
+              ShimLoader.getHadoopShims().unquoteHtmlChars(inputLine);
+          
           if (stackTracePattern.matcher(inputLine).matches() ||
               endStackTracePattern.matcher(inputLine).matches()) {
             // We are in a stack trace

Added: 
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java?rev=1500449&view=auto
==============================================================================
--- 
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java
 (added)
+++ 
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/errors/TestTaskLogProcessor.java
 Sun Jul  7 15:17:44 2013
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.io.BufferedReader;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.After;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+public class TestTaskLogProcessor {
+  
+  private final List<File> toBeDeletedList = new LinkedList<File>();
+  
+  @After
+  public void after() {
+    for (File f: toBeDeletedList) {
+      f.delete();
+    }
+    toBeDeletedList.clear();
+  }
+  
+  private File writeTestLog(String id, String content) throws IOException {
+    // Put the script content in a temp file
+    File scriptFile = File.createTempFile(getClass().getName() + "-" + id + 
"-", ".log");
+    scriptFile.deleteOnExit();
+    toBeDeletedList.add(scriptFile);
+    PrintStream os = new PrintStream(new FileOutputStream(scriptFile));
+    try {
+      os.print(content);
+    } finally {
+      os.close();
+    }
+    return scriptFile;
+  }
+  
+  private String toString(Throwable t) {
+    StringWriter sw = new StringWriter();
+    PrintWriter pw = new PrintWriter(sw, false); 
+    t.printStackTrace(pw);
+    pw.close();
+    return sw.toString();
+  }
+  
+  /*
+   * returns number of lines in the printed throwable stack trace.
+   */
+  private String writeThrowableAsFile(String before, Throwable t, String 
after, 
+      String fileSuffix, TaskLogProcessor taskLogProcessor) throws IOException 
{
+    // compose file text:
+    StringBuilder sb = new StringBuilder();
+    if (before != null) {
+      sb.append(before);
+    }
+    final String stackTraceStr = toString(t);
+    sb.append(stackTraceStr);
+    if (after != null) {
+      sb.append(after);
+    }
+    
+    // write it to file:
+    File file = writeTestLog(fileSuffix, sb.toString());
+    // add it to the log processor:
+    taskLogProcessor.addTaskAttemptLogUrl(file.toURI().toURL().toString());
+    return stackTraceStr;
+  }
+
+  @Test
+  public void testGetStackTraces() throws Exception {
+    JobConf jobConf = new JobConf();
+    jobConf.set(HiveConf.ConfVars.HIVEQUERYSTRING.varname, "select * from foo 
group by moo;");
+
+    final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);
+
+    Throwable oome = new OutOfMemoryError("java heap space");
+    String oomeStr = writeThrowableAsFile("Some line in the beginning\n", 
oome, null, "1", taskLogProcessor);
+
+    Throwable compositeException = new InvocationTargetException(new 
IOException(new NullPointerException()));
+    String compositeStr = writeThrowableAsFile(null, compositeException, "Some 
line in the end.\n", "2", taskLogProcessor);
+
+    Throwable eofe = new EOFException();
+    String eofeStr = writeThrowableAsFile("line a\nlineb\n", eofe, " line 
c\nlineD\n", "3", taskLogProcessor);
+    
+    List<List<String>> stackTraces = taskLogProcessor.getStackTraces();
+    assertEquals(3, stackTraces.size());
+    
+    // Assert the actual stack traces are exactly equal to the written ones, 
+    // and are contained in "stackTraces" list in the submission order:
+    checkException(oomeStr, stackTraces.get(0));
+    checkException(compositeStr, stackTraces.get(1));
+    checkException(eofeStr, stackTraces.get(2));
+  }
+  
+  private void checkException(String writenText, List<String> actualTrace) 
throws IOException {
+    List<String> expectedLines = getLines(writenText);
+    String expected, actual; 
+    for (int i=0; i<expectedLines.size(); i++) {
+      expected = expectedLines.get(i);
+      actual = actualTrace.get(i);
+      assertEquals(expected, actual);
+    }
+  }
+  
+  private List<String> getLines(String text) throws IOException{
+    BufferedReader br = new BufferedReader(new StringReader(text));
+    List<String> list = new ArrayList<String>(48);
+    String string;
+    while (true) {
+      string = br.readLine();
+      if (string == null) {
+        break;
+      } else {
+        list.add(string);
+      }
+    }
+    br.close();
+    return list;
+  }
+  
+  @Test
+  public void testScriptErrorHeuristic() throws Exception {
+    JobConf jobConf = new JobConf();
+    jobConf.set(HiveConf.ConfVars.HIVEQUERYSTRING.varname, "select * from foo 
group by moo;");
+
+    final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);
+    
+    String errorCode = "7874"; // example code
+    String content = "line a\nlineb\n" + "Script failed with code " + 
errorCode + " line c\nlineD\n";
+    File log3File = writeTestLog("1", content);
+    taskLogProcessor.addTaskAttemptLogUrl(log3File.toURI().toURL().toString());
+    
+    List<ErrorAndSolution> errList = taskLogProcessor.getErrors();
+    assertEquals(1, errList.size());
+    
+    final ErrorAndSolution eas = errList.get(0);
+    
+    String error = eas.getError();
+    assertNotNull(error);
+    // check that the error code is present in the error description: 
+    assertTrue(error.indexOf(errorCode) >= 0);
+    
+    String solution = eas.getSolution();
+    assertNotNull(solution);
+    assertTrue(solution.length() > 0);
+  }
+
+  @Test
+  public void testDataCorruptErrorHeuristic() throws Exception {
+    JobConf jobConf = new JobConf();
+    jobConf.set(HiveConf.ConfVars.HIVEQUERYSTRING.varname, "select * from foo 
group by moo;");
+
+    final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);
+    
+    String badFile1 = "hdfs://localhost/foo1/moo1/zoo1"; 
+    String badFile2 = "hdfs://localhost/foo2/moo2/zoo2"; 
+    String content = "line a\nlineb\n" 
+       + "split: " + badFile1 + " is very bad.\n" 
+       + " line c\nlineD\n" 
+       + "split: " + badFile2 + " is also very bad.\n" 
+       + " java.io.EOFException: null \n" 
+       + "line E\n";
+    File log3File = writeTestLog("1", content);
+    taskLogProcessor.addTaskAttemptLogUrl(log3File.toURI().toURL().toString());
+    
+    List<ErrorAndSolution> errList = taskLogProcessor.getErrors();
+    assertEquals(1, errList.size());
+    
+    final ErrorAndSolution eas = errList.get(0);
+    
+    String error = eas.getError();
+    assertNotNull(error);
+    // check that the error code is present in the error description: 
+    assertTrue(error.contains(badFile1) || error.contains(badFile2));
+    
+    String solution = eas.getSolution();
+    assertNotNull(solution);
+    assertTrue(solution.length() > 0);
+  }
+  
+  @Test
+  public void testMapAggrMemErrorHeuristic() throws Exception {
+    JobConf jobConf = new JobConf();
+    jobConf.set(HiveConf.ConfVars.HIVEQUERYSTRING.varname, "select * from foo 
group by moo;");
+
+    final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);
+
+    Throwable oome = new OutOfMemoryError("java heap space");
+    File log1File = writeTestLog("1", toString(oome));
+    taskLogProcessor.addTaskAttemptLogUrl(log1File.toURI().toURL().toString());
+    
+    List<ErrorAndSolution> errList = taskLogProcessor.getErrors();
+    assertEquals(1, errList.size());
+    
+    final ErrorAndSolution eas = errList.get(0);
+    
+    String error = eas.getError();
+    assertNotNull(error);
+    // check that the error code is present in the error description: 
+    assertTrue(error.contains("memory"));
+    
+    String solution = eas.getSolution();
+    assertNotNull(solution);
+    assertTrue(solution.length() > 0);
+    String confName = HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY.toString();
+    assertTrue(solution.contains(confName));
+  }
+  
+}


Reply via email to the commits mailing list.