[ 
https://issues.apache.org/jira/browse/MAPREDUCE-5912?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Remus Rusanu updated MAPREDUCE-5912:
------------------------------------

    Description: 
{code}
@@ -1098,8 +1120,8 @@ private long calculateOutputSize() throws IOException {
     if (isMapTask() && conf.getNumReduceTasks() > 0) {
       try {
         Path mapOutput =  mapOutputFile.getOutputFile();
-        FileSystem localFS = FileSystem.getLocal(conf);
-        return localFS.getFileStatus(mapOutput).getLen();
+        FileSystem fs = mapOutput.getFileSystem(conf);
+        return fs.getFileStatus(mapOutput).getLen();
       } catch (IOException e) {
         LOG.warn ("Could not find output size " , e);
       }
{code}

The change above, introduced by MAPREDUCE-5196, causes Windows local output files to be routed through HDFS:

{code}
2014-06-02 00:14:53,891 WARN [main] org.apache.hadoop.mapred.YarnChild: 
Exception running child : java.lang.IllegalArgumentException: Pathname 
/c:/Hadoop/Data/Hadoop/local/usercache/HadoopUser/appcache/application_1401693085139_0001/output/attempt_1401693085139_0001_m_000000_0/file.out
 from 
c:/Hadoop/Data/Hadoop/local/usercache/HadoopUser/appcache/application_1401693085139_0001/output/attempt_1401693085139_0001_m_000000_0/file.out
 is not a valid DFS filename.
       at 
org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:187)
       at 
org.apache.hadoop.hdfs.DistributedFileSystem.access$000(DistributedFileSystem.java:101)
       at 
org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1024)
       at 
org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1020)
       at 
org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
       at 
org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1020)
       at org.apache.hadoop.mapred.Task.calculateOutputSize(Task.java:1124)
       at org.apache.hadoop.mapred.Task.sendLastUpdate(Task.java:1102)
       at org.apache.hadoop.mapred.Task.done(Task.java:1048)
{code}


> Task.calculateOutputSize does not handle Windows files after MAPREDUCE-5196
> ---------------------------------------------------------------------------
>
>                 Key: MAPREDUCE-5912
>                 URL: https://issues.apache.org/jira/browse/MAPREDUCE-5912
>             Project: Hadoop Map/Reduce
>          Issue Type: Bug
>    Affects Versions: 3.0.0
>            Reporter: Remus Rusanu
>            Assignee: Remus Rusanu
>
> {code}
> @@ -1098,8 +1120,8 @@ private long calculateOutputSize() throws IOException {
>      if (isMapTask() && conf.getNumReduceTasks() > 0) {
>        try {
>          Path mapOutput =  mapOutputFile.getOutputFile();
> -        FileSystem localFS = FileSystem.getLocal(conf);
> -        return localFS.getFileStatus(mapOutput).getLen();
> +        FileSystem fs = mapOutput.getFileSystem(conf);
> +        return fs.getFileStatus(mapOutput).getLen();
>        } catch (IOException e) {
>          LOG.warn ("Could not find output size " , e);
>        }
> {code}
> The change above, introduced by MAPREDUCE-5196, causes Windows local output files to be routed through HDFS:
> {code}
> 2014-06-02 00:14:53,891 WARN [main] org.apache.hadoop.mapred.YarnChild: 
> Exception running child : java.lang.IllegalArgumentException: Pathname 
> /c:/Hadoop/Data/Hadoop/local/usercache/HadoopUser/appcache/application_1401693085139_0001/output/attempt_1401693085139_0001_m_000000_0/file.out
>  from 
> c:/Hadoop/Data/Hadoop/local/usercache/HadoopUser/appcache/application_1401693085139_0001/output/attempt_1401693085139_0001_m_000000_0/file.out
>  is not a valid DFS filename.
>        at 
> org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:187)
>        at 
> org.apache.hadoop.hdfs.DistributedFileSystem.access$000(DistributedFileSystem.java:101)
>        at 
> org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1024)
>        at 
> org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1020)
>        at 
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>        at 
> org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1020)
>        at org.apache.hadoop.mapred.Task.calculateOutputSize(Task.java:1124)
>        at org.apache.hadoop.mapred.Task.sendLastUpdate(Task.java:1102)
>        at org.apache.hadoop.mapred.Task.done(Task.java:1048)
> {code}



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to