[ 
https://issues.apache.org/jira/browse/MAPREDUCE-7082?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

tartarus updated MAPREDUCE-7082:
--------------------------------
    Attachment: MAPREDUCE_7082.patch

> Fix FileInputFormat throw java.lang.ArrayIndexOutOfBoundsException(0)
> ---------------------------------------------------------------------
>
>                 Key: MAPREDUCE-7082
>                 URL: https://issues.apache.org/jira/browse/MAPREDUCE-7082
>             Project: Hadoop Map/Reduce
>          Issue Type: Bug
>          Components: mrv1
>    Affects Versions: 2.7.1
>         Environment: CentOS 7
> Hive 1.2.1
> Hadoop 2.7.1
>            Reporter: tartarus
>            Priority: Major
>         Attachments: MAPREDUCE_7082.patch
>
>
> When HDFS has missing blocks and MR creates splits with FileInputFormat,
> it throws an ArrayIndexOutOfBoundsException like this:
>  
> {code:java}
> java.lang.ArrayIndexOutOfBoundsException: 0
> at 
> org.apache.hadoop.mapred.FileInputFormat.identifyHosts(FileInputFormat.java:708)
> at 
> org.apache.hadoop.mapred.FileInputFormat.getSplitHostsAndCachedHosts(FileInputFormat.java:675)
> at 
> org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:365)
> at 
> com.hadoop.mapred.DeprecatedLzoTextInputFormat.getSplits(DeprecatedLzoTextInputFormat.java:129)
> at 
> org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:305)
> at 
> org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:407)
> at 
> org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getCombineSplits(CombineHiveInputFormat.java:408)
> at 
> org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:571)
> at 
> org.apache.hadoop.mapreduce.JobSubmitter.writeOldSplits(JobSubmitter.java:363)
> at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:355)
> at 
> org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:231)
> at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290)
> at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1656)
> at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
> at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:575)
> at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:570)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1656)
> at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:570)
> at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:561)
> {code}
> Part of the code of method {color:#d04437}getSplits(JobConf job, int 
> numSplits){color}:
>  
>  
> {code:java}
> if (isSplitable(fs, path)) {
>   long blockSize = file.getBlockSize();
>   long splitSize = computeSplitSize(goalSize, minSize, blockSize);
>   long bytesRemaining = length;
>   while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
>     String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations,
>         length-bytesRemaining, splitSize, clusterMap);
>     splits.add(makeSplit(path, length-bytesRemaining, splitSize,
>         splitHosts[0], splitHosts[1]));
>     bytesRemaining -= splitSize;
>   }
>   if (bytesRemaining != 0) {
>     String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length
>         - bytesRemaining, bytesRemaining, clusterMap);
>     splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
>         splitHosts[0], splitHosts[1]));
>   }
> } else {
>   if (LOG.isDebugEnabled()) {
>     // Log only if the file is big enough to be splitted
>     if (length > Math.min(file.getBlockSize(), minSize)) {
>       LOG.debug("File is not splittable so no parallelization "
>           + "is possible: " + file.getPath());
>     }
>   }
>   String[][] splitHosts = 
> getSplitHostsAndCachedHosts(blkLocations,0,length,clusterMap);
>   splits.add(makeSplit(path, 0, length, splitHosts[0], splitHosts[1]));
> }
> {code}
> Part of the code of method 
> {color:#d04437}getSplitHostsAndCachedHosts(BlockLocation[] blkLocations, 
> {color}
> {color:#d04437} long offset, long splitSize, NetworkTopology 
> clusterMap){color} :
>  
>  
> {code:java}
> allTopos = blkLocations[index].getTopologyPaths();
> // If no topology information is available, just
> // prefix a fakeRack
> if (allTopos.length == 0) {
>   allTopos = fakeRacks(blkLocations, index);
> }
> ...
> return new String[][] { identifyHosts(allTopos.length, racksMap),
>     new String[0]};
> {code}
> Part of the code of method {color:#d04437}identifyHosts(int replicationFactor, 
> Map<Node,NodeInfo> racksMap){color}:
>  
> {code:java}
> String [] retVal = new String[replicationFactor];
> ...
> retVal[index++] = host.node.getName().split(":")[0];{code}
>  
> Because {color:#d04437}blkLocations[index].getTopologyPaths(){color} is 
> empty and {color:#d04437}blkLocations[index].getHosts(){color} is empty too, 
> {color:#d04437}replicationFactor is 0{color}, so executing 
> {code:java}
> retVal[index++] = host.node.getName().split(":")[0];{code}
>  
> will throw ArrayIndexOutOfBoundsException(0)



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: mapreduce-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: mapreduce-issues-h...@hadoop.apache.org

Reply via email to