tartarus created MAPREDUCE-7082: ----------------------------------- Summary: Fix FileInputFormat throw java.lang.ArrayIndexOutOfBoundsException(0) Key: MAPREDUCE-7082 URL: https://issues.apache.org/jira/browse/MAPREDUCE-7082 Project: Hadoop Map/Reduce Issue Type: Bug Components: mrv1 Affects Versions: 2.7.1 Environment: CentOS 7
Hive 1.2.1 Hadoop 2.7.1 Reporter: tartarus When HDFS has a missing block and MR then creates splits with FileInputFormat, it will throw an ArrayIndexOutOfBoundsException like this: {code:java} java.lang.ArrayIndexOutOfBoundsException: 0 at org.apache.hadoop.mapred.FileInputFormat.identifyHosts(FileInputFormat.java:708) at org.apache.hadoop.mapred.FileInputFormat.getSplitHostsAndCachedHosts(FileInputFormat.java:675) at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:365) at com.hadoop.mapred.DeprecatedLzoTextInputFormat.getSplits(DeprecatedLzoTextInputFormat.java:129) at org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:305) at org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:407) at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getCombineSplits(CombineHiveInputFormat.java:408) at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:571) at org.apache.hadoop.mapreduce.JobSubmitter.writeOldSplits(JobSubmitter.java:363) at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:355) at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:231) at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290) at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1656) at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287) at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:575) at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:570) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1656) at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:570) 
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:561) {code} Part of the code of method {color:#d04437}getSplits(JobConf job, int numSplits){color} : {code:java} if (isSplitable(fs, path)) { long blockSize = file.getBlockSize(); long splitSize = computeSplitSize(goalSize, minSize, blockSize); long bytesRemaining = length; while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) { String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length-bytesRemaining, splitSize, clusterMap); splits.add(makeSplit(path, length-bytesRemaining, splitSize, splitHosts[0], splitHosts[1])); bytesRemaining -= splitSize; } if (bytesRemaining != 0) { String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length - bytesRemaining, bytesRemaining, clusterMap); splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining, splitHosts[0], splitHosts[1])); } } else { if (LOG.isDebugEnabled()) { // Log only if the file is big enough to be splitted if (length > Math.min(file.getBlockSize(), minSize)) { LOG.debug("File is not splittable so no parallelization " + "is possible: " + file.getPath()); } } String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations,0,length,clusterMap); splits.add(makeSplit(path, 0, length, splitHosts[0], splitHosts[1])); } {code} Part of the code of method {color:#d04437}getSplitHostsAndCachedHosts(BlockLocation[] blkLocations, {color} {color:#d04437} long offset, long splitSize, NetworkTopology clusterMap){color} : {code:java} allTopos = blkLocations[index].getTopologyPaths(); // If no topology information is available, just // prefix a fakeRack if (allTopos.length == 0) { allTopos = fakeRacks(blkLocations, index); } ... return new String[][] { identifyHosts(allTopos.length, racksMap), new String[0]}; {code} Part of the code of method{color:#d04437} identifyHosts(int replicationFactor, Map<Node,NodeInfo> racksMap) :{color} {code:java} String [] retVal = new String[replicationFactor]; ... 
retVal[index++] = host.node.getName().split(":")[0];{code} Because {color:#d04437}blkLocations[index].getTopologyPaths(){color} is empty and {color:#d04437}blkLocations[index].getHosts(){color} is empty as well, {color:#d04437}replicationFactor is 0{color}, so executing {code:java} retVal[index++] = host.node.getName().split(":")[0];{code} will throw ArrayIndexOutOfBoundsException(0) -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: mapreduce-dev-unsubscr...@hadoop.apache.org For additional commands, e-mail: mapreduce-dev-h...@hadoop.apache.org