[ https://issues.apache.org/jira/browse/MAPREDUCE-7082?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
tartarus updated MAPREDUCE-7082: -------------------------------- Attachment: MAPREDUCE_7082.patch > Fix FileInputFormat throw java.lang.ArrayIndexOutOfBoundsException(0) > --------------------------------------------------------------------- > > Key: MAPREDUCE-7082 > URL: https://issues.apache.org/jira/browse/MAPREDUCE-7082 > Project: Hadoop Map/Reduce > Issue Type: Bug > Components: mrv1 > Affects Versions: 2.7.1 > Environment: CentOS 7 > Hive 1.2.1 > Hadoop 2.7.1 > Reporter: tartarus > Priority: Major > Attachments: MAPREDUCE_7082.patch > > > when HDFS is missing a block and MR creates splits with FileInputFormat, > it will throw an ArrayIndexOutOfBoundsException like this > > {code:java} > java.lang.ArrayIndexOutOfBoundsException: 0 > at > org.apache.hadoop.mapred.FileInputFormat.identifyHosts(FileInputFormat.java:708) > at > org.apache.hadoop.mapred.FileInputFormat.getSplitHostsAndCachedHosts(FileInputFormat.java:675) > at > org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:365) > at > com.hadoop.mapred.DeprecatedLzoTextInputFormat.getSplits(DeprecatedLzoTextInputFormat.java:129) > at > org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:305) > at > org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:407) > at > org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getCombineSplits(CombineHiveInputFormat.java:408) > at > org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:571) > at > org.apache.hadoop.mapreduce.JobSubmitter.writeOldSplits(JobSubmitter.java:363) > at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:355) > at > org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:231) > at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290) > at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287) > at java.security.AccessController.doPrivileged(Native Method) > at 
javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1656) > at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287) > at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:575) > at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:570) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1656) > at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:570) > at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:561) > {code} > part code of method {color:#d04437}getSplits(JobConf job, int > numSplits){color} : > > > {code:java} > if (isSplitable(fs, path)) { > long blockSize = file.getBlockSize(); > long splitSize = computeSplitSize(goalSize, minSize, blockSize); > long bytesRemaining = length; > while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) { > String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, > length-bytesRemaining, splitSize, clusterMap); > splits.add(makeSplit(path, length-bytesRemaining, splitSize, > splitHosts[0], splitHosts[1])); > bytesRemaining -= splitSize; > } > if (bytesRemaining != 0) { > String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length > - bytesRemaining, bytesRemaining, clusterMap); > splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining, > splitHosts[0], splitHosts[1])); > } > } else { > if (LOG.isDebugEnabled()) { > // Log only if the file is big enough to be splitted > if (length > Math.min(file.getBlockSize(), minSize)) { > LOG.debug("File is not splittable so no parallelization " > + "is possible: " + file.getPath()); > } > } > String[][] splitHosts = > getSplitHostsAndCachedHosts(blkLocations,0,length,clusterMap); > splits.add(makeSplit(path, 0, length, splitHosts[0], splitHosts[1])); > } > {code} > part code of 
method > {color:#d04437}getSplitHostsAndCachedHosts(BlockLocation[] blkLocations, > {color} > {color:#d04437} long offset, long splitSize, NetworkTopology > clusterMap){color} : > > > {code:java} > allTopos = blkLocations[index].getTopologyPaths(); > // If no topology information is available, just > // prefix a fakeRack > if (allTopos.length == 0) { > allTopos = fakeRacks(blkLocations, index); > } > ... > return new String[][] { identifyHosts(allTopos.length, racksMap), > new String[0]}; > {code} > part code of method {color:#d04437} identifyHosts(int replicationFactor, > Map<Node,NodeInfo> racksMap) :{color} > > {code:java} > String [] retVal = new String[replicationFactor]; > ... > retVal[index++] = host.node.getName().split(":")[0];{code} > > because the {color:#d04437}blkLocations[index].getTopologyPaths(){color} is > empty and {color:#d04437}blkLocations[index].getHosts(){color} is empty too, > so {color:#d04437}replicationFactor is 0{color} , then executing > {code:java} > retVal[index++] = host.node.getName().split(":")[0];{code} > > will throw an ArrayIndexOutOfBoundsException(0) -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: mapreduce-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: mapreduce-issues-h...@hadoop.apache.org