[ https://issues.apache.org/jira/browse/MAPREDUCE-5664?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13838754#comment-13838754 ]
Steve Loughran commented on MAPREDUCE-5664: ------------------------------------------- You need to move up to a recent version of Hadoop: the fixes for your problem are already in the codebase. If you can't move up, look at HADOOP-5254 for one route as to how to go about identifying and working round the XML parser versioning issue *on your own machine*. > java.lang.RuntimeException: javax.xml.parsers.ParserConfigurationException: > --------------------------------------------------------------------------- > > Key: MAPREDUCE-5664 > URL: https://issues.apache.org/jira/browse/MAPREDUCE-5664 > Project: Hadoop Map/Reduce > Issue Type: Bug > Reporter: ranjini > > Hi, > I am using Hadoop 0.21 version and Java 1.6. Please help me to fix the > issue. What version jar should I use? > The sample code with XML I have attached here. > {code} > <?xml version="1.0"?> > <Company> > <Employee> > <id>100</id> > <ename>ranjini</ename> > <dept>IT</dept> > <sal>123456</sal> > <location>nextlevel</location> > </Employee> > </Company> > {code} > {code} > import java.io.IOException; > import java.util.*; > import org.apache.hadoop.fs.Path; > import org.apache.hadoop.fs.FileSystem; > import org.apache.hadoop.conf.*; > import org.apache.hadoop.conf.Configuration; > import org.apache.hadoop.fs.FileStatus; > import org.apache.hadoop.io.*; > import org.apache.hadoop.mapred.*; > import org.apache.hadoop.util.*; > import java.io.*; > import org.apache.hadoop.mapred.lib.*; > import java.io.Reader; > import java.io.StringReader; > import org.jdom.Document; > import org.jdom.Element; > import org.jdom.JDOMException; > import org.jdom.input.SAXBuilder; > public class ParseXml { > public static class Map extends MapReduceBase implements > Mapper<LongWritable, Text, Text, Text> { > > public void map(LongWritable key, Text value, > OutputCollector<Text, Text> output, Reporter > reporter) > throws IOException { > > String s=""; > FileSystem fs=null; > Configuration conf=new Configuration(); > 
conf.set("fs.default.name","hdfs://localhost:4440/"); > Path srcpath=new Path("/user/hduser/Ran/"); > try { > > String xmlString = value.toString(); > > SAXBuilder builder = new SAXBuilder(); > Reader in = new StringReader(xmlString); > Document doc = builder.build(in); > Element root = doc.getRootElement(); > > s > =root.getChild("Employee").getChild("id").getChild("ename").getChild("dept").getChild("sal").getChild("location").getTextTrim(); > output.collect(new Text(""),new Text(s)); > > } catch (Exception e) { > e.printStackTrace(); > } > } > } > > public static void main(String[] args) throws Exception { > > String input="/user/hduser/Ran/"; > String fileoutput="/user/task/Sales/"; > JobConf conf = new JobConf(ParseXml.class); > conf.setJobName("file"); > conf.setOutputKeyClass(Text.class); > conf.setOutputValueClass(Text.class); > conf.setNumReduceTasks(1); > conf.setMapperClass(Map.class); > conf.setInputFormat(TextInputFormat.class); > conf.setOutputFormat(TextOutputFormat.class); > FileInputFormat.setInputPaths(conf,input); > Path outPath = new Path(fileoutput); > FileOutputFormat.setOutputPath(conf, outPath); > FileSystem dfs = FileSystem.get(outPath.toUri(), conf); > if (dfs.exists(outPath)) { > dfs.delete(outPath, true); > } > //conf.setOutputFormat(MultiFileOutput.class); > JobClient.runJob(conf); > } > } > {code} > When processing xml file as input via map reduce, the error occurred is > {code} > conf.Configuration: error parsing conf file: > javax.xml.parsers.ParserConfigurationException: Feature > 'http://apache.org/xml/features/xinclude' is not recognized. > Exception in thread "main" java.lang.RuntimeException: > javax.xml.parsers.ParserConfigurationException: Feature > 'http://apache.org/xml/features/xinclude' is not recognized. 
> at > org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:1171) > at > org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:1030) > at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:980) > at org.apache.hadoop.conf.Configuration.get(Configuration.java:382) > at org.apache.hadoop.util.RunJar.main(RunJar.java:109) > Caused by: javax.xml.parsers.ParserConfigurationException: Feature > 'http://apache.org/xml/features/xinclude' is not recognized. > at > org.apache.xerces.jaxp.DocumentBuilderFactoryImpl.newDocumentBuilder(Unknown > Source) > at > org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:1061) > ... 4 more > {code} > Please help to fix the issue -- This message was sent by Atlassian JIRA (v6.1#6144)