luetzkendorf 2004/11/29 10:43:21
Added: src/share/org/apache/slide/extractor
XmlContentExtractor.java
Log:
added
Revision Changes Path
1.1
jakarta-slide/src/share/org/apache/slide/extractor/XmlContentExtractor.java
Index: XmlContentExtractor.java
===================================================================
/*
* $Header:
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/XmlContentExtractor.java,v
1.1 2004/11/29 18:43:21 luetzkendorf Exp $
* $Revision: 1.1 $
* $Date: 2004/11/29 18:43:21 $
*
* ====================================================================
*
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.extractor;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Content extractor that simply extracts all text content of an
 * XML document.
 *
 * <p>The document is read with a non-validating SAX parser. Character data
 * is concatenated in the order the parser reports it, and a single space is
 * appended after every end tag so that text of adjacent elements does not
 * run together. Entity resolution requests are never forwarded to an
 * external resource; each one is answered with an empty source.</p>
 */
public class XmlContentExtractor extends AbstractContentExtractor
{
    /** System identifier assigned to every parsed input. */
    private static final String SYSTEM_ID = "/slide";

    private final SAXParserFactory parserFactory;
    private final EntityResolverImpl entityResolver = new EntityResolverImpl();

    /**
     * Creates an extractor; both arguments are passed unchanged to
     * {@link AbstractContentExtractor}.
     *
     * @param uri         forwarded to the superclass constructor
     * @param contentType forwarded to the superclass constructor
     */
    public XmlContentExtractor(String uri, String contentType)
    {
        super(uri, contentType);
        parserFactory = createParserFactory();
    }

    /**
     * Creates an extractor; all arguments are passed unchanged to
     * {@link AbstractContentExtractor}.
     *
     * @param uri         forwarded to the superclass constructor
     * @param contentType forwarded to the superclass constructor
     * @param namespace   forwarded to the superclass constructor
     */
    public XmlContentExtractor(String uri, String contentType, String namespace)
    {
        super(uri, contentType, namespace);
        parserFactory = createParserFactory();
    }

    /**
     * Parses the given XML stream and returns its collected text.
     *
     * @param content the XML document to read; must not be <code>null</code>
     * @return a reader over the concatenated character data, with one space
     *         appended for each end tag
     * @throws ExtractorException if <code>content</code> is <code>null</code>,
     *         if no parser can be created, if reading fails, or if the parser
     *         reports an error or fatal error; where available the
     *         underlying exception is attached as the cause
     */
    public Reader extract(InputStream content) throws ExtractorException
    {
        if (content == null) {
            // An InputSource without a stream makes the parser open the
            // system id itself, i.e. it would try to read "/slide".
            throw new ExtractorException("No content stream to extract from");
        }

        TextCollector collector = new TextCollector();
        try {
            XMLReader reader = parserFactory.newSAXParser().getXMLReader();
            reader.setContentHandler(collector);
            reader.setErrorHandler(collector);
            reader.setEntityResolver(this.entityResolver);

            InputSource source = new InputSource(content);
            source.setSystemId(SYSTEM_ID);
            reader.parse(source);
        } catch (ParserConfigurationException e) {
            throw failure(e);
        } catch (SAXException e) {
            throw failure(e);
        } catch (IOException e) {
            throw failure(e);
        }

        // Errors the handler recorded without aborting the parse still make
        // the extraction fail.
        if (collector.exception != null) {
            throw failure(collector.exception);
        }
        return new StringReader(collector.buffer.toString());
    }

    /** Builds the non-validating parser factory shared by both constructors. */
    private static SAXParserFactory createParserFactory()
    {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setValidating(false);
        return factory;
    }

    /**
     * Wraps a low-level problem in an {@link ExtractorException}, keeping the
     * original message text and attaching the original exception as cause.
     */
    private static ExtractorException failure(Exception cause)
    {
        ExtractorException wrapped = new ExtractorException(cause.toString());
        try {
            wrapped.initCause(cause);
        } catch (IllegalStateException alreadySet) {
            // The exception's constructor already fixed its cause; the
            // message above still carries the original description.
        }
        return wrapped;
    }

    /**
     * SAX handler that accumulates character data and remembers the first
     * error or fatal error reported by the parser.
     */
    private static class TextCollector extends DefaultHandler {
        final StringBuffer buffer = new StringBuffer();
        SAXParseException exception = null;

        public void characters(char[] ch, int start, int length)
            throws SAXException
        {
            this.buffer.append(ch, start, length);
        }

        public void endElement(String uri, String localName, String qName)
            throws SAXException
        {
            // each end tag breaks words, TODO make this configurable
            this.buffer.append(' ');
        }

        public void error(SAXParseException e) throws SAXException
        {
            remember(e);
        }

        public void fatalError(SAXParseException e) throws SAXException
        {
            remember(e);
        }

        /**
         * Keeps only the first reported problem; later ones are often
         * consequences of it.
         */
        private void remember(SAXParseException e)
        {
            if (this.exception == null) {
                this.exception = e;
            }
        }
    }

    /**
     * Entity resolver that answers every request with an empty document
     * instead of loading the referenced resource.
     */
    private static class EntityResolverImpl implements EntityResolver {
        public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException
        {
            return new InputSource(new StringReader(""));
        }
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]