This message is for those of you coming up against "out of memory" issues whilst unmarshalling large XML files.
I've written a class that will stream the objects to you as they are parsed, getting rid of the root hierarchy altogether. For testing I've parsed a 500MB XML file and watched as the VM used less than 1MB of RAM to process it (it took 2 1/2 minutes). Its use isn't constrained to large files either. In fact it's an efficient way to process all your Castor hierarchies whilst getting just the objects you want. I've included an abstract class and a usage of the abstract class below so you can see how it works: import org.xml.sax.helpers.XMLReaderFactory; import org.xml.sax.helpers.XMLFilterImpl; import org.xml.sax.InputSource; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.exolab.castor.xml.Unmarshaller; import org.exolab.castor.xml.UnmarshalHandler; import org.exolab.castor.xml.UnmarshalListener; import java.io.Reader; import java.io.IOException; /** * An abstract class for unmarshalling large XML files. Castor's default * behaviour is to return an object hierarchy which with large XML files * can use too much memory. The approach used here is to filter over * objects we're not interested in and just receive the objects we want * via a callback as its unmarshalled. Thus only using enough memory to * store the current unmarshalled object. * * Credit goes to Andre Vanha [EMAIL PROTECTED] for commentary he * posted to the Castor Dev List on handling large files. 
*
* http://www.mail-archive.com/[EMAIL PROTECTED]/msg16052.html
*
* User: KallenM
* Date: Jul 13, 2004
* Time: 11:14:45 AM
*/
public abstract class FilteredUnmarshaller {

    /** Class whose unmarshalled instances are passed to {@link #onElement(Object)}. */
    private final Class m_targetClass;

    /** SAX filter sitting between the XML parser and Castor's unmarshal handler. */
    private final LocalXMLFilter m_xr;

    /** Source of the XML data; read by {@link #parse()} and never closed by this class. */
    private final Reader m_reader;

    /**
     * Wires a SAX parser, an element filter and a Castor unmarshal handler
     * together. Nothing is read from <code>reader</code> until
     * {@link #parse()} is called.
     *
     * @param target_class the top level class you are filtering for
     * @param reader a reader to the data; the caller remains responsible for closing it
     * @param validate validate the unmarshalled objects
     * @throws SAXException if the underlying XML reader cannot be created
     */
    public FilteredUnmarshaller(Class target_class, Reader reader, boolean validate) throws SAXException {
        m_targetClass = target_class;
        m_reader = reader;

        Unmarshaller unmarshaller = new Unmarshaller(m_targetClass);
        unmarshaller.setValidation(validate);

        // The handler receives SAX events and reports each finished object
        // to the listener, which forwards target-class instances to onElement().
        UnmarshalHandler handler = unmarshaller.createHandler();
        handler.setUnmarshalListener(new LocalUnmarshallListener());

        // parser -> filter (drops ignored start/end element events) -> handler
        m_xr = new LocalXMLFilter();
        m_xr.setParent(XMLReaderFactory.createXMLReader());
        m_xr.setContentHandler(handler);
        m_xr.setErrorHandler(handler);
    }

    /**
     * Begin parsing the data supplied to the constructor.
     * {@link #onElement(Object)} is invoked from within this call.
     *
     * @throws SAXException if the parser or the unmarshal handler reports an error
     * @throws IOException if reading from the underlying reader fails
     */
    public void parse() throws SAXException, IOException {
        m_xr.parse(new InputSource(m_reader));
    }

    /**
     * Allow overriding classes to ignore certain elements in the XML file.
     * Typically you will want to ignore all elements above the target element
     * supplied to the constructor.
     *
     * @param element_name the qualified name of the element currently being
     *                     handled by startElement/endElement
     * @return true if this element should be ignored
     */
    public abstract boolean ignoreElement(String element_name);

    /**
     * Called during unmarshalling when your target class has been encountered.
     *
     * @param target_class_object an object of the target class type
     */
    public abstract void onElement(Object target_class_object);

    /**
     * SAX filter that suppresses the start and end events of every element for
     * which {@link FilteredUnmarshaller#ignoreElement(String)} returns true.
     * Only these two callbacks are overridden here; all other events are left
     * to the inherited XMLFilterImpl behaviour.
     */
    private class LocalXMLFilter extends XMLFilterImpl {

        public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
                throws SAXException {
            if (!ignoreElement(qName)) {
                super.startElement(namespaceURI, localName, qName, atts);
            }
        }

        public void endElement(String uri, String localName, String qName) throws SAXException {
            // Must mirror startElement so the handler never sees an unbalanced end tag.
            if (!ignoreElement(qName)) {
                super.endElement(uri, localName, qName);
            }
        }
    }

    /**
     * Castor listener that forwards fully unmarshalled objects of exactly the
     * target class (subclasses do not match) to
     * {@link FilteredUnmarshaller#onElement(Object)}.
     */
    private class LocalUnmarshallListener implements UnmarshalListener {

        public void initialized(Object object) {
            // not needed
        }

        public void attributesProcessed(Object object) {
            // not needed
        }

        public void fieldAdded(String fieldName, Object parent, Object child) {
            // not needed
        }

        public void unmarshalled(Object obj) {
            // Guard against a null callback argument before inspecting its class.
            if (obj != null && obj.getClass().equals(m_targetClass)) {
                onElement(obj);
            }
        }
    }
}

/**
 * Example usage of {@link FilteredUnmarshaller}: skips the "MyEnvelope" and
 * "MyHeader" wrapper elements and receives each unmarshalled target object
 * in {@link #onElement(Object)}.
 */
public class MessageUnmarshaller extends FilteredUnmarshaller {

    /**
     * Parses the XML file named by the first argument and prints the elapsed
     * time in whole seconds.
     *
     * @param args args[0] is the path of the XML file to process
     * @throws Exception if the file cannot be opened, read or parsed
     */
    public static void main(String args[]) throws Exception {
        if (args.length < 1) {
            System.err.println("usage: MessageUnmarshaller <xml-file>");
            return;
        }

        long start = System.currentTimeMillis();

        // Fully qualified because java.io.FileReader is not among the file's imports.
        java.io.FileReader reader = new java.io.FileReader(args[0]);
        try {
            MessageUnmarshaller l = new MessageUnmarshaller(Message.class, reader, false);
            l.parse();
        } finally {
            // FilteredUnmarshaller never closes the reader, so do it here.
            reader.close();
        }

        System.out.println((System.currentTimeMillis() - start) / 1000);
    }

    /**
     * @param target_class the top level class you are filtering for
     * @param reader a reader to the data
     * @param validate validate the unmarshalled objects
     * @throws SAXException if the underlying XML reader cannot be created
     */
    public MessageUnmarshaller(Class target_class, Reader reader, boolean validate) throws SAXException {
        super(target_class, reader, validate);
    }

    /**
     * Ignores the wrapper elements that sit above the target element.
     *
     * @param element_name the qualified name of the current element
     * @return true for "MyEnvelope" and "MyHeader", false otherwise
     */
    public boolean ignoreElement(String element_name) {
        // Literal-first equals() keeps this safe if element_name is null.
        return "MyEnvelope".equals(element_name) || "MyHeader".equals(element_name);
    }

    /**
     * Receives each unmarshalled object of the target class.
     *
     * @param target_class_object an object of the target class type
     */
    public void onElement(Object target_class_object) {
        // Will only receive Message objects
    }
}

-----------------------------------------------------------
If you wish to unsubscribe from this mailing, send mail to [EMAIL PROTECTED] with a subject of: unsubscribe castor-user
