This message is for those of you coming up against
"out of memory" issues whilst unmarshalling large XML files.

I've written a class that will stream the objects
to you as they are parsed, getting rid of the root
hierarchy altogether.

For testing, I parsed a 500MB XML file
and watched as the VM used less than 1MB
of RAM to process it (it took 2 1/2 minutes).

Its use isn't constrained to large files either.
In fact, it's an efficient way to process all your
Castor hierarchies whilst getting just the objects
you want.

I've included an abstract class and an example subclass
below so you can see how it works.


import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.XMLFilterImpl;
import org.xml.sax.InputSource;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.exolab.castor.xml.Unmarshaller;
import org.exolab.castor.xml.UnmarshalHandler;
import org.exolab.castor.xml.UnmarshalListener;

import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;

/**
 * Base class for unmarshalling large XML documents with Castor without
 * holding the whole object hierarchy in memory.
 *
 * Instead of returning one root object, the SAX event stream is filtered:
 * start/end events for elements the subclass chooses to ignore are dropped,
 * and every unmarshalled object whose class is exactly the target class is
 * handed to the subclass through {@link #onElement(Object)}.
 *
 * Credit goes to Andre Vanha [EMAIL PROTECTED] for commentary he
 * posted to the Castor Dev List on handling large files.
 *
 * http://www.mail-archive.com/[EMAIL PROTECTED]/msg16052.html
 *
 * User: KallenM
 * Date: Jul 13, 2004
 * Time: 11:14:45 AM
 */
public abstract class FilteredUnmarshaller
{
    /** Class whose instances are forwarded to {@link #onElement(Object)}. */
    private final Class          wantedClass;
    /** SAX filter sitting between the XML reader and Castor's handler. */
    private final LocalXMLFilter filter;
    /** Character stream supplying the XML document. */
    private final Reader         source;

    /**
     * Wires a Castor unmarshal handler behind an element-dropping SAX filter.
     *
     * @param target_class  the class to unmarshal and report via onElement
     * @param reader        character stream containing the XML to parse
     * @param validate      whether Castor should validate unmarshalled objects
     * @throws SAXException if the underlying XML reader cannot be created
     */
    public FilteredUnmarshaller(Class target_class, Reader reader,
                                boolean validate) throws SAXException
    {
        wantedClass = target_class;

        Unmarshaller castor = new Unmarshaller(wantedClass);
        castor.setValidation(validate);

        // The handler is both the content sink and the error sink of the filter.
        UnmarshalHandler saxHandler = castor.createHandler();
        saxHandler.setUnmarshalListener(new LocalUnmarshallListener());

        LocalXMLFilter chain = new LocalXMLFilter();
        chain.setParent(XMLReaderFactory.createXMLReader());
        chain.setContentHandler(saxHandler);
        chain.setErrorHandler(saxHandler);

        filter = chain;
        source = reader;
    }

    /**
     * Parses the document supplied to the constructor, triggering the
     * onElement callbacks along the way.
     *
     * @throws SAXException on a parse or unmarshalling failure
     * @throws IOException  if reading from the underlying reader fails
     */
    public void parse() throws SAXException, IOException
    {
        InputSource input = new InputSource(source);
        filter.parse(input);
    }

    /**
     * Decides whether the start/end events of an element are dropped before
     * they reach Castor. Typically every element above the target element
     * supplied to the constructor should be ignored.
     *
     * @param element_name the qualified name passed to startElement/endElement
     * @return true if this element should be ignored
     */
    public abstract boolean ignoreElement(String element_name);

    /**
     * Callback invoked each time an object of the target class has been
     * fully unmarshalled.
     *
     * @param target_class_object an instance of the target class
     */
    public abstract void    onElement(Object target_class_object);

    /**
     * SAX filter that swallows the start and end events of ignored elements
     * and forwards everything else unchanged.
     */
    private class LocalXMLFilter  extends XMLFilterImpl
    {
        public void startElement(String namespaceURI, String localName,
                                 String qName, Attributes atts)
            throws SAXException
        {
            if( ignoreElement(qName) )
            {
                return;
            }
            super.startElement(namespaceURI, localName, qName, atts);
        }

        public void endElement(String uri, String localName, String qName)
            throws SAXException
        {
            if( ignoreElement(qName) )
            {
                return;
            }
            super.endElement(uri, localName, qName);
        }
    }

    /**
     * Castor listener that only reacts to completed objects; the other
     * notifications are intentionally no-ops.
     */
    private class LocalUnmarshallListener implements UnmarshalListener
    {
        public void initialized(Object object)
        {
            // not needed
        }

        public void attributesProcessed(Object object)
        {
            // not needed
        }

        public void fieldAdded(String fieldName, Object parent, Object child)
        {
            // not needed
        }

        public void unmarshalled(Object obj)
        {
            // Exact class match only: subclasses of the target are not reported.
            Class actual = obj.getClass();
            if( !actual.equals(wantedClass) )
            {
                return;
            }
            onElement(obj);
        }
    }
}

/**
 * Example subclass of FilteredUnmarshaller. It skips the wrapper elements
 * "MyEnvelope" and "MyHeader" and receives each unmarshalled Message
 * object through {@link #onElement(Object)}.
 *
 * Message is not defined in this listing; presumably it is a
 * Castor-mapped class of your own - substitute your target class.
 */
public class MessageUnmarshaller extends FilteredUnmarshaller
{
    /**
     * Parses the XML file named by the first argument and prints the elapsed
     * time in whole seconds.
     *
     * @param args args[0] is the path of the XML file to process
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main (String args[])     throws Exception
    {
        if( args.length < 1 )
        {
            System.err.println("Usage: MessageUnmarshaller <xml-file>");
            return;
        }

        long start = System.currentTimeMillis();

        // NOTE: a FileReader decodes with the platform default charset; when a
        // character stream is supplied the parser does not consult the
        // encoding named in the XML declaration.
        Reader reader = new FileReader(args[0]);
        try
        {
            MessageUnmarshaller l =
                new MessageUnmarshaller(Message.class, reader, false);
            l.parse();
        }
        finally
        {
            // Release the file handle whether or not parsing succeeded.
            reader.close();
        }

        System.out.println((System.currentTimeMillis() - start) / 1000);
    }

    /**
     * Creates the unmarshaller by delegating to FilteredUnmarshaller.
     *
     * @param target_class  the class to unmarshal and report via onElement
     * @param reader        character stream containing the XML to parse
     * @param validate      whether Castor should validate unmarshalled objects
     * @throws SAXException if the underlying XML reader cannot be created
     */
    public MessageUnmarshaller(Class target_class, Reader reader,
                               boolean validate) throws SAXException
    {
        super(target_class,reader,validate);
    }

    /**
     * Ignores the wrapper elements that enclose the messages.
     *
     * @param element_name the qualified name of the current element
     * @return true for "MyEnvelope" and "MyHeader", false otherwise
     */
    public boolean ignoreElement(String element_name)
    {
        // Constant-first equals() also tolerates a null element name.
        return "MyEnvelope".equals(element_name)
            || "MyHeader".equals(element_name);
    }

    /**
     * Receives each unmarshalled target object; this example does nothing
     * with it.
     *
     * @param target_class_object an instance of the target class
     */
    public void onElement(Object target_class_object)
    {
        // Will only receive Message objects
    }
}



----------------------------------------------------------- 
If you wish to unsubscribe from this mailing, send mail to
[EMAIL PROTECTED] with a subject of:
        unsubscribe castor-user

Reply via email to