I've added the FilteredUnmarshaller and this quick and dirty
StreamedMarshaller to the examples board.

I also needed a way to stream during marshalling since building the object
hierarchy consumed too much memory. I'm not totally happy with its design
but maybe someone who knows the inner workings of castor better than I could
help out.  The tricky part was halting unmarshalling at the correct spot so
I could marshal out my objects.  For now I halt on the end element of a
passed in element name.

import java.io.*; 

import org.xml.sax.helpers.XMLFilterImpl; 
import org.xml.sax.Attributes; 
import org.xml.sax.SAXException; 
import org.exolab.castor.xml.Marshaller; 
import org.exolab.castor.xml.MarshalException; 
import org.exolab.castor.xml.ValidationException; 

/** 
 * An abstract class for streaming marshalled data to a writer 
 * rather than building an object hierarchy in memory then marshalling. 
 * The approach taken is that you supply a minimal hierarchy that 
 * only goes to the depth just above where the objects you want to marshal 
 * are found.  When the Marshaller reaches this location it breaks
 * and allows you to marshal all the objects you want (ideally one by
 * one, so you can save memory).
 * 
 * User: KallenM 
 * Date: Jul 14, 2004 
 * Time: 3:58:58 PM 
 */ 
public abstract class StreamedMarshaller 
{ 
    /** 
     * Constructor 
     * @param writer  Where all data will be written 
     * @param elementName *Important* the element name to stop and allow
streaming at 
     * @throws Exception 
     */ 
    public StreamedMarshaller(Writer writer, String elementName) throws
Exception 
    { 
        m_marshaller = new Marshaller(new LocalXMLFilter(writer)); 
        m_marshaller.setValidation(false); // We cant validate at this point
because of the 
        m_elementName = elementName; // minimal hierarchy.  Validate later
when we stream data 
    } 

    /** 
     * Marshall your minimal hierarchy 
     * 
     * @param obj the root of the minimal hierarhcy 
     * @throws MarshalException 
     * @throws ValidationException 
     */ 
    public void marshal(Object obj) throws MarshalException,
ValidationException 
    { 
        m_marshaller.marshal(obj); 
    } 

    /** 
     * Begin streaming your data here.  Use the getMarshaller() method to
marshall 
     * your sub objects 
     * 
     * @param writer the writer 
     * @throws IOException 
     * @throws MarshalException 
     * @throws ValidationException 
     */ 
    public abstract void streamData(Writer writer) throws
IOException,MarshalException,ValidationException; 

    protected Marshaller  getMarshaller() 
    { 
        return m_marshaller; 
    } 

    private Marshaller  m_marshaller; 
    private String      m_elementName; 
    private static final String DEFAULT_HEADER_STR = "<?xml version=\"1.0\"
encoding=\"UTF-8\"?>\n"; 

    private class LocalXMLFilter  extends XMLFilterImpl 
    { 
        private Writer m_writer; 

        public LocalXMLFilter(Writer writer) throws IOException 
        { 
            m_writer = writer; 

            // Can't write this during startDocument since it will get
called 
            // more than once 
            m_writer.write(DEFAULT_HEADER_STR); 
        } 

        public void startElement(String namespaceURI, String localName, 
           String qName, Attributes atts) throws SAXException 
        { 
            super.startElement(namespaceURI, localName, qName,atts); 

            try 
            { 
                m_writer.write('<'); 
                m_writer.write(qName); 

                for(int i=0; i<atts.getLength(); ++i) 
                { 
                    m_writer.write(atts.getQName(i)); 
                    m_writer.write("=\""); 
                    m_writer.write(atts.getValue(i)); 
                    m_writer.write("\""); 
                } 

                m_writer.write('>'); 
            } 
            catch(IOException io) 
            { 
                throw new SAXException(io); 
            } 
        } 

        public void endElement (String uri, String localName, String qName)
throws SAXException 
        { 
            super.endElement(uri, localName, qName); 

            try 
            { 
                if(
m_elementName.toLowerCase().compareTo(qName.toLowerCase()) == 0 ) 
                    streamData(m_writer); 
                m_writer.write("</"); 
                m_writer.write(qName); 
                m_writer.write('>'); 
            } 
            catch(Exception ex) 
            { 
                throw new SAXException(ex); 
            } 
        } 

        public void characters (char ch[], int start, int length) throws
SAXException 
        { 
            super.characters(ch, start, length); 

            try 
            { 
                m_writer.write(ch,start,length); 
            } 
            catch(IOException io) 
            { 
                throw new SAXException(io); 
            } 
        } 
    } 
} 

/**
 * Example use of StreamedMarshaller: marshals a minimal Transmission
 * hierarchy and streams ten Product objects into it.
 */
public class ProductMarshaller extends StreamedMarshaller
{
    /**
     * Writes the example document.
     *
     * @param args optional; args[0] is the output file path, defaulting to
     *             c:\test.xml when absent
     * @throws Exception on any I/O or marshalling failure
     */
    public static void main (String args[])   throws Exception
    {
        String path = (args != null && args.length > 0) ? args[0] : "c:\\test.xml";

        // The header written by StreamedMarshaller declares UTF-8, so the
        // bytes must be UTF-8 too rather than the platform default charset.
        Writer writer = new OutputStreamWriter(new FileOutputStream(path), "UTF-8");
        try
        {
            ProductMarshaller l = new ProductMarshaller(writer, "Transmission");
            l.marshal(new Transmission());
            writer.flush();
        }
        finally
        {
            // Release the file handle even when marshalling fails.
            writer.close();
        }
    }

    /**
     * @param writer    where all data will be written
     * @param className name of the element at which streaming starts
     * @throws Exception if the superclass constructor fails
     */
    public ProductMarshaller(Writer writer, String className) throws Exception
    {
        super(writer, className);
    }

    /**
     * Turns validation back on and marshals ten sample Product objects, one
     * at a time, through the shared Marshaller.
     *
     * @param writer the writer (unused here; output goes through getMarshaller())
     */
    public void streamData(Writer writer) throws IOException, MarshalException, ValidationException
    {
        getMarshaller().setValidation(true);

        for (int i = 0; i < 10; ++i)
        {
            Product container = new Product();
            container.setCaption("Test");
            container.setDescription("Test");
            container.setKeywords("Test");

            getMarshaller().marshal(container);
        }
    }
}

> -----Original Message-----
> From: Keith Visco [mailto:[EMAIL PROTECTED]
> Sent: Wednesday, July 14, 2004 10:17 PM
> To: '[EMAIL PROTECTED]'
> Subject: Re: [castor-user] Handling very large XML files
> 
> 
> 
> 
> Kallen,
> 
> This would be a good candidate to post in the Castor XML Examples
> section of the new message board (http://castor.exolab.org/phpBB2/).
> 
> When you get an opportunity, it would be good if you could post it
> there.
> 
> Thanks,
> 
> --Keith
> 
> Kallen McInerney wrote:
> > 
> > This message is for those of you coming up against
> > "out of memory" issues whilst unmarshalling large XML files.
> > 
> > I've written a class that will stream the objects
> > to you as they are parsed getting rid of the root
> > hierarchy altogether.
> > 
> > For testing I've parsed a 500MB XML file
> > and watched as the VM uses less than a 1MB
> > of RAM to process it (took 2 1/2 minutes).
> > 
> > Its use isn't constrained to large files either.
> > In fact it's an efficient way to process all your
> > castor hierarchies whilst getting just the objects
> > you want.
> > 
> > I've included an abstract class and a usage of the
> > abstract class below so you can see how it works
> > 
> > import org.xml.sax.helpers.XMLReaderFactory;
> > import org.xml.sax.helpers.XMLFilterImpl;
> > import org.xml.sax.InputSource;
> > import org.xml.sax.Attributes;
> > import org.xml.sax.SAXException;
> > import org.exolab.castor.xml.Unmarshaller;
> > import org.exolab.castor.xml.UnmarshalHandler;
> > import org.exolab.castor.xml.UnmarshalListener;
> > 
> > import java.io.Reader;
> > import java.io.IOException;
> > 
> > /**
> >  * An abstract class for unmarshalling large XML files.  
> Castor's default
> >  * behaviour is to return an object hierarchy which with 
> large XML files
> >  * can use too much memory.  The approach used here is to 
> filter over
> >  * objects we're not interested in and just receive the 
> objects we want
> >  * via a callback as its unmarshalled.  Thus only using 
> enough memory to
> >  * store the current unmarshalled object.
> >  *
> >  * Credit goes to Andre Vanha [EMAIL PROTECTED] for 
> commentary he
> >  * posted to the Castor Dev List on handling large files.
> >  *
> >  * http://www.mail-archive.com/[EMAIL PROTECTED]/msg16052.html
> >  *
> >  * User: KallenM
> >  * Date: Jul 13, 2004
> >  * Time: 11:14:45 AM
> >  */
> > public abstract class FilteredUnmarshaller
> > {
> >     /**
> >      * Constructor
> >      * @param target_class  the top level class you are 
> filtering for
> >      * @param reader        a reader to the data
> >      * @param validate      validate the unmarshalled objects
> >      * @throws SAXException
> >      */
> >     public FilteredUnmarshaller(Class target_class, Reader 
> reader, boolean
> > validate) throws SAXException
> >     {
> >         m_targetClass = target_class;
> >         Unmarshaller unmarshaller = new Unmarshaller(m_targetClass);
> >         unmarshaller.setValidation(validate);
> >         UnmarshalHandler handler = unmarshaller.createHandler();
> > 
> >         LocalUnmarshallListener listener = new 
> LocalUnmarshallListener();
> >         handler.setUnmarshalListener(listener);
> > 
> >         m_xr = new LocalXMLFilter();
> >         m_xr.setParent(XMLReaderFactory.createXMLReader());
> >         m_xr.setContentHandler(handler);
> >         m_xr.setErrorHandler(handler);
> >         m_reader = reader;
> >     }
> > 
> >     /**
> >      * Begin parsing the data
> >      *
> >      * @throws SAXException
> >      * @throws IOException
> >      */
> >     public void parse() throws SAXException, IOException
> >     {
> >         m_xr.parse(new InputSource(m_reader));
> >     }
> > 
> >     /**
> >      * Allow overriding classes to ignore certain elements 
> in the XML file.
> >      * Typically you will want to override all elements 
> above the target
> > element
> >      * supplied to the constructor
> >      *
> >      * @param element_name the current element being handled by
> > start\endElement
> >      * @return true if this element should be ignored
> >      */
> >     public abstract boolean ignoreElement(String element_name);
> > 
> >     /**
> >      * Called during unmarshalling when your target class has been
> > encountered
> >      * @param target_class_object an object of the target class type
> >      */
> >     public abstract void    onElement(Object target_class_object);
> > 
> >     private Class               m_targetClass = null;
> >     private LocalXMLFilter      m_xr;
> >     private Reader              m_reader;
> > 
> >     private class LocalXMLFilter  extends XMLFilterImpl
> >     {
> >         public void startElement(String namespaceURI, 
> String localName,
> >            String qName, Attributes atts) throws SAXException
> >         {
> >             if( !ignoreElement(qName) )
> >                 super.startElement(namespaceURI, localName, 
> qName,atts);
> >         }
> > 
> >         public void endElement (String uri, String 
> localName, String qName)
> > throws SAXException
> >         {
> >             if( !ignoreElement(qName) )
> >                 super.endElement(uri, localName, qName);
> >         }
> >     }
> > 
> >     private class LocalUnmarshallListener implements 
> UnmarshalListener
> >     {
> >         public void initialized (Object object)
> >         {
> >         }
> > 
> >         public void attributesProcessed(Object object)
> >         {
> > 
> >         }
> > 
> >         public void fieldAdded (String fieldName, Object 
> parent, Object
> > child)
> >         {
> > 
> >         }
> > 
> >         public void unmarshalled (Object obj)
> >         {
> >             if( obj.getClass().equals(m_targetClass) )
> >                 onElement(obj);
> >         }
> >     }
> > }
> > 
> > public class MessageUnmarshaller extends FilteredUnmarshaller
> > {
> >     public static void main (String args[])     throws Exception
> >     {
> >         long start = System.currentTimeMillis();
> >         AmazonMessageUnmarshaller l =new
> > AmazonMessageUnmarshaller(Message.class,new 
> FileReader(args[0]),false);
> >         l.parse();
> >         System.out.println((System.currentTimeMillis() - 
> start) / 1000);
> >     }
> > 
> >     public MessageUnmarshaller(Class target_class, Reader 
> reader, boolean
> > validate) throws SAXException
> >     {
> >         super(target_class,reader,validate);
> >     }
> > 
> >     public boolean ignoreElement(String element_name)
> >     {
> >         if( element_name.compareTo("MyEnvelope") == 0  ||
> >             element_name.compareTo("MyHeader") == 0  )
> >                 return true;
> >         else
> >             return false;
> >     }
> > 
> >     public void onElement(Object target_class_object)
> >     {
> >         // Will only recieve Message objects
> >     }
> > }
> > 
> > -----------------------------------------------------------
> > If you wish to unsubscribe from this mailing, send mail to
> > [EMAIL PROTECTED] with a subject of:
> >         unsubscribe castor-user
> 
> 
> 
> ----------------------------------------------------------- 
> If you wish to unsubscribe from this mailing, send mail to
> [EMAIL PROTECTED] with a subject of:
>         unsubscribe castor-user
> 



----------------------------------------------------------- 
If you wish to unsubscribe from this mailing, send mail to
[EMAIL PROTECTED] with a subject of:
        unsubscribe castor-user

Reply via email to