Hrmmmm

One problem I'm coming up against with the streamed
marshaller is that characters aren't being converted
to entities (e.g. & to &amp;). Where in Castor is this
done, so I can call it from my characters method?

> -----Original Message-----
> From: Kallen McInerney [mailto:[EMAIL PROTECTED]
> Sent: Thursday, July 15, 2004 10:43 AM
> To: '[EMAIL PROTECTED]'
> Subject: Re: [castor-user] Handling very large XML files
> 
> 
> 
> I've added the FilteredUnmarshaller and this quick and dirty
> StreamedMarshaller to the examples board.
> 
> I also needed a way to stream during marshalling, since building the
> object hierarchy consumed too much memory. I'm not totally happy with
> its design, but maybe someone who knows the inner workings of Castor
> better than I do could help out.  The tricky part was halting
> marshalling at the correct spot so I could marshal out my objects.
> For now I halt on the end element of a passed-in element name.
> 
> import java.io.*; 
> 
> import org.xml.sax.helpers.XMLFilterImpl; 
> import org.xml.sax.Attributes; 
> import org.xml.sax.SAXException; 
> import org.exolab.castor.xml.Marshaller; 
> import org.exolab.castor.xml.MarshalException; 
> import org.exolab.castor.xml.ValidationException; 
> 
> /** 
>  * An abstract class for streaming marshalled data to a writer 
>  * rather than building an object hierarchy in memory then 
> marshalling. 
>  * The approach taken is that you supply a minimal hierarchy that 
>  * only goes to the depth just above where the objects you 
> want to marshal 
>  * are found.  When the Marshaller reaches this location it breaks 
>  * and allows you to marshall all the objects you want (i.e. 
> one by one
> hopefully 
>  * so you can save memory.) 
>  * 
>  * User: KallenM 
>  * Date: Jul 14, 2004 
>  * Time: 3:58:58 PM 
>  */ 
> public abstract class StreamedMarshaller 
> { 
>     /** 
>      * Constructor 
>      * @param writer  Where all data will be written 
>      * @param elementName *Important* the element name to 
> stop and allow
> streaming at 
>      * @throws Exception 
>      */ 
>     public StreamedMarshaller(Writer writer, String 
> elementName) throws
> Exception 
>     { 
>         m_marshaller = new Marshaller(new LocalXMLFilter(writer)); 
>         m_marshaller.setValidation(false); // We cant 
> validate at this point
> because of the 
>         m_elementName = elementName; // minimal hierarchy.  
> Validate later
> when we stream data 
>     } 
> 
>     /** 
>      * Marshall your minimal hierarchy 
>      * 
>      * @param obj the root of the minimal hierarchy 
>      * @throws MarshalException 
>      * @throws ValidationException 
>      */ 
>     public void marshal(Object obj) throws MarshalException,
> ValidationException 
>     { 
>         m_marshaller.marshal(obj); 
>     } 
> 
>     /** 
>      * Begin streaming your data here.  Use the 
> getMarshaller() method to
> marshall 
>      * your sub objects 
>      * 
>      * @param writer the writer 
>      * @throws IOException 
>      * @throws MarshalException 
>      * @throws ValidationException 
>      */ 
>     public abstract void streamData(Writer writer) throws
> IOException,MarshalException,ValidationException; 
> 
>     protected Marshaller  getMarshaller() 
>     { 
>         return m_marshaller; 
>     } 
> 
>     private Marshaller  m_marshaller; 
>     private String      m_elementName; 
>     private static final String DEFAULT_HEADER_STR = "<?xml 
> version=\"1.0\"
> encoding=\"UTF-8\"?>\n"; 
> 
>     private class LocalXMLFilter  extends XMLFilterImpl 
>     { 
>         private Writer m_writer; 
> 
>         public LocalXMLFilter(Writer writer) throws IOException 
>         { 
>             m_writer = writer; 
> 
>             // Can't write this during startDocument since it will get
> called 
>             // more than once 
>             m_writer.write(DEFAULT_HEADER_STR); 
>         } 
> 
>         public void startElement(String namespaceURI, String 
> localName, 
>            String qName, Attributes atts) throws SAXException 
>         { 
>             super.startElement(namespaceURI, localName, qName,atts); 
> 
>             try 
>             { 
>                 m_writer.write('<'); 
>                 m_writer.write(qName); 
> 
>                 for(int i=0; i<atts.getLength(); ++i) 
>                 { 
>                     m_writer.write(atts.getQName(i)); 
>                     m_writer.write("=\""); 
>                     m_writer.write(atts.getValue(i)); 
>                     m_writer.write("\""); 
>                 } 
> 
>                 m_writer.write('>'); 
>             } 
>             catch(IOException io) 
>             { 
>                 throw new SAXException(io); 
>             } 
>         } 
> 
>         public void endElement (String uri, String localName, 
> String qName)
> throws SAXException 
>         { 
>             super.endElement(uri, localName, qName); 
> 
>             try 
>             { 
>                 if(
> m_elementName.toLowerCase().compareTo(qName.toLowerCase()) == 0 ) 
>                     streamData(m_writer); 
>                 m_writer.write("</"); 
>                 m_writer.write(qName); 
>                 m_writer.write('>'); 
>             } 
>             catch(Exception ex) 
>             { 
>                 throw new SAXException(ex); 
>             } 
>         } 
> 
>         public void characters (char ch[], int start, int 
> length) throws
> SAXException 
>         { 
>             super.characters(ch, start, length); 
> 
>             try 
>             { 
>                 m_writer.write(ch,start,length); 
>             } 
>             catch(IOException io) 
>             { 
>                 throw new SAXException(io); 
>             } 
>         } 
>     } 
> } 
> 
> public class ProductMarshaller extends StreamedMarshaller 
> { 
>     public static void main (String args[])   throws Exception 
>     { 
>         OutputStreamWriter writer = new OutputStreamWriter(new
> FileOutputStream("c:\\test.xml")); 
> 
>         ProductMarshaller l = new 
> ProductMarshaller(writer,"Transmission"); 
>         l.marshal(new Transmission()); 
>         writer.flush(); 
>     } 
> 
>     public ProductMarshaller(Writer writer, String className) throws
> Exception 
>     { 
>         super(writer,className); 
>     } 
> 
>     public void streamData(Writer writer) throws
> IOException,MarshalException,ValidationException 
>     { 
>         getMarshaller().setValidation(true); 
> 
>         for(int i=0; i < 10; ++i) 
>         { 
>                 Product container = new Product(); 
>    container.setCaption("Test"); 
>    container.setDescription("Test"); 
>    container.setKeywords("Test"); 
> 
>    getMarshaller().marshal(container); 
>         } 
>     } 
> } 
> 
> > -----Original Message-----
> > From: Keith Visco [mailto:[EMAIL PROTECTED]
> > Sent: Wednesday, July 14, 2004 10:17 PM
> > To: '[EMAIL PROTECTED]'
> > Subject: Re: [castor-user] Handling very large XML files
> > 
> > 
> > 
> > 
> > Kallen,
> > 
> > This would be a good candidate to post in the Castor XML Examples
> > section of the new message board (http://castor.exolab.org/phpBB2/).
> > 
> > When you get an opportunity, it would be good if you could post it
> > there.
> > 
> > Thanks,
> > 
> > --Keith
> > 
> > Kallen McInerney wrote:
> > > 
> > > This message is for those of you coming up against
> > > "out of memory" issues whilst unmarshalling large XML files.
> > > 
> > > I've written a class that will stream the objects
> > > to you as they are parsed getting rid of the root
> > > hierarchy altogether.
> > > 
> > > For testing I've parsed a 500MB XML file
> > > and watched as the VM used less than 1MB
> > > of RAM to process it (it took 2 1/2 minutes).
> > > 
> > > Its use isn't constrained to large files either.
> > > In fact it's an efficient way to process all your
> > > Castor hierarchies whilst getting just the objects
> > > you want.
> > > 
> > > I've included an abstract class and a usage of the
> > > abstract class below so you can see how it works
> > > 
> > > import org.xml.sax.helpers.XMLReaderFactory;
> > > import org.xml.sax.helpers.XMLFilterImpl;
> > > import org.xml.sax.InputSource;
> > > import org.xml.sax.Attributes;
> > > import org.xml.sax.SAXException;
> > > import org.exolab.castor.xml.Unmarshaller;
> > > import org.exolab.castor.xml.UnmarshalHandler;
> > > import org.exolab.castor.xml.UnmarshalListener;
> > > 
> > > import java.io.Reader;
> > > import java.io.IOException;
> > > 
> > > /**
> > >  * An abstract class for unmarshalling large XML files.  
> > Castor's default
> > >  * behaviour is to return an object hierarchy which with 
> > large XML files
> > >  * can use too much memory.  The approach used here is to 
> > filter over
> > >  * objects we're not interested in and just receive the 
> > objects we want
> > >  * via a callback as its unmarshalled.  Thus only using 
> > enough memory to
> > >  * store the current unmarshalled object.
> > >  *
> > >  * Credit goes to Andre Vanha [EMAIL PROTECTED] for 
> > commentary he
> > >  * posted to the Castor Dev List on handling large files.
> > >  *
> > >  * http://www.mail-archive.com/[EMAIL PROTECTED]/msg16052.html
> > >  *
> > >  * User: KallenM
> > >  * Date: Jul 13, 2004
> > >  * Time: 11:14:45 AM
> > >  */
> > > public abstract class FilteredUnmarshaller
> > > {
> > >     /**
> > >      * Constructor
> > >      * @param target_class  the top level class you are 
> > filtering for
> > >      * @param reader        a reader to the data
> > >      * @param validate      validate the unmarshalled objects
> > >      * @throws SAXException
> > >      */
> > >     public FilteredUnmarshaller(Class target_class, Reader 
> > reader, boolean
> > > validate) throws SAXException
> > >     {
> > >         m_targetClass = target_class;
> > >         Unmarshaller unmarshaller = new 
> Unmarshaller(m_targetClass);
> > >         unmarshaller.setValidation(validate);
> > >         UnmarshalHandler handler = unmarshaller.createHandler();
> > > 
> > >         LocalUnmarshallListener listener = new 
> > LocalUnmarshallListener();
> > >         handler.setUnmarshalListener(listener);
> > > 
> > >         m_xr = new LocalXMLFilter();
> > >         m_xr.setParent(XMLReaderFactory.createXMLReader());
> > >         m_xr.setContentHandler(handler);
> > >         m_xr.setErrorHandler(handler);
> > >         m_reader = reader;
> > >     }
> > > 
> > >     /**
> > >      * Begin parsing the data
> > >      *
> > >      * @throws SAXException
> > >      * @throws IOException
> > >      */
> > >     public void parse() throws SAXException, IOException
> > >     {
> > >         m_xr.parse(new InputSource(m_reader));
> > >     }
> > > 
> > >     /**
> > >      * Allow overriding classes to ignore certain elements 
> > in the XML file.
> > >      * Typically you will want to override all elements 
> > above the target
> > > element
> > >      * supplied to the constructor
> > >      *
> > >      * @param element_name the current element being handled by
> > > start\endElement
> > >      * @return true if this element should be ignored
> > >      */
> > >     public abstract boolean ignoreElement(String element_name);
> > > 
> > >     /**
> > >      * Called during unmarshalling when your target class has been
> > > encountered
> > >      * @param target_class_object an object of the target 
> class type
> > >      */
> > >     public abstract void    onElement(Object target_class_object);
> > > 
> > >     private Class               m_targetClass = null;
> > >     private LocalXMLFilter      m_xr;
> > >     private Reader              m_reader;
> > > 
> > >     private class LocalXMLFilter  extends XMLFilterImpl
> > >     {
> > >         public void startElement(String namespaceURI, 
> > String localName,
> > >            String qName, Attributes atts) throws SAXException
> > >         {
> > >             if( !ignoreElement(qName) )
> > >                 super.startElement(namespaceURI, localName, 
> > qName,atts);
> > >         }
> > > 
> > >         public void endElement (String uri, String 
> > localName, String qName)
> > > throws SAXException
> > >         {
> > >             if( !ignoreElement(qName) )
> > >                 super.endElement(uri, localName, qName);
> > >         }
> > >     }
> > > 
> > >     private class LocalUnmarshallListener implements 
> > UnmarshalListener
> > >     {
> > >         public void initialized (Object object)
> > >         {
> > >         }
> > > 
> > >         public void attributesProcessed(Object object)
> > >         {
> > > 
> > >         }
> > > 
> > >         public void fieldAdded (String fieldName, Object 
> > parent, Object
> > > child)
> > >         {
> > > 
> > >         }
> > > 
> > >         public void unmarshalled (Object obj)
> > >         {
> > >             if( obj.getClass().equals(m_targetClass) )
> > >                 onElement(obj);
> > >         }
> > >     }
> > > }
> > > 
> > > public class MessageUnmarshaller extends FilteredUnmarshaller
> > > {
> > >     public static void main (String args[])     throws Exception
> > >     {
> > >         long start = System.currentTimeMillis();
> > >         AmazonMessageUnmarshaller l =new
> > > AmazonMessageUnmarshaller(Message.class,new 
> > FileReader(args[0]),false);
> > >         l.parse();
> > >         System.out.println((System.currentTimeMillis() - 
> > start) / 1000);
> > >     }
> > > 
> > >     public MessageUnmarshaller(Class target_class, Reader 
> > reader, boolean
> > > validate) throws SAXException
> > >     {
> > >         super(target_class,reader,validate);
> > >     }
> > > 
> > >     public boolean ignoreElement(String element_name)
> > >     {
> > >         if( element_name.compareTo("MyEnvelope") == 0  ||
> > >             element_name.compareTo("MyHeader") == 0  )
> > >                 return true;
> > >         else
> > >             return false;
> > >     }
> > > 
> > >     public void onElement(Object target_class_object)
> > >     {
> > >         // Will only receive Message objects
> > >     }
> > > }
> > > 
> > > -----------------------------------------------------------
> > > If you wish to unsubscribe from this mailing, send mail to
> > > [EMAIL PROTECTED] with a subject of:
> > >         unsubscribe castor-user
> > 
> > 
> > 
> > ----------------------------------------------------------- 
> > If you wish to unsubscribe from this mailing, send mail to
> > [EMAIL PROTECTED] with a subject of:
> >         unsubscribe castor-user
> > 
> 
> 
> 
> ----------------------------------------------------------- 
> If you wish to unsubscribe from this mailing, send mail to
> [EMAIL PROTECTED] with a subject of:
>         unsubscribe castor-user
> 



----------------------------------------------------------- 
If you wish to unsubscribe from this mailing, send mail to
[EMAIL PROTECTED] with a subject of:
        unsubscribe castor-user

Reply via email to