
package com;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

import org.apache.xerces.parsers.SAXParser;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Stand-alone reproducer that feeds the Xerces {@link SAXParser} a UTF-8
 * document in which a supplementary character (encoded in Java as the
 * surrogate pair U+D800 U+DC00, i.e. a 4-byte UTF-8 sequence) starts exactly
 * at byte offset {@value #PAIR_OFFSET} of the input stream.
 *
 * <p>The character data reported by the parser is dumped to standard output
 * as hexadecimal UTF-16 code units so the decoded surrogate pair can be
 * inspected by eye.
 */
public class TestXerces {

	/**
	 * Offset (in chars and, because everything before it is ASCII, also in
	 * UTF-8 bytes) at which the surrogate pair begins.
	 * NOTE(review): presumably chosen to match the parser's internal read
	 * buffer size so the multi-byte sequence straddles a buffer refill --
	 * confirm against the Xerces version under test.
	 */
	private static final int PAIR_OFFSET = 2048;

	/**
	 * Builds the test document, prints the lead byte of the 4-byte UTF-8
	 * sequence, and parses the document with a {@link Handler} attached.
	 *
	 * <p>Any exception raised while building or parsing is caught and its
	 * stack trace printed; the declared exceptions are retained only to keep
	 * the signature unchanged.
	 *
	 * @param args ignored
	 * @throws SAXException declared for signature compatibility
	 * @throws IOException declared for signature compatibility
	 */
	public static void main(String[] args) throws SAXException, IOException {
		try {
			// Pad the opening tag with spaces so the next char lands exactly
			// at PAIR_OFFSET ("<xml>" plus padding is PAIR_OFFSET chars long).
			StringBuilder b = new StringBuilder("<xml>");
			while (b.length() < PAIR_OFFSET) {
				b.append(' ');
			}
			// add surrogate pair (together they encode U+10000)
			b.append("\uD800");
			b.append("\uDC00");
			b.append("...</xml>");

			byte[] input = b.toString().getBytes("utf-8");

			// Mask to an unsigned value: a UTF-8 lead byte has its high bit
			// set, so the signed byte would otherwise be sign-extended and
			// printed as 32 binary digits instead of 8.
			System.out.println("utf-8 4-byte start: "
					+ Integer.toBinaryString(input[PAIR_OFFSET] & 0xFF));

			ByteArrayInputStream in = new ByteArrayInputStream(input);

			SAXParser p = new SAXParser();
			p.setContentHandler(new Handler());
			p.parse(new InputSource(in));
		} catch (Exception e) {
			// Reproducer: report the failure and exit normally.
			e.printStackTrace();
		}
	}

	/**
	 * Content handler that dumps every reported chunk of character data as
	 * hexadecimal UTF-16 code units, one per line, preceded by "Got: ".
	 */
	static class Handler extends DefaultHandler {

		/**
		 * Prints the code units in {@code ch[start .. start+length-1]}.
		 *
		 * @param ch buffer holding the character data
		 * @param start index of the first relevant char in {@code ch}
		 * @param length number of relevant chars
		 * @throws SAXException never thrown by this implementation
		 */
		@Override
		public void characters(char[] ch, int start, int length) throws SAXException {
			System.out.println("Got: ");
			int end = start + length;
			for (int i = start; i < end; i++) {
				System.out.println(Integer.toHexString(ch[i]));
			}
		}

	}
}
