neeraj 2002/08/09 02:36:58 Modified: java/src/org/apache/xerces/impl Tag: jaxp-ri-1_2_0-fcs-branch XMLEntityManager.java XMLEntityScanner.java XMLDocumentFragmentScannerImpl.java java/src/org/apache/xerces/util Tag: jaxp-ri-1_2_0-fcs-branch SymbolTable.java Log: First set of performance updates. 1. In the scanners, a considerable amount of time is consumed by the checks done for XML characters. We can optimize this by predicting the end element name and skipping that string in the buffer, which avoids the character checks done for endElement. 2. This also avoids the symbol table lookup for the end element name. 3. Increase the default buffer size from 2KB to 8KB, which makes a difference for large files. We can also define a feature for it so that it can be controlled by the application. 4. Increase the number of buckets in SymbolTable; we don't lose much on memory, but there is a performance gain of around 4% for XML files having a large vocabulary. A function for rehashing the symbol table, which can dynamically increase the bucket size, is still to be written; some performance tests on this are still to be done. 5. Use different buffer sizes for internal and external entity declarations. 
Revision Changes Path No revision No revision 1.25.2.1 +163 -49 xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java Index: XMLEntityManager.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java,v retrieving revision 1.25 retrieving revision 1.25.2.1 diff -u -r1.25 -r1.25.2.1 --- XMLEntityManager.java 31 Jan 2002 15:17:56 -0000 1.25 +++ XMLEntityManager.java 9 Aug 2002 09:36:58 -0000 1.25.2.1 @@ -75,6 +75,7 @@ import org.apache.xerces.impl.io.UCSReader; import org.apache.xerces.impl.io.UTF8Reader; import org.apache.xerces.impl.msg.XMLMessageFormatter; +import org.apache.xerces.impl.validation.ValidationManager; import org.apache.xerces.util.EncodingMap; import org.apache.xerces.util.SymbolTable; @@ -124,8 +125,13 @@ // Constants // - /** Default buffer size (2048). */ - public static final int DEFAULT_BUFFER_SIZE = 2048; + /** Default buffer size (8192). */ + public static final int DEFAULT_BUFFER_SIZE = 8192; + + /** Default buffer size before we've finished with the XMLDecl: */ + public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64; + + public static final int INTERNAL_ENTITY_BUFFER_SIZE = 4096; // feature identifiers @@ -166,14 +172,14 @@ VALIDATION, EXTERNAL_GENERAL_ENTITIES, EXTERNAL_PARAMETER_ENTITIES, - ALLOW_JAVA_ENCODINGS, + ALLOW_JAVA_ENCODINGS }; /** Recognized properties. 
*/ private static final String[] RECOGNIZED_PROPERTIES = { SYMBOL_TABLE, ERROR_REPORTER, - ENTITY_RESOLVER, + ENTITY_RESOLVER }; // debugging @@ -364,6 +370,7 @@ Entity entity = new InternalEntity(name, text); fEntities.put(name, entity); } + } // addInternalEntity(String,String) /** @@ -407,10 +414,11 @@ } } } - Entity entity = new ExternalEntity(name, + Entity entity = new ExternalEntity(name, new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandSystemId(literalSystemId, baseSystemId)), null); fEntities.put(name, entity); } + } // addExternalEntity(String,String,String,String) /** @@ -452,6 +460,7 @@ Entity entity = new ExternalEntity(name, new XMLResourceIdentifierImpl(publicId, systemId, baseSystemId, null), notation); fEntities.put(name, entity); } + } // addUnparsedEntity(String,String,String,String) /** @@ -528,13 +537,20 @@ } if (needExpand) expandedSystemId = expandSystemId(literalSystemId, baseSystemId); - + // give the entity resolver a chance XMLInputSource xmlInputSource = null; if (fEntityResolver != null) { - fResourceIdentifier.clear(); - fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); - xmlInputSource = fEntityResolver.resolveEntity(fResourceIdentifier); + XMLResourceIdentifierImpl ri = null; + if (resourceIdentifier instanceof XMLResourceIdentifierImpl) { + ri = (XMLResourceIdentifierImpl)resourceIdentifier; + } + else { + fResourceIdentifier.clear(); + ri = fResourceIdentifier; + } + ri.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); + xmlInputSource = fEntityResolver.resolveEntity(ri); } // do default resolution @@ -585,6 +601,7 @@ // should we skip external entities? 
boolean external = entity.isExternal(); + if (external) { boolean unparsed = entity.isUnparsed(); boolean parameter = entityName.startsWith("%"); @@ -594,18 +611,16 @@ if (fEntityHandler != null) { fResourceIdentifier.clear(); final String encoding = null; - if (external) { - ExternalEntity externalEntity = (ExternalEntity)entity; - //REVISIT: since we're storing expandedSystemId in the - // externalEntity, how could this have got here if it wasn't already - // expanded??? - neilg - String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); - String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); - String expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); - fResourceIdentifier.setValues( - (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), - extLitSysId, extBaseSysId, expandedSystemId); - } + ExternalEntity externalEntity = (ExternalEntity)entity; + //REVISIT: since we're storing expandedSystemId in the + // externalEntity, how could this have got here if it wasn't already + // expanded??? - neilg + String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); + String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); + String expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); + fResourceIdentifier.setValues( + (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), + extLitSysId, extBaseSysId, expandedSystemId); fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding); fEntityHandler.endEntity(entityName); } @@ -637,8 +652,8 @@ if (external) { ExternalEntity externalEntity = (ExternalEntity)entity; // REVISIT: for the same reason above... - String extLitSysId = (externalEntity.entityLocation != null ? 
externalEntity.entityLocation.getLiteralSystemId() : null); - String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); + String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); + String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); String expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); fResourceIdentifier.setValues( (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), @@ -805,16 +820,18 @@ // we've seen a new Reader. put it in a list, so that // we can close it later. fOwnReaders.addElement(reader); - + // push entity on stack if (fCurrentEntity != null) { fEntityStack.push(fCurrentEntity); } + int bufferSize = isExternal ? fBufferSize : INTERNAL_ENTITY_BUFFER_SIZE ; + //int bufferSize = fBufferSize ; // create entity - fCurrentEntity = new ScannedEntity(name, + fCurrentEntity = new ScannedEntity(name, new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId), - stream, reader, encoding, literal, false, isExternal); + stream, reader, encoding, literal, false, isExternal, bufferSize); // call handler if (fEntityHandler != null) { @@ -831,7 +848,7 @@ // a list of Readers ever seen protected Vector fOwnReaders = new Vector(); - + /** * Close all opened InputStreams and Readers opened by this parser. */ @@ -847,7 +864,7 @@ // and clear the list fOwnReaders.removeAllElements(); } - + // // XMLComponent methods // @@ -906,6 +923,7 @@ catch (XMLConfigurationException e) { fEntityResolver = null; } + // initialize state fStandalone = false; fEntities.clear(); @@ -1180,7 +1198,7 @@ * * @param b4 The first four bytes of the input. * @param count The number of bytes actually read. 
- * @return a 2-element array: the first element, an IANA-encoding string, + * @return a 2-element array: the first element, an IANA-encoding string, * the second element a Boolean which is true iff the document is big endian, false * if it's little-endian, and null if the distinction isn't relevant. */ @@ -1273,7 +1291,7 @@ * encoding name may be a Java encoding name; * otherwise, it is an ianaEncoding name. * @param isBigEndian For encodings (like uCS-4), whose names cannot - * specify a byte order, this tells whether the order is bigEndian. null menas + * specify a byte order, this tells whether the order is bigEndian. null menas * unknown or not relevant. * * @return Returns a reader. @@ -1718,8 +1736,10 @@ // buffer + int bufferSize; + /** Character buffer. */ - public char[] ch = new char[fBufferSize]; + public char[] ch = null ; /** Position in character buffer. */ public int position; @@ -1727,7 +1747,7 @@ /** Count of characters in buffer. */ public int count; - // to allow the reader/nputStream to behave efficiently: + // to allow the reader/inputStream to behave efficiently: public boolean mayReadChunks; // @@ -1738,7 +1758,7 @@ public ScannedEntity(String name, XMLResourceIdentifier entityLocation, InputStream stream, Reader reader, - String encoding, boolean literal, boolean mayReadChunks, boolean isExternal) { + String encoding, boolean literal, boolean mayReadChunks, boolean isExternal, int bufferSize) { super(name); this.entityLocation = entityLocation; this.stream = stream; @@ -1747,6 +1767,8 @@ this.literal = literal; this.mayReadChunks = mayReadChunks; this.isExternal = isExternal; + this.bufferSize = bufferSize ; + ch = new char[this.bufferSize]; } // <init>(StringXMLResourceIdentifier,InputStream,Reader,String,boolean, boolean) // @@ -1779,6 +1801,13 @@ } // toString():String + public void setBufferSize(int size){ + bufferSize = size ; + }//setBufferSize + + public int getBufferSize(){ + return bufferSize ; + }//getBufferSize } // class 
ScannedEntity /** @@ -1875,8 +1904,10 @@ } //fCurrentEntity.stream.reset(); fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); + //we have got the reader now.. we should read in chunks.. + //fCurrentEntity.mayReadChunks = true ; } else { - if (DEBUG_ENCODINGS) + if (DEBUG_ENCODINGS) System.out.println("$$$ reusing old reader on stream"); } } @@ -2264,6 +2295,50 @@ } // scanQName(QName):boolean + //new method added... + /** + this method expects beginning of name ie coming after "</" + * + *@return Returns true if a qualified name appeared immediately on + * the input and was scanned, false otherwise. + */ + /** REVISIT: + public boolean scanEndElementName(char [] startElementName) throws IOException{ + + // load more characters, if needed + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } + + return isMatched(startElementName, getCharacters(startElementName.length) ) ; + + }//peekEndElementName(int length) + */ + + /** REVISIT + public char[] getCharacters(int length) throws IOException{ + + }//getCharacters(length) + */ + + public boolean scanEndElementName(String startElementName) throws IOException { + + return skipString(startElementName) ; + + } + + public boolean isMatched(char [] startElement, char [] endElement) { + if(startElement.length == endElement.length){ + for(int i = 0 ; i < startElement.length ; i++){ + if(startElement[i] != endElement[i]){ + return false; + } + } + } + return false; + + }//isMatched + /** * Scans a range of parsed character data, setting the fields of the * XMLString structure, appropriately. 
@@ -2923,6 +2998,35 @@ } // skipSpaces():boolean + public boolean skipCharacters(char [] chars) throws IOException{ + + // load more characters, if needed + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } + + // skip string + final int length = chars.length ; + for (int i = 0; i < length; i++) { + char c = fCurrentEntity.ch[fCurrentEntity.position++]; + if (c != chars[i]) { + fCurrentEntity.position -= i + 1; + return false; + } + if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) { + System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1); + // REVISIT: Can a string to be skipped cross an + // entity boundary? -Ac + if (load(i + 1, false)) { + fCurrentEntity.position -= i + 1; + return false; + } + } + } + fCurrentEntity.columnNumber += length; + return true; + } + /** * Skips the specified string appearing immediately on the input. * <p> @@ -3158,6 +3262,7 @@ // Private methods // + /** * Loads a chunk of text. * @@ -3181,7 +3286,10 @@ } // read characters - int length = fCurrentEntity.ch.length - offset; + int length = fCurrentEntity.mayReadChunks? + (fCurrentEntity.ch.length - offset): + (DEFAULT_XMLDECL_BUFFER_SIZE); + if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); @@ -3219,7 +3327,7 @@ return entityChanged; - } // load(int):boolean + } // load(int, boolean):boolean } // class EntityScanner @@ -3231,7 +3339,7 @@ // methods. This means that, once we discover the true (declared) // encoding of a document, we can neither backtrack to read the // whole doc again nor start reading where we are with a new - // reader. + // reader. // // This class allows rewinding an inputStream by allowing a mark // to be set, and the stream reset to that position. 
<strong>The @@ -3252,7 +3360,7 @@ private int fOffset; private int fLength; private int fMark; - + public RewindableInputStream(InputStream is) { fData = new byte[DEFAULT_BUFFER_SIZE]; fInputStream = is; @@ -3266,11 +3374,11 @@ public void setStartOffset(int offset) { fStartOffset = offset; } - + public void rewind() { fOffset = fStartOffset; } - + public int read() throws IOException { int b = 0; if (fOffset < fLength) { @@ -3326,7 +3434,7 @@ fOffset += len; return len; } - + public long skip(long n) throws IOException { @@ -3350,21 +3458,27 @@ return bytesLeft; } n -= bytesLeft; + /* + * In a manner of speaking, when this class isn't permitting more + * than one byte at a time to be read, it is "blocking". The + * available() method should indicate how much can be read without + * blocking, so while we're in this mode, it should only indicate + * that bytes in its buffer are available; otherwise, the result of + * available() on the underlying InputStream is appropriate. + */ return fInputStream.skip(n) + bytesLeft; } - + public int available() throws IOException { int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } - return fInputStream.available(); - } - if (fLength == fEndOffset) { - return bytesLeft; + return fCurrentEntity.mayReadChunks ? 
fInputStream.available() + : 0; } - return fInputStream.available() + bytesLeft; + return bytesLeft; } public void mark(int howMuch) { @@ -3381,7 +3495,7 @@ public void close() throws IOException { if (fInputStream != null) { - fInputStream.close(); + fInputStream.close(); fInputStream = null; } } 1.4.2.1 +18 -16 xml-xerces/java/src/org/apache/xerces/impl/XMLEntityScanner.java Index: XMLEntityScanner.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLEntityScanner.java,v retrieving revision 1.4 retrieving revision 1.4.2.1 diff -u -r1.4 -r1.4.2.1 --- XMLEntityScanner.java 29 Jan 2002 01:15:09 -0000 1.4 +++ XMLEntityScanner.java 9 Aug 2002 09:36:58 -0000 1.4.2.1 @@ -2,7 +2,7 @@ * The Apache Software License, Version 1.1 * * - * Copyright (c) 2000-2002 The Apache Software Foundation. All rights + * Copyright (c) 2000-2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without @@ -10,7 +10,7 @@ * are met: * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in @@ -18,7 +18,7 @@ * distribution. * * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: + * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, @@ -26,7 +26,7 @@ * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this - * software without prior written permission. 
For written + * software without prior written permission. For written * permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache", @@ -67,7 +67,7 @@ /** * This class allows various parser scanners to scan basic XML constructs * from entities. This class works directly with the entity manager to - * provide this functionality. + * provide this functionality. * <p> * There is only one entity scanner and entity manager per parser. The * entity manager <em>could</em> implement the methods to perform entity @@ -88,7 +88,7 @@ // Public methods // - /** + /** * Returns the base system identifier of the currently scanned * entity, or null if none is available. */ @@ -97,10 +97,10 @@ /** * Sets the encoding of the scanner. This method is used by the * scanners if the XMLDecl or TextDecl line contains an encoding - * pseudo-attribute. + * pseudo-attribute. * <p> * <strong>Note:</strong> The underlying character reader on the - * current entity will be changed to accomodate the new encoding. + * current entity will be changed to accomodate the new encoding. * However, the new encoding is ignored if the current reader was * not constructed from an input stream (e.g. an external entity * that is resolved directly to the appropriate java.io.Reader @@ -108,13 +108,13 @@ * * @param encoding The IANA encoding name of the new encoding. * - * @throws IOException Thrown if the new encoding is not supported. + * @throws IOException Thrown if the new encoding is not supported. * * @see org.apache.xerces.util.EncodingMap * @see org.apache.xerces.util.XMLChar#isValidIANAEncoding * @see org.apache.xerces.util.XMLChar#isValidJavaEncoding */ - public abstract void setEncoding(String encoding) + public abstract void setEncoding(String encoding) throws IOException; /** Returns true if the current entity being scanned is external. 
*/ @@ -140,6 +140,8 @@ */ public abstract int scanChar() throws IOException; + //public abstract boolean scanEndElementName(char [] startElementName) throws IOException ; + public abstract boolean scanEndElementName( String startElementName) throws IOException ; /** * Returns a string matching the NMTOKEN production appearing immediately * on the input as a symbol, or null if NMTOKEN Name string is present. @@ -174,7 +176,7 @@ * @see org.apache.xerces.util.XMLChar#isNameStart */ public abstract String scanName() throws IOException; - + /** * Scans a qualified name from the input, setting the fields of the * QName structure appropriately. @@ -255,11 +257,11 @@ * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ - public abstract int scanLiteral(int quote, XMLString content) + public abstract int scanLiteral(int quote, XMLString content) throws IOException; - + /** - * Scans a range of character data up to the specicied delimiter, + * Scans a range of character data up to the specicied delimiter, * setting the fields of the XMLString structure, appropriately. * <p> * <strong>Note:</strong> The characters are consumed. @@ -288,7 +290,7 @@ * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. 
*/ - public abstract boolean scanData(String delimiter, XMLString data) + public abstract boolean scanData(String delimiter, XMLString data) throws IOException; /** 1.10.2.1 +133 -100 xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java Index: XMLDocumentFragmentScannerImpl.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java,v retrieving revision 1.10 retrieving revision 1.10.2.1 diff -u -r1.10 -r1.10.2.1 --- XMLDocumentFragmentScannerImpl.java 29 Jan 2002 03:44:36 -0000 1.10 +++ XMLDocumentFragmentScannerImpl.java 9 Aug 2002 09:36:58 -0000 1.10.2.1 @@ -2,7 +2,7 @@ * The Apache Software License, Version 1.1 * * - * Copyright (c) 1999-2002 The Apache Software Foundation. + * Copyright (c) 1999-2002 The Apache Software Foundation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -10,7 +10,7 @@ * are met: * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in @@ -18,7 +18,7 @@ * distribution. * * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: + * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, @@ -26,7 +26,7 @@ * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this - * software without prior written permission. For written + * software without prior written permission. 
For written * permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache", @@ -88,7 +88,7 @@ /** * This class is responsible for scanning the structure and content - * of document fragments. The scanner acts as the source for the + * of document fragments. The scanner acts as the source for the * document information which is communicated to the document handler. * <p> * This component requires the following features and properties from the @@ -156,21 +156,21 @@ // feature identifiers /** Feature identifier: namespaces. */ - protected static final String NAMESPACES = + protected static final String NAMESPACES = Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE; /** Feature identifier: notify built-in refereces. */ protected static final String NOTIFY_BUILTIN_REFS = Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE; - + // recognized features and properties /** Recognized features. */ private static final String[] RECOGNIZED_FEATURES = { - NAMESPACES, - VALIDATION, + NAMESPACES, + VALIDATION, NOTIFY_BUILTIN_REFS, - NOTIFY_CHAR_REFS, + NOTIFY_CHAR_REFS, }; /** Recognized properties. */ @@ -214,7 +214,7 @@ /** has external dtd */ protected boolean fHasExternalDTD; - + /** Standalone. */ protected boolean fStandalone; @@ -298,8 +298,8 @@ // XMLDocumentScanner methods // - /** - * Sets the input source. + /** + * Sets the input source. * * @param inputSource The input source. * @@ -311,7 +311,7 @@ fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); } // setInputSource(XMLInputSource) - /** + /** * Scans a document. * * @param complete True if the scanner should scan the document @@ -324,7 +324,7 @@ * * @returns True if there is more to scan, false otherwise. 
*/ - public boolean scanDocument(boolean complete) + public boolean scanDocument(boolean complete) throws IOException, XNIException { // keep dispatching "events" @@ -348,13 +348,13 @@ * Resets the component. The component can query the component manager * about any features and properties that affect the operation of the * component. - * + * * @param componentManager The component manager. * * @throws SAXException Thrown by component on initialization error. * For example, if a feature or property is * required for the operation of the component, the - * component manager may throw a + * component manager may throw a * SAXNotRecognizedException or a * SAXNotSupportedException. */ @@ -396,7 +396,7 @@ // setup dispatcher setScannerState(SCANNER_STATE_CONTENT); setDispatcher(fContentDispatcher); - + } // reset(XMLComponentManager) /** @@ -410,11 +410,11 @@ /** * Sets the state of a feature. This method is called by the component - * manager any time after reset when a feature changes state. + * manager any time after reset when a feature changes state. * <p> * <strong>Note:</strong> Components should silently ignore features * that do not affect the operation of the component. - * + * * @param featureId The feature identifier. * @param state The state of the feature. * @@ -427,7 +427,7 @@ throws XMLConfigurationException { super.setFeature(featureId, state); - + // Xerces properties if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); @@ -444,16 +444,16 @@ * are recognized by this component. */ public String[] getRecognizedProperties() { - return RECOGNIZED_PROPERTIES; + return (String[])(RECOGNIZED_PROPERTIES.clone()); } // getRecognizedProperties():String[] /** * Sets the value of a property. This method is called by the component - * manager any time after reset when a property changes value. + * manager any time after reset when a property changes value. 
* <p> * <strong>Note:</strong> Components should silently ignore properties * that do not affect the operation of the component. - * + * * @param propertyId The property identifier. * @param value The value of the property. * @@ -464,7 +464,7 @@ */ public void setProperty(String propertyId, Object value) throws XMLConfigurationException { - + super.setProperty(propertyId, value); // Xerces properties @@ -484,8 +484,8 @@ /** * setDocumentHandler - * - * @param documentHandler + * + * @param documentHandler */ public void setDocumentHandler(XMLDocumentHandler documentHandler) { fDocumentHandler = documentHandler; @@ -499,7 +499,7 @@ * This method notifies of the start of an entity. The DTD has the * pseudo-name of "[dtd]" parameter entity names start with '%'; and * general entities are just specified by their name. - * + * * @param name The name of the entity. * @param identifier The resource identifier. * @param encoding The auto-detected IANA encoding name of the entity @@ -510,7 +510,7 @@ * * @throws XNIException Thrown by handler to signal an error. */ - public void startEntity(String name, + public void startEntity(String name, XMLResourceIdentifier identifier, String encoding) throws XNIException { @@ -535,9 +535,9 @@ /** * This method notifies the end of an entity. The DTD has the pseudo-name - * of "[dtd]" parameter entity names start with '%'; and general entities + * of "[dtd]" parameter entity names start with '%'; and general entities * are just specified by their name. - * + * * @param name The name of the entity. * * @throws XNIException Thrown by handler to signal an error. @@ -564,7 +564,7 @@ fDocumentHandler.endGeneralEntity(name, null); } } - + } // endEntity(String) // @@ -598,7 +598,7 @@ * be scanned instead of an XML * declaration. 
*/ - protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) + protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) throws IOException, XNIException { // scan decl @@ -633,13 +633,13 @@ /** * Scans a processing data. This is needed to handle the situation - * where a document starts with a processing instruction whose + * where a document starts with a processing instruction whose * target name <em>starts with</em> "xml". (e.g. xmlfoo) * * @param target The PI target * @param data The string to fill in with the data */ - protected void scanPIData(String target, XMLString data) + protected void scanPIData(String target, XMLString data) throws IOException, XNIException { super.scanPIData(target, data); @@ -672,8 +672,8 @@ } } // scanComment() - - /** + + /** * Scans a start element. This method will handle the binding of * namespace information and notifying the handler of the start * of the element. @@ -681,7 +681,7 @@ * <pre> * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' * [40] STag ::= '<' Name (S Attribute)* S? '>' - * </pre> + * </pre> * <p> * <strong>Note:</strong> This method assumes that the leading * '<' character has been consumed. @@ -694,7 +694,7 @@ * @returns True if element is empty. (i.e. It matches * production [44]. */ - protected boolean scanStartElement() + protected boolean scanStartElement() throws IOException, XNIException { if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanStartElement()"); @@ -708,6 +708,8 @@ } String rawname = fElementQName.rawname; + //if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanStartElement() " + rawname ); + // push element stack fCurrentElement = fElementStack.pushElement(fElementQName); @@ -746,7 +748,15 @@ if (fDocumentHandler != null) { if (empty) { fDocumentHandler.emptyElement(fElementQName, fAttributes, null); - handleEndElement(fElementQName, true); + //decrease the markup depth.. 
+ fMarkupDepth--; + // check that this element was opened in the same entity + if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { + reportFatalError("ElementEntityMismatch", + new Object[]{fCurrentElement.rawname}); + } + //pop the element off the stack.. + fElementStack.popElement(fElementQName); } else { fDocumentHandler.startElement(fElementQName, fAttributes, null); @@ -758,14 +768,14 @@ } // scanStartElement():boolean - /** + /** * Scans an attribute. * <p> * <pre> * [41] Attribute ::= Name Eq AttValue - * </pre> + * </pre> * <p> - * <strong>Note:</strong> This method assumes that the next + * <strong>Note:</strong> This method assumes that the next * character on the stream is the first character of the attribute * name. * <p> @@ -775,7 +785,7 @@ * * @param attributes The attributes list for the scanned attribute. */ - protected void scanAttribute(XMLAttributes attributes) + protected void scanAttribute(XMLAttributes attributes) throws IOException, XNIException { if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanAttribute()"); @@ -792,8 +802,7 @@ fEntityScanner.skipSpaces(); if (!fEntityScanner.skipChar('=')) { reportFatalError("EqRequiredInAttribute", - new Object[]{fCurrentElement.rawname, - fAttributeQName.rawname}); + new Object[]{fAttributeQName.rawname}); } fEntityScanner.skipSpaces(); @@ -808,7 +817,7 @@ fAttributeQName.rawname}); } //REVISIT: one more case needs to be included: external PE and standalone is no - boolean isVC = fHasExternalDTD && !fStandalone; + boolean isVC = fHasExternalDTD && !fStandalone; scanAttributeValue(fString, fString2, fAttributeQName.rawname, attributes, oldLen, isVC); @@ -872,8 +881,8 @@ } // scanContent():int - /** - * Scans a CDATA section. + /** + * Scans a CDATA section. * <p> * <strong>Note:</strong> This method uses the fString and * fStringBuffer variables. @@ -883,9 +892,9 @@ * * @return True if CDATA is completely scanned. 
*/ - protected boolean scanCDATASection(boolean complete) + protected boolean scanCDATASection(boolean complete) throws IOException, XNIException { - + // call handler if (fDocumentHandler != null) { fDocumentHandler.startCDATA(null); @@ -965,29 +974,46 @@ protected int scanEndElement() throws IOException, XNIException { if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanEndElement()"); - // name - if (fNamespaces) { - if (!fEntityScanner.scanQName(fElementQName)) { - fElementQName.clear(); - } - } - else { - String name = fEntityScanner.scanName(); - fElementQName.setValues(null, name, name, null); + fElementStack.popElement(fElementQName) ; + + // Take advantage of the fact that next string _should_ be "fElementQName.rawName", + //In scanners most of the time is consumed on checks done for XML characters, we can + // optimize on it and avoid the checks done for endElement, + //we will also avoid symbol table lookup - [EMAIL PROTECTED] + + // this should work both for namespace processing true or false... + + //REVISIT: if the string is not the same as expected.. we need to do better error handling.. + //We can skip this for now... In any case if the string doesn't match -- document is not well formed. + if ( ! 
fEntityScanner.scanEndElementName( fElementQName.rawname ) ) { + reportFatalError("ETagRequired", + new Object[]{ fElementQName.rawname }); } + // end fEntityScanner.skipSpaces(); if (!fEntityScanner.skipChar('>')) { reportFatalError("ETagUnterminated", new Object[]{fElementQName.rawname}); } + + //we have increased the depth for two markup "<" characters + fMarkupDepth--; fMarkupDepth--; - // handle end element - int depth = handleEndElement(fElementQName, false); - if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanEndElement(): "+depth); - return depth; + // check that this element was opened in the same entity + if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { + reportFatalError("ElementEntityMismatch", + new Object[]{fCurrentElement.rawname}); + } + + // call handler + if (fDocumentHandler != null ) { + fDocumentHandler.endElement(fElementQName, null); + } + + return fMarkupDepth; } // scanEndElement():int @@ -998,7 +1024,7 @@ * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' * </pre> */ - protected void scanCharReference() + protected void scanCharReference() throws IOException, XNIException { fStringBuffer2.clear(); @@ -1065,10 +1091,10 @@ //REVISIT: one more case needs to be included: external PE and standalone is no if ( fHasExternalDTD && !fStandalone) { if (fValidation) - fErrorReporter.reportError( XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", + fErrorReporter.reportError( XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); } - else + else reportFatalError("EntityNotDeclared", new Object[]{name}); } fEntityManager.startEntity(name, false); @@ -1078,9 +1104,9 @@ // utility methods - /** + /** * Calls document handler with a single character resulting from - * built-in entity resolution. + * built-in entity resolution. 
* * @param c * @param entity built-in name @@ -1090,18 +1116,18 @@ if (fNotifyBuiltInRefs) { fDocumentHandler.startGeneralEntity(entity, null, null, null); } - + fSingleChar[0] = c; fString.setValues(fSingleChar, 0, 1); fDocumentHandler.characters(fString, null); - + if (fNotifyBuiltInRefs) { fDocumentHandler.endGeneralEntity(entity, null); } } } // handleCharacter(char) - /** + /** * Handles the end element. This method will make sure that * the end element name matches the current element and notify * the handler about the end of the element and the end of any @@ -1118,7 +1144,7 @@ * upon notification. * */ - protected int handleEndElement(QName element, boolean isEmpty) + protected int handleEndElement(QName element, boolean isEmpty) throws XNIException { fMarkupDepth--; @@ -1127,6 +1153,9 @@ reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } + + /** PERFCHANGES-OFF + // make sure the elements match QName startElement = fQName; fElementStack.popElement(startElement); @@ -1139,7 +1168,9 @@ if (fNamespaces) { element.uri = startElement.uri; } - + + PERFCHANGES-OFF */ + // call handler if (fDocumentHandler != null && !isEmpty) { fDocumentHandler.endElement(element, null); @@ -1266,13 +1297,13 @@ // Public methods // - /** - * Pushes an element on the stack. + /** + * Pushes an element on the stack. * <p> * <strong>Note:</strong> The QName values are copied into the * stack. In other words, the caller does <em>not</em> orphan * the element to the stack. Also, the QName object returned - * is <em>not</em> orphaned to the caller. It should be + * is <em>not</em> orphaned to the caller. It should be * considered read-only. * * @param element The element to push onto the stack. @@ -1292,7 +1323,7 @@ return fElements[fSize++]; } // pushElement(QName):QName - /** + /** * Pops an element off of the stack by setting the values of * the specified QName. 
* <p> @@ -1311,7 +1342,7 @@ } // class ElementStack - /** + /** * This interface defines an XML "event" dispatching model. Classes * that implement this interface are responsible for scanning parts * of the XML document and dispatching callbacks. @@ -1324,19 +1355,19 @@ // Dispatcher methods // - /** + /** * Dispatch an XML "event". * * @param complete True if this dispatcher is intended to scan - * and dispatch as much as possible. + * and dispatch as much as possible. * - * @returns True if there is more to dispatch either from this + * @returns True if there is more to dispatch either from this * or a another dispatcher. * * @throws IOException Thrown on i/o error. * @throws XNIException Thrown on parse error. */ - public boolean dispatch(boolean complete) + public boolean dispatch(boolean complete) throws IOException, XNIException; } // interface Dispatcher @@ -1354,19 +1385,19 @@ // Dispatcher methods // - /** + /** * Dispatch an XML "event". * * @param complete True if this dispatcher is intended to scan - * and dispatch as much as possible. + * and dispatch as much as possible. * - * @returns True if there is more to dispatch either from this + * @returns True if there is more to dispatch either from this * or a another dispatcher. * * @throws IOException Thrown on i/o error. * @throws XNIException Thrown on parse error. 
*/ - public boolean dispatch(boolean complete) + public boolean dispatch(boolean complete) throws IOException, XNIException { try { @@ -1463,12 +1494,12 @@ case SCANNER_STATE_COMMENT: { scanComment(); setScannerState(SCANNER_STATE_CONTENT); - break; + break; } case SCANNER_STATE_PI: { scanPI(); setScannerState(SCANNER_STATE_CONTENT); - break; + break; } case SCANNER_STATE_CDATA: { scanCDATASection(complete); @@ -1506,12 +1537,14 @@ String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); scanPIData(target, fString); } - + // standard text declaration else { scanXMLDeclOrTextDecl(true); } } + // now that we've straightened out the readers, we can read in chunks: + fEntityManager.fCurrentEntity.mayReadChunks = true; setScannerState(SCANNER_STATE_CONTENT); break; } @@ -1529,7 +1562,7 @@ } } while (complete || again); } - + // premature end of file catch (EOFException e) { endOfFileHook(e); @@ -1549,20 +1582,20 @@ // NOTE: These hook methods are added so that the full document // scanner can share the majority of code with this class. - /** + /** * Scan for DOCTYPE hook. This method is a hook for subclasses - * to add code to handle scanning for a the "DOCTYPE" string + * to add code to handle scanning for a the "DOCTYPE" string * after the string "<!" has been scanned. - * + * * @returns True if the "DOCTYPE" was scanned; false if "DOCTYPE" * was not scanned. */ - protected boolean scanForDoctypeHook() + protected boolean scanForDoctypeHook() throws IOException, XNIException { return false; } // scanForDoctypeHook():boolean - /** + /** * Element depth iz zero. This methos is a hook for subclasses * to add code to handle when the element depth hits zero. When * scanning a document fragment, an element depth of zero is @@ -1571,7 +1604,7 @@ * the document after the end of the document's root element. 
* * @returns True if the caller should stop and return true which - * allows the scanner to switch to a new scanning + * allows the scanner to switch to a new scanning * dispatcher. A return value of false indicates that * the content dispatcher should continue as normal. */ @@ -1588,7 +1621,7 @@ * the scanner must handle the root element specially. * * @returns True if the caller should stop and return true which - * allows the scanner to switch to a new scanning + * allows the scanner to switch to a new scanning * dispatcher. A return value of false indicates that * the content dispatcher should continue as normal. */ @@ -1604,7 +1637,7 @@ * However, when scanning a full XML document, an end of file * is always premature. */ - protected void endOfFileHook(EOFException e) + protected void endOfFileHook(EOFException e) throws IOException, XNIException { // NOTE: An end of file is only only an error if we were No revision No revision 1.7.2.1 +39 -3 xml-xerces/java/src/org/apache/xerces/util/SymbolTable.java Index: SymbolTable.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/SymbolTable.java,v retrieving revision 1.7 retrieving revision 1.7.2.1 diff -u -r1.7 -r1.7.2.1 --- SymbolTable.java 7 Feb 2002 22:15:09 -0000 1.7 +++ SymbolTable.java 9 Aug 2002 09:36:58 -0000 1.7.2.1 @@ -93,18 +93,27 @@ // /** Default table size. */ - protected static final int TABLE_SIZE = 101; + protected static final int TABLE_SIZE = 173; // // Data // + protected int entries; + /** Buckets. */ protected Entry[] fBuckets = null; // actual table size protected int fTableSize; + //no. 
of entries in table + protected int count ; + + static final float loadFactor = .75f ; + + protected int threshold ; + // // Constructors // @@ -118,6 +127,7 @@ public SymbolTable(int tableSize) { fTableSize = tableSize; fBuckets = new Entry[fTableSize]; + threshold = (int)(tableSize * loadFactor) ; } // @@ -148,9 +158,17 @@ } } + /** + if(count > threshold){ + rehash(); + bucket = hash(symbol) % fTableSize ; + } + */ + // create new entry Entry entry = new Entry(symbol, fBuckets[bucket]); fBuckets[bucket] = entry; + count++ ; return entry.symbol; } // addSymbol(String):String @@ -179,10 +197,16 @@ return entry.symbol; } } - + /** + if(count > threshold){ + rehash(); + bucket = hash(buffer, offset, length) % fTableSize ; + } + */ // add new entry Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]); fBuckets[bucket] = entry; + count++ ; return entry.symbol; } // addSymbol(char[],int,int):String @@ -206,6 +230,15 @@ } // hash(String):int + //rearrange the table.. it will be done only once... + + //REVISIT: implement this function.. + public void rehash(){ + + //REVISIT: + + }//rehash() + /** * Returns a hashcode value for the specified symbol information. * The value returned by this method must be identical to the value @@ -305,6 +338,9 @@ /** The next entry. */ public Entry next; + + //hashValue + public int hash ; // // Constructors
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]