On 04/03/2014 12:23 PM, Daniel Naber wrote: > On 2014-04-03 01:12, Andriy Rysin wrote: > >> I guess we have two ways to go from here: adjust the tests to load >> files >> and keep them (I am not sure how easy it is - depends on how flexible >> our XMLValidator is) > We're just using standard XML validation, I don't think there's much we > can do (other than catching that specific exception, which would be very > ugly). But not many rules are affected, what about moving those to > grammar.xml? (I know, that's not very elegant either). > Here's the patch for the solution that I think should be acceptable: when validating a language that has multiple grammar files, we extract all unification elements from the first file and prepend them to each of the remaining files. Advantages: * only tests are affected by this change * only languages with multiple grammar XML files are affected * low overhead (re-including only the elements we need)
I would appreciate any feedback. Thanks, Andriy
diff --git a/languagetool-core/src/test/java/org/languagetool/XMLValidator.java b/languagetool-core/src/test/java/org/languagetool/XMLValidator.java index e113dbb..ce9c6d4 100644 --- a/languagetool-core/src/test/java/org/languagetool/XMLValidator.java +++ b/languagetool-core/src/test/java/org/languagetool/XMLValidator.java @@ -27,15 +27,22 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; +import javax.xml.transform.Source; +import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamSource; import javax.xml.validation.Schema; import javax.xml.validation.SchemaFactory; import javax.xml.validation.Validator; import org.languagetool.tools.StringTools; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -123,6 +130,61 @@ public final class XMLValidator { /** * Validate XML file using the given XSD. Throws an exception on error. 
+ * @param baseFilename File to prepend common parts (unification) from before validating main file + * @param filename File in classpath to validate + * @param xmlSchemaPath XML schema file in classpath + */ + public void validateWithXmlSchema(String baseFilename, String filename, String xmlSchemaPath) throws IOException { + try { + final InputStream xmlStream = this.getClass().getResourceAsStream(filename); + final InputStream baseXmlStream = this.getClass().getResourceAsStream(baseFilename); + if (xmlStream == null || baseXmlStream == null ) { + throw new IOException("File not found in classpath: " + filename); + } + try { + final URL schemaUrl = this.getClass().getResource(xmlSchemaPath); + if (schemaUrl == null) { + throw new IOException("XML schema not found in classpath: " + xmlSchemaPath); + } + validateInternal(mergeIntoSource(baseXmlStream, xmlStream, this.getClass().getResource(xmlSchemaPath)), schemaUrl); + } finally { + xmlStream.close(); + } + } catch (Exception e) { + throw new IOException("Cannot load or parse '" + filename + "'", e); + } + } + + + private static Source mergeIntoSource(InputStream baseXmlStream, InputStream xmlStream, URL xmlSchema) throws Exception { + DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); + domFactory.setIgnoringComments(true); + domFactory.setValidating(false); + domFactory.setNamespaceAware(true); + +// SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); +// Schema schema = sf.newSchema(xmlSchema); +// domFactory.setSchema(schema); + + DocumentBuilder builder = domFactory.newDocumentBuilder(); + Document baseDoc = builder.parse(baseXmlStream); + Document ruleDoc = builder.parse(xmlStream); + + // Shall this be more generic, i.e. reuse not just unification ??? 
+ NodeList unificationNodes = baseDoc.getElementsByTagName("unification"); + Node ruleNode = ruleDoc.getElementsByTagName("rules").item(0); + Node firstChildRuleNode = ruleNode.getChildNodes().item(1); + + for(int i=0; i<unificationNodes.getLength(); i++) { + Node unificationNode = ruleDoc.importNode(unificationNodes.item(i), true); + ruleNode.insertBefore(unificationNode, firstChildRuleNode); + } + + return new DOMSource(ruleDoc); + } + + /** + * Validate XML file using the given XSD. Throws an exception on error. * @param xml the XML string to be validated * @param xmlSchemaPath XML schema file in classpath * @since 2.3 @@ -171,6 +233,14 @@ public final class XMLValidator { validator.validate(new StreamSource(xml)); } + private void validateInternal(Source xmlSrc, URL xmlSchema) throws SAXException, IOException { + final SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + final Schema schema = sf.newSchema(xmlSchema); + final Validator validator = schema.newValidator(); + validator.setErrorHandler(new ErrorHandler()); + validator.validate(xmlSrc); + } + /** * XML handler that throws exception on error and warning, does nothing otherwise. */ diff --git a/languagetool-core/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java b/languagetool-core/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java index 313857a..433f988 100644 --- a/languagetool-core/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java +++ b/languagetool-core/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java @@ -163,7 +163,14 @@ public class PatternRuleTest extends TestCase { continue; } try { - validator.validateWithXmlSchema(ruleFilePath, rulesDir + "/rules.xsd"); + // if there are multiple xml grammar files we'll prepend all unification elements + // from the first file to the rest of them + if( grammarFiles.size() > 1 && ! 
grammarFiles.get(0).equals(grammarFile) ) { + validator.validateWithXmlSchema(rulesDir + "/" + grammarFiles.get(0), ruleFilePath, rulesDir + "/rules.xsd"); + } + else { + validator.validateWithXmlSchema(ruleFilePath, rulesDir + "/rules.xsd"); + } } finally { xmlStream.close(); }
------------------------------------------------------------------------------
_______________________________________________ Languagetool-devel mailing list Languagetool-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-devel