On 04/03/2014 12:23 PM, Daniel Naber wrote:
> On 2014-04-03 01:12, Andriy Rysin wrote:
>
>> I guess we have two ways to go from here: adjust the tests to load 
>> files
>> and keep them (I am not sure how easy it is - depends on how flexible
>> our XMLValidator is)
> We're just using standard XML validation, I don't think there's much we 
> can do (other than catching that specific exception, which would be very 
> ugly). But not many rules are affected, what about moving those to 
> grammar.xml? (I know, that's not very elegant either).
>
Here's a patch for a solution that I think should be acceptable: when a
language has multiple grammar files, the validation step extracts all
unification elements from the first file and prepends them to each of the
other files before validating them.
Advantages:
* only tests are affected by this change
* only languages with multiple grammar XML files are affected
* low overhead (re-including only the elements we need)

I would appreciate any feedback,
Thanks
Andriy
diff --git a/languagetool-core/src/test/java/org/languagetool/XMLValidator.java b/languagetool-core/src/test/java/org/languagetool/XMLValidator.java
index e113dbb..ce9c6d4 100644
--- a/languagetool-core/src/test/java/org/languagetool/XMLValidator.java
+++ b/languagetool-core/src/test/java/org/languagetool/XMLValidator.java
@@ -27,15 +27,22 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.Source;
+import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamSource;
 import javax.xml.validation.Schema;
 import javax.xml.validation.SchemaFactory;
 import javax.xml.validation.Validator;
 
 import org.languagetool.tools.StringTools;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 import org.xml.sax.SAXParseException;
@@ -123,6 +130,61 @@ public final class XMLValidator {
 
   /**
    * Validate XML file using the given XSD. Throws an exception on error.
+   * @param baseFilename File to prepend common parts (unification) from before validating main file
+   * @param filename File in classpath to validate
+   * @param xmlSchemaPath XML schema file in classpath
+   */
+  public void validateWithXmlSchema(String baseFilename, String filename, String xmlSchemaPath) throws IOException {
+    try {
+      final InputStream xmlStream = this.getClass().getResourceAsStream(filename);
+      final InputStream baseXmlStream = this.getClass().getResourceAsStream(baseFilename);
+      if (xmlStream == null || baseXmlStream == null ) {
+        throw new IOException("File not found in classpath: " + filename);
+      }
+      try {
+        final URL schemaUrl = this.getClass().getResource(xmlSchemaPath);
+        if (schemaUrl == null) {
+          throw new IOException("XML schema not found in classpath: " + xmlSchemaPath);
+        }
+        validateInternal(mergeIntoSource(baseXmlStream, xmlStream, this.getClass().getResource(xmlSchemaPath)), schemaUrl);
+      } finally {
+        xmlStream.close();
+      }
+    } catch (Exception e) {
+      throw new IOException("Cannot load or parse '" + filename + "'", e);
+    }
+  }
+
+
+  private static Source mergeIntoSource(InputStream baseXmlStream, InputStream xmlStream, URL xmlSchema) throws Exception {
+    DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
+    domFactory.setIgnoringComments(true);
+    domFactory.setValidating(false);
+    domFactory.setNamespaceAware(true);
+
+//    SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
+//    Schema schema = sf.newSchema(xmlSchema);
+//    domFactory.setSchema(schema);
+    
+    DocumentBuilder builder = domFactory.newDocumentBuilder();
+    Document baseDoc = builder.parse(baseXmlStream);
+    Document ruleDoc = builder.parse(xmlStream);
+
+    // Shall this be more generic, i.e. reuse not just unification ???
+    NodeList unificationNodes = baseDoc.getElementsByTagName("unification");
+    Node ruleNode = ruleDoc.getElementsByTagName("rules").item(0);
+    Node firstChildRuleNode = ruleNode.getChildNodes().item(1);
+
+    for(int i=0; i<unificationNodes.getLength(); i++) {
+      Node unificationNode = ruleDoc.importNode(unificationNodes.item(i), true);
+      ruleNode.insertBefore(unificationNode, firstChildRuleNode);
+    }
+
+    return new DOMSource(ruleDoc);
+  }
+  
+  /**
+   * Validate XML file using the given XSD. Throws an exception on error.
    * @param xml the XML string to be validated
    * @param xmlSchemaPath XML schema file in classpath
    * @since 2.3
@@ -171,6 +233,14 @@ public final class XMLValidator {
     validator.validate(new StreamSource(xml));
   }
 
+  private void validateInternal(Source xmlSrc, URL xmlSchema) throws SAXException, IOException {
+    final SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
+    final Schema schema = sf.newSchema(xmlSchema);
+    final Validator validator = schema.newValidator();
+    validator.setErrorHandler(new ErrorHandler());
+    validator.validate(xmlSrc);
+  }
+
   /**
    * XML handler that throws exception on error and warning, does nothing otherwise.
    */
diff --git a/languagetool-core/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java b/languagetool-core/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java
index 313857a..433f988 100644
--- a/languagetool-core/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java
+++ b/languagetool-core/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java
@@ -163,7 +163,14 @@ public class PatternRuleTest extends TestCase {
         continue;
       }
       try {
-        validator.validateWithXmlSchema(ruleFilePath, rulesDir + "/rules.xsd");
+        // if there are multiple xml grammar files we'll prepend all unification elements 
+        // from the first file to the rest of them 
+        if( grammarFiles.size() > 1 && ! grammarFiles.get(0).equals(grammarFile) ) {
+          validator.validateWithXmlSchema(rulesDir + "/" + grammarFiles.get(0), ruleFilePath, rulesDir + "/rules.xsd");
+        }
+        else {
+          validator.validateWithXmlSchema(ruleFilePath, rulesDir + "/rules.xsd");
+        }
       } finally {
         xmlStream.close();
       }
------------------------------------------------------------------------------
_______________________________________________
Languagetool-devel mailing list
Languagetool-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-devel

Reply via email to