Revision: 8070 http://languagetool.svn.sourceforge.net/languagetool/?rev=8070&view=rev Author: dnaber Date: 2012-09-19 22:28:57 +0000 (Wed, 19 Sep 2012) Log Message: ----------- adding offset (i.e. the character count at which the error starts) to the XML output; this is often easier for applications to use than row/column-based values
Modified Paths: -------------- trunk/JLanguageTool/CHANGES.txt trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java Modified: trunk/JLanguageTool/CHANGES.txt =================================================================== --- trunk/JLanguageTool/CHANGES.txt 2012-09-19 19:42:37 UTC (rev 8069) +++ trunk/JLanguageTool/CHANGES.txt 2012-09-19 22:28:57 UTC (rev 8070) @@ -90,7 +90,7 @@ java -cp LanguageTool.jar org.languagetool.server.HTTPServer --public -HTTP API and XML output: extended XML to include the version and build date - of LanguageTool and the category of each match + of LanguageTool and the category and offset of each match -The word tokenizer now considers the following characters as word separator: | (pipe) and` (backtick). Modified: trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java =================================================================== --- trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java 2012-09-19 19:42:37 UTC (rev 8069) +++ trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java 2012-09-19 22:28:57 UTC (rev 8070) @@ -367,6 +367,7 @@ context = context.replaceAll("[\n\r]", " "); xml.append(" context=\"" + StringTools.escapeXML(context) + "\""); xml.append(" contextoffset=\"" + contextOffset + "\""); + xml.append(" offset=\"" + match.getFromPos() + "\""); xml.append(" errorlength=\"" + (match.getToPos() - match.getFromPos()) + "\""); if (match.getRule().getUrl() != null) { xml.append(" url=\"" Modified: trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd =================================================================== --- trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd 
2012-09-19 19:42:37 UTC (rev 8069) +++ trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd 2012-09-19 22:28:57 UTC (rev 8070) @@ -1,7 +1,6 @@ <!-- Simple XML output format for the 'api' option and the HTTP -interface of LanguageTool. Version 1.1. -Daniel Naber (http://www.danielnaber.de), 2012-08-13 +interface of LanguageTool (http://www.languagetool.org). Version 1.2 of the XML, 2012-09-19 --> <!ELEMENT matches (error)*> @@ -17,7 +16,6 @@ <!ATTLIST error fromy CDATA #REQUIRED> <!-- The column in which the error starts. Counting starts at 0. --> <!ATTLIST error fromx CDATA #REQUIRED> - <!-- The line in which the error ends: --> <!ATTLIST error toy CDATA #REQUIRED> <!-- The column in which the error ends: --> @@ -32,31 +30,28 @@ <!-- The message describing the error that will be displayed to the user. --> <!ATTLIST error msg CDATA #REQUIRED> -<!-- -One or more suggestions to fix the error. If there is more than one -suggestion, the strings are separated by a "#" character: ---> +<!-- One or more suggestions to fix the error. If there is more than one +suggestion, the strings are separated by a "#" character: --> <!ATTLIST error replacements CDATA #IMPLIED> -<!-- -The context or sentence in which the error occurs. ---> +<!-- The context or sentence in which the error occurs. --> <!ATTLIST error context CDATA #REQUIRED> <!-- The position of the start of the error in the 'context' -attribute. Counting starts at 0. ---> +attribute. Counting starts at 0. --> <!ATTLIST error contextoffset CDATA #REQUIRED> -<!-- The length in characters of the error in the 'context' -attribute, starting at 'contextoffset'. ---> +<!-- The position of the start of the error in the input text. +Counting starts at 0. (added in LanguageTool 1.9) --> +<!ATTLIST error offset CDATA #REQUIRED> + +<!-- The length of the error in the input text. 
--> <!ATTLIST error errorlength CDATA #REQUIRED> -<!-- The url with additional information that may be given --> +<!-- The url with additional information that may be given. --> <!ATTLIST error url CDATA #IMPLIED> -<!-- The category of the match, if any : --> +<!-- The category of the match, if any (added in LanguageTool 1.9). --> <!ATTLIST error category CDATA #IMPLIED> <!-- Note that the XML output may contain XML comments with Modified: trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java =================================================================== --- trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java 2012-09-19 19:42:37 UTC (rev 8069) +++ trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java 2012-09-19 22:28:57 UTC (rev 8070) @@ -187,14 +187,16 @@ } public void testEnglishStdIn4() throws Exception { - System.setIn(this.getClass().getResourceAsStream(ENGLISH_TEST_FILE)); - String[] args = new String[] {"-l", "en", "--api", "-"}; + System.setIn(this.getClass().getResourceAsStream(ENGLISH_TEST_FILE)); + String[] args = new String[] {"-l", "en", "--api", "-"}; - Main.main(args); - String output = new String(this.out.toByteArray()); - assertTrue(output.contains("<error fromy=\"4\" fromx=\"5\" toy=\"4\" tox=\"10\" " + - "ruleId=\"ENGLISH_WORD_REPEAT_RULE\" msg=\"Possible typo: you repeated a word\" replacements=\"is\" " + - "context=\"This is is a test of language tool. \" contextoffset=\"5\" errorlength=\"5\" category=\"Miscellaneous\"/>")); + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue("Got: " + output, output.contains("<error fromy=\"4\" fromx=\"5\" toy=\"4\" tox=\"10\" " + + "ruleId=\"ENGLISH_WORD_REPEAT_RULE\" msg=\"Possible typo: you repeated a word\" replacements=\"is\" " + + "context=\"This is is a test of language tool. \" contextoffset=\"5\" offset=\"5\" errorlength=\"5\" category=\"Miscellaneous\"/>")); + // note: the offset is relative to the sentence... 
this seems wrong - it happens because of the way + // the command line client feeds the data into the check() methods. } //test line mode vs. para mode @@ -294,7 +296,8 @@ assertTrue(output.indexOf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>") == 0); assertTrue(output.contains("<error fromy=\"0\" fromx=\"8\" toy=\"0\" tox=\"10\" ruleId=\"EN_A_VS_AN\" " + "msg=\"Use 'a' instead of 'an' if the following word doesn't start with a vowel sound, e.g. 'a sentence', " + - "'a university'\" replacements=\"a\" context=\"This is an test. This is a test of of language tool. ...\" contextoffset=\"8\" errorlength=\"2\" category=\"Miscellaneous\"/>")); + "'a university'\" replacements=\"a\" context=\"This is an test. This is a test of of language tool. ...\" " + + "contextoffset=\"8\" offset=\"8\" errorlength=\"2\" category=\"Miscellaneous\"/>")); } public void testGermanFileWithURL() throws Exception { @@ -328,7 +331,7 @@ assertTrue(output.contains("<error fromy=\"0\" fromx=\"8\" toy=\"0\" tox=\"20\" ruleId=\"BRAK_PRZECINKA_KTORY\" subId=\"5\"")); //This tests whether XML encoding is actually UTF-8: assertTrue(output.contains("msg=\"Brak przecinka w tym fragmencie zdania. Przecinek prawdopodobnie należy postawić tak: 'świnia, która'.\" replacements=\"świnia, która\" ")); - assertTrue(output.contains("context=\"To jest świnia która się ślini. \" contextoffset=\"8\" errorlength=\"12\" category=\"Błędy interpunkcyjne\"/>")); + assertTrue(output.contains("context=\"To jest świnia która się ślini. 
\" contextoffset=\"8\" offset=\"8\" errorlength=\"12\" category=\"Błędy interpunkcyjne\"/>")); } public void testPolishLineNumbers() throws Exception { Modified: trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java =================================================================== --- trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java 2012-09-19 19:42:37 UTC (rev 8069) +++ trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java 2012-09-19 22:28:57 UTC (rev 8070) @@ -179,7 +179,8 @@ final Matcher matcher = matchesPattern.matcher(xml); assertTrue(matcher.matches()); assertTrue(xml.contains(">\n" + - "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" replacements=\"\" context=\"...s is an test...\" contextoffset=\"8\" errorlength=\"2\"/>\n" + + "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" " + + "replacements=\"\" context=\"...s is an test...\" contextoffset=\"8\" offset=\"8\" errorlength=\"2\"/>\n" + "</matches>\n")); } @@ -197,7 +198,8 @@ matches.add(match); final String xml = StringTools.ruleMatchesToXML(matches, text, 5, StringTools.XmlPrintMode.NORMAL_XML); assertTrue(xml.contains(">\n" + - "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"MY_ID\" msg=\"myMessage\" replacements=\"\" context=\"...s is a test ...\" contextoffset=\"8\" errorlength=\"2\" category=\"MyCategory\"/>\n" + + "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"MY_ID\" msg=\"myMessage\" " + + "replacements=\"\" context=\"...s is a test ...\" contextoffset=\"8\" offset=\"8\" errorlength=\"2\" category=\"MyCategory\"/>\n" + "</matches>\n")); } @@ -221,7 +223,8 @@ matches.add(match); final String xml = StringTools.ruleMatchesToXML(matches, text, 5, StringTools.XmlPrintMode.NORMAL_XML); assertTrue(xml.contains(">\n" + - "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"EN_A_VS_AN\" 
msg=\"myMessage\" replacements=\"\" context=\"...s is an test...\" contextoffset=\"8\" errorlength=\"2\" url=\"http://server.org?id=1&foo=bar\"/>\n" + + "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" " + + "replacements=\"\" context=\"...s is an test...\" contextoffset=\"8\" offset=\"8\" errorlength=\"2\" url=\"http://server.org?id=1&foo=bar\"/>\n" + "</matches>\n")); } @@ -236,7 +239,8 @@ matches.add(match); final String xml = StringTools.ruleMatchesToXML(matches, text, 5, StringTools.XmlPrintMode.NORMAL_XML); assertTrue(xml.contains(">\n" + - "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" replacements=\"\" context=\"... is "an test...\" contextoffset=\"8\" errorlength=\"2\"/>\n" + + "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" " + + "replacements=\"\" context=\"... is "an test...\" contextoffset=\"8\" offset=\"9\" errorlength=\"2\"/>\n" + "</matches>\n")); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-commits mailing list Languagetool-commits@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-commits