Revision: 7849
          http://languagetool.svn.sourceforge.net/languagetool/?rev=7849&view=rev
Author:   dnaber
Date:     2012-08-12 21:05:35 +0000 (Sun, 12 Aug 2012)
Log Message:
-----------
partial bugfix for sourceforge bug 3555372: don't remove XML/HTML elements unless the new --xmlfilter option is set
Modified Paths: -------------- trunk/JLanguageTool/CHANGES.txt trunk/JLanguageTool/src/java/org/languagetool/Main.java trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineOptions.java trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineParser.java trunk/JLanguageTool/src/test/org/languagetool/MainTest.java Modified: trunk/JLanguageTool/CHANGES.txt =================================================================== --- trunk/JLanguageTool/CHANGES.txt 2012-08-12 19:59:04 UTC (rev 7848) +++ trunk/JLanguageTool/CHANGES.txt 2012-08-12 21:05:35 UTC (rev 7849) @@ -40,6 +40,10 @@ -Support for Swedish has been re-enabled after it had been disabled in LanguageTool 1.7. + -bugfix for command line: We removed XML from even plain text input. Now XML/HTML elements + are only filtered out if the new --xmlfilter option is specified. Note that there's still + a bug that can screw up position information with that option. + -introduced a file resources/<lang>/hunspell/ignore.txt with words that the spell checker will ignore Modified: trunk/JLanguageTool/src/java/org/languagetool/Main.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/Main.java 2012-08-12 19:59:04 UTC (rev 7848) +++ trunk/JLanguageTool/src/java/org/languagetool/Main.java 2012-08-12 21:05:35 UTC (rev 7849) @@ -146,7 +146,7 @@ } private void runOnFile(final String filename, final String encoding, - final boolean listUnknownWords) throws IOException { + final boolean listUnknownWords, final boolean xmlFiltering) throws IOException { boolean oneTime = false; if (!"-".equals(filename)) { if (autoDetect) { @@ -165,24 +165,23 @@ oneTime = file.length() < MAX_FILE_SIZE || bitextMode; } if (oneTime) { - runOnFileInOneGo(filename, encoding, listUnknownWords); + runOnFileInOneGo(filename, encoding, listUnknownWords, xmlFiltering); } else { runOnFileLineByLine(filename, encoding, listUnknownWords); } } - private void 
runOnFileInOneGo(String filename, String encoding, boolean listUnknownWords) throws IOException { + private void runOnFileInOneGo(String filename, String encoding, boolean listUnknownWords, boolean xmlFiltering) throws IOException { if (bitextMode) { //TODO: add parameter to set different readers final TabBitextReader reader = new TabBitextReader(filename, encoding); if (applySuggestions) { Tools.correctBitext(reader, srcLt, lt, bRules); } else { - Tools.checkBitext(reader, srcLt, lt, bRules, - apiFormat); + Tools.checkBitext(reader, srcLt, lt, bRules, apiFormat); } } else { - final String text = getFilteredText(filename, encoding); + final String text = getFilteredText(filename, encoding, xmlFiltering); if (applySuggestions) { System.out.print(Tools.correctText(text, lt)); } else if (profileRules) { @@ -389,18 +388,17 @@ } private void runRecursive(final String filename, final String encoding, - final boolean listUnknown) throws IOException, ParserConfigurationException, SAXException { + final boolean listUnknown, final boolean xmlFiltering) throws IOException, ParserConfigurationException, SAXException { final File dir = new File(filename); if (!dir.isDirectory()) { - throw new IllegalArgumentException(dir.getAbsolutePath() - + " is not a directory, cannot use recursion"); + throw new IllegalArgumentException(dir.getAbsolutePath() + " is not a directory, cannot use recursion"); } final File[] files = dir.listFiles(); for (final File file : files) { if (file.isDirectory()) { - runRecursive(file.getAbsolutePath(), encoding, listUnknown); + runRecursive(file.getAbsolutePath(), encoding, listUnknown, xmlFiltering); } else { - runOnFile(file.getAbsolutePath(), encoding, listUnknown); + runOnFile(file.getAbsolutePath(), encoding, listUnknown, xmlFiltering); } } } @@ -409,16 +407,19 @@ * Loads filename and filters out XML. Note that the XML * filtering can lead to incorrect positions in the list of matching rules. 
*/ - private String getFilteredText(final String filename, final String encoding) throws IOException { + private String getFilteredText(final String filename, final String encoding, boolean xmlFiltering) throws IOException { if (verbose) { lt.setOutput(System.err); } if (!apiFormat && !applySuggestions) { System.out.println("Working on " + filename + "..."); } - final String fileContents = StringTools.readFile(new FileInputStream( - filename), encoding); - return StringTools.filterXML(fileContents); + final String fileContents = StringTools.readFile(new FileInputStream(filename), encoding); + if (xmlFiltering) { + return StringTools.filterXML(fileContents); + } else { + return fileContents; + } } private void changeLanguage(Language language, Language motherTongue, @@ -485,9 +486,9 @@ prg.setBitextMode(options.getMotherTongue(), options.getDisabledRules(), options.getEnabledRules()); } if (options.isRecursive()) { - prg.runRecursive(options.getFilename(), options.getEncoding(), options.isListUnknown()); + prg.runRecursive(options.getFilename(), options.getEncoding(), options.isListUnknown(), options.isXmlFiltering()); } else { - prg.runOnFile(options.getFilename(), options.getEncoding(), options.isListUnknown()); + prg.runOnFile(options.getFilename(), options.getEncoding(), options.isListUnknown(), options.isXmlFiltering()); } prg.cleanUp(); } Modified: trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineOptions.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineOptions.java 2012-08-12 19:59:04 UTC (rev 7848) +++ trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineOptions.java 2012-08-12 21:05:35 UTC (rev 7849) @@ -36,6 +36,7 @@ private boolean profile = false; private boolean bitext = false; private boolean autoDetect = false; + private boolean xmlFiltering = false; private Language language = null; private Language motherTongue = null; 
private String encoding = null; @@ -179,4 +180,11 @@ this.enabledRules = enabledRules; } + public boolean isXmlFiltering() { + return xmlFiltering; + } + + public void setXmlFiltering(boolean xmlFiltering) { + this.xmlFiltering = xmlFiltering; + } } Modified: trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineParser.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineParser.java 2012-08-12 19:59:04 UTC (rev 7848) +++ trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineParser.java 2012-08-12 21:05:35 UTC (rev 7849) @@ -111,7 +111,9 @@ if (options.isTaggerOnly()) { throw new IllegalArgumentException("Tagging makes no sense for profiling."); } - } else if (i == args.length - 1) { + } else if (args[i].equals("--xmlfilter")) { + options.setXmlFiltering(true); + } else if (i == args.length - 1) { options.setFilename(args[i]); } else { throw new IllegalArgumentException("Unknown option: " + args[i]); @@ -140,7 +142,8 @@ + " -p, --profile print performance measurements\n" + " -v, --verbose print text analysis (sentences, part-of-speech tags) to STDERR\n" + " --version print LanguageTool version number and exit\n" - + " -a, --apply automatically apply suggestions if available, printing result to STDOUT"); + + " -a, --apply automatically apply suggestions if available, printing result to STDOUT" + + " --xmlfilter remove XML/HTML elements from input before checking"); } private void checkArguments(String option, int argParsingPos, String[] args) { Modified: trunk/JLanguageTool/src/test/org/languagetool/MainTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/MainTest.java 2012-08-12 19:59:04 UTC (rev 7848) +++ trunk/JLanguageTool/src/test/org/languagetool/MainTest.java 2012-08-12 21:05:35 UTC (rev 7849) @@ -470,4 +470,31 @@ 
assertTrue(output.contains("MORFOLOGIK_RULE_EN_US")); } + public void testNoXmlFilteringByDefault() throws Exception { + File input = populateFile("This < is is > filtered."); + String[] args = new String[] {input.getAbsolutePath()}; + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.contains("ENGLISH_WORD_REPEAT_RULE")); + } + + public void testXmlFiltering() throws Exception { + File input = populateFile("This < is is > filtered."); + String[] args = new String[] {"--xmlfilter", input.getAbsolutePath()}; + Main.main(args); + String output = new String(this.out.toByteArray()); + assertFalse(output.contains("ENGLISH_WORD_REPEAT_RULE")); + } + + private File populateFile(String content) throws IOException { + File tempFile = createTempFile(); + PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(tempFile), "UTF-8")); + try { + writer.println(content); + } finally { + writer.close(); + } + return tempFile; + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs