Revision: 7849
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7849&view=rev
Author:   dnaber
Date:     2012-08-12 21:05:35 +0000 (Sun, 12 Aug 2012)
Log Message:
-----------
partial bugfix for sourceforge bug 3555372: don't remove XML/HTML elements 
unless the new --xmlfilter option is set

Modified Paths:
--------------
    trunk/JLanguageTool/CHANGES.txt
    trunk/JLanguageTool/src/java/org/languagetool/Main.java
    
trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineOptions.java
    
trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineParser.java
    trunk/JLanguageTool/src/test/org/languagetool/MainTest.java

Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt     2012-08-12 19:59:04 UTC (rev 7848)
+++ trunk/JLanguageTool/CHANGES.txt     2012-08-12 21:05:35 UTC (rev 7849)
@@ -40,6 +40,10 @@
    -Support for Swedish has been re-enabled after it had been disabled in 
LanguageTool 1.7.
 
 
+ -bugfix for command line: XML/HTML elements were removed even from plain text 
input. Now they
+  are only filtered out if the new --xmlfilter option is specified. Note that 
there's still
+  a bug that can cause incorrect position information with that option.
+
  -introduced a file resources/<lang>/hunspell/ignore.txt with words that the 
spell checker
   will ignore
 

Modified: trunk/JLanguageTool/src/java/org/languagetool/Main.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/Main.java     2012-08-12 
19:59:04 UTC (rev 7848)
+++ trunk/JLanguageTool/src/java/org/languagetool/Main.java     2012-08-12 
21:05:35 UTC (rev 7849)
@@ -146,7 +146,7 @@
   }
 
   private void runOnFile(final String filename, final String encoding,
-      final boolean listUnknownWords) throws IOException {
+      final boolean listUnknownWords, final boolean xmlFiltering) throws 
IOException {
     boolean oneTime = false;
     if (!"-".equals(filename)) {
       if (autoDetect) {
@@ -165,24 +165,23 @@
       oneTime = file.length() < MAX_FILE_SIZE || bitextMode;
     }
     if (oneTime) {
-      runOnFileInOneGo(filename, encoding, listUnknownWords);
+      runOnFileInOneGo(filename, encoding, listUnknownWords, xmlFiltering);
     } else {
       runOnFileLineByLine(filename, encoding, listUnknownWords);
     }
   }
 
-  private void runOnFileInOneGo(String filename, String encoding, boolean 
listUnknownWords) throws IOException {
+  private void runOnFileInOneGo(String filename, String encoding, boolean 
listUnknownWords, boolean xmlFiltering) throws IOException {
     if (bitextMode) {
       //TODO: add parameter to set different readers
       final TabBitextReader reader = new TabBitextReader(filename, encoding);
       if (applySuggestions) {
         Tools.correctBitext(reader, srcLt, lt, bRules);
       } else {
-        Tools.checkBitext(reader, srcLt, lt, bRules,
-          apiFormat);
+        Tools.checkBitext(reader, srcLt, lt, bRules, apiFormat);
       }
     } else {
-      final String text = getFilteredText(filename, encoding);
+      final String text = getFilteredText(filename, encoding, xmlFiltering);
       if (applySuggestions) {
         System.out.print(Tools.correctText(text, lt));
       } else if (profileRules) {
@@ -389,18 +388,17 @@
   }
 
   private void runRecursive(final String filename, final String encoding,
-      final boolean listUnknown) throws IOException, 
ParserConfigurationException, SAXException {
+      final boolean listUnknown, final boolean xmlFiltering) throws 
IOException, ParserConfigurationException, SAXException {
     final File dir = new File(filename);
     if (!dir.isDirectory()) {
-      throw new IllegalArgumentException(dir.getAbsolutePath()
-          + " is not a directory, cannot use recursion");
+      throw new IllegalArgumentException(dir.getAbsolutePath() + " is not a 
directory, cannot use recursion");
     }
     final File[] files = dir.listFiles();
     for (final File file : files) {
       if (file.isDirectory()) {
-        runRecursive(file.getAbsolutePath(), encoding, listUnknown);
+        runRecursive(file.getAbsolutePath(), encoding, listUnknown, 
xmlFiltering);
       } else {
-        runOnFile(file.getAbsolutePath(), encoding, listUnknown);
+        runOnFile(file.getAbsolutePath(), encoding, listUnknown, xmlFiltering);
       }
     }    
   }
@@ -409,16 +407,19 @@
    * Loads filename and filters out XML. Note that the XML
    * filtering can lead to incorrect positions in the list of matching rules.
    */
-  private String getFilteredText(final String filename, final String encoding) 
throws IOException {
+  private String getFilteredText(final String filename, final String encoding, 
boolean xmlFiltering) throws IOException {
     if (verbose) {
       lt.setOutput(System.err);
     }
     if (!apiFormat && !applySuggestions) {
       System.out.println("Working on " + filename + "...");
     }
-    final String fileContents = StringTools.readFile(new FileInputStream(
-        filename), encoding);
-    return StringTools.filterXML(fileContents);
+    final String fileContents = StringTools.readFile(new 
FileInputStream(filename), encoding);
+    if (xmlFiltering) {
+      return StringTools.filterXML(fileContents);
+    } else {
+      return fileContents;
+    }
   }
 
   private void changeLanguage(Language language, Language motherTongue,
@@ -485,9 +486,9 @@
       prg.setBitextMode(options.getMotherTongue(), options.getDisabledRules(), 
options.getEnabledRules());
     }
     if (options.isRecursive()) {
-      prg.runRecursive(options.getFilename(), options.getEncoding(), 
options.isListUnknown());
+      prg.runRecursive(options.getFilename(), options.getEncoding(), 
options.isListUnknown(), options.isXmlFiltering());
     } else {
-      prg.runOnFile(options.getFilename(), options.getEncoding(), 
options.isListUnknown());
+      prg.runOnFile(options.getFilename(), options.getEncoding(), 
options.isListUnknown(), options.isXmlFiltering());
     }
     prg.cleanUp();
   }

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineOptions.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineOptions.java
   2012-08-12 19:59:04 UTC (rev 7848)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineOptions.java
   2012-08-12 21:05:35 UTC (rev 7849)
@@ -36,6 +36,7 @@
   private boolean profile = false;
   private boolean bitext = false;
   private boolean autoDetect = false;
+  private boolean xmlFiltering = false;
   private Language language = null;
   private Language motherTongue = null;
   private String encoding = null;
@@ -179,4 +180,11 @@
     this.enabledRules = enabledRules;
   }
 
+  public boolean isXmlFiltering() {
+    return xmlFiltering;
+  }
+
+  public void setXmlFiltering(boolean xmlFiltering) {
+    this.xmlFiltering = xmlFiltering;
+  }
 }

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineParser.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineParser.java
    2012-08-12 19:59:04 UTC (rev 7848)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/commandline/CommandLineParser.java
    2012-08-12 21:05:35 UTC (rev 7849)
@@ -111,7 +111,9 @@
         if (options.isTaggerOnly()) {
           throw new IllegalArgumentException("Tagging makes no sense for 
profiling.");
         }
-      }  else if (i == args.length - 1) {
+      } else if (args[i].equals("--xmlfilter")) {
+        options.setXmlFiltering(true);
+      } else if (i == args.length - 1) {
         options.setFilename(args[i]);
       } else {
         throw new IllegalArgumentException("Unknown option: " + args[i]);
@@ -140,7 +142,8 @@
             + "  -p, --profile            print performance measurements\n"
             + "  -v, --verbose            print text analysis (sentences, 
part-of-speech tags) to STDERR\n"
             + "  --version                print LanguageTool version number 
and exit\n"
-            + "  -a, --apply              automatically apply suggestions if 
available, printing result to STDOUT");
+            + "  -a, --apply              automatically apply suggestions if 
available, printing result to STDOUT"
+            + "  --xmlfilter              remove XML/HTML elements from input 
before checking");
   }
 
   private void checkArguments(String option, int argParsingPos, String[] args) 
{

Modified: trunk/JLanguageTool/src/test/org/languagetool/MainTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/MainTest.java 2012-08-12 
19:59:04 UTC (rev 7848)
+++ trunk/JLanguageTool/src/test/org/languagetool/MainTest.java 2012-08-12 
21:05:35 UTC (rev 7849)
@@ -470,4 +470,31 @@
     assertTrue(output.contains("MORFOLOGIK_RULE_EN_US"));
   }
 
+  public void testNoXmlFilteringByDefault() throws Exception {
+    File input = populateFile("This < is is > filtered.");
+    String[] args = new String[] {input.getAbsolutePath()};
+    Main.main(args);
+    String output = new String(this.out.toByteArray());
+    assertTrue(output.contains("ENGLISH_WORD_REPEAT_RULE"));
+  }
+
+  public void testXmlFiltering() throws Exception {
+    File input = populateFile("This < is is > filtered.");
+    String[] args = new String[] {"--xmlfilter", input.getAbsolutePath()};
+    Main.main(args);
+    String output = new String(this.out.toByteArray());
+    assertFalse(output.contains("ENGLISH_WORD_REPEAT_RULE"));
+  }
+
+  private File populateFile(String content) throws IOException {
+    File tempFile = createTempFile();
+    PrintWriter writer = new PrintWriter(new OutputStreamWriter(new 
FileOutputStream(tempFile), "UTF-8"));
+    try {
+      writer.println(content);
+    } finally {
+      writer.close();
+    }
+    return tempFile;
+  }
+
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to