Revision: 6928
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=6928&view=rev
Author:   dnaber
Date:     2012-05-15 21:18:52 +0000 (Tue, 15 May 2012)
Log Message:
-----------
alternative "<marker>" converter that doesn't expand entity references, to be 
used for German

Added Paths:
-----------
    trunk/JLanguageTool/src/dev/org/languagetool/dev/MarkerConverter2.java

Added: trunk/JLanguageTool/src/dev/org/languagetool/dev/MarkerConverter2.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/MarkerConverter2.java      
                        (rev 0)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/MarkerConverter2.java      
2012-05-15 21:18:52 UTC (rev 6928)
@@ -0,0 +1,183 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.dev;
+
+import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
+import org.languagetool.rules.Rule;
+import org.languagetool.rules.patterns.PatternRule;
+import org.languagetool.tools.StringTools;
+
+import javax.xml.stream.*;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Convert to the new marker format. Note: buggy at least with "and" elements.
+ * Does not expand entities, unlike MarkerConverter.
+ *
+ * @deprecated for internal one-time conversion only
+ */
+public class MarkerConverter2 {
+
+  // Lookup tables keyed by "<rule id> <sub id>", filled once by
+  // initStartAndEndPositions(): startPos holds the rule's start position
+  // correction, endPos holds (number of pattern elements + end position
+  // correction).
+  private final Map<String, Integer> startPos = new HashMap<String, Integer>();
+  private final Map<String, Integer> endPos = new HashMap<String, Integer>();
+  // Recomputed at every <pattern> start tag: true when that tag carries a
+  // mark_from or mark_to value other than 0.
+  private boolean needsMarker;
+  // True between a <rulegroup> start tag and its end tag.
+  private boolean inRuleGroup;
+  // Number of <token> start tags seen so far in the current rule; reset to 0
+  // at every <rule> and <rulegroup> start tag.
+  private int currentTokenPos = 0;
+  // Value of the most recent "id" attribute seen on a <rule> or <rulegroup>.
+  private String currentId = "";
+  // Counter of the current rule inside its group (incremented per <rule>);
+  // set to 1 for a rule outside of any group.
+  private int currentSubId = 0;
+  // mark_from / mark_to attribute values of the most recently processed start
+  // tag; Integer.MAX_VALUE means the attribute was not present.
+  private int mark_from = Integer.MAX_VALUE;
+  private int mark_to = Integer.MAX_VALUE;
+
+  /**
+   * Reads the given XML rule file with a streaming parser and prints a
+   * converted copy to standard output: {@code mark_from}/{@code mark_to}
+   * attributes are dropped (see {@code printStartTag}) and the marked tokens
+   * are wrapped in {@code <marker>...</marker>} instead. Entity references
+   * are written back unexpanded.
+   *
+   * @param filename path of the XML rule file to convert
+   * @throws IOException if the file cannot be opened or closed, or if loading
+   *         the rules for the position lookup fails
+   * @throws XMLStreamException if the file is not well-formed XML
+   */
+  public void convert(String filename) throws IOException, XMLStreamException {
+    initStartAndEndPositions();
+
+    final XMLInputFactory inputFactory = XMLInputFactory.newInstance();
+    // report "&name;" as ENTITY_REFERENCE events instead of expanding them
+    inputFactory.setProperty(
+        XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
+    final InputStream in = new FileInputStream(filename);
+    try {
+      final XMLStreamReader streamReader =
+          inputFactory.createXMLStreamReader(in);
+      try {
+        while (streamReader.hasNext()) {
+          handleEvent(streamReader, streamReader.next());
+        }
+      } finally {
+        // closing the reader does not close the underlying stream,
+        // so both are released explicitly
+        streamReader.close();
+      }
+    } finally {
+      in.close();
+    }
+  }
+
+  /** Writes the output for a single parser event. */
+  private void handleEvent(XMLStreamReader streamReader, int eventType) {
+    switch (eventType) {
+      case XMLStreamConstants.COMMENT:
+        if (streamReader.hasText()) {
+          System.out.print("<!--" + streamReader.getText() + "-->");
+        }
+        break;
+      case XMLStreamConstants.CHARACTERS:
+        if (streamReader.hasText()) {
+          System.out.print(StringTools.escapeXML(streamReader.getText()));
+        }
+        break;
+      case XMLStreamConstants.START_ELEMENT:
+        handleStartElement(streamReader);
+        break;
+      case XMLStreamConstants.END_ELEMENT:
+        handleEndElement(streamReader);
+        break;
+      case XMLStreamConstants.ENTITY_REFERENCE:
+        System.out.print("&" + streamReader.getLocalName() + ";");
+        break;
+      default:
+        // every other event type is not copied to the output
+        break;
+    }
+  }
+
+  /** Updates the rule/token bookkeeping and prints the start tag. */
+  private void handleStartElement(XMLStreamReader streamReader) {
+    final String localName = streamReader.getLocalName();
+
+    initMarkFromAndMarkTo(streamReader);
+    if (localName.equals("rulegroup")) {
+      currentSubId = 0;
+      inRuleGroup = true;
+      currentTokenPos = 0;
+    } else if (localName.equals("rule")) {
+      currentTokenPos = 0;
+      if (inRuleGroup) {
+        currentSubId++;
+      } else {
+        currentSubId = 1;
+      }
+    } else if (localName.equals("pattern")) {
+      final boolean defaultStart =
+          mark_from == Integer.MAX_VALUE || mark_from == 0;
+      final boolean defaultEnd =
+          mark_to == Integer.MAX_VALUE || mark_to == 0;
+      needsMarker = !defaultStart || !defaultEnd;
+    } else if (localName.equals("token")) {
+      // compared before the increment: start positions are 0-based
+      if (needsMarker && isCurrentTokenPos(startPos)) {
+        System.out.print("<marker>\n");
+      }
+      currentTokenPos++;
+    }
+
+    printStartTag(streamReader, localName);
+  }
+
+  /** Prints the end tag and closes the marker after the last marked token. */
+  private void handleEndElement(XMLStreamReader streamReader) {
+    final String localName = streamReader.getLocalName();
+    System.out.print("</" + localName + ">");
+    if (localName.equals("rulegroup")) {
+      inRuleGroup = false;
+    } else if (localName.equals("token")) {
+      // currentTokenPos was already incremented at the token's start tag
+      if (needsMarker && isCurrentTokenPos(endPos)) {
+        System.out.print("\n</marker>");
+      }
+    }
+  }
+
+  /**
+   * Returns whether the given lookup table maps the current rule to the
+   * current token position. A rule that is missing from the table yields
+   * {@code false}; unboxing the missing value used to throw a
+   * NullPointerException.
+   */
+  private boolean isCurrentTokenPos(Map<String, Integer> positions) {
+    final Integer position = positions.get(currentId + " " + currentSubId);
+    return position != null && position.intValue() == currentTokenPos;
+  }
+
+  /**
+   * Loads the German default pattern rules and records, per rule, the token
+   * positions used by {@code convert}: the start position correction goes to
+   * {@code startPos}, the element count plus the end position correction
+   * goes to {@code endPos}. Both are keyed by {@code "<id> <subId>"}.
+   *
+   * @throws IOException declared for the rule loading calls
+   */
+  private void initStartAndEndPositions() throws IOException {
+    final JLanguageTool lt = new JLanguageTool(Language.GERMAN);
+    lt.activateDefaultPatternRules();
+    final List<Rule> allRules = lt.getAllRules();
+    for (Rule candidate : allRules) {
+      // only pattern rules have position corrections
+      if (!(candidate instanceof PatternRule)) {
+        continue;
+      }
+      final PatternRule patternRule = (PatternRule) candidate;
+      final String lookupKey =
+          patternRule.getId() + " " + patternRule.getSubId();
+      startPos.put(lookupKey, patternRule.getStartPositionCorrection());
+      endPos.put(lookupKey, patternRule.getElements().size()
+          + patternRule.getEndPositionCorrection());
+    }
+  }
+
+  private void printStartTag(XMLStreamReader eventReader, String localName) {
+    System.out.print("<" + localName);
+    mark_from = Integer.MAX_VALUE;
+    mark_to = Integer.MAX_VALUE;
+    for (int i = 0; i < eventReader.getAttributeCount(); i++) {
+      final String attributeValue = eventReader.getAttributeValue(i);
+
+      final String attributeLocalName = eventReader.getAttributeLocalName(i);
+      if (!attributeLocalName.equals("mark_from") && 
!attributeLocalName.equals("mark_to")) {
+        System.out.print(" " + attributeLocalName + "=\"" + 
StringTools.escapeXML(attributeValue) + "\"");
+      }
+
+      if (attributeLocalName.equals("mark_from")) {
+        mark_from = Integer.parseInt(attributeValue);
+      } else if (attributeLocalName.equals("mark_to")) {
+        mark_to = Integer.parseInt(attributeValue);
+      }
+      if ((localName.equals("rulegroup") || localName.equals("rule")) && 
attributeLocalName.equals("id")) {
+        if (attributeValue != null) {
+          currentId = attributeValue;
+        }
+      }
+    }
+    System.out.print(">");
+  }
+
+  /**
+   * Stores the {@code mark_from} and {@code mark_to} attribute values of the
+   * current element in the fields of the same name. An attribute that is not
+   * present leaves its field at Integer.MAX_VALUE.
+   *
+   * @param eventReader reader positioned on the element to inspect
+   */
+  private void initMarkFromAndMarkTo(XMLStreamReader eventReader) {
+    mark_from = Integer.MAX_VALUE;
+    mark_to = Integer.MAX_VALUE;
+    final int attributeCount = eventReader.getAttributeCount();
+    for (int idx = 0; idx < attributeCount; idx++) {
+      final String name = eventReader.getAttributeLocalName(idx);
+      final String value = eventReader.getAttributeValue(idx);
+      if ("mark_from".equals(name)) {
+        mark_from = Integer.parseInt(value);
+      } else if ("mark_to".equals(name)) {
+        mark_to = Integer.parseInt(value);
+      }
+    }
+  }
+
+  public static void main(String args[]) throws XMLStreamException, 
IOException {
+    final MarkerConverter2 converter = new MarkerConverter2();
+    converter.convert("/home/dnaber/languagetool/src/rules/de/grammar.xml");
+  }
+
+}

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to