hunspell

milek_pl Mon, 04 Jun 2012 02:32:54 -0700

Revision: 7229
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7229&view=rev
Author:   milek_pl
Date:     2012-06-04 09:32:38 +0000 (Mon, 04 Jun 2012)
Log Message:
-----------
Javadoc change for HunspellRule, and some internal charset encoding code (in a 
comment) for further testing on UTF-8 dictionaries


Modified Paths:
--------------
    
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/Hunspell.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/Hunspell.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/Hunspell.java
 2012-06-03 22:08:18 UTC (rev 7228)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/Hunspell.java
 2012-06-04 09:32:38 UTC (rev 7229)
@@ -6,10 +6,18 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 
+import morfologik.util.BufferUtils;
+
 import com.sun.jna.Native;
 import com.sun.jna.Pointer;
 import com.sun.jna.ptr.PointerByReference;
@@ -245,7 +253,19 @@
                private String encoding;
        
 
+               private final CharsetEncoder encoder;
+
                /**
+               * Charset decoder for hunspell.
+               */
+                private final CharsetDecoder decoder;
+
+                
+                ByteBuffer bytes = ByteBuffer.allocate(0);
+                
+                CharBuffer charBuffer = CharBuffer.allocate(0);
+               
+               /**
                 * Creates an instance of the dictionary.
                 * @param baseFileName the base name of the dictionary, 
                 */
@@ -262,6 +282,13 @@
            
                        hunspellDict = hsl.Hunspell_create(aff.toString(), 
dic.toString());
                        encoding = hsl.Hunspell_get_dic_encoding(hunspellDict);
+                       
+                       Charset charset = Charset.forName(encoding);
+                       
+                       encoder = charset.newEncoder();
+               decoder = charset.newDecoder()
+                           .onMalformedInput(CodingErrorAction.REPORT)
+                           .onUnmappableCharacter(CodingErrorAction.REPORT);
 
                        // This will blow up if the encoding doesn't exist
                        stringToBytes("test"); 
@@ -284,7 +311,16 @@
                 */
                public boolean misspelled(String word) {
                        try {
-                               return hsl.Hunspell_spell(hunspellDict, 
stringToBytes(word)) == 0;
+                           if (hsl.Hunspell_spell(hunspellDict, 
stringToBytes(word)) == 0) {
+                           byte[] arr = stringToBytes(word);
+                           for (int i = 0; i < arr.length; i++) {
+                               System.err.print(arr[i]);
+                               System.err.print(' ');
+                           } System.err.println();
+                           
+                               return true;
+                           }
+                           return false;
                        } catch (UnsupportedEncodingException e) {
                                return true; // this should probably never 
happen.
                        }
@@ -296,7 +332,40 @@
                 */
                protected byte[] stringToBytes(String str)
                        throws UnsupportedEncodingException {
-                       return (str+"\u0000").getBytes(encoding);
+                       return (str+"\u0000").getBytes(encoding); 
+                       
+                   /*
+                   bytes.clear();                   
+                   charBuffer.clear();
+               
+               if ("UTF-8".equals(encoding)) {
+                   charBuffer = BufferUtils.ensureCapacity(charBuffer, 
str.length() + 4);
+                   charBuffer.put((char)0xEF);
+                   charBuffer.put((char)0xBB);
+                   charBuffer.put((char)0xBF);
+                   
+               } else {
+               charBuffer = BufferUtils.ensureCapacity(charBuffer, 
str.length() + 1);          
+               }
+               for (int i = 0; i < str.length(); i++) {
+                char chr = str.charAt(i);
+                charBuffer.put(chr);
+               }
+               charBuffer.put('\u0000');
+               charBuffer.flip();
+               final int maxCapacity = (int) (charBuffer.remaining() * encoder
+                       .maxBytesPerChar());
+               if (bytes.capacity() <= maxCapacity) {
+                   bytes = ByteBuffer.allocate(maxCapacity);
+               }
+
+               charBuffer.mark();
+               encoder.reset();
+               encoder.encode(charBuffer, bytes, true);
+               bytes.flip();
+               charBuffer.reset();
+               return bytes.array();
+               */
                }
 
                /**

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java
     2012-06-03 22:08:18 UTC (rev 7228)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java
     2012-06-04 09:32:38 UTC (rev 7229)
@@ -41,6 +41,9 @@
 /**
  * A hunspell-based spellchecking-rule.
  * 
+ * The default dictionary is set to the first country variant on the list - so 
the order
+   in the Language class declaration is important!
+ * 
  * @author Marcin Miłkowski
  * 
  */
@@ -56,11 +59,6 @@
                super(messages, language);
                super.setCategory(new 
Category(messages.getString("category_typo")));
 
-               // TODO: currently, the default dictionary is now
-               // set to the first country variant on the list - so the order
-               // in the Language class declaration is important!
-               // we might support country variants in the near future
-
                final String langCountry = language.getShortName()
                                + "_" 
                                + language.getCountryVariants()[0]; 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

[LanguageTool] SF.net SVN: languagetool:[7229] trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell

Reply via email to