Dear Wiki user, You have subscribed to a wiki page or wiki category on "Lucene-hadoop Wiki" for change notification.
The following page has been changed by udanax: http://wiki.apache.org/lucene-hadoop/Hbase/HbaseShell/Ideas ------------------------------------------------------------------------------ == Hbase JDBC driver == I've started to think about Java Database Connectivity (JDBC) driver for use with Hbase. - [[BR]]I am thinking of integrating Zeroboard with this work in progress for a test case. + [[BR]]I am thinking of integrating Zeroboard with this work in progress for a test case. -- [:udanax:Edward yoon] ~-''Zeroboard is one of the most popular PHP web boards in Asia.''-~ @@ -16, +16 @@ == Korean Morphological Analyzer for lucene == + * work in progress with NLP lab. + + {{{ + package org.apache.lucene; + + import java.io.IOException; + import java.io.StringReader; + import java.util.ArrayList; + import java.util.List; + + import org.apache.lucene.analysis.Analyzer; + import org.apache.lucene.analysis.Token; + import org.apache.lucene.analysis.TokenStream; + import org.apache.lucene.analysis.kr.KoreanAnalyzer; + import org.apache.lucene.analysis.kr.KoreanMorphemeAnalyzer; + + public class Test { + public static void main(String[] arg) throws IOException { + Analyzer a = new KoreanMorphemeAnalyzer(); + String[] result = getMorphemeArray(a, "조선 제4대 임금 세종이 훈민정음이라는 이름으로 창제하였다"); + + for (int i = 0; i < result.length; i++) { + System.out.println(result[i]); + } + + a = new KoreanAnalyzer(); + result = getMorphemeArray(a, "조선 제4대 임금 세종이 훈민정음이라는 이름으로 창제하였다"); + + for (int i = 0; i < result.length; i++) { + System.out.println(result[i]); + } + } + + private static String[] getMorphemeArray(Analyzer a, String string) + throws IOException { + List<String> tmp = new ArrayList<String>(); + TokenStream ts = a.tokenStream("dummy", new StringReader(string)); + + for (int i = 0; true; i++) { + Token t = ts.next(); + if (t == null) { + break; + } else { + tmp.add(t.toString()); + } + } + + return tmp.toArray(new String[] {}); + } + }}} +