Author: michiel
Date: 2009-05-27 22:18:46 +0200 (Wed, 27 May 2009)
New Revision: 35454

Modified:
   mmbase/trunk/contributions/lucene/src/org/mmbase/module/lucene/extraction/impl/TextMiningExtractor.java
Log:
Updated to tm-extractor 1.0

Modified: mmbase/trunk/contributions/lucene/src/org/mmbase/module/lucene/extraction/impl/TextMiningExtractor.java
===================================================================
--- mmbase/trunk/contributions/lucene/src/org/mmbase/module/lucene/extraction/impl/TextMiningExtractor.java	2009-05-27 20:05:24 UTC (rev 35453)
+++ mmbase/trunk/contributions/lucene/src/org/mmbase/module/lucene/extraction/impl/TextMiningExtractor.java	2009-05-27 20:18:46 UTC (rev 35454)
@@ -16,13 +16,15 @@
 import java.io.InputStream;
 import org.mmbase.module.lucene.extraction.Extractor;
 import org.mmbase.util.logging.*;
-import org.textmining.text.extraction.WordExtractor;
+import org.textmining.extraction.TextExtractor;
+import org.textmining.extraction.word.WordTextExtractorFactory;
 
 /**
  * Use textmining lib to extract text from a Word document
- * 
+ *
  * @author Wouter Heijke
- * @version $Revision: 1.1 $
+ * @author Michiel Meeuwissen
+ * @version $Id$
  */
 public class TextMiningExtractor implements Extractor {
     private static final Logger log = Logging.getLoggerInstance(TextMiningExtractor.class);
@@ -39,7 +41,8 @@
 
     public String extract(InputStream input) throws Exception {
         log.debug("extract stream");
-        WordExtractor extractor = new WordExtractor();
-        return extractor.extractText(input);
+        WordTextExtractorFactory factory = new WordTextExtractorFactory();
+        TextExtractor extractor = factory.textExtractor(input);
+        return extractor.getText().trim();
     }
 }

_______________________________________________
Cvs mailing list
[email protected]
http://lists.mmbase.org/mailman/listinfo/cvs

Reply via email to