Author: michiel
Date: 2009-05-27 22:18:46 +0200 (Wed, 27 May 2009)
New Revision: 35454
Modified:
mmbase/trunk/contributions/lucene/src/org/mmbase/module/lucene/extraction/impl/TextMiningExtractor.java
Log:
Updated to tm-extractor 1.0
Modified:
mmbase/trunk/contributions/lucene/src/org/mmbase/module/lucene/extraction/impl/TextMiningExtractor.java
===================================================================
--- mmbase/trunk/contributions/lucene/src/org/mmbase/module/lucene/extraction/impl/TextMiningExtractor.java 2009-05-27 20:05:24 UTC (rev 35453)
+++ mmbase/trunk/contributions/lucene/src/org/mmbase/module/lucene/extraction/impl/TextMiningExtractor.java 2009-05-27 20:18:46 UTC (rev 35454)
@@ -16,13 +16,15 @@
import java.io.InputStream;
import org.mmbase.module.lucene.extraction.Extractor;
import org.mmbase.util.logging.*;
-import org.textmining.text.extraction.WordExtractor;
+import org.textmining.extraction.TextExtractor;
+import org.textmining.extraction.word.WordTextExtractorFactory;
/**
* Use textmining lib to extract text from a Word document
- *
+ *
* @author Wouter Heijke
- * @version $Revision: 1.1 $
+ * @author Michiel Meeuwissen
+ * @version $Id$
*/
public class TextMiningExtractor implements Extractor {
private static final Logger log =
Logging.getLoggerInstance(TextMiningExtractor.class);
@@ -39,7 +41,8 @@
public String extract(InputStream input) throws Exception {
log.debug("extract stream");
- WordExtractor extractor = new WordExtractor();
- return extractor.extractText(input);
+ WordTextExtractorFactory factory = new WordTextExtractorFactory();
+ TextExtractor extractor = factory.textExtractor(input);
+ return extractor.getText().trim();
}
}
_______________________________________________
Cvs mailing list
[email protected]
http://lists.mmbase.org/mailman/listinfo/cvs