Revision: 9732
http://languagetool.svn.sourceforge.net/languagetool/?rev=9732&view=rev
Author: dnaber
Date: 2013-03-19 21:04:24 +0000 (Tue, 19 Mar 2013)
Log Message:
-----------
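Wikipedia indexing: catch Exception so that one problematic article doesn't
stop the whole indexing process; optimization: SwebleWikipediaTextFilter now
creates its configuration, compiler, and page ID once in the constructor
instead of on every filter() call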
Modified Paths:
--------------
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/SwebleWikipediaTextFilter.java
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/WikipediaIndexHandler.java
Modified:
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/SwebleWikipediaTextFilter.java
===================================================================
--- trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/SwebleWikipediaTextFilter.java	2013-03-19 21:01:48 UTC (rev 9731)
+++ trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/SwebleWikipediaTextFilter.java	2013-03-19 21:04:24 UTC (rev 9732)
@@ -32,14 +32,25 @@
private static final int WRAP_COL = Integer.MAX_VALUE;
+ private final SimpleWikiConfiguration config;
+ private final Compiler compiler;
+ private final PageId pageId;
+
+ public SwebleWikipediaTextFilter() {
+ try {
+ config = new SimpleWikiConfiguration(
+
"classpath:/org/languagetool/resource/dev/SimpleWikiConfiguration.xml");
+ compiler = new Compiler(config);
+ final PageTitle pageTitle = PageTitle.make(config, "fileTitle");
+ pageId = new PageId(pageTitle, -1);
+ } catch (Exception e) {
+ throw new RuntimeException("Could not set up text filter", e);
+ }
+ }
+
@Override
public String filter(String wikiText) {
try {
- final SimpleWikiConfiguration config = new SimpleWikiConfiguration(
-
"classpath:/org/languagetool/resource/dev/SimpleWikiConfiguration.xml");
- final Compiler compiler = new Compiler(config);
- final PageTitle pageTitle = PageTitle.make(config, "fileTitle");
- final PageId pageId = new PageId(pageTitle, -1);
final CompiledPage compiledPage = compiler.postprocess(pageId, wikiText,
null);
final TextConverter textConverter = new TextConverter(config, WRAP_COL);
return (String) textConverter.go(compiledPage.getPage());
Modified:
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/WikipediaIndexHandler.java
===================================================================
--- trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/WikipediaIndexHandler.java	2013-03-19 21:01:48 UTC (rev 9731)
+++ trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/WikipediaIndexHandler.java	2013-03-19 21:04:24 UTC (rev 9732)
@@ -102,9 +102,15 @@
throw new DocumentLimitReachedException(end);
}
try {
- final String textToCheck = textFilter.filter(text.toString());
- if (!textToCheck.contains("#REDIRECT") &&
!textToCheck.trim().equals("")) {
- indexer.index(textToCheck, false, articleCount);
+ final String textToCheck;
+ try {
+ textToCheck = textFilter.filter(text.toString());
+ if (!textToCheck.contains("#REDIRECT") &&
!textToCheck.trim().equals("")) {
+ indexer.index(textToCheck, false, articleCount);
+ }
+ } catch (Exception e) {
+ System.err.println("Exception when filtering '" + title + "' -
skipping file. Stacktrace follows:");
+ e.printStackTrace();
}
} catch (Exception e) {
throw new RuntimeException("Failed checking article " + articleCount,
e);
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Everyone hates slow websites. So do we.
Make your web apps faster with AppDynamics
Download AppDynamics Lite for free today:
http://p.sf.net/sfu/appdyn_d2d_mar
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits