Revision: 9732
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=9732&view=rev
Author:   dnaber
Date:     2013-03-19 21:04:24 +0000 (Tue, 19 Mar 2013)
Log Message:
-----------
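Wikipedia indexing: catch Exception per article so one problem doesn't stop the
whole indexing process; optimization: create the Sweble configuration, compiler
and page id once in the constructor instead of on every filter() call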

Modified Paths:
--------------
    
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/SwebleWikipediaTextFilter.java
    
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/WikipediaIndexHandler.java

Modified: 
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/SwebleWikipediaTextFilter.java
===================================================================
--- 
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/SwebleWikipediaTextFilter.java
       2013-03-19 21:01:48 UTC (rev 9731)
+++ 
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/SwebleWikipediaTextFilter.java
       2013-03-19 21:04:24 UTC (rev 9732)
@@ -32,14 +32,25 @@
 
   private static final int WRAP_COL = Integer.MAX_VALUE;
 
+  private final SimpleWikiConfiguration config;
+  private final Compiler compiler;
+  private final PageId pageId;
+  
+  public SwebleWikipediaTextFilter() {
+    try {
+      config = new SimpleWikiConfiguration(
+              "classpath:/org/languagetool/resource/dev/SimpleWikiConfiguration.xml");
+      compiler = new Compiler(config);
+      final PageTitle pageTitle = PageTitle.make(config, "fileTitle");
+      pageId = new PageId(pageTitle, -1);
+    } catch (Exception e) {
+      throw new RuntimeException("Could not set up text filter", e);
+    }
+  }
+
   @Override
   public String filter(String wikiText) {
     try {
-      final SimpleWikiConfiguration config = new SimpleWikiConfiguration(
-              "classpath:/org/languagetool/resource/dev/SimpleWikiConfiguration.xml");
-      final Compiler compiler = new Compiler(config);
-      final PageTitle pageTitle = PageTitle.make(config, "fileTitle");
-      final PageId pageId = new PageId(pageTitle, -1);
       final CompiledPage compiledPage = compiler.postprocess(pageId, wikiText, null);
       final TextConverter textConverter = new TextConverter(config, WRAP_COL);
       return (String) textConverter.go(compiledPage.getPage());

Modified: 
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/WikipediaIndexHandler.java
===================================================================
--- 
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/WikipediaIndexHandler.java
   2013-03-19 21:01:48 UTC (rev 9731)
+++ 
trunk/languagetool/languagetool-wikipedia/src/main/java/org/languagetool/dev/wikipedia/WikipediaIndexHandler.java
   2013-03-19 21:04:24 UTC (rev 9732)
@@ -102,9 +102,15 @@
         throw new DocumentLimitReachedException(end);
       }
       try {
-        final String textToCheck = textFilter.filter(text.toString());
-        if (!textToCheck.contains("#REDIRECT") && !textToCheck.trim().equals("")) {
-          indexer.index(textToCheck, false, articleCount);
+        final String textToCheck;
+        try {
+          textToCheck = textFilter.filter(text.toString());
+          if (!textToCheck.contains("#REDIRECT") && !textToCheck.trim().equals("")) {
+            indexer.index(textToCheck, false, articleCount);
+          }
+        } catch (Exception e) {
+          System.err.println("Exception when filtering '" + title + "' - skipping file. Stacktrace follows:");
+          e.printStackTrace();
         }
       } catch (Exception e) {
         throw new RuntimeException("Failed checking article " + articleCount, e);

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Everyone hates slow websites. So do we.
Make your web apps faster with AppDynamics
Download AppDynamics Lite for free today:
http://p.sf.net/sfu/appdyn_d2d_mar
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits

Reply via email to