Revision: 6104
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=6104&view=rev
Author:   dnaber
Date:     2011-12-28 15:15:29 +0000 (Wed, 28 Dec 2011)
Log Message:
-----------
support checking of random pages (using "random:xx" where xx is the language 
code)

Modified Paths:
--------------
    
trunk/ltcommunity/grails-app/controllers/org/languagetool/WikiCheckController.groovy
    trunk/ltcommunity/grails-app/views/wikiCheck/index.gsp

Modified: 
trunk/ltcommunity/grails-app/controllers/org/languagetool/WikiCheckController.groovy
===================================================================
--- 
trunk/ltcommunity/grails-app/controllers/org/languagetool/WikiCheckController.groovy
        2011-12-28 11:51:56 UTC (rev 6103)
+++ 
trunk/ltcommunity/grails-app/controllers/org/languagetool/WikiCheckController.groovy
        2011-12-28 15:15:29 UTC (rev 6104)
@@ -22,6 +22,8 @@
 import org.languagetool.dev.wikipedia.WikipediaQuickCheck
 import org.languagetool.dev.wikipedia.WikipediaQuickCheckResult
 import org.apache.commons.io.IOUtils
+import java.util.regex.Pattern
+import java.util.regex.Matcher
 
 class WikiCheckController extends BaseController {
 
@@ -29,6 +31,8 @@
   private static final List<String> DEFAULT_DISABLED_RULES = 
     Arrays.asList("WHITESPACE_RULE", "UNPAIRED_BRACKETS", 
"UPPERCASE_SENTENCE_START", "COMMA_PARENTHESIS_WHITESPACE")
   private static final Map<String,List<String>> LANG_TO_DISABLED_RULES = new 
HashMap<String, List<String>>()
+
+  private static final Pattern XML_TITLE_PATTERN = 
Pattern.compile("title=\"(.*?)\"")
     
   static {
     LANG_TO_DISABLED_RULES.put("en", Arrays.asList("EN_QUOTES"))
@@ -46,13 +50,13 @@
           throw new Exception("You clicked the WikiCheck bookmarklet - this 
link only works when you put it in your bookmarks and call the bookmark while 
you're on a Wikipedia page")
       }
       WikipediaQuickCheck checker = new WikipediaQuickCheck()
-      checker.validateWikipediaUrl(new URL(params.url))
-      URL plainTextUrl = new URL(CONVERT_URL_PREFIX + params.url)
+      String pageUrl = getPageUrl(params, checker)
+      URL plainTextUrl = new URL(CONVERT_URL_PREFIX + pageUrl.replace(' ', 
'_'))
       String plainText = download(plainTextUrl)
       if (plainText == '') {
-        throw new Exception("No Wikipedia page content found at the given URL")
+        throw new Exception("No Wikipedia page content found at the given URL: 
" + plainTextUrl + " (page url: " + pageUrl + ")")
       }
-      Language language = checker.getLanguage(new URL(params.url))
+      Language language = checker.getLanguage(new URL(pageUrl))
       if (params.disabled) {
         checker.setDisabledRuleIds(Arrays.asList(params.disabled.split(",")))
       } else {
@@ -69,13 +73,31 @@
       log.info("WikiCheck: ${params.url} (${runTime}ms)")
       [result: result, matches: result.getRuleMatches(), textToCheck: 
result.getText(),
               lang: result.getLanguageCode(),
-              url: params.url, disabledRuleIds: checker.getDisabledRuleIds(),
+              url: params.url,
+              realUrl: pageUrl,
+              disabledRuleIds: checker.getDisabledRuleIds(),
               plainText: plainText]
     } else {
       []
     }
   }
-    
+
+  private String getPageUrl(params, WikipediaQuickCheck checker) {
+    String pageUrl
+    if (params.url.startsWith("random:")) {
+      String lang = params.url.substring("random:".length())
+      if (lang.length() < 2 || lang.length() > 3) {
+        throw new Exception("Invalid language: " + lang)
+      }
+      URL randomUrl = new URL("http://" + lang + 
".wikipedia.org/w/api.php?action=query&list=random&rnnamespace=0&rnlimit=1&format=xml")
+      pageUrl = "http://" + lang + ".wikipedia.org/wiki/" + 
getRandomPageTitle(randomUrl)
+    } else {
+      checker.validateWikipediaUrl(new URL(params.url))
+      pageUrl = params.url
+    }
+    return pageUrl
+  }
+
   private String download(final URL url) throws IOException {
     final HttpURLConnection connection = 
(HttpURLConnection)url.openConnection()
     if (connection.getResponseCode() != 200) {
@@ -91,4 +113,11 @@
     }
   }
 
+  private String getRandomPageTitle(final URL randomUrl) throws IOException {
+    final String content = download(randomUrl)
+    final Matcher matcher = XML_TITLE_PATTERN.matcher(content)
+    matcher.find()
+    return matcher.group(1)
+  }
+
 }

Modified: trunk/ltcommunity/grails-app/views/wikiCheck/index.gsp
===================================================================
--- trunk/ltcommunity/grails-app/views/wikiCheck/index.gsp      2011-12-28 
11:51:56 UTC (rev 6103)
+++ trunk/ltcommunity/grails-app/views/wikiCheck/index.gsp      2011-12-28 
15:15:29 UTC (rev 6104)
@@ -41,7 +41,7 @@
 
                 <h2 style="margin-top:10px;margin-bottom:10px">Result</h2>
                 
-                <p>URL: <a 
href="${url.encodeAsHTML()}">${url.encodeAsHTML()}</a></p>
+                <p>URL: <a 
href="${realUrl.encodeAsHTML()}">${realUrl.encodeAsHTML()}</a></p>
                 
                 <br />
                 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Write once. Port to many.
Get the SDK and tools to simplify cross-platform app development. Create 
new or port existing apps to sell to consumers worldwide. Explore the 
Intel AppUpSM program developer opportunity. appdeveloper.intel.com/join
http://p.sf.net/sfu/intel-appdev
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to