Revision: 6104
http://languagetool.svn.sourceforge.net/languagetool/?rev=6104&view=rev
Author: dnaber
Date: 2011-12-28 15:15:29 +0000 (Wed, 28 Dec 2011)
Log Message:
-----------
support checking of random pages (using "random:xx" where xx is the language
code)
Modified Paths:
--------------
trunk/ltcommunity/grails-app/controllers/org/languagetool/WikiCheckController.groovy
trunk/ltcommunity/grails-app/views/wikiCheck/index.gsp
Modified:
trunk/ltcommunity/grails-app/controllers/org/languagetool/WikiCheckController.groovy
===================================================================
--- trunk/ltcommunity/grails-app/controllers/org/languagetool/WikiCheckController.groovy 2011-12-28 11:51:56 UTC (rev 6103)
+++ trunk/ltcommunity/grails-app/controllers/org/languagetool/WikiCheckController.groovy 2011-12-28 15:15:29 UTC (rev 6104)
@@ -22,6 +22,8 @@
import org.languagetool.dev.wikipedia.WikipediaQuickCheck
import org.languagetool.dev.wikipedia.WikipediaQuickCheckResult
import org.apache.commons.io.IOUtils
+import java.util.regex.Pattern
+import java.util.regex.Matcher
class WikiCheckController extends BaseController {
@@ -29,6 +31,8 @@
private static final List<String> DEFAULT_DISABLED_RULES =
Arrays.asList("WHITESPACE_RULE", "UNPAIRED_BRACKETS",
"UPPERCASE_SENTENCE_START", "COMMA_PARENTHESIS_WHITESPACE")
private static final Map<String,List<String>> LANG_TO_DISABLED_RULES = new
HashMap<String, List<String>>()
+
+ private static final Pattern XML_TITLE_PATTERN =
Pattern.compile("title=\"(.*?)\"")
static {
LANG_TO_DISABLED_RULES.put("en", Arrays.asList("EN_QUOTES"))
@@ -46,13 +50,13 @@
throw new Exception("You clicked the WikiCheck bookmarklet - this
link only works when you put it in your bookmarks and call the bookmark while
you're on a Wikipedia page")
}
WikipediaQuickCheck checker = new WikipediaQuickCheck()
- checker.validateWikipediaUrl(new URL(params.url))
- URL plainTextUrl = new URL(CONVERT_URL_PREFIX + params.url)
+ String pageUrl = getPageUrl(params, checker)
+ URL plainTextUrl = new URL(CONVERT_URL_PREFIX + pageUrl.replace(' ',
'_'))
String plainText = download(plainTextUrl)
if (plainText == '') {
- throw new Exception("No Wikipedia page content found at the given URL")
+ throw new Exception("No Wikipedia page content found at the given URL:
" + plainTextUrl + " (page url: " + pageUrl + ")")
}
- Language language = checker.getLanguage(new URL(params.url))
+ Language language = checker.getLanguage(new URL(pageUrl))
if (params.disabled) {
checker.setDisabledRuleIds(Arrays.asList(params.disabled.split(",")))
} else {
@@ -69,13 +73,31 @@
log.info("WikiCheck: ${params.url} (${runTime}ms)")
[result: result, matches: result.getRuleMatches(), textToCheck:
result.getText(),
lang: result.getLanguageCode(),
- url: params.url, disabledRuleIds: checker.getDisabledRuleIds(),
+ url: params.url,
+ realUrl: pageUrl,
+ disabledRuleIds: checker.getDisabledRuleIds(),
plainText: plainText]
} else {
[]
}
}
-
+
+ private String getPageUrl(params, WikipediaQuickCheck checker) {
+ String pageUrl
+ if (params.url.startsWith("random:")) {
+ String lang = params.url.substring("random:".length())
+ if (lang.length() < 2 || lang.length() > 3) {
+ throw new Exception("Invalid language: " + lang)
+ }
+ URL randomUrl = new URL("http://" + lang +
".wikipedia.org/w/api.php?action=query&list=random&rnnamespace=0&rnlimit=1&format=xml")
+ pageUrl = "http://" + lang + ".wikipedia.org/wiki/" +
getRandomPageTitle(randomUrl)
+ } else {
+ checker.validateWikipediaUrl(new URL(params.url))
+ pageUrl = params.url
+ }
+ return pageUrl
+ }
+
private String download(final URL url) throws IOException {
final HttpURLConnection connection =
(HttpURLConnection)url.openConnection()
if (connection.getResponseCode() != 200) {
@@ -91,4 +113,11 @@
}
}
+ private String getRandomPageTitle(final URL randomUrl) throws IOException {
+ final String content = download(randomUrl)
+ final Matcher matcher = XML_TITLE_PATTERN.matcher(content)
+ matcher.find()
+ return matcher.group(1)
+ }
+
}
Modified: trunk/ltcommunity/grails-app/views/wikiCheck/index.gsp
===================================================================
--- trunk/ltcommunity/grails-app/views/wikiCheck/index.gsp 2011-12-28 11:51:56 UTC (rev 6103)
+++ trunk/ltcommunity/grails-app/views/wikiCheck/index.gsp 2011-12-28 15:15:29 UTC (rev 6104)
@@ -41,7 +41,7 @@
<h2 style="margin-top:10px;margin-bottom:10px">Result</h2>
- <p>URL: <a
href="${url.encodeAsHTML()}">${url.encodeAsHTML()}</a></p>
+ <p>URL: <a
href="${realUrl.encodeAsHTML()}">${realUrl.encodeAsHTML()}</a></p>
<br />
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Write once. Port to many.
Get the SDK and tools to simplify cross-platform app development. Create
new or port existing apps to sell to consumers worldwide. Explore the
Intel AppUp(SM) program developer opportunity. appdeveloper.intel.com/join
http://p.sf.net/sfu/intel-appdev
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs