jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/340800 )
Change subject: Spike: Which language projects support FeaturedFeed extension ...................................................................... Spike: Which language projects support FeaturedFeed extension This script uses FeaturedFeeds extension pages to get various featured items: tfa, dyk, potd, motd, qotd, wotd, fwotd. For tfa it gets the title of the featured article. For others it gets the full HTML snippet. It checks all languages to see for which languages we could get one of these added to the aggregated feed. The page titles are taken from https://github.com/wikimedia/operations-mediawiki-config/blob/master/wmf-config/FeaturedFeedsWMF.php#L47-L73 (must be in MediaWiki namespace) There are actually three modes in this script. To change the mode you currently need to modify the script. * wikitext of the setup page (e.g. MediaWiki:Ffeed-wotd-page) * expanded templates actually expanded twice: e.g. '{{ {{ MediaWiki:Ffeed-wotd-page }} }}' * retrieving the RSS feed using action=featuredfeed Bug: T148680 Change-Id: I4fbb160de1be7204454cb252c1531ba03d9201fe --- M package.json A scripts/check-featured-feed.js A static/wikiquotes.json A static/wiktionaries.json 4 files changed, 488 insertions(+), 0 deletions(-) Approvals: Gergő Tisza: Looks good to me, but someone else must approve jenkins-bot: Verified Mholloway: Looks good to me, approved diff --git a/package.json b/package.json index bf5c027..e03e1b2 100644 --- a/package.json +++ b/package.json @@ -71,6 +71,7 @@ "mocha-jshint": "^2.3.1", "mocha-lcov-reporter": "^1.2.0", "nsp": "^2.6.2", + "rss-parser": "^2.5.2", "sepia": "^2.0.1" }, "deploy": { diff --git a/scripts/check-featured-feed.js b/scripts/check-featured-feed.js new file mode 100755 index 0000000..8d4b850 --- /dev/null +++ b/scripts/check-featured-feed.js @@ -0,0 +1,222 @@ +#!/usr/bin/env node + +'use strict'; + +const BBPromise = require('bluebird'); +const domino = require('domino'); +const preq = require('preq'); +const parseRssUrl = BBPromise.promisify(require('rss-parser').parseURL); +const underscore = require('underscore'); + +// +// LANGUAGES SECTION start +// +const wikiquoteLanguages = require('../static/wikiquotes.json'); +const wiktionaryLanguages = require('../static/wiktionaries.json'); +const wikipediaLanguagesRawList = require('../static/languages_list.json'); + +const prepareWikipediaLanguageCodes = () => { + delete wikipediaLanguagesRawList['Simplified Chinese']; // skip lang variants + delete wikipediaLanguagesRawList['Traditional Chinese']; + return underscore.values(wikipediaLanguagesRawList).sort(); +}; +const wikipediaLanguages = prepareWikipediaLanguageCodes(); +// +// LANGUAGES SECTION end +// + +const ModeEnum = Object.freeze({ wikitext: 0, expandTemplates: 1, rssFeed: 2 }); +const mode = ModeEnum.rssFeed; + +const candidates = []; +const errNotConfigured = []; +const errElse = []; +const errHttp = []; + + +const cleanUpHtml = html => html.replace(/<!--[\s\S]*?-->/mg, '').replace(/\n/mg, ''); + +/* eslint-disable no-console */ + +const lookForBoldAnchor = (document, projectLang, html) => { + const boldAnchor = document.querySelector('b > a, a > b'); + if (boldAnchor) { + const tfaTitle = boldAnchor.getAttribute('title'); + console.log(`${projectLang}: ${tfaTitle}`); + candidates.push(projectLang); + } else { + console.log(`${projectLang}: ERROR: bold anchor not found: else: ${html}`); + errElse.push(projectLang); + } +}; + +const getWikitext = (projectLang, feature) => { + const baseUri = `https://${projectLang}.${feature.projectFamily}.org`; + const queryParams1 = `action=query&format=json&prop=revisions&rvprop=content`; + const queryParams2 = `&titles=MediaWiki%3A${feature.title}`; + const uri = `${baseUri}/w/api.php?${queryParams1}${queryParams2}`; + + return preq.get({ uri }) + .then((rsp) => { + const pages = rsp.body.query && rsp.body.query.pages; + if (!pages) { + errNotConfigured.push(projectLang); + return; + } + const firstPageKey = Object.keys(pages)[0]; + if (Object.prototype.hasOwnProperty.call(pages[firstPageKey], 'missing')) { + errNotConfigured.push(projectLang); + return; + } + + const wikiText = pages[firstPageKey].revisions[0]['*']; + if (wikiText) { + console.log(`${projectLang}: ${wikiText}`); + candidates.push(projectLang); + } else { + errNotConfigured.push(projectLang); + } + }) + .catch((err) => { + console.log(`${projectLang} ERROR: ${err}`); + errHttp.push(projectLang); + }); +}; + +const doubleExpandTemplate = (projectLang, feature) => { + const baseUri = `https://${projectLang}.${feature.projectFamily}.org`; + const wikiText = encodeURIComponent( + `{{#ifexist:{{int:${feature.title}}}|{{ {{int:${feature.title}}} }} }}`); + const queryParams = `action=parse&format=json&prop=text&contentmodel=wikitext&text=${wikiText}`; + const uri = `${baseUri}/w/api.php?${queryParams}`; + + return preq.get({ uri }) + .then((rsp) => { + const html = cleanUpHtml(rsp.body.parse.text['*']); + if (html === '') { + errNotConfigured.push(projectLang); + } else if (feature.shouldLookForBoldAnchor) { + lookForBoldAnchor(domino.createDocument(html), projectLang, html); + } else { + console.log(`${projectLang}: ${html}`); + candidates.push(projectLang); + } + }) + .catch((err) => { + console.log(`${projectLang} ERROR: ${err}`); + errHttp.push(projectLang); + }); +}; + +const getRssFeed = (projectLang, feature) => { + const baseUri = `https://${projectLang}.${feature.projectFamily}.org`; + const queryParams = `action=featuredfeed&feed=${feature.feedName}&feedformat=rss`; + const uri = `${baseUri}/w/api.php?${queryParams}`; + const contentPool = []; + const indices = []; + + return parseRssUrl(uri) + .then((response) => { + let languageCode; + // console.log(`${projectLang}: ${response.feed.title}`); + response.feed.entries.forEach((entry) => { + const index = underscore.indexOf(contentPool, entry.content); + // console.log(`${entry.title}: ${index}: ${entry.link}`); + if (index >= 0) { + indices.push(index); + } else { + contentPool.push(entry.content); + indices.push(contentPool.length - 1); + } + languageCode = entry.link.substr(entry.link.lastIndexOf('/') + 1); + }); + // in a few instances the languageCode differs from the projectLang (Example: no/nb) + console.log(`${projectLang}/${languageCode}: ${indices}: ${uri}`); + candidates.push(projectLang); + }) + .catch((err) => { + if (err.message === 'RSS 1.0 parsing not yet implemented.') { + errNotConfigured.push(projectLang); + } else { + console.log(`${projectLang}: ERROR: ${uri}: ${err}`); + errHttp.push(projectLang); + } + }); +}; + +const printResultSummary = () => { + console.log(`candidates: ${candidates.length}: ${JSON.stringify(candidates.sort())}`); + console.log(`errNotConfigured: ${errNotConfigured.length}: ${errNotConfigured.sort()}`); + console.log(`errElse: ${errElse.length}: ${errElse.sort()}`); + console.log(`retryQueue: ${errHttp.length}: ${errHttp.sort()}`); +}; + +const processAllLanguages = (feature) => { + BBPromise.map(feature.projectLangs, (projectLang) => { + if (mode === ModeEnum.expandTemplates) { + return doubleExpandTemplate(projectLang, feature); + } else if (mode === ModeEnum.wikitext) { + return getWikitext(projectLang, feature); + } else { + return getRssFeed(projectLang, feature); + } + }, { concurrency: 1 }) + .then(() => { + printResultSummary(); + }); +}; + +// MAIN +const featureMap = { + tfa: { + projectFamily: 'wikipedia', + projectLangs: wikipediaLanguages, + feedName: 'featured', + title: 'Ffeed-featured-page', + shouldLookForBoldAnchor: true + }, + dyk: { + projectFamily: 'wikipedia', + projectLangs: wikipediaLanguages, + feedName: 'dyk', + title: 'Ffeed-dyk-page' + }, + potd: { + projectFamily: 'wikipedia', + projectLangs: wikipediaLanguages, + feedName: 'potd', + title: 'Ffeed-potd-page', + }, + motd: { + projectFamily: 'wikipedia', + projectLangs: wikipediaLanguages, + feedName: 'motd', + title: 'Ffeed-motd-page' + }, + qotd: { + projectFamily: 'wikiquote', + projectLangs: wikiquoteLanguages, + feedName: 'qotd', + title: 'Ffeed-qotd-page' + }, + wotd: { + projectFamily: 'wiktionary', + projectLangs: wiktionaryLanguages, + feedName: 'wotd', + title: 'Ffeed-wotd-page' + }, + fwotd: { + projectFamily: 'wiktionary', + projectLangs: wiktionaryLanguages, + feedName: 'fwotd', + title: 'Ffeed-fwotd-page' + } +}; + +const feature = featureMap[process.argv[2]]; +if (feature) { + processAllLanguages(feature); +} else { + console.error(`Error: need to specify one of ${Object.keys(featureMap)}!`); + process.exit(-1); +} diff --git a/static/wikiquotes.json b/static/wikiquotes.json new file mode 100644 index 0000000..c011f44 --- /dev/null +++ b/static/wikiquotes.json @@ -0,0 +1,91 @@ +[ +"af", +"als", +"am", +"ang", +"ar", +"ast", +"az", +"be", +"bg", +"bm", +"br", +"bs", +"ca", +"co", +"cr", +"cs", +"cy", +"da", +"de", +"el", +"en", +"eo", +"es", +"et", +"eu", +"fa", +"fi", +"fr", +"ga", +"gl", +"gu", +"he", +"hi", +"hr", +"hu", +"hy", +"id", +"is", +"it", +"ja", +"ka", +"kk", +"kn", +"ko", +"kr", +"ks", +"ku", +"kw", +"ky", +"la", +"lb", +"li", +"lt", +"ml", +"mr", +"na", +"nds", +"nl", +"nn", +"no", +"pl", +"pt", +"qu", +"ro", +"ru", +"sa", +"simple", +"sk", +"sl", +"sq", +"sr", +"su", +"sv", +"ta", +"te", +"th", +"tk", +"tr", +"tt", +"ug", +"uk", +"ur", +"uz", +"vi", +"vo", +"wo", +"za", +"zh", +"zh-min-nan" +] diff --git a/static/wiktionaries.json b/static/wiktionaries.json new file mode 100644 index 0000000..6c61fd2 --- /dev/null +++ b/static/wiktionaries.json @@ -0,0 +1,174 @@ +[ +"aa", +"ab", +"af", +"ak", +"als", +"am", +"an", +"ang", +"ar", +"as", +"ast", +"av", +"ay", +"az", +"be", +"bg", +"bh", +"bi", +"bm", +"bn", +"bo", +"br", +"bs", +"ca", +"ch", +"chr", +"co", +"cr", +"cs", +"csb", +"cy", +"da", +"de", +"dv", +"dz", +"el", +"en", +"eo", +"es", +"et", +"eu", +"fa", +"fi", +"fj", +"fo", +"fr", +"fy", +"ga", +"gd", +"gl", +"gn", +"gu", +"gv", +"ha", +"he", +"hi", +"hr", +"hsb", +"hu", +"hy", +"ia", +"id", +"ie", +"ik", +"io", +"is", +"it", +"iu", +"ja", +"jbo", +"jv", +"ka", +"kk", +"kl", +"km", +"kn", +"ko", +"ks", +"ku", +"kw", +"ky", +"la", +"lb", +"li", +"ln", +"lo", +"lt", +"lv", +"mg", +"mh", +"mi", +"mk", +"ml", +"mn", +"mo", +"mr", +"ms", +"mt", +"my", +"na", +"nah", +"nds", +"ne", +"nl", +"nn", +"no", +"oc", +"om", +"or", +"pa", +"pi", +"pl", +"pnb", +"ps", +"pt", +"qu", +"rm", +"rn", +"ro", +"roa-rup", +"ru", +"rw", +"sa", +"sc", +"scn", +"sd", +"sg", +"sh", +"si", +"simple", +"sk", +"sl", +"sm", +"sn", +"so", +"sq", +"sr", +"ss", +"st", +"su", +"sv", +"sw", +"ta", +"te", +"tg", +"th", +"ti", +"tk", +"tl", +"tn", +"to", +"tpi", +"tr", +"ts", +"tt", +"tw", +"ug", +"uk", +"ur", +"uz", +"vec", +"vi", +"vo", +"wa", +"wo", +"xh", +"yi", +"yo", +"za", +"zh", +"zh-min-nan", +"zu" +] -- To view, visit https://gerrit.wikimedia.org/r/340800 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I4fbb160de1be7204454cb252c1531ba03d9201fe Gerrit-PatchSet: 13 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: BearND <bsitzm...@wikimedia.org> Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org> Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org> Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Gergő Tisza <gti...@wikimedia.org> Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org> Gerrit-Reviewer: Jhernandez <jhernan...@wikimedia.org> Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org> Gerrit-Reviewer: Mhurd <mh...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Niedzielski <sniedziel...@wikimedia.org> Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits