jenkins-bot has submitted this change and it was merged.

Change subject: Use pronunciation file URL instead of page URL
Use pronunciation file URL instead of page URL Bug: T117944 Change-Id: Ie8bcd2fbd1d649d5f2c402662398244b42dde061 --- M lib/parseProperty.js M routes/mobile-html-sections.js M test/features/mobile-html-sections-lead/pagecontent.js A test/features/mobile-html-sections-lead/parse-pronunciation-test.js 4 files changed, 64 insertions(+), 11 deletions(-) Approvals: BearND: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/parseProperty.js b/lib/parseProperty.js index 170baa1..3a699a8 100644 --- a/lib/parseProperty.js +++ b/lib/parseProperty.js @@ -21,16 +21,41 @@ return mUtil.defaultVal(mUtil.filterEmpty(table)); } -function ensureUrlWithDomain(url, domain) { - if (url.indexOf("//") === -1) { - url = "//" + domain + url; - } - return url; +function parsePronunciationFilePageUrls(doc) { + var ipaFileWithExtensionPageUrlSelector = 'span.IPA a[href*=.]'; + return doc.querySelectorAll(ipaFileWithExtensionPageUrlSelector) + .map(function(anchor) { + return anchor.getAttribute('href'); + }); } -function parsePronunciation(doc, domain) { - var anchor = doc.querySelector('span.IPA a[href]'); - return anchor && { 'url': ensureUrlWithDomain(anchor.getAttribute('href'), domain) }; +function filePageUrlToFilename(url) { + return url && url.replace(/^.*wiki\/File:/, ''); +} + +function filePageUrlToFileUrlSelector(filePageUrl) { + return filePageUrl && 'a[href^=//][href$=/' + filePageUrlToFilename(filePageUrl) + ']'; +} + +function pickPronunciationFilePageUrl(urls, title) { + if (urls.length) { + // Filenames may contain different delimiters than the page title uses. + // Increases chances of success for this crude heuristic by allowing + // any delimiter. e.g., "Molecular biology" may match + // "en-us-molecular-biology.mp3". 
+ var titleRegExp = new RegExp(title.replace(/[\s_-]/g, '.?'), 'i'); + return urls.find(function(url) { + return titleRegExp.test(url); + }) || urls[0]; + } +} + +function parsePronunciation(doc, title) { + var pageUrls = parsePronunciationFilePageUrls(doc); + var pageUrl = pickPronunciationFilePageUrl(pageUrls, title); + var selector = filePageUrlToFileUrlSelector(pageUrl); + var url = pageUrl && doc.querySelector(selector).getAttribute('href'); + return url && { url: url }; } /** @@ -90,5 +115,6 @@ parseInfobox: parseInfobox, parsePronunciation: parsePronunciation, parseSpokenWikipedia: parseSpokenWikipedia, - parseGeo: parseGeo + parseGeo: parseGeo, + _pickPronunciationFilePageUrl: pickPronunciationFilePageUrl }; diff --git a/routes/mobile-html-sections.js b/routes/mobile-html-sections.js index ca2c9fc..a2a0e67 100644 --- a/routes/mobile-html-sections.js +++ b/routes/mobile-html-sections.js @@ -132,7 +132,7 @@ })), extract: input.extract && parseExtract(input.extract.body), infobox: parse.parseInfobox(lead), - pronunciation: parse.parsePronunciation(lead, domain), + pronunciation: parse.parsePronunciation(lead, input.meta.displaytitle), spoken: input.page.spoken, geo: input.page.geo, sections: buildLeadSections(input.page.sections), diff --git a/test/features/mobile-html-sections-lead/pagecontent.js b/test/features/mobile-html-sections-lead/pagecontent.js index 1d1aa86..de77774 100644 --- a/test/features/mobile-html-sections-lead/pagecontent.js +++ b/test/features/mobile-html-sections-lead/pagecontent.js @@ -102,7 +102,7 @@ .then(function(res) { var lead = res.body; assert.deepEqual(res.status, 200); - assert.deepEqual(lead.pronunciation.url, '//en.wikipedia.org/wiki/File:En-us-Barack-Hussein-Obama.ogg'); + assert.deepEqual(lead.pronunciation.url, '//upload.wikimedia.org/wikipedia/commons/8/82/En-us-Barack-Hussein-Obama.ogg'); }); }); it('en Main page should have at least one image', function() { diff --git 
a/test/features/mobile-html-sections-lead/parse-pronunciation-test.js b/test/features/mobile-html-sections-lead/parse-pronunciation-test.js new file mode 100644 index 0000000..2b6bc91 --- /dev/null +++ b/test/features/mobile-html-sections-lead/parse-pronunciation-test.js @@ -0,0 +1,27 @@ +'use strict'; +/* global describe, it */ + +var assert = require('../../utils/assert.js'); +var pickPronunciatonFilePageUrl = require('../../../lib/parseProperty.js')._pickPronunciationFilePageUrl; + +describe('pickPronunciationFilePageUrl', function() { + function padExpectedUrl(expected) { + return ['/wiki/File:dummy0.ogg', expected, '/wiki/File:dummy1.ogg']; + } + + it('spaces in title should not effect choice', function() { + var expected = '/wiki/File:en-us-United-Arab-Emirates.ogg'; + var urls = padExpectedUrl(expected); + var title = 'United Arab Emirates'; + var result = pickPronunciatonFilePageUrl(urls, title); + assert.deepEqual(result, expected); + }); + + it('subset of filename should not effect choice', function() { + var expected = '/wiki/File:República_de_Cuba.ogg'; + var urls = padExpectedUrl(expected); + var title = 'Cuba'; + var result = pickPronunciatonFilePageUrl(urls, title); + assert.deepEqual(result, expected); + }); +}); \ No newline at end of file -- To view, visit https://gerrit.wikimedia.org/r/252448 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ie8bcd2fbd1d649d5f2c402662398244b42dde061 Gerrit-PatchSet: 4 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: Niedzielski <sniedziel...@wikimedia.org> Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org> Gerrit-Reviewer: Bgerstle <bgers...@wikimedia.org> Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org> Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org> Gerrit-Reviewer: Mhurd <mh...@wikimedia.org> Gerrit-Reviewer: Mobrovac 
<mobro...@wikimedia.org> Gerrit-Reviewer: Niedzielski <sniedziel...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits