Niedzielski has uploaded a new change for review. https://gerrit.wikimedia.org/r/252448
Change subject: Use pronunciation file URL instead of page URL ...................................................................... Use pronunciation file URL instead of page URL Bug: T117944 Change-Id: Ie8bcd2fbd1d649d5f2c402662398244b42dde061 --- M lib/parseProperty.js M routes/mobile-html-sections.js M test/features/mobile-html-sections-lead/pagecontent.js A test/features/mobile-html-sections-lead/parse-pronunciation-test.js 4 files changed, 57 insertions(+), 11 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/48/252448/1 diff --git a/lib/parseProperty.js b/lib/parseProperty.js index 170baa1..7df6b94 100644 --- a/lib/parseProperty.js +++ b/lib/parseProperty.js @@ -21,16 +21,35 @@ return mUtil.defaultVal(mUtil.filterEmpty(table)); } -function ensureUrlWithDomain(url, domain) { - if (url.indexOf("//") === -1) { - url = "//" + domain + url; - } - return url; +function parsePronunciationFilePageUrls(doc) { + var ipaFileWithExtensionPageUrlSelector = 'span.IPA a[href*=.]'; + return doc.querySelectorAll(ipaFileWithExtensionPageUrlSelector) + .map(function(anchor) { + return anchor.getAttribute('href'); + }); } -function parsePronunciation(doc, domain) { - var anchor = doc.querySelector('span.IPA a[href]'); - return anchor && { 'url': ensureUrlWithDomain(anchor.getAttribute('href'), domain) }; +function filePageUrlToFilename(url) { + return url && url.replace(/^.*wiki\/File:/, ''); +} + +function filePageUrlToFileUrlSelector(filePageUrl) { + return filePageUrl && 'a[href^=//][href$=/' + filePageUrlToFilename(filePageUrl) + ']'; +} + +function pickPronuciationFilePageUrl(urls, title) { + var titleRegExp = new RegExp(title.replace(/[\s_-]/g, '.?'), 'i'); + return urls.find(function(url) { + return titleRegExp.test(url); + }) || urls[0]; +} + +function parsePronunciation(doc, title) { + var pageUrls = parsePronunciationFilePageUrls(doc); + var pageUrl = pickPronuciationFilePageUrl(pageUrls, title); + var selector = filePageUrlToFileUrlSelector(pageUrl); + var url = pageUrl && doc.querySelector(selector).getAttribute('href'); + return url && { url: 'https:' + url }; } /** @@ -90,5 +109,6 @@ parseInfobox: parseInfobox, parsePronunciation: parsePronunciation, parseSpokenWikipedia: parseSpokenWikipedia, - parseGeo: parseGeo + parseGeo: parseGeo, + testPickPronuciationFilePageUrl: pickPronuciationFilePageUrl }; diff --git a/routes/mobile-html-sections.js b/routes/mobile-html-sections.js index ca2c9fc..a2a0e67 100644 --- a/routes/mobile-html-sections.js +++ b/routes/mobile-html-sections.js @@ -132,7 +132,7 @@ })), extract: input.extract && parseExtract(input.extract.body), infobox: parse.parseInfobox(lead), - pronunciation: parse.parsePronunciation(lead, domain), + pronunciation: parse.parsePronunciation(lead, input.meta.displaytitle), spoken: input.page.spoken, geo: input.page.geo, sections: buildLeadSections(input.page.sections), diff --git a/test/features/mobile-html-sections-lead/pagecontent.js b/test/features/mobile-html-sections-lead/pagecontent.js index 1d1aa86..fea632f 100644 --- a/test/features/mobile-html-sections-lead/pagecontent.js +++ b/test/features/mobile-html-sections-lead/pagecontent.js @@ -102,7 +102,7 @@ .then(function(res) { var lead = res.body; assert.deepEqual(res.status, 200); - assert.deepEqual(lead.pronunciation.url, '//en.wikipedia.org/wiki/File:En-us-Barack-Hussein-Obama.ogg'); + assert.deepEqual(lead.pronunciation.url, 'https://upload.wikimedia.org/wikipedia/commons/8/82/En-us-Barack-Hussein-Obama.ogg'); }); }); it('en Main page should have at least one image', function() { diff --git a/test/features/mobile-html-sections-lead/parse-pronunciation-test.js b/test/features/mobile-html-sections-lead/parse-pronunciation-test.js new file mode 100644 index 0000000..cb645cb --- /dev/null +++ b/test/features/mobile-html-sections-lead/parse-pronunciation-test.js @@ -0,0 +1,26 @@ +'use strict'; +var assert = require('../../utils/assert.js'); +var pickPronunciatonFilePageUrl = require('../../../lib/parseProperty.js').testPickPronuciationFilePageUrl; + +//update android code +describe('pickPronuciationFilePageUrl', function() { + function padExpectedUrl(expected) { + return ['/wiki/File:dummy0.ogg', expected, '/wiki/File:dummy1.ogg']; + } + + it('spaces in title should not effect choice', function() { + var expected = '/wiki/File:en-us-United-Arab-Emirates.ogg'; + var urls = padExpectedUrl(expected); + var title = 'United Arab Emirates'; + var result = pickPronunciatonFilePageUrl(urls, title) + assert.deepEqual(result, expected); + }); + + it('subset of filename should not effect choice', function() { + var expected = '/wiki/File:República_de_Cuba.ogg'; + var urls = padExpectedUrl(expected); + var title = 'Cuba'; + var result = pickPronunciatonFilePageUrl(urls, title) + assert.deepEqual(result, expected); + }); +}); \ No newline at end of file -- To view, visit https://gerrit.wikimedia.org/r/252448 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ie8bcd2fbd1d649d5f2c402662398244b42dde061 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: Niedzielski <sniedziel...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits