jenkins-bot has submitted this change and it was merged.

Change subject: Use pronunciation file URL instead of page URL
......................................................................


Use pronunciation file URL instead of page URL

Bug: T117944
Change-Id: Ie8bcd2fbd1d649d5f2c402662398244b42dde061
---
M lib/parseProperty.js
M routes/mobile-html-sections.js
M test/features/mobile-html-sections-lead/pagecontent.js
A test/features/mobile-html-sections-lead/parse-pronunciation-test.js
4 files changed, 64 insertions(+), 11 deletions(-)

Approvals:
  BearND: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/parseProperty.js b/lib/parseProperty.js
index 170baa1..3a699a8 100644
--- a/lib/parseProperty.js
+++ b/lib/parseProperty.js
@@ -21,16 +21,41 @@
     return mUtil.defaultVal(mUtil.filterEmpty(table));
 }
 
-function ensureUrlWithDomain(url, domain) {
-    if (url.indexOf("//") === -1) {
-        url = "//" + domain + url;
-    }
-    return url;
+function parsePronunciationFilePageUrls(doc) {
+    var ipaFileWithExtensionPageUrlSelector = 'span.IPA a[href*=.]';
+    return doc.querySelectorAll(ipaFileWithExtensionPageUrlSelector)
+        .map(function(anchor) {
+            return anchor.getAttribute('href');
+        });
 }
 
-function parsePronunciation(doc, domain) {
-    var anchor = doc.querySelector('span.IPA a[href]');
-    return anchor && { 'url': ensureUrlWithDomain(anchor.getAttribute('href'), domain) };
+function filePageUrlToFilename(url) {
+    return url && url.replace(/^.*wiki\/File:/, '');
+}
+
+function filePageUrlToFileUrlSelector(filePageUrl) {
+    return filePageUrl && 'a[href^=//][href$=/' + filePageUrlToFilename(filePageUrl) + ']';
+}
+
+function pickPronunciationFilePageUrl(urls, title) {
+    if (urls.length) {
+        // Filenames may contain different delimiters than the page title uses.
+        // Increases chances of success for this crude heuristic by allowing
+        // any delimiter. e.g., "Molecular biology" may match
+        // "en-us-molecular-biology.mp3".
+        var titleRegExp = new RegExp(title.replace(/[\s_-]/g, '.?'), 'i');
+        return urls.find(function(url) {
+            return titleRegExp.test(url);
+        }) || urls[0];
+    }
+}
+
+function parsePronunciation(doc, title) {
+    var pageUrls = parsePronunciationFilePageUrls(doc);
+    var pageUrl = pickPronunciationFilePageUrl(pageUrls, title);
+    var selector = filePageUrlToFileUrlSelector(pageUrl);
+    var url = pageUrl && doc.querySelector(selector).getAttribute('href');
+    return url && { url: url };
 }
 
 /**
@@ -90,5 +115,6 @@
     parseInfobox: parseInfobox,
     parsePronunciation: parsePronunciation,
     parseSpokenWikipedia: parseSpokenWikipedia,
-    parseGeo: parseGeo
+    parseGeo: parseGeo,
+    _pickPronunciationFilePageUrl: pickPronunciationFilePageUrl
 };
diff --git a/routes/mobile-html-sections.js b/routes/mobile-html-sections.js
index ca2c9fc..a2a0e67 100644
--- a/routes/mobile-html-sections.js
+++ b/routes/mobile-html-sections.js
@@ -132,7 +132,7 @@
         })),
         extract: input.extract && parseExtract(input.extract.body),
         infobox: parse.parseInfobox(lead),
-        pronunciation: parse.parsePronunciation(lead, domain),
+        pronunciation: parse.parsePronunciation(lead, input.meta.displaytitle),
         spoken: input.page.spoken,
         geo: input.page.geo,
         sections: buildLeadSections(input.page.sections),
diff --git a/test/features/mobile-html-sections-lead/pagecontent.js b/test/features/mobile-html-sections-lead/pagecontent.js
index 1d1aa86..de77774 100644
--- a/test/features/mobile-html-sections-lead/pagecontent.js
+++ b/test/features/mobile-html-sections-lead/pagecontent.js
@@ -102,7 +102,7 @@
             .then(function(res) {
                 var lead = res.body;
                 assert.deepEqual(res.status, 200);
-                assert.deepEqual(lead.pronunciation.url, '//en.wikipedia.org/wiki/File:En-us-Barack-Hussein-Obama.ogg');
+                assert.deepEqual(lead.pronunciation.url, '//upload.wikimedia.org/wikipedia/commons/8/82/En-us-Barack-Hussein-Obama.ogg');
             });
     });
     it('en Main page should have at least one image', function() {
diff --git a/test/features/mobile-html-sections-lead/parse-pronunciation-test.js b/test/features/mobile-html-sections-lead/parse-pronunciation-test.js
new file mode 100644
index 0000000..2b6bc91
--- /dev/null
+++ b/test/features/mobile-html-sections-lead/parse-pronunciation-test.js
@@ -0,0 +1,27 @@
+'use strict';
+/* global describe, it */
+
+var assert = require('../../utils/assert.js');
+var pickPronunciatonFilePageUrl = require('../../../lib/parseProperty.js')._pickPronunciationFilePageUrl;
+
+describe('pickPronunciationFilePageUrl', function() {
+    function padExpectedUrl(expected) {
+        return ['/wiki/File:dummy0.ogg', expected, '/wiki/File:dummy1.ogg'];
+    }
+
+    it('spaces in title should not effect choice', function() {
+        var expected = '/wiki/File:en-us-United-Arab-Emirates.ogg';
+        var urls = padExpectedUrl(expected);
+        var title = 'United Arab Emirates';
+        var result = pickPronunciatonFilePageUrl(urls, title);
+        assert.deepEqual(result, expected);
+    });
+
+    it('subset of filename should not effect choice', function() {
+        var expected = '/wiki/File:República_de_Cuba.ogg';
+        var urls = padExpectedUrl(expected);
+        var title = 'Cuba';
+        var result = pickPronunciatonFilePageUrl(urls, title);
+        assert.deepEqual(result, expected);
+    });
+});
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/252448
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie8bcd2fbd1d649d5f2c402662398244b42dde061
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Niedzielski <sniedziel...@wikimedia.org>
Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org>
Gerrit-Reviewer: Bgerstle <bgers...@wikimedia.org>
Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org>
Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org>
Gerrit-Reviewer: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org>
Gerrit-Reviewer: Mhurd <mh...@wikimedia.org>
Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org>
Gerrit-Reviewer: Niedzielski <sniedziel...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to