Niedzielski has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/252448

Change subject: Use pronunciation file URL instead of page URL
......................................................................

Use pronunciation file URL instead of page URL

Bug: T117944
Change-Id: Ie8bcd2fbd1d649d5f2c402662398244b42dde061
---
M lib/parseProperty.js
M routes/mobile-html-sections.js
M test/features/mobile-html-sections-lead/pagecontent.js
A test/features/mobile-html-sections-lead/parse-pronunciation-test.js
4 files changed, 57 insertions(+), 11 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/48/252448/1

diff --git a/lib/parseProperty.js b/lib/parseProperty.js
index 170baa1..7df6b94 100644
--- a/lib/parseProperty.js
+++ b/lib/parseProperty.js
@@ -21,16 +21,35 @@
     return mUtil.defaultVal(mUtil.filterEmpty(table));
 }
 
-function ensureUrlWithDomain(url, domain) {
-    if (url.indexOf("//") === -1) {
-        url = "//" + domain + url;
-    }
-    return url;
+function parsePronunciationFilePageUrls(doc) {
+    var ipaFileWithExtensionPageUrlSelector = 'span.IPA a[href*=.]';
+    return doc.querySelectorAll(ipaFileWithExtensionPageUrlSelector)
+        .map(function(anchor) {
+            return anchor.getAttribute('href');
+        });
 }
 
-function parsePronunciation(doc, domain) {
-    var anchor = doc.querySelector('span.IPA a[href]');
-    return anchor && { 'url': ensureUrlWithDomain(anchor.getAttribute('href'), domain) };
+function filePageUrlToFilename(url) {
+    return url && url.replace(/^.*wiki\/File:/, '');
+}
+
+function filePageUrlToFileUrlSelector(filePageUrl) {
+    return filePageUrl && 'a[href^=//][href$=/' + filePageUrlToFilename(filePageUrl) + ']';
+}
+
+function pickPronuciationFilePageUrl(urls, title) {
+  var titleRegExp = new RegExp(title.replace(/[\s_-]/g, '.?'), 'i');
+  return urls.find(function(url) {
+      return titleRegExp.test(url);
+  }) || urls[0];
+}
+
+function parsePronunciation(doc, title) {
+  var pageUrls = parsePronunciationFilePageUrls(doc);
+  var pageUrl = pickPronuciationFilePageUrl(pageUrls, title);
+  var selector = filePageUrlToFileUrlSelector(pageUrl);
+  var url = pageUrl && doc.querySelector(selector).getAttribute('href');
+  return url && { url: 'https:' + url };
 }
 
 /**
@@ -90,5 +109,6 @@
     parseInfobox: parseInfobox,
     parsePronunciation: parsePronunciation,
     parseSpokenWikipedia: parseSpokenWikipedia,
-    parseGeo: parseGeo
+    parseGeo: parseGeo,
+    testPickPronuciationFilePageUrl: pickPronuciationFilePageUrl
 };
diff --git a/routes/mobile-html-sections.js b/routes/mobile-html-sections.js
index ca2c9fc..a2a0e67 100644
--- a/routes/mobile-html-sections.js
+++ b/routes/mobile-html-sections.js
@@ -132,7 +132,7 @@
         })),
         extract: input.extract && parseExtract(input.extract.body),
         infobox: parse.parseInfobox(lead),
-        pronunciation: parse.parsePronunciation(lead, domain),
+        pronunciation: parse.parsePronunciation(lead, input.meta.displaytitle),
         spoken: input.page.spoken,
         geo: input.page.geo,
         sections: buildLeadSections(input.page.sections),
diff --git a/test/features/mobile-html-sections-lead/pagecontent.js b/test/features/mobile-html-sections-lead/pagecontent.js
index 1d1aa86..fea632f 100644
--- a/test/features/mobile-html-sections-lead/pagecontent.js
+++ b/test/features/mobile-html-sections-lead/pagecontent.js
@@ -102,7 +102,7 @@
             .then(function(res) {
                 var lead = res.body;
                 assert.deepEqual(res.status, 200);
-                assert.deepEqual(lead.pronunciation.url, '//en.wikipedia.org/wiki/File:En-us-Barack-Hussein-Obama.ogg');
+                assert.deepEqual(lead.pronunciation.url, 'https://upload.wikimedia.org/wikipedia/commons/8/82/En-us-Barack-Hussein-Obama.ogg');
             });
     });
     it('en Main page should have at least one image', function() {
diff --git a/test/features/mobile-html-sections-lead/parse-pronunciation-test.js b/test/features/mobile-html-sections-lead/parse-pronunciation-test.js
new file mode 100644
index 0000000..cb645cb
--- /dev/null
+++ b/test/features/mobile-html-sections-lead/parse-pronunciation-test.js
@@ -0,0 +1,26 @@
+'use strict';
+var assert = require('../../utils/assert.js');
+var pickPronunciatonFilePageUrl = require('../../../lib/parseProperty.js').testPickPronuciationFilePageUrl;
+
+//update android code
+describe('pickPronuciationFilePageUrl', function() {
+    function padExpectedUrl(expected) {
+        return ['/wiki/File:dummy0.ogg', expected, '/wiki/File:dummy1.ogg'];
+    }
+
+    it('spaces in title should not effect choice', function() {
+        var expected = '/wiki/File:en-us-United-Arab-Emirates.ogg';
+        var urls = padExpectedUrl(expected);
+        var title = 'United Arab Emirates';
+        var result = pickPronunciatonFilePageUrl(urls, title)
+        assert.deepEqual(result, expected);
+    });
+
+    it('subset of filename should not effect choice', function() {
+        var expected = '/wiki/File:República_de_Cuba.ogg';
+        var urls = padExpectedUrl(expected);
+        var title = 'Cuba';
+        var result = pickPronunciatonFilePageUrl(urls, title)
+        assert.deepEqual(result, expected);
+    });
+});
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/252448
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie8bcd2fbd1d649d5f2c402662398244b42dde061
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Niedzielski <sniedziel...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to