Mobrovac has submitted this change and it was merged.

Change subject: Validate language codes in mediawiki format
......................................................................


Validate language codes in mediawiki format

* Add fixLang method to lib/ZoteroService which
uses regex to loosely fix and validate language
codes:

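1. Replaces the first underscore with a dash (the
code calls String.replace with a string pattern,
which substitutes only the first occurrence).
2. Language codes must start with two alpha
characters, optionally followed by any number of
groups of two or more alpha characters, each
optionally preceded by '-'.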

* Call fixLang from within
ZoteroService.prototype.convertToMediawiki
function.

* Change OG test to verify that bad language codes
are deleted from the citation.

Bug: T93337
Change-Id: I797cc30ebc278a6b8d8a310ab68665bf7341b353
---
M lib/ZoteroService.js
M test/index.js
2 files changed, 26 insertions(+), 10 deletions(-)

Approvals:
  Mobrovac: Looks good to me, approved



diff --git a/lib/ZoteroService.js b/lib/ZoteroService.js
index d6df076..b15613a 100644
--- a/lib/ZoteroService.js
+++ b/lib/ZoteroService.js
@@ -173,7 +173,8 @@
                replaceCreators,
                addPubMedIdentifiers,
                fixISBN,
-               fixISSN
+               fixISSN,
+               fixLang
        ], function (err, citation) {
                callback([citation]);
        });
@@ -311,7 +312,7 @@
 /**
  * Replace Zotero output of CURRENT_TIMESTAMP with ISO time
  * @param  {Object}   citation     citation object
- * @param  {Function} callback     callback on citation object
+ * @param  {Function} callback     callback(error, citation)
  */
 function fixAccessDate(citation, callback){
        if (!citation.accessDate || (citation.accessDate === "CURRENT_TIMESTAMP")){
@@ -323,7 +324,7 @@
 /**
  * Convert String of ISSNs into an Array of ISSNs
  * @param  {Object}   citation     citation object
- * @param  {Function} callback     callback on citation object
+ * @param  {Function} callback     callback(error, citation)
  */
 function fixISSN(citation, callback){
        var match, i, reISSN,
@@ -349,7 +350,7 @@
 /**
  * Convert String of ISBNs into an Array of ISBNs
  * @param  {Object}   citation     citation object
- * @param  {Function} callback     callback on citation object
+ * @param  {Function} callback     callback(error, citation)
  */
 function fixISBN(citation, callback){
        var match, i, reISBN,
@@ -372,6 +373,21 @@
        callback(null, citation);
 }
 
+/**
+ * Validate language codes
+ * @param  {Object}   citation     citation object
+ * @param  {Function} callback     callback(error, citation)
+ */
+function fixLang(citation, callback){
+       if (citation.language) {
+               citation.language = citation.language.replace('_', '-');
+               if (!/^[a-z]{2}(?:-?[a-z]{2,})*$/i.test(citation.language)){
+                       delete citation.language;
+               }
+       }
+       callback(null, citation);
+}
+
 /* Exports */
 module.exports = ZoteroService;
 
diff --git a/test/index.js b/test/index.js
index 95ad698..cfd87c5 100644
--- a/test/index.js
+++ b/test/index.js
@@ -206,7 +206,6 @@
        });
 });
 
-
 describe('websiteTitle', function() {
 
        var opts = {
@@ -232,23 +231,24 @@
                });
        });
 });
-describe('scrape open graph', function() {
+
+describe('invalid language code', function() {
 
        var opts = {
-               search : 'http://www.pbs.org/newshour/making-sense/care-peoples-kids/',
+               search : 'http://www.ncbi.nlm.nih.gov/pubmed/23555203',
                format : 'mediawiki',
                acceptLanguage : 'en'
                };
 
-       it('should correctly scrape open graph data', function(done) {
+       it('should delete invalid language code', function(done) {
                citoidService.request(opts, function(error, responseCode, citation){
                        if (error) {throw error;}
                        if (responseCode !== 200){
                                throw new Error('Should respond 200: Response code is ' + responseCode);
                        }
                        if (!citation) {throw new Error ('Empty body');}
-                       if (!citation[0].language){
-                               throw new Error('Should contain language code');
+                       if (citation[0].language){
+                               throw new Error('Should not contain language code');
                        }
                        done();
                });

-- 
To view, visit https://gerrit.wikimedia.org/r/198491
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I797cc30ebc278a6b8d8a310ab68665bf7341b353
Gerrit-PatchSet: 6
Gerrit-Project: mediawiki/services/citoid
Gerrit-Branch: master
Gerrit-Owner: Mvolz <mv...@wikimedia.org>
Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org>
Gerrit-Reviewer: Mvolz <mv...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to