Mvolz has submitted this change and it was merged.

Change subject: Add missing PMIDs by looking up DOI, create pubMedRequest.js
......................................................................


Add missing PMIDs by looking up DOI, create pubMedRequest.js

If the PMID is missing but the DOI is present, use the DOI to look up
the PMID and PMCID with pubMedRequest.

Creates lib/pubMedRequest.js, which requests an object from PubMed's
API and sanity-checks the response.

Corrects testJSON input file in lib/zotero.js

Assorted code styling improvements

Bug: T1088
Change-Id: If174e13c124eb2c0f0e3582175167b5757c6912d
---
A lib/pubMedRequest.js
M lib/requests.js
M lib/zotero.js
3 files changed, 108 insertions(+), 59 deletions(-)

Approvals:
  Mvolz: Verified; Looks good to me, approved



diff --git a/lib/pubMedRequest.js b/lib/pubMedRequest.js
new file mode 100644
index 0000000..231d939
--- /dev/null
+++ b/lib/pubMedRequest.js
@@ -0,0 +1,60 @@
+#!/usr/bin/env node
+/**
+ * https://www.mediawiki.org/wiki/citoid
+ *
+ * Requests and sanity checks the response from PubMed's API
+ */
+
+(function() {
+
+    var request = require('request');
+       var bunyan = require('bunyan');
+       var log = bunyan.createLogger({name: "citoid"});
+
+       /**
+        * Requests a PubMed object using any supported identifier
+        * @param  {String}   identifier Valid PubMed identifier (PMID, PMCID, 
Manuscript ID, versioned ID)
+        * @param  {Function} callback   callback (error, object)
+        */
+
+       var pubMedRequest = function (identifier, callback){
+        var escapedId = encodeURIComponent(identifier);
+               var url = 
"http://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?tool=citoid&email=citoid@mediawiki&format=json&ids=";
 + escapedId;
+
+               request(url, function(error, response, body){
+                       log.info("PubMed query made for: " + url);
+                       if (error) {
+                               log.error(error);
+                               callback(error, null);
+                       } else if (response.statusCode !== 200) {
+                               log.error("Unexpected HTTP status code: " + 
response.statusCode);
+                               callback("Unexpected HTTP status code: " + 
response.statusCode, null);
+                       } else {
+                               var jsonObj;
+                               try {
+                                       jsonObj = JSON.parse(body);
+                               } catch (error) {
+                                       log.info("Original response: " + body);
+                                       log.error("JSON parse error: " + error);
+                                       callback("JSON parse error: " + error, 
null);
+                               }
+
+                               if (jsonObj){
+                                       if (jsonObj.status !== 'ok'){
+                                               log.error("Unexpected status 
from PubMed API: " + jsonObj.status);
+                                               callback("Unexpected status 
from PubMed API: " + jsonObj.status, null);
+                                       } else if (jsonObj.records.length === 
0){
+                                               log.error("No records from 
PubMed API");
+                                               callback("No records from 
PubMed API", null);
+                                       } else {
+                                               callback(null, jsonObj);
+                                       }
+                               }
+                       }
+               });
+       };
+
+    module.exports = pubMedRequest;
+
+}());
+
diff --git a/lib/requests.js b/lib/requests.js
index f3dc099..e1e82bc 100644
--- a/lib/requests.js
+++ b/lib/requests.js
@@ -11,8 +11,8 @@
 var unshorten = require('./unshorten.js');
 var scrape = require('./scrape.js').scrape;
 var zoteroRequest = require('./zotero.js').zoteroRequest;
+var pubMedRequest = require('./pubMedRequest.js');
 var util = require('util');
-var request = require('request');
 
 /**
  * Request citation metadata from a URL
@@ -104,51 +104,13 @@
  */
 
 var requestFromPubMedID = function (requestedPubMedID, opts, callback){
-    var pubMedLink = 
'http://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?tool=citoid&email=citoid@mediawiki&format=json&ids='+requestedPubMedID;
-    requestJSON(pubMedLink, function(error, obj){
+    pubMedRequest(requestedPubMedID, function(error, obj){
                if(error){
                        callback(error, null, null);
-               }
-               else if(obj.status !== 'ok'){
-                       log.error("PubMed returned unrecognized status code: " 
+ obj.status);
-                       callback("PubMed returned unrecognized status", 200, 
null);
                } else {
-                       if(obj.records.length > 0){
-                               var doi = obj.records[0].doi;
-                               log.info("Got DOI " + doi);
-                               requestFromDOI(doi, opts, callback);
-                       } else {
-                               log.error("No records retrieved from PubMed for 
" + requestedPMID);
-                               callback("No records rerieved from PubMed", 
200, null);
-                       }
-               }
-       });
-};
-
-/**
- * Fetch and parse a JSON object from URL
- * @param  {String}   url      JSON endpoint to fetch and parse
- * @param  {Function} callback callback (error, object)
- */
-
-var requestJSON = function (url, callback){
-       request(url, function(error, response, body){
-               log.info("JSON query made for: " + url);
-               if (error) {
-                       log.error(error);
-                       callback(error, null);
-               } else if (response.statusCode !== 200) {
-                       log.error("Unexpected HTTP status code: " + 
response.statusCode);
-                       callback("Unexpected HTTP status code: " + 
response.statusCode, null);
-               } else {
-                       try {
-                               var jsonObj = JSON.parse(body);
-                               callback(null, jsonObj);
-                       } catch (error) {
-                               log.info("Original response: " + body);
-                               log.error("JSON parse error: " + error);
-                               callback("JSON parse error: " + error, null);
-                       }
+                       var doi = obj.records[0].doi;
+                       log.info("Got DOI " + doi);
+                       requestFromDOI(doi, opts, callback);
                }
        });
 };
diff --git a/lib/zotero.js b/lib/zotero.js
index f383ce4..32aad12 100644
--- a/lib/zotero.js
+++ b/lib/zotero.js
@@ -7,6 +7,7 @@
 
 var request = require('request');
 var async = require('async');
+var pubMedRequest = require('./pubMedRequest.js');
 
 /**
  * Requests to Zotero server
@@ -14,7 +15,7 @@
  * @param  {Object}   opts         options for request
  * @param  {Function} callback     callback(error, response, body)
  */
-var zoteroRequest  = function(requestedURL, opts, callback){
+var zoteroRequest = function(requestedURL, opts, callback){
        var options = {
                url: opts.zoteroURL,
                method: 'POST',
@@ -31,8 +32,7 @@
                                        callback(error, response, modifiedBody);
                                });
                        });
-               }
-               else {
+               } else {
                        callback(error, response, body);
                }
 
@@ -81,7 +81,7 @@
                fixURL, //must go directly after unnamed function that hands it 
url
                fixAccessDate,
                replaceCreators,
-               addPMID,
+               addPubMedIdentifiers,
                fixISBN,
                fixISSN
        ], function (err, citation) {
@@ -111,8 +111,7 @@
                                        creatorFieldName = 
zotCreators[z].creatorType;
                                        if (creatorTypeCount[creatorFieldName]){
                                                
creatorTypeCount[creatorFieldName] += 1;
-                                       }
-                                       else {
+                                       } else {
                                                
creatorTypeCount[creatorFieldName] = 1;
                                        }
                                        //Appends number to name, i.e. author 
-> author1
@@ -127,7 +126,7 @@
                },
                fixAccessDate,
                replaceCreators,
-               addPMID,
+               addPubMedIdentifiers,
                fixISBN,
                fixISSN
        ], function (err, citation) {
@@ -163,20 +162,42 @@
        callback(null, citation);
 };
 
-var addPMID = function(citation, callback){
-       //get pmid out of extra fields
-       if (citation.extra){
+/**
+ * Add PMID and PMCID fields from the extra field or through DOI lookup
+ * @param  {Object}   citation citation object to add PMID
+ * @param  {Function} callback callback (error, citation)
+ */
+
+var addPubMedIdentifiers = function(citation, callback){
+       if (citation.extra) {
+               //get pmid from extra fields
                var extraFields = citation.extra.split('\n');
-               for (var f in extraFields){
+               for (var f in extraFields) {
                        //could add them all, but let's not do this in case of 
conflicting fields
                        var keyValue = extraFields[f].split(': ');
-                       if (keyValue[0] === 'PMID'){
+                       if (keyValue[0] === 'PMID' || keyValue[0] === 'PMCID') {
                                citation[keyValue[0]] = keyValue[1].trim();
                        }
                }
        }
-       //TODO: if no pmid available from zotero output, get one from doi using 
api: http://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
-       callback(null, citation);
+
+       if (!citation.PMID && citation.DOI) {
+               //if pmid is not found, lookup the pmid using the DOI
+               pubMedRequest(citation.DOI, function (error, object){
+                       if (!error) { //don't pass along error as it's 
non-critical, and will halt the waterfall
+                               if (object.records[0].pmid){
+                                       citation['PMID'] = 
object.records[0].pmid;
+                               }
+                               if (object.records[0].pmcid) {
+                                       citation['PMCID'] = 
object.records[0].pmcid;
+                               }
+                       }
+                       callback(null, citation);
+               });
+       } else {
+               //if we add another async function, use async.series
+               callback(null, citation);
+       }
 };
 
 var fixURL = function(url, citation, callback){
@@ -207,7 +228,7 @@
                        for (i in match){
                                citation.ISSN.push(match[i]);
                        }
-               } else{
+               } else {
                        citation.ISSN = [issn]; //wraps issn field in array in 
case of false negatives
                }
        }
@@ -237,7 +258,7 @@
 
 /*Test response alterations without having to use server*/
 var testJSON = function(){
-       var sampleBody = require("../test_files/3_input.json");
+       var sampleBody = require("../test_files/4_input.json");
        console.log("before:");
        console.log(JSON.stringify(sampleBody));
        console.log("after:");
@@ -246,6 +267,12 @@
                        console.log(JSON.stringify(modifiedBody));
                });
        });
+
+       //test PMID lookup
+       addPubMedIdentifiers({"DOI": "10.1371/journal.pcbi.1002947"}, function 
(error, modifiedCitation){
+               console.log("Test lookup of PMID by DOI: PMID=" + 
modifiedCitation.PMID + ", PMCID=" + modifiedCitation.PMCID);
+               console.log("Expected: PMID=23555203, PMCID=PMC3605911");
+       });
 };
 
 /*Test methods in main */

-- 
To view, visit https://gerrit.wikimedia.org/r/176921
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If174e13c124eb2c0f0e3582175167b5757c6912d
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/services/citoid
Gerrit-Branch: master
Gerrit-Owner: Unicodesnowman <ad...@glados.cc>
Gerrit-Reviewer: Mvolz <mv...@wikimedia.org>
Gerrit-Reviewer: Unicodesnowman <ad...@glados.cc>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to