Mvolz has submitted this change and it was merged.

Change subject: Add missing PMIDs by looking up DOI, create pubMedRequest.js
......................................................................
Add missing PMIDs by looking up DOI, create pubMedRequest.js If the PMID is missing but the DOI is present, use the DOI to lookup the PMID and PMCID with pubMedRequest Creates lib/pubMedRequest.js which requests an object from PubMed's API, and sanity checks the response. Corrects testJSON input file in lib/zotero.js Assorted code styling improvements Bug: T1088 Change-Id: If174e13c124eb2c0f0e3582175167b5757c6912d --- A lib/pubMedRequest.js M lib/requests.js M lib/zotero.js 3 files changed, 108 insertions(+), 59 deletions(-) Approvals: Mvolz: Verified; Looks good to me, approved diff --git a/lib/pubMedRequest.js b/lib/pubMedRequest.js new file mode 100644 index 0000000..231d939 --- /dev/null +++ b/lib/pubMedRequest.js @@ -0,0 +1,60 @@ +#!/usr/bin/env node +/** + * https://www.mediawiki.org/wiki/citoid + * + * Requests and sanity checks the response from PubMed's API + */ + +(function() { + + var request = require('request'); + var bunyan = require('bunyan'); + var log = bunyan.createLogger({name: "citoid"}); + + /** + * Requests a PubMed object using any supported identifier + * @param {String} identifier Valid PubMed identifier (PMID, PMCID, Manuscript ID, versioned ID) + * @param {Function} callback callback (error, object) + */ + + var pubMedRequest = function (identifier, callback){ + var escapedId = encodeURIComponent(identifier); + var url = "http://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?tool=citoid&email=citoid@mediawiki&format=json&ids=" + escapedId; + + request(url, function(error, response, body){ + log.info("PubMed query made for: " + url); + if (error) { + log.error(error); + callback(error, null); + } else if (response.statusCode !== 200) { + log.error("Unexpected HTTP status code: " + response.statusCode); + callback("Unexpected HTTP status code: " + response.statusCode, null); + } else { + var jsonObj; + try { + jsonObj = JSON.parse(body); + } catch (error) { + log.info("Original response: " + body); + log.error("JSON parse error: " + error); + 
callback("JSON parse error: " + error, null); + } + + if (jsonObj){ + if (jsonObj.status !== 'ok'){ + log.error("Unexpected status from PubMed API: " + jsonObj.status); + callback("Unexpected status from PubMed API: " + jsonObj.status, null); + } else if (jsonObj.records.length === 0){ + log.error("No records from PubMed API"); + callback("No records from PubMed API", null); + } else { + callback(null, jsonObj); + } + } + } + }); + }; + + module.exports = pubMedRequest; + +}()); + diff --git a/lib/requests.js b/lib/requests.js index f3dc099..e1e82bc 100644 --- a/lib/requests.js +++ b/lib/requests.js @@ -11,8 +11,8 @@ var unshorten = require('./unshorten.js'); var scrape = require('./scrape.js').scrape; var zoteroRequest = require('./zotero.js').zoteroRequest; +var pubMedRequest = require('./pubMedRequest.js'); var util = require('util'); -var request = require('request'); /** * Request citation metadata from a URL @@ -104,51 +104,13 @@ */ var requestFromPubMedID = function (requestedPubMedID, opts, callback){ - var pubMedLink = 'http://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?tool=citoid&email=citoid@mediawiki&format=json&ids='+requestedPubMedID; - requestJSON(pubMedLink, function(error, obj){ + pubMedRequest(requestedPubMedID, function(error, obj){ if(error){ callback(error, null, null); - } - else if(obj.status !== 'ok'){ - log.error("PubMed returned unrecognized status code: " + obj.status); - callback("PubMed returned unrecognized status", 200, null); } else { - if(obj.records.length > 0){ - var doi = obj.records[0].doi; - log.info("Got DOI " + doi); - requestFromDOI(doi, opts, callback); - } else { - log.error("No records retrieved from PubMed for " + requestedPMID); - callback("No records rerieved from PubMed", 200, null); - } - } - }); -}; - -/** - * Fetch and parse a JSON object from URL - * @param {String} url JSON endpoint to fetch and parse - * @param {Function} callback callback (error, object) - */ - -var requestJSON = function (url, callback){ - 
request(url, function(error, response, body){ - log.info("JSON query made for: " + url); - if (error) { - log.error(error); - callback(error, null); - } else if (response.statusCode !== 200) { - log.error("Unexpected HTTP status code: " + response.statusCode); - callback("Unexpected HTTP status code: " + response.statusCode, null); - } else { - try { - var jsonObj = JSON.parse(body); - callback(null, jsonObj); - } catch (error) { - log.info("Original response: " + body); - log.error("JSON parse error: " + error); - callback("JSON parse error: " + error, null); - } + var doi = obj.records[0].doi; + log.info("Got DOI " + doi); + requestFromDOI(doi, opts, callback); } }); }; diff --git a/lib/zotero.js b/lib/zotero.js index f383ce4..32aad12 100644 --- a/lib/zotero.js +++ b/lib/zotero.js @@ -7,6 +7,7 @@ var request = require('request'); var async = require('async'); +var pubMedRequest = require('./pubMedRequest.js'); /** * Requests to Zotero server @@ -14,7 +15,7 @@ * @param {Object} opts options for request * @param {Function} callback callback(error, response, body) */ -var zoteroRequest = function(requestedURL, opts, callback){ +var zoteroRequest = function(requestedURL, opts, callback){ var options = { url: opts.zoteroURL, method: 'POST', @@ -31,8 +32,7 @@ callback(error, response, modifiedBody); }); }); - } - else { + } else { callback(error, response, body); } @@ -81,7 +81,7 @@ fixURL, //must go directly after unnamed function that hands it url fixAccessDate, replaceCreators, - addPMID, + addPubMedIdentifiers, fixISBN, fixISSN ], function (err, citation) { @@ -111,8 +111,7 @@ creatorFieldName = zotCreators[z].creatorType; if (creatorTypeCount[creatorFieldName]){ creatorTypeCount[creatorFieldName] += 1; - } - else { + } else { creatorTypeCount[creatorFieldName] = 1; } //Appends number to name, i.e. 
author -> author1 @@ -127,7 +126,7 @@ }, fixAccessDate, replaceCreators, - addPMID, + addPubMedIdentifiers, fixISBN, fixISSN ], function (err, citation) { @@ -163,20 +162,42 @@ callback(null, citation); }; -var addPMID = function(citation, callback){ - //get pmid out of extra fields - if (citation.extra){ +/** + * Add PMID and PMCID fields from the extra field or through DOI lookup + * @param {Object} citation citation object to add PMID + * @param {Function} callback callback (error, citation) + */ + +var addPubMedIdentifiers = function(citation, callback){ + if (citation.extra) { + //get pmid from extra fields var extraFields = citation.extra.split('\n'); - for (var f in extraFields){ + for (var f in extraFields) { //could add them all, but let's not do this in case of conflicting fields var keyValue = extraFields[f].split(': '); - if (keyValue[0] === 'PMID'){ + if (keyValue[0] === 'PMID' || keyValue[0] === 'PMCID') { citation[keyValue[0]] = keyValue[1].trim(); } } } - //TODO: if no pmid available from zotero output, get one from doi using api: http://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/ - callback(null, citation); + + if (!citation.PMID && citation.DOI) { + //if pmid is not found, lookup the pmid using the DOI + pubMedRequest(citation.DOI, function (error, object){ + if (!error) { //don't pass along error as it's non-critical, and will halt the waterfall + if (object.records[0].pmid){ + citation['PMID'] = object.records[0].pmid; + } + if (object.records[0].pmcid) { + citation['PMCID'] = object.records[0].pmcid; + } + } + callback(null, citation); + }); + } else { + //if we add another async function, use async.series + callback(null, citation); + } }; var fixURL = function(url, citation, callback){ @@ -207,7 +228,7 @@ for (i in match){ citation.ISSN.push(match[i]); } - } else{ + } else { citation.ISSN = [issn]; //wraps issn field in array in case of false negatives } } @@ -237,7 +258,7 @@ /*Test response alterations without having to use server*/ var 
testJSON = function(){ - var sampleBody = require("../test_files/3_input.json"); + var sampleBody = require("../test_files/4_input.json"); console.log("before:"); console.log(JSON.stringify(sampleBody)); console.log("after:"); @@ -246,6 +267,12 @@ console.log(JSON.stringify(modifiedBody)); }); }); + + //test PMID lookup + addPubMedIdentifiers({"DOI": "10.1371/journal.pcbi.1002947"}, function (error, modifiedCitation){ + console.log("Test lookup of PMID by DOI: PMID=" + modifiedCitation.PMID + ", PMCID=" + modifiedCitation.PMCID); + console.log("Expected: PMID=23555203, PMCID=PMC3605911"); + }); }; /*Test methods in main */ -- To view, visit https://gerrit.wikimedia.org/r/176921 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: If174e13c124eb2c0f0e3582175167b5757c6912d Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/services/citoid Gerrit-Branch: master Gerrit-Owner: Unicodesnowman <ad...@glados.cc> Gerrit-Reviewer: Mvolz <mv...@wikimedia.org> Gerrit-Reviewer: Unicodesnowman <ad...@glados.cc> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits