Mvolz has uploaded a new change for review.
https://gerrit.wikimedia.org/r/200314
Change subject: Change how pubmed and pmcs are requested
......................................................................
Change how pubmed and pmcs are requested
The PubMed ID converter API was frequently
not returning results for valid PMIDs, so
instead we request metadata from pubmed
URLs directly.
CitoidService
* Remove requestFromPubMed, which used
the ID converter API to get a DOI
* Replace with requestFromPMCID and requestFromPMID,
which concatenate the id onto a PubMed URL,
verify the server sends a 200 OK
response code, and send to requestFromURL.
Tests
* Convert pmid test to a PMID which has
no results in the converter API but has a
valid URL.
* Add test for PMC with PMC prefix
* Add test for PMC without PMC prefix
* Add test for invalid PMCID
* Add test for invalid PMID
Bug: T93335
Change-Id: Ie2830d42fa63fecd30db72f723e6d5d9979f51b5
---
M lib/CitoidService.js
M package.json
M test/features/errors/index.js
M test/features/scraping/index.js
4 files changed, 114 insertions(+), 37 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/citoid
refs/changes/14/200314/1
diff --git a/lib/CitoidService.js b/lib/CitoidService.js
index 9b3d79d..f801f65 100644
--- a/lib/CitoidService.js
+++ b/lib/CitoidService.js
@@ -131,32 +131,73 @@
urlOpts.search = res.headers.location;
citoidService.requestFromURL(urlOpts, callback);
} else {
- citoidService.logger.log('debug/DOI', "Unable to
resolve DOI " + doiOpts.search);
- var message = 'Unable to resolve DOI';
+ var message = 'Unable to resolve DOI ' + doiOpts.search;
var error = new Error(message);
+ citoidService.logger.log('debug/DOI', message);
callback(error, 404, {Error: message});
}
});
};
/**
- * Request citation metadata from a PubMed identifier. Supports PMID, PMCID,
Manuscript ID and versioned identifiers
- * @param {Object} opts options object containing PubMed identifier.
PMCID identifiers must begin with 'PMC'
+ * Request citation metadata from a PMID identifier.
+ * @param {Object} pmidOpts options object containing PMID
* @param {Function} callback callback (error, statusCode, body)
*/
-CitoidService.prototype.requestFromPubMedID = function(opts, callback){
+CitoidService.prototype.requestFromPMID = function(pmidOpts, callback){
var citoidService = this;
- pubMedRequest(opts.search, this.logger, function(error, obj){
- if(error) {
- callback(error, null, null);
+ var baseURL = 'http://www.ncbi.nlm.nih.gov/pubmed/';
+ var urlOpts = Object.assign({}, pmidOpts); // Shallow clone doiOpts
+ var pmidURL = baseURL + pmidOpts.search;
+
+ urlOpts.search = pmidURL;
+
+ citoidService.logger.log('debug/pmid', "Converting PMID " +
pmidOpts.search +
+ 'to URL ' + urlOpts.search);
+
+ // Check if url is 200 okay
+ http.get(pmidURL, function (res) {
+ if (res && res.statusCode === 200) {
+ citoidService.requestFromURL(urlOpts, callback);
} else {
- var doi = obj.records[0].doi;
- citoidService.logger.log('debug/pubmed', "Got DOI " +
doi);
- opts.search = doi;
- citoidService.requestFromDOI(opts, callback);
+ var message = 'Unable to locate resource with PMID '
+ + pmidOpts.search;
+ var error = new Error(message);
+ citoidService.logger.log('debug/PMID', message);
+ callback(error, 404, {Error: message});
}
});
};
+
+/**
+ * Request citation metadata from a PMCID identifier.
+ * @param {Object} pmcidOpts options object containing PMCID
+ * @param {Function} callback callback (error, statusCode, body)
+ */
+CitoidService.prototype.requestFromPMCID = function(pmcidOpts, callback){
+ var citoidService = this;
+ var baseURL = 'http://www.ncbi.nlm.nih.gov/pmc/articles/';
+ var urlOpts = Object.assign({}, pmcidOpts); // Shallow clone doiOpts
+ var pmcidURL = baseURL + pmcidOpts.search + '/';
+
+ urlOpts.search = pmcidURL;
+
+ citoidService.logger.log('debug/pmcid', "Converting PMCID "
+ + pmcidOpts.search + 'to URL ' + urlOpts.search);
+ // Check if url is 200 okay
+ http.get(pmcidURL, function (res) {
+ if (res && res.statusCode === 200) {
+ citoidService.requestFromURL(urlOpts, callback);
+ } else {
+ var message = 'Unable to locate resource with PMCID '
+ + pmcidOpts.search;
+ var error = new Error(message);
+ citoidService.logger.log('debug/PMCID', message);
+ callback(error, 404, {Error: message});
+ }
+ });
+};
+
/**
* Determine type of string (doi, url) and callback on correct handler
@@ -164,24 +205,20 @@
* @param {Function} callback callback(extractedValue,
correctFunction)
*/
CitoidService.prototype.distinguish = function(rawSearchInput, callback){
- var reDOI, rePMID, rePMCID, rePMCID2, reHTTP, reWWW,
- parsedURL,
- matchDOI, matchPMID, matchPMCID, matchHTTP, matchWWW,
- search = rawSearchInput.trim();
+ var search = rawSearchInput.trim();
- reHTTP = new RegExp('^((https?)://.+\\..+)'); // Assumes all strings
with http/s protocol are URLs
- reWWW = new RegExp('^((www)\\..+\\..+)'); // Assumes all strings with
www substring are URLs
- reDOI = new RegExp('\\b10\\.?[0-9]{3,4}(?:[.][0-9]+)*/.*');
- rePMID = new RegExp('^\\d{8}\\b');
- rePMCID = new RegExp('\\bPMC\\d{7}\\b');
- rePMCID2 = new RegExp('^\\d{7}\\b');
+ var reHTTP = new RegExp('^((https?)://.+\\..+)'); // Assumes all
strings with http/s protocol are URLs
+ var reWWW = new RegExp('^((www)\\..+\\..+)'); // Assumes all strings
with www substring are URLs
+ var reDOI = new RegExp('\\b10\\.?[0-9]{3,4}(?:[.][0-9]+)*/.*');
+ var rePMID = new RegExp('^\\d{8}\\b');
+ var rePMCID = new RegExp('\\bPMC\\d{7}\\b');
+ var rePMCID2 = new RegExp('^\\d{7}\\b');
- matchHTTP = search.match(reHTTP);
- matchDOI = search.match(reDOI);
- matchPMID = search.match(rePMID);
- matchPMCID = search.match(rePMCID);
- matchWWW = search.match(reWWW);
-
+ var matchHTTP = search.match(reHTTP);
+ var matchDOI = search.match(reDOI);
+ var matchPMID = search.match(rePMID);
+ var matchPMCID = search.match(rePMCID);
+ var matchWWW = search.match(reWWW);
if (matchHTTP || matchWWW){
this.stats.increment('input.url');
@@ -191,18 +228,18 @@
callback(matchDOI[0], this.requestFromDOI.bind(this));
} else if (matchPMID) {
this.stats.increment('input.pmid');
- callback(matchPMID[0], this.requestFromPubMedID.bind(this));
+ callback(matchPMID[0], this.requestFromPMID.bind(this));
} else if (matchPMCID) {
this.stats.increment('input.pmcid');
- callback(matchPMCID[0], this.requestFromPubMedID.bind(this));
+ callback(matchPMCID[0], this.requestFromPMCID.bind(this));
} else {
- matchPMCID = search.match(rePMCID2);
+ matchPMCID = search.match(rePMCID2); // Detects PMCIDs with no
PMC prefix
if (matchPMCID) {
this.stats.increment('input.pmcid');
- callback('PMC' + matchPMCID[0],
this.requestFromPubMedID.bind(this));
+ callback('PMC' + matchPMCID[0],
this.requestFromPMCID.bind(this));
} else {
this.stats.increment('input.url');
- parsedURL = urlParse.parse(search);
+ var parsedURL = urlParse.parse(search);
if (!parsedURL.protocol){
search = 'http://'+ search;
}
diff --git a/package.json b/package.json
index 4bdfddc..1707ab2 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "citoid",
- "version": "0.2.3",
+ "version": "0.2.4",
"description": "Converts search terms such as URL or DOI into citations.",
"scripts": {
"start": "service-runner",
diff --git a/test/features/errors/index.js b/test/features/errors/index.js
index b997ed9..b56e120 100644
--- a/test/features/errors/index.js
+++ b/test/features/errors/index.js
@@ -77,7 +77,32 @@
assert.status(res, 404);
}, function(err) {
assert.status(err, 404);
- assert.deepEqual(err.body.Error, 'Unable to resolve
DOI',
+ assert.deepEqual(err.body.Error, 'Unable to resolve DOI
' + doi,
+ 'Unexpected error message ' + err.body.Error);
+ });
+ });
+
+ it('bad pmid', function() {
+ var pmid = '99999999';
+ return server.query(pmid, 'mediawiki', 'en')
+ .then(function(res) {
+ assert.status(res, 404);
+ }, function(err) {
+ assert.status(err, 404);
+ assert.deepEqual(err.body.Error,
+ 'Unable to locate resource with PMID ' + pmid,
+ 'Unexpected error message ' + err.body.Error);
+ });
+ });
+
+ it('bad pmcid', function() {
+ var pmcid = 'PMC9999999';
+ return server.query(pmcid, 'mediawiki', 'en')
+ .then(function(res) {
+ assert.status(res, 404);
+ }, function(err) {
+ assert.status(err, 404);
+ assert.deepEqual(err.body.Error, 'Unable to locate
resource with PMCID ' + pmcid,
'Unexpected error message ' + err.body.Error);
});
});
diff --git a/test/features/scraping/index.js b/test/features/scraping/index.js
index ff2e333..5538399 100644
--- a/test/features/scraping/index.js
+++ b/test/features/scraping/index.js
@@ -12,8 +12,23 @@
before(function () { return server.start(); });
- it('pmid', function() {
- return server.query('23555203').then(function(res) {
+ //PMID on NIH website that is not found in the id converter api
+ it('pmid (not in id converter)', function() {
+ return server.query('14656957').then(function(res) {
+ assert.status(res, 200);
+ assert.checkCitation(res, 'Seventh report of the Joint
National Committee on Prevention, Detection, Evaluation, and Treatment of High
Blood Pressure');
+ });
+ });
+
+ it('pmcid with prefix', function() {
+ return server.query('PMC3605911').then(function(res) {
+ assert.status(res, 200);
+ assert.checkCitation(res, 'Viral Phylodynamics');
+ });
+ });
+
+ it('pmcid without prefix', function() {
+ return server.query('3605911').then(function(res) {
assert.status(res, 200);
assert.checkCitation(res, 'Viral Phylodynamics');
});
--
To view, visit https://gerrit.wikimedia.org/r/200314
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie2830d42fa63fecd30db72f723e6d5d9979f51b5
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/citoid
Gerrit-Branch: master
Gerrit-Owner: Mvolz <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits