Mvolz has uploaded a new change for review.
https://gerrit.wikimedia.org/r/189201
Change subject: Bug fixes: fix status response codes
......................................................................
Bug fixes: fix status response codes
Response codes for various errors were inconsistent
or non-existant in areas.
* Fixed bug T8884 where response codes from scraped
URLs were ignored (scrape.js). Now returns basic citations
for these URLS (i.e. with URL in title) instead of scraped
titles and a 520 error to indicate the resource wasn't found
or had errors.
* Fixed bug T78388 where failure to export to bibtex format
returned a 200 OK and an empty body. Now returns a 404
not found error and a message saying the requested format
couldn't be obtained.
Bug: T88884
Bug: T78388
Change-Id: I9b94fe40bfe71fadc5d227da50eb142d285b14b2
---
M lib/requests.js
M lib/scrape.js
M lib/zotero.js
M server.js
4 files changed, 56 insertions(+), 50 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/citoid
refs/changes/01/189201/1
diff --git a/lib/requests.js b/lib/requests.js
index 588accf..aba107d 100644
--- a/lib/requests.js
+++ b/lib/requests.js
@@ -30,15 +30,16 @@
log.info("Zotero request made for: " + requestedURL);
if (error) {
log.error(error);
- log.info("Falling back on native scraper.");
- scrapeHelper(requestedURL, opts, function(error, body){
- callback (error, 200, body);
+ scrapeHelper(requestedURL, opts, function(error,
responseCode, body){
+ log.info("Falling back on native scraper.");
+ if (error) {log.error(error)};
+ callback(error, responseCode, body);
});
} else if (response) {
//501 indicates no translator available
//this is common- can indicate shortened url
//or a website not specified in the translators
- if (response.statusCode == 501){
+ if (response.statusCode === 501){
log.info("Status Code from Zotero: " +
response.statusCode);
log.info("Looking for redirects...");
//try again following all redirects
@@ -53,48 +54,63 @@
callback(null,
200, body);
} else {
log.info("No
Zotero response available; falling back on native scraper.");
-
scrapeHelper(requestedURL, opts, function(error, body){
-
callback (error, 200, body);
+
scrapeHelper(requestedURL, opts, function(error, responseCode, body){
+
callback (error, responseCode, body);
});
}
});
} else {
log.info("No redirect detected;
falling back on native scraper.");
- scrapeHelper(requestedURL,
opts, function(error, body){
- callback (error, 200,
body);
+ scrapeHelper(requestedURL,
opts, function(error, responseCode, body){
+ callback (error,
responseCode, body);
});
}
});
- } else if (response.statusCode == 200){
+ } else if (response.statusCode === 200){
log.info("Successfully retrieved and translated
body from Zotero");
callback (null, 200, body);
} else {
//other response codes such as 500, 300
log.info("Falling back on native scraper.");
- scrapeHelper(requestedURL, opts,
function(error, body){
- callback (error, 200, body);
+ scrapeHelper(requestedURL, opts,
function(error, responseCode, body){
+ callback (error, responseCode, body);
});
}
} else {
log.info("Falling back on native scraper.");
- scrapeHelper(requestedURL, opts, function(error, body){
- callback (error, 200, body);
+ scrapeHelper(requestedURL, opts, function(error,
responseCode, body){
+ callback (error, responseCode, body);
});
}
});
};
-/* Native scraper helper */
+/**
+ * Export scrape response to Zotero translator if nessecary
+ * Default format of scrape response is 'mediawiki', so only
+ * sends response to Zotero if it's a format Zotero can convert to.
+ * Currently only exports bibtex.
+ * @param {String} url requested URL
+ * @param {Object} opts request options object
+ * @param {Function} callback callback(error, responseCode, citation)
+ */
var scrapeHelper = function(url, opts, callback) {
- if (opts.format == 'bibtex') {
- scrape(url, function(error, citation) {
- zoteroExportRequest(citation[0], opts, function(err,
response, body) {
- callback(err, body);
+ if (opts.format === 'bibtex') {
+ scrape(url, function(error, responseCode, citation) {
+ zoteroExportRequest(citation[0], opts, function(err,
resCode, body) {
+ if (resCode !== 200){
+ body = "Unable to serve this format at
this time";
+ resCode = 404; // 404 error if cannot
translate into alternate format
+ } else if (responseCode !== 200){
+ resCode = 520; // 520 error if the
scraper couldn't scrape from url
+ }
+ console.log(resCode);
+ callback(err, resCode, body);
});
});
} else {
- scrape(url, function(error, citation) {
- callback(error, citation);
+ scrape(url, function(error, responseCode, citation) {
+ callback(error, responseCode, citation);
});
}
};
diff --git a/lib/scrape.js b/lib/scrape.js
index ae07062..21e4c1b 100644
--- a/lib/scrape.js
+++ b/lib/scrape.js
@@ -12,7 +12,7 @@
* Currently scrapes title only
* callback runs on list of json objs (var body)
* @param {String} url url to scrape
- * @param {Function} callback callback(error, body)
+ * @param {Function} callback callback(error, statusCode, body)
*/
var scrape = function(url, callback){
@@ -43,9 +43,8 @@
var citation = {itemType: 'webpage', url: url, title:
url};
- if (error || !response) {
- console.log(error);
- callback(error, [citation]);
+ if (error || !response || response.statusCode !== 200) {
+ callback(error, 520, [citation]);
return;
}
@@ -53,8 +52,7 @@
$ = cheerio.load(html);
}
catch (e){
- console.log('Could not load document: ' + e);
- callback(error, [citation]);
+ callback(error, 520, [citation]);
}
citation.title = getTitle();
@@ -68,7 +66,7 @@
citation.publicationTitle = parsedUrl.hostname;
}
- callback(error, [citation]);
+ callback(error, 200, [citation]);
});
};
diff --git a/lib/zotero.js b/lib/zotero.js
index d8bdc76..620311a 100644
--- a/lib/zotero.js
+++ b/lib/zotero.js
@@ -15,7 +15,7 @@
* Requests to Zotero server endpoint /web
* @param {String} requestedURL url being requested
* @param {Object} opts options for request
- * @param {Function} callback callback(error, response, body)
+ * @param {Function} callback callback(error, responseCode, body)
*/
var zoteroWebRequest = function(requestedURL, opts, callback){
var options = {
@@ -31,11 +31,11 @@
if (!error && response.statusCode == 200) {
selectFormatFcn(opts.format, function(convert){
convert(requestedURL, opts, body,
function(modifiedBody){
- callback(error, response, modifiedBody);
+ callback(error, 200, modifiedBody);
});
});
} else {
- callback(error, response, body);
+ callback('Unable to retrieve this format', 404, body);
}
});
@@ -45,7 +45,7 @@
* Request to Zotero server endpoint /export
* @param {Object} citation Zotero JSON citation to be converted
* @param {Object} opts options for request
- * @param {Function} callback callback(error, response, body)
+ * @param {Function} callback callback(error, responseCode, body)
*/
var zoteroExportRequest = function(citation, opts, callback){
var options = {
@@ -59,7 +59,11 @@
};
request.post(options, function(error, response, body) {
- callback(error, response, body);
+ if (error){
+ callback(error, null, body);
+ } else {
+ callback(null, response.statusCode, body);
+ }
});
};
@@ -125,8 +129,8 @@
fixURL, //must go directly after unnamed function that hands it
url
fixAccessDate,
function(citation, cb){
- zoteroExportRequest(citation, opts, function(error,
response, body){
- cb(null, body);
+ zoteroExportRequest(citation, opts, function(error,
responseCode, body){
+ cb(error, body);
});
}], function (err, citation) {
callback(citation);
diff --git a/server.js b/server.js
index 4af484e..85d9928 100644
--- a/server.js
+++ b/server.js
@@ -111,14 +111,8 @@
else {requestedURL = urlParse.format(parsedURL);}
requestFromURL(requestedURL, opts, function(error,
responseCode, body){
- if (!error){
- res.statusCode = responseCode;
- res.send(body);
- }
- else {
- res.statusCode = 520;
- res.send(body);
- }
+ res.statusCode = responseCode;
+ res.send(body);
});
}
});
@@ -157,14 +151,8 @@
distinguish(dSearch, function(extractedID, runnerFunction){
runnerFunction(extractedID, opts, function(error,
responseCode, body){
- if (!error){
- res.statusCode = responseCode;
- res.send(body);
- }
- else {
- res.statusCode = 520; //TODO: Server at
requested location not available, not valid for non-urls
- res.send(body);
- }
+ res.statusCode = responseCode;
+ res.send(body);
});
});
}
--
To view, visit https://gerrit.wikimedia.org/r/189201
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I9b94fe40bfe71fadc5d227da50eb142d285b14b2
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/citoid
Gerrit-Branch: master
Gerrit-Owner: Mvolz <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits