Mvolz has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/189201

Change subject: Bug fixes: fix status response codes
......................................................................

Bug fixes: fix status response codes

Response codes for various errors were inconsistent
or non-existant in areas.

* Fixed bug T8884 where response codes from scraped
URLs were ignored (scrape.js). Now returns basic citations
for these URLS (i.e. with URL in title) instead of scraped
titles and a 520 error to indicate the resource wasn't found
or had errors.
* Fixed bug T78388 where failure to export to bibtex format
returned a 200 OK and an empty body. Now returns a 404
not found error and a message saying the requested format
couldn't be obtained.

Bug: T88884
Bug: T78388
Change-Id: I9b94fe40bfe71fadc5d227da50eb142d285b14b2
---
M lib/requests.js
M lib/scrape.js
M lib/zotero.js
M server.js
4 files changed, 56 insertions(+), 50 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/citoid 
refs/changes/01/189201/1

diff --git a/lib/requests.js b/lib/requests.js
index 588accf..aba107d 100644
--- a/lib/requests.js
+++ b/lib/requests.js
@@ -30,15 +30,16 @@
                log.info("Zotero request made for: " + requestedURL);
                if (error) {
                        log.error(error);
-                       log.info("Falling back on native scraper.");
-                       scrapeHelper(requestedURL, opts, function(error, body){
-                               callback (error, 200, body);
+                       scrapeHelper(requestedURL, opts, function(error, 
responseCode, body){
+                               log.info("Falling back on native scraper.");
+                               if (error) {log.error(error)};
+                               callback(error, responseCode, body);
                        });
                } else if (response) {
                        //501 indicates no translator available
                        //this is common- can indicate shortened url
                        //or a website not specified in the translators
-                       if (response.statusCode == 501){
+                       if (response.statusCode === 501){
                                log.info("Status Code from Zotero: " + 
response.statusCode);
                                log.info("Looking for redirects...");
                                //try again following all redirects
@@ -53,48 +54,63 @@
                                                                callback(null, 
200, body);
                                                        } else {
                                                                log.info("No 
Zotero response available; falling back on native scraper.");
-                                                               
scrapeHelper(requestedURL, opts, function(error, body){
-                                                                       
callback (error, 200, body);
+                                                               
scrapeHelper(requestedURL, opts, function(error, responseCode, body){
+                                                                       
callback (error, responseCode, body);
                                                                });
                                                        }
                                                });
                                        } else {
                                                log.info("No redirect detected; 
falling back on native scraper.");
-                                               scrapeHelper(requestedURL, 
opts, function(error, body){
-                                                       callback (error, 200, 
body);
+                                               scrapeHelper(requestedURL, 
opts, function(error, responseCode, body){
+                                                       callback (error, 
responseCode, body);
                                                });
                                        }
                                });
-                       } else if (response.statusCode == 200){
+                       } else if (response.statusCode === 200){
                                log.info("Successfully retrieved and translated 
body from Zotero");
                                callback (null, 200, body);
                        } else {
                                //other response codes such as 500, 300
                                log.info("Falling back on native scraper.");
-                               scrapeHelper(requestedURL, opts, 
function(error, body){
-                                       callback (error, 200, body);
+                               scrapeHelper(requestedURL, opts, 
function(error, responseCode, body){
+                                       callback (error, responseCode, body);
                                });
                        }
                } else {
                        log.info("Falling back on native scraper.");
-                       scrapeHelper(requestedURL, opts, function(error, body){
-                               callback (error, 200, body);
+                       scrapeHelper(requestedURL, opts, function(error, 
responseCode, body){
+                               callback (error, responseCode, body);
                        });
                }
        });
 };
 
-/* Native scraper helper */
+/**
+ * Export scrape response to Zotero translator if nessecary
+ * Default format of scrape response is 'mediawiki', so only
+ * sends response to Zotero if it's a format Zotero can convert to.
+ * Currently only exports bibtex.
+ * @param  {String}   url      requested URL
+ * @param  {Object}   opts     request options object
+ * @param  {Function} callback callback(error, responseCode, citation)
+ */
 var scrapeHelper = function(url, opts, callback) {
-       if (opts.format == 'bibtex') {
-               scrape(url, function(error, citation) {
-                       zoteroExportRequest(citation[0], opts, function(err, 
response, body) {
-                               callback(err, body);
+       if (opts.format === 'bibtex') {
+               scrape(url, function(error, responseCode, citation) {
+                       zoteroExportRequest(citation[0], opts, function(err, 
resCode, body) {
+                               if (resCode !== 200){
+                                       body = "Unable to serve this format at 
this time";
+                                       resCode = 404; // 404 error if cannot 
translate into alternate format
+                               } else if (responseCode !== 200){
+                                       resCode = 520; // 520 error if the 
scraper couldn't scrape from url
+                               }
+                               console.log(resCode);
+                               callback(err, resCode, body);
                        });
                });
        } else {
-               scrape(url, function(error, citation) {
-                       callback(error, citation);
+               scrape(url, function(error, responseCode, citation) {
+                       callback(error, responseCode, citation);
                });
        }
 };
diff --git a/lib/scrape.js b/lib/scrape.js
index ae07062..21e4c1b 100644
--- a/lib/scrape.js
+++ b/lib/scrape.js
@@ -12,7 +12,7 @@
  * Currently scrapes title only
  * callback runs on list of json objs (var body)
  * @param  {String}   url      url to scrape
- * @param  {Function} callback callback(error, body)
+ * @param  {Function} callback callback(error, statusCode, body)
  */
 
 var scrape = function(url, callback){
@@ -43,9 +43,8 @@
 
                        var citation = {itemType: 'webpage', url: url, title: 
url};
 
-                       if (error || !response) {
-                               console.log(error);
-                               callback(error, [citation]);
+                       if (error || !response || response.statusCode !== 200) {
+                               callback(error, 520, [citation]);
                                return;
                        }
 
@@ -53,8 +52,7 @@
                                $ = cheerio.load(html);
                        }
                        catch (e){
-                               console.log('Could not load document: ' + e);
-                               callback(error, [citation]);
+                               callback(error, 520, [citation]);
                        }
 
                        citation.title = getTitle();
@@ -68,7 +66,7 @@
                                citation.publicationTitle = parsedUrl.hostname;
                        }
 
-                       callback(error, [citation]);
+                       callback(error, 200, [citation]);
        });
 };
 
diff --git a/lib/zotero.js b/lib/zotero.js
index d8bdc76..620311a 100644
--- a/lib/zotero.js
+++ b/lib/zotero.js
@@ -15,7 +15,7 @@
  * Requests to Zotero server endpoint /web
  * @param  {String}   requestedURL url being requested
  * @param  {Object}   opts         options for request
- * @param  {Function} callback     callback(error, response, body)
+ * @param  {Function} callback     callback(error, responseCode, body)
  */
 var zoteroWebRequest = function(requestedURL, opts, callback){
        var options = {
@@ -31,11 +31,11 @@
                if (!error && response.statusCode == 200) {
                        selectFormatFcn(opts.format, function(convert){
                                convert(requestedURL, opts, body, 
function(modifiedBody){
-                                       callback(error, response, modifiedBody);
+                                       callback(error, 200, modifiedBody);
                                });
                        });
                } else {
-                       callback(error, response, body);
+                       callback('Unable to retrieve this format', 404, body);
                }
 
        });
@@ -45,7 +45,7 @@
  * Request to Zotero server endpoint /export
  * @param  {Object}   citation     Zotero JSON citation to be converted
  * @param  {Object}   opts         options for request
- * @param  {Function} callback     callback(error, response, body)
+ * @param  {Function} callback     callback(error, responseCode, body)
  */
 var zoteroExportRequest = function(citation, opts, callback){
        var options = {
@@ -59,7 +59,11 @@
        };
 
        request.post(options, function(error, response, body) {
-               callback(error, response, body);
+               if (error){
+                       callback(error, null, body);
+               } else {
+                       callback(null, response.statusCode, body);
+               }
        });
 };
 
@@ -125,8 +129,8 @@
                fixURL, //must go directly after unnamed function that hands it 
url
                fixAccessDate,
                function(citation, cb){
-                       zoteroExportRequest(citation, opts, function(error, 
response, body){
-                               cb(null, body);
+                       zoteroExportRequest(citation, opts, function(error, 
responseCode, body){
+                               cb(error, body);
                        });
        }], function (err, citation) {
                callback(citation);
diff --git a/server.js b/server.js
index 4af484e..85d9928 100644
--- a/server.js
+++ b/server.js
@@ -111,14 +111,8 @@
                else {requestedURL = urlParse.format(parsedURL);}
 
                requestFromURL(requestedURL, opts, function(error, 
responseCode, body){
-                       if (!error){
-                               res.statusCode = responseCode;
-                               res.send(body);
-                       }
-                       else {
-                               res.statusCode = 520;
-                               res.send(body);
-                       }
+                       res.statusCode = responseCode;
+                       res.send(body);
                });
        }
 });
@@ -157,14 +151,8 @@
                distinguish(dSearch, function(extractedID, runnerFunction){
 
                        runnerFunction(extractedID, opts, function(error, 
responseCode, body){
-                               if (!error){
-                                       res.statusCode = responseCode;
-                                       res.send(body);
-                               }
-                               else {
-                                       res.statusCode = 520; //TODO: Server at 
requested location not available, not valid for non-urls
-                                       res.send(body);
-                               }
+                               res.statusCode = responseCode;
+                               res.send(body);
                        });
                });
        }

-- 
To view, visit https://gerrit.wikimedia.org/r/189201
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9b94fe40bfe71fadc5d227da50eb142d285b14b2
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/citoid
Gerrit-Branch: master
Gerrit-Owner: Mvolz <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to