[MediaWiki-commits] [Gerrit] mediawiki...citoid[master]: Remove TZ offsets & add ordinal indicators

2016-09-19 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged.

Change subject: Remove TZ offsets & add ordinal indicators
..


Remove TZ offsets & add ordinal indicators

* Add chrono-node library which has slightly better
parser than Date.parse().
 * This adds support for ordinal indicators
 * This unfortunately removes support for ordinal
   numbers (less commonly used).
* Remove offsets from all dates containing TZ info;
this is because TZ info can shift the day to the day
before or after, which is confusing from the user-end,
see bug.
* Fixes to two scraper tests where the metadata
changed upstream

Bug: T145052
Change-Id: I44134fca827ed5c387f6f56b6670f25f6becb97c
---
M lib/Exporter.js
M package.json
M test/features/scraping/index.js
M test/features/scraping/lang.js
M test/features/unit/exporter.js
5 files changed, 71 insertions(+), 11 deletions(-)

Approvals:
  Mobrovac: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/Exporter.js b/lib/Exporter.js
index 3cc9007..8d57a3a 100644
--- a/lib/Exporter.js
+++ b/lib/Exporter.js
@@ -8,6 +8,7 @@
 
 /* Import Modules */
 var BBPromise = require('bluebird');
+var chrono = require('chrono-node');
 var crypto = require('crypto');
 var extend = require('extend');
 var stripTags = require('striptags');
@@ -508,11 +509,26 @@
  */
 function fixDate(citation){
 if (citation.date){
-// Explicitly set TZ to GMT for servers not running in GMT
-var d = new Date(citation.date + (/[0-9]T[0-9]/.test(citation.date) ? 
'' : ' GMT+00'));
-if (isFinite(d)) {
-citation.date = d.toISOString().split('T').shift();
-} else {
+try {
+var d;
+// Try to parse with chrono first
+var p = chrono.parse(citation.date); // Create ParsedResult object 
with chrono
+if (p && p[0] && p[0].start){
+p[0].start.assign('timezoneOffset', 0); // Remove timezone 
offset so that the user-observed date doesn't change based on offset
+d = p[0].start.date(); // Create a Date object from 
ParsedComponents Object
+} else {
+// Try to parse with Date.parse() as fallback; chrono doesn't 
seem to work with ambigious dates, such as '2010'
+d = new Date(citation.date + 
(/[0-9]T[0-9]/.test(citation.date) ? '' : ' GMT+00')); // Explicitly set to GMT 
time to avoid offset issue
+}
+
+// Lastly, remove time from date
+if (isFinite(d)) {
+citation.date = d.toISOString().split('T').shift();
+} else {
+// If no finite translation of the date is available, remove 
the field
+delete citation.date;
+}
+} catch (e) { // Remove field if errors are thrown
 delete citation.date;
 }
 }
diff --git a/package.json b/package.json
index 9578f4d..b5fa125 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "citoid",
-  "version": "0.4.2",
+  "version": "0.4.3",
   "description": "Converts search terms such as URL or DOI into citations.",
   "homepage": "https://www.mediawiki.org/wiki/Citoid",
   "license": "Apache-2.0",
@@ -16,6 +16,7 @@
 "bunyan": "1.8.1",
 "cassandra-uuid": "0.0.2",
 "cheerio": "0.20.0",
+"chrono-node": "1.2.4",
 "compression": "1.6.2",
 "content-type": "1.0.2",
 "core-js": "2.4.1",
diff --git a/test/features/scraping/index.js b/test/features/scraping/index.js
index e8ce812..54d3e1b 100644
--- a/test/features/scraping/index.js
+++ b/test/features/scraping/index.js
@@ -330,7 +330,7 @@
 it('doi in url with query parameters - uses crossRef', function() {
 return 
server.query('http://www.example.com/10.1086/378695?uid=3739832&uid=2&uid=4&uid=3739256&sid=21105503736473').then(function(res)
 {
 assert.status(res, 200);
-assert.checkCitation(res, 'Salaries, Turnover, and Performance 
in the Federal Criminal Justice System*');
+assert.checkCitation(res, 'Salaries, Turnover, and Performance 
in the Federal Criminal Justice System');
 assert.deepEqual(res.body[0].DOI, '10.1086/378695');
 assert.deepEqual(res.body[0].author.length, 1);
 });
diff --git a/test/features/scraping/lang.js b/test/features/scraping/lang.js
index dadb952..f07a8cf 100644
--- a/test/features/scraping/lang.js
+++ b/test/features/scraping/lang.js
@@ -26,7 +26,7 @@
 it('german twitter', function() {
 return server.query('http://twitter.com', 'mediawiki', 
'de').then(function(res) {
 assert.status(res, 200);
-assert.checkCitation(res, 'Twitter - Sieh Dir an, was gerade 
los ist');
+assert.checkCitation(res, 'Twitter. Alles, was gerade los 
ist.');
 assert.deepEqual(!!res.body[0].accessDate, true, 'No 
accessDate present

[MediaWiki-commits] [Gerrit] mediawiki...citoid[master]: Remove TZ offsets & add ordinal indicators

2016-09-09 Thread Mvolz (Code Review)
Mvolz has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/309599

Change subject: Remove TZ offsets & add ordinal indicators
..

Remove TZ offsets & add ordinal indicators

* Add chrono-node library which has slightly better
parser than Date.parse().
 * This adds support for ordinal indicators
 * This unfortunately removes support for ordinal
   numbers (less commonly used).
* Remove offsets from all dates containing TZ info;
this is because TZ info can shift the day to the day
before or after, which is confusing from the user-end,
see bug.
* Fixes to two scraper tests where the metadata
changed upstream

Bug: T145052
Change-Id: I44134fca827ed5c387f6f56b6670f25f6becb97c
---
M lib/Exporter.js
M package.json
M test/features/scraping/index.js
M test/features/scraping/lang.js
M test/features/unit/exporter.js
5 files changed, 64 insertions(+), 8 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/citoid 
refs/changes/99/309599/1

diff --git a/lib/Exporter.js b/lib/Exporter.js
index 3cc9007..04251cd 100644
--- a/lib/Exporter.js
+++ b/lib/Exporter.js
@@ -8,6 +8,7 @@
 
 /* Import Modules */
 var BBPromise = require('bluebird');
+var chrono = require('chrono-node');
 var crypto = require('crypto');
 var extend = require('extend');
 var stripTags = require('striptags');
@@ -508,11 +509,22 @@
  */
 function fixDate(citation){
 if (citation.date){
-// Explicitly set TZ to GMT for servers not running in GMT
-var d = new Date(citation.date + (/[0-9]T[0-9]/.test(citation.date) ? 
'' : ' GMT+00'));
+var d;
+// Try to parse with chrono first
+var p = chrono.parse(citation.date); // Create ParsedResult object 
with chrono
+if (p && p[0] && p[0].start){
+p[0].start.assign('timezoneOffset', 0); // Remove timezone offset 
so that the user-observed date doesn't change based on offset
+d = p[0].start.date(); // Create a Date object from 
ParsedComponents Object
+} else {
+// Try to parse with Date.parse() as fallback; chrono doesn't seem 
to work with ambigious dates, such as '2010'
+d = new Date(citation.date + (/[0-9]T[0-9]/.test(citation.date) ? 
'' : ' GMT+00')); // Explicitly set to GMT time to avoid offset issue
+}
+
+// Lastly, remove time from date
 if (isFinite(d)) {
 citation.date = d.toISOString().split('T').shift();
 } else {
+// If no finite translation of the date is available, remove the 
field
 delete citation.date;
 }
 }
diff --git a/package.json b/package.json
index 9578f4d..b5fa125 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "citoid",
-  "version": "0.4.2",
+  "version": "0.4.3",
   "description": "Converts search terms such as URL or DOI into citations.",
   "homepage": "https://www.mediawiki.org/wiki/Citoid",
   "license": "Apache-2.0",
@@ -16,6 +16,7 @@
 "bunyan": "1.8.1",
 "cassandra-uuid": "0.0.2",
 "cheerio": "0.20.0",
+"chrono-node": "1.2.4",
 "compression": "1.6.2",
 "content-type": "1.0.2",
 "core-js": "2.4.1",
diff --git a/test/features/scraping/index.js b/test/features/scraping/index.js
index e8ce812..577d2d6 100644
--- a/test/features/scraping/index.js
+++ b/test/features/scraping/index.js
@@ -330,7 +330,7 @@
 it('doi in url with query parameters - uses crossRef', function() {
 return 
server.query('http://www.example.com/10.1086/378695?uid=3739832&uid=2&uid=4&uid=3739256&sid=21105503736473').then(function(res)
 {
 assert.status(res, 200);
-assert.checkCitation(res, 'Salaries, Turnover, and Performance 
in the Federal Criminal Justice System*');
+assert.checkCitation(res, 'Salaries, Turnover, and Performance 
in the Federal Criminal Justice Systems');
 assert.deepEqual(res.body[0].DOI, '10.1086/378695');
 assert.deepEqual(res.body[0].author.length, 1);
 });
diff --git a/test/features/scraping/lang.js b/test/features/scraping/lang.js
index dadb952..f07a8cf 100644
--- a/test/features/scraping/lang.js
+++ b/test/features/scraping/lang.js
@@ -26,7 +26,7 @@
 it('german twitter', function() {
 return server.query('http://twitter.com', 'mediawiki', 
'de').then(function(res) {
 assert.status(res, 200);
-assert.checkCitation(res, 'Twitter - Sieh Dir an, was gerade 
los ist');
+assert.checkCitation(res, 'Twitter. Alles, was gerade los 
ist.');
 assert.deepEqual(!!res.body[0].accessDate, true, 'No 
accessDate present');
 });
 });
diff --git a/test/features/unit/exporter.js b/test/features/unit/exporter.js
index 4899e8d..6b6d0a8 100644
--- a/test/features/unit/exporter.js
+++ b/test/features/unit/exporter.js
@@ -67,12 +67,55 @@
 asser