Mvolz has uploaded a new change for review. https://gerrit.wikimedia.org/r/237713
Change subject: [WIP] Finish openGraph definitions ...................................................................... [WIP] Finish openGraph definitions Finish openGraph definitions, and validate date and language fields. Bug: T1069 Bug: T93337 Change-Id: If25fe8f94974d5c67a75d04c798e0254a02e9e18 --- M lib/Exporter.js M lib/translators/general.js M lib/translators/openGraph.js M test/features/scraping/index.js M test/features/scraping/lang.js M test/features/scraping/noZotero.js 6 files changed, 65 insertions(+), 16 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/citoid refs/changes/13/237713/1 diff --git a/lib/Exporter.js b/lib/Exporter.js index a5914ee..ff95b2a 100644 --- a/lib/Exporter.js +++ b/lib/Exporter.js @@ -446,3 +446,4 @@ module.exports.fixURL = fixURL; module.exports.stripCitation = stripCitation; module.exports.fixDate = fixDate; +module.exports.fixLang = fixLang; diff --git a/lib/translators/general.js b/lib/translators/general.js index caff5f7..14ab4fb 100644 --- a/lib/translators/general.js +++ b/lib/translators/general.js @@ -1,11 +1,22 @@ 'use strict'; + /** * Utility function to map the keys directly + * @param {String} property Zotero property name to add to citation + * @param {Function} validate Function to run on scraped value + * @return {Object} citation object */ -function makeTranslator(property) { +function makeTranslator(property, validateFunction) { + var validate = validateFunction; function translateProp(citation, metadataValue) { + // Create function that returns the citation back if no validate function is supplied + if (!validate || typeof validate !== 'function'){ + validate = function(cit){ + return cit; + }; + } citation = citation || {}; if (typeof metadataValue === 'string') { // Add the string value and trim whitespace @@ -14,7 +25,7 @@ // Choose the first value translateProp(citation, metadataValue[0]); } - return citation; + return validate(citation); } return { name: property, diff --git a/lib/translators/openGraph.js b/lib/translators/openGraph.js index 234db25..9327358 100644 --- a/lib/translators/openGraph.js +++ b/lib/translators/openGraph.js @@ -2,6 +2,8 @@ var makeTranslator = require('./general').util.makeTranslator; +var fixDate = require('../Exporter.js').fixDate; +var fixLang = require('../Exporter.js').fixLang; /** * Open graph type field values : Zotero type field values @@ -9,9 +11,9 @@ */ exports.types = { website: 'webpage', - article: 'blogPost', //or journalArticle, newspaperArticle, magazineArticle ? + article: 'blogPost', // May change to journalArticle, newspaperArticle, magazineArticle book: 'book', - profile: 'webpage', //may be possible to obtain more information from this link a.k.a. names + profile: 'webpage', // May be possible to obtain more information from this link a.k.a. names 'music.song': 'audioRecording', 'music.album': 'audioRecording', 'music.playlist': 'webpage', @@ -33,7 +35,7 @@ image: null, // general OG property, unused in any Zotero type //could possible put in archive location? audio: null, // general OG property, unused in Zotero in any Zotero type //could possibly put in archive location? description: makeTranslator('abstractNote'), // general OG property, abstractNote common to all Zotero types - locale: null, // general OG property, common to all Zotero types + locale: makeTranslator('language', fixLang), // general OG property, common to all Zotero types determiner: null, // general OG property, unused in any Zotero type 'locale:alternate': null, // general OG property, unused in any Zotero type site_name: null, // general OG property, only used in webpage types - translate there @@ -47,10 +49,33 @@ * @type {Object} */ exports.webpage = { - site_name: makeTranslator('websiteTitle') // prefix og: general property, but should only be assigned if type webpage is used + site_name: makeTranslator('websiteTitle'), // prefix og: general property, but should only be assigned if type webpage is used }; exports.videoRecording = { duration: makeTranslator('runningTime'), - release_date: makeTranslator('date') + release_date: makeTranslator('date', fixDate) }; + +exports.audioRecording = { + release_date: makeTranslator('date',fixDate) // only present in music.album +}; + +exports.blogPost = { + published_time: makeTranslator('date', fixDate), +}; + +exports.book = { + release_date: makeTranslator('date', fixDate), + isbn: makeTranslator('ISBN'), +}; + +/** Not currently used */ +exports.journalArticle = { + published_time: makeTranslator('date', fixDate), +}; + +/** Not currently used */ +exports.newspaperArticle = { + published_time: makeTranslator('date', fixDate), +}; \ No newline at end of file diff --git a/test/features/scraping/index.js b/test/features/scraping/index.js index 696b9b0..67adc7f 100644 --- a/test/features/scraping/index.js +++ b/test/features/scraping/index.js @@ -230,14 +230,6 @@ }); }); - it('open graph', function() { - return server.query('http://www.pbs.org/newshour/making-sense/care-peoples-kids/').then(function(res) { - assert.status(res, 200); - assert.checkCitation(res); - assert.deepEqual(!!res.body[0].accessDate, true, 'No accessDate present'); - }); - }); - it('websiteTitle but no publicationTitle', function() { return server.query('http://blog.woorank.com/2013/04/dublin-core-metadata-for-seo-and-usability/').then(function(res) { assert.status(res, 200); diff --git a/test/features/scraping/lang.js b/test/features/scraping/lang.js index bc4668b..cdc3147 100644 --- a/test/features/scraping/lang.js +++ b/test/features/scraping/lang.js @@ -31,6 +31,15 @@ }); }); + it('open graph locale converted to language code', function() { + return server.query('http://www.pbs.org/newshour/making-sense/care-peoples-kids/').then(function(res) { + assert.status(res, 200); + assert.checkCitation(res); + assert.deepEqual(!!res.body[0].accessDate, true, 'No accessDate present'); + assert.deepEqual(res.body[0].language, 'en-US'); // Converts en_US to en-US + }); + }); + // Support for language encoding other than those native to Node it('non-native to node encoding in response', function() { return server.query('http://corriere.it/esteri/15_marzo_27/aereo-germanwings-indizi-interessanti-casa-copilota-ff5e34f8-d446-11e4-831f-650093316b0e.shtml').then(function(res) { diff --git a/test/features/scraping/noZotero.js b/test/features/scraping/noZotero.js index e658d9c..3b29276 100644 --- a/test/features/scraping/noZotero.js +++ b/test/features/scraping/noZotero.js @@ -31,7 +31,7 @@ }); }); - //PMID on NIH website that is found in the id converter api- should convert to DOI + // PMID on NIH website that is found in the id converter api- should convert to DOI it('PMCID present in doi id converter api', function() { return server.query('PMC3605911').then(function(res) { assert.status(res, 200); @@ -43,4 +43,15 @@ }); }); + // JSTOR page with tabs in natively scraped title + it('JSTOR page with tabs in natively scraped title', function() { + return server.query('http://www.jstor.org/discover/10.2307/3677029').then(function(res) { + assert.status(res, 200); + assert.checkZotCitation(res, 'Flight Feather Moult in the Red-Necked Nightjar Caprimulgus ruficollis'); + assert.deepEqual(!!res.body[0].DOI, true, 'Missing DOI'); + assert.deepEqual(!!res.body[0].ISSN, false, 'Should not contain ISSN'); // This indicates Zotero is actually activated since ISSN is not in crossRef, where we're obtaining the metadata + assert.deepEqual(res.body[0].itemType, 'journalArticle', 'Wrong itemType; expected journalArticle, got' + res.body[0].itemType); + }); + }); + }); \ No newline at end of file -- To view, visit https://gerrit.wikimedia.org/r/237713 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If25fe8f94974d5c67a75d04c798e0254a02e9e18 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/citoid Gerrit-Branch: master Gerrit-Owner: Mvolz <mv...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits