Lokal Profil has uploaded a new change for review. https://gerrit.wikimedia.org/r/219800
Change subject: Add DCAT-AP for Wikibase ...................................................................... Add DCAT-AP for Wikibase Constructs a dcat-ap compliant rdf describing wikidata including entity dumps. To be run after each entity dump creation. Previous repo: https://github.com/lokal-profil/DCAT Bug: T103087 Change-Id: I755cafd0309e27d628b67fa01a4eadd7c487d6af --- A modules/snapshot/files/dcat/DCAT.php A modules/snapshot/files/dcat/LICENSE A modules/snapshot/files/dcat/README.md A modules/snapshot/files/dcat/catalog.example.json A modules/snapshot/files/dcat/config.example.json A modules/snapshot/files/dcat/config.json A modules/snapshot/files/dcat/i18n/en.json A modules/snapshot/files/dcat/i18n/qqq.json 8 files changed, 784 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/00/219800/1 diff --git a/modules/snapshot/files/dcat/DCAT.php b/modules/snapshot/files/dcat/DCAT.php new file mode 100644 index 0000000..9053623 --- /dev/null +++ b/modules/snapshot/files/dcat/DCAT.php @@ -0,0 +1,526 @@ +<?php +/** + * DCAT-AP generation for Wikibase + * + * @author Lokal_Profil + * @licence MIT + * + */ + +/** + * Construct a data blob as an easy way of passing data around. 
/**
 * Construct a data blob as an easy way of passing data around.
 *
 * Reads config.json and the i18n/ directory (both relative to the current
 * working directory) as well as the catalog i18n file referenced by the
 * 'catalog-i18n' config key (a URL or a local path).
 *
 * @return array: A data blob with the keys 'config', 'dumps' (null until
 *   filled in by scanDump()), 'i18n' and 'ids'
 */
function makeDataBlob() {
	// Open config file; fail fast with a clear message instead of
	// cascading warnings further down.
	$config = json_decode( file_get_contents( 'config.json' ), true );
	if ( !is_array( $config ) ) {
		trigger_error( 'config.json is missing or does not contain valid JSON',
			E_USER_ERROR );
	}

	// identify existing i18n files (qqq.json holds message documentation only)
	$langs = array();
	foreach ( scandir( 'i18n' ) as $filename ) {
		if ( substr( $filename, -strlen( '.json' ) ) === '.json' && $filename !== 'qqq.json' ) {
			$langs[substr( $filename, 0, -strlen( '.json' ) )] = "i18n/$filename";
		}
	}

	// load i18n files into i18n object, skipping any file with invalid JSON
	// (previously a null was stored, which broke later array_key_exists() calls)
	$i18n = array();
	foreach ( $langs as $langCode => $filename ) {
		$messages = json_decode( file_get_contents( $filename ), true );
		if ( is_array( $messages ) ) {
			$i18n[$langCode] = $messages;
		}
	}

	// load catalog i18n info from URL and add to i18n object.
	// A failed fetch or parse is treated as "no catalog translations"
	// rather than a fatal error, since the rest of the document can still
	// be produced without them. (Previously file_get_contents() returning
	// false fed json_decode() a boolean and null was used as an array.)
	$contents = file_get_contents( $config['catalog-i18n'] );
	$i18nJSON = ( $contents === false ) ? null : json_decode( $contents, true );
	if ( is_array( $i18nJSON ) ) {
		foreach ( array_keys( $i18n ) as $langCode ) {
			if ( array_key_exists( "$langCode-title", $i18nJSON ) ) {
				$i18n[$langCode]['catalog-title'] = $i18nJSON["$langCode-title"];
			}
			if ( array_key_exists( "$langCode-description", $i18nJSON ) ) {
				$i18n[$langCode]['catalog-description'] = $i18nJSON["$langCode-description"];
			}
		}
	}

	// hardcoded ids (for now at least)
	// issue #2
	$ids = array(
		'publisher' => '_n42',
		'contactPoint' => '_n43',
		'liveDataset' => 'liveData',
		'dumpDatasetPrefix' => 'dumpData',
		'liveDistribLD' => 'liveDataLD',
		'liveDistribAPI' => 'liveDataAPI',
		'dumpDistribPrefix' => 'dumpDist',
	);

	// stick loaded data into blob
	return array(
		'config' => $config,
		'dumps' => null,
		'i18n' => $i18n,
		'ids' => $ids,
	);
}
/**
 * Add additional data to a distribution entry when dealing with a dump.
 * Complement to writeDistribution().
 *
 * Writes the access/download URLs, the issued date and the byte size of
 * the dump file into the currently open distribution element.
 *
 * @param XmlWriter $xml XML stream to write to
 * @param array $data data-blob of i18n and config variables
 * @param string $dumpDate the date of the dumpfile
 * @param string $format file ending of the dump (key into the dump-info mediatypes)
 */
function dumpDistributionExtras( XMLWriter $xml, $data, $dumpDate, $format ) {
	// the dump-info accessURL holds a $1 placeholder for "<date>/<filename>"
	$url = str_replace(
		'$1',
		$dumpDate . '/' . $data['dumps'][$dumpDate][$format]['filename'],
		$data['config']['dump-info']['accessURL']
	);

	$xml->startElementNS( 'dcat', 'accessURL', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null, $url );
	$xml->endElement();

	$xml->startElementNS( 'dcat', 'downloadURL', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null, $url );
	$xml->endElement();

	$xml->writeElementNS( 'dcterms', 'issued', null,
		$data['dumps'][$dumpDate][$format]['timestamp'] );

	$xml->startElementNS( 'dcat', 'byteSize', null );
	$xml->writeAttributeNS( 'rdf', 'datatype', null,
		'http://www.w3.org/2001/XMLSchema#decimal' );
	$xml->text( $data['dumps'][$dumpDate][$format]['byteSize'] );
	$xml->endElement();
}

/**
 * Construct distribution entry for each format in which a distribution
 * is available. The DCAT-specification requires each format to be a
 * separate distribution.
 *
 * @param XmlWriter $xml XML stream to write to
 * @param array $data data-blob of i18n and config variables
 * @param string $distribId id for the distribution
 * @param string $prefix prefix for corresponding entry in config file
 * @param string $dumpDate the date of the dumpfile, null for live data
 * @return array identifiers of the distributions that were written
 */
function writeDistribution( XMLWriter $xml, $data, $distribId, $prefix, $dumpDate ) {
	$ids = array();

	foreach ( $data['config']["$prefix-info"]['mediatype'] as $format => $mediatype ) {
		// a null $dumpDate concatenates as the empty string
		$id = $data['config']['uri'] . '#' . $distribId . $dumpDate . $format;
		$ids[] = $id;

		$xml->startElementNS( 'rdf', 'Description', null );
		$xml->writeAttributeNS( 'rdf', 'about', null, $id );

		$xml->startElementNS( 'rdf', 'type', null );
		$xml->writeAttributeNS( 'rdf', 'resource', null,
			'http://www.w3.org/ns/dcat#Distribution' );
		$xml->endElement();

		$xml->startElementNS( 'dcterms', 'license', null );
		$xml->writeAttributeNS( 'rdf', 'resource', null,
			$data['config']["$prefix-info"]['license'] );
		$xml->endElement();

		if ( is_null( $dumpDate ) ) {
			// live data: a single static access URL
			$xml->startElementNS( 'dcat', 'accessURL', null );
			$xml->writeAttributeNS( 'rdf', 'resource', null,
				$data['config']["$prefix-info"]['accessURL'] );
			$xml->endElement();
		} else {
			dumpDistributionExtras( $xml, $data, $dumpDate, $format );
		}

		$xml->writeElementNS( 'dcterms', 'format', null, $mediatype );

		// add description in each language ($1 is replaced by the format)
		foreach ( $data['i18n'] as $langCode => $langData ) {
			if ( array_key_exists( "distribution-$prefix-description", $langData ) ) {
				$xml->startElementNS( 'dcterms', 'description', null );
				$xml->writeAttributeNS( 'xml', 'lang', null, $langCode );
				$xml->text(
					str_replace( '$1', $format, $langData["distribution-$prefix-description"] )
				);
				$xml->endElement();
			}
		}

		$xml->endElement();
	}

	return $ids;
}
/**
 * Construct a dataset entry
 *
 * @param XmlWriter $xml XML stream to write to
 * @param array $data data-blob of i18n and config variables
 * @param string $dumpDate the date of the dumpfile, null for live data
 * @param string $datasetId the id of the dataset
 * @param string $publisher the nodeId of the publisher
 * @param string $contactPoint the nodeId of the contactPoint
 * @param array $distribution array of the distribution identifiers
 * @return string the identifier of the written dataset
 */
function writeDataset( XMLWriter $xml, $data, $dumpDate, $datasetId,
	$publisher, $contactPoint, $distribution ) {

	// live datasets have no dump date and get extra "continuous" metadata
	$type = is_null( $dumpDate ) ? 'live' : 'dump';

	// a null $dumpDate concatenates as the empty string
	$id = $data['config']['uri'] . '#' . $datasetId . $dumpDate;

	$xml->startElementNS( 'rdf', 'Description', null );
	$xml->writeAttributeNS( 'rdf', 'about', null, $id );

	$xml->startElementNS( 'rdf', 'type', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null,
		'http://www.w3.org/ns/dcat#Dataset' );
	$xml->endElement();

	$xml->startElementNS( 'adms', 'contactPoint', null );
	$xml->writeAttributeNS( 'rdf', 'nodeID', null, $contactPoint );
	$xml->endElement();

	$xml->startElementNS( 'dcterms', 'publisher', null );
	$xml->writeAttributeNS( 'rdf', 'nodeID', null, $publisher );
	$xml->endElement();

	if ( $type === 'live' ) {
		// live data is continuously updated
		$xml->startElementNS( 'dcterms', 'accrualPeriodicity', null );
		$xml->writeAttributeNS( 'rdf', 'resource', null,
			'http://purl.org/cld/freq/continuous' );
		$xml->endElement();
	}

	// add keywords
	foreach ( $data['config']['keywords'] as $keyword ) {
		$xml->writeElementNS( 'dcat', 'keyword', null, $keyword );
	}

	// add themes (Eurovoc ids)
	foreach ( $data['config']['themes'] as $themeId ) {
		$xml->startElementNS( 'dcat', 'theme', null );
		$xml->writeAttributeNS( 'rdf', 'resource', null,
			"http://eurovoc.europa.eu/$themeId" );
		$xml->endElement();
	}

	// add title and description in each language
	foreach ( $data['i18n'] as $langCode => $langData ) {
		if ( array_key_exists( "dataset-$type-title", $langData ) ) {
			$xml->startElementNS( 'dcterms', 'title', null );
			$xml->writeAttributeNS( 'xml', 'lang', null, $langCode );
			if ( $type === 'live' ) {
				$xml->text( $langData['dataset-live-title'] );
			} else {
				// dump titles carry the dump date in a $1 placeholder
				$xml->text(
					str_replace( '$1', $dumpDate, $langData['dataset-dump-title'] )
				);
			}
			$xml->endElement();
		}
		if ( array_key_exists( "dataset-$type-description", $langData ) ) {
			$xml->startElementNS( 'dcterms', 'description', null );
			$xml->writeAttributeNS( 'xml', 'lang', null, $langCode );
			$xml->text( $langData["dataset-$type-description"] );
			$xml->endElement();
		}
	}

	// add distributions
	foreach ( $distribution as $distribId ) {
		$xml->startElementNS( 'dcat', 'distribution', null );
		$xml->writeAttributeNS( 'rdf', 'resource', null, $distribId );
		$xml->endElement();
	}

	$xml->endElement();
	return $id;
}

/**
 * Construct the publisher for the catalog and datasets with a given nodeId
 *
 * @param XmlWriter $xml XML stream to write to
 * @param array $data data-blob of i18n and config variables
 * @param string $publisher the nodeId of the publisher
 */
function writePublisher( XMLWriter $xml, $data, $publisher ) {
	$xml->startElementNS( 'rdf', 'Description', null );
	$xml->writeAttributeNS( 'rdf', 'nodeID', null, $publisher );

	$xml->startElementNS( 'rdf', 'type', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null,
		'http://xmlns.com/foaf/0.1/Agent' );
	$xml->endElement();

	$xml->writeElementNS( 'foaf', 'name', null,
		$data['config']['publisher']['name'] );

	$xml->startElementNS( 'dcterms', 'type', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null,
		'http://purl.org/adms/publishertype/' .
		$data['config']['publisher']['publisherType'] );
	$xml->endElement();

	$xml->writeElementNS( 'foaf', 'homepage', null,
		$data['config']['publisher']['homepage'] );

	$xml->startElementNS( 'vcard', 'hasEmail', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null,
		'mailto:' . $data['config']['publisher']['email'] );
	$xml->endElement();

	$xml->endElement();
}
/**
 * Construct a contactPoint for the datasets with a given nodeId
 *
 * @param XmlWriter $xml XML stream to write to
 * @param array $data data-blob of i18n and config variables
 * @param string $contactPoint the nodeId of the contactPoint
 */
function writeContactPoint( XMLWriter $xml, $data, $contactPoint ) {
	$xml->startElementNS( 'rdf', 'Description', null );
	$xml->writeAttributeNS( 'rdf', 'nodeID', null, $contactPoint );

	$xml->startElementNS( 'rdf', 'type', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null,
		'http://www.w3.org/2006/vcard/ns#' .
		$data['config']['contactPoint']['vcardType'] );
	$xml->endElement();

	$xml->startElementNS( 'vcard', 'hasEmail', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null,
		'mailto:' . $data['config']['contactPoint']['email'] );
	$xml->endElement();

	$xml->writeElementNS( 'vcard', 'fn', null,
		$data['config']['contactPoint']['name'] );

	$xml->endElement();
}

/**
 * Construct the catalog entry
 *
 * @param XmlWriter $xml XML stream to write to
 * @param array $data data-blob of i18n and config variables
 * @param string $publisher the nodeId of the publisher
 * @param array $dataset array of the dataset identifiers
 */
function writeCatalog( XMLWriter $xml, $data, $publisher, $dataset ) {
	$xml->startElementNS( 'rdf', 'Description', null );
	$xml->writeAttributeNS( 'rdf', 'about', null,
		$data['config']['uri'] . '#catalog' );

	$xml->startElementNS( 'rdf', 'type', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null,
		'http://www.w3.org/ns/dcat#Catalog' );
	$xml->endElement();

	$xml->startElementNS( 'dcterms', 'license', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null,
		$data['config']['catalog-license'] );
	$xml->endElement();

	$xml->startElementNS( 'dcat', 'themeTaxonomy', null );
	$xml->writeAttributeNS( 'rdf', 'resource', null,
		'http://eurovoc.europa.eu/' );
	$xml->endElement();

	// foaf:homepage: read the documented (but previously unused)
	// 'catalog-homepage' config key instead of hardcoding wikidata.org;
	// fall back to the old hardcoded value for older config files.
	$homepage = array_key_exists( 'catalog-homepage', $data['config'] )
		? $data['config']['catalog-homepage']
		: 'https://www.wikidata.org';
	$xml->writeElementNS( 'foaf', 'homepage', null, $homepage );

	// use gmdate() for consistency with scanDump(), so the modified date
	// does not depend on the server's local timezone
	$xml->writeElementNS( 'dcterms', 'modified', null, gmdate( 'Y-m-d' ) );
	$xml->writeElementNS( 'dcterms', 'issued', null,
		$data['config']['catalog-issued'] );

	$xml->startElementNS( 'dcterms', 'publisher', null );
	$xml->writeAttributeNS( 'rdf', 'nodeID', null, $publisher );
	$xml->endElement();

	// add language, title and description in each language
	foreach ( $data['i18n'] as $langCode => $langData ) {
		$xml->startElementNS( 'dcterms', 'language', null );
		$xml->writeAttributeNS( 'rdf', 'resource', null,
			"http://id.loc.gov/vocabulary/iso639-1/$langCode" );
		$xml->endElement();

		if ( array_key_exists( 'catalog-title', $langData ) ) {
			$xml->startElementNS( 'dcterms', 'title', null );
			$xml->writeAttributeNS( 'xml', 'lang', null, $langCode );
			$xml->text( $langData['catalog-title'] );
			$xml->endElement();
		}
		if ( array_key_exists( 'catalog-description', $langData ) ) {
			$xml->startElementNS( 'dcterms', 'description', null );
			$xml->writeAttributeNS( 'xml', 'lang', null, $langCode );
			$xml->text( $langData['catalog-description'] );
			$xml->endElement();
		}
	}

	// add datasets
	foreach ( $dataset as $datasetId ) {
		$xml->startElementNS( 'dcat', 'dataset', null );
		$xml->writeAttributeNS( 'rdf', 'resource', null, $datasetId );
		$xml->endElement();
	}

	$xml->endElement();
}
/**
 * Construct the whole DCAT-AP document given an array of dump info
 *
 * @param array $data data-blob of i18n and config variables
 * @return string: xmldata
 */
function outputXml( $data ) {
	// Set the HTTP content-type when running in a web context.
	// Previously this was "@header( 'content-type: text/xml charset=UTF-8' )":
	// the value was missing the ';' separator and the '@' hid any errors.
	if ( !headers_sent() ) {
		header( 'Content-Type: text/xml; charset=UTF-8' );
	}

	// Initializing the XML Object
	$xml = new XmlWriter();
	$xml->openMemory();
	$xml->setIndent( true );
	$xml->setIndentString( ' ' );

	// set namespaces
	$xml->startDocument( '1.0', 'UTF-8' );
	$xml->startElementNS( 'rdf', 'RDF', null );
	$xml->writeAttributeNS( 'xmlns', 'rdf', null,
		'http://www.w3.org/1999/02/22-rdf-syntax-ns#' );
	$xml->writeAttributeNS( 'xmlns', 'dcterms', null,
		'http://purl.org/dc/terms/' );
	$xml->writeAttributeNS( 'xmlns', 'dcat', null,
		'http://www.w3.org/ns/dcat#' );
	$xml->writeAttributeNS( 'xmlns', 'foaf', null,
		'http://xmlns.com/foaf/0.1/' );
	$xml->writeAttributeNS( 'xmlns', 'adms', null,
		'http://www.w3.org/ns/adms#' );
	$xml->writeAttributeNS( 'xmlns', 'vcard', null,
		'http://www.w3.org/2006/vcard/ns#' );

	// Calls previously declared functions to construct xml
	writePublisher( $xml, $data, $data['ids']['publisher'] );
	writeContactPoint( $xml, $data, $data['ids']['contactPoint'] );

	$dataset = array();

	// Live dataset and distributions
	$liveDistribs = writeDistribution( $xml, $data,
		$data['ids']['liveDistribLD'], 'ld', null );
	if ( $data['config']['api-enabled'] ) {
		$liveDistribs = array_merge( $liveDistribs,
			writeDistribution( $xml, $data,
				$data['ids']['liveDistribAPI'], 'api', null )
		);
	}
	$dataset[] = writeDataset( $xml, $data, null, $data['ids']['liveDataset'],
		$data['ids']['publisher'], $data['ids']['contactPoint'],
		$liveDistribs );

	// Dump dataset and distributions (one dataset per dump date)
	if ( $data['config']['dumps-enabled'] ) {
		foreach ( array_keys( $data['dumps'] ) as $dumpDate ) {
			$distIds = writeDistribution( $xml, $data,
				$data['ids']['dumpDistribPrefix'], 'dump', $dumpDate );
			$dataset[] = writeDataset( $xml, $data, $dumpDate,
				$data['ids']['dumpDatasetPrefix'],
				$data['ids']['publisher'],
				$data['ids']['contactPoint'], $distIds );
		}
	}

	writeCatalog( $xml, $data, $data['ids']['publisher'], $dataset );

	// Closing last XML node
	$xml->endElement();

	// return the serialised XML (flushing the memory buffer)
	return $xml->outputMemory( true );
}

/**
 * Given a dump directory produce array with data needed by outputXml()
 *
 * Each immediate sub-directory of $dirname is taken to be a dump date;
 * within it, files ending in "all.<format>.gz" (for each format configured
 * under dump-info:mediatype) are recorded with timestamp, size and name.
 *
 * @param string $dirname directory name
 * @param array $data data-blob of i18n and config variables
 * @return array: of dumpdata keyed by sub-directory name, or empty array
 */
function scanDump( $dirname, $data ) {
	// expected filename suffix per configured dump format
	$teststrings = array();
	foreach ( $data['config']['dump-info']['mediatype'] as $fileEnding => $mediatype ) {
		$teststrings[$fileEnding] = 'all.' . $fileEnding . '.gz';
	}

	$dumps = array();

	foreach ( scandir( $dirname ) as $subdir ) {
		// get rid of files, hidden entries and non-relevant sub-directories
		if ( substr( $subdir, 0, 1 ) === '.' || !is_dir( "$dirname/$subdir" ) ) {
			continue;
		}
		// each subdir refers to a timestamp
		$dumps[$subdir] = array();
		foreach ( scandir( "$dirname/$subdir" ) as $filename ) {
			// match each file against an expected teststring
			foreach ( $teststrings as $fileEnding => $teststring ) {
				if ( substr( $filename, -strlen( $teststring ) ) === $teststring ) {
					$info = stat( "$dirname/$subdir/$filename" );
					if ( $info === false ) {
						// unreadable file: skip it rather than recording garbage
						continue;
					}
					$dumps[$subdir][$fileEnding] = array(
						'timestamp' => gmdate( 'Y-m-d', $info['mtime'] ),
						'byteSize' => $info['size'],
						'filename' => $filename,
					);
				}
			}
		}
	}

	return $dumps;
}

/**
 * Scan dump directory for dump files (if any) and
 * create dcatap.rdf in the same directory
 *
 * @param string $directory directory name, overrides config setting if provided
 */
function run( $directory = null ) {
	// Load config variables and i18n into a data blob
	$data = makeDataBlob();

	// Load directory from config, unless overridden
	if ( is_null( $directory ) ) {
		$directory = $data['config']['directory'];
	}

	// test if dir exists
	if ( !is_dir( $directory ) ) {
		echo "$directory is not a valid directory";
		return;
	}

	// add dump data to data blob
	$data['dumps'] = scanDump( $directory, $data );

	// create xml string from data blob
	$xml = outputXml( $data );

	// report a failed write instead of failing silently
	if ( file_put_contents( "$directory/dcatap.rdf", $xml ) === false ) {
		echo "failed to write $directory/dcatap.rdf";
	}
}
included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/modules/snapshot/files/dcat/README.md b/modules/snapshot/files/dcat/README.md new file mode 100644 index 0000000..608ced4 --- /dev/null +++ b/modules/snapshot/files/dcat/README.md @@ -0,0 +1,100 @@ +DCAT-AP for Wikibase +================= + +A project aimed at generating a [DCAT-AP](https://joinup.ec.europa.eu/system/files/project/c3/22/18/DCAT-AP_Final_v1.00.html) +document for [Wikibase](http://wikiba.se) installations +in general and [Wikidata](http://wikidata.org) in particular. + +Takes into account access through: + +* Content negotiation (various formats) +* MediaWiki api (various formats) +* Entity dumps e.g. json, ttl (assumes that these are gziped) + +Current result can be found at [lokal-profil / dcat-wikidata.rdf](https://gist.github.com/lokal-profil/8086dc6bf2398d84a311) + + +## To use + +1. Copy `config.example.json` to `config.json` and change the contents + to match your installation. Refer to the *Config* section below for + an explanation of the individual configuration parameters. +2. Copy `catalog.example.json` to a suitable place (e.g. on-wiki) and + update the translations to fit your wikibase installation. Set this + value as `catalog-i18n` in the config file. +3. Create the dcatap.rdf file by running `php -r "require 'DCAT.php'; run('<PATH>');"` + where `<PATH>` is the relative path to the directory containing the + dumps (if any) and where the dcatap.rdf file should be created. 
+ `<PATH>` can be left out if already supplied through the `directory` + parameter in the config file. + + +## Translations + +* Translations which are generic to the tool can be submitted as pull + requests and should be in the same format as the files in the `i18n` + directory. +* Translations which are specific to a project/catalog are added to + the location specified in the `catalog-i18n` parameter of the config + file. + + +## Config + +Below follows a key by key explanation of the config file. + +* `directory`: Relative path to the directory containing the dump + subcategories (if any) and for the final dcat file. +* `api-enabled`: (`Boolean`) Is API access activated for the MediaWiki + installation? +* `dumps-enabled`: (`Boolean`) Is JSON dump generation activated for the + WikiBase installation? +* `uri`: URL used as basis for rdf identifiers, + e.g. *http://www.example.org/about* +* `catalog-homepage`: URL for the homepage of the WikiBase installation, + e.g. *http://www.example.org* +* `catalog-issued`: ISO date at which the WikiBase installation was + first issued, e.g. *2000-12-24* +* `catalog-license`: License of the catalog, i.e. of the dcat file + itself (not the contents of the WikiBase installation), + e.g. *http://creativecommons.org/publicdomain/zero/1.0/* +* `catalog-i18n`: URL or path to json file containing i18n strings for + catalog title and description. Can be an on-wiki page, + e.g. *https://www.example.org/w/index.php?title=MediaWiki:DCAT.json&action=raw* +* `keywords`: (`array`) List of keywords applicable to all of the datasets +* `themes`: (`array`) List of thematic ids in accordance with + [Eurovoc](http://eurovoc.europa.eu/), e.g. *2191* for + http://eurovoc.europa.eu/2191 +* `publisher`: + * `name`: Name of the publisher + * `homepage`: URL for or the homepage of the publisher + * `email`: Contact e-mail for the publisher, should be a function + address, e.g. 
*[email protected]* + * `publisherType`: Publisher type according to [ADMS](http://purl.org/adms/publishertype/1.0), + e.g. *NonProfitOrganisation* +* `contactPoint`: + * `name`: Name of the contact point + * `email`: E-mail for the contact point, should ideally be a + function address, e.g. *[email protected]* + * `vcardType`: Type of contact point, either `Organization` or + `Individual` +* `ld-info`: + * `accessURL`: URL to the content negotiation endpoint of the + WikiBase installation, e.g. *http://www.example.org/entity/* + * `mediatype`: (`object`) List of [IANA media types](http://www.iana.org/assignments/media-types/) + available through content negotiation in the format *file-ending:media-type* + * `license`: License of the data in the distribution, e.g. + *http://creativecommons.org/publicdomain/zero/1.0/* +* `api-info`: + * `accessURL`: URL to the MediaWiki API endpoint of the wiki, + e.g. *http://www.example.org/w/api.php* + * `mediatype`: (`object`) List of non-deprecated formats available + thorough the API, see ld-info:mediatype above for formatting + * `license`: See ld-info:license above +* `dump-info`: + * `accessURL`: URL to the directory where the *.json.gz* files + reside (`$1` is replaced on the fly by the actual filename), + e.g. *http://example.org/dumps/$1* + * `mediatype`: (`object`) List of media types. In practice this is + always `{"json": "application/json"}` ... for now + * `license`: See ld-info:license above diff --git a/modules/snapshot/files/dcat/catalog.example.json b/modules/snapshot/files/dcat/catalog.example.json new file mode 100644 index 0000000..04abdae --- /dev/null +++ b/modules/snapshot/files/dcat/catalog.example.json @@ -0,0 +1,8 @@ +{ + "qqq-title": "The name of the catalog", + "qqq-description": "A description of the catalog", + "en-title": "Wikidata", + "en-description": "Wikidata is a free linked database that can be read and edited by both humans and machines. 
\nWikidata acts as central storage for the structured data of its Wikimedia sister projects including Wikipedia, Wikivoyage, Wikisource, and others.", + "sv-title": "Wikidata", + "sv-description": "Wikidata är en fri länkad databas som kan läsas och redigeras av både människor och maskiner. \nWikidata fungerar som ett centralt förvar av strukturerade data för sina systerprojekt vilka inkluderar Wikipedia, Wikivoyage, Wikisource, och fler." +} diff --git a/modules/snapshot/files/dcat/config.example.json b/modules/snapshot/files/dcat/config.example.json new file mode 100644 index 0000000..ce57691 --- /dev/null +++ b/modules/snapshot/files/dcat/config.example.json @@ -0,0 +1,52 @@ +{ + "directory": "testdir/test", + "api-enabled": true, + "dumps-enabled": true, + "uri": "http://www.wikidata.org/about", + "catalog-homepage": "http://wikidata.org", + "catalog-issued": "2012-10-30", + "catalog-license": "http://creativecommons.org/publicdomain/zero/1.0/", + "catalog-i18n": "https://www.wikidata.org/w/index.php?title=User:Lokal_Profil/DCAT.json&action=raw", + "keywords": ["data store", "semantic", "knowledgebase", "Wikimedia", "user generated content", "UGC", "Wikipedia", "Wikidata"], + "themes": ["1428", "441", "2191", "384", "7374"], + "publisher": { + "name": "Wikimedia Foundation", + "homepage": "http://wikimediafoundation.org/", + "email": "[email protected]", + "publisherType": "NonProfitOrganisation" + }, + "contactPoint": { + "name": "Wikidata information team", + "email": "[email protected]", + "vcardType" : "Organization" + }, + "ld-info": { + "accessURL": "http://www.wikidata.org/entity/", + "mediatype": { + "json": "application/json", + "n3": "application/n-triples", + "rdf": "application/rdf+xml", + "ttl": "text/turtle", + "xml": "application/xml", + "html": "text/html" + }, + "license": "http://creativecommons.org/publicdomain/zero/1.0/" + }, + "api-info": { + "accessURL": "https://www.wikidata.org/w/api.php", + "mediatype": { + "json": "application/json", + 
"php": "application/x-php", + "xml": "application/xml" + }, + "license": "http://creativecommons.org/publicdomain/zero/1.0/" + }, + "dump-info": { + "accessURL": "https://dumps.wikimedia.org/wikidatawiki/entities/$1", + "mediatype": { + "json": "application/json", + "ttl": "text/turtle" + }, + "license": "http://creativecommons.org/publicdomain/zero/1.0/" + } +} diff --git a/modules/snapshot/files/dcat/config.json b/modules/snapshot/files/dcat/config.json new file mode 100644 index 0000000..6eccc36 --- /dev/null +++ b/modules/snapshot/files/dcat/config.json @@ -0,0 +1,52 @@ +{ + "directory": "pathTo/wikidatawiki/entities/", + "api-enabled": true, + "dumps-enabled": true, + "uri": "http://www.wikidata.org/about", + "catalog-homepage": "http://wikidata.org", + "catalog-issued": "2012-10-30", + "catalog-license": "http://creativecommons.org/publicdomain/zero/1.0/", + "catalog-i18n": "https://www.wikidata.org/w/index.php?title=MediaWiki:DCAT.json&action=raw", + "keywords": ["data store", "semantic", "knowledgebase", "Wikimedia", "user generated content", "UGC", "Wikipedia", "Wikidata"], + "themes": ["1428", "441", "2191", "384", "7374"], + "publisher": { + "name": "Wikimedia Foundation", + "homepage": "http://wikimediafoundation.org/", + "email": "[email protected]", + "publisherType": "NonProfitOrganisation" + }, + "contactPoint": { + "vcardType" : "Organization", + "name": "Wikidata information team", + "email": "[email protected]" + }, + "ld-info": { + "accessURL": "http://www.wikidata.org/entity/", + "mediatype": { + "json": "application/json", + "n3": "application/n-triples", + "rdf": "application/rdf+xml", + "ttl": "text/turtle", + "xml": "application/xml", + "html": "text/html" + }, + "license": "http://creativecommons.org/publicdomain/zero/1.0/" + }, + "api-info": { + "accessURL": "https://www.wikidata.org/w/api.php", + "mediatype": { + "json": "application/json", + "php": "application/x-php", + "xml": "application/xml" + }, + "license": 
"http://creativecommons.org/publicdomain/zero/1.0/" + }, + "dump-info": { + "accessURL": "https://dumps.wikimedia.org/wikidatawiki/entities/$1", + "mediatype": { + "json": "application/json", + "ttl": "text/turtle" + }, + "license": "http://creativecommons.org/publicdomain/zero/1.0/" + } +} diff --git a/modules/snapshot/files/dcat/i18n/en.json b/modules/snapshot/files/dcat/i18n/en.json new file mode 100644 index 0000000..58a5299 --- /dev/null +++ b/modules/snapshot/files/dcat/i18n/en.json @@ -0,0 +1,12 @@ +{ + "@metadata": { + "authors": ["Lokal_Profil"] + }, + "dataset-live-title": "Live access", + "dataset-live-description": "The live version of the data, includes entities and properties. Only non-deprecated formats are listed as distributions.", + "dataset-dump-title": "Entity dump of $1", + "dataset-dump-description": "A static dump of all entites for the given date.", + "distribution-ld-description": "The Linked Data endpoint. Format is resolved through content negotiation.", + "distribution-api-description": "The MediaWiki API endpoint. Format is given through the \"format\" parameter.", + "distribution-dump-description": "A gziped $1 file." +} diff --git a/modules/snapshot/files/dcat/i18n/qqq.json b/modules/snapshot/files/dcat/i18n/qqq.json new file mode 100644 index 0000000..ca1012c --- /dev/null +++ b/modules/snapshot/files/dcat/i18n/qqq.json @@ -0,0 +1,12 @@ +{ + "@metadata": { + "authors": ["Lokal_Profil"] + }, + "dataset-live-title": "The title for the live access dataset", + "dataset-live-description": "The description of the live access dataset. For terminology see https://www.wikidata.org/wiki/Wikidata:Glossary. For deprecation see https://en.wikipedia.org/wiki/Deprecation.", + "dataset-dump-title": "The title for the entity dump where $1 is the date of the dump in the format YYYYMMDD", + "dataset-dump-description": "The description of the entity dump for the given date.", + "distribution-ld-description": "The description of the Linked Data endpoint. 
For content negotiation see https://en.wikipedia.org/wiki/Content_negotiation", + "distribution-api-description": "The description of the MediaWiki API endpoint. Leave \"format\" untranslated.", + "distribution-dump-description": "The description of a dump file where $1 is the file format." +} -- To view, visit https://gerrit.wikimedia.org/r/219800 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I755cafd0309e27d628b67fa01a4eadd7c487d6af Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Lokal Profil <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
