jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/392173 )
Change subject: Add more data to categories dump
......................................................................
Add more data to categories dump
Hidden category: <cat> a mediawiki:HiddenCategory .
Pages count: <cat> mediawiki:pages 10 .
Subcats count: <cat> mediawiki:subcategories 5 .
Note that the pages count includes only actual articles (subcategories
and files are subtracted), unlike cat_pages in the MediaWiki category
table.
Bug: T173980
Change-Id: I6d34c58f844411f891195776406e11acd2aef7b1
---
M docs/ontology.owl
M includes/CategoriesRdf.php
M maintenance/dumpCategoriesAsRdf.php
M tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
M tests/phpunit/maintenance/categoriesRdfTest.php
5 files changed, 89 insertions(+), 7 deletions(-)
Approvals:
Multichill: Looks good to me, but someone else must approve
Lucas Werkmeister (WMDE): Looks good to me, but someone else must approve
jenkins-bot: Verified
Thiemo Kreuz (WMDE): Looks good to me, approved
diff --git a/docs/ontology.owl b/docs/ontology.owl
index 6b2e0b7..19476a3 100644
--- a/docs/ontology.owl
+++ b/docs/ontology.owl
@@ -38,6 +38,11 @@
<rdfs:comment>MediaWiki category.</rdfs:comment>
</owl:Class>
+ <owl:Class rdf:about="&mediawiki;HiddenCategory">
+ <rdfs:label>HiddenCategory</rdfs:label>
+ <rdfs:comment>MediaWiki hidden category.</rdfs:comment>
+ </owl:Class>
+
<!--
///////////////////////////////////////////////////////////////////////////////////////
//
@@ -53,4 +58,18 @@
<rdfs:domain rdf:resource="&mediawiki;Category"/>
</owl:ObjectProperty>
+ <owl:DatatypeProperty rdf:about="&mediawiki;pages">
+ <rdfs:label>pages</rdfs:label>
+ <rdfs:comment>Number of articles belonging to this
category.</rdfs:comment>
+ <rdfs:range rdf:resource="&mediawiki;Category"/>
+ <rdfs:range rdf:resource="&xsd;integer"/>
+ </owl:DatatypeProperty>
+
+ <owl:DatatypeProperty rdf:about="&mediawiki;subcategories">
+ <rdfs:label>subcategories</rdfs:label>
+ <rdfs:comment>Number of subcategories belonging to this
category.</rdfs:comment>
+ <rdfs:range rdf:resource="&mediawiki;Category"/>
+ <rdfs:range rdf:resource="&xsd;integer"/>
+ </owl:DatatypeProperty>
+
</rdf:RDF>
diff --git a/includes/CategoriesRdf.php b/includes/CategoriesRdf.php
index 463f6e8..fc296d4 100644
--- a/includes/CategoriesRdf.php
+++ b/includes/CategoriesRdf.php
@@ -80,14 +80,23 @@
/**
* Write out the data for single category.
* @param string $categoryName Category name
+ * @param bool $isHidden Hidden category?
+ * @param int $pages Page count (note this includes only Wiki articles,
not subcats or files)
+ * @param int $subcategories Subcategory count
*/
- public function writeCategoryData( $categoryName ) {
+ public function writeCategoryData( $categoryName, $isHidden, $pages,
$subcategories ) {
$title = Title::makeTitle( NS_CATEGORY, $categoryName );
$this->rdfWriter->about( $this->titleToUrl( $title ) )
->say( 'a' )
->is( self::ONTOLOGY_PREFIX, 'Category' );
+ if ( $isHidden ) {
+ $this->rdfWriter->is( self::ONTOLOGY_PREFIX,
'HiddenCategory' );
+ }
$titletext = $title->getText();
$this->rdfWriter->say( 'rdfs', 'label' )->value( $titletext );
+ $this->rdfWriter->say( self::ONTOLOGY_PREFIX, 'pages' )->value(
$pages );
+ $this->rdfWriter->say( self::ONTOLOGY_PREFIX, 'subcategories'
)->value( $subcategories );
+ // TODO: do we want files too here? Easy to add, but don't have
use case so far.
}
/**
diff --git a/maintenance/dumpCategoriesAsRdf.php
b/maintenance/dumpCategoriesAsRdf.php
index c1835d0..3467932 100644
--- a/maintenance/dumpCategoriesAsRdf.php
+++ b/maintenance/dumpCategoriesAsRdf.php
@@ -58,14 +58,32 @@
public function getCategoryIterator( IDatabase $dbr ) {
$it = new BatchRowIterator(
$dbr,
- 'page',
+ [ 'page', 'page_props', 'category' ],
[ 'page_title' ],
$this->getBatchSize()
);
$it->addConditions( [
'page_namespace' => NS_CATEGORY,
] );
- $it->setFetchColumns( [ 'page_title', 'page_id' ] );
+ $it->setFetchColumns( [
+ 'page_title',
+ 'page_id',
+ 'pp_propname',
+ 'cat_pages',
+ 'cat_subcats',
+ 'cat_files'
+ ] );
+ $it->addJoinConditions(
+ [
+ 'page_props' => [
+ 'LEFT JOIN', [ 'pp_propname' =>
'hiddencat', 'pp_page = page_id' ]
+ ],
+ 'category' => [
+ 'LEFT JOIN', [ 'cat_title = page_title'
]
+ ]
+ ]
+
+ );
return $it;
}
@@ -90,6 +108,9 @@
return new RecursiveIteratorIterator( $it );
}
+ /**
+ * @param int $timestamp
+ */
public function addDumpHeader( $timestamp ) {
global $wgRightsUrl;
$licenseUrl = $wgRightsUrl;
@@ -129,7 +150,12 @@
foreach ( $this->getCategoryIterator( $dbr ) as $batch ) {
$pages = [];
foreach ( $batch as $row ) {
- $this->categoriesRdf->writeCategoryData(
$row->page_title );
+ $this->categoriesRdf->writeCategoryData(
+ $row->page_title,
+ $row->pp_propname === 'hiddencat',
+ (int)$row->cat_pages -
(int)$row->cat_subcats - (int)$row->cat_files,
+ (int)$row->cat_subcats
+ );
$pages[$row->page_id] = $row->page_title;
}
diff --git a/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
b/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
index b8bd8e0..bbb3787 100644
--- a/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
+++ b/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
@@ -7,10 +7,17 @@
<http://acme.test/wiki/Special:CategoryDump>
<http://www.w3.org/2002/07/owl#imports>
<https://www.mediawiki.org/ontology/ontology.owl> .
<http://acme.test/wiki/Category:Category_One>
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
<https://www.mediawiki.org/ontology#Category> .
<http://acme.test/wiki/Category:Category_One>
<http://www.w3.org/2000/01/rdf-schema#label> "Category One" .
+<http://acme.test/wiki/Category:Category_One>
<https://www.mediawiki.org/ontology#pages>
"7"^^<http://www.w3.org/2001/XMLSchema#integer> .
+<http://acme.test/wiki/Category:Category_One>
<https://www.mediawiki.org/ontology#subcategories>
"10"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://acme.test/wiki/Category:2_Category_Two>
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
<https://www.mediawiki.org/ontology#Category> .
+<http://acme.test/wiki/Category:2_Category_Two>
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
<https://www.mediawiki.org/ontology#HiddenCategory> .
<http://acme.test/wiki/Category:2_Category_Two>
<http://www.w3.org/2000/01/rdf-schema#label> "2 Category Two" .
+<http://acme.test/wiki/Category:2_Category_Two>
<https://www.mediawiki.org/ontology#pages>
"17"^^<http://www.w3.org/2001/XMLSchema#integer> .
+<http://acme.test/wiki/Category:2_Category_Two>
<https://www.mediawiki.org/ontology#subcategories>
"0"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://acme.test/wiki/Category:Category_One>
<https://www.mediawiki.org/ontology#isInCategory>
<http://acme.test/wiki/Category:Parent_of_1> .
<http://acme.test/wiki/Category:2_Category_Two>
<https://www.mediawiki.org/ontology#isInCategory>
<http://acme.test/wiki/Category:Parent_of_2> .
<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
<https://www.mediawiki.org/ontology#Category> .
<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
<http://www.w3.org/2000/01/rdf-schema#label>
"\u0422\u0440\u0435\u0442\u044C\u044F
\u043A\u0430\u0442\u0435\u0433\u043E\u0440\u0438\u044F" .
+<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
<https://www.mediawiki.org/ontology#pages>
"0"^^<http://www.w3.org/2001/XMLSchema#integer> .
+<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
<https://www.mediawiki.org/ontology#subcategories>
"0"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
<https://www.mediawiki.org/ontology#isInCategory>
<http://acme.test/wiki/Category:Parent_of_3> .
diff --git a/tests/phpunit/maintenance/categoriesRdfTest.php
b/tests/phpunit/maintenance/categoriesRdfTest.php
index 2edbae1..c0850ab 100644
--- a/tests/phpunit/maintenance/categoriesRdfTest.php
+++ b/tests/phpunit/maintenance/categoriesRdfTest.php
@@ -9,12 +9,33 @@
return [
// batch 1
[
- (object)[ 'page_title' => 'Category One',
'page_id' => 1 ],
- (object)[ 'page_title' => '2 Category Two',
'page_id' => 2 ],
+ (object)[
+ 'page_title' => 'Category One',
+ 'page_id' => 1,
+ 'pp_propname' => null,
+ 'cat_pages' => '20',
+ 'cat_subcats' => '10',
+ 'cat_files' => '3'
+ ],
+ (object)[
+ 'page_title' => '2 Category Two',
+ 'page_id' => 2,
+ 'pp_propname' => 'hiddencat',
+ 'cat_pages' => 20,
+ 'cat_subcats' => 0,
+ 'cat_files' => 3
+ ],
],
// batch 2
[
- (object)[ 'page_title' => 'Третья категория',
'page_id' => 3 ],
+ (object)[
+ 'page_title' => 'Третья категория',
+ 'page_id' => 3,
+ 'pp_propname' => null,
+ 'cat_pages' => '0',
+ 'cat_subcats' => '0',
+ 'cat_files' => '0'
+ ],
]
];
}
--
To view, visit https://gerrit.wikimedia.org/r/392173
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I6d34c58f844411f891195776406e11acd2aef7b1
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <[email protected]>
Gerrit-Reviewer: Brian Wolff <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Lucas Werkmeister (WMDE) <[email protected]>
Gerrit-Reviewer: Multichill <[email protected]>
Gerrit-Reviewer: Parent5446 <[email protected]>
Gerrit-Reviewer: Smalyshev <[email protected]>
Gerrit-Reviewer: Thiemo Kreuz (WMDE) <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits