jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/392173 )

Change subject: Add more data to categories dump
......................................................................


Add more data to categories dump

Hidden category: <cat> a mediawiki:HiddenCategory .
Pages count: <cat> mediawiki:pages 10 .
Subcats count: <cat> mediawiki:subcategories 5 .

Note that the pages count includes only actual articles (cat_pages minus
cat_subcats and cat_files), unlike cat_pages in MediaWiki's category table.

Bug: T173980
Change-Id: I6d34c58f844411f891195776406e11acd2aef7b1
---
M docs/ontology.owl
M includes/CategoriesRdf.php
M maintenance/dumpCategoriesAsRdf.php
M tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
M tests/phpunit/maintenance/categoriesRdfTest.php
5 files changed, 89 insertions(+), 7 deletions(-)

Approvals:
  Multichill: Looks good to me, but someone else must approve
  Lucas Werkmeister (WMDE): Looks good to me, but someone else must approve
  jenkins-bot: Verified
  Thiemo Kreuz (WMDE): Looks good to me, approved



diff --git a/docs/ontology.owl b/docs/ontology.owl
index 6b2e0b7..19476a3 100644
--- a/docs/ontology.owl
+++ b/docs/ontology.owl
@@ -38,6 +38,11 @@
     <rdfs:comment>MediaWiki category.</rdfs:comment>
   </owl:Class>
 
+  <owl:Class rdf:about="&mediawiki;HiddenCategory">
+    <rdfs:label>HiddenCategory</rdfs:label>
+    <rdfs:comment>MediaWiki hidden category.</rdfs:comment>
+  </owl:Class>
+
   <!--
   
///////////////////////////////////////////////////////////////////////////////////////
   //
@@ -53,4 +58,18 @@
       <rdfs:domain rdf:resource="&mediawiki;Category"/>
   </owl:ObjectProperty>
 
+  <owl:DatatypeProperty rdf:about="&mediawiki;pages">
+      <rdfs:label>pages</rdfs:label>
+      <rdfs:comment>Number of articles belonging to this 
category.</rdfs:comment>
+      <rdfs:range rdf:resource="&mediawiki;Category"/>
+      <rdfs:range rdf:resource="&xsd;integer"/>
+  </owl:DatatypeProperty>
+
+  <owl:DatatypeProperty rdf:about="&mediawiki;subcategories">
+      <rdfs:label>subcategories</rdfs:label>
+      <rdfs:comment>Number of subcategories belonging to this 
category.</rdfs:comment>
+      <rdfs:range rdf:resource="&mediawiki;Category"/>
+      <rdfs:range rdf:resource="&xsd;integer"/>
+  </owl:DatatypeProperty>
+
 </rdf:RDF>
diff --git a/includes/CategoriesRdf.php b/includes/CategoriesRdf.php
index 463f6e8..fc296d4 100644
--- a/includes/CategoriesRdf.php
+++ b/includes/CategoriesRdf.php
@@ -80,14 +80,23 @@
        /**
         * Write out the data for single category.
         * @param string $categoryName Category name
+        * @param bool $isHidden Hidden category?
+        * @param int $pages Page count (note this includes only Wiki articles, 
not subcats or files)
+        * @param int $subcategories Subcategory count
         */
-       public function writeCategoryData( $categoryName ) {
+       public function writeCategoryData( $categoryName, $isHidden, $pages, 
$subcategories ) {
                $title = Title::makeTitle( NS_CATEGORY, $categoryName );
                $this->rdfWriter->about( $this->titleToUrl( $title ) )
                        ->say( 'a' )
                        ->is( self::ONTOLOGY_PREFIX, 'Category' );
+               if ( $isHidden ) {
+                       $this->rdfWriter->is( self::ONTOLOGY_PREFIX, 
'HiddenCategory' );
+               }
                $titletext = $title->getText();
                $this->rdfWriter->say( 'rdfs', 'label' )->value( $titletext );
+               $this->rdfWriter->say( self::ONTOLOGY_PREFIX, 'pages' )->value( 
$pages );
+               $this->rdfWriter->say( self::ONTOLOGY_PREFIX, 'subcategories' 
)->value( $subcategories );
+               // TODO: do we want files too here? Easy to add, but don't have 
use case so far.
        }
 
        /**
diff --git a/maintenance/dumpCategoriesAsRdf.php 
b/maintenance/dumpCategoriesAsRdf.php
index c1835d0..3467932 100644
--- a/maintenance/dumpCategoriesAsRdf.php
+++ b/maintenance/dumpCategoriesAsRdf.php
@@ -58,14 +58,32 @@
        public function getCategoryIterator( IDatabase $dbr ) {
                $it = new BatchRowIterator(
                        $dbr,
-                       'page',
+                       [ 'page', 'page_props', 'category' ],
                        [ 'page_title' ],
                        $this->getBatchSize()
                );
                $it->addConditions( [
                        'page_namespace' => NS_CATEGORY,
                ] );
-               $it->setFetchColumns( [ 'page_title', 'page_id' ] );
+               $it->setFetchColumns( [
+                       'page_title',
+                       'page_id',
+                       'pp_propname',
+                       'cat_pages',
+                       'cat_subcats',
+                       'cat_files'
+               ] );
+               $it->addJoinConditions(
+                       [
+                               'page_props' => [
+                                       'LEFT JOIN', [ 'pp_propname' => 
'hiddencat', 'pp_page = page_id' ]
+                               ],
+                               'category' => [
+                                       'LEFT JOIN', [ 'cat_title = page_title' 
]
+                               ]
+                       ]
+
+               );
                return $it;
        }
 
@@ -90,6 +108,9 @@
                return new RecursiveIteratorIterator( $it );
        }
 
+       /**
+        * @param int $timestamp
+        */
        public function addDumpHeader( $timestamp ) {
                global $wgRightsUrl;
                $licenseUrl = $wgRightsUrl;
@@ -129,7 +150,12 @@
                foreach ( $this->getCategoryIterator( $dbr ) as $batch ) {
                        $pages = [];
                        foreach ( $batch as $row ) {
-                               $this->categoriesRdf->writeCategoryData( 
$row->page_title );
+                               $this->categoriesRdf->writeCategoryData(
+                                       $row->page_title,
+                                       $row->pp_propname === 'hiddencat',
+                                       (int)$row->cat_pages - 
(int)$row->cat_subcats - (int)$row->cat_files,
+                                       (int)$row->cat_subcats
+                               );
                                $pages[$row->page_id] = $row->page_title;
                        }
 
diff --git a/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt 
b/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
index b8bd8e0..bbb3787 100644
--- a/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
+++ b/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
@@ -7,10 +7,17 @@
 <http://acme.test/wiki/Special:CategoryDump> 
<http://www.w3.org/2002/07/owl#imports> 
<https://www.mediawiki.org/ontology/ontology.owl> .
 <http://acme.test/wiki/Category:Category_One> 
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> 
<https://www.mediawiki.org/ontology#Category> .
 <http://acme.test/wiki/Category:Category_One> 
<http://www.w3.org/2000/01/rdf-schema#label> "Category One" .
+<http://acme.test/wiki/Category:Category_One> 
<https://www.mediawiki.org/ontology#pages> 
"7"^^<http://www.w3.org/2001/XMLSchema#integer> .
+<http://acme.test/wiki/Category:Category_One> 
<https://www.mediawiki.org/ontology#subcategories> 
"10"^^<http://www.w3.org/2001/XMLSchema#integer> .
 <http://acme.test/wiki/Category:2_Category_Two> 
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> 
<https://www.mediawiki.org/ontology#Category> .
+<http://acme.test/wiki/Category:2_Category_Two> 
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> 
<https://www.mediawiki.org/ontology#HiddenCategory> .
 <http://acme.test/wiki/Category:2_Category_Two> 
<http://www.w3.org/2000/01/rdf-schema#label> "2 Category Two" .
+<http://acme.test/wiki/Category:2_Category_Two> 
<https://www.mediawiki.org/ontology#pages> 
"17"^^<http://www.w3.org/2001/XMLSchema#integer> .
+<http://acme.test/wiki/Category:2_Category_Two> 
<https://www.mediawiki.org/ontology#subcategories> 
"0"^^<http://www.w3.org/2001/XMLSchema#integer> .
 <http://acme.test/wiki/Category:Category_One> 
<https://www.mediawiki.org/ontology#isInCategory> 
<http://acme.test/wiki/Category:Parent_of_1> .
 <http://acme.test/wiki/Category:2_Category_Two> 
<https://www.mediawiki.org/ontology#isInCategory> 
<http://acme.test/wiki/Category:Parent_of_2> .
 
<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> 
<https://www.mediawiki.org/ontology#Category> .
 
<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
 <http://www.w3.org/2000/01/rdf-schema#label> 
"\u0422\u0440\u0435\u0442\u044C\u044F 
\u043A\u0430\u0442\u0435\u0433\u043E\u0440\u0438\u044F" .
+<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
 <https://www.mediawiki.org/ontology#pages> 
"0"^^<http://www.w3.org/2001/XMLSchema#integer> .
+<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
 <https://www.mediawiki.org/ontology#subcategories> 
"0"^^<http://www.w3.org/2001/XMLSchema#integer> .
 
<http://acme.test/wiki/Category:%D0%A2%D1%80%D0%B5%D1%82%D1%8C%D1%8F_%D0%BA%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%8F>
 <https://www.mediawiki.org/ontology#isInCategory> 
<http://acme.test/wiki/Category:Parent_of_3> .
diff --git a/tests/phpunit/maintenance/categoriesRdfTest.php 
b/tests/phpunit/maintenance/categoriesRdfTest.php
index 2edbae1..c0850ab 100644
--- a/tests/phpunit/maintenance/categoriesRdfTest.php
+++ b/tests/phpunit/maintenance/categoriesRdfTest.php
@@ -9,12 +9,33 @@
                return [
                        // batch 1
                        [
-                               (object)[ 'page_title' => 'Category One', 
'page_id' => 1 ],
-                               (object)[ 'page_title' => '2 Category Two', 
'page_id' => 2 ],
+                               (object)[
+                                       'page_title' => 'Category One',
+                                       'page_id' => 1,
+                                       'pp_propname' => null,
+                                       'cat_pages' => '20',
+                                       'cat_subcats' => '10',
+                                       'cat_files' => '3'
+                               ],
+                               (object)[
+                                       'page_title' => '2 Category Two',
+                                       'page_id' => 2,
+                                       'pp_propname' => 'hiddencat',
+                                       'cat_pages' => 20,
+                                       'cat_subcats' => 0,
+                                       'cat_files' => 3
+                               ],
                        ],
                        // batch 2
                        [
-                               (object)[ 'page_title' => 'Третья категория', 
'page_id' => 3 ],
+                               (object)[
+                                       'page_title' => 'Третья категория',
+                                       'page_id' => 3,
+                                       'pp_propname' => null,
+                                       'cat_pages' => '0',
+                                       'cat_subcats' => '0',
+                                       'cat_files' => '0'
+                               ],
                        ]
                ];
        }

-- 
To view, visit https://gerrit.wikimedia.org/r/392173
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I6d34c58f844411f891195776406e11acd2aef7b1
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <[email protected]>
Gerrit-Reviewer: Brian Wolff <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Lucas Werkmeister (WMDE) <[email protected]>
Gerrit-Reviewer: Multichill <[email protected]>
Gerrit-Reviewer: Parent5446 <[email protected]>
Gerrit-Reviewer: Smalyshev <[email protected]>
Gerrit-Reviewer: Thiemo Kreuz (WMDE) <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to