MaxSem has submitted this change and it was merged. Change subject: Backend for picking questions ......................................................................
Backend for picking questions Change-Id: Ia8a1fdb3cd6f3dbccc8409d57f6be57d6db2813d --- M WikiGrok.php A includes/Campaign.php A includes/ConfirmationCampaign.php A includes/Hooks.php A includes/LinkedPropsCampaign.php A includes/QuestionPicker.php A includes/WikibaseConnector.php 7 files changed, 715 insertions(+), 3 deletions(-) Approvals: MaxSem: Verified; Looks good to me, approved Kaldari: Looks good to me, approved diff --git a/WikiGrok.php b/WikiGrok.php index 6fce74c..c71e032 100644 --- a/WikiGrok.php +++ b/WikiGrok.php @@ -14,16 +14,69 @@ ); $autoloadClasses = array( - 'WikiGrok\Api\ApiResponse' => 'api/ApiResponse', - 'WikiGrok\EventLogger' => 'EventLogger', + 'Api\ApiResponse' => 'api/ApiResponse', + 'Campaign' => 'Campaign', + 'ConfirmationCampaign' => 'ConfirmationCampaign', + 'EventLogger' => 'EventLogger', + 'Hooks' => 'Hooks', + 'LinkedPropsCampaign' => 'LinkedPropsCampaign', + 'QuestionPicker' => 'QuestionPicker', + 'WikibaseConnector' => 'WikibaseConnector', ); foreach ( $autoloadClasses as $className => $classFilename ) { - $wgAutoloadClasses[$className] = __DIR__ . "/includes/$classFilename.php"; + $wgAutoloadClasses["WikiGrok\\$className"] = __DIR__ . "/includes/$classFilename.php"; } $wgAPIModules['wikigrokresponse'] = 'WikiGrok\Api\ApiResponse'; +$wgHooks['ParserBeforeTidy'][] = 'WikiGrok\Hooks::onParserBeforeTidy'; +$wgHooks['OutputPageParserOutput'][] = 'WikiGrok\Hooks::onOutputPageParserOutput'; $wgHooks['UnitTestsList'][] = function ( &$files ) { $files[] = __DIR__ . 
'/tests/phpunit/api/ApiResponseTest.php'; }; + +$wgConfigRegistry['wikigrok'] = 'GlobalVarConfig::newInstance'; + + +/** + * Configuration settings + */ + +/** + * Slow question campaigns, updated on page save only + * TESTING ONLY, DON'T ENABLE IN PRODUCTION + */ +$wgWikiGrokSlowCampaigns = array( +// Example: +// 'occupation' => array( +// 'type' => 'LinkedProps', +// 'property' => 'P4'/* occupation */, +// 'if' => array( 'P7'/* instance of */ => 'Q10'/* human */ ), +// //'ifNot' => array( 'P1', 'P2' ), +// 'searchLinks' => array( +// 'P7'/* instance of */ => 'Q3'/* profession */, +// /* OR */ +// 'P5'/* subclass of */ => 'Q5'/* occupation */, +// ), +// 'numLinks' => 10, +// ), +); + +/** + * Fast question campaigns, updated on page views + */ +$wgWikiGrokFastCampaigns = array( +// //Example: +// 'occupation_confirm' => array( +// 'type' => 'Confirmation', +// 'property' => 'P4'/* occupation */, +// //'if' => array( ... ), +// //'ifNot' => array( ... ), +// ), +); + +/** + * Fast campaign cache expiry in seconds + */ +$wgWikiGrokFastCampaignsCacheTime = 60 * 60 * 24; // 1 day \ No newline at end of file diff --git a/includes/Campaign.php b/includes/Campaign.php new file mode 100644 index 0000000..4e7cbe1 --- /dev/null +++ b/includes/Campaign.php @@ -0,0 +1,151 @@ +<?php + +namespace WikiGrok; + +use Wikibase\DataModel\Entity\EntityIdValue; + +/** + * Base class for picking questions + */ +abstract class Campaign { + /** @var array */ + protected $options; + /** @var QuestionPicker */ + protected $picker; + + private static $classes = array( + 'LinkedProps' => 'LinkedPropsCampaign', + 'Confirmation' => 'ConfirmationCampaign', + ); + + /** + * Instantiates compaign of a given type + * + * @param array $options + * @param QuestionPicker $picker + * @return Campaign + */ + public static function create( array $options, QuestionPicker $picker ) { + $type = $options['type']; + if ( !isset( self::$classes[$type] ) ) { + throw new \MWException( "Unknown campaign type 
'$type'" );
+		}
+		$class = 'WikiGrok\\' . self::$classes[$type];
+		return new $class( $options, $picker );
+	}
+
+	/**
+	 * @param array $options Campaign settings; this class reads 'property', 'numLinks', 'if' and 'ifNot'
+	 * @param QuestionPicker $picker
+	 */
+	public function __construct( array $options, QuestionPicker $picker ) {
+		$this->options = $options;
+		$this->picker = $picker;
+	}
+
+	/**
+	 * @return string ID of the property this campaign asks about
+	 */
+	public function getProperty() {
+		return $this->options['property'];
+	}
+
+	/**
+	 * @return int Number of page links needed to analyze a page for this campaign,
+	 *   0 if 'numLinks' is not configured
+	 */
+	public function getNumLinks() {
+		return isset( $this->options['numLinks'] ) ? $this->options['numLinks'] : 0;
+	}
+
+	/**
+	 * Checks whether this campaign can be used for a page with the given properties
+	 *
+	 * The page qualifies when it satisfies the 'if' conditions (if configured)
+	 * and does not satisfy the 'ifNot' conditions (if configured). Either option
+	 * may be used on its own; with neither configured every page qualifies.
+	 *
+	 * @param array $props Wikidata properties
+	 * @return bool
+	 */
+	public function isApplicableToPage( array $props ) {
+		// An absent 'if' imposes no requirement. It used to be evaluated as
+		// "not satisfied", so a campaign with only 'ifNot' could never apply.
+		// The (array) casts mirror getRequestedCurrentPageProps().
+		if ( isset( $this->options['if'] )
+			&& !$this->propMatch( $props, (array)$this->options['if'] )
+		) {
+			return false;
+		}
+		return !( isset( $this->options['ifNot'] )
+			&& $this->propMatch( $props, (array)$this->options['ifNot'] ) );
+	}
+
+	/**
+	 * Picks questions
+	 *
+	 * @param array $thisProps Properties of the current page
+	 * @param array $linkedProps Properties of linked pages, keyed by item ID
+	 * @return array[]
+	 */
+	public abstract function getQuestions( array $thisProps, array $linkedProps );
+
+	/**
+	 * Returns Wikibase properties of current page this campaign needs
+	 *
+	 * @return array Property IDs as keys (values are always 1): the campaign
+	 *   property plus everything mentioned in 'if' and 'ifNot'
+	 */
+	public function getRequestedCurrentPageProps() {
+		$props = array( $this->getProperty() => 1 );
+		if ( isset( $this->options['if'] ) ) {
+			$props += $this->propsFromArray( (array)$this->options['if'] );
+		}
+		if ( isset( $this->options['ifNot'] ) ) {
+			$props += $this->propsFromArray( (array)$this->options['ifNot'] );
+		}
+		return $props;
+	}
+
+	/**
+	 * Returns Wikibase properties of pages linked to by the current page this campaign needs
+	 *
+	 * @return array Empty in the base class
+	 */
+	public function getRequestedLinkedProps() {
+		return array();
+	}
+
+	/**
+	 * Checks page properties against an array of conditions
+	 *
+	 * Conditions are alternatives: the first one that holds makes the whole
+	 * check succeed. An integer-keyed entry ( 'P1' ) asks whether the property
+	 * is present at all; a string-keyed entry ( 'P1' => 'Q2' ) asks whether the
+	 * property has an entity ID value with that prefixed ID.
+	 *
+	 * @param array $props
+	 * @param array $conds
+	 * @return bool
+	 */
+	protected function propMatch( array $props, array $conds ) {
+		foreach ( $conds as $key => $value ) {
+			if ( is_int( $key ) ) {
+				if ( isset( $props[$value] ) ) {
+					return true;
+				}
+			} elseif ( isset( $props[$key] ) ) {
+				foreach ( $props[$key] as $claim ) {
+					if ( $claim instanceof EntityIdValue
+						&& $claim->getEntityId()->getPrefixedId() == $value
+					) {
+						return true;
+					}
+				}
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Returns IDs of properties needed by a conditions array
+	 *
+	 * @param array $arr Conditions in the format accepted by propMatch()
+	 * @return array Property IDs as keys, values are always 1
+	 */
+	protected function propsFromArray( array $arr ) {
+		$props = array();
+		foreach ( $arr as $key => $value ) {
+			if ( is_int( $key ) ) {
+				$props[$value] = 1;
+			} else {
+				$props[$key] = 1;
+			}
+		}
+		return $props;
+	}
+}
diff --git a/includes/ConfirmationCampaign.php b/includes/ConfirmationCampaign.php
new file mode 100644
index 0000000..c43d3a0
--- /dev/null
+++ b/includes/ConfirmationCampaign.php
@@ -0,0 +1,48 @@
+<?php
+
+namespace WikiGrok;
+
+use Wikibase\DataModel\Entity\EntityIdValue;
+
+/**
+ * Asks questions to confirm already present claims
+ */
+class ConfirmationCampaign extends Campaign {
+
+	public function isApplicableToPage( array $props ) {
+		return parent::isApplicableToPage( $props )
+			&& $this->propMatch( $props, (array)$this->getProperty() );
+	}
+
+	/**
+	 * @param array $thisProps
+	 * @param array $linkedProps
+	 * @return array[]
+	 */
+	public function getQuestions( array $thisProps, array $linkedProps ) {
+		global $wgContLang;
+		$profileSection = new \ProfileSection( __METHOD__ );
+
+		$suggestions = array();
+		foreach ( $thisProps[$this->getProperty()] as $value ) {
+			if ( !$value instanceof EntityIdValue ) {
+				continue;
+			}
+			$entityId = $value->getEntityId();
+			$entity = $this->picker->getWikibase()->getEntityLookup()->getEntity(
$entityId );
+			if ( !$entity ) {
+				continue;
+			}
+			try {
+				$label = $entity
+					->getFingerprint()
+					->getLabel( $wgContLang->getCode() )
+					->getText();
+				$suggestions[$entityId->getPrefixedId()] = $label;
+			} catch ( \Exception $ex ) {
+				// No label in content language, don't add
+			}
+		}
+		return $suggestions;
+	}
+}
diff --git a/includes/Hooks.php b/includes/Hooks.php
new file mode 100644
index 0000000..e6dd9c2
--- /dev/null
+++ b/includes/Hooks.php
@@ -0,0 +1,101 @@
+<?php
+
+namespace WikiGrok;
+
+/**
+ * Hook handlers class
+ */
+class Hooks {
+	/**
+	 * ParserBeforeTidy hook handler
+	 * Attaches suggestions to page properties: the questions of "slow"
+	 * campaigns are serialized into the QuestionPicker::CACHE_KEY property
+	 * @see https://www.mediawiki.org/wiki/Manual:Hooks/ParserBeforeTidy
+	 *
+	 * @param \Parser $parser
+	 * @return bool Always true
+	 */
+	public static function onParserBeforeTidy( \Parser &$parser ) {
+		$profileSection = new \ProfileSection( __METHOD__ );
+
+		$parserOutput = $parser->getOutput();
+		$campaigns = new QuestionPicker( self::getConfig() );
+		try {
+			$data = $campaigns->getSlowQuestions( $parserOutput );
+			if ( $data ) {
+				$value = serialize( $data );
+				$parserOutput->setProperty( QuestionPicker::CACHE_KEY, $value );
+			}
+		} catch ( \Exception $ex ) {
+			// Best effort: a failure to pick questions must not break the parse.
+			// NOTE(review): this logs to the 'mobile' group - confirm that is intended
+			wfDebugLog( 'mobile', __METHOD__ . "(): {$ex->getMessage()}" );
+		}
+
+		return true;
+	}
+
+	/**
+	 * OutputPageParserOutput hook handler
+	 * Exposes the questions of still-configured campaigns to JavaScript as
+	 * 'wgWikiGrokCampaigns', for anonymous users in mobile view only
+	 * @see https://www.mediawiki.org/wiki/Manual:Hooks/OutputPageParserOutput
+	 *
+	 * @param \OutputPage $out
+	 * @param \ParserOutput $parserOutput
+	 * @return bool Always true
+	 */
+	public static function onOutputPageParserOutput( \OutputPage $out, \ParserOutput $parserOutput ) {
+		$profileSection = new \ProfileSection( __METHOD__ );
+
+		if ( !\MobileContext::singleton()->shouldDisplayMobileView()
+			|| !$out->getUser()->isAnon()
+		) {
+			return true;
+		}
+
+		try {
+			$prop = $parserOutput->getProperty( QuestionPicker::CACHE_KEY );
+			$item = $parserOutput->getProperty( 'wikibase_item' );
+			$config = self::getConfig();
+			$campaigns = new QuestionPicker( $config );
+
+			// Array union: on a name clash the slow campaign's entry wins
+			$list =
+				self::filterCampaigns(
+					unserialize( $prop ),
+					$config->get( 'WikiGrokSlowCampaigns' )
+				)
+				+ self::filterCampaigns(
+					$campaigns->getFastQuestions( $out->getTitle(), $item ),
+					$config->get( 'WikiGrokFastCampaigns')
+				);
+			if ( $list ) {
+				$out->addJsConfigVars( 'wgWikiGrokCampaigns', $list );
+			}
+		} catch ( \Exception $ex ) {
+			// NOTE(review): this logs to the 'mobile' group - confirm that is intended
+			wfDebugLog( 'mobile', __METHOD__ . "(): {$ex->getMessage()}" );
+		}
+
+		return true;
+	}
+
+	/**
+	 * Check cached campaigns in case there are campaigns disabled by now
+	 *
+	 * @param mixed $prop Cached questions keyed by campaign name; anything
+	 *   that is not an array is treated as "no questions"
+	 * @param array $campaigns Currently configured campaigns, keyed by name
+	 * @return array Entries of $prop whose campaign is still configured
+	 */
+	private static function filterCampaigns( $prop, array $campaigns ) {
+		if ( !is_array( $prop ) ) {
+			return array();
+		}
+		// Intersect unconditionally. The previous code skipped filtering when
+		// $campaigns was empty, so with every campaign disabled all cached
+		// questions were still passed through.
+		return array_intersect_key( $prop, $campaigns );
+	}
+
+	/**
+	 * @return \Config Configuration registered under the name 'wikigrok'
+	 */
+	private static function getConfig() {
+		return \ConfigFactory::getDefaultInstance()->makeConfig( 'wikigrok' );
+	}
+}
\ No newline at end of file
diff --git a/includes/LinkedPropsCampaign.php b/includes/LinkedPropsCampaign.php
new file mode 100644
index 0000000..b78e29b
--- /dev/null
+++ b/includes/LinkedPropsCampaign.php
@@ -0,0 +1,44 @@
+<?php
+
+namespace WikiGrok;
+
+/**
+ * Picks suggestions for new properties by looking up links on pages
+ */
+class LinkedPropsCampaign extends Campaign {
+	/**
+	 * Applicable only when the page does not have the campaign property yet
+	 *
+	 * @param array $props
+	 * @return bool
+	 */
+	public function isApplicableToPage( array $props ) {
+		return parent::isApplicableToPage( $props )
+			&& !$this->propMatch( $props, (array)$this->getProperty() );
+	}
+
+	/**
+	 * @return array Property IDs mentioned in the 'searchLinks' option, as keys
+	 */
+	public function getRequestedLinkedProps() {
+		return $this->propsFromArray( $this->options['searchLinks'] );
+	}
+
+	/**
+	 * Suggests every linked item that matches the 'searchLinks' conditions
+	 * and has a label in the content language
+	 *
+	 * @param array $thisProps
+	 * @param array $linkedProps Properties of linked items, keyed by item ID
+	 * @return array Item ID => label in the content language
+	 */
+	public function getQuestions( array $thisProps, array $linkedProps ) {
+		global $wgContLang;
+		$profileSection = new \ProfileSection( __METHOD__ );
+
+		$suggestions = array();
+		foreach ( $linkedProps as $id => $props ) {
+			// propMatch() only accepts arrays, while a property lookup for an
+			// item that could not be loaded may have produced a non-array
+			if ( !is_array( $props )
+				|| !$this->propMatch( $props, $this->options['searchLinks'] )
+			) {
+				continue;
+			}
+			$entity = $this->picker->getWikibase()->getEntity( $id );
+			if ( !$entity ) {
+				// Same guard as in ConfirmationCampaign: calling a method on a
+				// missing entity is a fatal error, not an \Exception, so the
+				// catch below would not intercept it
+				continue;
+			}
+			try {
+				$label = $entity
+					->getFingerprint()
+					->getLabel( $wgContLang->getCode() )
+					->getText();
+				$suggestions[$id] = $label;
+			} catch ( \Exception $ex ) {
+				// No label in content language, don't add
+			}
+		}
+		return $suggestions;
+	}
+}
diff
--git a/includes/QuestionPicker.php b/includes/QuestionPicker.php
new file mode 100644
index 0000000..583d0f7
--- /dev/null
+++ b/includes/QuestionPicker.php
@@ -0,0 +1,194 @@
+<?php
+
+namespace WikiGrok;
+
+/**
+ * Class that generates multiple WikiGrok questions
+ */
+class QuestionPicker {
+	const CACHE_KEY = 'wikigrok_questions_v1';
+
+	/** @var WikibaseConnector */
+	private $wikibase;
+	/** @var \Config */
+	private $config;
+
+	public function __construct( \Config $config ) {
+		$this->config = $config;
+		$this->wikibase = new WikibaseConnector();
+	}
+
+	/**
+	 * @return \Config
+	 */
+	public function getConfig() {
+		return $this->config;
+	}
+
+	/**
+	 * @return WikibaseConnector
+	 */
+	public function getWikibase() {
+		return $this->wikibase;
+	}
+
+	/**
+	 * Returns WikiGrok questions
+	 *
+	 * @param string $configName Campaign group, 'Slow' or 'Fast'
+	 * @param string $item Wikibase item ID of the current page
+	 * @param array[] $links Page links, format: [ namespace => [ 'Title' => page_id ] ]
+	 * @return array Campaign name => array( 'property' => ..., 'questions' => ... ),
+	 *   only for campaigns that produced at least one question
+	 */
+	protected function getQuestions( $configName, $item, array $links ) {
+		$profileSection = new \ProfileSection( __METHOD__ );
+
+		$campaigns = $this->getCampaigns( $configName );
+		if ( !$item || !$campaigns ) {
+			return array();
+		}
+
+		$numLinks = 0;
+		$requestedProps = array();
+		$linkedProps = array();
+		$linkSearchOnly = true;
+		foreach ( $campaigns as $campaign ) {
+			// Accumulate the needs of ALL campaigns. Plain assignment kept only
+			// the last campaign's properties, so earlier campaigns were checked
+			// against data that had never been fetched.
+			$requestedProps += $campaign->getRequestedCurrentPageProps();
+			$linkSearchOnly = $linkSearchOnly && $campaign->getNumLinks() > 0;
+		}
+		// Nothing to do if every campaign needs links and the page has none in mainspace
+		if ( !$requestedProps || ( $linkSearchOnly && !isset( $links[NS_MAIN] ) ) ) {
+			return array();
+		}
+		$requestedProps = array_keys( $requestedProps );
+		$props = $this->wikibase->getValuesForProperties( $item, $requestedProps );
+		if ( !is_array( $props ) ) {
+			// The lookup yields a non-array when the item cannot be loaded;
+			// Campaign::isApplicableToPage() only accepts arrays
+			return array();
+		}
+		foreach ( $campaigns as $name => $campaign ) {
+			if ( !$campaign->isApplicableToPage( $props ) ) {
+				unset( $campaigns[$name] );
+				continue;
+			}
+			$numLinks = max( $numLinks, $campaign->getNumLinks() );
+			$linkedProps += $campaign->getRequestedLinkedProps();
+		}
+		$linkedProps = array_keys( $linkedProps );
+		if ( !$campaigns ) {
+			return array();
+		}
+
+		$wikibaseIds = $this->getWikibaseIdsForLinks( $links, $numLinks );
+
+		$results = array();
+		$data = array();
+		foreach ( $wikibaseIds as $id ) {
+			$values = $this->wikibase->getValuesForProperties( $id, $linkedProps );
+			// Leave out linked items that could not be loaded
+			if ( is_array( $values ) ) {
+				$data[$id] = $values;
+			}
+		}
+		foreach ( $campaigns as $name => $campaign ) {
+			$suggestions = $campaign->getQuestions( $props, $data );
+			if ( $suggestions ) {
+				$results[$name] = array(
+					'property' => $campaign->getProperty(),
+					'questions' => $suggestions,
+				);
+
+			}
+		}
+
+		return $results;
+	}
+
+	/**
+	 * Picks questions for "slow" campaigns at parse time
+	 *
+	 * @param \ParserOutput $parserOutput Supplies the 'wikibase_item' property and the page links
+	 * @return array
+	 */
+	public function getSlowQuestions( \ParserOutput $parserOutput ) {
+		$profileSection = new \ProfileSection( __METHOD__ );
+
+		$item = $parserOutput->getProperty( 'wikibase_item' );
+		$links = $parserOutput->getLinks();
+
+		return $this->getQuestions( 'Slow', $item, $links );
+	}
+
+	/**
+	 * Picks questions for "fast" campaigns at view time (cached)
+	 *
+	 * The result is cached per latest revision ID for
+	 * WikiGrokFastCampaignsCacheTime seconds. No page links are analyzed.
+	 *
+	 * @param \Title $title
+	 * @param string $item
+	 * @return array[]
+	 */
+	public function getFastQuestions( \Title $title, $item ) {
+		global $wgMemc;
+		$profileSection = new \ProfileSection( __METHOD__ );
+
+		$revId = $title->getLatestRevID();
+		$campaigns = $this->getCampaigns( 'Fast' );
+		if ( !$item || !$revId || !$campaigns ) {
+			return array();
+		}
+		$memcKey = wfMemcKey( self::CACHE_KEY, $revId );
+		$result = $wgMemc->get( $memcKey );
+		// An empty array is a valid cached result, hence is_array()
+		if ( is_array( $result ) ) {
+			return $result;
+		}
+
+		$result = $this->getQuestions( 'Fast', $item, array() );
+		$wgMemc->set( $memcKey, $result, $this->config->get( 'WikiGrokFastCampaignsCacheTime' ) );
+
+		return $result;
+	}
+
+	/**
+	 * Returns an array of Campaign objects for a given campaign type
+	 *
+	 * @param string $type 'Slow' or 'Fast', selects the WikiGrok{$type}Campaigns setting
+	 * @return Campaign[] Keyed by campaign name
+	 */
+	private function getCampaigns( $type ) {
+		$campaigns = $this->config->get( "WikiGrok{$type}Campaigns" );
+		$result = array();
+		foreach ( $campaigns as $name => $options ) {
+			$result[$name] =
Campaign::create( $options, $this );
+		}
+		return $result;
+	}
+
+	/**
+	 * Returns array of wikidata IDs for first $numLinks mainspace links on page
+	 *
+	 * Links without a 'wikibase_item' page property produce no entry, so the
+	 * result may hold fewer than $numLinks IDs.
+	 *
+	 * @param array $links Links returned by ParserOutput, format: [ namespace => ['Title' => page_id ] ]
+	 * @param int $numLinks Number of links to check
+	 * @return array List of 'wikibase_item' values of the checked pages
+	 */
+	private function getWikibaseIdsForLinks( array $links, $numLinks ) {
+		$profileSection = new \ProfileSection( __METHOD__ );
+
+		if ( !isset( $links[NS_MAIN] ) ) {
+			return array();
+		}
+		// page_id => dbkey
+		$linksToCheck = array_flip( array_slice( $links[NS_MAIN], 0, $numLinks ) );
+		// Remove the only nonexistent page left by array_flip()
+		unset( $linksToCheck[0] );
+		if ( !$linksToCheck ) {
+			return array();
+		}
+		$dbr = wfGetDB( DB_SLAVE );
+		// Join page with page_props to read the item ID stored for each linked page
+		$res = $dbr->select(
+			array( 'page', 'page_props' ),
+			array( 'page_id', 'pp_value' ),
+			array(
+				'page_namespace' => NS_MAIN,
+				'page_id' => array_keys( $linksToCheck ),
+				'page_id = pp_page',
+				'pp_propname' => 'wikibase_item',
+			),
+			__METHOD__
+		);
+		$ids = array();
+		foreach ( $res as $row ) {
+			$ids[] = $row->pp_value;
+		}
+		return $ids;
+	}
+}
diff --git a/includes/WikibaseConnector.php b/includes/WikibaseConnector.php
new file mode 100644
index 0000000..eab3e6a
--- /dev/null
+++ b/includes/WikibaseConnector.php
@@ -0,0 +1,121 @@
+<?php
+
+namespace WikiGrok;
+
+use Wikibase\Client\WikibaseClient;
+use Wikibase\DataModel\Entity\EntityId;
+use Wikibase\DataModel\Entity\ItemId;
+use Wikibase\DataModel\Entity\PropertyId;
+use Wikibase\DataModel\Snak\PropertyValueSnak;
+
+/**
+ * Class that interacts with Wikibase
+ */
+class WikibaseConnector {
+	private $entityLookup = null;
+
+	/**
+	 * Returns the entity lookup of the default WikibaseClient instance,
+	 * fetched on first use and reused afterwards
+	 *
+	 * @return \Wikibase\Lib\Store\EntityLookup
+	 */
+	public function getEntityLookup() {
+		if ( !$this->entityLookup ) {
+			$this->entityLookup = WikibaseClient::getDefaultInstance()
+				->getStore()
+				->getEntityLookup();
+		}
+		return $this->entityLookup;
+	}
+
+	/**
+	 * @param string $entityId
+	 * @return
\Wikibase\DataModel\Entity\Entity
+	 */
+	public function getEntity( $entityId ) {
+		return $this->getEntityLookup()->getEntity(
+			new ItemId( $entityId )
+		);
+	}
+
+	/**
+	 * Returns a list of values of a given property of a given Wikidata item
+	 *
+	 * Only the best statements per property are considered, and only main
+	 * snaks that carry a value.
+	 *
+	 * @param string $itemId Item identifier
+	 * @param string $property Property identifier
+	 * @return \DataValues\DataValue[] Empty if the item could not be loaded
+	 */
+	public function getValuesForProperty( $itemId, $property ) {
+		$section = new \ProfileSection( __METHOD__ );
+
+		$propertyId = new PropertyId( $property );
+
+		/* @var \Wikibase\DataModel\Entity\Item $item */
+		$item = $this->getEntity( $itemId );
+		if ( !$item ) {
+			return array();
+		}
+
+		// There should really be a nicer way.
+		// StatementList should support this...
+		$values = array();
+		$statements =
+			$item->getStatements()->getBestStatementPerProperty();
+
+		/* @var \Wikibase\DataModel\Statement\Statement $statement */
+		foreach ( $statements->getIterator() as $statement ) {
+			$statementPropertyId = $statement->getPropertyId();
+			if ( $statementPropertyId->equals( $propertyId ) ) {
+				$snak = $statement->getMainSnak();
+
+				if ( $snak instanceof PropertyValueSnak ) {
+					$values[] = $snak->getDataValue();
+				}
+			}
+		}
+
+		return $values;
+	}
+
+	/**
+	 * Returns a list of values of given properties of a given Wikidata item
+	 *
+	 * Properties without a value on the item have no key in the result.
+	 *
+	 * @param string $itemId Item identifier
+	 * @param array $properties Array of property identifiers
+	 * @return array array( 'property id' => \DataValues\DataValue[] ),
+	 *   empty if the item could not be loaded
+	 */
+	public function getValuesForProperties( $itemId, array $properties ) {
+		$section = new \ProfileSection( __METHOD__ );
+
+		$propertyIds = array();
+		foreach ( $properties as $property ) {
+			$propertyIds[$property] = new PropertyId( $property );
+		}
+
+		/* @var \Wikibase\DataModel\Entity\Item $item */
+		$item = $this->getEntity( $itemId );
+		if ( !$item ) {
+			// Always return an array, as documented and as
+			// getValuesForProperty() does: callers pass the result straight
+			// to array-typed parameters, where null is a fatal error
+			return array();
+		}
+
+		// There should really be a nicer way.
+		// StatementList should support this...
+		$values = array();
+		$statements =
+			$item->getStatements()->getBestStatementPerProperty();
+
+		/* @var \Wikibase\DataModel\Statement\Statement $statement */
+		foreach ( $statements->getIterator() as $statement ) {
+			$statementPropertyId = $statement->getPropertyId();
+			foreach ( $propertyIds as $idStr => $idObj ) {
+				if ( $statementPropertyId->equals( $idObj ) ) {
+					$snak = $statement->getMainSnak();
+
+					if ( $snak instanceof PropertyValueSnak ) {
+						// Keyed by the identifier string the caller passed in
+						$values[$idStr][] = $snak->getDataValue();
+					}
+				}
+			}
+		}
+
+		return $values;
+	}
+}
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/166148
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ia8a1fdb3cd6f3dbccc8409d57f6be57d6db2813d
Gerrit-PatchSet: 10
Gerrit-Project: mediawiki/extensions/WikiGrok
Gerrit-Branch: master
Gerrit-Owner: MaxSem <maxsem.w...@gmail.com>
Gerrit-Reviewer: Aude <aude.w...@gmail.com>
Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Hoo man <h...@online.de>
Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org>
Gerrit-Reviewer: Kaldari <rkald...@wikimedia.org>
Gerrit-Reviewer: MaxSem <maxsem.w...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits