jenkins-bot has submitted this change and it was merged. Change subject: Suggestion lists - Database and APIs ......................................................................
Suggestion lists - Database and APIs * Script to fetch the featured articles from a category not present in a target language. * SQL file for the required tables * SuggestionList and Suggestion classes * SuggestionListManager for DB access - create new list - delete a list - get relevant suggestions (to be tuned later) * API for getting suggestions for a user Introduces ContentTranslationEnableSuggestions configuration which is false by default. Bug: T92987 Bug: T106405 Change-Id: I62b168957c4a9e2d0518fdbb48d6ceec44bf5dbd --- A api/ApiQueryContentTranslationSuggestions.php M extension.json M i18n/en.json M i18n/qqq.json M includes/SiteMapper.php A includes/Suggestion.php A includes/SuggestionList.php A includes/SuggestionListManager.php A scripts/manage-lists.php A sql/lists.sql 10 files changed, 649 insertions(+), 10 deletions(-) Approvals: Amire80: Looks good to me, approved jenkins-bot: Verified diff --git a/api/ApiQueryContentTranslationSuggestions.php b/api/ApiQueryContentTranslationSuggestions.php new file mode 100644 index 0000000..5efe7fa --- /dev/null +++ b/api/ApiQueryContentTranslationSuggestions.php @@ -0,0 +1,99 @@ +<?php +/** + * Api module for querying translation suggestions. + * + * @file + * @copyright See AUTHORS.txt + * @license GPL-2.0+ + */ + +use ContentTranslation\Translator; +use ContentTranslation\SuggestionListManager; + +/** + * Api module for querying translation suggestions. 
+ * + * @ingroup API ContentTranslationAPI + */ +class ApiQueryContentTranslationSuggestions extends ApiQueryGeneratorBase { + public function __construct( $query, $moduleName ) { + parent::__construct( $query, $moduleName ); + } + + public function execute() { + $this->run(); + } + + public function executeGenerator( $resultPageSet ) { + $this->run( $resultPageSet ); + } + + /** + * @param ApiPageSet $resultPageSet + * TODO: Use the limit parameter + */ + private function run( $resultPageSet = null ) { + $config = $this->getConfig(); + if ( !$config->get( 'ContentTranslationEnableSuggestions' ) ) { + $this->dieUsage( 'Suggestions not enabled for this wiki', 'suggestionsdisabled' ); + } + $params = $this->extractRequestParams(); + $result = $this->getResult(); + $user = $this->getUser(); + + if ( $params['from'] === $params['to'] ) { + $this->dieUsage( + 'Source and target languages cannot be the same. Use from, to API params.', + 'invalidparam' + ); + } + + $translator = new Translator( $user ); + $manager = new SuggestionListManager(); + $data = $manager->getRelevantSuggestions( $translator, $params['from'], $params['to'] ); + + $lists = array(); + foreach ( $data['lists'] as $list ) { + $lists[$list->getId()] = array( + 'displayName' => $list->getDisplayNameMessage( $this->getContext() )->text(), + 'name' => $list->getName(), + 'type' => $list->getType(), + ); + } + + $result->addValue( array( 'query', $this->getModuleName() ), 'lists', $lists ); + + $suggestions = array(); + foreach ( $data['suggestions'] as $suggestion ) { + $suggestions[] = array( + 'name' => $suggestion->getTitle()->getPrefixedText(), + 'sourceLanguage' => $suggestion->getSourceLanguage(), + 'targetLanguage' => $suggestion->getTargetLanguage(), + 'listId' => $suggestion->getListId(), + ); + } + + $result->addValue( array( 'query', $this->getModuleName() ), 'suggestions', $suggestions ); + } + + public function getAllowedParams() { + $allowedParams = array( + 'from' => array( + 
ApiBase::PARAM_TYPE => 'string', + ApiBase::PARAM_REQUIRED => true, + ), + 'to' => array( + ApiBase::PARAM_TYPE => 'string', + ApiBase::PARAM_REQUIRED => true, + ), + ); + return $allowedParams; + } + + protected function getExamplesMessages() { + return array( + 'action=query&list=contenttranslationsuggestions&from=en&to=es' => + 'apihelp-query+contenttranslationsuggestions-example-1', + ); + } +} diff --git a/extension.json b/extension.json index ab58785..b8910ae 100644 --- a/extension.json +++ b/extension.json @@ -42,6 +42,7 @@ }, "APIListModules": { "contenttranslation": "ApiQueryContentTranslation", + "contenttranslationsuggestions": "ApiQueryContentTranslationSuggestions", "contenttranslationstats": "ApiQueryContentTranslationStats", "contenttranslationlangtrend": "ApiQueryContentTranslationLanguageTrend", "cxpublishedtranslations": "ApiQueryPublishedTranslations" @@ -60,6 +61,7 @@ "ApiContentTranslationPublish": "api/ApiContentTranslationPublish.php", "ApiContentTranslationToken": "api/ApiContentTranslationToken.php", "ApiQueryContentTranslation": "api/ApiQueryContentTranslation.php", + "ApiQueryContentTranslationSuggestions": "api/ApiQueryContentTranslationSuggestions.php", "ApiQueryContentTranslationLanguageTrend": "api/ApiQueryContentTranslationLanguageTrend.php", "ApiQueryContentTranslationStats": "api/ApiQueryContentTranslationStats.php", "ApiQueryPublishedTranslations": "api/ApiQueryPublishedTranslations.php", @@ -71,6 +73,9 @@ "ContentTranslation\\SiteMapper": "includes/SiteMapper.php", "ContentTranslation\\Stats": "includes/Stats.php", "ContentTranslation\\Translation": "includes/Translation.php", + "ContentTranslation\\Suggestion": "includes/Suggestion.php", + "ContentTranslation\\SuggestionList": "includes/SuggestionList.php", + "ContentTranslation\\SuggestionListManager": "includes/SuggestionListManager.php", "ContentTranslation\\Translator": "includes/Translator.php", "SpecialContentTranslation": "specials/SpecialContentTranslation.php", 
"SpecialContentTranslationStats": "specials/SpecialContentTranslationStats.php" @@ -128,7 +133,8 @@ "algorithm": "HS256", "key": "", "age": "3600" - } + }, + "ContentTranslationEnableSuggestions": false }, "ResourceModules": { "ext.cx.contributions": { diff --git a/i18n/en.json b/i18n/en.json index f9644d6..d78317f 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -183,5 +183,6 @@ "cx-tools-link-external-link-placeholder": "Add external link", "cx-tools-link-to-another-page": "Link to another page", "cx-tools-link-apply": "Done", - "mw-pageselector-missing": "Page does not exist" + "mw-pageselector-missing": "Page does not exist", + "cx-suggestionlist-featured": "Featured article" } diff --git a/i18n/qqq.json b/i18n/qqq.json index 4aec7dd..bca3f7a 100644 --- a/i18n/qqq.json +++ b/i18n/qqq.json @@ -188,5 +188,6 @@ "cx-tools-link-external-link-placeholder": "Placeholder text for the input field to enter an external link", "cx-tools-link-to-another-page": "Opens a tool that allows linking to any internal or external page.", "cx-tools-link-apply": "Button label to apply the link selection from link tool card\n{{Identical|Done}}", - "mw-pageselector-missing": "Message shown in page selector when the search did not fetch any result" + "mw-pageselector-missing": "Message shown in page selector when the search did not fetch any result", + "cx-suggestionlist-featured": "A type of a suggested page to translate" } diff --git a/includes/SiteMapper.php b/includes/SiteMapper.php index e2f48a0..f6d7a7b 100644 --- a/includes/SiteMapper.php +++ b/includes/SiteMapper.php @@ -3,17 +3,29 @@ class SiteMapper { /** + * Get the the domain code matching language + * + * @param string $language Language code (MediaWiki internal format) + * @return string + */ + public static function getDomainCode( $language ) { + global $wgContentTranslationDomainCodeMapping; + + if ( isset( $wgContentTranslationDomainCodeMapping[$language] ) ) { + return $wgContentTranslationDomainCodeMapping[$language]; + 
} + + return $domain; + } + + + /** * Get the page URL constructed from the domain template of sites */ public static function getPageURL( $language, $title ) { - global $wgContentTranslationSiteTemplates, - $wgContentTranslationDomainCodeMapping; + global $wgContentTranslationSiteTemplates; - if ( isset( $wgContentTranslationDomainCodeMapping[$language] ) ) { - $domain = $wgContentTranslationDomainCodeMapping[$language]; - } else { - $domain = $language; - } + $domain = self::getDomainCode( $language ); return str_replace( array( '$1', '$2' ), diff --git a/includes/Suggestion.php b/includes/Suggestion.php new file mode 100644 index 0000000..65513b9 --- /dev/null +++ b/includes/Suggestion.php @@ -0,0 +1,55 @@ +<?php + +namespace ContentTranslation; + +class Suggestion { + protected $listId; + protected $title; + protected $sourceLanguage; + protected $targetLanguage; + + public function __construct( array $params ) { + $this->listId = (int)$params['listId']; + $this->title = (string)$params['title']; + $this->sourceLanguage = (string)$params['sourceLanguage']; + $this->targetLanguage = (string)$params['targetLanguage']; + } + + /** + * @param stdClass $row + * @return SuggestionList + */ + public static function newFromRow( $row ) { + $params = array( + 'listId' => $row->cxs_list_id, + 'title' => $row->cxs_title, + 'sourceLanguage' => $row->cxs_source_language, + 'targetLanguage' => $row->cxs_target_language, + ); + + return new Suggestion( $params ); + } + + public function getListId() { + return $this->listId; + } + + public function getTitle() { + return \Title::newFromText( $this->title ); + } + + public function getSourceLanguage() { + return $this->sourceLanguage; + } + + public function getTargetLanguage() { + return $this->targetLanguage; + } + + /** + * @return string + */ + public function __toString() { + return $this->title; + } +} diff --git a/includes/SuggestionList.php b/includes/SuggestionList.php new file mode 100644 index 0000000..59afc5d --- 
/dev/null +++ b/includes/SuggestionList.php @@ -0,0 +1,122 @@ +<?php + +namespace ContentTranslation; + +class SuggestionList { + const TYPE_DEFAULT = 0; + const TYPE_FEATURED = 1; + + protected $id; + protected $name; + protected $info; + protected $owner; + protected $startTime; + protected $endTime; + protected $type; + protected $public; + + public function __construct( array $params ) { + if ( isset( $params['id'] ) ) { + $this->id = (int)$params['id']; + } + + $this->name = (string)$params['name']; + + if ( isset( $params['info'] ) ) { + $this->info = (string)$params['info']; + } + + if ( isset( $params['owner'] ) ) { + $this->owner = (int)$params['owner']; + } + + if ( isset( $params['public'] ) ) { + $this->public = (bool)$params['public']; + } + + if ( isset( $params['startTime'] ) ) { + $this->startTime = $params['startTime']; + } + + if ( isset( $params['endTime'] ) ) { + $this->endTime = $params['endTime']; + } + + if ( isset( $params['type'] ) ) { + $this->type = $params['type']; + } + } + + /** + * @param stdClass $row + * @return SuggestionList + */ + public static function newFromRow( $row ) { + $params = array( + 'id' => $row->cxl_id, + 'name' => $row->cxl_name, + 'info' => $row->cxl_info, + 'owner' => $row->cxl_owner, + 'startTime' => $row->cxl_start_time, + 'endTime' => $row->cxl_end_time, + 'type' => $row->cxl_type, + ); + + return new SuggestionList( $params ); + } + + public function getId() { + return $this->id; + } + + public function getName() { + return $this->name; + } + + public function getDisplayNameMessage( \IContextSource $context ) { + if ( $this->getType() === self::TYPE_FEATURED ) { + return $context->msg( 'cx-suggestionlist-featured' ); + } + + return new \RawMessage( $this->getName() ); + } + + public function getInfo() { + return $this->info; + } + + public function getOwner() { + if ( $this->owner ) { + return $this->owner; + } + + return 0; + } + + public function isPublic() { + return (bool)$this->public; + } + + public 
function getStartTime() { + return $this->startTime; + } + + public function getEndTime() { + return $this->endTime; + } + + public function getType() { + if ( $this->type === null ) { + return self::TYPE_DEFAULT; + } + + return $this->type; + } + + /** + * @return string + */ + public function __toString() { + return $this->name; + } +} diff --git a/includes/SuggestionListManager.php b/includes/SuggestionListManager.php new file mode 100644 index 0000000..8ce24ad --- /dev/null +++ b/includes/SuggestionListManager.php @@ -0,0 +1,132 @@ +<?php + +namespace ContentTranslation; + +class SuggestionListManager { + + /** + * @return int Id of the list. + */ + public function insertList( SuggestionList $list ) { + $dbw = Database::getConnection( DB_MASTER ); + $values = array( + 'cxl_id' => $list->getId(), + 'cxl_owner' => $list->getOwner(), + 'cxl_public' => $list->isPublic(), + + 'cxl_name' => $list->getName(), + 'cxl_info' => $list->getInfo(), + 'cxl_type' => $list->getType(), + ); + + if ( $list->getStartTime() !== null ) { + $values['cxl_start_time'] = $dbw->timestamp( $list->getStartTime() ); + } + + if ( $list->getEndTime() !== null ) { + $values['cxl_end_time'] = $dbw->timestamp( $list->getEndTime() ); + } + + $dbw->insert( 'cx_lists', $values, __METHOD__ ); + + return $dbw->insertId(); + } + + public function deleteList( $id ) { + $dbw = Database::getConnection( DB_MASTER ); + $dbw->delete( + 'cx_suggestions', + array( + 'cxs_list_id' => $this->getId(), + ), + __METHOD__ + ); + $dbw->delete( + 'cx_lists', + array( + 'cxl_id' => $this->getId() + ), + __METHOD__ + ); + } + + /** + * Add suggestions to database. 
+ * + * @param Suggestion[] $suggestions + */ + public function addSuggestions( array $suggestions ) { + $dbw = Database::getConnection( DB_MASTER ); + + $batchSize = 100; + while ( count( $suggestions ) > 0 ) { + $values = array(); + $batch = array_splice( $suggestions, 0, $batchSize ); + + $values = array(); + foreach ( $batch as $suggestion ) { + $values[] = array( + 'cxs_list_id' => $suggestion->getListId(), + 'cxs_title' => $suggestion->getTitle()->getPrefixedText(), + 'cxs_source_language' => $suggestion->getSourceLanguage(), + 'cxs_target_language' => $suggestion->getTargetLanguage(), + ); + } + + $dbw->insert( + 'cx_suggestions', + $values, + __METHOD__, + array( 'IGNORE' ) + ); + + wfWaitForSlaves(); + } + } + + public function getRelevantSuggestions( Translator $translators, $from, $to ) { + $dbw = Database::getConnection( DB_MASTER ); + + $lists = array(); + $suggestions = array(); + + $res = $dbw->select( + 'cx_lists', + '*', + array( + 'cxl_type' => SuggestionList::TYPE_FEATURED, + 'cxl_public' => true, + ), + __METHOD__ + ); + + foreach ( $res as $row ) { + $list = SuggestionList::newFromRow( $row ); + $lists[$list->getId()] = $list; + } + + if ( count( $lists ) ) { + $conds = array( + 'cxs_list_id' => array_keys( $lists ), + 'cxs_source_language' => $from, + 'cxs_target_language' => array( $to, '*' ), + ); + + $options = array( + 'LIMIT' => '10', + 'ORDER BY' => 'RAND()' + ); + + $res = $dbw->select( 'cx_suggestions', '*', $conds, __METHOD__, $options ); + + foreach ( $res as $row ) { + $suggestions[] = Suggestion::newFromRow( $row ); + } + } + + return array( + 'lists' => $lists, + 'suggestions' => $suggestions, + ); + } +} diff --git a/scripts/manage-lists.php b/scripts/manage-lists.php new file mode 100644 index 0000000..074f4ea --- /dev/null +++ b/scripts/manage-lists.php @@ -0,0 +1,165 @@ +<?php +/** + * + * @file + * @author Niklas Laxström + * @license GPL-2.0+ + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== 
false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +use ContentTranslation\SiteMapper; +use ContentTranslation\Suggestion; +use ContentTranslation\SuggestionList; +use ContentTranslation\SuggestionListManager; + +class CXManageLists extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Script to import suggestion list'; + + // Default to safe option which doesn't actually change data. + $this->addOption( + 'really', + '(optional) Also execute actions' + ); + $this->addOption( + 'source', + 'Source language (real language, not the domain)', + true, + true + ); + $this->addOption( + 'target', + 'Target language (real language, not the domain)', + true, + true + ); + + $this->addOption( + 'category', + 'Use the pages from this category, but not in present in target language corresponding' + . ' to that category. Example: Featured_articles', + true, + true + ); + } + + public function execute() { + $this->dryrun = !$this->hasOption( 'really' ); + $sourceDomain = SiteMapper::getDomainCode( $this->getOption( 'source' ) ); + $targetDomain = SiteMapper::getDomainCode( $this->getOption( 'target' ) ); + $category = $this->getOption( 'category' ); + + if ( $this->dryrun ) { + $this->output( "DRY-RUN mode: actions are NOT executed\n" ); + } else { + $this->output( "EXECUTE mode: actions ARE executed\n" ); + } + + $apiUrl = "https://$sourceDomain.wikipedia.org/w/api.php?"; + $pages = $this->getUntranslatedPages( $apiUrl, $category, $targetDomain ); + + $count = count( $pages ); + + if ( !$this->dryrun ) { + $this->createFeaturedSuggestions( $pages ); + $this->output( "$count pages added to the list successfully.\n" ); + } else { + $this->output( "Found $count pages:\n" ); + + foreach ( $pages as $page ) { + $this->output( "$page\n" ); + } + + $this->output( "Use --really to insert these pages.\n" ); + } + } + + protected 
function getUntranslatedPages( $apiUrl, $category, $targetDomain ) { + $this->output( "Fetching pages from $category not present in $targetDomain ..." ); + + $pages = array(); + + $params = array( + 'action' => 'query', + 'format' => 'json', + 'generator' => 'categorymembers', + 'gcmtitle' => "Category:$category", + 'gcmnamespace' => 0, + 'gcmlimit' => 500, + 'gcmsort' => 'timestamp', + 'prop' => 'langlinks', + 'lllang' => $targetDomain, + 'lllimit' => 500, + 'continue' => '', + ); + + while ( true ) { + $url = $apiUrl . http_build_query( $params ); + $json = Http::get( $url ); + $data = FormatJson::decode( $json, true ); + + if ( !isset( $data['query'] ) ) { + $this->output( "\t[FAIL]\n" ); + return array(); + } + + $pagesInCategory = $data['query']['pages']; + + foreach ( $pagesInCategory as $pageId => $page ) { + if ( !isset( $page['langlinks'] ) ) { + $pages[] = $page['title']; + } + } + + if ( !isset( $data['continue'] ) || count( $pages ) > 5000 ) { + break; + } else { + unset( $param['llcontinue'] ); + unset( $param['gcmcontinue'] ); + $params += $data['continue']; + } + } + + $this->output( "\t[OK]\n" ); + + return $pages; + } + + protected function createFeaturedSuggestions( $pages ) { + $sourceLanguage = $this->getOption( 'source' ); + $targetLanguage = $this->getOption( 'target' ); + + $manager = new SuggestionListManager(); + $list = new SuggestionList( array( + 'type' => SuggestionList::TYPE_FEATURED, + 'name' => 'featured', + 'public' => true, + ) ); + + $listId = $manager->insertList( $list ); + $suggestion = array(); + + foreach ( $pages as $page ) { + $suggestions[] = new Suggestion( array( + 'listId' => $listId, + 'title' => $page, + 'sourceLanguage' => $sourceLanguage, + 'targetLanguage' => $targetLanguage, + ) ); + } + + $manager->addSuggestions( $suggestions ); + } +} + +$maintClass = 'CXManageLists'; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/sql/lists.sql b/sql/lists.sql new file mode 100644 index 0000000..ca31ea3 --- /dev/null +++ 
b/sql/lists.sql @@ -0,0 +1,46 @@ +-- Content translation suggestion related tables + +DROP TABLE IF EXISTS /*_*/cx_lists; +CREATE TABLE /*_*/cx_lists ( + -- List id + cxl_id int NOT NULL PRIMARY KEY auto_increment, + + -- Type of the list such as featured, popular, etc. + cxl_type int default 0, + -- Owner of the list + cxl_owner int NOT NULL, + cxl_public BOOLEAN NOT NULL, + -- Optional validity period for the list + cxl_start_time varchar(14) binary, + cxl_end_time varchar(14) binary, + + -- Name for the suggestion list + cxl_name varbinary(512) NOT NULL, + -- Url to page with additional info about the list + cxl_info mediumblob +) /*$wgDBTableOptions*/; + +CREATE UNIQUE INDEX /*_*/cx_lists_relevant ON /*_*/cx_lists ( + cxl_type, + cxl_public, + cxl_start_time, + cxl_end_time +); + +DROP TABLE IF EXISTS /*_*/cx_suggestions; +CREATE TABLE /*_*/cx_suggestions ( + -- Foreign key to cxl_id + cxs_list_id int NOT NULL, + -- Source language code + cxs_source_language varbinary(36) NOT NULL, + -- Target language code + cxs_target_language varbinary(36), + -- Title of the suggestion + cxs_title varbinary(512) NOT NULL +) /*$wgDBTableOptions*/; + +CREATE INDEX /*i*/cx_suggestions_by_lang ON /*_*/cx_suggestions ( + cxs_list_id, + cxs_source_language, + cxs_target_language +); -- To view, visit https://gerrit.wikimedia.org/r/231253 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I62b168957c4a9e2d0518fdbb48d6ceec44bf5dbd Gerrit-PatchSet: 25 Gerrit-Project: mediawiki/extensions/ContentTranslation Gerrit-Branch: master Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com> Gerrit-Reviewer: Amire80 <amir.ahar...@mail.huji.ac.il> Gerrit-Reviewer: KartikMistry <kartik.mis...@gmail.com> Gerrit-Reviewer: Nikerabbit <niklas.laxst...@gmail.com> Gerrit-Reviewer: Santhosh <santhosh.thottin...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list 
MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits