Brion VIBBER has uploaded a new change for review.
https://gerrit.wikimedia.org/r/314862
Change subject: WIP: Rewrite discovery of TimedText tracks
......................................................................
WIP: Rewrite discovery of TimedText tracks
(Resurrecting I69c4af20 after revert, needs fixes)
* Remove broken ForeignApIRequest
* Directly query the database for Local and ForeignDb situations
* Use the new timedtext api query for remote sources (T134642)
* Fixes a bug, where the namespace for timed text on Commons was
missing from track src's (T122737 and T71453)
Bug: T61780
Bug: T134642
Bug: T122737
Bug: T71453
Change-Id: I6e4915540a2ddef2c8cc78a920ef3950ddae46b1
---
M TimedMediaHandler.php
M handlers/TextHandler/TextHandler.php
2 files changed, 166 insertions(+), 142 deletions(-)
git pull
ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/TimedMediaHandler
refs/changes/62/314862/1
diff --git a/TimedMediaHandler.php b/TimedMediaHandler.php
index 108e02e..66e42da 100644
--- a/TimedMediaHandler.php
+++ b/TimedMediaHandler.php
@@ -294,8 +294,6 @@
$wgAutoloadClasses['WAVHandler'] =
"$timedMediaDir/handlers/WAVHandler/WAVHandler.php";
// Text handler
-$wgAutoloadClasses['ForeignApiQueryAllPages'] =
- "$timedMediaDir/handlers/TextHandler/TextHandler.php";
$wgAutoloadClasses['TextHandler'] =
"$timedMediaDir/handlers/TextHandler/TextHandler.php";
$wgAutoloadClasses['TimedTextPage'] = "$timedMediaDir/TimedTextPage.php";
diff --git a/handlers/TextHandler/TextHandler.php
b/handlers/TextHandler/TextHandler.php
index 605fbe2..4442dbd 100644
--- a/handlers/TextHandler/TextHandler.php
+++ b/handlers/TextHandler/TextHandler.php
@@ -8,61 +8,10 @@
* TODO On "new" timedtext language save purge all pages where file exists
*/
-/**
- * Subclass ApiMain but query other db
- */
-class ForeignApiQueryAllPages extends ApiQueryAllPages {
- public function __construct( $mDb, $query, $moduleName ) {
- global $wgTimedTextForeignNamespaces;
-
- $this->foreignDb = $mDb;
-
- $wikiID = $this->foreignDb->getWikiID();
- if ( isset( $wgTimedTextForeignNamespaces[ $wikiID ] ) ) {
- $this->foreignNs = $wgTimedTextForeignNamespaces[
$wikiID ];
- } else {
- $this->foreignNs = NS_TIMEDTEXT;
- }
- parent::__construct( $query, $moduleName, 'ap' );
- }
-
- protected function getDB() {
- return $this->foreignDb;
- }
-
- protected function parseMultiValue( $valueName, $value, $allowMultiple,
$allowedValues ) {
- // foreignnNs might not be defined localy,
- // catch the undefined error here
- if ( $valueName == 'apnamespace'
- && $value == $this->foreignNs
- && $allowMultiple == false
- ) {
- return $this->foreignNs;
- }
- return parent::parseMultiValue( $valueName, $value,
$allowMultiple, $allowedValues );
- }
-
- /**
- * An alternative to titleToKey() that doesn't trim trailing spaces
- *
- *
- * @FIXME: I'M A BIG HACK
- *
- * @param string $titlePart Title part with spaces
- * @return string Title part with underscores
- */
- public function titlePartToKey( $titlePart, $defaultNamespace = NS_MAIN
) {
- $t = Title::newFromText( $titlePart . 'x' );
- if ( !$t ) {
- $this->dieUsageMsg( [ 'invalidtitle', $titlePart ] );
- }
- return substr( $t->getPrefixedDBkey(), 0, -1 );
- }
-}
-
class TextHandler {
// lazy init remote Namespace number
public $remoteNs = null;
+ public $remoteNsName = null;
/**
* @var File
@@ -120,8 +69,11 @@
if ( isset( $data['query'] ) && isset(
$data['query']['namespaces'] ) ) {
// get the ~last~ timed text namespace defined
foreach ( $data['query']['namespaces'] as $ns )
{
- if ( $ns['*'] == 'TimedText' ) {
+ if ( isset( $ns['canonical'] ) &&
$ns['canonical'] === 'TimedText' ) {
$this->remoteNs = $ns['id'];
+ $this->remoteNsName = $ns['*'];
+ wfDebug( "Discovered remoteNs:
$this->remoteNs and name: $this->remoteNsName \n" );
+ break;
}
}
}
@@ -131,25 +83,59 @@
}
/**
- * @return array|bool
+ * Retrieve a list of TimedText pages in the database that start with
+ * the name of the file associated with this handler.
+ *
+ * If the file is on a foreign repo, will query the ForeignDb
+ *
+ * @return ResultWrapper|bool
*/
- function getTextPagesQuery() {
+ function getTextPages() {
$ns = $this->getTimedTextNamespace();
if ( $ns === false ) {
- wfDebug( "Repo: " . $this->file->repo->getName() . "
does not have a TimedText namesapce \n" );
+ wfDebug( 'Repo: ' . $this->file->repo->getName() . "
does not have a TimedText namespace \n" );
+ // No timed text namespace, don't try to look up timed
text tracks
+ return false;
+ }
+ $dbr = $this->file->getRepo()->getSlaveDB();
+ $prefix = $this->file->getTitle()->getDBkey();
+ return $dbr->select(
+ 'page',
+ [ 'page_namespace', 'page_title' ],
+ [
+ 'page_namespace' => $ns,
+ 'page_title ' . $dbr->buildLike( $prefix,
$dbr->anyString() )
+ ],
+ __METHOD__,
+ [
+ 'LIMIT' => 300,
+ 'ORDER BY' => 'page_title'
+ ]
+ );
+ }
+
+ /**
+ * Build the api query to find TimedText pages belonging to a remote
file
+ * @return array|bool
+ */
+ function getRemoteTextPagesQuery() {
+ $ns = $this->getTimedTextNamespace();
+ if ( $ns === false ) {
+ wfDebug( 'Repo: ' . $this->file->repo->getName() . "
does not have a TimedText namespace \n" );
// No timed text namespace, don't try to look up timed
text tracks
return false;
}
return [
'action' => 'query',
- 'list' => 'allpages',
- 'apnamespace' => $ns,
- 'aplimit' => 300,
- 'apprefix' => $this->file->getTitle()->getDBkey()
+ 'titles' => $this->file->getTitle()->getPrefixedDBkey(),
+ 'prop' => 'videoinfo',
+ 'viprop' => 'timedtext',
+ 'formatversion' => '2',
];
}
/**
+ * Retrieve the text sources belonging to a remote file
* @return array|mixed
*/
function getRemoteTextSources() {
@@ -168,7 +154,7 @@
wfDebug( "miss\n" );
}
wfDebug( "Get text tracks from remote api \n" );
- $query = $this->getTextPagesQuery();
+ $query = $this->getRemoteTextPagesQuery();
// Error in getting timed text namespace return empty array;
if ( $query === false ) {
@@ -183,99 +169,111 @@
}
/**
+ * Retrieve the text sources belonging to a foreign db accessible file
+ * @return array
+ */
+ function getForeignDBTextSources() {
+ $data = $this->getTextPages();
+ if ( $data !== false ) {
+ return $this->getTextTracksFromRows( $data );
+ }
+ return [];
+ }
+
+ /**
+ * Retrieve the text sources belonging to a local file
* @return array
*/
function getLocalTextSources() {
global $wgEnableLocalTimedText;
if ( $wgEnableLocalTimedText ) {
- // Init $this->textTracks
- $params = new FauxRequest( $this->getTextPagesQuery() );
- $api = new ApiMain( $params );
- $api->execute();
- if ( defined( 'ApiResult::META_CONTENT' ) ) {
- $data = $api->getResult()->getResultData( null,
[ 'Strip' => 'all' ] );
- } else {
- $data = $api->getResultData();
+ $data = $this->getTextPages();
+ if ( $data !== false ) {
+ return $this->getTextTracksFromRows( $data );
}
- wfDebug( print_r( $data, true ) );
- // Get the list of language Names
- return $this->getTextTracksFromData( $data );
- } else {
- return [];
}
+ return [];
}
/**
- * @return array|mixed
+ * Build an array of track information using a Database result
+ * Handles both local and foreign Db results
+ *
+ * @param ResultWrapper $data Database result with page titles
+ * @return array
*/
- function getForeignDBTextSources() {
- // Init $this->textTracks
- $params = new FauxRequest( $this->getTextPagesQuery() );
- $api = new ApiMain( $params );
- $api->profileIn();
- $query = new ApiQuery( $api, 'foo', 'bar' );
- $query->profileIn();
- $module = new ForeignApiQueryAllPages(
$this->file->getRepo()->getSlaveDB(), $query, 'allpages' );
- $module->profileIn();
- $module->execute();
- $module->profileOut();
- $query->profileOut();
- $api->profileOut();
-
- if ( defined( 'ApiResult::META_CONTENT' ) ) {
- $data = $module->getResult()->getResultData( null, [
'Strip' => 'all' ] );
- } else {
- $data = $module->getResultData();
+ function getTextTracksFromRows( ResultWrapper $data ) {
+ $textTracks = [];
+ $providerName = $this->file->repo->getName();
+ // commons is called shared in production. normalize it to
wikimediacommons
+ if ( $providerName === 'shared' ) {
+ $providerName = 'wikimediacommons';
}
- // Get the list of language Names
- return $this->getTextTracksFromData( $data );
+ // Provider name should be the same as the interwiki map
+
+ if ( !$this->file->isLocal() ) {
+ $namespaceName = $this->getForeignNamespaceName();
+ }
+ $langNames = Language::fetchLanguageNames( null, 'mw' );
+
+ foreach ( $data as $row ) {
+ // Note, the namespace ID of this title might be
'unknown'
+ // to our configuration if this is called in ForeignDb
situations
+ if ( $this->file->isLocal() ) {
+ $subTitle = Title::newFromRow( $row );
+ } else {
+ $subTitle = new ForeignTitle(
$row->page_namespace, $namespaceName, $row->page_title );
+ }
+ $titleParts = explode( '.', $row->page_title );
+ if ( count( $titleParts ) >= 3 ) {
+ $timedTextExtension = array_pop( $titleParts );
+ $languageKey = array_pop( $titleParts );
+ $contentType = $this->getContentType(
$timedTextExtension );
+ } else {
+ continue;
+ }
+ // If there is no valid language continue:
+ if ( !isset( $langNames[ $languageKey ] ) ) {
+ continue;
+ }
+
+ $textTracks[] = [
+ // @todo Should eventually add special entry
point and output proper WebVTT format:
+ //
http://www.whatwg.org/specs/web-apps/current-work/webvtt.html
+ 'src' => $this->getFullURL( $subTitle,
$contentType ),
+ 'kind' => 'subtitles',
+ 'type' => $contentType,
+ 'title' => $this->getPrefixedDBkey( $subTitle ),
+ 'provider' => $providerName,
+ 'srclang' => $languageKey,
+ 'dir' => Language::factory( $languageKey
)->getDir(),
+ 'label' => wfMessage(
'timedmedia-subtitle-language',
+ $langNames[ $languageKey ],
+ $languageKey )->text()
+ ];
+ }
+ return $textTracks;
}
/**
- * @param $data
+ * Build an array of track information using an API result
+ * @param mixed $data JSON decoded result from a query API request
* @return array
*/
function getTextTracksFromData( $data ) {
$textTracks = [];
- $providerName = $this->file->repo->getName();
- // commons is called shared in production. normalize it to
wikimediacommons
- if ( $providerName == 'shared' ) {
- $providerName = 'wikimediacommons';
- }
- // Provider name should be the same as the interwiki map
- // @@todo more testing with this:
-
- $langNames = Language::fetchLanguageNames( null, 'mw' );
- if ( $data['query'] && $data['query']['allpages'] ) {
- foreach ( $data['query']['allpages'] as $page ) {
- $subTitle = Title::newFromText( $page['title']
);
- $tileParts = explode( '.', $page['title'] );
- if ( count( $tileParts ) >= 3 ) {
- $timedTextExtension = array_pop(
$tileParts );
- $languageKey = array_pop( $tileParts );
- $contentType = $this->getContentType(
$timedTextExtension );
- } else {
- continue;
+ if ( $data !== null && $data['query'] &&
$data['query']['pages'] ) {
+ foreach ( $data['query']['pages'] as $page ) {
+ if ( $page['videoinfo'] ) {
+ foreach ( $page['videoinfo'] as $info )
{
+ if ( $info['timedtext'] ) {
+ foreach (
$info['timedtext'] as $track ) {
+ // Add
validation ?
+ $textTracks[] =
$track;
+ }
+ }
+ }
}
- // If there is no valid language continue:
- if ( !isset( $langNames[ $languageKey ] ) ) {
- continue;
- }
- $namespacePrefix = "TimedText:";
- $textTracks[] = [
- // @todo Should eventually add special
entry point and output proper WebVTT format:
- //
http://www.whatwg.org/specs/web-apps/current-work/webvtt.html
- 'src' => $this->getFullURL(
$page['title'], $contentType ),
- 'kind' => 'subtitles',
- 'type' => $contentType,
- 'title' => $namespacePrefix .
$subTitle->getDBkey(),
- 'provider' => $providerName,
- 'srclang' => $languageKey,
- 'dir' => Language::factory(
$languageKey )->getDir(),
- 'label' => wfMessage(
'timedmedia-subtitle-language',
- $langNames[ $languageKey ],
- $languageKey )->text()
- ];
}
}
return $textTracks;
@@ -290,16 +288,43 @@
return '';
}
+ function getForeignNamespaceName() {
+ if ( $this->$remoteNs !== null ) {
+ return $this->$remoteNsName;
+ }
+ /* Else, we use the canonical namespace, since we can't look up
the actual one */
+ return strtr( MWNamespace::getCanonicalName( NS_TIMEDTEXT ), '
', '_' );
+ }
+
+ /**
+ * Retrieve a namespace prefixed and underscored title
+ * @param Title|ForeignTitle $pageTitle
+ * @return string
+ */
+ function getPrefixedDBkey( $pageTitle ) {
+ if ( $pageTitle instanceof Title ) {
+ return TitleFormatter::getPrefixedDBkey( $pageTitle );
+ } elseif ( $pageTitle instanceof ForeignTitle ) {
+ return $pageTitle->getFullText();
+ }
+ return null;
+ }
+
+ /**
+ * Retrieve a url to the raw subtitle file
+ * Only use for local and foreignDb requests
+ *
+ * @param Title|ForeignTitle $pageTitle
+ * @return string
+ */
function getFullURL( $pageTitle, $contentType ) {
- if ( $this->file->isLocal() ) {
- $subTitle = Title::newFromText( $pageTitle );
- return $subTitle->getFullURL( [
+ if ( $pageTitle instanceof Title ) {
+ return $pageTitle->getFullURL( [
'action' => 'raw',
'ctype' => $contentType
] );
- // } elseif ( $this->file->repo instanceof ForeignDBViaLBRepo )
{
- } else {
- $query = 'title=' . wfUrlencode( $pageTitle ) . '&';
+ } elseif ( $pageTitle instanceof ForeignTitle ) {
+ $query = 'title=' . wfUrlencode(
$pageTitle->getFullText() ) . '&';
$query .= wfArrayToCgi( [
'action' => 'raw',
'ctype' => $contentType
@@ -307,5 +332,6 @@
// Note: This will return false if scriptDirUrl is not
set for repo.
return $this->file->repo->makeUrl( $query );
}
+ return null;
}
}
--
To view, visit https://gerrit.wikimedia.org/r/314862
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I6e4915540a2ddef2c8cc78a920ef3950ddae46b1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/TimedMediaHandler
Gerrit-Branch: master
Gerrit-Owner: Brion VIBBER <[email protected]>
Gerrit-Reviewer: TheDJ <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits