Yaron Koren has submitted this change and it was merged. Change subject: Add file data storage ......................................................................
Add file data storage Change-Id: I7d9cbb1be7519ea5be7b07b865ffe7f1e3fc539b --- M Cargo.hooks.php M Cargo.php A CargoFileData.php M extension.json A maintenance/setCargoFileData.php 5 files changed, 199 insertions(+), 1 deletion(-) Approvals: Yaron Koren: Looks good to me, approved jenkins-bot: Verified diff --git a/Cargo.hooks.php b/Cargo.hooks.php index c291c1f..ccb9fa8 100755 --- a/Cargo.hooks.php +++ b/Cargo.hooks.php @@ -185,8 +185,9 @@ CargoStore::$settings['origin'] = 'page save'; CargoUtils::parsePageForStorage( $article->getTitle(), $content->getNativeData() ); - // Also, save the "page data". + // Also, save the "page data" and (if appropriate) "file data". CargoPageData::storeValuesForPage( $article->getTitle() ); + CargoFileData::storeValuesForFile( $article->getTitle() ); return true; } @@ -203,6 +204,7 @@ // parsed right after this. CargoStore::$settings['origin'] = 'Approved Revs revision approved'; CargoPageData::storeValuesForPage( $title ); + CargoFileData::storeValuesForFile( $title ); return true; } @@ -219,6 +221,7 @@ CargoStore::$settings['origin'] = 'Approved Revs revision unapproved'; } CargoPageData::storeValuesForPage( $title, $egApprovedRevsBlankIfUnapproved ); + CargoFileData::storeValuesForFile( $title, $egApprovedRevsBlankIfUnapproved ); return true; } diff --git a/Cargo.php b/Cargo.php index 25e711b..1006bed 100644 --- a/Cargo.php +++ b/Cargo.php @@ -87,6 +87,7 @@ $wgAutoloadClasses['CargoSQLQuery'] = $dir . '/CargoSQLQuery.php'; $wgAutoloadClasses['CargoQueryDisplayer'] = $dir . '/CargoQueryDisplayer.php'; $wgAutoloadClasses['CargoPageData'] = $dir . '/CargoPageData.php'; +$wgAutoloadClasses['CargoFileData'] = $dir . '/CargoFileData.php'; $wgAutoloadClasses['CargoRecurringEvent'] = $dir . '/parserfunctions/CargoRecurringEvent.php'; $wgAutoloadClasses['CargoDisplayMap'] = $dir . '/parserfunctions/CargoDisplayMap.php'; $wgAutoloadClasses['CargoPopulateTableJob'] = $dir . 
/**
 * Static functions for dealing with the "_fileData" table.
 *
 * @author Yaron Koren
 */
class CargoFileData {

	/**
	 * Builds the schema of the "_fileData" table from the column names
	 * listed in $wgCargoFileDataColumns (normally set in
	 * LocalSettings.php).
	 *
	 * The recognized column names are 'mediaType', 'path' and
	 * 'fullText'; any other entry is ignored. Fields are always added
	 * in that order, regardless of the order used in the setting.
	 *
	 * @return CargoTableSchema
	 */
	public static function getTableSchema() {
		// Setting name => array( table field name, Cargo field type ).
		// None of these fields holds a list of values.
		$knownColumns = array(
			'mediaType' => array( '_mediaType', 'String' ),
			'path' => array( '_path', 'String' ),
			'fullText' => array( '_fullText', 'Searchtext' ),
		);

		$tableSchema = new CargoTableSchema();
		foreach ( $knownColumns as $columnName => $fieldInfo ) {
			if ( !self::isColumnEnabled( $columnName ) ) {
				continue;
			}
			list( $fieldName, $fieldType ) = $fieldInfo;
			$fieldDesc = new CargoFieldDescription();
			$fieldDesc->mType = $fieldType;
			$tableSchema->mFieldDescriptions[$fieldName] = $fieldDesc;
		}

		return $tableSchema;
	}

	/**
	 * Gathers the enabled "_fileData" values for one file and stores
	 * them via CargoStore::storeAllData().
	 *
	 * Does nothing if $title is null, if it is not in the File
	 * namespace, or if the "_fileData" table has not been created.
	 *
	 * NOTE(review): some hooks in Cargo.hooks.php call this with a
	 * second argument, mirroring CargoPageData::storeValuesForPage();
	 * that argument is currently ignored here - confirm whether
	 * "blank if unapproved" handling is wanted for file data too.
	 *
	 * @param Title|null $title Title of the file page.
	 */
	public static function storeValuesForFile( $title ) {
		global $wgLocalFileRepo;

		// Only pages in the File namespace have file data.
		if ( $title == null || $title->getNamespace() != NS_FILE ) {
			return;
		}

		// If there is no _fileData table, getTableSchemas() will
		// throw an error; in that case there is nowhere to store to.
		try {
			$tableSchemas = CargoUtils::getTableSchemas( array( '_fileData' ) );
		} catch ( MWException $e ) {
			return;
		}

		$repo = new LocalRepo( $wgLocalFileRepo );
		$file = LocalFile::newFromTitle( $title, $repo );

		$fileDataValues = array();

		if ( self::isColumnEnabled( 'mediaType' ) ) {
			$fileDataValues['_mediaType'] = $file->getMimeType();
		}

		if ( self::isColumnEnabled( 'path' ) ) {
			$fileDataValues['_path'] = $file->getLocalRefPath();
		}

		if ( self::isColumnEnabled( 'fullText' ) ) {
			$fullText = self::extractPDFText( $file );
			if ( $fullText !== null ) {
				$fileDataValues['_fullText'] = $fullText;
			}
		}

		CargoStore::storeAllData( $title, '_fileData', $fileDataValues, $tableSchemas['_fileData'] );
	}

	/**
	 * Whether the given column name is listed in $wgCargoFileDataColumns.
	 *
	 * @param string $columnName Name as used in the setting, e.g. 'path'.
	 * @return bool
	 */
	private static function isColumnEnabled( $columnName ) {
		global $wgCargoFileDataColumns;

		// Strict comparison, so that a stray non-string entry (0, true)
		// in the setting cannot accidentally enable every column.
		return in_array( $columnName, (array)$wgCargoFileDataColumns, true );
	}

	/**
	 * Extracts the text of a PDF file by running the converter
	 * configured in $wgCargoPDFToText on the file's local path.
	 *
	 * @param File $file
	 * @return string|null The extracted text, or null if no converter is
	 *  configured, the file is not a PDF, it has no local path, or the
	 *  converter exited with a non-zero status.
	 */
	private static function extractPDFText( $file ) {
		global $wgCargoPDFToText;

		if ( $wgCargoPDFToText == '' ) {
			// @TODO - display an error message.
			return null;
		}

		// We only handle PDF files.
		if ( $file->getMimeType() != 'application/pdf' ) {
			return null;
		}

		// Without a local path there is nothing to hand to the
		// converter, so don't bother spawning it.
		$filePath = $file->getLocalRefPath();
		if ( !$filePath ) {
			return null;
		}

		// Copied in part from the PdfHandler extension.
		$cmd = wfEscapeShellArg( $wgCargoPDFToText ) . ' ' . wfEscapeShellArg( $filePath ) . ' - ';
		$retval = '';
		$txt = wfShellExec( $cmd, $retval );
		if ( $retval != 0 ) {
			return null;
		}

		// Normalize line endings, then turn page breaks (form feeds)
		// into blank lines.
		return str_replace( array( "\r\n", "\f" ), array( "\n", "\n\n" ), $txt );
	}

}
"MIN", "AVG", "SUM", "POWER", "LN", "LOG","CONCAT", "GROUP_CONCAT", "LOWER", "LCASE", "UPPER", "UCASE","SUBSTRING", "FORMAT","NOW", "DATE", "YEAR", "MONTH", "DAYOFMONTH", "DATE_FORMAT","DATE_ADD", "DATE_SUB", "DATEDIFF","NEAR"] }, "manifest_version": 1 diff --git a/maintenance/setCargoFileData.php b/maintenance/setCargoFileData.php new file mode 100644 index 0000000..9e4c41a --- /dev/null +++ b/maintenance/setCargoFileData.php @@ -0,0 +1,89 @@ +<?php + +/** + * This script populates the Cargo _fileData DB table (and possibly other + * auxiliary tables) for all files in the wiki. + * + * Usage: + * php setCargoFileData.php --delete + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @author Yaron Koren + * @ingroup Maintenance + */ + +require_once( dirname( __FILE__ ) . 
$maintClass = "SetCargoFileData";

/**
 * Maintenance script that drops, and unless --delete is given then
 * re-creates and re-populates, the Cargo "_fileData" table.
 */
class SetCargoFileData extends Maintenance {

	public function __construct() {
		parent::__construct();

		$this->mDescription = "Stores a set of data for each file in the wiki in one or more database tables, for use within Cargo queries.";

		$this->addOption( "delete", "Delete the file data DB table(s)", false, false );
	}

	/**
	 * Runs the script:
	 *  1. If "_fileData" is registered in cargo_tables, delete it
	 *     together with its field tables.
	 *  2. With --delete, report what happened and stop.
	 *  3. Otherwise create the table from CargoFileData::getTableSchema()
	 *     and store the file data for every page in the File namespace.
	 */
	public function execute() {
		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select( 'cargo_tables', array( 'field_tables' ),
			array( 'main_table' => '_fileData' ), __METHOD__ );

		// Only try to delete when the table is actually registered;
		// with no row, fetchRow() has nothing to return.
		$numRows = $res->numRows();
		if ( $numRows > 0 ) {
			$row = $res->fetchRow();
			$fieldTables = unserialize( $row['field_tables'] );
			CargoDeleteCargoTable::deleteTable( '_fileData', $fieldTables );
		}

		if ( $this->getOption( "delete" ) ) {
			if ( $numRows > 0 ) {
				$this->output( "\n Deleted file data table(s).\n" );
			} else {
				$this->output( "\n No file data tables found; exiting.\n" );
			}
			return;
		}

		$tableSchema = CargoFileData::getTableSchema();
		$tableSchemaString = $tableSchema->toDBString();

		$cdb = CargoUtils::getDB();
		$dbw = wfGetDB( DB_MASTER );
		// NOTE(review): the trailing -1 presumably stands in for a
		// template page ID, as this table is not declared by any
		// template - confirm against createCargoTableOrTables().
		CargoUtils::createCargoTableOrTables( $cdb, $dbw, '_fileData', $tableSchema, $tableSchemaString, -1 );

		// CargoFileData::storeValuesForFile() ignores anything outside
		// the File namespace, so only fetch (and report on) file pages.
		$pages = $dbr->select( 'page', array( 'page_id' ),
			array( 'page_namespace' => NS_FILE ), __METHOD__ );

		while ( $page = $pages->fetchObject() ) {
			$title = Title::newFromID( $page->page_id );
			if ( $title == null ) {
				continue;
			}
			CargoFileData::storeValuesForFile( $title );
			$this->output( wfTimestamp( TS_DB ) . ' Stored file data for page "' . $title->getFullText() . "\".\n" );
		}

		$this->output( "\n Finished populating file data table(s).\n" );
	}

}

require_once( DO_MAINTENANCE );