Yaron Koren has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/316561

Change subject: Add file data storage
......................................................................

Add file data storage

Change-Id: I7d9cbb1be7519ea5be7b07b865ffe7f1e3fc539b
---
M Cargo.hooks.php
M Cargo.php
A CargoFileData.php
M extension.json
A maintenance/setCargoFileData.php
5 files changed, 199 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Cargo 
refs/changes/61/316561/2

diff --git a/Cargo.hooks.php b/Cargo.hooks.php
index c291c1f..ccb9fa8 100755
--- a/Cargo.hooks.php
+++ b/Cargo.hooks.php
@@ -185,8 +185,9 @@
                CargoStore::$settings['origin'] = 'page save';
                CargoUtils::parsePageForStorage( $article->getTitle(), 
$content->getNativeData() );
 
-               // Also, save the "page data".
+               // Also, save the "page data" and (if appropriate) "file data".
                CargoPageData::storeValuesForPage( $article->getTitle() );
+               CargoFileData::storeValuesForFile( $article->getTitle() );
 
                return true;
        }
@@ -203,6 +204,7 @@
                // parsed right after this.
                CargoStore::$settings['origin'] = 'Approved Revs revision 
approved';
                CargoPageData::storeValuesForPage( $title );
+               CargoFileData::storeValuesForFile( $title );
                return true;
        }
 
@@ -219,6 +221,7 @@
                        CargoStore::$settings['origin'] = 'Approved Revs 
revision unapproved';
                }
                CargoPageData::storeValuesForPage( $title, 
$egApprovedRevsBlankIfUnapproved );
+               CargoFileData::storeValuesForFile( $title, 
$egApprovedRevsBlankIfUnapproved );
                return true;
        }
 
diff --git a/Cargo.php b/Cargo.php
index 25e711b..1006bed 100644
--- a/Cargo.php
+++ b/Cargo.php
@@ -87,6 +87,7 @@
 $wgAutoloadClasses['CargoSQLQuery'] = $dir . '/CargoSQLQuery.php';
 $wgAutoloadClasses['CargoQueryDisplayer'] = $dir . '/CargoQueryDisplayer.php';
 $wgAutoloadClasses['CargoPageData'] = $dir . '/CargoPageData.php';
+$wgAutoloadClasses['CargoFileData'] = $dir . '/CargoFileData.php';
 $wgAutoloadClasses['CargoRecurringEvent'] = $dir . 
'/parserfunctions/CargoRecurringEvent.php';
 $wgAutoloadClasses['CargoDisplayMap'] = $dir . 
'/parserfunctions/CargoDisplayMap.php';
 $wgAutoloadClasses['CargoPopulateTableJob'] = $dir . 
'/CargoPopulateTableJob.php';
@@ -325,3 +326,4 @@
 $wgCargoDrilldownNumRangesForNumbers = 5;
 
 $wgCargoPageDataColumns = array();
+$wgCargoFileDataColumns = array();
\ No newline at end of file
diff --git a/CargoFileData.php b/CargoFileData.php
new file mode 100644
index 0000000..7f4012a
--- /dev/null
+++ b/CargoFileData.php
@@ -0,0 +1,102 @@
+<?php
+
+/**
+ * Static functions for dealing with the "_fileData" table.
+ *
+ * @author Yaron Koren
+ */
+class CargoFileData {
+
+       /**
+        * Build the schema for the "_fileData" table from the column names
+        * listed in $wgCargoFileDataColumns (set in LocalSettings.php).
+        *
+        * The recognized column names are 'mediaType', 'path' and 'fullText';
+        * any other entry is ignored.
+        *
+        * @return CargoTableSchema
+        */
+       public static function getTableSchema() {
+               global $wgCargoFileDataColumns;
+
+               // Maps each field name to array( type, isList ).
+               $fieldTypes = array();
+
+               // The lookups are strict so that a stray non-string entry
+               // (e.g. 0 or true) cannot loosely match a column name.
+               if ( in_array( 'mediaType', $wgCargoFileDataColumns, true ) ) {
+                       $fieldTypes['_mediaType'] = array( 'String', false );
+               }
+               if ( in_array( 'path', $wgCargoFileDataColumns, true ) ) {
+                       $fieldTypes['_path'] = array( 'String', false );
+               }
+               if ( in_array( 'fullText', $wgCargoFileDataColumns, true ) ) {
+                       $fieldTypes['_fullText'] = array( 'Searchtext', false );
+               }
+
+               $tableSchema = new CargoTableSchema();
+               foreach ( $fieldTypes as $field => $fieldVals ) {
+                       list ( $type, $isList ) = $fieldVals;
+                       $fieldDesc = new CargoFieldDescription();
+                       $fieldDesc->mType = $type;
+                       if ( $isList ) {
+                               $fieldDesc->mIsList = true;
+                               $fieldDesc->setDelimiter( '|' );
+                       }
+                       $tableSchema->mFieldDescriptions[$field] = $fieldDesc;
+               }
+
+               return $tableSchema;
+       }
+
+       /**
+        * Store the configured file data (MIME type, local path and/or
+        * extracted PDF text) for one file page in the "_fileData" table.
+        *
+        * Does nothing if $title is null, if the page is not in the File
+        * namespace, or if the "_fileData" table has not been created.
+        *
+        * NOTE(review): the "revision unapproved" handler in Cargo.hooks.php
+        * passes $egApprovedRevsBlankIfUnapproved as a second argument. This
+        * method does not declare a second parameter, so that value is
+        * currently ignored - confirm whether blanking should be supported
+        * here as well.
+        *
+        * @param Title|null $title The page whose file data should be stored
+        */
+       public static function storeValuesForFile( $title ) {
+               global $wgCargoFileDataColumns, $wgLocalFileRepo;
+
+               if ( $title == null ) {
+                       return;
+               }
+
+               // Exit if we're not in the File namespace.
+               if ( $title->getNamespace() != NS_FILE ) {
+                       return;
+               }
+
+               // If there is no _fileData table, getTableSchemas() will
+               // throw an error.
+               try {
+                       $tableSchemas = CargoUtils::getTableSchemas( array( '_fileData' ) );
+               } catch ( MWException $e ) {
+                       return;
+               }
+
+               $repo = new LocalRepo( $wgLocalFileRepo );
+               $file = LocalFile::newFromTitle( $title, $repo );
+
+               $fileDataValues = array();
+
+               // The same strict lookups as in getTableSchema(), so that the
+               // stored values always correspond to the declared columns.
+               if ( in_array( 'mediaType', $wgCargoFileDataColumns, true ) ) {
+                       $fileDataValues['_mediaType'] = $file->getMimeType();
+               }
+
+               if ( in_array( 'path', $wgCargoFileDataColumns, true ) ) {
+                       $fileDataValues['_path'] = $file->getLocalRefPath();
+               }
+
+               if ( in_array( 'fullText', $wgCargoFileDataColumns, true ) ) {
+                       global $wgCargoPDFToText;
+
+                       if ( $wgCargoPDFToText == '' ) {
+                               // No text-extraction command is configured, so
+                               // no full text is stored.
+                               // TODO: display an error message.
+                       } elseif ( $file->getMimeType() != 'application/pdf' ) {
+                               // We only handle PDF files.
+                       } else {
+                               // Copied in part from the PdfHandler extension.
+                               $filePath = $file->getLocalRefPath();
+                               // Both the command and the path are shell-escaped;
+                               // the trailing "-" argument is passed through as-is.
+                               $cmd = wfEscapeShellArg( $wgCargoPDFToText ) . ' ' . wfEscapeShellArg( $filePath ) . ' - ';
+                               $retval = '';
+                               $txt = wfShellExec( $cmd, $retval );
+                               // Only store the text if the command succeeded.
+                               if ( $retval == 0 ) {
+                                       // Normalize line endings, and turn form
+                                       // feeds into blank lines.
+                                       $txt = str_replace( "\r\n", "\n", $txt );
+                                       $txt = str_replace( "\f", "\n\n", $txt );
+                                       $fileDataValues['_fullText'] = $txt;
+                               }
+                       }
+               }
+
+               CargoStore::storeAllData( $title, '_fileData', $fileDataValues, $tableSchemas['_fileData'] );
+       }
+
+}
\ No newline at end of file
diff --git a/extension.json b/extension.json
index 4718e5b..701fb7c 100755
--- a/extension.json
+++ b/extension.json
@@ -52,6 +52,7 @@
                "CargoSQLQuery": "CargoSQLQuery.php",
                "CargoQueryDisplayer": "CargoQueryDisplayer.php",
                "CargoPageData": "CargoPageData.php",
+               "CargoFileData": "CargoFileData.php",
                "CargoRecurringEvent": 
"parserfunctions/CargoRecurringEvent.php",
                "CargoDisplayMap": "parserfunctions/CargoDisplayMap.php",
                "CargoPopulateTableJob": "CargoPopulateTableJob.php",
@@ -287,6 +288,7 @@
                "CargoDrilldownMinValuesForComboBox": 40,
                "CargoDrilldownNumRangesForNumbers": 5,
                "CargoPageDataColumns": [],
+               "CargoFileDataColumns": [],
                "CargoAllowedSQLFunctions":["COUNT", "FLOOR", "CEIL", 
"ROUND","MAX", "MIN", "AVG", "SUM", "POWER", "LN", "LOG","CONCAT", 
"GROUP_CONCAT", "LOWER", "LCASE", "UPPER", "UCASE","SUBSTRING", "FORMAT","NOW", 
"DATE", "YEAR", "MONTH", "DAYOFMONTH", "DATE_FORMAT","DATE_ADD", "DATE_SUB", 
"DATEDIFF","NEAR"]
        },
        "manifest_version": 1
diff --git a/maintenance/setCargoFileData.php b/maintenance/setCargoFileData.php
new file mode 100644
index 0000000..9e4c41a
--- /dev/null
+++ b/maintenance/setCargoFileData.php
@@ -0,0 +1,89 @@
+<?php
+
+/**
+ * This script populates the Cargo _fileData DB table (and possibly other
+ * auxiliary tables) for all files in the wiki.
+ *
+ * Usage:
+ *  php setCargoFileData.php --delete
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @author Yaron Koren
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
+
+$maintClass = "SetCargoFileData";
+
+class SetCargoFileData extends Maintenance {
+
+       /**
+        * Set the script description and register the "--delete" option.
+        */
+       public function __construct() {
+               parent::__construct();
+
+               $this->mDescription = "Stores a set of data for each file in the wiki in one or more database tables, for use within Cargo queries.";
+
+               $this->addOption( "delete", "Delete the file data DB table(s)", false, false );
+       }
+
+       /**
+        * Drop any existing "_fileData" table(s) and, unless "--delete" was
+        * given, recreate them and store file data for every page in the
+        * File namespace.
+        */
+       public function execute() {
+               $dbr = wfGetDB( DB_SLAVE );
+               $res = $dbr->select( 'cargo_tables', array( 'field_tables' ),
+                       array( 'main_table' => '_fileData' ) );
+
+               // Only delete when a _fileData entry actually exists; with no
+               // matching row there is nothing to fetch or unserialize.
+               $numRows = $res->numRows();
+               if ( $numRows > 0 ) {
+                       $row = $res->fetchRow();
+                       $fieldTables = unserialize( $row['field_tables'] );
+                       CargoDeleteCargoTable::deleteTable( '_fileData', $fieldTables );
+               }
+
+               if ( $this->getOption( "delete" ) ) {
+                       if ( $numRows > 0 ) {
+                               $this->output( "\n Deleted file data table(s).\n" );
+                       } else {
+                               $this->output( "\n No file data tables found; exiting.\n" );
+                       }
+                       return;
+               }
+
+               $tableSchema = CargoFileData::getTableSchema();
+               $tableSchemaString = $tableSchema->toDBString();
+
+               $cdb = CargoUtils::getDB();
+               $dbw = wfGetDB( DB_MASTER );
+               CargoUtils::createCargoTableOrTables( $cdb, $dbw, '_fileData', $tableSchema, $tableSchemaString, -1 );
+
+               // CargoFileData::storeValuesForFile() ignores pages outside the
+               // File namespace, so only select those pages; otherwise every
+               // page in the wiki would be reported as stored.
+               $pages = $dbr->select( 'page', array( 'page_id' ),
+                       array( 'page_namespace' => NS_FILE ) );
+
+               while ( $page = $pages->fetchObject() ) {
+                       $title = Title::newFromID( $page->page_id );
+                       if ( $title == null ) {
+                               continue;
+                       }
+                       CargoFileData::storeValuesForFile( $title );
+                       $this->output( wfTimestamp( TS_DB ) . ' Stored file data for page "' . $title->getFullText() . "\".\n" );
+               }
+
+               $this->output( "\n Finished populating file data table(s).\n" );
+       }
+
+}
+
+require_once( DO_MAINTENANCE );
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/316561
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7d9cbb1be7519ea5be7b07b865ffe7f1e3fc539b
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Cargo
Gerrit-Branch: master
Gerrit-Owner: Yaron Koren <yaro...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to