Yaron Koren has submitted this change and it was merged.

Change subject: Add #get_file_data for local files.
......................................................................


Add #get_file_data for local files.

Change-Id: Id60107bb20803d225913163a92f3e2e9bac27ae2
---
M ED_ParserFunctions.php
M ED_Utils.php
M ExternalData.i18n.magic.php
M ExternalData.php
M README
5 files changed, 151 insertions(+), 19 deletions(-)

Approvals:
  Yaron Koren: Verified; Looks good to me, approved



diff --git a/ED_ParserFunctions.php b/ED_ParserFunctions.php
index 3c16d6f..49173c0 100644
--- a/ED_ParserFunctions.php
+++ b/ED_ParserFunctions.php
@@ -138,6 +138,91 @@
        }
 
        /**
+        * Render the #get_file_data parser function.
+        */
+       static function doGetFileData( &$parser ) {
+               global $edgCurPageName, $edgValues, $edgCacheExpireTime;
+
+               // If we're handling multiple pages, reset $edgValues
+               // when we move from one page to another.
+               $cur_page_name = $parser->getTitle()->getText();
+               if ( ! isset( $edgCurPageName ) || $edgCurPageName != 
$cur_page_name ) {
+                       $edgValues = array();
+                       $edgCurPageName = $cur_page_name;
+               }
+
+               $params = func_get_args();
+               array_shift( $params ); // we already know the $parser ...
+               $args = EDUtils::parseParams( $params ); // parse params into 
name-value pairs
+               if ( array_key_exists( 'file', $args ) ) {
+                       $file = $args['file'];
+               } elseif ( array_key_exists( 'directory', $args ) ) {
+                       $directory = $args['directory'];
+                       if ( array_key_exists( 'file name', $args ) ) {
+                               $fileName = $args['file name'];
+                       } else {
+                               return EDUtils::formatErrorMessage( wfMessage( 
'externaldata-no-param-specified', 'file name')->parse() );
+                       }
+               } else {
+                       return EDUtils::formatErrorMessage( wfMessage( 
'externaldata-no-param-specified', 'file|directory')->parse() );
+               }
+
+               if ( array_key_exists( 'format', $args ) ) {
+                       $format = strtolower( $args['format'] );
+               } else {
+                       $format = '';
+               }
+               if ( $format == 'xml' ) {
+                       if ( array_key_exists( 'use xpath', $args ) ) {
+                               // Somewhat of a hack - store the fact that
+                               // we're using XPath within the format, even
+                               // though the format is still XML.
+                               $format = 'xml with xpath';
+                       }
+               }
+
+               if ( array_key_exists( 'data', $args ) ) {
+                       // parse the 'data' arg into mappings
+                       if ( $format == 'xml with xpath' ) {
+                               $mappings = EDUtils::paramToArray( 
$args['data'], false, false );
+                       } else {
+                               $mappings = EDUtils::paramToArray( 
$args['data'], false, true );
+                       }
+               } else {
+                       return EDUtils::formatErrorMessage( wfMessage( 
'externaldata-no-param-specified', 'data')->parse() );
+               }
+
+               if ( array_key_exists( 'cache seconds', $args) ) {
+                       // set cache expire time
+                       $cacheExpireTime = $args['cache seconds'];
+               } else {
+                       $cacheExpireTime = $edgCacheExpireTime;
+               }
+
+               if ( isset( $file ) ) {
+                       $external_values = EDUtils::getDataFromFile( $file, 
$format, $mappings );
+               } else {
+                       $external_values = EDUtils::getDataFromDirectory( 
$directory, $fileName, $format, $mappings );
+               }
+
+               if ( is_string( $external_values ) ) {
+                       // It's an error message - display it on the screen.
+                       return EDUtils::formatErrorMessage( $external_values );
+               }
+               if ( count( $external_values ) == 0 ) {
+                       return;
+               }
+
+               if ( array_key_exists( 'filters', $args ) ) {
+                       // parse the 'filters' arg
+                       $filters = EDUtils::paramToArray( $args['filters'], 
true, false );
+               } else {
+                       $filters = array();
+               }
+
+               self::setGlobalValuesArray( $external_values, $filters, 
$mappings );
+       }
+       /**
         * Render the #get_soap_data parser function.
         */
        static function doGetSOAPData( &$parser ) {
diff --git a/ED_Utils.php b/ED_Utils.php
index a41f45c..9535414 100644
--- a/ED_Utils.php
+++ b/ED_Utils.php
@@ -81,7 +81,7 @@
                // 
http://stackoverflow.com/questions/1373735/regexp-split-string-by-commas-and-spaces-but-ignore-the-inside-quotes-and-parent#1381895
                // ...with modifications by Nick Lindridge, ionCube Ltd.
                $pattern = <<<END
-        /
+               /
        [,]
        (?=(?:(?:[^"]*"){2})*[^"]*$)
        (?=(?:(?:[^']*'){2})*[^']*$)
@@ -530,7 +530,7 @@
                return array_filter( $nodes, array( 'EDUtils', 'isNodeNotEmpty' 
) );
        }
 
-       static function getXPathData( $xml, $mappings, $url ) {
+       static function getXPathData( $xml, $mappings, $ns ) {
                global $edgXMLValues;
 
                $edgXMLValues = array();
@@ -542,7 +542,7 @@
                        $matches = array();
                        preg_match_all( '/[\/\@]([a-zA-Z0-9]*):/', $xpath, 
$matches );
                        foreach ( $matches[1] as $namespace ) {
-                               $sxml->registerXPathNamespace( $namespace, $url 
);
+                               $sxml->registerXPathNamespace( $namespace, $ns 
);
                        }
 
                        // Now, get all the matching values, and remove any
@@ -865,6 +865,24 @@
                }
        }
 
+       static private function getData( $contents, $format, $mappings, $source 
) {
+               if ( $format == 'xml' ) {
+                       return self::getXMLData( $contents );
+               } elseif ( $format == 'xml with xpath' ) {
+                       return self::getXPathData( $contents, $mappings, 
$source );
+               } elseif ( $format == 'csv' ) {
+                       return self::getCSVData( $contents, false );
+               } elseif ( $format == 'csv with header' ) {
+                       return self::getCSVData( $contents, true );
+               } elseif ( $format == 'json' ) {
+                       return self::getJSONData( $contents );
+               } elseif ( $format == 'gff' ) {
+                       return self::getGFFData( $contents );
+               } else {
+                       return wfMessage( 'externaldata-web-invalid-format', 
$format )->text();
+               }
+       }
+
        /**
         * Checks whether this URL is allowed, based on the
         * $edgAllowExternalDataFrom whitelist
@@ -899,22 +917,43 @@
                        return "No contents found at URL $url.";
                }
 
-               if ( $format == 'xml' ) {
-                       return self::getXMLData( $url_contents );
-               } elseif ( $format == 'xml with xpath' ) {
-                       return self::getXPathData( $url_contents, $mappings, 
$url );
-               } elseif ( $format == 'csv' ) {
-                       return self::getCSVData( $url_contents, false );
-               } elseif ( $format == 'csv with header' ) {
-                       return self::getCSVData( $url_contents, true );
-               } elseif ( $format == 'json' ) {
-                       return self::getJSONData( $url_contents );
-               } elseif ( $format == 'gff' ) {
-                       return self::getGFFData( $url_contents );
-               } else {
-                       return wfMessage( 'externaldata-web-invalid-format', 
$format )->text();
+               return self::getData( $url_contents, $format, $mappings, $url );
+       }
+
+       static private function getDataFromPath( $path, $format, $mappings ) {
+               $file_contents = file_get_contents( $path );
+               // Show an error message if there's nothing there.
+               if ( empty( $file_contents ) ) {
+                       return "Unable to get file contents.";
                }
-               return array();
+
+               return self::getData( $file_contents, $format, $mappings, $path 
);
+       }
+
+       static public function getDataFromFile( $file, $format, $mappings ) {
+               global $edgFilePath;
+
+               if ( array_key_exists( $file, $edgFilePath ) ) {
+                       return self::getDataFromPath( $edgFilePath[$file], 
$format, $mappings );
+               } else {
+                       return self::formatErrorMessage( "No file is set for ID 
$file." );
+               }
+       }
+
+       static public function getDataFromDirectory( $directory, $fileName, 
$format, $mappings ) {
+               global $edgDirectoryPath;
+
+               if ( array_key_exists( $directory, $edgDirectoryPath ) ) {
+                       $directoryPath = $edgDirectoryPath[$directory];
+                       $path = realpath( $directoryPath . $fileName );
+                       if ( $path !== false && strpos( $path, $directoryPath ) 
=== 0 ) {
+                               return self::getDataFromPath( $path, $format, 
$mappings );
+                       } else {
+                               return self::formatErrorMessage( "File name 
$fileName not allowed for directory ID $directory." );
+                       }
+               } else {
+                       return self::formatErrorMessage( "No directory is set 
for ID $directory." );
+               }
        }
 
        static public function getSOAPData( $url, $requestName, $requestData, 
$responseName, $mappings) {
diff --git a/ExternalData.i18n.magic.php b/ExternalData.i18n.magic.php
index d82aa76..a401258 100644
--- a/ExternalData.i18n.magic.php
+++ b/ExternalData.i18n.magic.php
@@ -8,6 +8,7 @@
 /** English (English) */
 $magicWords['en'] = array(
        'get_web_data' => array( 0, 'get_web_data' ),
+       'get_file_data' => array( 0, 'get_file_data' ),
        'get_soap_data' => array( 0, 'get_soap_data' ),
        'get_ldap_data' => array( 0, 'get_ldap_data' ),
        'get_db_data' => array( 0, 'get_db_data' ),
@@ -203,4 +204,4 @@
        'display_external_table' => array( 0, '显示外部表单' ),
        'store_external_table' => array( 0, '存储外部表单' ),
        'clear_external_data' => array( 0, '清空外部数据' ),
-);
\ No newline at end of file
+);
diff --git a/ExternalData.php b/ExternalData.php
index f889ce1..d44f48d 100644
--- a/ExternalData.php
+++ b/ExternalData.php
@@ -48,8 +48,12 @@
 $edgDBFlags = array();
 $edgDBTablePrefix = array();
 
+$edgDirectoryPath = array();
+$edgFilePath = array();
+
 function edgRegisterParser( &$parser ) {
        $parser->setFunctionHook( 'get_web_data', array( 'EDParserFunctions', 
'doGetWebData' ) );
+       $parser->setFunctionHook( 'get_file_data', array( 'EDParserFunctions', 
'doGetFileData' ) );
        $parser->setFunctionHook( 'get_soap_data', array( 'EDParserFunctions', 
'doGetSOAPData' ) );
        $parser->setFunctionHook( 'get_ldap_data', array( 'EDParserFunctions', 
'doGetLDAPData' ) );
        $parser->setFunctionHook( 'get_db_data', array( 'EDParserFunctions', 
'doGetDBData' ) );
diff --git a/README b/README
index b0d5038..e3f69b7 100644
--- a/README
+++ b/README
@@ -21,6 +21,9 @@
 * #get_web_data retrieves the data from a URL that holds CSV, GFF,
 JSON or XML, and assigns it to local variables or arrays.
 
+* #get_file_data retrieves the data from a local file and accepts
+the same formats as #get_web_data.
+
 * #get_soap_data retrieves data from a URL via SOAP.
 
 * #get_db_data retrieves data from a database, using (in most cases)

-- 
To view, visit https://gerrit.wikimedia.org/r/147991
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Id60107bb20803d225913163a92f3e2e9bac27ae2
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/ExternalData
Gerrit-Branch: master
Gerrit-Owner: Scott-linder <[email protected]>
Gerrit-Reviewer: Scott-linder <[email protected]>
Gerrit-Reviewer: Yaron Koren <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to