Yaron Koren has submitted this change and it was merged.
Change subject: Add #get_file_data for local files.
......................................................................
Add #get_file_data for local files.
Change-Id: Id60107bb20803d225913163a92f3e2e9bac27ae2
---
M ED_ParserFunctions.php
M ED_Utils.php
M ExternalData.i18n.magic.php
M ExternalData.php
M README
5 files changed, 151 insertions(+), 19 deletions(-)
Approvals:
Yaron Koren: Verified; Looks good to me, approved
diff --git a/ED_ParserFunctions.php b/ED_ParserFunctions.php
index 3c16d6f..49173c0 100644
--- a/ED_ParserFunctions.php
+++ b/ED_ParserFunctions.php
@@ -138,6 +138,91 @@
}
/**
+ * Render the #get_file_data parser function.
+ */
+ static function doGetFileData( &$parser ) {
+ global $edgCurPageName, $edgValues, $edgCacheExpireTime;
+
+ // If we're handling multiple pages, reset $edgValues
+ // when we move from one page to another.
+ $cur_page_name = $parser->getTitle()->getText();
+ if ( ! isset( $edgCurPageName ) || $edgCurPageName !=
$cur_page_name ) {
+ $edgValues = array();
+ $edgCurPageName = $cur_page_name;
+ }
+
+ $params = func_get_args();
+ array_shift( $params ); // we already know the $parser ...
+ $args = EDUtils::parseParams( $params ); // parse params into
name-value pairs
+ if ( array_key_exists( 'file', $args ) ) {
+ $file = $args['file'];
+ } elseif ( array_key_exists( 'directory', $args ) ) {
+ $directory = $args['directory'];
+ if ( array_key_exists( 'file name', $args ) ) {
+ $fileName = $args['file name'];
+ } else {
+ return EDUtils::formatErrorMessage( wfMessage(
'externaldata-no-param-specified', 'file name')->parse() );
+ }
+ } else {
+ return EDUtils::formatErrorMessage( wfMessage(
'externaldata-no-param-specified', 'file|directory')->parse() );
+ }
+
+ if ( array_key_exists( 'format', $args ) ) {
+ $format = strtolower( $args['format'] );
+ } else {
+ $format = '';
+ }
+ if ( $format == 'xml' ) {
+ if ( array_key_exists( 'use xpath', $args ) ) {
+ // Somewhat of a hack - store the fact that
+ // we're using XPath within the format, even
+ // though the format is still XML.
+ $format = 'xml with xpath';
+ }
+ }
+
+ if ( array_key_exists( 'data', $args ) ) {
+ // parse the 'data' arg into mappings
+ if ( $format == 'xml with xpath' ) {
+ $mappings = EDUtils::paramToArray(
$args['data'], false, false );
+ } else {
+ $mappings = EDUtils::paramToArray(
$args['data'], false, true );
+ }
+ } else {
+ return EDUtils::formatErrorMessage( wfMessage(
'externaldata-no-param-specified', 'data')->parse() );
+ }
+
+ if ( array_key_exists( 'cache seconds', $args) ) {
+ // set cache expire time
+ $cacheExpireTime = $args['cache seconds'];
+ } else {
+ $cacheExpireTime = $edgCacheExpireTime;
+ }
+
+ if ( isset( $file ) ) {
+ $external_values = EDUtils::getDataFromFile( $file,
$format, $mappings );
+ } else {
+ $external_values = EDUtils::getDataFromDirectory(
$directory, $fileName, $format, $mappings );
+ }
+
+ if ( is_string( $external_values ) ) {
+ // It's an error message - display it on the screen.
+ return EDUtils::formatErrorMessage( $external_values );
+ }
+ if ( count( $external_values ) == 0 ) {
+ return;
+ }
+
+ if ( array_key_exists( 'filters', $args ) ) {
+ // parse the 'filters' arg
+ $filters = EDUtils::paramToArray( $args['filters'],
true, false );
+ } else {
+ $filters = array();
+ }
+
+ self::setGlobalValuesArray( $external_values, $filters,
$mappings );
+ }
+ /**
* Render the #get_soap_data parser function.
*/
static function doGetSOAPData( &$parser ) {
diff --git a/ED_Utils.php b/ED_Utils.php
index a41f45c..9535414 100644
--- a/ED_Utils.php
+++ b/ED_Utils.php
@@ -81,7 +81,7 @@
//
http://stackoverflow.com/questions/1373735/regexp-split-string-by-commas-and-spaces-but-ignore-the-inside-quotes-and-parent#1381895
// ...with modifications by Nick Lindridge, ionCube Ltd.
$pattern = <<<END
- /
+ /
[,]
(?=(?:(?:[^"]*"){2})*[^"]*$)
(?=(?:(?:[^']*'){2})*[^']*$)
@@ -530,7 +530,7 @@
return array_filter( $nodes, array( 'EDUtils', 'isNodeNotEmpty'
) );
}
- static function getXPathData( $xml, $mappings, $url ) {
+ static function getXPathData( $xml, $mappings, $ns ) {
global $edgXMLValues;
$edgXMLValues = array();
@@ -542,7 +542,7 @@
$matches = array();
preg_match_all( '/[\/\@]([a-zA-Z0-9]*):/', $xpath,
$matches );
foreach ( $matches[1] as $namespace ) {
- $sxml->registerXPathNamespace( $namespace, $url
);
+ $sxml->registerXPathNamespace( $namespace, $ns
);
}
// Now, get all the matching values, and remove any
@@ -865,6 +865,24 @@
}
}
+ static private function getData( $contents, $format, $mappings, $source
) {
+ if ( $format == 'xml' ) {
+ return self::getXMLData( $contents );
+ } elseif ( $format == 'xml with xpath' ) {
+ return self::getXPathData( $contents, $mappings,
$source );
+ } elseif ( $format == 'csv' ) {
+ return self::getCSVData( $contents, false );
+ } elseif ( $format == 'csv with header' ) {
+ return self::getCSVData( $contents, true );
+ } elseif ( $format == 'json' ) {
+ return self::getJSONData( $contents );
+ } elseif ( $format == 'gff' ) {
+ return self::getGFFData( $contents );
+ } else {
+ return wfMessage( 'externaldata-web-invalid-format',
$format )->text();
+ }
+ }
+
/**
* Checks whether this URL is allowed, based on the
* $edgAllowExternalDataFrom whitelist
@@ -899,22 +917,43 @@
return "No contents found at URL $url.";
}
- if ( $format == 'xml' ) {
- return self::getXMLData( $url_contents );
- } elseif ( $format == 'xml with xpath' ) {
- return self::getXPathData( $url_contents, $mappings,
$url );
- } elseif ( $format == 'csv' ) {
- return self::getCSVData( $url_contents, false );
- } elseif ( $format == 'csv with header' ) {
- return self::getCSVData( $url_contents, true );
- } elseif ( $format == 'json' ) {
- return self::getJSONData( $url_contents );
- } elseif ( $format == 'gff' ) {
- return self::getGFFData( $url_contents );
- } else {
- return wfMessage( 'externaldata-web-invalid-format',
$format )->text();
+ return self::getData( $url_contents, $format, $mappings, $url );
+ }
+
+ static private function getDataFromPath( $path, $format, $mappings ) {
+ $file_contents = file_get_contents( $path );
+ // Show an error message if there's nothing there.
+ if ( empty( $file_contents ) ) {
+ return "Unable to get file contents.";
}
- return array();
+
+ return self::getData( $file_contents, $format, $mappings, $path
);
+ }
+
+ static public function getDataFromFile( $file, $format, $mappings ) {
+ global $edgFilePath;
+
+ if ( array_key_exists( $file, $edgFilePath ) ) {
+ return self::getDataFromPath( $edgFilePath[$file],
$format, $mappings );
+ } else {
+ return self::formatErrorMessage( "No file is set for ID
$file." );
+ }
+ }
+
+ static public function getDataFromDirectory( $directory, $fileName,
$format, $mappings ) {
+ global $edgDirectoryPath;
+
+ if ( array_key_exists( $directory, $edgDirectoryPath ) ) {
+ $directoryPath = $edgDirectoryPath[$directory];
+ $path = realpath( $directoryPath . $fileName );
+ if ( $path !== false && strpos( $path, $directoryPath )
=== 0 ) {
+ return self::getDataFromPath( $path, $format,
$mappings );
+ } else {
+ return self::formatErrorMessage( "File name
$fileName not allowed for directory ID $directory." );
+ }
+ } else {
+ return self::formatErrorMessage( "No directory is set
for ID $directory." );
+ }
}
static public function getSOAPData( $url, $requestName, $requestData,
$responseName, $mappings) {
diff --git a/ExternalData.i18n.magic.php b/ExternalData.i18n.magic.php
index d82aa76..a401258 100644
--- a/ExternalData.i18n.magic.php
+++ b/ExternalData.i18n.magic.php
@@ -8,6 +8,7 @@
/** English (English) */
$magicWords['en'] = array(
'get_web_data' => array( 0, 'get_web_data' ),
+ 'get_file_data' => array( 0, 'get_file_data' ),
'get_soap_data' => array( 0, 'get_soap_data' ),
'get_ldap_data' => array( 0, 'get_ldap_data' ),
'get_db_data' => array( 0, 'get_db_data' ),
@@ -203,4 +204,4 @@
'display_external_table' => array( 0, '显示外部表单' ),
'store_external_table' => array( 0, '存储外部表单' ),
'clear_external_data' => array( 0, '清空外部数据' ),
-);
\ No newline at end of file
+);
diff --git a/ExternalData.php b/ExternalData.php
index f889ce1..d44f48d 100644
--- a/ExternalData.php
+++ b/ExternalData.php
@@ -48,8 +48,12 @@
$edgDBFlags = array();
$edgDBTablePrefix = array();
+$edgDirectoryPath = array();
+$edgFilePath = array();
+
function edgRegisterParser( &$parser ) {
$parser->setFunctionHook( 'get_web_data', array( 'EDParserFunctions',
'doGetWebData' ) );
+ $parser->setFunctionHook( 'get_file_data', array( 'EDParserFunctions',
'doGetFileData' ) );
$parser->setFunctionHook( 'get_soap_data', array( 'EDParserFunctions',
'doGetSOAPData' ) );
$parser->setFunctionHook( 'get_ldap_data', array( 'EDParserFunctions',
'doGetLDAPData' ) );
$parser->setFunctionHook( 'get_db_data', array( 'EDParserFunctions',
'doGetDBData' ) );
diff --git a/README b/README
index b0d5038..e3f69b7 100644
--- a/README
+++ b/README
@@ -21,6 +21,9 @@
* #get_web_data retrieves the data from a URL that holds CSV, GFF,
JSON or XML, and assigns it to local variables or arrays.
+* #get_file_data retrieves the data from a local file and accepts
+the same formats as #get_web_data.
+
* #get_soap_data retrieves data from a URL via SOAP.
* #get_db_data retrieves data from a database, using (in most cases)
--
To view, visit https://gerrit.wikimedia.org/r/147991
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Id60107bb20803d225913163a92f3e2e9bac27ae2
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/ExternalData
Gerrit-Branch: master
Gerrit-Owner: Scott-linder <[email protected]>
Gerrit-Reviewer: Scott-linder <[email protected]>
Gerrit-Reviewer: Yaron Koren <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits