Rillke has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/141241

Change subject: Attaching Hooks to tweak MIME type detection
......................................................................

Attaching Hooks to tweak MIME type detection

This is the successor of Icf9eec10bec7c0a7e.

PHP's own fileinfo module is not capable of detecting Chemical
table files. Instead, they are reported as text/plain.

MediaHandlers can be attached by MIME type only. That's why these
changes are required for [[Extension:MolHandler]] to work.

Change-Id: I5f7a56ccfbf7739e8cdbfa3b547e8f36653e6d86
---
A MolHandler.mime.php
M MolHandler.php
2 files changed, 139 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/MolHandler 
refs/changes/41/141241/1

diff --git a/MolHandler.mime.php b/MolHandler.mime.php
new file mode 100755
index 0000000..525536d
--- /dev/null
+++ b/MolHandler.mime.php
@@ -0,0 +1,130 @@
+<?php
+/**
+ * Handler for Chemical table files
+ *
+ * Hooks for MIME detection.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ * @file
+ * @ingroup Media
+ */
+
+class MolHandlerMime {
+
+       /**
+        * @param File $file
+        * @return bool
+        */
+       public static function onMimeMagicCustomInfo( $mimeMagic, $addToList ) {
+               static $extraInfo =
+                       'chemical/x-mdl-molfile [DRAWING]
+                       chemical/x-mdl-sdfile [DRAWING]
+                       chemical/x-mdl-rxnfile [DRAWING]
+                       chemical/x-mdl-rdfile [DRAWING]
+                       chemical/x-mdl-rgfile [DRAWING]';
+
+               $addToList( $extraInfo );
+               return true;
+       }
+
+       public static function onMimeMagicCustomTypes( $mimeMagic, $addToList ) 
{
+               static $extraTypes =
+                       'chemical/x-mdl-molfile mol
+                       chemical/x-mdl-sdfile sdf
+                       chemical/x-mdl-rxnfile rxn
+                       chemical/x-mdl-rdfile rd
+                       chemical/x-mdl-rgfile rg';
+
+               $addToList( $extraTypes );
+               return true;
+       }
+
+       private static function  isChemFileExtension( $extension ) {
+               static $types = array(
+                       'mol', 'sdf', 'rxn', 'rd', 'rg',
+               );
+               return in_array( strtolower( $extension ), $types );
+       }
+
+       public static function onMimeMagicImproveFromExtension( $mimeMagic, 
$ext, &$mime ) {
+               if ( ( $mime === 'text/plain' ) && self::isChemFileExtension( 
$ext ) ) {
+                       $mime = $mimeMagic->guessTypesForExtension( $ext );
+               }
+               return true;
+       }
+
+       /**
+        * Guess chemical mime types from file contents.
+        *
+        * @param string $head
+        * @param string $tail
+        * @return bool|string Mime type
+        */
+       private static function doGuessChemicalMime( $head, $tail, $file ) {
+               # Note that a lot of chemical table files contain embedded 
molfiles.
+               # Therefore, always check for them before checking for molfiles!
+               static $headers = array(
+                       '$RXN'                              => 
'chemical/x-mdl-rxnfile',
+                       '$RDFILE '                          => 
'chemical/x-mdl-rdfile',
+                       '$MDL'                              => 
'chemical/x-mdl-rgfile',
+               );
+               static $tailsRegExps = array(
+                       # MDL-Molfile with all kind of line endings
+                       '/\n\s*$$$$\s*$/'                   => 
'chemical/x-mdl-sdfile',
+                       '/\n\s*M  END\s*$/'                 => 
'chemical/x-mdl-molfile',
+               );
+               static $headersRegExps = array(
+                       # MDL-Molfile counts line
+                       # #atoms #bond_numbers #atom_lists [obsolete] 
[999|#property_lines] <version>
+                       '/\n(\s*\d{1,3}\s+){3}[^\n]*(?:\d+\s+){1,12}V\d{4,5}\n/'
+                               => 'chemical/x-mdl-molfile',
+               );
+
+               # Compare headers
+               foreach ( $headers as $magic => $candidate ) {
+                       if ( strncmp( $head, $magic, strlen( $magic ) ) === 0 ) 
{
+                               wfDebug( __METHOD__ .
+                                       ": magic header in $file recognized as 
$candidate\n" );
+                               return $candidate;
+                       }
+               }
+
+               # Match tails
+               foreach ( $tailsRegExps as $regExp => $candidate ) {
+                       if ( preg_match( $regExp, $tail ) ) {
+                               wfDebug( __METHOD__ .
+                                       ": $file tail recognized by regexp as 
$candidate\n" );
+                               return $candidate;
+                       }
+               }
+
+               # Match headers
+               foreach ( $headersRegExps as $regExp => $candidate ) {
+                       if ( preg_match( $regExp, $head ) ) {
+                               wfDebug( __METHOD__ .
+                                       ": $file head recognized by regexp as 
$candidate\n" );
+                               return $candidate;
+                       }
+               }
+
+               return false;
+       }
+
+       public static function onMimeMagicGuessFromContent( $mimeMagic, &$head, 
&$tail, $file, &$mime ) {
+               $mime = self::doGuessChemicalMime( $head, $tail, $file );
+               return true;
+       }
+}
diff --git a/MolHandler.php b/MolHandler.php
index 5cfc891..161c198 100644
--- a/MolHandler.php
+++ b/MolHandler.php
@@ -69,8 +69,15 @@
        }
 }
 
-# Require modules
-$wgAutoloadClasses['MolHandlerHooks'] = $wgMolHandlerDir . 
'MolHandlerHooks.php';
+# Register modules
+$wgAutoloadClasses['MolHandlerHooks'] = $wgMolHandlerDir . 
'MolHandler.hooks.php';
+$wgAutoloadClasses['MolHandlerMime']  = $wgMolHandlerDir . 
'MolHandler.mime.php';
 $wgAutoloadClasses['MolHandler']      = $wgMolHandlerDir . 
'MolHandler_body.php';
 $wgAutoloadClasses['MolMediaHandler'] = $wgMolHandlerDir . 
'MolMediaHandler.php';
 $wgAutoloadClasses['RxnMediaHandler'] = $wgMolHandlerDir . 
'RxnMediaHandler.php';
+
+# Hook-up MIME type detection
+$wgHooks['MimeMagicCustomInfo'][]           = 
'MolHandlerMime::onMimeMagicCustomInfo';
+$wgHooks['MimeMagicCustomTypes'][]          = 
'MolHandlerMime::onMimeMagicCustomTypes';
+$wgHooks['MimeMagicImproveFromExtension'][] = 
'MolHandlerMime::onMimeMagicImproveFromExtension';
+$wgHooks['MimeMagicGuessFromContent'][]     = 
'MolHandlerMime::onMimeMagicGuessFromContent';

-- 
To view, visit https://gerrit.wikimedia.org/r/141241
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5f7a56ccfbf7739e8cdbfa3b547e8f36653e6d86
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/MolHandler
Gerrit-Branch: master
Gerrit-Owner: Rillke <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to