Platonides has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/311878

Change subject: Allow the special string «NUL» to replace a literal byte \x00 
in equivset.in
......................................................................

Allow the special string «NUL» to replace a literal byte \x00 in equivset.in

A literal NUL byte in the file prevented it from being stored on MediaWiki
and made some text editors unable to edit the file (or caused them to
truncate it).

Change-Id: I56904fbf4c0d9ac3d4ca9562713dbf459d03b44c
---
M maintenance/equivset.in
M maintenance/generateEquivset.php
2 files changed, 31 insertions(+), 32 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/AntiSpoof 
refs/changes/78/311878/1

diff --git a/maintenance/equivset.in b/maintenance/equivset.in
index 055ed7d..e37d9a5 100644
--- a/maintenance/equivset.in
+++ b/maintenance/equivset.in
@@ -854,7 +854,7 @@
 20A6 ₦ => 4E N
 20A7 ₧ => 50 P
 20A9 ₩ => 57 W
-200B ​ => 0 
+200B ​ => 0 NUL
 2127 ℧ => 1B1 Ʊ
 2146 ⅆ => 44 D
 217C ⅼ => 4C L
diff --git a/maintenance/generateEquivset.php b/maintenance/generateEquivset.php
index 31104d9..db40e43 100644
--- a/maintenance/generateEquivset.php
+++ b/maintenance/generateEquivset.php
@@ -65,7 +65,7 @@
                        ++$lineNum;
                        $mapToEmpty = false;
 
-                       # Whether the line ends with a null character
+                       # Whether the line ends with a nul character
                        $mapToEmpty = ( strpos( $line, "\0" ) === strlen( $line 
) - 2 );
 
                        $line = trim( $line );
@@ -85,37 +85,36 @@
                        }
                        $error = false;
 
-                       if ( $mapToEmpty ) {
+                       if ( codepointToUtf8( hexdec( $m['hexleft'] ) ) != 
$m['charleft'] ) {
+                               $actual = utf8ToCodepoint( $m['charleft'] );
+                               if ( $actual === false ) {
+                                       $this->output( "Bytes: " . strlen( 
$m['charleft'] ) . "\n" );
+                                       $this->output( bin2hex( $line ) . "\n" 
);
+                                       $hexForm = bin2hex( $m['charleft'] );
+                                       $this->output( "Invalid UTF-8 character 
\"{$m['charleft']}\" ($hexForm) at line $lineNum: $line\n" );
+                               } else {
+                                       $this->output( "Error: left number 
({$m['hexleft']}) does not match left character ($actual) " .
+                                                       "at line $lineNum: 
$line\n" );
+                               }
+                               $error = true;
+                       }
+                       if ( !empty( $m['hexright'] ) && codepointToUtf8( 
hexdec( $m['hexright'] ) ) != $m['charright'] ) {
+                               $actual = utf8ToCodepoint( $m['charright'] );
+                               if ( $actual === false ) {
+                                       $hexForm = bin2hex( $m['charright'] );
+                                       $this->output( "Invalid UTF-8 character 
\"{$m['charleft']}\" ($hexForm) at line $lineNum: $line\n" );
+                               } else {
+                                       $this->output( "Error: right number 
({$m['hexright']}) does not match right character ($actual) " .
+                                                       "at line $lineNum: 
$line\n" );
+                               }
+                               $error = true;
+                       }
+                       if ( $error ) {
+                               $exitStatus = 1;
+                               continue;
+                       }
+                       if ( $mapToEmpty || $m['charright'] == 'NUL' ) {
                                $m['charright'] = '';
-                       } else {
-                               if ( codepointToUtf8( hexdec( $m['hexleft'] ) ) 
!= $m['charleft'] ) {
-                                       $actual = utf8ToCodepoint( 
$m['charleft'] );
-                                       if ( $actual === false ) {
-                                               $this->output( "Bytes: " . 
strlen( $m['charleft'] ) . "\n" );
-                                               $this->output( bin2hex( $line ) 
. "\n" );
-                                               $hexForm = bin2hex( 
$m['charleft'] );
-                                               $this->output( "Invalid UTF-8 
character \"{$m['charleft']}\" ($hexForm) at line $lineNum: $line\n" );
-                                       } else {
-                                               $this->output( "Error: left 
number ({$m['hexleft']}) does not match left character ($actual) " .
-                                                               "at line 
$lineNum: $line\n" );
-                                       }
-                                       $error = true;
-                               }
-                               if ( !empty( $m['hexright'] ) && 
codepointToUtf8( hexdec( $m['hexright'] ) ) != $m['charright'] ) {
-                                       $actual = utf8ToCodepoint( 
$m['charright'] );
-                                       if ( $actual === false ) {
-                                               $hexForm = bin2hex( 
$m['charright'] );
-                                               $this->output( "Invalid UTF-8 
character \"{$m['charleft']}\" ($hexForm) at line $lineNum: $line\n" );
-                                       } else {
-                                               $this->output( "Error: right 
number ({$m['hexright']}) does not match right character ($actual) " .
-                                                               "at line 
$lineNum: $line\n" );
-                                       }
-                                       $error = true;
-                               }
-                               if ( $error ) {
-                                       $exitStatus = 1;
-                                       continue;
-                               }
                        }
 
                        # Find the set for the right character, add a new one 
if necessary

-- 
To view, visit https://gerrit.wikimedia.org/r/311878
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I56904fbf4c0d9ac3d4ca9562713dbf459d03b44c
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/AntiSpoof
Gerrit-Branch: master
Gerrit-Owner: Platonides <platoni...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to