jenkins-bot has submitted this change and it was merged.

Change subject: Add basic normalization for titles => keys
......................................................................


Add basic normalization for titles => keys

Bug: T146879
Change-Id: I7c8b8b230d8d345dbcaf34d50262a23a01e777f1
---
M extension.json
M src/CognateStore.php
M src/ServiceWiring.php
A src/StringNormalizer.php
M tests/phpunit/CognateStoreTest.php
A tests/phpunit/StringNormalizerTest.php
6 files changed, 74 insertions(+), 6 deletions(-)

Approvals:
  WMDE-leszek: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/extension.json b/extension.json
index 77d8e0c..ec92b36 100644
--- a/extension.json
+++ b/extension.json
@@ -11,6 +11,7 @@
   "AutoloadClasses": {
     "CognateHooks": "src/CognateHooks.php",
     "CognateStore": "src/CognateStore.php",
+    "StringNormalizer": "src/StringNormalizer.php",
     "CognatePageHookHandler": "src/hooks/CognatePageHookHandler.php"
   },
   "config": {
diff --git a/src/CognateStore.php b/src/CognateStore.php
index 53e16e0..1dffe48 100644
--- a/src/CognateStore.php
+++ b/src/CognateStore.php
@@ -14,13 +14,21 @@
         */
        private $loadBalancer;
 
+       /**
+        * @var StringNormalizer
+        */
+       private $stringNormalizer;
+
        const TITLES_TABLE_NAME = 'cognate_titles';
+
 
        /**
         * @param ILoadBalancer $loadBalancer
+        * @param StringNormalizer $stringNormalizer
         */
-       public function __construct( ILoadBalancer $loadBalancer ) {
+       public function __construct( ILoadBalancer $loadBalancer, StringNormalizer $stringNormalizer ) {
                $this->loadBalancer = $loadBalancer;
+               $this->stringNormalizer = $stringNormalizer;
        }
 
        /**
@@ -34,7 +42,7 @@
                        'cgti_site' => $siteLinkPrefix,
                        'cgti_title' => $linkTarget->getDBkey(),
                        'cgti_namespace' => $linkTarget->getNamespace(),
-                       'cgti_key' => $linkTarget->getDBkey(),// TODO normalize
+                       'cgti_key' => $this->stringNormalizer->normalize( $linkTarget->getDBkey() ),
                ];
                $dbw = $this->loadBalancer->getConnectionRef( DB_MASTER );
                $result = $dbw->insert( self::TITLES_TABLE_NAME, $pageData, __METHOD__, [ 'IGNORE' ] );
@@ -72,7 +80,7 @@
                        [ 'cgti_site' ],
                        [
                                'cgti_site != ' . $dbr->addQuotes( $siteLinkPrefix ),
-                               'cgti_key' => $linkTarget->getDBkey(),// TODO normalize
+                               'cgti_key' => $this->stringNormalizer->normalize( $linkTarget->getDBkey() ),
                                'cgti_namespace' => $linkTarget->getNamespace(),
                        ]
                );
diff --git a/src/ServiceWiring.php b/src/ServiceWiring.php
index d4c2ba2..e532368 100644
--- a/src/ServiceWiring.php
+++ b/src/ServiceWiring.php
@@ -15,7 +15,10 @@
                } else {
                        $lb = $lbFactory->getMainLB( $cognateDb );
                }
-               return new CognateStore( $lb );
+               return new CognateStore(
+                       $lb,
+                       new StringNormalizer()
+               );
        },
 
        'CognatePageHookHandler' => function( MediaWikiServices $services ) {
diff --git a/src/StringNormalizer.php b/src/StringNormalizer.php
new file mode 100644
index 0000000..a816892
--- /dev/null
+++ b/src/StringNormalizer.php
@@ -0,0 +1,27 @@
+<?php
+
+/**
+ * @license GNU GPL v2+
+ * @author Addshore
+ */
+class StringNormalizer {
+
+       private $replacements = [
+               '’' => '\'',
+               '…' => '...',
+               '_' => ' ',
+       ];
+
+       /**
+        * @param string $string
+        *
+        * @return mixed
+        */
+       public function normalize( $string ) {
+               foreach ( $this->replacements as $find => $replacement ) {
+                       $string = str_replace( $find, $replacement, $string );
+               }
+               return $string;
+       }
+
+}
diff --git a/tests/phpunit/CognateStoreTest.php b/tests/phpunit/CognateStoreTest.php
index f585ebd..59ecdf6 100644
--- a/tests/phpunit/CognateStoreTest.php
+++ b/tests/phpunit/CognateStoreTest.php
@@ -24,7 +24,7 @@
                        'cognate_titles',
                        [ 'cgti_site', 'cgti_title', 'cgti_key', 'cgti_namespace' ],
                        [ 'cgti_title != "UTPage"' ],
-                       [ [ 'en', 'My_test_page',  'My_test_page', 0 ] ]
+                       [ [ 'en', 'My_test_page',  'My test page', 0 ] ]
                );
        }
 
@@ -35,7 +35,7 @@
                        'cognate_titles',
                        [ 'cgti_site', 'cgti_title', 'cgti_key', 'cgti_namespace' ],
                        [ 'cgti_title != "UTPage"' ],
-                       [ [ 'en', 'My_second_test_page',  'My_second_test_page', 0 ] ]
+                       [ [ 'en', 'My_second_test_page',  'My second test page', 0 ] ]
                );
        }
 
diff --git a/tests/phpunit/StringNormalizerTest.php b/tests/phpunit/StringNormalizerTest.php
new file mode 100644
index 0000000..30214ff
--- /dev/null
+++ b/tests/phpunit/StringNormalizerTest.php
@@ -0,0 +1,29 @@
+<?php
+
+/**
+ * @license GNU GPL v2+
+ * @author Addshore
+ */
+class StringNormalizerTest extends MediaWikiTestCase {
+
+       public function provideNormalizations() {
+               return [
+                       [ 'JustAString', 'JustAString' ],
+                       [ 'Foo_bar', 'Foo bar' ],
+                       [ 'Apostrophe’', 'Apostrophe\'' ],
+                       [ 'ellipsis…', 'ellipsis...' ],
+               ];
+       }
+
+       /**
+        * @dataProvider provideNormalizations
+        */
+       public function testGoodNormalizations( $inputOne, $inputTwo ) {
+               $normalizer = new StringNormalizer();
+
+               $one = $normalizer->normalize( $inputOne );
+
+               $this->assertEquals( $one, $inputTwo );
+       }
+
+}
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/313003
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I7c8b8b230d8d345dbcaf34d50262a23a01e777f1
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/Cognate
Gerrit-Branch: master
Gerrit-Owner: Addshore <addshorew...@gmail.com>
Gerrit-Reviewer: Addshore <addshorew...@gmail.com>
Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Gabriel Birke <gabriel.bi...@wikimedia.de>
Gerrit-Reviewer: Legoktm <legoktm.wikipe...@gmail.com>
Gerrit-Reviewer: Tobias Gritschacher <tobias.gritschac...@wikimedia.de>
Gerrit-Reviewer: WMDE-Fisch <christoph.jau...@wikimedia.de>
Gerrit-Reviewer: WMDE-leszek <leszek.mani...@wikimedia.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to