Legoktm has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/323363

Change subject: De-duplicate errors and trim excessive errors in the same category
......................................................................

De-duplicate errors and trim excessive errors in the same category

Identical lint errors can be reported more than once when the exact same
error is repeated inside a template transclusion (e.g. {{1x|<b/> <b/>}}),
since the position given via dsr is the same for each. In this case, just
de-duplicate the errors since we can't differentiate them.

At the same time, trim excessive errors on the same page in the same
category. It's most likely that if a page has that many of the same
errors, the editor or bot will just fix all of them at the same time, so
we don't need to include all of them in the database. 20 is kind of a
low value, but we can always increase it later on as necessary.

Change-Id: I9cded720169870d0eea574e1a930ce4e9b190ac0
---
M includes/Database.php
M includes/RecordLintJob.php
2 files changed, 25 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Linter refs/changes/63/323363/1

diff --git a/includes/Database.php b/includes/Database.php
index a9ee972..1c2e124 100644
--- a/includes/Database.php
+++ b/includes/Database.php
@@ -26,6 +26,13 @@
  * Database logic
  */
 class Database {
+
+       /**
+        * Maximum number of errors to save per category,
+        * for a page, the rest are just dropped
+        */
+       const MAX_PER_CAT = 20;
+
        /**
         * @var int
         */
diff --git a/includes/RecordLintJob.php b/includes/RecordLintJob.php
index 01e2651..ff5ca50 100644
--- a/includes/RecordLintJob.php
+++ b/includes/RecordLintJob.php
@@ -39,15 +39,28 @@
                        return true;
                }
 
+               // [ 'category' => [ 'id' => LintError ] ]
                $errors = [];
-               foreach ( $this->params['errors'] as $error ) {
-                       $errors[] = new LintError(
-                               $error['type'],
-                               $error['params']
+               foreach ( $this->params['errors'] as $errorInfo ) {
+                       $error = new LintError(
+                               $errorInfo['type'],
+                               $errorInfo['params']
                        );
+                       // Use unique id as key to get rid of exact dupes
+                       // (e.g. same category of error in same template)
+                       $errors[$error->category][$error->id()] = $error;
                }
                $lintDb = new Database( $this->title->getArticleID() );
-               $lintDb->setForPage( $errors );
+               $toSet = [];
+               foreach ( $errors as $category => $catErrors ) {
+                       // If there are too many errors for a category, trim some of them.
+                       if ( count( $catErrors ) > $lintDb::MAX_PER_CAT ) {
+                               $catErrors = array_slice( $catErrors, 0, $lintDb::MAX_PER_CAT );
+                       }
+                       $toSet = array_merge( $toSet, $catErrors );
+               }
+
+               $lintDb->setForPage( $toSet );
                return true;
        }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/323363
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9cded720169870d0eea574e1a930ce4e9b190ac0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Linter
Gerrit-Branch: master
Gerrit-Owner: Legoktm <lego...@member.fsf.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to