Lucas Werkmeister (WMDE) has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/366883 )
Change subject: Detect overlong constraint parameters on DB insert ...................................................................... Detect overlong constraint parameters on DB insert If a constraint has too many parameters, they can be too long to store in the database. On Wikidata, this happens for a few properties with very long exception lists, e. g. taxon name [1]. We can detect this case, but since constraints are typically imported from the constraintsTableUpdate job, we can’t directly report it to the user. Instead, we record it in the parameters as a special "@error" parameter. A second commit (Ia11092746c) teaches the rest of the extension to properly report this error when a constraint check happens. The error detection has to work both when the database is in strict mode – e. g. on a local development wiki: in this case the insert throws an exception – and when it isn’t and the value is silently truncated, as is the case on Wikimedia systems (T108255). In the first case, we can check if the exception message indicates truncation; in the second, we can directly check if the value stored in the database is a truncated version of the real value, and replace it with the error indicator. [1]: https://www.wikidata.org/w/index.php?title=Property:P225&oldid=524136584 Bug: T171295 Change-Id: Ica42a3c0e6af6111b23c6a2e986db44d5ff51294 --- M includes/ConstraintRepository.php M tests/phpunit/ConstraintRepositoryTest.php 2 files changed, 148 insertions(+), 4 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikibaseQualityConstraints refs/changes/83/366883/1 diff --git a/includes/ConstraintRepository.php b/includes/ConstraintRepository.php index 497e5c1..35edd4f 100644 --- a/includes/ConstraintRepository.php +++ b/includes/ConstraintRepository.php @@ -4,6 +4,8 @@ use InvalidArgumentException; use MediaWiki\Logger\LoggerFactory; +use Wikimedia\Rdbms\Database; +use Wikimedia\Rdbms\DBQueryError; use Wikimedia\Rdbms\DBUnexpectedError; use Wikimedia\Rdbms\LikeMatch; use Wikibase\DataModel\Entity\PropertyId; @@ -37,23 +39,119 @@ * @param Constraint[] $constraints * * @throws DBUnexpectedError - * @return bool */ public function insertBatch( array $constraints ) { + $constraintParametersByConstraintId = []; $accumulator = array_map( - function ( Constraint $constraint ) { + function ( Constraint $constraint ) use ( &$constraintParametersByConstraintId ) { + $constraintParameters = json_encode( $constraint->getConstraintParameters(), JSON_FORCE_OBJECT ); + $constraintParametersByConstraintId[$constraint->getConstraintId()] = $constraintParameters; return [ 'constraint_guid' => $constraint->getConstraintId(), 'pid' => $constraint->getPropertyId()->getNumericId(), 'constraint_type_qid' => $constraint->getConstraintTypeItemId(), - 'constraint_parameters' => json_encode( $constraint->getConstraintParameters(), JSON_FORCE_OBJECT ) + 'constraint_parameters' => $constraintParameters ]; }, $constraints ); $db = wfGetDB( DB_MASTER ); - return $db->insert( CONSTRAINT_TABLE, $accumulator ); + try { + $db->insert( CONSTRAINT_TABLE, $accumulator ); + } catch ( DBQueryError $e ) { + $this->handleInsertBatchQueryError( $constraints, $db, $e ); + } + + $this->checkInsertedBatch( $db, $constraintParametersByConstraintId ); + } + + /** + * Handle a query error that occurred in insertBatch(). + * Since we cannot do any useful error reporting in insertBatch + * (it is usually called from a job, so no user-facing output), + * we record the error in the constraint parameters. + * ConstraintParameterParser can then later read it from there and report it. + * + * @param Constraint[] $constraints + * @param Database $db + * @param DBQueryError $e + */ + private function handleInsertBatchQueryError( array $constraints, Database $db, DBQueryError $e ) { + // in batch mode, we have no idea which constraint the error affects, so break it down + $constraintCount = count( $constraints ); + if ( $constraintCount !== 1 ) { + foreach ( array_chunk( $constraints, $constraintCount / 2 ) as $half ) { + $this->insertBatch( $half ); + } + } else { + // record the error in the constraint parameters so it can be reported later + $constraint = $constraints[0]; + $error = []; + if ( strpos( $e->getMessage(), "Data too long for column 'constraint_parameters'" ) !== false ) { + $error['truncated'] = true; + } + $db->insert( CONSTRAINT_TABLE, [ + 'constraint_guid' => $constraint->getConstraintId(), + 'pid' => $constraint->getPropertyId()->getNumericId(), + 'constraint_type_qid' => $constraint->getConstraintTypeItemId(), + 'constraint_parameters' => json_encode( [ '@error' => $error ], JSON_FORCE_OBJECT ) + ] ); + } + } + + /** + * Check that constraints were correctly inserted into the database. + * If they were not, record an error in the constraint parameters + * just like handleInsertBatchQueryError(). + * + * Due to T108255, it’s possible that the constraint parameters were silently truncated + * without an error being thrown that handleInsertBatchQueryError could handle. + * + * @param Database $db + * @param string[] $constraintParametersByConstraintId Associative array from constraint ID to constraint parameters (JSON string) + * + * @throws DBUnexpectedError + */ + private function checkInsertedBatch( Database $db, array $constraintParametersByConstraintId ) { + $constraintParametersFromDb = $db->select( + CONSTRAINT_TABLE, + [ 'constraint_guid', 'constraint_parameters' ], + [ 'constraint_guid' => array_keys( $constraintParametersByConstraintId ) ] + ); + + foreach ( $constraintParametersFromDb as $row ) { + $dbConstraintId = $row->constraint_guid; + $dbConstraintParameters = $row->constraint_parameters; + $constraintParameters = $constraintParametersByConstraintId[$dbConstraintId]; + + if ( $dbConstraintParameters !== $constraintParameters ) { + // constraint parameters were not inserted correctly + $error = []; + $decodedDbConstraintParameters = json_decode( $dbConstraintParameters, true ); + + if ( + $decodedDbConstraintParameters !== null && + array_key_exists( '@error', $decodedDbConstraintParameters ) + ) { + // error already detected in insertBatch, copy error object + $error = $decodedDbConstraintParameters['@error']; + } elseif ( + strpos( $constraintParameters, $dbConstraintParameters ) === 0 + ) { + // database content begins with correct constraint parameters, was truncated + $error['truncated'] = true; + } + + // discard the original parameters and record an error object instead + // (ConstraintParameterParser can then report this as a parameter error later) + $db->update( + CONSTRAINT_TABLE, + [ 'constraint_parameters' => json_encode( [ '@error' => $error ], JSON_FORCE_OBJECT ) ], + [ 'constraint_guid' => $dbConstraintId ] + ); + } + } } /** diff --git a/tests/phpunit/ConstraintRepositoryTest.php b/tests/phpunit/ConstraintRepositoryTest.php index 065211e..5c02e43 100644 --- a/tests/phpunit/ConstraintRepositoryTest.php +++ b/tests/phpunit/ConstraintRepositoryTest.php @@ -94,6 +94,52 @@ ); } + public function testInsertBatchTooLongParameters() { + $this->db->delete( CONSTRAINT_TABLE, '*' ); + + $constraintParameters = [ 'known_exception' => [] ]; + for ( $i = 0; $i < 10000; $i++ ) { + $constraintParameters['known_exception'][] = [ + 'snaktype' => 'value', 'property' => 'P2303', + 'hash' => '1deb3a3ba50cbbf2672b0def6c9e96bcf3f533e5', 'datavalue' => [ + 'value' => [ + 'entity-type' => 'item', 'numeric-id' => 2961108, 'id' => 'Q2961108' + ], + 'type' => 'wikibase-entityid' + ], + ]; + } + + $repo = new ConstraintRepository(); + $repo->insertBatch( [ + new Constraint( + 'P1$13510cdc-0f91-4ea3-b71d-db2a33c27dff', + new PropertyId( 'P1' ), + 'Q1', + $constraintParameters + ) + ] ); + + $this->assertSelect( + CONSTRAINT_TABLE, + [ + 'constraint_guid', + 'pid', + 'constraint_type_qid', + 'constraint_parameters' + ], + [], + [ + [ + 'P1$13510cdc-0f91-4ea3-b71d-db2a33c27dff', + '1', + 'Q1', + '{"@error":{"truncated":true}}' + ] + ] + ); + } + public function testDeleteAll() { $this->insertTestData(); -- To view, visit https://gerrit.wikimedia.org/r/366883 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ica42a3c0e6af6111b23c6a2e986db44d5ff51294 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/WikibaseQualityConstraints Gerrit-Branch: master Gerrit-Owner: Lucas Werkmeister (WMDE) <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
