Addshore has submitted this change and it was merged.

Change subject: More strict parsing of DD.MM.YYYY dates
......................................................................
More strict parsing of DD.MM.YYYY dates See the tests. Change-Id: Ic606e500949f81b1d15c4c621b9bba162b947814 --- M lib/includes/parsers/DateTimeParser.php M lib/tests/phpunit/parsers/DateTimeParserTest.php 2 files changed, 16 insertions(+), 18 deletions(-) Approvals: WikidataJenkins: Verified Addshore: Looks good to me, approved jenkins-bot: Checked diff --git a/lib/includes/parsers/DateTimeParser.php b/lib/includes/parsers/DateTimeParser.php index faacb6a..cd53f9f 100644 --- a/lib/includes/parsers/DateTimeParser.php +++ b/lib/includes/parsers/DateTimeParser.php @@ -61,20 +61,17 @@ // Place to put large years when they are found $largeYear = null; - try{ + try { list( $sign, $value ) = $this->eraParser->parse( $value ); - $value = $this->getValueWithFixedYearLengths( - $this->getValueWithFixedSeparators( - $this->monthUnlocalizer->unlocalize( trim( $value ) ) - ) - ); - $value = trim( $value ); + $value = $this->monthUnlocalizer->unlocalize( $value ); + $value = $this->getValueWithFixedSeparators( $value ); + $value = $this->getValueWithFixedYearLengths( $value ); // PHP's DateTime object also can't handle larger than 4 digit years, // e.g. 1 June 202020 - if( preg_match( '/^(.*\D)?(\d{5,})(.*)$/', $value, $matches ) ) { + if ( preg_match( '/^(.*\D)?(\d{5,})(.*)$/', $value, $matches ) ) { $value = $matches[1] . substr( $matches[2], -4 ) . $matches[3]; $largeYear = $matches[2]; } @@ -83,7 +80,7 @@ // Parse using the DateTime object (this will allow us to format the date in a nicer way) $dateTime = new DateTime( $value ); - if( $largeYear === null ) { + if ( $largeYear === null ) { $timeString = $sign . $dateTime->format( 'Y-m-d\TH:i:s\Z' ); } else { $timeString = $sign . $largeYear . 
$dateTime->format( '-m-d\TH:i:s\Z' ); @@ -92,8 +89,7 @@ // Pass the reformatted string into a base parser that parses this +/-Y-m-d\TH:i:s\Z format with a precision $valueParser = new \ValueParsers\TimeParser( $calendarModelParser, $options ); return $valueParser->parse( $timeString ); - } - catch( Exception $exception ) { + } catch ( Exception $exception ) { throw new ParseException( $exception->getMessage(), $rawValue, self::FORMAT_NAME ); } } @@ -126,7 +122,7 @@ * @return mixed */ private function getValueWithFixedSeparators( $value ) { - return preg_replace( '/[\s.]+/', '.', $value ); + return preg_replace( '/(?<=\d)[.\s]\s*/', '.', $value ); } /** @@ -138,22 +134,22 @@ * @return string */ private function getValueWithFixedYearLengths( $value ) { - if( preg_match( '/^(\d+)(\D)(\d+)(\D)(\d+)$/', $value, $dateParts ) ) { - if( $dateParts[1] > 31 && $dateParts[5] <= 31 ) { + if ( preg_match( '/^(\d+)(\D)(\d+)(\D)(\d+)$/', $value, $dateParts ) ) { + if ( $dateParts[1] > 31 && $dateParts[5] <= 31 ) { // the year looks like it is at the front - if( strlen( $dateParts[1] ) < 4 ) { + if ( strlen( $dateParts[1] ) < 4 ) { $value = str_pad( $dateParts[1], 4, '0', STR_PAD_LEFT ) . $dateParts[2] . $dateParts[3] . $dateParts[4] . $dateParts[5]; } } else { // presume the year is at the back - if( strlen( $dateParts[5] ) < 4 ) { + if ( strlen( $dateParts[5] ) < 4 ) { $value = $dateParts[1] . $dateParts[2] . $dateParts[3] . $dateParts[4] . str_pad( $dateParts[5], 4, '0', STR_PAD_LEFT ); } } } else { - if( preg_match( '/^(.*\D)(\d{1,3})$/', $value, $matches ) ) { + if ( preg_match( '/^(.*\D)(\d{1,3})$/', $value, $matches ) ) { $value = $matches[1] . 
str_pad( $matches[2], 4, '0', STR_PAD_LEFT ); } } diff --git a/lib/tests/phpunit/parsers/DateTimeParserTest.php b/lib/tests/phpunit/parsers/DateTimeParserTest.php index 51ce8eb..51f7e7a 100644 --- a/lib/tests/phpunit/parsers/DateTimeParserTest.php +++ b/lib/tests/phpunit/parsers/DateTimeParserTest.php @@ -72,7 +72,7 @@ array( '+0000000000002010-10-10T00:00:00Z', 0 , 0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ), '10.10.2010' => array( '+0000000000002010-10-10T00:00:00Z', 0 , 0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ), - '10. 10. 2010' => + ' 10. 10. 2010 ' => array( '+0000000000002010-10-10T00:00:00Z', 0 , 0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ), '10 10 2010' => array( '+0000000000002010-10-10T00:00:00Z', 0 , 0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ), @@ -174,6 +174,8 @@ // These are just wrong! 'June June June', '111 111 111', + '10 .10 .2010', + '10...10...2010', 'Jann 2014', '1980x', '1980s', // supported by MWTimeIsoParser -- To view, visit https://gerrit.wikimedia.org/r/156105 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ic606e500949f81b1d15c4c621b9bba162b947814 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de> Gerrit-Reviewer: Addshore <addshorew...@gmail.com> Gerrit-Reviewer: WikidataJenkins <wikidata-servi...@wikimedia.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits