Addshore has submitted this change and it was merged.

Change subject: More strict parsing of DD.MM.YYYY dates
......................................................................


More strict parsing of DD.MM.YYYY dates

See the tests.

Change-Id: Ic606e500949f81b1d15c4c621b9bba162b947814
---
M lib/includes/parsers/DateTimeParser.php
M lib/tests/phpunit/parsers/DateTimeParserTest.php
2 files changed, 16 insertions(+), 18 deletions(-)

Approvals:
  WikidataJenkins: Verified
  Addshore: Looks good to me, approved
  jenkins-bot: Checked



diff --git a/lib/includes/parsers/DateTimeParser.php b/lib/includes/parsers/DateTimeParser.php
index faacb6a..cd53f9f 100644
--- a/lib/includes/parsers/DateTimeParser.php
+++ b/lib/includes/parsers/DateTimeParser.php
@@ -61,20 +61,17 @@
                // Place to put large years when they are found
                $largeYear = null;
 
-               try{
+               try {
                        list( $sign, $value ) = $this->eraParser->parse( $value );
 
-                       $value = $this->getValueWithFixedYearLengths(
-                               $this->getValueWithFixedSeparators(
-                                       $this->monthUnlocalizer->unlocalize( trim( $value ) )
-                               )
-                       );
-
                        $value = trim( $value );
+                       $value = $this->monthUnlocalizer->unlocalize( $value );
+                       $value = $this->getValueWithFixedSeparators( $value );
+                       $value = $this->getValueWithFixedYearLengths( $value );
 
                        // PHP's DateTime object also can't handle larger than 4 digit years,
                        // e.g. 1 June 202020
-                       if( preg_match( '/^(.*\D)?(\d{5,})(.*)$/', $value, $matches ) ) {
+                       if ( preg_match( '/^(.*\D)?(\d{5,})(.*)$/', $value, $matches ) ) {
                                $value = $matches[1] . substr( $matches[2], -4 ) . $matches[3];
                                $largeYear = $matches[2];
                        }
@@ -83,7 +80,7 @@
 
                        // Parse using the DateTime object (this will allow us to format the date in a nicer way)
                        $dateTime = new DateTime( $value );
-                       if( $largeYear === null ) {
+                       if ( $largeYear === null ) {
                                $timeString = $sign . $dateTime->format( 'Y-m-d\TH:i:s\Z' );
                        } else {
                                $timeString = $sign . $largeYear . $dateTime->format( '-m-d\TH:i:s\Z' );
@@ -92,8 +89,7 @@
                        // Pass the reformatted string into a base parser that parses this +/-Y-m-d\TH:i:s\Z format with a precision
                        $valueParser = new \ValueParsers\TimeParser( $calendarModelParser, $options );
                        return $valueParser->parse( $timeString );
-               }
-               catch( Exception $exception ) {
+               } catch ( Exception $exception ) {
                        throw new ParseException( $exception->getMessage(), $rawValue, self::FORMAT_NAME );
                }
        }
@@ -126,7 +122,7 @@
         * @return mixed
         */
        private function getValueWithFixedSeparators( $value ) {
-               return preg_replace( '/[\s.]+/', '.', $value );
+               return preg_replace( '/(?<=\d)[.\s]\s*/', '.', $value );
        }
 
        /**
@@ -138,22 +134,22 @@
         * @return string
         */
        private function getValueWithFixedYearLengths( $value ) {
-               if( preg_match( '/^(\d+)(\D)(\d+)(\D)(\d+)$/', $value, $dateParts ) ) {
-                       if( $dateParts[1] > 31 && $dateParts[5] <= 31 ) {
+               if ( preg_match( '/^(\d+)(\D)(\d+)(\D)(\d+)$/', $value, $dateParts ) ) {
+                       if ( $dateParts[1] > 31 && $dateParts[5] <= 31 ) {
                                // the year looks like it is at the front
-                               if( strlen( $dateParts[1] ) < 4 ) {
+                               if ( strlen( $dateParts[1] ) < 4 ) {
                                        $value = str_pad( $dateParts[1], 4, '0', STR_PAD_LEFT )
                                                . $dateParts[2] . $dateParts[3] . $dateParts[4] . $dateParts[5];
                                }
                        } else {
                                // presume the year is at the back
-                               if( strlen( $dateParts[5] ) < 4 ) {
+                               if ( strlen( $dateParts[5] ) < 4 ) {
                                        $value = $dateParts[1] . $dateParts[2] . $dateParts[3] . $dateParts[4]
                                                . str_pad( $dateParts[5], 4, '0', STR_PAD_LEFT );
                                }
                        }
                } else {
-                       if( preg_match( '/^(.*\D)(\d{1,3})$/', $value, $matches ) ) {
+                       if ( preg_match( '/^(.*\D)(\d{1,3})$/', $value, $matches ) ) {
                                $value = $matches[1] . str_pad( $matches[2], 4, '0', STR_PAD_LEFT );
                        }
                }
diff --git a/lib/tests/phpunit/parsers/DateTimeParserTest.php b/lib/tests/phpunit/parsers/DateTimeParserTest.php
index 51ce8eb..51f7e7a 100644
--- a/lib/tests/phpunit/parsers/DateTimeParserTest.php
+++ b/lib/tests/phpunit/parsers/DateTimeParserTest.php
@@ -72,7 +72,7 @@
                                array( '+0000000000002010-10-10T00:00:00Z', 0 , 0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
                        '10.10.2010' =>
                                array( '+0000000000002010-10-10T00:00:00Z', 0 , 0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
-                       '10. 10. 2010' =>
+                       '  10.  10.  2010  ' =>
                                array( '+0000000000002010-10-10T00:00:00Z', 0 , 0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
                        '10 10 2010' =>
                                array( '+0000000000002010-10-10T00:00:00Z', 0 , 0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
@@ -174,6 +174,8 @@
                        // These are just wrong!
                        'June June June',
                        '111 111 111',
+                       '10  .10  .2010',
+                       '10...10...2010',
                        'Jann 2014',
                        '1980x',
                        '1980s', // supported by MWTimeIsoParser

-- 
To view, visit https://gerrit.wikimedia.org/r/156105
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic606e500949f81b1d15c4c621b9bba162b947814
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de>
Gerrit-Reviewer: Addshore <addshorew...@gmail.com>
Gerrit-Reviewer: WikidataJenkins <wikidata-servi...@wikimedia.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to