[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user barrotsteindev closed the pull request at: https://github.com/apache/lucene-solr/pull/435 --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user barrotsteindev commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209660011 --- Diff: solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java --- @@ -159,8 +170,9 @@ public void init(NamedList args) { Collection formatsParam = args.removeConfigArgs(FORMATS_PARAM); if (null != formatsParam) { for (String value : formatsParam) { -DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseCaseInsensitive() - .appendPattern(value).toFormatter(locale).withZone(defaultTimeZone); +DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseLenient().parseCaseInsensitive() --- End diff -- Less work is always welcome ð --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user barrotsteindev commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209462345 --- Diff: solr/core/src/test/org/apache/solr/update/processor/ParsingFieldUpdateProcessorsTest.java --- @@ -896,13 +899,125 @@ public void testCascadingParsers() throws Exception { assertTrue(mixedDates.isEmpty()); } - private Date parse(DateTimeFormatter dateTimeFormatter, String dateString) { + // TestsExtractionDateUtil that mimic TestExtractionDateUtil + public void testISO8601() throws IOException { +// dates with atypical years +// This test tries to mimic TestExtractionDateUtil#testISO8601 + +String[] dateStrings = { +"0001-01-01T01:01:01Z", "+12021-12-01T03:03:03Z", +"-04-04T04:04:04Z", "-0005-05-05T05:05:05Z", +"-2021-12-01T04:04:04Z", "-12021-12-01T02:02:02Z" +}; + +int id = 1; + +// ensure strings are parsed +for(String notInFormatDateString: dateStrings) { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field + SolrInputDocument d = processAdd("parse-date-extraction-util-formats", doc(f("id", id), f("date_dt", notInFormatDateString))); + assertNotNull(d); + assertTrue("Date string: " + notInFormatDateString + " was not parsed as a date", d.getFieldValue("date_dt") instanceof Date); + assertEquals(notInFormatDateString, ((Date) d.getField("date_dt").getFirstValue()).toInstant().toString()); + assertU(commit()); + assertQ(req("id:" + id), "//date[@name='date_dt'][.='" + notInFormatDateString + "']"); + ++id; +} + +// odd values are date strings, even values are expected strings +String[] lenientDateStrings = { +"10995-12-31T23:59:59.990Z", "+10995-12-31T23:59:59.990Z", +"995-1-2T3:4:5Z", "0995-01-02T03:04:05Z", +"2021-01-01t03:04:05", "2021-01-01T03:04:05Z", +"2021-12-01 04:04:04", "2021-12-01T04:04:04Z" +}; + +// ensure sure strings that should be parsed using lenient resolver are properly parsed +for(int i = 0; i < lenientDateStrings.length; ++i) { + String lenientDateString = lenientDateStrings[i]; + String expectedString = lenientDateStrings[++i]; + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field + SolrInputDocument d = processAdd("parse-date-extraction-util-formats", doc(f("id", id), f("date_dt", lenientDateString))); + assertNotNull(d); + assertTrue("Date string: " + lenientDateString + " was not parsed as a date", + d.getFieldValue("date_dt") instanceof Date); + assertEquals(expectedString, ((Date) d.getField("date_dt").getFirstValue()).toInstant().toString()); + ++id; +} + } + + // this test has had problems when the JDK timezone is Americas/Metlakatla + public void testAKSTZone() throws IOException { +final String inputString = "Thu Nov 13 04:35:51 AKST 2008"; + +final long expectTs = 1226583351000L; +assertEquals(expectTs, +DateTimeFormatter.ofPattern("EEE MMM d HH:mm:ss z ", Locale.ENGLISH) +.withZone(ZoneId.of("UTC")).parse(inputString, Instant::from).toEpochMilli()); + +assertParsedDate(inputString, Date.from(Instant.ofEpochMilli(expectTs)), "parse-date-extraction-util-formats"); + } + + public void testNoTime() throws IOException { +Instant instant = instant(2005, 10, 7, 0, 0, 0); +String inputString = "2005-10-07"; +assertParsedDate(inputString, Date.from(instant), "parse-date-extraction-util-formats"); + } + + public void testRfc1123() throws IOException { +assertParsedDate("Fri, 07 Oct 2005 13:14:15 GMT", Date.from(inst20051007131415()), "parse-date-extraction-util-formats"); + } + + public void testRfc1036() throws IOException { +assertParsedDate("Friday, 07-Oct-05 13:14:15 GMT", Date.from(inst20051007131415()), "parse-date-extraction-util-formats"); + } + + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12593;) --- End diff -- ran the tests with that particular flag and it seems to pass, so I removed the annotation. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user dsmiley commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209458462 --- Diff: solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java --- @@ -159,8 +170,9 @@ public void init(NamedList args) { Collection formatsParam = args.removeConfigArgs(FORMATS_PARAM); if (null != formatsParam) { for (String value : formatsParam) { -DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseCaseInsensitive() - .appendPattern(value).toFormatter(locale).withZone(defaultTimeZone); +DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseLenient().parseCaseInsensitive() --- End diff -- nah; it's fine. Less to document :-) --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user barrotsteindev commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209446062 --- Diff: solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java --- @@ -159,8 +170,9 @@ public void init(NamedList args) { Collection formatsParam = args.removeConfigArgs(FORMATS_PARAM); if (null != formatsParam) { for (String value : formatsParam) { -DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseCaseInsensitive() - .appendPattern(value).toFormatter(locale).withZone(defaultTimeZone); +DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseLenient().parseCaseInsensitive() --- End diff -- I guess it can be added as an option, should I work on it? --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user dsmiley commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209442153 --- Diff: solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java --- @@ -120,6 +121,16 @@ public UpdateRequestProcessor getInstance(SolrQueryRequest req, protected Object mutateValue(Object srcVal) { if (srcVal instanceof CharSequence) { String srcStringVal = srcVal.toString(); + // trim single quotes around date if present --- End diff -- Oh yeah. It's unfortunate such ugly data comes in as such... but it's at least cheap & safe to do this so I guess it's okay. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user dsmiley commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209442225 --- Diff: solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java --- @@ -159,8 +170,9 @@ public void init(NamedList args) { Collection formatsParam = args.removeConfigArgs(FORMATS_PARAM); if (null != formatsParam) { for (String value : formatsParam) { -DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseCaseInsensitive() - .appendPattern(value).toFormatter(locale).withZone(defaultTimeZone); +DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseLenient().parseCaseInsensitive() --- End diff -- I see you're hard-coding leniency and not making it an option. It's fine, I think. I'm not sure why someone would expressly want it to be strict (would want an error), not for an application of parsing like we see here. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user dsmiley commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209442197 --- Diff: solr/core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml --- @@ -109,6 +109,21 @@ + --- End diff -- Soon there will be no such "date extraction util" so I suggest another name. Hmmm. Perhaps "parse-date-patterns-from-extract-contrib". Yeah it's long but no big deal. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user dsmiley commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209442210 --- Diff: solr/core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml --- @@ -109,6 +109,21 @@ + + + UTC + en + +-MM-dd['T'[HH:mm:ss['.'SSS]['Z' --- End diff -- After working with you on the previous issue, I now know that `'Z'` isn't appropriate if the defaultTimeZone is configurable, since as-such it's a literal that is ignored -- and the defaultTimeZone would take effect which might not be UTC. We know 'z' is right now. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user dsmiley commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209442336 --- Diff: solr/core/src/test/org/apache/solr/update/processor/ParsingFieldUpdateProcessorsTest.java --- @@ -896,13 +899,125 @@ public void testCascadingParsers() throws Exception { assertTrue(mixedDates.isEmpty()); } - private Date parse(DateTimeFormatter dateTimeFormatter, String dateString) { + // TestsExtractionDateUtil that mimic TestExtractionDateUtil + public void testISO8601() throws IOException { +// dates with atypical years +// This test tries to mimic TestExtractionDateUtil#testISO8601 + +String[] dateStrings = { +"0001-01-01T01:01:01Z", "+12021-12-01T03:03:03Z", +"-04-04T04:04:04Z", "-0005-05-05T05:05:05Z", +"-2021-12-01T04:04:04Z", "-12021-12-01T02:02:02Z" +}; + +int id = 1; + +// ensure strings are parsed +for(String notInFormatDateString: dateStrings) { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field + SolrInputDocument d = processAdd("parse-date-extraction-util-formats", doc(f("id", id), f("date_dt", notInFormatDateString))); + assertNotNull(d); + assertTrue("Date string: " + notInFormatDateString + " was not parsed as a date", d.getFieldValue("date_dt") instanceof Date); + assertEquals(notInFormatDateString, ((Date) d.getField("date_dt").getFirstValue()).toInstant().toString()); + assertU(commit()); + assertQ(req("id:" + id), "//date[@name='date_dt'][.='" + notInFormatDateString + "']"); + ++id; +} + +// odd values are date strings, even values are expected strings +String[] lenientDateStrings = { +"10995-12-31T23:59:59.990Z", "+10995-12-31T23:59:59.990Z", +"995-1-2T3:4:5Z", "0995-01-02T03:04:05Z", +"2021-01-01t03:04:05", "2021-01-01T03:04:05Z", +"2021-12-01 04:04:04", "2021-12-01T04:04:04Z" +}; + +// ensure sure strings that should be parsed using lenient resolver are properly parsed +for(int i = 0; i < lenientDateStrings.length; ++i) { + String lenientDateString = lenientDateStrings[i]; + String expectedString = lenientDateStrings[++i]; + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field + SolrInputDocument d = processAdd("parse-date-extraction-util-formats", doc(f("id", id), f("date_dt", lenientDateString))); + assertNotNull(d); + assertTrue("Date string: " + lenientDateString + " was not parsed as a date", + d.getFieldValue("date_dt") instanceof Date); + assertEquals(expectedString, ((Date) d.getField("date_dt").getFirstValue()).toInstant().toString()); + ++id; +} + } + + // this test has had problems when the JDK timezone is Americas/Metlakatla + public void testAKSTZone() throws IOException { +final String inputString = "Thu Nov 13 04:35:51 AKST 2008"; + +final long expectTs = 1226583351000L; +assertEquals(expectTs, +DateTimeFormatter.ofPattern("EEE MMM d HH:mm:ss z ", Locale.ENGLISH) +.withZone(ZoneId.of("UTC")).parse(inputString, Instant::from).toEpochMilli()); + +assertParsedDate(inputString, Date.from(Instant.ofEpochMilli(expectTs)), "parse-date-extraction-util-formats"); + } + + public void testNoTime() throws IOException { +Instant instant = instant(2005, 10, 7, 0, 0, 0); +String inputString = "2005-10-07"; +assertParsedDate(inputString, Date.from(instant), "parse-date-extraction-util-formats"); + } + + public void testRfc1123() throws IOException { +assertParsedDate("Fri, 07 Oct 2005 13:14:15 GMT", Date.from(inst20051007131415()), "parse-date-extraction-util-formats"); + } + + public void testRfc1036() throws IOException { +assertParsedDate("Friday, 07-Oct-05 13:14:15 GMT", Date.from(inst20051007131415()), "parse-date-extraction-util-formats"); + } + + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12593;) --- End diff -- This annotation should not be present. It was not present in `SOLR-12561.patch`. It is present today in TestExtractionDateUtil for the string "Thu Nov 13 04:35:51 AKST 2008" and you already have a test for that -- testAKSTZone. Since the URP is not afflicted with the bug causing me to add that AwaitsFix, we don't need the annotation here in this test class at all. If you want to try to see if the bug is gone, you need to set this system property like so:
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
Github user barrotsteindev commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/435#discussion_r209430852 --- Diff: solr/core/src/test/org/apache/solr/update/processor/ParsingFieldUpdateProcessorsTest.java --- @@ -896,13 +899,125 @@ public void testCascadingParsers() throws Exception { assertTrue(mixedDates.isEmpty()); } - private Date parse(DateTimeFormatter dateTimeFormatter, String dateString) { + // TestsExtractionDateUtil that mimic TestExtractionDateUtil + public void testISO8601() throws IOException { +// dates with atypical years +// This test tries to mimic TestExtractionDateUtil#testISO8601 + +String[] dateStrings = { +"0001-01-01T01:01:01Z", "+12021-12-01T03:03:03Z", +"-04-04T04:04:04Z", "-0005-05-05T05:05:05Z", +"-2021-12-01T04:04:04Z", "-12021-12-01T02:02:02Z" +}; + +int id = 1; + +// ensure strings are parsed +for(String notInFormatDateString: dateStrings) { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field + SolrInputDocument d = processAdd("parse-date-extraction-util-formats", doc(f("id", id), f("date_dt", notInFormatDateString))); + assertNotNull(d); + assertTrue("Date string: " + notInFormatDateString + " was not parsed as a date", d.getFieldValue("date_dt") instanceof Date); + assertEquals(notInFormatDateString, ((Date) d.getField("date_dt").getFirstValue()).toInstant().toString()); + assertU(commit()); + assertQ(req("id:" + id), "//date[@name='date_dt'][.='" + notInFormatDateString + "']"); + ++id; +} + +// odd values are date strings, even values are expected strings +String[] lenientDateStrings = { +"10995-12-31T23:59:59.990Z", "+10995-12-31T23:59:59.990Z", +"995-1-2T3:4:5Z", "0995-01-02T03:04:05Z", +"2021-01-01t03:04:05", "2021-01-01T03:04:05Z", +"2021-12-01 04:04:04", "2021-12-01T04:04:04Z" +}; + +// ensure sure strings that should be parsed using lenient resolver are properly parsed +for(int i = 0; i < lenientDateStrings.length; ++i) { + String lenientDateString = lenientDateStrings[i]; + String expectedString = lenientDateStrings[++i]; + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field + SolrInputDocument d = processAdd("parse-date-extraction-util-formats", doc(f("id", id), f("date_dt", lenientDateString))); + assertNotNull(d); + assertTrue("Date string: " + lenientDateString + " was not parsed as a date", + d.getFieldValue("date_dt") instanceof Date); + assertEquals(expectedString, ((Date) d.getField("date_dt").getFirstValue()).toInstant().toString()); + ++id; +} + } + + // this test has had problems when the JDK timezone is Americas/Metlakatla + public void testAKSTZone() throws IOException { +final String inputString = "Thu Nov 13 04:35:51 AKST 2008"; + +final long expectTs = 1226583351000L; +assertEquals(expectTs, +DateTimeFormatter.ofPattern("EEE MMM d HH:mm:ss z ", Locale.ENGLISH) +.withZone(ZoneId.of("UTC")).parse(inputString, Instant::from).toEpochMilli()); + +assertParsedDate(inputString, Date.from(Instant.ofEpochMilli(expectTs)), "parse-date-extraction-util-formats"); + } + + public void testNoTime() throws IOException { +Instant instant = instant(2005, 10, 7, 0, 0, 0); +String inputString = "2005-10-07"; +assertParsedDate(inputString, Date.from(instant), "parse-date-extraction-util-formats"); + } + + public void testRfc1123() throws IOException { +assertParsedDate("Fri, 07 Oct 2005 13:14:15 GMT", Date.from(inst20051007131415()), "parse-date-extraction-util-formats"); + } + + public void testRfc1036() throws IOException { +assertParsedDate("Friday, 07-Oct-05 13:14:15 GMT", Date.from(inst20051007131415()), "parse-date-extraction-util-formats"); + } + + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12593;) --- End diff -- This seems to pass as is, although I am still hesitant, perhaps I missed something? Would be nice if another set of eyes looked at this one. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #435: SOLR-12591: add tests to ensure ParseDateFiel...
GitHub user barrotsteindev opened a pull request: https://github.com/apache/lucene-solr/pull/435 SOLR-12591: add tests to ensure ParseDateFieldUpdateProcessor compati⦠â¦blity with ExtractionDateUtil You can merge this pull request into a Git repository by running: $ git pull https://github.com/barrotsteindev/lucene-solr SOLR-12591 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/lucene-solr/pull/435.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #435 commit 04c60c0580096a02efd651adce5946b8a8561acc Author: Bar Rotstein Date: 2018-08-11T16:10:46Z SOLR-12591: add tests to ensure ParseDateFieldUpdateProcessor compatiblity with ExtractionDateUtil --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org