Repository: any23 Updated Branches: refs/heads/master 63ba2fc82 -> 73ce0c29e
Replaced the static SimpleDateFormat field in ItemPropValue with a ThreadLocal. SimpleDateFormat is not thread-safe, so the previous single shared instance would yield broken results when multiple documents were parsed concurrently. Added a unit test that failed every time on my machine with the old version and succeeds every time with the new version. Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/9afa87db Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/9afa87db Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/9afa87db Branch: refs/heads/master Commit: 9afa87db7efaad706e64e76a82e6a53f657a817f Parents: 63ba2fc Author: Antoni Mylka <[email protected]> Authored: Thu Oct 29 15:11:26 2015 +0100 Committer: Antoni Mylka <[email protected]> Committed: Thu Oct 29 15:11:26 2015 +0100 ---------------------------------------------------------------------- .../microformats2/annotations/package-info.java | 2 +- .../extractor/microdata/ItemPropValue.java | 31 ++++--- .../microdata/MicrodataParserTest.java | 85 ++++++++++++++++++-- 3 files changed, 99 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/9afa87db/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java index 3311c98..8e09321 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java +++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java @@ -21,4 +21,4 @@ * * @see org.apache.any23.extractor.html.MicroformatExtractor */ -package org.apache.any23.extractor.microformats2.annotations; \ 
No newline at end of file +package org.apache.any23.extractor.html.microformats2.annotations; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/any23/blob/9afa87db/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java index 0dcad10..a74849d 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java @@ -17,13 +17,12 @@ package org.apache.any23.extractor.microdata; -import org.apache.any23.util.StringUtils; - import java.net.MalformedURLException; import java.net.URL; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; +import org.apache.any23.util.StringUtils; /** * Describes a possible value for a <b>Microdata item property</b>. @@ -32,7 +31,7 @@ import java.util.Date; */ public class ItemPropValue { - private static final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); + private static final ThreadLocal<SimpleDateFormat> sdf = new ThreadLocal<SimpleDateFormat>(); /** * Supported types. 
@@ -45,11 +44,20 @@ public class ItemPropValue { } public static Date parseDateTime(String dateStr) throws ParseException { - return sdf.parse(dateStr); + return getSdf().parse(dateStr); } public static String formatDateTime(Date in) { - return sdf.format(in); + return getSdf().format(in); + } + + private static SimpleDateFormat getSdf() { + SimpleDateFormat simpleDateFormat = sdf.get(); + if (simpleDateFormat == null) { + simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd"); + sdf.set(simpleDateFormat); + } + return simpleDateFormat; } /** @@ -88,7 +96,7 @@ public class ItemPropValue { if(content instanceof String && ((String) content).trim().length() == 0) { content = "Null"; // ANY23-115 Empty spans seem to break ANY23 - // instead of throwing the exception and in effect failing the entire + // instead of throwing the exception and in effect failing the entire // parse job we wish to be lenient on web content publishers and add // Null (String) as content. //throw new IllegalArgumentException("Invalid content '" + content + "'"); @@ -143,7 +151,9 @@ public class ItemPropValue { * @return <code>true</code> if type is an integer. */ public boolean isInteger() { - if(type != Type.Plain) return false; + if(type != Type.Plain) { + return false; + } try { Integer.parseInt((String) content); return true; @@ -156,7 +166,9 @@ public class ItemPropValue { * @return <code>true</code> if type is a float. 
*/ public boolean isFloat() { - if(type != Type.Plain) return false; + if(type != Type.Plain) { + return false; + } try { Float.parseFloat((String) content); return true; @@ -226,7 +238,7 @@ public class ItemPropValue { if(content instanceof String) { contentStr = "\"" + StringUtils.escapeAsJSONString((String) content) + "\""; } else if(content instanceof Date) { - contentStr = "\"" + sdf.format((Date) content) + "\""; + contentStr = "\"" + getSdf().format((Date) content) + "\""; } else { contentStr = content.toString(); } @@ -258,5 +270,4 @@ public class ItemPropValue { } return false; } - } http://git-wip-us.apache.org/repos/asf/any23/blob/9afa87db/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java index 78d13a4..0db9fbc 100644 --- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java +++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java @@ -17,21 +17,29 @@ package org.apache.any23.extractor.microdata; -import org.apache.any23.extractor.html.TagSoupParser; -import org.apache.any23.util.StreamUtils; -import org.junit.Assert; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.w3c.dom.Document; - +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.text.ParseException; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; import java.util.GregorianCalendar; +import java.util.List; import java.util.Properties; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.atomic.AtomicBoolean; +import 
org.apache.any23.extractor.html.TagSoupParser; +import org.apache.any23.util.StreamUtils; +import org.apache.commons.io.IOUtils; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import static org.junit.Assert.assertFalse; /** * Test case for {@link MicrodataParser}. @@ -88,7 +96,58 @@ public class MicrodataParserTest { target.getProperties().get("birthday").get(0).getValue().getAsDate() ); } + + @Test + public void testGetDateConcurrent() throws IOException, ParseException { + final Date expectedDate = new GregorianCalendar(2009, Calendar.MAY, 10).getTime(); // 2009-05-10 + final byte [] content = IOUtils.toByteArray(getClass().getResourceAsStream("/microdata/microdata-basic.html")); + final int threadCount = 10; + final int attemptCount = 100; + final List<Thread> threads = new ArrayList<Thread>(); + final CyclicBarrier barrier = new CyclicBarrier(threadCount + 1); + final AtomicBoolean foundFailure = new AtomicBoolean(false); + for (int i = 0; i < threadCount; i++) { + threads.add(new Thread("Test-thread-" + i) { + @Override + public void run() { + await(barrier); + try { + int counter = 0; + while (counter++ < attemptCount && !foundFailure.get()) { + final Document document = getDom(content); + final MicrodataParserReport report = MicrodataParser.getMicrodata(document); + final ItemScope target = report.getDetectedItemScopes()[4]; + Date actualDate = target.getProperties().get("birthday").get(0).getValue().getAsDate(); + if (!expectedDate.equals(actualDate)) { + foundFailure.set(true); + } + } + } + catch (Exception ex) { + ex.printStackTrace(); + foundFailure.set(true); + } + await(barrier); + } + }); + } + for (Thread thread : threads) { + thread.start(); + } + await(barrier); + await(barrier); + assertFalse(foundFailure.get()); + } + private void await(CyclicBarrier barrier) { + try { + barrier.await(); + } + catch (Exception ex) { + throw new RuntimeException(ex); + } 
+ } + /** * Test the main use case of {@link MicrodataParser#deferProperties(String...)} * @@ -156,6 +215,16 @@ public class MicrodataParserTest { is.close(); } } + + private Document getDom(byte [] document) throws IOException { + final InputStream is = new ByteArrayInputStream(document); + try { + final TagSoupParser tagSoupParser = new TagSoupParser(is, "http://test-document"); + return tagSoupParser.getDOM(); + } finally { + is.close(); + } + } private Document getMicrodataDom(String htmlFile) throws IOException { return getDom("/microdata/" + htmlFile + ".html");
