Repository: any23
Updated Branches:
  refs/heads/master 63ba2fc82 -> 73ce0c29e


Replaced the static SimpleDateFormat field in ItemPropValue with a
ThreadLocal. The previous solution would yield broken results when
multiple documents were parsed concurrently. Added a unit test that
failed every time on my machine with the old version and succeeds every
time with the new version.

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/9afa87db
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/9afa87db
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/9afa87db

Branch: refs/heads/master
Commit: 9afa87db7efaad706e64e76a82e6a53f657a817f
Parents: 63ba2fc
Author: Antoni Mylka <[email protected]>
Authored: Thu Oct 29 15:11:26 2015 +0100
Committer: Antoni Mylka <[email protected]>
Committed: Thu Oct 29 15:11:26 2015 +0100

----------------------------------------------------------------------
 .../microformats2/annotations/package-info.java |  2 +-
 .../extractor/microdata/ItemPropValue.java      | 31 ++++---
 .../microdata/MicrodataParserTest.java          | 85 ++++++++++++++++++--
 3 files changed, 99 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/9afa87db/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
index 3311c98..8e09321 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
@@ -21,4 +21,4 @@
  *
  *  @see org.apache.any23.extractor.html.MicroformatExtractor
  */
-package org.apache.any23.extractor.microformats2.annotations;
\ No newline at end of file
+package org.apache.any23.extractor.html.microformats2.annotations;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/9afa87db/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java 
b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
index 0dcad10..a74849d 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
@@ -17,13 +17,12 @@
 
 package org.apache.any23.extractor.microdata;
 
-import org.apache.any23.util.StringUtils;
-
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.Date;
+import org.apache.any23.util.StringUtils;
 
 /**
  * Describes a possible value for a <b>Microdata item property</b>.
@@ -32,7 +31,7 @@ import java.util.Date;
  */
 public class ItemPropValue {
 
-    private static final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
+    private static final ThreadLocal<SimpleDateFormat> sdf = new ThreadLocal<SimpleDateFormat>();
 
     /**
      * Supported types.
@@ -45,11 +44,20 @@ public class ItemPropValue {
     }
 
     public static Date parseDateTime(String dateStr) throws ParseException {
-        return sdf.parse(dateStr);
+        return getSdf().parse(dateStr);
     }
 
     public static String formatDateTime(Date in) {
-        return sdf.format(in);
+        return getSdf().format(in);
+    }
+    
+    private static SimpleDateFormat getSdf() {
+        SimpleDateFormat simpleDateFormat = sdf.get();
+        if (simpleDateFormat == null) {
+            simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
+            sdf.set(simpleDateFormat);
+        }
+        return simpleDateFormat;
     }
 
     /**
@@ -88,7 +96,7 @@ public class ItemPropValue {
         if(content instanceof String && ((String) content).trim().length() == 
0) {
             content = "Null";
             // ANY23-115 Empty spans seem to break ANY23
-            // instead of throwing the exception and in effect failing the 
entire 
+            // instead of throwing the exception and in effect failing the 
entire
             // parse job we wish to be lenient on web content publishers and 
add
             // Null (String) as content.
             //throw new IllegalArgumentException("Invalid content '" + content 
+ "'");
@@ -143,7 +151,9 @@ public class ItemPropValue {
      * @return <code>true</code> if type is an integer.
      */
     public boolean isInteger() {
-        if(type != Type.Plain) return false;
+        if(type != Type.Plain) {
+            return false;
+        }
          try {
              Integer.parseInt((String) content);
              return true;
@@ -156,7 +166,9 @@ public class ItemPropValue {
      * @return <code>true</code> if type is a float.
      */
      public boolean isFloat() {
-         if(type != Type.Plain) return false;
+         if(type != Type.Plain) {
+            return false;
+        }
          try {
              Float.parseFloat((String) content);
              return true;
@@ -226,7 +238,7 @@ public class ItemPropValue {
         if(content instanceof String) {
             contentStr = "\"" + StringUtils.escapeAsJSONString((String) 
content) + "\"";
         } else if(content instanceof Date) {
-            contentStr = "\"" + sdf.format((Date) content) + "\"";
+            contentStr = "\"" + getSdf().format((Date) content) + "\"";
         } else {
             contentStr = content.toString();
         }
@@ -258,5 +270,4 @@ public class ItemPropValue {
         }
         return false;
     }
-
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/9afa87db/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
 
b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
index 78d13a4..0db9fbc 100644
--- 
a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
+++ 
b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
@@ -17,21 +17,29 @@
 
 package org.apache.any23.extractor.microdata;
 
-import org.apache.any23.extractor.html.TagSoupParser;
-import org.apache.any23.util.StreamUtils;
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.w3c.dom.Document;
-
+import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
 import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
 import java.util.GregorianCalendar;
+import java.util.List;
 import java.util.Properties;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.any23.extractor.html.TagSoupParser;
+import org.apache.any23.util.StreamUtils;
+import org.apache.commons.io.IOUtils;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import static org.junit.Assert.assertFalse;
 
 /**
  * Test case for {@link MicrodataParser}.
@@ -88,7 +96,58 @@ public class MicrodataParserTest {
                 
target.getProperties().get("birthday").get(0).getValue().getAsDate()
         );
     }
+    
+    @Test
+    public void testGetDateConcurrent() throws IOException, ParseException {
+        final Date expectedDate = new GregorianCalendar(2009, Calendar.MAY, 10).getTime(); // 2009-05-10
+        final byte [] content = IOUtils.toByteArray(getClass().getResourceAsStream("/microdata/microdata-basic.html"));
+        final int threadCount = 10;
+        final int attemptCount = 100;
+        final List<Thread> threads = new ArrayList<Thread>();
+        final CyclicBarrier barrier = new CyclicBarrier(threadCount + 1);
+        final AtomicBoolean foundFailure = new AtomicBoolean(false);
+        for (int i = 0; i < threadCount; i++) {
+            threads.add(new Thread("Test-thread-" + i) {
+                @Override
+                public void run() {
+                    await(barrier);
+                    try {
+                        int counter = 0;
+                        while (counter++ < attemptCount && !foundFailure.get()) {
+                            final Document document = getDom(content);
+                            final MicrodataParserReport report = MicrodataParser.getMicrodata(document);
+                            final ItemScope target = report.getDetectedItemScopes()[4];
+                            Date actualDate = target.getProperties().get("birthday").get(0).getValue().getAsDate();
+                            if (!expectedDate.equals(actualDate)) {
+                                foundFailure.set(true);
+                            }
+                        }
+                    }
+                    catch (Exception ex) {
+                        ex.printStackTrace();
+                        foundFailure.set(true);
+                    }
+                    await(barrier);
+                }
+            });
+        }
+        for (Thread thread : threads) {
+            thread.start();
+        }
+        await(barrier);
+        await(barrier);
+        assertFalse(foundFailure.get());
+    }
 
+    private void await(CyclicBarrier barrier) {
+        try {
+            barrier.await();
+        }
+        catch (Exception ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+    
     /**
      * Test the main use case of {@link 
MicrodataParser#deferProperties(String...)}
      *
@@ -156,6 +215,16 @@ public class MicrodataParserTest {
             is.close();
         }
     }
+    
+    private Document getDom(byte [] document) throws IOException {
+        final InputStream is = new ByteArrayInputStream(document);
+        try {
+            final TagSoupParser tagSoupParser = new TagSoupParser(is, "http://test-document");
+            return tagSoupParser.getDOM();
+        } finally {
+            is.close();
+        }
+    }
 
     private Document getMicrodataDom(String htmlFile) throws IOException {
          return getDom("/microdata/" + htmlFile + ".html");

Reply via email to