Repository: any23 Updated Branches: refs/heads/master 6173637bb -> a07d1f058
ANY23-377 don't replace empty strings with 'Null' Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/a07d1f05 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/a07d1f05 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/a07d1f05 Branch: refs/heads/master Commit: a07d1f058fcdc2d994dcd220759310737fe68965 Parents: 6173637 Author: Hans <[email protected]> Authored: Tue Jul 31 16:37:25 2018 -0500 Committer: Hans <[email protected]> Committed: Tue Jul 31 16:46:41 2018 -0500 ---------------------------------------------------------------------- .../extractor/microdata/ItemPropValue.java | 51 +++++++++----------- .../microdata-bad-properties-expected.nquads | 6 +-- 2 files changed, 25 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/a07d1f05/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java index f32b468..b4710de 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java @@ -22,6 +22,8 @@ import java.net.URL; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.Objects; + import org.apache.any23.util.StringUtils; /** @@ -47,10 +49,24 @@ public class ItemPropValue { * Supported types. */ public enum Type { - Plain, - Link, - Date, - Nested + Plain(String.class), + Link(String.class), + Date(Date.class), + Nested(ItemScope.class); + + Type(Class<?> contentClass) { + this.contentClass = contentClass; + } + + private final Class<?> contentClass; + + private Object checkClass(Object content) { + Objects.requireNonNull(content, "content cannot be null"); + if (!contentClass.isInstance(content)) { + throw new IllegalArgumentException("content must be a " + contentClass.getName() + " when type is " + this); + } + return content; + } } public static Date parseDateTime(String dateStr) throws ParseException { @@ -77,31 +93,8 @@ public class ItemPropValue { * @param type content type. */ public ItemPropValue(Object content, Type type) { - if(content == null) { - throw new NullPointerException("content cannot be null."); - } - if(type == null) { - throw new NullPointerException("type cannot be null."); - } - if(type == Type.Nested && ! (content instanceof ItemScope) ) { - throw new IllegalArgumentException( - "content must be an " + ItemScope.class + " when type is " + Type.Nested - ); - } - if(type == Type.Date && !(content instanceof Date) ) { - throw new IllegalArgumentException( - "content must be a " + Date.class.getName() + " whe type is " + Type.Date - ); - } - if(content instanceof String && ((String) content).trim().length() == 0) { - // ANY23-115 Empty spans seem to break ANY23 - // instead of throwing the exception and in effect failing the entire - // parse job we wish to be lenient on web content publishers and add - // Null (String) as content. - content = "Null"; - } - this.content = content; - this.type = type; + this.type = Objects.requireNonNull(type, "type cannot be null"); + this.content = type.checkClass(content); } /** http://git-wip-us.apache.org/repos/asf/any23/blob/a07d1f05/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads b/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads index e5b6f29..b759d1b 100644 --- a/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads +++ b/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads @@ -49,7 +49,7 @@ _:node1cjov1p83x7 <http://schema.org/name> "Begin to Knit Classes" <http://bob.e _:node1cjov1p83x7 <http://schema.org/description> "Learn to knit at Kaleidoscope Fibers - Cambridge's speciality yarn,..." <http://bob.example.com/> . _:node1cjov1p83x8 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> . _:node1cjov1p83x9 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> . -_:node1cjov1p83x9 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> . +_:node1cjov1p83x9 <http://schema.org/streetAddress> "" <http://bob.example.com/> . _:node1cjov1p83x8 <http://schema.org/address> _:node1cjov1p83x9 <http://bob.example.com/> . _:node1cjov1p83x8 <http://schema.org/name> "Kaleidoscope Fibers (131 W. Main Street" <http://bob.example.com/> . _:node1cjov1p83x7 <http://schema.org/location> _:node1cjov1p83x8 <http://bob.example.com/> . @@ -62,7 +62,7 @@ _:node1cjov1p83x10 <http://schema.org/name> "Cambridge Historic School Museum To _:node1cjov1p83x10 <http://schema.org/description> "Built in 1906, the Cambridge Historic School -Â listed on the..." <http://bob.example.com/> . _:node1cjov1p83x11 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> . _:node1cjov1p83x12 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> . -_:node1cjov1p83x12 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> . +_:node1cjov1p83x12 <http://schema.org/streetAddress> "" <http://bob.example.com/> . _:node1cjov1p83x11 <http://schema.org/address> _:node1cjov1p83x12 <http://bob.example.com/> . _:node1cjov1p83x11 <http://schema.org/name> "Cambridge Historic School" <http://bob.example.com/> . _:node1cjov1p83x10 <http://schema.org/location> _:node1cjov1p83x11 <http://bob.example.com/> . @@ -75,7 +75,7 @@ _:node1cjov1p83x13 <http://schema.org/name> "Begin to Knit Classes" <http://bob. _:node1cjov1p83x13 <http://schema.org/description> "Learn to knit at Kaleidoscope Fibers - Cambridge's speciality yarn,..." <http://bob.example.com/> . _:node1cjov1p83x14 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> . _:node1cjov1p83x15 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> . -_:node1cjov1p83x15 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> . +_:node1cjov1p83x15 <http://schema.org/streetAddress> "" <http://bob.example.com/> . _:node1cjov1p83x14 <http://schema.org/address> _:node1cjov1p83x15 <http://bob.example.com/> . _:node1cjov1p83x14 <http://schema.org/name> "Kaleidoscope Fibers (131 W. Main Street" <http://bob.example.com/> . _:node1cjov1p83x13 <http://schema.org/location> _:node1cjov1p83x14 <http://bob.example.com/> .
