Merge branch 'ANY23-226'
Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/93c38a69 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/93c38a69 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/93c38a69 Branch: refs/heads/master Commit: 93c38a69ef1cb130b3e47a458368c5301b582beb Parents: f88cc51 b8bd0ae Author: Peter Ansell <[email protected]> Authored: Sat Mar 21 16:06:41 2015 +1100 Committer: Peter Ansell <[email protected]> Committed: Sat Mar 21 16:06:41 2015 +1100 ---------------------------------------------------------------------- core/pom.xml | 9 - .../any23/cli/ExtractorDocumentation.java | 2 - .../org/apache/any23/cli/MicrodataParser.java | 2 - .../java/org/apache/any23/cli/MimeDetector.java | 2 - .../org/apache/any23/cli/PluginVerifier.java | 2 - .../main/java/org/apache/any23/cli/Rover.java | 2 - .../java/org/apache/any23/cli/VocabPrinter.java | 2 - .../extractor/csv/CSVExtractorFactory.java | 2 - .../extractor/html/AdrExtractorFactory.java | 2 - .../apache/any23/extractor/html/DomUtils.java | 70 + .../extractor/html/EmbeddedJSONLDExtractor.java | 256 +++ .../html/EmbeddedJSONLDExtractorFactory.java | 56 + .../extractor/html/GeoExtractorFactory.java | 2 - .../html/HCalendarExtractorFactory.java | 2 - .../extractor/html/HCardExtractorFactory.java | 2 - .../html/HListingExtractorFactory.java | 2 - .../extractor/html/HRecipeExtractorFactory.java | 2 - .../extractor/html/HResumeExtractorFactory.java | 2 - .../html/HReviewAggregateExtractorFactory.java | 2 - .../extractor/html/HReviewExtractorFactory.java | 2 - .../html/HTMLMetaExtractorFactory.java | 2 - .../html/HeadLinkExtractorFactory.java | 2 - .../extractor/html/ICBMExtractorFactory.java | 2 - .../extractor/html/LicenseExtractorFactory.java | 2 - .../extractor/html/SpeciesExtractorFactory.java | 2 - .../extractor/html/TitleExtractorFactory.java | 2 - .../html/TurtleHTMLExtractorFactory.java | 2 - .../extractor/html/XFNExtractorFactory.java | 2 - .../microdata/MicrodataExtractorFactory.java | 2 - .../any23/extractor/rdf/BaseRDFExtractor.java | 5 +- .../extractor/rdf/JSONLDExtractorFactory.java | 2 - .../extractor/rdf/NQuadsExtractorFactory.java | 2 - .../extractor/rdf/NTriplesExtractorFactory.java | 2 - .../extractor/rdf/RDFXMLExtractorFactory.java | 2 - .../extractor/rdf/TriXExtractorFactory.java | 2 - .../extractor/rdf/TurtleExtractorFactory.java | 2 - .../extractor/rdfa/RDFa11ExtractorFactory.java | 2 - .../extractor/rdfa/RDFaExtractorFactory.java | 2 - .../extractor/xpath/XPathExtractorFactory.java | 2 - .../apache/any23/writer/JSONWriterFactory.java | 2 - .../any23/writer/NQuadsWriterFactory.java | 2 - .../any23/writer/NTriplesWriterFactory.java | 2 - .../any23/writer/RDFXMLWriterFactory.java | 2 - .../apache/any23/writer/TriXWriterFactory.java | 2 - .../any23/writer/TurtleWriterFactory.java | 2 - .../any23/writer/URIListWriterFactory.java | 2 - .../META-INF/services/org.apache.any23.cli.Tool | 6 + .../org.apache.any23.extractor.ExtractorFactory | 28 + .../org.apache.any23.writer.WriterFactory | 7 + .../extractor/html/example-embedded-jsonld.html | 34 + .../apache/any23/prefixes/prefixes.properties | 1 + .../any23/extractor/csv/CSVExtractorTest.java | 164 +- .../example/ExampleExtractorFactory.java | 2 - .../html/AbstractExtractorTestCase.java | 1459 +++++++------- .../html/EmbeddedJSONLDExtractorTest.java | 50 + .../extractor/html/HCalendarExtractorTest.java | 730 +++---- .../extractor/html/HCardExtractorTest.java | 1872 +++++++++--------- .../extractor/html/HListingExtractorTest.java | 600 +++--- .../extractor/html/HRecipeExtractorTest.java | 56 +- .../extractor/html/HResumeExtractorTest.java | 244 ++- .../extractor/html/HReviewExtractorTest.java | 539 ++--- .../extractor/html/HTMLMetaExtractorTest.java | 95 +- .../any23/extractor/html/RDFMergerTest.java | 920 ++++----- .../extractor/html/SpeciesExtractorTest.java | 2 +- .../extractor/html/TurtleHTMLExtractorTest.java | 2 +- .../extractor/rdf/JSONLDExtractorTest.java | 8 - .../rdfa/AbstractRDFaExtractorTestCase.java | 243 ++- .../any23/io/nquads/NQuadsParserFactory.java | 2 - .../any23/io/nquads/NQuadsWriterFactory.java | 2 - .../main/java/org/apache/any23/cli/Crawler.java | 2 - .../META-INF/services/org.apache.any23.cli.Tool | 1 + .../htmlscraper/HTMLScraperExtractor.java | 2 - .../HTMLScraperExtractorFactory.java | 2 - .../org.apache.any23.extractor.ExtractorFactory | 1 + .../java/org/apache/any23/plugin/PluginIT.java | 2 +- .../plugin/officescraper/ExcelExtractor.java | 2 - .../officescraper/ExcelExtractorFactory.java | 2 - .../org.apache.any23.extractor.ExtractorFactory | 1 + src/site/apt/any23-plugins.apt | 1 - .../src/test/resources/html/encoding-test.html | 2 +- ...html-embedded-jsonld-extractor-multiple.html | 45 + .../html/html-embedded-jsonld-extractor.html | 34 + 82 files changed, 4113 insertions(+), 3528 deletions(-) ----------------------------------------------------------------------
