This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/any23.git
The following commit(s) were added to refs/heads/master by this push:
new 15126ef ANY23-520 Augment any23 extractor CLI to print all mimetypes
for a given extractor (#212)
15126ef is described below
commit 15126ef5a535ef64781be4d2a9a7909df94b293e
Author: Lewis John McGibbney <[email protected]>
AuthorDate: Tue Oct 19 08:32:59 2021 -0700
ANY23-520 Augment any23 extractor CLI to print all mimetypes for a given
extractor (#212)
* ANY23-520 Augment any23 extractor CLI to print all mimetypes for a given
extractor
---
.../apache/any23/cli/ExtractorDocumentation.java | 9 ++--
src/site/apt/getting-started.apt | 61 ++++++++++++----------
2 files changed, 38 insertions(+), 32 deletions(-)
diff --git a/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
b/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
index ed19420..f0b8492 100644
--- a/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
+++ b/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
@@ -40,7 +40,8 @@ import java.util.Locale;
"extractor" }, commandDescription = "Utility for obtaining
documentation about metadata extractors.")
public class ExtractorDocumentation extends BaseTool {
- @Parameter(names = { "-l", "--list" }, description = "shows the names of
all available extractors")
+ @Parameter(names = { "-l",
+ "--list" }, description = "shows the names, labels and supported
mimetypes of all available extractors")
private boolean showList;
@Parameter(names = { "-i", "--input" }, description = "shows example input
for the given extractor")
@@ -88,15 +89,15 @@ public class ExtractorDocumentation extends BaseTool {
}
/**
- * Prints the list of all the available extractors.
+ * Prints the list of all the available extractor names, labels and
supported mimetypes.
*
* @param registry
* the {@link org.apache.any23.extractor.ExtractorRegistry}
containing all extractors
*/
public void printExtractorList(ExtractorRegistry registry) {
for (ExtractorFactory factory : registry.getExtractorGroup()) {
- out.println(
- String.format(Locale.ROOT, "%25s [%15s]",
factory.getExtractorName(), factory.getExtractorLabel()));
+ out.println(String.format(Locale.ROOT, "%25s [%15s] %15s",
factory.getExtractorName(),
+ factory.getExtractorLabel(),
factory.getSupportedMIMETypes()));
}
}
diff --git a/src/site/apt/getting-started.apt b/src/site/apt/getting-started.apt
index c339788..ac239c1 100644
--- a/src/site/apt/getting-started.apt
+++ b/src/site/apt/getting-started.apt
@@ -209,34 +209,39 @@ core$ any23 rover -t -f quad myfoaf.rdf
+--------------------------------------
cli$ any23 extractor --list
- csv [org.apache.any23.extractor.csv.CSVExtractorFactory]
- html-embedded-jsonld
[org.apache.any23.extractor.html.EmbeddedJSONLDExtractorFactory]
- html-head-icbm
[org.apache.any23.extractor.html.ICBMExtractorFactory]
- html-head-links
[org.apache.any23.extractor.html.HeadLinkExtractorFactory]
- html-head-meta
[org.apache.any23.extractor.html.HTMLMetaExtractorFactory]
- html-head-title
[org.apache.any23.extractor.html.TitleExtractorFactory]
- html-mf-adr [org.apache.any23.extractor.html.AdrExtractorFactory]
- html-mf-geo [org.apache.any23.extractor.html.GeoExtractorFactory]
- html-mf-hcalendar
[org.apache.any23.extractor.html.HCalendarExtractorFactory]
- html-mf-hcard
[org.apache.any23.extractor.html.HCardExtractorFactory]
- html-mf-hlisting
[org.apache.any23.extractor.html.HListingExtractorFactory]
- html-mf-hrecipe
[org.apache.any23.extractor.html.HRecipeExtractorFactory]
- html-mf-hresume
[org.apache.any23.extractor.html.HResumeExtractorFactory]
- html-mf-hreview
[org.apache.any23.extractor.html.HReviewExtractorFactory]
-html-mf-hreview-aggregate
[org.apache.any23.extractor.html.HReviewAggregateExtractorFactory]
- html-mf-license
[org.apache.any23.extractor.html.LicenseExtractorFactory]
- html-mf-species
[org.apache.any23.extractor.html.SpeciesExtractorFactory]
- html-mf-xfn [org.apache.any23.extractor.html.XFNExtractorFactory]
- html-microdata
[org.apache.any23.extractor.microdata.MicrodataExtractorFactory]
- html-rdfa11
[org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory]
- html-xpath
[org.apache.any23.extractor.xpath.XPathExtractorFactory]
- rdf-jsonld
[org.apache.any23.extractor.rdf.JSONLDExtractorFactory]
- rdf-nq
[org.apache.any23.extractor.rdf.NQuadsExtractorFactory]
- rdf-nt
[org.apache.any23.extractor.rdf.NTriplesExtractorFactory]
- rdf-trix [org.apache.any23.extractor.rdf.TriXExtractorFactory]
- rdf-turtle
[org.apache.any23.extractor.rdf.TurtleExtractorFactory]
- rdf-xml
[org.apache.any23.extractor.rdf.RDFXMLExtractorFactory]
- yaml
[org.apache.any23.extractor.yaml.YAMLExtractorFactory]
+ csv [org.apache.any23.extractor.csv.CSVExtractorFactory]
[text/csv;q=0.1]
+ html-embedded-jsonld
[org.apache.any23.extractor.html.EmbeddedJSONLDExtractorFactory]
[text/html;q=0.02, application/xhtml+xml;q=0.02]
+ html-head-icbm
[org.apache.any23.extractor.html.ICBMExtractorFactory] [text/html;q=0.01,
application/xhtml+xml;q=0.01]
+ html-head-links
[org.apache.any23.extractor.html.HeadLinkExtractorFactory] [text/html;q=0.05,
application/xhtml+xml;q=0.05]
+ html-head-meta
[org.apache.any23.extractor.html.HTMLMetaExtractorFactory] [text/html;q=0.02,
application/xhtml+xml;q=0.02]
+ html-head-title
[org.apache.any23.extractor.html.TitleExtractorFactory] [text/html;q=0.02,
application/xhtml+xml;q=0.02]
+ html-mf-adr
[org.apache.any23.extractor.html.AdrExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+ html-mf-geo
[org.apache.any23.extractor.html.GeoExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+ html-mf-hcalendar
[org.apache.any23.extractor.html.HCalendarExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+ html-mf-hcard
[org.apache.any23.extractor.html.HCardExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+ html-mf-hlisting
[org.apache.any23.extractor.html.HListingExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+ html-mf-hrecipe
[org.apache.any23.extractor.html.HRecipeExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+ html-mf-hresume
[org.apache.any23.extractor.html.HResumeExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+ html-mf-hreview
[org.apache.any23.extractor.html.HReviewExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+html-mf-hreview-aggregate
[org.apache.any23.extractor.html.HReviewAggregateExtractorFactory]
[text/html;q=0.1, application/xhtml+xml;q=0.1]
+ html-mf-license
[org.apache.any23.extractor.html.LicenseExtractorFactory] [text/html;q=0.01,
application/xhtml+xml;q=0.01]
+ html-mf-species
[org.apache.any23.extractor.html.SpeciesExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+ html-mf-xfn
[org.apache.any23.extractor.html.XFNExtractorFactory] [text/html;q=0.1,
application/xhtml+xml;q=0.1]
+ html-microdata
[org.apache.any23.extractor.microdata.MicrodataExtractorFactory]
[text/html;q=0.1, application/xhtml+xml;q=0.1]
+ html-rdfa11
[org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory]
[application/xhtml+xml;q=0.3, application/html;q=0.3, text/html;q=0.3]
+ html-xpath
[org.apache.any23.extractor.xpath.XPathExtractorFactory] [text/html;q=0.02,
application/xhtml+xml;q=0.02]
+ ical
[org.apache.any23.extractor.calendar.ICalExtractorFactory] [text/calendar]
+ jcal
[org.apache.any23.extractor.calendar.JCalExtractorFactory]
[application/calendar+json]
+ owl-functional
[org.apache.any23.extractor.rdf.FunctionalSyntaxExtractorFactory]
[text/owl-functional]
+ owl-manchester
[org.apache.any23.extractor.rdf.ManchesterSyntaxExtractorFactory]
[text/owl-manchester]
+ rdf-jsonld
[org.apache.any23.extractor.rdf.JSONLDExtractorFactory]
[application/ld+json;q=0.1]
+ rdf-nq
[org.apache.any23.extractor.rdf.NQuadsExtractorFactory] [application/n-quads,
text/x-nquads;q=0.1, text/rdf+nq;q=0.1, text/nq;q=0.1, text/nquads;q=0.1,
text/n-quads;q=0.1]
+ rdf-nt
[org.apache.any23.extractor.rdf.NTriplesExtractorFactory]
[application/n-triples;q=0.1, text/nt;q=0.1, text/ntriples;q=0.1,
text/plain;q=0.1]
+ rdf-trix
[org.apache.any23.extractor.rdf.TriXExtractorFactory] [application/trix]
+ rdf-turtle
[org.apache.any23.extractor.rdf.TurtleExtractorFactory] [text/turtle,
text/rdf+n3, text/n3, application/n3, application/x-turtle, application/turtle]
+ rdf-xml
[org.apache.any23.extractor.rdf.RDFXMLExtractorFactory] [application/rdf+xml,
text/rdf, text/rdf+xml, application/rdf]
+ xcal
[org.apache.any23.extractor.calendar.XCalExtractorFactory]
[application/calendar+xml]
+ yaml
[org.apache.any23.extractor.yaml.YAMLExtractorFactory] [text/x-yaml;q=0.5]
+--------------------------------------
** The MicrodataParser tool