http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/MicrodataParser.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/MicrodataParser.java b/core/src/main/java/org/apache/any23/cli/MicrodataParser.java deleted file mode 100644 index 19c59bf..0000000 --- a/core/src/main/java/org/apache/any23/cli/MicrodataParser.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import com.beust.jcommander.IStringConverter; -import com.beust.jcommander.Parameter; -import com.beust.jcommander.ParameterException; -import com.beust.jcommander.Parameters; -import org.apache.any23.extractor.html.TagSoupParser; -import org.apache.any23.http.DefaultHTTPClient; -import org.apache.any23.source.DocumentSource; -import org.apache.any23.source.FileDocumentSource; -import org.apache.any23.source.HTTPDocumentSource; -import org.apache.any23.util.StreamUtils; - -import java.io.File; -import java.io.InputStream; -import java.net.URISyntaxException; -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Command line <i>Microdata</i> parser, accepting both files and URLs and - * returing a <i>JSON</i> representation of the extracted metadata as described at - * <a href="http://www.w3.org/TR/microdata/#json">Microdata JSON Specification</a>. - * - * @author Michele Mostarda ([email protected]) - */ -@Parameters( commandNames = { "microdata" }, commandDescription = "Commandline Tool for extracting Microdata from file/HTTP source.") -public class MicrodataParser implements Tool { - - private static final Pattern HTTP_DOCUMENT_PATTERN = Pattern.compile("^https?://.*"); - - private static final Pattern FILE_DOCUMENT_PATTERN = Pattern.compile("^file:(.*)$"); - - @Parameter( - arity = 1, - description = "Input document URL, {http://path/to/resource.html|file:/path/to/localFile.html}", - converter = MicrodataParserDocumentSourceConverter.class - ) - private List<DocumentSource> document = new LinkedList<DocumentSource>(); - - public void run() throws Exception { - if (document.isEmpty()) { - throw new IllegalArgumentException("No input document URL specified"); - } - InputStream documentInputInputStream = null; - try { - final DocumentSource documentSource = document.get(0); - documentInputInputStream = documentSource.openInputStream(); - final TagSoupParser tagSoupParser = new TagSoupParser( - documentInputInputStream, - documentSource.getDocumentIRI() - ); - org.apache.any23.extractor.microdata.MicrodataParser.getMicrodataAsJSON(tagSoupParser.getDOM(), System.out); - } finally { - if (documentInputInputStream != null) StreamUtils.closeGracefully(documentInputInputStream); - } - } - - public static final class MicrodataParserDocumentSourceConverter implements IStringConverter<DocumentSource> { - - @Override - public DocumentSource convert( String value ) { - final Matcher httpMatcher = HTTP_DOCUMENT_PATTERN.matcher(value); - if (httpMatcher.find()) { - try { - return new HTTPDocumentSource(DefaultHTTPClient.createInitializedHTTPClient(), value); - } catch ( URISyntaxException e ) { - throw new ParameterException("Invalid source IRI: '" + value + "'"); - } - } - final Matcher fileMatcher = FILE_DOCUMENT_PATTERN.matcher(value); - if (fileMatcher.find()) { - return new FileDocumentSource( new File( fileMatcher.group(1) ) ); - } - throw new ParameterException("Invalid source protocol: '" + value + "'"); - } - - } - -}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/MimeDetector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/MimeDetector.java b/core/src/main/java/org/apache/any23/cli/MimeDetector.java deleted file mode 100644 index c9072cb..0000000 --- a/core/src/main/java/org/apache/any23/cli/MimeDetector.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import com.beust.jcommander.IStringConverter; -import com.beust.jcommander.Parameter; -import com.beust.jcommander.Parameters; -import org.apache.any23.http.DefaultHTTPClient; -import org.apache.any23.http.DefaultHTTPClientConfiguration; -import org.apache.any23.http.HTTPClient; -import org.apache.any23.mime.MIMEType; -import org.apache.any23.mime.MIMETypeDetector; -import org.apache.any23.mime.TikaMIMETypeDetector; -import org.apache.any23.source.DocumentSource; -import org.apache.any23.source.FileDocumentSource; -import org.apache.any23.source.HTTPDocumentSource; -import org.apache.any23.source.StringDocumentSource; - -import java.io.File; -import java.net.URISyntaxException; -import java.util.LinkedList; -import java.util.List; - -/** - * Commandline tool to detect <b>MIME Type</b>s from - * file, HTTP and direct input sources. - * The implementation of this tool is based on {@link org.apache.any23.mime.TikaMIMETypeDetector}. - * - * @author Michele Mostarda ([email protected]) - */ -@Parameters(commandNames = { "mimes" }, commandDescription = "MIME Type Detector Tool.") -public class MimeDetector implements Tool{ - - public static final String FILE_DOCUMENT_PREFIX = "file://"; - - public static final String INLINE_DOCUMENT_PREFIX = "inline://"; - - public static final String URL_DOCUMENT_RE = "^https?://.*"; - - @Parameter( - arity = 1, - description = "Input document URL, {http://path/to/resource.html|file:///path/to/local.file|inline:// some inline content}", - converter = MimeDetectorDocumentSourceConverter.class - ) - private List<DocumentSource> document = new LinkedList<DocumentSource>(); - - public void run() throws Exception { - if (document.isEmpty()) { - throw new IllegalArgumentException("No input document URL specified"); - } - - final DocumentSource documentSource = document.get(0); - final MIMETypeDetector detector = new TikaMIMETypeDetector(); - final MIMEType mimeType = detector.guessMIMEType( - documentSource.getDocumentIRI(), - documentSource.openInputStream(), - MIMEType.parse(documentSource.getContentType()) - ); - System.out.println(mimeType); - } - - public static final class MimeDetectorDocumentSourceConverter implements IStringConverter<DocumentSource> { - - @Override - public DocumentSource convert( String document ) { - if (document.startsWith(FILE_DOCUMENT_PREFIX)) { - return new FileDocumentSource( new File( document.substring(FILE_DOCUMENT_PREFIX.length()) ) ); - } - if (document.startsWith(INLINE_DOCUMENT_PREFIX)) { - return new StringDocumentSource( document.substring(INLINE_DOCUMENT_PREFIX.length()), "" ); - } - if (document.matches(URL_DOCUMENT_RE)) { - final HTTPClient client = new DefaultHTTPClient(); - client.init( DefaultHTTPClientConfiguration.singleton() ); - try { - return new HTTPDocumentSource(client, document); - } catch ( URISyntaxException e ) { - throw new IllegalArgumentException("Invalid source IRI: '" + document + "'"); - } - } - throw new IllegalArgumentException("Unsupported protocol for document " + document); - } - - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/PluginVerifier.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/PluginVerifier.java b/core/src/main/java/org/apache/any23/cli/PluginVerifier.java deleted file mode 100644 index a747b49..0000000 --- a/core/src/main/java/org/apache/any23/cli/PluginVerifier.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import com.beust.jcommander.Parameter; -import com.beust.jcommander.Parameters; -import com.beust.jcommander.converters.FileConverter; -import org.apache.any23.extractor.ExtractorFactory; -import org.apache.any23.mime.MIMEType; -import org.apache.any23.plugin.Any23PluginManager; -import org.apache.any23.plugin.Author; -import java.io.File; -import java.io.PrintStream; -import java.util.Collection; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; - -/** - * Commandline utility to verify the <b>Any23</b> plugins - * and extract basic information. - * - * @author Michele Mostarda ([email protected]) - */ -@Parameters(commandNames = { "verify" }, commandDescription = "Utility for plugin management verification.") -public class PluginVerifier implements Tool { - - private Any23PluginManager pluginManager = Any23PluginManager.getInstance(); - - @Parameter( - description = "plugins-dir", - converter = FileConverter.class - ) - private List<File> pluginsDirs = new LinkedList<File>(); - - public void run() throws Exception { - if (pluginsDirs.isEmpty()) { - throw new IllegalArgumentException("No plugin directory specified."); - } - - final File pluginsDir = pluginsDirs.get(0); - if (!pluginsDir.isDirectory()) { - throw new IllegalArgumentException("<plugins-dir> must be a valid dir."); - } - - pluginManager.loadJARDir(pluginsDir); - - final Iterator<ExtractorFactory> plugins = pluginManager.getExtractors(); - - while (plugins.hasNext()) { - printPluginData(plugins.next(), System.out); - System.out.println("------------------------------------------------------------------------"); - } - } - - private String getMimeTypesStr(Collection<MIMEType> mimeTypes) { - final StringBuilder sb = new StringBuilder(); - for (MIMEType mt : mimeTypes) { - sb.append(mt).append(' '); - } - return sb.toString(); - } - - private void printPluginData(ExtractorFactory extractorFactory, PrintStream ps) { - final Author authorAnnotation = extractorFactory.getClass().getAnnotation(Author.class); - ps.printf("Plugin author : %s\n", authorAnnotation == null ? "<unknown>" : authorAnnotation.name()); - ps.printf("Plugin factory : %s\n", extractorFactory.getClass()); - ps.printf("Plugin mime-types: %s\n", getMimeTypesStr( extractorFactory.getSupportedMIMETypes() )); - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/Rover.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/Rover.java b/core/src/main/java/org/apache/any23/cli/Rover.java deleted file mode 100644 index 26a8663..0000000 --- a/core/src/main/java/org/apache/any23/cli/Rover.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import com.beust.jcommander.IStringConverter; -import com.beust.jcommander.Parameter; -import com.beust.jcommander.ParameterException; -import com.beust.jcommander.Parameters; -import com.beust.jcommander.converters.FileConverter; -import org.apache.any23.Any23; -import org.apache.any23.configuration.Configuration; -import org.apache.any23.configuration.DefaultConfiguration; -import org.apache.any23.extractor.ExtractionParameters; -import org.apache.any23.extractor.ExtractionParameters.ValidationMode; -import org.apache.any23.filter.IgnoreAccidentalRDFa; -import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments; -import org.apache.any23.source.DocumentSource; -import org.apache.any23.writer.BenchmarkTripleHandler; -import org.apache.any23.writer.LoggingTripleHandler; -import org.apache.any23.writer.ReportingTripleHandler; -import org.apache.any23.writer.TripleHandler; -import org.apache.any23.writer.TripleHandlerException; -import org.apache.any23.writer.WriterFactoryRegistry; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.PrintStream; -import java.io.PrintWriter; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.LinkedList; -import java.util.List; - -import static java.lang.String.format; - -/** - * A default rover implementation. Goes and fetches a URL using an hint - * as to what format should require, then tries to convert it to RDF. - * - * @author Michele Mostarda ([email protected]) - * @author Richard Cyganiak ([email protected]) - * @author Gabriele Renzi - */ -@Parameters(commandNames = { "rover" }, commandDescription = "Any23 Command Line Tool.") -public class Rover implements Tool { - - private static final List<String> FORMATS = WriterFactoryRegistry.getInstance().getIdentifiers(); - - private static final int DEFAULT_FORMAT_INDEX = 0; - - private static final Logger logger = LoggerFactory.getLogger(Rover.class); - - @Parameter( - names = { "-o", "--output" }, - description = "Specify Output file (defaults to standard output)", - converter = PrintStreamConverter.class - ) - private PrintStream outputStream = System.out; - - @Parameter(description = "input IRIs {<url>|<file>}+", converter = ArgumentToIRIConverter.class) - protected List<String> inputIRIs = new LinkedList<String>(); - - @Parameter(names = { "-e", "--extractors" }, description = "a comma-separated list of extractors, e.g. rdf-xml,rdf-turtle") - private List<String> extractors = new LinkedList<String>(); - - @Parameter(names = { "-f", "--format" }, description = "the output format") - private String format = FORMATS.get(DEFAULT_FORMAT_INDEX); - - @Parameter( - names = { "-l", "--log" }, - description = "Produce log within a file.", - converter = FileConverter.class - ) - private File logFile = null; - - @Parameter(names = { "-s", "--stats" }, description = "Print out extraction statistics.") - private boolean statistics; - - @Parameter(names = { "-t", "--notrivial" }, description = "Filter trivial statements (e.g. CSS related ones).") - private boolean noTrivial; - - @Parameter(names = { "-p", "--pedantic" }, description = "Validate and fixes HTML content detecting commons issues.") - private boolean pedantic; - - @Parameter(names = { "-n", "--nesting" }, description = "Disable production of nesting triples.") - private boolean nestingDisabled; - - @Parameter(names = { "-d", "--defaultns" }, description = "Override the default namespace used to produce statements.") - private String defaultns; - - // non parameters - - private TripleHandler tripleHandler; - - private ReportingTripleHandler reportingTripleHandler; - - private BenchmarkTripleHandler benchmarkTripleHandler; - - private Any23 any23; - - private ExtractionParameters extractionParameters; - - protected void configure() { - try { - tripleHandler = WriterFactoryRegistry.getInstance().getWriterInstanceByIdentifier(format, outputStream); - } catch (Exception e) { - throw new NullPointerException( - format("Invalid output format '%s', admitted values: %s", - format, - FORMATS - ) - ); - } - - if (logFile != null) { - try { - tripleHandler = new LoggingTripleHandler(tripleHandler, new PrintWriter(logFile)); - } catch (FileNotFoundException fnfe) { - throw new IllegalArgumentException( format("Can not write to log file [%s]", logFile), fnfe ); - } - } - - if (statistics) { - benchmarkTripleHandler = new BenchmarkTripleHandler(tripleHandler); - tripleHandler = benchmarkTripleHandler; - } - - if (noTrivial) { - tripleHandler = new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(tripleHandler), - true // suppress stylesheet triples. - ); - } - - reportingTripleHandler = new ReportingTripleHandler(tripleHandler); - - final Configuration configuration = DefaultConfiguration.singleton(); - extractionParameters = - pedantic - ? - new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled) - : - new ExtractionParameters(configuration, ValidationMode.None , nestingDisabled); - if (defaultns != null) { - extractionParameters.setProperty(ExtractionParameters.EXTRACTION_CONTEXT_IRI_PROPERTY, - defaultns); - } - - any23 = (extractors.isEmpty()) ? new Any23() - : new Any23(extractors.toArray(new String[extractors.size()])); - any23.setHTTPUserAgent(Any23.DEFAULT_HTTP_CLIENT_USER_AGENT + "/" + Any23.VERSION); - } - - protected String printReports() { - final StringBuilder sb = new StringBuilder(); - if (benchmarkTripleHandler != null) sb.append( benchmarkTripleHandler.report() ).append('\n'); - if (reportingTripleHandler != null) sb.append( reportingTripleHandler.printReport() ).append('\n'); - return sb.toString(); - } - - protected void performExtraction(DocumentSource documentSource) throws Exception { - if (!any23.extract(extractionParameters, documentSource, tripleHandler).hasMatchingExtractors()) { - throw new IllegalStateException(format("No suitable extractors found for source %s", documentSource)); - } - } - - protected void close() { - if (tripleHandler != null) { - try { - tripleHandler.close(); - } catch (TripleHandlerException the) { - throw new RuntimeException("Error while closing TripleHandler", the); - } - } - - if (outputStream != null && outputStream != System.out) { // TODO: low - find better solution to avoid closing system out. - outputStream.close(); - } - } - - public void run() throws Exception { - if (inputIRIs.isEmpty()) { - throw new IllegalArgumentException("Expected at least 1 argument."); - } - - configure(); - - // perform conversions - - try { - final long start = System.currentTimeMillis(); - for (String inputIRI : inputIRIs) { - DocumentSource source = any23.createDocumentSource(inputIRI); - - performExtraction( source ); - } - final long elapsed = System.currentTimeMillis() - start; - - if (benchmarkTripleHandler != null) { - System.err.println(benchmarkTripleHandler.report()); - } - - logger.info("Extractors used: " + reportingTripleHandler.getExtractorNames()); - logger.info(reportingTripleHandler.getTotalTriples() + " triples, " + elapsed + "ms"); - } finally { - close(); - } - } - - public static final class ArgumentToIRIConverter implements IStringConverter<String> { - - @Override - public String convert(String uri) { - uri = uri.trim(); - if (uri.toLowerCase().startsWith("http:") || uri.toLowerCase().startsWith("https:")) { - try { - return new URL(uri).toString(); - } catch (MalformedURLException murle) { - throw new ParameterException(format("Invalid IRI: '%s': %s", uri, murle.getMessage())); - } - } - - final File f = new File(uri); - if (!f.exists()) { - throw new ParameterException(format("No such file: [%s]", f.getAbsolutePath())); - } - if (f.isDirectory()) { - throw new ParameterException(format("Found a directory: [%s]", f.getAbsolutePath())); - } - return f.toURI().toString(); - } - - } - - public static final class PrintStreamConverter implements IStringConverter<PrintStream> { - - @Override - public PrintStream convert( String value ) { - final File file = new File(value); - try { - return new PrintStream(file); - } catch (FileNotFoundException fnfe) { - throw new ParameterException(format("Cannot open file '%s': %s", file, fnfe.getMessage())); - } - } - - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/ToolRunner.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/ToolRunner.java b/core/src/main/java/org/apache/any23/cli/ToolRunner.java deleted file mode 100644 index 90daeb3..0000000 --- a/core/src/main/java/org/apache/any23/cli/ToolRunner.java +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import com.beust.jcommander.converters.FileConverter; -import org.apache.any23.Any23; -import org.apache.any23.plugin.Any23PluginManager; -import org.apache.any23.util.LogUtils; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.PrintStream; -import java.util.Date; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; - -import static java.lang.System.currentTimeMillis; -import static java.lang.System.exit; - -/** - * This class is the main class responsible to provide a uniform command-line - * access points to all the others tools like {@link Rover}. - * - * @see ExtractorDocumentation - * @see Rover - */ -public final class ToolRunner { - - public static final File DEFAULT_PLUGIN_DIR = new File(new File(System.getProperty("user.home")), ".any23/plugins"); - - private static final PrintStream infoStream = System.err; - - @Parameter( names = { "-h", "--help" }, description = "Display help information." ) - private boolean printHelp; - - @Parameter( names = { "-v", "--version" }, description = "Display version information." ) - private boolean showVersion; - - @Parameter( names = { "-X", "--verbose" }, description = "Produce execution verbose output." ) - private boolean verbose; - - @Parameter( - names = { "--plugins-dir" }, - description = "The Any23 plugins directory.", - converter = FileConverter.class - ) - private File pluginsDir = DEFAULT_PLUGIN_DIR; - - public static void main( String[] args ) throws Exception { - exit( new ToolRunner().execute( args ) ); - } - - public int execute(String...args) throws Exception { - JCommander commander = new JCommander(this); - commander.setProgramName(System.getProperty("app.name")); - - // TODO (low) : this dirty solution has been introduced because it is not possible to - // parse arguments ( commander.parse() ) twice. - final File pluginsDirOption; - try { - pluginsDirOption = parsePluginDirOption(args); - } catch (Exception e) { - System.err.println(e.getMessage()); - return 1; - } - if(pluginsDirOption != null) { - pluginsDir = pluginsDirOption; - } - - // add all plugins first - final Iterator<Tool> tools = getToolsInClasspath(); - while (tools.hasNext()) { - Tool tool = tools.next(); - commander.addCommand(tool); - } - - commander.parse(args); - - Map<String, JCommander> commands = commander.getCommands(); - String parsedCommand = commander.getParsedCommand(); - - if (printHelp) { - commander.usage(); - return 0; - } - - if (showVersion) { - printVersionInfo(); - return 0; - } - - if(parsedCommand == null) { - infoStream.println("A command must be specified."); - commander.usage(); - return 1; - } - - if (verbose) { - LogUtils.setVerboseLogging(); - } else { - LogUtils.setDefaultLogging(); - } - - long start = currentTimeMillis(); - int exit = 0; - - Throwable error = null; - - // execute the parsed command - infoStream.println(); - infoStream.println( "------------------------------------------------------------------------" ); - infoStream.printf( "Apache Any23 :: %s%n", parsedCommand ); - infoStream.println( "------------------------------------------------------------------------" ); - infoStream.println(); - - try { - Tool.class.cast( commands.get( parsedCommand ).getObjects().get( 0 ) ).run(); - } catch (Throwable t) { - exit = 1; - error = t; - } finally { - infoStream.println(); - infoStream.println( "------------------------------------------------------------------------" ); - infoStream.printf( "Apache Any23 %s%n", ( exit != 0 ) ? "FAILURE" : "SUCCESS" ); - - if (exit != 0) { - infoStream.println(); - - if (verbose) { - System.err.println( "Execution terminated with errors:" ); - error.printStackTrace(infoStream); - } else { - infoStream.printf( "Execution terminated with errors: %s%n", error.getMessage() ); - } - - infoStream.println(); - } - - infoStream.printf( "Total time: %ss%n", ( ( currentTimeMillis() - start ) / 1000 ) ); - infoStream.printf( "Finished at: %s%n", new Date() ); - - final Runtime runtime = Runtime.getRuntime(); - final int megaUnit = 1024 * 1024; - infoStream.printf( "Final Memory: %sM/%sM%n", ( runtime.totalMemory() - runtime.freeMemory() ) / megaUnit, - runtime.totalMemory() / megaUnit ); - - infoStream.println( "------------------------------------------------------------------------" ); - } - - return exit; - } - - Iterator<Tool> getToolsInClasspath() throws IOException { - final Any23PluginManager pluginManager = Any23PluginManager.getInstance(); - if (pluginsDir.exists() && pluginsDir.isDirectory()) { - pluginManager.loadJARDir(pluginsDir); - } - return pluginManager.getTools(); - } - - private static void printVersionInfo() { - Properties properties = new Properties(); - InputStream input = ToolRunner.class.getClassLoader().getResourceAsStream( "META-INF/maven/org.apache.any23/any23-core/pom.properties" ); - - if ( input != null ) { - try { - properties.load( input ); - } catch ( IOException e ) { - // ignore, just don't load the properties - } finally { - try { - input.close(); - } catch (IOException e) { - // close quietly - } - } - } - - infoStream.printf( "Apache Any23 %s%n", Any23.VERSION ); - infoStream.printf( "Java version: %s, vendor: %s%n", - System.getProperty( "java.version" ), - System.getProperty( "java.vendor" ) ); - infoStream.printf( "Java home: %s%n", System.getProperty( "java.home" ) ); - infoStream.printf( "Default locale: %s_%s, platform encoding: %s%n", - System.getProperty( "user.language" ), - System.getProperty( "user.country" ), - System.getProperty( "sun.jnu.encoding" ) ); - infoStream.printf( "OS name: \"%s\", version: \"%s\", arch: \"%s\", family: \"%s\"%n", - System.getProperty( "os.name" ), - System.getProperty( "os.version" ), - System.getProperty( "os.arch" ), - getOsFamily() ); - } - - private static final String getOsFamily() { - String osName = System.getProperty( "os.name" ).toLowerCase(); - String pathSep = System.getProperty( "path.separator" ); - - if (osName.contains("windows")) { - return "windows"; - } else if (osName.contains("os/2")) { - return "os/2"; - } else if (osName.contains("z/os") || osName.contains("os/390")) { - return "z/os"; - } else if (osName.contains("os/400")) { - return "os/400"; - } else if (pathSep.equals( ";" )) { - return "dos"; - } else if (osName.contains("mac")) { - if (osName.endsWith("x")) { - return "mac"; // MACOSX - } - return "unix"; - } else if (osName.contains("nonstop_kernel")) { - return "tandem"; - } else if (osName.contains("openvms")) { - return "openvms"; - } else if (pathSep.equals(":")) { - return "unix"; - } - - return "undefined"; - } - - private static File parsePluginDirOption(String[] args) { - int optionIndex = -1; - for(int i = 0; i < args.length; i++) { - if("--plugins-dir".equals(args[i])) { - optionIndex = i; - } - } - if(optionIndex == -1) return null; - - if(optionIndex == args.length - 1) { - throw new IllegalArgumentException("Missing argument for --plugins-dir option."); - } - final File pluginsDir = new File( args[optionIndex + 1] ); - if( ! pluginsDir.isDirectory() ) { - throw new IllegalArgumentException("Expected a directory for --plugins-dir option value."); - } - return pluginsDir; - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/VocabPrinter.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/VocabPrinter.java b/core/src/main/java/org/apache/any23/cli/VocabPrinter.java deleted file mode 100644 index 7fde887..0000000 --- a/core/src/main/java/org/apache/any23/cli/VocabPrinter.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import org.apache.any23.vocab.RDFSchemaUtils; -import org.eclipse.rdf4j.rio.RDFFormat; -import org.eclipse.rdf4j.rio.RDFWriterRegistry; -import org.eclipse.rdf4j.rio.Rio; - -import com.beust.jcommander.IStringConverter; -import com.beust.jcommander.Parameter; -import com.beust.jcommander.Parameters; - -/** - * Prints out the vocabulary <i>RDFSchema</i> as <i>NQuads</i>. - * - * @author Michele Mostarda ([email protected]) - */ -@Parameters(commandNames = { "vocab" }, commandDescription = "Prints out the RDF Schema of the vocabularies used by Any23.") -public class VocabPrinter implements Tool { - - @Parameter(names = { "-f", "--format" }, description = "Vocabulary output format", converter = RDFFormatConverter.class) - private RDFFormat format = RDFFormat.NQUADS; - - public void run() throws Exception { - RDFSchemaUtils.serializeVocabularies(format, System.out); - } - - public static final class RDFFormatConverter implements - IStringConverter<RDFFormat> { - - @Override - public RDFFormat convert(String value) { - return RDFWriterRegistry.getInstance().getFileFormatForMIMEType(value).orElseThrow(Rio.unsupportedFormat(value)); - } - - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/package-info.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/package-info.java b/core/src/main/java/org/apache/any23/cli/package-info.java deleted file mode 100644 index 40ae928..0000000 --- a/core/src/main/java/org/apache/any23/cli/package-info.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * This package contains some command-line utilities which allow users - * to use the main <i>Any23</i> features via <i>commandline</i> shell. - */ -package org.apache.any23.cli; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java b/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java deleted file mode 100644 index 98616ba..0000000 --- a/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import org.junit.Test; - -/** - * Test case for {@link ExtractorDocumentation} CLI. - * - * @author Michele Mostarda ([email protected]) - */ -public class ExtractorDocumentationTest extends ToolTestBase { - - private static final String TARGET_EXTRACTOR = "html-microdata"; - - public ExtractorDocumentationTest() { - super(ExtractorDocumentation.class); - } - - @Test - public void testList() throws Exception { - runToolCheckExit0("--list"); - } - - @Test - public void testAll() throws Exception { - runToolCheckExit0("--all"); - } - - //@Ignore("no available example") - @Test - public void testExampleInput() throws Exception { - runToolCheckExit0("-i", TARGET_EXTRACTOR); - } - - //@Ignore("no available example") - @Test - public void testExampleOutput() throws Exception { - runToolCheckExit0("-o", TARGET_EXTRACTOR); - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/MicrodataParserTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/cli/MicrodataParserTest.java b/core/src/test/java/org/apache/any23/cli/MicrodataParserTest.java deleted file mode 100644 index a80e729..0000000 --- a/core/src/test/java/org/apache/any23/cli/MicrodataParserTest.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import org.junit.Ignore; -import org.junit.Test; - -/** - * Test case for {@link MicrodataParser} CLI. - * - * @author Michele Mostarda ([email protected]) - */ -public class MicrodataParserTest extends ToolTestBase { - - public MicrodataParserTest() { - super(MicrodataParser.class); - } - - @Test - public void testRunOnFile() throws Exception { - runToolCheckExit0("file:"+copyResourceToTempFile("/microdata/microdata-nested.html").getAbsolutePath()); - } - - @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content") - @Test - public void testRunOnHTTPResource() throws Exception { - runToolCheckExit0("http://www.imdb.com/title/tt1375666/"); - } - - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/MimeDetectorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/cli/MimeDetectorTest.java b/core/src/test/java/org/apache/any23/cli/MimeDetectorTest.java deleted file mode 100644 index 3894d32..0000000 --- a/core/src/test/java/org/apache/any23/cli/MimeDetectorTest.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import org.junit.Test; - -/** - * Test case for {@link MimeDetector} CLI. - * - * @author Michele Mostarda ([email protected]) - */ -public class MimeDetectorTest extends ToolTestBase { - - public MimeDetectorTest() { - super(MimeDetector.class); - } - - @Test - public void testDetectURL() throws Exception { - assumeOnlineAllowed(); - runToolCheckExit0("http://twitter.com#micmos"); - } - - @Test - public void testDetectFile() throws Exception { - assumeOnlineAllowed(); - runToolCheckExit0("file://"+copyResourceToTempFile("/application/trix/test1.trx").getAbsolutePath()); - } - - @Test - public void testDetectInline() throws Exception { - assumeOnlineAllowed(); - runToolCheckExit0( new String[] {"inline://<http://s> <http://p> <http://o> ."} ); - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/PluginVerifierTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/cli/PluginVerifierTest.java b/core/src/test/java/org/apache/any23/cli/PluginVerifierTest.java deleted file mode 100644 index bdee9ae..0000000 --- a/core/src/test/java/org/apache/any23/cli/PluginVerifierTest.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import org.junit.Test; - -/** - * Test case for {@link PluginVerifier} CLI. - * - * @author Michele Mostarda ([email protected]) - */ -public class PluginVerifierTest extends ToolTestBase { - - public PluginVerifierTest() { - super(PluginVerifier.class); - } - - @Test - public void testRun() throws Exception { - runToolCheckExit0("."); - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/RoverTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/cli/RoverTest.java b/core/src/test/java/org/apache/any23/cli/RoverTest.java deleted file mode 100644 index 893220a..0000000 --- a/core/src/test/java/org/apache/any23/cli/RoverTest.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import org.apache.any23.rdf.RDFUtils; -import org.apache.any23.util.FileUtils; -import org.apache.any23.util.StringUtils; -import org.apache.any23.util.URLUtils; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Ignore; -import org.junit.Test; -import org.eclipse.rdf4j.model.Statement; -import org.eclipse.rdf4j.rio.RDFFormat; - -import java.io.File; -import java.util.Arrays; - -/** - * Test case for {@link Rover}. - * - * @author Michele Mostarda ([email protected]) - */ -@Ignore("Twitter microdata not parsing correctly right now") -public class RoverTest extends ToolTestBase { - - private static final String[] TARGET_FILES = { - "/microdata/microdata-nested.html", - "/org/apache/any23/extractor/csv/test-semicolon.csv" - }; - - private static final String[] TARGET_URLS = { - "http://twitter.com/micmos", - "http://twitter.com/dpalmisano" - }; - - public RoverTest() { - super(Rover.class); - } - - @Test - public void testRunMultiFiles() throws Exception { - - String[] copiedTargets = new String[TARGET_FILES.length]; - for(int i = 0; i < TARGET_FILES.length; i++) - { - File tempFile = copyResourceToTempFile(TARGET_FILES[i]); - - copiedTargets[i] = tempFile.getAbsolutePath(); - } - - runWithMultiSourcesAndVerify(copiedTargets, 0); - } - - @Test - public void testRunWithDefaultNS() throws Exception { - final String DEFAULT_GRAPH = "http://test/default/ns"; - final File outFile = File.createTempFile("rover-test", "out", tempDirectory); - final int exitCode = runTool( - String.format( - "-o %s -f nquads -p -n %s -d %s", - outFile.getAbsolutePath(), - copyResourceToTempFile("/cli/rover-test1.nq").getAbsolutePath(), - DEFAULT_GRAPH - ) - ); - - Assert.assertEquals("Unexpected exit code.", 0, exitCode); - Assert.assertTrue(outFile.exists()); - final String fileContent = FileUtils.readFileContent(outFile); - final String[] lines = fileContent.split("\\n"); - int graphCounter = 0; - for(String line : lines) { - if(line.contains(DEFAULT_GRAPH)) { - graphCounter++; - } - } - Assert.assertEquals(0, graphCounter); - } - - /* BEGIN: online tests. */ - - @Test - public void testRunMultiURLs() throws Exception { - // Assuming first accessibility to remote resources. - assumeOnlineAllowed(); - for(String targetURL : TARGET_URLS) { - Assume.assumeTrue( URLUtils.isOnline(targetURL) ); - } - - runWithMultiSourcesAndVerify(TARGET_URLS, 0); - } - - private void runWithMultiSourcesAndVerify(String[] targets, int expectedExit) throws Exception { - final File outFile = File.createTempFile("rover-test", "out", tempDirectory); - final File logFile = File.createTempFile("rover-test", "log", tempDirectory); - - final int exitCode = runTool( - String.format( - "-o %s -f nquads -l %s -p -n %s", - outFile.getAbsolutePath(), - logFile.getAbsolutePath(), - StringUtils.join(" ", targets) - ) - ); - Assert.assertEquals("Unexpected exit code.", expectedExit, exitCode); - - Assert.assertTrue(outFile.exists()); - Assert.assertTrue(logFile.exists()); - - final String logFileContent = FileUtils.readFileContent(logFile); - Assert.assertEquals( - "Unexpected number of log lines.", - targets.length + 1, // Header line. - StringUtils.countNL(logFileContent) - ); - - final String outNQuads = FileUtils.readFileContent(outFile); - final Statement[] statements = RDFUtils.parseRDF(RDFFormat.NQUADS, outNQuads); - System.out.println(Arrays.toString(statements)); - Assert.assertTrue("Unexpected number of statements.", statements.length > 9); - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java b/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java deleted file mode 100644 index 881a782..0000000 --- a/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import junit.framework.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; - -import static org.junit.Assert.assertTrue; - -/** - * Test case for {@link ToolRunner}. - * - * @author Michele Mostarda ([email protected]) - */ -public class ToolRunnerTest { - - private final Set<Class<? extends Tool>> coreTools = new HashSet<Class<? extends Tool>>(){{ - add(ExtractorDocumentation.class); - add(MicrodataParser.class); - add(MimeDetector.class); - add(PluginVerifier.class); - add(Rover.class); - add(VocabPrinter.class); - }}; - - @Test - public void testGetToolsInClasspath() throws IOException { - Iterator<Tool> tools = new ToolRunner().getToolsInClasspath(); - assertTrue("No core tools have been detected", tools.hasNext()); - while (tools.hasNext()) { - assertTrue("Some core tools have not been detected.", coreTools.contains(tools.next().getClass())); - } - } - - @Test - public void testGetVersion() throws Exception { - Assert.assertEquals(0, new ToolRunner().execute("-v") ); - } - - @Test - public void testGetHelp() throws Exception { - Assert.assertEquals(0, new ToolRunner().execute("-h") ); - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/ToolTestBase.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/cli/ToolTestBase.java b/core/src/test/java/org/apache/any23/cli/ToolTestBase.java deleted file mode 100644 index fef49cd..0000000 --- a/core/src/test/java/org/apache/any23/cli/ToolTestBase.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import com.beust.jcommander.Parameters; -import org.apache.any23.Any23OnlineTestBase; - -import java.util.Arrays; - -import static java.lang.String.format; -import static org.junit.Assert.assertEquals; - -/** - * Base class for <i>CLI</i> related tests. - * - * @author Michele Mostarda ([email protected]) - */ -// TODO: improve support for Tool testing, intercept i/o streams. -public abstract class ToolTestBase extends Any23OnlineTestBase { - - public static final String TOOL_RUN_METHOD = "run"; - - private final Class<? extends Tool> toolClazz; - - protected ToolTestBase(Class<? extends Tool> tool) { - if (tool == null) throw new NullPointerException(); - toolClazz = tool; - } - - /** - * Runs the underlying tool. - * - * @param args tool arguments. - * @return the tool exit code. - * @throws Exception - */ - protected int runTool(String... args) throws Exception { - final String commandName = toolClazz.getAnnotation( Parameters.class ).commandNames()[0]; - - final String[] enhancedArgs = new String[args.length + 1]; - enhancedArgs[0] = commandName; - System.arraycopy( args, 0, enhancedArgs, 1, args.length ); - - return new ToolRunner().execute( enhancedArgs ); - } - - /** - * Runs the underlying tool. - * - * @param args args tool arguments. - * @return the tool exit code. - * @throws Exception - */ - protected int runTool(String args) throws Exception { - return runTool(args.split(" ")); - } - - /** - * Runs the underlying tool and verify the exit code to <code>0</code>. - * - * @param args tool arguments. - * @throws Exception - */ - protected void runToolCheckExit0(String... args) throws Exception { - assertEquals( - format( - "Unexpected exit code for tool [%s] invoked with %s", - toolClazz.getSimpleName(), - Arrays.asList(args) - ), - 0, - runTool(args) - ); - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/VocabPrinterTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/cli/VocabPrinterTest.java b/core/src/test/java/org/apache/any23/cli/VocabPrinterTest.java deleted file mode 100644 index 1c841dc..0000000 --- a/core/src/test/java/org/apache/any23/cli/VocabPrinterTest.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.any23.cli; - -import org.junit.Test; - -/** - * Test case for {@link VocabPrinter} CLI. - * - * @author Michele Mostarda ([email protected]) - */ -public class VocabPrinterTest extends ToolTestBase { - - public VocabPrinterTest() { - super(VocabPrinter.class); - } - - @Test - public void testRun() throws Exception { - runToolCheckExit0(); - } - -} http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/plugins/basic-crawler/pom.xml ---------------------------------------------------------------------- diff --git a/plugins/basic-crawler/pom.xml b/plugins/basic-crawler/pom.xml index bffd7e2..d5a5b05 100644 --- a/plugins/basic-crawler/pom.xml +++ b/plugins/basic-crawler/pom.xml @@ -27,7 +27,6 @@ <groupId>org.apache.any23.plugins</groupId> <artifactId>apache-any23-basic-crawler</artifactId> - <version>1.0.6-SNAPSHOT</version> <name>Apache Any23 :: Plugins :: Basic Crawler</name> <description>Any23 plugin for crawling sites.</description> @@ -56,6 +55,21 @@ <scope>test</scope> </dependency> + <dependency> + <groupId>org.apache.any23</groupId> + <artifactId>apache-any23-cli</artifactId> + <version>2.0-SNAPSHOT</version> + <scope>provided</scope> + </dependency> + + <dependency> + <groupId>org.apache.any23</groupId> + <artifactId>apache-any23-cli</artifactId> + <version>2.0-SNAPSHOT</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> + <!-- Crawler4j --> <dependency> <groupId>edu.uci.ics</groupId> http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/plugins/html-scraper/pom.xml ---------------------------------------------------------------------- diff --git a/plugins/html-scraper/pom.xml b/plugins/html-scraper/pom.xml index e04651a..359b08a 100644 --- a/plugins/html-scraper/pom.xml +++ b/plugins/html-scraper/pom.xml @@ -27,7 +27,6 @@ <groupId>org.apache.any23.plugins</groupId> <artifactId>apache-any23-html-scraper</artifactId> - <version>1.0.7-SNAPSHOT</version> <name>Apache Any23 :: Plugins :: HTML Scraper</name> <description>Any23 plugin for scraping HTML code.</description> http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/plugins/office-scraper/pom.xml ---------------------------------------------------------------------- diff --git a/plugins/office-scraper/pom.xml b/plugins/office-scraper/pom.xml index db5e3ef..40476a3 100644 --- a/plugins/office-scraper/pom.xml +++ b/plugins/office-scraper/pom.xml @@ -27,7 +27,6 @@ <groupId>org.apache.any23.plugins</groupId> <artifactId>apache-any23-office-scraper</artifactId> - <version>1.0.6-SNAPSHOT</version> <name>Apache Any23 :: Plugins :: Office Scraper</name> <description>Any23 plugin for scraping metadata from MS Office related file formats.</description> http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 0a8c69b..db0448b 100644 --- a/pom.xml +++ b/pom.xml @@ -203,6 +203,7 @@ <module>mime</module> <module>encoding</module> <module>core</module> + <module>cli</module> <module>plugins/basic-crawler</module> <module>plugins/html-scraper</module> <module>plugins/office-scraper</module>
