This is an automated email from the ASF dual-hosted git repository. epugh pushed a commit to branch branch_9x in repository https://gitbox.apache.org/repos/asf/solr.git
commit dcc3ee1b9e3a0f8fb0dfb928b6b26e755f3cf938 Author: Eric Pugh <[email protected]> AuthorDate: Fri Jul 7 08:07:51 2023 -0400 SOLR-6994: Integrate post tool into bin/solr and bin/solr.cmd (#1634) We now support the Post tool on windows by reusing the SolrCLI infrastructure, instead of having a separate post.cmd. This commit does NOT remove the post.jar compilation, or the bin/post shell script. It does update the Ref Guide to refer to bin/solr post. In the future we will want to come to a resolution on the post.jar, and maybe rename bin/solr post to something else like bin/solr indexer? We have very basic bats and unit test now. --- solr/CHANGES.txt | 2 + solr/bin/post | 1 + solr/bin/solr | 9 +- solr/bin/solr.cmd | 1 + .../src/java/org/apache/solr/cli/DeleteTool.java | 2 +- .../src/java/org/apache/solr/cli/PostTool.java | 143 +++++++++++++++++ .../java/org/apache/solr/cli/RunExampleTool.java | 8 + .../java/org/apache/solr/cli/SimplePostTool.java | 174 ++++++++++++++------- .../core/src/java/org/apache/solr/cli/SolrCLI.java | 10 +- .../src/test/org/apache/solr/cli/PostToolTest.java | 55 +++++++ solr/packaging/test/test_extraction.bats | 110 +++++++++++++ solr/packaging/test/test_help.bats | 12 +- solr/packaging/test/test_post.bats | 173 ++++++++++++++++++++ .../modules/indexing-guide/pages/post-tool.adoc | 107 ++++++------- 14 files changed, 675 insertions(+), 132 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a588e42fd91..2974bf88f94 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -51,6 +51,8 @@ Other Changes * SOLR-16915: Lower the AffinityPlacementPlugin's default minimalFreeDiskGB to 5 GB (Houston Putman) +* SOLR-6994: Implement Windows version of bin/post via implementing bin/solr post command that works on Windows and Unix. Deprecate bin/post. 
(Eric Pugh, Will White) + ================== 9.3.0 ================== Upgrade Notes diff --git a/solr/bin/post b/solr/bin/post index c9ad8510802..e7ef443afd2 100755 --- a/solr/bin/post +++ b/solr/bin/post @@ -49,6 +49,7 @@ fi # test that Java exists and is executable on this server "$JAVA" -version >/dev/null 2>&1 || { echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."; exit 1; } +echo "The bin/post script is deprecated in favour of the bin/solr post command. Please update your scripts." # ===== post specific code diff --git a/solr/bin/solr b/solr/bin/solr index 79b1c6c8046..7aeae89b8af 100644 --- a/solr/bin/solr +++ b/solr/bin/solr @@ -690,7 +690,6 @@ function print_usage() { elif [ "$CMD" == "package" ]; then echo "" run_tool package -solrUrl "http://fake.to.allow.help.to.run" "help" - fi } # end print_usage @@ -1477,12 +1476,6 @@ if [[ "$SCRIPT_CMD" == "zk" ]]; then exit $? fi - -if [[ "$SCRIPT_CMD" == "export" ]]; then - run_tool export "$@" - exit $? -fi - if [[ "$SCRIPT_CMD" == "package" ]]; then if [ $# -gt 0 ]; then while true; do @@ -1644,7 +1637,7 @@ fi # verify the command given is supported -if [ "$SCRIPT_CMD" != "stop" ] && [ "$SCRIPT_CMD" != "start" ] && [ "$SCRIPT_CMD" != "restart" ] && [ "$SCRIPT_CMD" != "status" ] && [ "$SCRIPT_CMD" != "assert" ]; then +if [ "$SCRIPT_CMD" != "stop" ] && [ "$SCRIPT_CMD" != "start" ] && [ "$SCRIPT_CMD" != "restart" ] && [ "$SCRIPT_CMD" != "status" ]; then # handoff this command to the SolrCLI and let it handle the option parsing and validation run_tool "$SCRIPT_CMD" "$@" exit $? 
diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd index bfdeab06c92..7b3ecdd9281 100755 --- a/solr/bin/solr.cmd +++ b/solr/bin/solr.cmd @@ -237,6 +237,7 @@ IF "%1"=="export" goto run_solrcli IF "%1"=="package" goto run_solrcli IF "%1"=="api" goto run_solrcli IF "%1"=="postlogs" goto run_solrcli +IF "%1"=="post" goto run_solrcli REM Only allow the command to be the first argument, assume start if not supplied IF "%1"=="start" goto set_script_cmd diff --git a/solr/core/src/java/org/apache/solr/cli/DeleteTool.java b/solr/core/src/java/org/apache/solr/cli/DeleteTool.java index d4d3d2e1943..78e36416d05 100644 --- a/solr/core/src/java/org/apache/solr/cli/DeleteTool.java +++ b/solr/core/src/java/org/apache/solr/cli/DeleteTool.java @@ -234,4 +234,4 @@ public class DeleteTool extends ToolBase { echoIfVerbose("\n", cli); } } -} // end DeleteTool class +} diff --git a/solr/core/src/java/org/apache/solr/cli/PostTool.java b/solr/core/src/java/org/apache/solr/cli/PostTool.java new file mode 100644 index 00000000000..5e6b5efeef0 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/cli/PostTool.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.cli; + +import java.io.OutputStream; +import java.io.PrintStream; +import java.net.URL; +import java.util.List; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; + +public class PostTool extends ToolBase { + + public PostTool() { + this(CLIO.getOutStream()); + } + + public PostTool(PrintStream stdout) { + super(stdout); + } + + @Override + public String getName() { + return "post"; + } + + @Override + public List<Option> getOptions() { + return List.of( + Option.builder("url") + .argName("url") + .hasArg() + .required(true) + .desc("<base Solr update URL>") + .build(), + Option.builder("commit").required(false).desc("Issue a commit at end of post").build(), + Option.builder("optimize").required(false).desc("Issue an optimize at end of post").build(), + Option.builder("mode") + .argName("mode") + .hasArg(true) + .required(false) + .desc("Files crawls files, web crawls website. default: files.") + .build(), + Option.builder("recursive") + .argName("recursive") + .hasArg(true) + .required(false) + .desc("For web crawl, how deep to go. default: 1") + .build(), + Option.builder("delay") + .argName("delay") + .hasArg(true) + .required(false) + .desc( + "If recursive then delay will be the wait time between posts. 
default: 10 for web, 0 for files") + .build(), + Option.builder("type") + .argName("content-type") + .hasArg(true) + .required(false) + .desc("default: application/json") + .build(), + Option.builder("filetypes") + .argName("<type>[,<type>,...]") + .hasArg(true) + .required(false) + .desc("default: " + SimplePostTool.DEFAULT_FILE_TYPES) + .build(), + Option.builder("params") + .argName("<key>=<value>[&<key>=<value>...]") + .hasArg(true) + .required(false) + .desc("values must be URL-encoded; these pass through to Solr update request") + .build(), + Option.builder("out") + .required(false) + .desc("sends Solr response outputs to console") + .build(), + Option.builder("format") + .required(false) + .desc( + "sends application/json content as Solr commands to /update instead of /update/json/docs") + .build()); + } + + @Override + public void runImpl(CommandLine cli) throws Exception { + SolrCLI.raiseLogLevelUnlessVerbose(cli); + + String url = cli.getOptionValue("url"); + URL solrUrl = new URL(url); + + String mode = SimplePostTool.DEFAULT_DATA_MODE; + if (cli.hasOption("mode")) { + mode = cli.getOptionValue("mode"); + } + boolean auto = true; + String type = null; + if (cli.hasOption("type")) { + type = cli.getOptionValue("type"); + } + String format = + cli.hasOption("format") + ? SimplePostTool.FORMAT_SOLR + : ""; // i.e not solr formatted json commands + + String fileTypes = SimplePostTool.DEFAULT_FILE_TYPES; + if (cli.hasOption("filetypes")) { + fileTypes = cli.getOptionValue("filetypes"); + } + + int defaultDelay = (mode.equals((SimplePostTool.DATA_MODE_WEB)) ? 10 : 0); + int delay = Integer.parseInt(cli.getOptionValue("delay", String.valueOf(defaultDelay))); + int recursive = Integer.parseInt(cli.getOptionValue("recursive", "1")); + + OutputStream out = cli.hasOption("out") ? 
CLIO.getOutStream() : null; + boolean commit = cli.hasOption("commit"); + boolean optimize = cli.hasOption("optimize"); + + String[] args = cli.getArgs(); + + SimplePostTool spt = + new SimplePostTool( + mode, solrUrl, auto, type, format, recursive, delay, fileTypes, out, commit, optimize, + args); + + spt.execute(); + } +} diff --git a/solr/core/src/java/org/apache/solr/cli/RunExampleTool.java b/solr/core/src/java/org/apache/solr/cli/RunExampleTool.java index a3615ef92c4..47dfaf30d6f 100644 --- a/solr/core/src/java/org/apache/solr/cli/RunExampleTool.java +++ b/solr/core/src/java/org/apache/solr/cli/RunExampleTool.java @@ -315,12 +315,20 @@ public class RunExampleTool extends ToolBase { String currentPropVal = System.getProperty("url"); System.setProperty("url", updateUrl); + String currentTypeVal = System.getProperty("type"); + // We assume that example docs are always in XML. + System.setProperty("type", "application/xml"); SimplePostTool.main(new String[] {exampledocsDir.getAbsolutePath() + "/*.xml"}); if (currentPropVal != null) { System.setProperty("url", currentPropVal); // reset } else { System.clearProperty("url"); } + if (currentTypeVal != null) { + System.setProperty("type", currentTypeVal); // reset + } else { + System.clearProperty("type"); + } } else { echo( "exampledocs directory not found, skipping indexing step for the techproducts example"); diff --git a/solr/core/src/java/org/apache/solr/cli/SimplePostTool.java b/solr/core/src/java/org/apache/solr/cli/SimplePostTool.java index 4ccbc871f33..11478366711 100644 --- a/solr/core/src/java/org/apache/solr/cli/SimplePostTool.java +++ b/solr/core/src/java/org/apache/solr/cli/SimplePostTool.java @@ -67,6 +67,7 @@ import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.apache.solr.common.util.Utils; import org.apache.solr.util.RTimer; +import org.apache.solr.util.SolrVersion; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; @@ -76,12 
+77,14 @@ import org.xml.sax.SAXException; * A simple utility class for posting raw updates to a Solr server. It has a main method, so it can * be run on the command line. View this not as a best-practice code example, but as a standalone * example built with an explicit purpose of not having external jar dependencies. + * + * @deprecated This class is being replaced with bin/solr tool. Please use {@link PostTool} instead. */ +@Deprecated(since = "9.2") public class SimplePostTool { private static final String DEFAULT_POST_HOST = "localhost"; private static final String DEFAULT_POST_PORT = "8983"; - private static final String VERSION_OF_THIS_TOOL = - "5.0.0"; // TODO: hardcoded for now, but eventually to sync with actual Solr version + private static final String VERSION_OF_THIS_TOOL = SolrVersion.LATEST_STRING; private static final String DEFAULT_COMMIT = "yes"; private static final String DEFAULT_OPTIMIZE = "no"; @@ -90,8 +93,8 @@ public class SimplePostTool { private static final String DEFAULT_RECURSIVE = "0"; private static final int DEFAULT_WEB_DELAY = 10; private static final int MAX_WEB_DEPTH = 10; - private static final String DEFAULT_CONTENT_TYPE = "application/xml"; - private static final String DEFAULT_FILE_TYPES = + public static final String DEFAULT_CONTENT_TYPE = "application/json"; + public static final String DEFAULT_FILE_TYPES = "xml,json,jsonl,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log"; private static final String BASIC_AUTH = "basicauth"; @@ -101,6 +104,8 @@ public class SimplePostTool { static final String DATA_MODE_WEB = "web"; static final String DEFAULT_DATA_MODE = DATA_MODE_FILES; + static final String FORMAT_SOLR = "solr"; + // Input args boolean auto = false; int recursive = 0; @@ -141,7 +146,7 @@ public class SimplePostTool { mimeMap.put("xml", "application/xml"); mimeMap.put("csv", "text/csv"); mimeMap.put("json", "application/json"); - mimeMap.put("jsonl", "application/json"); + 
mimeMap.put("jsonl", "application/jsonl"); mimeMap.put("pdf", "application/pdf"); mimeMap.put("rtf", "text/rtf"); mimeMap.put("html", "text/html"); @@ -198,8 +203,12 @@ public class SimplePostTool { return; } - if (commit) commit(); - if (optimize) optimize(); + if (commit) { + commit(); + } + if (optimize) { + optimize(); + } displayTiming((long) timer.getTime()); } @@ -230,7 +239,6 @@ public class SimplePostTool { } String params = System.getProperty("params", ""); - String host = System.getProperty("host", DEFAULT_POST_HOST); String port = System.getProperty("port", DEFAULT_POST_PORT); String core = System.getProperty("c"); @@ -244,6 +252,7 @@ public class SimplePostTool { if (urlStr == null) { urlStr = String.format(Locale.ROOT, "http://%s:%s/solr/%s/update", host, port, core); } + urlStr = SimplePostTool.appendParam(urlStr, params); URL url = new URL(urlStr); String user = null; @@ -252,7 +261,9 @@ public class SimplePostTool { } else if (System.getProperty(BASIC_AUTH) != null) { user = System.getProperty(BASIC_AUTH).trim().split(":")[0]; } - if (user != null) info("Basic Authentication enabled, user=" + user); + if (user != null) { + info("Basic Authentication enabled, user=" + user); + } boolean auto = isOn(System.getProperty("auto", DEFAULT_AUTO)); String type = System.getProperty("type"); @@ -263,7 +274,9 @@ public class SimplePostTool { try { recursive = Integer.parseInt(r); } catch (Exception e) { - if (isOn(r)) recursive = DATA_MODE_WEB.equals(mode) ? 1 : 999; + if (isOn(r)) { + recursive = DATA_MODE_WEB.equals(mode) ? 1 : 999; + } } // Delay int delay = DATA_MODE_WEB.equals(mode) ? DEFAULT_WEB_DELAY : 0; @@ -343,9 +356,12 @@ public class SimplePostTool { + solrUrl + (!auto ? " using content-type " + (type == null ? DEFAULT_CONTENT_TYPE : type) : "") + "..."); - if (auto) info("Entering auto mode. File endings considered are " + fileTypes); - if (recursive > 0) + if (auto) { + info("Entering auto mode. 
File endings considered are " + fileTypes); + } + if (recursive > 0) { info("Entering recursive mode, max depth=" + recursive + ", delay=" + delay + "s"); + } int numFilesPosted = postFiles(args, 0, out, type); info(numFilesPosted + " files indexed."); } @@ -382,9 +398,10 @@ public class SimplePostTool { recursive = MAX_WEB_DEPTH; warn("Too large recursion depth for web mode, limiting to " + MAX_WEB_DEPTH + "..."); } - if (delay < DEFAULT_WEB_DELAY) + if (delay < DEFAULT_WEB_DELAY) { warn( "Never crawl an external web site faster than every 10 seconds, your IP will probably be blocked"); + } info("Entering recursive mode, depth=" + recursive + ", delay=" + delay + "s"); } numPagesPosted = postWebPages(args, 0, out); @@ -425,8 +442,8 @@ public class SimplePostTool { + ")\n" + " -Dtype=<content-type> (default=" + DEFAULT_CONTENT_TYPE - + ")\n" + " -Dhost=<host> (default: " + + ")\n" + DEFAULT_POST_HOST + ")\n" + " -Dport=<port> (default: " @@ -545,7 +562,9 @@ public class SimplePostTool { * @return number of files posted total */ private int postDirectory(File dir, OutputStream out, String type) { - if (dir.isHidden() && !dir.getName().equals(".")) return (0); + if (dir.isHidden() && !dir.getName().equals(".")) { + return (0); + } info( "Indexing directory " + dir.getPath() @@ -577,7 +596,9 @@ public class SimplePostTool { int filesPosted = 0; for (File srcFile : files) { try { - if (!srcFile.isFile() || srcFile.isHidden()) continue; + if (!srcFile.isFile() || srcFile.isHidden()) { + continue; + } postFile(srcFile, out, type); Thread.sleep(delay * 1000L); filesPosted++; @@ -599,7 +620,9 @@ public class SimplePostTool { int handleGlob(File globFile, OutputStream out, String type) { int filesPosted = 0; File parent = globFile.getParentFile(); - if (parent == null) parent = new File("."); + if (parent == null) { + parent = new File("."); + } String fileGlob = globFile.getName(); GlobFileFilter ff = new GlobFileFilter(fileGlob, false); File[] fileList = 
parent.listFiles(ff); @@ -642,9 +665,15 @@ public class SimplePostTool { * @return the normalized URL string */ protected static String normalizeUrlEnding(String link) { - if (link.contains("#")) link = link.substring(0, link.indexOf('#')); - if (link.endsWith("?")) link = link.substring(0, link.length() - 1); - if (link.endsWith("/")) link = link.substring(0, link.length() - 1); + if (link.contains("#")) { + link = link.substring(0, link.indexOf('#')); + } + if (link.endsWith("?")) { + link = link.substring(0, link.length() - 1); + } + if (link.endsWith("/")) { + link = link.substring(0, link.length() - 1); + } return link; } @@ -754,7 +783,9 @@ public class SimplePostTool { if (!path.endsWith("/")) { int sep = path.lastIndexOf('/'); String file = path.substring(sep + 1); - if (file.contains(".") || file.contains("?")) path = path.substring(0, sep); + if (file.contains(".") || file.contains("?")) { + path = path.substring(0, sep); + } } link = baseUrl.getProtocol() + "://" + baseUrl.getAuthority() + path + "/" + link; } @@ -778,7 +809,9 @@ public class SimplePostTool { protected boolean typeSupported(String type) { for (Map.Entry<String, String> entry : mimeMap.entrySet()) { if (entry.getValue().equals(type)) { - if (fileTypes.contains(entry.getKey())) return true; + if (fileTypes.contains(entry.getKey())) { + return true; + } } } return false; @@ -829,7 +862,9 @@ public class SimplePostTool { public static String appendParam(String url, String param) { String[] pa = param.split("&"); for (String p : pa) { - if (p.trim().length() == 0) continue; + if (p.trim().length() == 0) { + continue; + } String[] kv = p.split("="); if (kv.length == 2) { url = url + (url.contains("?") ? 
"&" : "?") + kv[0] + "=" + kv[1]; @@ -853,7 +888,7 @@ public class SimplePostTool { // TODO: Add a flag that disables /update and sends all to /update/extract, to avoid CSV, // JSON, and XML files // TODO: from being interpreted as Solr documents internally - if (type.equals("application/json") && !"solr".equals(format)) { + if (type.equals("application/json") && !FORMAT_SOLR.equals(format)) { suffix = "/json/docs"; String urlStr = appendUrlPath(solrUrl, suffix).toString(); url = new URL(urlStr); @@ -865,18 +900,22 @@ public class SimplePostTool { // SolrCell suffix = "/extract"; String urlStr = appendUrlPath(solrUrl, suffix).toString(); - if (!urlStr.contains("resource.name")) + if (!urlStr.contains("resource.name")) { urlStr = appendParam( urlStr, "resource.name=" + URLEncoder.encode(file.getAbsolutePath(), UTF_8)); - if (!urlStr.contains("literal.id")) + } + if (!urlStr.contains("literal.id")) { urlStr = appendParam( urlStr, "literal.id=" + URLEncoder.encode(file.getAbsolutePath(), UTF_8)); + } url = new URL(urlStr); } } else { - if (type == null) type = DEFAULT_CONTENT_TYPE; + if (type == null) { + type = DEFAULT_CONTENT_TYPE; + } } info( "POSTing file " @@ -891,7 +930,9 @@ public class SimplePostTool { warn("Can't open/read file: " + file); } finally { try { - if (is != null) is.close(); + if (is != null) { + is.close(); + } } catch (IOException e) { fatal("IOException while closing file: " + e); } @@ -941,7 +982,9 @@ public class SimplePostTool { /** Performs a simple get on the given URL */ public void doGet(URL url) { try { - if (mockMode) return; + if (mockMode) { + return; + } HttpURLConnection urlc = (HttpURLConnection) url.openConnection(); basicAuth(urlc); urlc.connect(); @@ -960,30 +1003,35 @@ public class SimplePostTool { */ public boolean postData( InputStream data, Long length, OutputStream output, String type, URL url) { - if (mockMode) return true; + if (mockMode) { + return true; + } + boolean success = true; - if (type == null) type = 
DEFAULT_CONTENT_TYPE; - HttpURLConnection urlc = null; + if (type == null) { + type = DEFAULT_CONTENT_TYPE; + } + HttpURLConnection urlConnection = null; try { try { - urlc = (HttpURLConnection) url.openConnection(); + urlConnection = (HttpURLConnection) url.openConnection(); try { - urlc.setRequestMethod("POST"); + urlConnection.setRequestMethod("POST"); } catch (ProtocolException e) { fatal("Shouldn't happen: HttpURLConnection doesn't support POST??" + e); } - urlc.setDoOutput(true); - urlc.setDoInput(true); - urlc.setUseCaches(false); - urlc.setAllowUserInteraction(false); - urlc.setRequestProperty("Content-type", type); - basicAuth(urlc); + urlConnection.setDoOutput(true); + urlConnection.setDoInput(true); + urlConnection.setUseCaches(false); + urlConnection.setAllowUserInteraction(false); + urlConnection.setRequestProperty("Content-type", type); + basicAuth(urlConnection); if (null != length) { - urlc.setFixedLengthStreamingMode(length); + urlConnection.setFixedLengthStreamingMode(length); } else { - urlc.setChunkedStreamingMode(-1); // use JDK default chunkLen, 4k in Java 8. + urlConnection.setChunkedStreamingMode(-1); // use JDK default chunkLen, 4k in Java 8. 
} - urlc.connect(); + urlConnection.connect(); } catch (IOException e) { fatal("Connection error (is Solr running at " + solrUrl + " ?): " + e); success = false; @@ -991,15 +1039,15 @@ public class SimplePostTool { fatal("POST failed with error " + e.getMessage()); } - try (final OutputStream out = urlc.getOutputStream()) { + try (final OutputStream out = urlConnection.getOutputStream()) { pipe(data, out); } catch (IOException e) { fatal("IOException while posting data: " + e); } try { - success &= checkResponseCode(urlc); - try (final InputStream in = urlc.getInputStream()) { + success &= checkResponseCode(urlConnection); + try (final InputStream in = urlConnection.getInputStream()) { pipe(in, output); } } catch (IOException e) { @@ -1010,7 +1058,9 @@ public class SimplePostTool { "Looks like Solr is secured and would not let us in. Try with another user in '-u' parameter"); } } finally { - if (urlc != null) urlc.disconnect(); + if (urlConnection != null) { + urlConnection.disconnect(); + } } return success; } @@ -1093,15 +1143,22 @@ public class SimplePostTool { byte[] buf = new byte[1024]; int read = 0; while ((read = source.read(buf)) >= 0) { - if (null != dest) dest.write(buf, 0, read); + if (null != dest) { + dest.write(buf, 0, read); + } + } + if (null != dest) { + dest.flush(); } - if (null != dest) dest.flush(); } public FileFilter getFileFilterFromFileTypes(String fileTypes) { String glob; - if (fileTypes.equals("*")) glob = ".*"; - else glob = "^.*\\.(" + fileTypes.replace(",", "|") + ")$"; + if (fileTypes.equals("*")) { + glob = ".*"; + } else { + glob = "^.*\\.(" + fileTypes.replace(",", "|") + ")$"; + } return new GlobFileFilter(glob, true); } @@ -1132,7 +1189,9 @@ public class SimplePostTool { if (nodes.getLength() > 0) { for (int i = 0; i < nodes.getLength(); i++) { sb.append(nodes.item(i).getNodeValue()).append(' '); - if (!concatAll) break; + if (!concatAll) { + break; + } } return sb.toString().trim(); } else return ""; @@ -1308,11 +1367,12 @@ 
public class SimplePostTool { * @return a set of URIs parsed from the page */ protected Set<URI> getLinksFromWebPage(URL u, InputStream is, String type, URL postUrl) { - Set<URI> l = new HashSet<>(); + Set<URI> linksFromPage = new HashSet<>(); URL url = null; try { ByteArrayOutputStream os = new ByteArrayOutputStream(); URL extractUrl = new URL(appendParam(postUrl.toString(), "extractOnly=true")); + extractUrl = new URL(appendParam(extractUrl.toString(), "wt=xml")); boolean success = postData(is, null, os, type, extractUrl); if (success) { Document d = makeDom(os.toByteArray()); @@ -1322,10 +1382,12 @@ public class SimplePostTool { for (int i = 0; i < links.getLength(); i++) { String link = links.item(i).getTextContent(); link = computeFullUrl(u, link); - if (link == null) continue; + if (link == null) { + continue; + } URI newUri = new URI(link); if (newUri.getAuthority() == null || !newUri.getAuthority().equals(u.getAuthority())) { - l.add(newUri); + linksFromPage.add(newUri); } } } @@ -1337,7 +1399,7 @@ public class SimplePostTool { throw new RuntimeException(e); } - return l; + return linksFromPage; } } diff --git a/solr/core/src/java/org/apache/solr/cli/SolrCLI.java b/solr/core/src/java/org/apache/solr/cli/SolrCLI.java index ce1ea16d564..a11fc7bdfa6 100755 --- a/solr/core/src/java/org/apache/solr/cli/SolrCLI.java +++ b/solr/core/src/java/org/apache/solr/cli/SolrCLI.java @@ -242,6 +242,7 @@ public class SolrCLI implements CLIO { else if ("package".equals(toolType)) return new PackageTool(); else if ("postlogs".equals(toolType)) return new PostLogsTool(); else if ("version".equals(toolType)) return new VersionTool(); + else if ("post".equals(toolType)) return new PostTool(); // If you add a built-in tool to this class, add it here to avoid // classpath scanning @@ -322,6 +323,7 @@ public class SolrCLI implements CLIO { try { cli = (new GnuParser()).parse(options, args); } catch (ParseException exp) { + // Check if we passed in a help argument with a non parsing 
set of arguments. boolean hasHelpArg = false; if (args != null) { for (String arg : args) { @@ -333,10 +335,12 @@ public class SolrCLI implements CLIO { } if (!hasHelpArg) { CLIO.err("Failed to parse command-line arguments due to: " + exp.getMessage()); + exit(1); + } else { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp(toolName, options); + exit(0); } - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp(toolName, options); - exit(1); } if (cli.hasOption("help")) { diff --git a/solr/core/src/test/org/apache/solr/cli/PostToolTest.java b/solr/core/src/test/org/apache/solr/cli/PostToolTest.java new file mode 100644 index 00000000000..681d788138c --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cli/PostToolTest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.cli; + +import static org.apache.solr.cli.SolrCLI.findTool; +import static org.apache.solr.cli.SolrCLI.parseCmdLine; + +import org.apache.commons.cli.CommandLine; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.junit.BeforeClass; +import org.junit.Test; + +public class PostToolTest extends SolrCloudTestCase { + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1) + .addConfig( + "config", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) + .configure(); + } + + @Test + public void testBasicRun() throws Exception { + final String collection = "aliasedCollection"; + CollectionAdminRequest.createCollection(collection, "config", 1, 1) + .process(cluster.getSolrClient()); + + String[] args = {"post", "-url", "http://localhost:8983/solr/aliasedCollection", "blah.json"}; + assertEquals(0, runTool(args)); + } + + private int runTool(String[] args) throws Exception { + Tool tool = findTool(args); + assertTrue(tool instanceof PostTool); + CommandLine cli = parseCmdLine(tool.getName(), args, tool.getOptions()); + return tool.runTool(cli); + } +} diff --git a/solr/packaging/test/test_extraction.bats b/solr/packaging/test/test_extraction.bats new file mode 100644 index 00000000000..875f4ca2e59 --- /dev/null +++ b/solr/packaging/test/test_extraction.bats @@ -0,0 +1,110 @@ +#!/usr/bin/env bats + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +load bats_helper + +setup() { + common_clean_setup +} + +teardown() { + # save a snapshot of SOLR_HOME for failed tests + save_home_on_failure + + delete_all_collections + SOLR_STOP_WAIT=1 solr stop -all >/dev/null 2>&1 +} + +@test "using curl to extract a single pdf file" { + + # Disable security manager to allow extraction + # This appears to be a bug. + export SOLR_SECURITY_MANAGER_ENABLED=false + solr start -c -Dsolr.modules=extraction + + + solr create_collection -c gettingstarted -d _default + + + curl -X POST -H 'Content-type:application/json' -d '{ + "add-requesthandler": { + "name": "/update/extract", + "class": "solr.extraction.ExtractingRequestHandler", + "defaults":{ "lowernames": "true", "captureAttr":"true"} + } + }' 'http://localhost:8983/solr/gettingstarted/config' + + curl 'http://localhost:8983/solr/gettingstarted/update/extract?literal.id=doc1&commit=true' -F "myfile=@${SOLR_TIP}/example/exampledocs/solr-word.pdf" + + run curl 'http://localhost:8983/solr/gettingstarted/select?q=id:doc1' + assert_output --partial '"numFound":1' +} + +@test "using the bin/solr post tool to extract content from pdf" { + + # Disable security manager to allow extraction + # This appears to be a bug. 
+ export SOLR_SECURITY_MANAGER_ENABLED=false + solr start -c -Dsolr.modules=extraction + + solr create_collection -c content_extraction -d _default + + curl -X POST -H 'Content-type:application/json' -d '{ + "add-requesthandler": { + "name": "/update/extract", + "class": "solr.extraction.ExtractingRequestHandler", + "defaults":{ "lowernames": "true", "captureAttr":"true"} + } + }' 'http://localhost:8983/solr/content_extraction/config' + + # We filter to pdf to invoke the Extract handler. + run solr post -filetypes pdf -commit -url http://localhost:8983/solr/content_extraction/update ${SOLR_TIP}/example/exampledocs + + assert_output --partial '1 files indexed.' + refute_output --partial 'ERROR' + + run curl 'http://localhost:8983/solr/content_extraction/select?q=*:*' + assert_output --partial '"numFound":1' +} + +@test "using the bin/solr post tool to crawl web site" { + + # Disable security manager to allow extraction + # This appears to be a bug. + export SOLR_SECURITY_MANAGER_ENABLED=false + solr start -c -Dsolr.modules=extraction + + solr create_collection -c website_extraction -d _default + + curl -X POST -H 'Content-type:application/json' -d '{ + "add-requesthandler": { + "name": "/update/extract", + "class": "solr.extraction.ExtractingRequestHandler", + "defaults":{ "lowernames": "true", "captureAttr":"true"} + } + }' 'http://localhost:8983/solr/website_extraction/config' + + # Change to -recursive 1 to crawl multiple pages, but may be too slow. 
+ run solr post -mode web -commit -url http://localhost:8983/solr/website_extraction/update -recursive 0 -delay 1 https://solr.apache.org/ + + assert_output --partial 'POSTed web resource https://solr.apache.org (depth: 0)' + refute_output --partial 'ERROR' + + run curl 'http://localhost:8983/solr/website_extraction/select?q=*:*' + assert_output --partial '"numFound":1' +} diff --git a/solr/packaging/test/test_help.bats b/solr/packaging/test/test_help.bats index 0bdcb17566f..ed7a91b78b4 100644 --- a/solr/packaging/test/test_help.bats +++ b/solr/packaging/test/test_help.bats @@ -90,5 +90,13 @@ setup() { } @test "assert help flag prints help" { - skip "Currently the assert -help flag doesn't return nice help text!" -} \ No newline at end of file + run solr assert -help + assert_output --partial 'usage: assert' + refute_output --partial 'ERROR' +} + +@test "post help flag prints help" { + run solr post -help + assert_output --partial 'usage: post' + refute_output --partial 'ERROR' +} diff --git a/solr/packaging/test/test_post.bats b/solr/packaging/test/test_post.bats new file mode 100644 index 00000000000..2b8c18bfe46 --- /dev/null +++ b/solr/packaging/test/test_post.bats @@ -0,0 +1,173 @@ +#!/usr/bin/env bats + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +load bats_helper + +setup_file() { + common_clean_setup + solr start -c -Dsolr.modules=extraction +} + +teardown_file() { + common_setup + solr stop -all +} + +setup() { + common_setup +} + +teardown() { + # save a snapshot of SOLR_HOME for failed tests + save_home_on_failure +} + +@test "Check help command" { + + run solr post + assert_output --partial 'Failed to parse command-line arguments due to: Missing required option: url' + + run solr post -h + assert_output --partial 'usage: post' + refute_output --partial 'ERROR' + + run solr post -help + assert_output --partial 'usage: post' + refute_output --partial 'ERROR' + +} + + +@test "basic post with a type specified" { + + run solr create_collection -c monitors -d _default + assert_output --partial "Created collection 'monitors'" + + run solr post -type application/xml -url http://localhost:8983/solr/monitors/update ${SOLR_TIP}/example/exampledocs/monitor.xml + + assert_output --partial '1 files indexed.' + refute_output --partial 'ERROR' +} + +@test "basic post WITHOUT a type specified" { + + solr create_collection -c monitors_no_type -d _default + + run solr post -url http://localhost:8983/solr/monitors_no_type/update -commit ${SOLR_TIP}/example/exampledocs/monitor.xml + + assert_output --partial '1 files indexed.' + refute_output --partial 'ERROR' + run curl 'http://localhost:8983/solr/monitors_no_type/select?q=*:*' + assert_output --partial '"numFound":1' + + solr create_collection -c books_no_type -d _default + + run solr post -url http://localhost:8983/solr/books_no_type/update -commit ${SOLR_TIP}/example/exampledocs/books.json + + assert_output --partial '1 files indexed.' 
+ refute_output --partial 'ERROR' + run curl 'http://localhost:8983/solr/books_no_type/select?q=*:*' + assert_output --partial '"numFound":4' + + solr create_collection -c books_csv_no_type -d _default + + run solr post -url http://localhost:8983/solr/books_csv_no_type/update -commit ${SOLR_TIP}/example/exampledocs/books.csv + + assert_output --partial '1 files indexed.' + refute_output --partial 'ERROR' + run curl 'http://localhost:8983/solr/books_csv_no_type/select?q=*:*' + assert_output --partial '"numFound":10' +} + +@test "crawling a directory" { + + solr create_collection -c mixed_content -d _default + + # We filter to xml,json,and csv as we don't want to invoke the Extract handler. + run solr post -filetypes xml,json,csv -url http://localhost:8983/solr/mixed_content/update -commit ${SOLR_TIP}/example/exampledocs + + assert_output --partial '16 files indexed.' + refute_output --partial 'ERROR' + run curl 'http://localhost:8983/solr/mixed_content/select?q=*:*' + assert_output --partial '"numFound":46' +} + +# this test doesn't complete due to issues in posting to the /extract handler +@test "crawling a web site" { + solr create_collection -c webcrawl -d _default + + curl -X POST -H 'Content-type:application/json' -d '{ + "add-requesthandler": { + "name": "/update/extract", + "class": "solr.extraction.ExtractingRequestHandler", + "defaults":{ "lowernames": "true", "captureAttr":"true"} + } + }' 'http://localhost:8983/solr/webcrawl/config' + + run solr post -mode web -url http://localhost:8983/solr/webcrawl/update -recursive 1 -delay 1 https://solr.apache.org + assert_output --partial 'Entering crawl at level 0' +} + +@test "commit and optimize and delete" { + + run solr create_collection -c monitors2 -d _default + assert_output --partial "Created collection 'monitors2'" + + run solr post -url http://localhost:8983/solr/monitors2/update -type application/xml -commit -optimize ${SOLR_TIP}/example/exampledocs/monitor.xml + + assert_output --partial '1 files indexed.' 
+ assert_output --partial 'COMMITting Solr index' + assert_output --partial 'Performing an OPTIMIZE' + refute_output --partial 'ERROR' + +} + +@test "args mode" { + + run solr create_collection -c test_args -d _default + assert_output --partial "Created collection 'test_args'" + + run solr post -url http://localhost:8983/solr/test_args/update -mode args -type application/xml -out -commit "<delete><query>*:*</query></delete>" + assert_output --partial '<int name="status">0</int>' + + # confirm default type + run solr post -url http://localhost:8983/solr/test_args/update -mode args -out -commit "{'delete': {'query': '*:*'}}" + assert_output --partial '"status":0' + + # confirm we don't get back output without -out + run solr post -url http://localhost:8983/solr/test_args/update -mode args -commit "{'delete': {'query': '*:*'}}" + refute_output --partial '"status":0' + + run solr post -url http://localhost:8983/solr/test_args/update -mode args -commit -type text/csv -out $'id,value\nROW1,0.47' + assert_output --partial '"status":0' + run curl 'http://localhost:8983/solr/test_args/select?q=id:ROW1' + assert_output --partial '"numFound":1' +} + +# function used because run echo | solr ends up being (run echo) | solr and we lose the output capture. 
+capture_echo_to_solr() { + echo "{'commit': {}}" | solr post -url http://localhost:8983/solr/test_stdin/update -mode stdin -type application/json -out +} + +@test "stdin mode" { + + run solr create_collection -c test_stdin -d _default + assert_output --partial "Created collection 'test_stdin'" + + run capture_echo_to_solr + assert_output --partial '"status":0' +} diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/post-tool.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/post-tool.adoc index f2d303b5abc..28a47d28bbc 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/post-tool.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/post-tool.adoc @@ -18,106 +18,114 @@ Solr includes a simple command line tool for POSTing various types of content to a Solr server. -The tool is `bin/post`. -The bin/post tool is a Unix shell script; for Windows (non-Cygwin) usage, see the section <<Post Tool Windows Support>> below. +The tool is `bin/solr post`. +The `bin/solr post` command works on both Unix (`bin/solr`) and Windows (`bin/solr.cmd`). NOTE: This tool is meant for use by new users exploring Solr's capabilities, and is not intended as a robust solution to be used for indexing documents into production systems. +NOTE: You may be familiar with SimplePostTool and the bin/post Unix shell script. While this is still available, it is deprecated and will be removed in Solr 10. + + To run it, open a window and enter: -[source,bash] +[,console] ---- -bin/post -c gettingstarted example/films/films.json +$ bin/solr post -url http://localhost:8983/solr/gettingstarted/update example/films/films.json ---- This will contact the server at `localhost:8983`. -Specifying the `collection/core name` is *mandatory*. -The `-help` (or simply `-h`) option will output information on its usage (i.e., `bin/post -help)`. +The `-help` (or simply `-h`) option will output information on its usage (i.e., `bin/solr post -help`). 
-== Using the bin/post Tool +== Using the bin/solr post Tool -Specifying either the `collection/core name` or the full update `url` is *mandatory* when using `bin/post`. +Specifying the full update `url` is *mandatory* when using `bin/solr post`. -The basic usage of `bin/post` is: +The basic usage of `bin/solr post` is: [source,plain] ---- -$ bin/post -h -Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]> +$ bin/solr post -h +Usage: post -url http://localhost:8983/solr/gettingstarted/update [OPTIONS] <files|directories|urls|-d ["...",...]> or post -help - collection name defaults to DEFAULT_SOLR_COLLECTION if not specified - OPTIONS ======= Solr options: - -url <base Solr update URL> (overrides collection, host, and port) - -host <host> (default: localhost) - -p or -port <port> (default: 8983) - -commit yes|no (default: yes) + -url <base Solr update URL> + -commit issue a commit -u or -user <user:pass> (sets BasicAuth credentials) Web crawl options: -recursive <depth> (default: 1) -delay <seconds> (default: 10) - Directory crawl options: -delay <seconds> (default: 0) stdin/args options: - -type <content/type> (default: application/xml) + -type <content/type> (default: application/json) Other options: -filetypes <type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log) -params "<key>=<value>[&<key>=<value>...]" (values must be URL-encoded; these pass through to Solr update request) - -out yes|no (default: no; yes outputs Solr response to console) -... 
+ -out output the Solr responses to console + -format solr (sends application/json content as Solr commands to /update instead of /update/json/docs) + +Examples: + +* JSON file: bin/solr post -url http://localhost:8983/solr/wizbang/update events.json +* XML files: bin/solr post -url http://localhost:8983/solr/records/update article*.xml +* CSV file: bin/solr post -url http://localhost:8983/solr/signals/update LATEST-signals.csv +* Directory of files: bin/solr post -filetypes xml,json,csv -url http://localhost:8983/solr/myfiles/update ~/Documents +* Web crawl: bin/solr post -mode web -url http://localhost:8983/solr/gettingstarted/update -recursive 1 -delay 1 https://solr.apache.org/ +* Standard input (stdin): echo '{commit: {}}' | bin/solr post -mode stdin -url http://localhost:8983/solr/my_collection/update -out +* Data as string: bin/solr post -url http://localhost:8983/solr/signals/update -mode args -type text/csv -out $'id,value\n1,0.47' + ---- -== Examples Using bin/post +== Examples Using bin/solr post -There are several ways to use `bin/post`. +There are several ways to use `bin/solr post`. This section presents several examples. === Indexing XML -Add all documents with file extension `.xml` to collection or core named `gettingstarted`. +Add all documents with file extension `.xml` to the collection named `gettingstarted`. [source,bash] ---- -bin/post -c gettingstarted *.xml +bin/solr post -url http://localhost:8983/solr/gettingstarted/update *.xml ---- -Add all documents with file extension `.xml` to the `gettingstarted` collection/core on Solr running on port `8984`. +Add all documents with file extension `.xml` to the `gettingstarted` collection on Solr running on port `8984`. [source,bash] ---- -bin/post -c gettingstarted -p 8984 *.xml +bin/solr post -url http://localhost:8984/solr/gettingstarted/update *.xml ---- Send XML arguments to delete a document from `gettingstarted`. 
[source,bash] ---- -bin/post -c gettingstarted -d '<delete><id>42</id></delete>' +bin/solr post -url http://localhost:8983/solr/gettingstarted/update -mode args -type application/xml '<delete><id>42</id></delete>' ---- -=== Indexing CSV +=== Indexing CSV and JSON -Index all CSV files into `gettingstarted`: +Index all CSV and JSON files into `gettingstarted` from the current directory: [source,bash] ---- -bin/post -c gettingstarted *.csv +bin/solr post -url http://localhost:8983/solr/gettingstarted/update -filetypes json,csv . ---- Index a tab-separated file into `gettingstarted`: [source,bash] ---- -bin/post -c signals -params "separator=%09" -type text/csv data.tsv +bin/solr post -url http://localhost:8983/solr/signals/update -params "separator=%09" -type text/csv data.tsv ---- The content type (`-type`) parameter is required to treat the file as the proper type, otherwise it will be ignored and a WARNING logged as it does not know what type of content a .tsv file is. @@ -129,7 +137,7 @@ Index all JSON files into `gettingstarted`. [source,bash] ---- -bin/post -c gettingstarted *.json +bin/solr post -url http://localhost:8983/solr/gettingstarted/update *.json ---- === Indexing Rich Documents (PDF, Word, HTML, etc.) @@ -138,21 +146,21 @@ Index a PDF file into `gettingstarted`. [source,bash] ---- -bin/post -c gettingstarted a.pdf +bin/solr post -url http://localhost:8983/solr/gettingstarted/update a.pdf ---- Automatically detect content types in a folder, and recursively scan it for documents for indexing into `gettingstarted`. [source,bash] ---- -bin/post -c gettingstarted afolder/ +bin/solr post -url http://localhost:8983/solr/gettingstarted/update afolder/ ---- Automatically detect content types in a folder, but limit it to PPT and HTML files and index into `gettingstarted`. 
[source,bash] ---- -bin/post -c gettingstarted -filetypes ppt,html afolder/ +bin/solr post -url http://localhost:8983/solr/gettingstarted/update -filetypes ppt,html afolder/ ---- === Indexing to a Password Protected Solr (Basic Auth) @@ -161,30 +169,5 @@ Index a PDF as the user "solr" with password "SolrRocks": [source,bash] ---- -bin/post -u solr:SolrRocks -c gettingstarted a.pdf +bin/solr post -u solr:SolrRocks -url http://localhost:8983/solr/gettingstarted/update a.pdf ---- - -== Post Tool Windows Support - -`bin/post` is a Unix shell script and as such cannot be used directly on Windows. -However it delegates its work to a cross-platform capable Java program called "SimplePostTool" or `post.jar`, that can be used in Windows environments. - -The argument syntax differs significantly from `bin/post`, so your first step should be to print the SimplePostTool help text. - -[source,plain] ----- -$ java -jar example\exampledocs\post.jar -h ----- - -This command prints information about all the arguments and System properties available to SimplePostTool users. -There are also examples showing how to post files, crawl a website or file system folder, and send update commands (deletes, etc.) directly to Solr. - -Most usage involves passing both Java System properties and program arguments on the command line. Consider the example below: - -[source,plain] ----- -$ java -jar -Dc=gettingstarted -Dauto example\exampledocs\post.jar example\exampledocs\* ----- - -This indexes the contents of the `exampledocs` directory into a collection called `gettingstarted`. -The `-Dauto` System property governs whether or not Solr sends the document type to Solr during extraction.
