This is an automated email from the ASF dual-hosted git repository.
epugh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new d3ea49d SOLR-15786 (#409)
d3ea49d is described below
commit d3ea49d6e7754c7c3f56e3a6ef7efbc2a94ee389
Author: Eric Pugh <[email protected]>
AuthorDate: Sat Dec 4 08:44:47 2021 -0500
SOLR-15786 (#409)
* Introduce the films example to the bin/solr start -e command
* Address SOLR-9456, Freebase is no longer available. Thanks Google ;-(
---
solr/CHANGES.txt | 2 +
solr/bin/solr | 1 +
solr/bin/solr.cmd | 3 +-
.../src/java/org/apache/solr/util/SolrCLI.java | 42 +++++++-
solr/example/README.md | 8 +-
solr/example/films/README.md | 29 ++---
solr/example/films/film_data_generator.py | 117 ---------------------
.../src/solr-control-script-reference.adoc | 10 ++
8 files changed, 72 insertions(+), 140 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9b04022..ac119ea 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -179,6 +179,8 @@ when told to. The admin UI now tells it to. (Nazerke
Seidan, David Smiley)
* SOLR-15427: Nested docs: [child limit=...] now defaults to -1 which is
interpreted as unlimited.
(David Smiley)
+* SOLR-15786: Add the "films" example to SolrCLI via -e films parameter. (Eric
Pugh)
+
Build
---------------------
diff --git a/solr/bin/solr b/solr/bin/solr
index 4d609ad..d0a64d7 100755
--- a/solr/bin/solr
+++ b/solr/bin/solr
@@ -392,6 +392,7 @@ function print_usage() {
echo " cloud: SolrCloud example"
echo " techproducts: Comprehensive example illustrating many of
Solr's core capabilities"
echo " schemaless: Schema-less example"
+ echo " films: Example of starting with _default configset and
adding explicit fields dynamically"
echo ""
echo " -a Additional parameters to pass to the JVM when
starting Solr, such as to setup"
echo " Java debug options. For example, to enable a Java
debugger to attach to the Solr JVM"
diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd
index 2526f28..03cbd2d 100755
--- a/solr/bin/solr.cmd
+++ b/solr/bin/solr.cmd
@@ -368,6 +368,7 @@ goto done
@echo cloud: SolrCloud example
@echo techproducts: Comprehensive example illustrating many of Solr's
core capabilities
@echo schemaless: Schema-less example
+@echo films: Example of starting with _default configset and
defining explicit fields dynamically
@echo.
@echo -a opts Additional parameters to pass to the JVM when starting
Solr, such as to setup
@echo Java debug options. For example, to enable a Java
debugger to attach to the Solr JVM
@@ -1910,7 +1911,7 @@ IF "!ZK_OP!"=="upconfig" (
)
goto done
-
+
:run_auth
IF "%1"=="-help" goto usage
IF "%1"=="-usage" goto usage
diff --git a/solr/core/src/java/org/apache/solr/util/SolrCLI.java
b/solr/core/src/java/org/apache/solr/util/SolrCLI.java
index bd83ffa..5128501 100755
--- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java
+++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java
@@ -2656,7 +2656,7 @@ public class SolrCLI implements CLIO {
.argName("NAME")
.hasArg()
.required(true)
- .desc("Name of the example to launch, one of: cloud,
techproducts, schemaless.")
+ .desc("Name of the example to launch, one of: cloud,
techproducts, schemaless, films.")
.longOpt("example")
.build(),
Option.builder("script")
@@ -2773,11 +2773,11 @@ public class SolrCLI implements CLIO {
String exampleType = cli.getOptionValue("example");
if ("cloud".equals(exampleType)) {
runCloudExample(cli);
- } else if ("techproducts".equals(exampleType) ||
"schemaless".equals(exampleType)) {
+ } else if ("techproducts".equals(exampleType) ||
"schemaless".equals(exampleType) || "films".equals(exampleType)) {
runExample(cli, exampleType);
} else {
throw new IllegalArgumentException("Unsupported example "+exampleType+
- "! Please choose one of: cloud, schemaless, or techproducts");
+ "! Please choose one of: cloud, schemaless, techproducts, or
films");
}
}
@@ -2859,8 +2859,42 @@ public class SolrCLI implements CLIO {
echo("exampledocs directory not found, skipping indexing step for
the techproducts example");
}
}
+ else if ("films".equals(exampleName) && !alreadyExists) {
+ echo("Adding name and initial_release_date fields to films schema
\"_default\"");
- echo("\nSolr "+exampleName+" example launched successfully. Direct your
Web browser to "+solrUrl+" to visit the Solr Admin UI");
+ HttpSolrClient solrClient = new
HttpSolrClient.Builder(solrUrl).build();
+ try {
+ SolrCLI.postJsonToSolr(solrClient, "/" + collectionName + "/schema",
"{\n" +
+ " \"add-field\" : {\n" +
+ " \"name\":\"name\",\n" +
+ " \"type\":\"text_general\",\n" +
+ " \"multiValued\":false,\n" +
+ " \"stored\":true\n" +
+ " },\n" +
+ " \"add-field\" : {\n" +
+ " \"name\":\"initial_release_date\",\n" +
+ " \"type\":\"pdate\",\n" +
+ " \"stored\":true\n" +
+ " }\n" +
+ " }");
+ } catch (Exception ex) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, ex);
+ }
+
+ File filmsJsonFile = new File(exampleDir, "films/films.json");
+ String updateUrl = String.format(Locale.ROOT, "%s/%s/update/json",
solrUrl, collectionName);
+ echo("Indexing films example docs from " +
filmsJsonFile.getAbsolutePath());
+ String currentPropVal = System.getProperty("url");
+ System.setProperty("url", updateUrl);
+ SimplePostTool.main(new String[] {filmsJsonFile.getAbsolutePath()});
+ if (currentPropVal != null) {
+ System.setProperty("url", currentPropVal); // reset
+ } else {
+ System.clearProperty("url");
+ }
+ }
+
+ echo("\nSolr "+exampleName+" example launched successfully. Direct
your Web browser to "+solrUrl+" to visit the Solr Admin UI");
}
protected void runCloudExample(CommandLine cli) throws Exception {
diff --git a/solr/example/README.md b/solr/example/README.md
index 491d88a..fa668ff 100644
--- a/solr/example/README.md
+++ b/solr/example/README.md
@@ -18,15 +18,16 @@
Solr example
------------
-This directory contains Solr examples. Each example is contained in a
+This directory contains Solr examples. Each example is contained in a
separate directory. To run a specific example, do:
```
bin/solr -e <EXAMPLE> where <EXAMPLE> is one of:
-
+
cloud : SolrCloud example
schemaless : Schema-less example (schema is inferred from data during
indexing)
techproducts : Kitchen sink example providing comprehensive examples of
Solr features
+ films : Example of starting with _default configset and defining
explicit fields dynamically.
```
For instance, if you want to run the SolrCloud example, do:
@@ -65,7 +66,7 @@ For more information about the "Solr Home" and Solr specific
configuration
For a Solr tutorial
- * https://solr.apache.org/resources.html
+ * https://solr.apache.org/resources.html
For a list of other tutorials and introductory articles.
@@ -91,4 +92,3 @@ be convenient when first getting started, but eventually you
will want to
log just to a file. To configure logging, edit the log4j2.xml file in
".../server/resources".
It is also possible to setup log4j or other popular logging frameworks.
-
diff --git a/solr/example/films/README.md b/solr/example/films/README.md
index d82322c..bd4bf82 100644
--- a/solr/example/films/README.md
+++ b/solr/example/films/README.md
@@ -1,7 +1,8 @@
-We have a movie data set in JSON, Solr XML, and CSV formats.
-All 3 formats contain the same data. You can use any one format to index
documents to Solr.
+We have a movie data set in JSON, Solr XML, and CSV formats. All 3 formats
contain the same data. You can use any one format to index documents to Solr.
-The data is fetched from Freebase and the data license is present in the
films-LICENSE.txt file.
+This example uses the _default configset that ships with Solr plus some custom
fields added via Schema API.
+
+The data was fetched from Freebase and the data license is present in the
films-LICENSE.txt file. Freebase was shut down in 2016 by Google.
This data consists of the following fields:
* "id" - unique identifier for the movie
@@ -10,6 +11,11 @@ This data consists of the following fields:
* "initial_release_date" - The earliest official initial film screening date
in any country
* "genre" - The genre(s) that the movie belongs to
+ The "name" and "initial_release_date" fields are created via the Schema API,
and the "genre" and "directed_by" fields
+ are created by the use of an Update Request Processor Chain called
"add-unknown-fields-to-the-schema".
+
+ The below steps walk you through learning how to start up Solr, set up the
films collection yourself, and then load data. You can also run `bin/solr
start -e films` or `bin/solr start -c -e films` for the SolrCloud version.
+
Steps:
* Start Solr:
```
@@ -17,13 +23,13 @@ This data consists of the following fields:
```
* Create a "films" core:
-
+
```
bin/solr create -c films
```
* Set the schema on a couple of fields that Solr would otherwise guess
differently (than we'd like) about:
-
+
```
curl http://localhost:8983/solr/films/schema -X POST -H
'Content-type:application/json' --data-binary '{
"add-field" : {
@@ -44,7 +50,7 @@ This data consists of the following fields:
- JSON: `bin/post -c films example/films/films.json`
- XML: `bin/post -c films example/films/films.xml`
- - CSV:
+ - CSV:
```
bin/post \
-c films \
@@ -53,7 +59,7 @@ This data consists of the following fields:
```
* Let's get searching!
- Search for 'Batman':
-
+
http://localhost:8983/solr/films/query?q=name:batman
* If you get an error about the name field not existing, you haven't
yet indexed the data
@@ -62,17 +68,12 @@ This data consists of the following fields:
It's easiest to simply reset the environment and try again, ensuring
that each step successfully executes.
- Show me all 'Super hero' movies:
-
+
http://localhost:8983/solr/films/query?q=*:*&fq=genre:%22Superhero%20movie%22
- Let's see the distribution of genres across all the movies. See the
facet section of the response for the counts:
-
-
http://localhost:8983/solr/films/query?q=*:*&facet=true&facet.field=genre
-Exploring the data further -
-
- * Increase the MAX_ITERATIONS value, put in your freebase API_KEY and run
the film_data_generator.py script using Python 3.
- Now re-index Solr with the new data.
+
http://localhost:8983/solr/films/query?q=*:*&facet=true&facet.field=genre
FAQ:
Why override the schema of the _name_ and _initial_release_date_ fields?
diff --git a/solr/example/films/film_data_generator.py
b/solr/example/films/film_data_generator.py
deleted file mode 100644
index f23224f..0000000
--- a/solr/example/films/film_data_generator.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This will generate a movie data set of 1100 records.
-These are the first 1100 movies which appear when querying the Freebase of
type '/film/film'.
-Here is the link to the freebase page -
https://www.freebase.com/film/film?schema=
-
-Usage - python3 film_data_generator.py
-"""
-
-import csv
-import copy
-import json
-import codecs
-import datetime
-import urllib.parse
-import urllib.request
-import xml.etree.cElementTree as ET
-from xml.dom import minidom
-
-MAX_ITERATIONS=10 #10 limits it to 1100 docs
-
-# You need an API Key by Google to run this
-API_KEY = '<insert your Google developer API key>'
-service_url = 'https://www.googleapis.com/freebase/v1/mqlread'
-query = [{
- "id": None,
- "name": None,
- "initial_release_date": None,
- "directed_by": [],
- "genre": [],
- "type": "/film/film",
- "initial_release_date>" : "2000"
-}]
-
-def gen_csv(filmlist):
- filmlistDup = copy.deepcopy(filmlist)
- #Convert multi-valued to % delimited string
- for film in filmlistDup:
- for key in film:
- if isinstance(film[key], list):
- film[key] = '|'.join(film[key])
- keys = ['name', 'directed_by', 'genre', 'type', 'id', 'initial_release_date']
- with open('films.csv', 'w', newline='', encoding='utf8') as csvfile:
- dict_writer = csv.DictWriter(csvfile, keys)
- dict_writer.writeheader()
- dict_writer.writerows(filmlistDup)
-
-def gen_json(filmlist):
- filmlistDup = copy.deepcopy(filmlist)
- with open('films.json', 'w') as jsonfile:
- jsonfile.write(json.dumps(filmlist, indent=2))
-
-def gen_xml(filmlist):
- root = ET.Element("add")
- for film in filmlist:
- doc = ET.SubElement(root, "doc")
- for key in film:
- if isinstance(film[key], list):
- for value in film[key]:
- field = ET.SubElement(doc, "field")
- field.set("name", key)
- field.text=value
- else:
- field = ET.SubElement(doc, "field")
- field.set("name", key)
- field.text=film[key]
- tree = ET.ElementTree(root)
- with open('films.xml', 'w') as f:
- f.write(
minidom.parseString(ET.tostring(tree.getroot(),'utf-8')).toprettyxml(indent="
") )
-
-def do_query(filmlist, cursor=""):
- params = {
- 'query': json.dumps(query),
- 'key': API_KEY,
- 'cursor': cursor
- }
- url = service_url + '?' + urllib.parse.urlencode(params)
- data = urllib.request.urlopen(url).read().decode('utf-8')
- response = json.loads(data)
- for item in response['result']:
- del item['type'] # It's always /film/film. No point of adding this.
- try:
- datetime.datetime.strptime(item['initial_release_date'], "%Y-%m-%d")
- except ValueError:
- #Date time not formatted properly. Keeping it simple by removing the
date field from that doc
- del item['initial_release_date']
- filmlist.append(item)
- return response.get("cursor")
-
-
-if __name__ == "__main__":
- filmlist = []
- cursor = do_query(filmlist)
- i=0
- while(cursor):
- cursor = do_query(filmlist, cursor)
- i = i+1
- if i==MAX_ITERATIONS:
- break
-
- gen_json(filmlist)
- gen_csv(filmlist)
- gen_xml(filmlist)
\ No newline at end of file
diff --git a/solr/solr-ref-guide/src/solr-control-script-reference.adoc
b/solr/solr-ref-guide/src/solr-control-script-reference.adoc
index db956c9..732b36e 100644
--- a/solr/solr-ref-guide/src/solr-control-script-reference.adoc
+++ b/solr/solr-ref-guide/src/solr-control-script-reference.adoc
@@ -111,6 +111,7 @@ The available options are:
* `cloud`
* `techproducts`
* `schemaless`
+* `films`
+
See the section <<Running with Example Configurations>> below for more details
on the example configurations.
+
@@ -321,12 +322,21 @@ When using this example, you can choose from any of the
available configsets fou
* *techproducts*: This example starts a single-node Solr instance with a
schema designed for the sample documents included in the
`$SOLR_HOME/example/exampledocs` directory.
+
The configset used can be found in
`$SOLR_HOME/server/solr/configsets/sample_techproducts_configs`.
++
+The data used can be found in `$SOLR_HOME/example/exampledocs/`.
* *schemaless*: This example starts a single-node Solr instance using a
managed schema, as described in the section <<schema-factory.adoc#,Schema
Factory Definition in SolrConfig>>, and provides a very minimal pre-defined
schema.
Solr will run in <<schemaless-mode.adoc#,Schemaless Mode>> with this
configuration, where Solr will create fields in the schema on the fly and will
guess field types used in incoming documents.
+
The configset used can be found in
`$SOLR_HOME/server/solr/configsets/_default`.
+* *films*: This example starts a single-node Solr instance using a managed
schema, as described in the section <<schema-factory.adoc#,Schema Factory
Definition in SolrConfig>>, and then uses the Schema API to create some custom
fields.
+Solr will run in <<schemaless-mode.adoc#,Schemaless Mode>> with this
configuration, where Solr will create fields in the schema on the fly and will
guess field types used in incoming documents as well. It then loads some
sample film data.
++
+The configset used can be found in
`$SOLR_HOME/server/solr/configsets/_default`.
++
+The film data used can be found in `$SOLR_HOME/example/films/films.json`.
+
[IMPORTANT]
====
The run in-foreground option (`-f`) is not compatible with the `-e` option
since the script needs to perform additional tasks after starting the Solr
server.