This is an automated email from the ASF dual-hosted git repository.
epugh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new d3ea49d SOLR-15786 (#409)
d3ea49d is described below
commit d3ea49d6e7754c7c3f56e3a6ef7efbc2a94ee389
Author: Eric Pugh <[email protected]>
AuthorDate: Sat Dec 4 08:44:47 2021 -0500
SOLR-15786 (#409)
* Introduce the films example to the bin/solr start -e command
* Address SOLR-9456, Freebase is no longer available. Thanks Google ;-(
---
solr/CHANGES.txt | 2 +
solr/bin/solr | 1 +
solr/bin/solr.cmd | 3 +-
.../src/java/org/apache/solr/util/SolrCLI.java | 42 +++++++-
solr/example/README.md | 8 +-
solr/example/films/README.md | 29 ++---
solr/example/films/film_data_generator.py | 117 ---------------------
.../src/solr-control-script-reference.adoc | 10 ++
8 files changed, 72 insertions(+), 140 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9b04022..ac119ea 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -179,6 +179,8 @@ when told to. The admin UI now tells it to. (Nazerke
Seidan, David Smiley)
* SOLR-15427: Nested docs: [child limit=...] now defaults to -1 which is
interpreted as unlimited.
(David Smiley)
+* SOLR-15786: Add the "films" example to SolrCLI via -e films parameter. (Eric
Pugh)
+
Build
---------------------
diff --git a/solr/bin/solr b/solr/bin/solr
index 4d609ad..d0a64d7 100755
--- a/solr/bin/solr
+++ b/solr/bin/solr
@@ -392,6 +392,7 @@ function print_usage() {
echo " cloud: SolrCloud example"
echo " techproducts: Comprehensive example illustrating many of
Solr's core capabilities"
echo " schemaless: Schema-less example"
+ echo " films: Example of starting with _default configset and
adding explicit fields dynamically"
echo ""
echo " -a Additional parameters to pass to the JVM when
starting Solr, such as to setup"
echo " Java debug options. For example, to enable a Java
debugger to attach to the Solr JVM"
diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd
index 2526f28..03cbd2d 100755
--- a/solr/bin/solr.cmd
+++ b/solr/bin/solr.cmd
@@ -368,6 +368,7 @@ goto done
@echo cloud: SolrCloud example
@echo techproducts: Comprehensive example illustrating many of Solr's
core capabilities
@echo schemaless: Schema-less example
+@echo films: Example of starting with _default configset and
defining explicit fields dynamically
@echo.
@echo -a opts Additional parameters to pass to the JVM when starting
Solr, such as to setup
@echo Java debug options. For example, to enable a Java
debugger to attach to the Solr JVM
@@ -1910,7 +1911,7 @@ IF "!ZK_OP!"=="upconfig" (
)
goto done
-
+
:run_auth
IF "%1"=="-help" goto usage
IF "%1"=="-usage" goto usage
diff --git a/solr/core/src/java/org/apache/solr/util/SolrCLI.java
b/solr/core/src/java/org/apache/solr/util/SolrCLI.java
index bd83ffa..5128501 100755
--- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java
+++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java
@@ -2656,7 +2656,7 @@ public class SolrCLI implements CLIO {
.argName("NAME")
.hasArg()
.required(true)
- .desc("Name of the example to launch, one of: cloud,
techproducts, schemaless.")
+ .desc("Name of the example to launch, one of: cloud,
techproducts, schemaless, films.")
.longOpt("example")
.build(),
Option.builder("script")
@@ -2773,11 +2773,11 @@ public class SolrCLI implements CLIO {
String exampleType = cli.getOptionValue("example");
if ("cloud".equals(exampleType)) {
runCloudExample(cli);
- } else if ("techproducts".equals(exampleType) ||
"schemaless".equals(exampleType)) {
+ } else if ("techproducts".equals(exampleType) ||
"schemaless".equals(exampleType) || "films".equals(exampleType)) {
runExample(cli, exampleType);
} else {
throw new IllegalArgumentException("Unsupported example "+exampleType+
- "! Please choose one of: cloud, schemaless, or techproducts");
+ "! Please choose one of: cloud, schemaless, techproducts, or
films");
}
}
@@ -2859,8 +2859,42 @@ public class SolrCLI implements CLIO {
echo("exampledocs directory not found, skipping indexing step for
the techproducts example");
}
}
+ else if ("films".equals(exampleName) && !alreadyExists) {
+ echo("Adding name and initial_release_date fields to films schema
\"_default\"");
- echo("\nSolr "+exampleName+" example launched successfully. Direct your
Web browser to "+solrUrl+" to visit the Solr Admin UI");
+ HttpSolrClient solrClient = new
HttpSolrClient.Builder(solrUrl).build();
+ try {
+ SolrCLI.postJsonToSolr(solrClient, "/" + collectionName + "/schema",
"{\n" +
+ " \"add-field\" : {\n" +
+ " \"name\":\"name\",\n" +
+ " \"type\":\"text_general\",\n" +
+ " \"multiValued\":false,\n" +
+ " \"stored\":true\n" +
+ " },\n" +
+ " \"add-field\" : {\n" +
+ " \"name\":\"initial_release_date\",\n" +
+ " \"type\":\"pdate\",\n" +
+ " \"stored\":true\n" +
+ " }\n" +
+ " }");
+ } catch (Exception ex) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, ex);
+ }
+
+ File filmsJsonFile = new File(exampleDir, "films/films.json");
+ String updateUrl = String.format(Locale.ROOT, "%s/%s/update/json",
solrUrl, collectionName);
+ echo("Indexing films example docs from " +
filmsJsonFile.getAbsolutePath());
+ String currentPropVal = System.getProperty("url");
+ System.setProperty("url", updateUrl);
+ SimplePostTool.main(new String[] {filmsJsonFile.getAbsolutePath()});
+ if (currentPropVal != null) {
+ System.setProperty("url", currentPropVal); // reset
+ } else {
+ System.clearProperty("url");
+ }
+ }
+
+ echo("\nSolr "+exampleName+" example launched successfully. Direct
your Web browser to "+solrUrl+" to visit the Solr Admin UI");
}
protected void runCloudExample(CommandLine cli) throws Exception {
diff --git a/solr/example/README.md b/solr/example/README.md
index 491d88a..fa668ff 100644
--- a/solr/example/README.md
+++ b/solr/example/README.md
@@ -18,15 +18,16 @@
Solr example
------------
-This directory contains Solr examples. Each example is contained in a
+This directory contains Solr examples. Each example is contained in a
separate directory. To run a specific example, do:
```
bin/solr -e <EXAMPLE> where <EXAMPLE> is one of:
-
+
cloud : SolrCloud example
schemaless : Schema-less example (schema is inferred from data during
indexing)
techproducts : Kitchen sink example providing comprehensive examples of
Solr features
+ films : Example of starting with _default configset and defining
explicit fields dynamically.
```
For instance, if you want to run the SolrCloud example, do:
@@ -65,7 +66,7 @@ For more information about the "Solr Home" and Solr specific
configuration
For a Solr tutorial
- * https://solr.apache.org/resources.html
+ * https://solr.apache.org/resources.html
For a list of other tutorials and introductory articles.
@@ -91,4 +92,3 @@ be convenient when first getting started, but eventually you
will want to
log just to a file. To configure logging, edit the log4j2.xml file in
".../server/resources".
It is also possible to setup log4j or other popular logging frameworks.
-
diff --git a/solr/example/films/README.md b/solr/example/films/README.md
index d82322c..bd4bf82 100644
--- a/solr/example/films/README.md
+++ b/solr/example/films/README.md
@@ -1,7 +1,8 @@
-We have a movie data set in JSON, Solr XML, and CSV formats.
-All 3 formats contain the same data. You can use any one format to index
documents to Solr.
+We have a movie data set in JSON, Solr XML, and CSV formats. All 3 formats
contain the same data. You can use any one format to index documents to Solr.
-The data is fetched from Freebase and the data license is present in the
films-LICENSE.txt file.
+This example uses the _default configset that ships with Solr plus some custom
fields added via Schema API.
+
+The data was fetched from Freebase and the data license is present in the
films-LICENSE.txt file. Freebase was shut down in 2016 by Google.
This data consists of the following fields:
* "id" - unique identifier for the movie
@@ -10,6 +11,11 @@ This data consists of the following fields:
* "initial_release_date" - The earliest official initial film screening date
in any country
* "genre" - The genre(s) that the movie belongs to
+ The "name" and "initial_release_date" fields are created via the Schema API,
and the "genre" and "directed_by" fields
+ are created by the use of an Update Request Processor Chain called
"add-unknown-fields-to-the-schema".
+
+ The below steps walk you through learning how to start up Solr, set up the
films collection yourself, and then load data. You can also run `bin/solr
start -e films` or `bin/solr start -c -e films` for the SolrCloud version.
+
Steps:
* Start Solr:
```
@@ -17,13 +23,13 @@ This data consists of the following fields:
```
* Create a "films" core:
-
+
```
bin/solr create -c films
```
* Set the schema on a couple of fields that Solr would otherwise guess
differently (than we'd like) about:
-
+
```
curl http://localhost:8983/solr/films/schema -X POST -H
'Content-type:application/json' --data-binary '{
"add-field" : {
@@ -44,7 +50,7 @@ This data consists of the following fields:
- JSON: `bin/post -c films example/films/films.json`
- XML: `bin/post -c films example/films/films.xml`
- - CSV:
+ - CSV:
```
bin/post \
-c films \
@@ -53,7 +59,7 @@ This data consists of the following fields:
```
* Let's get searching!
- Search for 'Batman':
-
+
http://localhost:8983/solr/films/query?q=name:batman
* If you get an error about the name field not existing, you haven't
yet indexed the data
@@ -62,17 +68,12 @@ This data consists of the following fields:
It's easiest to simply reset the environment and try again, ensuring
that each step successfully executes.
- Show me all 'Super hero' movies:
-
+
http://localhost:8983/solr/films/query?q=*:*&fq=genre:%22Superhero%20movie%22
- Let's see the distribution of genres across all the movies. See the
facet section of the response for the counts:
-
-
http://localhost:8983/solr/films/query?q=*:*&facet=true&facet.field=genre
-Exploring the data further -
-
- * Increase the MAX_ITERATIONS value, put in your freebase API_KEY and run
the film_data_generator.py script using Python 3.
- Now re-index Solr with the new data.
+
http://localhost:8983/solr/films/query?q=*:*&facet=true&facet.field=genre
FAQ:
Why override the schema of the _name_ and _initial_release_date_ fields?
diff --git a/solr/example/films/film_data_generator.py
b/solr/example/films/film_data_generator.py
deleted file mode 100644
index f23224f..0000000
--- a/solr/example/films/film_data_generator.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This will generate a movie data set of 1100 records.
-These are the first 1100 movies which appear when querying the Freebase of
type '/film/film'.
-Here is the link to the freebase page -
https://www.freebase.com/film/film?schema=
-
-Usage - python3 film_data_generator.py
-"""
-
-import csv
-import copy
-import json
-import codecs
-import datetime
-import urllib.parse
-import urllib.request
-import xml.etree.cElementTree as ET
-from xml.dom import minidom
-
-MAX_ITERATIONS=10 #10 limits it to 1100 docs
-
-# You need an API Key by Google to run this
-API_KEY = '<insert your Google developer API key>'
-service_url = 'https://www.googleapis.com/freebase/v1/mqlread'
-query = [{
- "id": None,
- "name": None,
- "initial_release_date": None,
- "directed_by": [],
- "genre": [],
- "type": "/film/film",
- "initial_release_date>" : "2000"
-}]
-
-def gen_csv(filmlist):
- filmlistDup = copy.deepcopy(filmlist)
- #Convert multi-valued to % delimited string
- for film in filmlistDup:
- for key in film:
- if isinstance(film[key], list):
- film[key] = '|'.join(film[key])
- keys = ['name', 'directed_by', 'genre', 'type', 'id', 'initial_release_date']
- with open('films.csv', 'w', newline='', encoding='utf8') as csvfile:
- dict_writer = csv.DictWriter(csvfile, keys)
- dict_writer.writeheader()
- dict_writer.writerows(filmlistDup)
-
-def gen_json(filmlist):
- filmlistDup = copy.deepcopy(filmlist)
- with open('films.json', 'w') as jsonfile:
- jsonfile.write(json.dumps(filmlist, indent=2))
-
-def gen_xml(filmlist):
- root = ET.Element("add")
- for film in filmlist:
- doc = ET.SubElement(root, "doc")
- for key in film:
- if isinstance(film[key], list):
- for value in film[key]:
- field = ET.SubElement(doc, "field")
- field.set("name", key)
- field.text=value
- else:
- field = ET.SubElement(doc, "field")
- field.set("name", key)
- field.text=film[key]
- tree = ET.ElementTree(root)
- with open('films.xml', 'w') as f:
- f.write(
minidom.parseString(ET.tostring(tree.getroot(),'utf-8')).toprettyxml(indent="
") )
-
-def do_query(filmlist, cursor=""):
- params = {
- 'query': json.dumps(query),
- 'key': API_KEY,
- 'cursor': cursor
- }
- url = service_url + '?' + urllib.parse.urlencode(params)
- data = urllib.request.urlopen(url).read().decode('utf-8')
- response = json.loads(data)
- for item in response['result']:
- del item['type'] # It's always /film/film. No point of adding this.
- try:
- datetime.datetime.strptime(item['initial_release_date'], "%Y-%m-%d")
- except ValueError:
- #Date time not formatted properly. Keeping it simple by removing the
date field from that doc
- del item['initial_release_date']
- filmlist.append(item)
- return response.get("cursor")
-
-
-if __name__ == "__main__":
- filmlist = []
- cursor = do_query(filmlist)
- i=0
- while(cursor):
- cursor = do_query(filmlist, cursor)
- i = i+1
- if i==MAX_ITERATIONS:
- break
-
- gen_json(filmlist)
- gen_csv(filmlist)
- gen_xml(filmlist)
\ No newline at end of file
diff --git a/solr/solr-ref-guide/src/solr-control-script-reference.adoc
b/solr/solr-ref-guide/src/solr-control-script-reference.adoc
index db956c9..732b36e 100644
--- a/solr/solr-ref-guide/src/solr-control-script-reference.adoc
+++ b/solr/solr-ref-guide/src/solr-control-script-reference.adoc
@@ -111,6 +111,7 @@ The available options are:
* `cloud`
* `techproducts`
* `schemaless`
+* `films`
+
See the section <<Running with Example Configurations>> below for more details
on the example configurations.
+
@@ -321,12 +322,21 @@ When using this example, you can choose from any of the
available configsets fou
* *techproducts*: This example starts a single-node Solr instance with a
schema designed for the sample documents included in the
`$SOLR_HOME/example/exampledocs` directory.
+
The configset used can be found in
`$SOLR_HOME/server/solr/configsets/sample_techproducts_configs`.
++
+The data used can be found in `$SOLR_HOME/example/exampledocs/`.
* *schemaless*: This example starts a single-node Solr instance using a
managed schema, as described in the section <<schema-factory.adoc#,Schema
Factory Definition in SolrConfig>>, and provides a very minimal pre-defined
schema.
Solr will run in <<schemaless-mode.adoc#,Schemaless Mode>> with this
configuration, where Solr will create fields in the schema on the fly and will
guess field types used in incoming documents.
+
The configset used can be found in
`$SOLR_HOME/server/solr/configsets/_default`.
+* *films*: This example starts a single-node Solr instance using a managed
schema, as described in the section <<schema-factory.adoc#,Schema Factory
Definition in SolrConfig>>, and then uses the Schema API to create some custom
fields.
+Solr will run in <<schemaless-mode.adoc#,Schemaless Mode>> with this
configuration, where Solr will create fields in the schema on the fly and will
guess field types used in incoming documents as well. It then loads some
sample film data.
++
+The configset used can be found in
`$SOLR_HOME/server/solr/configsets/_default`.
++
+The film data used can be found in `$SOLR_HOME/example/films/films.json`.
+
[IMPORTANT]
====
The run in-foreground option (`-f`) is not compatible with the `-e` option
since the script needs to perform additional tasks after starting the Solr
server.