[solr] branch main updated: SOLR-15467: Rename Parrallel SQL to just SQL throughout Solr docs (#364)

epugh Thu, 04 Nov 2021 05:30:38 -0700

This is an automated email from the ASF dual-hosted git repository.

epugh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git



The following commit(s) were added to refs/heads/main by this push:
     new 1fffc52  SOLR-15467: Rename Parrallel SQL to just SQL throughout Solr 
docs (#364)
1fffc52 is described below

commit 1fffc52103e77563a30fd307df1eb0b7a79a3377
Author: Eric Pugh <[email protected]>
AuthorDate: Thu Nov 4 08:30:26 2021 -0400

    SOLR-15467: Rename Parrallel SQL to just SQL throughout Solr docs (#364)
    
    * Fixed up the techproducts demo configset to work with parrallel SQL
    * Added documentation about the SQL UI in the Solr Admin
    * Reordered the content to put the SQL syntax details at the top, and move 
the design docs to the bottom.
---
 .../conf/managed-schema                            |   9 +-
 .../src/images/parallel-sql-interface/cluster.png  | Bin 3067133 -> 0 bytes
 .../src/images/sql-screen/sql-query-ui.png         | Bin 0 -> 129399 bytes
 .../src/implicit-requesthandlers.adoc              |   4 +-
 .../src/major-changes-in-solr-6.adoc               |   5 +-
 .../src/major-changes-in-solr-7.adoc               |   2 +-
 .../src/query-syntax-and-parsers.adoc              |   8 +-
 solr/solr-ref-guide/src/solr-admin-ui.adoc         |   1 +
 ...{parallel-sql-interface.adoc => sql-query.adoc} | 308 ++++++++++-----------
 solr/solr-ref-guide/src/sql-screen.adoc            |  26 ++
 solr/webapp/web/partials/sqlquery.html             |   2 +-
 11 files changed, 200 insertions(+), 165 deletions(-)

diff --git 
a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema 
b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
index da016ab..3c4abfc 100644
--- 
a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
+++ 
b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
@@ -134,7 +134,7 @@
    <field name="pre" type="preanalyzed" indexed="true" stored="true"/>
    <field name="sku" type="text_en_splitting_tight" indexed="true" 
stored="true" omitNorms="true"/>
    <field name="name" type="text_general" indexed="true" stored="true"/>
-   <field name="manu" type="text_gen_sort" indexed="true" stored="true" 
omitNorms="true" multiValued="false"/>
+   <field name="manu" type="text_gen_sort" indexed="true" stored="true" 
omitNorms="true" multiValued="false" docValues="true" 
useDocValuesAsStored="true"/>
    <field name="cat" type="string" indexed="true" stored="true" 
multiValued="true"/>
    <field name="features" type="text_general" indexed="true" stored="true" 
multiValued="true"/>
    <field name="includes" type="text_general" indexed="true" stored="true" 
termVectors="true" termPositions="true" termOffsets="true" />
@@ -185,7 +185,11 @@
 
    <!-- non-tokenized version of manufacturer to make it easier to sort or 
group
         results by manufacturer.  copied from "manu" via copyField -->
-   <field name="manu_exact" type="string" indexed="true" stored="false" 
docValues="false" />
+   <field name="manu_exact" type="string" indexed="false" stored="false" 
docValues="true" />
+
+   <!-- non-tokenized version of name to make it easier to sort or group
+        results by product name.  copied from "name" via copyField -->
+   <field name="name_exact" type="string" indexed="false" stored="false" 
docValues="true" />
 
    <field name="payloads" type="payloads" indexed="true" stored="true"/>
 
@@ -247,6 +251,7 @@
    <copyField source="features" dest="text"/>
    <copyField source="includes" dest="text"/>
    <copyField source="manu" dest="manu_exact"/>
+   <copyField source="name" dest="name_exact"/>
 
    <!-- Copy the price into a currency enabled field (default USD) -->
    <copyField source="price" dest="price_c"/>
diff --git a/solr/solr-ref-guide/src/images/parallel-sql-interface/cluster.png 
b/solr/solr-ref-guide/src/images/parallel-sql-interface/cluster.png
deleted file mode 100644
index 10f134f..0000000
Binary files 
a/solr/solr-ref-guide/src/images/parallel-sql-interface/cluster.png and 
/dev/null differ
diff --git a/solr/solr-ref-guide/src/images/sql-screen/sql-query-ui.png 
b/solr/solr-ref-guide/src/images/sql-screen/sql-query-ui.png
new file mode 100644
index 0000000..1c97a9e
Binary files /dev/null and 
b/solr/solr-ref-guide/src/images/sql-screen/sql-query-ui.png differ
diff --git a/solr/solr-ref-guide/src/implicit-requesthandlers.adoc 
b/solr/solr-ref-guide/src/implicit-requesthandlers.adoc
index 7789527..b9b0c38 100644
--- a/solr/solr-ref-guide/src/implicit-requesthandlers.adoc
+++ b/solr/solr-ref-guide/src/implicit-requesthandlers.adoc
@@ -242,9 +242,9 @@ Graph Traversal:: Return 
http://graphml.graphdrawing.org/[GraphML] formatted out
 |`solr/<collection>/graph` 
|{solr-javadocs}/core/org/apache/solr/handler/GraphHandler.html[GraphHandler] 
|`_ADMIN_GRAPH`
 |===
 
-SQL:: Front end of the Parallel SQL interface.
+SQL:: SQL query support.
 +
-*Documentation*: <<parallel-sql-interface.adoc#sql-request-handler,SQL Request 
Handler>>
+*Documentation*: <<sql-query.adoc#sql-request-handler,SQL Request Handler>>
 +
 [cols="3*.",frame=none,grid=cols,options="header"]
 |===
diff --git a/solr/solr-ref-guide/src/major-changes-in-solr-6.adoc 
b/solr/solr-ref-guide/src/major-changes-in-solr-6.adoc
index 81dc4ac..04fced2 100644
--- a/solr/solr-ref-guide/src/major-changes-in-solr-6.adoc
+++ b/solr/solr-ref-guide/src/major-changes-in-solr-6.adoc
@@ -38,9 +38,10 @@ Several new expression types have been added in Solr 6:
 * JDBC connections to pull data from other systems and join with documents in 
the Solr index.
 
 [[major-5-6-parallel-sql]]
-=== Parallel SQL Interface
+=== SQL Query
+
+Built on streaming expressions, new in Solr 6 is a <<sql-query.adoc#,SQL 
support>> to be able to send SQL queries to Solr. SQL statements are compiled 
to streaming expressions on the fly, providing the full range of aggregations 
available to streaming expression requests. A JDBC driver is included, which 
allows using SQL clients and database visualization tools to query your Solr 
index and import data to other systems.
 
-Built on streaming expressions, new in Solr 6 is a 
<<parallel-sql-interface.adoc#,Parallel SQL interface>> to be able to send SQL 
queries to Solr. SQL statements are compiled to streaming expressions on the 
fly, providing the full range of aggregations available to streaming expression 
requests. A JDBC driver is included, which allows using SQL clients and 
database visualization tools to query your Solr index and import data to other 
systems.
 
 === Cross Data Center Replication
 
diff --git a/solr/solr-ref-guide/src/major-changes-in-solr-7.adoc 
b/solr/solr-ref-guide/src/major-changes-in-solr-7.adoc
index 0f9b8e4..9d4dfe3 100644
--- a/solr/solr-ref-guide/src/major-changes-in-solr-7.adoc
+++ b/solr/solr-ref-guide/src/major-changes-in-solr-7.adoc
@@ -199,4 +199,4 @@ Note again that this is not a complete list of all changes 
that may impact your
 * The <<caches-warming.adoc#,`maxWarmingSearchers` parameter>> now defaults to 
1, and more importantly commits will now block if this limit is exceeded 
instead of throwing an exception (a good thing). Consequently there is no 
longer a risk in overlapping commits. Nonetheless users should continue to 
avoid excessive committing. Users are advised to remove any pre-existing 
`maxWarmingSearchers` entries from their `solrconfig.xml` files.
 * The <<other-parsers.adoc#complex-phrase-query-parser,Complex Phrase query 
parser>> now supports leading wildcards. Beware of its possible heaviness, 
users are encouraged to use ReversedWildcardFilter in index time analysis.
 * The JMX metric "avgTimePerRequest" (and the corresponding metric in the 
metrics API for each handler) used to be a simple non-decaying average based on 
total cumulative time and the number of requests. The Codahale Metrics 
implementation applies exponential decay to this value, which heavily biases 
the average towards the last 5 minutes.
-* Parallel SQL now uses Apache Calcite as its SQL framework. As part of this 
change the default aggregation mode has been changed to `facet` rather than 
`map_reduce`. There have also been changes to the SQL aggregate response and 
some SQL syntax changes. Consult the <<parallel-sql-interface.adoc#,Parallel 
SQL Interface>> documentation for full details.
+* Parallel SQL now uses Apache Calcite as its SQL framework. As part of this 
change the default aggregation mode has been changed to `facet` rather than 
`map_reduce`. There have also been changes to the SQL aggregate response and 
some SQL syntax changes. Consult the <<sql-query.adoc#,SQL Query>> 
documentation for full details.
diff --git a/solr/solr-ref-guide/src/query-syntax-and-parsers.adoc 
b/solr/solr-ref-guide/src/query-syntax-and-parsers.adoc
index f2419de..a46f7e3 100644
--- a/solr/solr-ref-guide/src/query-syntax-and-parsers.adoc
+++ b/solr/solr-ref-guide/src/query-syntax-and-parsers.adoc
@@ -11,8 +11,9 @@
     join-query-parser, \
     spatial-search, \
     other-parsers, \
-    parallel-sql-interface, \
-    query-screen
+    sql-query, \
+    query-screen, \
+    sql-screen
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
@@ -60,8 +61,9 @@ If you have custom parsing needs, you may want to extend that 
class to create yo
 | <<join-query-parser.adoc#,Join Query Parser>>: Query parser to facilitate 
joins.
 | <<spatial-search.adoc#,Spatial Search>>: Solr's spatial search capabilities.
 | <<other-parsers.adoc#,Other Parsers>>: More parsers designed for use in 
specific situations.
-| <<parallel-sql-interface.adoc#,Parallel SQL Interface>>: SQL interface for 
Solr.
+| <<sql-query.adoc#,SQL Query>>: SQL language support for Solr.
 | <<query-screen.adoc#,Query Screen>>: Form-based query builder.
+| <<sql-screen.adoc#,SQL Screen>>: SQL query runner with tabular results.
 |===
 // end::parser-sections[]
 ****
diff --git a/solr/solr-ref-guide/src/solr-admin-ui.adoc 
b/solr/solr-ref-guide/src/solr-admin-ui.adoc
index 6761ec4..b3001fa 100644
--- a/solr/solr-ref-guide/src/solr-admin-ui.adoc
+++ b/solr/solr-ref-guide/src/solr-admin-ui.adoc
@@ -169,6 +169,7 @@ Here are sections throughout the Guide describing each 
screen of the Admin UI:
 | <<configuration-files.adoc#files-screen,Files Screen>>: Configuration file 
browser.
 | <<query-screen.adoc#,Query Screen>>: Form-based query builder.
 | <<stream-screen.adoc#,Stream Screen>>: Submit streaming expressions and see 
results and parsing explanations.
+| <<sql-screen.adoc#,SQL Screen>>: SQL query runner with tabular results.
 | <<schema-browser-screen.adoc#,Schema Browser Screen>>: Field-level schema 
details.
 |===
 // end::ui-collection-tools[]
diff --git a/solr/solr-ref-guide/src/parallel-sql-interface.adoc 
b/solr/solr-ref-guide/src/sql-query.adoc
similarity index 96%
rename from solr/solr-ref-guide/src/parallel-sql-interface.adoc
rename to solr/solr-ref-guide/src/sql-query.adoc
index b542df6..cde7bcf 100644
--- a/solr/solr-ref-guide/src/parallel-sql-interface.adoc
+++ b/solr/solr-ref-guide/src/sql-query.adoc
@@ -1,4 +1,4 @@
-= Parallel SQL Interface
+= SQL Query Language
 :page-children: jdbc-dbvisualizer, \
     jdbc-squirrel, \
     jdbc-zeppelin, \
@@ -21,16 +21,18 @@
 // specific language governing permissions and limitations
 // under the License.
 
-Solr's Parallel SQL Interface brings the power of SQL to SolrCloud.
+Solr's SQL Query support brings the power of SQL to SolrCloud.
 
 The SQL interface seamlessly combines SQL with Solr's full-text search 
capabilities.
-Both MapReduce style and JSON Facet API aggregations are supported, which 
means the SQL interface can be used to support both *high query volume* and 
*high cardinality* use cases.
+Both MapReduce style and JSON Facet API aggregations are supported, which 
means that SQL querying can be used to support both *high query volume* and 
*high cardinality* use cases.
 
 == SQL Architecture
 
 The SQL interface allows sending a SQL query to Solr and getting documents 
streamed back in response.
 Under the covers, Solr's SQL interface uses the Apache Calcite SQL engine to 
translate SQL queries to physical query plans implemented as 
<<streaming-expressions.adoc#,Streaming Expressions>>.
 
+More information about how to Solr supports SQL queries for Solr is described 
in the <<Configuration>> section below.
+
 === Solr Collections and DB Tables
 
 In a standard `SELECT` statement such as `SELECT <expressions> FROM <table>`, 
the table names correspond to Solr collection names.
@@ -47,155 +49,11 @@ For example, we could index Solr's sample documents and 
then construct an SQL qu
 
 [source,sql]
 ----
-SELECT manu as mfr, price as retail FROM techproducts
+SELECT name_exact as name, manu as mfr, price as retail FROM techproducts 
ORDER BY retail DESC
 ----
 
-The collection in Solr we are using is "techproducts", and we've asked for the 
"manu" and "price" fields to be returned and aliased with new names.
-While this example does not use those aliases, we could build on this to ORDER 
BY one or more of those fields.
-
-More information about how to structure SQL queries for Solr is included in 
the section <<Solr SQL Syntax>> below.
-
-=== Aggregation Modes
-
-The SQL feature of Solr can work with aggregations (grouping of results) in 
two ways:
-
-* `facet`: This is the *default* aggregation mode, which uses the JSON Facet 
API or StatsComponent for aggregations.
-In this scenario the aggregations logic is pushed down into the search engine 
and only the aggregates are sent across the network.
-This is Solr's normal mode of operation.
-This is fast when the cardinality of GROUP BY fields is low to moderate.
-But it breaks down when you have high cardinality fields in the GROUP BY field.
-* `map_reduce`: This implementation shuffles tuples to worker nodes and 
performs the aggregation on the worker nodes.
-It involves sorting and partitioning the entire result set and sending it to 
worker nodes.
-In this approach the tuples arrive at the worker nodes sorted by the GROUP BY 
fields.
-The worker nodes can then rollup the aggregates one group at a time.
-This allows for unlimited cardinality aggregation, but you pay the price of 
sending the entire result set across the network to worker nodes.
-
-These modes are defined with the `aggregationMode` property when sending the 
request to Solr.
-
-The choice between aggregation modes depends on the cardinality of the fields 
you are working with.
-If you have low-to-moderate cardinality in the fields you are grouping by, the 
'facet' aggregation mode will give you a higher performance because only the 
final groups are returned, very similar to how facets work today.
-If, however, you have high cardinality in the fields, the "map_reduce" 
aggregation mode with worker nodes provide a much more performant option.
-
-== Configuration
-
-The request handlers used for the SQL interface are configured to load 
implicitly, meaning there is little to do to start using this feature.
-
-[[sql-request-handler]]
-=== /sql Request Handler
-
-The `/sql` handler is the front end of the Parallel SQL interface.
-All SQL queries are sent to the `/sql` handler to be processed.
-The handler also coordinates the distributed MapReduce jobs when running 
`GROUP BY` and `SELECT DISTINCT` queries in `map_reduce` mode.
-By default the `/sql` handler will choose worker nodes from its own collection 
to handle the distributed operations.
-In this default scenario the collection where the `/sql` handler resides acts 
as the default worker collection for MapReduce queries.
-
-By default, the `/sql` request handler is configured as an implicit handler, 
meaning that it is always enabled in every Solr installation and no further 
configuration is required.
-
-==== Authorization for SQL Requests
-
-If your Solr cluster is configured to use the 
<<rule-based-authorization-plugin.adoc#,Rule-based Authorization Plugin>>,
-then you need to grant `GET` and `POST` permission on the `/sql`, `/select`, 
and `/export` endpoints for all collections you intend to execute SQL queries 
against.
-The `/select` endpoint is used for `LIMIT` queries, whereas the `/export` 
handler is used for queries without a `LIMIT`, so in most cases, you'll want to 
grant access to both.
-If you're using a worker collection for the `/sql` handler, then you only need 
to grant access to the `/sql` endpoint for the worker collection and not the 
collections in the data tier.
-Behind the scenes, the SQL handler also sends requests using the internal Solr 
server identity to the `/admin/luke` endpoint to get schema metadata for a 
collection.
-Consequently, you do not need to grant explicit permission to the 
`/admin/luke` endpoint for users to execute SQL queries.
-
-[IMPORTANT]
-====
-As described below in the section <<Best Practices>>, you may want to set up a 
separate collection for parallelized SQL queries.
-If you have high cardinality fields and a large amount of data, please be sure 
to review that section and consider using a separate collection.
-====
-
-=== /stream and /export Request Handlers
-
-The Streaming API is an extensible parallel computing framework for SolrCloud.
-<<streaming-expressions.adoc#,Streaming Expressions>> provide a query language 
and a serialization format for the Streaming API.
-
-The Streaming API provides support for fast MapReduce allowing it to perform 
parallel relational algebra on extremely large data sets.
-Under the covers the SQL interface parses SQL queries using the Apache Calcite 
SQL Parser.
-It then translates the queries to the parallel query plan.
-The parallel query plan is expressed using the Streaming API and Streaming 
Expressions.
-
-Like the `/sql` request handler, the `/stream` and `/export` request handlers 
are configured as implicit handlers, and no further configuration is required.
-
-=== Fields
-
-In some cases, fields used in SQL queries must be configured as DocValue 
fields.
-If queries are unlimited, all fields must be DocValue fields.
-If queries are limited (with the `limit` clause) then fields do not have to be 
have DocValues enabled.
-
-.Multi-valued Fields
-[IMPORTANT]
-====
-Multi-valued fields in the project list will be returned as a `List` of 
values; with JDBC, use `getObject(col)` to retrieve the multi-valued field and 
then cast to a `List`.
-In general, you can project, filter, and group by, but you cannot sort by 
multi-valued fields.
-====
-
-=== Sending Queries
-
-The SQL Interface provides a basic JDBC driver and an HTTP interface to 
perform queries.
-
-=== JDBC Driver
-
-The JDBC Driver ships with SolrJ.
-Below is sample code for creating a connection and executing a query with the 
JDBC driver:
-
-[source,java]
-----
-Connection con = null;
-try {
-    con = DriverManager.getConnection("jdbc:solr://" + zkHost + 
"?collection=collection1&aggregationMode=map_reduce&numWorkers=2");
-    stmt = con.createStatement();
-    rs = stmt.executeQuery("SELECT a_s, sum(a_f) as sum FROM collection1 GROUP 
BY a_s ORDER BY sum desc");
-
-    while(rs.next()) {
-        String a_s = rs.getString("a_s");
-        double s = rs.getDouble("sum");
-    }
-} finally {
-    rs.close();
-    stmt.close();
-    con.close();
-}
-----
-
-The connection URL must contain the `zkHost` and the `collection` parameters.
-The collection must be a valid SolrCloud collection at the specified ZooKeeper 
host.
-The collection must also be configured with the `/sql` handler.
-The `aggregationMode` and `numWorkers` parameters are optional.
-
-=== HTTP Interface
-
-Solr accepts parallel SQL queries through the `/sql` handler.
-
-Below is a sample curl command performing a SQL aggregate query in facet mode:
-
-[source,bash]
-----
-curl --data-urlencode 'stmt=SELECT to, count(*) FROM collection4 GROUP BY to 
ORDER BY count(*) desc LIMIT 10' 
http://localhost:8983/solr/collection4/sql?aggregationMode=facet
-----
-
-Below is sample result set:
-
-[source,json]
-----
-{"result-set":{"docs":[
-   {"count(*)":9158,"to":"[email protected]"},
-   {"count(*)":6244,"to":"[email protected]"},
-   {"count(*)":5874,"to":"[email protected]"},
-   {"count(*)":5867,"to":"[email protected]"},
-   {"count(*)":5595,"to":"[email protected]"},
-   {"count(*)":4904,"to":"[email protected]"},
-   {"count(*)":4622,"to":"[email protected]"},
-   {"count(*)":3819,"to":"[email protected]"},
-   {"count(*)":3678,"to":"[email protected]"},
-   {"count(*)":3653,"to":"[email protected]"},
-   {"EOF":"true","RESPONSE_TIME":10}]}
-}
-----
-
-Notice that the result set is an array of tuples with key/value pairs that 
match the SQL column list.
-The final tuple contains the EOF flag which signals the end of the stream.
+The collection in Solr we are using is "techproducts", and we've asked for the 
"name_exact", manu" and "price" fields to be returned,
+with the aliased name "retail" being sorted upon to show most expensive to 
least expensive products.
 
 == Solr SQL Syntax
 
@@ -299,7 +157,7 @@ WHERE (fieldA = 'term1') AND NOT (fieldB = 'term2')
 
 ==== Supported WHERE Operators
 
-The parallel SQL interface supports and pushes down most common SQL operators, 
specifically:
+The SQL query interface supports and pushes down most common SQL operators, 
specifically:
 
 [width="100%",options="header",]
 |===
@@ -460,11 +318,153 @@ ORDER BY SUM(fieldC) ASC
    LIMIT 100
 ----
 
+=== Aggregation Modes
+
+The SQL feature of Solr can work with aggregations (grouping of results) in 
two ways:
+
+* `facet`: This is the *default* aggregation mode, which uses the JSON Facet 
API or StatsComponent for aggregations.
+In this scenario the aggregations logic is pushed down into the search engine 
and only the aggregates are sent across the network.
+This is Solr's normal mode of operation.
+This is fast when the cardinality of GROUP BY fields is low to moderate.
+But it breaks down when you have high cardinality fields in the GROUP BY field.
+* `map_reduce`: This implementation shuffles tuples to worker nodes and 
performs the aggregation on the worker nodes.
+It involves sorting and partitioning the entire result set and sending it to 
worker nodes.
+In this approach the tuples arrive at the worker nodes sorted by the GROUP BY 
fields.
+The worker nodes can then rollup the aggregates one group at a time.
+This allows for unlimited cardinality aggregation, but you pay the price of 
sending the entire result set across the network to worker nodes.
+
+These modes are defined with the `aggregationMode` property when sending the 
request to Solr.
+
+The choice between aggregation modes depends on the cardinality of the fields 
you are working with.
+If you have low-to-moderate cardinality in the fields you are grouping by, the 
'facet' aggregation mode will give you a higher performance because only the 
final groups are returned, very similar to how facets work today.
+If, however, you have high cardinality in the fields, the "map_reduce" 
aggregation mode with worker nodes provide a much more performant option.
+
+== Configuration
+
+The request handlers used for the SQL interface are configured to load 
implicitly, meaning there is little to do to start using this feature.
+
+[[sql-request-handler]]
+=== /sql Request Handler
+
+The `/sql` handler is the front end of the Parallel SQL interface.
+All SQL queries are sent to the `/sql` handler to be processed.
+The handler also coordinates the distributed MapReduce jobs when running 
`GROUP BY` and `SELECT DISTINCT` queries in `map_reduce` mode.
+By default the `/sql` handler will choose worker nodes from its own collection 
to handle the distributed operations.
+In this default scenario the collection where the `/sql` handler resides acts 
as the default worker collection for MapReduce queries.
+
+By default, the `/sql` request handler is configured as an implicit handler, 
meaning that it is always enabled in every Solr installation and no further 
configuration is required.
+
+==== Authorization for SQL Requests
+
+If your Solr cluster is configured to use the 
<<rule-based-authorization-plugin.adoc#,Rule-based Authorization Plugin>>,
+then you need to grant `GET` and `POST` permissions on the `/sql`, `/select`, 
and `/export` endpoints for all collections you intend to execute SQL queries 
against.
+The `/select` endpoint is used for `LIMIT` queries, whereas the `/export` 
handler is used for queries without a `LIMIT`, so in most cases, you'll want to 
grant access to both.
+If you're using a worker collection for the `/sql` handler, then you only need 
to grant access to the `/sql` endpoint for the worker collection and not the 
collections in the data tier.
+Behind the scenes, the SQL handler also sends requests using the internal Solr 
server identity to the `/admin/luke` endpoint to get schema metadata for a 
collection.
+Consequently, you do not need to grant explicit permission to the 
`/admin/luke` endpoint for users to execute SQL queries.
+
+[IMPORTANT]
+====
+As described below in the section <<Best Practices>>, you may want to set up a 
separate collection for parallelized SQL queries.
+If you have high cardinality fields and a large amount of data, please be sure 
to review that section and consider using a separate collection.
+====
+
+=== /stream and /export Request Handlers
+
+The Streaming API is an extensible parallel computing framework for SolrCloud.
+<<streaming-expressions.adoc#,Streaming Expressions>> provide a query language 
and a serialization format for the Streaming API.
+
+The Streaming API provides support for fast MapReduce allowing it to perform 
parallel relational algebra on extremely large data sets.
+Under the covers the SQL interface parses SQL queries using the Apache Calcite 
SQL Parser.
+It then translates the queries to the parallel query plan.
+The parallel query plan is expressed using the Streaming API and Streaming 
Expressions.
+
+Like the `/sql` request handler, the `/stream` and `/export` request handlers 
are configured as implicit handlers, and no further configuration is required.
+
+=== Fields
+
+In some cases, fields used in SQL queries must be configured as DocValue 
fields.
+If queries are unlimited, all fields must be DocValue fields.
+If queries are limited (with the `limit` clause) then fields do not have to be 
have DocValues enabled.
+
+.Multi-valued Fields
+[IMPORTANT]
+====
+Multi-valued fields in the project list will be returned as a `List` of 
values; with JDBC, use `getObject(col)` to retrieve the multi-valued field and 
then cast to a `List`.
+In general, you can project, filter, and group by, but you cannot sort by 
multi-valued fields.
+====
+
+=== Sending Queries
+
+The SQL Interface provides a basic JDBC driver and an HTTP interface to 
perform queries.
+
+=== JDBC Driver
+
+The JDBC Driver ships with SolrJ.
+Below is sample code for creating a connection and executing a query with the 
JDBC driver:
+
+[source,java]
+----
+Connection con = null;
+try {
+    con = DriverManager.getConnection("jdbc:solr://" + zkHost + 
"?collection=collection1&aggregationMode=map_reduce&numWorkers=2");
+    stmt = con.createStatement();
+    rs = stmt.executeQuery("SELECT a_s, sum(a_f) as sum FROM collection1 GROUP 
BY a_s ORDER BY sum desc");
+
+    while(rs.next()) {
+        String a_s = rs.getString("a_s");
+        double s = rs.getDouble("sum");
+    }
+} finally {
+    rs.close();
+    stmt.close();
+    con.close();
+}
+----
+
+The connection URL must contain the `zkHost` and the `collection` parameters.
+The collection must be a valid SolrCloud collection at the specified ZooKeeper 
host.
+The collection must also be configured with the `/sql` handler.
+The `aggregationMode` and `numWorkers` parameters are optional.
+
+=== HTTP Interface
+
+Solr accepts SQL queries through the `/sql` handler.
+
+Below is a sample curl command performing a SQL aggregate query in facet mode:
+
+[source,bash]
+----
+curl --data-urlencode 'stmt=SELECT to, count(*) FROM collection4 GROUP BY to 
ORDER BY count(*) desc LIMIT 10' 
http://localhost:8983/solr/collection4/sql?aggregationMode=facet
+----
+
+Below is sample result set:
+
+[source,json]
+----
+{"result-set":{"docs":[
+   {"count(*)":9158,"to":"[email protected]"},
+   {"count(*)":6244,"to":"[email protected]"},
+   {"count(*)":5874,"to":"[email protected]"},
+   {"count(*)":5867,"to":"[email protected]"},
+   {"count(*)":5595,"to":"[email protected]"},
+   {"count(*)":4904,"to":"[email protected]"},
+   {"count(*)":4622,"to":"[email protected]"},
+   {"count(*)":3819,"to":"[email protected]"},
+   {"count(*)":3678,"to":"[email protected]"},
+   {"count(*)":3653,"to":"[email protected]"},
+   {"EOF":"true","RESPONSE_TIME":10}]}
+}
+----
+
+Notice that the result set is an array of tuples with key/value pairs that 
match the SQL column list.
+The final tuple contains the EOF flag which signals the end of the stream.
+
 == Best Practices
 
-=== Separate Collections
+=== Separate Worker Collection
 
-It makes sense to create a separate SolrCloud collection just for the `/sql` 
handler.
+It makes sense to create a separate SolrCloud worker collection just for the 
`/sql` handler.
 This collection can be created using SolrCloud's standard collection API.
 
 Since this collection only exists to handle `/sql` requests and provide a pool 
of worker nodes, this collection does not need to hold any data.
@@ -507,7 +507,7 @@ This means the tuples are always sorted and partitioned 
before they hit the netw
 The partitioned tuples are sent directly to the correct worker nodes in the 
proper sort order, ready to be reduced.
 
 .How Parallel SQL Queries are Distributed
-image::images/parallel-sql-interface/cluster.png[image,width=492,height=250]
+image::images/sql-query/cluster.png[image,width=492,height=250]
 
 The image above shows the three tiers broken out into different SolrCloud 
collections for clarity.
 In practice the `/sql` handler and worker collection by default share the same 
collection.
diff --git a/solr/solr-ref-guide/src/sql-screen.adoc 
b/solr/solr-ref-guide/src/sql-screen.adoc
new file mode 100644
index 0000000..eab86f5
--- /dev/null
+++ b/solr/solr-ref-guide/src/sql-screen.adoc
@@ -0,0 +1,26 @@
+= SQL Query Screen
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+You can use the *SQL Query* screen to submit a SQL query to a Solr collection 
and get the results in a tabular format.
+
+In the example in the screenshot, a SQL query has been submitted, and the 
screen shows the query results listed as a table.
+
+.Results of a SQL Query
+image::images/sql-screen/sql-query-ui.png[image,height=400]
+
+Learn more about by reading the <<sql-query.adoc#,SQL details>>, including the 
specific <<sql-query.adoc#solr-sql-syntax,SQL syntax>> supported by Solr.
diff --git a/solr/webapp/web/partials/sqlquery.html 
b/solr/webapp/web/partials/sqlquery.html
index 5af437d..af976a0 100644
--- a/solr/webapp/web/partials/sqlquery.html
+++ b/solr/webapp/web/partials/sqlquery.html
@@ -23,7 +23,7 @@ limitations under the License.
       </label>
       <textarea name="stmt" ng-model="stmt" id="sqlexp"></textarea>
       <button type="submit" ng-click="doQuery()">Execute</button>
-      <span><a 
href="https://solr.apache.org/guide/parallel-sql-interface.html"; 
target="_out">syntax help</a></span>
+      <span><a href="https://solr.apache.org/guide/sql-query.html"; 
target="_out">syntax help</a></span>
     </form>
   </div>
   <div id="sql-response">

[solr] branch main updated: SOLR-15467: Rename Parrallel SQL to just SQL throughout Solr docs (#364)

Reply via email to