This is an automated email from the ASF dual-hosted git repository.
epugh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 1fffc52 SOLR-15467: Rename Parrallel SQL to just SQL throughout Solr
docs (#364)
1fffc52 is described below
commit 1fffc52103e77563a30fd307df1eb0b7a79a3377
Author: Eric Pugh <[email protected]>
AuthorDate: Thu Nov 4 08:30:26 2021 -0400
SOLR-15467: Rename Parrallel SQL to just SQL throughout Solr docs (#364)
* Fixed up the techproducts demo configset to work with parrallel SQL
* Added documentation about the SQL UI in the Solr Admin
* Reordered the content to put the SQL syntax details at the top, and move
the design docs to the bottom.
---
.../conf/managed-schema | 9 +-
.../src/images/parallel-sql-interface/cluster.png | Bin 3067133 -> 0 bytes
.../src/images/sql-screen/sql-query-ui.png | Bin 0 -> 129399 bytes
.../src/implicit-requesthandlers.adoc | 4 +-
.../src/major-changes-in-solr-6.adoc | 5 +-
.../src/major-changes-in-solr-7.adoc | 2 +-
.../src/query-syntax-and-parsers.adoc | 8 +-
solr/solr-ref-guide/src/solr-admin-ui.adoc | 1 +
...{parallel-sql-interface.adoc => sql-query.adoc} | 308 ++++++++++-----------
solr/solr-ref-guide/src/sql-screen.adoc | 26 ++
solr/webapp/web/partials/sqlquery.html | 2 +-
11 files changed, 200 insertions(+), 165 deletions(-)
diff --git
a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
index da016ab..3c4abfc 100644
---
a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
+++
b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
@@ -134,7 +134,7 @@
<field name="pre" type="preanalyzed" indexed="true" stored="true"/>
<field name="sku" type="text_en_splitting_tight" indexed="true"
stored="true" omitNorms="true"/>
<field name="name" type="text_general" indexed="true" stored="true"/>
- <field name="manu" type="text_gen_sort" indexed="true" stored="true"
omitNorms="true" multiValued="false"/>
+ <field name="manu" type="text_gen_sort" indexed="true" stored="true"
omitNorms="true" multiValued="false" docValues="true"
useDocValuesAsStored="true"/>
<field name="cat" type="string" indexed="true" stored="true"
multiValued="true"/>
<field name="features" type="text_general" indexed="true" stored="true"
multiValued="true"/>
<field name="includes" type="text_general" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true" />
@@ -185,7 +185,11 @@
<!-- non-tokenized version of manufacturer to make it easier to sort or
group
results by manufacturer. copied from "manu" via copyField -->
- <field name="manu_exact" type="string" indexed="true" stored="false"
docValues="false" />
+ <field name="manu_exact" type="string" indexed="false" stored="false"
docValues="true" />
+
+ <!-- non-tokenized version of name to make it easier to sort or group
+ results by product name. copied from "name" via copyField -->
+ <field name="name_exact" type="string" indexed="false" stored="false"
docValues="true" />
<field name="payloads" type="payloads" indexed="true" stored="true"/>
@@ -247,6 +251,7 @@
<copyField source="features" dest="text"/>
<copyField source="includes" dest="text"/>
<copyField source="manu" dest="manu_exact"/>
+ <copyField source="name" dest="name_exact"/>
<!-- Copy the price into a currency enabled field (default USD) -->
<copyField source="price" dest="price_c"/>
diff --git a/solr/solr-ref-guide/src/images/parallel-sql-interface/cluster.png
b/solr/solr-ref-guide/src/images/parallel-sql-interface/cluster.png
deleted file mode 100644
index 10f134f..0000000
Binary files
a/solr/solr-ref-guide/src/images/parallel-sql-interface/cluster.png and
/dev/null differ
diff --git a/solr/solr-ref-guide/src/images/sql-screen/sql-query-ui.png
b/solr/solr-ref-guide/src/images/sql-screen/sql-query-ui.png
new file mode 100644
index 0000000..1c97a9e
Binary files /dev/null and
b/solr/solr-ref-guide/src/images/sql-screen/sql-query-ui.png differ
diff --git a/solr/solr-ref-guide/src/implicit-requesthandlers.adoc
b/solr/solr-ref-guide/src/implicit-requesthandlers.adoc
index 7789527..b9b0c38 100644
--- a/solr/solr-ref-guide/src/implicit-requesthandlers.adoc
+++ b/solr/solr-ref-guide/src/implicit-requesthandlers.adoc
@@ -242,9 +242,9 @@ Graph Traversal:: Return
http://graphml.graphdrawing.org/[GraphML] formatted out
|`solr/<collection>/graph`
|{solr-javadocs}/core/org/apache/solr/handler/GraphHandler.html[GraphHandler]
|`_ADMIN_GRAPH`
|===
-SQL:: Front end of the Parallel SQL interface.
+SQL:: SQL query support.
+
-*Documentation*: <<parallel-sql-interface.adoc#sql-request-handler,SQL Request
Handler>>
+*Documentation*: <<sql-query.adoc#sql-request-handler,SQL Request Handler>>
+
[cols="3*.",frame=none,grid=cols,options="header"]
|===
diff --git a/solr/solr-ref-guide/src/major-changes-in-solr-6.adoc
b/solr/solr-ref-guide/src/major-changes-in-solr-6.adoc
index 81dc4ac..04fced2 100644
--- a/solr/solr-ref-guide/src/major-changes-in-solr-6.adoc
+++ b/solr/solr-ref-guide/src/major-changes-in-solr-6.adoc
@@ -38,9 +38,10 @@ Several new expression types have been added in Solr 6:
* JDBC connections to pull data from other systems and join with documents in
the Solr index.
[[major-5-6-parallel-sql]]
-=== Parallel SQL Interface
+=== SQL Query
+
+Built on streaming expressions, new in Solr 6 is a <<sql-query.adoc#,SQL
support>> to be able to send SQL queries to Solr. SQL statements are compiled
to streaming expressions on the fly, providing the full range of aggregations
available to streaming expression requests. A JDBC driver is included, which
allows using SQL clients and database visualization tools to query your Solr
index and import data to other systems.
-Built on streaming expressions, new in Solr 6 is a
<<parallel-sql-interface.adoc#,Parallel SQL interface>> to be able to send SQL
queries to Solr. SQL statements are compiled to streaming expressions on the
fly, providing the full range of aggregations available to streaming expression
requests. A JDBC driver is included, which allows using SQL clients and
database visualization tools to query your Solr index and import data to other
systems.
=== Cross Data Center Replication
diff --git a/solr/solr-ref-guide/src/major-changes-in-solr-7.adoc
b/solr/solr-ref-guide/src/major-changes-in-solr-7.adoc
index 0f9b8e4..9d4dfe3 100644
--- a/solr/solr-ref-guide/src/major-changes-in-solr-7.adoc
+++ b/solr/solr-ref-guide/src/major-changes-in-solr-7.adoc
@@ -199,4 +199,4 @@ Note again that this is not a complete list of all changes
that may impact your
* The <<caches-warming.adoc#,`maxWarmingSearchers` parameter>> now defaults to
1, and more importantly commits will now block if this limit is exceeded
instead of throwing an exception (a good thing). Consequently there is no
longer a risk in overlapping commits. Nonetheless users should continue to
avoid excessive committing. Users are advised to remove any pre-existing
`maxWarmingSearchers` entries from their `solrconfig.xml` files.
* The <<other-parsers.adoc#complex-phrase-query-parser,Complex Phrase query
parser>> now supports leading wildcards. Beware of its possible heaviness,
users are encouraged to use ReversedWildcardFilter in index time analysis.
* The JMX metric "avgTimePerRequest" (and the corresponding metric in the
metrics API for each handler) used to be a simple non-decaying average based on
total cumulative time and the number of requests. The Codahale Metrics
implementation applies exponential decay to this value, which heavily biases
the average towards the last 5 minutes.
-* Parallel SQL now uses Apache Calcite as its SQL framework. As part of this
change the default aggregation mode has been changed to `facet` rather than
`map_reduce`. There have also been changes to the SQL aggregate response and
some SQL syntax changes. Consult the <<parallel-sql-interface.adoc#,Parallel
SQL Interface>> documentation for full details.
+* Parallel SQL now uses Apache Calcite as its SQL framework. As part of this
change the default aggregation mode has been changed to `facet` rather than
`map_reduce`. There have also been changes to the SQL aggregate response and
some SQL syntax changes. Consult the <<sql-query.adoc#,SQL Query>>
documentation for full details.
diff --git a/solr/solr-ref-guide/src/query-syntax-and-parsers.adoc
b/solr/solr-ref-guide/src/query-syntax-and-parsers.adoc
index f2419de..a46f7e3 100644
--- a/solr/solr-ref-guide/src/query-syntax-and-parsers.adoc
+++ b/solr/solr-ref-guide/src/query-syntax-and-parsers.adoc
@@ -11,8 +11,9 @@
join-query-parser, \
spatial-search, \
other-parsers, \
- parallel-sql-interface, \
- query-screen
+ sql-query, \
+ query-screen, \
+ sql-screen
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -60,8 +61,9 @@ If you have custom parsing needs, you may want to extend that
class to create yo
| <<join-query-parser.adoc#,Join Query Parser>>: Query parser to facilitate
joins.
| <<spatial-search.adoc#,Spatial Search>>: Solr's spatial search capabilities.
| <<other-parsers.adoc#,Other Parsers>>: More parsers designed for use in
specific situations.
-| <<parallel-sql-interface.adoc#,Parallel SQL Interface>>: SQL interface for
Solr.
+| <<sql-query.adoc#,SQL Query>>: SQL language support for Solr.
| <<query-screen.adoc#,Query Screen>>: Form-based query builder.
+| <<sql-screen.adoc#,SQL Screen>>: SQL query runner with tabular results.
|===
// end::parser-sections[]
****
diff --git a/solr/solr-ref-guide/src/solr-admin-ui.adoc
b/solr/solr-ref-guide/src/solr-admin-ui.adoc
index 6761ec4..b3001fa 100644
--- a/solr/solr-ref-guide/src/solr-admin-ui.adoc
+++ b/solr/solr-ref-guide/src/solr-admin-ui.adoc
@@ -169,6 +169,7 @@ Here are sections throughout the Guide describing each
screen of the Admin UI:
| <<configuration-files.adoc#files-screen,Files Screen>>: Configuration file
browser.
| <<query-screen.adoc#,Query Screen>>: Form-based query builder.
| <<stream-screen.adoc#,Stream Screen>>: Submit streaming expressions and see
results and parsing explanations.
+| <<sql-screen.adoc#,SQL Screen>>: SQL query runner with tabular results.
| <<schema-browser-screen.adoc#,Schema Browser Screen>>: Field-level schema
details.
|===
// end::ui-collection-tools[]
diff --git a/solr/solr-ref-guide/src/parallel-sql-interface.adoc
b/solr/solr-ref-guide/src/sql-query.adoc
similarity index 96%
rename from solr/solr-ref-guide/src/parallel-sql-interface.adoc
rename to solr/solr-ref-guide/src/sql-query.adoc
index b542df6..cde7bcf 100644
--- a/solr/solr-ref-guide/src/parallel-sql-interface.adoc
+++ b/solr/solr-ref-guide/src/sql-query.adoc
@@ -1,4 +1,4 @@
-= Parallel SQL Interface
+= SQL Query Language
:page-children: jdbc-dbvisualizer, \
jdbc-squirrel, \
jdbc-zeppelin, \
@@ -21,16 +21,18 @@
// specific language governing permissions and limitations
// under the License.
-Solr's Parallel SQL Interface brings the power of SQL to SolrCloud.
+Solr's SQL Query support brings the power of SQL to SolrCloud.
The SQL interface seamlessly combines SQL with Solr's full-text search
capabilities.
-Both MapReduce style and JSON Facet API aggregations are supported, which
means the SQL interface can be used to support both *high query volume* and
*high cardinality* use cases.
+Both MapReduce style and JSON Facet API aggregations are supported, which
means that SQL querying can be used to support both *high query volume* and
*high cardinality* use cases.
== SQL Architecture
The SQL interface allows sending a SQL query to Solr and getting documents
streamed back in response.
Under the covers, Solr's SQL interface uses the Apache Calcite SQL engine to
translate SQL queries to physical query plans implemented as
<<streaming-expressions.adoc#,Streaming Expressions>>.
+More information about how to Solr supports SQL queries for Solr is described
in the <<Configuration>> section below.
+
=== Solr Collections and DB Tables
In a standard `SELECT` statement such as `SELECT <expressions> FROM <table>`,
the table names correspond to Solr collection names.
@@ -47,155 +49,11 @@ For example, we could index Solr's sample documents and
then construct an SQL qu
[source,sql]
----
-SELECT manu as mfr, price as retail FROM techproducts
+SELECT name_exact as name, manu as mfr, price as retail FROM techproducts
ORDER BY retail DESC
----
-The collection in Solr we are using is "techproducts", and we've asked for the
"manu" and "price" fields to be returned and aliased with new names.
-While this example does not use those aliases, we could build on this to ORDER
BY one or more of those fields.
-
-More information about how to structure SQL queries for Solr is included in
the section <<Solr SQL Syntax>> below.
-
-=== Aggregation Modes
-
-The SQL feature of Solr can work with aggregations (grouping of results) in
two ways:
-
-* `facet`: This is the *default* aggregation mode, which uses the JSON Facet
API or StatsComponent for aggregations.
-In this scenario the aggregations logic is pushed down into the search engine
and only the aggregates are sent across the network.
-This is Solr's normal mode of operation.
-This is fast when the cardinality of GROUP BY fields is low to moderate.
-But it breaks down when you have high cardinality fields in the GROUP BY field.
-* `map_reduce`: This implementation shuffles tuples to worker nodes and
performs the aggregation on the worker nodes.
-It involves sorting and partitioning the entire result set and sending it to
worker nodes.
-In this approach the tuples arrive at the worker nodes sorted by the GROUP BY
fields.
-The worker nodes can then rollup the aggregates one group at a time.
-This allows for unlimited cardinality aggregation, but you pay the price of
sending the entire result set across the network to worker nodes.
-
-These modes are defined with the `aggregationMode` property when sending the
request to Solr.
-
-The choice between aggregation modes depends on the cardinality of the fields
you are working with.
-If you have low-to-moderate cardinality in the fields you are grouping by, the
'facet' aggregation mode will give you a higher performance because only the
final groups are returned, very similar to how facets work today.
-If, however, you have high cardinality in the fields, the "map_reduce"
aggregation mode with worker nodes provide a much more performant option.
-
-== Configuration
-
-The request handlers used for the SQL interface are configured to load
implicitly, meaning there is little to do to start using this feature.
-
-[[sql-request-handler]]
-=== /sql Request Handler
-
-The `/sql` handler is the front end of the Parallel SQL interface.
-All SQL queries are sent to the `/sql` handler to be processed.
-The handler also coordinates the distributed MapReduce jobs when running
`GROUP BY` and `SELECT DISTINCT` queries in `map_reduce` mode.
-By default the `/sql` handler will choose worker nodes from its own collection
to handle the distributed operations.
-In this default scenario the collection where the `/sql` handler resides acts
as the default worker collection for MapReduce queries.
-
-By default, the `/sql` request handler is configured as an implicit handler,
meaning that it is always enabled in every Solr installation and no further
configuration is required.
-
-==== Authorization for SQL Requests
-
-If your Solr cluster is configured to use the
<<rule-based-authorization-plugin.adoc#,Rule-based Authorization Plugin>>,
-then you need to grant `GET` and `POST` permission on the `/sql`, `/select`,
and `/export` endpoints for all collections you intend to execute SQL queries
against.
-The `/select` endpoint is used for `LIMIT` queries, whereas the `/export`
handler is used for queries without a `LIMIT`, so in most cases, you'll want to
grant access to both.
-If you're using a worker collection for the `/sql` handler, then you only need
to grant access to the `/sql` endpoint for the worker collection and not the
collections in the data tier.
-Behind the scenes, the SQL handler also sends requests using the internal Solr
server identity to the `/admin/luke` endpoint to get schema metadata for a
collection.
-Consequently, you do not need to grant explicit permission to the
`/admin/luke` endpoint for users to execute SQL queries.
-
-[IMPORTANT]
-====
-As described below in the section <<Best Practices>>, you may want to set up a
separate collection for parallelized SQL queries.
-If you have high cardinality fields and a large amount of data, please be sure
to review that section and consider using a separate collection.
-====
-
-=== /stream and /export Request Handlers
-
-The Streaming API is an extensible parallel computing framework for SolrCloud.
-<<streaming-expressions.adoc#,Streaming Expressions>> provide a query language
and a serialization format for the Streaming API.
-
-The Streaming API provides support for fast MapReduce allowing it to perform
parallel relational algebra on extremely large data sets.
-Under the covers the SQL interface parses SQL queries using the Apache Calcite
SQL Parser.
-It then translates the queries to the parallel query plan.
-The parallel query plan is expressed using the Streaming API and Streaming
Expressions.
-
-Like the `/sql` request handler, the `/stream` and `/export` request handlers
are configured as implicit handlers, and no further configuration is required.
-
-=== Fields
-
-In some cases, fields used in SQL queries must be configured as DocValue
fields.
-If queries are unlimited, all fields must be DocValue fields.
-If queries are limited (with the `limit` clause) then fields do not have to be
have DocValues enabled.
-
-.Multi-valued Fields
-[IMPORTANT]
-====
-Multi-valued fields in the project list will be returned as a `List` of
values; with JDBC, use `getObject(col)` to retrieve the multi-valued field and
then cast to a `List`.
-In general, you can project, filter, and group by, but you cannot sort by
multi-valued fields.
-====
-
-=== Sending Queries
-
-The SQL Interface provides a basic JDBC driver and an HTTP interface to
perform queries.
-
-=== JDBC Driver
-
-The JDBC Driver ships with SolrJ.
-Below is sample code for creating a connection and executing a query with the
JDBC driver:
-
-[source,java]
-----
-Connection con = null;
-try {
- con = DriverManager.getConnection("jdbc:solr://" + zkHost +
"?collection=collection1&aggregationMode=map_reduce&numWorkers=2");
- stmt = con.createStatement();
- rs = stmt.executeQuery("SELECT a_s, sum(a_f) as sum FROM collection1 GROUP
BY a_s ORDER BY sum desc");
-
- while(rs.next()) {
- String a_s = rs.getString("a_s");
- double s = rs.getDouble("sum");
- }
-} finally {
- rs.close();
- stmt.close();
- con.close();
-}
-----
-
-The connection URL must contain the `zkHost` and the `collection` parameters.
-The collection must be a valid SolrCloud collection at the specified ZooKeeper
host.
-The collection must also be configured with the `/sql` handler.
-The `aggregationMode` and `numWorkers` parameters are optional.
-
-=== HTTP Interface
-
-Solr accepts parallel SQL queries through the `/sql` handler.
-
-Below is a sample curl command performing a SQL aggregate query in facet mode:
-
-[source,bash]
-----
-curl --data-urlencode 'stmt=SELECT to, count(*) FROM collection4 GROUP BY to
ORDER BY count(*) desc LIMIT 10'
http://localhost:8983/solr/collection4/sql?aggregationMode=facet
-----
-
-Below is sample result set:
-
-[source,json]
-----
-{"result-set":{"docs":[
- {"count(*)":9158,"to":"[email protected]"},
- {"count(*)":6244,"to":"[email protected]"},
- {"count(*)":5874,"to":"[email protected]"},
- {"count(*)":5867,"to":"[email protected]"},
- {"count(*)":5595,"to":"[email protected]"},
- {"count(*)":4904,"to":"[email protected]"},
- {"count(*)":4622,"to":"[email protected]"},
- {"count(*)":3819,"to":"[email protected]"},
- {"count(*)":3678,"to":"[email protected]"},
- {"count(*)":3653,"to":"[email protected]"},
- {"EOF":"true","RESPONSE_TIME":10}]}
-}
-----
-
-Notice that the result set is an array of tuples with key/value pairs that
match the SQL column list.
-The final tuple contains the EOF flag which signals the end of the stream.
+The collection in Solr we are using is "techproducts", and we've asked for the
"name_exact", manu" and "price" fields to be returned,
+with the aliased name "retail" being sorted upon to show most expensive to
least expensive products.
== Solr SQL Syntax
@@ -299,7 +157,7 @@ WHERE (fieldA = 'term1') AND NOT (fieldB = 'term2')
==== Supported WHERE Operators
-The parallel SQL interface supports and pushes down most common SQL operators,
specifically:
+The SQL query interface supports and pushes down most common SQL operators,
specifically:
[width="100%",options="header",]
|===
@@ -460,11 +318,153 @@ ORDER BY SUM(fieldC) ASC
LIMIT 100
----
+=== Aggregation Modes
+
+The SQL feature of Solr can work with aggregations (grouping of results) in
two ways:
+
+* `facet`: This is the *default* aggregation mode, which uses the JSON Facet
API or StatsComponent for aggregations.
+In this scenario the aggregations logic is pushed down into the search engine
and only the aggregates are sent across the network.
+This is Solr's normal mode of operation.
+This is fast when the cardinality of GROUP BY fields is low to moderate.
+But it breaks down when you have high cardinality fields in the GROUP BY field.
+* `map_reduce`: This implementation shuffles tuples to worker nodes and
performs the aggregation on the worker nodes.
+It involves sorting and partitioning the entire result set and sending it to
worker nodes.
+In this approach the tuples arrive at the worker nodes sorted by the GROUP BY
fields.
+The worker nodes can then rollup the aggregates one group at a time.
+This allows for unlimited cardinality aggregation, but you pay the price of
sending the entire result set across the network to worker nodes.
+
+These modes are defined with the `aggregationMode` property when sending the
request to Solr.
+
+The choice between aggregation modes depends on the cardinality of the fields
you are working with.
+If you have low-to-moderate cardinality in the fields you are grouping by, the
'facet' aggregation mode will give you a higher performance because only the
final groups are returned, very similar to how facets work today.
+If, however, you have high cardinality in the fields, the "map_reduce"
aggregation mode with worker nodes provide a much more performant option.
+
+== Configuration
+
+The request handlers used for the SQL interface are configured to load
implicitly, meaning there is little to do to start using this feature.
+
+[[sql-request-handler]]
+=== /sql Request Handler
+
+The `/sql` handler is the front end of the Parallel SQL interface.
+All SQL queries are sent to the `/sql` handler to be processed.
+The handler also coordinates the distributed MapReduce jobs when running
`GROUP BY` and `SELECT DISTINCT` queries in `map_reduce` mode.
+By default the `/sql` handler will choose worker nodes from its own collection
to handle the distributed operations.
+In this default scenario the collection where the `/sql` handler resides acts
as the default worker collection for MapReduce queries.
+
+By default, the `/sql` request handler is configured as an implicit handler,
meaning that it is always enabled in every Solr installation and no further
configuration is required.
+
+==== Authorization for SQL Requests
+
+If your Solr cluster is configured to use the
<<rule-based-authorization-plugin.adoc#,Rule-based Authorization Plugin>>,
+then you need to grant `GET` and `POST` permissions on the `/sql`, `/select`,
and `/export` endpoints for all collections you intend to execute SQL queries
against.
+The `/select` endpoint is used for `LIMIT` queries, whereas the `/export`
handler is used for queries without a `LIMIT`, so in most cases, you'll want to
grant access to both.
+If you're using a worker collection for the `/sql` handler, then you only need
to grant access to the `/sql` endpoint for the worker collection and not the
collections in the data tier.
+Behind the scenes, the SQL handler also sends requests using the internal Solr
server identity to the `/admin/luke` endpoint to get schema metadata for a
collection.
+Consequently, you do not need to grant explicit permission to the
`/admin/luke` endpoint for users to execute SQL queries.
+
+[IMPORTANT]
+====
+As described below in the section <<Best Practices>>, you may want to set up a
separate collection for parallelized SQL queries.
+If you have high cardinality fields and a large amount of data, please be sure
to review that section and consider using a separate collection.
+====
+
+=== /stream and /export Request Handlers
+
+The Streaming API is an extensible parallel computing framework for SolrCloud.
+<<streaming-expressions.adoc#,Streaming Expressions>> provide a query language
and a serialization format for the Streaming API.
+
+The Streaming API provides support for fast MapReduce allowing it to perform
parallel relational algebra on extremely large data sets.
+Under the covers the SQL interface parses SQL queries using the Apache Calcite
SQL Parser.
+It then translates the queries to the parallel query plan.
+The parallel query plan is expressed using the Streaming API and Streaming
Expressions.
+
+Like the `/sql` request handler, the `/stream` and `/export` request handlers
are configured as implicit handlers, and no further configuration is required.
+
+=== Fields
+
+In some cases, fields used in SQL queries must be configured as DocValue
fields.
+If queries are unlimited, all fields must be DocValue fields.
+If queries are limited (with the `limit` clause) then fields do not have to be
have DocValues enabled.
+
+.Multi-valued Fields
+[IMPORTANT]
+====
+Multi-valued fields in the project list will be returned as a `List` of
values; with JDBC, use `getObject(col)` to retrieve the multi-valued field and
then cast to a `List`.
+In general, you can project, filter, and group by, but you cannot sort by
multi-valued fields.
+====
+
+=== Sending Queries
+
+The SQL Interface provides a basic JDBC driver and an HTTP interface to
perform queries.
+
+=== JDBC Driver
+
+The JDBC Driver ships with SolrJ.
+Below is sample code for creating a connection and executing a query with the
JDBC driver:
+
+[source,java]
+----
+Connection con = null;
+try {
+ con = DriverManager.getConnection("jdbc:solr://" + zkHost +
"?collection=collection1&aggregationMode=map_reduce&numWorkers=2");
+ stmt = con.createStatement();
+ rs = stmt.executeQuery("SELECT a_s, sum(a_f) as sum FROM collection1 GROUP
BY a_s ORDER BY sum desc");
+
+ while(rs.next()) {
+ String a_s = rs.getString("a_s");
+ double s = rs.getDouble("sum");
+ }
+} finally {
+ rs.close();
+ stmt.close();
+ con.close();
+}
+----
+
+The connection URL must contain the `zkHost` and the `collection` parameters.
+The collection must be a valid SolrCloud collection at the specified ZooKeeper
host.
+The collection must also be configured with the `/sql` handler.
+The `aggregationMode` and `numWorkers` parameters are optional.
+
+=== HTTP Interface
+
+Solr accepts SQL queries through the `/sql` handler.
+
+Below is a sample curl command performing a SQL aggregate query in facet mode:
+
+[source,bash]
+----
+curl --data-urlencode 'stmt=SELECT to, count(*) FROM collection4 GROUP BY to
ORDER BY count(*) desc LIMIT 10'
http://localhost:8983/solr/collection4/sql?aggregationMode=facet
+----
+
+Below is sample result set:
+
+[source,json]
+----
+{"result-set":{"docs":[
+ {"count(*)":9158,"to":"[email protected]"},
+ {"count(*)":6244,"to":"[email protected]"},
+ {"count(*)":5874,"to":"[email protected]"},
+ {"count(*)":5867,"to":"[email protected]"},
+ {"count(*)":5595,"to":"[email protected]"},
+ {"count(*)":4904,"to":"[email protected]"},
+ {"count(*)":4622,"to":"[email protected]"},
+ {"count(*)":3819,"to":"[email protected]"},
+ {"count(*)":3678,"to":"[email protected]"},
+ {"count(*)":3653,"to":"[email protected]"},
+ {"EOF":"true","RESPONSE_TIME":10}]}
+}
+----
+
+Notice that the result set is an array of tuples with key/value pairs that
match the SQL column list.
+The final tuple contains the EOF flag which signals the end of the stream.
+
== Best Practices
-=== Separate Collections
+=== Separate Worker Collection
-It makes sense to create a separate SolrCloud collection just for the `/sql`
handler.
+It makes sense to create a separate SolrCloud worker collection just for the
`/sql` handler.
This collection can be created using SolrCloud's standard collection API.
Since this collection only exists to handle `/sql` requests and provide a pool
of worker nodes, this collection does not need to hold any data.
@@ -507,7 +507,7 @@ This means the tuples are always sorted and partitioned
before they hit the netw
The partitioned tuples are sent directly to the correct worker nodes in the
proper sort order, ready to be reduced.
.How Parallel SQL Queries are Distributed
-image::images/parallel-sql-interface/cluster.png[image,width=492,height=250]
+image::images/sql-query/cluster.png[image,width=492,height=250]
The image above shows the three tiers broken out into different SolrCloud
collections for clarity.
In practice the `/sql` handler and worker collection by default share the same
collection.
diff --git a/solr/solr-ref-guide/src/sql-screen.adoc
b/solr/solr-ref-guide/src/sql-screen.adoc
new file mode 100644
index 0000000..eab86f5
--- /dev/null
+++ b/solr/solr-ref-guide/src/sql-screen.adoc
@@ -0,0 +1,26 @@
+= SQL Query Screen
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+You can use the *SQL Query* screen to submit a SQL query to a Solr collection
and get the results in a tabular format.
+
+In the example in the screenshot, a SQL query has been submitted, and the
screen shows the query results listed as a table.
+
+.Results of a SQL Query
+image::images/sql-screen/sql-query-ui.png[image,height=400]
+
+Learn more about by reading the <<sql-query.adoc#,SQL details>>, including the
specific <<sql-query.adoc#solr-sql-syntax,SQL syntax>> supported by Solr.
diff --git a/solr/webapp/web/partials/sqlquery.html
b/solr/webapp/web/partials/sqlquery.html
index 5af437d..af976a0 100644
--- a/solr/webapp/web/partials/sqlquery.html
+++ b/solr/webapp/web/partials/sqlquery.html
@@ -23,7 +23,7 @@ limitations under the License.
</label>
<textarea name="stmt" ng-model="stmt" id="sqlexp"></textarea>
<button type="submit" ng-click="doQuery()">Execute</button>
- <span><a
href="https://solr.apache.org/guide/parallel-sql-interface.html"
target="_out">syntax help</a></span>
+ <span><a href="https://solr.apache.org/guide/sql-query.html"
target="_out">syntax help</a></span>
</form>
</div>
<div id="sql-response">