This is an automated email from the ASF dual-hosted git repository. ctargett pushed a commit to branch jira/solr-15556-antora in repository https://gitbox.apache.org/repos/asf/solr.git
commit 1b21f23a03d93f081c104010ab021218f5fba257 Merge: 40dfdfc d1d4118 Author: Cassandra Targett <[email protected]> AuthorDate: Sun Jan 9 14:52:02 2022 -0600 Merge branch 'main' into jira/solr-15556-antora build.gradle | 5 +- buildSrc/build.gradle | 15 +- .../{build.gradle => scriptDepVersions.gradle} | 24 ++- .../java/org/apache/lucene/gradle/Checksum.java | 195 ++++++++++++++++++++ dev-docs/pmc-chair.adoc | 37 +--- dev-tools/doap/solr.rdf | 9 +- dev-tools/scripts/README.md | 26 +-- dev-tools/scripts/addVersion.py | 129 ++++--------- dev-tools/scripts/create_line_file_docs.py | 17 ++ dev-tools/scripts/githubPRs.py | 8 +- dev-tools/scripts/releaseWizard.py | 5 + dev-tools/scripts/releaseWizard.yaml | 123 ++++++++++++- dev-tools/scripts/releasedJirasRegex.py | 16 +- dev-tools/scripts/scriptutil.py | 14 +- gradle/documentation/changes-to-html.gradle | 24 ++- gradle/documentation/documentation.gradle | 2 +- gradle/help.gradle | 2 +- gradle/releasing.gradle | 55 ------ gradle/solr/packaging.gradle | 2 +- gradle/testing/alternative-jdk-support.gradle | 2 + gradle/testing/randomization.gradle | 4 +- gradle/validation/error-prone.gradle | 12 +- help/dependencies.txt | 4 +- help/gpgSigning.txt | 70 ------- help/publishing.txt | 138 ++++++++++++++ help/workflow.txt | 15 +- settings.gradle | 1 + solr/CHANGES.txt | 81 ++++++++- solr/README.md | 8 +- solr/bin/post | 2 +- ...ExtractNamedEntitiesUpdateProcessorFactory.java | 2 +- solr/contrib/ltr/example/README.md | 2 +- .../org/apache/solr/ltr/feature/SolrFeature.java | 2 +- .../conf/solr-exporter-config.xml | 98 +++++----- ...theus-solr-exporter-integration-test-config.xml | 202 ++++++++++----------- ...rometheus-solr-exporter-scraper-test-config.xml | 8 +- .../test-files/conf/test-config-with-templates.xml | 20 +- .../solr/collection1/conf/managed-schema | 8 +- .../prometheus/scraper/SolrCloudScraperTest.java | 2 +- .../src/java/org/apache/solr/api/V2HttpCall.java | 2 +- .../apache/solr/cloud/OverseerNodePrioritizer.java | 
27 ++- .../org/apache/solr/cloud/RecoveryStrategy.java | 11 +- .../java/org/apache/solr/cloud/ZkController.java | 42 ++++- .../solr/cloud/api/collections/AddReplicaCmd.java | 2 +- .../apache/solr/cloud/api/collections/Assign.java | 160 +++++++++------- .../cloud/api/collections/CreateCollectionCmd.java | 8 +- .../cloud/api/collections/DeleteCollectionCmd.java | 2 +- .../cloud/api/collections/DeleteReplicaCmd.java | 10 +- .../cloud/api/collections/OverseerRoleCmd.java | 9 + .../solr/cloud/api/collections/ReplaceNodeCmd.java | 36 ++-- .../solr/cloud/api/collections/RestoreCmd.java | 12 +- .../solr/cloud/api/collections/SplitShardCmd.java | 4 +- .../src/java/org/apache/solr/cluster/Cluster.java | 5 + .../impl/CollectionsRepairEventListener.java | 2 +- .../solr/cluster/placement/PlacementPlugin.java | 28 ++- .../impl/PlacementPluginAssignStrategy.java | 31 ++-- .../placement/impl/PlacementRequestImpl.java | 8 +- .../placement/impl/ReplicaPlacementImpl.java | 4 +- .../impl/SimpleClusterAbstractionsImpl.java | 13 +- .../plugins/AffinityPlacementFactory.java | 158 +++++++++------- .../plugins/MinimizeCoresPlacementFactory.java | 105 ++++++----- .../placement/plugins/RandomPlacementFactory.java | 42 +++-- .../java/org/apache/solr/core/CoreContainer.java | 10 + .../src/java/org/apache/solr/core/NodeRoles.java | 137 ++++++++++++++ .../src/java/org/apache/solr/core/SolrCore.java | 2 +- .../java/org/apache/solr/core/SolrXmlConfig.java | 2 +- .../java/org/apache/solr/handler/ClusterAPI.java | 191 ++++++++++++++----- .../solr/handler/admin/CoreAdminHandler.java | 4 + .../solr/handler/admin/ShowFileRequestHandler.java | 4 +- .../solr/handler/admin/ZookeeperStatusHandler.java | 3 +- .../solr/handler/admin/api/AllCoresStatusAPI.java | 53 ++++++ .../handler/admin/api/SingleCoreStatusAPI.java | 57 ++++++ .../solr/handler/component/FacetComponent.java | 4 +- .../handler/component/RealTimeGetComponent.java | 3 +- .../apache/solr/handler/loader/CSVLoaderBase.java | 24 ++- 
.../solr/highlight/UnifiedSolrHighlighter.java | 2 - .../org/apache/solr/internal/csv/CSVParser.java | 2 +- .../org/apache/solr/internal/csv/CSVStrategy.java | 157 ++-------------- .../apache/solr/packagemanager/PackageManager.java | 13 +- .../apache/solr/packagemanager/PackageUtils.java | 8 - .../solr/packagemanager/RepositoryManager.java | 3 - .../apache/solr/response/CSVResponseWriter.java | 66 ++++--- .../apache/solr/servlet/CoreContainerProvider.java | 4 +- .../processor/AtomicUpdateDocumentMerger.java | 108 +++++++---- .../processor/UUIDUpdateProcessorFactory.java | 2 +- .../src/java/org/apache/solr/util/SolrCLI.java | 28 ++- .../src/java/org/apache/solr/util/SolrVersion.java | 151 +++++++++++++++ .../apache/solr/analysis/htmlStripReaderTest.html | 2 +- .../org/apache/solr/cloud/CollectionPropsTest.java | 5 +- .../test/org/apache/solr/cloud/NodeRolesTest.java | 122 +++++++++++++ .../OverseerCollectionConfigSetProcessorTest.java | 15 +- .../org/apache/solr/cloud/OverseerRolesTest.java | 4 +- .../org/apache/solr/cloud/ReplaceNodeTest.java | 84 ++++++--- .../apache/solr/cloud/TestRandomFlRTGCloud.java | 8 +- .../placement/ClusterAbstractionsForTest.java | 5 + .../solr/handler/admin/V2CoresAPIMappingTest.java | 52 ++++++ .../solr/highlight/TestUnifiedSolrHighlighter.java | 19 ++ .../apache/solr/internal/csv/CSVParserTest.java | 13 +- .../apache/solr/internal/csv/CSVStrategyTest.java | 27 +-- .../org/apache/solr/search/TestRealTimeGet.java | 8 + .../update/processor/NestedAtomicUpdateTest.java | 133 +++++++++++--- .../test/org/apache/solr/util/TestSolrVersion.java | 65 +++++++ solr/cp-deps.sh | 22 --- solr/distribution/build.gradle | 160 ++++++++++++++++ solr/distribution/source-release.gradle | 50 +++++ solr/docker/build.gradle | 24 ++- solr/docker/gradle-help.txt | 2 +- solr/docker/templates/Dockerfile.body.template | 2 +- solr/example/README.md | 2 +- solr/example/films/README.md | 108 ++++++----- .../error_prone_annotations-2.10.0.jar.sha1 | 1 + 
.../error_prone_annotations-2.9.0.jar.sha1 | 1 - solr/licenses/log4j-1.2-api-2.15.0.jar.sha1 | 1 - solr/licenses/log4j-1.2-api-2.17.1.jar.sha1 | 1 + solr/licenses/log4j-api-2.15.0.jar.sha1 | 1 - solr/licenses/log4j-api-2.17.1.jar.sha1 | 1 + solr/licenses/log4j-core-2.15.0.jar.sha1 | 1 - solr/licenses/log4j-core-2.17.1.jar.sha1 | 1 + .../log4j-layout-template-json-2.15.0.jar.sha1 | 1 - .../log4j-layout-template-json-2.17.1.jar.sha1 | 1 + solr/licenses/log4j-slf4j-impl-2.15.0.jar.sha1 | 1 - solr/licenses/log4j-slf4j-impl-2.17.1.jar.sha1 | 1 + solr/licenses/log4j-web-2.15.0.jar.sha1 | 1 - solr/licenses/log4j-web-2.17.1.jar.sha1 | 1 + solr/packaging/build.gradle | 61 +------ solr/server/solr/README.md | 6 +- .../configsets/_default/conf/managed-schema.xml | 8 +- .../solr/configsets/_default/conf/solrconfig.xml | 33 +--- .../conf/managed-schema | 2 +- .../conf/solrconfig.xml | 55 ++---- .../conf/update-script.js | 2 +- .../conf/xslt/example_atom.xsl | 2 +- .../conf/xslt/updateXml.xsl | 2 +- solr/server/solr/solr.xml | 12 +- .../modules/configuration-guide/pages/v2-api.adoc | 6 +- .../monitoring-with-prometheus-and-grafana.adoc | 10 +- .../pages/partial-document-updates.adoc | 17 +- .../query-guide/pages/common-query-parameters.adoc | 4 + solr/solr-ref-guide/src/node-roles.adoc | 196 ++++++++++++++++++++ .../src/old-pages/solrcloud-clusters.adoc | 4 + .../solrj/request/CollectionAdminRequest.java | 9 +- .../solr/client/solrj/request/CoreApiMapping.java | 2 - .../solrj/request/json/DirectJsonQueryRequest.java | 4 +- .../solrj/request/json/JsonQueryRequest.java | 2 +- .../client/solrj/request/schema/SchemaRequest.java | 24 +-- .../client/solrj/request/schema/package-info.java | 2 +- .../solrj/response/schema/SchemaResponse.java | 2 +- .../client/solrj/response/schema/package-info.java | 2 +- .../apache/solr/common/cloud/ReplicaPosition.java | 20 +- .../java/org/apache/solr/common/cloud/Slice.java | 2 +- .../apache/solr/common/cloud/ZkStateReader.java | 11 +- 
.../cluster.security.BasicAuth.Commands.json | 2 +- .../apispec/cluster.security.JwtAuth.Commands.json | 2 +- .../cluster.security.MultiPluginAuth.Commands.json | 2 +- .../cluster.security.RuleBasedAuthorization.json | 2 +- .../cluster.security.authentication.Commands.json | 2 +- .../apispec/cluster.security.authentication.json | 2 +- .../cluster.security.authorization.Commands.json | 2 +- .../apispec/cluster.security.authorization.json | 2 +- .../src/resources/apispec/core.RealtimeGet.json | 2 +- .../apispec/core.SchemaEdit.addCopyField.json | 2 +- .../apispec/core.SchemaEdit.addField.json | 4 +- .../apispec/core.SchemaEdit.addFieldType.json | 2 +- .../apispec/core.SchemaEdit.deleteCopyField.json | 2 +- .../core.SchemaEdit.deleteDynamicField.json | 2 +- .../apispec/core.SchemaEdit.deleteField.json | 2 +- .../apispec/core.SchemaEdit.deleteFieldType.json | 2 +- .../src/resources/apispec/core.SchemaEdit.json | 4 +- .../apispec/core.SchemaRead.copyFields.json | 2 +- .../core.SchemaRead.dynamicFields_fieldTypes.json | 2 +- .../resources/apispec/core.SchemaRead.fields.json | 2 +- .../src/resources/apispec/core.SchemaRead.json | 2 +- solr/solrj/src/resources/apispec/core.Update.json | 2 +- .../resources/apispec/core.config.Commands.json | 6 +- .../apispec/core.config.Commands.runtimeLib.json | 2 +- .../apispec/core.config.Params.Commands.json | 2 +- .../src/resources/apispec/core.config.Params.json | 2 +- solr/solrj/src/resources/apispec/core.config.json | 2 +- .../src/resources/apispec/core.system.blob.json | 2 +- .../resources/apispec/core.system.blob.upload.json | 2 +- .../src/resources/apispec/core.tasks.cancel.json | 2 +- .../src/resources/apispec/core.tasks.list.json | 2 +- solr/solrj/src/resources/apispec/cores.Status.json | 20 -- .../src/java/org/apache/solr/SolrTestCaseJ4.java | 2 +- .../java/org/apache/solr/cloud/ZkTestServer.java | 32 +--- .../src/resources/create-keystores.sh | 4 +- solr/webapp/web/js/angular/controllers/query.js | 92 ++++++---- 
solr/webapp/web/js/angular/controllers/sqlquery.js | 9 +- solr/webapp/web/partials/query.html | 19 +- versions.lock | 10 +- versions.props | 4 +- 191 files changed, 3302 insertions(+), 1554 deletions(-) diff --cc solr/solr-ref-guide/modules/deployment-guide/pages/monitoring-with-prometheus-and-grafana.adoc index 6834f2d,0000000..befb5ed mode 100644,000000..100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/monitoring-with-prometheus-and-grafana.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/monitoring-with-prometheus-and-grafana.adoc @@@ -1,569 -1,0 +1,569 @@@ += Monitoring with Prometheus and Grafana +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +If you use https://prometheus.io[Prometheus] and https://grafana.com[Grafana] for metrics storage and data visualization, Solr includes a Prometheus exporter to collect metrics and other data. + +A Prometheus exporter (`solr-exporter`) allows users to monitor not only Solr metrics which come from the xref:metrics-reporting.adoc#metrics-api[Metrics API], but also facet counts which come from xref:query-guide:faceting.adoc[] and responses to xref:configuration-guide:collections-api.adoc[] commands and xref:ping.adoc[] requests. 
+ +This graphic provides a more detailed view: + +.solr-exporter Diagram +image::monitoring-with-prometheus-and-grafana/solr-exporter-diagram.png[image,width=600] + +The Prometheus exporter is included in Solr as a contrib, and is located in `contrib/prometheus-exporter` in your Solr instance. + +There are three aspects to running `solr-exporter`: + +* Modify the `solr-exporter-config.xml` to define the data to collect. +Solr has a default configuration you can use, but if you would like to modify it before running the exporter the first time, see the section <<Exporter Configuration>> below. +* Start the exporter from within Solr. +See the section below <<Starting the Exporter>>. +* Modify your Prometheus configuration to listen on the correct port. +See the section below <<Prometheus Configuration>> + +== Starting the Exporter +You can start `solr-exporter` by running `./bin/solr-exporter` (Linux) or `.\bin\solr-exporter.cmd` (Windows) from the `contrib/prometheus-exporter` directory. 
+
+See the commands below depending on your operating system and Solr operating mode:
+
+[.dynamic-tabs]
+--
+[example.tab-pane#solr-exporter-linux]
+====
+[.tab-label]*Linux*
+
+.User-managed / Single-node
+[source,bash]
+----
+$ cd contrib/prometheus-exporter
+$ ./bin/solr-exporter -p 9854 -b http://localhost:8983/solr -f ./conf/solr-exporter-config.xml -n 8
+----
+
+.SolrCloud
+[source,bash]
+----
+$ cd contrib/prometheus-exporter
+$ ./bin/solr-exporter -p 9854 -z localhost:2181/solr -f ./conf/solr-exporter-config.xml -n 16
+----
+====
+
+[example.tab-pane#solr-exporter-windows]
+====
+[.tab-label]*Windows*
+
+.User-managed / Single-node
+[source,text]
+----
+> cd contrib\prometheus-exporter
+> .\bin\solr-exporter.cmd -p 9854 -b http://localhost:8983/solr -f .\conf\solr-exporter-config.xml -n 8
+----
+
+.SolrCloud
+[source,text]
+----
+> cd contrib\prometheus-exporter
+> .\bin\solr-exporter.cmd -p 9854 -z localhost:2181/solr -f .\conf\solr-exporter-config.xml -n 16
+----
+====
+--
+
+=== Command Line Parameters
+
+The list of available parameters for the Prometheus Exporter.
+All parameters can be provided via an environment variable, instead of through the command line.
+
+`-h`, `--help`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+Displays command line help and usage.
+
+`-p`, `--port`, `$PORT`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `8989`
+|===
++
+The port where Prometheus will listen for new data.
+This port will be used to configure Prometheus.
+It can be any port not already in use on your server.
+
+`-b`, `--baseurl`, `$SOLR_URL`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: _see description_
+|===
++
+The Solr base URL (such as `\http://localhost:8983/solr`) when Solr is running in a user-managed cluster or a single-node installation.
+If you are running SolrCloud, do not specify this parameter.
+If neither the `-b` parameter nor the `-z` parameter are defined, the default is `-b \http://localhost:8983/solr`. + +`-z`, `--zkhost`, `$ZK_HOST`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: _see description_ +|=== ++ +The ZooKeeper connect string (such as `localhost:9983`, or `localhost:2181/solr`) when Solr is running SolrCloud. +If you are running a user-managed cluster or single-node installation, do not specify this parameter. +If neither the `-b` parameter nor the `-z` parameter are defined, the `-b` parameter default is used. + +`-f`, `--config-file`, `$CONFIG_FILE`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `contrib/prometheus-exporter/conf/solr-exporter-config.xml` +|=== ++ +The path to the configuration file that defines the Solr metrics to read. + +`-n`, `--num-threads`, `$NUM_THREADS`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `1` +|=== ++ +The number of threads. +The `solr-exporter` creates thread pools for requests to Solr. +Request latency can be improved by increasing the number of threads. + +`-s`, `--scrape-interval`, `$SCRAPE_INTERVAL`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `60` seconds +|=== ++ +The number of seconds between collecting metrics from Solr. +The `solr-exporter` collects metrics from Solr every few seconds controlled by this setting. +These metrics are cached and returned regardless of how frequently prometheus is configured to pull metrics from this tool. +The freshness of the metrics can be improved by reducing the scrape interval but do not set it to a very low value because metrics collection can be expensive and can execute arbitrary searches to ping Solr. + +The metrics exposed by `solr-exporter` can be seen at the metrics endpoint: `\http://localhost:8983/solr/admin/metrics`. 
+ +=== Environment Variable Options + +The `./bin` scripts provided with the Prometheus Exporter support the use of custom java options through the following environment variables: + +`JAVA_HEAP`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `512m` +|=== ++ +Sets the initial (`Xms`) and max (`Xmx`) Java heap size. + +`JAVA_MEM`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Custom java memory settings (e.g., `-Xms1g -Xmx2g`). +This is ignored if `JAVA_HEAP` is provided. + +`GC_TUNE`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `-XX:+UseG1GC` +|=== ++ +Custom Java garbage collection settings. + +`JAVA_OPTS`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Extra JVM options. + +`ZK_CREDS_AND_ACLS`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Credentials for connecting to a ZooKeeper host that is protected with ACLs. +For more information on what to include in this variable, refer to the section xref:zookeeper-access-control.adoc#zookeeper-acls-in-solr-scripts[ZooKeeper ACLs in Solr Scripts] or the example <<getting-metrics-from-a-secured-solrcloud>> below. + +`CLASSPATH_PREFIX`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Location of extra libraries to load when starting the `solr-exporter`. + +All <<command-line-parameters>> are able to be provided via environment variables when using the `./bin` scripts. + +=== Getting Metrics from a Secured SolrCloud + +Your SolrCloud security configuration can be injected into `solr-exporter` using environment variables in a fashion similar to other clients using xref:solrj.adoc[]. +This is possible because the main script picks up <<Environment Variable Options>> and passes them on to the Java process. + +The following example assumes a SolrCloud instance secured by xref:basic-authentication-plugin.adoc[], xref:enabling-ssl.adoc[SSL] and xref:zookeeper-access-control.adoc[]. 
+ +Suppose you have a file `basicauth.properties` with the Solr Basic-Auth credentials: + +---- +httpBasicAuthUser=myUser +httpBasicAuthPassword=myPassword +---- + +Then you can start the Exporter as follows (Linux). + +[source,bash] +---- +$ cd contrib/prometheus-exporter +$ export JAVA_OPTS="-Djavax.net.ssl.trustStore=truststore.p12 -Djavax.net.ssl.trustStorePassword=truststorePassword -Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory -Dsolr.httpclient.config=basicauth.properties" +$ export ZK_CREDS_AND_ACLS="-DzkCredentialsProvider=org.apache.solr.common.cloud.VMParamsSingleSetCredentialsDigestZkCredentialsProvider -DzkDigestUsername=readonly-user -DzkDigestPassword=zkUserPassword" +$ export CLASSPATH_PREFIX="../../server/solr-webapp/webapp/WEB-INF/lib/commons-codec-1.11.jar" +$ ./bin/solr-exporter -p 9854 -z zk1:2181,zk2:2181,zk3:2181 -f ./conf/solr-exporter-config.xml -n 16 +---- + +NOTE:: The Exporter needs the `commons-codec` library for SSL/BasicAuth, but does not bring it. +Therefore the example reuses it from the Solr web app. +Of course, you can use a different source. + +== Exporter Configuration +The configuration for the `solr-exporter` defines the data to get from Solr. +This includes the metrics, but can also include queries to the PingRequestHandler, the Collections API, and a query to any query request handler. + +A default example configuration is in `contrib/prometheus-exporter/conf/solr-exporter-config.xml`. +Below is a slightly shortened version of it: + +[source,xml] +---- +<config> + + <rules> + + <ping> + <lst name="request"> + <lst name="query"> + <str name="path">/admin/ping</str> + </lst> + <arr name="jsonQueries"> + <str> + . 
as $object | $object | + (if $object.status == "OK" then 1.0 else 0.0 end) as $value | + { + name : "solr_ping", + type : "GAUGE", - help : "See following URL: https://lucene.apache.org/solr/guide/ping.html", ++ help : "See following URL: https://solr.apache.org/guide/ping.html", + label_names : [], + label_values : [], + value : $value + } + </str> + </arr> + </lst> + </ping> + + <metrics> + <lst name="request"> + <lst name="query"> + <str name="path">/admin/metrics</str> + <lst name="params"> + <str name="group">all</str> + <str name="type">all</str> + <str name="prefix"></str> + <str name="property"></str> + </lst> + </lst> + <arr name="jsonQueries"> + <!-- + jetty metrics + --> + <str> + .metrics["solr.jetty"] | to_entries | .[] | select(.key | startswith("org.eclipse.jetty.server.handler.DefaultHandler")) | select(.key | endswith("xx-responses")) as $object | + $object.key | split(".") | last | split("-") | first as $status | + $object.value.count as $value | + { + name : "solr_metrics_jetty_response_total", + type : "COUNTER", - help : "See following URL: https://lucene.apache.org/solr/guide/metrics-reporting.html", ++ help : "See following URL: https://solr.apache.org/guide/metrics-reporting.html", + label_names : ["status"], + label_values : [$status], + value : $value + } + </str> +... + </arr> + </lst> + </metrics> + + <collections> + <lst name="request"> + <lst name="query"> + <str name="path">/admin/collections</str> + <lst name="params"> + <str name="action">CLUSTERSTATUS</str> + </lst> + </lst> + <arr name="jsonQueries"> + <str> + .cluster.live_nodes | length as $value| + { + name : "solr_collections_live_nodes", + type : "GAUGE", - help : "See following URL: https://lucene.apache.org/solr/guide/collections-api.html#clusterstatus", ++ help : "See following URL: https://solr.apache.org/guide/collections-api.html#clusterstatus", + label_names : [], + label_values : [], + value : $value + } + </str> +... 
+ </arr> + </lst> + </collections> + + <search> + <lst name="request"> + <lst name="query"> + <str name="collection">collection1</str> + <str name="path">/select</str> + <lst name="params"> + <str name="q">*:*</str> + <str name="start">0</str> + <str name="rows">0</str> + <str name="json.facet"> + { + category: { + type: terms, + field: cat + } + } + </str> + </lst> + </lst> + <arr name="jsonQueries"> + <str> + .facets.category.buckets[] as $object | + $object.val as $term | + $object.count as $value | + { + name : "solr_facets_category", + type : "GAUGE", + help : "Category facets", + label_names : ["term"], + label_values : [$term], + value : $value + } + </str> + </arr> + </lst> + </search> + + </rules> + +</config> +---- + +=== Configuration Tags and Elements +The `solr-exporter` works by making a request to Solr according to the definitions in the configuration file, scraping the response, and converting it to a JSON structure Prometheus can understand. +The configuration file defines the elements to request, how to scrape them, and where to place the extracted data in the JSON template. + +The `solr-exporter` configuration file always starts and closes with two simple elements: + +[source,xml] +---- +<config> + <rules> + + </rules> +</config> +---- + +Between these elements, the data the `solr-exporter` should request is defined. +There are several possible types of requests to make: + +[horizontal] +`<ping>`:: Scrape the response to a xref:ping.adoc[] request. +`<metrics>`:: Scrape the response to a xref:metrics-reporting.adoc#metrics-api[Metrics API] request. +`<collections>`:: Scrape the response to a xref:configuration-guide:collections-api.adoc[] request. +`<search>`:: Scrape the response to a xref:query-guide:query-syntax-and-parsers.adoc[query] request. + +Within each of these types, we need to define the query and how to work with the response. 
+To do this, we define two additional elements: + +`<query>`:: +Defines the query parameter(s) used for the request. +This section uses several additional properties to define your query: + +`collection`::: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +The collection to issue the query against. +Only used with SolrCloud clusters. + +`core`::: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +The core to issue the query against. +Only used with user-managed clusters or single-node installations. + +`path`::: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +The path to the query endpoint where the request will be sent. +Examples include `admin/metrics` or `/select` or `admin/collections`. + +`params`::: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Additional query parameters. +These will vary depending on the request type and the endpoint. +For example, if using the Metrics endpoint, you can add parameters to limit the query to a certain group and/or prefix. +If you're using the Collections API, the command you want to use would be a parameter. + +`<jsonQueries>`:: +This is an array that defines one or more JSON Queries in jq syntax. +For more details about how to structure these queries, see https://stedolan.github.io/jq/manual/[the jq user manual]. ++ +A jq query has to output JSON in the following format: ++ +[source,json] +---- +{ + "name": "solr_ping", + "type": "GAUGE", - "help": "See following URL: https://lucene.apache.org/solr/guide/ping.html", ++ "help": "See following URL: https://solr.apache.org/guide/ping.html", + "label_names": ["base_url","core"], + "label_values": ["http://localhost:8983/solr","collection1"], + "value": 1.0 +} +---- + +See the section <<Exposition Format>> below for information about what information should go into each property, and an example of how the above example is translated for Prometheus. 
+
+=== Exposition Format
+
+The `solr-exporter` converts the JSON to the following exposition format:
+
+[source,plain]
+----
+# TYPE <name> <type>
+# HELP <name> <help>
+<name>{<label_names[0]>=<label_values[0]>,<label_names[1]>=<label_values[1]>,...} <value>
+----
+
+The following parameters should be set:
+
+`name`::
+The metric name to set.
+For more details, see https://prometheus.io/docs/practices/naming/[Prometheus naming best practices].
+
+`type`::
+The type of the metric, can be `COUNTER`, `GAUGE`, `SUMMARY`, `HISTOGRAM` or `UNTYPED`.
+For more details, see https://prometheus.io/docs/concepts/metric_types/[Prometheus metric types].
+
+`help`::
+Help text for the metric.
+
+`label_names`::
+Label names for the metric.
+For more details, see https://prometheus.io/docs/practices/naming/[Prometheus naming best practices].
+
+`label_values`::
+Label values for the metric.
+For more details, see https://prometheus.io/docs/practices/naming/[Prometheus naming best practices].
+
+`value`::
+Value for the metric.
+Value must be set to Double type.
+
+For example, `solr-exporter` converts the JSON in the previous section to the following:
+
+[source,plain]
+----
+# TYPE solr_ping gauge
- # HELP solr_ping See following URL: https://lucene.apache.org/solr/guide/ping.html
++# HELP solr_ping See following URL: https://solr.apache.org/guide/ping.html
+solr_ping{base_url="http://localhost:8983/solr",core="collection1"} 1.0
+----
+
+== Prometheus Configuration
+
+Prometheus is a separate server that you need to download and deploy.
+More information can be found at the Prometheus https://prometheus.io/docs/prometheus/latest/getting_started/[Getting Started] page.
+ +In order for Prometheus to know about the `solr-exporter`, the listen address must be added to the Prometheus server's `prometheus.yml` configuration file, as in this example: + +[source,plain] +---- +scrape_configs: + - job_name: 'solr' + static_configs: + - targets: ['localhost:9854'] +---- + +If you already have a section for `scrape_configs`, you can add the `job_name` and other values in the same section. + +When you apply the settings to Prometheus, it will start to pull Solr's metrics from `solr-exporter`. + +You can test that the Prometheus server, `solr-exporter`, and Solr are working together by browsing to http://localhost:9090 and +doing a query for `solr_ping` metric in the Prometheus GUI: + +.Prometheus Solr Ping expression +image::monitoring-with-prometheus-and-grafana/prometheus-solr-ping.png[image,width=800] + +== Sample Grafana Dashboard + +To use Grafana for visualization, it must be downloaded and deployed separately. +More information can be found on the Grafana https://grafana.com/docs/grafana/latest/[Documentation] site. +Grafana consumes data from many sources, including the Prometheus server that you previously set up. + +A Grafana sample dashboard is provided in the following JSON file: `contrib/prometheus-exporter/conf/grafana-solr-dashboard.json`. +You can place this with your other Grafana dashboard configurations and modify it as necessary depending on any customization you've done for the `solr-exporter` configuration. + +TIP: You can directly import the Solr dashboard https://grafana.com/grafana/dashboards/12456[via grafana.com] by using the Import function with the dashboard id `12456`. 
+ +This screenshot shows what it might look like: + +.Grafana Dashboard +image::monitoring-with-prometheus-and-grafana/grafana-solr-dashboard.png[image,width=800] diff --cc solr/solr-ref-guide/modules/indexing-guide/pages/partial-document-updates.adoc index 3aee891,0000000..eb69e3c mode 100644,000000..100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/partial-document-updates.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/partial-document-updates.adoc @@@ -1,544 -1,0 +1,545 @@@ += Partial Document Updates +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +Once you have indexed the content you need in your Solr index, you will want to start thinking about your strategy for dealing with changes to those documents. +Solr supports three approaches to updating documents that have only partially changed. + +The first is _<<Atomic Updates,atomic updates>>_. +This approach allows changing only one or more fields of a document without having to reindex the entire document. + +The second approach is known as _<<In-Place Updates,in-place updates>>_. 
+This approach is similar to atomic updates (is a subset of atomic updates in some sense), but can be used only for updating single valued non-indexed and non-stored docValue-based numeric fields.
+
+The third approach is known as _<<Optimistic Concurrency,optimistic concurrency>>_ or _optimistic locking_.
+It is a feature of many NoSQL databases, and allows conditionally updating a document based on its version.
+This approach includes semantics and rules for how to deal with version matches or mis-matches.
+
+Atomic Updates (and in-place updates) and Optimistic Concurrency may be used as independent strategies for managing changes to documents, or they may be combined: you can use optimistic concurrency to conditionally apply an atomic update.
+
+== Atomic Updates
+
+Solr supports several modifiers that atomically update values of a document.
+This allows updating only specific fields, which can help speed indexing processes in an environment where speed of index additions is critical to the application.
+
+To use atomic updates, add a modifier to the field that needs to be updated.
- The content can be updated, added to, or incrementally increased if the field has a numeric type.
++The content can be updated, added to, or, if the field has a numeric type, incrementally increased or decreased.
+
+`set`::
+Set or replace the field value(s) with the specified value(s), or remove the values if 'null' or empty list is specified as the new value.
++
+May be specified as a single value, or as a list for multiValued fields.
+
+`add`::
+Adds the specified values to a multiValued field.
+May be specified as a single value, or as a list.
+
+`add-distinct`::
+Adds the specified values to a multiValued field, only if not already present.
+May be specified as a single value, or as a list.
+
+`remove`::
+Removes (all occurrences of) the specified values from a multiValued field.
+May be specified as a single value, or as a list.
+ +`removeregex`:: +Removes all occurrences of the specified regex from a multiValued field. +May be specified as a single value, or as a list. + +`inc`:: - Increments a numeric value by a specific amount. - Must be specified as a single numeric value. ++Increments or decrements a numeric field's value by a specific amount, specified as a single integer or float. ++Postive amounts increment the field's value, and negative decrement. + +=== Field Storage + +The core functionality of atomically updating a document requires that all fields in your schema must be configured as stored (`stored="true"`) or docValues (`docValues="true"`) except for fields which are `<copyField/>` destinations, which must be configured as `stored="false"`. +Atomic updates are applied to the document represented by the existing stored field values. +All data in copyField destinations fields must originate from ONLY copyField sources. + +If `<copyField/>` destinations are configured as stored, then Solr will attempt to index both the current value of the field as well as an additional copy from any source fields. +If such fields contain some information that comes from the indexing program and some information that comes from copyField, then the information which originally came from the indexing program will be lost when an atomic update is made. + +There are other kinds of derived fields that must also be set so they aren't stored. +Some spatial field types, such as BBoxField and LatLonSpatialFieldType, use derived fields. +CurrencyFieldType also uses derived fields. +These types create additional fields which are normally specified by a dynamic field definition. +That dynamic field definition must be not stored, or indexing will fail. 
+ +=== Example Updating Part of a Document + +If the following document exists in our collection: + +[source,json] +---- +{"id":"mydoc", + "price":10, + "popularity":42, + "categories":["kids"], + "sub_categories":["under_5","under_10"], + "promo_ids":["a123x"], + "tags":["free_to_try","buy_now","clearance","on_sale"] +} +---- + +And we apply the following update command: + +[source,json] +---- +{"id":"mydoc", + "price":{"set":99}, - "popularity":{"inc":20}, ++ "popularity":{"inc":-7}, + "categories":{"add":["toys","games"]}, + "sub_categories":{"add-distinct":"under_10"}, + "promo_ids":{"remove":"a123x"}, + "tags":{"remove":["free_to_try","on_sale"]} +} +---- + +The resulting document in our collection will be: + +[source,json] +---- +{"id":"mydoc", + "price":99, - "popularity":62, ++ "popularity":35, + "categories":["kids","toys","games"], + "sub_categories":["under_5","under_10"], + "tags":["buy_now","clearance"] +} +---- + +=== Updating Child Documents + +Solr supports modifying, adding and removing child documents as part of atomic updates. +Syntactically, updates changing the children of a document are very similar to regular atomic updates of simple fields, as demonstrated by the examples below. + +Schema and configuration requirements for updating child documents use the same +<<Field Storage>> requirements for atomic updates mentioned above. + +Under the hood, Solr conceptually behaves similarly for nested documents as for non-nested documents, it's just that it applies to entire trees (from the root) of nested documents instead of stand-alone documents. +You can expect more overhead because of this. +In-place updates avoid that. 
+ +[IMPORTANT] +==== +.Routing Updates using child document Ids in SolrCloud + +When SolrCloud receives document updates, the +xref:deployment-guide:solrcloud-shards-indexing.adoc#document-routing[document routing] rules for the collection is used to determine which shard should process the update based on the `id` of the document. + +When sending an update that specifies the `id` of a _child document_ this will not work by default: the correct shard to send the document to is based on the `id` of the "Root" document for the block the child document is in, *not* the `id` of the child document being updated. + +Solr offers two solutions to address this: + +* Clients may specify a xref:deployment-guide:solrcloud-shards-indexing.adoc#document-routing[`\_route_` parameter], with the `id` of the Root document as the parameter value, on each update to tell Solr which shard should process the update. +* Clients can use the (default) `compositeId` router's "prefix routing" feature when indexing all documents to ensure that all child/descendent documents in a Block use the same `id` prefix as the Root level document. +This will cause Solr's default routing logic to automatically send child document updates to the correct shard. + +Furthermore, you _should_ (sometimes _must_) specify the Root document's ID in the `\_root_` +field of this partial update. +This is how Solr understands that you are updating a child +document, and not a Root document. +Without it, Solr only guesses that the `\_route_` parameter is +equivalent, but it may be absent or not equivalent (e.g., when using the `implicit` router). + +All of the examples below use `id` prefixes, so no `\_route_` parameter will be necessary for these examples. 
+==== + +For the upcoming examples, we'll assume an index containing the same documents covered in xref:indexing-nested-documents.adoc#example-indexing-syntax[Indexing Nested Documents]: + +include::indexing-nested-documents.adoc[tag=sample-indexing-deeply-nested-documents] + +==== Modifying Child Document Fields + +All of the <<atomic-updates,Atomic Update operations>> mentioned above are supported for "real" fields of Child Documents: + +[source,bash] +---- +curl -X POST 'http://localhost:8983/solr/gettingstarted/update?commit=true' -H 'Content-Type: application/json' --data-binary '[ +{ + "id": "P11!S31", + "_root_": "P11!prod", + "price_i": { "inc": 73 }, + "color_s": { "set": "GREY" } +} ]' +---- + +==== Replacing All Child Documents + +As with normal (multiValued) fields, the `set` keyword can be used to replace all child documents in a pseudo-field: + +[source,bash] +---- +curl -X POST 'http://localhost:8983/solr/gettingstarted/update?commit=true' -H 'Content-Type: application/json' --data-binary '[ +{ + "id": "P22!S22", + "_root_": "P22!prod", + "manuals": { "set": [ { "id": "P22!D77", + "name_s": "Why Red Pens Are the Best", + "content_t": "... correcting papers ...", + }, + { "id": "P22!D88", + "name_s": "How to get Red ink stains out of fabric", + "content_t": "... vinegar ...", + } ] } + +} ]' +---- + +==== Adding a Child Document + +As with normal (multiValued) fields, the `add` keyword can be used to add additional child documents to a pseudo-field: + +[source,bash] +---- +curl -X POST 'http://localhost:8983/solr/gettingstarted/update?commit=true' -H 'Content-Type: application/json' --data-binary '[ +{ + "id": "P11!S21", + "_root_": "P11!prod", + "manuals": { "add": { "id": "P11!D99", + "name_s": "Why Red Staplers Are the Best", + "content_t": "Once upon a time, Mike Judge ...", + } } +} ]' +---- + ++Note that this is add-or-replace (by ID). 
Meaning, if it happens that doc `P11!S21` already has a child doc with the ID `P11!D99` (the one we are adding), then it will be replaced. + +==== Removing a Child Document + +As with normal (multiValued) fields, the `remove` keyword can be used to remove a child document (by `id`) from it's pseudo-field: + +[source,bash] +---- +curl -X POST 'http://localhost:8983/solr/gettingstarted/update?commit=true' -H 'Content-Type: application/json' --data-binary '[ +{ + "id": "P11!S21", + "_root_": "P11!prod", + "manuals": { "remove": { "id": "P11!D41" } } +} ]' +---- + + +== In-Place Updates + +In-place updates are very similar to atomic updates; in some sense, this is a subset of atomic updates. +In regular atomic updates, the entire document is reindexed internally during the application of the update. +However, in this approach, only the fields to be updated are affected and the rest of the documents are not reindexed internally. +Hence, the efficiency of updating in-place is unaffected by the size of the documents that are updated (i.e., number of fields, size of fields, etc.). +Apart from these internal differences in efficiency, there is no functional difference between atomic updates and in-place updates. + +An atomic update operation is performed using this In-Place approach only when the fields to be updated meet these three conditions: + +* are non-indexed (`indexed="false"`), non-stored (`stored="false"`), single valued (`multiValued="false"`) numeric docValues (`docValues="true"`) fields; +* the `\_version_` field is also a non-indexed, non-stored single valued docValues field; and, +* copy targets of updated fields, if any, are also non-indexed, non-stored single valued numeric docValues fields. + +To use in-place updates, add a modifier to the field that needs to be updated. - The content can be updated or incrementally increased. ++The content can be updated or incremented/decremented. + +`set`:: +Set or replace the field value(s) with the specified value(s). 
+May be specified as a single value. + +`inc`:: - Increments a numeric value by a specific amount. - Must be specified as a single numeric value. ++Increments or decrements a numeric field's value by a specific amount, specified as a single integer or float. ++Postive amounts increment the field's value, and negative decrement. + +[TIP] +==== +.Preventing Atomic Updates That Can't be Done In-Place + +Since it can be tricky to ensure that all of the necessary conditions are satisfied to ensure that an update can be done In-Place, Solr supports a request parameter option named `update.partial.requireInPlace`. +When set to `true`, an atomic update that can not be done In-Place will fail. +Users can specify this option when they would prefer that an update request "fail fast" if it can't be done In-Place. +==== + +=== In-Place Update Example + +If the price and popularity fields are defined in the schema as: + +`<field name="price" type="float" indexed="false" stored="false" docValues="true"/>` + +`<field name="popularity" type="float" indexed="false" stored="false" docValues="true"/>` + +If the following document exists in our collection: + +[source,json] +---- +{ + "id":"mydoc", + "price":10, + "popularity":42, + "categories":["kids"], + "promo_ids":["a123x"], + "tags":["free_to_try","buy_now","clearance","on_sale"] +} +---- + +And we apply the following update command: + +[source,json] +---- +{ + "id":"mydoc", + "price":{"set":99}, + "popularity":{"inc":20} +} +---- + +The resulting document in our collection will be: + +[source,json] +---- +{ + "id":"mydoc", + "price":99, + "popularity":62, + "categories":["kids"], + "promo_ids":["a123x"], + "tags":["free_to_try","buy_now","clearance","on_sale"] +} +---- + +== Optimistic Concurrency + +Optimistic Concurrency is a feature of Solr that can be used by client applications which update/replace documents to ensure that the document they are replacing/updating has not been concurrently modified by another client 
application. +This feature works by requiring a `\_version_` field on all documents in the index, and comparing that to a `\_version_` specified as part of the update command. +By default, Solr's Schema includes a `\_version_` field, and this field is automatically added to each new document. + +In general, using optimistic concurrency involves the following work flow: + +. A client reads a document. +In Solr, one might retrieve the document with the `/get` handler to be sure to have the latest version. +. A client changes the document locally. +. The client resubmits the changed document to Solr, for example, perhaps with the `/update` handler. +. If there is a version conflict (HTTP error code 409), the client starts the process over. + +When the client resubmits a changed document to Solr, the `\_version_` can be included with the update to invoke optimistic concurrency control. +Specific semantics are used to define when the document should be updated or when to report a conflict. + +* If the content in the `\_version_` field is greater than '1' (i.e., '12345'), then the `\_version_` in the document must match the `\_version_` in the index. +* If the content in the `\_version_` field is equal to '1', then the document must simply exist. +In this case, no version matching occurs, but if the document does not exist, the updates will be rejected. +* If the content in the `\_version_` field is less than '0' (i.e., '-1'), then the document must *not* exist. +In this case, no version matching occurs, but if the document exists, the updates will be rejected. +* If the content in the `\_version_` field is equal to '0', then it doesn't matter if the versions match or if the document exists or not. +If it exists, it will be overwritten; if it does not exist, it will be added. + +When documents are added/updated in batches even a single version conflict may lead to rejecting the entire batch. 
+Use the parameter `failOnVersionConflicts=false` to avoid failure of the entire batch when version constraints fail for one or more documents in a batch. + +If the document being updated does not include the `\_version_` field, and atomic updates are not being used, the document will be treated by normal Solr rules, which is usually to discard the previous version. + +When using Optimistic Concurrency, clients can include an optional `versions=true` request parameter to indicate that the _new_ versions of the documents being added should be included in the response. +This allows clients to immediately know what the `\_version_` is of every document added without needing to make a redundant xref:configuration-guide:realtime-get.adoc[`/get` request]. + +Following are some examples using `versions=true` in queries: + +[source,bash] +---- +$ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/techproducts/update?versions=true&omitHeader=true' --data-binary ' +[ { "id" : "aaa" }, + { "id" : "bbb" } ]' +---- +[source,json] +---- +{ + "adds":[ + "aaa",1632740120218042368, + "bbb",1632740120250548224]} +---- + +In this example, we have added 2 documents "aaa" and "bbb". +Because we added `versions=true` to the request, the response shows the document version for each document. 
+ +[source,bash] +---- +$ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/techproducts/update?_version_=999999&versions=true&omitHeader=true' --data-binary ' + [{ "id" : "aaa", + "foo_s" : "update attempt with wrong existing version" }]' +---- +[source,json] +---- +{ + "error":{ + "metadata":[ + "error-class","org.apache.solr.common.SolrException", + "root-error-class","org.apache.solr.common.SolrException"], + "msg":"version conflict for aaa expected=999999 actual=1632740120218042368", + "code":409}} +---- + + +In this example, we've attempted to update document "aaa" but specified the wrong version in the request: `_version_=999999` doesn't match the document version we just got when we added the document. +We get an error in response. + +[source,bash] +---- +$ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/techproducts/update?_version_=1632740120218042368&versions=true&commit=true&omitHeader=true' --data-binary ' +[{ "id" : "aaa", + "foo_s" : "update attempt with correct existing version" }]' +---- +[source,json] +---- +{ + "adds":[ + "aaa",1632740462042284032]} +---- + +Now we've sent an update with a value for `\_version_` that matches the value in the index, and it succeeds. +Because we included `versions=true` to the update request, the response includes a different value for the `\_version_` field. 
+[source,bash] +---- +$ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/techproducts/update?&versions=true&commit=true&omitHeader=true' --data-binary ' +[{ "id" : "aaa", _version_ : 100, + "foo_s" : "update attempt with wrong existing version embedded in document" }]' +---- +[source,json] +---- +{ + "error":{ + "metadata":[ + "error-class","org.apache.solr.common.SolrException", + "root-error-class","org.apache.solr.common.SolrException"], + "msg":"version conflict for aaa expected=100 actual=1632740462042284032", + "code":409}} +---- + +Now we've sent an update with a value for `\_version_` embedded in the document itself. +This request fails because we have specified the wrong version. +This is useful when documents are sent in a batch and different `\_version_` values need to be specified for each doc. + +[source,bash] +---- +$ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/techproducts/update?&versions=true&commit=true&omitHeader=true' --data-binary ' +[{ "id" : "aaa", _version_ : 1632740462042284032, + "foo_s" : "update attempt with correct version embedded in document" }]' +---- +[source,json] +---- +{ + "adds":[ + "aaa",1632741942747987968]} +---- + +Now we've sent an update with a value for `\_version_` embedded in the document itself. +This request fails because we have specified the wrong version. +This is useful when documents are sent in a batch and different `\_version_` values need to be specified for each doc. + + +[source,bash] +---- +$ curl 'http://localhost:8983/solr/techproducts/query?q=*:*&fl=id,_version_&omitHeader=true' +---- + +[source,json] +---- +{ + "response":{"numFound":3,"start":0,"docs":[ + { "_version_":1632740120250548224, + "id":"bbb"}, + { "_version_":1632741942747987968, + "id":"aaa"}] + }} + +---- + +Finally, we can issue a query that requests the `\_version_` field be included in the response, and we can see that for the two documents in our example index. 
+ +[source,bash] +---- +$ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/techproducts/update?versions=true&_version_=-1&failOnVersionConflicts=false&omitHeader=true' --data-binary ' +[ { "id" : "aaa" }, + { "id" : "ccc" } ]' +---- +[source,json] +---- +{ + "adds":[ + "ccc",1632740949182382080]} +---- + +In this example, we have added 2 documents "aaa" and "ccc". +As we have specified the parameter `\_version_=-1`, this request should not add the document with the id `aaa` because it already exists. +The request succeeds & does not throw any error because the `failOnVersionConflicts=false` parameter is specified. +The response shows that only document `ccc` is added and `aaa` is silently ignored. + + +For more information, please also see Yonik Seeley's presentation on https://www.youtube.com/watch?v=WYVM6Wz-XTw[NoSQL features in Solr 4] from Apache Lucene EuroCon 2012. + +== Document Centric Versioning Constraints + +Optimistic Concurrency is extremely powerful, and works very efficiently because it uses an internally assigned, globally unique values for the `\_version_` field. +However, in some situations users may want to configure their own document specific version field, where the version values are assigned on a per-document basis by an external system, and have Solr reject updates that attempt to replace a document with an "older" version. +In situations like this the {solr-javadocs}/core/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.html[`DocBasedVersionConstraintsProcessorFactory`] can be useful. 
+ +The basic usage of `DocBasedVersionConstraintsProcessorFactory` is to configure it in `solrconfig.xml` as part of the xref:configuration-guide:update-request-processors.adoc#update-request-processor-configuration[UpdateRequestProcessorChain] and specify the name of your custom `versionField` in your schema that should be checked when validating updates: + +[source,xml] +---- +<processor class="solr.DocBasedVersionConstraintsProcessorFactory"> + <str name="versionField">my_version_l</str> +</processor> +---- + +Note that `versionField` is a comma-delimited list of fields to check for version numbers. +Once configured, this update processor will reject (HTTP error code 409) any attempt to update an existing document where the value of the `my_version_l` field in the "new" document is not greater then the value of that field in the existing document. + +.versionField vs `\_version_` +[IMPORTANT] +==== +The `\_version_` field used by Solr for its normal optimistic concurrency also has important semantics in how updates are distributed to replicas in SolrCloud, and *MUST* be assigned internally by Solr. +Users can not re-purpose that field and specify it as the `versionField` for use in the `DocBasedVersionConstraintsProcessorFactory` configuration. +==== + +`DocBasedVersionConstraintsProcessorFactory` supports the following additional configuration parameters, which are all optional: + +`ignoreOldUpdates`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `false` +|=== ++ +If set to `true`, the update will be silently ignored (and return a status 200 to the client) instead of rejecting updates where the `versionField` is too low. + +`deleteVersionParam`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +A String parameter that can be specified to indicate that this processor should also inspect Delete By Id commands. 
++ +The value of this option should be the name of a request parameter that the processor will consider mandatory for all attempts to Delete By Id, and must be be used by clients to specify a value for the `versionField` which is greater then the existing value of the document to be deleted. ++ +When using this request parameter, any Delete By Id command with a high enough document version number to succeed will be internally converted into an Add Document command that replaces the existing document with a new one which is empty except for the Unique Key and `versionField` to keeping a record of the deleted version so future Add Document commands will fail if their "new" version is not high enough. ++ +If `versionField` is specified as a list, then this parameter too must be specified as a comma-delimited list of the same size so that the parameters correspond with the fields. + +`supportMissingVersionOnOldDocs`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `false` +|=== ++ +If set to `true`, allows any document written *before* this feature is enabled and are missing the `versionField`, to be overwritten. + +Please consult the {solr-javadocs}/core/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.html[DocBasedVersionConstraintsProcessorFactory javadocs] and https://github.com/apache/lucene-solr/blob/master/solr/core/src/test-files/solr/collection1/conf/solrconfig-externalversionconstraint.xml[test solrconfig.xml file] for additional information and example usages. diff --cc solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc index bbdd5c5,0000000..ddf10ab mode 100644,000000..100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc @@@ -1,476 -1,0 +1,480 @@@ += Common Query Parameters +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +Several query parsers share supported query parameters. + +The following sections describe Solr's common query parameters, which are supported by the xref:configuration-guide:requesthandlers-searchcomponents#search-handlers[search request handlers]. + +== defType Parameter + +The defType parameter selects the query parser that Solr should use to process the main query parameter (`q`) in the request. +For example: + +`defType=dismax` + +If no `defType` parameter is specified, then by default, the xref:standard-query-parser.adoc[] is used. +(e.g., `defType=lucene`) + +== sort Parameter + +The `sort` parameter arranges search results in either ascending (`asc`) or descending (`desc`) order. +The parameter can be used with either numerical or alphabetical content. +The directions can be entered in either all lowercase or all uppercase letters (i.e., both `asc` and `ASC` are accepted). + +Solr can sort query responses according to: + +* Document scores +* xref:function-queries.adoc#sort-by-function[Function results] +* The value of any primitive field (numerics, string, boolean, dates, etc.) 
which has `docValues="true"` (or `multiValued="false"` and `indexed="true"`, in which case the indexed terms will used to build DocValue like structures on the fly at runtime) +* A SortableTextField which implicitly uses `docValues="true"` by default to allow sorting on the original input string regardless of the analyzers used for Searching. +* A single-valued TextField that uses an analyzer (such as the KeywordTokenizer) that produces only a single term per document. +TextField does not support `docValues="true"`, but a DocValue-like structure will be built on the fly at runtime. +** *NOTE:* If you want to be able to sort on a field whose contents you want to tokenize to facilitate searching, xref:indexing-guide:copy-fields.adoc[use a `copyField` directive] in the Schema to clone the field. +Then search on the field and sort on its clone. + +In the case of primitive fields, or SortableTextFields, that are `multiValued="true"` the representative value used for each doc when sorting depends on the sort direction: The minimum value in each document is used for ascending (`asc`) sorting, while the maximal value in each document is used for descending (`desc`) sorting. +This default behavior is equivalent to explicitly sorting using the 2 argument xref:function-queries.adoc#field-function[`field()`] function: `sort=field(name,min) asc` and `sort=field(name,max) desc` + +The table below explains how Solr responds to various settings of the `sort` parameter. + +[%autowidth.stretch,options="header"] +|=== +|Example |Result +| |If the sort parameter is omitted, sorting is performed as though the parameter were set to `score desc`. +|score desc |Sorts in descending order from the highest score to the lowest score. 
+|price asc |Sorts in ascending order of the price field +|div(popularity,price) desc |Sorts in descending order of the result of the function `popularity / price` +|inStock desc, price asc |Sorts by the contents of the `inStock` field in descending order, then when multiple documents have the same value for the `inStock` field, those results are sorted in ascending order by the contents of the price field. +|categories asc, price asc |Sorts by the lowest value of the (multivalued) `categories` field in ascending order, then when multiple documents have the same lowest `categories` value, those results are sorted in ascending order by the contents of the price field. +|=== + +Regarding the sort parameter's arguments: + +* A sort ordering must include a field name (or `score` as a pseudo field), followed by whitespace (escaped as + or `%20` in URL strings), followed by a sort direction (`asc` or `desc`). + +* Multiple sort orderings can be separated by a comma, using this syntax: `sort=<field name>+<direction>,<field name>+<direction>],...` +** When more than one sort criteria is provided, the second entry will only be used if the first entry results in a tie. +If there is a third entry, it will only be used if the first AND second entries are tied. +And so on. +** If documents tie in all of the explicit sort criteria, Solr uses each document's Lucene document ID as the final tie-breaker. +This internal property is subject to change during segment merges and document updates, which can lead to unexpected result ordering changes. +Users looking to avoid this behavior can add an additional sort criteria on a unique or rarely-shared field such as `id` to prevent ties from occurring (e.g., `price desc,id asc`). + +== start Parameter + +When specified, the `start` parameter specifies an offset into a query's result set and instructs Solr to begin displaying results from this offset. + +The default value is `0`. 
+In other words, by default, Solr returns results without an offset, beginning where the results themselves begin. + +Setting the `start` parameter to some other number, such as `3`, causes Solr to skip over the preceding records and start at the document identified by the offset. + +You can use the `start` parameter this way for paging. +For example, if the `rows` parameter is set to 10, you could display three successive pages of results by setting start to 0, then re-issuing the same query and setting start to 10, then issuing the query again and setting start to 20. + +== rows Parameter + +You can use the `rows` parameter to paginate results from a query. +The parameter specifies the maximum number of documents from the complete result set that Solr should return to the client at one time. + +The default value is `10`. +That is, by default, Solr returns 10 documents at a time in response to a query. + +== canCancel Parameter + +This parameter defines if this query is cancellable during execution using the +xref:deployment-guide:task-management.adoc[task management] interface. + +== queryUUID Parameter + +For cancellable queries, this allows specifying a custom UUID to identify the query with. +If `canCancel` is specified and `queryUUID` is not set, an auto generated UUID will be assigned to the query. + +If `queryUUID` is specified, this UUID will be used for identifying the query. +Note that if using `queryUUID`, the responsibility of ensuring uniqueness of the UUID lies with the caller. +If a query UUID is reused while the original query UUID is still active, it will cause an exception to be throws for the second query. + +It is recommended that the user either uses all custom UUIDs or depends completely on the system to generate UUID. +Mixing the two can lead to conflict of UUIDs. + +== fq (Filter Query) Parameter + +The `fq` parameter defines a query that can be used to restrict the superset of documents that can be returned, without influencing score. 
+It can be very useful for speeding up complex queries, since the queries specified with `fq` are cached independently of the main query. +When a later query uses the same filter, there's a cache hit, and filter results are returned quickly from the cache. + +When using the `fq` parameter, keep in mind the following: + +* The `fq` parameter can be specified multiple times in a query. +Documents will only be included in the result if they are in the intersection of the document sets resulting from each instance of the parameter. +In the example below, only documents which have a popularity greater then 10 and have a section of 0 will match. ++ +[source,text] +---- +fq=popularity:[10 TO *]&fq=section:0 +---- + +* Filter queries can involve complicated Boolean queries. +The above example could also be written as a single `fq` with two mandatory clauses like so: ++ +[source,text] +---- +fq=+popularity:[10 TO *] +section:0 +---- + +* The document sets from each filter query are cached independently. +Thus, concerning the previous examples: use a single `fq` containing two mandatory clauses if those clauses appear together often, and use two separate `fq` parameters if they are relatively independent. +(To learn about tuning cache sizes and making sure a filter cache actually exists, see xref:configuration-guide:caches-warming.adoc#caches[Caches].) +* It is also possible to use xref:standard-query-parser.adoc#differences-between-lucenes-classic-query-parser-and-solrs-standard-query-parser[filter(condition) syntax] inside the `fq` to cache clauses individually and - among other things - to achieve union of cached filter queries. + +* As with all parameters: special characters in an URL need to be properly escaped and encoded as hex values. +Online tools are available to help you with URL-encoding. +For example: http://meyerweb.com/eric/tools/dencoder/. 
+ +=== cache Local Parameter + +Solr caches the results of filter queries by default in the xref:configuration-guide:caches-warming.adoc#filter-cache[filter cache]. +To disable it, use the boolean `cache` xref:local-params.adoc[local param], such as `fq={!geofilt cache=false}...`. +Do this when you think a query is unlikely to be repeated. + +Non-cached filter queries also support the `cost` local parameter to provide a _hint_ as to the order in which they are evaluated. +This allows you to order less expensive non-cached filters before expensive non-cached filters. +At the Lucene layer, this maps to `TwoPhaseIterator.matchCost` if the query has a TPI. + +*Post Filters*: For very high cost filters, if `cache=false` _and_ `cost>=100`, _and_ the query implements the `PostFilter` interface, a Collector will be requested from that query and used to filter documents after they have matched the main query and all other filter queries. +There can be multiple post filters; they are also ordered by cost. + +For most queries the default behavior is `cost=0`, but some types of queries (such as `{!frange}`) default to `cost=100`, because they are most efficient when used as a `PostFilter`. + +This is an example of 3 regular filters, where all matching documents generated by each are computed up front and cached independently: + +[source,text] +q=some keywords +fq=quantity_in_stock:[5 TO *] +fq={!frange l=10 u=100}mul(popularity,price) +fq={!frange cost=200 l=0}pow(mul(sum(1, query('tag:smartphone')), div(1,avg_rating)), 2.3) + +These are the same filters run without caching. 
+The simple range query on the `quantity_in_stock` field will be run in parallel with the main query like a traditional Lucene filter, while the 2 `frange` filters will only be checked against each document that has already matched the main query and the `quantity_in_stock` range query -- first the simpler `mul(popularity,price)` will be checked (because of its implicit `cost=100`) and only if it matches will the final very complex filter (with its higher `cost=200`) be checked. + +[source,text] +q=some keywords +fq={!cache=false}quantity_in_stock:[5 TO *] +fq={!frange cache=false l=10 u=100}mul(popularity,price) +fq={!frange cache=false cost=200 l=0}pow(mul(sum(1, query('tag:smartphone')), div(1,avg_rating)), 2.3) + +== fl (Field List) Parameter + +The `fl` parameter limits the information included in a query response to a specified list of fields. +The fields must be either `stored="true"` or `docValues="true"`. + +The field list can be specified as a space-separated or comma-separated list of field names. +The string "score" can be used to indicate that the score of each document for the particular query should be returned as a field. +The wildcard character `*` selects all the fields in the document which are either `stored="true"` or `docValues="true"` and `useDocValuesAsStored="true"` (which is the default when docValues are enabled). ++Combine the wildcard character with field name to make a glob pattern for matching multiple field names. ++ +You can also add pseudo-fields, functions and transformers to the field list request. + +This table shows some basic examples of how to use `fl`: + +[%autowidth.stretch,options="header"] +|=== +|Field List |Result +|id name price |Return only the id, name, and price fields. +|id,name,price |Return only the id, name, and price fields. +|id name, price |Return only the id, name, and price fields. ++|id na* price |Return the id, name, name_exact, and price fields. ++|id na*e price |Return the id, name, and price fields. 
+|id score |Return the id field and the score. +|* |Return all the `stored` fields in each document, as well as any `docValues` fields that have `useDocValuesAsStored="true"`. This is the default value of the fl parameter. +|* score |Return all the fields in each document, along with each field's score. +|*,dv_field_name |Return all the `stored` fields in each document, and any `docValues` fields that have `useDocValuesAsStored="true"` and the docValues from dv_field_name even if it has `useDocValuesAsStored="false"` +|=== + +=== Functions with fl + +xref:function-queries.adoc[] can be computed for each document in the result and returned as a pseudo-field: + +[source,text] +---- +fl=id,title,product(price,popularity) +---- + +=== Document Transformers with fl + +xref:document-transformers.adoc[] can be used to modify the information returned about each document in the results of a query: + +[source,text] +---- +fl=id,title,[explain] +---- + +=== Field Name Aliases + +You can change the key used in the response for a field, function, or transformer by prefixing it with a `_displayName_:` value. + +For example, `why_score` is the display name below: + +[source,text] +---- +fl=id,sales_price:price,secret_sauce:prod(price,popularity),why_score:[explain style=nl] +---- + +[source,json] +---- +{ +"response": { + "numFound": 2, + "start": 0, + "docs": [{ + "id": "6H500F0", + "secret_sauce": 2100.0, + "sales_price": 350.0, + "why_score": { + "match": true, + "value": 1.052226, + "description": "weight(features:cache in 2) [DefaultSimilarity], result of:", + "details": [{ + "..." +}]}}]}} +---- + +== debug Parameter + +The `debug` parameter can be specified multiple times and supports the following arguments: + +* `debug=query`: return debug information about the query only. +* `debug=timing`: return debug information about how long the query took to process. +* `debug=results`: return debug information about the score results (also known as "explain"). 
+** By default, score explanations are returned as large string values, using newlines and tab indenting for structure & readability, but an additional `debug.explain.structured=true` parameter may be specified to return this information as nested data structures native to the response format requested by `wt`. +* `debug=all`: return all available debug information about the request. +An alternative usage is `debug=true`. + +For backwards compatibility with older versions of Solr, `debugQuery=true` may instead be specified as an alternative way to indicate `debug=all`. + +The default behavior is not to include debugging information. + +== explainOther Parameter + +The `explainOther` parameter specifies a Lucene query in order to identify a set of documents. +If this parameter is included and is set to a non-blank value, the query will return debugging information, along with the "explain info" of each document that matches the Lucene query, relative to the main query (which is specified by the `q` parameter). +For example: + +[source,text] +---- +q=supervillians&debugQuery=on&explainOther=id:juggernaut +---- + +The query above allows you to examine the scoring explain info of the top matching documents, compare it to the explain info for documents matching `id:juggernaut`, and determine why the rankings are not as you expect. + +The default value of this parameter is blank, which causes no extra "explain info" to be returned. + +== timeAllowed Parameter + +This parameter specifies the amount of time, in milliseconds, allowed for a search to complete. +If this time expires before the search is complete, any partial results will be returned, but values such as `numFound`, xref:faceting.adoc[facet] counts, and result xref:stats-component.adoc[stats] may not be accurate for the entire result set. +In case of expiration, if `omitHeader` isn't set to `true` the response header contains a special flag called `partialResults`. 
+When using `timeAllowed` in combination with xref:pagination-of-results.adoc#using-cursors[`cursorMark`], and the `partialResults` flag is present, some matching documents may have been skipped in the result set. +Additionally, if the `partialResults` flag is present, `cursorMark` can match `nextCursorMark` even if there may be more results + +[source,json] +---- +{ + "responseHeader": { + "status": 0, + "zkConnected": true, + "partialResults": true, + "QTime": 20, + "params": { + "q": "*:*" + } + }, + "response": { + "numFound": 77, + "start": 0, + "docs": [ "..." ] + } +} +---- + +This value is only checked at the time of: + +. Query Expansion, and +. Document collection +. Doc Values reading + +As this check is periodically performed, the actual time for which a request can be processed before it is aborted would be marginally greater than or equal to the value of `timeAllowed`. +If the request consumes more time in other stages, custom components, etc., this parameter is not expected to abort the request. +Regular search, JSON Facet and the Analytics component abandon requests in accordance with this parameter. + +== segmentTerminateEarly Parameter + +This parameter may be set to either `true` or `false`. + +If set to `true`, and if xref:configuration-guide:index-segments-merging.adoc#mergepolicyfactory[the mergePolicyFactory] for this collection is a {solr-javadocs}/core/org/apache/solr/index/SortingMergePolicyFactory.html[`SortingMergePolicyFactory`] which uses a `sort` option compatible with <<sort Parameter,the sort parameter>> specified for this query, then Solr will be able to skip documents on a per-segment basis that are definitively not candidates for the current page of results. + +If early termination is used, a `segmentTerminatedEarly` header will be included in the `responseHeader`. 
+ +Similar to using <<timeAllowed Parameter,the `timeAllowed` Parameter>>, when early segment termination happens values such as `numFound`, xref:faceting.adoc[Facet] counts, and result xref:stats-component.adoc[Stats] may not be accurate for the entire result set. + +The default value of this parameter is `false`. + +== omitHeader Parameter + +This parameter may be set to either `true` or `false`. + +If set to `true`, this parameter excludes the header from the returned results. +The header contains information about the request, such as the time it took to complete. +The default value for this parameter is `false`. +When using parameters such as <<timeallowed-parameter,`timeAllowed`>>, and xref:deployment-guide:solrcloud-distributed-requests.adoc#shards-tolerant-parameter[`shards.tolerant`], which can lead to partial results, it is advisable to keep the header, so that the `partialResults` flag can be checked, and values such as `numFound`, `nextCursorMark`, xref:faceting.adoc[Facet] counts, and result xref:stats-component.adoc[Stats] can be interpreted in the context of partial results. + +== wt Parameter + +The `wt` parameter selects the Response Writer that Solr should use to format the query's response. +For detailed descriptions of Response Writers, see xref:response-writers.adoc[]. + +If you do not define the `wt` parameter in your queries, JSON will be returned as the format of the response. + +== logParamsList Parameter + +By default, Solr logs all parameters of requests. +Set this parameter to restrict which parameters of a request are logged. +This may help control logging to only those parameters considered important to your organization. + +For example, you could define this like: + +`logParamsList=q,fq` + +And only the 'q' and 'fq' parameters will be logged. + +If no parameters should be logged, you can send `logParamsList` as empty (i.e., `logParamsList=`). 
+ +TIP: This parameter not only applies to query requests, but to any kind of request to Solr. + +== echoParams Parameter + +The `echoParams` parameter controls what information about request parameters is included in the response header. + +The `echoParams` parameter accepts the following values: + +* `explicit`: Only parameters included in the actual request will be added to the `params` section of the response header. +* `all`: Include all request parameters that contributed to the query. +This will include everything defined in the request handler definition found in `solrconfig.xml` as well as parameters included with the request, plus the `_` parameter. +If a parameter is included in the request handler definition AND the request, it will appear multiple times in the response header. +* `none`: Entirely removes the `params` section of the response header. +No information about the request parameters will be available in the response. + +The default value is `none`, though many `solrconfig.xml` handlers set default to be `explicit`. 
+Here is an example of a JSON response where the echoParams parameter was set in that SearchHandler's default, +so it itself was not echoed, but only three parameters from the request itself - `q`, `wt`, and `indent`: + +[source,json] +---- +{ + "responseHeader": { + "status": 0, + "QTime": 0, + "params": { + "q": "solr", + "indent": "true", + "wt": "json", + "_": "1458227751857" + } + }, + "response": { + "numFound": 0, + "start": 0, + "docs": [] + } +} +---- + +This is what happens if a similar request is sent that adds `echoParams=all` to the three parameters used in the previous example: + +[source,json] +---- +{ + "responseHeader": { + "status": 0, + "QTime": 0, + "params": { + "q": "solr", + "df": "text", + "indent": "true", + "echoParams": "all", + "rows": "10", + "wt": "json", + "_": "1458228887287" + } + }, + "response": { + "numFound": 0, + "start": 0, + "docs": [] + } +} +---- + +== minExactCount Parameter +When this parameter is used, Solr will count the number of hits accurately at least until this value. +After that, Solr can skip over documents that don't have a score high enough to enter in the top N. +This can greatly improve performance of search queries. +On the other hand, when this parameter is used, the `numFound` may not be exact, and may instead be an approximation. +The `numFoundExact` boolean attribute is included in all responses, indicating if the `numFound` value is exact or an approximation. +If it's an approximation, the real number of hits for the query is guaranteed to be greater or equal `numFound`. + +More about approximate document counting and `minExactCount`: + +* The documents returned in the response are guaranteed to be the docs with the top scores. +This parameter will not make Solr skip documents that are to be returned in the response, it will only allow Solr to skip counting docs that, while they match the query, their score is low enough to not be in the top N. 
+* Providing `minExactCount` doesn't guarantee that Solr will use approximate hit counting (and thus, provide the speedup). +Some types of queries, or other parameters (like if facets are requested) will require accurate counting. +* Approximate counting can only be used when sorting by `score desc` first (which is the default sort in Solr). +Other fields can be used after `score desc`, but if any other type of sorting is used before score, then the approximation won't be applied. +* When doing distributed queries across multiple shards, each shard will accurately count hits until `minExactCount` (which means the query could be hitting `numShards * minExactCount` docs and `numFound` in the response would still be accurate). +For example: + +[source,text] +q=quick brown fox&minExactCount=100&rows=10 + +[source,json] +---- +"response": { + "numFound": 153, + "start": 0, + "numFoundExact": false, + "docs": [{"doc1"}] +} +---- +Since `numFoundExact=false`, we know the number of documents matching the query is greater than or equal to 153. +If we specify a higher value for `minExactCount`: + +[source,text] +q=quick brown fox&minExactCount=200&rows=10 + +[source,json] +---- +"response": { + "numFound": 163, + "start": 0, + "numFoundExact": true, + "docs": [{"doc1"}] +} +---- +In this case we know that `163` is the exact number of hits for the query. +Both queries must have returned the same number of documents in the top 10.
