This is an automated email from the ASF dual-hosted git repository. dsmiley pushed a commit to branch branch_9_8 in repository https://gitbox.apache.org/repos/asf/solr.git
commit 6382b70c52aa47a0c4877061b34073b20497e1d1 Author: Thomas Wöckinger <thomaswoeckin...@users.noreply.github.com> AuthorDate: Wed Feb 12 23:09:28 2025 +0100 SOLR-17649: Fix JSON faceting on multiValued number types (#3158) * Add DV type checks to alert to a problem instead of silently returning nothing * Clarify isNumeric --------- Co-authored-by: Thomas Wöckinger <t...@silbergrau.com> Co-authored-by: David Smiley <dsmi...@apache.org> (cherry picked from commit f66aa9ed07fa76b5db45186073a9b2ac20c01fd4) --- solr/CHANGES.txt | 2 + .../org/apache/solr/search/facet/FacetField.java | 22 +++++---- .../search/facet/FacetFieldProcessorByArrayDV.java | 2 +- .../org/apache/solr/search/facet/FieldUtil.java | 52 ++++++++++++++++------ .../solr/collection1/conf/schema_latest.xml | 2 + .../apache/solr/response/TestRawTransformer.java | 3 +- .../apache/solr/search/facet/TestJsonFacets.java | 29 ++++++++++++ 7 files changed, 89 insertions(+), 23 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e0b8509e0d5..8989430fb30 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -16,6 +16,8 @@ Bug Fixes * SOLR-17652: Fix a bug that could cause long leader elections to leave PULL replicas in DOWN state forever. (hossman) +* SOLR-17649: Fix a regression of faceting on multi-valued EnumFieldType, introduced by an earlier 9.x version. (Thomas Wöckinger, David Smiley) + Dependency Upgrades --------------------- (No changes) diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java index 77e2e373b3b..c4b0fdac2a3 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java @@ -18,9 +18,9 @@ package org.apache.solr.search.facet; import java.util.HashMap; import java.util.Map; +import org.apache.lucene.index.DocValuesType; import org.apache.solr.common.SolrException; import org.apache.solr.schema.FieldType; -import org.apache.solr.schema.NumberType; import org.apache.solr.schema.SchemaField; public class FacetField extends FacetRequestSorted { @@ -95,9 +95,9 @@ public class FacetField extends FacetRequestSorted { return new FacetFieldProcessorByArrayDV(fcontext, this, sf); } - NumberType ntype = ft.getNumberType(); + final boolean isNumber = ft.getNumberType() != null; // ensure we can support the requested options for numeric faceting: - if (ntype != null) { + if (isNumber) { if (prefix != null) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, @@ -139,11 +139,11 @@ public class FacetField extends FacetRequestSorted { // FieldType.getDocValuesType() if (!multiToken) { - if (mincount > 0 && prefix == null && (ntype != null || method == FacetMethod.DVHASH)) { + if (mincount > 0 && prefix == null && (isNumber || method == FacetMethod.DVHASH)) { // TODO can we auto-pick for strings when term cardinality is much greater than DocSet // cardinality? or if we don't know cardinality but DocSet size is very small return new FacetFieldProcessorByHashDV(fcontext, this, sf); - } else if (ntype == null) { + } else if (isNumber == false) { // single valued string... return new FacetFieldProcessorByArrayDV(fcontext, this, sf); } else { @@ -152,12 +152,13 @@ public class FacetField extends FacetRequestSorted { } } - if (sf.hasDocValues() && sf.getType().isPointField()) { + // multi-valued after this point + + if (sf.hasDocValues() && isNumber && dvType(fcontext, field) != DocValuesType.SORTED_SET) { + // doesn't support SORTED_SET DV (it's a TODO) return new FacetFieldProcessorByHashDV(fcontext, this, sf); } - // multi-valued after this point - if (sf.hasDocValues() || method == FacetMethod.DV || !sf.isUninvertible()) { // single and multi-valued string docValues return new FacetFieldProcessorByArrayDV(fcontext, this, sf); @@ -167,6 +168,11 @@ public class FacetField extends FacetRequestSorted { return new FacetFieldProcessorByArrayUIF(fcontext, this, sf); } + private static DocValuesType dvType(FacetContext fcontext, String field) { + var fieldInfo = fcontext.searcher.getFieldInfos().fieldInfo(field); + return fieldInfo == null ? null : fieldInfo.getDocValuesType(); + } + @Override public FacetMerger createFacetMerger(Object prototype) { return new FacetFieldMerger(this); diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java index cb7d5492a91..27fcc38646c 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java @@ -38,7 +38,7 @@ import org.apache.solr.search.facet.SweepCountAware.SegCountGlobal; import org.apache.solr.search.facet.SweepCountAware.SegCountPerSeg; import org.apache.solr.uninverting.FieldCacheImpl; -/** Grabs values from {@link DocValues}. */ +/** Grabs values from {@link SortedDocValues} or {@link SortedDocValues}. */ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray { static boolean unwrap_singleValued_multiDv = true; // only set to false for test coverage diff --git a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java index 07abc6c6ab1..6baac331c92 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java @@ -18,8 +18,12 @@ package org.apache.solr.search.facet; import java.io.IOException; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; @@ -44,27 +48,49 @@ public class FieldUtil { public static SortedDocValues getSortedDocValues( QueryContext context, SchemaField field, QParser qparser) throws IOException { - SortedDocValues si = - context.searcher().getSlowAtomicReader().getSortedDocValues(field.getName()); - // if (!field.hasDocValues() && (field.getType() instanceof StrField || field.getType() - // instanceof TextField)) { - // } - - return si == null ? DocValues.emptySorted() : si; + var reader = context.searcher().getSlowAtomicReader(); + var dv = reader.getSortedDocValues(field.getName()); + checkDvType(dv, field, reader); + return dv == null ? DocValues.emptySorted() : dv; } public static SortedSetDocValues getSortedSetDocValues( QueryContext context, SchemaField field, QParser qparser) throws IOException { - SortedSetDocValues si = - context.searcher().getSlowAtomicReader().getSortedSetDocValues(field.getName()); - return si == null ? DocValues.emptySortedSet() : si; + var reader = context.searcher().getSlowAtomicReader(); + var dv = reader.getSortedSetDocValues(field.getName()); + checkDvType(dv, field, reader); + return dv == null ? DocValues.emptySortedSet() : dv; } public static NumericDocValues getNumericDocValues( QueryContext context, SchemaField field, QParser qparser) throws IOException { - SolrIndexSearcher searcher = context.searcher(); - NumericDocValues si = searcher.getSlowAtomicReader().getNumericDocValues(field.getName()); - return si == null ? DocValues.emptyNumeric() : si; + var reader = context.searcher().getSlowAtomicReader(); + var dv = reader.getNumericDocValues(field.getName()); + checkDvType(dv, field, reader); + return dv == null ? DocValues.emptyNumeric() : dv; + } + + private static void checkDvType(Object dv, SchemaField field, LeafReader reader) { + if (dv == null) { + return; + } + FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field.getName()); + if (fieldInfo == null) { + return; + } + var dvType = fieldInfo.getDocValuesType(); + if (dvType == DocValuesType.NONE) { + return; + } else if (dvType == DocValuesType.SORTED) { + if (dv instanceof SortedDocValues) return; + } else if (dvType == DocValuesType.SORTED_SET) { + if (dv instanceof SortedSetDocValues) return; + } else if (dvType == DocValuesType.NUMERIC) { + if (dv instanceof NumericDocValues) return; + } else if (dvType == DocValuesType.SORTED_NUMERIC) { + if (dv instanceof SortedNumericDocValues) return; + } + throw new IllegalStateException("Unexpected DocValues type " + dvType + " for field " + field); } /** diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml index 7d1e09faf28..87053fbd6ce 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml @@ -729,6 +729,8 @@ <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml"/> + <fieldType name="severityType" class="solr.EnumFieldType" enumsConfig="enumsConfig.xml" enumName="severity"/> + <field name="severity_mv" type="severityType" indexed="true" stored="true" multiValued="true" docValues="true" /> <!-- some examples for different languages (generally ordered by ISO code) --> <!-- REMOVED. these reference things not in the test config, like lang/stopwords_en.txt --> diff --git a/solr/core/src/test/org/apache/solr/response/TestRawTransformer.java b/solr/core/src/test/org/apache/solr/response/TestRawTransformer.java index c2208d5204b..0db7abcf39b 100644 --- a/solr/core/src/test/org/apache/solr/response/TestRawTransformer.java +++ b/solr/core/src/test/org/apache/solr/response/TestRawTransformer.java @@ -90,7 +90,8 @@ public class TestRawTransformer extends SolrCloudTestCase { "stopwords.txt", "synonyms.txt", "protwords.txt", - "currency.xml" + "currency.xml", + "enumsConfig.xml" }) { Files.copy(Path.of(src_dir, file), confDir.resolve(file)); } diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index 66673b77f57..9ca008dbe5d 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -4908,6 +4908,35 @@ public class TestJsonFacets extends SolrTestCaseHS { + "}"); } + @Test + public void testMultivalueEnumTypes() throws Exception { + final Client client = Client.localClient(); + + final SolrParams p = params("rows", "0"); + + client.deleteByQuery("*:*", null); + + List<SolrInputDocument> docsToAdd = new ArrayList<>(6); + docsToAdd.add(sdoc("id", "1", "severity_mv", "Not Available", "severity_mv", "Low")); + docsToAdd.add(sdoc("id", "2", "severity_mv", "Low", "severity_mv", "Medium")); + docsToAdd.add(sdoc("id", "3", "severity_mv", "Medium", "severity_mv", "High")); + docsToAdd.add(sdoc("id", "4", "severity_mv", "High", "severity_mv", "Not Available")); + docsToAdd.add(sdoc("id", "5", "severity_mv", "Not Available", "severity_mv", "Low")); + docsToAdd.add(sdoc("id", "6", "severity_mv", "Low", "severity_mv", "Medium")); + + Collections.shuffle(docsToAdd, random()); + for (SolrInputDocument doc : docsToAdd) { + client.add(doc, null); + } + client.commit(); + + client.testJQ( + params(p, "q", "*:*", "json.facet", "{f:{type:terms, method:enum, field:severity_mv}}"), + "facets=={ count:6," + + "f:{ buckets:[{val:Low,count:4},{val:'Not Available',count:3},{val:Medium,count:3},{val:High,count:2}] }" + + "}"); + } + @Test public void testFacetValueTypes() throws Exception { doFacetValueTypeValidation(Client.localClient());