I forgot to add the CHANGES entry. I will do that now.

On Wed, Sep 10, 2014 at 11:36 AM, <[email protected]> wrote:
> Author: tflobbe > Date: Wed Sep 10 18:36:27 2014 > New Revision: 1624091 > > URL: http://svn.apache.org/r1624091 > Log: > SOLR-6452: StatsComponent's stat 'missing' will work on fields with > docValues=true and indexed=false > > Modified: > > lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java > > lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java > > lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesStats.java > > lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java > > Modified: > lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java?rev=1624091&r1=1624090&r2=1624091&view=diff > > ============================================================================== > --- > lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java > (original) > +++ > lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java > Wed Sep 10 18:36:27 2014 > @@ -51,7 +51,7 @@ public class FieldFacetStats { > final boolean calcDistinct; > > public final Map<String, StatsValues> facetStatsValues; > - > + private final Map<Integer, Integer> missingStats; > List<HashMap<String, Integer>> facetStatsTerms; > > final AtomicReader topLevelReader; > @@ -73,6 +73,7 @@ public class FieldFacetStats { > > facetStatsValues = new HashMap<>(); > facetStatsTerms = new ArrayList<>(); > + missingStats = new HashMap<>(); > } > > private StatsValues getStatsValues(String key) throws IOException { > @@ -100,8 +101,10 @@ public class FieldFacetStats { > if (topLevelSortedValues == null) { > topLevelSortedValues = DocValues.getSorted(topLevelReader, name); > } > + > > int term = topLevelSortedValues.getOrd(docID); > + > int arrIdx = term; > if (arrIdx 
>= 0 && arrIdx < topLevelSortedValues.getValueCount()) { > final String key; > @@ -113,6 +116,8 @@ public class FieldFacetStats { > while (facetStatsTerms.size() <= statsTermNum) { > facetStatsTerms.add(new HashMap<String, Integer>()); > } > + > + > final Map<String, Integer> statsTermCounts = > facetStatsTerms.get(statsTermNum); > Integer statsTermCount = statsTermCounts.get(key); > if (statsTermCount == null) { > @@ -122,6 +127,7 @@ public class FieldFacetStats { > } > return true; > } > + > return false; > } > > @@ -132,8 +138,7 @@ public class FieldFacetStats { > while (facetStatsTerms.size() <= statsTermNum) { > facetStatsTerms.add(new HashMap<String, Integer>()); > } > - for (Map.Entry<String, Integer> stringIntegerEntry : > facetStatsTerms.get(statsTermNum).entrySet()) { > - Map.Entry pairs = (Map.Entry) stringIntegerEntry; > + for (Map.Entry<String, Integer> pairs : > facetStatsTerms.get(statsTermNum).entrySet()) { > String key = (String) pairs.getKey(); > StatsValues facetStats = facetStatsValues.get(key); > if (facetStats == null) { > @@ -156,6 +161,35 @@ public class FieldFacetStats { > } > } > > + public void facetMissingNum(int docID) throws IOException { > + if (topLevelSortedValues == null) { > + topLevelSortedValues = DocValues.getSorted(topLevelReader, name); > + } > + > + int ord = topLevelSortedValues.getOrd(docID); > + if (ord != -1) { > + Integer missingCount = missingStats.get(ord); > + if (missingCount == null) { > + missingStats.put(ord, 1); > + } else { > + missingStats.put(ord, missingCount + 1); > + } > + } > + } > + > + public void accumulateMissing() throws IOException { > + StatsValues statsValue; > + > + for (Map.Entry<Integer, Integer> entry : missingStats.entrySet()) { > + if (entry.getKey() >= 0) { > + String key = > topLevelSortedValues.lookupOrd(entry.getKey()).utf8ToString(); > + if ((statsValue = facetStatsValues.get(key)) != null) { > + statsValue.addMissing(entry.getValue()); > + } > + } > + } > + return; > + } > } > > > > 
Modified: > lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java?rev=1624091&r1=1624090&r2=1624091&view=diff > > ============================================================================== > --- > lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java > (original) > +++ > lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java > Wed Sep 10 18:36:27 2014 > @@ -39,7 +39,6 @@ import org.apache.solr.common.util.Simpl > import org.apache.solr.common.util.StrUtils; > import org.apache.solr.request.DocValuesStats; > import org.apache.solr.request.SolrQueryRequest; > -import org.apache.solr.request.UnInvertedField; > import org.apache.solr.schema.FieldType; > import org.apache.solr.schema.IndexSchema; > import org.apache.solr.schema.SchemaField; > > Modified: > lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesStats.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesStats.java?rev=1624091&r1=1624090&r2=1624091&view=diff > > ============================================================================== > --- > lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesStats.java > (original) > +++ > lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesStats.java > Wed Sep 10 18:36:27 2014 > @@ -23,7 +23,6 @@ import java.util.Map; > > import org.apache.lucene.index.AtomicReaderContext; > import org.apache.lucene.index.DocValues; > -import org.apache.lucene.index.Term; > import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues; > import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; > import org.apache.lucene.index.MultiDocValues.OrdinalMap; > @@ -32,10 +31,9 @@ import org.apache.lucene.index.SortedSet > import 
org.apache.lucene.search.DocIdSet; > import org.apache.lucene.search.DocIdSetIterator; > import org.apache.lucene.search.Filter; > -import org.apache.lucene.search.TermQuery; > -import org.apache.lucene.search.TermRangeQuery; > import org.apache.lucene.util.BytesRef; > import org.apache.lucene.util.LongValues; > +import org.apache.solr.common.SolrException; > import org.apache.solr.handler.component.FieldFacetStats; > import org.apache.solr.handler.component.StatsValues; > import org.apache.solr.handler.component.StatsValuesFactory; > @@ -62,11 +60,17 @@ public class DocValuesStats { > //Initialize facetstats, if facets have been passed in > final FieldFacetStats[] facetStats = new > FieldFacetStats[facet.length]; > int upto = 0; > + > for (String facetField : facet) { > + SchemaField fsf = searcher.getSchema().getField(facetField); > + if ( fsf.multiValued()) { > + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, > + "Stats can only facet on single-valued fields, not: " + > facetField ); > + } > + > SchemaField facetSchemaField = > searcher.getSchema().getField(facetField); > facetStats[upto++] = new FieldFacetStats(searcher, facetField, > schemaField, facetSchemaField, calcDistinct); > } > - > // TODO: remove multiValuedFieldCache(), check dv type / uninversion > type? 
> final boolean multiValued = schemaField.multiValued() || > ft.multiValuedFieldCache(); > > @@ -74,6 +78,7 @@ public class DocValuesStats { > OrdinalMap ordinalMap = null; // for mapping per-segment ords to > global ones > if (multiValued) { > si = searcher.getAtomicReader().getSortedSetDocValues(fieldName); > + > if (si instanceof MultiSortedSetDocValues) { > ordinalMap = ((MultiSortedSetDocValues)si).mapping; > } > @@ -90,26 +95,27 @@ public class DocValuesStats { > if (si.getValueCount() >= Integer.MAX_VALUE) { > throw new UnsupportedOperationException("Currently this stats > method is limited to " + Integer.MAX_VALUE + " unique terms"); > } > - > - DocSet missing = docs.andNot( searcher.getDocSet(new > TermRangeQuery(fieldName, null, null, false, false))); > - > - final int nTerms = (int) si.getValueCount(); > > + int missingDocCountTotal = 0; > + final int nTerms = (int) si.getValueCount(); > // count collection array only needs to be as big as the number of > terms we are > // going to collect counts for. 
> final int[] counts = new int[nTerms]; > > Filter filter = docs.getTopFilter(); > List<AtomicReaderContext> leaves = > searcher.getTopReaderContext().leaves(); > + > for (int subIndex = 0; subIndex < leaves.size(); subIndex++) { > AtomicReaderContext leaf = leaves.get(subIndex); > DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets > already exclude any deleted docs > DocIdSetIterator disi = null; > + > if (dis != null) { > disi = dis.iterator(); > } > if (disi != null) { > int docBase = leaf.docBase; > + > if (multiValued) { > SortedSetDocValues sub = > leaf.reader().getSortedSetDocValues(fieldName); > if (sub == null) { > @@ -118,23 +124,23 @@ public class DocValuesStats { > final SortedDocValues singleton = > DocValues.unwrapSingleton(sub); > if (singleton != null) { > // some codecs may optimize SORTED_SET storage for > single-valued fields > - accumSingle(counts, docBase, facetStats, singleton, disi, > subIndex, ordinalMap); > + missingDocCountTotal += accumSingle(counts, docBase, > facetStats, singleton, disi, subIndex, ordinalMap); > } else { > - accumMulti(counts, docBase, facetStats, sub, disi, subIndex, > ordinalMap); > + missingDocCountTotal += accumMulti(counts, docBase, > facetStats, sub, disi, subIndex, ordinalMap); > } > } else { > SortedDocValues sub = > leaf.reader().getSortedDocValues(fieldName); > if (sub == null) { > sub = DocValues.emptySorted(); > } > - accumSingle(counts, docBase, facetStats, sub, disi, subIndex, > ordinalMap); > + missingDocCountTotal += accumSingle(counts, docBase, > facetStats, sub, disi, subIndex, ordinalMap); > } > } > } > - > // add results in index order > for (int ord = 0; ord < counts.length; ord++) { > int count = counts[ord]; > + > if (count > 0) { > final BytesRef value = si.lookupOrd(ord); > res.accumulate(value, count); > @@ -143,26 +149,23 @@ public class DocValuesStats { > } > } > } > - > - res.addMissing(missing.size()); > + res.addMissing(missingDocCountTotal); > + > if (facetStats.length > 0) { 
> for (FieldFacetStats f : facetStats) { > - Map<String, StatsValues> facetStatsValues = f.facetStatsValues; > - FieldType facetType = searcher.getSchema().getFieldType(f.name); > - for (Map.Entry<String,StatsValues> entry : > facetStatsValues.entrySet()) { > - String termLabel = entry.getKey(); > - int missingCount = searcher.numDocs(new TermQuery(new Term( > f.name, facetType.toInternal(termLabel))), missing); > - entry.getValue().addMissing(missingCount); > - } > + Map<String,StatsValues> facetStatsValues = f.facetStatsValues; > + f.accumulateMissing(); > res.addFacet(f.name, facetStatsValues); > } > } > + > return res; > } > > /** accumulates per-segment single-valued stats */ > - static void accumSingle(int counts[], int docBase, FieldFacetStats[] > facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, > OrdinalMap map) throws IOException { > + static int accumSingle(int counts[], int docBase, FieldFacetStats[] > facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, > OrdinalMap map) throws IOException { > final LongValues ordMap = map == null ? 
null : > map.getGlobalOrds(subIndex); > + int missingDocCount = 0; > int doc; > while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { > int term = si.getOrd(doc); > @@ -174,18 +177,29 @@ public class DocValuesStats { > for (FieldFacetStats f : facetStats) { > f.facetTermNum(docBase + doc, term); > } > + }else{ > + for (FieldFacetStats f : facetStats) { > + f.facetMissingNum(docBase + doc); > + } > + > + missingDocCount++; > } > } > + return missingDocCount; > } > > /** accumulates per-segment multi-valued stats */ > - static void accumMulti(int counts[], int docBase, FieldFacetStats[] > facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, > OrdinalMap map) throws IOException { > + > + static int accumMulti(int counts[], int docBase, FieldFacetStats[] > facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, > OrdinalMap map) throws IOException { > final LongValues ordMap = map == null ? null : > map.getGlobalOrds(subIndex); > + int missingDocCount = 0; > int doc; > while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { > si.setDocument(doc); > long ord; > + boolean emptyTerm = true; > while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { > + emptyTerm = false; > int term = (int) ord; > if (map != null) { > term = (int) ordMap.get(term); > @@ -195,6 +209,15 @@ public class DocValuesStats { > f.facetTermNum(docBase + doc, term); > } > } > + if (emptyTerm){ > + for (FieldFacetStats f : facetStats) { > + f.facetMissingNum(docBase + doc); > + } > + > + missingDocCount++; > + } > } > + > + return missingDocCount; > } > } > > Modified: > lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java?rev=1624091&r1=1624090&r2=1624091&view=diff > > ============================================================================== > --- > 
lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java > (original) > +++ > lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java > Wed Sep 10 18:36:27 2014 > @@ -28,10 +28,9 @@ import java.util.Map; > import java.util.TimeZone; > > import org.apache.lucene.util.LuceneTestCase; > -import org.apache.solr.SolrTestCaseJ4; > -import org.apache.solr.common.params.SolrParams; > import org.apache.solr.common.params.CommonParams; > import org.apache.solr.common.params.MapSolrParams; > +import org.apache.solr.common.params.SolrParams; > import org.apache.solr.common.params.StatsParams; > import org.apache.solr.core.SolrCore; > import org.apache.solr.request.LocalSolrQueryRequest; > @@ -64,9 +63,8 @@ public class StatsComponentTest extends > for (String f : new String[] { > "stats_i","stats_l","stats_f","stats_d", > "stats_ti","stats_tl","stats_tf","stats_td", > - "stats_ti_dv","stats_tl_dv","stats_tf_dv","stats_td_dv" > -// , TODO: enable this test after SOLR-6452 is fixed > -// > "stats_ti_ni_dv","stats_tl_ni_dv","stats_tf_ni_dv","stats_td_ni_dv" > + "stats_ti_dv","stats_tl_dv","stats_tf_dv","stats_td_dv", > + > "stats_ti_ni_dv","stats_tl_ni_dv","stats_tf_ni_dv","stats_td_ni_dv" > }) { > > // all of our checks should work with all of these params > @@ -91,9 +89,8 @@ public class StatsComponentTest extends > > for (String f : new String[] {"stats_ii", > "stats_tis","stats_tfs","stats_tls","stats_tds", // trie > fields > - "stats_tis_dv","stats_tfs_dv","stats_tls_dv","stats_tds_dv" > // Doc Values > -// , TODO: enable this test after SOLR-6452 is fixed > - > //"stats_tis_ni_dv","stats_tfs_ni_dv","stats_tls_ni_dv","stats_tds_ni_dv" > // Doc Values Not indexed > + "stats_tis_dv","stats_tfs_dv","stats_tls_dv","stats_tds_dv", > // Doc Values > + > "stats_tis_ni_dv","stats_tfs_ni_dv","stats_tls_ni_dv","stats_tds_ni_dv" // > Doc Values Not indexed > }) { > > doTestMVFieldStatisticsResult(f); > @@ -153,6 
+150,9 @@ public class StatsComponentTest extends > assertU(adoc("id", "3", f, "-30", f, "-1", "active_s", "false")); > assertU(adoc("id", "4", f, "-40", f, "10", "active_s", "false")); > assertU(adoc("id", "5", "active_s", "false")); > + assertU(adoc("id", "6", "active_s", "false")); > + assertU(adoc("id", "7", "active_s", "true")); > + > assertU(commit()); > > // with or w/o these excluded filters, results should be the same > @@ -171,7 +171,7 @@ public class StatsComponentTest extends > , "//double[@name='max'][.='200.0']" > , "//double[@name='sum'][.='9.0']" > , "//long[@name='count'][.='8']" > - , "//long[@name='missing'][.='1']" > + , "//long[@name='missing'][.='3']" > , "//long[@name='countDistinct'][.='8']" > , "count(//arr[@name='distinctValues']/*)=8" > , "//double[@name='sumOfSquares'][.='53101.0']" > @@ -186,7 +186,7 @@ public class StatsComponentTest extends > , "//double[@name='max'][.='200.0']" > , "//double[@name='sum'][.='119.0']" > , "//long[@name='count'][.='6']" > - , "//long[@name='missing'][.='1']" > + , "//long[@name='missing'][.='3']" > , "//long[@name='countDistinct'][.='6']" > , "count(//arr[@name='distinctValues']/*)=6" > , "//double[@name='sumOfSquares'][.='43001.0']" > @@ -202,7 +202,7 @@ public class StatsComponentTest extends > , "//double[@name='max'][.='200.0']" > , "//double[@name='sum'][.='9.0']" > , "//long[@name='count'][.='8']" > - , "//long[@name='missing'][.='1']" > + , "//long[@name='missing'][.='3']" > , "//long[@name='countDistinct'][.='8']" > , "count(//lst[@name='" + f + > "']/arr[@name='distinctValues']/*)=8" > , "//double[@name='sumOfSquares'][.='53101.0']" > @@ -216,7 +216,7 @@ public class StatsComponentTest extends > , "//lst[@name='true']/double[@name='max'][.='200.0']" > , "//lst[@name='true']/double[@name='sum'][.='70.0']" > , "//lst[@name='true']/long[@name='count'][.='4']" > - , "//lst[@name='true']/long[@name='missing'][.='0']" > + , "//lst[@name='true']/long[@name='missing'][.='1']" > , 
"//lst[@name='true']//long[@name='countDistinct'][.='4']" > , > "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4" > , > "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']" > @@ -230,7 +230,7 @@ public class StatsComponentTest extends > , "//lst[@name='false']/double[@name='max'][.='10.0']" > , "//lst[@name='false']/double[@name='sum'][.='-61.0']" > , "//lst[@name='false']/long[@name='count'][.='4']" > - , "//lst[@name='false']/long[@name='missing'][.='1']" > + , "//lst[@name='false']/long[@name='missing'][.='2']" > , "//lst[@name='true']//long[@name='countDistinct'][.='4']" > , > "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4" > , > "//lst[@name='false']/double[@name='sumOfSquares'][.='2601.0']" > @@ -711,4 +711,31 @@ public class StatsComponentTest extends > Collections.addAll(cat_docValues, comparables); > return cat_docValues; > } > + > + > +// public void testOtherFacetStatsResult() throws Exception { > +// > +// assertU(adoc("id", "1", "stats_tls_dv", "10", "active_i", "1")); > +// assertU(adoc("id", "2", "stats_tls_dv", "20", "active_i", "1")); > +// assertU(commit()); > +// assertU(adoc("id", "3", "stats_tls_dv", "30", "active_i", "2")); > +// assertU(adoc("id", "4", "stats_tls_dv", "40", "active_i", "2")); > +// assertU(commit()); > +// > +// final String pre = > "//lst[@name='stats_fields']/lst[@name='stats_tls_dv']/lst[@name='facets']/lst[@name='active_i']"; > +// > +// assertQ("test value for active_s=true", req("q", "*:*", "stats", > "true", "stats.field", "stats_tls_dv", "stats.facet", "active_i","indent", > "true") > +// , "*[count("+pre+")=1]" > +// , pre+"/lst[@name='1']/double[@name='min'][.='10.0']" > +// , pre+"/lst[@name='1']/double[@name='max'][.='20.0']" > +// , pre+"/lst[@name='1']/double[@name='sum'][.='30.0']" > +// , pre+"/lst[@name='1']/long[@name='count'][.='2']" > +// , pre+"/lst[@name='1']/long[@name='missing'][.='0']" > +// , pre + > "/lst[@name='true']/long[@name='countDistinct'][.='2']" > +// , 
"count(" + pre + > "/lst[@name='true']/arr[@name='distinctValues']/*)=2" > +// , > pre+"/lst[@name='1']/double[@name='sumOfSquares'][.='500.0']" > +// , pre+"/lst[@name='1']/double[@name='mean'][.='15.0']" > +// , > pre+"/lst[@name='1']/double[@name='stddev'][.='7.0710678118654755']" > +// ); > +// } > } > > >
