This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
// Commit notification metadata carried over from the original message:
//   commit 2d3ba67e49cac4260298dbf59e9b5c71076570b2
//   Author: Ron Clabo <[email protected]>
//   AuthorDate: Fri Apr 2 17:28:32 2021 -0400
//   Added Additional GroupingSearch tests to demonstrate usage.
//   src/Lucene.Net.Tests.Grouping/TestGroupingExtra.cs | 502 +++++++++++++++++++++
//   1 file changed, 502 insertions(+)
//   diff --git a/src/Lucene.Net.Tests.Grouping/TestGroupingExtra.cs b/src/Lucene.Net.Tests.Grouping/TestGroupingExtra.cs
//   new file mode 100644
//   index 0000000..52d7243

/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
*/

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Queries.Function;
using Lucene.Net.Queries.Function.ValueSources;
using Lucene.Net.Store;
using Lucene.Net.Util;
using Lucene.Net.Util.Mutable;
using NUnit.Framework;
using System.Collections;
using System.Text;

namespace Lucene.Net.Search.Grouping
{

    /// <summary>
    /// LUCENENET: File not included in Java Lucene. This file contains extra
    /// tests that exercise a few specific ways of using grouping.
    /// </summary>
    public class TestGroupingExtra : LuceneTestCase
    {
        // The expected strings use explicit "\r\n" separators. The tests therefore build
        // their output with explicit "\r\n" as well (never StringBuilder.AppendLine, which
        // appends Environment.NewLine) so the comparison does not depend on the platform.

        /// <summary>Expected rendering of the top 3 car groups with up to 4 docs each.</summary>
        private const string ExpectedCarGroups =
            "\r\nGroup: Audi\r\n" +
            "Audi A3 Orange\r\n" +
            "Audi A3 Green\r\n" +
            "Audi A3 Blue\r\n" +
            "Audi S4 Yellow\r\n" +
            "\r\nGroup: Bently\r\n" +
            "Bently Arnage Grey\r\n" +
            "Bently Arnage Blue\r\n" +
            "Bently Azure Green\r\n" +
            "Bently Azure Blue\r\n" +
            "\r\nGroup: Ford\r\n" +
            "Ford Aspire Yellow\r\n" +
            "Ford Aspire Blue\r\n" +
            "Ford Bronco Green\r\n" +
            "Ford Bronco Orange\r\n";

        /// <summary>Expected rendering of the numeric groups (grouped by "major", ordered by "minor").</summary>
        private const string ExpectedNumberGroups =
            "\r\nGroup: 1000\r\n" +
            "1000 1102 21\r\n" +
            "1000 1123 45\r\n" +
            "\r\nGroup: 2000\r\n" +
            "2000 2222 7\r\n" +
            "2000 2888 88\r\n" +
            "\r\nGroup: 3000\r\n" +
            "3000 3123 11\r\n" +
            "3000 3222 37\r\n" +
            "3000 3993 9\r\n" +
            "\r\nGroup: 4000\r\n" +
            "4000 4001 88\r\n" +
            "4000 4011 10\r\n" +
            "\r\nGroup: 8000\r\n" +
            "8000 8123 28\r\n" +
            "8000 8888 8\r\n" +
            "8000 8998 92\r\n";

        /// <summary>
        /// LUCENENET: Additional Unit Test.  Tests grouping by a StringField via the
        /// 2 pass by field name approach. Uses FieldCache, not DocValues.
        /// </summary>
        [Test]
        public void GroupingSearch_ViaName_StringSorted_UsingFieldCache_Top3Groups_Top4DocsEach()
        {
            Analyzer standardAnalyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
            IndexWriterConfig indexConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, standardAnalyzer);

            // Nested usings release the reader first, then the writer, then the directory.
            using (Directory indexDir = NewDirectory())
            using (IndexWriter writer = new IndexWriter(indexDir, indexConfig))
            {
                IndexCarData(writer, addDocValues: false);

                GroupingSearch groupingSearch = new GroupingSearch("carMake");
                groupingSearch.SetAllGroups(true);                      //true = compute all groups matching the query
                groupingSearch.SetGroupDocsLimit(4);                    //max docs returned in a group
                groupingSearch.SetGroupSort(new Sort(new SortField("carMake", SortFieldType.STRING)));
                groupingSearch.SetSortWithinGroup(new Sort(new SortField("carModel", SortFieldType.STRING)));
                groupingSearch.SetFillSortFields(true);
                groupingSearch.SetCachingInMB(10, cacheScores: true);

                using (IndexReader reader = writer.GetReader(applyAllDeletes: true))
                {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    Query matchAllQuery = new MatchAllDocsQuery();
                    ITopGroups<object> topGroups = groupingSearch.Search(searcher, matchAllQuery, groupOffset: 0, groupLimit: 3);

                    // Also available on the result: topGroups.TotalGroupCount (null if not computed)
                    // and topGroups.TotalGroupedHitCount.

                    StringBuilder sb = new StringBuilder();
                    foreach (GroupDocs<BytesRef> groupDocs in topGroups.Groups)
                    {
                        if (groupDocs.GroupValue != null)
                        {
                            sb.Append($"\r\nGroup: {groupDocs.GroupValue.Utf8ToString()}\r\n");
                        }
                        else
                        {
                            sb.Append("\r\nUngrouped\r\n");     //Happens when matching documents don't contain the group field
                        }

                        foreach (ScoreDoc scoreDoc in groupDocs.ScoreDocs)
                        {
                            AppendCarRow(sb, searcher.Doc(scoreDoc.Doc));
                        }
                    }

                    assertEquals(ExpectedCarGroups, sb.ToString());
                }
            }
        }

        /// <summary>
        /// LUCENENET: Additional Unit Test.  Tests grouping by a StringField via the
        /// 2 pass by field name approach. The group sort and the within-group sort use
        /// the "carMake_dv" / "carModel_dv" fields, which are indexed only as
        /// SortedDocValuesField.
        /// </summary>
        [Test]
        public void GroupingSearch_ViaName_StringSorted_UsingDocValues_Top3Groups_Top4DocsEach()
        {
            Analyzer standardAnalyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
            IndexWriterConfig indexConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, standardAnalyzer);

            // Nested usings release the reader first, then the writer, then the directory.
            using (Directory indexDir = NewDirectory())
            using (IndexWriter writer = new IndexWriter(indexDir, indexConfig))
            {
                IndexCarData(writer, addDocValues: true);

                // NOTE(review): the group field is the plain "carMake" StringField, not
                // "carMake_dv"; only the two sorts reference the doc values fields.
                // Confirm whether grouping itself was meant to use "carMake_dv".
                GroupingSearch groupingSearch = new GroupingSearch("carMake");
                groupingSearch.SetAllGroups(true);                      //true = compute all groups matching the query
                groupingSearch.SetGroupDocsLimit(4);                    //max docs returned in a group
                groupingSearch.SetGroupSort(new Sort(new SortField("carMake_dv", SortFieldType.STRING)));
                groupingSearch.SetSortWithinGroup(new Sort(new SortField("carModel_dv", SortFieldType.STRING)));
                groupingSearch.SetFillSortFields(true);
                groupingSearch.SetCachingInMB(10, cacheScores: true);

                using (IndexReader reader = writer.GetReader(applyAllDeletes: true))
                {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    Query matchAllQuery = new MatchAllDocsQuery();
                    ITopGroups<object> topGroups = groupingSearch.Search(searcher, matchAllQuery, groupOffset: 0, groupLimit: 3);

                    // Also available on the result: topGroups.TotalGroupCount (null if not computed)
                    // and topGroups.TotalGroupedHitCount.

                    StringBuilder sb = new StringBuilder();
                    foreach (GroupDocs<BytesRef> groupDocs in topGroups.Groups)
                    {
                        if (groupDocs.GroupValue != null)
                        {
                            sb.Append($"\r\nGroup: {groupDocs.GroupValue.Utf8ToString()}\r\n");
                        }
                        else
                        {
                            sb.Append("\r\nUngrouped\r\n");     //Happens when matching documents don't contain the group field
                        }

                        foreach (ScoreDoc scoreDoc in groupDocs.ScoreDocs)
                        {
                            AppendCarRow(sb, searcher.Doc(scoreDoc.Doc));
                        }
                    }

                    assertEquals(ExpectedCarGroups, sb.ToString());
                }
            }
        }

        /// <summary>
        /// LUCENENET: Additional Unit Test.  Tests grouping by an Int32 via the
        /// 2 pass by field name approach. Uses FieldCache, not DocValues.
        /// </summary>
        [Test]
        public virtual void GroupingSearch_ViaName_Int32Sorted_UsingFieldCache_Top10Groups_Top10DocsEach()
        {
            Analyzer standardAnalyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
            IndexWriterConfig indexConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, standardAnalyzer);

            // Nested usings release the reader first, then the writer, then the directory.
            using (Directory indexDir = NewDirectory())
            using (IndexWriter writer = new IndexWriter(indexDir, indexConfig))
            {
                IndexNumberData(writer);

                GroupingSearch groupingSearch = new GroupingSearch("major");
                groupingSearch.SetAllGroups(true);                      //true = compute all groups matching the query
                groupingSearch.SetGroupDocsLimit(10);                   //max docs returned in a group
                groupingSearch.SetGroupSort(new Sort(new SortField("major", SortFieldType.INT32)));
                groupingSearch.SetSortWithinGroup(new Sort(new SortField("minor", SortFieldType.INT32)));

                using (IndexReader reader = writer.GetReader(applyAllDeletes: true))
                {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    Query matchAllQuery = new MatchAllDocsQuery();
                    ITopGroups<BytesRef> topGroups = groupingSearch.Search<BytesRef>(searcher, matchAllQuery, groupOffset: 0, groupLimit: 10);

                    StringBuilder sb = new StringBuilder();
                    foreach (GroupDocs<BytesRef> groupDocs in topGroups.Groups)
                    {
                        if (groupDocs.GroupValue != null)
                        {
                            // The group value is the indexed term; decode it back to the int.
                            int major = NumericUtils.PrefixCodedToInt32(groupDocs.GroupValue);
                            sb.Append($"\r\nGroup: {major}\r\n");
                        }
                        else
                        {
                            sb.Append("\r\nUngrouped\r\n");     //Happens when matching documents don't contain the group field
                        }

                        foreach (ScoreDoc scoreDoc in groupDocs.ScoreDocs)
                        {
                            AppendNumberRow(sb, searcher.Doc(scoreDoc.Doc));
                        }
                    }

                    assertEquals(ExpectedNumberGroups, sb.ToString());
                }
            }
        }


        /// <summary>
        /// LUCENENET: Additional Unit Test.  Tests grouping by an Int32 via the
        /// 2 pass by function/ValueSource/MutableValue approach. Uses FieldCache, not DocValues.
        /// </summary>
        [Test]
        public virtual void GroupingSearch_ViaFunction_Int32Sorted_UsingFieldCache_Top10Groups_Top10DocsEach()
        {
            Analyzer standardAnalyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
            IndexWriterConfig indexConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, standardAnalyzer);

            // Nested usings release the reader first, then the writer, then the directory.
            using (Directory indexDir = NewDirectory())
            using (IndexWriter writer = new IndexWriter(indexDir, indexConfig))
            {
                IndexNumberData(writer);

                ValueSource vs = new BytesRefFieldSource("major");
                GroupingSearch groupingSearch = new GroupingSearch(vs, new Hashtable());
                groupingSearch.SetAllGroups(true);                      //true = compute all groups matching the query
                groupingSearch.SetGroupDocsLimit(10);                   //max docs returned in a group
                groupingSearch.SetGroupSort(new Sort(new SortField("major", SortFieldType.INT32)));
                groupingSearch.SetSortWithinGroup(new Sort(new SortField("minor", SortFieldType.INT32)));

                using (IndexReader reader = writer.GetReader(applyAllDeletes: true))
                {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    Query matchAllQuery = new MatchAllDocsQuery();
                    ITopGroups<object> topGroups = groupingSearch.Search(searcher, matchAllQuery, groupOffset: 0, groupLimit: 10);

                    StringBuilder sb = new StringBuilder();
                    foreach (GroupDocs<MutableValue> groupDocs in topGroups.Groups)
                    {
                        if (groupDocs.GroupValue != null)
                        {
                            // The group value is cast to MutableValueStr; its BytesRef is the
                            // indexed term, which is decoded back to the int.
                            BytesRef bytesRef = ((MutableValueStr)groupDocs.GroupValue).Value;
                            int major = NumericUtils.PrefixCodedToInt32(bytesRef);
                            sb.Append($"\r\nGroup: {major}\r\n");
                        }
                        else
                        {
                            sb.Append("\r\nUngrouped\r\n");     //Happens when matching documents don't contain the group field
                        }

                        foreach (ScoreDoc scoreDoc in groupDocs.ScoreDocs)
                        {
                            AppendNumberRow(sb, searcher.Doc(scoreDoc.Doc));
                        }
                    }

                    assertEquals(ExpectedNumberGroups, sb.ToString());
                }
            }
        }


        /// <summary>
        /// Adds one document per row of <see cref="GetCarData"/> (stored StringFields
        /// "carMake", "carModel", "carColor") and commits the writer.
        /// </summary>
        /// <param name="writer">Writer that receives the documents.</param>
        /// <param name="addDocValues">When true, each document also gets
        /// SortedDocValuesFields "carMake_dv" and "carModel_dv" holding the same values.</param>
        private static void IndexCarData(IndexWriter writer, bool addDocValues)
        {
            string[,] carData = GetCarData();
            int carCount = carData.GetLength(0);

            for (int i = 0; i < carCount; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("carMake", carData[i, 0], Field.Store.YES));
                if (addDocValues)
                {
                    doc.Add(new SortedDocValuesField("carMake_dv", new BytesRef(carData[i, 0])));
                }
                doc.Add(new StringField("carModel", carData[i, 1], Field.Store.YES));
                if (addDocValues)
                {
                    doc.Add(new SortedDocValuesField("carModel_dv", new BytesRef(carData[i, 1])));
                }
                doc.Add(new StringField("carColor", carData[i, 2], Field.Store.YES));
                writer.AddDocument(doc);
            }
            writer.Commit();
        }

        /// <summary>
        /// Adds one document per row of <see cref="GetNumbers"/> ("major" and "minor" as
        /// single-term Int32Fields, "rev" as a StoredField) and commits the writer.
        /// </summary>
        /// <param name="writer">Writer that receives the documents.</param>
        private static void IndexNumberData(IndexWriter writer)
        {
            //Normally we can not group on an Int32Field because it's stored as an 8 term trie structure
            //by default.  But by specifying int.MaxValue as the NumericPrecisionStep we force the inverted
            //index to store the value as a single term. This allows us to use it for grouping (although
            //it's no longer good for range queries as they will be slow if the range is large).
            var int32OneTerm = new FieldType
            {
                IsIndexed = true,
                IsTokenized = true,
                OmitNorms = true,
                IndexOptions = IndexOptions.DOCS_ONLY,
                NumericType = Documents.NumericType.INT32,
                NumericPrecisionStep = int.MaxValue,             //Ensures a single term is generated not a trie
                IsStored = true
            };
            int32OneTerm.Freeze();

            int[,] numericData = GetNumbers();
            int rowCount = numericData.GetLength(0);

            for (int i = 0; i < rowCount; i++)
            {
                Document doc = new Document();
                doc.Add(new Int32Field("major", numericData[i, 0], int32OneTerm));
                doc.Add(new Int32Field("minor", numericData[i, 1], int32OneTerm));
                doc.Add(new StoredField("rev", numericData[i, 2]));
                writer.AddDocument(doc);
            }
            writer.Commit();
        }

        /// <summary>Appends "make model color" for a stored car document, terminated by "\r\n".</summary>
        private static void AppendCarRow(StringBuilder sb, Document doc)
        {
            sb.Append($"{doc.GetField("carMake").GetStringValue()} {doc.GetField("carModel").GetStringValue()} {doc.GetField("carColor").GetStringValue()}\r\n");
        }

        /// <summary>Appends "major minor rev" for a stored number document, terminated by "\r\n".</summary>
        private static void AppendNumberRow(StringBuilder sb, Document doc)
        {
            sb.Append($"{doc.GetField("major").GetInt32Value()} {doc.GetField("minor").GetInt32Value()} {doc.GetField("rev").GetInt32Value()}\r\n");
        }

        /// <summary>Returns the car fixture rows as { make, model, color }.</summary>
        private static string[,] GetCarData()
        {
            return new string[,] {
                { "Toyota", "4Runner" , "Blue" },
                { "Toyota", "4Runner" , "Green" },
                { "Ford", "Bronco" , "Green" },
                { "Ford", "Expedition" , "Yellow" },
                { "Ford", "Expedition" , "Blue" },
                { "Toyota", "Celica" , "Orange" },
                { "Audi", "S4" , "Yellow" },
                { "Audi", "A3" , "Orange" },
                { "Toyota", "Camry" , "Yellow" },
                { "Ford", "F150 Truck" , "Green" },
                { "Bently", "Azure" , "Green" },
                { "Ford", "Bronco" , "Orange" },
                { "Ford", "Aspire" , "Yellow" },
                { "Audi", "A3" , "Green" },
                { "Audi", "S4" , "Blue" },
                { "Bently", "Arnage" , "Grey" },
                { "Toyota", "4Runner" , "Yellow" },
                { "Toyota", "Camry" , "Blue" },
                { "Bently", "Azure" , "Blue" },
                { "Ford", "Bronco" , "Blue" },
                { "Ford", "Expedition" , "Green" },
                { "Ford", "F150 Truck" , "Blue" },
                { "Toyota", "Celica" , "Blue" },
                { "Ford", "F150 Truck" , "Yellow" },
                { "Ford", "Aspire" , "Blue" },
                { "Audi", "A3" , "Blue" },
                { "Bently", "Arnage" , "Blue" },
            };
        }

        /// <summary>Returns the numeric fixture rows as { major, minor, rev }.</summary>
        private static int[,] GetNumbers()
        {
            return new int[,] {
                { 1000, 1102 , 21 },
                { 4000, 4001 , 88 },
                { 8000, 8123 , 28 },
                { 4000, 4011 , 10 },
                { 2000, 2222 , 7 },
                { 3000, 3222 , 37 },
                { 2000, 2888 , 88 },
                { 3000, 3123 , 11 },
                { 8000, 8888 , 8 },
                { 1000, 1123 , 45 },
                { 3000, 3993 , 9 },
                { 8000, 8998 , 92 },
            };
        }


    }
}
