Port Facet.Index
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/71e218cb Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/71e218cb Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/71e218cb Branch: refs/heads/branch_4x Commit: 71e218cb010cd5819b9774fe76c41b9a3344dc8a Parents: b362722 Author: Paul Irwin <[email protected]> Authored: Wed Nov 6 09:49:10 2013 -0500 Committer: Paul Irwin <[email protected]> Committed: Wed Nov 6 09:49:10 2013 -0500 ---------------------------------------------------------------------- src/contrib/Facet/Contrib.Facet.csproj | 4 + src/contrib/Facet/Index/CountingListBuilder.cs | 139 +++++++++++++++++++ src/contrib/Facet/Index/DrillDownStream.cs | 62 +++++++++ src/contrib/Facet/Index/FacetFields.cs | 121 ++++++++++++++++ src/contrib/Facet/Index/ICategoryListBuilder.cs | 14 ++ 5 files changed, 340 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Contrib.Facet.csproj ---------------------------------------------------------------------- diff --git a/src/contrib/Facet/Contrib.Facet.csproj b/src/contrib/Facet/Contrib.Facet.csproj index 1c26fcb..5c15e5b 100644 --- a/src/contrib/Facet/Contrib.Facet.csproj +++ b/src/contrib/Facet/Contrib.Facet.csproj @@ -74,6 +74,10 @@ <Compile Include="Encoding\UniqueValuesIntEncoder.cs" /> <Compile Include="Encoding\VInt8IntDecoder.cs" /> <Compile Include="Encoding\VInt8IntEncoder.cs" /> + <Compile Include="Index\CountingListBuilder.cs" /> + <Compile Include="Index\DrillDownStream.cs" /> + <Compile Include="Index\FacetFields.cs" /> + <Compile Include="Index\ICategoryListBuilder.cs" /> <Compile Include="Params\CategoryListParams.cs" /> <Compile Include="Params\FacetIndexingParams.cs" /> <Compile Include="Params\FacetSearchParams.cs" /> 
// Reconstructed from the four files added under src/contrib/Facet/Index/ by commit 71e218cb.
// The using directives of the four files are merged below; each type is preceded by the
// path of the file it was added in.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Documents;
using Lucene.Net.Facet.Encoding;
using Lucene.Net.Facet.Params;
using Lucene.Net.Facet.Taxonomy;
using Lucene.Net.Facet.Util;
using Lucene.Net.Index;
using Lucene.Net.Support;
using Lucene.Net.Util;

namespace Lucene.Net.Facet.Index
{
    // ------------------------------------------------------------------
    // src/contrib/Facet/Index/CountingListBuilder.cs
    // ------------------------------------------------------------------

    /// <summary>
    /// An <see cref="ICategoryListBuilder"/> that appends parent ordinals to the supplied
    /// ordinals (subject to each category's ordinal policy) and then encodes the result,
    /// either as a single entry or as one entry per partition.
    /// </summary>
    public class CountingListBuilder : ICategoryListBuilder
    {
        /// <summary>Strategy for turning a list of ordinals into named byte payloads.</summary>
        private abstract class OrdinalsEncoder
        {
            internal OrdinalsEncoder()
            {
            }

            /// <summary>Encodes <paramref name="ordinals"/> into one or more named payloads.</summary>
            public abstract IDictionary<string, BytesRef> Encode(IntsRef ordinals);
        }

        /// <summary>Encodes all ordinals into a single payload stored under the empty name.</summary>
        private sealed class NoPartitionsOrdinalsEncoder : OrdinalsEncoder
        {
            private readonly IntEncoder encoder;
            private readonly string name = @"";

            internal NoPartitionsOrdinalsEncoder(CategoryListParams categoryListParams)
            {
                encoder = categoryListParams.CreateEncoder();
            }

            public override IDictionary<string, BytesRef> Encode(IntsRef ordinals)
            {
                BytesRef bytes = new BytesRef(128);
                encoder.Encode(ordinals, bytes);
                return new Dictionary<string, BytesRef>() { { name, bytes } };
            }
        }

        /// <summary>
        /// Groups ordinals by partition name and encodes each group separately. Each ordinal
        /// is stored as <c>ordinal % partitionSize</c> inside its partition.
        /// </summary>
        private sealed class PerPartitionOrdinalsEncoder : OrdinalsEncoder
        {
            private readonly FacetIndexingParams indexingParams;
            private readonly CategoryListParams categoryListParams;
            private readonly int partitionSize;
            private readonly HashMap<String, IntEncoder> partitionEncoder = new HashMap<String, IntEncoder>();

            internal PerPartitionOrdinalsEncoder(FacetIndexingParams indexingParams, CategoryListParams categoryListParams)
            {
                this.indexingParams = indexingParams;
                this.categoryListParams = categoryListParams;
                this.partitionSize = indexingParams.PartitionSize;
            }

            public override IDictionary<String, BytesRef> Encode(IntsRef ordinals)
            {
                // Step 1: bucket the ordinals by partition name.
                HashMap<String, IntsRef> partitionOrdinals = new HashMap<String, IntsRef>();
                for (int i = 0; i < ordinals.length; i++)
                {
                    int ordinal = ordinals.ints[i];
                    string name = PartitionsUtils.PartitionNameByOrdinal(indexingParams, ordinal);

                    // NOTE(review): relies on HashMap's indexer yielding null for an absent key
                    // instead of throwing - confirm against Lucene.Net.Support.HashMap.
                    IntsRef partitionOrds = partitionOrdinals[name];
                    if (partitionOrds == null)
                    {
                        partitionOrds = new IntsRef(32);
                        partitionOrdinals[name] = partitionOrds;
                        partitionEncoder[name] = categoryListParams.CreateEncoder();
                    }

                    // The bucket starts with room for 32 ordinals only; grow it before writing
                    // so that a partition receiving more ordinals does not overrun the array.
                    int required = partitionOrds.length + 1;
                    if (partitionOrds.ints.Length < required)
                    {
                        partitionOrds.Grow(required);
                    }

                    partitionOrds.ints[partitionOrds.length++] = ordinal % partitionSize;
                }

                // Step 2: encode every bucket with the encoder registered for its partition.
                HashMap<String, BytesRef> partitionBytes = new HashMap<String, BytesRef>();
                foreach (KeyValuePair<String, IntsRef> e in partitionOrdinals)
                {
                    string name = e.Key;
                    IntEncoder encoder = partitionEncoder[name];
                    BytesRef bytes = new BytesRef(128);
                    encoder.Encode(e.Value, bytes);
                    partitionBytes[name] = bytes;
                }

                return partitionBytes;
            }
        }

        private readonly OrdinalsEncoder ordinalsEncoder;
        private readonly ITaxonomyWriter taxoWriter;
        private readonly CategoryListParams clp;

        /// <summary>
        /// Creates a builder. When <c>indexingParams.PartitionSize</c> equals
        /// <see cref="int.MaxValue"/> all ordinals are encoded together; otherwise they are
        /// encoded per partition.
        /// </summary>
        /// <param name="categoryListParams">Supplies the encoder and the per-dimension ordinal policy.</param>
        /// <param name="indexingParams">Supplies the partition size and partition naming.</param>
        /// <param name="taxoWriter">Used to look up the parent of each ordinal.</param>
        public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams, ITaxonomyWriter taxoWriter)
        {
            this.taxoWriter = taxoWriter;
            this.clp = categoryListParams;
            if (indexingParams.PartitionSize == int.MaxValue)
            {
                ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
            }
            else
            {
                ordinalsEncoder = new PerPartitionOrdinalsEncoder(indexingParams, categoryListParams);
            }
        }

        /// <summary>
        /// Appends parent ordinals to <paramref name="ordinals"/> (in place) and encodes the
        /// resulting list.
        /// </summary>
        /// <param name="ordinals">
        /// The ordinals of <paramref name="categories"/>, in the same order. This instance is
        /// modified: parent ordinals are appended after the original entries.
        /// </param>
        /// <param name="categories">
        /// The categories the ordinals belong to; must yield at least <c>ordinals.length</c> items.
        /// </param>
        /// <returns>The encoded payloads keyed by name, as produced by the selected encoder.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="categories"/> yields fewer items than there are ordinals.
        /// </exception>
        public IDictionary<string, BytesRef> Build(IntsRef ordinals, IEnumerable<CategoryPath> categories)
        {
            // Parents are appended to the same IntsRef, so only walk the original entries.
            int upto = ordinals.length;
            using (IEnumerator<CategoryPath> iter = categories.GetEnumerator())
            {
                for (int i = 0; i < upto; i++)
                {
                    int ordinal = ordinals.ints[i];
                    if (!iter.MoveNext())
                    {
                        throw new ArgumentException("categories must supply one CategoryPath per ordinal", "categories");
                    }

                    CategoryPath cp = iter.Current;
                    CategoryListParams.OrdinalPolicy op = clp.GetOrdinalPolicy(cp.components[0]);
                    if (op != CategoryListParams.OrdinalPolicy.NO_PARENTS)
                    {
                        int parent = taxoWriter.GetParent(ordinal);
                        if (parent > 0)
                        {
                            // Walk up the taxonomy until GetParent reports a non-positive ordinal.
                            while (parent > 0)
                            {
                                // Callers normally pre-size the array; grow defensively otherwise.
                                int required = ordinals.length + 1;
                                if (ordinals.ints.Length < required)
                                {
                                    ordinals.Grow(required);
                                }

                                ordinals.ints[ordinals.length++] = parent;
                                parent = taxoWriter.GetParent(parent);
                            }

                            if (op == CategoryListParams.OrdinalPolicy.ALL_BUT_DIMENSION)
                            {
                                // Drop the last parent appended, i.e. the top-most ancestor reached.
                                ordinals.length--;
                            }
                        }
                    }
                }
            }

            return ordinalsEncoder.Encode(ordinals);
        }
    }

    // ------------------------------------------------------------------
    // src/contrib/Facet/Index/DrillDownStream.cs
    // ------------------------------------------------------------------

    /// <summary>
    /// A <see cref="TokenStream"/> that emits one drill-down term for every category and
    /// then one for each of its successively shorter prefixes.
    /// </summary>
    public class DrillDownStream : TokenStream
    {
        private readonly FacetIndexingParams indexingParams;
        private readonly IEnumerator<CategoryPath> categories;
        private readonly ICharTermAttribute termAttribute;
        private CategoryPath current;
        private bool isParent;

        /// <summary>Creates a stream over <paramref name="categories"/>.</summary>
        /// <param name="categories">The categories to emit terms for.</param>
        /// <param name="indexingParams">Produces the drill-down term text for a path.</param>
        public DrillDownStream(IEnumerable<CategoryPath> categories, FacetIndexingParams indexingParams)
        {
            termAttribute = AddAttribute<ICharTermAttribute>();
            this.categories = categories.GetEnumerator();
            this.indexingParams = indexingParams;
        }

        /// <summary>
        /// Extension hook invoked for every emitted term; the default implementation does nothing.
        /// </summary>
        /// <param name="category">The path whose term was just written.</param>
        /// <param name="isParent">
        /// <c>false</c> for the category itself, <c>true</c> for the shortened paths derived from it.
        /// </param>
        protected virtual void AddAdditionalAttributes(CategoryPath category, bool isParent)
        {
        }

        /// <summary>Writes the next drill-down term into the term attribute.</summary>
        /// <returns><c>false</c> once every category and all of its prefixes were emitted.</returns>
        public override bool IncrementToken()
        {
            // current is null before the first category was fetched (or when Reset found none),
            // and has length 0 once all components of the previous category were consumed.
            if (current == null || current.length == 0)
            {
                if (!categories.MoveNext())
                {
                    return false;
                }

                current = categories.Current;
                termAttribute.ResizeBuffer(current.FullPathLength());
                isParent = false;
            }

            int nChars = indexingParams.DrillDownTermText(current, termAttribute.Buffer);
            termAttribute.SetLength(nChars);
            AddAdditionalAttributes(current, isParent);

            // Prepare the next call: drop the last component so the parent path is emitted next.
            current = current.Subpath(current.length - 1);
            isParent = true;
            return true;
        }

        /// <summary>
        /// Positions the stream on the first available category. When there is none the
        /// stream is left empty and <see cref="IncrementToken"/> returns <c>false</c>.
        /// </summary>
        public override void Reset()
        {
            if (categories.MoveNext())
            {
                current = categories.Current;
                termAttribute.ResizeBuffer(current.FullPathLength());
            }
            else
            {
                // No categories: leave nothing to dereference; IncrementToken handles null.
                current = null;
            }

            isParent = false;
        }
    }

    // ------------------------------------------------------------------
    // src/contrib/Facet/Index/FacetFields.cs
    // ------------------------------------------------------------------

    /// <summary>
    /// Adds the fields needed for faceting to a <see cref="Document"/>: per category list,
    /// the encoded ordinals as binary doc-values fields and a drill-down term field.
    /// </summary>
    public class FacetFields
    {
        // Field type for drill-down terms: not stored, DOCS_ONLY, norms omitted, frozen.
        private static readonly FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED);

        static FacetFields()
        {
            DRILL_DOWN_TYPE.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            DRILL_DOWN_TYPE.OmitNorms = true;
            DRILL_DOWN_TYPE.Freeze();
        }

        protected readonly ITaxonomyWriter taxonomyWriter;
        protected readonly FacetIndexingParams indexingParams;

        /// <summary>Creates an instance that uses <c>FacetIndexingParams.DEFAULT</c>.</summary>
        /// <param name="taxonomyWriter">Receives every category that is added.</param>
        public FacetFields(ITaxonomyWriter taxonomyWriter)
            : this(taxonomyWriter, FacetIndexingParams.DEFAULT)
        {
        }

        /// <summary>Creates an instance with explicit indexing parameters.</summary>
        /// <param name="taxonomyWriter">Receives every category that is added.</param>
        /// <param name="params_renamed">The indexing parameters to use.</param>
        public FacetFields(ITaxonomyWriter taxonomyWriter, FacetIndexingParams params_renamed)
        {
            this.taxonomyWriter = taxonomyWriter;
            this.indexingParams = params_renamed;
        }

        /// <summary>
        /// Groups <paramref name="categories"/> by the <see cref="CategoryListParams"/> they map to.
        /// When only one category list is configured, the input sequence itself is returned
        /// as the single group.
        /// </summary>
        protected virtual IDictionary<CategoryListParams, IEnumerable<CategoryPath>> CreateCategoryListMapping(IEnumerable<CategoryPath> categories)
        {
            if (indexingParams.AllCategoryListParams.Count == 1)
            {
                return new Dictionary<CategoryListParams, IEnumerable<CategoryPath>>() { { indexingParams.GetCategoryListParams(null), categories } };
            }

            HashMap<CategoryListParams, IEnumerable<CategoryPath>> categoryLists = new HashMap<CategoryListParams, IEnumerable<CategoryPath>>();
            foreach (CategoryPath cp in categories)
            {
                CategoryListParams clp = indexingParams.GetCategoryListParams(cp);

                // Every value stored in this map is a List<CategoryPath> created below.
                // NOTE(review): relies on HashMap's indexer yielding null for an absent key - confirm.
                List<CategoryPath> list = (List<CategoryPath>)categoryLists[clp];
                if (list == null)
                {
                    list = new List<CategoryPath>();
                    categoryLists[clp] = list;
                }

                list.Add(cp);
            }

            return categoryLists;
        }

        /// <summary>Builds the encoded ordinal payloads using a <see cref="CountingListBuilder"/>.</summary>
        protected virtual IDictionary<String, BytesRef> GetCategoryListData(CategoryListParams categoryListParams, IntsRef ordinals, IEnumerable<CategoryPath> categories)
        {
            return new CountingListBuilder(categoryListParams, indexingParams, taxonomyWriter).Build(ordinals, categories);
        }

        /// <summary>Creates the token stream that produces the drill-down terms.</summary>
        protected virtual DrillDownStream GetDrillDownStream(IEnumerable<CategoryPath> categories)
        {
            return new DrillDownStream(categories, indexingParams);
        }

        /// <summary>Returns the field type used for the drill-down field.</summary>
        protected virtual FieldType DrillDownFieldType()
        {
            return DRILL_DOWN_TYPE;
        }

        /// <summary>
        /// Adds one <see cref="BinaryDocValuesField"/> per payload, named <paramref name="field"/>
        /// followed by the payload's key.
        /// </summary>
        protected virtual void AddCountingListData(Document doc, IDictionary<String, BytesRef> categoriesData, string field)
        {
            foreach (KeyValuePair<String, BytesRef> entry in categoriesData)
            {
                doc.Add(new BinaryDocValuesField(field + entry.Key, entry.Value));
            }
        }

        /// <summary>
        /// Registers <paramref name="categories"/> with the taxonomy writer and adds the
        /// corresponding facet fields to <paramref name="doc"/>.
        /// </summary>
        /// <param name="doc">The document to add fields to.</param>
        /// <param name="categories">
        /// The categories of the document. The sequence is enumerated more than once, so it
        /// should be repeatable (for example a list rather than a one-shot iterator).
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="categories"/> is <c>null</c>.</exception>
        public virtual void AddFields(Document doc, IEnumerable<CategoryPath> categories)
        {
            if (categories == null)
            {
                // ArgumentNullException derives from ArgumentException, so existing handlers still match.
                throw new ArgumentNullException("categories", @"categories should not be null");
            }

            IDictionary<CategoryListParams, IEnumerable<CategoryPath>> categoryLists = CreateCategoryListMapping(categories);
            IntsRef ordinals = new IntsRef(32);
            foreach (KeyValuePair<CategoryListParams, IEnumerable<CategoryPath>> e in categoryLists)
            {
                CategoryListParams clp = e.Key;
                string field = clp.field;

                // Reuse the same IntsRef for every category list.
                ordinals.length = 0;
                int maxNumOrds = 0;
                foreach (CategoryPath cp in e.Value)
                {
                    int ordinal = taxonomyWriter.AddCategory(cp);

                    // Reserve room for the ordinal itself plus, potentially, one per ancestor.
                    maxNumOrds += cp.length;
                    if (ordinals.ints.Length < maxNumOrds)
                    {
                        ordinals.Grow(maxNumOrds);
                    }

                    ordinals.ints[ordinals.length++] = ordinal;
                }

                IDictionary<String, BytesRef> categoriesData = GetCategoryListData(clp, ordinals, e.Value);
                AddCountingListData(doc, categoriesData, field);
                DrillDownStream drillDownStream = GetDrillDownStream(e.Value);
                Field drillDown = new Field(field, drillDownStream, DrillDownFieldType());
                doc.Add(drillDown);
            }
        }
    }

    // ------------------------------------------------------------------
    // src/contrib/Facet/Index/ICategoryListBuilder.cs
    // ------------------------------------------------------------------

    /// <summary>Builds the encoded category-list payloads for a set of categories.</summary>
    public interface ICategoryListBuilder
    {
        /// <summary>
        /// Produces named byte payloads for the given ordinals and their categories.
        /// </summary>
        /// <param name="ordinals">The ordinals of <paramref name="categories"/>.</param>
        /// <param name="categories">The categories the ordinals were assigned to.</param>
        /// <returns>The payloads keyed by name.</returns>
        IDictionary<string, BytesRef> Build(IntsRef ordinals, IEnumerable<CategoryPath> categories);
    }
}
