Port Facet.Index

Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/71e218cb
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/71e218cb
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/71e218cb

Branch: refs/heads/branch_4x
Commit: 71e218cb010cd5819b9774fe76c41b9a3344dc8a
Parents: b362722
Author: Paul Irwin <[email protected]>
Authored: Wed Nov 6 09:49:10 2013 -0500
Committer: Paul Irwin <[email protected]>
Committed: Wed Nov 6 09:49:10 2013 -0500

----------------------------------------------------------------------
 src/contrib/Facet/Contrib.Facet.csproj          |   4 +
 src/contrib/Facet/Index/CountingListBuilder.cs  | 139 +++++++++++++++++++
 src/contrib/Facet/Index/DrillDownStream.cs      |  62 +++++++++
 src/contrib/Facet/Index/FacetFields.cs          | 121 ++++++++++++++++
 src/contrib/Facet/Index/ICategoryListBuilder.cs |  14 ++
 5 files changed, 340 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Contrib.Facet.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Contrib.Facet.csproj b/src/contrib/Facet/Contrib.Facet.csproj
index 1c26fcb..5c15e5b 100644
--- a/src/contrib/Facet/Contrib.Facet.csproj
+++ b/src/contrib/Facet/Contrib.Facet.csproj
@@ -74,6 +74,10 @@
     <Compile Include="Encoding\UniqueValuesIntEncoder.cs" />
     <Compile Include="Encoding\VInt8IntDecoder.cs" />
     <Compile Include="Encoding\VInt8IntEncoder.cs" />
+    <Compile Include="Index\CountingListBuilder.cs" />
+    <Compile Include="Index\DrillDownStream.cs" />
+    <Compile Include="Index\FacetFields.cs" />
+    <Compile Include="Index\ICategoryListBuilder.cs" />
     <Compile Include="Params\CategoryListParams.cs" />
     <Compile Include="Params\FacetIndexingParams.cs" />
     <Compile Include="Params\FacetSearchParams.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/CountingListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Index/CountingListBuilder.cs b/src/contrib/Facet/Index/CountingListBuilder.cs
new file mode 100644
index 0000000..3082c35
--- /dev/null
+++ b/src/contrib/Facet/Index/CountingListBuilder.cs
@@ -0,0 +1,139 @@
+using Lucene.Net.Facet.Encoding;
+using Lucene.Net.Facet.Params;
+using Lucene.Net.Facet.Taxonomy;
+using Lucene.Net.Facet.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Index
+{
+    public class CountingListBuilder : ICategoryListBuilder
+    {
+        private abstract class OrdinalsEncoder
+        {
+            internal OrdinalsEncoder()
+            {
+            }
+
+            public abstract IDictionary<string, BytesRef> Encode(IntsRef ordinals);
+        }
+
+        private sealed class NoPartitionsOrdinalsEncoder : OrdinalsEncoder
+        {
+            private readonly IntEncoder encoder;
+            private readonly string name = @"";
+            
+            internal NoPartitionsOrdinalsEncoder(CategoryListParams categoryListParams)
+            {
+                encoder = categoryListParams.CreateEncoder();
+            }
+
+            public override IDictionary<string, BytesRef> Encode(IntsRef ordinals)
+            {
+                BytesRef bytes = new BytesRef(128);
+                encoder.Encode(ordinals, bytes);
+                return new Dictionary<string, BytesRef>() { { name, bytes } };
+            }
+        }
+
+        private sealed class PerPartitionOrdinalsEncoder : OrdinalsEncoder
+        {
+            private readonly FacetIndexingParams indexingParams;
+            private readonly CategoryListParams categoryListParams;
+            private readonly int partitionSize;
+            private readonly HashMap<String, IntEncoder> partitionEncoder = new HashMap<String, IntEncoder>();
+
+            internal PerPartitionOrdinalsEncoder(FacetIndexingParams indexingParams, CategoryListParams categoryListParams)
+            {
+                this.indexingParams = indexingParams;
+                this.categoryListParams = categoryListParams;
+                this.partitionSize = indexingParams.PartitionSize;
+            }
+
+            public override IDictionary<String, BytesRef> Encode(IntsRef ordinals)
+            {
+                HashMap<String, IntsRef> partitionOrdinals = new HashMap<String, IntsRef>();
+                for (int i = 0; i < ordinals.length; i++)
+                {
+                    int ordinal = ordinals.ints[i];
+                    string name = PartitionsUtils.PartitionNameByOrdinal(indexingParams, ordinal);
+                    IntsRef partitionOrds = partitionOrdinals[name];
+                    if (partitionOrds == null)
+                    {
+                        partitionOrds = new IntsRef(32);
+                        partitionOrdinals[name] = partitionOrds;
+                        partitionEncoder[name] = categoryListParams.CreateEncoder();
+                    }
+
+                    partitionOrds.ints[partitionOrds.length++] = ordinal % partitionSize;
+                }
+
+                HashMap<String, BytesRef> partitionBytes = new HashMap<String, BytesRef>();
+                foreach (KeyValuePair<String, IntsRef> e in partitionOrdinals)
+                {
+                    string name = e.Key;
+                    IntEncoder encoder = partitionEncoder[name];
+                    BytesRef bytes = new BytesRef(128);
+                    encoder.Encode(e.Value, bytes);
+                    partitionBytes[name] = bytes;
+                }
+
+                return partitionBytes;
+            }
+        }
+
+        private readonly OrdinalsEncoder ordinalsEncoder;
+        private readonly ITaxonomyWriter taxoWriter;
+        private readonly CategoryListParams clp;
+
+        public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams, ITaxonomyWriter taxoWriter)
+        {
+            this.taxoWriter = taxoWriter;
+            this.clp = categoryListParams;
+            if (indexingParams.PartitionSize == int.MaxValue)
+            {
+                ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
+            }
+            else
+            {
+                ordinalsEncoder = new PerPartitionOrdinalsEncoder(indexingParams, categoryListParams);
+            }
+        }
+
+        public IDictionary<string, BytesRef> Build(IntsRef ordinals, IEnumerable<CategoryPath> categories)
+        {
+            int upto = ordinals.length;
+            IEnumerator<CategoryPath> iter = categories.GetEnumerator();
+            for (int i = 0; i < upto; i++)
+            {
+                int ordinal = ordinals.ints[i];
+                iter.MoveNext();
+                CategoryPath cp = iter.Current;
+                CategoryListParams.OrdinalPolicy op = clp.GetOrdinalPolicy(cp.components[0]);
+                if (op != CategoryListParams.OrdinalPolicy.NO_PARENTS)
+                {
+                    int parent = taxoWriter.GetParent(ordinal);
+                    if (parent > 0)
+                    {
+                        while (parent > 0)
+                        {
+                            ordinals.ints[ordinals.length++] = parent;
+                            parent = taxoWriter.GetParent(parent);
+                        }
+
+                        if (op == CategoryListParams.OrdinalPolicy.ALL_BUT_DIMENSION)
+                        {
+                            ordinals.length--;
+                        }
+                    }
+                }
+            }
+
+            return ordinalsEncoder.Encode(ordinals);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/DrillDownStream.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Index/DrillDownStream.cs b/src/contrib/Facet/Index/DrillDownStream.cs
new file mode 100644
index 0000000..64c998e
--- /dev/null
+++ b/src/contrib/Facet/Index/DrillDownStream.cs
@@ -0,0 +1,62 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Facet.Params;
+using Lucene.Net.Facet.Taxonomy;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Index
+{
+    public class DrillDownStream : TokenStream
+    {
+        private readonly FacetIndexingParams indexingParams;
+        private readonly IEnumerator<CategoryPath> categories;
+        private readonly ICharTermAttribute termAttribute;
+        private CategoryPath current;
+        private bool isParent;
+
+        public DrillDownStream(IEnumerable<CategoryPath> categories, FacetIndexingParams indexingParams)
+        {
+            termAttribute = AddAttribute<ICharTermAttribute>();
+            this.categories = categories.GetEnumerator();
+            this.indexingParams = indexingParams;
+        }
+
+        protected virtual void AddAdditionalAttributes(CategoryPath category, bool isParent)
+        {
+        }
+
+        public override bool IncrementToken()
+        {
+            if (current.length == 0)
+            {
+                if (!categories.MoveNext())
+                {
+                    return false;
+                }
+
+                current = categories.Current;
+                termAttribute.ResizeBuffer(current.FullPathLength());
+                isParent = false;
+            }
+
+            int nChars = indexingParams.DrillDownTermText(current, termAttribute.Buffer);
+            termAttribute.SetLength(nChars);
+            AddAdditionalAttributes(current, isParent);
+            current = current.Subpath(current.length - 1);
+            isParent = true;
+            return true;
+        }
+
+        public override void Reset()
+        {
+            // TODO: validate this logic
+            categories.MoveNext();
+            current = categories.Current;
+            termAttribute.ResizeBuffer(current.FullPathLength());
+            isParent = false;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/FacetFields.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Index/FacetFields.cs b/src/contrib/Facet/Index/FacetFields.cs
new file mode 100644
index 0000000..e950a3d
--- /dev/null
+++ b/src/contrib/Facet/Index/FacetFields.cs
@@ -0,0 +1,121 @@
+using Lucene.Net.Documents;
+using Lucene.Net.Facet.Params;
+using Lucene.Net.Facet.Taxonomy;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Index
+{
+    public class FacetFields
+    {
+        private static readonly FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
+
+        static FacetFields()
+        {
+            DRILL_DOWN_TYPE.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
+            DRILL_DOWN_TYPE.OmitNorms = true;
+            DRILL_DOWN_TYPE.Freeze();
+        }
+
+        protected readonly ITaxonomyWriter taxonomyWriter;
+        protected readonly FacetIndexingParams indexingParams;
+
+        public FacetFields(ITaxonomyWriter taxonomyWriter)
+            : this(taxonomyWriter, FacetIndexingParams.DEFAULT)
+        {
+        }
+
+        public FacetFields(ITaxonomyWriter taxonomyWriter, FacetIndexingParams params_renamed)
+        {
+            this.taxonomyWriter = taxonomyWriter;
+            this.indexingParams = params_renamed;
+        }
+
+        protected virtual IDictionary<CategoryListParams, IEnumerable<CategoryPath>> CreateCategoryListMapping(IEnumerable<CategoryPath> categories)
+        {
+            if (indexingParams.AllCategoryListParams.Count == 1)
+            {
+                return new Dictionary<CategoryListParams, IEnumerable<CategoryPath>>() { { indexingParams.GetCategoryListParams(null), categories } };
+            }
+
+            HashMap<CategoryListParams, IEnumerable<CategoryPath>> categoryLists = new HashMap<CategoryListParams, IEnumerable<CategoryPath>>();
+            foreach (CategoryPath cp in categories)
+            {
+                CategoryListParams clp = indexingParams.GetCategoryListParams(cp);
+                List<CategoryPath> list = (List<CategoryPath>)categoryLists[clp];
+                if (list == null)
+                {
+                    list = new List<CategoryPath>();
+                    categoryLists[clp] = list;
+                }
+
+                list.Add(cp);
+            }
+
+            return categoryLists;
+        }
+
+        protected virtual IDictionary<String, BytesRef> GetCategoryListData(CategoryListParams categoryListParams, IntsRef ordinals, IEnumerable<CategoryPath> categories)
+        {
+            return new CountingListBuilder(categoryListParams, indexingParams, taxonomyWriter).Build(ordinals, categories);
+        }
+
+        protected virtual DrillDownStream GetDrillDownStream(IEnumerable<CategoryPath> categories)
+        {
+            return new DrillDownStream(categories, indexingParams);
+        }
+
+        protected virtual FieldType DrillDownFieldType()
+        {
+            return DRILL_DOWN_TYPE;
+        }
+
+        protected virtual void AddCountingListData(Document doc, IDictionary<String, BytesRef> categoriesData, string field)
+        {
+            foreach (KeyValuePair<String, BytesRef> entry in categoriesData)
+            {
+                doc.Add(new BinaryDocValuesField(field + entry.Key, entry.Value));
+            }
+        }
+
+        public virtual void AddFields(Document doc, IEnumerable<CategoryPath> categories)
+        {
+            if (categories == null)
+            {
+                throw new ArgumentException(@"categories should not be null");
+            }
+
+            IDictionary<CategoryListParams, IEnumerable<CategoryPath>> categoryLists = CreateCategoryListMapping(categories);
+            IntsRef ordinals = new IntsRef(32);
+            foreach (KeyValuePair<CategoryListParams, IEnumerable<CategoryPath>> e in categoryLists)
+            {
+                CategoryListParams clp = e.Key;
+                string field = clp.field;
+                ordinals.length = 0;
+                int maxNumOrds = 0;
+                foreach (CategoryPath cp in e.Value)
+                {
+                    int ordinal = taxonomyWriter.AddCategory(cp);
+                    maxNumOrds += cp.length;
+                    if (ordinals.ints.Length < maxNumOrds)
+                    {
+                        ordinals.Grow(maxNumOrds);
+                    }
+
+                    ordinals.ints[ordinals.length++] = ordinal;
+                }
+
+                IDictionary<String, BytesRef> categoriesData = GetCategoryListData(clp, ordinals, e.Value);
+                AddCountingListData(doc, categoriesData, field);
+                DrillDownStream drillDownStream = GetDrillDownStream(e.Value);
+                Field drillDown = new Field(field, drillDownStream, DrillDownFieldType());
+                doc.Add(drillDown);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/ICategoryListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Index/ICategoryListBuilder.cs b/src/contrib/Facet/Index/ICategoryListBuilder.cs
new file mode 100644
index 0000000..1b0890f
--- /dev/null
+++ b/src/contrib/Facet/Index/ICategoryListBuilder.cs
@@ -0,0 +1,14 @@
+using Lucene.Net.Facet.Taxonomy;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Index
+{
+    public interface ICategoryListBuilder
+    {
+        IDictionary<string, BytesRef> Build(IntsRef ordinals, IEnumerable<CategoryPath> categories);
+    }
+}

Reply via email to