Port Facet.Codecs
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9e1b6df7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9e1b6df7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9e1b6df7

Branch: refs/heads/branch_4x
Commit: 9e1b6df70f570cfe529c9bcd9c9097b780aa3360
Parents: 2332015
Author: Paul Irwin <[email protected]>
Authored: Sun Nov 3 19:15:58 2013 -0500
Committer: Paul Irwin <[email protected]>
Committed: Sun Nov 3 19:15:58 2013 -0500

----------------------------------------------------------------------
 .../Codecs/Facet42/Facet42BinaryDocValues.cs | 33 ++++++
 .../Facet/Codecs/Facet42/Facet42Codec.cs | 48 ++++++++
 .../Codecs/Facet42/Facet42DocValuesConsumer.cs | 113 +++++++++++++++++++
 .../Codecs/Facet42/Facet42DocValuesFormat.cs | 32 ++++++
 .../Codecs/Facet42/Facet42DocValuesProducer.cs | 71 ++++++++++++
 src/contrib/Facet/Contrib.Facet.csproj | 5 +
 src/core/Codecs/Lucene42/Lucene42Codec.cs | 4 +-
 7 files changed, 304 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs b/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs
new file mode 100644
index 0000000..fdaffd5
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs
@@ -0,0 +1,33 @@
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    internal class Facet42BinaryDocValues : BinaryDocValues // binary doc values served from one in-memory byte array plus a packed address table
+    {
+        private readonly sbyte[] bytes; // every value's bytes, back to back, as read by the constructor
+        private readonly PackedInts.IReader addresses; // indexed by docID and docID + 1 in Get to find where a value starts and ends
+
+        internal Facet42BinaryDocValues(DataInput in_renamed) // reads, in order: VInt byte count, that many bytes, then the packed address reader
+        {
+            int totBytes = in_renamed.ReadVInt();
+            bytes = new sbyte[totBytes];
+            in_renamed.ReadBytes(bytes, 0, totBytes);
+            addresses = PackedInts.GetReader(in_renamed);
+        }
+
+        public override void Get(int docID, BytesRef ret) // fills ret for docID; ret is pointed at the shared array, nothing is copied
+        {
+            int start = (int)addresses.Get(docID);
+            ret.bytes = bytes;
+            ret.offset = start;
+            ret.length = (int)(addresses.Get(docID + 1) - start); // the next entry's address marks the end of this value
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs b/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs
new file mode 100644
index 0000000..58fa223
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs
@@ -0,0 +1,48 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Codecs.Lucene42;
+using Lucene.Net.Facet.Params;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    public class Facet42Codec : Lucene42Codec // Lucene42Codec variant that chooses the doc values format per field
+    {
+        private readonly ISet<String> facetFields; // field names collected from the indexing params in the constructor
+        private readonly DocValuesFormat facetsDVFormat = DocValuesFormat.ForName(@"Facet42");
+        private readonly DocValuesFormat lucene42DVFormat = DocValuesFormat.ForName(@"Lucene42");
+
+        public Facet42Codec() // same as passing FacetIndexingParams.DEFAULT
+            : this(FacetIndexingParams.DEFAULT)
+        {
+        }
+
+        public Facet42Codec(FacetIndexingParams fip) // throws ArgumentException when fip.PartitionSize is anything but int.MaxValue
+        {
+            if (fip.PartitionSize != int.MaxValue)
+            {
+                throw new ArgumentException("this Codec does not support partitions");
+            }
+
+            this.facetFields = new HashSet<String>();
+            foreach (CategoryListParams clp in fip.AllCategoryListParams) // remember the field of every category list
+            {
+                facetFields.Add(clp.field);
+            }
+        }
+
+        public override DocValuesFormat GetDocValuesFormatForField(string field) // "Facet42" format for collected facet fields, "Lucene42" for all others
+        {
+            if (facetFields.Contains(field))
+            {
+                return facetsDVFormat;
+            }
+            else
+            {
+                return lucene42DVFormat;
+            }
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs
new file mode 100644
index 0000000..bf0b4d4
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs
@@ -0,0 +1,113 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    public class Facet42DocValuesConsumer : DocValuesConsumer // writes binary doc values for a segment to a single output; other value kinds are rejected
+    {
+        readonly IndexOutput output;
+        readonly int maxDoc; // taken from state.segmentInfo.DocCount
+        readonly float acceptableOverheadRatio; // forwarded to PackedInts.GetWriter
+
+        public Facet42DocValuesConsumer(SegmentWriteState state) // uses PackedInts.DEFAULT as the overhead ratio
+            : this(state, PackedInts.DEFAULT)
+        {
+        }
+
+        public Facet42DocValuesConsumer(SegmentWriteState state, float acceptableOverheadRatio) // creates the segment's output file and writes the codec header
+        {
+            this.acceptableOverheadRatio = acceptableOverheadRatio;
+            bool success = false;
+            try
+            {
+                string fileName = IndexFileNames.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
+                output = state.directory.CreateOutput(fileName, state.context);
+                CodecUtil.WriteHeader(output, Facet42DocValuesFormat.CODEC, Facet42DocValuesFormat.VERSION_CURRENT);
+                maxDoc = state.segmentInfo.DocCount;
+                success = true;
+            }
+            finally
+            {
+                if (!success) // an exception escaped the try block before success was set
+                {
+                    IOUtils.CloseWhileHandlingException((IDisposable)this);
+                }
+            }
+        }
+
+        public override void AddNumericField(FieldInfo field, IEnumerable<long> values) // always throws NotSupportedException
+        {
+            throw new NotSupportedException(@"FacetsDocValues can only handle binary fields");
+        }
+
+        public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values) // writes: VInt field number, VInt total byte count, all value bytes, packed addresses
+        {
+            output.WriteVInt(field.number);
+            long totBytes = 0;
+            foreach (BytesRef v in values) // pass 1 of 3 over values: total payload size
+            {
+                totBytes += v.length;
+            }
+
+            if (totBytes > int.MaxValue) // the total is written as a VInt below, so it must fit in an int
+            {
+                throw new InvalidOperationException(@"too many facets in one segment: Facet42DocValues cannot handle more than 2 GB facet data per segment");
+            }
+
+            output.WriteVInt((int)totBytes);
+            foreach (BytesRef v in values) // pass 2 of 3: the raw bytes, back to back
+            {
+                output.WriteBytes(v.bytes, v.offset, v.length);
+            }
+
+            PackedInts.Writer w = PackedInts.GetWriter(output, maxDoc + 1, PackedInts.BitsRequired(totBytes + 1), acceptableOverheadRatio);
+            int address = 0; // running start offset of the current value within the bytes written above
+            foreach (BytesRef v in values) // pass 3 of 3: one start address per value
+            {
+                w.Add(address);
+                address += v.length;
+            }
+
+            w.Add(address); // closing address: the end of the last value
+            w.Finish();
+        }
+
+        public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<int> docToOrd) // always throws NotSupportedException
+        {
+            throw new NotSupportedException(@"FacetsDocValues can only handle binary fields");
+        }
+
+        public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<int> docToOrdCount, IEnumerable<long> ords) // always throws NotSupportedException
+        {
+            throw new NotSupportedException(@"FacetsDocValues can only handle binary fields");
+        }
+
+        protected override void Dispose(bool disposing) // writes the end marker, then closes the output; the disposing flag is not consulted
+        {
+            bool success = false;
+            try
+            {
+                output.WriteVInt(-1); // -1 is the field number Facet42DocValuesProducer stops reading at
+                success = true;
+            }
+            finally
+            {
+                if (success)
+                {
+                    IOUtils.Close(output);
+                }
+                else
+                {
+                    IOUtils.CloseWhileHandlingException((IDisposable)output);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs
new file mode 100644
index 0000000..4de3192
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs
@@ -0,0 +1,32 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    public sealed class Facet42DocValuesFormat : DocValuesFormat // pairs Facet42DocValuesConsumer with Facet42DocValuesProducer
+    {
+        public const string CODEC = @"FacetsDocValues"; // name the consumer writes into the file header and the producer checks
+        public const string EXTENSION = @"fdv"; // extension both sides pass to IndexFileNames.SegmentFileName
+        public const int VERSION_START = 0;
+        public const int VERSION_CURRENT = VERSION_START;
+
+        public Facet42DocValuesFormat() // passes the format name "Facet42" to the base class
+            : base(@"Facet42")
+        {
+        }
+
+        public override DocValuesConsumer FieldsConsumer(SegmentWriteState state) // returns a new Facet42DocValuesConsumer for state
+        {
+            return new Facet42DocValuesConsumer(state);
+        }
+
+        public override DocValuesProducer FieldsProducer(SegmentReadState state) // returns a new Facet42DocValuesProducer for state
+        {
+            return new Facet42DocValuesProducer(state);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs
new file mode 100644
index 0000000..6ec8239
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs
@@ -0,0 +1,71 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    internal class Facet42DocValuesProducer : DocValuesProducer // loads every field's binary doc values in the constructor and serves them by field number
+    {
+        private readonly IDictionary<int, Facet42BinaryDocValues> fields = new HashMap<int, Facet42BinaryDocValues>(); // keyed by field number
+
+        internal Facet42DocValuesProducer(SegmentReadState state) // opens the segment file, reads all fields, and closes the file again before returning
+        {
+            string fileName = IndexFileNames.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
+            IndexInput input = state.directory.OpenInput(fileName, state.context);
+            bool success = false;
+            try
+            {
+                CodecUtil.CheckHeader(input, Facet42DocValuesFormat.CODEC, Facet42DocValuesFormat.VERSION_START, Facet42DocValuesFormat.VERSION_START); // both version arguments are VERSION_START, which VERSION_CURRENT also equals
+                int fieldNumber = input.ReadVInt();
+                while (fieldNumber != -1) // a field number of -1 ends the list of fields
+                {
+                    fields[fieldNumber] = new Facet42BinaryDocValues(input);
+                    fieldNumber = input.ReadVInt();
+                }
+
+                success = true;
+            }
+            finally
+            {
+                if (success) // input is closed on both paths; only the IOUtils helper differs
+                {
+                    IOUtils.Close(input);
+                }
+                else
+                {
+                    IOUtils.CloseWhileHandlingException((IDisposable)input);
+                }
+            }
+        }
+
+        public override NumericDocValues GetNumeric(FieldInfo field) // always throws NotSupportedException
+        {
+            throw new NotSupportedException(@"FacetsDocValues only implements binary");
+        }
+
+        public override BinaryDocValues GetBinary(FieldInfo field) // returns the values loaded by the constructor for field.number
+        {
+            return fields[field.number];
+        }
+
+        public override SortedDocValues GetSorted(FieldInfo field) // always throws NotSupportedException
+        {
+            throw new NotSupportedException(@"FacetsDocValues only implements binary");
+        }
+
+        public override SortedSetDocValues GetSortedSet(FieldInfo field) // always throws NotSupportedException
+        {
+            throw new NotSupportedException(@"FacetsDocValues only implements binary");
+        }
+
+        protected override void Dispose(bool disposing) // intentionally empty: the constructor has already closed the input
+        {
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Contrib.Facet.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Contrib.Facet.csproj b/src/contrib/Facet/Contrib.Facet.csproj
index ff48e89..750ab35 100644
--- a/src/contrib/Facet/Contrib.Facet.csproj
+++ b/src/contrib/Facet/Contrib.Facet.csproj
@@ -39,6 +39,11 @@
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="Codecs\Facet42\Facet42BinaryDocValues.cs" />
+    <Compile Include="Codecs\Facet42\Facet42Codec.cs" />
+    <Compile Include="Codecs\Facet42\Facet42DocValuesConsumer.cs" />
+    <Compile Include="Codecs\Facet42\Facet42DocValuesFormat.cs" />
+    <Compile Include="Codecs\Facet42\Facet42DocValuesProducer.cs" />
     <Compile Include="Collections\LRUHashMap.cs" />
     <Compile Include="Encoding\DGapVInt8IntDecoder.cs" />
     <Compile Include="Encoding\DGapVInt8IntEncoder.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/core/Codecs/Lucene42/Lucene42Codec.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Lucene42/Lucene42Codec.cs b/src/core/Codecs/Lucene42/Lucene42Codec.cs
index 2484e34..c8c766e 100644
--- a/src/core/Codecs/Lucene42/Lucene42Codec.cs
+++ b/src/core/Codecs/Lucene42/Lucene42Codec.cs
@@ -88,12 +88,12 @@ namespace Lucene.Net.Codecs.Lucene42
             get { return liveDocsFormat; }
         }
 
-        public PostingsFormat GetPostingsFormatForField(String field)
+        public virtual PostingsFormat GetPostingsFormatForField(String field) // now virtual; this base version returns defaultFormat for every field
         {
             return defaultFormat;
         }
 
-        public DocValuesFormat GetDocValuesFormatForField(String field)
+        public virtual DocValuesFormat GetDocValuesFormatForField(String field) // now virtual; Facet42Codec in this commit overrides it, this base version returns defaultDVFormat
         {
             return defaultDVFormat;
         }
