Lucene.Net.TestFramework: Renamed Codecs\lucene40\ to Codecs\Lucene40\
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/c0e9469c Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/c0e9469c Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/c0e9469c Branch: refs/heads/api-work Commit: c0e9469cc2dabf993d19e1ef342956778dfe686e Parents: 8304ca8 Author: Shad Storhaug <[email protected]> Authored: Sun Feb 26 03:12:28 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Mon Feb 27 06:17:57 2017 +0700 ---------------------------------------------------------------------- .../Codecs/Lucene40/Lucene40DocValuesWriter.cs | 624 +++++++++++++++++++ .../Codecs/Lucene40/Lucene40FieldInfosWriter.cs | 134 ++++ .../Codecs/Lucene40/Lucene40PostingsWriter.cs | 381 +++++++++++ .../Codecs/Lucene40/Lucene40RWCodec.cs | 100 +++ .../Lucene40/Lucene40RWDocValuesFormat.cs | 66 ++ .../Codecs/Lucene40/Lucene40RWNormsFormat.cs | 66 ++ .../Codecs/Lucene40/Lucene40RWPostingsFormat.cs | 84 +++ .../Codecs/Lucene40/Lucene40SkipListWriter.cs | 168 +++++ .../Codecs/lucene40/Lucene40DocValuesWriter.cs | 624 ------------------- .../Codecs/lucene40/Lucene40FieldInfosWriter.cs | 134 ---- .../Codecs/lucene40/Lucene40PostingsWriter.cs | 381 ----------- .../Codecs/lucene40/Lucene40RWCodec.cs | 100 --- .../lucene40/Lucene40RWDocValuesFormat.cs | 66 -- .../Codecs/lucene40/Lucene40RWNormsFormat.cs | 66 -- .../Codecs/lucene40/Lucene40RWPostingsFormat.cs | 84 --- .../Codecs/lucene40/Lucene40SkipListWriter.cs | 168 ----- .../Lucene.Net.TestFramework.csproj | 16 +- 17 files changed, 1631 insertions(+), 1631 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40DocValuesWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40DocValuesWriter.cs 
b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40DocValuesWriter.cs new file mode 100644 index 0000000..42856fc --- /dev/null +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40DocValuesWriter.cs @@ -0,0 +1,624 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace Lucene.Net.Codecs.Lucene40 +{ + using BytesRef = Lucene.Net.Util.BytesRef; + using CompoundFileDirectory = Lucene.Net.Store.CompoundFileDirectory; + using Directory = Lucene.Net.Store.Directory; + using FieldInfo = Lucene.Net.Index.FieldInfo; + using IndexFileNames = Lucene.Net.Index.IndexFileNames; + using IndexOutput = Lucene.Net.Store.IndexOutput; + using IOUtils = Lucene.Net.Util.IOUtils; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using LegacyDocValuesType = Lucene.Net.Codecs.Lucene40.Lucene40FieldInfosReader.LegacyDocValuesType; + using PackedInt32s = Lucene.Net.Util.Packed.PackedInt32s; + using SegmentWriteState = Lucene.Net.Index.SegmentWriteState; + +#pragma warning disable 612, 618 + internal class Lucene40DocValuesWriter : DocValuesConsumer + { + private readonly Directory Dir; + private readonly SegmentWriteState State; + private readonly string LegacyKey; + private const string SegmentSuffix = "dv"; + + // note: intentionally ignores seg suffix + internal Lucene40DocValuesWriter(SegmentWriteState state, string filename, string legacyKey) + { + this.State = state; + this.LegacyKey = legacyKey; + this.Dir = new CompoundFileDirectory(state.Directory, filename, state.Context, true); + } + + public override void AddNumericField(FieldInfo field, IEnumerable<long?> values) + { + // examine the values to determine best type to use + long minValue = long.MaxValue; + long maxValue = long.MinValue; + foreach (long? n in values) + { + long v = n == null ? 
0 : (long)n; + minValue = Math.Min(minValue, v); + maxValue = Math.Max(maxValue, v); + } + + string fileName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat"); + IndexOutput data = Dir.CreateOutput(fileName, State.Context); + bool success = false; + try + { + if (minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue && PackedInt32s.BitsRequired(maxValue - minValue) > 4) + { + // fits in a byte[], would be more than 4bpv, just write byte[] + AddBytesField(field, data, values); + } + else if (minValue >= short.MinValue && maxValue <= short.MaxValue && PackedInt32s.BitsRequired(maxValue - minValue) > 8) + { + // fits in a short[], would be more than 8bpv, just write short[] + AddShortsField(field, data, values); + } + else if (minValue >= int.MinValue && maxValue <= int.MaxValue && PackedInt32s.BitsRequired(maxValue - minValue) > 16) + { + // fits in a int[], would be more than 16bpv, just write int[] + AddIntsField(field, data, values); + } + else + { + AddVarIntsField(field, data, values, minValue, maxValue); + } + success = true; + } + finally + { + if (success) + { + IOUtils.Close(data); + } + else + { + IOUtils.CloseWhileHandlingException(data); + } + } + } + + private void AddBytesField(FieldInfo field, IndexOutput output, IEnumerable<long?> values) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.FIXED_INTS_8.Name); + CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_CURRENT); + output.WriteInt32(1); // size + foreach (long? n in values) + { + output.WriteByte(n == null ? 
(byte)0 : (byte)n); + } + } + + private void AddShortsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.FIXED_INTS_16.Name); + CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_CURRENT); + output.WriteInt32(2); // size + foreach (long? n in values) + { + output.WriteInt16(n == null ? (short)0 : (short)n); + } + } + + private void AddIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.FIXED_INTS_32.Name); + CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_CURRENT); + output.WriteInt32(4); // size + foreach (long? n in values) + { + output.WriteInt32(n == null ? 0 : (int)n); + } + } + + private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values, long minValue, long maxValue) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.VAR_INTS.Name); + + CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT); + + long delta = maxValue - minValue; + + if (delta < 0) + { + // writes longs + output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64); + foreach (long? n in values) + { + output.WriteInt64(n == null ? 0 : n.Value); + } + } + else + { + // writes packed ints + output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED); + output.WriteInt64(minValue); + output.WriteInt64(0 - minValue); // default value (representation of 0) + PackedInt32s.Writer writer = PackedInt32s.GetWriter(output, State.SegmentInfo.DocCount, PackedInt32s.BitsRequired(delta), PackedInt32s.DEFAULT); + foreach (long? n in values) + { + long v = n == null ? 
0 : (long)n; + writer.Add(v - minValue); + } + writer.Finish(); + } + } + + public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values) + { + // examine the values to determine best type to use + HashSet<BytesRef> uniqueValues = new HashSet<BytesRef>(); + int minLength = int.MaxValue; + int maxLength = int.MinValue; + + var vals = values.ToArray(); + + for (int i = 0; i < vals.Length; i++) + { + var b = vals[i]; + + if (b == null) + { + b = vals[i] = new BytesRef(); // 4.0 doesnt distinguish + } + if (b.Length > Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH) + { + throw new System.ArgumentException("DocValuesField \"" + field.Name + "\" is too large, must be <= " + Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH); + } + minLength = Math.Min(minLength, b.Length); + maxLength = Math.Max(maxLength, b.Length); + if (uniqueValues != null) + { + if (uniqueValues.Add(BytesRef.DeepCopyOf(b))) + { + if (uniqueValues.Count > 256) + { + uniqueValues = null; + } + } + } + } + + int maxDoc = State.SegmentInfo.DocCount; + bool @fixed = minLength == maxLength; + bool dedup = uniqueValues != null && uniqueValues.Count * 2 < maxDoc; + + if (dedup) + { + // we will deduplicate and deref values + bool success = false; + IndexOutput data = null; + IndexOutput index = null; + string dataName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat"); + string indexName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "idx"); + try + { + data = Dir.CreateOutput(dataName, State.Context); + index = Dir.CreateOutput(indexName, State.Context); + if (@fixed) + { + AddFixedDerefBytesField(field, data, index, values, minLength); + } + else + { + AddVarDerefBytesField(field, data, index, values); + } + success = true; + } + finally + { + if (success) + { + IOUtils.Close(data, index); + } + else + { + IOUtils.CloseWhileHandlingException(data, index); + 
} + } + } + else + { + // we dont deduplicate, just write values straight + if (@fixed) + { + // fixed byte[] + string fileName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat"); + IndexOutput data = Dir.CreateOutput(fileName, State.Context); + bool success = false; + try + { + AddFixedStraightBytesField(field, data, values, minLength); + success = true; + } + finally + { + if (success) + { + IOUtils.Close(data); + } + else + { + IOUtils.CloseWhileHandlingException(data); + } + } + } + else + { + // variable byte[] + bool success = false; + IndexOutput data = null; + IndexOutput index = null; + string dataName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat"); + string indexName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "idx"); + try + { + data = Dir.CreateOutput(dataName, State.Context); + index = Dir.CreateOutput(indexName, State.Context); + AddVarStraightBytesField(field, data, index, values); + success = true; + } + finally + { + if (success) + { + IOUtils.Close(data, index); + } + else + { + IOUtils.CloseWhileHandlingException(data, index); + } + } + } + } + } + + private void AddFixedStraightBytesField(FieldInfo field, IndexOutput output, IEnumerable<BytesRef> values, int length) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_STRAIGHT.Name); + + CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_CODEC_NAME, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_CURRENT); + + output.WriteInt32(length); + foreach (BytesRef v in values) + { + if (v != null) + { + output.WriteBytes(v.Bytes, v.Offset, v.Length); + } + } + } + + // NOTE: 4.0 file format docs are crazy/wrong here... 
+ private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.Name); + + CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT); + + CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT); + + /* values */ + + long startPos = data.FilePointer; + + foreach (BytesRef v in values) + { + if (v != null) + { + data.WriteBytes(v.Bytes, v.Offset, v.Length); + } + } + + /* addresses */ + + long maxAddress = data.FilePointer - startPos; + index.WriteVInt64(maxAddress); + + int maxDoc = State.SegmentInfo.DocCount; + Debug.Assert(maxDoc != int.MaxValue); // unsupported by the 4.0 impl + + PackedInt32s.Writer w = PackedInt32s.GetWriter(index, maxDoc + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT); + long currentPosition = 0; + foreach (BytesRef v in values) + { + w.Add(currentPosition); + if (v != null) + { + currentPosition += v.Length; + } + } + // write sentinel + Debug.Assert(currentPosition == maxAddress); + w.Add(currentPosition); + w.Finish(); + } + + private void AddFixedDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, int length) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_DEREF.Name); + + CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT); + + CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT); + + // deduplicate + SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>(); + foreach (BytesRef v in values) + { + dictionary.Add(v == null ? 
new BytesRef() : BytesRef.DeepCopyOf(v)); + } + + /* values */ + data.WriteInt32(length); + foreach (BytesRef v in dictionary) + { + data.WriteBytes(v.Bytes, v.Offset, v.Length); + } + + /* ordinals */ + int valueCount = dictionary.Count; + Debug.Assert(valueCount > 0); + index.WriteInt32(valueCount); + int maxDoc = State.SegmentInfo.DocCount; + PackedInt32s.Writer w = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT); + + BytesRef brefDummy; + foreach (BytesRef v in values) + { + brefDummy = v; + + if (v == null) + { + brefDummy = new BytesRef(); + } + //int ord = dictionary.HeadSet(brefDummy).Size(); + int ord = dictionary.Count(@ref => @ref.CompareTo(brefDummy) < 0); + w.Add(ord); + } + w.Finish(); + } + + private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.Name); + + CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT); + + CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT); + + // deduplicate + SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>(); + foreach (BytesRef v in values) + { + dictionary.Add(v == null ? 
new BytesRef() : BytesRef.DeepCopyOf(v)); + } + + /* values */ + long startPosition = data.FilePointer; + long currentAddress = 0; + Dictionary<BytesRef, long> valueToAddress = new Dictionary<BytesRef, long>(); + foreach (BytesRef v in dictionary) + { + currentAddress = data.FilePointer - startPosition; + valueToAddress[v] = currentAddress; + WriteVShort(data, v.Length); + data.WriteBytes(v.Bytes, v.Offset, v.Length); + } + + /* ordinals */ + long totalBytes = data.FilePointer - startPosition; + index.WriteInt64(totalBytes); + int maxDoc = State.SegmentInfo.DocCount; + PackedInt32s.Writer w = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(currentAddress), PackedInt32s.DEFAULT); + + foreach (BytesRef v in values) + { + w.Add(valueToAddress[v == null ? new BytesRef() : v]); + } + w.Finish(); + } + + // the little vint encoding used for var-deref + private static void WriteVShort(IndexOutput o, int i) + { + Debug.Assert(i >= 0 && i <= short.MaxValue); + if (i < 128) + { + o.WriteByte((byte)(sbyte)i); + } + else + { + o.WriteByte((byte)unchecked((sbyte)(0x80 | (i >> 8)))); + o.WriteByte((byte)unchecked((sbyte)(i & 0xff))); + } + } + + public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd) + { + // examine the values to determine best type to use + int minLength = int.MaxValue; + int maxLength = int.MinValue; + foreach (BytesRef b in values) + { + minLength = Math.Min(minLength, b.Length); + maxLength = Math.Max(maxLength, b.Length); + } + + // but dont use fixed if there are missing values (we are simulating how lucene40 wrote dv...) 
+ bool anyMissing = false; + foreach (long n in docToOrd) + { + if ((long)n == -1) + { + anyMissing = true; + break; + } + } + + bool success = false; + IndexOutput data = null; + IndexOutput index = null; + string dataName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat"); + string indexName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "idx"); + + try + { + data = Dir.CreateOutput(dataName, State.Context); + index = Dir.CreateOutput(indexName, State.Context); + if (minLength == maxLength && !anyMissing) + { + // fixed byte[] + AddFixedSortedBytesField(field, data, index, values, docToOrd, minLength); + } + else + { + // var byte[] + // three cases for simulating the old writer: + // 1. no missing + // 2. missing (and empty string in use): remap ord=-1 -> ord=0 + // 3. missing (and empty string not in use): remap all ords +1, insert empty string into values + if (!anyMissing) + { + AddVarSortedBytesField(field, data, index, values, docToOrd); + } + else if (minLength == 0) + { + AddVarSortedBytesField(field, data, index, values, MissingOrdRemapper.MapMissingToOrd0(docToOrd)); + } + else + { + AddVarSortedBytesField(field, data, index, MissingOrdRemapper.InsertEmptyValue(values), MissingOrdRemapper.MapAllOrds(docToOrd)); + } + } + success = true; + } + finally + { + if (success) + { + IOUtils.Close(data, index); + } + else + { + IOUtils.CloseWhileHandlingException(data, index); + } + } + } + + private void AddFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd, int length) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.Name); + + CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT); + + CodecUtil.WriteHeader(index, 
Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT); + + /* values */ + + data.WriteInt32(length); + int valueCount = 0; + foreach (BytesRef v in values) + { + data.WriteBytes(v.Bytes, v.Offset, v.Length); + valueCount++; + } + + /* ordinals */ + + index.WriteInt32(valueCount); + int maxDoc = State.SegmentInfo.DocCount; + Debug.Assert(valueCount > 0); + PackedInt32s.Writer w = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT); + foreach (long n in docToOrd) + { + w.Add((long)n); + } + w.Finish(); + } + + private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd) + { + field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.Name); + + CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT); + + CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT); + + /* values */ + + long startPos = data.FilePointer; + + int valueCount = 0; + foreach (BytesRef v in values) + { + data.WriteBytes(v.Bytes, v.Offset, v.Length); + valueCount++; + } + + /* addresses */ + + long maxAddress = data.FilePointer - startPos; + index.WriteInt64(maxAddress); + + Debug.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl + + PackedInt32s.Writer w = PackedInt32s.GetWriter(index, valueCount + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT); + long currentPosition = 0; + foreach (BytesRef v in values) + { + w.Add(currentPosition); + currentPosition += v.Length; + } + // write sentinel + Debug.Assert(currentPosition == maxAddress); + w.Add(currentPosition); + w.Finish(); + + /* ordinals */ + + int maxDoc = State.SegmentInfo.DocCount; + Debug.Assert(valueCount > 0); + 
PackedInt32s.Writer ords = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT); + foreach (long n in docToOrd) + { + ords.Add((long)n); + } + ords.Finish(); + } + + public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords) + { + throw new System.NotSupportedException("Lucene 4.0 does not support SortedSet docvalues"); + } + + protected override void Dispose(bool disposing) + { + Dir.Dispose(); + } + } +#pragma warning restore 612, 618 +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40FieldInfosWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40FieldInfosWriter.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40FieldInfosWriter.cs new file mode 100644 index 0000000..688e365 --- /dev/null +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40FieldInfosWriter.cs @@ -0,0 +1,134 @@ +using System; +using System.Diagnostics; + +namespace Lucene.Net.Codecs.Lucene40 +{ + using Directory = Lucene.Net.Store.Directory; + using DocValuesType = Lucene.Net.Index.DocValuesType; + using FieldInfo = Lucene.Net.Index.FieldInfo; + using FieldInfos = Lucene.Net.Index.FieldInfos; + using IndexFileNames = Lucene.Net.Index.IndexFileNames; + using IndexOutput = Lucene.Net.Store.IndexOutput; + using IOContext = Lucene.Net.Store.IOContext; + using IOUtils = Lucene.Net.Util.IOUtils; + using IndexOptions = Lucene.Net.Index.IndexOptions; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using LegacyDocValuesType = Lucene.Net.Codecs.Lucene40.Lucene40FieldInfosReader.LegacyDocValuesType; + + /// <summary> + /// Lucene 4.0 FieldInfos writer. + /// </summary> + /// <seealso> cref= Lucene40FieldInfosFormat + /// @lucene.experimental </seealso> + [Obsolete] + public class Lucene40FieldInfosWriter : FieldInfosWriter + { + /// <summary> + /// Sole constructor. </summary> + public Lucene40FieldInfosWriter() + { + } + + public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context) + { + string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION); + IndexOutput output = directory.CreateOutput(fileName, context); + bool success = false; + try + { + CodecUtil.WriteHeader(output, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_CURRENT); + output.WriteVInt32(infos.Count); + foreach (FieldInfo fi in infos) + { + IndexOptions? 
indexOptions = fi.IndexOptions; + sbyte bits = 0x0; + if (fi.HasVectors) + { + bits |= Lucene40FieldInfosFormat.STORE_TERMVECTOR; + } + if (fi.OmitsNorms) + { + bits |= Lucene40FieldInfosFormat.OMIT_NORMS; + } + if (fi.HasPayloads) + { + bits |= Lucene40FieldInfosFormat.STORE_PAYLOADS; + } + if (fi.IsIndexed) + { + bits |= Lucene40FieldInfosFormat.IS_INDEXED; + Debug.Assert(indexOptions >= IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads); + if (indexOptions == IndexOptions.DOCS_ONLY) + { + bits |= Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS; + } + else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) + { + bits |= Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS; + } + else if (indexOptions == IndexOptions.DOCS_AND_FREQS) + { + bits |= Lucene40FieldInfosFormat.OMIT_POSITIONS; + } + } + output.WriteString(fi.Name); + output.WriteVInt32(fi.Number); + output.WriteByte((byte)bits); + + // pack the DV types in one byte + sbyte dv = DocValuesByte(fi.DocValuesType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY)); + sbyte nrm = DocValuesByte(fi.NormType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY)); + Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0); + var val = unchecked((sbyte)(0xff & ((nrm << 4) | dv))); + output.WriteByte((byte)val); + output.WriteStringStringMap(fi.Attributes); + } + success = true; + } + finally + { + if (success) + { + output.Dispose(); + } + else + { + IOUtils.CloseWhileHandlingException(output); + } + } + } + + /// <summary> + /// 4.0-style docvalues byte </summary> + public virtual sbyte DocValuesByte(DocValuesType? 
type, string legacyTypeAtt) + { + if (type == null) + { + Debug.Assert(legacyTypeAtt == null); + return 0; + } + else + { + Debug.Assert(legacyTypeAtt != null); + return (sbyte)LegacyDocValuesType.ordinalLookup[legacyTypeAtt]; + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40PostingsWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40PostingsWriter.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40PostingsWriter.cs new file mode 100644 index 0000000..11e2dc0 --- /dev/null +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40PostingsWriter.cs @@ -0,0 +1,381 @@ +using System.Diagnostics; + +namespace Lucene.Net.Codecs.Lucene40 +{ + using BytesRef = Lucene.Net.Util.BytesRef; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + /// <summary> + /// Consumes doc & freq, writing them using the current + /// index file format + /// </summary> + + using CorruptIndexException = Lucene.Net.Index.CorruptIndexException; + using DataOutput = Lucene.Net.Store.DataOutput; + using FieldInfo = Lucene.Net.Index.FieldInfo; + using IndexFileNames = Lucene.Net.Index.IndexFileNames; + using IndexOptions = Lucene.Net.Index.IndexOptions; + using IndexOutput = Lucene.Net.Store.IndexOutput; + using IOUtils = Lucene.Net.Util.IOUtils; + using SegmentWriteState = Lucene.Net.Index.SegmentWriteState; + + /// <summary> + /// Concrete class that writes the 4.0 frq/prx postings format. + /// </summary> + /// <seealso> cref= Lucene40PostingsFormat + /// @lucene.experimental </seealso> +#pragma warning disable 612, 618 + public sealed class Lucene40PostingsWriter : PostingsWriterBase + { + internal readonly IndexOutput FreqOut; + internal readonly IndexOutput ProxOut; + internal readonly Lucene40SkipListWriter SkipListWriter; + + /// <summary> + /// Expert: The fraction of TermDocs entries stored in skip tables, + /// used to accelerate <seealso cref="DocsEnum#advance(int)"/>. Larger values result in + /// smaller indexes, greater acceleration, but fewer accelerable cases, while + /// smaller values result in bigger indexes, less acceleration and more + /// accelerable cases. More detailed experiments would be useful here. + /// </summary> + internal const int DEFAULT_SKIP_INTERVAL = 16; + + internal readonly int SkipInterval; + + /// <summary> + /// Expert: minimum docFreq to write any skip data at all + /// </summary> + internal readonly int SkipMinimum; + + /// <summary> + /// Expert: The maximum number of skip levels. Smaller values result in + /// slightly smaller indexes, but slower skipping in big posting lists. + /// </summary> + internal readonly int MaxSkipLevels = 10; + + internal readonly int TotalNumDocs; + + internal IndexOptions? 
IndexOptions; + internal bool StorePayloads; + internal bool StoreOffsets; + + // Starts a new term + internal long FreqStart; + + internal long ProxStart; + internal FieldInfo FieldInfo; + internal int LastPayloadLength; + internal int LastOffsetLength; + internal int LastPosition; + internal int LastOffset; + + internal static readonly StandardTermState EmptyState = new StandardTermState(); + internal StandardTermState LastState; + + // private String segment; + + /// <summary> + /// Creates a <seealso cref="Lucene40PostingsWriter"/>, with the + /// <seealso cref="#DEFAULT_SKIP_INTERVAL"/>. + /// </summary> + public Lucene40PostingsWriter(SegmentWriteState state) + : this(state, DEFAULT_SKIP_INTERVAL) + { + } + + /// <summary> + /// Creates a <seealso cref="Lucene40PostingsWriter"/>, with the + /// specified {@code skipInterval}. + /// </summary> + public Lucene40PostingsWriter(SegmentWriteState state, int skipInterval) + : base() + { + this.SkipInterval = skipInterval; + this.SkipMinimum = skipInterval; // set to the same for now + // this.segment = state.segmentName; + string fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION); + FreqOut = state.Directory.CreateOutput(fileName, state.Context); + bool success = false; + IndexOutput proxOut = null; + try + { + CodecUtil.WriteHeader(FreqOut, Lucene40PostingsReader.FRQ_CODEC, Lucene40PostingsReader.VERSION_CURRENT); + // TODO: this is a best effort, if one of these fields has no postings + // then we make an empty prx file, same as if we are wrapped in + // per-field postingsformat. maybe... we shouldn't + // bother w/ this opto? just create empty prx file...? 
+ if (state.FieldInfos.HasProx) + { + // At least one field does not omit TF, so create the + // prox file + fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION); + proxOut = state.Directory.CreateOutput(fileName, state.Context); + CodecUtil.WriteHeader(proxOut, Lucene40PostingsReader.PRX_CODEC, Lucene40PostingsReader.VERSION_CURRENT); + } + else + { + // Every field omits TF so we will write no prox file + proxOut = null; + } + this.ProxOut = proxOut; + success = true; + } + finally + { + if (!success) + { + IOUtils.CloseWhileHandlingException(FreqOut, proxOut); + } + } + + TotalNumDocs = state.SegmentInfo.DocCount; + + SkipListWriter = new Lucene40SkipListWriter(skipInterval, MaxSkipLevels, TotalNumDocs, FreqOut, proxOut); + } + + public override void Init(IndexOutput termsOut) + { + CodecUtil.WriteHeader(termsOut, Lucene40PostingsReader.TERMS_CODEC, Lucene40PostingsReader.VERSION_CURRENT); + termsOut.WriteInt32(SkipInterval); // write skipInterval + termsOut.WriteInt32(MaxSkipLevels); // write maxSkipLevels + termsOut.WriteInt32(SkipMinimum); // write skipMinimum + } + + public override BlockTermState NewTermState() + { + return new StandardTermState(); + } + + public override void StartTerm() + { + FreqStart = FreqOut.FilePointer; + //if (DEBUG) System.out.println("SPW: startTerm freqOut.fp=" + freqStart); + if (ProxOut != null) + { + ProxStart = ProxOut.FilePointer; + } + // force first payload to write its length + LastPayloadLength = -1; + // force first offset to write its length + LastOffsetLength = -1; + SkipListWriter.ResetSkip(); + } + + // Currently, this instance is re-used across fields, so + // our parent calls setField whenever the field changes + public override int SetField(FieldInfo fieldInfo) + { + //System.out.println("SPW: setField"); + /* + if (BlockTreeTermsWriter.DEBUG && fieldInfo.Name.equals("id")) { + DEBUG = true; + } else { + DEBUG = false; + } + */ + 
this.FieldInfo = fieldInfo; + IndexOptions = fieldInfo.IndexOptions; + + StoreOffsets = IndexOptions >= Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + StorePayloads = fieldInfo.HasPayloads; + LastState = EmptyState; + //System.out.println(" set init blockFreqStart=" + freqStart); + //System.out.println(" set init blockProxStart=" + proxStart); + return 0; + } + + internal int LastDocID; + internal int Df; + + public override void StartDoc(int docID, int termDocFreq) + { + // if (DEBUG) System.out.println("SPW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq + " freqOut.fp=" + freqOut.getFilePointer()); + + int delta = docID - LastDocID; + + if (docID < 0 || (Df > 0 && delta <= 0)) + { + throw new CorruptIndexException("docs out of order (" + docID + " <= " + LastDocID + " ) (freqOut: " + FreqOut + ")"); + } + + if ((++Df % SkipInterval) == 0) + { + SkipListWriter.SetSkipData(LastDocID, StorePayloads, LastPayloadLength, StoreOffsets, LastOffsetLength); + SkipListWriter.BufferSkip(Df); + } + + Debug.Assert(docID < TotalNumDocs, "docID=" + docID + " totalNumDocs=" + TotalNumDocs); + + LastDocID = docID; + if (IndexOptions == Index.IndexOptions.DOCS_ONLY) + { + FreqOut.WriteVInt32(delta); + } + else if (1 == termDocFreq) + { + FreqOut.WriteVInt32((delta << 1) | 1); + } + else + { + FreqOut.WriteVInt32(delta << 1); + FreqOut.WriteVInt32(termDocFreq); + } + + LastPosition = 0; + LastOffset = 0; + } + + /// <summary> + /// Add a new position & payload </summary> + public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset) + { + //if (DEBUG) System.out.println("SPW: addPos pos=" + position + " payload=" + (payload == null ? 
"null" : (payload.Length + " bytes")) + " proxFP=" + proxOut.getFilePointer()); + Debug.Assert(IndexOptions >= Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, "invalid indexOptions: " + IndexOptions); + Debug.Assert(ProxOut != null); + + int delta = position - LastPosition; + + Debug.Assert(delta >= 0, "position=" + position + " lastPosition=" + LastPosition); // not quite right (if pos=0 is repeated twice we don't catch it) + + LastPosition = position; + + int payloadLength = 0; + + if (StorePayloads) + { + payloadLength = payload == null ? 0 : payload.Length; + + if (payloadLength != LastPayloadLength) + { + LastPayloadLength = payloadLength; + ProxOut.WriteVInt32((delta << 1) | 1); + ProxOut.WriteVInt32(payloadLength); + } + else + { + ProxOut.WriteVInt32(delta << 1); + } + } + else + { + ProxOut.WriteVInt32(delta); + } + + if (StoreOffsets) + { + // don't use startOffset - lastEndOffset, because this creates lots of negative vints for synonyms, + // and the numbers aren't that much smaller anyways. 
+ int offsetDelta = startOffset - LastOffset; + int offsetLength = endOffset - startOffset; + Debug.Assert(offsetDelta >= 0 && offsetLength >= 0, "startOffset=" + startOffset + ",lastOffset=" + LastOffset + ",endOffset=" + endOffset); + if (offsetLength != LastOffsetLength) + { + ProxOut.WriteVInt32(offsetDelta << 1 | 1); + ProxOut.WriteVInt32(offsetLength); + } + else + { + ProxOut.WriteVInt32(offsetDelta << 1); + } + LastOffset = startOffset; + LastOffsetLength = offsetLength; + } + + if (payloadLength > 0) + { + ProxOut.WriteBytes(payload.Bytes, payload.Offset, payloadLength); + } + } + + public override void FinishDoc() + { + } + + internal class StandardTermState : BlockTermState + { + public long FreqStart; + public long ProxStart; + public long SkipOffset; + } + + /// <summary> + /// Called when we are done adding docs to this term </summary> + public override void FinishTerm(BlockTermState _state) + { + StandardTermState state = (StandardTermState)_state; + // if (DEBUG) System.out.println("SPW: finishTerm seg=" + segment + " freqStart=" + freqStart); + Debug.Assert(state.DocFreq > 0); + + // TODO: wasteful we are counting this (counting # docs + // for this term) in two places? 
+ Debug.Assert(state.DocFreq == Df); + state.FreqStart = FreqStart; + state.ProxStart = ProxStart; + if (Df >= SkipMinimum) + { + state.SkipOffset = SkipListWriter.WriteSkip(FreqOut) - FreqStart; + } + else + { + state.SkipOffset = -1; + } + LastDocID = 0; + Df = 0; + } + + public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute) + { + StandardTermState state = (StandardTermState)_state; + if (absolute) + { + LastState = EmptyState; + } + @out.WriteVInt64(state.FreqStart - LastState.FreqStart); + if (state.SkipOffset != -1) + { + Debug.Assert(state.SkipOffset > 0); + @out.WriteVInt64(state.SkipOffset); + } + if (IndexOptions >= Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) + { + @out.WriteVInt64(state.ProxStart - LastState.ProxStart); + } + LastState = state; + } + + protected override void Dispose(bool disposing) + { + if (disposing) + { + try + { + FreqOut.Dispose(); + } + finally + { + if (ProxOut != null) + { + ProxOut.Dispose(); + } + } + } + } + } +#pragma warning restore 612, 618 +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWCodec.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWCodec.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWCodec.cs new file mode 100644 index 0000000..79fbb42 --- /dev/null +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWCodec.cs @@ -0,0 +1,100 @@ +namespace Lucene.Net.Codecs.Lucene40 +{ + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Read-write flavour of <see cref="Lucene40Codec"/>, used for testing.
+    /// </summary>
+#pragma warning disable 612, 618
+    public sealed class Lucene40RWCodec : Lucene40Codec
+    {
+        // The three format objects returned by the property overrides below.
+        // Each one is created exactly once, in the constructor.
+        private readonly FieldInfosFormat _fieldInfosFormat;
+        private readonly DocValuesFormat _docValuesFormat;
+        private readonly NormsFormat _normsFormat;
+
+        /// <summary>
+        /// LUCENENET specific: parameterless constructor, equivalent to
+        /// passing <c>true</c> for <c>oldFormatImpersonationIsActive</c>.
+        /// </summary>
+        /// <remarks>
+        /// Present so that SPIClassIterator can locate this codec; the iterator
+        /// only recognises classes that have empty constructors.
+        /// </remarks>
+        public Lucene40RWCodec()
+            : this(true)
+        {
+        }
+
+        /// <summary>
+        /// Builds the codec and forwards the impersonation flag to the
+        /// field-infos, doc-values and norms formats it exposes.
+        /// </summary>
+        /// <param name="oldFormatImpersonationIsActive">
+        /// LUCENENET specific.
+        /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>
+        /// </param>
+        public Lucene40RWCodec(bool oldFormatImpersonationIsActive)
+            : base()
+        {
+            _fieldInfosFormat = new Lucene40FieldInfosFormatAnonymousInnerClassHelper(oldFormatImpersonationIsActive);
+            _docValuesFormat = new Lucene40RWDocValuesFormat(oldFormatImpersonationIsActive);
+            _normsFormat = new Lucene40RWNormsFormat(oldFormatImpersonationIsActive);
+        }
+
+        /// <summary>
+        /// Field-infos format whose writer depends on the impersonation flag:
+        /// a <see cref="Lucene40FieldInfosWriter"/> when the flag is set,
+        /// otherwise whatever the base format provides.
+        /// </summary>
+        private class Lucene40FieldInfosFormatAnonymousInnerClassHelper : Lucene40FieldInfosFormat
+        {
+            private readonly bool _impersonateOldFormat;
+
+            /// <param name="oldFormatImpersonationIsActive">
+            /// LUCENENET specific.
+            /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>
+            /// </param>
+            public Lucene40FieldInfosFormatAnonymousInnerClassHelper(bool oldFormatImpersonationIsActive)
+                : base()
+            {
+                _impersonateOldFormat = oldFormatImpersonationIsActive;
+            }
+
+            public override FieldInfosWriter FieldInfosWriter
+            {
+                get
+                {
+                    // A fresh writer is handed out on every access while impersonating.
+                    if (_impersonateOldFormat)
+                    {
+                        return new Lucene40FieldInfosWriter();
+                    }
+
+                    return base.FieldInfosWriter;
+                }
+            }
+        }
+
+        /// <summary>
+        /// The field-infos format created in the constructor.
+        /// </summary>
+        public override FieldInfosFormat FieldInfosFormat
+        {
+            get { return _fieldInfosFormat; }
+        }
+
+        /// <summary>
+        /// The <see cref="Lucene40RWDocValuesFormat"/> created in the constructor.
+        /// </summary>
+        public override DocValuesFormat DocValuesFormat
+        {
+            get { return _docValuesFormat; }
+        }
+
+        /// <summary>
+        /// The <see cref="Lucene40RWNormsFormat"/> created in the constructor.
+        /// </summary>
+        public override NormsFormat NormsFormat
+        {
+            get { return _normsFormat; }
+        }
+    }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWDocValuesFormat.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWDocValuesFormat.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWDocValuesFormat.cs
new file mode 100644
index 0000000..2281475
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWDocValuesFormat.cs
@@ -0,0 +1,66 @@
+namespace Lucene.Net.Codecs.Lucene40
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
+
+    /// <summary>
+    /// Read-write flavour of <see cref="Lucene40DocValuesFormat"/>, used for testing.
+    /// </summary>
+#pragma warning disable 612, 618
+    public class Lucene40RWDocValuesFormat : Lucene40DocValuesFormat
+    {
+        // When true, FieldsConsumer hands out a Lucene40DocValuesWriter instead of the base consumer.
+        private readonly bool _impersonateOldFormat;
+
+        /// <summary>
+        /// LUCENENET specific: parameterless constructor, equivalent to
+        /// passing <c>true</c> for <c>oldFormatImpersonationIsActive</c>.
+        /// </summary>
+        /// <remarks>
+        /// Present so that SPIClassIterator can locate this type; the iterator
+        /// only recognises classes that have empty constructors.
+        /// </remarks>
+        public Lucene40RWDocValuesFormat()
+            : this(true)
+        {
+        }
+
+        /// <param name="oldFormatImpersonationIsActive">
+        /// LUCENENET specific.
+        /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>
+        /// </param>
+        public Lucene40RWDocValuesFormat(bool oldFormatImpersonationIsActive)
+            : base()
+        {
+            _impersonateOldFormat = oldFormatImpersonationIsActive;
+        }
+
+        /// <summary>
+        /// Returns a <see cref="Lucene40DocValuesWriter"/> when impersonation is
+        /// active; otherwise defers to the base format.
+        /// </summary>
+        /// <param name="state">Write state of the segment being written.</param>
+        public override DocValuesConsumer FieldsConsumer(SegmentWriteState state)
+        {
+            if (_impersonateOldFormat)
+            {
+                // File name is built from the segment name, the "dv" suffix and the compound-file extension.
+                string segmentName = state.SegmentInfo.Name;
+                string dvFileName = IndexFileNames.SegmentFileName(segmentName, "dv", IndexFileNames.COMPOUND_FILE_EXTENSION);
+                return new Lucene40DocValuesWriter(state, dvFileName, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY);
+            }
+
+            return base.FieldsConsumer(state);
+        }
+    }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWNormsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWNormsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWNormsFormat.cs
new file mode 100644
index 0000000..0830c86
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWNormsFormat.cs
@@ -0,0 +1,66 @@
+namespace Lucene.Net.Codecs.Lucene40
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. 
You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
+
+    /// <summary>
+    /// Read-write flavour of <see cref="Lucene40NormsFormat"/>, used for testing.
+    /// </summary>
+#pragma warning disable 612, 618
+    public class Lucene40RWNormsFormat : Lucene40NormsFormat
+    {
+        // When true, NormsConsumer hands out a Lucene40DocValuesWriter instead of the base consumer.
+        private readonly bool _impersonateOldFormat;
+
+        /// <summary>
+        /// LUCENENET specific: parameterless constructor, equivalent to
+        /// passing <c>true</c> for <c>oldFormatImpersonationIsActive</c>.
+        /// </summary>
+        /// <remarks>
+        /// Present so that SPIClassIterator can locate this type; the iterator
+        /// only recognises classes that have empty constructors.
+        /// </remarks>
+        public Lucene40RWNormsFormat()
+            : this(true)
+        {
+        }
+
+        /// <param name="oldFormatImpersonationIsActive">
+        /// LUCENENET specific.
+        /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>
+        /// </param>
+        public Lucene40RWNormsFormat(bool oldFormatImpersonationIsActive)
+            : base()
+        {
+            _impersonateOldFormat = oldFormatImpersonationIsActive;
+        }
+
+        /// <summary>
+        /// Returns a <see cref="Lucene40DocValuesWriter"/> when impersonation is
+        /// active; otherwise defers to the base format.
+        /// </summary>
+        /// <param name="state">Write state of the segment being written.</param>
+        public override DocValuesConsumer NormsConsumer(SegmentWriteState state)
+        {
+            if (_impersonateOldFormat)
+            {
+                // File name is built from the segment name, the "nrm" suffix and the compound-file extension.
+                string segmentName = state.SegmentInfo.Name;
+                string normsFileName = IndexFileNames.SegmentFileName(segmentName, "nrm", IndexFileNames.COMPOUND_FILE_EXTENSION);
+                return new Lucene40DocValuesWriter(state, normsFileName, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
+            }
+
+            return base.NormsConsumer(state);
+        }
+    }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWPostingsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWPostingsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWPostingsFormat.cs
new file mode 100644
index 0000000..7a2c9cf
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWPostingsFormat.cs
@@ -0,0 +1,84 @@
+namespace Lucene.Net.Codecs.Lucene40
+{
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. 
You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Read-write flavour of <see cref="Lucene40PostingsFormat"/>, used for testing.
+    /// </summary>
+#pragma warning disable 612, 618
+    public class Lucene40RWPostingsFormat : Lucene40PostingsFormat
+    {
+        // When true, FieldsConsumer builds a Lucene40PostingsWriter-backed terms writer
+        // instead of deferring to the base format.
+        private readonly bool _impersonateOldFormat;
+
+        /// <summary>
+        /// LUCENENET specific: parameterless constructor, equivalent to
+        /// passing <c>true</c> for <c>oldFormatImpersonationIsActive</c>.
+        /// </summary>
+        /// <remarks>
+        /// Present so that SPIClassIterator can locate this type; the iterator
+        /// only recognises classes that have empty constructors.
+        /// </remarks>
+        public Lucene40RWPostingsFormat()
+            : this(true)
+        {
+        }
+
+        /// <param name="oldFormatImpersonationIsActive">
+        /// LUCENENET specific.
+        /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>
+        /// </param>
+        public Lucene40RWPostingsFormat(bool oldFormatImpersonationIsActive)
+            : base()
+        {
+            _impersonateOldFormat = oldFormatImpersonationIsActive;
+        }
+
+        /// <summary>
+        /// Returns a <see cref="BlockTreeTermsWriter"/> wrapping a new
+        /// <see cref="Lucene40PostingsWriter"/> when impersonation is active;
+        /// otherwise defers to the base format.
+        /// </summary>
+        /// <param name="state">Write state of the segment being written.</param>
+        public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
+        {
+            if (_impersonateOldFormat)
+            {
+                PostingsWriterBase postingsWriter = new Lucene40PostingsWriter(state);
+
+                // TODO: should we make the terms index more easily
+                // pluggable? Ie so that this codec would record which
+                // index impl was used, and switch on loading?
+                // Or... you must make a new Codec for this?
+                bool termsWriterCreated = false;
+                try
+                {
+                    FieldsConsumer termsWriter = new BlockTreeTermsWriter(state, postingsWriter, m_minBlockSize, m_maxBlockSize);
+                    termsWriterCreated = true;
+                    return termsWriter;
+                }
+                finally
+                {
+                    // If the terms writer could not be constructed, nothing else
+                    // will dispose the postings writer, so do it here.
+                    if (!termsWriterCreated)
+                    {
+                        postingsWriter.Dispose();
+                    }
+                }
+            }
+
+            return base.FieldsConsumer(state);
+        }
+    }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40SkipListWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40SkipListWriter.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40SkipListWriter.cs
new file mode 100644
index 0000000..aa8e52e
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40SkipListWriter.cs
@@ -0,0 +1,168 @@
+using System;
+using System.Diagnostics;
+
+namespace Lucene.Net.Codecs.Lucene40
+{
+    using Lucene.Net.Support;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+    /// <summary>
+    /// Implements the skip list writer for the 4.0 posting list format
+    /// that stores positions and payloads. 
+    /// </summary>
+    /// <remarks>
+    /// Deprecated: only for reading old 4.0 segments.
+    /// </remarks>
+    /// <seealso cref="Lucene40PostingsFormat"/>
+    [Obsolete("Only for reading old 4.0 segments")]
+    public class Lucene40SkipListWriter : MultiLevelSkipListWriter
+    {
+        // Values written at the previous skip point, tracked per skip level.
+        private readonly int[] _lastSkipDoc;
+        private readonly int[] _lastSkipPayloadLength;
+        private readonly int[] _lastSkipOffsetLength;
+        private readonly long[] _lastSkipFreqPointer;
+        private readonly long[] _lastSkipProxPointer;
+
+        // Outputs whose file pointers are sampled; the prox output may be null.
+        private readonly IndexOutput _freqOutput;
+        private readonly IndexOutput _proxOutput;
+
+        // Snapshot captured by SetSkipData and consumed by WriteSkipData.
+        private int _curDoc;
+        private bool _curStorePayloads;
+        private bool _curStoreOffsets;
+        private int _curPayloadLength;
+        private int _curOffsetLength;
+        private long _curFreqPointer;
+        private long _curProxPointer;
+
+        /// <summary>
+        /// Sole constructor. Allocates one slot per skip level in each of the
+        /// "last skip" tracking arrays.
+        /// </summary>
+        public Lucene40SkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput)
+            : base(skipInterval, numberOfSkipLevels, docCount)
+        {
+            _freqOutput = freqOutput;
+            _proxOutput = proxOutput;
+
+            _lastSkipDoc = new int[numberOfSkipLevels];
+            _lastSkipPayloadLength = new int[numberOfSkipLevels];
+            _lastSkipOffsetLength = new int[numberOfSkipLevels];
+            _lastSkipFreqPointer = new long[numberOfSkipLevels];
+            _lastSkipProxPointer = new long[numberOfSkipLevels];
+        }
+
+        /// <summary>
+        /// Records the values for the current skip point, including the current
+        /// file pointers of the freq output and, when present, the prox output.
+        /// </summary>
+        public virtual void SetSkipData(int doc, bool storePayloads, int payloadLength, bool storeOffsets, int offsetLength)
+        {
+            // A length other than -1 is only expected when the matching feature is stored.
+            Debug.Assert(storePayloads || payloadLength == -1);
+            Debug.Assert(storeOffsets || offsetLength == -1);
+
+            _curDoc = doc;
+            _curStorePayloads = storePayloads;
+            _curPayloadLength = payloadLength;
+            _curStoreOffsets = storeOffsets;
+            _curOffsetLength = offsetLength;
+            _curFreqPointer = _freqOutput.FilePointer;
+            if (_proxOutput != null)
+            {
+                _curProxPointer = _proxOutput.FilePointer;
+            }
+        }
+
+        /// <summary>
+        /// Resets the base writer and re-initialises the per-level tracking
+        /// arrays from the current output file pointers.
+        /// </summary>
+        public override void ResetSkip()
+        {
+            base.ResetSkip();
+            Arrays.Fill(_lastSkipDoc, 0);
+            // -1: we don't have to write the first length in the skip list
+            Arrays.Fill(_lastSkipPayloadLength, -1);
+            Arrays.Fill(_lastSkipOffsetLength, -1);
+            Arrays.Fill(_lastSkipFreqPointer, _freqOutput.FilePointer);
+            if (_proxOutput != null)
+            {
+                Arrays.Fill(_lastSkipProxPointer, _proxOutput.FilePointer);
+            }
+        }
+
+        /// <summary>
+        /// Writes one skip datum for <paramref name="level"/> into
+        /// <paramref name="skipBuffer"/> and remembers the values written.
+        /// </summary>
+        protected override void WriteSkipData(int level, IndexOutput skipBuffer)
+        {
+            // To efficiently store payloads/offsets in the posting lists we do not store the
+            // length of every payload/offset; the length is omitted when it equals the previous one.
+            //
+            // To support skipping, the length at every skip point must be known, so the skip data
+            // uses the same length encoding as the posting lists:
+            //
+            //   Case 1: current field does not store payloads/offsets
+            //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
+            //           DocSkip,FreqSkip,ProxSkip --> VInt
+            //           DocSkip records the document number before every SkipInterval th document
+            //           in TermFreqs. Document numbers are represented as differences from the
+            //           previous value in the sequence.
+            //
+            //   Case 2: current field stores payloads/offsets
+            //           SkipDatum                 --> DocSkip, PayloadLength?,OffsetLength?,FreqSkip,ProxSkip
+            //           DocSkip,FreqSkip,ProxSkip --> VInt
+            //           PayloadLength,OffsetLength--> VInt
+            //           Here DocSkip/2 is the difference between the current and the previous value.
+            //           If DocSkip is odd, the stored payload and/or offset lengths follow as VInts;
+            //           if DocSkip is even, the current lengths equal those at the previous skip point.
+            int docDelta = _curDoc - _lastSkipDoc[level];
+
+            if (!_curStorePayloads && !_curStoreOffsets)
+            {
+                // Case 1: current field does not store payloads or offsets
+                skipBuffer.WriteVInt32(docDelta);
+            }
+            else
+            {
+                Debug.Assert(_curStorePayloads || _curPayloadLength == _lastSkipPayloadLength[level]);
+                Debug.Assert(_curStoreOffsets || _curOffsetLength == _lastSkipOffsetLength[level]);
+
+                bool lengthsUnchanged = _curPayloadLength == _lastSkipPayloadLength[level]
+                    && _curOffsetLength == _lastSkipOffsetLength[level];
+
+                if (lengthsUnchanged)
+                {
+                    // Same lengths as at the previous skip point, so they are not stored again.
+                    skipBuffer.WriteVInt32(docDelta << 1);
+                }
+                else
+                {
+                    // At least one length changed: shift DocSkip, set the lowest bit and
+                    // store the current payload and/or offset lengths as VInts.
+                    skipBuffer.WriteVInt32((docDelta << 1) | 1);
+
+                    if (_curStorePayloads)
+                    {
+                        skipBuffer.WriteVInt32(_curPayloadLength);
+                        _lastSkipPayloadLength[level] = _curPayloadLength;
+                    }
+                    if (_curStoreOffsets)
+                    {
+                        skipBuffer.WriteVInt32(_curOffsetLength);
+                        _lastSkipOffsetLength[level] = _curOffsetLength;
+                    }
+                }
+            }
+
+            // File pointer deltas since the previous skip point on this level.
+            long freqDelta = _curFreqPointer - _lastSkipFreqPointer[level];
+            long proxDelta = _curProxPointer - _lastSkipProxPointer[level];
+            skipBuffer.WriteVInt32((int)freqDelta);
+            skipBuffer.WriteVInt32((int)proxDelta);
+
+            _lastSkipDoc[level] = _curDoc;
+
+            _lastSkipFreqPointer[level] = _curFreqPointer;
+            _lastSkipProxPointer[level] = _curProxPointer;
+        }
+    }
+}
\ No newline at end of file
