Lucene.Net.TestFramework: Renamed Codecs\lucene3x\ to Codecs\Lucene3x\
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/8304ca82 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/8304ca82 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/8304ca82 Branch: refs/heads/api-work Commit: 8304ca827465fcbfdd6993bfd19864c48bedf0d5 Parents: 362f0d3 Author: Shad Storhaug <[email protected]> Authored: Sun Feb 26 03:10:42 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Mon Feb 27 06:17:56 2017 +0700 ---------------------------------------------------------------------- .../Codecs/Lucene3x/PreFlexRWCodec.cs | 151 +++++++++ .../Lucene3x/PreFlexRWFieldInfosFormat.cs | 45 +++ .../Lucene3x/PreFlexRWFieldInfosReader.cs | 133 ++++++++ .../Lucene3x/PreFlexRWFieldInfosWriter.cs | 130 ++++++++ .../Codecs/Lucene3x/PreFlexRWFieldsWriter.cs | 269 +++++++++++++++ .../Codecs/Lucene3x/PreFlexRWNormsConsumer.cs | 116 +++++++ .../Codecs/Lucene3x/PreFlexRWNormsFormat.cs | 35 ++ .../Codecs/Lucene3x/PreFlexRWPostingsFormat.cs | 87 +++++ .../Lucene3x/PreFlexRWSegmentInfoFormat.cs | 37 ++ .../Lucene3x/PreFlexRWSegmentInfoWriter.cs | 47 +++ .../Codecs/Lucene3x/PreFlexRWSkipListWriter.cs | 138 ++++++++ .../Lucene3x/PreFlexRWStoredFieldsFormat.cs | 34 ++ .../Lucene3x/PreFlexRWStoredFieldsWriter.cs | 214 ++++++++++++ .../Lucene3x/PreFlexRWTermVectorsFormat.cs | 74 ++++ .../Lucene3x/PreFlexRWTermVectorsWriter.cs | 243 ++++++++++++++ .../Codecs/Lucene3x/TermInfosWriter.cs | 334 +++++++++++++++++++ .../Codecs/lucene3x/PreFlexRWCodec.cs | 151 --------- .../lucene3x/PreFlexRWFieldInfosFormat.cs | 45 --- .../lucene3x/PreFlexRWFieldInfosReader.cs | 133 -------- .../lucene3x/PreFlexRWFieldInfosWriter.cs | 130 -------- .../Codecs/lucene3x/PreFlexRWFieldsWriter.cs | 269 --------------- .../Codecs/lucene3x/PreFlexRWNormsConsumer.cs | 116 ------- .../Codecs/lucene3x/PreFlexRWNormsFormat.cs | 35 -- .../Codecs/lucene3x/PreFlexRWPostingsFormat.cs | 87 ----- 
.../lucene3x/PreFlexRWSegmentInfoFormat.cs | 37 -- .../lucene3x/PreFlexRWSegmentInfoWriter.cs | 47 --- .../Codecs/lucene3x/PreFlexRWSkipListWriter.cs | 138 -------- .../lucene3x/PreFlexRWStoredFieldsFormat.cs | 34 -- .../lucene3x/PreFlexRWStoredFieldsWriter.cs | 214 ------------ .../lucene3x/PreFlexRWTermVectorsFormat.cs | 74 ---- .../lucene3x/PreFlexRWTermVectorsWriter.cs | 243 -------------- .../Codecs/lucene3x/TermInfosWriter.cs | 334 ------------------- .../Lucene.Net.TestFramework.csproj | 32 +- 33 files changed, 2103 insertions(+), 2103 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWCodec.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWCodec.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWCodec.cs new file mode 100644 index 0000000..4d265d9 --- /dev/null +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWCodec.cs @@ -0,0 +1,151 @@ +namespace Lucene.Net.Codecs.Lucene3x +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+     */
+
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+    /// <summary>
+    /// Test-only codec that writes 3.x-like indexes (the emulation is not perfect yet).
+    /// <para/>
+    /// While old-format impersonation is active, every format property hands out the
+    /// writable <c>PreFlexRW*</c> instance owned by this codec; otherwise the property
+    /// defers to the <see cref="Lucene3xCodec"/> base implementation.
+    /// <para/>
+    /// @lucene.experimental
+    /// </summary>
+#pragma warning disable 612, 618
+    public class PreFlexRWCodec : Lucene3xCodec
+    {
+        // One writable format per codec component, created once per codec instance.
+        private readonly PostingsFormat _postings = new PreFlexRWPostingsFormat();
+        private readonly Lucene3xNormsFormat _norms = new PreFlexRWNormsFormat();
+        private readonly FieldInfosFormat _fieldInfos = new PreFlexRWFieldInfosFormat();
+        private readonly TermVectorsFormat _termVectors = new PreFlexRWTermVectorsFormat();
+        private readonly SegmentInfoFormat _segmentInfos = new PreFlexRWSegmentInfoFormat();
+        private readonly StoredFieldsFormat _storedFields = new PreFlexRWStoredFieldsFormat();
+
+        // Selects between the writable formats above (true) and the base codec's formats (false).
+        private readonly bool _oldFormatImpersonationIsActive;
+
+        /// <summary>
+        /// LUCENENET specific.
+        /// Creates the codec with old-format impersonation switched on.
+        /// </summary>
+        /// <remarks>
+        /// Added so that SPIClassIterator can locate this Codec. The iterator
+        /// only recognises classes that have empty constructors.
+        /// </remarks>
+        public PreFlexRWCodec()
+            : this(true)
+        {
+        }
+
+        /// <summary>
+        /// Creates the codec with an explicit impersonation setting.
+        /// </summary>
+        /// <param name="oldFormatImpersonationIsActive">
+        /// LUCENENET specific.
+        /// Added to remove the dependency on the then-static
+        /// <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>.
+        /// When <c>true</c> the writable PreFlexRW formats are used; when <c>false</c>
+        /// the formats of the base codec are returned.
+        /// </param>
+        public PreFlexRWCodec(bool oldFormatImpersonationIsActive)
+        {
+            _oldFormatImpersonationIsActive = oldFormatImpersonationIsActive;
+        }
+
+        /// <summary>Postings format: writable variant while impersonating, base format otherwise.</summary>
+        public override PostingsFormat PostingsFormat
+        {
+            get { return _oldFormatImpersonationIsActive ? _postings : base.PostingsFormat; }
+        }
+
+        /// <summary>Norms format: writable variant while impersonating, base format otherwise.</summary>
+        public override NormsFormat NormsFormat
+        {
+            get { return _oldFormatImpersonationIsActive ? _norms : base.NormsFormat; }
+        }
+
+        /// <summary>Segment info format: writable variant while impersonating, base format otherwise.</summary>
+        public override SegmentInfoFormat SegmentInfoFormat
+        {
+            get { return _oldFormatImpersonationIsActive ? _segmentInfos : base.SegmentInfoFormat; }
+        }
+
+        /// <summary>Field infos format: writable variant while impersonating, base format otherwise.</summary>
+        public override FieldInfosFormat FieldInfosFormat
+        {
+            get { return _oldFormatImpersonationIsActive ? _fieldInfos : base.FieldInfosFormat; }
+        }
+
+        /// <summary>Term vectors format: writable variant while impersonating, base format otherwise.</summary>
+        public override TermVectorsFormat TermVectorsFormat
+        {
+            get { return _oldFormatImpersonationIsActive ? _termVectors : base.TermVectorsFormat; }
+        }
+
+        /// <summary>Stored fields format: writable variant while impersonating, base format otherwise.</summary>
+        public override StoredFieldsFormat StoredFieldsFormat
+        {
+            get { return _oldFormatImpersonationIsActive ? _storedFields : base.StoredFieldsFormat; }
+        }
+    }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosFormat.cs
b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosFormat.cs new file mode 100644 index 0000000..a02fe7f --- /dev/null +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosFormat.cs @@ -0,0 +1,45 @@ +namespace Lucene.Net.Codecs.Lucene3x +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+     */
+
+    /// <summary>
+    /// Field infos format whose reader and writer are the PreFlexRW implementations.
+    /// <para/>
+    /// @lucene.internal
+    /// @lucene.experimental
+    /// </summary>
+#pragma warning disable 612, 618
+    internal class PreFlexRWFieldInfosFormat : Lucene3xFieldInfosFormat
+    {
+        /// <summary>Returns a new <see cref="PreFlexRWFieldInfosReader"/> on every access.</summary>
+        public override FieldInfosReader FieldInfosReader
+        {
+            get { return new PreFlexRWFieldInfosReader(); }
+        }
+
+        /// <summary>Returns a new <see cref="PreFlexRWFieldInfosWriter"/> on every access.</summary>
+        public override FieldInfosWriter FieldInfosWriter
+        {
+            get { return new PreFlexRWFieldInfosWriter(); }
+        }
+    }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosReader.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosReader.cs
new file mode 100644
index 0000000..458951e
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosReader.cs
@@ -0,0 +1,133 @@
+namespace Lucene.Net.Codecs.Lucene3x
+{
+    using System.Collections.Generic;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
+    using Directory = Lucene.Net.Store.Directory;
+    using DocValuesType = Lucene.Net.Index.DocValuesType;
+    using FieldInfo = Lucene.Net.Index.FieldInfo;
+    using FieldInfos = Lucene.Net.Index.FieldInfos;
+    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+    using IndexFormatTooNewException = Lucene.Net.Index.IndexFormatTooNewException;
+    using IndexFormatTooOldException = Lucene.Net.Index.IndexFormatTooOldException;
+    using IndexInput = Lucene.Net.Store.IndexInput;
+    using IndexOptions = Lucene.Net.Index.IndexOptions;
+    using IOContext = Lucene.Net.Store.IOContext;
+    using SegmentInfo = Lucene.Net.Index.SegmentInfo;
+
+    /// <summary>
+    /// Reads the field infos file produced by <see cref="PreFlexRWFieldInfosWriter"/>.
+    /// <para/>
+    /// @lucene.internal
+    /// @lucene.experimental
+    /// </summary>
+    internal class PreFlexRWFieldInfosReader : FieldInfosReader
+    {
+        /// <summary>Oldest accepted format. Format numbers decrease as they get newer.</summary>
+        internal const int FORMAT_MINIMUM = PreFlexRWFieldInfosWriter.FORMAT_START;
+
+        /// <summary>
+        /// Reads the field infos file of a segment.
+        /// </summary>
+        /// <param name="directory">Directory the file is opened from.</param>
+        /// <param name="segmentName">Segment name used to build the file name.</param>
+        /// <param name="segmentSuffix">Not used by this implementation.</param>
+        /// <param name="iocontext">Context passed to <c>OpenInput</c>.</param>
+        /// <returns>The decoded <see cref="FieldInfos"/>.</returns>
+        /// <exception cref="IndexFormatTooOldException">The format number is greater than <see cref="FORMAT_MINIMUM"/>.</exception>
+        /// <exception cref="IndexFormatTooNewException">The format number is below the current format and is not the RW marker.</exception>
+        /// <exception cref="CorruptIndexException">OMIT_POSITIONS is set for a format that predates it, or bytes are left unread.</exception>
+        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
+        {
+            string fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION);
+
+            // The using statement disposes the input on every exit path.
+            using (IndexInput input = directory.OpenInput(fileName, iocontext))
+            {
+                int format = input.ReadVInt32();
+
+                if (format > FORMAT_MINIMUM)
+                {
+                    throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
+                }
+                if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW)
+                {
+                    throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
+                }
+
+                // Only the RW variant stores explicit field numbers and the extra norms byte.
+                bool isRW = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW;
+
+                int count = input.ReadVInt32();
+                FieldInfo[] infos = new FieldInfo[count];
+
+                for (int ord = 0; ord < count; ord++)
+                {
+                    string name = input.ReadString();
+                    int fieldNumber = isRW ? input.ReadInt32() : ord;
+                    byte bits = input.ReadByte();
+
+                    bool isIndexed = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0;
+                    bool storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0;
+                    bool omitNorms = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0;
+
+                    IndexOptions? indexOptions;
+                    if (!isIndexed)
+                    {
+                        indexOptions = null;
+                    }
+                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
+                    {
+                        indexOptions = IndexOptions.DOCS_ONLY;
+                    }
+                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0)
+                    {
+                        // The flag is only legal from FORMAT_OMIT_POSITIONS onwards (smaller number = newer).
+                        if (format > PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS)
+                        {
+                            throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
+                        }
+                        indexOptions = IndexOptions.DOCS_AND_FREQS;
+                    }
+                    else
+                    {
+                        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+                    }
+
+                    // LUCENE-3027: past indices were able to write
+                    // storePayloads=true when omitTFAP is also true,
+                    // which is invalid. The flag is therefore only honoured
+                    // when positions are indexed.
+                    bool storePayloads = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0
+                        && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+
+                    DocValuesType? normType = null;
+                    if (isIndexed && !omitNorms)
+                    {
+                        normType = DocValuesType.NUMERIC;
+                        // RW can have norms but doesn't write them: an extra byte says whether they exist.
+                        if (isRW && input.ReadByte() == 0)
+                        {
+                            normType = null;
+                        }
+                    }
+
+                    infos[ord] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, normType, null);
+                }
+
+                if (input.FilePointer != input.Length)
+                {
+                    throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length + " (resource: " + input + ")");
+                }
+                return new FieldInfos(infos);
+            }
+        }
+
+        /// <summary>
+        /// Adds the field infos file name of <paramref name="info"/> to <paramref name="files"/>.
+        /// </summary>
+        /// <param name="dir">Not used.</param>
+        /// <param name="info">Segment whose name is used to build the file name.</param>
+        /// <param name="files">Set that receives the file name.</param>
+        public static void Files(Directory dir, SegmentInfo info, ISet<string> files)
+        {
+            string fieldInfosFile = IndexFileNames.SegmentFileName(info.Name, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION);
+            files.Add(fieldInfosFile);
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosWriter.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosWriter.cs
new file mode 100644
index 0000000..e0fef49
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldInfosWriter.cs
@@ -0,0 +1,130 @@
+using System.Diagnostics;
+
+namespace Lucene.Net.Codecs.Lucene3x
+{
+    using Directory = Lucene.Net.Store.Directory;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.
You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using FieldInfo = Lucene.Net.Index.FieldInfo;
+    using FieldInfos = Lucene.Net.Index.FieldInfos;
+    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+    using IndexOptions = Lucene.Net.Index.IndexOptions;
+    using IndexOutput = Lucene.Net.Store.IndexOutput;
+    using IOContext = Lucene.Net.Store.IOContext;
+    using IOUtils = Lucene.Net.Util.IOUtils;
+
+    /// <summary>
+    /// Writes the field infos file in the RW variant of the pre-flex layout: the
+    /// <see cref="FORMAT_PREFLEX_RW"/> marker, the field count, then one record per field.
+    /// <para/>
+    /// @lucene.internal
+    /// @lucene.experimental
+    /// </summary>
+    internal class PreFlexRWFieldInfosWriter : FieldInfosWriter
+    {
+        // TODO move to test-framework preflex RW?
+
+        /// <summary>
+        /// Extension of field infos </summary>
+        internal const string FIELD_INFOS_EXTENSION = "fnm";
+
+        /// <summary>First used in 2.9; prior to 2.9 there was no format header.</summary>
+        internal const int FORMAT_START = -2;
+
+        /// <summary>First used in 3.4: omit only positional information.</summary>
+        internal const int FORMAT_OMIT_POSITIONS = -3;
+
+        /// <summary>Marker written by this writer in place of a regular format number.</summary>
+        internal static readonly int FORMAT_PREFLEX_RW = int.MinValue;
+
+        /// <summary>
+        /// Whenever you add a new format, make it 1 smaller (negative version logic)!
+        /// </summary>
+        internal const int FORMAT_CURRENT = FORMAT_OMIT_POSITIONS;
+
+        // Bit flags stored in the per-field flags byte.
+        internal const sbyte IS_INDEXED = 0x1;
+        internal const sbyte STORE_TERMVECTOR = 0x2;
+        internal const sbyte OMIT_NORMS = 0x10;
+        internal const sbyte STORE_PAYLOADS = 0x20;
+        internal const sbyte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
+        internal const sbyte OMIT_POSITIONS = -128;
+
+        /// <summary>
+        /// Writes <paramref name="infos"/> to the field infos file of the segment.
+        /// </summary>
+        /// <param name="directory">Directory the file is created in.</param>
+        /// <param name="segmentName">Segment name used to build the file name.</param>
+        /// <param name="segmentSuffix">Not used by this implementation.</param>
+        /// <param name="infos">Field infos to write.</param>
+        /// <param name="context">Context passed to <c>CreateOutput</c>.</param>
+        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
+        {
+            string fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
+            IndexOutput output = directory.CreateOutput(fileName, context);
+            bool success = false;
+            try
+            {
+                output.WriteVInt32(FORMAT_PREFLEX_RW);
+                output.WriteVInt32(infos.Count);
+                foreach (FieldInfo field in infos)
+                {
+                    sbyte bits = EncodeFlags(field);
+
+                    output.WriteString(field.Name);
+                    // The field number is written explicitly because IW tries to
+                    // stabilize the field numbers across segments, so the FI
+                    // ordinal is not necessarily equivalent to the field number.
+                    output.WriteInt32(field.Number);
+                    output.WriteByte((byte)bits);
+                    if (field.IsIndexed && !field.OmitsNorms)
+                    {
+                        // To allow null norm types we need to indicate whether norms
+                        // are written; this byte exists only in the RW case.
+                        output.WriteByte(field.NormType == null ? (byte)0 : (byte)1);
+                    }
+                    Debug.Assert(field.Attributes == null); // not used or supported
+                }
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    // Keep the exception that is already in flight.
+                    IOUtils.CloseWhileHandlingException(output);
+                }
+                else
+                {
+                    output.Dispose();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Builds the flags byte for one field from the constants declared above.
+        /// </summary>
+        private static sbyte EncodeFlags(FieldInfo field)
+        {
+            sbyte bits = 0x0;
+            if (field.HasVectors)
+            {
+                bits |= STORE_TERMVECTOR;
+            }
+            if (field.OmitsNorms)
+            {
+                bits |= OMIT_NORMS;
+            }
+            if (field.HasPayloads)
+            {
+                bits |= STORE_PAYLOADS;
+            }
+            if (!field.IsIndexed)
+            {
+                return bits;
+            }
+
+            bits |= IS_INDEXED;
+            Debug.Assert(field.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !field.HasPayloads);
+            if (field.IndexOptions == IndexOptions.DOCS_ONLY)
+            {
+                bits |= OMIT_TERM_FREQ_AND_POSITIONS;
+            }
+            else if (field.IndexOptions == IndexOptions.DOCS_AND_FREQS)
+            {
+                bits |= OMIT_POSITIONS;
+            }
+            return bits;
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldsWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldsWriter.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldsWriter.cs
new file mode 100644
index 0000000..b0c8174
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWFieldsWriter.cs
@@ -0,0 +1,269 @@
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace Lucene.Net.Codecs.Lucene3x
+{
+    using BytesRef = Lucene.Net.Util.BytesRef;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
+    using FieldInfo = Lucene.Net.Index.FieldInfo;
+    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+    using IndexOptions = Lucene.Net.Index.IndexOptions;
+    using IndexOutput = Lucene.Net.Store.IndexOutput;
+    using IOUtils = Lucene.Net.Util.IOUtils;
+    using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
+
+#pragma warning disable 612, 618
+    /// <summary>
+    /// Fields consumer that writes postings in the pre-flex layout: terms go to a
+    /// <see cref="TermInfosWriter"/>, doc/freq data to the freq output and, when the
+    /// segment has prox data, positions and payloads to the prox output.
+    /// </summary>
+    internal class PreFlexRWFieldsWriter : FieldsConsumer
+    {
+        private readonly TermInfosWriter _termsOut;
+        private readonly IndexOutput _freqOut;
+        private readonly IndexOutput _proxOut; // null when the segment has no prox data
+        private readonly PreFlexRWSkipListWriter _skipListWriter;
+        private readonly int _totalNumDocs;
+
+        /// <summary>
+        /// Opens the terms, freq and (if needed) prox outputs for the segment described by
+        /// <paramref name="state"/>. Outputs that were already opened are closed again when
+        /// opening a later one fails.
+        /// </summary>
+        public PreFlexRWFieldsWriter(SegmentWriteState state)
+        {
+            _termsOut = new TermInfosWriter(state.Directory, state.SegmentInfo.Name, state.FieldInfos, state.TermIndexInterval);
+
+            bool freqOpened = false;
+            try
+            {
+                string freqFile = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, "", Lucene3xPostingsFormat.FREQ_EXTENSION);
+                _freqOut = state.Directory.CreateOutput(freqFile, state.Context);
+                _totalNumDocs = state.SegmentInfo.DocCount;
+                freqOpened = true;
+            }
+            finally
+            {
+                if (!freqOpened)
+                {
+                    IOUtils.CloseWhileHandlingException(_termsOut);
+                }
+            }
+
+            bool proxOpened = false;
+            try
+            {
+                if (state.FieldInfos.HasProx)
+                {
+                    string proxFile = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, "", Lucene3xPostingsFormat.PROX_EXTENSION);
+                    _proxOut = state.Directory.CreateOutput(proxFile, state.Context);
+                }
+                else
+                {
+                    _proxOut = null;
+                }
+                proxOpened = true;
+            }
+            finally
+            {
+                if (!proxOpened)
+                {
+                    IOUtils.CloseWhileHandlingException(_termsOut, _freqOut);
+                }
+            }
+
+            _skipListWriter = new PreFlexRWSkipListWriter(_termsOut.SkipInterval, _termsOut.MaxSkipLevels, _totalNumDocs, _freqOut, _proxOut);
+            //System.out.println("\nw start seg=" + segment);
+        }
+
+        /// <summary>
+        /// Returns the terms consumer for <paramref name="field"/>.
+        /// </summary>
+        /// <exception cref="System.NotSupportedException">The field asks for offsets in the postings.</exception>
+        public override TermsConsumer AddField(FieldInfo field)
+        {
+            Debug.Assert(field.Number != -1);
+            if (field.IndexOptions >= IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+            {
+                throw new System.NotSupportedException("this codec cannot index offsets");
+            }
+            //System.out.println("w field=" + field.Name + " storePayload=" + field.storePayloads + " number=" + field.number);
+            return new PreFlexTermsWriter(this, field);
+        }
+
+        /// <summary>Closes the terms, freq and prox outputs.</summary>
+        public override void Dispose()
+        {
+            IOUtils.Close(_termsOut, _freqOut, _proxOut);
+        }
+
+        /// <summary>
+        /// Terms consumer for a single field. It reuses one <see cref="TermInfo"/> and one
+        /// <see cref="PostingsWriter"/> for every term of the field.
+        /// </summary>
+        private class PreFlexTermsWriter : TermsConsumer
+        {
+            private readonly PreFlexRWFieldsWriter _fieldsWriter;
+            private readonly FieldInfo _fieldInfo;
+            private readonly bool _omitTF;
+            private readonly bool _storePayloads;
+            private readonly TermInfo _termInfo = new TermInfo();
+            private readonly PostingsWriter _postingsWriter;
+
+            public PreFlexTermsWriter(PreFlexRWFieldsWriter outerInstance, FieldInfo fieldInfo)
+            {
+                _fieldsWriter = outerInstance;
+                _fieldInfo = fieldInfo;
+                _omitTF = fieldInfo.IndexOptions == IndexOptions.DOCS_ONLY;
+                _storePayloads = fieldInfo.HasPayloads;
+                _postingsWriter = new PostingsWriter(this);
+            }
+
+            /// <summary>
+            /// Writes the documents and positions of the current term to the freq and prox outputs.
+            /// </summary>
+            internal class PostingsWriter : PostingsConsumer
+            {
+                private readonly PreFlexTermsWriter _termsWriter;
+
+                private int _lastDocID;
+                private int _lastPayloadLength = -1;
+                private int _lastPosition;
+                private int _df;
+
+                public PostingsWriter(PreFlexRWFieldsWriter.PreFlexTermsWriter outerInstance)
+                {
+                    _termsWriter = outerInstance;
+                }
+
+                /// <summary>Clears the per-term state and returns this instance for reuse.</summary>
+                public PostingsWriter Reset()
+                {
+                    _df = 0;
+                    _lastDocID = 0;
+                    _lastPayloadLength = -1;
+                    return this;
+                }
+
+                /// <summary>
+                /// Writes the delta-encoded doc ID, plus the frequency unless term frequencies
+                /// are omitted, and buffers skip data every <c>SkipInterval</c> documents.
+                /// </summary>
+                /// <exception cref="CorruptIndexException">The doc ID is negative or not greater than the previous one.</exception>
+                public override void StartDoc(int docID, int termDocFreq)
+                {
+                    //System.out.println(" w doc=" + docID);
+                    PreFlexRWFieldsWriter fields = _termsWriter._fieldsWriter;
+
+                    int delta = docID - _lastDocID;
+                    if (docID < 0 || (_df > 0 && delta <= 0))
+                    {
+                        throw new CorruptIndexException("docs out of order (" + docID + " <= " + _lastDocID + " )");
+                    }
+
+                    if ((++_df % fields._termsOut.SkipInterval) == 0)
+                    {
+                        fields._skipListWriter.SetSkipData(_lastDocID, _termsWriter._storePayloads, _lastPayloadLength);
+                        fields._skipListWriter.BufferSkip(_df);
+                    }
+
+                    _lastDocID = docID;
+
+                    Debug.Assert(docID < fields._totalNumDocs, "docID=" + docID + " totalNumDocs=" + fields._totalNumDocs);
+
+                    IndexOutput freq = fields._freqOut;
+                    if (_termsWriter._omitTF)
+                    {
+                        freq.WriteVInt32(delta);
+                    }
+                    else if (termDocFreq == 1)
+                    {
+                        // The low bit set means "frequency is one"; no separate freq value follows.
+                        freq.WriteVInt32((delta << 1) | 1);
+                    }
+                    else
+                    {
+                        freq.WriteVInt32(delta << 1);
+                        freq.WriteVInt32(termDocFreq);
+                    }
+                    _lastPosition = 0;
+                }
+
+                /// <summary>
+                /// Writes the delta-encoded position and, for fields that store payloads, the
+                /// payload (its length is written only when it differs from the previous one).
+                /// </summary>
+                public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset)
+                {
+                    IndexOutput prox = _termsWriter._fieldsWriter._proxOut;
+                    Debug.Assert(prox != null);
+                    Debug.Assert(startOffset == -1);
+                    Debug.Assert(endOffset == -1);
+                    //System.out.println(" w pos=" + position + " payl=" + payload);
+                    int delta = position - _lastPosition;
+                    _lastPosition = position;
+
+                    if (!_termsWriter._storePayloads)
+                    {
+                        prox.WriteVInt32(delta);
+                        return;
+                    }
+
+                    int payloadLength = payload == null ? 0 : payload.Length;
+                    if (payloadLength == _lastPayloadLength)
+                    {
+                        prox.WriteVInt32(delta << 1);
+                    }
+                    else
+                    {
+                        //System.out.println(" write payload len=" + payloadLength);
+                        _lastPayloadLength = payloadLength;
+                        prox.WriteVInt32((delta << 1) | 1);
+                        prox.WriteVInt32(payloadLength);
+                    }
+                    if (payloadLength > 0)
+                    {
+                        prox.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
+                    }
+                }
+
+                public override void FinishDoc()
+                {
+                }
+            }
+
+            /// <summary>
+            /// Resets the skip list, records the current freq (and prox) file pointers for the
+            /// term and returns the reset postings writer.
+            /// </summary>
+            public override PostingsConsumer StartTerm(BytesRef text)
+            {
+                //System.out.println(" w term=" + text.utf8ToString());
+                _fieldsWriter._skipListWriter.ResetSkip();
+                _termInfo.FreqPointer = _fieldsWriter._freqOut.FilePointer;
+                IndexOutput prox = _fieldsWriter._proxOut;
+                if (prox != null)
+                {
+                    _termInfo.ProxPointer = prox.FilePointer;
+                }
+                return _postingsWriter.Reset();
+            }
+
+            /// <summary>
+            /// Writes the skip data and adds the term to the terms output. Terms with a
+            /// document frequency of zero are not written.
+            /// </summary>
+            public override void FinishTerm(BytesRef text, TermStats stats)
+            {
+                if (stats.DocFreq <= 0)
+                {
+                    return;
+                }
+
+                long skipPointer = _fieldsWriter._skipListWriter.WriteSkip(_fieldsWriter._freqOut);
+                _termInfo.DocFreq = stats.DocFreq;
+                _termInfo.SkipOffset = (int)(skipPointer - _termInfo.FreqPointer);
+                //System.out.println(" w finish term=" + text.utf8ToString() + " fnum=" + fieldInfo.number);
+                _fieldsWriter._termsOut.Add(_fieldInfo.Number, text, _termInfo);
+            }
+
+            public override void Finish(long sumTotalTermCount, long sumDocFreq, int docCount)
+            {
+            }
+
+            /// <summary>Terms are ordered by <see cref="BytesRef.UTF8SortedAsUTF16Comparer"/>.</summary>
+            public override IComparer<BytesRef> Comparer
+            {
+                get { return BytesRef.UTF8SortedAsUTF16Comparer; }
+            }
+        }
+    }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWNormsConsumer.cs
----------------------------------------------------------------------
diff --git
a/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWNormsConsumer.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWNormsConsumer.cs new file mode 100644 index 0000000..2a91121 --- /dev/null +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWNormsConsumer.cs @@ -0,0 +1,116 @@ +using System; +using System.Diagnostics; + +namespace Lucene.Net.Codecs.Lucene3x +{ + using System.Collections.Generic; + using BytesRef = Lucene.Net.Util.BytesRef; + using Directory = Lucene.Net.Store.Directory; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+     */
+
+    using FieldInfo = Lucene.Net.Index.FieldInfo;
+    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+    using IndexOutput = Lucene.Net.Store.IndexOutput;
+    using IOContext = Lucene.Net.Store.IOContext;
+    using IOUtils = Lucene.Net.Util.IOUtils;
+
+    /// <summary>
+    /// Writes and Merges Lucene 3.x norms format.
+    /// <para/>
+    /// The norms file starts with <see cref="NORMS_HEADER"/>, followed by one byte per
+    /// value for every numeric field passed to <see cref="AddNumericField"/>.
+    /// <para/>
+    /// @lucene.experimental
+    /// </summary>
+    internal class PreFlexRWNormsConsumer : DocValuesConsumer
+    {
+        /// <summary>
+        /// norms header placeholder </summary>
+        private static readonly sbyte[] NORMS_HEADER = new sbyte[] { (sbyte)'N', (sbyte)'R', (sbyte)'M', -1 };
+
+        /// <summary>
+        /// Extension of norms file </summary>
+        private const string NORMS_EXTENSION = "nrm";
+
+        /// <summary>
+        /// Extension of separate norms file </summary>
+        [Obsolete("Only for reading existing 3.x indexes")]
+        private const string SEPARATE_NORMS_EXTENSION = "s";
+
+        private readonly IndexOutput @out;
+        private int LastFieldNumber = -1; // only for assert
+
+        /// <summary>
+        /// Creates the norms file of <paramref name="segment"/> and writes the header.
+        /// The output is closed again if writing the header fails.
+        /// </summary>
+        /// <param name="directory">Directory the norms file is created in.</param>
+        /// <param name="segment">Segment name used to build the file name.</param>
+        /// <param name="context">Context passed to <c>CreateOutput</c>.</param>
+        public PreFlexRWNormsConsumer(Directory directory, string segment, IOContext context)
+        {
+            string normsFileName = IndexFileNames.SegmentFileName(segment, "", NORMS_EXTENSION);
+            bool success = false;
+            IndexOutput output = null;
+            try
+            {
+                output = directory.CreateOutput(normsFileName, context);
+                // The header is declared as sbyte[], so it is written byte by byte.
+                foreach (var @sbyte in NORMS_HEADER)
+                {
+                    output.WriteByte((byte)@sbyte);
+                }
+                @out = output;
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    IOUtils.CloseWhileHandlingException(output);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Appends one byte per value to the norms file.
+        /// </summary>
+        /// <param name="field">Field the norms belong to; fields must arrive in increasing field-number order.</param>
+        /// <param name="values">Norm values; each must fit in a signed byte.</param>
+        /// <exception cref="System.NotSupportedException">A value lies outside the signed-byte range.</exception>
+        public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
+        {
+            Debug.Assert(field.Number > LastFieldNumber, "writing norms fields out of order" + LastFieldNumber + " -> " + field.Number);
+            foreach (var n in values)
+            {
+                // Compare the untruncated value. Casting to sbyte before the comparison
+                // would make the range check impossible to fail.
+                long value = n.Value;
+                if (value < sbyte.MinValue || value > sbyte.MaxValue)
+                {
+                    throw new System.NotSupportedException("3.x cannot index norms that won't fit in a byte, got: " + value);
+                }
+                @out.WriteByte((byte)(sbyte)value);
+            }
+            LastFieldNumber = field.Number;
+        }
+
+        /// <summary>Closes the norms output when called from <c>Dispose()</c>.</summary>
+        protected override void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                IOUtils.Close(@out);
+            }
+        }
+
+        /// <summary>Not supported: this consumer only writes numeric (norms) fields.</summary>
+        public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
+        {
+            throw new InvalidOperationException();
+        }
+
+        /// <summary>Not supported: this consumer only writes numeric (norms) fields.</summary>
+        public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
+        {
+            throw new InvalidOperationException();
+        }
+
+        /// <summary>Not supported: this consumer only writes numeric (norms) fields.</summary>
+        public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
+        {
+            throw new InvalidOperationException();
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWNormsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWNormsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWNormsFormat.cs
new file mode 100644
index 0000000..d85d5d3
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWNormsFormat.cs
@@ -0,0 +1,35 @@
+namespace Lucene.Net.Codecs.Lucene3x
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.
/// <summary>
/// Norms format whose consumer is a <see cref="PreFlexRWNormsConsumer"/>.
/// @lucene.internal
/// @lucene.experimental
/// </summary>
#pragma warning disable 612, 618
internal class PreFlexRWNormsFormat : Lucene3xNormsFormat
{
    /// <summary>
    /// Builds a norms consumer from the directory, segment name and I/O context
    /// carried by <paramref name="state"/>.
    /// </summary>
    public override DocValuesConsumer NormsConsumer(SegmentWriteState state)
    {
        var directory = state.Directory;
        var segmentName = state.SegmentInfo.Name;
        var context = state.Context;

        return new PreFlexRWNormsConsumer(directory, segmentName, context);
    }
}
#pragma warning restore 612, 618
/// <summary>
/// Test-only postings format that can both write and read the
/// pre-flex index format.
///
/// @lucene.experimental
/// </summary>
#pragma warning disable 612, 618
internal class PreFlexRWPostingsFormat : Lucene3xPostingsFormat
{
    public PreFlexRWPostingsFormat()
    {
        // NOTE: we impersonate the PreFlex codec so that it can
        // read the segments we write!
    }

    /// <summary>Returns a <see cref="PreFlexRWFieldsWriter"/> for the segment being written.</summary>
    public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
    {
        var writer = new PreFlexRWFieldsWriter(state);
        return writer;
    }

    /// <summary>
    /// Returns a fields reader whose term sort order depends on whether the
    /// caller is running inside a merge (see the nested helper class).
    /// </summary>
    public override FieldsProducer FieldsProducer(SegmentReadState state)
    {
        // Whenever IW opens readers, eg for merging, we have to
        // keep terms order in UTF16:
        return new Lucene3xFieldsAnonymousInnerClassHelper(
            this,
            state.Directory,
            state.FieldInfos,
            state.SegmentInfo,
            state.Context,
            state.TermsIndexDivisor);
    }

    /// <summary>
    /// <see cref="Lucene3xFields"/> variant that decides its term sort order by
    /// inspecting the current call stack.
    /// </summary>
    private class Lucene3xFieldsAnonymousInnerClassHelper : Lucene3xFields
    {
        private readonly PreFlexRWPostingsFormat OuterInstance;

        public Lucene3xFieldsAnonymousInnerClassHelper(PreFlexRWPostingsFormat outerInstance, Store.Directory directory, Index.FieldInfos fieldInfos, Index.SegmentInfo segmentInfo, Store.IOContext context, int termsIndexDivisor)
            : base(directory, fieldInfos, segmentInfo, context, termsIndexDivisor)
        {
            OuterInstance = outerInstance;
        }

        /// <summary>
        /// Returns <c>false</c> (legacy UTF16 order) when a method named "Merge" is on
        /// the call stack, <c>true</c> (unicode order) otherwise.
        /// </summary>
        protected internal override bool SortTermsByUnicode()
        {
            // We carefully peek into the stack trace above us: if
            // we are part of a "merge", we must sort by UTF16.
            bool insideMerge = Util.StackTraceHelper.DoesStackTraceContainMethod("Merge");
            if (!insideMerge)
            {
                return true;
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order");
            }
            return false;
        }
    }
}
#pragma warning restore 612, 618
/// <summary>
/// Segment info format that exposes a <see cref="PreFlexRWSegmentInfoWriter"/>.
/// @lucene.experimental
/// </summary>
#pragma warning disable 612, 618
internal class PreFlexRWSegmentInfoFormat : Lucene3xSegmentInfoFormat
{
    // Created once per format instance and handed out on every property read.
    private readonly SegmentInfoWriter writer = new PreFlexRWSegmentInfoWriter();

    /// <summary>The writer instance owned by this format.</summary>
    public override SegmentInfoWriter SegmentInfoWriter
    {
        get { return writer; }
    }
}
#pragma warning restore 612, 618
/// <summary>
/// PreFlex implementation of <seealso cref="SegmentInfoWriter"/>.
/// @lucene.experimental
/// </summary>
#pragma warning disable 612, 618
internal class PreFlexRWSegmentInfoWriter : SegmentInfoWriter
{
    // NOTE: this is not "really" the 3.x format: here every SegmentInfo goes
    // into its own file, whereas 3.x stored the segment list and the
    // per-segment info together in a single segments_N file.

    /// <summary>
    /// Saves the info of one segment by delegating to
    /// <c>SegmentInfos.Write3xInfo</c>. The <paramref name="fis"/> argument is
    /// not used.
    /// </summary>
    public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
    {
        SegmentInfos.Write3xInfo(dir, si, ioContext);
    }
}
#pragma warning restore 612, 618
/// <summary>
/// PreFlexRW skiplist implementation: remembers, per skip level, the last
/// document, payload length and freq/prox file pointers, and writes each skip
/// entry as deltas against them.
/// @lucene.experimental
/// </summary>
public class PreFlexRWSkipListWriter : MultiLevelSkipListWriter
{
    // Per-level state of the most recently written skip entry.
    private int[] lastSkipDoc;
    private int[] lastSkipPayloadLength;
    private long[] lastSkipFreqPointer;
    private long[] lastSkipProxPointer;

    private IndexOutput freqOutput;
    private IndexOutput proxOutput;

    // Values captured by SetSkipData for the next skip entry.
    private int curDoc;
    private bool curStorePayloads;
    private int curPayloadLength;
    private long curFreqPointer;
    private long curProxPointer;

    public PreFlexRWSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput)
        : base(skipInterval, numberOfSkipLevels, docCount)
    {
        this.freqOutput = freqOutput;
        this.proxOutput = proxOutput;

        this.lastSkipDoc = new int[numberOfSkipLevels];
        this.lastSkipPayloadLength = new int[numberOfSkipLevels];
        this.lastSkipFreqPointer = new long[numberOfSkipLevels];
        this.lastSkipProxPointer = new long[numberOfSkipLevels];
    }

    /// <summary>
    /// Records the values for the current skip data, including the current
    /// file pointers of the freq output and (if present) the prox output.
    /// </summary>
    public virtual void SetSkipData(int doc, bool storePayloads, int payloadLength)
    {
        curDoc = doc;
        curStorePayloads = storePayloads;
        curPayloadLength = payloadLength;
        curFreqPointer = freqOutput.FilePointer;

        if (proxOutput == null)
        {
            return;
        }
        curProxPointer = proxOutput.FilePointer;
    }

    /// <summary>
    /// Resets the base writer and re-seeds the per-level state from the current
    /// output positions.
    /// </summary>
    public override void ResetSkip()
    {
        base.ResetSkip();

        Arrays.Fill(lastSkipDoc, 0);
        // -1 never matches a real length, so the first length is always written
        Arrays.Fill(lastSkipPayloadLength, -1);

        long freqStart = freqOutput.FilePointer;
        Arrays.Fill(lastSkipFreqPointer, freqStart);

        if (proxOutput != null)
        {
            long proxStart = proxOutput.FilePointer;
            Arrays.Fill(lastSkipProxPointer, proxStart);
        }
    }

    /// <summary>
    /// Writes one skip entry for <paramref name="level"/> into
    /// <paramref name="skipBuffer"/> and updates that level's "last" state.
    /// </summary>
    protected override void WriteSkipData(int level, IndexOutput skipBuffer)
    {
        // Payload lengths are not stored for every payload in the postings: a
        // length is omitted when it equals the previous one. Skipping still needs
        // the length at each skip point, so skip data uses the same trick:
        //
        // Case 1: current field does not store payloads
        //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
        //           DocSkip,FreqSkip,ProxSkip --> VInt
        //           DocSkip is the delta to the previous skip document.
        //
        // Case 2: current field stores payloads
        //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip, ProxSkip
        //           DocSkip,FreqSkip,ProxSkip --> VInt
        //           PayloadLength             --> VInt
        //           DocSkip/2 is the document delta. An odd DocSkip means a
        //           PayloadLength VInt follows; an even DocSkip means the payload
        //           length is unchanged since the previous skip point.
        int docDelta = curDoc - lastSkipDoc[level];

        if (!curStorePayloads)
        {
            // Case 1: plain document delta
            skipBuffer.WriteVInt32(docDelta);
        }
        else if (curPayloadLength == lastSkipPayloadLength[level])
        {
            // Case 2, unchanged length: even DocSkip, no length written
            skipBuffer.WriteVInt32(docDelta * 2);
        }
        else
        {
            // Case 2, new length: odd DocSkip followed by the length itself
            skipBuffer.WriteVInt32(docDelta * 2 + 1);
            skipBuffer.WriteVInt32(curPayloadLength);
            lastSkipPayloadLength[level] = curPayloadLength;
        }

        int freqDelta = (int)(curFreqPointer - lastSkipFreqPointer[level]);
        skipBuffer.WriteVInt32(freqDelta);
        int proxDelta = (int)(curProxPointer - lastSkipProxPointer[level]);
        skipBuffer.WriteVInt32(proxDelta);

        lastSkipDoc[level] = curDoc;
        lastSkipFreqPointer[level] = curFreqPointer;
        lastSkipProxPointer[level] = curProxPointer;
    }
}
#pragma warning disable 612, 618
/// <summary>
/// Stored fields format whose writer is a <see cref="PreFlexRWStoredFieldsWriter"/>.
/// </summary>
internal class PreFlexRWStoredFieldsFormat : Lucene3xStoredFieldsFormat
{
    /// <summary>
    /// Creates a stored fields writer for the segment named by
    /// <paramref name="segmentInfo"/>.
    /// </summary>
    public override StoredFieldsWriter FieldsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context)
    {
        string segmentName = segmentInfo.Name;
        return new PreFlexRWStoredFieldsWriter(directory, segmentName, context);
    }
}
#pragma warning restore 612, 618
/// <summary>
/// Writes stored fields into two files per segment: a fields data stream and an
/// index stream. Both start with a 4-byte format header; the index stream then
/// receives one 8-byte pointer into the data stream per document.
/// @lucene.experimental </summary>
#pragma warning disable 612, 618
internal sealed class PreFlexRWStoredFieldsWriter : StoredFieldsWriter
{
    private readonly Directory Directory;
    private readonly string Segment;
    private IndexOutput FieldsStream;
    private IndexOutput IndexStream;

    /// <summary>
    /// Creates both output files and writes their format headers. On any failure
    /// the writer is aborted, which closes the streams and deletes the files.
    /// </summary>
    public PreFlexRWStoredFieldsWriter(Directory directory, string segment, IOContext context)
    {
        Debug.Assert(directory != null);
        this.Directory = directory;
        this.Segment = segment;

        bool success = false;
        try
        {
            FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context);
            IndexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context);

            FieldsStream.WriteInt32(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
            IndexStream.WriteInt32(Lucene3xStoredFieldsReader.FORMAT_CURRENT);

            success = true;
        }
        finally
        {
            if (!success)
            {
                Abort();
            }
        }
    }

    /// <summary>
    /// Starts a new document: records the current position of the fields stream
    /// in the index stream, then writes the number of stored fields.
    /// </summary>
    public override void StartDocument(int numStoredFields)
    {
        IndexStream.WriteInt64(FieldsStream.FilePointer);
        FieldsStream.WriteVInt32(numStoredFields);
    }

    /// <summary>
    /// Closes both streams and clears the references, even if closing throws.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            try
            {
                IOUtils.Close(FieldsStream, IndexStream);
            }
            finally
            {
                FieldsStream = IndexStream = null;
            }
        }
    }

    /// <summary>
    /// Best-effort cleanup: disposes the writer (ignoring any exception) and
    /// deletes both output files.
    /// </summary>
    public override void Abort()
    {
        try
        {
            Dispose();
        }
        catch (Exception)
        {
            // Deliberately ignored: Abort must still attempt to delete the files.
        }
        IOUtils.DeleteFilesIgnoringExceptions(Directory, IndexFileNames.SegmentFileName(Segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), IndexFileNames.SegmentFileName(Segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
    }

    /// <summary>
    /// Writes one stored field: field number, a flags byte describing the value
    /// kind, then the value itself (binary, string or numeric).
    /// </summary>
    /// <exception cref="System.ArgumentException">
    /// The numeric value has an unsupported type, or the field has neither a
    /// numeric, binary nor string value.
    /// </exception>
    public override void WriteField(FieldInfo info, IIndexableField field)
    {
        FieldsStream.WriteVInt32(info.Number);
        int bits = 0;
        BytesRef bytes = null;
        string @string = null;
        // TODO: maybe a field should serialize itself?
        // this way we don't bake into indexer all these
        // specific encodings for different fields?  and apps
        // can customize...

        object number = field.GetNumericValue();
        if (number != null)
        {
            if (number is sbyte || number is short || number is int)
            {
                bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT;
            }
            else if (number is long)
            {
                bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG;
            }
            else if (number is float)
            {
                bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT;
            }
            else if (number is double)
            {
                bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE;
            }
            else
            {
                throw new System.ArgumentException("cannot store numeric type " + number.GetType());
            }
        }
        else
        {
            bytes = field.GetBinaryValue();
            if (bytes != null)
            {
                bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY;
            }
            else
            {
                @string = field.GetStringValue();
                if (@string == null)
                {
                    throw new System.ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
                }
            }
        }

        FieldsStream.WriteByte((byte)(sbyte)bits);

        if (bytes != null)
        {
            FieldsStream.WriteVInt32(bytes.Length);
            FieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
        }
        else if (@string != null)
        {
            // Reuse the value fetched above instead of asking the field again.
            FieldsStream.WriteString(@string);
        }
        else
        {
            if (number is sbyte || number is short || number is int)
            {
                // A boxed sbyte/short cannot be unboxed with an (int) cast (that
                // throws InvalidCastException), so widen through Convert.
                FieldsStream.WriteInt32(Convert.ToInt32(number));
            }
            else if (number is long)
            {
                FieldsStream.WriteInt64((long)number);
            }
            else if (number is float)
            {
                FieldsStream.WriteInt32(Number.SingleToInt32Bits((float)number));
            }
            else if (number is double)
            {
                FieldsStream.WriteInt64(BitConverter.DoubleToInt64Bits((double)number));
            }
            else
            {
                // Unreachable: unsupported numeric types were rejected above.
                Debug.Assert(false);
            }
        }
    }

    /// <summary>
    /// Verifies that the index stream holds exactly the 4-byte header plus one
    /// 8-byte entry per document; throws if it does not.
    /// </summary>
    public override void Finish(FieldInfos fis, int numDocs)
    {
        // Consistency check between the document count and the index file size
        // (see LUCENE-1282); failing here keeps a corrupt file out of the index.
        if (4 + ((long)numDocs) * 8 != IndexStream.FilePointer)
        {
            throw new Exception("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + IndexStream.FilePointer + " file=" + IndexStream.ToString() + "; now aborting this merge to prevent index corruption");
        }
    }
}
#pragma warning restore 612, 618
#pragma warning disable 612, 618
/// <summary>
/// Term vectors format that writes with <see cref="PreFlexRWTermVectorsWriter"/>
/// and reads with a <see cref="Lucene3xTermVectorsReader"/> whose term sort
/// order depends on whether the caller is running inside a merge.
/// </summary>
internal class PreFlexRWTermVectorsFormat : Lucene3xTermVectorsFormat
{
    /// <summary>Creates a term vectors writer for the given segment.</summary>
    public override TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context)
    {
        string segmentName = segmentInfo.Name;
        return new PreFlexRWTermVectorsWriter(directory, segmentName, context);
    }

    /// <summary>Creates the stack-inspecting term vectors reader.</summary>
    public override TermVectorsReader VectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
    {
        return new Lucene3xTermVectorsReaderAnonymousInnerClassHelper(
            this,
            directory,
            segmentInfo,
            fieldInfos,
            context);
    }

    /// <summary>
    /// <see cref="Lucene3xTermVectorsReader"/> variant that decides its term sort
    /// order by inspecting the current call stack.
    /// </summary>
    private class Lucene3xTermVectorsReaderAnonymousInnerClassHelper : Lucene3xTermVectorsReader
    {
        private readonly PreFlexRWTermVectorsFormat OuterInstance;

        public Lucene3xTermVectorsReaderAnonymousInnerClassHelper(PreFlexRWTermVectorsFormat outerInstance, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
            : base(directory, segmentInfo, fieldInfos, context)
        {
            OuterInstance = outerInstance;
        }

        /// <summary>
        /// Returns <c>false</c> (legacy UTF16 order) when a method named "Merge" is on
        /// the call stack, <c>true</c> (unicode order) otherwise.
        /// </summary>
        protected internal override bool SortTermsByUnicode()
        {
            // We carefully peek into the stack trace above us: if
            // we are part of a "merge", we must sort by UTF16.
            bool insideMerge = Util.StackTraceHelper.DoesStackTraceContainMethod("Merge");
            if (!insideMerge)
            {
                return true;
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("NOTE: PreFlexRW codec: forcing legacy UTF16 vector term sort order");
            }
            return false;
        }
    }
}
#pragma warning restore 612, 618
// File: src/Lucene.Net.TestFramework/Codecs/Lucene3x/PreFlexRWTermVectorsWriter.cs
using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace Lucene.Net.Codecs.Lucene3x
{
    using ArrayUtil = Lucene.Net.Util.ArrayUtil;
    using BytesRef = Lucene.Net.Util.BytesRef;
    using Directory = Lucene.Net.Store.Directory;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using FieldInfo = Lucene.Net.Index.FieldInfo;
    using FieldInfos = Lucene.Net.Index.FieldInfos;
    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
    using IndexOutput = Lucene.Net.Store.IndexOutput;
    using IOContext = Lucene.Net.Store.IOContext;
    using IOUtils = Lucene.Net.Util.IOUtils;
    using StringHelper = Lucene.Net.Util.StringHelper;

#pragma warning disable 612, 618
    /// <summary>
    /// <see cref="TermVectorsWriter"/> that writes term vectors into three
    /// outputs: an index file (tvx), a documents file (tvd) and a fields file (tvf).
    /// <para/>
    /// Per document, two 64-bit pointers (into tvd and tvf) are appended to tvx;
    /// per field, the field number goes to tvd and the terms/positions/offsets go
    /// to tvf. Payloads are rejected with <see cref="NotSupportedException"/>.
    /// </summary>
    internal sealed class PreFlexRWTermVectorsWriter : TermVectorsWriter
    {
        private readonly Directory Directory;
        private readonly string Segment;
        private IndexOutput Tvx = null, Tvd = null, Tvf = null;

        /// <summary>
        /// Creates the three term vector outputs for <paramref name="segment"/> in
        /// <paramref name="directory"/> and writes the format header to each.
        /// If anything fails part-way, <see cref="Abort()"/> is invoked so that
        /// already-created files are closed and removed before the exception propagates.
        /// </summary>
        /// <param name="directory">Directory the outputs are created in.</param>
        /// <param name="segment">Segment name used to derive the file names.</param>
        /// <param name="context">I/O context passed to <c>CreateOutput</c>.</param>
        public PreFlexRWTermVectorsWriter(Directory directory, string segment, IOContext context)
        {
            this.Directory = directory;
            this.Segment = segment;
            bool success = false;
            try
            {
                // Open files for TermVector storage
                Tvx = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION), context);
                Tvx.WriteInt32(Lucene3xTermVectorsReader.FORMAT_CURRENT);
                Tvd = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context);
                Tvd.WriteInt32(Lucene3xTermVectorsReader.FORMAT_CURRENT);
                Tvf = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION), context);
                Tvf.WriteInt32(Lucene3xTermVectorsReader.FORMAT_CURRENT);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    Abort();
                }
            }
        }

        /// <summary>
        /// Begins a new document: records the current tvd and tvf file pointers in
        /// tvx, writes the field count to tvd and resets the per-document state.
        /// </summary>
        /// <param name="numVectorFields">Number of vector fields that will be written for this document.</param>
        public override void StartDocument(int numVectorFields)
        {
            LastFieldName = null;
            this.NumVectorFields = numVectorFields;
            Tvx.WriteInt64(Tvd.FilePointer);
            Tvx.WriteInt64(Tvf.FilePointer);
            Tvd.WriteVInt32(numVectorFields);
            FieldCount = 0;
            Fps = ArrayUtil.Grow(Fps, numVectorFields);
        }

        private long[] Fps = new long[10]; // pointers to the tvf before writing each field
        private int FieldCount = 0; // number of fields we have written so far for this document
        private int NumVectorFields = 0; // total number of fields we will write for this document
        private string LastFieldName;

        /// <summary>
        /// Begins a new field of the current document: remembers the tvf position,
        /// writes the field number to tvd, and writes the term count and the
        /// positions/offsets flag byte to tvf. After the last field of the document
        /// the deltas between consecutive tvf positions are appended to tvd.
        /// </summary>
        /// <param name="info">Field being written; its name must sort after the previous field's name.</param>
        /// <param name="numTerms">Number of terms that will follow for this field.</param>
        /// <param name="positions">Whether positions are stored.</param>
        /// <param name="offsets">Whether offsets are stored.</param>
        /// <param name="payloads">Must be <c>false</c>.</param>
        /// <exception cref="NotSupportedException">If <paramref name="payloads"/> is <c>true</c>.</exception>
        public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
        {
            // Ordinal comparison: field ordering must not depend on the current culture
            // (string.CompareTo is culture-sensitive in .NET).
            Debug.Assert(LastFieldName == null || string.CompareOrdinal(info.Name, LastFieldName) > 0, "fieldName=" + info.Name + " lastFieldName=" + LastFieldName);
            LastFieldName = info.Name;
            if (payloads)
            {
                throw new NotSupportedException("3.x codec does not support payloads on vectors!");
            }
            this.Positions = positions;
            this.Offsets = offsets;
            LastTerm.Length = 0; // restart prefix compression for the new field
            Fps[FieldCount++] = Tvf.FilePointer;
            Tvd.WriteVInt32(info.Number);
            Tvf.WriteVInt32(numTerms);
            sbyte bits = 0x0;
            if (positions)
            {
                bits |= Lucene3xTermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
            }
            if (offsets)
            {
                bits |= Lucene3xTermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
            }
            Tvf.WriteByte((byte)bits);

            Debug.Assert(FieldCount <= NumVectorFields);
            if (FieldCount == NumVectorFields)
            {
                // last field of the document
                // this is crazy because the file format is crazy!
                for (int i = 1; i < FieldCount; i++)
                {
                    Tvd.WriteVInt64(Fps[i] - Fps[i - 1]);
                }
            }
        }

        private readonly BytesRef LastTerm = new BytesRef(10);

        // NOTE: we override addProx, so we don't need to buffer when indexing.
        // we also don't buffer during bulk merges.
        private int[] OffsetStartBuffer = new int[10];

        private int[] OffsetEndBuffer = new int[10];
        private int OffsetIndex = 0;
        private int OffsetFreq = 0;
        private bool Positions = false;
        private bool Offsets = false;

        /// <summary>
        /// Writes a term to tvf as (shared prefix length, suffix length, suffix bytes, freq),
        /// where the prefix is shared with the previously written term of this field.
        /// When both positions and offsets are stored, the offset buffers are sized
        /// to hold <paramref name="freq"/> entries.
        /// </summary>
        /// <param name="term">Term bytes.</param>
        /// <param name="freq">Term frequency within the current field.</param>
        public override void StartTerm(BytesRef term, int freq)
        {
            int prefix = StringHelper.BytesDifference(LastTerm, term);
            int suffix = term.Length - prefix;
            Tvf.WriteVInt32(prefix);
            Tvf.WriteVInt32(suffix);
            Tvf.WriteBytes(term.Bytes, term.Offset + prefix, suffix);
            Tvf.WriteVInt32(freq);
            LastTerm.CopyBytes(term);
            LastPosition = LastOffset = 0;

            if (Offsets && Positions)
            {
                // we might need to buffer if its a non-bulk merge
                OffsetStartBuffer = ArrayUtil.Grow(OffsetStartBuffer, freq);
                OffsetEndBuffer = ArrayUtil.Grow(OffsetEndBuffer, freq);
                OffsetIndex = 0;
                OffsetFreq = freq;
            }
        }

        internal int LastPosition = 0;
        internal int LastOffset = 0;

        /// <summary>
        /// Writes one occurrence of the current term. Positions are written as deltas
        /// from the previous position; offsets as (start - previous end, end - start).
        /// When both are stored, position deltas are written immediately while offsets
        /// are buffered and flushed once all occurrences of the term have been added.
        /// </summary>
        /// <param name="position">Position of the occurrence.</param>
        /// <param name="startOffset">Start offset of the occurrence.</param>
        /// <param name="endOffset">End offset of the occurrence.</param>
        /// <param name="payload">Expected to be <c>null</c> (asserted).</param>
        public override void AddPosition(int position, int startOffset, int endOffset, BytesRef payload)
        {
            Debug.Assert(payload == null);
            if (Positions && Offsets)
            {
                // write position delta
                Tvf.WriteVInt32(position - LastPosition);
                LastPosition = position;

                // buffer offsets
                OffsetStartBuffer[OffsetIndex] = startOffset;
                OffsetEndBuffer[OffsetIndex] = endOffset;
                OffsetIndex++;

                // dump buffer if we are done
                if (OffsetIndex == OffsetFreq)
                {
                    for (int i = 0; i < OffsetIndex; i++)
                    {
                        Tvf.WriteVInt32(OffsetStartBuffer[i] - LastOffset);
                        Tvf.WriteVInt32(OffsetEndBuffer[i] - OffsetStartBuffer[i]);
                        LastOffset = OffsetEndBuffer[i];
                    }
                }
            }
            else if (Positions)
            {
                // write position delta
                Tvf.WriteVInt32(position - LastPosition);
                LastPosition = position;
            }
            else if (Offsets)
            {
                // write offset deltas
                Tvf.WriteVInt32(startOffset - LastOffset);
                Tvf.WriteVInt32(endOffset - startOffset);
                LastOffset = endOffset;
            }
        }

        /// <summary>
        /// Closes the outputs (ignoring any exception raised while doing so) and
        /// deletes the three term vector files of this segment.
        /// </summary>
        public override void Abort()
        {
            try
            {
                Dispose();
            }
            catch (Exception)
            {
                // Deliberately ignored: abort is best-effort and the files are
                // deleted below regardless of whether closing succeeded.
            }
            IOUtils.DeleteFilesIgnoringExceptions(Directory, IndexFileNames.SegmentFileName(Segment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION), IndexFileNames.SegmentFileName(Segment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), IndexFileNames.SegmentFileName(Segment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
        }

        /// <summary>
        /// Verifies that tvx has the expected size for <paramref name="numDocs"/>
        /// documents: a 4-byte header plus 16 bytes (two 64-bit pointers) per document.
        /// </summary>
        /// <param name="fis">Field infos (not used by this implementation).</param>
        /// <param name="numDocs">Number of documents that were written.</param>
        /// <exception cref="Exception">If the tvx size does not match.</exception>
        public override void Finish(FieldInfos fis, int numDocs)
        {
            // this is most likely a bug in Sun JRE 1.6.0_04/_05;
            // we detect that the bug has struck, here, and
            // throw an exception to prevent the corruption from
            // entering the index. See LUCENE-1282 for
            // details.
            if (4 + ((long)numDocs) * 16 != Tvx.FilePointer)
            {
                throw new Exception("tvx size mismatch: mergedDocs is " + numDocs + " but tvx size is " + Tvx.FilePointer + " file=" + Tvx.ToString() + "; now aborting this merge to prevent index corruption");
            }
        }

        /// <summary>
        /// Close all streams.
        /// </summary>
        /// <param name="disposing"><c>true</c> when called from <c>Dispose()</c>;
        /// managed streams are only touched in that case.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                // make an effort to close all streams we can but remember and re-throw
                // the first exception encountered in this process
                IOUtils.Close(Tvx, Tvd, Tvf);
                Tvx = Tvd = Tvf = null;
            }
        }

        /// <summary>
        /// Comparer defining the term order used by this writer.
        /// </summary>
        public override IComparer<BytesRef> Comparer
        {
            get
            {
                return BytesRef.UTF8SortedAsUTF16Comparer;
            }
        }
    }
#pragma warning restore 612, 618
}
