// Origin: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/Lucene3x/TermInfosWriter.cs
// (added as a new file, index 0000000..fd7c05d)
using System.Diagnostics;

namespace Lucene.Net.Codecs.Lucene3x
{
    using System;
    using System.IO;
    using BytesRef = Lucene.Net.Util.BytesRef;
    using CharsRef = Lucene.Net.Util.CharsRef;
    using Directory = Lucene.Net.Store.Directory;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using FieldInfos = Lucene.Net.Index.FieldInfos;
    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
    using IndexOutput = Lucene.Net.Store.IndexOutput;
    using IOContext = Lucene.Net.Store.IOContext;
    using IOUtils = Lucene.Net.Util.IOUtils;
    using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;

    /// <summary>
    /// This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
    /// Directory. A TermInfos can be written once, in order.
    /// <para/>
    /// The instance created through the internal constructor writes the terms file and
    /// owns a second, private instance (<c>IsIndex == true</c>) that writes the terms
    /// index file; the two reference each other through <c>Other</c>.
    /// </summary>
#pragma warning disable 612, 618
    internal sealed class TermInfosWriter : IDisposable
    {
        /// <summary>
        /// The file format version, a negative number. </summary>
        public const int FORMAT = -3;

        // Changed strings to true utf8 with length-in-bytes not
        // length-in-chars
        public const int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4;

        // NOTE: always change this if you switch to a new format!
        public const int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;

        private FieldInfos FieldInfos;
        private IndexOutput Output;
        private TermInfo LastTi = new TermInfo();
        private long Size;

        // TODO: the default values for these two parameters should be settable from
        // IndexWriter. However, once that's done, folks will start setting them to
        // ridiculous values and complaining that things don't work well, as with
        // mergeFactor. So, let's wait until a number of folks find that alternate
        // values work better. Note that both of these values are stored in the
        // segment, so that it's safe to change these w/o rebuilding all indexes.

        /// <summary>
        /// Expert: The fraction of terms in the "dictionary" which should be stored
        /// in RAM. Smaller values use more memory, but make searching slightly
        /// faster, while larger values use less memory and make searching slightly
        /// slower. Searching is typically not dominated by dictionary lookup, so
        /// tweaking this is rarely useful.
        /// </summary>
        internal int IndexInterval = 128;

        /// <summary>
        /// Expert: The fraction of term entries stored in skip tables,
        /// used to accelerate skipping. Larger values result in
        /// smaller indexes, greater acceleration, but fewer accelerable cases, while
        /// smaller values result in bigger indexes, less acceleration and more
        /// accelerable cases. More detailed experiments would be useful here.
        /// </summary>
        internal int SkipInterval = 16;

        /// <summary>
        /// Expert: The maximum number of skip levels. Smaller values result in
        /// slightly smaller indexes, but slower skipping in big posting lists.
        /// </summary>
        internal int MaxSkipLevels = 10;

        private long LastIndexPointer;
        private bool IsIndex;
        private readonly BytesRef LastTerm = new BytesRef();
        private int LastFieldNumber = -1;

        // The paired writer: the index writer for a terms writer, and vice versa.
        private TermInfosWriter Other;

        /// <summary>
        /// Creates the terms writer for <paramref name="segment"/> together with its
        /// paired terms-index writer.
        /// </summary>
        /// <param name="directory">Directory in which both output files are created.</param>
        /// <param name="segment">Segment name used to build the file names.</param>
        /// <param name="fis">Field infos used to resolve field numbers to names.</param>
        /// <param name="interval">Value assigned to <see cref="IndexInterval"/>.</param>
        internal TermInfosWriter(Directory directory, string segment, FieldInfos fis, int interval)
        {
            Initialize(directory, segment, fis, interval, false);
            bool success = false;
            try
            {
                Other = new TermInfosWriter(directory, segment, fis, interval, true);
                Other.Other = this;
                success = true;
            }
            finally
            {
                if (!success)
                {
                    // The index writer could not be created: release and remove
                    // the terms file this instance has already opened.
                    CloseAndDeleteQuietly(directory, segment);
                }
            }
        }

        private TermInfosWriter(Directory directory, string segment, FieldInfos fis, int interval, bool isIndex)
        {
            Initialize(directory, segment, fis, interval, isIndex);
        }

        /// <summary>
        /// Opens the output file for this writer and writes the fixed-size header:
        /// format, a placeholder for the size (filled in by <see cref="Dispose"/>),
        /// index interval, skip interval and max skip levels.
        /// </summary>
        private void Initialize(Directory directory, string segment, FieldInfos fis, int interval, bool isi)
        {
            IndexInterval = interval;
            FieldInfos = fis;
            IsIndex = isi;
            Output = directory.CreateOutput(TermsFileName(segment, IsIndex), IOContext.DEFAULT);
            bool success = false;
            try
            {
                Output.WriteInt32(FORMAT_CURRENT); // write format
                Output.WriteInt64(0); // leave space for size
                Output.WriteInt32(IndexInterval); // write indexInterval
                Output.WriteInt32(SkipInterval); // write skipInterval
                Output.WriteInt32(MaxSkipLevels); // write maxSkipLevels
                // The scratch buffers are needed only by assertion code, so they are
                // allocated from inside an assertion as well.
                Debug.Assert(InitUTF16Results());
                success = true;
            }
            finally
            {
                if (!success)
                {
                    CloseAndDeleteQuietly(directory, segment);
                }
            }
        }

        /// <summary>
        /// Builds the file name written by a writer of the given kind: the terms index
        /// extension when <paramref name="isIndex"/> is true, the terms extension otherwise.
        /// </summary>
        private static string TermsFileName(string segment, bool isIndex)
        {
            string extension = isIndex ? Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION : Lucene3xPostingsFormat.TERMS_EXTENSION;
            return IndexFileNames.SegmentFileName(segment, "", extension);
        }

        /// <summary>
        /// Failure-path cleanup: closes <c>Output</c> while suppressing exceptions and
        /// then tries to delete this writer's file.
        /// </summary>
        private void CloseAndDeleteQuietly(Directory directory, string segment)
        {
            IOUtils.CloseWhileHandlingException(Output);

            try
            {
                directory.DeleteFile(TermsFileName(segment, IsIndex));
            }
            catch (IOException)
            {
                // Deliberately ignored: this is best-effort cleanup and must not
                // mask the exception that triggered it.
            }
        }

        // Currently used only by assert statements
        internal CharsRef Utf16Result1;

        internal CharsRef Utf16Result2;
        private readonly BytesRef ScratchBytes = new BytesRef();

        // Currently used only by assert statements
        private bool InitUTF16Results()
        {
            Utf16Result1 = new CharsRef(10);
            Utf16Result2 = new CharsRef(10);
            return true;
        }

        /// <summary>
        /// Returns the name of field <paramref name="fieldNumber"/>.
        /// Note: -1 is the empty field: "" !!!! </summary>
        internal static string FieldName(FieldInfos infos, int fieldNumber)
        {
            if (fieldNumber == -1)
            {
                return "";
            }
            else
            {
                return infos.FieldInfo(fieldNumber).Name;
            }
        }

        /// <summary>
        /// Compares the previously written (field, term) with the given one: negative
        /// when the previous entry sorts first. Field names are compared first, then the
        /// term text as UTF-16 code units. Currently used only by assert statements.
        /// </summary>
        private int CompareToLastTerm(int fieldNumber, BytesRef term)
        {
            if (LastFieldNumber != fieldNumber)
            {
                // Ordinal comparison: a culture-sensitive comparison could order
                // field names differently depending on the current culture.
                int cmp = string.CompareOrdinal(FieldName(FieldInfos, LastFieldNumber), FieldName(FieldInfos, fieldNumber));
                // If there is a field named "" (empty string) then we
                // will get 0 on this comparison, yet, it's "OK". But
                // it's not OK if two different field numbers map to
                // the same name.
                if (cmp != 0 || LastFieldNumber != -1)
                {
                    return cmp;
                }
            }

            ScratchBytes.CopyBytes(term);
            Debug.Assert(LastTerm.Offset == 0);
            UnicodeUtil.UTF8toUTF16(LastTerm.Bytes, 0, LastTerm.Length, Utf16Result1);

            Debug.Assert(ScratchBytes.Offset == 0);
            UnicodeUtil.UTF8toUTF16(ScratchBytes.Bytes, 0, ScratchBytes.Length, Utf16Result2);

            int len = Math.Min(Utf16Result1.Length, Utf16Result2.Length);

            for (int i = 0; i < len; i++)
            {
                char ch1 = Utf16Result1.Chars[i];
                char ch2 = Utf16Result2.Chars[i];
                if (ch1 != ch2)
                {
                    return ch1 - ch2;
                }
            }
            if (Utf16Result1.Length == 0 && LastFieldNumber == -1)
            {
                // If there is a field named "" (empty string) with a term text of "" (empty string) then we
                // will get 0 on this comparison, yet, it's "OK".
                return -1;
            }
            return Utf16Result1.Length - Utf16Result2.Length;
        }

        /// <summary>
        /// Adds a new &lt;&lt;fieldNumber, termBytes&gt;, TermInfo&gt; pair to the set.
        /// Assertions check that the term sorts after the previously added one and that
        /// the freq and prox pointers are not smaller than the previous ones.
        /// </summary>
        /// <param name="fieldNumber">Number of the field the term belongs to.</param>
        /// <param name="term">Term bytes.</param>
        /// <param name="ti">Term info written for this term.</param>
        public void Add(int fieldNumber, BytesRef term, TermInfo ti)
        {
            Debug.Assert(CompareToLastTerm(fieldNumber, term) < 0 || (IsIndex && term.Length == 0 && LastTerm.Length == 0), "Terms are out of order: field=" + FieldName(FieldInfos, fieldNumber) + " (number " + fieldNumber + ")" + " lastField=" + FieldName(FieldInfos, LastFieldNumber) + " (number " + LastFieldNumber + ")" + " text=" + term.Utf8ToString() + " lastText=" + LastTerm.Utf8ToString());

            Debug.Assert(ti.FreqPointer >= LastTi.FreqPointer, "freqPointer out of order (" + ti.FreqPointer + " < " + LastTi.FreqPointer + ")");
            Debug.Assert(ti.ProxPointer >= LastTi.ProxPointer, "proxPointer out of order (" + ti.ProxPointer + " < " + LastTi.ProxPointer + ")");

            if (!IsIndex && Size % IndexInterval == 0)
            {
                // Every IndexInterval-th term, hand the *previous* entry to the index writer.
                Other.Add(LastFieldNumber, LastTerm, LastTi); // add an index term
            }
            WriteTerm(fieldNumber, term); // write term

            Output.WriteVInt32(ti.DocFreq); // write doc freq
            Output.WriteVInt64(ti.FreqPointer - LastTi.FreqPointer); // write pointers
            Output.WriteVInt64(ti.ProxPointer - LastTi.ProxPointer);

            if (ti.DocFreq >= SkipInterval)
            {
                Output.WriteVInt32(ti.SkipOffset);
            }

            if (IsIndex)
            {
                // Index entries also record, as a delta, the current position
                // of the paired terms output.
                Output.WriteVInt64(Other.Output.FilePointer - LastIndexPointer);
                LastIndexPointer = Other.Output.FilePointer; // write pointer
            }

            LastFieldNumber = fieldNumber;
            LastTi.Set(ti);
            Size++;
        }

        /// <summary>
        /// Writes <paramref name="term"/> prefix-compressed against the previously
        /// written term (shared prefix length, suffix length, suffix bytes), followed by
        /// the field number, and remembers it as the new last term.
        /// </summary>
        private void WriteTerm(int fieldNumber, BytesRef term)
        {
            // TODO: UTF16toUTF8 could tell us this prefix
            // Compute prefix in common with last term:
            int start = 0;
            int limit = term.Length < LastTerm.Length ? term.Length : LastTerm.Length;
            while (start < limit)
            {
                if (term.Bytes[start + term.Offset] != LastTerm.Bytes[start + LastTerm.Offset])
                {
                    break;
                }
                start++;
            }

            int length = term.Length - start;
            Output.WriteVInt32(start); // write shared prefix length
            Output.WriteVInt32(length); // write delta length
            Output.WriteBytes(term.Bytes, start + term.Offset, length); // write delta bytes
            Output.WriteVInt32(fieldNumber); // write field num
            LastTerm.CopyBytes(term);
        }

        /// <summary>
        /// Called to complete TermInfos creation: writes the number of added entries
        /// into the header slot reserved by <c>Initialize</c>, closes the output and,
        /// for the terms writer, disposes the paired index writer as well.
        /// </summary>
        public void Dispose()
        {
            try
            {
                Output.Seek(4); // write size after format
                Output.WriteInt64(Size);
            }
            finally
            {
                try
                {
                    Output.Dispose();
                }
                finally
                {
                    if (!IsIndex)
                    {
                        Other.Dispose();
                    }
                }
            }
        }
    }
#pragma warning restore 612, 618
}
// Origin: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWCodec.cs
// (recorded in this diff as a deleted file, index 4d265d9..0000000)
namespace Lucene.Net.Codecs.Lucene3x
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /// <summary>
    /// Writes 3.x-like indexes (not perfect emulation yet) for testing only!
    /// <para/>
    /// @lucene.experimental
    /// </summary>
#pragma warning disable 612, 618
    public class PreFlexRWCodec : Lucene3xCodec
    {
        private readonly PostingsFormat Postings = new PreFlexRWPostingsFormat();
        private readonly Lucene3xNormsFormat Norms = new PreFlexRWNormsFormat();
        private readonly FieldInfosFormat FieldInfos = new PreFlexRWFieldInfosFormat();
        private readonly TermVectorsFormat TermVectors = new PreFlexRWTermVectorsFormat();
        private readonly SegmentInfoFormat SegmentInfos = new PreFlexRWSegmentInfoFormat();
        private readonly StoredFieldsFormat StoredFields = new PreFlexRWStoredFieldsFormat();
        private readonly bool _oldFormatImpersonationIsActive;

        /// <summary>
        /// LUCENENET specific
        /// Creates the codec with OldFormatImpersonationIsActive = true.
        /// </summary>
        /// <remarks>
        /// Added so that SPIClassIterator can locate this Codec. The iterator
        /// only recognises classes that have empty constructors.
        /// </remarks>
        public PreFlexRWCodec()
            : this(true)
        { }

        /// <summary>
        /// Creates the codec. When <paramref name="oldFormatImpersonationIsActive"/> is
        /// true every format property returns the PreFlexRW implementation held by this
        /// instance; otherwise each property returns the base class's format.
        /// </summary>
        /// <param name="oldFormatImpersonationIsActive">
        /// LUCENENET specific
        /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>
        /// </param>
        public PreFlexRWCodec(bool oldFormatImpersonationIsActive) : base()
        {
            _oldFormatImpersonationIsActive = oldFormatImpersonationIsActive;
        }

        public override PostingsFormat PostingsFormat
        {
            get { return _oldFormatImpersonationIsActive ? Postings : base.PostingsFormat; }
        }

        public override NormsFormat NormsFormat
        {
            get { return _oldFormatImpersonationIsActive ? Norms : base.NormsFormat; }
        }

        public override SegmentInfoFormat SegmentInfoFormat
        {
            get { return _oldFormatImpersonationIsActive ? SegmentInfos : base.SegmentInfoFormat; }
        }

        public override FieldInfosFormat FieldInfosFormat
        {
            get { return _oldFormatImpersonationIsActive ? FieldInfos : base.FieldInfosFormat; }
        }

        public override TermVectorsFormat TermVectorsFormat
        {
            get { return _oldFormatImpersonationIsActive ? TermVectors : base.TermVectorsFormat; }
        }

        public override StoredFieldsFormat StoredFieldsFormat
        {
            get { return _oldFormatImpersonationIsActive ? StoredFields : base.StoredFieldsFormat; }
        }
    }
#pragma warning restore 612, 618
}

// Origin: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWFieldInfosFormat.cs
// (recorded in this diff as a deleted file, index a02fe7f..0000000)
namespace Lucene.Net.Codecs.Lucene3x
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Field infos format whose reader and writer are the PreFlexRW implementations.
    /// A new reader/writer instance is returned on every property access.
    /// <para/>
    /// @lucene.internal
    /// @lucene.experimental
    /// </summary>
#pragma warning disable 612, 618
    internal class PreFlexRWFieldInfosFormat : Lucene3xFieldInfosFormat
    {
        public override FieldInfosReader FieldInfosReader
        {
            get { return new PreFlexRWFieldInfosReader(); }
        }

        public override FieldInfosWriter FieldInfosWriter
        {
            get { return new PreFlexRWFieldInfosWriter(); }
        }
    }
#pragma warning restore 612, 618
}

// Origin: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWFieldInfosReader.cs
// (recorded in this diff as a deleted file, index 458951e..0000000)
namespace Lucene.Net.Codecs.Lucene3x
{
    using System.Collections.Generic;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
    using Directory = Lucene.Net.Store.Directory;
    using DocValuesType = Lucene.Net.Index.DocValuesType;
    using FieldInfo = Lucene.Net.Index.FieldInfo;
    using FieldInfos = Lucene.Net.Index.FieldInfos;
    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
    using IndexFormatTooNewException = Lucene.Net.Index.IndexFormatTooNewException;
    using IndexFormatTooOldException = Lucene.Net.Index.IndexFormatTooOldException;
    using IndexInput = Lucene.Net.Store.IndexInput;
    using IndexOptions = Lucene.Net.Index.IndexOptions;
    using IOContext = Lucene.Net.Store.IOContext;
    using SegmentInfo = Lucene.Net.Index.SegmentInfo;

    /// <summary>
    /// Reads the field infos file written by <see cref="PreFlexRWFieldInfosWriter"/>.
    /// <para/>
    /// @lucene.internal
    /// @lucene.experimental
    /// </summary>
    internal class PreFlexRWFieldInfosReader : FieldInfosReader
    {
        internal const int FORMAT_MINIMUM = PreFlexRWFieldInfosWriter.FORMAT_START;

        /// <summary>
        /// Reads the field infos of segment <paramref name="segmentName"/> from
        /// <paramref name="directory"/>. <paramref name="segmentSuffix"/> is not used.
        /// </summary>
        /// <exception cref="IndexFormatTooOldException">The stored format is greater than <see cref="FORMAT_MINIMUM"/>.</exception>
        /// <exception cref="IndexFormatTooNewException">The stored format is below the current format and is not the RW format.</exception>
        /// <exception cref="CorruptIndexException">Inconsistent flag bits, or the file was not consumed completely.</exception>
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            string fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION);

            using (IndexInput input = directory.OpenInput(fileName, iocontext))
            {
                int format = input.ReadVInt32();

                // Format numbers are negative and get smaller with each newer version.
                if (format > FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
                }
                if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW)
                {
                    throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
                }

                bool isRwFormat = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW;
                int size = input.ReadVInt32(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string name = input.ReadString();
                    // Only the RW format stores the field number explicitly;
                    // otherwise the ordinal in the file is the field number.
                    int fieldNumber = isRwFormat ? input.ReadInt32() : i;
                    byte bits = input.ReadByte();
                    bool isIndexed = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0;
                    bool storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0;
                    bool omitNorms = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0;
                    bool storePayloads = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0;
                    IndexOptions? indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = null;
                    }
                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0)
                    {
                        if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS)
                        {
                            indexOptions = IndexOptions.DOCS_AND_FREQS;
                        }
                        else
                        {
                            throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
                        }
                    }
                    else
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // LUCENE-3027: past indices were able to write
                    // storePayloads=true when omitTFAP is also true,
                    // which is invalid. We correct that, here:
                    if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                    {
                        storePayloads = false;
                    }

                    DocValuesType? normType = isIndexed && !omitNorms ? (DocValuesType?)DocValuesType.NUMERIC : null;
                    if (isRwFormat && normType != null)
                    {
                        // RW can have norms but doesn't write them
                        normType = input.ReadByte() != 0 ? (DocValuesType?)DocValuesType.NUMERIC : null;
                    }

                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, normType, null);
                }

                if (input.FilePointer != input.Length)
                {
                    throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length + " (resource: " + input + ")");
                }
                return new FieldInfos(infos);
            }
        }

        /// <summary>
        /// Adds the field infos file name of <paramref name="info"/> to <paramref name="files"/>.
        /// <paramref name="dir"/> is not used.
        /// </summary>
        public static void Files(Directory dir, SegmentInfo info, ISet<string> files)
        {
            files.Add(IndexFileNames.SegmentFileName(info.Name, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION));
        }
    }
}

// Origin: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWFieldInfosWriter.cs
// (recorded in this diff as a deleted file, index e0fef49..0000000)
namespace Lucene.Net.Codecs.Lucene3x
{
    using System.Diagnostics;
    using Directory = Lucene.Net.Store.Directory;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using FieldInfo = Lucene.Net.Index.FieldInfo;
    using FieldInfos = Lucene.Net.Index.FieldInfos;
    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
    using IndexOptions = Lucene.Net.Index.IndexOptions;
    using IndexOutput = Lucene.Net.Store.IndexOutput;
    using IOContext = Lucene.Net.Store.IOContext;
    using IOUtils = Lucene.Net.Util.IOUtils;

    /// <summary>
    /// Writes field infos in the RW variant of the 3.x format: in addition to the
    /// name and flag bits it stores each field's number and, for indexed fields that
    /// keep norms, a byte telling whether a norm type is present.
    /// <para/>
    /// @lucene.internal
    /// @lucene.experimental
    /// </summary>
    internal class PreFlexRWFieldInfosWriter : FieldInfosWriter
    {
        // TODO move to test-framework preflex RW?

        /// <summary>
        /// Extension of field infos </summary>
        internal const string FIELD_INFOS_EXTENSION = "fnm";

        // First used in 2.9; prior to 2.9 there was no format header
        internal const int FORMAT_START = -2;

        // First used in 3.4: omit only positional information
        internal const int FORMAT_OMIT_POSITIONS = -3;

        internal static readonly int FORMAT_PREFLEX_RW = int.MinValue;

        // whenever you add a new format, make it 1 smaller (negative version logic)!
        internal const int FORMAT_CURRENT = FORMAT_OMIT_POSITIONS;

        internal const sbyte IS_INDEXED = 0x1;
        internal const sbyte STORE_TERMVECTOR = 0x2;
        internal const sbyte OMIT_NORMS = 0x10;
        internal const sbyte STORE_PAYLOADS = 0x20;
        internal const sbyte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
        internal const sbyte OMIT_POSITIONS = -128;

        /// <summary>
        /// Writes <paramref name="infos"/> to the field infos file of segment
        /// <paramref name="segmentName"/>. <paramref name="segmentSuffix"/> is not used.
        /// </summary>
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
            IndexOutput output = directory.CreateOutput(fileName, context);
            bool success = false;
            try
            {
                output.WriteVInt32(FORMAT_PREFLEX_RW);
                output.WriteVInt32(infos.Count);
                foreach (FieldInfo fi in infos)
                {
                    sbyte bits = 0x0;
                    if (fi.HasVectors)
                    {
                        bits |= STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms)
                    {
                        bits |= OMIT_NORMS;
                    }
                    if (fi.HasPayloads)
                    {
                        bits |= STORE_PAYLOADS;
                    }
                    if (fi.IsIndexed)
                    {
                        bits |= IS_INDEXED;
                        Debug.Assert(fi.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads);
                        if (fi.IndexOptions == IndexOptions.DOCS_ONLY)
                        {
                            bits |= OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (fi.IndexOptions == IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);
                    /*
                     * we need to write the field number since IW tries
                     * to stabilize the field numbers across segments so the
                     * FI ordinal is not necessarily equivalent to the field number
                     */
                    output.WriteInt32(fi.Number);
                    output.WriteByte((byte)bits);
                    if (fi.IsIndexed && !fi.OmitsNorms)
                    {
                        // to allow null norm types we need to indicate if norms are written
                        // only in RW case
                        output.WriteByte((byte)(sbyte)(fi.NormType == null ? 0 : 1));
                    }
                    Debug.Assert(fi.Attributes == null); // not used or supported
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    // Already failing: close without letting a secondary
                    // exception replace the original one.
                    IOUtils.CloseWhileHandlingException(output);
                }
            }
        }
    }
}

// Origin: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWFieldsWriter.cs
// (recorded in this diff as a deleted file, index b0c8174..0000000)
namespace Lucene.Net.Codecs.Lucene3x
{
    using System.Collections.Generic;
    using System.Diagnostics;
    using BytesRef = Lucene.Net.Util.BytesRef;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
    using FieldInfo = Lucene.Net.Index.FieldInfo;
    using IndexFileNames = Lucene.Net.Index.IndexFileNames;
    using IndexOptions = Lucene.Net.Index.IndexOptions;
    using IndexOutput = Lucene.Net.Store.IndexOutput;
    using IOUtils = Lucene.Net.Util.IOUtils;
    using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;

    /// <summary>
    /// Fields consumer that writes terms through a <see cref="TermInfosWriter"/> and
    /// postings to a freq output and, when the field infos have prox data, a prox output.
    /// </summary>
#pragma warning disable 612, 618
    internal class PreFlexRWFieldsWriter : FieldsConsumer
    {
        private readonly TermInfosWriter TermsOut;
        private readonly IndexOutput FreqOut;
        private readonly IndexOutput ProxOut; // null when state.FieldInfos.HasProx is false
        private readonly PreFlexRWSkipListWriter SkipListWriter;
        private readonly int TotalNumDocs;

        /// <summary>
        /// Opens the terms, freq and (optionally) prox outputs for the segment described
        /// by <paramref name="state"/>. Outputs already opened are closed if a later one
        /// cannot be created.
        /// </summary>
        public PreFlexRWFieldsWriter(SegmentWriteState state)
        {
            TermsOut = new TermInfosWriter(state.Directory, state.SegmentInfo.Name, state.FieldInfos, state.TermIndexInterval);

            bool success = false;
            try
            {
                string freqFile = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, "", Lucene3xPostingsFormat.FREQ_EXTENSION);
                FreqOut = state.Directory.CreateOutput(freqFile, state.Context);
                TotalNumDocs = state.SegmentInfo.DocCount;
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(TermsOut);
                }
            }

            success = false;
            try
            {
                if (state.FieldInfos.HasProx)
                {
                    string proxFile = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, "", Lucene3xPostingsFormat.PROX_EXTENSION);
                    ProxOut = state.Directory.CreateOutput(proxFile, state.Context);
                }
                else
                {
                    ProxOut = null;
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(TermsOut, FreqOut);
                }
            }

            SkipListWriter = new PreFlexRWSkipListWriter(TermsOut.SkipInterval, TermsOut.MaxSkipLevels, TotalNumDocs, FreqOut, ProxOut);
        }

        /// <summary>
        /// Returns a terms consumer for <paramref name="field"/>.
        /// </summary>
        /// <exception cref="System.NotSupportedException">The field indexes offsets.</exception>
        public override TermsConsumer AddField(FieldInfo field)
        {
            Debug.Assert(field.Number != -1);
            if (field.IndexOptions >= IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
            {
                throw new System.NotSupportedException("this codec cannot index offsets");
            }
            return new PreFlexTermsWriter(this, field);
        }

        public override void Dispose()
        {
            IOUtils.Close(TermsOut, FreqOut, ProxOut);
        }

        /// <summary>
        /// Terms consumer for one field; a single <see cref="PostingsWriter"/> is
        /// reset and reused for every term.
        /// </summary>
        private class PreFlexTermsWriter : TermsConsumer
        {
            private readonly PreFlexRWFieldsWriter OuterInstance;

            internal readonly FieldInfo FieldInfo;
            internal readonly bool OmitTF;
            internal readonly bool StorePayloads;

            internal readonly TermInfo TermInfo = new TermInfo();
            internal readonly PostingsWriter postingsWriter;

            public PreFlexTermsWriter(PreFlexRWFieldsWriter outerInstance, FieldInfo fieldInfo)
            {
                this.OuterInstance = outerInstance;

                // Created directly here rather than through a virtual initializer,
                // so no overridable member runs during construction.
                postingsWriter = new PostingsWriter(this);
                this.FieldInfo = fieldInfo;
                OmitTF = fieldInfo.IndexOptions == IndexOptions.DOCS_ONLY;
                StorePayloads = fieldInfo.HasPayloads;
            }

            /// <summary>
            /// Writes the docs/freqs and positions/payloads of one term to the enclosing
            /// writer's freq and prox outputs.
            /// </summary>
            internal class PostingsWriter : PostingsConsumer
            {
                private readonly PreFlexRWFieldsWriter.PreFlexTermsWriter OuterInstance;

                public PostingsWriter(PreFlexRWFieldsWriter.PreFlexTermsWriter outerInstance)
                {
                    this.OuterInstance = outerInstance;
                }

                internal int LastDocID;
                internal int LastPayloadLength = -1;
                internal int LastPosition;
                internal int Df;

                /// <summary>
                /// Clears the per-term state and returns this instance. </summary>
                public PostingsWriter Reset()
                {
                    Df = 0;
                    LastDocID = 0;
                    LastPayloadLength = -1;
                    return this;
                }

                public override void StartDoc(int docID, int termDocFreq)
                {
                    PreFlexRWFieldsWriter writer = OuterInstance.OuterInstance;

                    int delta = docID - LastDocID;
                    if (docID < 0 || (Df > 0 && delta <= 0))
                    {
                        throw new CorruptIndexException("docs out of order (" + docID + " <= " + LastDocID + " )");
                    }

                    if ((++Df % writer.TermsOut.SkipInterval) == 0)
                    {
                        // Skip data describes the state *before* this document.
                        writer.SkipListWriter.SetSkipData(LastDocID, OuterInstance.StorePayloads, LastPayloadLength);
                        writer.SkipListWriter.BufferSkip(Df);
                    }

                    LastDocID = docID;

                    Debug.Assert(docID < writer.TotalNumDocs, "docID=" + docID + " totalNumDocs=" + writer.TotalNumDocs);

                    if (OuterInstance.OmitTF)
                    {
                        writer.FreqOut.WriteVInt32(delta);
                    }
                    else
                    {
                        // The low bit of the doc delta flags a frequency of exactly 1,
                        // in which case no separate frequency value is written.
                        int code = delta << 1;
                        if (termDocFreq == 1)
                        {
                            writer.FreqOut.WriteVInt32(code | 1);
                        }
                        else
                        {
                            writer.FreqOut.WriteVInt32(code);
                            writer.FreqOut.WriteVInt32(termDocFreq);
                        }
                    }
                    LastPosition = 0;
                }

                public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset)
                {
                    IndexOutput proxOut = OuterInstance.OuterInstance.ProxOut;

                    Debug.Assert(proxOut != null);
                    Debug.Assert(startOffset == -1);
                    Debug.Assert(endOffset == -1);
                    int delta = position - LastPosition;
                    LastPosition = position;

                    if (OuterInstance.StorePayloads)
                    {
                        int payloadLength = payload == null ? 0 : payload.Length;
                        if (payloadLength != LastPayloadLength)
                        {
                            // The low bit of the position delta flags that a new
                            // payload length follows.
                            LastPayloadLength = payloadLength;
                            proxOut.WriteVInt32((delta << 1) | 1);
                            proxOut.WriteVInt32(payloadLength);
                        }
                        else
                        {
                            proxOut.WriteVInt32(delta << 1);
                        }
                        if (payloadLength > 0)
                        {
                            proxOut.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                        }
                    }
                    else
                    {
                        proxOut.WriteVInt32(delta);
                    }
                }

                public override void FinishDoc()
                {
                }
            }

            public override PostingsConsumer StartTerm(BytesRef text)
            {
                OuterInstance.SkipListWriter.ResetSkip();
                TermInfo.FreqPointer = OuterInstance.FreqOut.FilePointer;
                if (OuterInstance.ProxOut != null)
                {
                    TermInfo.ProxPointer = OuterInstance.ProxOut.FilePointer;
                }
                return postingsWriter.Reset();
            }

            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                // Terms without documents are not added to the terms dictionary.
                if (stats.DocFreq > 0)
                {
                    long skipPointer = OuterInstance.SkipListWriter.WriteSkip(OuterInstance.FreqOut);
                    TermInfo.DocFreq = stats.DocFreq;
                    TermInfo.SkipOffset = (int)(skipPointer - TermInfo.FreqPointer);
                    OuterInstance.TermsOut.Add(FieldInfo.Number, text, TermInfo);
                }
            }

            public override void Finish(long sumTotalTermCount, long sumDocFreq, int docCount)
            {
            }

            public override IComparer<BytesRef> Comparer
            {
                get
                {
                    return BytesRef.UTF8SortedAsUTF16Comparer;
                }
            }
        }
    }
#pragma warning restore 612, 618
}

// http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWNormsConsumer.cs ---------------------------------------------------------------------- diff --git
a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWNormsConsumer.cs b/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWNormsConsumer.cs deleted file mode 100644 index 2a91121..0000000 --- a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWNormsConsumer.cs +++ /dev/null @@ -1,116 +0,0 @@ -using System; -using System.Diagnostics; - -namespace Lucene.Net.Codecs.Lucene3x -{ - using System.Collections.Generic; - using BytesRef = Lucene.Net.Util.BytesRef; - using Directory = Lucene.Net.Store.Directory; - - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - using FieldInfo = Lucene.Net.Index.FieldInfo; - using IndexFileNames = Lucene.Net.Index.IndexFileNames; - using IndexOutput = Lucene.Net.Store.IndexOutput; - using IOContext = Lucene.Net.Store.IOContext; - using IOUtils = Lucene.Net.Util.IOUtils; - - /// <summary> - /// Writes and Merges Lucene 3.x norms format - /// @lucene.experimental - /// </summary> - internal class PreFlexRWNormsConsumer : DocValuesConsumer - { - /// <summary> - /// norms header placeholder </summary> - private static readonly sbyte[] NORMS_HEADER = new sbyte[] { (sbyte)'N', (sbyte)'R', (sbyte)'M', -1 }; - - /// <summary> - /// Extension of norms file </summary> - private const string NORMS_EXTENSION = "nrm"; - - /// <summary> - /// Extension of separate norms file </summary> - /// @deprecated Only for reading existing 3.x indexes - [Obsolete("Only for reading existing 3.x indexes")] - private const string SEPARATE_NORMS_EXTENSION = "s"; - - private readonly IndexOutput @out; - private int LastFieldNumber = -1; // only for assert - - public PreFlexRWNormsConsumer(Directory directory, string segment, IOContext context) - { - string normsFileName = IndexFileNames.SegmentFileName(segment, "", NORMS_EXTENSION); - bool success = false; - IndexOutput output = null; - try - { - output = directory.CreateOutput(normsFileName, context); - // output.WriteBytes(NORMS_HEADER, 0, NORMS_HEADER.Length); - foreach (var @sbyte in NORMS_HEADER) - { - output.WriteByte((byte)@sbyte); - } - @out = output; - success = true; - } - finally - { - if (!success) - { - IOUtils.CloseWhileHandlingException(output); - } - } - } - - public override void AddNumericField(FieldInfo field, IEnumerable<long?> values) - { - Debug.Assert(field.Number > LastFieldNumber, "writing norms fields out of order" + LastFieldNumber + " -> " + field.Number); - foreach (var n in values) - { - if (((sbyte)(byte)(long)n) < sbyte.MinValue || ((sbyte)(byte)(long)n) > sbyte.MaxValue) - { - throw new System.NotSupportedException("3.x cannot 
index norms that won't fit in a byte, got: " + ((sbyte)(byte)(long)n)); - } - @out.WriteByte((byte)(sbyte)n); - } - LastFieldNumber = field.Number; - } - - protected override void Dispose(bool disposing) - { - if (disposing) - IOUtils.Close(@out); - } - - public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values) - { - throw new InvalidOperationException(); - } - - public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd) - { - throw new InvalidOperationException(); - } - - public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords) - { - throw new InvalidOperationException(); - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWNormsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWNormsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWNormsFormat.cs deleted file mode 100644 index d85d5d3..0000000 --- a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWNormsFormat.cs +++ /dev/null @@ -1,35 +0,0 @@ -namespace Lucene.Net.Codecs.Lucene3x -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - using SegmentWriteState = Lucene.Net.Index.SegmentWriteState; - - /// <summary> - /// @lucene.internal - /// @lucene.experimental - /// </summary> -#pragma warning disable 612, 618 - internal class PreFlexRWNormsFormat : Lucene3xNormsFormat - { - public override DocValuesConsumer NormsConsumer(SegmentWriteState state) - { - return new PreFlexRWNormsConsumer(state.Directory, state.SegmentInfo.Name, state.Context); - } - } -#pragma warning restore 612, 618 -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWPostingsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWPostingsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWPostingsFormat.cs deleted file mode 100644 index 962d95c..0000000 --- a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWPostingsFormat.cs +++ /dev/null @@ -1,87 +0,0 @@ -using System; -using System.Reflection; -using System.Diagnostics; - -namespace Lucene.Net.Codecs.Lucene3x -{ - using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; - using SegmentReadState = Lucene.Net.Index.SegmentReadState; - - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - using SegmentWriteState = Lucene.Net.Index.SegmentWriteState; - - /// <summary> - /// Codec, only for testing, that can write and read the - /// pre-flex index format. - /// - /// @lucene.experimental - /// </summary> -#pragma warning disable 612, 618 - internal class PreFlexRWPostingsFormat : Lucene3xPostingsFormat - { - public PreFlexRWPostingsFormat() - { - // NOTE: we impersonate the PreFlex codec so that it can - // read the segments we write! 
- } - - public override FieldsConsumer FieldsConsumer(SegmentWriteState state) - { - return new PreFlexRWFieldsWriter(state); - } - - public override FieldsProducer FieldsProducer(SegmentReadState state) - { - // Whenever IW opens readers, eg for merging, we have to - // keep terms order in UTF16: - - return new Lucene3xFieldsAnonymousInnerClassHelper(this, state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.TermsIndexDivisor); - } - - private class Lucene3xFieldsAnonymousInnerClassHelper : Lucene3xFields - { - private readonly PreFlexRWPostingsFormat OuterInstance; - - public Lucene3xFieldsAnonymousInnerClassHelper(PreFlexRWPostingsFormat outerInstance, Store.Directory directory, Index.FieldInfos fieldInfos, Index.SegmentInfo segmentInfo, Store.IOContext context, int termsIndexDivisor) - : base(directory, fieldInfos, segmentInfo, context, termsIndexDivisor) - { - this.OuterInstance = outerInstance; - } - - protected internal override bool SortTermsByUnicode() - { - // We carefully peek into stack track above us: if - // we are part of a "merge", we must sort by UTF16: - bool unicodeSortOrder = true; - - if(Util.StackTraceHelper.DoesStackTraceContainMethod("Merge")) - { - unicodeSortOrder = false; - if (LuceneTestCase.VERBOSE) - { - Console.WriteLine("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order"); - } - } - - return unicodeSortOrder; - } - } - } -#pragma warning restore 612, 618 -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSegmentInfoFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSegmentInfoFormat.cs b/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSegmentInfoFormat.cs deleted file mode 100644 index 86d7e4d..0000000 --- a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSegmentInfoFormat.cs +++ /dev/null @@ -1,37 
+0,0 @@ -namespace Lucene.Net.Codecs.Lucene3x -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - /// <summary> - /// @lucene.experimental - /// </summary> -#pragma warning disable 612, 618 - internal class PreFlexRWSegmentInfoFormat : Lucene3xSegmentInfoFormat - { - private readonly SegmentInfoWriter Writer = new PreFlexRWSegmentInfoWriter(); - - public override SegmentInfoWriter SegmentInfoWriter - { - get - { - return Writer; - } - } - } -#pragma warning restore 612, 618 -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSegmentInfoWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSegmentInfoWriter.cs b/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSegmentInfoWriter.cs deleted file mode 100644 index 3019c51..0000000 --- a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSegmentInfoWriter.cs +++ /dev/null @@ -1,47 +0,0 @@ -namespace Lucene.Net.Codecs.Lucene3x -{ - using Directory = Lucene.Net.Store.Directory; - - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor 
license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - using FieldInfos = Lucene.Net.Index.FieldInfos; - using IOContext = Lucene.Net.Store.IOContext; - using SegmentInfo = Lucene.Net.Index.SegmentInfo; - using SegmentInfos = Lucene.Net.Index.SegmentInfos; - - /// <summary> - /// PreFlex implementation of <seealso cref="SegmentInfoWriter"/>. - /// @lucene.experimental - /// </summary> -#pragma warning disable 612, 618 - internal class PreFlexRWSegmentInfoWriter : SegmentInfoWriter - { - // NOTE: this is not "really" 3.x format, because we are - // writing each SI to its own file, vs 3.x where the list - // of segments and SI for each segment is written into a - // single segments_N file - - /// <summary> - /// Save a single segment's info. 
</summary> - public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) - { - SegmentInfos.Write3xInfo(dir, si, ioContext); - } - } -#pragma warning restore 612, 618 -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSkipListWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSkipListWriter.cs b/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSkipListWriter.cs deleted file mode 100644 index 0ce2d24..0000000 --- a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWSkipListWriter.cs +++ /dev/null @@ -1,138 +0,0 @@ -namespace Lucene.Net.Codecs.Lucene3x -{ - using Lucene.Net.Support; - - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - using IndexOutput = Lucene.Net.Store.IndexOutput; - - /// <summary> - /// PreFlexRW skiplist implementation. 
- /// @lucene.experimental - /// </summary> - public class PreFlexRWSkipListWriter : MultiLevelSkipListWriter - { - private int[] LastSkipDoc; - private int[] LastSkipPayloadLength; - private long[] LastSkipFreqPointer; - private long[] LastSkipProxPointer; - - private IndexOutput FreqOutput; - private IndexOutput ProxOutput; - - private int CurDoc; - private bool CurStorePayloads; - private int CurPayloadLength; - private long CurFreqPointer; - private long CurProxPointer; - - public PreFlexRWSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput) - : base(skipInterval, numberOfSkipLevels, docCount) - { - this.FreqOutput = freqOutput; - this.ProxOutput = proxOutput; - - LastSkipDoc = new int[numberOfSkipLevels]; - LastSkipPayloadLength = new int[numberOfSkipLevels]; - LastSkipFreqPointer = new long[numberOfSkipLevels]; - LastSkipProxPointer = new long[numberOfSkipLevels]; - } - - /// <summary> - /// Sets the values for the current skip data. - /// </summary> - public virtual void SetSkipData(int doc, bool storePayloads, int payloadLength) - { - this.CurDoc = doc; - this.CurStorePayloads = storePayloads; - this.CurPayloadLength = payloadLength; - this.CurFreqPointer = FreqOutput.FilePointer; - if (ProxOutput != null) - { - this.CurProxPointer = ProxOutput.FilePointer; - } - } - - public override void ResetSkip() - { - base.ResetSkip(); - Arrays.Fill(LastSkipDoc, 0); - Arrays.Fill(LastSkipPayloadLength, -1); // we don't have to write the first length in the skip list - Arrays.Fill(LastSkipFreqPointer, FreqOutput.FilePointer); - if (ProxOutput != null) - { - Arrays.Fill(LastSkipProxPointer, ProxOutput.FilePointer); - } - } - - protected override void WriteSkipData(int level, IndexOutput skipBuffer) - { - // To efficiently store payloads in the posting lists we do not store the length of - // every payload. Instead we omit the length for a payload if the previous payload had - // the same length. 
- // However, in order to support skipping the payload length at every skip point must be known. - // So we use the same length encoding that we use for the posting lists for the skip data as well: - // Case 1: current field does not store payloads - // SkipDatum --> DocSkip, FreqSkip, ProxSkip - // DocSkip,FreqSkip,ProxSkip --> VInt - // DocSkip records the document number before every SkipInterval th document in TermFreqs. - // Document numbers are represented as differences from the previous value in the sequence. - // Case 2: current field stores payloads - // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip - // DocSkip,FreqSkip,ProxSkip --> VInt - // PayloadLength --> VInt - // In this case DocSkip/2 is the difference between - // the current and the previous value. If DocSkip - // is odd, then a PayloadLength encoded as VInt follows, - // if DocSkip is even, then it is assumed that the - // current payload length equals the length at the previous - // skip point - if (CurStorePayloads) - { - int delta = CurDoc - LastSkipDoc[level]; - if (CurPayloadLength == LastSkipPayloadLength[level]) - { - // the current payload length equals the length at the previous skip point, - // so we don't store the length again - skipBuffer.WriteVInt32(delta * 2); - } - else - { - // the payload length is different from the previous one. We shift the DocSkip, - // set the lowest bit and store the current payload length as VInt. 
- skipBuffer.WriteVInt32(delta * 2 + 1); - skipBuffer.WriteVInt32(CurPayloadLength); - LastSkipPayloadLength[level] = CurPayloadLength; - } - } - else - { - // current field does not store payloads - skipBuffer.WriteVInt32(CurDoc - LastSkipDoc[level]); - } - - skipBuffer.WriteVInt32((int)(CurFreqPointer - LastSkipFreqPointer[level])); - skipBuffer.WriteVInt32((int)(CurProxPointer - LastSkipProxPointer[level])); - - LastSkipDoc[level] = CurDoc; - - LastSkipFreqPointer[level] = CurFreqPointer; - LastSkipProxPointer[level] = CurProxPointer; - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWStoredFieldsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWStoredFieldsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWStoredFieldsFormat.cs deleted file mode 100644 index 63ffc4a..0000000 --- a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWStoredFieldsFormat.cs +++ /dev/null @@ -1,34 +0,0 @@ -namespace Lucene.Net.Codecs.Lucene3x -{ - using Directory = Lucene.Net.Store.Directory; - using IOContext = Lucene.Net.Store.IOContext; - - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - using SegmentInfo = Lucene.Net.Index.SegmentInfo; - -#pragma warning disable 612, 618 - internal class PreFlexRWStoredFieldsFormat : Lucene3xStoredFieldsFormat - { - public override StoredFieldsWriter FieldsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) - { - return new PreFlexRWStoredFieldsWriter(directory, segmentInfo.Name, context); - } - } -#pragma warning restore 612, 618 -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWStoredFieldsWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWStoredFieldsWriter.cs b/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWStoredFieldsWriter.cs deleted file mode 100644 index 628564a..0000000 --- a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWStoredFieldsWriter.cs +++ /dev/null @@ -1,214 +0,0 @@ -using System; -using System.Diagnostics; - -namespace Lucene.Net.Codecs.Lucene3x -{ - using Lucene.Net.Support; - using BytesRef = Lucene.Net.Util.BytesRef; - using Directory = Lucene.Net.Store.Directory; - - /// <summary> - /// Copyright 2004 The Apache Software Foundation - /// - /// Licensed under the Apache License, Version 2.0 (the "License"); you may not - /// use this file except in compliance with the License. You may obtain a copy of - /// the License at - /// - /// http://www.apache.org/licenses/LICENSE-2.0 - /// - /// Unless required by applicable law or agreed to in writing, software - /// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - /// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - /// License for the specific language governing permissions and limitations under - /// the License. 
- /// </summary> - - using FieldInfo = Lucene.Net.Index.FieldInfo; - using FieldInfos = Lucene.Net.Index.FieldInfos; - using IIndexableField = Lucene.Net.Index.IIndexableField; - using IndexFileNames = Lucene.Net.Index.IndexFileNames; - using IndexOutput = Lucene.Net.Store.IndexOutput; - using IOContext = Lucene.Net.Store.IOContext; - using IOUtils = Lucene.Net.Util.IOUtils; - - /// <summary> - /// @lucene.experimental </summary> -#pragma warning disable 612, 618 - internal sealed class PreFlexRWStoredFieldsWriter : StoredFieldsWriter - { - private readonly Directory Directory; - private readonly string Segment; - private IndexOutput FieldsStream; - private IndexOutput IndexStream; - - public PreFlexRWStoredFieldsWriter(Directory directory, string segment, IOContext context) - { - Debug.Assert(directory != null); - this.Directory = directory; - this.Segment = segment; - - bool success = false; - try - { - FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context); - IndexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context); - - FieldsStream.WriteInt32(Lucene3xStoredFieldsReader.FORMAT_CURRENT); - IndexStream.WriteInt32(Lucene3xStoredFieldsReader.FORMAT_CURRENT); - - success = true; - } - finally - { - if (!success) - { - Abort(); - } - } - } - - // Writes the contents of buffer into the fields stream - // and adds a new entry for this document into the index - // stream. this assumes the buffer was already written - // in the correct fields format. 
- public override void StartDocument(int numStoredFields) - { - IndexStream.WriteInt64(FieldsStream.FilePointer); - FieldsStream.WriteVInt32(numStoredFields); - } - - protected override void Dispose(bool disposing) - { - if (disposing) - { - try - { - IOUtils.Close(FieldsStream, IndexStream); - } - finally - { - FieldsStream = IndexStream = null; - } - } - } - - public override void Abort() - { - try - { - Dispose(); - } -#pragma warning disable 168 - catch (Exception ignored) -#pragma warning restore 168 - { - } - IOUtils.DeleteFilesIgnoringExceptions(Directory, IndexFileNames.SegmentFileName(Segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), IndexFileNames.SegmentFileName(Segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); - } - - public override void WriteField(FieldInfo info, IIndexableField field) - { - FieldsStream.WriteVInt32(info.Number); - int bits = 0; - BytesRef bytes; - string @string; - // TODO: maybe a field should serialize itself? - // this way we don't bake into indexer all these - // specific encodings for different fields? and apps - // can customize... - - object number = field.GetNumericValue(); - if (number != null) - { - if (number is sbyte? || number is short? || number is int?) - { - bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT; - } - else if (number is long?) - { - bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG; - } - else if (number is float?) - { - bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT; - } - else if (number is double?) 
- { - bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE; - } - else - { - throw new System.ArgumentException("cannot store numeric type " + number.GetType()); - } - @string = null; - bytes = null; - } - else - { - bytes = field.GetBinaryValue(); - if (bytes != null) - { - bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY; - @string = null; - } - else - { - @string = field.GetStringValue(); - if (@string == null) - { - throw new System.ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue"); - } - } - } - - FieldsStream.WriteByte((byte)(sbyte)bits); - - if (bytes != null) - { - FieldsStream.WriteVInt32(bytes.Length); - FieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length); - } - else if (@string != null) - { - FieldsStream.WriteString(field.GetStringValue()); - } - else - { - if (number is sbyte? || number is short? || number is int?) - { - FieldsStream.WriteInt32((int)number); - } - else if (number is long?) - { - FieldsStream.WriteInt64((long)number); - } - else if (number is float?) - { - FieldsStream.WriteInt32(Number.SingleToInt32Bits((float)number)); - } - else if (number is double?) - { - FieldsStream.WriteInt64(BitConverter.DoubleToInt64Bits((double)number)); - } - else - { - Debug.Assert(false); - } - } - } - - public override void Finish(FieldInfos fis, int numDocs) - { - if (4 + ((long)numDocs) * 8 != IndexStream.FilePointer) - // this is most likely a bug in Sun JRE 1.6.0_04/_05; - // we detect that the bug has struck, here, and - // throw an exception to prevent the corruption from - // entering the index. See LUCENE-1282 for - // details. 
- { - throw new Exception("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + IndexStream.FilePointer + " file=" + IndexStream.ToString() + "; now aborting this merge to prevent index corruption"); - } - } - } -#pragma warning restore 612, 618 -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8304ca82/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWTermVectorsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWTermVectorsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWTermVectorsFormat.cs deleted file mode 100644 index 871ee07..0000000 --- a/src/Lucene.Net.TestFramework/Codecs/lucene3x/PreFlexRWTermVectorsFormat.cs +++ /dev/null @@ -1,74 +0,0 @@ -using System; -using System.Diagnostics; - -namespace Lucene.Net.Codecs.Lucene3x -{ - using Directory = Lucene.Net.Store.Directory; - - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - using FieldInfos = Lucene.Net.Index.FieldInfos; - using IOContext = Lucene.Net.Store.IOContext; - using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; - using SegmentInfo = Lucene.Net.Index.SegmentInfo; - -#pragma warning disable 612, 618 - internal class PreFlexRWTermVectorsFormat : Lucene3xTermVectorsFormat - { - public override TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) - { - return new PreFlexRWTermVectorsWriter(directory, segmentInfo.Name, context); - } - - public override TermVectorsReader VectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) - { - return new Lucene3xTermVectorsReaderAnonymousInnerClassHelper(this, directory, segmentInfo, fieldInfos, context); - } - - private class Lucene3xTermVectorsReaderAnonymousInnerClassHelper : Lucene3xTermVectorsReader - { - private readonly PreFlexRWTermVectorsFormat OuterInstance; - - public Lucene3xTermVectorsReaderAnonymousInnerClassHelper(PreFlexRWTermVectorsFormat outerInstance, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) - : base(directory, segmentInfo, fieldInfos, context) - { - this.OuterInstance = outerInstance; - } - - protected internal override bool SortTermsByUnicode() - { - - // We carefully peek into stack track above us: if - // we are part of a "merge", we must sort by UTF16: - bool unicodeSortOrder = true; - - if (Util.StackTraceHelper.DoesStackTraceContainMethod("Merge")) - { - unicodeSortOrder = false; - if (LuceneTestCase.VERBOSE) - { - Console.WriteLine("NOTE: PreFlexRW codec: forcing legacy UTF16 vector term sort order"); - } - } - - return unicodeSortOrder; - } - } - } -#pragma warning restore 612, 618 -} \ No newline at end of file
