// Source: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/DocFieldProcessorPerThread.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Documents;
using Lucene.Net.Support;
using Document = Lucene.Net.Documents.Document;
using ArrayUtil = Lucene.Net.Util.ArrayUtil;

namespace Lucene.Net.Index
{

    /// <summary> Gathers all Fieldables for a document under the same
    /// name, updates FieldInfos, and calls per-field consumers
    /// to process field by field.
    ///
    /// Currently, only a single thread visits the fields,
    /// sequentially, for processing.
    /// </summary>
    sealed class DocFieldProcessorPerThread : DocConsumerPerThread
    {
        internal float docBoost;
        internal int fieldGen;
        internal DocFieldProcessor docFieldProcessor;
        internal FieldInfos fieldInfos;
        internal DocFieldConsumerPerThread consumer;

        // Holds all fields seen in current doc
        internal DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
        internal int fieldCount;

        // Hash table for all fields ever seen; buckets are singly linked
        // through DocFieldProcessorPerField.next.
        internal DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
        internal int hashMask = 1;
        internal int totalFieldCount;

        internal StoredFieldsWriterPerThread fieldsWriter;

        internal DocumentsWriter.DocState docState;

        /// <summary>Wires this instance to the shared <paramref name="docFieldProcessor"/>:
        /// takes the doc state from <paramref name="threadState"/> and registers itself with
        /// the processor's consumer and stored-fields writer via their AddThread methods.
        /// </summary>
        /// <param name="threadState">supplies the <c>docState</c> used by this instance</param>
        /// <param name="docFieldProcessor">supplies the field infos, consumer and fields writer</param>
        public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor)
        {
            // Free list starts with room for a single recycled PerDoc; GetPerDoc grows it.
            docFreeList = new PerDoc[1];
            this.docState = threadState.docState;
            this.docFieldProcessor = docFieldProcessor;
            this.fieldInfos = docFieldProcessor.fieldInfos;
            this.consumer = docFieldProcessor.consumer.AddThread(this);
            fieldsWriter = docFieldProcessor.fieldsWriter.AddThread(docState);
        }

        /// <summary>Aborts every hashed per-field processor, then the stored-fields writer,
        /// then the consumer.
        /// </summary>
        /// <remarks>
        /// The writer and the consumer are aborted from <c>finally</c> blocks so that an
        /// exception thrown while aborting a field (or the writer) no longer prevents the
        /// remaining collaborators from being aborted. This mirrors the nesting already used
        /// by <see cref="PerDoc.Abort"/>. An exception raised while aborting a field still
        /// ends the walk over the remaining fields.
        /// </remarks>
        public override void Abort()
        {
            try
            {
                for (int i = 0; i < fieldHash.Length; i++)
                {
                    DocFieldProcessorPerField field = fieldHash[i];
                    while (field != null)
                    {
                        // Read the link first: Abort() is free to touch the node.
                        DocFieldProcessorPerField next = field.next;
                        field.Abort();
                        field = next;
                    }
                }
            }
            finally
            {
                try
                {
                    fieldsWriter.Abort();
                }
                finally
                {
                    consumer.Abort();
                }
            }
        }

        /// <summary>Collects the per-field consumer of every field currently in the hash table.</summary>
        /// <returns>a new set holding one consumer per hashed field</returns>
        public System.Collections.Generic.ICollection<DocFieldConsumerPerField> Fields()
        {
            // Named "consumers" so it does not hide the "fields" member array.
            System.Collections.Generic.ICollection<DocFieldConsumerPerField> consumers =
                new System.Collections.Generic.HashSet<DocFieldConsumerPerField>();
            for (int i = 0; i < fieldHash.Length; i++)
            {
                DocFieldProcessorPerField field = fieldHash[i];
                while (field != null)
                {
                    consumers.Add(field.consumer);
                    field = field.next;
                }
            }
            System.Diagnostics.Debug.Assert(consumers.Count == totalFieldCount);
            return consumers;
        }

        /// <summary>If there are fields we've seen but did not see again
        /// in the last run, then free them up.
        /// </summary>
        /// <param name="state">its <c>docWriter.infoStream</c>, when non-null, receives one line per purged field</param>
        internal void TrimFields(SegmentWriteState state)
        {
            for (int i = 0; i < fieldHash.Length; i++)
            {
                DocFieldProcessorPerField perField = fieldHash[i];
                DocFieldProcessorPerField lastPerField = null;

                while (perField != null)
                {
                    if (perField.lastGen == -1)
                    {
                        // This field was not seen since the previous
                        // flush, so, free up its resources now

                        // Unhash
                        if (lastPerField == null)
                        {
                            fieldHash[i] = perField.next;
                        }
                        else
                        {
                            lastPerField.next = perField.next;
                        }

                        if (state.docWriter.infoStream != null)
                        {
                            state.docWriter.infoStream.WriteLine(" purge field=" + perField.fieldInfo.name);
                        }

                        totalFieldCount--;
                    }
                    else
                    {
                        // Reset, so the next call can tell whether the field was seen again
                        perField.lastGen = -1;
                        lastPerField = perField;
                    }

                    perField = perField.next;
                }
            }
        }

        /// <summary>Doubles the hash table and re-links every entry into its new bucket.</summary>
        private void Rehash()
        {
            int newHashSize = (fieldHash.Length * 2);
            System.Diagnostics.Debug.Assert(newHashSize > fieldHash.Length);

            DocFieldProcessorPerField[] newHashArray = new DocFieldProcessorPerField[newHashSize];

            // Rehash
            int newHashMask = newHashSize - 1;
            for (int j = 0; j < fieldHash.Length; j++)
            {
                DocFieldProcessorPerField fp0 = fieldHash[j];
                while (fp0 != null)
                {
                    int hashPos2 = fp0.fieldInfo.name.GetHashCode() & newHashMask;
                    DocFieldProcessorPerField nextFP0 = fp0.next;
                    fp0.next = newHashArray[hashPos2];
                    newHashArray[hashPos2] = fp0;
                    fp0 = nextFP0;
                }
            }

            fieldHash = newHashArray;
            hashMask = newHashMask;
        }

        /// <summary>Processes the document held in <c>docState.doc</c>: groups its fields by
        /// name, hands stored fields to the stored-fields writer, then lets each per-field
        /// consumer process its group in ordinal field-name order.
        /// </summary>
        /// <returns>the writer returned by the stored-fields writer, the one returned by the
        /// consumer, a <see cref="PerDoc"/> combining both, or <c>null</c> when both are null</returns>
        public override DocumentsWriter.DocWriter ProcessDocument()
        {
            consumer.StartDocument();
            fieldsWriter.StartDocument();

            Document doc = docState.doc;

            System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

            fieldCount = 0;

            // Generation stamp: a per-field entry whose lastGen differs has not
            // been seen yet in this document.
            int thisFieldGen = fieldGen++;

            System.Collections.Generic.IList<IFieldable> docFields = doc.GetFields();
            int numDocFields = docFields.Count;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            for (int i = 0; i < numDocFields; i++)
            {
                IFieldable field = docFields[i];
                System.String fieldName = field.Name;

                // Make sure we have a PerField allocated
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed, field.IsTermVectorStored,
                                                  field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
                                                  field.OmitNorms, false, field.OmitTermFreqAndPositions);

                    fp = new DocFieldProcessorPerField(this, fi);
                    fp.next = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    // hashPos is not used again in this iteration, so growing
                    // the table here is safe.
                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }
                else
                {
                    fp.fieldInfo.Update(field.IsIndexed, field.IsTermVectorStored,
                                        field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
                                        field.OmitNorms, false, field.OmitTermFreqAndPositions);
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen = thisFieldGen;
                }

                if (fp.fieldCount == fp.fields.Length)
                {
                    IFieldable[] newArray = new IFieldable[fp.fields.Length * 2];
                    Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
                    fp.fields = newArray;
                }

                fp.fields[fp.fieldCount++] = field;
                if (field.IsStored)
                {
                    fieldsWriter.AddField(field, fp.fieldInfo);
                }
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order. TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            QuickSort(fields, 0, fieldCount - 1);

            for (int i = 0; i < fieldCount; i++)
            {
                fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
            }

            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }

            DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
            DocumentsWriter.DocWriter two = consumer.FinishDocument();
            if (one == null)
            {
                return two;
            }
            else if (two == null)
            {
                return one;
            }
            else
            {
                // Both sides produced a writer: bundle them so they finish/abort together.
                PerDoc both = GetPerDoc();
                both.docID = docState.docID;
                System.Diagnostics.Debug.Assert(one.docID == docState.docID);
                System.Diagnostics.Debug.Assert(two.docID == docState.docID);
                both.one = one;
                both.two = two;
                return both;
            }
        }

        /// <summary>Sorts <paramref name="array"/> in place between <paramref name="lo"/> and
        /// <paramref name="hi"/> (both inclusive) by ordinal comparison of <c>fieldInfo.name</c>.
        /// </summary>
        /// <param name="array">entries to sort</param>
        /// <param name="lo">index of the first element of the range</param>
        /// <param name="hi">index of the last element of the range</param>
        internal void QuickSort(DocFieldProcessorPerField[] array, int lo, int hi)
        {
            if (lo >= hi)
            {
                return;
            }
            else if (hi == 1 + lo)
            {
                // Two elements: a single compare-and-swap is enough.
                if (String.CompareOrdinal(array[lo].fieldInfo.name, array[hi].fieldInfo.name) > 0)
                {
                    DocFieldProcessorPerField tmp = array[lo];
                    array[lo] = array[hi];
                    array[hi] = tmp;
                }
                return;
            }

            int mid = Number.URShift((lo + hi), 1);

            // Order array[lo], array[mid], array[hi] so that array[mid] holds
            // the median of the three, which becomes the partition element.
            if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
            {
                DocFieldProcessorPerField tmp = array[lo];
                array[lo] = array[mid];
                array[mid] = tmp;
            }

            if (String.CompareOrdinal(array[mid].fieldInfo.name, array[hi].fieldInfo.name) > 0)
            {
                DocFieldProcessorPerField tmp = array[mid];
                array[mid] = array[hi];
                array[hi] = tmp;

                if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
                {
                    DocFieldProcessorPerField tmp2 = array[lo];
                    array[lo] = array[mid];
                    array[mid] = tmp2;
                }
            }

            int left = lo + 1;
            int right = hi - 1;

            if (left >= right)
            {
                return;
            }

            DocFieldProcessorPerField partition = array[mid];

            for (; ; )
            {
                while (String.CompareOrdinal(array[right].fieldInfo.name, partition.fieldInfo.name) > 0)
                {
                    --right;
                }

                while (left < right && String.CompareOrdinal(array[left].fieldInfo.name, partition.fieldInfo.name) <= 0)
                {
                    ++left;
                }

                if (left < right)
                {
                    DocFieldProcessorPerField tmp = array[left];
                    array[left] = array[right];
                    array[right] = tmp;
                    --right;
                }
                else
                {
                    break;
                }
            }

            QuickSort(array, lo, left);
            QuickSort(array, left + 1, hi);
        }

        // Recycled PerDoc instances; the first freeCount slots are in use.
        internal PerDoc[] docFreeList;
        internal int freeCount;
        // Number of PerDoc instances created so far by GetPerDoc.
        internal int allocCount;

        /// <summary>Returns a recycled <see cref="PerDoc"/> when one is available, otherwise
        /// creates a new one, growing the free list first so it can hold every instance
        /// handed out.
        /// </summary>
        internal PerDoc GetPerDoc()
        {
            // NOTE(review): locking on the instance is kept as-is because code outside
            // this view may synchronize on the same object - confirm before replacing
            // it with a private lock object.
            lock (this)
            {
                if (freeCount == 0)
                {
                    allocCount++;
                    if (allocCount > docFreeList.Length)
                    {
                        // Grow our free list up front to make sure we have
                        // enough space to recycle all outstanding PerDoc
                        // instances
                        System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
                        docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
                    }
                    return new PerDoc(this);
                }
                else
                {
                    return docFreeList[--freeCount];
                }
            }
        }

        /// <summary>Puts <paramref name="perDoc"/> back on the free list for reuse.</summary>
        internal void FreePerDoc(PerDoc perDoc)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
                docFreeList[freeCount++] = perDoc;
            }
        }

        /// <summary>Pairs two <see cref="DocumentsWriter.DocWriter"/> instances so they are
        /// sized, finished and aborted together; afterwards the instance returns itself to
        /// the owner's free list.
        /// </summary>
        internal class PerDoc : DocumentsWriter.DocWriter
        {
            public PerDoc(DocFieldProcessorPerThread enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }

            private DocFieldProcessorPerThread enclosingInstance;

            /// <summary>The processor that created this instance and recycles it.</summary>
            public DocFieldProcessorPerThread Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }
            }

            internal DocumentsWriter.DocWriter one;
            internal DocumentsWriter.DocWriter two;

            /// <summary>Sum of the sizes reported by both wrapped writers.</summary>
            public override long SizeInBytes()
            {
                return one.SizeInBytes() + two.SizeInBytes();
            }

            /// <summary>Finishes both writers, then recycles this instance; the nested
            /// <c>finally</c> blocks make each later step run even if an earlier one throws.
            /// </summary>
            public override void Finish()
            {
                try
                {
                    try
                    {
                        one.Finish();
                    }
                    finally
                    {
                        two.Finish();
                    }
                }
                finally
                {
                    Enclosing_Instance.FreePerDoc(this);
                }
            }

            /// <summary>Aborts both writers, then recycles this instance; the nested
            /// <c>finally</c> blocks make each later step run even if an earlier one throws.
            /// </summary>
            public override void Abort()
            {
                try
                {
                    try
                    {
                        one.Abort();
                    }
                    finally
                    {
                        two.Abort();
                    }
                }
                finally
                {
                    Enclosing_Instance.FreePerDoc(this);
                }
            }
        }
    }
}
// Source: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/DocInverterPerThread.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Util;
using TokenStream = Lucene.Net.Analysis.TokenStream;

namespace Lucene.Net.Index
{

    /// <summary>Per-thread DocFieldConsumer that inverts each field of a
    /// document separately. Document start, finish and abort are forwarded
    /// to an <c>InvertedDocConsumerPerThread</c> and an
    /// <c>InvertedDocEndConsumerPerThread</c>.
    /// </summary>
    sealed class DocInverterPerThread : DocFieldConsumerPerThread
    {
        internal DocInverter docInverter;
        internal InvertedDocConsumerPerThread consumer;
        internal InvertedDocEndConsumerPerThread endConsumer;
        internal SingleTokenAttributeSource singleToken;

        /// <summary>Attribute source exposing one term attribute and one offset
        /// attribute, both of which can be overwritten through <see cref="Reinit"/>.
        /// </summary>
        internal class SingleTokenAttributeSource : AttributeSource
        {
            internal ITermAttribute termAttribute;
            internal IOffsetAttribute offsetAttribute;

            internal SingleTokenAttributeSource()
            {
                this.termAttribute = AddAttribute<ITermAttribute>();
                this.offsetAttribute = AddAttribute<IOffsetAttribute>();
            }

            /// <summary>Overwrites the term buffer and the offsets held by this source.</summary>
            /// <param name="stringValue">text copied into the term attribute</param>
            /// <param name="startOffset">start offset passed to the offset attribute</param>
            /// <param name="endOffset">end offset passed to the offset attribute</param>
            public void Reinit(string stringValue, int startOffset, int endOffset)
            {
                this.termAttribute.SetTermBuffer(stringValue);
                this.offsetAttribute.SetOffset(startOffset, endOffset);
            }
        }

        internal DocumentsWriter.DocState docState;

        internal FieldInvertState fieldState = new FieldInvertState();

        // Used to read a string value for a field
        internal ReusableStringReader stringReader = new ReusableStringReader();

        /// <summary>Creates the per-thread inverter and registers it with both consumers
        /// of <paramref name="docInverter"/> through their AddThread methods.
        /// </summary>
        /// <param name="docFieldProcessorPerThread">supplies the shared <c>docState</c></param>
        /// <param name="docInverter">owner of the consumer and end consumer</param>
        public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter)
        {
            this.singleToken = new SingleTokenAttributeSource();
            this.docInverter = docInverter;
            this.docState = docFieldProcessorPerThread.docState;
            this.consumer = docInverter.consumer.AddThread(this);
            this.endConsumer = docInverter.endConsumer.AddThread(this);
        }

        /// <summary>Signals the start of a document to the consumer, then to the end consumer.</summary>
        public override void StartDocument()
        {
            this.consumer.StartDocument();
            this.endConsumer.StartDocument();
        }

        /// <summary>Finishes the end consumer first, then the consumer.</summary>
        /// <returns>whatever the consumer's FinishDocument returns</returns>
        public override DocumentsWriter.DocWriter FinishDocument()
        {
            // TODO: allow endConsumer.finishDocument to also return
            // a DocWriter
            this.endConsumer.FinishDocument();
            DocumentsWriter.DocWriter pending = this.consumer.FinishDocument();
            return pending;
        }

        /// <summary>Aborts the consumer and, even if that throws, the end consumer.</summary>
        public override void Abort()
        {
            try
            {
                this.consumer.Abort();
            }
            finally
            {
                this.endConsumer.Abort();
            }
        }

        /// <summary>Creates the per-field inverter for <paramref name="fi"/>.</summary>
        public override DocFieldConsumerPerField AddField(FieldInfo fi)
        {
            DocFieldConsumerPerField perField = new DocInverterPerField(this, fi);
            return perField;
        }
    }
}
// Source: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/DocumentsWriterThreadState.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

namespace Lucene.Net.Index
{

    /// <summary>Used by DocumentsWriter to maintain per-thread state.
    /// We keep a separate Posting hash and other state for each
    /// thread and then merge postings hashes from all threads
    /// when writing the segment.
    /// </summary>
    sealed class DocumentsWriterThreadState
    {
        // false if this is currently in use by a thread
        internal bool isIdle = true;

        // Number of threads that share this instance
        internal int numThreads = 1;

        // true if we should flush after processing current doc
        internal bool doFlushAfter;

        internal DocConsumerPerThread consumer;
        internal DocumentsWriter.DocState docState;

        internal DocumentsWriter docWriter;

        /// <summary>Builds the doc state from the settings of <paramref name="docWriter"/>
        /// (max field length, info stream, similarity) and then asks the writer's consumer
        /// for a per-thread consumer bound to this state.
        /// </summary>
        /// <param name="docWriter">the owning writer</param>
        public DocumentsWriterThreadState(DocumentsWriter docWriter)
        {
            this.docWriter = docWriter;

            DocumentsWriter.DocState state = new DocumentsWriter.DocState();
            this.docState = state;
            state.maxFieldLength = docWriter.maxFieldLength;
            state.infoStream = docWriter.infoStream;
            state.similarity = docWriter.similarity;
            state.docWriter = docWriter;

            // Must come last: AddThread receives this instance with docState already populated.
            this.consumer = docWriter.consumer.AddThread(this);
        }

        /// <summary>Resets the shared-thread count to zero and clears the flush-after flag.</summary>
        internal void DoAfterFlush()
        {
            this.numThreads = 0;
            this.doFlushAfter = false;
        }
    }
}

// Source: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/FieldReaderException.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.
 */
/*
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Runtime.Serialization;

namespace Lucene.Net.Index
{
    /// <summary>
    /// Serializable exception type with the usual four exception constructors
    /// (no arguments, cause only, message only, message and cause) plus the
    /// protected deserialization constructor.
    /// </summary>
    [Serializable]
    public class FieldReaderException : System.SystemException
    {
        /// <summary>Creates an exception with no explicit message and no inner exception.</summary>
        public FieldReaderException()
            : base()
        {
        }

        /// <summary>Creates an exception that wraps <paramref name="cause"/>. The detail
        /// message is taken from <c>cause.Message</c>; when <paramref name="cause"/> is
        /// <c>null</c> a <c>null</c> message is passed to the base class.
        /// </summary>
        /// <param name="cause">the wrapped exception, exposed through
        /// <see cref="Exception.InnerException" />; may be <c>null</c></param>
        public FieldReaderException(System.Exception cause)
            : base(MessageOf(cause), cause)
        {
        }

        /// <summary>Creates an exception with the given detail message and no inner exception.</summary>
        /// <param name="message">the detail message, exposed through
        /// <see cref="Exception.Message" /></param>
        public FieldReaderException(System.String message)
            : base(message)
        {
        }

        /// <summary>Creates an exception with the given detail message and cause. The
        /// message of <paramref name="cause"/> is not merged into
        /// <paramref name="message"/>.
        /// </summary>
        /// <param name="message">the detail message, exposed through
        /// <see cref="Exception.Message" /></param>
        /// <param name="cause">the wrapped exception, exposed through
        /// <see cref="Exception.InnerException" />; may be <c>null</c></param>
        public FieldReaderException(System.String message, System.Exception cause)
            : base(message, cause)
        {
        }

        /// <summary>Deserialization constructor; forwards both arguments to the base class.</summary>
        protected FieldReaderException(SerializationInfo info, StreamingContext context)
            : base(info, context)
        {
        }

        // Message handed to the base class by the cause-only constructor.
        private static System.String MessageOf(System.Exception cause)
        {
            if (cause == null)
            {
                return null;
            }

            return cause.Message;
        }
    }
}

// Source: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/FieldSortedTermVectorMapper.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 */
/*
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System.Collections.Generic;
using Lucene.Net.Support;

namespace Lucene.Net.Index
{

    /// <summary>For each field, stores a sorted set of <see cref="TermVectorEntry" />s.
    /// <para>This is not thread-safe.</para>
    /// </summary>
    public class FieldSortedTermVectorMapper : TermVectorMapper
    {
        private readonly IDictionary<string, SortedSet<TermVectorEntry>> fieldToTerms = new HashMap<string, SortedSet<TermVectorEntry>>();

        // Set and field name installed by the most recent SetExpectations call.
        private SortedSet<TermVectorEntry> currentSet;
        private System.String currentField;

        private readonly IComparer<TermVectorEntry> comparator;

        /// <summary>Creates a mapper that ignores neither positions nor offsets.</summary>
        /// <param name="comparator">comparer used to order the <see cref="TermVectorEntry" />s of each field</param>
        public FieldSortedTermVectorMapper(IComparer<TermVectorEntry> comparator)
            : this(false, false, comparator)
        {
        }

        /// <summary>Creates a mapper, forwarding both flags to the base class.</summary>
        /// <param name="ignoringPositions">passed through to <see cref="TermVectorMapper" /></param>
        /// <param name="ignoringOffsets">passed through to <see cref="TermVectorMapper" /></param>
        /// <param name="comparator">comparer used to order the <see cref="TermVectorEntry" />s of each field</param>
        public FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, IComparer<TermVectorEntry> comparator)
            : base(ignoringPositions, ignoringOffsets)
        {
            this.comparator = comparator;
        }

        /// <summary>Adds one entry, built from the arguments and the current field name,
        /// to the set of the current field.
        /// </summary>
        public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
        {
            currentSet.Add(new TermVectorEntry(currentField, term, frequency, offsets, positions));
        }

        /// <summary>Makes <paramref name="field"/> the current field and registers a new,
        /// empty sorted set for it, replacing any set stored earlier under the same name.
        /// </summary>
        public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
        {
            SortedSet<TermVectorEntry> termsForField = new SortedSet<TermVectorEntry>(comparator);

            currentField = field;
            currentSet = termsForField;
            fieldToTerms[field] = termsForField;
        }

        /// <summary>Gets the mapping between fields and terms, sorted by the comparator.</summary>
        /// <value>A map from field name to the <see cref="SortedSet{T}" /> of
        /// <see cref="TermVectorEntry" /> collected for that field.</value>
        public virtual IDictionary<string, SortedSet<TermVectorEntry>> FieldToTerms
        {
            get
            {
                return fieldToTerms;
            }
        }

        /// <summary>Gets the comparer supplied at construction time.</summary>
        public virtual IComparer<TermVectorEntry> Comparator
        {
            get
            {
                return comparator;
            }
        }
    }
}

// Source: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/FieldsReader.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
- */ - -using System; -using System.IO; -using Lucene.Net.Support; -using Lucene.Net.Util; -using TokenStream = Lucene.Net.Analysis.TokenStream; -using Lucene.Net.Documents; -using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException; -using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput; -using Directory = Lucene.Net.Store.Directory; -using IndexInput = Lucene.Net.Store.IndexInput; - -namespace Lucene.Net.Index -{ - - /// <summary> Class responsible for access to stored document fields. - /// <p/> - /// It uses <segment>.fdt and <segment>.fdx; files. - /// - /// </summary> - public sealed class FieldsReader : ICloneable, IDisposable - { - private readonly FieldInfos fieldInfos; - - // The main fieldStream, used only for cloning. - private readonly IndexInput cloneableFieldsStream; - - // This is a clone of cloneableFieldsStream used for reading documents. - // It should not be cloned outside of a synchronized context. - private readonly IndexInput fieldsStream; - - private readonly IndexInput cloneableIndexStream; - private readonly IndexInput indexStream; - private readonly int numTotalDocs; - private readonly int size; - private bool closed; - private readonly int format; - private readonly int formatSize; - - // The docID offset where our docs begin in the index - // file. This will be 0 if we have our own private file. - private readonly int docStoreOffset; - - private readonly CloseableThreadLocal<IndexInput> fieldsStreamTL = new CloseableThreadLocal<IndexInput>(); - private readonly bool isOriginal = false; - - /// <summary>Returns a cloned FieldsReader that shares open - /// IndexInputs with the original one. It is the caller's - /// job not to close the original FieldsReader until all - /// clones are called (eg, currently SegmentReader manages - /// this logic). 
/// </summary>
        public System.Object Clone()
        {
            EnsureOpen();
            return new FieldsReader(fieldInfos, numTotalDocs, size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
        }

        /// <summary>Used only by <see cref="Clone" />. Copies the state of the source reader,
        /// keeps references to its cloneable streams and takes private clones of them for
        /// this instance's own reads.
        /// </summary>
        private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream)
        {
            this.fieldInfos = fieldInfos;
            this.numTotalDocs = numTotalDocs;
            this.size = size;
            this.format = format;
            this.formatSize = formatSize;
            this.docStoreOffset = docStoreOffset;
            this.cloneableFieldsStream = cloneableFieldsStream;
            this.cloneableIndexStream = cloneableIndexStream;
            fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
            indexStream = (IndexInput) cloneableIndexStream.Clone();
        }

        /// <summary>Opens the stored fields of <paramref name="segment" /> with the default
        /// read buffer size, reading the whole file (no doc-store slice).
        /// </summary>
        public /*internal*/ FieldsReader(Directory d, String segment, FieldInfos fn) : this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0)
        {
        }

        /// <summary>Opens the stored fields of <paramref name="segment" /> with the given
        /// read buffer size, reading the whole file (no doc-store slice).
        /// </summary>
        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize) : this(d, segment, fn, readBufferSize, -1, 0)
        {
        }

        /// <summary>Opens the fields data and fields index files of <paramref name="segment" />.
        /// When <paramref name="docStoreOffset" /> is -1 the whole file is used and the document
        /// count is derived from the index file length; otherwise only <paramref name="size" />
        /// documents starting at <paramref name="docStoreOffset" /> are visible.
        /// If anything fails, every stream opened so far is closed before the exception propagates.
        /// </summary>
        /// <throws> CorruptIndexException if the format header is newer than FieldsWriter.FORMAT_CURRENT </throws>
        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;
            isOriginal = true;
            try
            {
                fieldInfos = fn;

                cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
                cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

                // First version of fdx did not include a format
                // header, but, the first int will always be 0 in that
                // case, so the first int can be used as the format directly
                format = cloneableIndexStream.ReadInt();

                if (format > FieldsWriter.FORMAT_CURRENT)
                    throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");

                // Only files newer than the original format start with a 4 byte header
                formatSize = format > FieldsWriter.FORMAT ? 4 : 0;

                if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                    cloneableFieldsStream.SetModifiedUTF8StringsMode();

                fieldsStream = (IndexInput) cloneableFieldsStream.Clone();

                long indexSize = cloneableIndexStream.Length() - formatSize;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.docStoreOffset = docStoreOffset;
                    this.size = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    // One 8 byte pointer per document in the index file
                    this.docStoreOffset = 0;
                    this.size = (int) (indexSize >> 3);
                }

                indexStream = (IndexInput) cloneableIndexStream.Clone();
                numTotalDocs = (int) (indexSize >> 3);
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }

        /// <throws> AlreadyClosedException if this FieldsReader is closed </throws>
        internal void EnsureOpen()
        {
            if (closed)
            {
                throw new AlreadyClosedException("this FieldsReader is closed");
            }
        }

        /// <summary> Closes the underlying <see cref="Lucene.Net.Store.IndexInput" /> streams, including any ones associated with a
        /// lazy implementation of a Field. This means that the Fields values will not be accessible.
        /// The cloneable streams are closed only by the instance that opened them (not by clones).
        /// Calling this more than once has no further effect.
        /// </summary>
        /// <throws> IOException </throws>
        public void Dispose()
        {
            // Move to protected method if class becomes unsealed
            if (!closed)
            {
                if (fieldsStream != null)
                {
                    fieldsStream.Close();
                }
                if (isOriginal)
                {
                    if (cloneableFieldsStream != null)
                    {
                        cloneableFieldsStream.Close();
                    }
                    if (cloneableIndexStream != null)
                    {
                        cloneableIndexStream.Close();
                    }
                }
                if (indexStream != null)
                {
                    indexStream.Close();
                }
                fieldsStreamTL.Close();
                closed = true;
            }
        }

        /// <summary>Number of documents visible through this reader.</summary>
        public /*internal*/ int Size()
        {
            return size;
        }

        /// <summary>Positions the index stream on the 8 byte pointer of <paramref name="docID" />,
        /// accounting for the format header and the doc-store offset.
        /// </summary>
        private void SeekIndex(int docID)
        {
            indexStream.Seek(formatSize + (docID + docStoreOffset) * 8L);
        }

        internal bool CanReadRawDocs()
        {
            // Disable reading raw docs in 2.x format, because of the removal of compressed
            // fields in 3.0. We don't want rawDocs() to decode field bits to figure out
            // if a field was compressed, hence we enforce ordinary (non-raw) stored field merges
            // for <3.0 indexes.
            return format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
        }

        /// <summary>Reads the stored fields of document <paramref name="n" />.
        /// <paramref name="fieldSelector" /> decides per field whether it is loaded, loaded lazily,
        /// replaced by its size, or skipped; a null selector loads every field.
        /// </summary>
        public /*internal*/ Document Doc(int n, FieldSelector fieldSelector)
        {
            SeekIndex(n);
            long position = indexStream.ReadLong();
            fieldsStream.Seek(position);

            var doc = new Document();
            int numFields = fieldsStream.ReadVInt();
            for (int i = 0; i < numFields; i++)
            {
                int fieldNumber = fieldsStream.ReadVInt();
                FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
                FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);

                byte bits = fieldsStream.ReadByte();
                System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                System.Diagnostics.Debug.Assert(
                    (!compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
                    "compressed fields are only allowed in indexes of version <= 2.9");
                bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
                bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
                //TODO: Find an alternative approach here if this list continues to grow beyond the
                //list of 5 or 6 currently here. See Lucene 762 for discussion
                if (acceptField.Equals(FieldSelectorResult.LOAD))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                    break; //Get out of this loop
                }
                else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
                {
                    AddFieldLazy(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE))
                {
                    // AddFieldSize consumes the length prefix; the content still has to be skipped
                    SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
                {
                    AddFieldSize(doc, fi, binary, compressed);
                    break;
                }
                else
                {
                    SkipField(binary, compressed);
                }
            }

            return doc;
        }

        /// <summary>Returns the length in bytes of each raw document in a
        /// contiguous range of length numDocs starting with
        /// startDocID. Returns the IndexInput (the fieldStream),
        /// already seeked to the starting point for startDocID.
        /// </summary>
        internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs)
        {
            SeekIndex(startDocID);
            long startOffset = indexStream.ReadLong();
            long lastOffset = startOffset;
            int count = 0;
            while (count < numDocs)
            {
                long offset;
                int docID = docStoreOffset + startDocID + count + 1;
                System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
                // The last document ends where the fields file ends; every other document
                // ends where the next one starts
                if (docID < numTotalDocs)
                    offset = indexStream.ReadLong();
                else
                    offset = fieldsStream.Length();
                lengths[count++] = (int) (offset - lastOffset);
                lastOffset = offset;
            }

            fieldsStream.Seek(startOffset);

            return fieldsStream;
        }

        /// <summary> Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
        /// This will have the most payoff on large fields.
        /// </summary>
        private void SkipField(bool binary, bool compressed)
        {
            SkipField(binary, compressed, fieldsStream.ReadVInt());
        }

        /// <summary>Skips field content whose length prefix (<paramref name="toRead" />) has already been read.</summary>
        private void SkipField(bool binary, bool compressed, int toRead)
        {
            if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed)
            {
                fieldsStream.Seek(fieldsStream.FilePointer + toRead);
            }
            else
            {
                // We need to skip chars. This will slow us down, but still better
                fieldsStream.SkipChars(toRead);
            }
        }

        /// <summary>Adds a <see cref="LazyField" /> for the current field to <paramref name="doc" />
        /// and moves the fields stream past the field's content without reading it.
        /// </summary>
        private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
        {
            if (binary)
            {
                int toRead = fieldsStream.ReadVInt();
                long pointer = fieldsStream.FilePointer;
                //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
                doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer, binary, compressed));

                //Need to move the pointer ahead by toRead positions
                fieldsStream.Seek(pointer + toRead);
            }
            else
            {
                const Field.Store store = Field.Store.YES;
                Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
                Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

                AbstractField f;
                if (compressed)
                {
                    int toRead = fieldsStream.ReadVInt();
                    long pointer = fieldsStream.FilePointer;
                    f = new LazyField(this, fi.name, store, toRead, pointer, binary, compressed);
                    //skip over the part that we aren't loading
                    fieldsStream.Seek(pointer + toRead);
                    f.OmitNorms = fi.omitNorms;
                    f.OmitTermFreqAndPositions = fi.omitTermFreqAndPositions;
                }
                else
                {
                    int length = fieldsStream.ReadVInt();
                    long pointer = fieldsStream.FilePointer;
                    //Skip ahead of where we are by the length of what is stored
                    if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                    {
                        fieldsStream.Seek(pointer + length);
                    }
                    else
                    {
                        fieldsStream.SkipChars(length);
                    }
                    f = new LazyField(this, fi.name, store, index, termVector, length, pointer, binary, compressed)
                            {OmitNorms = fi.omitNorms, OmitTermFreqAndPositions = fi.omitTermFreqAndPositions};
                }

                doc.Add(f);
            }
        }

        /// <summary>Reads the current field's content eagerly and adds it to <paramref name="doc" />.</summary>
        private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
        {
            //we have a binary stored field, and it may be compressed
            if (binary)
            {
                int toRead = fieldsStream.ReadVInt();
                var b = new byte[toRead];
                fieldsStream.ReadBytes(b, 0, b.Length);
                doc.Add(compressed ? new Field(fi.name, Uncompress(b), Field.Store.YES) : new Field(fi.name, b, Field.Store.YES));
            }
            else
            {
                const Field.Store store = Field.Store.YES;
                Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
                Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

                AbstractField f;
                if (compressed)
                {
                    int toRead = fieldsStream.ReadVInt();

                    var b = new byte[toRead];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    f = new Field(fi.name, false, System.Text.Encoding.UTF8.GetString(Uncompress(b)), store, index,
                                  termVector) {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};
                }
                else
                {
                    f = new Field(fi.name, false, fieldsStream.ReadString(), store, index, termVector)
                            {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};
                }

                doc.Add(f);
            }
        }

        // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
        // Read just the size -- caller must skip the field content to continue reading fields
        // Return the size in bytes or chars, depending on field type
        private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
        {
            int size = fieldsStream.ReadVInt();
            int bytesize = binary || compressed ? size : 2 * size;
            var sizebytes = new byte[4];
            sizebytes[0] = (byte) (Number.URShift(bytesize, 24));
            sizebytes[1] = (byte) (Number.URShift(bytesize, 16));
            sizebytes[2] = (byte) (Number.URShift(bytesize, 8));
            sizebytes[3] = (byte) bytesize;
            doc.Add(new Field(fi.name, sizebytes, Field.Store.YES));
            return size;
        }

        /// <summary> A lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
        /// loaded.
        /// </summary>
        [Serializable]
        private sealed class LazyField : AbstractField
        {
            private void InitBlock(FieldsReader enclosingInstance)
            {
                this.Enclosing_Instance = enclosingInstance;
            }

            private FieldsReader Enclosing_Instance { get; set; }

            // Length prefix of the stored value, as read from the fields stream
            private int toRead;
            // Position of the stored value in the fields stream
            private long pointer;
            [Obsolete("Only kept for backward-compatbility with <3.0 indexes. Will be removed in 4.0.")]
            private readonly Boolean isCompressed;

            /// <summary>Creates a lazy field that is neither indexed nor has term vectors.</summary>
            public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary, bool isCompressed)
                : this(enclosingInstance, name, store, Field.Index.NO, Field.TermVector.NO, toRead, pointer, isBinary, isCompressed)
            {
            }

            /// <summary>Creates a lazy field whose value is <paramref name="toRead" /> units long
            /// and starts at <paramref name="pointer" /> in the fields stream.
            /// </summary>
            public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary, bool isCompressed) : base(name, store, index, termVector)
            {
                InitBlock(enclosingInstance);
                this.toRead = toRead;
                this.pointer = pointer;
                this.internalIsBinary = isBinary;
                if (isBinary)
                    internalBinaryLength = toRead;
                lazy = true;
                this.isCompressed = isCompressed;
            }

            /// <summary>Returns the fields stream held in the enclosing reader's fieldsStreamTL slot
            /// (presumably thread-local, judging by the name), cloning the reader's cloneable
            /// fields stream into the slot on first use.
            /// </summary>
            private IndexInput GetFieldStream()
            {
                IndexInput localFieldsStream = Enclosing_Instance.fieldsStreamTL.Get();
                if (localFieldsStream == null)
                {
                    localFieldsStream = (IndexInput) Enclosing_Instance.cloneableFieldsStream.Clone();
                    Enclosing_Instance.fieldsStreamTL.Set(localFieldsStream);
                }
                return localFieldsStream;
            }

            /// <summary>The value of the field as a Reader, or null. If null, the String value,
            /// binary value, or TokenStream value is used. Exactly one of StringValue(),
            /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
            /// This implementation always returns null.
            /// </summary>
            public override TextReader ReaderValue
            {
                get
                {
                    Enclosing_Instance.EnsureOpen();
                    return null;
                }
            }

            /// <summary>The value of the field as a TokenStream, or null. If null, the Reader value,
            /// String value, or binary value is used. Exactly one of StringValue(),
            /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
            /// This implementation always returns null.
            /// </summary>
            public override TokenStream TokenStreamValue
            {
                get
                {
                    Enclosing_Instance.EnsureOpen();
                    return null;
                }
            }

            /// <summary>The value of the field as a String, or null. If null, the Reader value,
            /// binary value, or TokenStream value is used. Exactly one of StringValue(),
            /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
            /// The value is read from the fields stream on first access and cached afterwards.
            /// </summary>
            public override string StringValue
            {
                get
                {
                    Enclosing_Instance.EnsureOpen();
                    if (internalIsBinary)
                        return null;

                    if (fieldsData == null)
                    {
                        IndexInput localFieldsStream = GetFieldStream();
                        try
                        {
                            localFieldsStream.Seek(pointer);
                            if (isCompressed)
                            {
                                var b = new byte[toRead];
                                localFieldsStream.ReadBytes(b, 0, b.Length);
                                fieldsData = System.Text.Encoding.UTF8.GetString(Enclosing_Instance.Uncompress(b));
                            }
                            else if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                            {
                                // toRead is a byte count in this format
                                var bytes = new byte[toRead];
                                localFieldsStream.ReadBytes(bytes, 0, toRead);
                                fieldsData = System.Text.Encoding.UTF8.GetString(bytes);
                            }
                            else
                            {
                                //read in chars b/c we already know the length we need to read
                                var chars = new char[toRead];
                                localFieldsStream.ReadChars(chars, 0, toRead);
                                fieldsData = new System.String(chars);
                            }
                        }
                        catch (System.IO.IOException e)
                        {
                            throw new FieldReaderException(e);
                        }
                    }
                    return (System.String) fieldsData;
                }
            }

            /// <summary>Position of the stored value in the fields stream.</summary>
            public long Pointer
            {
                get
                {
                    Enclosing_Instance.EnsureOpen();
                    return pointer;
                }
                set
                {
                    Enclosing_Instance.EnsureOpen();
                    this.pointer = value;
                }
            }

            /// <summary>Length prefix of the stored value.</summary>
            public int ToRead
            {
                get
                {
                    Enclosing_Instance.EnsureOpen();
                    return toRead;
                }
                set
                {
                    Enclosing_Instance.EnsureOpen();
                    this.toRead = value;
                }
            }

            /// <summary>Returns the binary value, or null if this field is not binary. The value is
            /// read from the fields stream on first access and cached afterwards.
            /// <paramref name="result" /> is used as the read buffer when it is non-null and at
            /// least <c>toRead</c> bytes long; otherwise a new buffer is allocated.
            /// </summary>
            public override byte[] GetBinaryValue(byte[] result)
            {
                Enclosing_Instance.EnsureOpen();

                if (!internalIsBinary)
                    return null;

                if (fieldsData == null)
                {
                    // Allocate new buffer if result is null or too small
                    byte[] b = (result == null || result.Length < toRead) ? new byte[toRead] : result;

                    IndexInput localFieldsStream = GetFieldStream();

                    // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
                    // since they are already handling this exception when getting the document
                    byte[] data;
                    try
                    {
                        localFieldsStream.Seek(pointer);
                        localFieldsStream.ReadBytes(b, 0, toRead);
                        data = isCompressed ? Enclosing_Instance.Uncompress(b) : b;
                    }
                    catch (IOException e)
                    {
                        throw new FieldReaderException(e);
                    }

                    fieldsData = data;
                    internalbinaryOffset = 0;
                    // toRead is the on-disk length. For a compressed field the returned array holds
                    // the uncompressed bytes, so its own length is the valid length; for a raw field
                    // the buffer may be a larger caller-supplied array, so only toRead bytes are valid.
                    internalBinaryLength = isCompressed ? data.Length : toRead;
                }

                return (byte[]) fieldsData;
            }
        }

        /// <summary>Decompresses <paramref name="b" /> with CompressionTools.Decompress.</summary>
        /// <throws> CorruptIndexException if decompression fails for any reason </throws>
        private byte[] Uncompress(byte[] b)
        {
            try
            {
                return CompressionTools.Decompress(b);
            }
            catch (Exception e)
            {
                // this will happen if the field is not compressed
                throw new CorruptIndexException("field data are in wrong format: " + e, e);
            }
        }
    }
}
// \ No newline at end of file
// http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/FieldsWriter.cs
// ----------------------------------------------------------------------
// diff --git a/src/Lucene.Net.Core/Index/FieldsWriter.cs b/src/Lucene.Net.Core/Index/FieldsWriter.cs
// deleted file mode 100644
// index d34a662..0000000
// --- a/src/Lucene.Net.Core/Index/FieldsWriter.cs
// +++ /dev/null
// @@ -1,290 +0,0 @@
// /*
//  * Licensed to the Apache Software Foundation (ASF) under one or more
//  * contributor license agreements. See the NOTICE file distributed with
//  * this work for additional information regarding copyright ownership.
//  * The ASF licenses this file to You under the Apache License, Version 2.0
//  * (the "License"); you may not use this file except in compliance with
//  * the License. You may obtain a copy of the License at
//  *
//  * http://www.apache.org/licenses/LICENSE-2.0
//  *
//  * Unless required by applicable law or agreed to in writing, software
//  * distributed under the License is distributed on an "AS IS" BASIS,
//  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  * See the License for the specific language governing permissions and
//  * limitations under the License.
// */

using System;
using System.Linq;
using Lucene.Net.Documents;
using Document = Lucene.Net.Documents.Document;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;

namespace Lucene.Net.Index
{

    /// <summary>Writes stored fields to a fields data stream and records, for every document,
    /// the position of its data in a fields index stream.
    /// </summary>
    sealed class FieldsWriter : IDisposable
    {
        // Per-field flag bits written in front of every stored field
        internal const byte FIELD_IS_TOKENIZED = (0x1);
        internal const byte FIELD_IS_BINARY = (0x2);
        [Obsolete("Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0")]
        internal const byte FIELD_IS_COMPRESSED = (0x4);

        // Original format
        internal const int FORMAT = 0;

        // Changed strings to UTF8
        internal const int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;

        // Lucene 3.0: Removal of compressed fields
        internal const int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;

        // NOTE: if you introduce a new format, make it 1 higher
        // than the current one, and always change this if you
        // switch to a new format!
        internal static readonly int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;

        private readonly FieldInfos fieldInfos;

        private IndexOutput fieldsStream;

        private IndexOutput indexStream;

        // True when this instance created the streams itself and therefore closes them in Dispose()
        private readonly bool doClose;

        /// <summary>Creates the fields data and fields index files of <paramref name="segment" />
        /// in <paramref name="d" /> and writes the current format header to both. If either
        /// file cannot be set up, whatever was opened is closed and the files created so far
        /// are deleted (best effort) before the original exception propagates.
        /// </summary>
        internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
        {
            fieldInfos = fn;

            // This instance owns the streams it opens. The flag must be set before anything is
            // opened: Dispose() does nothing while it is false, so setting it only at the end
            // would turn the clean-up calls in the failure paths below into no-ops and leak
            // the outputs that were already created.
            doClose = true;

            bool success = false;
            String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
            try
            {
                fieldsStream = d.CreateOutput(fieldsName);
                fieldsStream.WriteInt(FORMAT_CURRENT);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    try
                    {
                        Dispose();
                    }
                    catch (System.Exception)
                    {
                        // Suppress so we keep throwing the original exception
                    }
                    try
                    {
                        d.DeleteFile(fieldsName);
                    }
                    catch (System.Exception)
                    {
                        // Suppress so we keep throwing the original exception
                    }
                }
            }

            success = false;
            String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
            try
            {
                indexStream = d.CreateOutput(indexName);
                indexStream.WriteInt(FORMAT_CURRENT);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    try
                    {
                        Dispose();
                    }
                    catch (System.Exception)
                    {
                        // Suppress so we keep throwing the original exception
                    }
                    try
                    {
                        d.DeleteFile(fieldsName);
                    }
                    catch (System.Exception)
                    {
                        // Suppress so we keep throwing the original exception
                    }
                    try
                    {
                        d.DeleteFile(indexName);
                    }
                    catch (System.Exception)
                    {
                        // Suppress so we keep throwing the original exception
                    }
                }
            }
        }

        /// <summary>Wraps streams owned by the caller: <paramref name="fdx" /> is the index
        /// stream and <paramref name="fdt" /> the fields data stream. Dispose() leaves them open.
        /// </summary>
        internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
        {
            fieldInfos = fn;
            fieldsStream = fdt;
            indexStream = fdx;
            doClose = false;
        }

        /// <summary>Replaces the fields data stream that subsequent writes go to.</summary>
        internal void SetFieldsStream(IndexOutput stream)
        {
            this.fieldsStream = stream;
        }

        // Writes the contents of buffer into the fields stream
        // and adds a new entry for this document into the index
        // stream. This assumes the buffer was already written
        // in the correct fields format.
        internal void FlushDocument(int numStoredFields, RAMOutputStream buffer)
        {
            indexStream.WriteLong(fieldsStream.FilePointer);
            fieldsStream.WriteVInt(numStoredFields);
            buffer.WriteTo(fieldsStream);
        }

        /// <summary>Records a document that has no stored fields.</summary>
        internal void SkipDocument()
        {
            indexStream.WriteLong(fieldsStream.FilePointer);
            fieldsStream.WriteVInt(0);
        }

        /// <summary>Flushes both underlying streams.</summary>
        internal void Flush()
        {
            indexStream.Flush();
            fieldsStream.Flush();
        }

        /// <summary>Closes both streams if this instance owns them; otherwise does nothing.
        /// Both streams are closed even if closing the first one fails, and in that case the
        /// first IOException is the one that propagates.
        /// </summary>
        public void Dispose()
        {
            // Move to protected method if class becomes unsealed
            if (doClose)
            {
                try
                {
                    if (fieldsStream != null)
                    {
                        try
                        {
                            fieldsStream.Close();
                        }
                        finally
                        {
                            fieldsStream = null;
                        }
                    }
                }
                catch (System.IO.IOException)
                {
                    try
                    {
                        if (indexStream != null)
                        {
                            try
                            {
                                indexStream.Close();
                            }
                            finally
                            {
                                indexStream = null;
                            }
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // Ignore so we throw only first IOException hit
                    }
                    throw;
                }
                finally
                {
                    // Normal path: the fields stream closed cleanly, now close the index stream.
                    // After a failure above indexStream is already null and this is skipped.
                    if (indexStream != null)
                    {
                        try
                        {
                            indexStream.Close();
                        }
                        finally
                        {
                            indexStream = null;
                        }
                    }
                }
            }
        }

        /// <summary>Writes one stored field: the field number, the flag bits and then either
        /// the length-prefixed binary value or the string value.
        /// </summary>
        internal void WriteField(FieldInfo fi, IFieldable field)
        {
            fieldsStream.WriteVInt(fi.number);
            byte bits = 0;
            if (field.IsTokenized)
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            if (field.IsBinary)
                bits |= FieldsWriter.FIELD_IS_BINARY;

            fieldsStream.WriteByte(bits);

            // compression is disabled for the current field
            if (field.IsBinary)
            {
                byte[] data = field.GetBinaryValue();
                int len = field.BinaryLength;
                int offset = field.BinaryOffset;

                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, offset, len);
            }
            else
            {
                fieldsStream.WriteString(field.StringValue);
            }
        }

        /// <summary>Bulk write a contiguous series of documents. The
        /// lengths array is the length (in bytes) of each raw
        /// document. The stream IndexInput is the
        /// fieldsStream from which we should bulk-copy all
        /// bytes.
        /// </summary>
        internal void AddRawDocuments(IndexInput stream, int[] lengths, int numDocs)
        {
            long position = fieldsStream.FilePointer;
            long start = position;
            for (int i = 0; i < numDocs; i++)
            {
                // Each document starts where the previous one ended
                indexStream.WriteLong(position);
                position += lengths[i];
            }
            fieldsStream.CopyBytes(stream, position - start);
            System.Diagnostics.Debug.Assert(fieldsStream.FilePointer == position);
        }

        /// <summary>Writes the stored fields of <paramref name="doc" />; fields that are not
        /// stored are ignored.
        /// </summary>
        internal void AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.FilePointer);

            System.Collections.Generic.IList<IFieldable> fields = doc.GetFields();
            int storedCount = fields.Count(field => field.IsStored);
            fieldsStream.WriteVInt(storedCount);

            foreach (IFieldable field in fields)
            {
                if (field.IsStored)
                    WriteField(fieldInfos.FieldInfo(field.Name), field);
            }
        }
    }
}
// \ No newline at end of file
// http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/FilterIndexReader.cs
// ----------------------------------------------------------------------
// diff --git a/src/Lucene.Net.Core/Index/FilterIndexReader.cs b/src/Lucene.Net.Core/Index/FilterIndexReader.cs
// deleted file mode 100644
// index ced4220..0000000
// --- a/src/Lucene.Net.Core/Index/FilterIndexReader.cs
// +++ /dev/null
// @@ -1,388 +0,0 @@
// /*
//  * Licensed to the Apache Software Foundation (ASF) under one or more
//  * contributor license agreements. See the NOTICE file distributed with
//  * this work for additional information regarding copyright ownership.
//  * The ASF licenses this file to You under the Apache License, Version 2.0
//  * (the "License"); you may not use this file except in compliance with
//  * the License. You may obtain a copy of the License at
//  *
//  * http://www.apache.org/licenses/LICENSE-2.0
//  *
//  * Unless required by applicable law or agreed to in writing, software
//  * distributed under the License is distributed on an "AS IS" BASIS,
//  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// */

using System;

using Document = Lucene.Net.Documents.Document;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using Directory = Lucene.Net.Store.Directory;

namespace Lucene.Net.Index
{

    /// <summary>An <c>IndexReader</c> that wraps another <c>IndexReader</c> and forwards
    /// every request to it. Subclasses override individual members to transform the data
    /// on the way through or to add functionality of their own.
    /// </summary>
    public class FilterIndexReader : IndexReader
    {

        /// <summary>Base class for filtering <see cref="Lucene.Net.Index.TermDocs" /> implementations;
        /// every member forwards to the wrapped instance.
        /// </summary>
        public class FilterTermDocs : TermDocs
        {
            /// <summary>The wrapped instance.</summary>
            protected internal TermDocs in_Renamed;

            public FilterTermDocs(TermDocs in_Renamed)
            {
                this.in_Renamed = in_Renamed;
            }

            public virtual void Seek(Term term)
            {
                this.in_Renamed.Seek(term);
            }

            public virtual void Seek(TermEnum termEnum)
            {
                this.in_Renamed.Seek(termEnum);
            }

            public virtual int Doc
            {
                get { return this.in_Renamed.Doc; }
            }

            public virtual int Freq
            {
                get { return this.in_Renamed.Freq; }
            }

            public virtual bool Next()
            {
                return this.in_Renamed.Next();
            }

            public virtual int Read(int[] docs, int[] freqs)
            {
                return this.in_Renamed.Read(docs, freqs);
            }

            public virtual bool SkipTo(int i)
            {
                return this.in_Renamed.SkipTo(i);
            }

            /// <summary>Same as <see cref="Dispose()" />.</summary>
            public void Close()
            {
                Dispose();
            }

            public void Dispose()
            {
                Dispose(true);
            }

            /// <summary>Closes the wrapped instance when <paramref name="disposing" /> is true.</summary>
            protected virtual void Dispose(bool disposing)
            {
                if (!disposing)
                {
                    return;
                }

                this.in_Renamed.Close();
            }
        }

        /// <summary>Base class for filtering <see cref="TermPositions" /> implementations;
        /// every member forwards to the wrapped instance.
        /// </summary>
        public class FilterTermPositions : FilterTermDocs, TermPositions
        {

            public FilterTermPositions(TermPositions in_Renamed) : base(in_Renamed)
            {
            }

            // The wrapped instance, viewed through the TermPositions interface
            private TermPositions Wrapped
            {
                get { return (TermPositions) this.in_Renamed; }
            }

            public virtual int NextPosition()
            {
                return Wrapped.NextPosition();
            }

            public virtual int PayloadLength
            {
                get { return Wrapped.PayloadLength; }
            }

            public virtual byte[] GetPayload(byte[] data, int offset)
            {
                return Wrapped.GetPayload(data, offset);
            }


            // TODO: Remove warning after API has been finalized

            public virtual bool IsPayloadAvailable
            {
                get { return Wrapped.IsPayloadAvailable; }
            }
        }

        /// <summary>Base class for filtering <see cref="TermEnum" /> implementations;
        /// every member forwards to the wrapped instance.
        /// </summary>
        public class FilterTermEnum : TermEnum
        {
            /// <summary>The wrapped instance.</summary>
            protected internal TermEnum in_Renamed;

            public FilterTermEnum(TermEnum in_Renamed)
            {
                this.in_Renamed = in_Renamed;
            }

            public override bool Next()
            {
                return this.in_Renamed.Next();
            }

            public override Term Term
            {
                get { return this.in_Renamed.Term; }
            }

            public override int DocFreq()
            {
                return this.in_Renamed.DocFreq();
            }

            /// <summary>Closes the wrapped instance when <paramref name="disposing" /> is true.</summary>
            protected override void Dispose(bool disposing)
            {
                if (!disposing)
                {
                    return;
                }

                this.in_Renamed.Close();
            }
        }

        /// <summary>The wrapped reader.</summary>
        protected internal IndexReader in_Renamed;

        /// <summary> <p/>Construct a FilterIndexReader based on the specified base reader.
        /// Directory locking for delete, undeleteAll, and setNorm operations is
        /// left to the base reader.<p/>
        /// <p/>Note that base reader is closed if this FilterIndexReader is closed.<p/>
        /// </summary>
        /// <param name="in_Renamed">specified base reader.
        /// </param>
        public FilterIndexReader(IndexReader in_Renamed) : base()
        {
            this.in_Renamed = in_Renamed;
        }

        public override Directory Directory()
        {
            return this.in_Renamed.Directory();
        }

        public override ITermFreqVector[] GetTermFreqVectors(int docNumber)
        {
            EnsureOpen();
            return this.in_Renamed.GetTermFreqVectors(docNumber);
        }

        public override ITermFreqVector GetTermFreqVector(int docNumber, string field)
        {
            EnsureOpen();
            return this.in_Renamed.GetTermFreqVector(docNumber, field);
        }


        public override void GetTermFreqVector(int docNumber, string field, TermVectorMapper mapper)
        {
            EnsureOpen();
            this.in_Renamed.GetTermFreqVector(docNumber, field, mapper);
        }

        public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
        {
            EnsureOpen();
            this.in_Renamed.GetTermFreqVector(docNumber, mapper);
        }

        public override int NumDocs()
        {
            // Don't call ensureOpen() here (it could affect performance)
            return this.in_Renamed.NumDocs();
        }

        public override int MaxDoc
        {
            get
            {
                // Don't call ensureOpen() here (it could affect performance)
                return this.in_Renamed.MaxDoc;
            }
        }

        public override Document Document(int n, FieldSelector fieldSelector)
        {
            EnsureOpen();
            return this.in_Renamed.Document(n, fieldSelector);
        }

        public override bool IsDeleted(int n)
        {
            // Don't call ensureOpen() here (it could affect performance)
            return this.in_Renamed.IsDeleted(n);
        }

        public override bool HasDeletions
        {
            get
            {
                // Don't call ensureOpen() here (it could affect performance)
                return this.in_Renamed.HasDeletions;
            }
        }

        protected internal override void DoUndeleteAll()
        {
            this.in_Renamed.UndeleteAll();
        }

        public override bool HasNorms(string field)
        {
            EnsureOpen();
            return this.in_Renamed.HasNorms(field);
        }

        public override byte[] Norms(string f)
        {
            EnsureOpen();
            return this.in_Renamed.Norms(f);
        }

        public override void Norms(string f, byte[] bytes, int offset)
        {
            EnsureOpen();
            this.in_Renamed.Norms(f, bytes, offset);
        }

        protected internal override void DoSetNorm(int d, string f, byte b)
        {
            this.in_Renamed.SetNorm(d, f, b);
        }

        public override TermEnum Terms()
        {
            EnsureOpen();
            return this.in_Renamed.Terms();
        }

        public override TermEnum Terms(Term t)
        {
            EnsureOpen();
            return this.in_Renamed.Terms(t);
        }

        public override int DocFreq(Term t)
        {
            EnsureOpen();
            return this.in_Renamed.DocFreq(t);
        }

        public override TermDocs TermDocs()
        {
            EnsureOpen();
            return this.in_Renamed.TermDocs();
        }

        public override TermDocs TermDocs(Term term)
        {
            EnsureOpen();
            return this.in_Renamed.TermDocs(term);
        }

        public override TermPositions TermPositions()
        {
            EnsureOpen();
            return this.in_Renamed.TermPositions();
        }

        protected internal override void DoDelete(int n)
        {
            this.in_Renamed.DeleteDocument(n);
        }

        protected internal override void DoCommit(System.Collections.Generic.IDictionary<string, string> commitUserData)
        {
            this.in_Renamed.Commit(commitUserData);
        }

        /// <summary>Closes the wrapped reader, then purges this reader's FieldCache entries.</summary>
        protected internal override void DoClose()
        {
            this.in_Renamed.Close();
            // NOTE: only needed in case someone had asked for
            // FieldCache for top-level reader (which is generally
            // not a good idea):
            Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this);
        }


        public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
        {
            EnsureOpen();
            return this.in_Renamed.GetFieldNames(fieldNames);
        }

        public override long Version
        {
            get
            {
                EnsureOpen();
                return this.in_Renamed.Version;
            }
        }

        public override bool IsCurrent()
        {
            EnsureOpen();
            return this.in_Renamed.IsCurrent();
        }

        public override bool IsOptimized()
        {
            EnsureOpen();
            return this.in_Renamed.IsOptimized();
        }

        public override IndexReader[] GetSequentialSubReaders()
        {
            return this.in_Renamed.GetSequentialSubReaders();
        }

        /// <summary>Not implemented in this port: trips a debug failure and returns null.</summary>
        public override System.Object Clone()
        {
            System.Diagnostics.Debug.Fail("Port issue:", "Lets see if we need this FilterIndexReader.Clone()"); // {{Aroush-2.9}}
            return null;
        }

        /// <summary>
        /// If the subclass of FilteredIndexReader modifies the
        /// contents of the FieldCache, you must override this
        /// method to provide a different key.
        /// </summary>
        public override object FieldCacheKey
        {
            get { return this.in_Renamed.FieldCacheKey; }
        }

        /// <summary>
        /// If the subclass of FilteredIndexReader modifies the
        /// deleted docs, you must override this method to provide
        /// a different key.
        /// </summary>
        public override object DeletesCacheKey
        {
            get { return this.in_Renamed.DeletesCacheKey; }
        }
    }
}
// \ No newline at end of file
// http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/FormatPostingsDocsConsumer.cs
// ----------------------------------------------------------------------
// diff --git a/src/Lucene.Net.Core/Index/FormatPostingsDocsConsumer.cs b/src/Lucene.Net.Core/Index/FormatPostingsDocsConsumer.cs
// deleted file mode 100644
// index 74efb0d..0000000
// --- a/src/Lucene.Net.Core/Index/FormatPostingsDocsConsumer.cs
// +++
// /dev/null @@ -1,36 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

namespace Lucene.Net.Index
{

    /// <summary>Receives the documents of a single term while postings are written.
    /// NOTE: this API is experimental and will likely change.
    /// </summary>
    abstract class FormatPostingsDocsConsumer
    {

        /// <summary>Adds a new doc in this term.</summary>
        /// <returns>The consumer for this doc's positions/payloads, or null,
        /// in which case consuming positions/payloads is simply skipped.
        /// </returns>
        internal abstract FormatPostingsPositionsConsumer AddDoc(int docID, int termDocFreq);

        /// <summary>Called when we are done adding docs to this term.</summary>
        internal abstract void Finish();
    }
}
// \ No newline at end of file
