http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/SegmentTermPositions.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/SegmentTermPositions.cs b/src/Lucene.Net.Core/Index/SegmentTermPositions.cs deleted file mode 100644 index 2f49df9..0000000 --- a/src/Lucene.Net.Core/Index/SegmentTermPositions.cs +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -using System; -using Lucene.Net.Support; -using IndexInput = Lucene.Net.Store.IndexInput; - -namespace Lucene.Net.Index -{ - internal sealed class SegmentTermPositions : SegmentTermDocs, TermPositions - { - private IndexInput proxStream; - private int proxCount; - private int position; - - // the current payload length - private int payloadLength; - // indicates whether the payload of the currend position has - // been read from the proxStream yet - private bool needToLoadPayload; - - // these variables are being used to remember information - // for a lazy skip - private long lazySkipPointer = - 1; - private int lazySkipProxCount = 0; - - internal SegmentTermPositions(SegmentReader p):base(p) - { - this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time - } - - internal override void Seek(TermInfo ti, Term term) - { - base.Seek(ti, term); - if (ti != null) - lazySkipPointer = ti.proxPointer; - - lazySkipProxCount = 0; - proxCount = 0; - payloadLength = 0; - needToLoadPayload = false; - } - - protected override void Dispose(bool disposing) - { - base.Dispose(disposing); - if (proxStream != null) - proxStream.Dispose(); - } - - public int NextPosition() - { - if (currentFieldOmitTermFreqAndPositions) - // This field does not store term freq, positions, payloads - return 0; - // perform lazy skips if neccessary - LazySkip(); - proxCount--; - return position += ReadDeltaPosition(); - } - - private int ReadDeltaPosition() - { - int delta = proxStream.ReadVInt(); - if (currentFieldStoresPayloads) - { - // if the current field stores payloads then - // the position delta is shifted one bit to the left. 
- // if the LSB is set, then we have to read the current - // payload length - if ((delta & 1) != 0) - { - payloadLength = proxStream.ReadVInt(); - } - delta = Number.URShift(delta, 1); - needToLoadPayload = true; - } - return delta; - } - - protected internal override void SkippingDoc() - { - // we remember to skip a document lazily - lazySkipProxCount += freq; - } - - public override bool Next() - { - // we remember to skip the remaining positions of the current - // document lazily - lazySkipProxCount += proxCount; - - if (base.Next()) - { - // run super - proxCount = freq; // note frequency - position = 0; // reset position - return true; - } - return false; - } - - public override int Read(int[] docs, int[] freqs) - { - throw new System.NotSupportedException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead."); - } - - - /// <summary>Called by super.skipTo(). </summary> - protected internal override void SkipProx(long proxPointer, int payloadLength) - { - // we save the pointer, we might have to skip there lazily - lazySkipPointer = proxPointer; - lazySkipProxCount = 0; - proxCount = 0; - this.payloadLength = payloadLength; - needToLoadPayload = false; - } - - private void SkipPositions(int n) - { - System.Diagnostics.Debug.Assert(!currentFieldOmitTermFreqAndPositions); - for (int f = n; f > 0; f--) - { - // skip unread positions - ReadDeltaPosition(); - SkipPayload(); - } - } - - private void SkipPayload() - { - if (needToLoadPayload && payloadLength > 0) - { - proxStream.Seek(proxStream.FilePointer + payloadLength); - } - needToLoadPayload = false; - } - - // It is not always neccessary to move the prox pointer - // to a new document after the freq pointer has been moved. - // Consider for example a phrase query with two terms: - // the freq pointer for term 1 has to move to document x - // to answer the question if the term occurs in that document. 
But - // only if term 2 also matches document x, the positions have to be - // read to figure out if term 1 and term 2 appear next - // to each other in document x and thus satisfy the query. - // So we move the prox pointer lazily to the document - // as soon as positions are requested. - private void LazySkip() - { - if (proxStream == null) - { - // clone lazily - proxStream = (IndexInput) parent.core.proxStream.Clone(); - } - - // we might have to skip the current payload - // if it was not read yet - SkipPayload(); - - if (lazySkipPointer != - 1) - { - proxStream.Seek(lazySkipPointer); - lazySkipPointer = - 1; - } - - if (lazySkipProxCount != 0) - { - SkipPositions(lazySkipProxCount); - lazySkipProxCount = 0; - } - } - - public int PayloadLength - { - get { return payloadLength; } - } - - public byte[] GetPayload(byte[] data, int offset) - { - if (!needToLoadPayload) - { - throw new System.IO.IOException("Either no payload exists at this term position or an attempt was made to load it more than once."); - } - - // read payloads lazily - byte[] retArray; - int retOffset; - if (data == null || data.Length - offset < payloadLength) - { - // the array is too small to store the payload data, - // so we allocate a new one - retArray = new byte[payloadLength]; - retOffset = 0; - } - else - { - retArray = data; - retOffset = offset; - } - proxStream.ReadBytes(retArray, retOffset, payloadLength); - needToLoadPayload = false; - return retArray; - } - - public bool IsPayloadAvailable - { - get { return needToLoadPayload && payloadLength > 0; } - } - } -} \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/SegmentTermVector.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/SegmentTermVector.cs b/src/Lucene.Net.Core/Index/SegmentTermVector.cs deleted file mode 100644 index 1a2a8fb..0000000 --- a/src/Lucene.Net.Core/Index/SegmentTermVector.cs +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -using System; - -namespace Lucene.Net.Index -{ - - - class SegmentTermVector : ITermFreqVector - { - private System.String field; - private System.String[] terms; - private int[] termFreqs; - - internal SegmentTermVector(System.String field, System.String[] terms, int[] termFreqs) - { - this.field = field; - this.terms = terms; - this.termFreqs = termFreqs; - } - - /// <summary> </summary> - /// <value> The number of the field this vector is associated with </value> - public virtual string Field - { - get { return field; } - } - - public override System.String ToString() - { - System.Text.StringBuilder sb = new System.Text.StringBuilder(); - sb.Append('{'); - sb.Append(field).Append(": "); - if (terms != null) - { - for (int i = 0; i < terms.Length; i++) - { - if (i > 0) - sb.Append(", "); - sb.Append(terms[i]).Append('/').Append(termFreqs[i]); - } - } - sb.Append('}'); - - return sb.ToString(); - } - - public virtual int Size - { - get { return terms == null ? 0 : terms.Length; } - } - - public virtual System.String[] GetTerms() - { - return terms; - } - - public virtual int[] GetTermFrequencies() - { - return termFreqs; - } - - public virtual int IndexOf(System.String termText) - { - if (terms == null) - return - 1; - int res = System.Array.BinarySearch(terms, termText, System.StringComparer.Ordinal); - return res >= 0?res:- 1; - } - - public virtual int[] IndexesOf(System.String[] termNumbers, int start, int len) - { - // TODO: there must be a more efficient way of doing this. - // At least, we could advance the lower bound of the terms array - // as we find valid indexes. Also, it might be possible to leverage - // this even more by starting in the middle of the termNumbers array - // and thus dividing the terms array maybe in half with each found index. 
- int[] res = new int[len]; - - for (int i = 0; i < len; i++) - { - res[i] = IndexOf(termNumbers[start + i]); - } - return res; - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/SortedTermVectorMapper.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/SortedTermVectorMapper.cs b/src/Lucene.Net.Core/Index/SortedTermVectorMapper.cs deleted file mode 100644 index 8e32d71..0000000 --- a/src/Lucene.Net.Core/Index/SortedTermVectorMapper.cs +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -using System; -using System.Collections.Generic; -using Lucene.Net.Support; - -namespace Lucene.Net.Index -{ - - /// <summary> Store a sorted collection of <see cref="Lucene.Net.Index.TermVectorEntry" />s. Collects all term information - /// into a single, SortedSet. - /// <br/> - /// NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/positions you will not - /// know what Fields they correlate with. 
- /// <br/> - /// This is not thread-safe - /// </summary> - public class SortedTermVectorMapper:TermVectorMapper - { - private SortedSet<TermVectorEntry> currentSet; - private IDictionary<string, TermVectorEntry> termToTVE = new HashMap<string, TermVectorEntry>(); - private bool storeOffsets; - private bool storePositions; - /// <summary> Stand-in name for the field in <see cref="TermVectorEntry" />.</summary> - public const System.String ALL = "_ALL_"; - - /// <summary> </summary> - /// <param name="comparator">A Comparator for sorting <see cref="TermVectorEntry" />s - /// </param> - public SortedTermVectorMapper(IComparer<TermVectorEntry> comparator) - : this(false, false, comparator) - { - } - - - public SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, IComparer<TermVectorEntry> comparator) - : base(ignoringPositions, ignoringOffsets) - { - currentSet = new SortedSet<TermVectorEntry>(comparator); - } - - /// <summary> </summary> - /// <param name="term">The term to map - /// </param> - /// <param name="frequency">The frequency of the term - /// </param> - /// <param name="offsets">Offset information, may be null - /// </param> - /// <param name="positions">Position information, may be null - /// </param> - //We need to combine any previous mentions of the term - public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) - { - TermVectorEntry entry = termToTVE[term]; - if (entry == null) - { - entry = new TermVectorEntry(ALL, term, frequency, storeOffsets == true?offsets:null, storePositions == true?positions:null); - termToTVE[term] = entry; - currentSet.Add(entry); - } - else - { - entry.Frequency = entry.Frequency + frequency; - if (storeOffsets) - { - TermVectorOffsetInfo[] existingOffsets = entry.GetOffsets(); - //A few diff. 
cases here: offsets is null, existing offsets is null, both are null, same for positions - if (existingOffsets != null && offsets != null && offsets.Length > 0) - { - //copy over the existing offsets - TermVectorOffsetInfo[] newOffsets = new TermVectorOffsetInfo[existingOffsets.Length + offsets.Length]; - Array.Copy(existingOffsets, 0, newOffsets, 0, existingOffsets.Length); - Array.Copy(offsets, 0, newOffsets, existingOffsets.Length, offsets.Length); - entry.SetOffsets(newOffsets); - } - else if (existingOffsets == null && offsets != null && offsets.Length > 0) - { - entry.SetOffsets(offsets); - } - //else leave it alone - } - if (storePositions) - { - int[] existingPositions = entry.GetPositions(); - if (existingPositions != null && positions != null && positions.Length > 0) - { - int[] newPositions = new int[existingPositions.Length + positions.Length]; - Array.Copy(existingPositions, 0, newPositions, 0, existingPositions.Length); - Array.Copy(positions, 0, newPositions, existingPositions.Length, positions.Length); - entry.SetPositions(newPositions); - } - else if (existingPositions == null && positions != null && positions.Length > 0) - { - entry.SetPositions(positions); - } - } - } - } - - public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions) - { - - this.storeOffsets = storeOffsets; - this.storePositions = storePositions; - } - - /// <summary> The TermVectorEntrySet. A SortedSet of <see cref="TermVectorEntry" /> objects. Sort is by the comparator passed into the constructor. - /// <br/> - /// This set will be empty until after the mapping process takes place. - /// - /// </summary> - /// <value> The SortedSet of <see cref="TermVectorEntry" />. 
</value> - public virtual SortedSet<TermVectorEntry> TermVectorEntrySet - { - get { return currentSet; } - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/StaleReaderException.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/StaleReaderException.cs b/src/Lucene.Net.Core/Index/StaleReaderException.cs deleted file mode 100644 index f2e5760..0000000 --- a/src/Lucene.Net.Core/Index/StaleReaderException.cs +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -using System; -using System.Runtime.Serialization; - -namespace Lucene.Net.Index -{ - /// <summary> This exception is thrown when an <see cref="IndexReader" /> - /// tries to make changes to the index (via <see cref="IndexReader.DeleteDocument" /> - ///, <see cref="IndexReader.UndeleteAll" /> - /// or <see cref="IndexReader.SetNorm(int,string,float)" />) - /// but changes have already been committed to the index - /// since this reader was instantiated. When this happens - /// you must open a new reader on the current index to make - /// the changes. 
- /// </summary> - [Serializable] - public class StaleReaderException : System.IO.IOException - { - public StaleReaderException(string message) : base(message) - { - } - - public StaleReaderException(string message, Exception inner) : base(message, inner) - { - } - - protected StaleReaderException( - SerializationInfo info, - StreamingContext context) : base(info, context) - { - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/StoredFieldsWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/StoredFieldsWriter.cs b/src/Lucene.Net.Core/Index/StoredFieldsWriter.cs deleted file mode 100644 index bfe5f43..0000000 --- a/src/Lucene.Net.Core/Index/StoredFieldsWriter.cs +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -using System; - -using RAMOutputStream = Lucene.Net.Store.RAMOutputStream; -using ArrayUtil = Lucene.Net.Util.ArrayUtil; - -namespace Lucene.Net.Index -{ - - /// <summary>This is a DocFieldConsumer that writes stored fields. 
</summary> - sealed class StoredFieldsWriter - { - private void InitBlock() - { - docFreeList = new PerDoc[1]; - } - - internal FieldsWriter fieldsWriter; - internal DocumentsWriter docWriter; - internal FieldInfos fieldInfos; - internal int lastDocID; - - internal PerDoc[] docFreeList; - internal int freeCount; - - public StoredFieldsWriter(DocumentsWriter docWriter, FieldInfos fieldInfos) - { - InitBlock(); - this.docWriter = docWriter; - this.fieldInfos = fieldInfos; - } - - public StoredFieldsWriterPerThread AddThread(DocumentsWriter.DocState docState) - { - return new StoredFieldsWriterPerThread(docState, this); - } - - public void Flush(SegmentWriteState state) - { - lock (this) - { - - if (state.numDocsInStore > 0) - { - // It's possible that all documents seen in this segment - // hit non-aborting exceptions, in which case we will - // not have yet init'd the FieldsWriter: - InitFieldsWriter(); - - // Fill fdx file to include any final docs that we - // skipped because they hit non-aborting exceptions - Fill(state.numDocsInStore - docWriter.DocStoreOffset); - } - - if (fieldsWriter != null) - fieldsWriter.Flush(); - } - } - - private void InitFieldsWriter() - { - if (fieldsWriter == null) - { - System.String docStoreSegment = docWriter.DocStoreSegment; - if (docStoreSegment != null) - { - System.Diagnostics.Debug.Assert(docStoreSegment != null); - fieldsWriter = new FieldsWriter(docWriter.directory, docStoreSegment, fieldInfos); - docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_EXTENSION); - docWriter.AddOpenFile(docStoreSegment + "." 
+ IndexFileNames.FIELDS_INDEX_EXTENSION); - lastDocID = 0; - } - } - } - - public void CloseDocStore(SegmentWriteState state) - { - lock (this) - { - int inc = state.numDocsInStore - lastDocID; - if (inc > 0) - { - InitFieldsWriter(); - Fill(state.numDocsInStore - docWriter.DocStoreOffset); - } - - if (fieldsWriter != null) - { - fieldsWriter.Dispose(); - fieldsWriter = null; - lastDocID = 0; - System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null); - state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); - state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); - - state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); - state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); - - System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; - - if (4 + ((long) state.numDocsInStore) * 8 != state.directory.FileLength(fileName)) - throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName)); - } - } - } - - internal int allocCount; - - internal PerDoc GetPerDoc() - { - lock (this) - { - if (freeCount == 0) - { - allocCount++; - if (allocCount > docFreeList.Length) - { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length); - docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)]; - } - return new PerDoc(this); - } - else - return docFreeList[--freeCount]; - } - } - - internal void Abort() - { - lock (this) - { - if (fieldsWriter != null) - { - try - { - fieldsWriter.Dispose(); - } - catch (System.Exception) - { - } - fieldsWriter = null; 
- lastDocID = 0; - } - } - } - - /// <summary>Fills in any hole in the docIDs </summary> - internal void Fill(int docID) - { - int docStoreOffset = docWriter.DocStoreOffset; - - // We must "catch up" for all docs before us - // that had no stored fields: - int end = docID + docStoreOffset; - while (lastDocID < end) - { - fieldsWriter.SkipDocument(); - lastDocID++; - } - } - - internal void FinishDocument(PerDoc perDoc) - { - lock (this) - { - System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument start")); - InitFieldsWriter(); - - Fill(perDoc.docID); - - // Append stored fields to the real FieldsWriter: - fieldsWriter.FlushDocument(perDoc.numStoredFields, perDoc.fdt); - lastDocID++; - perDoc.Reset(); - Free(perDoc); - System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument end")); - } - } - - public bool FreeRAM() - { - return false; - } - - internal void Free(PerDoc perDoc) - { - lock (this) - { - System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length); - System.Diagnostics.Debug.Assert(0 == perDoc.numStoredFields); - System.Diagnostics.Debug.Assert(0 == perDoc.fdt.Length); - System.Diagnostics.Debug.Assert(0 == perDoc.fdt.FilePointer); - docFreeList[freeCount++] = perDoc; - } - } - - internal class PerDoc:DocumentsWriter.DocWriter - { - public PerDoc(StoredFieldsWriter enclosingInstance) - { - InitBlock(enclosingInstance); - } - private void InitBlock(StoredFieldsWriter enclosingInstance) - { - this.enclosingInstance = enclosingInstance; - buffer = enclosingInstance.docWriter.NewPerDocBuffer(); - fdt = new RAMOutputStream(buffer); - } - private StoredFieldsWriter enclosingInstance; - public StoredFieldsWriter Enclosing_Instance - { - get - { - return enclosingInstance; - } - - } - - internal DocumentsWriter.PerDocBuffer buffer ; - internal RAMOutputStream fdt; - internal int numStoredFields; - - internal void Reset() - { - fdt.Reset(); - buffer.Recycle(); - numStoredFields = 0; 
- } - - public override void Abort() - { - Reset(); - Enclosing_Instance.Free(this); - } - - public override long SizeInBytes() - { - return buffer.SizeInBytes; - } - - public override void Finish() - { - Enclosing_Instance.FinishDocument(this); - } - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/StoredFieldsWriterPerThread.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/StoredFieldsWriterPerThread.cs b/src/Lucene.Net.Core/Index/StoredFieldsWriterPerThread.cs deleted file mode 100644 index 0440c12..0000000 --- a/src/Lucene.Net.Core/Index/StoredFieldsWriterPerThread.cs +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -using System; -using Lucene.Net.Documents; -using IndexOutput = Lucene.Net.Store.IndexOutput; - -namespace Lucene.Net.Index -{ - - sealed class StoredFieldsWriterPerThread - { - - internal FieldsWriter localFieldsWriter; - internal StoredFieldsWriter storedFieldsWriter; - internal DocumentsWriter.DocState docState; - - internal StoredFieldsWriter.PerDoc doc; - - public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) - { - this.storedFieldsWriter = storedFieldsWriter; - this.docState = docState; - localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos); - } - - public void StartDocument() - { - if (doc != null) - { - // Only happens if previous document hit non-aborting - // exception while writing stored fields into - // localFieldsWriter: - doc.Reset(); - doc.docID = docState.docID; - } - } - - public void AddField(IFieldable field, FieldInfo fieldInfo) - { - if (doc == null) - { - doc = storedFieldsWriter.GetPerDoc(); - doc.docID = docState.docID; - localFieldsWriter.SetFieldsStream(doc.fdt); - System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields); - System.Diagnostics.Debug.Assert(0 == doc.fdt.Length); - System.Diagnostics.Debug.Assert(0 == doc.fdt.FilePointer); - } - - localFieldsWriter.WriteField(fieldInfo, field); - System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField")); - doc.numStoredFields++; - } - - public DocumentsWriter.DocWriter FinishDocument() - { - // If there were any stored fields in this doc, doc will - // be non-null; else it's null. 
- try - { - return doc; - } - finally - { - doc = null; - } - } - - public void Abort() - { - if (doc != null) - { - doc.Abort(); - doc = null; - } - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/TermBuffer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/TermBuffer.cs b/src/Lucene.Net.Core/Index/TermBuffer.cs deleted file mode 100644 index 801cc04..0000000 --- a/src/Lucene.Net.Core/Index/TermBuffer.cs +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
 */

using System;
using Lucene.Net.Support;
using IndexInput = Lucene.Net.Store.IndexInput;
using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;

namespace Lucene.Net.Index
{
    /// <summary>Reusable buffer holding one term (field name plus text) while scanning
    /// a term dictionary. It keeps both the UTF-16 text and its UTF-8 byte form so
    /// that successive <see cref="Read"/> calls can decode only the suffix bytes that
    /// changed (the terms file stores terms with a shared prefix), and it caches the
    /// materialized <see cref="Term"/> instance.</summary>
    sealed class TermBuffer : System.ICloneable
    {
        private System.String field;
        private Term term; // cached Term instance; null until ToTerm() builds it
        private bool preUTF8Strings; // true if strings are stored in modified UTF8 encoding (LUCENE-510)
        private bool dirty; // true if text was set externally (ie not read via UTF8 bytes)

        private UnicodeUtil.UTF16Result text = new UnicodeUtil.UTF16Result();
        private UnicodeUtil.UTF8Result bytes = new UnicodeUtil.UTF8Result();

        /// <summary>Compares by field name first, then by term text.
        /// Follows the usual CompareTo contract (&lt;0, 0, &gt;0).</summary>
        public int CompareTo(TermBuffer other)
        {
            if ((System.Object) field == (System.Object) other.field)
                // fields are interned, so reference comparison identifies equal fields
                return CompareChars(text.result, text.length, other.text.result, other.text.length);
            else
                return String.CompareOrdinal(field, other.field);
        }

        // Ordinal (UTF-16 code unit) comparison of two char arrays up to the given
        // lengths; ties are broken by length, mirroring String.CompareOrdinal.
        private static int CompareChars(char[] chars1, int len1, char[] chars2, int len2)
        {
            int end = len1 < len2?len1:len2;
            for (int k = 0; k < end; k++)
            {
                char c1 = chars1[k];
                char c2 = chars2[k];
                if (c1 != c2)
                {
                    return c1 - c2;
                }
            }
            return len1 - len2;
        }

        /// <summary>Call this if the IndexInput passed to <see cref="Read" />
        /// stores terms in the "modified UTF8" (pre LUCENE-510) format.
        /// </summary>
        internal void SetPreUTF8Strings()
        {
            preUTF8Strings = true;
        }

        /// <summary>Reads the next term from <paramref name="input"/>: a shared-prefix
        /// length, a suffix length, the suffix chars (pre-UTF8 format) or bytes, and
        /// finally the field number, resolved via <paramref name="fieldInfos"/>.</summary>
        public void Read(IndexInput input, FieldInfos fieldInfos)
        {
            this.term = null; // invalidate cache
            int start = input.ReadVInt();
            int length = input.ReadVInt();
            int totalLength = start + length;
            if (preUTF8Strings)
            {
                text.SetLength(totalLength);
                input.ReadChars(text.result, start, length);
            }
            else
            {
                if (dirty)
                {
                    // Fully convert all bytes since bytes is dirty
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
                    dirty = false;
                }
                else
                {
                    // Incrementally convert only the UTF8 bytes that are new:
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
                }
            }
            this.field = fieldInfos.FieldName(input.ReadVInt());
        }

        /// <summary>Sets this buffer from an existing <see cref="Term"/>; a null term
        /// resets the buffer. Marks the cached UTF-8 bytes dirty since the text was
        /// set externally rather than decoded from bytes.</summary>
        public void Set(Term term)
        {
            if (term == null)
            {
                Reset();
                return ;
            }
            System.String termText = term.Text;
            int termLen = termText.Length;
            text.SetLength(termLen);
            TextSupport.GetCharsFromString(termText, 0, termLen, text.result, 0);
            dirty = true;
            field = term.Field;
            this.term = term;
        }

        /// <summary>Copies another buffer's state into this one (bytes become dirty).</summary>
        public void Set(TermBuffer other)
        {
            text.CopyText(other.text);
            dirty = true;
            field = other.field;
            term = other.term;
        }

        /// <summary>Clears the buffer to the "unset" state: null field and term, empty text.</summary>
        public void Reset()
        {
            field = null;
            text.SetLength(0);
            term = null;
            dirty = true;
        }

        /// <summary>Returns the buffered term as a <see cref="Term"/>, or null when unset.
        /// The Term is built lazily and cached until the buffer changes.</summary>
        public Term ToTerm()
        {
            if (field == null)
                // unset
                return null;

            if (term == null)
                term = new Term(field, new System.String(text.result, 0, text.length), false);

            return term;
        }

        /// <summary>Clones this buffer; the clone shares field/term references but gets
        /// fresh text/bytes buffers, with bytes marked dirty so they are regenerated
        /// on the next incremental read.</summary>
        public System.Object Clone()
        {
            TermBuffer clone = null;
            try
            {
                clone = (TermBuffer) base.MemberwiseClone();
            }
            catch (System.Exception)
            {
                // MemberwiseClone does not throw in practice; this mirrors the Java
                // CloneNotSupportedException handling from the original port.
            }

            clone.dirty = true;
            clone.bytes = new UnicodeUtil.UTF8Result();
            clone.text = new UnicodeUtil.UTF16Result();
            clone.text.CopyText(text);
            return clone;
        }
    }
}
        /// <summary>Sets this to the data for a term.
        /// The enumeration is reset to the start of the data for this term.
        /// </summary>
        void Seek(Term term);

        /// <summary>Sets this to the data for the current term in a <see cref="TermEnum" />.
        /// This may be optimized in some implementations.
        /// </summary>
        void Seek(TermEnum termEnum);

        /// <summary>Returns the current document number. <p/> This is invalid until <see cref="Next()" />
        /// is called for the first time.
        /// </summary>
        int Doc { get; }

        /// <summary>Returns the frequency of the term within the current document. <p/> This
        /// is invalid until <see cref="Next()" /> is called for the first time.
        /// </summary>
        int Freq { get; }

        /// <summary>Moves to the next pair in the enumeration. <p/> Returns true iff there is
        /// such a next pair in the enumeration.
        /// </summary>
        bool Next();

        /// <summary>Attempts to read multiple entries from the enumeration, up to length of
        /// <i>docs</i>. Document numbers are stored in <i>docs</i>, and term
        /// frequencies are stored in <i>freqs</i>. The <i>freqs</i> array must be as
        /// long as the <i>docs</i> array.
        ///
        /// <p/>Returns the number of entries read. Zero is only returned when the
        /// stream has been exhausted.
        /// </summary>
        int Read(int[] docs, int[] freqs);

        /// <summary>Skips entries to the first beyond the current whose document number is
        /// greater than or equal to <i>target</i>. <p/>Returns true iff there is such
        /// an entry. <p/>Behaves as if written: <code>
        /// boolean skipTo(int target) {
        ///     do {
        ///         if (!next())
        ///             return false;
        ///     } while (target > doc());
        ///     return true;
        /// }
        /// </code>
        /// Some implementations are considerably more efficient than that.
        /// </summary>
        bool SkipTo(int target);

        // TODO: Determine which release this will be removed from
        /// <summary>Frees associated resources. </summary>
        [Obsolete("Use Dispose() instead")]
        void Close();
    }
}
True if one exists.</summary> - public abstract bool Next(); - - /// <summary>Returns the current Term in the enumeration.</summary> - public abstract Term Term { get; } - - /// <summary>Returns the docFreq of the current Term in the enumeration.</summary> - public abstract int DocFreq(); - - /// <summary>Closes the enumeration to further activity, freeing resources. </summary> - [Obsolete("Use Dispose() instead")] - public void Close() - { - Dispose(); - } - - /// <summary>Closes the enumeration to further activity, freeing resources. </summary> - public void Dispose() - { - Dispose(true); - } - - protected abstract void Dispose(bool disposing); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/TermFreqVector.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/TermFreqVector.cs b/src/Lucene.Net.Core/Index/TermFreqVector.cs deleted file mode 100644 index 82df77d..0000000 --- a/src/Lucene.Net.Core/Index/TermFreqVector.cs +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
 */

using System;
using Lucene.Net.Documents;

namespace Lucene.Net.Index
{
    /// <summary>Provides access to stored term vector of
    /// a document field. The vector consists of the name of the field, an array of the terms that occur in the field of the
    /// <see cref="Lucene.Net.Documents.Document" /> and a parallel array of frequencies. Thus, getTermFrequencies()[5] corresponds with the
    /// frequency of getTerms()[5], assuming there are at least 5 terms in the Document.
    /// </summary>
    public interface ITermFreqVector
    {
        /// <summary> The <see cref="IFieldable" /> name. </summary>
        /// <value> The name of the field this vector is associated with. </value>
        string Field { get; }

        /// <value> The number of terms in the term vector. </value>
        int Size { get; }

        /// <returns> An Array of term texts in ascending order.
        /// </returns>
        System.String[] GetTerms();

        /// <summary>Array of term frequencies. Locations of the array correspond one to one
        /// to the terms in the array obtained from <c>getTerms</c>
        /// method. Each location in the array contains the number of times this
        /// term occurs in the document or the document field.
        /// </summary>
        int[] GetTermFrequencies();

        /// <summary>Return an index in the term numbers array returned from
        /// <c>getTerms</c> at which the term with the specified
        /// <c>term</c> appears. If this term does not appear in the array,
        /// return -1.
        /// </summary>
        int IndexOf(System.String term);

        /// <summary>Just like <c>indexOf(int)</c> but searches for a number of terms
        /// at the same time. Returns an array that has the same size as the number
        /// of terms searched for, each slot containing the result of searching for
        /// that term number.
        /// </summary>
        /// <param name="terms">array containing terms to look for
        /// </param>
        /// <param name="start">index in the array where the list of terms starts
        /// </param>
        /// <param name="len">the number of terms in the list
        /// </param>
        int[] IndexesOf(System.String[] terms, int start, int len);
    }
}
</summary> - internal int docFreq = 0; - - internal long freqPointer = 0; - internal long proxPointer = 0; - internal int skipOffset; - - internal TermInfo() - { - } - - internal TermInfo(int df, long fp, long pp) - { - docFreq = df; - freqPointer = fp; - proxPointer = pp; - } - - internal TermInfo(TermInfo ti) - { - docFreq = ti.docFreq; - freqPointer = ti.freqPointer; - proxPointer = ti.proxPointer; - skipOffset = ti.skipOffset; - } - - internal void Set(int docFreq, long freqPointer, long proxPointer, int skipOffset) - { - this.docFreq = docFreq; - this.freqPointer = freqPointer; - this.proxPointer = proxPointer; - this.skipOffset = skipOffset; - } - - internal void Set(TermInfo ti) - { - docFreq = ti.docFreq; - freqPointer = ti.freqPointer; - proxPointer = ti.proxPointer; - skipOffset = ti.skipOffset; - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/TermInfosReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/TermInfosReader.cs b/src/Lucene.Net.Core/Index/TermInfosReader.cs deleted file mode 100644 index 2257b1f..0000000 --- a/src/Lucene.Net.Core/Index/TermInfosReader.cs +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Lucene.Net.Util.Cache;
using Directory = Lucene.Net.Store.Directory;

namespace Lucene.Net.Index
{
    /// <summary>This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
    /// Directory. Pairs are accessed either by Term or by ordinal position the
    /// set. A subsampled in-memory index (every totalIndexInterval-th term) is
    /// binary-searched to find a seek point, then the on-disk enumeration is
    /// scanned sequentially from there.
    /// </summary>
    sealed class TermInfosReader : IDisposable
    {
        private readonly Directory directory;
        private readonly String segment;
        private readonly FieldInfos fieldInfos;

        private bool isDisposed;

        // Each thread gets its own cloned enumerator and LRU cache so lookups
        // need no locking.
        private readonly CloseableThreadLocal<ThreadResources> threadResources = new CloseableThreadLocal<ThreadResources>();
        private readonly SegmentTermEnum origEnum;
        private readonly long size;

        // Parallel arrays holding the subsampled terms index (null when the
        // index was not loaded, i.e. indexDivisor == -1).
        private readonly Term[] indexTerms;
        private readonly TermInfo[] indexInfos;
        private readonly long[] indexPointers;

        // Number of on-disk terms between two consecutive index entries
        // (origEnum.indexInterval * indexDivisor), or -1 when no index is loaded.
        private readonly int totalIndexInterval;

        private const int DEFAULT_CACHE_SIZE = 1024;

        /// <summary> Per-thread resources managed by ThreadLocal</summary>
        private sealed class ThreadResources
        {
            internal SegmentTermEnum termEnum;

            // Used for caching the least recently looked-up Terms
            internal Cache<Term, TermInfo> termInfoCache;
        }

        /// <summary>Opens the .tis enumeration and, unless <paramref name="indexDivisor"/>
        /// is -1, loads every indexDivisor-th entry of the .tii index into memory.</summary>
        /// <exception cref="System.ArgumentException">if indexDivisor is neither -1 nor positive</exception>
        internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
        {
            bool success = false;

            if (indexDivisor < 1 && indexDivisor != -1)
            {
                throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
            }

            try
            {
                directory = dir;
                segment = seg;
                fieldInfos = fis;

                origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
                size = origEnum.size;

                if (indexDivisor != -1)
                {
                    // Load terms index
                    totalIndexInterval = origEnum.indexInterval * indexDivisor;
                    var indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);

                    try
                    {
                        int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index

                        indexTerms = new Term[indexSize];
                        indexInfos = new TermInfo[indexSize];
                        indexPointers = new long[indexSize];

                        for (int i = 0; indexEnum.Next(); i++)
                        {
                            indexTerms[i] = indexEnum.Term;
                            indexInfos[i] = indexEnum.TermInfo();
                            indexPointers[i] = indexEnum.indexPointer;

                            // Skip indexDivisor-1 entries so only every
                            // indexDivisor-th index term is kept in RAM.
                            for (int j = 1; j < indexDivisor; j++)
                                if (!indexEnum.Next())
                                    break;
                        }
                    }
                    finally
                    {
                        indexEnum.Close();
                    }
                }
                else
                {
                    // Do not load terms index:
                    totalIndexInterval = -1;
                    indexTerms = null;
                    indexInfos = null;
                    indexPointers = null;
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }

        public int SkipInterval
        {
            get { return origEnum.skipInterval; }
        }

        public int MaxSkipLevels
        {
            get { return origEnum.maxSkipLevels; }
        }

        public void Dispose()
        {
            if (isDisposed) return;

            // Move to protected method if class becomes unsealed
            if (origEnum != null)
                origEnum.Dispose();
            threadResources.Dispose();

            isDisposed = true;
        }

        /// <summary>Returns the number of term/value pairs in the set. </summary>
        internal long Size()
        {
            return size;
        }

        // Lazily creates this thread's enumerator and LRU cache on first use.
        private ThreadResources GetThreadResources()
        {
            ThreadResources resources = threadResources.Get();
            if (resources == null)
            {
                resources = new ThreadResources
                    {termEnum = Terms(), termInfoCache = new SimpleLRUCache<Term, TermInfo>(DEFAULT_CACHE_SIZE)};
                // Cache does not have to be thread-safe, it is only used by one thread at the same time
                threadResources.Set(resources);
            }
            return resources;
        }

        /// <summary>Returns the offset of the greatest index entry which is less than or equal to term.</summary>
        private int GetIndexOffset(Term term)
        {
            int lo = 0; // binary search indexTerms[]
            int hi = indexTerms.Length - 1;

            while (hi >= lo)
            {
                // Unsigned shift avoids overflow for large (lo + hi).
                int mid = Number.URShift((lo + hi), 1);
                int delta = term.CompareTo(indexTerms[mid]);
                if (delta < 0)
                    hi = mid - 1;
                else if (delta > 0)
                    lo = mid + 1;
                else
                    return mid;
            }
            return hi;
        }

        // Positions the enumerator at the given index entry; the ordinal position
        // of that entry is indexOffset * totalIndexInterval - 1.
        private void SeekEnum(SegmentTermEnum enumerator, int indexOffset)
        {
            enumerator.Seek(indexPointers[indexOffset], ((long)indexOffset * totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]);
        }

        /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
        internal TermInfo Get(Term term)
        {
            return Get(term, true);
        }

        /// <summary>Returns the TermInfo for a Term in the set, or null.
        /// When <paramref name="useCache"/> is true the per-thread LRU cache is
        /// consulted and updated. </summary>
        private TermInfo Get(Term term, bool useCache)
        {
            if (size == 0)
                return null;

            EnsureIndexIsRead();

            TermInfo ti;
            ThreadResources resources = GetThreadResources();
            Cache<Term, TermInfo> cache = null;

            if (useCache)
            {
                cache = resources.termInfoCache;
                // check the cache first if the term was recently looked up
                ti = cache.Get(term);
                if (ti != null)
                {
                    return ti;
                }
            }

            // optimize sequential access: first try scanning cached enum w/o seeking
            SegmentTermEnum enumerator = resources.termEnum;
            if (enumerator.Term != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term) >= 0))
            {
                int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
                if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
                {
                    // no need to seek

                    int numScans = enumerator.ScanTo(term);
                    if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0)
                    {
                        ti = enumerator.TermInfo();
                        if (cache != null && numScans > 1)
                        {
                            // we only want to put this TermInfo into the cache if
                            // scanEnum skipped more than one dictionary entry.
                            // This prevents RangeQueries or WildcardQueries to
                            // wipe out the cache when they iterate over a large numbers
                            // of terms in order
                            cache.Put(term, ti);
                        }
                    }
                    else
                    {
                        ti = null;
                    }

                    return ti;
                }
            }

            // random-access: must seek
            SeekEnum(enumerator, GetIndexOffset(term));
            enumerator.ScanTo(term);
            if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0)
            {
                ti = enumerator.TermInfo();
                if (cache != null)
                {
                    cache.Put(term, ti);
                }
            }
            else
            {
                ti = null;
            }
            return ti;
        }

        // Guards methods that require the terms index (indexDivisor != -1).
        private void EnsureIndexIsRead()
        {
            if (indexTerms == null)
            {
                throw new SystemException("terms index was not loaded when this reader was created");
            }
        }

        /// <summary>Returns the position of a Term in the set or -1. </summary>
        internal long GetPosition(Term term)
        {
            if (size == 0)
                return -1;

            EnsureIndexIsRead();
            int indexOffset = GetIndexOffset(term);

            SegmentTermEnum enumerator = GetThreadResources().termEnum;
            SeekEnum(enumerator, indexOffset);

            while (term.CompareTo(enumerator.Term) > 0 && enumerator.Next())
            {
            }

            // NOTE(review): if the enumeration is exhausted, enumerator.Term may be
            // null here and this relies on Term.CompareTo tolerating a null argument
            // -- confirm against Term's implementation.
            if (term.CompareTo(enumerator.Term) == 0)
                return enumerator.position;
            else
                return -1;
        }

        /// <summary>Returns an enumeration of all the Terms and TermInfos in the set. </summary>
        public SegmentTermEnum Terms()
        {
            return (SegmentTermEnum) origEnum.Clone();
        }

        /// <summary>Returns an enumeration of terms starting at or after the named term. </summary>
        public SegmentTermEnum Terms(Term term)
        {
            // don't use the cache in this call because we want to reposition the
            // enumeration
            Get(term, false);
            return (SegmentTermEnum) GetThreadResources().termEnum.Clone();
        }
    }
}
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using Directory = Lucene.Net.Store.Directory;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;

namespace Lucene.Net.Index
{
    /// <summary>This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
    /// Directory. A TermInfos can be written once, in order. Two instances work as a
    /// pair: the main writer emits the .tis file while its <c>other</c> emits the
    /// .tii index file containing every indexInterval-th term.
    /// </summary>
    sealed class TermInfosWriter : IDisposable
    {
        /// <summary>The file format version, a negative number. </summary>
        public const int FORMAT = -3;

        // Changed strings to true utf8 with length-in-bytes not
        // length-in-chars
        public const int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4;

        // NOTE: always change this if you switch to a new format!
        public static readonly int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;

        private bool isDisposed;

        private FieldInfos fieldInfos;
        private IndexOutput output;
        private TermInfo lastTi = new TermInfo(); // previous term's pointers, for delta encoding
        private long size; // number of terms written so far

        // TODO: the default values for these two parameters should be settable from
        // IndexWriter. However, once that's done, folks will start setting them to
        // ridiculous values and complaining that things don't work well, as with
        // mergeFactor. So, let's wait until a number of folks find that alternate
        // values work better. Note that both of these values are stored in the
        // segment, so that it's safe to change these w/o rebuilding all indexes.

        /// <summary>Expert: The fraction of terms in the "dictionary" which should be stored
        /// in RAM. Smaller values use more memory, but make searching slightly
        /// faster, while larger values use less memory and make searching slightly
        /// slower. Searching is typically not dominated by dictionary lookup, so
        /// tweaking this is rarely useful.
        /// </summary>
        internal int indexInterval = 128;

        /// <summary>Expert: The fraction of <see cref="TermDocs" /> entries stored in skip tables,
        /// used to accelerate <see cref="TermDocs.SkipTo(int)" />. Larger values result in
        /// smaller indexes, greater acceleration, but fewer accelerable cases, while
        /// smaller values result in bigger indexes, less acceleration and more
        /// accelerable cases. More detailed experiments would be useful here.
        /// </summary>
        internal int skipInterval = 16;

        /// <summary>Expert: The maximum number of skip levels. Smaller values result in
        /// slightly smaller indexes, but slower skipping in big posting lists.
        /// </summary>
        internal int maxSkipLevels = 10;

        private long lastIndexPointer; // previous .tis pointer written into the index file
        private bool isIndex; // true when this instance writes the .tii file
        private byte[] lastTermBytes = new byte[10]; // UTF-8 bytes of the previous term, for prefix sharing
        private int lastTermBytesLength = 0;
        private int lastFieldNumber = -1;

        // The paired writer: for a .tis writer this is the .tii writer and vice versa.
        private TermInfosWriter other;
        private UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result();

        /// <summary>Creates the main (.tis) writer and its paired index (.tii) writer.</summary>
        internal TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval)
        {
            Initialize(directory, segment, fis, interval, false);
            other = new TermInfosWriter(directory, segment, fis, interval, true);
            other.other = this;
        }

        private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval, bool isIndex)
        {
            Initialize(directory, segment, fis, interval, isIndex);
        }

        // Opens the output file and writes the fixed header shared by .tis and .tii.
        private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
        {
            indexInterval = interval;
            fieldInfos = fis;
            isIndex = isi;
            output = directory.CreateOutput(segment + (isIndex?".tii":".tis"));
            output.WriteInt(FORMAT_CURRENT); // write format
            output.WriteLong(0); // leave space for size
            output.WriteInt(indexInterval); // write indexInterval
            output.WriteInt(skipInterval); // write skipInterval
            output.WriteInt(maxSkipLevels); // write maxSkipLevels
            System.Diagnostics.Debug.Assert(InitUTF16Results());
        }

        /// <summary>Converts the term's text to UTF-8 and delegates to the byte-based Add.</summary>
        internal void Add(Term term, TermInfo ti)
        {
            UnicodeUtil.UTF16toUTF8(term.Text, 0, term.Text.Length, utf8Result);
            Add(fieldInfos.FieldNumber(term.Field), utf8Result.result, utf8Result.length, ti);
        }

        // Currently used only by assert statements
        internal UnicodeUtil.UTF16Result utf16Result1;
        internal UnicodeUtil.UTF16Result utf16Result2;

        // Currently used only by assert statements
        private bool InitUTF16Results()
        {
            utf16Result1 = new UnicodeUtil.UTF16Result();
            utf16Result2 = new UnicodeUtil.UTF16Result();
            return true;
        }

        // Currently used only by assert statement. Compares the incoming term
        // against the previously-added one in UTF-16 order to verify terms
        // arrive sorted; field names compare first, then term text.
        private int CompareToLastTerm(int fieldNumber, byte[] termBytes, int termBytesLength)
        {

            if (lastFieldNumber != fieldNumber)
            {
                int cmp = String.CompareOrdinal(fieldInfos.FieldName(lastFieldNumber), fieldInfos.FieldName(fieldNumber));
                // If there is a field named "" (empty string) then we
                // will get 0 on this comparison, yet, it's "OK". But
                // it's not OK if two different field numbers map to
                // the same name.
                if (cmp != 0 || lastFieldNumber != -1)
                    return cmp;
            }

            UnicodeUtil.UTF8toUTF16(lastTermBytes, 0, lastTermBytesLength, utf16Result1);
            UnicodeUtil.UTF8toUTF16(termBytes, 0, termBytesLength, utf16Result2);
            int len;
            if (utf16Result1.length < utf16Result2.length)
                len = utf16Result1.length;
            else
                len = utf16Result2.length;

            for (int i = 0; i < len; i++)
            {
                char ch1 = utf16Result1.result[i];
                char ch2 = utf16Result2.result[i];
                if (ch1 != ch2)
                    return ch1 - ch2;
            }
            return utf16Result1.length - utf16Result2.length;
        }

        /// <summary>Adds a new &lt;&lt;fieldNumber, termBytes&gt;, TermInfo&gt; pair to the set.
        /// Term must be lexicographically greater than all previous Terms added.
        /// TermInfo pointers must be positive and greater than all previous.
        /// </summary>
        internal void Add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti)
        {

            System.Diagnostics.Debug.Assert(CompareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 ||
                (isIndex && termBytesLength == 0 && lastTermBytesLength == 0),
                "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) + " (number " + fieldNumber + ")" +
                " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" +
                " text=" + System.Text.Encoding.UTF8.GetString(termBytes, 0, termBytesLength) + " lastText=" + System.Text.Encoding.UTF8.GetString(lastTermBytes, 0, lastTermBytesLength));

            System.Diagnostics.Debug.Assert(ti.freqPointer >= lastTi.freqPointer, "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
            System.Diagnostics.Debug.Assert(ti.proxPointer >= lastTi.proxPointer, "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");

            // Every indexInterval-th term of the .tis file is mirrored into the
            // .tii index via the paired writer.
            if (!isIndex && size % indexInterval == 0)
                other.Add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term

            WriteTerm(fieldNumber, termBytes, termBytesLength); // write term

            output.WriteVInt(ti.docFreq); // write doc freq
            output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
            output.WriteVLong(ti.proxPointer - lastTi.proxPointer);

            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                output.WriteVLong(other.output.FilePointer - lastIndexPointer);
                lastIndexPointer = other.output.FilePointer; // write pointer
            }

            lastFieldNumber = fieldNumber;
            lastTi.Set(ti);
            size++;
        }

        // Writes one term as <shared prefix length, suffix length, suffix bytes,
        // field number> and remembers its bytes for the next prefix computation.
        private void WriteTerm(int fieldNumber, byte[] termBytes, int termBytesLength)
        {

            // TODO: UTF16toUTF8 could tell us this prefix
            // Compute prefix in common with last term:
            int start = 0;
            int limit = termBytesLength < lastTermBytesLength?termBytesLength:lastTermBytesLength;
            while (start < limit)
            {
                if (termBytes[start] != lastTermBytes[start])
                    break;
                start++;
            }

            int length = termBytesLength - start;
            output.WriteVInt(start); // write shared prefix length
            output.WriteVInt(length); // write delta length
            output.WriteBytes(termBytes, start, length); // write delta bytes
            output.WriteVInt(fieldNumber); // write field num
            if (lastTermBytes.Length < termBytesLength)
            {
                // Grow by 1.5x; only the shared prefix needs to be copied over
                // since the suffix is overwritten just below.
                byte[] newArray = new byte[(int) (termBytesLength * 1.5)];
                Array.Copy(lastTermBytes, 0, newArray, 0, start);
                lastTermBytes = newArray;
            }
            Array.Copy(termBytes, start, lastTermBytes, start, length);
            lastTermBytesLength = termBytesLength;
        }

        /// <summary>Called to complete TermInfos creation. Backfills the size field in
        /// the header, closes the output, and disposes the paired index writer.</summary>
        public void Dispose()
        {
            // Move to protected method if class becomes unsealed
            if (isDisposed) return;

            output.Seek(4); // write size after format
            output.WriteLong(size);
            output.Dispose();

            if (!isIndex)
                other.Dispose();

            isDisposed = true;
        }
    }
}
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -using System; - -namespace Lucene.Net.Index -{ - - /// <summary>Extends <c>TermFreqVector</c> to provide additional information about - /// positions in which each of the terms is found. A TermPositionVector not necessarily - /// contains both positions and offsets, but at least one of these arrays exists. - /// </summary> - public interface TermPositionVector:ITermFreqVector - { - - /// <summary>Returns an array of positions in which the term is found. - /// Terms are identified by the index at which its number appears in the - /// term String array obtained from the <c>indexOf</c> method. - /// May return null if positions have not been stored. - /// </summary> - int[] GetTermPositions(int index); - - /// <summary> Returns an array of TermVectorOffsetInfo in which the term is found. - /// May return null if offsets have not been stored. 
- /// - /// </summary> - /// <seealso cref="Lucene.Net.Analysis.Token"> - /// - /// </seealso> - /// <param name="index">The position in the array to get the offsets from - /// </param> - /// <returns> An array of TermVectorOffsetInfo objects or the empty list - /// </returns> - TermVectorOffsetInfo[] GetOffsets(int index); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/TermPositions.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/TermPositions.cs b/src/Lucene.Net.Core/Index/TermPositions.cs deleted file mode 100644 index 0d3c42b..0000000 --- a/src/Lucene.Net.Core/Index/TermPositions.cs +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -using System; - -namespace Lucene.Net.Index -{ - - /// <summary> TermPositions provides an interface for enumerating the <document, - /// frequency, <position>* > tuples for a term. <p/> The document and - /// frequency are the same as for a TermDocs. The positions portion lists the ordinal - /// positions of each occurrence of a term in a document. 
- /// - /// </summary> - /// <seealso cref="IndexReader.TermPositions()"> - /// </seealso> - - public interface TermPositions : TermDocs - { - /// <summary>Returns next position in the current document. It is an error to call - /// this more than <see cref="TermDocs.Freq()" /> times - /// without calling <see cref="TermDocs.Next()" /><p/> This is - /// invalid until <see cref="TermDocs.Next()" /> is called for - /// the first time. - /// </summary> - int NextPosition(); - - /// <summary> Returns the length of the payload at the current term position. - /// This is invalid until <see cref="NextPosition()" /> is called for - /// the first time.<br/> - /// </summary> - /// <value> length of the current payload in number of bytes </value> - int PayloadLength { get; } - - /// <summary> Returns the payload data at the current term position. - /// This is invalid until <see cref="NextPosition()" /> is called for - /// the first time. - /// This method must not be called more than once after each call - /// of <see cref="NextPosition()" />. However, payloads are loaded lazily, - /// so if the payload data for the current position is not needed, - /// this method may not be called at all for performance reasons.<br/> - /// - /// </summary> - /// <param name="data">the array into which the data of this payload is to be - /// stored, if it is big enough; otherwise, a new byte[] array - /// is allocated for this purpose. - /// </param> - /// <param name="offset">the offset in the array into which the data of this payload - /// is to be stored. - /// </param> - /// <returns> a byte[] array containing the data of this payload - /// </returns> - /// <throws> IOException </throws> - byte[] GetPayload(byte[] data, int offset); - - /// <summary> Checks if a payload can be loaded at this position. - /// <p/> - /// Payloads can only be loaded once per call to - /// <see cref="NextPosition()" />. 
- /// - /// </summary> - /// <value> true if there is a payload available at this position that can be loaded </value> - bool IsPayloadAvailable { get; } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/228b970a/src/Lucene.Net.Core/Index/TermVectorEntry.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Index/TermVectorEntry.cs b/src/Lucene.Net.Core/Index/TermVectorEntry.cs deleted file mode 100644 index 8aa2080..0000000 --- a/src/Lucene.Net.Core/Index/TermVectorEntry.cs +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -using System; - -namespace Lucene.Net.Index -{ - - /// <summary> Convenience class for holding TermVector information.</summary> - public class TermVectorEntry - { - private System.String field; - private System.String term; - private int frequency; - private TermVectorOffsetInfo[] offsets; - private int[] positions; - - - public TermVectorEntry() - { - } - - public TermVectorEntry(System.String field, System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) - { - this.field = field; - this.term = term; - this.frequency = frequency; - this.offsets = offsets; - this.positions = positions; - } - - - public virtual string Field - { - get { return field; } - } - - public virtual int Frequency - { - get { return frequency; } - internal set { this.frequency = value; } - } - - internal virtual void SetOffsets(TermVectorOffsetInfo[] value) - { - offsets = value; - } - - public virtual TermVectorOffsetInfo[] GetOffsets() - { - return offsets; - } - - internal virtual void SetPositions(int[] value) - { - positions = value; - } - - public virtual int[] GetPositions() - { - return positions; - } - - public virtual string Term - { - get { return term; } - } - - public override bool Equals(System.Object o) - { - if (this == o) - return true; - if (o == null || GetType() != o.GetType()) - return false; - - TermVectorEntry that = (TermVectorEntry) o; - - if (term != null?!term.Equals(that.term):that.term != null) - return false; - - return true; - } - - public override int GetHashCode() - { - return (term != null?term.GetHashCode():0); - } - - public override System.String ToString() - { - return "TermVectorEntry{" + "field='" + field + '\'' + ", term='" + term + '\'' + ", frequency=" + frequency + '}'; - } - } -} \ No newline at end of file
