http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Memory/MemoryIndex.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Memory/MemoryIndex.cs b/src/contrib/Memory/MemoryIndex.cs deleted file mode 100644 index 41c18fb..0000000 --- a/src/contrib/Memory/MemoryIndex.cs +++ /dev/null @@ -1,1138 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * -*/ - -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Text; -using Lucene.Net.Analysis; -using Lucene.Net.Analysis.Tokenattributes; -using Lucene.Net.Documents; -using Lucene.Net.Search; -using Lucene.Net.Support; - -namespace Lucene.Net.Index.Memory -{ - /// <summary> - /// High-performance single-document main memory Apache Lucene fulltext search index. - /// - /// <h4>Overview</h4> - /// - /// This class is a replacement/substitute for a large subset of - /// {@link RAMDirectory} functionality. It is designed to - /// enable maximum efficiency for on-the-fly matchmaking combining structured and - /// fuzzy fulltext search in realtime streaming applications such as Nux XQuery based XML - /// message queues, publish-subscribe systems for Blogs/newsfeeds, text chat, data acquisition and - /// distribution systems, application level routers, firewalls, classifiers, etc. - /// Rather than targeting fulltext search of infrequent queries over huge persistent - /// data archives (historic search), this class targets fulltext search of huge - /// numbers of queries over comparatively small transient realtime data (prospective - /// search). - /// For example as in - /// <pre> - /// float score = search(String text, Query query) - /// </pre> - /// <p/> - /// Each instance can hold at most one Lucene "document", with a document containing - /// zero or more "fields", each field having a name and a fulltext value. The - /// fulltext value is tokenized (split and transformed) into zero or more index terms - /// (aka words) on <c>addField()</c>, according to the policy implemented by an - /// Analyzer. For example, Lucene analyzers can split on whitespace, normalize to lower case - /// for case insensitivity, ignore common terms with little discriminatory value such as "he", "in", "and" (stop - /// words), reduce the terms to their natural linguistic root form such as "fishing" - /// being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri - /// (upon indexing and/or querying), etc. For details, see - /// <a target="_blank" href="http://today.java.net/pub/a/today/2003/07/30/LuceneIntro.html">Lucene Analyzer Intro</a>. - /// <p/> - /// Arbitrary Lucene queries can be run against this class - see <a target="_blank" - /// href="../../../../../../../queryparsersyntax.html">Lucene Query Syntax</a> - /// as well as <a target="_blank" - /// href="http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html">Query Parser Rules</a>. - /// Note that a Lucene query selects on the field names and associated (indexed) - /// tokenized terms, not on the original fulltext(s) - the latter are not stored - /// but rather thrown away immediately after tokenization. - /// <p/> - /// For some interesting background information on search technology, see Bob Wyman's - /// <a target="_blank" - /// href="http://bobwyman.pubsub.com/main/2005/05/mary_hodder_poi.html">Prospective Search</a>, - /// Jim Gray's - /// <a target="_blank" href="http://www.acmqueue.org/modules.php?name=Content&pa=showpage&pid=293&page=4"> - /// A Call to Arms - Custom subscriptions</a>, and Tim Bray's - /// <a target="_blank" - /// href="http://www.tbray.org/ongoing/When/200x/2003/07/30/OnSearchTOC">On Search, the Series</a>. - /// - /// - /// <h4>Example Usage</h4> - /// - /// <pre> - /// Analyzer analyzer = PatternAnalyzer.DEFAULT_ANALYZER; - /// //Analyzer analyzer = new SimpleAnalyzer(); - /// MemoryIndex index = new MemoryIndex(); - /// index.addField("content", "Readings about Salmons and other select Alaska fishing Manuals", analyzer); - /// index.addField("author", "Tales of James", analyzer); - /// QueryParser parser = new QueryParser("content", analyzer); - /// float score = index.search(parser.parse("+author:james +salmon~ +fish/// manual~")); - /// if (score > 0.0f) { - /// System.out.println("it's a match"); - /// } else { - /// System.out.println("no match found"); - /// } - /// System.out.println("indexData=" + index.toString()); - /// </pre> - /// - /// - /// <h4>Example XQuery Usage</h4> - /// - /// <pre> - /// (: An XQuery that finds all books authored by James that have something to do with "salmon fishing manuals", sorted by relevance :) - /// declare namespace lucene = "java:nux.xom.pool.FullTextUtil"; - /// declare variable $query := "+salmon~ +fish/// manual~"; (: any arbitrary Lucene query can go here :) - /// - /// for $book in /books/book[author="James" and lucene:match(abstract, $query) > 0.0] - /// let $score := lucene:match($book/abstract, $query) - /// order by $score descending - /// return $book - /// </pre> - /// - /// - /// <h4>No thread safety guarantees</h4> - /// - /// An instance can be queried multiple times with the same or different queries, - /// but an instance is not thread-safe. If desired use idioms such as: - /// <pre> - /// MemoryIndex index = ... - /// synchronized (index) { - /// // read and/or write index (i.e. add fields and/or query) - /// } - /// </pre> - /// - /// - /// <h4>Performance Notes</h4> - /// - /// Internally there's a new data structure geared towards efficient indexing - /// and searching, plus the necessary support code to seamlessly plug into the Lucene - /// framework. - /// <p/> - /// This class performs very well for very small texts (e.g. 10 chars) - /// as well as for large texts (e.g. 10 MB) and everything in between. - /// Typically, it is about 10-100 times faster than <c>RAMDirectory</c>. - /// Note that <c>RAMDirectory</c> has particularly - /// large efficiency overheads for small to medium sized texts, both in time and space. - /// Indexing a field with N tokens takes O(N) in the best case, and O(N logN) in the worst - /// case. Memory consumption is probably larger than for <c>RAMDirectory</c>. - /// <p/> - /// Example throughput of many simple term queries over a single MemoryIndex: - /// ~500000 queries/sec on a MacBook Pro, jdk 1.5.0_06, server VM. - /// As always, your mileage may vary. - /// <p/> - /// If you're curious about - /// the whereabouts of bottlenecks, run java 1.5 with the non-perturbing '-server - /// -agentlib:hprof=cpu=samples,depth=10' flags, then study the trace log and - /// correlate its hotspot trailer with its call stack headers (see <a - /// target="_blank" href="http://java.sun.com/developer/technicalArticles/Programming/HPROF.html"> - /// hprof tracing </a>). - /// - ///</summary> - [Serializable] - public partial class MemoryIndex - { - /* info for each field: Map<String fieldName, Info field> */ - private HashMap<String, Info> fields = new HashMap<String, Info>(); - - /* fields sorted ascending by fieldName; lazily computed on demand */ - [NonSerialized] private KeyValuePair<String, Info>[] sortedFields; - - /* pos: positions[3*i], startOffset: positions[3*i +1], endOffset: positions[3*i +2] */ - private int stride; - - /* Could be made configurable; See {@link Document#setBoost(float)} */ - private static float docBoost = 1.0f; - - private static long serialVersionUID = 2782195016849084649L; - - private static bool DEBUG = false; - - /* - * Constructs an empty instance. - */ - public MemoryIndex() - : this(false) - { - } - - /* - * Constructs an empty instance that can optionally store the start and end - * character offset of each token term in the text. This can be useful for - * highlighting of hit locations with the Lucene highlighter package. - * Private until the highlighter package matures, so that this can actually - * be meaningfully integrated. - * - * @param storeOffsets - * whether or not to store the start and end character offset of - * each token term in the text - */ - - private MemoryIndex(bool storeOffsets) - { - this.stride = storeOffsets ? 3 : 1; - } - - /* - * Convenience method; Tokenizes the given field text and adds the resulting - * terms to the index; Equivalent to adding an indexed non-keyword Lucene - * {@link org.apache.lucene.document.Field} that is - * {@link org.apache.lucene.document.Field.Index#ANALYZED tokenized}, - * {@link org.apache.lucene.document.Field.Store#NO not stored}, - * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions} (or - * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions and offsets}), - * - * @param fieldName - * a name to be associated with the text - * @param text - * the text to tokenize and index. - * @param analyzer - * the analyzer to use for tokenization - */ - - public void AddField(String fieldName, String text, Analyzer analyzer) - { - if (fieldName == null) - throw new ArgumentException("fieldName must not be null"); - if (text == null) - throw new ArgumentException("text must not be null"); - if (analyzer == null) - throw new ArgumentException("analyzer must not be null"); - - TokenStream stream = analyzer.TokenStream(fieldName, new StringReader(text)); - - AddField(fieldName, stream); - } - - /* - * Convenience method; Creates and returns a token stream that generates a - * token for each keyword in the given collection, "as is", without any - * transforming text analysis. The resulting token stream can be fed into - * {@link #addField(String, TokenStream)}, perhaps wrapped into another - * {@link org.apache.lucene.analysis.TokenFilter}, as desired. - * - * @param keywords - * the keywords to generate tokens for - * @return the corresponding token stream - */ - - public TokenStream CreateKeywordTokenStream<T>(ICollection<T> keywords) - { - // TODO: deprecate & move this method into AnalyzerUtil? - if (keywords == null) - throw new ArgumentException("keywords must not be null"); - - return new KeywordTokenStream<T>(keywords); - } - - /* - * Equivalent to <c>addField(fieldName, stream, 1.0f)</c>. - * - * @param fieldName - * a name to be associated with the text - * @param stream - * the token stream to retrieve tokens from - */ - public void AddField(String fieldName, TokenStream stream) - { - AddField(fieldName, stream, 1.0f); - } - - /* - * Iterates over the given token stream and adds the resulting terms to the index; - * Equivalent to adding a tokenized, indexed, termVectorStored, unstored, - * Lucene {@link org.apache.lucene.document.Field}. - * Finally closes the token stream. Note that untokenized keywords can be added with this method via - * {@link #CreateKeywordTokenStream(Collection)}, the Lucene contrib <c>KeywordTokenizer</c> or similar utilities. - * - * @param fieldName - * a name to be associated with the text - * @param stream - * the token stream to retrieve tokens from. - * @param boost - * the boost factor for hits for this field - * @see org.apache.lucene.document.Field#setBoost(float) - */ - public void AddField(String fieldName, TokenStream stream, float boost) - { - try - { - if (fieldName == null) - throw new ArgumentException("fieldName must not be null"); - if (stream == null) - throw new ArgumentException("token stream must not be null"); - if (boost <= 0.0f) - throw new ArgumentException("boost factor must be greater than 0.0"); - if (fields[fieldName] != null) - throw new ArgumentException("field must not be added more than once"); - - var terms = new HashMap<String, ArrayIntList>(); - int numTokens = 0; - int numOverlapTokens = 0; - int pos = -1; - - var termAtt = stream.AddAttribute<ITermAttribute>(); - var posIncrAttribute = stream.AddAttribute<IPositionIncrementAttribute>(); - var offsetAtt = stream.AddAttribute<IOffsetAttribute>(); - - stream.Reset(); - while (stream.IncrementToken()) - { - String term = termAtt.Term; - if (term.Length == 0) continue; // nothing to do - // if (DEBUG) System.Diagnostics.Debug.WriteLine("token='" + term + "'"); - numTokens++; - int posIncr = posIncrAttribute.PositionIncrement; - if (posIncr == 0) - numOverlapTokens++; - pos += posIncr; - - ArrayIntList positions = terms[term]; - if (positions == null) - { - // term not seen before - positions = new ArrayIntList(stride); - terms[term] = positions; - } - if (stride == 1) - { - positions.Add(pos); - } - else - { - positions.Add(pos, offsetAtt.StartOffset, offsetAtt.EndOffset); - } - } - stream.End(); - - // ensure infos.numTokens > 0 invariant; needed for correct operation of terms() - if (numTokens > 0) - { - boost = boost*docBoost; // see DocumentWriter.addDocument(...) - fields[fieldName] = new Info(terms, numTokens, numOverlapTokens, boost); - sortedFields = null; // invalidate sorted view, if any - } - } - catch (IOException e) - { - // can never happen - throw new SystemException(string.Empty, e); - } - finally - { - try - { - if (stream != null) stream.Close(); - } - catch (IOException e2) - { - throw new SystemException(string.Empty, e2); - } - } - } - - /* - * Creates and returns a searcher that can be used to execute arbitrary - * Lucene queries and to collect the resulting query results as hits. - * - * @return a searcher - */ - - public IndexSearcher CreateSearcher() - { - MemoryIndexReader reader = new MemoryIndexReader(this); - IndexSearcher searcher = new IndexSearcher(reader); // ensures no auto-close !! - reader.SetSearcher(searcher); // to later get hold of searcher.getSimilarity() - return searcher; - } - - /* - * Convenience method that efficiently returns the relevance score by - * matching this index against the given Lucene query expression. - * - * @param query - * an arbitrary Lucene query to run against this index - * @return the relevance score of the matchmaking; A number in the range - * [0.0 .. 1.0], with 0.0 indicating no match. The higher the number - * the better the match. - * - */ - - public float Search(Query query) - { - if (query == null) - throw new ArgumentException("query must not be null"); - - Searcher searcher = CreateSearcher(); - try - { - float[] scores = new float[1]; // inits to 0.0f (no match) - searcher.Search(query, new FillingCollector(scores)); - float score = scores[0]; - return score; - } - catch (IOException e) - { - // can never happen (RAMDirectory) - throw new SystemException(string.Empty, e); - } - finally - { - // searcher.close(); - /* - * Note that it is harmless and important for good performance to - * NOT close the index reader!!! This avoids all sorts of - * unnecessary baggage and locking in the Lucene IndexReader - * superclass, all of which is completely unnecessary for this main - * memory index data structure without thread-safety claims. - * - * Wishing IndexReader would be an interface... - * - * Actually with the new tight createSearcher() API auto-closing is now - * made impossible, hence searcher.close() would be harmless and also - * would not degrade performance... - */ - } - } - - /* - * Returns a reasonable approximation of the main memory [bytes] consumed by - * this instance. Useful for smart memory sensititive caches/pools. Assumes - * fieldNames are interned, whereas tokenized terms are memory-overlaid. - * - * @return the main memory consumption - */ - public int GetMemorySize() - { - // for example usage in a smart cache see nux.xom.pool.Pool - int PTR = VM.PTR; - int INT = VM.INT; - int size = 0; - size += VM.SizeOfObject(2*PTR + INT); // memory index - if (sortedFields != null) size += VM.SizeOfObjectArray(sortedFields.Length); - - size += VM.SizeOfHashMap(fields.Count); - foreach (var entry in fields) - { - // for each Field Info - Info info = entry.Value; - size += VM.SizeOfObject(2*INT + 3*PTR); // Info instance vars - if (info.SortedTerms != null) size += VM.SizeOfObjectArray(info.SortedTerms.Length); - - int len = info.Terms.Count; - size += VM.SizeOfHashMap(len); - - var iter2 = info.Terms.GetEnumerator(); - while (--len >= 0) - { - iter2.MoveNext(); - // for each term - KeyValuePair<String, ArrayIntList> e = iter2.Current; - size += VM.SizeOfObject(PTR + 3*INT); // assumes substring() memory overlay -// size += STR + 2 * ((String) e.getKey()).length(); - ArrayIntList positions = e.Value; - size += VM.SizeOfArrayIntList(positions.Size()); - } - } - return size; - } - - private int NumPositions(ArrayIntList positions) - { - return positions.Size()/stride; - } - - /* sorts into ascending order (on demand), reusing memory along the way */ - - private void SortFields() - { - if (sortedFields == null) sortedFields = Sort(fields); - } - - /* returns a view of the given map's entries, sorted ascending by key */ - - private static KeyValuePair<TKey, TValue>[] Sort<TKey, TValue>(HashMap<TKey, TValue> map) - where TKey : class, IComparable<TKey> - { - int size = map.Count; - - var entries = map.ToArray(); - - if (size > 1) Array.Sort(entries, TermComparer.KeyComparer); - return entries; - } - - /* - * Returns a String representation of the index data for debugging purposes. - * - * @return the string representation - */ - - public override String ToString() - { - StringBuilder result = new StringBuilder(256); - SortFields(); - int sumChars = 0; - int sumPositions = 0; - int sumTerms = 0; - - for (int i = 0; i < sortedFields.Length; i++) - { - KeyValuePair<String, Info> entry = sortedFields[i]; - String fieldName = entry.Key; - Info info = entry.Value; - info.SortTerms(); - result.Append(fieldName + ":\n"); - - int numChars = 0; - int numPos = 0; - for (int j = 0; j < info.SortedTerms.Length; j++) - { - KeyValuePair<String, ArrayIntList> e = info.SortedTerms[j]; - String term = e.Key; - ArrayIntList positions = e.Value; - result.Append("\t'" + term + "':" + NumPositions(positions) + ":"); - result.Append(positions.ToString(stride)); // ignore offsets - result.Append("\n"); - numPos += NumPositions(positions); - numChars += term.Length; - } - - result.Append("\tterms=" + info.SortedTerms.Length); - result.Append(", positions=" + numPos); - result.Append(", Kchars=" + (numChars/1000.0f)); - result.Append("\n"); - sumPositions += numPos; - sumChars += numChars; - sumTerms += info.SortedTerms.Length; - } - - result.Append("\nfields=" + sortedFields.Length); - result.Append(", terms=" + sumTerms); - result.Append(", positions=" + sumPositions); - result.Append(", Kchars=" + (sumChars/1000.0f)); - return result.ToString(); - } - - - /////////////////////////////////////////////////////////////////////////////// - // Nested classes: - /////////////////////////////////////////////////////////////////////////////// - /* - * Index data structure for a field; Contains the tokenized term texts and - * their positions. - */ - - [Serializable] - private sealed class Info - { - public static readonly IComparer<KeyValuePair<string, Info>> InfoComparer = new TermComparer<Info>(); - public static readonly IComparer<KeyValuePair<string, ArrayIntList>> ArrayIntListComparer = new TermComparer<ArrayIntList>(); - /* - * Term strings and their positions for this field: Map <String - * termText, ArrayIntList positions> - */ - private HashMap<String, ArrayIntList> terms; - - /* Terms sorted ascending by term text; computed on demand */ - [NonSerialized] private KeyValuePair<String, ArrayIntList>[] sortedTerms; - - /* Number of added tokens for this field */ - private int numTokens; - - /* Number of overlapping tokens for this field */ - private int numOverlapTokens; - - /* Boost factor for hits for this field */ - private float boost; - - /* Term for this field's fieldName, lazily computed on demand */ - [NonSerialized] public Term template; - - private static long serialVersionUID = 2882195016849084649L; - - public Info(HashMap<String, ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) - { - this.terms = terms; - this.numTokens = numTokens; - this.NumOverlapTokens = numOverlapTokens; - this.boost = boost; - } - - public HashMap<string, ArrayIntList> Terms - { - get { return terms; } - } - - public int NumTokens - { - get { return numTokens; } - } - - public int NumOverlapTokens - { - get { return numOverlapTokens; } - set { numOverlapTokens = value; } - } - - public float Boost - { - get { return boost; } - } - - public KeyValuePair<string, ArrayIntList>[] SortedTerms - { - get { return sortedTerms; } - } - - /* - * Sorts hashed terms into ascending order, reusing memory along the - * way. Note that sorting is lazily delayed until required (often it's - * not required at all). If a sorted view is required then hashing + - * sort + binary search is still faster and smaller than TreeMap usage - * (which would be an alternative and somewhat more elegant approach, - * apart from more sophisticated Tries / prefix trees). - */ - - public void SortTerms() - { - if (SortedTerms == null) sortedTerms = Sort(Terms); - } - - /* note that the frequency can be calculated as numPosition(getPositions(x)) */ - - public ArrayIntList GetPositions(String term) - { - return Terms[term]; - } - - /* note that the frequency can be calculated as numPosition(getPositions(x)) */ - - public ArrayIntList GetPositions(int pos) - { - return SortedTerms[pos].Value; - } - } - - - /////////////////////////////////////////////////////////////////////////////// - // Nested classes: - /////////////////////////////////////////////////////////////////////////////// - /* - * Efficient resizable auto-expanding list holding <c>int</c> elements; - * implemented with arrays. - */ - - [Serializable] - private sealed class ArrayIntList - { - - private int[] elements; - private int size = 0; - - private static long serialVersionUID = 2282195016849084649L; - - private ArrayIntList() - : this(10) - { - - } - - public ArrayIntList(int initialCapacity) - { - elements = new int[initialCapacity]; - } - - public void Add(int elem) - { - if (size == elements.Length) EnsureCapacity(size + 1); - elements[size++] = elem; - } - - public void Add(int pos, int start, int end) - { - if (size + 3 > elements.Length) EnsureCapacity(size + 3); - elements[size] = pos; - elements[size + 1] = start; - elements[size + 2] = end; - size += 3; - } - - public int Get(int index) - { - if (index >= size) ThrowIndex(index); - return elements[index]; - } - - public int Size() - { - return size; - } - - public int[] ToArray(int stride) - { - int[] arr = new int[Size()/stride]; - if (stride == 1) - { - Array.Copy(elements, 0, arr, 0, size); - } - else - { - for (int i = 0, j = 0; j < size; i++, j += stride) arr[i] = elements[j]; - } - return arr; - } - - private void EnsureCapacity(int minCapacity) - { - int newCapacity = Math.Max(minCapacity, (elements.Length*3)/2 + 1); - int[] newElements = new int[newCapacity]; - Array.Copy(elements, 0, newElements, 0, size); - elements = newElements; - } - - private void ThrowIndex(int index) - { - throw new IndexOutOfRangeException("index: " + index - + ", size: " + size); - } - - /* returns the first few positions (without offsets); debug only */ - - public string ToString(int stride) - { - int s = Size()/stride; - int len = Math.Min(10, s); // avoid printing huge lists - StringBuilder buf = new StringBuilder(4*len); - buf.Append("["); - for (int i = 0; i < len; i++) - { - buf.Append(Get(i*stride)); - if (i < len - 1) buf.Append(", "); - } - if (len != s) buf.Append(", ..."); // and some more... - buf.Append("]"); - return buf.ToString(); - } - } - - - /////////////////////////////////////////////////////////////////////////////// - // Nested classes: - /////////////////////////////////////////////////////////////////////////////// - private static readonly Term MATCH_ALL_TERM = new Term(""); - - /* - * Search support for Lucene framework integration; implements all methods - * required by the Lucene IndexReader contracts. - */ - - private sealed partial class MemoryIndexReader : IndexReader - { - private readonly MemoryIndex _index; - - private Searcher searcher; // needed to find searcher.getSimilarity() - - internal MemoryIndexReader(MemoryIndex index) - { - _index = index; - } - - private Info GetInfo(String fieldName) - { - return _index.fields[fieldName]; - } - - private Info GetInfo(int pos) - { - return _index.sortedFields[pos].Value; - } - - public override int DocFreq(Term term) - { - Info info = GetInfo(term.Field); - int freq = 0; - if (info != null) freq = info.GetPositions(term.Text) != null ? 1 : 0; - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.docFreq: " + term + ", freq:" + freq); - return freq; - } - - public override TermEnum Terms() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.terms()"); - return Terms(MATCH_ALL_TERM); - } - - public override TermEnum Terms(Term term) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.terms: " + term); - - int i; // index into info.sortedTerms - int j; // index into sortedFields - - _index.SortFields(); - if (_index.sortedFields.Length == 1 && _index.sortedFields[0].Key == term.Field) - { - j = 0; // fast path - } - else - { - j = Array.BinarySearch(_index.sortedFields, new KeyValuePair<string, Info>(term.Field, null), Info.InfoComparer); - } - - if (j < 0) - { - // not found; choose successor - j = -j - 1; - i = 0; - if (j < _index.sortedFields.Length) GetInfo(j).SortTerms(); - } - else - { - // found - Info info = GetInfo(j); - info.SortTerms(); - i = Array.BinarySearch(info.SortedTerms, new KeyValuePair<string, ArrayIntList>(term.Text, null), Info.ArrayIntListComparer); - if (i < 0) - { - // not found; choose successor - i = -i - 1; - if (i >= info.SortedTerms.Length) - { - // move to next successor - j++; - i = 0; - if (j < _index.sortedFields.Length) GetInfo(j).SortTerms(); - } - } - } - int ix = i; - int jx = j; - - return new MemoryTermEnum(_index, this, ix, jx); - } - - public override TermPositions TermPositions() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.termPositions"); - - return new MemoryTermPositions(_index, this); - } - - - public override TermDocs TermDocs() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.termDocs"); - return TermPositions(); - } - - public override ITermFreqVector[] GetTermFreqVectors(int docNumber) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.getTermFreqVectors"); - // This is okay, ToArray() is as optimized as writing it by hand - return _index.fields.Keys.Select(k => GetTermFreqVector(docNumber, k)).ToArray(); - } - - public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.getTermFreqVectors"); - - // if (vectors.length == 0) return null; - foreach (String fieldName in _index.fields.Keys) - { - GetTermFreqVector(docNumber, fieldName, mapper); - } - } - - public override void GetTermFreqVector(int docNumber, String field, TermVectorMapper mapper) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.getTermFreqVector"); - Info info = GetInfo(field); - if (info == null) - { - return; - } - info.SortTerms(); - mapper.SetExpectations(field, info.SortedTerms.Length, _index.stride != 1, true); - for (int i = info.SortedTerms.Length; --i >= 0;) - { - - ArrayIntList positions = info.SortedTerms[i].Value; - int size = positions.Size(); - var offsets = new TermVectorOffsetInfo[size/_index.stride]; - - for (int k = 0, j = 1; j < size; k++, j += _index.stride) - { - int start = positions.Get(j); - int end = positions.Get(j + 1); - offsets[k] = new TermVectorOffsetInfo(start, end); - } - mapper.Map(info.SortedTerms[i].Key, _index.NumPositions(info.SortedTerms[i].Value), offsets, - (info.SortedTerms[i].Value).ToArray(_index.stride)); - } - } - - public override ITermFreqVector GetTermFreqVector(int docNumber, String fieldName) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.getTermFreqVector"); - Info info = GetInfo(fieldName); - if (info == null) return null; // TODO: or return empty vector impl??? - info.SortTerms(); - - return new MemoryTermPositionVector(_index, info, fieldName); - } - - private Similarity GetSimilarity() - { - if (searcher != null) return searcher.Similarity; - return Similarity.Default; - } - - internal void SetSearcher(Searcher searcher) - { - this.searcher = searcher; - } - - /* performance hack: cache norms to avoid repeated expensive calculations */ - private byte[] cachedNorms; - private String cachedFieldName; - private Similarity cachedSimilarity; - - public override byte[] Norms(String fieldName) - { - byte[] norms = cachedNorms; - Similarity sim = GetSimilarity(); - if (fieldName != cachedFieldName || sim != cachedSimilarity) - { - // not cached? - Info info = GetInfo(fieldName); - int numTokens = info != null ? info.NumTokens : 0; - int numOverlapTokens = info != null ? info.NumOverlapTokens : 0; - float boost = info != null ? info.Boost : 1.0f; - FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost); - float n = sim.ComputeNorm(fieldName, invertState); - byte norm = Similarity.EncodeNorm(n); - norms = new byte[] {norm}; - - // cache it for future reuse - cachedNorms = norms; - cachedFieldName = fieldName; - cachedSimilarity = sim; - if (DEBUG) - System.Diagnostics.Debug.WriteLine("MemoryIndexReader.norms: " + fieldName + ":" + n + ":" + - norm + ":" + numTokens); - } - return norms; - } - - public override void Norms(String fieldName, byte[] bytes, int offset) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.norms*: " + fieldName); - byte[] norms = Norms(fieldName); - Buffer.BlockCopy(norms, 0, bytes, offset, norms.Length); - } - - protected override void DoSetNorm(int doc, String fieldName, byte value) - { - throw new NotSupportedException(); - } - - public override int NumDocs() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.numDocs"); - return _index.fields.Count > 0 ? 1 : 0; - } - - public override int MaxDoc - { - get - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.maxDoc"); - return 1; - } - } - - public override Document Document(int n) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.document"); - return new Document(); // there are no stored fields - } - - //When we convert to JDK 1.5 make this Set<String> - public override Document Document(int n, FieldSelector fieldSelector) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.document"); - return new Document(); // there are no stored fields - } - - public override bool IsDeleted(int n) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.isDeleted"); - return false; - } - - public override bool HasDeletions - { - get - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.hasDeletions"); - return false; - } - } - - protected override void DoDelete(int docNum) - { - throw new NotSupportedException(); - } - - protected override void DoUndeleteAll() - { - throw new NotSupportedException(); - } - - protected override void DoCommit(IDictionary<String, String> commitUserData) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.doCommit"); - - } - - protected override void DoClose() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.doClose"); - } - - // lucene >= 1.9 (remove this method for lucene-1.4.3) - public override ICollection<String> GetFieldNames(FieldOption fieldOption) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("MemoryIndexReader.getFieldNamesOption"); - if (fieldOption == FieldOption.UNINDEXED) - return CollectionsHelper<string>.EmptyList(); - if (fieldOption == FieldOption.INDEXED_NO_TERMVECTOR) - return CollectionsHelper<string>.EmptyList(); - if (fieldOption == FieldOption.TERMVECTOR_WITH_OFFSET && _index.stride == 1) - return CollectionsHelper<string>.EmptyList(); - if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && _index.stride == 1) - return CollectionsHelper<string>.EmptyList(); - - return _index.fields.Keys.AsReadOnly(); - } - } - - - /////////////////////////////////////////////////////////////////////////////// - // Nested classes: - /////////////////////////////////////////////////////////////////////////////// - private static class VM - { - - public static readonly int PTR = Is64BitVM() ? 8 : 4; - - // bytes occupied by primitive data types - public static readonly int BOOLEAN = 1; - public static readonly int BYTE = 1; - public static readonly int CHAR = 2; - public static readonly int SHORT = 2; - public static readonly int INT = 4; - public static readonly int LONG = 8; - public static readonly int FLOAT = 4; - public static readonly int DOUBLE = 8; - - private static readonly int LOG_PTR = (int) Math.Round(Log2(PTR)); - - /* - * Object header of any heap allocated Java object. - * ptr to class, info for monitor, gc, hash, etc. - */ - private static readonly int OBJECT_HEADER = 2*PTR; - - // assumes n > 0 - // 64 bit VM: - // 0 --> 0*PTR - // 1..8 --> 1*PTR - // 9..16 --> 2*PTR - private static int SizeOf(int n) - { - return (((n - 1) >> LOG_PTR) + 1) << LOG_PTR; - } - - public static int SizeOfObject(int n) - { - return SizeOf(OBJECT_HEADER + n); - } - - public static int SizeOfObjectArray(int len) - { - return SizeOfObject(INT + PTR*len); - } - - public static int SizeOfCharArray(int len) - { - return SizeOfObject(INT + CHAR*len); - } - - public static int SizeOfIntArray(int len) - { - return SizeOfObject(INT + INT*len); - } - - public static int SizeOfString(int len) - { - return SizeOfObject(3*INT + PTR) + SizeOfCharArray(len); - } - - public static int SizeOfHashMap(int len) - { - return SizeOfObject(4*PTR + 4*INT) + SizeOfObjectArray(len) - + len*SizeOfObject(3*PTR + INT); // entries - } - - // note: does not include referenced objects - public static int SizeOfArrayList(int len) - { - return SizeOfObject(PTR + 2*INT) + SizeOfObjectArray(len); - } - - public static int SizeOfArrayIntList(int len) - { - return SizeOfObject(PTR + INT) + SizeOfIntArray(len); - } - - private static bool Is64BitVM() - { - return IntPtr.Size == 8; - } - - /* logarithm to the base 2. Example: log2(4) == 2, log2(8) == 3 */ - - private static double Log2(double value) - { - return Math.Log(value, 2); - //return Math.Log(value) / Math.Log(2); - } - } - - } -}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Memory/MemoryTermEnum.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Memory/MemoryTermEnum.cs b/src/contrib/Memory/MemoryTermEnum.cs deleted file mode 100644 index ba9436b..0000000 --- a/src/contrib/Memory/MemoryTermEnum.cs +++ /dev/null @@ -1,105 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * -*/ - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -namespace Lucene.Net.Index.Memory -{ - public partial class MemoryIndex - { - private sealed partial class MemoryIndexReader - { - private class MemoryTermEnum : TermEnum - { - private readonly MemoryIndex _index; - private readonly MemoryIndexReader _reader; - private int _i; // index into info.sortedTerms - private int _j; // index into sortedFields - - public MemoryTermEnum(MemoryIndex index, MemoryIndexReader reader, int ix, int jx) - { - _index = index; - _reader = reader; - _i = ix; // index into info.sortedTerms - _j = jx; // index into sortedFields - } - - public override bool Next() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("TermEnum.next"); - if (_j >= _index.sortedFields.Length) return false; - Info info = _reader.GetInfo(_j); - if (++_i < info.SortedTerms.Length) return true; - - // move to successor - _j++; - _i = 0; - if (_j >= _index.sortedFields.Length) return false; - _reader.GetInfo(_j).SortTerms(); - return true; - } - - public override Term Term - { - get - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("TermEnum.term: " + _i); - if (_j >= _index.sortedFields.Length) return null; - Info info = _reader.GetInfo(_j); - if (_i >= info.SortedTerms.Length) return null; - // if (DEBUG) System.Diagnostics.Debug.WriteLine("TermEnum.term: " + i + ", " + info.sortedTerms[i].getKey()); - return CreateTerm(info, _j, info.SortedTerms[_i].Key); - } - } - - public override int DocFreq() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("TermEnum.docFreq"); - if (_j >= _index.sortedFields.Length) return 0; - Info info = _reader.GetInfo(_j); - if (_i >= info.SortedTerms.Length) return 0; - return _index.NumPositions(info.GetPositions(_i)); - } - - protected override void Dispose(bool disposing) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine("TermEnum.close"); - } - - private Term CreateTerm(Info info, int pos, string text) - { - // Assertion: sortFields has already been called before - Term template = info.template; - if (template == null) { // not yet cached? - String fieldName = _index.sortedFields[pos].Key; - template = new Term(fieldName); - info.template = template; - } - - return template.CreateTerm(text); - } - } - } - } -} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Memory/MemoryTermPositionVector.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Memory/MemoryTermPositionVector.cs b/src/contrib/Memory/MemoryTermPositionVector.cs deleted file mode 100644 index c4b2797..0000000 --- a/src/contrib/Memory/MemoryTermPositionVector.cs +++ /dev/null @@ -1,116 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * -*/ - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -namespace Lucene.Net.Index.Memory -{ - public partial class MemoryIndex - { - private sealed partial class MemoryIndexReader - { - private class MemoryTermPositionVector : TermPositionVector - { - private readonly MemoryIndex _index; - private readonly string _fieldName; - private readonly KeyValuePair<String,ArrayIntList>[] sortedTerms; - - public MemoryTermPositionVector(MemoryIndex index, Info info, string fieldName) - { - _index = index; - _fieldName = fieldName; - sortedTerms = info.SortedTerms; - } - - public string Field - { - get { return _fieldName; } - } - - public int Size - { - get { return sortedTerms.Length; } - } - - public string[] GetTerms() - { - var terms = new String[sortedTerms.Length]; - for (int i = sortedTerms.Length; --i >= 0; ) - { - terms[i] = sortedTerms[i].Key; - } - return terms; - } - - public int[] GetTermFrequencies() - { - int[] freqs = new int[sortedTerms.Length]; - for (int i = sortedTerms.Length; --i >= 0; ) - { - freqs[i] = _index.NumPositions(sortedTerms[i].Value); - } - return freqs; - } - - public int IndexOf(string term) - { - int i = Array.BinarySearch(sortedTerms, new KeyValuePair<string, ArrayIntList>(term, null), Info.ArrayIntListComparer); - return i >= 0 ? i : -1; - } - - public int[] IndexesOf(string[] terms, int start, int len) - { - int[] indexes = new int[len]; - for (int i = 0; i < len; i++) - { - indexes[i] = IndexOf(terms[start++]); - } - return indexes; - } - - public int[] GetTermPositions(int index) - { - return sortedTerms[index].Value.ToArray(_index.stride); - } - - public TermVectorOffsetInfo[] GetOffsets(int index) - { - if (_index.stride == 1) return null; // no offsets stored - - ArrayIntList positions = sortedTerms[index].Value; - int size = positions.Size(); - TermVectorOffsetInfo[] offsets = new TermVectorOffsetInfo[size / _index.stride]; - - for (int i = 0, j = 1; j < size; i++, j += _index.stride) - { - int start = positions.Get(j); - int end = positions.Get(j + 1); - offsets[i] = new TermVectorOffsetInfo(start, end); - } - return offsets; - } - } - } - } -} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Memory/MemoryTermPositions.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Memory/MemoryTermPositions.cs b/src/contrib/Memory/MemoryTermPositions.cs deleted file mode 100644 index 7096379..0000000 --- a/src/contrib/Memory/MemoryTermPositions.cs +++ /dev/null @@ -1,151 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * -*/ - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -namespace Lucene.Net.Index.Memory -{ - public partial class MemoryIndex - { - private sealed partial class MemoryIndexReader - { - private sealed class MemoryTermPositions : TermPositions - { - private readonly MemoryIndex _index; - private readonly MemoryIndexReader _reader; - private bool hasNext; - private int cursor = 0; - private ArrayIntList current; - private Term term; - - public MemoryTermPositions(MemoryIndex index, MemoryIndexReader reader) - { - _index = index; - _reader = reader; - } - - public void Seek(Term term) - { - this.term = term; - - if (DEBUG) System.Diagnostics.Debug.WriteLine(".seek: " + term); - - if (term == null) - { - hasNext = true; // term==null means match all docs - } - else - { - Info info = _reader.GetInfo(term.Field); - current = info == null ? null : info.GetPositions(term.Text); - hasNext = (current != null); - cursor = 0; - } - } - - public void Seek(TermEnum termEnum) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine(".seekEnum"); - Seek(termEnum.Term); - } - - public int Doc - { - get - { - if (DEBUG) System.Diagnostics.Debug.WriteLine(".doc"); - return 0; - } - } - - public int Freq - { - get - { - int freq = current != null ? _index.NumPositions(current) : (term == null ? 1 : 0); - if (DEBUG) System.Diagnostics.Debug.WriteLine(".freq: " + freq); - return freq; - } - } - - public bool Next() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine(".next: " + current + ", oldHasNext=" + hasNext); - bool next = hasNext; - hasNext = false; - return next; - } - - public int Read(int[] docs, int[] freqs) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine(".read: " + docs.Length); - if (!hasNext) return 0; - hasNext = false; - docs[0] = 0; - freqs[0] = Freq; - return 1; - } - - public bool SkipTo(int target) - { - if (DEBUG) System.Diagnostics.Debug.WriteLine(".skipTo: " + target); - return Next(); - } - - public void Close() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine(".close"); - } - - public void Dispose() - { - if (DEBUG) System.Diagnostics.Debug.WriteLine(".close"); - } - - public int NextPosition() - { - int pos = current.Get(cursor); - cursor += _index.stride; - if (DEBUG) System.Diagnostics.Debug.WriteLine(".nextPosition: " + pos); - return pos; - } - - public int PayloadLength - { - get { throw new NotSupportedException(); } - } - - public byte[] GetPayload(byte[] data, int offset) - { - throw new NotSupportedException(); - } - - public bool IsPayloadAvailable - { - get { return false; } - } - } - } - } -} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Memory/Properties/AssemblyInfo.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Memory/Properties/AssemblyInfo.cs b/src/contrib/Memory/Properties/AssemblyInfo.cs deleted file mode 100644 index 5635333..0000000 --- a/src/contrib/Memory/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,60 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * -*/ - -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Security; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("Contrib.Memory")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Contrib.Memory")] -[assembly: AssemblyCopyright("Copyright © 2012")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -[assembly: AllowPartiallyTrustedCallers] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("5d1e7f1d-ae69-4cf0-875e-64c3a5f3a53b")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Memory/TermComparer.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Memory/TermComparer.cs b/src/contrib/Memory/TermComparer.cs deleted file mode 100644 index 0f3ce55..0000000 --- a/src/contrib/Memory/TermComparer.cs +++ /dev/null @@ -1,52 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * -*/ - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -namespace Lucene.Net.Index.Memory -{ - class TermComparer - { - /// <summary> - /// Sorts term entries into ascending order; also works for - /// Arrays.binarySearch() and Arrays.sort() - /// </summary> - public static int KeyComparer<TKey, TValue>(KeyValuePair<TKey, TValue> x, KeyValuePair<TKey, TValue> y) - where TKey : class, IComparable<TKey> - { - if (x.Key == y.Key) return 0; - return typeof (TKey) == typeof (string) - ? string.Compare(x.Key as string, y.Key as string, StringComparison.Ordinal) - : x.Key.CompareTo(y.Key); - } - } - - sealed class TermComparer<T> : TermComparer, IComparer<KeyValuePair<string, T>> - { - public int Compare(KeyValuePair<string, T> x, KeyValuePair<string, T> y) - { - return KeyComparer(x, y); - } - } -} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Queries/BooleanFilter.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Queries/BooleanFilter.cs b/src/contrib/Queries/BooleanFilter.cs deleted file mode 100644 index de4b8fd..0000000 --- a/src/contrib/Queries/BooleanFilter.cs +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -using Lucene.Net.Index; -using Lucene.Net.Support; -using Lucene.Net.Util; - -namespace Lucene.Net.Search -{ - public class BooleanFilter : Filter - { - /// <summary> - /// The filters that are optional clauses. - /// </summary> - private List<Filter> shouldFilters = null; - - /// <summary> - /// The filters that are used for exclusion. - /// </summary> - private List<Filter> notFilters = null; - - /// <summary> - /// The filters that must be met. - /// </summary> - private List<Filter> mustFilters = null; - - /// <summary> - /// Get the iterator for a specific filter. - /// </summary> - /// <param name="filters">The list of filters</param> - /// <param name="index">The index of the iterator to get.</param> - /// <param name="reader">The reader for the index.</param> - /// <returns></returns> - private DocIdSetIterator GetDISI(List<Filter> filters, int index, IndexReader reader) - { - return filters[index].GetDocIdSet(reader).Iterator(); - } - - /// <summary> - /// Get the id set for the filter. - /// </summary> - /// <param name="reader">The reader.</param> - /// <returns>The filter set to use.</returns> - public override DocIdSet GetDocIdSet(IndexReader reader) - { - OpenBitSetDISI res = null; - - if (shouldFilters != null) - { - for (int i = 0; i < shouldFilters.Count; i++) - { - if (res == null) - { - res = new OpenBitSetDISI(GetDISI(shouldFilters, i, reader), reader.MaxDoc); - } - else - { - DocIdSet dis = shouldFilters[i].GetDocIdSet(reader); - if (dis is OpenBitSet) - { - // optimized case for OpenBitSets - res.Or((OpenBitSet)dis); - } - else - { - res.InPlaceOr(GetDISI(shouldFilters, i, reader)); - } - } - } - } - - if (notFilters != null) - { - for (int i = 0; i < notFilters.Count; i++) - { - if (res == null) - { - res = new OpenBitSetDISI(GetDISI(notFilters, i, reader), reader.MaxDoc); - res.Flip(0, reader.MaxDoc); // NOTE: may set bits on deleted docs - } - else - { - DocIdSet dis = notFilters[i].GetDocIdSet(reader); - if (dis is OpenBitSet) - { - // optimized case for OpenBitSets - res.AndNot((OpenBitSet)dis); - } - else - { - res.InPlaceNot(GetDISI(notFilters, i, reader)); - } - } - } - } - - if (mustFilters != null) - { - for (int i = 0; i < mustFilters.Count; i++) - { - if (res == null) - { - res = new OpenBitSetDISI(GetDISI(mustFilters, i, reader), reader.MaxDoc); - } - else - { - DocIdSet dis = mustFilters[i].GetDocIdSet(reader); - if (dis is OpenBitSet) - { - // optimized case for OpenBitSets - res.And((OpenBitSet)dis); - } - else - { - res.InPlaceAnd(GetDISI(mustFilters, i, reader)); - } - } - } - } - - if (res != null) - return FinalResult(res, reader.MaxDoc); - - return DocIdSet.EMPTY_DOCIDSET; - } - - /* Provide a SortedVIntList when it is definitely smaller - * than an OpenBitSet. - * @deprecated Either use CachingWrapperFilter, or - * switch to a different DocIdSet implementation yourself. - * This method will be removed in Lucene 4.0 - */ - protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs) - { - return result; - } - - /// <summary> - /// Add a filter clause. - /// </summary> - /// <param name="filterClause">The clause to add.</param> - public void Add(FilterClause filterClause) - { - if (filterClause.Occur == Occur.MUST) - { - if (mustFilters == null) - { - mustFilters = new EquatableList<Filter>(); - } - mustFilters.Add(filterClause.Filter); - } - if (filterClause.Occur == Occur.SHOULD) - { - if (shouldFilters == null) - { - shouldFilters = new EquatableList<Filter>(); - } - shouldFilters.Add(filterClause.Filter); - } - if (filterClause.Occur == Occur.MUST_NOT) - { - if (notFilters == null) - { - notFilters = new EquatableList<Filter>(); - } - notFilters.Add(filterClause.Filter); - } - } - - /// <summary> - /// Determine equality between two lists. - /// </summary> - /// <param name="filters1"></param> - /// <param name="filters2"></param> - /// <returns></returns> - private bool EqualFilters(List<Filter> filters1, List<Filter> filters2) - { - return (filters1 == filters2) || - ((filters1 != null) && filters1.Equals(filters2)); - } - - /// <summary> - /// Equality - /// </summary> - /// <param name="obj"></param> - /// <returns></returns> - public override bool Equals(Object obj) - { - if (this == obj) - return true; - - if ((obj == null) || !(obj is BooleanFilter)) - return false; - - BooleanFilter other = (BooleanFilter)obj; - return EqualFilters(notFilters, other.notFilters) - && EqualFilters(mustFilters, other.mustFilters) - && EqualFilters(shouldFilters, other.shouldFilters); - } - - /// <summary> - /// Hash code. - /// </summary> - /// <returns></returns> - public override int GetHashCode() - { - int hash = 7; - hash = 31 * (hash + this.ListHash(this.mustFilters)); - hash = 31 * (hash + this.ListHash(this.notFilters)); - hash = 31 * (hash + this.ListHash(this.shouldFilters)); - return hash; - } - - - private int ListHash(List<Filter> filters) - { - int sum = 0; - if (filters != null && filters.Count > 0) - { - for (int i = 0; i < filters.Count; i++) - { - sum += filters[i].GetHashCode(); - } - } - return sum; - } - - /// <summary> - /// String representation. - /// </summary> - /// <returns></returns> - public override String ToString() - { - StringBuilder buffer = new StringBuilder(); - buffer.Append("BooleanFilter("); - AppendFilters(shouldFilters, "", buffer); - AppendFilters(mustFilters, "+", buffer); - AppendFilters(notFilters, "-", buffer); - buffer.Append(")"); - return buffer.ToString(); - } - - /// <summary> - /// Append individual filters. - /// </summary> - /// <param name="filters"></param> - /// <param name="occurString"></param> - /// <param name="buffer"></param> - private void AppendFilters(List<Filter> filters, String occurString, StringBuilder buffer) - { - if (filters != null) - { - for (int i = 0; i < filters.Count(); i++) - { - buffer.Append(' '); - buffer.Append(occurString); - buffer.Append(filters[i].ToString()); - } - } - } - } -} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Queries/BoostingQuery.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Queries/BoostingQuery.cs b/src/contrib/Queries/BoostingQuery.cs deleted file mode 100644 index 5d57d2c..0000000 --- a/src/contrib/Queries/BoostingQuery.cs +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -using Lucene.Net.Index; -using Lucene.Net.Search; - -namespace Lucene.Net.Search -{ - /// <summary> - /// The BoostingQuery class can be used to effectively demote results that match a given query. - /// Unlike the "NOT" clause, this still selects documents that contain undesirable terms, - /// but reduces their overall score: - /// <pre> - /// Query balancedQuery = new BoostingQuery(positiveQuery, negativeQuery, 0.01f); - /// </pre> - /// In this scenario the positiveQuery contains the mandatory, desirable criteria which is used to - /// select all matching documents, and the negativeQuery contains the undesirable elements which - /// are simply used to lessen the scores. Documents that match the negativeQuery have their score - /// multiplied by the supplied "boost" parameter, so this should be less than 1 to achieve a - /// demoting effect - /// - /// This code was originally made available here: - /// <a href="http://marc.theaimsgroup.com/?l=lucene-user&m=108058407130459&w=2">mailing list</a> - /// and is documented here: <a href="http://wiki.apache.org/lucene-java/CommunityContributions">Documentation</a> - /// </summary> - public class BoostingQuery : Query - { - private float boost; // the amount to boost by - private Query match; // query to match - private Query context; // boost when matches too - - public BoostingQuery(Query match, Query context, float boost) - { - this.match = match; - this.context = (Query)context.Clone(); // clone before boost - this.boost = boost; - - this.context.Boost = 0.0f; // ignore context-only matches - } - - public override Query Rewrite(IndexReader reader) - { - BooleanQuery result = new AnonymousBooleanQuery(boost); - - result.Add(match, Occur.MUST); - result.Add(context, Occur.SHOULD); - - return result; - } - - class AnonymousBooleanQuery : BooleanQuery - { - float boost; - public AnonymousBooleanQuery(float boost) - { - this.boost = boost; - } - - public override Similarity GetSimilarity(Searcher searcher) - { - return new AnonymousDefaultSimilarity(boost); - } - } - - class AnonymousDefaultSimilarity : DefaultSimilarity - { - float boost ; - public AnonymousDefaultSimilarity(float boost) - { - this.boost = boost; - } - - public override float Coord(int overlap, int max) - { - switch (overlap) - { - - case 1: // matched only one clause - return 1.0f; // use the score as-is - - case 2: // matched both clauses - return boost; // multiply by boost - - default: - return 0.0f; - - } - } - } - - public override int GetHashCode() - { - int prime = 31; - int result = 1; - result = prime * result + BitConverter.ToInt32(BitConverter.GetBytes(boost),0); - result = prime * result + ((context == null) ? 0 : context.GetHashCode()); - result = prime * result + ((match == null) ? 0 : match.GetHashCode()); - return result; - } - - public override bool Equals(Object obj) - { - if (this == obj) - return true; - if (obj == null) - return false; - if (this.GetType() != obj.GetType()) - return false; - BoostingQuery other = (BoostingQuery)obj; - if (BitConverter.ToInt32(BitConverter.GetBytes(boost),0) != BitConverter.ToInt32(BitConverter.GetBytes(other.boost),0) ) - return false; - if (context == null) - { - if (other.context != null) - return false; - } - else if (!context.Equals(other.context)) - return false; - if (match == null) - { - if (other.match != null) - return false; - } - else if (!match.Equals(other.match)) - return false; - return true; - } - - public override String ToString(String field) - { - return match.ToString(field) + "/" + context.ToString(field); - } - } -}
