Author: alexparvulescu Date: Thu Jul 19 13:11:09 2012 New Revision: 1363321
URL: http://svn.apache.org/viewvc?rev=1363321&view=rev Log: OAK-154 Full text search index - bumped lucene up to 4.0.0 alpha, fixed compilation issues - added factory classes & minor cosmetics Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java (with props) jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java (with props) jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java (with props) Modified: jackrabbit/oak/trunk/oak-core/pom.xml jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java Modified: jackrabbit/oak/trunk/oak-core/pom.xml URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/pom.xml?rev=1363321&r1=1363320&r2=1363321&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/pom.xml (original) +++ jackrabbit/oak/trunk/oak-core/pom.xml Thu Jul 19 13:11:09 2012 @@ -162,10 +162,16 @@ <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> - <version>3.6.0</version> + <version>4.0.0-ALPHA</version> <optional>true</optional> </dependency> <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-analyzers-common</artifactId> + <version>4.0.0-ALPHA</version> + <optional>true</optional> + </dependency> + <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-parsers</artifactId> <version>1.1</version> Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java?rev=1363321&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java (added) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java Thu Jul 19 13:11:09 2012 @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.lucene; + +import static org.apache.jackrabbit.oak.plugins.lucene.FieldNames.PATH; +import static org.apache.lucene.document.Field.Store.*; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; + +/** + * <code>FieldFactory</code> is a factory for <code>Field</code> instances with + * frequently used fields. + */ +public final class FieldFactory { + + /** + * Private constructor. + */ + private FieldFactory() { + } + + public static Field newPathField(String path) { + return new StringField(PATH, path, YES); + } + + public static Field newPropertyField(String name, String value) { + // TODO do we need norms info on the indexed fields ? TextField:StringField + // return new TextField(name, value, NO); + return new StringField(name, value, NO); + } +} Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java?rev=1363321&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java (added) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java Thu Jul 19 13:11:09 2012 @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.lucene; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +/** + * Defines field names that are used internally to store :path, etc in the + * search index. + */ +public final class FieldNames { + + /** + * Private constructor. + */ + private FieldNames() { + } + + /** + * Name of the field that contains the {@value} property of the node. + */ + public static final String PATH = ":path"; + + /** + * Used to select only the PATH field from the lucene documents + */ + public static final Set<String> PATH_SELECTOR = new HashSet<String>( + Arrays.asList(PATH)); + +} Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java?rev=1363321&r1=1363320&r2=1363321&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java Thu Jul 19 13:11:09 2012 @@ -16,6 +16,10 @@ */ package org.apache.jackrabbit.oak.plugins.lucene; +import static org.apache.jackrabbit.oak.plugins.lucene.FieldFactory.newPathField; +import static org.apache.jackrabbit.oak.plugins.lucene.FieldFactory.newPropertyField; +import static org.apache.jackrabbit.oak.plugins.lucene.TermFactory.newPathTerm; + import java.io.IOException; import javax.jcr.PropertyType; @@ -31,12 +35,8 @@ import org.apache.jackrabbit.oak.spi.sta import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; import org.apache.lucene.util.Version; import org.apache.tika.Tika; import org.apache.tika.exception.TikaException; @@ -45,7 +45,7 @@ public class LuceneEditor implements Com private static final Tika TIKA = new Tika(); - private static final Version VERSION = Version.LUCENE_36; + private static final Version VERSION = Version.LUCENE_40; private static final Analyzer ANALYZER = new StandardAnalyzer(VERSION); @@ -101,7 +101,7 @@ public class LuceneEditor implements Com } if (modified) { writer.updateDocument( - makePathTerm(path), + newPathTerm(path), makeDocument(path, state)); } } @@ -167,31 +167,26 @@ public class LuceneEditor implements Com private void deleteSubtree(String path, NodeState state) throws IOException { - writer.deleteDocuments(makePathTerm(path)); + writer.deleteDocuments(newPathTerm(path)); for (ChildNodeEntry entry : state.getChildNodeEntries()) { deleteSubtree(path + "/" + entry.getName(), entry.getNodeState()); } } - private Term makePathTerm(String path) { - return new Term(":path", path); - } - private Document makeDocument( String path, NodeState state) { Document document = new Document(); - document.add(new Field( - ":path", path, Store.YES, Index.NOT_ANALYZED)); + document.add(newPathField(path)); for (PropertyState property : state.getProperties()) { String pname = property.getName(); for (CoreValue value : property.getValues()) { - document.add(makeField(pname, value)); + document.add(newPropertyField(pname, parseStringValue(value))); } } return document; } - private Field makeField(String name, CoreValue value) { + private String parseStringValue(CoreValue value) { String string; if (value.getType() != PropertyType.BINARY) { string = value.getString(); @@ -204,7 +199,7 @@ public class LuceneEditor implements Com string = ""; } } - return new Field(name, string, Store.NO, Index.ANALYZED); + return string; } } Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java?rev=1363321&r1=1363320&r2=1363321&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java Thu Jul 19 13:11:09 2012 @@ -16,6 +16,10 @@ */ package org.apache.jackrabbit.oak.plugins.lucene; +import static org.apache.jackrabbit.oak.plugins.lucene.FieldNames.PATH; +import static org.apache.jackrabbit.oak.plugins.lucene.FieldNames.PATH_SELECTOR; +import static org.apache.jackrabbit.oak.plugins.lucene.TermFactory.newPathTerm; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -28,6 +32,7 @@ import org.apache.jackrabbit.oak.spi.Fil import org.apache.jackrabbit.oak.spi.Filter.PropertyRestriction; import org.apache.jackrabbit.oak.spi.QueryIndex; import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; @@ -70,32 +75,29 @@ public class LuceneIndex implements Quer @Override public Cursor query(Filter filter, String revisionId) { try { - Directory directory = - new OakDirectory(store, store.getRoot(), path); + Directory directory = new OakDirectory(store, store.getRoot(), path); try { - IndexReader reader = IndexReader.open(directory); + IndexReader reader = DirectoryReader.open(directory); try { IndexSearcher searcher = new IndexSearcher(reader); - try { - Collection<String> paths = new ArrayList<String>(); + Collection<String> paths = new ArrayList<String>(); - Query query = getQuery(filter); - if (query != null) { - TopDocs docs = searcher.search(query, Integer.MAX_VALUE); - for (ScoreDoc doc : docs.scoreDocs) { - String path = reader.document(doc.doc).get(":path"); - if ("".equals(path)) { - paths.add("/"); - } else if (path != null) { - paths.add(path); - } + Query query = getQuery(filter); + if (query != null) { + TopDocs docs = searcher + .search(query, Integer.MAX_VALUE); + for (ScoreDoc doc : docs.scoreDocs) { + String path = reader.document(doc.doc, + PATH_SELECTOR).get(PATH); + if ("".equals(path)) { + paths.add("/"); + } else if (path != null) { + paths.add(path); } } - - return new PathCursor(paths); - } finally { - searcher.close(); } + + return new PathCursor(paths); } finally { reader.close(); } @@ -103,7 +105,7 @@ public class LuceneIndex implements Quer directory.close(); } } catch (IOException e) { - return new PathCursor(Collections.<String>emptySet()); + return new PathCursor(Collections.<String> emptySet()); } } @@ -116,19 +118,19 @@ public class LuceneIndex implements Quer } switch (filter.getPathRestriction()) { case ALL_CHILDREN: - qs.add(new PrefixQuery(new Term(":path", path + "/"))); + qs.add(new PrefixQuery(newPathTerm(path + "/"))); break; case DIRECT_CHILDREN: - qs.add(new PrefixQuery(new Term(":path", path + "/"))); // FIXME + qs.add(new PrefixQuery(newPathTerm(path + "/"))); // FIXME break; case EXACT: - qs.add(new TermQuery(new Term(":path", path))); + qs.add(new TermQuery(newPathTerm(path))); break; case PARENT: int slash = path.lastIndexOf('/'); if (slash != -1) { String parent = path.substring(0, slash); - qs.add(new TermQuery(new Term(":path", parent))); + qs.add(new TermQuery(newPathTerm(parent))); } else { return null; // there's no parent of the root node } @@ -142,8 +144,8 @@ public class LuceneIndex implements Quer if (first .equals(last) && pr.firstIncluding && pr.lastIncluding) { qs.add(new TermQuery(new Term(name, first))); } else { - qs.add(new TermRangeQuery( - name, first, last, pr.firstIncluding, pr.lastIncluding)); + qs.add(TermRangeQuery.newStringRange(name, first, last, + pr.firstIncluding, pr.lastIncluding)); } } Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java?rev=1363321&r1=1363320&r2=1363321&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java Thu Jul 19 13:11:09 2012 @@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import javax.annotation.Nonnull; @@ -32,6 +33,7 @@ import org.apache.jackrabbit.oak.spi.sta import org.apache.jackrabbit.oak.spi.state.NodeStateBuilder; import org.apache.jackrabbit.oak.spi.state.NodeStore; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.NoLockFactory; @@ -60,7 +62,7 @@ class OakDirectory extends Directory { } @Nonnull - public NodeState getRoot() { + NodeState getRoot() { return rootBuilder.getNodeState(); } @@ -89,30 +91,6 @@ class OakDirectory extends Directory { } @Override - public long fileModified(String name) throws IOException { - NodeState file = getDirectory().getChildNode(name); - if (file == null) { - return 0; - } - - PropertyState property = file.getProperty("jcr:lastModified"); - if (property == null || property.isArray()) { - return 0; - } - - return property.getValue().getLong(); - } - - @Override - public void touchFile(String name) throws IOException { - NodeStateBuilder builder = directoryBuilder.getChildBuilder(name); - builder.setProperty( - "jcr:lastModified", - factory.createValue(System.currentTimeMillis())); - directory = null; - } - - @Override public void deleteFile(String name) throws IOException { directoryBuilder.removeNode(name); directory = null; @@ -132,65 +110,23 @@ class OakDirectory extends Directory { return property.getValue().length(); } + @Override - public IndexOutput createOutput(String name) throws IOException { + public IndexOutput createOutput(String name, IOContext context) + throws IOException { return new OakIndexOutput(name); } @Override - public IndexInput openInput(final String name) throws IOException { - return new IndexInput(name) { - - private final byte[] data = readFile(name); - - private int position; - - @Override - public void readBytes(byte[] b, int offset, int len) - throws IOException { - if (len < 0 || position + len > data.length) { - throw new IOException("Invalid byte range request"); - } else { - System.arraycopy(data, position, b, offset, len); - position += len; - } - } - - @Override - public byte readByte() throws IOException { - if (position >= data.length) { - throw new IOException("Invalid byte range request"); - } else { - return data[position++]; - } - } - - @Override - public void seek(long pos) throws IOException { - if (pos < 0 || pos >= data.length) { - throw new IOException("Invalid seek request"); - } else { - position = (int) pos; - } - } - - @Override - public long length() { - return data.length; - } - - @Override - public long getFilePointer() { - return position; - } - - @Override - public void close() { - // do nothing - } + public IndexInput openInput(String name, IOContext context) + throws IOException { + return new OakIndexInput(name); + } - }; + @Override + public void sync(Collection<String> names) throws IOException { + // ? } @Override @@ -314,5 +250,62 @@ class OakDirectory extends Directory { } } + private final class OakIndexInput extends IndexInput { + + private final byte[] data; + + private int position; + + public OakIndexInput(String name) throws IOException { + super(name); + this.data = readFile(name); + this.position = 0; + } + + @Override + public void readBytes(byte[] b, int offset, int len) + throws IOException { + if (len < 0 || position + len > data.length) { + throw new IOException("Invalid byte range request"); + } else { + System.arraycopy(data, position, b, offset, len); + position += len; + } + } + + @Override + public byte readByte() throws IOException { + if (position >= data.length) { + throw new IOException("Invalid byte range request"); + } else { + return data[position++]; + } + } + + @Override + public void seek(long pos) throws IOException { + if (pos < 0 || pos >= data.length) { + throw new IOException("Invalid seek request"); + } else { + position = (int) pos; + } + } + + @Override + public long length() { + return data.length; + } + + @Override + public long getFilePointer() { + return position; + } + + @Override + public void close() { + // do nothing + } + + }; } Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java?rev=1363321&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java (added) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java Thu Jul 19 13:11:09 2012 @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.lucene; + +import org.apache.lucene.index.Term; + +/** + * <code>TermFactory</code> is a factory for <code>Term</code> instances with + * frequently used field names. + */ +public final class TermFactory { + + /** + * Private constructor. + */ + private TermFactory() { + } + + /** + * Creates a Term with the given <code>path</code> value and with a field + * name {@link FieldNames#PATH}. + * + * @param path + * the path. + * @return the path term. + */ + public static Term newPathTerm(String path) { + return new Term(FieldNames.PATH, path); + } + +} Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java ------------------------------------------------------------------------------ svn:mime-type = text/plain
