Repository: vxquery
Updated Branches:
  refs/heads/master 9e4e99050 -> 303899f10


VXQUERY-198 Added Update Index Statement

Adds a metadata file for tracking the status of the Lucene index.
The update-index function uses this file to update document entries;
it handles changed, deleted, and added files.
Includes a query test.
Author: Menaka (menakaj)


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/303899f1
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/303899f1
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/303899f1

Branch: refs/heads/master
Commit: 303899f107e7314f8b264e95c347c73987d2a01b
Parents: 9e4e990
Author: Steven Glenn Jacobs <[email protected]>
Authored: Mon Jun 27 11:42:07 2016 -0700
Committer: Steven Glenn Jacobs <[email protected]>
Committed: Mon Jun 27 11:42:07 2016 -0700

----------------------------------------------------------------------
 .gitignore                                      |   1 +
 .../vxquery/functions/builtin-functions.xml     |   8 +
 .../vxquery/index/IndexDocumentBuilder.java     |   7 +-
 ...ctionFromIndexUnnestingEvaluatorFactory.java |   2 +-
 .../functions/index/IndexConstructorUtil.java   |  96 +++++--
 .../index/IndexUpdaterEvaluatorFactory.java     |  76 ++++++
 .../functions/index/updateIndex/Constants.java  |  26 ++
 .../index/updateIndex/IndexUpdater.java         | 264 +++++++++++++++++++
 .../index/updateIndex/MetaFileUtil.java         | 109 ++++++++
 .../index/updateIndex/XmlMetadata.java          |  57 ++++
 .../Indexing/updateIndex.txt                    |   0
 .../Queries/XQuery/Indexing/updateIndex.xq      |  19 ++
 .../src/test/resources/cat/IndexingQueries.xml  |   5 +
 13 files changed, 647 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 65263b3..734a174 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,6 @@
 .settings
 .classpath
 .idea
+.iml
 target
 /ClusterControllerService/

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml 
b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
index adeef38..870ab75 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
@@ -135,6 +135,14 @@
         <return type="node()*"/> 
         <runtime type="scalar" 
class="org.apache.vxquery.runtime.functions.index.IndexConstructorScalarEvaluatorFactory"/>
     </function>
+
+    <!-- fn:update-index($indexFolder  as xs:string?) as  node()* -->
+    <function name="fn:update-index">
+        <param name="index-folder" type="xs:string?"/>
+        <return type="node()*"/>
+        <runtime type="scalar"
+                 
class="org.apache.vxquery.runtime.functions.index.IndexUpdaterEvaluatorFactory"/>
+    </function>
     
     <!-- fn:collection-from-index($indexfolder  as xs:string?, $elementpath as 
xs:string?) as  node()* -->
     <function name="fn:collection-from-index">

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java 
b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java
index 2884097..bccd28d 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java
@@ -56,6 +56,7 @@ import 
org.apache.vxquery.datamodel.accessors.nodes.NodeTreePointable;
 import org.apache.vxquery.datamodel.accessors.nodes.TextOrCommentNodePointable;
 import org.apache.vxquery.datamodel.values.ValueTag;
 import org.apache.vxquery.runtime.functions.cast.CastToStringOperation;
+import org.apache.vxquery.runtime.functions.index.updateIndex.Constants;
 import org.apache.vxquery.serializer.XMLSerializer;
 
 public class IndexDocumentBuilder extends XMLSerializer {
@@ -74,6 +75,7 @@ public class IndexDocumentBuilder extends XMLSerializer {
     private final int sstart;
     private final int lstart;
     private final IndexWriter writer;
+    private final String filePath;
 
     class ComplexItem {
         public final StringField sf;
@@ -86,10 +88,12 @@ public class IndexDocumentBuilder extends XMLSerializer {
     }
 
     //TODO: Handle Processing Instructions, PrefixedNames, and Namepsace 
entries
-    public IndexDocumentBuilder(IPointable tree, IndexWriter inWriter) {
+    public IndexDocumentBuilder(IPointable tree, IndexWriter inWriter, String 
file) {
         this.treePointable = tree;
         writer = inWriter;
 
+        this.filePath = file;
+
         //convert to tagged value pointable
         TaggedValuePointable tvp = (TaggedValuePointable) 
TaggedValuePointable.FACTORY.createPointable();
         tvp.set(treePointable.getByteArray(), 0, treePointable.getLength());
@@ -109,6 +113,7 @@ public class IndexDocumentBuilder extends XMLSerializer {
     //This is a wrapper to start indexing using the functions adapted from 
XMLSerializer
     public void printStart() throws IOException {
 
+        doc.add(new StringField(Constants.FIELD_PATH, filePath, 
Field.Store.YES));
         print(bstart, sstart, lstart, "0", "");
         for (int i = 1; i < results.size() - 1; i++) {
             //TODO: Since each doc is a file,

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java
index cf0b203..9bd6b92 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java
@@ -136,7 +136,7 @@ public class CollectionFromIndexUnnestingEvaluatorFactory 
extends AbstractTagged
                 }
                 tvp1.getValue(stringIndexFolder);
                 tvp2.getValue(stringElementPath);
-                //This whole loop is to get the string arguments, indefolder, 
elementpath, and match option
+                //This whole loop is to get the string arguments, indexFolder, 
elementPath, and match option
                 try {
                     // Get the list of files.
                     bbis.setByteBuffer(ByteBuffer.wrap(

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java
index 7191827..ed409f1 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java
@@ -16,13 +16,6 @@
 */
 package org.apache.vxquery.runtime.functions.index;
 
-import java.io.DataInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.file.Paths;
-import java.util.Arrays;
-
 import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
@@ -39,15 +32,32 @@ import org.apache.vxquery.datamodel.values.ValueTag;
 import org.apache.vxquery.exceptions.ErrorCode;
 import org.apache.vxquery.exceptions.SystemException;
 import org.apache.vxquery.index.IndexDocumentBuilder;
+import org.apache.vxquery.runtime.functions.index.updateIndex.Constants;
+import org.apache.vxquery.runtime.functions.index.updateIndex.MetaFileUtil;
+import org.apache.vxquery.runtime.functions.index.updateIndex.XmlMetadata;
 import org.apache.vxquery.runtime.functions.util.FunctionHelper;
 import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
 import org.apache.vxquery.xmlparser.XMLParser;
 
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.file.Paths;
+import java.security.NoSuchAlgorithmException;
+import java.util.Arrays;
+import java.util.concurrent.ConcurrentHashMap;
+
 public class IndexConstructorUtil {
+    static boolean isMetaFilePresent = false;
+    static MetaFileUtil metaFileUtil;
+    static ConcurrentHashMap<String, XmlMetadata> metadataMap = new 
ConcurrentHashMap<>();
+
     public static void evaluate(TaggedValuePointable[] args, IPointable 
result, UTF8StringPointable stringp,
-            ByteBufferInputStream bbis, DataInputStream di, SequenceBuilder 
sb, ArrayBackedValueStorage abvs,
-            ITreeNodeIdProvider nodeIdProvider, ArrayBackedValueStorage 
abvsFileNode, TaggedValuePointable nodep,
-            boolean isElementPath, String nodeId) throws SystemException {
+                                ByteBufferInputStream bbis, DataInputStream 
di, SequenceBuilder sb,
+                                ArrayBackedValueStorage abvs, 
ITreeNodeIdProvider nodeIdProvider,
+                                ArrayBackedValueStorage abvsFileNode, 
TaggedValuePointable nodep,
+                                boolean isElementPath, String nodeId) throws 
SystemException {
         String collectionFolder;
         String indexFolder;
         TaggedValuePointable collectionTVP = args[0];
@@ -69,6 +79,10 @@ public class IndexConstructorUtil {
             
bbis.setByteBuffer(ByteBuffer.wrap(Arrays.copyOfRange(stringp.getByteArray(), 
stringp.getStartOffset(),
                     stringp.getLength() + stringp.getStartOffset())), 0);
             indexFolder = di.readUTF();
+
+            metaFileUtil = MetaFileUtil.create(indexFolder);
+            isMetaFilePresent = metaFileUtil.isMetaFilePresent();
+
         } catch (IOException e) {
             throw new SystemException(ErrorCode.SYSE0001, e);
         }
@@ -95,6 +109,16 @@ public class IndexConstructorUtil {
             indexXmlFiles(collectionDirectory, writer, isElementPath, nodep, 
abvsFileNode, nodeIdProvider, sb, bbis, di,
                     nodeId);
 
+            if (!isMetaFilePresent) {
+                // Add collection information to the map.
+                XmlMetadata data = new XmlMetadata();
+                data.setPath(collectionFolder);
+                metadataMap.put(Constants.COLLECTION_ENTRY, data);
+
+                // Write metadata map to a file.
+                metaFileUtil.writeMetaFile(metadataMap);
+            }
+
             //This makes write slower but search faster.
             writer.forceMerge(1);
 
@@ -111,25 +135,34 @@ public class IndexConstructorUtil {
      * it indexes that document node.
      */
     public static void indexXmlFiles(File collectionDirectory, IndexWriter 
writer, boolean isElementPath,
-            TaggedValuePointable nodep, ArrayBackedValueStorage abvsFileNode, 
ITreeNodeIdProvider nodeIdProvider,
-            SequenceBuilder sb, ByteBufferInputStream bbis, DataInputStream 
di, String nodeId)
-                    throws SystemException, IOException {
+                                     TaggedValuePointable nodep, 
ArrayBackedValueStorage abvsFileNode,
+                                     ITreeNodeIdProvider nodeIdProvider, 
SequenceBuilder sb,
+                                     ByteBufferInputStream bbis, 
DataInputStream di, String nodeId)
+            throws SystemException, IOException {
+
+
         for (File file : collectionDirectory.listFiles()) {
 
             if (readableXmlFile(file.getPath())) {
                 abvsFileNode.reset();
-                // Get the document node
-                XMLParser parser = new XMLParser(false, nodeIdProvider, 
nodeId);
-                FunctionHelper.readInDocFromString(file.getPath(), bbis, di, 
abvsFileNode, parser);
 
-                nodep.set(abvsFileNode.getByteArray(), 
abvsFileNode.getStartOffset(), abvsFileNode.getLength());
-
-                //Add the document to the index
-                //Creates one lucene doc per file
-                IndexDocumentBuilder ibuilder = new 
IndexDocumentBuilder(nodep, writer);
+                IndexDocumentBuilder ibuilder = getIndexBuilder(file, writer, 
nodep, abvsFileNode, nodeIdProvider,
+                        bbis, di, nodeId);
 
                 ibuilder.printStart();
 
+                if (!isMetaFilePresent) {
+                    XmlMetadata xmlMetadata = new XmlMetadata();
+                    xmlMetadata.setPath(file.getCanonicalPath());
+                    xmlMetadata.setFileName(file.getName());
+                    try {
+                        xmlMetadata.setMd5(metaFileUtil.generateMD5(file));
+                    } catch (NoSuchAlgorithmException e) {
+                        throw new SystemException(ErrorCode.SYSE0001, e);
+                    }
+                    metadataMap.put(file.getCanonicalPath(), xmlMetadata);
+                }
+
             } else if (file.isDirectory()) {
                 // Consider all XML file in sub directories.
                 indexXmlFiles(file, writer, isElementPath, nodep, 
abvsFileNode, nodeIdProvider, sb, bbis, di, nodeId);
@@ -141,4 +174,25 @@ public class IndexConstructorUtil {
         return (path.toLowerCase().endsWith(".xml") || 
path.toLowerCase().endsWith(".xml.gz"));
     }
 
+
+    /**
+     * Separated from create index method so that it could be used as a helper 
function in IndexUpdater
+     */
+    public static IndexDocumentBuilder getIndexBuilder(File file, IndexWriter 
writer,
+                                                       TaggedValuePointable 
nodep, ArrayBackedValueStorage abvsFileNode,
+                                                       ITreeNodeIdProvider 
nodeIdProvider,
+                                                       ByteBufferInputStream 
bbis, DataInputStream di, String nodeId)
+            throws IOException {
+
+        //Get the document node
+        XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId);
+        FunctionHelper.readInDocFromString(file.getPath(), bbis, di, 
abvsFileNode, parser);
+
+        nodep.set(abvsFileNode.getByteArray(), abvsFileNode.getStartOffset(), 
abvsFileNode.getLength());
+
+        //Add the document to the index
+        //Creates one lucene doc per file
+        return new IndexDocumentBuilder(nodep, writer, 
file.getCanonicalPath());
+    }
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexUpdaterEvaluatorFactory.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexUpdaterEvaluatorFactory.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexUpdaterEvaluatorFactory.java
new file mode 100644
index 0000000..0231f3d
--- /dev/null
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexUpdaterEvaluatorFactory.java
@@ -0,0 +1,76 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index;
+
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder;
+import org.apache.vxquery.exceptions.SystemException;
+import 
org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluator;
+import 
org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluatorFactory;
+import org.apache.vxquery.runtime.functions.index.updateIndex.IndexUpdater;
+import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
+import org.apache.vxquery.xmlparser.TreeNodeIdProvider;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.security.NoSuchAlgorithmException;
+
+/**
+ * Update the index of collection
+ */
+public class IndexUpdaterEvaluatorFactory extends 
AbstractTaggedValueArgumentScalarEvaluatorFactory {
+    public IndexUpdaterEvaluatorFactory(IScalarEvaluatorFactory[] args) {
+        super(args);
+    }
+
+    @Override
+    protected IScalarEvaluator createEvaluator(IHyracksTaskContext ctx, 
IScalarEvaluator[] args) throws AlgebricksException {
+        final ArrayBackedValueStorage abvs = new ArrayBackedValueStorage();
+        final UTF8StringPointable stringp = (UTF8StringPointable) 
UTF8StringPointable.FACTORY.createPointable();
+        final TaggedValuePointable nodep = (TaggedValuePointable) 
TaggedValuePointable.FACTORY.createPointable();
+        final ByteBufferInputStream bbis = new ByteBufferInputStream();
+        final DataInputStream di = new DataInputStream(bbis);
+        final SequenceBuilder sb = new SequenceBuilder();
+        final ArrayBackedValueStorage abvsFileNode = new 
ArrayBackedValueStorage();
+        final int partition = 
ctx.getTaskAttemptId().getTaskId().getPartition();
+        final String nodeId = 
ctx.getJobletContext().getApplicationContext().getNodeId();
+        final ITreeNodeIdProvider nodeIdProvider = new 
TreeNodeIdProvider((short) partition);
+
+        return new AbstractTaggedValueArgumentScalarEvaluator(args) {
+
+            @Override
+            protected void evaluate(TaggedValuePointable[] args, IPointable 
result) throws SystemException {
+                IndexUpdater updater = new IndexUpdater(args, result, stringp, 
bbis, di, sb, abvs, nodeIdProvider,
+                        abvsFileNode, nodep, nodeId);
+                try {
+                    updater.evaluate();
+                } catch (IOException | NoSuchAlgorithmException e) {
+                    e.printStackTrace();
+                }
+            }
+
+        };
+    }
+}

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/Constants.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/Constants.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/Constants.java
new file mode 100644
index 0000000..321d348
--- /dev/null
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/Constants.java
@@ -0,0 +1,26 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index.updateIndex;
+
+/**
+ * Constants used in updating index
+ */
+public class Constants {
+    public static String FIELD_PATH = "path";
+    public static String META_FILE_NAME = "metaFile.file";
+    public static String COLLECTION_ENTRY = "collection";
+}

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/IndexUpdater.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/IndexUpdater.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/IndexUpdater.java
new file mode 100644
index 0000000..11621a7
--- /dev/null
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/IndexUpdater.java
@@ -0,0 +1,264 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index.updateIndex;
+
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder;
+import org.apache.vxquery.datamodel.values.ValueTag;
+import org.apache.vxquery.exceptions.ErrorCode;
+import org.apache.vxquery.exceptions.SystemException;
+import org.apache.vxquery.index.IndexDocumentBuilder;
+import org.apache.vxquery.runtime.functions.index.CaseSensitiveAnalyzer;
+import org.apache.vxquery.runtime.functions.index.IndexConstructorUtil;
+import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.file.Paths;
+import java.security.NoSuchAlgorithmException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Update the index if the source files are changed.
+ */
+public class IndexUpdater {
+    private MetaFileUtil metaFileUtil;
+    private ConcurrentHashMap<String, XmlMetadata> metadataMap;
+    private TaggedValuePointable[] args;
+    private IPointable result;
+    private UTF8StringPointable stringp;
+    private ByteBufferInputStream bbis;
+    private DataInputStream di;
+    private SequenceBuilder sb;
+    private ArrayBackedValueStorage abvs;
+    private ITreeNodeIdProvider nodeIdProvider;
+    private ArrayBackedValueStorage abvsFileNode;
+    private TaggedValuePointable nodep;
+    private String nodeId;
+    private IndexWriter indexWriter;
+    private Set<String> pathsFromFileList;
+    private Logger LOGGER = Logger.getLogger("Index Updater");
+
+    //TODO : Implement for parallelizing
+    public IndexUpdater(TaggedValuePointable[] args, IPointable result, 
UTF8StringPointable stringp,
+                        ByteBufferInputStream bbis, DataInputStream di, 
SequenceBuilder sb, ArrayBackedValueStorage abvs,
+                        ITreeNodeIdProvider nodeIdProvider, 
ArrayBackedValueStorage abvsFileNode,
+                        TaggedValuePointable nodep,  String nodeId) {
+        this.args = args;
+        this.result = result;
+        this.stringp = stringp;
+        this.bbis = bbis;
+        this.di = di;
+        this.sb = sb;
+        this.abvs = abvs;
+        this.nodeIdProvider = nodeIdProvider;
+        this.abvsFileNode = abvsFileNode;
+        this.nodep = nodep;
+        this.nodeId = nodeId;
+        this.pathsFromFileList = new HashSet<>();
+    }
+
+    public void evaluate() throws SystemException, IOException, 
NoSuchAlgorithmException {
+        String collectionFolder;
+        String indexFolder;
+        TaggedValuePointable indexTVP = args[0];
+
+        if (indexTVP.getTag() != ValueTag.XS_STRING_TAG) {
+            throw new SystemException(ErrorCode.FORG0006);
+        }
+
+        XmlMetadata collectionMetadata;
+        try {
+            // Get the index folder
+            indexTVP.getValue(stringp);
+            
bbis.setByteBuffer(ByteBuffer.wrap(Arrays.copyOfRange(stringp.getByteArray(), 
stringp.getStartOffset(),
+                    stringp.getLength() + stringp.getStartOffset())), 0);
+            indexFolder = di.readUTF();
+
+            // Read the metadata file and load the metadata map into memory.
+            metaFileUtil = MetaFileUtil.create(indexFolder);
+            metadataMap = metaFileUtil.readMetaFile();
+
+            // Retrieve the collection folder path.
+            // Remove the entry for ease of the next steps.
+            collectionMetadata = 
metadataMap.remove(Constants.COLLECTION_ENTRY);
+            collectionFolder = collectionMetadata.getPath();
+
+        } catch (IOException | ClassNotFoundException e) {
+            throw new SystemException(ErrorCode.SYSE0001, e);
+        }
+
+        File collectionDirectory = new File(collectionFolder);
+        if (!collectionDirectory.exists()) {
+            throw new RuntimeException("The collection directory (" + 
collectionFolder + ") does not exist.");
+        }
+
+        abvs.reset();
+        sb.reset(abvs);
+
+        Directory fsdir = FSDirectory.open(Paths.get(indexFolder));
+        indexWriter = new IndexWriter(fsdir, new IndexWriterConfig(new 
CaseSensitiveAnalyzer()).
+                setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND));
+
+        //Execute update index process
+        updateIndex(collectionDirectory);
+
+        //Detect deleted files and execute the delete index process.
+        deleteIndexOfDeletedFiles(metadataMap.keySet(), pathsFromFileList);
+
+        // Add collection path entry back
+        metadataMap.put(Constants.COLLECTION_ENTRY, collectionMetadata);
+
+        //Write the updated metadata to the file.
+        metaFileUtil.writeMetaFile(metadataMap);
+
+        indexWriter.forceMerge(1);
+
+        indexWriter.close();
+
+        sb.finish();
+        result.set(abvs);
+    }
+
+    /**
+     * Check the collection for changes.
+     * If changes are detected, update the index
+     *
+     * @param collection : Collection folder path
+     */
+    private void updateIndex(File collection) throws IOException, 
NoSuchAlgorithmException {
+
+        File[] list = collection.listFiles();
+
+        assert list != null;
+        for (File file : list) {
+            pathsFromFileList.add(file.getCanonicalPath());
+            if (IndexConstructorUtil.readableXmlFile(file.getCanonicalPath())) 
{
+                XmlMetadata data = metadataMap.get(file.getCanonicalPath());
+                String md5 = metaFileUtil.generateMD5(file);
+
+                abvsFileNode.reset();
+
+                IndexDocumentBuilder indexDocumentBuilder;
+                if (data != null) {
+
+                    // This case checks whether the file has been changed.
+                    // If the file has changed, delete the existing document, 
create a new index document and add it
+                    // to the current index.
+                    // At the same time, update the metadata for the file.
+                    if (!md5.equals(data.getMd5())) {
+
+                        //Update index corresponding to the xml file.
+                        indexWriter.deleteDocuments(new 
Term(Constants.FIELD_PATH, file.getCanonicalPath()));
+                        indexDocumentBuilder = 
IndexConstructorUtil.getIndexBuilder(file, indexWriter,
+                                nodep, abvsFileNode, nodeIdProvider, bbis, di, 
nodeId);
+                        indexDocumentBuilder.printStart();
+
+                        if (LOGGER.isDebugEnabled())
+                            LOGGER.log(Level.DEBUG, "New Index is created for 
updated file " + file.getCanonicalPath());
+
+                        //Update the metadata map.
+                        XmlMetadata metadata = updateEntry(file, data);
+                        metadataMap.replace(file.getCanonicalPath(), metadata);
+
+                    }
+                } else {
+
+                    // In this case, the xml file has not been added to the index. 
(It is a newly added file)
+                    // Therefore generate a new index for this file and add it 
to the existing index.
+                    indexDocumentBuilder = 
IndexConstructorUtil.getIndexBuilder(file, indexWriter,
+                            nodep, abvsFileNode, nodeIdProvider, bbis, di, 
nodeId);
+                    indexDocumentBuilder.printStart();
+
+                    if (LOGGER.isDebugEnabled())
+                        LOGGER.log(Level.DEBUG, "New Index is created for 
newly added file " + file.getCanonicalPath());
+
+                    XmlMetadata metadata = updateEntry(file, null);
+                    metadataMap.put(file.getCanonicalPath(), metadata);
+                }
+            } else if (file.isDirectory()) {
+                updateIndex(file);
+            }
+        }
+    }
+
+
+    /**
+     * Update the current XmlMetadata object related to the currently reading 
XML file.
+     *
+     * @param file : XML file
+     * @param metadata : Existing metadata object
+     * @return : XML metadata object with updated fields.
+     * @throws IOException
+     * @throws NoSuchAlgorithmException
+     */
+    public XmlMetadata updateEntry(File file, XmlMetadata metadata) throws 
IOException, NoSuchAlgorithmException {
+
+        if (metadata == null)
+            metadata = new XmlMetadata();
+
+        metadata.setPath(file.getCanonicalPath());
+        metadata.setFileName(file.getName());
+        metadata.setMd5(metaFileUtil.generateMD5(file));
+        return metadata;
+    }
+
+    /**
+     * Delete the index of deleted files.
+     *
+     * @param pathsFromMap      : Set of paths taken from metafile.
+     * @param pathsFromFileList : Set of paths taken from list of existing 
files.
+     * @throws IOException
+     */
+    public void deleteIndexOfDeletedFiles(Set<String> pathsFromMap, 
Set<String> pathsFromFileList) throws IOException {
+        Set<String> sfm = new HashSet<>(pathsFromMap);
+
+        // If any file has been deleted from the collection, the number of 
files stored in metadata is higher  than
+        // the actual number of files.
+        // With set difference, the paths of deleted files are taken from the 
stored metadata.
+        // Delete the corresponding indexes of each file from the index and as 
well as remove the entry from the
+        // metadata file.
+
+        if (sfm.size() > pathsFromFileList.size()) {
+            sfm.removeAll(pathsFromFileList);
+
+            for (String s : sfm) {
+                metadataMap.remove(s);
+                indexWriter.deleteDocuments(new Term(Constants.FIELD_PATH, s));
+                if (LOGGER.isDebugEnabled())
+                    LOGGER.log(Level.DEBUG, "Index of the deleted file " + s + 
" was deleted from the index!");
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/MetaFileUtil.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/MetaFileUtil.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/MetaFileUtil.java
new file mode 100644
index 0000000..97c9da7
--- /dev/null
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/MetaFileUtil.java
@@ -0,0 +1,109 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index.updateIndex;
+
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+import javax.xml.bind.DatatypeConverter;
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Utility class for writing, reading metadata file and generating checksum.
+ */
+public class MetaFileUtil {
+
+    private File metaFile;
+    private Logger LOGGER = Logger.getLogger("MetadataFileUtil");
+
+    private MetaFileUtil(String indexFolder) {
+        this.metaFile = new File(indexFolder + "/" + Constants.META_FILE_NAME);
+    }
+
+    public static MetaFileUtil create(String indexFolder) {
+        return new MetaFileUtil(indexFolder);
+    }
+
+    /**
+     * Checks for existing metadata file.
+     * @return true if the metadata file is present
+     */
+    public boolean isMetaFilePresent() {
+        return metaFile.exists();
+    }
+
+    /**
+     * Write the given List of XmlMetadata objects to a file.
+     * If the metadata file is already presents, delete it.
+     *
+     * @param metadataMap : Set of XmlMetaData objects
+     * @throws IOException
+     */
+    public void writeMetaFile(ConcurrentHashMap<String, XmlMetadata> 
metadataMap) throws IOException {
+        if (this.isMetaFilePresent()) 
Files.delete(Paths.get(metaFile.getCanonicalPath()));
+
+        FileOutputStream fileOutputStream = new 
FileOutputStream(this.metaFile);
+        ObjectOutputStream objectOutputStream = new 
ObjectOutputStream(fileOutputStream);
+        objectOutputStream.writeObject(metadataMap);
+        objectOutputStream.close();
+
+        if (LOGGER.isDebugEnabled())
+            LOGGER.log(Level.DEBUG, "Writing metadata file completed 
successfully!");
+
+    }
+
+
+    /**
+     * Read metadata file
+     *
+     * @return : List of XmlMetadata objects
+     * @throws IOException
+     * @throws ClassNotFoundException
+     */
+    public ConcurrentHashMap<String, XmlMetadata> readMetaFile() throws 
IOException, ClassNotFoundException {
+        FileInputStream fin = new FileInputStream(this.metaFile);
+        ObjectInputStream ois = new ObjectInputStream(fin);
+        ConcurrentHashMap<String, XmlMetadata> metadataMap = new 
ConcurrentHashMap<>((Map<String, XmlMetadata>)ois
+                .readObject()) ;
+        ois.close();
+
+        return metadataMap;
+
+    }
+
+    /**
+     * Generate MD5 checksum string for a given file.
+     *
+     * @param file : File which the checksum should be generated.
+     * @return : Checksum String
+     * @throws NoSuchAlgorithmException
+     * @throws IOException
+     */
+    public String generateMD5(File file) throws NoSuchAlgorithmException, 
IOException {
+        MessageDigest md = MessageDigest.getInstance("MD5");
+        md.update(Files.readAllBytes(file.toPath()));
+        byte[] md5 = md.digest();
+        return DatatypeConverter.printHexBinary(md5);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadata.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadata.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadata.java
new file mode 100644
index 0000000..38f283f
--- /dev/null
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadata.java
@@ -0,0 +1,57 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index.updateIndex;
+
+import java.io.Serializable;
+
+/**
+ * Class to store metadata related to an XML file.
+ * This contains
+ *      - Path to the xml file
+ *      - MD5 Checksum String
+ *      - File name
+ *
+ * Instances are Java-serialized into the index metadata file by MetaFileUtil.
+ */
+public class XmlMetadata implements Serializable {
+
+    /**
+     * Explicit serialization version. Without it the JVM derives the version from the
+     * shape of the class, so adding or changing a member would make previously written
+     * metadata files unreadable.
+     */
+    private static final long serialVersionUID = 1L;
+
+    private String path;
+    private String md5;
+    private String fileName;
+
+    /**
+     * @return the path to the XML file
+     */
+    public String getPath() {
+        return path;
+    }
+
+    /**
+     * @param path : the path to the XML file
+     */
+    public void setPath(String path) {
+        this.path = path;
+    }
+
+    /**
+     * @return the MD5 checksum string of the XML file
+     */
+    public String getMd5() {
+        return md5;
+    }
+
+    /**
+     * @param md5 : the MD5 checksum string of the XML file
+     */
+    public void setMd5(String md5) {
+        this.md5 = md5;
+    }
+
+    /**
+     * @return the name of the XML file
+     */
+    public String getFileName() {
+        return fileName;
+    }
+
+    /**
+     * @param fileName : the name of the XML file
+     */
+    public void setFileName(String fileName) {
+        this.fileName = fileName;
+    }
+}

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/updateIndex.txt
----------------------------------------------------------------------
diff --git 
a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/updateIndex.txt 
b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/updateIndex.txt
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/updateIndex.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/updateIndex.xq 
b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/updateIndex.xq
new file mode 100644
index 0000000..061f1c1
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/updateIndex.xq
@@ -0,0 +1,19 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: Update the existing Lucene index stored in the given index folder :)
+update-index("target/tmp/indexFolder")

http://git-wip-us.apache.org/repos/asf/vxquery/blob/303899f1/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml
----------------------------------------------------------------------
diff --git a/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml 
b/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml
index c69a6b5..369dc82 100644
--- a/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml
+++ b/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml
@@ -60,4 +60,9 @@
       <query name="useIndex7" date="2016-05-26"/>
       <output-file compare="Text">useIndex7.txt</output-file>
    </test-case>
+   <test-case name="update-index" FilePath="Indexing/" Creator="Menaka 
Jayawardena">
+      <description>Update the existing index</description>
+      <query name="updateIndex" date="2016-06-24"/>
+      <output-file compare="Text">updateIndex.txt</output-file>
+   </test-case>
 </test-group>

Reply via email to