Repository: vxquery Updated Branches: refs/heads/master 2b5932660 -> 1f623b166
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java new file mode 100644 index 0000000..7191827 --- /dev/null +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java @@ -0,0 +1,144 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +package org.apache.vxquery.runtime.functions.index; + +import java.io.DataInputStream; +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Paths; +import java.util.Arrays; + +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; +import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.vxquery.datamodel.accessors.TaggedValuePointable; +import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder; +import org.apache.vxquery.datamodel.values.ValueTag; +import org.apache.vxquery.exceptions.ErrorCode; +import org.apache.vxquery.exceptions.SystemException; +import org.apache.vxquery.index.IndexDocumentBuilder; +import org.apache.vxquery.runtime.functions.util.FunctionHelper; +import org.apache.vxquery.xmlparser.ITreeNodeIdProvider; +import org.apache.vxquery.xmlparser.XMLParser; + +public class IndexConstructorUtil { + public static void evaluate(TaggedValuePointable[] args, IPointable result, UTF8StringPointable stringp, + ByteBufferInputStream bbis, DataInputStream di, SequenceBuilder sb, ArrayBackedValueStorage abvs, + ITreeNodeIdProvider nodeIdProvider, ArrayBackedValueStorage abvsFileNode, TaggedValuePointable nodep, + boolean isElementPath, String nodeId) throws SystemException { + String collectionFolder; + String indexFolder; + TaggedValuePointable collectionTVP = args[0]; + TaggedValuePointable indexTVP = args[1]; + + if (collectionTVP.getTag() != ValueTag.XS_STRING_TAG || indexTVP.getTag() != ValueTag.XS_STRING_TAG) { + throw new 
SystemException(ErrorCode.FORG0006); + } + + try { + // Get the list of files. + collectionTVP.getValue(stringp); + bbis.setByteBuffer(ByteBuffer.wrap(Arrays.copyOfRange(stringp.getByteArray(), stringp.getStartOffset(), + stringp.getLength() + stringp.getStartOffset())), 0); + collectionFolder = di.readUTF(); + + // Get the index folder + indexTVP.getValue(stringp); + bbis.setByteBuffer(ByteBuffer.wrap(Arrays.copyOfRange(stringp.getByteArray(), stringp.getStartOffset(), + stringp.getLength() + stringp.getStartOffset())), 0); + indexFolder = di.readUTF(); + } catch (IOException e) { + throw new SystemException(ErrorCode.SYSE0001, e); + } + File collectionDirectory = new File(collectionFolder); + if (!collectionDirectory.exists()) { + throw new RuntimeException("The collection directory (" + collectionFolder + ") does not exist."); + } + + try { + abvs.reset(); + sb.reset(abvs); + + Directory dir = FSDirectory.open(Paths.get(indexFolder)); + Analyzer analyzer = new CaseSensitiveAnalyzer(); + IndexWriterConfig iwc = new IndexWriterConfig(analyzer); + + // Create will overwrite the index everytime + iwc.setOpenMode(OpenMode.CREATE); + + //Create an index writer + IndexWriter writer = new IndexWriter(dir, iwc); + + //Add files to index + indexXmlFiles(collectionDirectory, writer, isElementPath, nodep, abvsFileNode, nodeIdProvider, sb, bbis, di, + nodeId); + + //This makes write slower but search faster. + writer.forceMerge(1); + + writer.close(); + + sb.finish(); + result.set(abvs); + } catch (IOException e) { + throw new SystemException(ErrorCode.SYSE0001, e); + } + } + + /*This function goes recursively one file at a time. First it turns the file into an ABVS document node, then + * it indexes that document node. 
+ */ + public static void indexXmlFiles(File collectionDirectory, IndexWriter writer, boolean isElementPath, + TaggedValuePointable nodep, ArrayBackedValueStorage abvsFileNode, ITreeNodeIdProvider nodeIdProvider, + SequenceBuilder sb, ByteBufferInputStream bbis, DataInputStream di, String nodeId) + throws SystemException, IOException { + for (File file : collectionDirectory.listFiles()) { + + if (readableXmlFile(file.getPath())) { + abvsFileNode.reset(); + // Get the document node + XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId); + FunctionHelper.readInDocFromString(file.getPath(), bbis, di, abvsFileNode, parser); + + nodep.set(abvsFileNode.getByteArray(), abvsFileNode.getStartOffset(), abvsFileNode.getLength()); + + //Add the document to the index + //Creates one lucene doc per file + IndexDocumentBuilder ibuilder = new IndexDocumentBuilder(nodep, writer); + + ibuilder.printStart(); + + } else if (file.isDirectory()) { + // Consider all XML file in sub directories. + indexXmlFiles(file, writer, isElementPath, nodep, abvsFileNode, nodeIdProvider, sb, bbis, di, nodeId); + } + } + } + + public static boolean readableXmlFile(String path) { + return (path.toLowerCase().endsWith(".xml") || path.toLowerCase().endsWith(".xml.gz")); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java index d394bbc..b6668ba 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java @@ -480,7 +480,7 @@ public class FunctionHelper { public static 
boolean compareTaggedValues(AbstractValueComparisonOperation aOp, TaggedValuePointable tvp1, TaggedValuePointable tvp2, DynamicContext dCtx, TypedPointables tp1, TypedPointables tp2) - throws SystemException { + throws SystemException { int tid1 = getBaseTypeForComparisons(tvp1.getTag()); int tid2 = getBaseTypeForComparisons(tvp2.getTag()); @@ -1217,6 +1217,11 @@ public class FunctionHelper { } catch (SystemException e) { throw new HyracksDataException(e); } + readInDocFromString(fName, bbis, di, abvs, parser); + } + + public static void readInDocFromString(String fName, ByteBufferInputStream bbis, DataInputStream di, + ArrayBackedValueStorage abvs, XMLParser parser) throws HyracksDataException { if (!fName.contains("hdfs:/")) { File file = new File(fName); if (file.exists()) { http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java index 03a125b..846c27b 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java @@ -51,40 +51,41 @@ import org.xml.sax.ext.LexicalHandler; public class SAXContentHandler implements ContentHandler, LexicalHandler { // XML node builders - private final AttributeNodeBuilder anb; - private final CommentNodeBuilder cnb; - private final DictionaryBuilder db; - private final DocumentNodeBuilder docb; - private final PINodeBuilder pinb; - private final TextNodeBuilder tnb; - private final UTF8StringBuilder utf8b; - private final List<ElementNodeBuilder> enbStack; - private final List<ElementNodeBuilder> freeENBList; + protected final AttributeNodeBuilder anb; + protected final CommentNodeBuilder cnb; + 
protected final DictionaryBuilder db; + protected final DocumentNodeBuilder docb; + protected final PINodeBuilder pinb; + protected final TextNodeBuilder tnb; + protected final UTF8StringBuilder utf8b; + protected final List<ElementNodeBuilder> enbStack; + protected final List<ElementNodeBuilder> freeENBList; + protected boolean isIndexHandler; // Frame writing variables - private IFrameFieldAppender appender; + protected IFrameFieldAppender appender; private int tupleIndex; private IFrameWriter writer; // Element writing and path step variables - private boolean skipping; + protected boolean skipping; private String[] childLocalName = null; private String[] childUri = null; private boolean[] subElement = null; private final TaggedValuePointable tvp; // Basic tracking and setting variables - private final boolean attachTypes; - private final boolean createNodeIds; + protected final boolean attachTypes; + protected final boolean createNodeIds; private int depth; - private final ArrayBackedValueStorage resultABVS; - private boolean pendingText; - private int nodeIdCounter; - private final ITreeNodeIdProvider nodeIdProvider; - private final ArrayBackedValueStorage tempABVS; + protected final ArrayBackedValueStorage resultABVS; + protected boolean pendingText; + protected int nodeIdCounter; + protected final ITreeNodeIdProvider nodeIdProvider; + protected final ArrayBackedValueStorage tempABVS; private final ArrayBackedValueStorage textABVS; - public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider) { + public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider, boolean isIndexHandler) { // XML node builders anb = new AttributeNodeBuilder(); cnb = new CommentNodeBuilder(); @@ -110,11 +111,16 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { this.nodeIdProvider = nodeIdProvider; tempABVS = new ArrayBackedValueStorage(); textABVS = new ArrayBackedValueStorage(); + this.isIndexHandler = 
isIndexHandler; + if (isIndexHandler) { + this.appender = null; + this.skipping = false; + } } public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider, IFrameFieldAppender appender, List<SequenceType> childSequenceTypes) { - this(attachTypes, nodeIdProvider); + this(attachTypes, nodeIdProvider, false); // Frame writing variables this.appender = appender; @@ -189,16 +195,21 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { return; } try { - boolean nonSkipped = foundFirstNonSkippedElement(); + boolean nonSkipped = false; + if (!isIndexHandler) { + nonSkipped = foundFirstNonSkippedElement(); + } flushText(); ElementNodeBuilder enb = enbStack.remove(enbStack.size() - 1); enb.endChildrenChunk(); endChildInParent(enb, nonSkipped); freeENB(enb); - if (nonSkipped) { - writeElement(); + if (!isIndexHandler) { + if (nonSkipped) { + writeElement(); + } + endElementChildPathStep(); } - endElementChildPathStep(); } catch (IOException e) { e.printStackTrace(); throw new SAXException(e); @@ -248,7 +259,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { @Override public void startDocument() throws SAXException { - if (subElement == null) { + if (isIndexHandler || subElement == null) { skipping = false; } db.reset(); @@ -305,7 +316,10 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { @Override public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException { ++depth; - boolean start = startElementChildPathStep(uri, localName); + boolean start = false; + if (!isIndexHandler) { + start = startElementChildPathStep(uri, localName); + } if (skipping) { return; @@ -392,7 +406,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { } } - private void flushText() throws IOException { + protected void flushText() throws IOException { if (pendingText) { peekENBStackTop().startChild(tnb); if (createNodeIds) { @@ -471,7 
+485,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { out.write(resultABVS.getByteArray(), resultABVS.getStartOffset(), resultABVS.getLength()); } - private ElementNodeBuilder createENB() { + protected ElementNodeBuilder createENB() { if (freeENBList.isEmpty()) { return new ElementNodeBuilder(); } @@ -482,7 +496,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { freeENBList.add(enb); } - private ElementNodeBuilder peekENBStackTop() { + protected ElementNodeBuilder peekENBStackTop() { return enbStack.get(enbStack.size() - 1); } http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java index a62a26c..34d7ba9 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java @@ -57,7 +57,7 @@ public class XMLParser { try { parser = XMLReaderFactory.createXMLReader(); if (appender == null) { - handler = new SAXContentHandler(attachTypes, idProvider); + handler = new SAXContentHandler(attachTypes, idProvider, false); } else { List<SequenceType> childSequenceTypes = new ArrayList<SequenceType>(); for (int typeCode : childSeq) { http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryTest.java ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryTest.java b/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryTest.java index 4d0ddc0..11f7eb2 100644 --- a/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryTest.java +++ 
b/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryTest.java @@ -1,25 +1,26 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.vxquery.xtest; import java.io.File; import java.io.IOException; import java.util.Collection; +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -30,9 +31,10 @@ import org.junit.runners.Parameterized.Parameters; @RunWith(Parameterized.class) public class VXQueryTest extends AbstractXQueryTest { private static MiniDFS dfs; + private final static String TMP = "target/tmp"; - private static String VXQUERY_CATALOG = StringUtils.join(new String[] { "src", "test", "resources", - "VXQueryCatalog.xml" }, File.separator); + private static String VXQUERY_CATALOG = StringUtils + .join(new String[] { "src", "test", "resources", "VXQueryCatalog.xml" }, File.separator); public VXQueryTest(TestCase tc) throws Exception { super(tc); @@ -57,7 +59,12 @@ public class VXQueryTest extends AbstractXQueryTest { } @BeforeClass - public static void setupHDFS() { + public static void setup() throws IOException { + File tmp = new File(TMP); + if (tmp.exists()) { + FileUtils.deleteDirectory(tmp); + } + new File(TMP.concat("/indexFolder")).mkdirs(); dfs = new MiniDFS(); try { dfs.startHDFS(); @@ -67,7 +74,11 @@ public class VXQueryTest extends AbstractXQueryTest { } @AfterClass - public static void shutdownHDFS() { + public static void shutdown() throws IOException { + File tmp = new File(TMP); + if (tmp.exists()) { + FileUtils.deleteDirectory(tmp); + } dfs.shutdownHDFS(); } http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/createIndex.txt ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/createIndex.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/createIndex.txt new file mode 100644 index 0000000..e69de29 
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex1.txt ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex1.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex1.txt new file mode 100644 index 0000000..baf9dca --- /dev/null +++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex1.txt @@ -0,0 +1,2 @@ +<data><date>2003-03-03T00:00:00.000</date><dataType>TMIN</dataType><station>GHCND:AS000000003</station><value>13.75</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data> +<data><date>2003-03-03T00:00:00.000</date><dataType>TMAX</dataType><station>GHCND:AS000000003</station><value>33</value><attributes><attribute/><attribute/><attribute>a</attribute></attributes></data> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex2.txt ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex2.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex2.txt new file mode 100644 index 0000000..ef8dde4 --- /dev/null +++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex2.txt @@ -0,0 +1 @@ +<data><date>2001-01-01T00:00:00.000</date><dataType>AWND</dataType><station>GHCND:US000000001</station><value>1000</value><attributes><attribute/><attribute/><attribute>a</attribute></attributes></data> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex3.txt ---------------------------------------------------------------------- diff --git 
a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex3.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex3.txt new file mode 100644 index 0000000..d8263ee --- /dev/null +++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex3.txt @@ -0,0 +1 @@ +2 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex4.txt ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex4.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex4.txt new file mode 100644 index 0000000..f30101c --- /dev/null +++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex4.txt @@ -0,0 +1 @@ +3.3 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex5.txt ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex5.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex5.txt new file mode 100644 index 0000000..c84c360 --- /dev/null +++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex5.txt @@ -0,0 +1,3 @@ +<data><date>2002-02-02T00:00:00.000</date><dataType>TMIN</dataType><station>GHCND:US000000002</station><value>12.5</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data> +<data><date>2002-02-02T00:00:00.000</date><dataType>TMAX</dataType><station>GHCND:US000000002</station><value>32</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data> 
+<data><date>2002-02-02T00:00:00.000</date><dataType>PRCP</dataType><station>GHCND:US000000002</station><value>20</value><attributes><attribute/><attribute/><attribute>a</attribute></attributes></data> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex6.txt ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex6.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex6.txt new file mode 100644 index 0000000..9abedff --- /dev/null +++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex6.txt @@ -0,0 +1,2 @@ +<station><id>GHCND:US000000001</id><displayName>Station 1</displayName><latitude>10.000</latitude><longitude>-10.000</longitude><elevation>1000.0</elevation><locationLabels><type>ST</type><id>FIPS:1</id><displayName>State 1</displayName></locationLabels><locationLabels><type>CNTY</type><id>FIPS:-9999</id><displayName>County 1</displayName></locationLabels><locationLabels><type>CNTRY</type><id>FIPS:US</id><displayName/></locationLabels></station> +<station><id>GHCND:US000000002</id><displayName>Station 2</displayName><latitude>20.000</latitude><longitude>-20.000</longitude><elevation>2000.0</elevation><locationLabels><type>ST</type><id>FIPS:1</id><displayName>State 1</displayName></locationLabels><locationLabels><type>CNTY</type><id>FIPS:-9999</id><displayName>County 2</displayName></locationLabels><locationLabels><type>CNTRY</type><id>FIPS:US</id><displayName/></locationLabels></station> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex7.txt ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex7.txt 
b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex7.txt new file mode 100644 index 0000000..c84c360 --- /dev/null +++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex7.txt @@ -0,0 +1,3 @@ +<data><date>2002-02-02T00:00:00.000</date><dataType>TMIN</dataType><station>GHCND:US000000002</station><value>12.5</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data> +<data><date>2002-02-02T00:00:00.000</date><dataType>TMAX</dataType><station>GHCND:US000000002</station><value>32</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data> +<data><date>2002-02-02T00:00:00.000</date><dataType>PRCP</dataType><station>GHCND:US000000002</station><value>20</value><attributes><attribute/><attribute/><attribute>a</attribute></attributes></data> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/createIndex.xq ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/createIndex.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/createIndex.xq new file mode 100644 index 0000000..f34ac4c --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/createIndex.xq @@ -0,0 +1,20 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: Build Lucene Index :) +build-index-on-collection( "src/test/resources/TestSources/ghcnd", "target/tmp/indexFolder") + \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex1.xq ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex1.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex1.xq new file mode 100644 index 0000000..1635f61 --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex1.xq @@ -0,0 +1,25 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
:) + +(: Search Lucene Index :) +for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data +let $datetime := xs:dateTime(fn:data($r/date)) +where $r/station eq "GHCND:AS000000003" + and fn:year-from-dateTime($datetime) ge 2000 + and fn:month-from-dateTime($datetime) eq 3 + and fn:day-from-dateTime($datetime) eq 3 +return $r \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex2.xq ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex2.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex2.xq new file mode 100644 index 0000000..bf19ee9 --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex2.xq @@ -0,0 +1,24 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: Search Lucene Index :) +(: Find all readings for hurricane force wind warning or extreme wind warning. :) +(: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744 :) +(: meters per second). 
(Wind value is in tenths of a meter per second) :) +for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data +where $r/dataType eq "AWND" and xs:decimal($r/value) gt 491.744 +return $r http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex3.xq ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex3.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex3.xq new file mode 100644 index 0000000..28cf019 --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex3.xq @@ -0,0 +1,27 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: Search Lucene Index :) +(: Find the annual precipitation (PRCP) for Seattle using the airport :) +(: station (US000000002) for 2002. 
:) +fn:sum( + for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data + where $r/station eq "GHCND:US000000002" + and $r/dataType eq "PRCP" + and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2002 + return $r/value +) div 10 http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex4.xq ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex4.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex4.xq new file mode 100644 index 0000000..2b75cf4 --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex4.xq @@ -0,0 +1,24 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: Search Lucene Index :) +(: Find the highest recorded temperature (TMAX) in Celsius. 
:) +fn:max( + for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data + where $r/dataType eq "TMAX" + return $r/value +) div 10 http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex5.xq ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex5.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex5.xq new file mode 100644 index 0000000..e83484a --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex5.xq @@ -0,0 +1,23 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: Search Lucene Index :) +(: Find all the weather readings in the collection for a specific day :) +(: 2002-2-2. 
:) +for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data +where xs:dateTime(fn:data($r/date)) eq xs:dateTime("2002-02-02T00:00:00.000") +return $r http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex6.xq ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex6.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex6.xq new file mode 100644 index 0000000..04f6672 --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex6.xq @@ -0,0 +1,23 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: Search Lucene Index :) +(: Find all the weather stations that have a state (ST) location label :) +(: named "STATE 1", compared case-insensitively. 
:) +for $s in collection-from-index("target/tmp/indexFolder", "/stationCollection/station")/station +where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1")) +return $s http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex7.xq ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex7.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex7.xq new file mode 100644 index 0000000..e471baa --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex7.xq @@ -0,0 +1,27 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: Search Lucene Index :) +(: Find all the weather readings for Washington state for a specific day :) +(: 2002-2-2. 
:) +for $s in collection-from-index("target/tmp/indexFolder", "/stationCollection/station")/station +for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data + +where $s/id eq $r/station + and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1")) + and xs:dateTime(fn:data($r/date)) eq xs:dateTime("2002-02-02T00:00:00.000") +return $r http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/VXQueryCatalog.xml ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/VXQueryCatalog.xml b/vxquery-xtest/src/test/resources/VXQueryCatalog.xml index f75ce49..414601e 100644 --- a/vxquery-xtest/src/test/resources/VXQueryCatalog.xml +++ b/vxquery-xtest/src/test/resources/VXQueryCatalog.xml @@ -42,6 +42,8 @@ <!ENTITY HDFSAggregateQueries SYSTEM "cat/HDFSAggregateQueries.xml"> +<!ENTITY IndexingQueries SYSTEM "cat/IndexingQueries.xml"> + ]> <test-suite xmlns="http://www.w3.org/2005/02/query-test-XQTSCatalog" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" @@ -208,4 +210,17 @@ &HDFSAggregateQueries; </test-group> </test-group> + <test-group name="IndexingQueries" featureOwner="Steven Jacobs"> + <GroupInfo> + <title>Indexing Queries</title> + <description/> + </GroupInfo> + <test-group name="IndexingTests" featureOwner="Steven Jacobs"> + <GroupInfo> + <title>Indexing Execution Tests</title> + <description/> + </GroupInfo> + &IndexingQueries; + </test-group> + </test-group> </test-suite> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml b/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml new file mode 100644 index 0000000..1f8291d --- /dev/null +++ 
b/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml @@ -0,0 +1,63 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<test-group xmlns="http://www.w3.org/2005/02/query-test-XQTSCatalog" name="IndexingQueries" featureOwner="VXQuery"> + <GroupInfo> + <title>Indexing</title> + <description/> + </GroupInfo> + <test-case name="create-index" FilePath="Indexing/" Creator="Steven Jacobs"> + <description>Create Lucene Index from Collection.</description> + <query name="createIndex" date="2016-05-26"/> + <output-file compare="Text">createIndex.txt</output-file> + </test-case> + <test-case name="use-index-1" FilePath="Indexing/" Creator="Steven Jacobs"> + <description>Get Collection From Lucene Index</description> + <query name="useIndex1" date="2016-05-26"/> + <output-file compare="Text">useIndex1.txt</output-file> + </test-case> + <test-case name="use-index-2" FilePath="Indexing/" Creator="Steven Jacobs"> + <description>Get Collection From Lucene Index</description> + <query name="useIndex2" date="2016-05-26"/> + <output-file compare="Text">useIndex2.txt</output-file> + </test-case> + <test-case name="use-index-3" FilePath="Indexing/" Creator="Steven Jacobs"> + <description>Get Collection From Lucene Index</description> + <query 
name="useIndex3" date="2016-05-26"/> + <output-file compare="Text">useIndex3.txt</output-file> + </test-case> + <test-case name="use-index-4" FilePath="Indexing/" Creator="Steven Jacobs"> + <description>Get Collection From Lucene Index</description> + <query name="useIndex4" date="2016-05-26"/> + <output-file compare="Text">useIndex4.txt</output-file> + </test-case> + <test-case name="use-index-5" FilePath="Indexing/" Creator="Steven Jacobs"> + <description>Get Collection From Lucene Index</description> + <query name="useIndex5" date="2016-05-26"/> + <output-file compare="Text">useIndex5.txt</output-file> + </test-case> + <test-case name="use-index-6" FilePath="Indexing/" Creator="Steven Jacobs"> + <description>Get Collection From Lucene Index</description> + <query name="useIndex6" date="2016-05-26"/> + <output-file compare="Text">useIndex6.txt</output-file> + </test-case> + <test-case name="use-index-7" FilePath="Indexing/" Creator="Steven Jacobs"> + <description>Get Collection From Lucene Index</description> + <query name="useIndex7" date="2016-05-26"/> + <output-file compare="Text">useIndex7.txt</output-file> + </test-case> +</test-group>
