Consolidated changes for November. - Updated Hyracks version - MRQL benchmark updates - Updated the hash join size property for benchmarks - Fixed hash join size bug - Added unnesting for descendant step expressions - Fixed a few rewrite rule bugs
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/99ba4dbf Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/99ba4dbf Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/99ba4dbf Branch: refs/heads/master Commit: 99ba4dbfd7fac85cf17c91c23ee8fb328e440319 Parents: e97888e Author: Preston Carman <[email protected]> Authored: Tue Nov 18 10:40:19 2014 -0800 Committer: Preston Carman <[email protected]> Committed: Tue Nov 18 10:40:19 2014 -0800 ---------------------------------------------------------------------- pom.xml | 985 ++++++++++--------- src/site/apt/user_running_tests.apt | 1 + .../noaa-ghcn-daily/other_systems/mrql/q07.mrql | 2 +- .../mrql_scripts/load_node_file.sh | 2 + .../mrql_scripts/run_group_test.sh | 14 +- .../mrql_scripts/run_mrql_tests.sh | 7 +- .../mrql_scripts/yarn_and_flink/README.md | 23 + .../yarn_and_flink/clear_hadoop2.sh | 22 + .../yarn_and_flink/load_node_file.sh | 47 + .../yarn_and_flink/run_group_test.sh | 86 ++ .../yarn_and_flink/run_mrql_tests.sh | 49 + .../scripts/benchmark_logging.properties | 43 +- .../noaa-ghcn-daily/scripts/run_benchmark.sh | 2 +- .../scripts/run_benchmark_cluster.sh | 7 +- .../scripts/testing_logging.properties | 79 ++ .../scripts/weather_benchmark.py | 24 +- .../java/org/apache/vxquery/cli/VXQuery.java | 4 +- .../compiler/rewriter/RewriteRuleset.java | 8 +- .../rewriter/rules/ConsolidateUnnestsRule.java | 10 +- ...tAssignSortDistinctNodesToOperatorsRule.java | 1 - .../DelayMaterializationForJoinProbeRule.java | 142 +++ .../PushMapOperatorDownThroughProductRule.java | 142 --- .../vxquery/functions/builtin-operators.xml | 1 + .../metadata/VXQueryCollectionDataSource.java | 4 + .../metadata/VXQueryMetadataProvider.java | 45 +- .../functions/step/AbstractChildPathStep.java | 74 -- ...stractDescendantPathStepScalarEvaluator.java | 5 +- .../step/AbstractForwardAxisPathStep.java | 74 ++ .../step/ChildPathStepOperatorDescriptor.java | 
38 +- .../functions/step/ChildPathStepUnnesting.java | 3 +- ...DescendantOrSelfPathStepScalarEvaluator.java | 11 +- .../step/DescendantOrSelfPathStepUnnesting.java | 167 ++++ ...cendantOrSelfPathStepUnnestingEvaluator.java | 44 + ...OrSelfPathStepUnnestingEvaluatorFactory.java | 40 + .../step/DescendantPathStepScalarEvaluator.java | 4 - .../DescendantPathStepUnnestingEvaluator.java | 44 + ...endantPathStepUnnestingEvaluatorFactory.java | 39 + .../strings/UTF8StringCharacterIterator.java | 4 +- .../xmlquery/query/XMLQueryCompiler.java | 2 +- 39 files changed, 1492 insertions(+), 807 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 04e0377..b57411c 100644 --- a/pom.xml +++ b/pom.xml @@ -14,435 +14,438 @@ See the License for the specific language governing permissions and limitations under the License. 
--> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <parent> - <groupId>org.apache</groupId> - <artifactId>apache</artifactId> - <version>13</version> - <relativePath /> - </parent> - - <groupId>org.apache.vxquery</groupId> - <artifactId>apache-vxquery</artifactId> - <version>0.5-SNAPSHOT</version> - <packaging>pom</packaging> - <name>VXQuery</name> - <description>A Versatile XQuery Processor</description> - <url>http://vxquery.apache.org/</url> - - <organization> - <name>Apache Software Foundation</name> - <url>http://www.apache.org/</url> - </organization> - - <licenses> - <license> - <name>The Apache Software License, Version 2.0</name> - <url>http://www.apache.org/licenses/</url> - <distribution>repo</distribution> - <comments>A business-friendly OSS license</comments> - </license> - </licenses> - - <scm> - <connection>scm:git:https://git-wip-us.apache.org/repos/asf/vxquery.git</connection> - <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/vxquery.git</developerConnection> - <url>https://git-wip-us.apache.org/repos/asf/vxquery.git</url> - <tag>HEAD</tag> - </scm> - - <issueManagement> - <system>Jira</system> - <url>https://issues.apache.org/jira/browse/VXQUERY</url> - </issueManagement> - - <developers> - <developer> - <id>antelder</id> - <name>Ant Elder</name> - <email /> - <roles> - <role>Mentor</role> - </roles> - <organization /> - <timezone /> - </developer> - - <developer> - <id>cezar</id> - <name>Cezar Andrei</name> - <email /> - <roles> - <role>Architect</role> - </roles> - <organization /> - <timezone>-6</timezone> - </developer> - <developer> - <id>dtabass</id> - <name>Michael J. 
Carey</name> - <email /> - <roles> - <role>Architect</role> - </roles> - <organization /> - <timezone>-8</timezone> - </developer> - <developer> - <id>prestonc</id> - <name>Preston Carman</name> - <email /> - <roles> - <role>Developer</role> - </roles> - <organization /> - <timezone>-8</timezone> - </developer> - <developer> - <id>sjaco002</id> - <name>Steven Jacobs</name> - <email /> - <roles> - <role>Developer</role> - </roles> - <organization /> - <timezone>-8</timezone> - </developer> - <developer> - <id>tillw</id> - <name>Till Westmann</name> - <email /> - <roles> - <role>Chair</role> - <role>Architect</role> - </roles> - <organization /> - <timezone>-8</timezone> - </developer> - <developer> - <id>vinayakb</id> - <name>Vinayak Borkar</name> - <email /> - <roles> - <role>Architect</role> - </roles> - <organization /> - <timezone>-8</timezone> - </developer> - </developers> - - <mailingLists> - <mailingList> - <name>dev</name> - <subscribe>[email protected]</subscribe> - <unsubscribe>[email protected]</unsubscribe> - <post>[email protected]</post> - <archive>http://mail-archives.apache.org/mod_mbox/vxquery-dev/</archive> - </mailingList> - <mailingList> - <name>commits</name> - <subscribe>[email protected]</subscribe> - <unsubscribe>[email protected]</unsubscribe> - <post>[email protected]</post> - <archive>http://mail-archives.apache.org/mod_mbox/vxquery-commits/</archive> - </mailingList> - </mailingLists> - - <distributionManagement> - <site> - <id>vxquery.website</id> - <name>VXQuery Website</name> - <url>file:../site/</url> - </site> - </distributionManagement> - - <repositories> - <repository> - <id>hyracks-snapshots</id> - <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-snapshots/</url> - <snapshots> - </snapshots> - </repository> - <repository> - <id>hyracks-releases</id> - <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-releases/</url> - <releases> - </releases> - </repository> - <repository> - 
<id>hyracks-thirdparty</id> - <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-thirdparty/</url> - <releases> - </releases> - </repository> - </repositories> - - <dependencyManagement> - <dependencies> - <dependency> - <groupId>args4j</groupId> - <artifactId>args4j</artifactId> - <version>2.0.9</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-client</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-control-common</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-control-cc</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-control-nc</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>algebricks-compiler</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.commons</groupId> - <artifactId>commons-lang3</artifactId> - <version>3.1</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>algebricks-common</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>algebricks-core</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>algebricks-data</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>algebricks-rewriter</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - 
<artifactId>algebricks-runtime</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-api</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-data-std</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-dataflow-common</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-dataflow-std</artifactId> - <version>${hyracks.version}</version> - </dependency> - - <dependency> - <groupId>ant</groupId> - <artifactId>ant-trax</artifactId> - <version>1.6.5</version> - <scope>provided</scope> - </dependency> - - <dependency> - <groupId>org.codehaus.woodstox</groupId> - <artifactId>stax2-api</artifactId> - <version>3.0.1</version> - </dependency> - - <dependency> - <groupId>org.codehaus.woodstox</groupId> - <artifactId>woodstox-core-asl</artifactId> - <version>4.0.6</version> - </dependency> - - <dependency> - <groupId>xalan</groupId> - <artifactId>serializer</artifactId> - <version>2.7.1</version> - </dependency> - - <dependency> - <groupId>xerces</groupId> - <artifactId>xercesImpl</artifactId> - <version>2.9.1</version> - </dependency> - - <dependency> - <groupId>activemq</groupId> - <artifactId>activemq-transport-xstream</artifactId> - <version>2.1</version> - </dependency> - - <dependency> - <groupId>com.thoughtworks.xstream</groupId> - <artifactId>xstream</artifactId> - <version>1.3.1</version> - </dependency> - - <dependency> - <groupId>commons-io</groupId> - <artifactId>commons-io</artifactId> - <version>1.3.2</version> - </dependency> - - <dependency> - <groupId>commons-codec</groupId> - <artifactId>commons-codec</artifactId> - <version>1.4</version> - </dependency> - - <dependency> - 
<groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>4.7</version> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.mortbay.jetty</groupId> - <artifactId>jetty</artifactId> - <version>6.1.4</version> - <scope>compile</scope> - </dependency> - </dependencies> - </dependencyManagement> - - <build> - <pluginManagement> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-site-plugin</artifactId> - <version>3.3</version> - </plugin> - - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-project-info-reports-plugin</artifactId> - <version>2.7</version> - </plugin> - - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-javadoc-plugin</artifactId> - <version>2.9.1</version> - </plugin> - </plugins> - </pluginManagement> - - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <configuration> - <source>1.6</source> - <target>1.6</target> - </configuration> - </plugin> - <plugin> - <artifactId>maven-install-plugin</artifactId> - <configuration> - <createChecksum>true</createChecksum> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-release-plugin</artifactId> - <version>2.4.2</version> +<project + xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache</groupId> + <artifactId>apache</artifactId> + <version>13</version> + <relativePath /> + </parent> + + <groupId>org.apache.vxquery</groupId> + <artifactId>apache-vxquery</artifactId> + <version>0.5-SNAPSHOT</version> + <packaging>pom</packaging> + <name>VXQuery</name> + <description>A Versatile XQuery Processor</description> + <url>http://vxquery.apache.org/</url> + + 
<organization> + <name>Apache Software Foundation</name> + <url>http://www.apache.org/</url> + </organization> + + <licenses> + <license> + <name>The Apache Software License, Version 2.0</name> + <url>http://www.apache.org/licenses/</url> + <distribution>repo</distribution> + <comments>A business-friendly OSS license</comments> + </license> + </licenses> + + <scm> + <connection>scm:git:https://git-wip-us.apache.org/repos/asf/vxquery.git</connection> + <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/vxquery.git</developerConnection> + <url>https://git-wip-us.apache.org/repos/asf/vxquery.git</url> + <tag>HEAD</tag> + </scm> + + <issueManagement> + <system>Jira</system> + <url>https://issues.apache.org/jira/browse/VXQUERY</url> + </issueManagement> + + <developers> + <developer> + <id>antelder</id> + <name>Ant Elder</name> + <email /> + <roles> + <role>Mentor</role> + </roles> + <organization /> + <timezone /> + </developer> + + <developer> + <id>cezar</id> + <name>Cezar Andrei</name> + <email /> + <roles> + <role>Architect</role> + </roles> + <organization /> + <timezone>-6</timezone> + </developer> + <developer> + <id>dtabass</id> + <name>Michael J. 
Carey</name> + <email /> + <roles> + <role>Architect</role> + </roles> + <organization /> + <timezone>-8</timezone> + </developer> + <developer> + <id>prestonc</id> + <name>Preston Carman</name> + <email /> + <roles> + <role>Developer</role> + </roles> + <organization /> + <timezone>-8</timezone> + </developer> + <developer> + <id>sjaco002</id> + <name>Steven Jacobs</name> + <email /> + <roles> + <role>Developer</role> + </roles> + <organization /> + <timezone>-8</timezone> + </developer> + <developer> + <id>tillw</id> + <name>Till Westmann</name> + <email /> + <roles> + <role>Chair</role> + <role>Architect</role> + </roles> + <organization /> + <timezone>-8</timezone> + </developer> + <developer> + <id>vinayakb</id> + <name>Vinayak Borkar</name> + <email /> + <roles> + <role>Architect</role> + </roles> + <organization /> + <timezone>-8</timezone> + </developer> + </developers> + + <mailingLists> + <mailingList> + <name>dev</name> + <subscribe>[email protected]</subscribe> + <unsubscribe>[email protected]</unsubscribe> + <post>[email protected]</post> + <archive>http://mail-archives.apache.org/mod_mbox/vxquery-dev/</archive> + </mailingList> + <mailingList> + <name>commits</name> + <subscribe>[email protected]</subscribe> + <unsubscribe>[email protected]</unsubscribe> + <post>[email protected]</post> + <archive>http://mail-archives.apache.org/mod_mbox/vxquery-commits/</archive> + </mailingList> + </mailingLists> + + <distributionManagement> + <site> + <id>vxquery.website</id> + <name>VXQuery Website</name> + <url>file:../site/</url> + </site> + </distributionManagement> + + <repositories> + <repository> + <id>hyracks-snapshots</id> + <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-snapshots/</url> + <snapshots> + </snapshots> + </repository> + <repository> + <id>hyracks-releases</id> + <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-releases/</url> + <releases> + </releases> + </repository> + <repository> + 
<id>hyracks-thirdparty</id> + <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-thirdparty/</url> + <releases> + </releases> + </repository> + </repositories> + + <dependencyManagement> <dependencies> - <dependency> - <groupId>org.apache.maven.scm</groupId> - <artifactId>maven-scm-provider-gitexe</artifactId> - <version>1.8.1</version> - </dependency> + <dependency> + <groupId>args4j</groupId> + <artifactId>args4j</artifactId> + <version>2.0.9</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-client</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-control-common</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-control-cc</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-control-nc</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>algebricks-compiler</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-lang3</artifactId> + <version>3.1</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>algebricks-common</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>algebricks-core</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>algebricks-data</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + 
<artifactId>algebricks-rewriter</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>algebricks-runtime</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-api</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-data-std</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-dataflow-common</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-dataflow-std</artifactId> + <version>${hyracks.version}</version> + </dependency> + + <dependency> + <groupId>ant</groupId> + <artifactId>ant-trax</artifactId> + <version>1.6.5</version> + <scope>provided</scope> + </dependency> + + <dependency> + <groupId>org.codehaus.woodstox</groupId> + <artifactId>stax2-api</artifactId> + <version>3.0.1</version> + </dependency> + + <dependency> + <groupId>org.codehaus.woodstox</groupId> + <artifactId>woodstox-core-asl</artifactId> + <version>4.0.6</version> + </dependency> + + <dependency> + <groupId>xalan</groupId> + <artifactId>serializer</artifactId> + <version>2.7.1</version> + </dependency> + + <dependency> + <groupId>xerces</groupId> + <artifactId>xercesImpl</artifactId> + <version>2.9.1</version> + </dependency> + + <dependency> + <groupId>activemq</groupId> + <artifactId>activemq-transport-xstream</artifactId> + <version>2.1</version> + </dependency> + + <dependency> + <groupId>com.thoughtworks.xstream</groupId> + <artifactId>xstream</artifactId> + <version>1.3.1</version> + </dependency> + + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <version>1.3.2</version> + </dependency> + 
+ <dependency> + <groupId>commons-codec</groupId> + <artifactId>commons-codec</artifactId> + <version>1.4</version> + </dependency> + + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.7</version> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.mortbay.jetty</groupId> + <artifactId>jetty</artifactId> + <version>6.1.4</version> + <scope>compile</scope> + </dependency> </dependencies> - <configuration> - <autoVersionSubmodules>true</autoVersionSubmodules> - </configuration> - </plugin> - <plugin> - <inherited>true</inherited> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-javadoc-plugin</artifactId> - <executions> - <execution> - <id>aggregate</id> - <goals> - <goal>aggregate</goal> - </goals> - </execution> - </executions> - <configuration> - <notimestamp>true</notimestamp> - <maxmemory>2g</maxmemory> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-assembly-plugin</artifactId> + </dependencyManagement> + + <build> + <pluginManagement> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-site-plugin</artifactId> + <version>3.3</version> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-project-info-reports-plugin</artifactId> + <version>2.7</version> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <version>2.9.1</version> + </plugin> + </plugins> + </pluginManagement> + + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>1.6</source> + <target>1.6</target> + </configuration> + </plugin> + <plugin> + <artifactId>maven-install-plugin</artifactId> + <configuration> + <createChecksum>true</createChecksum> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + 
<artifactId>maven-release-plugin</artifactId> + <version>2.4.2</version> + <dependencies> + <dependency> + <groupId>org.apache.maven.scm</groupId> + <artifactId>maven-scm-provider-gitexe</artifactId> + <version>1.8.1</version> + </dependency> + </dependencies> + <configuration> + <autoVersionSubmodules>true</autoVersionSubmodules> + </configuration> + </plugin> + <plugin> + <inherited>true</inherited> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <executions> + <execution> + <id>aggregate</id> + <goals> + <goal>aggregate</goal> + </goals> + </execution> + </executions> + <configuration> + <notimestamp>true</notimestamp> + <maxmemory>2g</maxmemory> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-assembly-plugin</artifactId> <!-- We override the configuration plugin to override the descriptor to use for building the source release zip. Specifically, we would like to control the inclusions/exclusions. 
For example, we exclude the KEYS file from the zip --> - <executions> - <execution> + <executions> + <execution> <!-- Use this id to match the id mentioned in the assembly plugin configuration in the apache parent POM under the apache-release profile --> - <id>source-release-assembly</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> + <id>source-release-assembly</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> <!-- combine.self should be override to replace the configuration in the parent POM --> - <configuration combine.self="override"> - <runOnlyAtExecutionRoot>true</runOnlyAtExecutionRoot> - <descriptors> - <descriptor>src/main/assembly/source.xml</descriptor> - </descriptors> - </configuration> - </execution> - </executions> - </plugin> + <configuration combine.self="override"> + <runOnlyAtExecutionRoot>true</runOnlyAtExecutionRoot> + <descriptors> + <descriptor>src/main/assembly/source.xml</descriptor> + </descriptors> + </configuration> + </execution> + </executions> + </plugin> <!-- <plugin> <groupId>org.apache.maven.plugins</groupId> @@ -471,75 +474,75 @@ </executions> </plugin> --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-site-plugin</artifactId> - </plugin> - </plugins> - </build> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-site-plugin</artifactId> + </plugin> + </plugins> + </build> - <reporting> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-project-info-reports-plugin</artifactId> - <inherited>false</inherited> - <reportSets> - <reportSet> - <reports> - <report>index</report> - <report>license</report> - <report>project-team</report> - <report>scm</report> - <report>issue-tracking</report> - <report>mailing-list</report> - <report>plugins</report> - </reports> - </reportSet> - </reportSets> - <configuration> - <linkOnly>true</linkOnly> - 
<dependencyLocationsEnabled>false</dependencyLocationsEnabled> - <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/vxquery.git</developerConnection> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.rat</groupId> - <artifactId>apache-rat-plugin</artifactId> - <version>${apache-rat-plugin.version}</version> - <configuration> - <excludes> - <exclude>.gitignore</exclude> - <exclude>.git/**/*</exclude> - </excludes> - <numUnapprovedLicenses>0</numUnapprovedLicenses> - <maxmemory>2g</maxmemory> - </configuration> - </plugin> - <plugin> - <inherited>true</inherited> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-javadoc-plugin</artifactId> - <configuration> - <notimestamp>true</notimestamp> - <maxmemory>2g</maxmemory> - </configuration> - </plugin> - </plugins> - </reporting> - - <properties> - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> - <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> - <hyracks.version>0.2.12</hyracks.version> - <apache-rat-plugin.version>0.11</apache-rat-plugin.version> - </properties> - - <modules> - <module>vxquery-core</module> - <module>vxquery-server</module> - <module>vxquery-cli</module> - <module>vxquery-xtest</module> - <module>vxquery-benchmark</module> - </modules> + <reporting> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-project-info-reports-plugin</artifactId> + <inherited>false</inherited> + <reportSets> + <reportSet> + <reports> + <report>index</report> + <report>license</report> + <report>project-team</report> + <report>scm</report> + <report>issue-tracking</report> + <report>mailing-list</report> + <report>plugins</report> + </reports> + </reportSet> + </reportSets> + <configuration> + <linkOnly>true</linkOnly> + <dependencyLocationsEnabled>false</dependencyLocationsEnabled> + <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/vxquery.git</developerConnection> + 
</configuration> + </plugin> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <version>${apache-rat-plugin.version}</version> + <configuration> + <excludes> + <exclude>.gitignore</exclude> + <exclude>.git/**/*</exclude> + </excludes> + <numUnapprovedLicenses>0</numUnapprovedLicenses> + <maxmemory>2g</maxmemory> + </configuration> + </plugin> + <plugin> + <inherited>true</inherited> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <configuration> + <notimestamp>true</notimestamp> + <maxmemory>2g</maxmemory> + </configuration> + </plugin> + </plugins> + </reporting> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> + <hyracks.version>0.2.15-SNAPSHOT</hyracks.version> + <apache-rat-plugin.version>0.11</apache-rat-plugin.version> + </properties> + + <modules> + <module>vxquery-core</module> + <module>vxquery-server</module> + <module>vxquery-cli</module> + <module>vxquery-xtest</module> + <module>vxquery-benchmark</module> + </modules> </project> http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/src/site/apt/user_running_tests.apt ---------------------------------------------------------------------- diff --git a/src/site/apt/user_running_tests.apt b/src/site/apt/user_running_tests.apt index ef9748b..f74a897 100644 --- a/src/site/apt/user_running_tests.apt +++ b/src/site/apt/user_running_tests.apt @@ -74,6 +74,7 @@ sh ./vxquery-xtest/target/appassembler/bin/xtest -catalog xqts/XQTSCatalog.xml - ---------------------------------------- * view the results at {{{file:///tmp/full_report.html}file:///tmp/full_report.html}}. + * Add JAVA_OPTS for additional java parameters. 
* VXQuery Test Suite http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql index cdb0b0c..e8dfce1 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql @@ -17,7 +17,7 @@ avg( select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) from rtmax in source(xml, args[0], {"data"}), - rtmin in source(xml, args[0], {"data"}) + rtmin in source(xml, args[2], {"data"}) where text(rtmax.date) = text(rtmin.date) and text(rtmax.station) = text(rtmin.station) and text(rtmax.dataType) = "TMAX" http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh index 206c38b..a3d1dfc 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh @@ -38,10 +38,12 @@ echo "Loading ${NODES} node ${DATASET} data file in to cluster." 
cp saved/backups/mr/${DATASET}_sensors_${NODES}.xml.gz disk1/hadoop/ gunzip disk1/hadoop/${DATASET}_sensors_${NODES}.xml.gz hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_sensors_${NODES}.xml ${DATASET}/sensors +hadoop fs -cp ${DATASET}/sensors/${DATASET}_sensors_${NODES}.xml ${DATASET}2/sensors/${DATASET}_sensors_${NODES}.xml rm -f disk1/hadoop/${DATASET}_sensors_${NODES}.xml # Add each station block cp saved/backups/mr/${DATASET}_stations_${NODES}.xml.gz disk1/hadoop/ gunzip disk1/hadoop/${DATASET}_stations_${NODES}.xml.gz hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_stations_${NODES}.xml ${DATASET}/stations +hadoop fs -cp ${DATASET}/stations/${DATASET}_stations_${NODES}.xml ${DATASET}2/stations/${DATASET}_stations_${NODES}.xml rm -f disk1/hadoop/${DATASET}_stations_${NODES}.xml http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh index 0208beb..134c05e 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh @@ -32,6 +32,8 @@ fi DATASET=${1} NODES=${2} REPEAT=1 +#DATA_FILES=${NODES} +DATA_FILES=8 # Start Hadoop sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh @@ -45,6 +47,13 @@ hadoop fs -mkdir ${DATASET}/sensors hadoop fs -mkdir ${DATASET}/stations hadoop fs -ls ${DATASET} +# Prepare hadoop file system 2 +hadoop fs -mkdir ${DATASET}2 +hadoop fs -ls +hadoop fs -mkdir ${DATASET}2/sensors +hadoop fs -mkdir ${DATASET}2/stations +hadoop fs -ls ${DATASET}2 + hadoop balancer @@ -58,8 +67,9 @@ done # Start test -sh 
vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT} ${DATASET} +sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh \ + vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT} ${DATASET} # Stop Hadoop -sh saved/hadoop/hadoop-1.2.1/bin/stop-all.sh \ No newline at end of file +sh saved/hadoop/hadoop-1.2.1/bin/stop-all.sh http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh index d6bc9ab..912cd3b 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh @@ -23,16 +23,19 @@ NODES=${2} REPEAT=${3} DATASET=${4} +THREADS=$((4*${NODES})) # Make log folder mkdir -p ~/disk1/weather_data/mrql/query_logs/${NODES}nodes/ -for j in $(find ${1} -name '*q??.mrql') +for j in $(find ${1} -name '*q?7.mrql') do date echo "Running MRQL query: ${j}" - time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes ${NODES} ${j} ${DATASET}/sensors/ ${DATASET}/stations/ >> ~/disk1/weather_data/mrql/query_logs/${NODES}nodes/$(basename "${j}").log 2>&1; done; + time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes ${THREADS} ${j} \ + ${DATASET}/sensors/ ${DATASET}/stations/ ${DATASET}2/sensors/ ${DATASET}2/stations/ \ + >> ~/disk1/weather_data/mrql/query_logs/${NODES}nodes/$(basename "${j}").log 2>&1; 
done; done http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/README.md ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/README.md new file mode 100644 index 0000000..f1ff8a5 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/README.md @@ -0,0 +1,23 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> + + +clear_hadoop2.sh + +hadoop namenode -format + +run_group_test.sh \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/clear_hadoop2.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/clear_hadoop2.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/clear_hadoop2.sh new file mode 100755 index 0000000..fbc15b5 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/clear_hadoop2.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Remove data +rm -rf disk1/hadoop2/hdfs +rm -rf disk2/hadoop2/hdfs http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/load_node_file.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/load_node_file.sh new file mode 100755 index 0000000..206c38b --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/load_node_file.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +if [ -z "${1}" ] +then + echo "Please enter the data set as the first argument." + exit +fi + +if [ -z "${2}" ] +then + echo "Please enter the node number as the second argument." + exit +fi + +DATASET=${1} +NODES=${2} + +echo "Loading ${NODES} node ${DATASET} data file in to cluster." 
+ +# Add each sensor block +cp saved/backups/mr/${DATASET}_sensors_${NODES}.xml.gz disk1/hadoop/ +gunzip disk1/hadoop/${DATASET}_sensors_${NODES}.xml.gz +hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_sensors_${NODES}.xml ${DATASET}/sensors +rm -f disk1/hadoop/${DATASET}_sensors_${NODES}.xml + +# Add each station block +cp saved/backups/mr/${DATASET}_stations_${NODES}.xml.gz disk1/hadoop/ +gunzip disk1/hadoop/${DATASET}_stations_${NODES}.xml.gz +hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_stations_${NODES}.xml ${DATASET}/stations +rm -f disk1/hadoop/${DATASET}_stations_${NODES}.xml http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/run_group_test.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/run_group_test.sh new file mode 100755 index 0000000..95e8046 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/run_group_test.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +if [ -z "${1}" ] +then + echo "Please enter the data set as the first argument." + exit +fi + +if [ -z "${2}" ] +then + echo "Please enter the node number as the second argument." + exit +fi + +DATASET=${1} +NODES=${2} +THREADS=$((4*${NODES})) +REPEAT=1 +DATA_FILES=${NODES} + +# Start Hadoop +# sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh +sh saved/hadoop/hadoop-2.5.1/sbin/hadoop-daemon.sh start namenode +sh saved/hadoop/hadoop-2.5.1/sbin/hadoop-daemons.sh start datanode +sh saved/hadoop/hadoop-2.5.1/sbin/yarn-daemon.sh start resourcemanager +sh saved/hadoop/hadoop-2.5.1/sbin/yarn-daemons.sh start nodemanager +sh saved/hadoop/hadoop-2.5.1/sbin/mr-jobhistory-daemon.sh start historyserver + +sleep 10 + +# Start Flink +sh saved/flink/flink-yarn-0.6.1-incubating/bin/yarn-session.sh -n ${THREADS} -tm 1024 & +FLINK_PID=$! + +# Prepare hadoop file system +hadoop fs -mkdir ${DATASET} +hadoop fs -ls +hadoop fs -mkdir ${DATASET}/sensors +hadoop fs -mkdir ${DATASET}/stations +hadoop fs -ls ${DATASET} + +hadoop balancer + + +# Upload test data +COUNTER=0 +while [ ${COUNTER} -lt ${DATA_FILES} ]; +do + sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${DATASET} ${COUNTER} + let COUNTER=COUNTER+1 +done + + +# Start test +sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${THREADS} ${REPEAT} ${DATASET} + +# Stop Flink +kill ${FLINK_PID} +jobs -p +kill $(jobs -p) + + +# Stop Hadoop +# sh saved/hadoop/hadoop-1.2.1/bin/stop-all.sh +sh saved/hadoop/hadoop-2.5.1/sbin/mr-jobhistory-daemon.sh stop historyserver +sh saved/hadoop/hadoop-2.5.1/sbin/yarn-daemons.sh stop nodemanager +sh saved/hadoop/hadoop-2.5.1/sbin/yarn-daemon.sh stop resourcemanager +sh saved/hadoop/hadoop-2.5.1/sbin/hadoop-daemons.sh stop 
datanode +sh saved/hadoop/hadoop-2.5.1/sbin/hadoop-daemon.sh stop namenode http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/run_mrql_tests.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/run_mrql_tests.sh new file mode 100755 index 0000000..c000727 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/yarn_and_flink/run_mrql_tests.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Examples +# run_mrql_tests.sh mrql_all/ 1 2 + +NODES=${2} +REPEAT=${3} +DATASET=${4} + + +# Make log folder +mkdir -p ~/disk1/weather_data/mrql/query_logs/${NODES}nodes/ + +for j in $(find ${1} -name '*q??.mrql') +do + date + echo "Running MRQL query: ${j}" + time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql.flink -dist -nodes ${NODES} ${j} ${DATASET}/sensors/ ${DATASET}/stations/ >> ~/disk1/weather_data/mrql/query_logs/${NODES}nodes/$(basename "${j}").log 2>&1; done; +done + + +if which programname >/dev/null; +then + echo "Sending out e-mail notification." + SUBJECT="MRQL Tests Finished (${DATASET})" + EMAIL="[email protected]" + /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM + Completed all MRQL tests on ${DATASET}. + EOM +else + echo "No mail command to use." +fi; http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties index 2fb0af0..2745a12 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties @@ -1 +1,42 @@ -java.util.logging.ConsoleHandler.level=OFF \ No newline at end of file +# Properties file which configures the operation of the JDK +# logging facility. + +# The system will look for this config file, first using +# a System property specified at startup: +# +# >java -Djava.util.logging.config.file=myLoggingConfigFilePath +# +# If this property is not specified, then the config file is +# retrieved from its default location at: +# +# JDK_HOME/jre/lib/logging.properties + +# Global logging properties. 
+# ------------------------------------------ +# The set of handlers to be loaded upon startup. +# Comma-separated list of class names. +# (? LogManager docs say no comma here, but JDK example has comma.) +handlers=java.util.logging.ConsoleHandler + +# Default global logging level. +# Loggers and Handlers may override this level +.level=WARNING + +# Loggers +# ------------------------------------------ +# Loggers are usually attached to packages. +# Here, the level for each package is specified. +# The global level is used by default, so levels +# specified here simply act as an override. +#edu.uci.ics.hyracks.dataflow.std.join.level=ALL +edu.uci.ics.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor.level=ALL + + +# Handlers +# ----------------------------------------- + +# --- ConsoleHandler --- +# Override of global logging level +java.util.logging.ConsoleHandler.level=SEVERE +#java.util.logging.ConsoleHandler.formatter=java.util.logging.SimpleFormatter + http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh index 5146586..8bc6772 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh @@ -29,7 +29,7 @@ REPEAT=5 IGNORE=2 FRAME_SIZE=$((8*1024)) BUFFER_SIZE=$((32*1024*1024)) -JOIN_HASH_SIZE=-1 +JOIN_HASH_SIZE=$(( 4 * (64*1024*1024) )) if [ -z "${1}" ] then http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ---------------------------------------------------------------------- diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh index 98ab04b..c90a7a9 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh @@ -29,8 +29,7 @@ CLUSTER="uci" REPEAT=5 FRAME_SIZE=$((8*1024)) BUFFER_SIZE=$((32*1024*1024)) -#JOIN_HASH_SIZE=$((256*1024*1024)) -JOIN_HASH_SIZE=-1 +JOIN_HASH_SIZE=$((4*4*64*1024*1024)) if [ -z "${1}" ] then @@ -66,7 +65,7 @@ do log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log" log_base_path=$(dirname ${j/queries/query_logs}) mkdir -p ${log_base_path} - time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 + time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} -timing-ignore-queries 0 > ${log_base_path}/${log_file} 2>&1 echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file} echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file} echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file} @@ -76,6 +75,8 @@ done # Stop cluster. 
python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a stop +sleep 5 +python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a kill if which programname >/dev/null; then http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties new file mode 100644 index 0000000..ff877dd --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties @@ -0,0 +1,79 @@ +#/* +# Copyright 2009-2013 by The Regents of the University of California +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# you may obtain a copy of the License from +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#*/ +############################################################ +# Default Logging Configuration File +# +# You can use a different file by specifying a filename +# with the java.util.logging.config.file system property. 
+# For example java -Djava.util.logging.config.file=myfile +############################################################ + +############################################################ +# Global properties +############################################################ + +# "handlers" specifies a comma separated list of log Handler +# classes. These handlers will be installed during VM startup. +# Note that these classes must be on the system classpath. +# By default we only configure a ConsoleHandler, which will only +# show messages at the INFO and above levels. + +handlers= java.util.logging.ConsoleHandler + +# To also add the FileHandler, use the following line instead. + +# handlers= java.util.logging.FileHandler, java.util.logging.ConsoleHandler + +# Default global logging level. +# This specifies which kinds of events are logged across +# all loggers. For any given facility this global level +# can be overriden by a facility specific level +# Note that the ConsoleHandler also has a separate level +# setting to limit messages printed to the console. + +#.level= WARNING +# .level= INFO +.level= FINE +# .level = FINEST + +############################################################ +# Handler specific properties. +# Describes specific configuration info for Handlers. +############################################################ + +# default file output is in user's home directory. + +# java.util.logging.FileHandler.pattern = %h/java%u.log +# java.util.logging.FileHandler.limit = 50000 +# java.util.logging.FileHandler.count = 1 +# java.util.logging.FileHandler.formatter = java.util.logging.XMLFormatter + +# Limit the message that are printed on the console to FINE and above. + +java.util.logging.ConsoleHandler.level = FINE +java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter + + +############################################################ +# Facility specific properties. +# Provides extra control for each logger. 
+############################################################ + +# For example, set the com.xyz.foo logger to only log SEVERE +# messages: + +# edu.uci.ics.asterix.level = FINE +edu.uci.ics.algebricks.level = FINE +# edu.uci.ics.hyracks.level = FINE http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py index 4f81f86..746fef4 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py @@ -48,16 +48,20 @@ class WeatherBenchmark: ] QUERY_UTILITY_LIST = [ "no_result.xq", - "sensor_count.xq", - "station_count.xq", - "q04_sensor.xq", - "q04_station.xq", - "q05_sensor.xq", - "q05_station.xq", - "q06_sensor.xq", - "q06_station.xq", - "q07_tmin.xq", - "q07_tmax.xq", + "count_sensor.xq", + "count_station.xq", + "q04_count_join.xq", + "q04_count_sensor.xq", + "q04_count_station.xq", + "q05_count_join.xq", + "q05_count_sensor.xq", + "q05_count_station.xq", + "q06_count_join.xq", + "q06_count_sensor.xq", + "q06_count_station.xq", + "q07_count_join.xq", + "q07_count_tmin.xq", + "q07_count_tmax.xq", ] BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"] BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"] http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java ---------------------------------------------------------------------- diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java index c0ca612..53d9ec2 100644 --- 
a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java +++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java @@ -446,10 +446,10 @@ public class VXQuery { @Option(name = "-local-node-controllers", usage = "Number of local node controllers (default 1)") private int localNodeControllers = 1; - @Option(name = "-frame-size", usage = "Frame size in bytes. (default 65536)") + @Option(name = "-frame-size", usage = "Frame size in bytes. (default 65,536)") private int frameSize = 65536; - @Option(name = "-join-hash-size", usage = "Join hash size in bytes.") + @Option(name = "-join-hash-size", usage = "Join hash size in bytes. (default 67,108,864)") private int joinHashSize = -1; @Option(name = "-buffer-size", usage = "Disk read buffer size in bytes.") http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java index f1d41d8..12b0780 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.vxquery.compiler.rewriter.rules.ConsolidateAssignAggregateRule; import org.apache.vxquery.compiler.rewriter.rules.ConvertFromAlgebricksExpressionsRule; import org.apache.vxquery.compiler.rewriter.rules.ConvertToAlgebricksExpressionsRule; +import org.apache.vxquery.compiler.rewriter.rules.DelayMaterializationForJoinProbeRule; import org.apache.vxquery.compiler.rewriter.rules.InlineNestedVariablesRule; import org.apache.vxquery.compiler.rewriter.rules.PushChildIntoDataScanRule; import org.apache.vxquery.compiler.rewriter.rules.ConsolidateUnnestsRule; 
@@ -33,7 +34,6 @@ import org.apache.vxquery.compiler.rewriter.rules.EliminateUnnestAggregateSubpla import org.apache.vxquery.compiler.rewriter.rules.IntroduceCollectionRule; import org.apache.vxquery.compiler.rewriter.rules.IntroduceTwoStepAggregateRule; import org.apache.vxquery.compiler.rewriter.rules.PushFunctionsOntoEqJoinBranches; -import org.apache.vxquery.compiler.rewriter.rules.PushMapOperatorDownThroughProductRule; import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantBooleanExpressionsRule; import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantCastExpressionsRule; import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantDataExpressionsRule; @@ -43,6 +43,7 @@ import org.apache.vxquery.compiler.rewriter.rules.RemoveUnusedSortDistinctNodesR import org.apache.vxquery.compiler.rewriter.rules.RemoveUnusedTreatRule; import org.apache.vxquery.compiler.rewriter.rules.SetCollectionDataSourceRule; import org.apache.vxquery.compiler.rewriter.rules.SetVariableIdContextRule; +import org.apache.vxquery.compiler.rewriter.rules.VXQueryExtractCommonOperatorsRule; import edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer; import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; @@ -64,6 +65,7 @@ import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRul import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceProjectsRule; import edu.uci.ics.hyracks.algebricks.rewriter.rules.IsolateHyracksOperatorsRule; import edu.uci.ics.hyracks.algebricks.rewriter.rules.PullSelectOutOfEqJoin; +import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushMapOperatorDownThroughProductRule; import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectDownRule; import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectIntoDataSourceScanRule; import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectDownRule; @@ -260,6 +262,7 @@ public class RewriteRuleset { public final static 
List<IAlgebraicRewriteRule> buildPhysicalRewritesTopLevelRuleCollection() { List<IAlgebraicRewriteRule> physicalPlanRewrites = new LinkedList<IAlgebraicRewriteRule>(); physicalPlanRewrites.add(new CopyLimitDownRule()); + physicalPlanRewrites.add(new SetExecutionModeRule()); return physicalPlanRewrites; } @@ -269,10 +272,13 @@ public class RewriteRuleset { prepareForJobGenRewrites.add(new IsolateHyracksOperatorsRule( HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled)); prepareForJobGenRewrites.add(new ExtractCommonOperatorsRule()); +// prepareForJobGenRewrites.add(new VXQueryExtractCommonOperatorsRule()); +// prepareForJobGenRewrites.add(new DelayMaterializationForJoinProbeRule()); // Re-infer all types, so that, e.g., the effect of not-is-null is // propagated. prepareForJobGenRewrites.add(new PushProjectIntoDataSourceScanRule()); prepareForJobGenRewrites.add(new ReinferAllTypesRule()); + prepareForJobGenRewrites.add(new SetExecutionModeRule()); return prepareForJobGenRewrites; } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConsolidateUnnestsRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConsolidateUnnestsRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConsolidateUnnestsRule.java index 6c0aa26..6a2bb08 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConsolidateUnnestsRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConsolidateUnnestsRule.java @@ -18,6 +18,7 @@ package org.apache.vxquery.compiler.rewriter.rules; import org.apache.commons.lang3.mutable.Mutable; import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox; +import org.apache.vxquery.functions.BuiltinOperators; import 
org.apache.vxquery.functions.Function; import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException; @@ -45,7 +46,7 @@ import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOpera * UNNEST( $v1 : uf1( $v0 ) ) * plan__child * - * Where $v1 is not used in plan__parent. + * Where $v1 is not used in plan__parent and uf1 is not a descendant expression. * * After * @@ -85,6 +86,12 @@ public class ConsolidateUnnestsRule extends AbstractUsedVariablesProcessingRule if (!functionInfo2.hasScalarEvaluatorFactory()) { return false; } + // Exception for specific path expressions. + if (functionCall2.getFunctionIdentifier().equals(BuiltinOperators.DESCENDANT.getFunctionIdentifier()) + || functionCall2.getFunctionIdentifier().equals( + BuiltinOperators.DESCENDANT_OR_SELF.getFunctionIdentifier())) { + return false; + } // Find unnest2 variable in unnest1 Mutable<ILogicalExpression> unnest1Arg = ExpressionToolbox.findVariableExpression( @@ -105,5 +112,4 @@ public class ConsolidateUnnestsRule extends AbstractUsedVariablesProcessingRule } return false; } - } http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignSortDistinctNodesToOperatorsRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignSortDistinctNodesToOperatorsRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignSortDistinctNodesToOperatorsRule.java index b92384e..f3d7d48 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignSortDistinctNodesToOperatorsRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignSortDistinctNodesToOperatorsRule.java @@ -203,7 +203,6 @@ public class ConvertAssignSortDistinctNodesToOperatorsRule implements IAlgebraic 
List<Pair<IOrder, Mutable<ILogicalExpression>>> orderArgs = new ArrayList<Pair<IOrder, Mutable<ILogicalExpression>>>(); orderArgs.add(new Pair<IOrder, Mutable<ILogicalExpression>>(OrderOperator.ASC_ORDER, variableRef)); OrderOperator oo = new OrderOperator(orderArgs); -// oo.setExecutionMode(AbstractLogicalOperator.ExecutionMode.LOCAL); return oo; } http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/DelayMaterializationForJoinProbeRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/DelayMaterializationForJoinProbeRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/DelayMaterializationForJoinProbeRule.java new file mode 100644 index 0000000..341b7ec --- /dev/null +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/DelayMaterializationForJoinProbeRule.java @@ -0,0 +1,152 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.rules;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.MaterializeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.IsomorphismUtilities;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.MaterializePOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.OneToOneExchangePOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.ReplicatePOperator;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Moves the materialization for a join's probe branch out of the shared replicate operator.
+ * <p>
+ * The rule fires on an inner or left outer join whose two inputs each lead, through a chain of
+ * single-input operators, to a replicate operator, and whose two replicate operators are isomorphic.
+ * Every output materialization flag that is set on the probe side (input 0) replicate operator is
+ * cleared, and a one-to-one exchange followed by a materialize operator is inserted directly beneath
+ * the top operator of the probe branch.
+ */
+public class DelayMaterializationForJoinProbeRule implements IAlgebraicRewriteRule {
+
+    /**
+     * Does nothing; all rewriting happens in {@link #rewritePost}.
+     *
+     * @return always {@code false}
+     */
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        return false;
+    }
+
+    /**
+     * Applies the rewrite when {@code opRef} refers to a qualifying join.
+     *
+     * @param opRef
+     *            reference to the operator being visited
+     * @param context
+     *            optimization context used to recompute the type environments of the changed operators
+     * @return {@code true} if the plan was modified, {@code false} otherwise
+     * @throws AlgebricksException
+     *             if raised by the isomorphism check or the type environment computation
+     */
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+        if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN
+                && op.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
+            return false;
+        }
+
+        AbstractBinaryJoinOperator abjo = (AbstractBinaryJoinOperator) op;
+        if (abjo.getInputs().size() != 2) {
+            return false;
+        }
+
+        // Both branches must reach a replicate operator; check each reference before dereferencing it.
+        Mutable<ILogicalOperator> branchProbeRO = findReplicateOperator(abjo.getInputs().get(0));
+        Mutable<ILogicalOperator> branchBuildRO = findReplicateOperator(abjo.getInputs().get(1));
+        if (branchProbeRO == null || branchBuildRO == null
+                || !IsomorphismUtilities.isOperatorIsomorphic(branchProbeRO.getValue(), branchBuildRO.getValue())) {
+            return false;
+        }
+
+        // Turn off materialization in the probe side replicate operator.
+        boolean found = false;
+        ReplicateOperator ro = (ReplicateOperator) branchProbeRO.getValue();
+        boolean[] outputMaterializationFlags = ro.getOutputMaterializationFlags();
+        for (int i = 0; i < outputMaterializationFlags.length; ++i) {
+            if (outputMaterializationFlags[i]) {
+                found = true;
+                outputMaterializationFlags[i] = false;
+            }
+        }
+        if (!found) {
+            // No flag was set, so there is no materialization to delay.
+            return false;
+        }
+        ro.setOutputMaterializationFlags(outputMaterializationFlags);
+
+        // The new operators go between the top operator of the probe branch and its first input.
+        Mutable<ILogicalOperator> probeTopRef = abjo.getInputs().get(0);
+        Mutable<ILogicalOperator> probeTopInputRef = probeTopRef.getValue().getInputs().get(0);
+
+        // New plan operators
+        AbstractLogicalOperator exchange = new ExchangeOperator();
+        exchange.setPhysicalOperator(new OneToOneExchangePOperator());
+        exchange.setExecutionMode(ExecutionMode.PARTITIONED);
+        MutableObject<ILogicalOperator> exchangeRef = new MutableObject<ILogicalOperator>(exchange);
+        exchange.getInputs().add(probeTopInputRef);
+
+        MaterializeOperator mop = new MaterializeOperator();
+        mop.setPhysicalOperator(new MaterializePOperator(false));
+        mop.setExecutionMode(ExecutionMode.PARTITIONED);
+        Mutable<ILogicalOperator> mopRef = new MutableObject<ILogicalOperator>(mop);
+        mop.getInputs().add(exchangeRef);
+        AbstractLogicalOperator childOp = (AbstractLogicalOperator) probeTopRef.getValue();
+        childOp.getInputs().set(0, mopRef);
+
+        // Refresh type environments bottom-up: exchange, materialize, then the operator above them.
+        context.computeAndSetTypeEnvironmentForOperator(exchange);
+        context.computeAndSetTypeEnvironmentForOperator(mop);
+        context.computeAndSetTypeEnvironmentForOperator(childOp);
+        return true;
+    }
+
+    /**
+     * Walks down from {@code input} through operators that have exactly one input until a replicate
+     * operator is found.
+     *
+     * @param input
+     *            reference to the operator to start from
+     * @return the reference holding the first replicate operator reached, or {@code null} when the walk
+     *         ends at an operator that is not a replicate operator and does not have exactly one input
+     */
+    private Mutable<ILogicalOperator> findReplicateOperator(Mutable<ILogicalOperator> input) {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) input.getValue();
+        if (op.getOperatorTag() == LogicalOperatorTag.REPLICATE) {
+            return input;
+        }
+        if (op.getInputs().size() == 1) {
+            return findReplicateOperator(op.getInputs().get(0));
+        }
+        return null;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushMapOperatorDownThroughProductRule.java ---------------------------------------------------------------------- diff
--git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushMapOperatorDownThroughProductRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushMapOperatorDownThroughProductRule.java deleted file mode 100644 index a13d64b..0000000 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushMapOperatorDownThroughProductRule.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.vxquery.compiler.rewriter.rules; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.lang3.mutable.Mutable; - -import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException; -import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator; -import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext; -import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; -import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable; -import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression; -import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator; -import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; -import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities; -import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; - -/** - * The rule searches for an inner join operator followed by a map operator. - * When the map operator only uses variable generated by one side of the join, - * the operator is pushed down through the product rule. - * - * <pre> - * Before - * - * %PARENT_PLAN - * ASSIGN | EXCHANGE | SELECT | UNNEST - * INNERJOIN( true ) - * { - * %RIGHT_BRANCH_PLAN - * } - * { - * %LEFT_BRANCH_PLAN - * } - * %CHILD_PLAN - * - * Where ASSIGN | EXCHANGE | SELECT | UNNEST are operators with the map - * property. - * - * After - * - * When all variables used in ASSIGN | EXCHANGE | SELECT | UNNEST exist in - * %RIGHT_BRANCH_PLAN. - * - * %PARENT_PLAN - * INNERJOIN( true ) - * { - * ASSIGN | EXCHANGE | SELECT | UNNEST - * %RIGHT_BRANCH_PLAN - * } - * { - * %LEFT_BRANCH_PLAN - * } - * %CHILD_PLAN - * - * When all variables used in ASSIGN | EXCHANGE | SELECT | UNNEST exist in - * %LEFT_BRANCH_PLAN. 
- * - * %PARENT_PLAN - * INNERJOIN( true ) - * { - * %RIGHT_BRANCH_PLAN - * } - * { - * ASSIGN | EXCHANGE | SELECT | UNNEST - * %LEFT_BRANCH_PLAN - * } - * %CHILD_PLAN - * </pre> - */ -public class PushMapOperatorDownThroughProductRule implements IAlgebraicRewriteRule { - - @Override - public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { - return false; - } - - @Override - public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) - throws AlgebricksException { - AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue(); - if (!op1.isMap()) { - return false; - } - Mutable<ILogicalOperator> op2Ref = op1.getInputs().get(0); - AbstractLogicalOperator op2 = (AbstractLogicalOperator) op2Ref.getValue(); - if (op2.getOperatorTag() != LogicalOperatorTag.INNERJOIN) { - return false; - } - AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) op2; - if (join.getCondition().getValue() != ConstantExpression.TRUE) { - return false; - } - - List<LogicalVariable> used = new ArrayList<LogicalVariable>(); - VariableUtilities.getUsedVariables(op1, used); - - Mutable<ILogicalOperator> b0Ref = op2.getInputs().get(0); - ILogicalOperator b0 = b0Ref.getValue(); - List<LogicalVariable> b0Scm = new ArrayList<LogicalVariable>(); - VariableUtilities.getLiveVariables(b0, b0Scm); - if (b0Scm.containsAll(used)) { - // push assign on left branch - op2Ref.setValue(b0); - b0Ref.setValue(op1); - opRef.setValue(op2); - return true; - } else { - Mutable<ILogicalOperator> b1Ref = op2.getInputs().get(1); - ILogicalOperator b1 = b1Ref.getValue(); - List<LogicalVariable> b1Scm = new ArrayList<LogicalVariable>(); - VariableUtilities.getLiveVariables(b1, b1Scm); - if (b1Scm.containsAll(used)) { - // push assign on right branch - op2Ref.setValue(b1); - b1Ref.setValue(op1); - opRef.setValue(op2); - return true; - } else { - return false; - } - } - } - -} 
http://git-wip-us.apache.org/repos/asf/vxquery/blob/99ba4dbf/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml index ecf7542..5892c48 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml +++ b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml @@ -906,6 +906,7 @@ <param name="arg" type="node()*"/> <return type="node()*"/> <runtime type="scalar" class="org.apache.vxquery.runtime.functions.step.DescendantOrSelfPathStepScalarEvaluatorFactory"/> + <runtime type="unnesting" class="org.apache.vxquery.runtime.functions.step.DescendantOrSelfPathStepUnnestingEvaluatorFactory"/> <property type="UniqueNodes" class="org.apache.vxquery.compiler.rewriter.rules.propagationpolicies.uniquenodes.UniqueNodesNOPropagationPolicy"/> </operator>
