Updated with small changes to improve the benchmark process. - Moved to the next version of Hyracks (0.2.13-SNAPSHOT). - Updated MRQL queries. - Updated benchmark frame and buffer sizes. - Conditionally send out the benchmark finish e-mail. - Added a CLI argument (-timing-ignore-queries) for ignoring the first queries when producing timing results.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/61b22a9c Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/61b22a9c Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/61b22a9c Branch: refs/heads/master Commit: 61b22a9c77930a8b69f308de9d001e743cd318eb Parents: cee27a7 Author: Preston Carman <[email protected]> Authored: Sun Aug 3 15:34:21 2014 -0700 Committer: Preston Carman <[email protected]> Committed: Sun Aug 3 15:34:21 2014 -0700 ---------------------------------------------------------------------- pom.xml | 2 +- vxquery-benchmark/pom.xml | 21 +++++++++ .../other_systems/mrql_gsn/q01.mrql | 2 +- .../other_systems/mrql_gsn/q04.mrql | 4 +- .../other_systems/mrql_gsn/q06.mrql | 2 - .../other_systems/mrql_test/q00.mrql | 7 +++ .../other_systems/mrql_test/q01.mrql | 5 ++ .../other_systems/mrql_test/q02.mrql | 8 ++++ .../other_systems/mrql_test/q03.mrql | 6 +++ .../other_systems/mrql_test/q04.mrql | 8 ++++ .../other_systems/mrql_test/q05.mrql | 11 +++++ .../other_systems/mrql_test/q06.mrql | 11 +++++ .../other_systems/mrql_test/q07.mrql | 10 ++++ .../noaa-ghcn-daily/scripts/run_benchmark.sh | 23 +++++---- .../scripts/run_benchmark_cluster.sh | 28 ++++++----- .../noaa-ghcn-daily/scripts/run_group_test.sh | 23 +++++---- .../noaa-ghcn-daily/scripts/run_mrql_tests.sh | 16 +++++-- .../scripts/weather_benchmark.py | 49 +++++++++----------- .../noaa-ghcn-daily/scripts/weather_cli.py | 4 +- .../scripts/weather_data_files.py | 21 ++++++--- .../src/main/resources/util/merge_xml_files.py | 2 +- .../java/org/apache/vxquery/cli/VXQuery.java | 24 +++++----- .../VXQueryCollectionOperatorDescriptor.java | 2 +- .../metadata/VXQueryMetadataProvider.java | 26 ++++++----- .../org/apache/vxquery/xmlparser/XMLParser.java | 18 +++++-- vxquery-xtest/pom.xml | 25 ++++++---- .../apache/vxquery/xtest/TestRunnerFactory.java | 3 -- 27 files changed, 247 insertions(+), 114 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 752a633..cf17361 100644 --- a/pom.xml +++ b/pom.xml @@ -594,7 +594,7 @@ <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> - <hyracks.version>0.2.12-SNAPSHOT</hyracks.version> + <hyracks.version>0.2.13-SNAPSHOT</hyracks.version> </properties> <modules> http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/pom.xml ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/pom.xml b/vxquery-benchmark/pom.xml index da981d7..e08da38 100644 --- a/vxquery-benchmark/pom.xml +++ b/vxquery-benchmark/pom.xml @@ -47,6 +47,27 @@ </execution> </executions> </plugin> + <plugin> + <artifactId>maven-resources-plugin</artifactId> + <version>2.5</version> + <executions> + <execution> + <id>copy-scripts</id> + <phase>package</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>target/appassembler/scripts</outputDirectory> + <resources> + <resource> + <directory>src/main/resources</directory> + </resource> + </resources> + </configuration> + </execution> + </executions> + </plugin> </plugins> </build> http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql index 5e8de9b..1712cfe 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql +++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql @@ -1,5 +1,5 @@ select (r) from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"}) where text(r.dataType) = "AWND" - and toInt(text(r.value)) > 491.744 + and toFloat(text(r.value)) > 491.744 ; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql index 938f6d8..2929478 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql @@ -1,8 +1,8 @@ -select (r) +select (sensors) from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}), stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}), l in stations.locationLabels where text(stations.id) = text(sensors.station) and text(sensors.date) = "1976-07-04T00:00:00.000" - and text(l.displayName) = "WASHINGTON" + and text(l.displayName) = "Washington" ; http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql index c4ab3da..bef3413 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql @@ -4,9 +4,7 @@ from sensors in source(xml, 
"sample_xml/gsn_sensors.xml", {"data"}), v in sensors.value, stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}), n in stations.displayName, - l in stations.locationLabels where text(stations.id) = text(sensors.station) and toInt(substring(text(d), 0, 4)) = 2000 and text(sensors.dataType) = "TMAX" - and text(l.displayName) = "WASHINGTON" ; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql new file mode 100644 index 0000000..2e606d8 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql @@ -0,0 +1,7 @@ +select (r) +from r in source(xml, "sample_xml/nano_sensors.xml", {"data"}) +where text(r.station) = "GHCND:AS000000003" + and toInt(substring(text(r.date), 0, 4)) >= 2000 + and toInt(substring(text(r.date), 5, 7)) = 3 + and toInt(substring(text(r.date), 8, 10)) = 3 +; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql new file mode 100644 index 0000000..1e41c2c --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql @@ -0,0 +1,5 @@ +select (r) +from r in source(xml, "sample_xml/nano_sensors.xml", {"data"}) +where text(r.dataType) = "AWND" + and toFloat(text(r.value)) > 491.744 +; \ No newline at end of 
file http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql new file mode 100644 index 0000000..818ad31 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql @@ -0,0 +1,8 @@ +sum( + select (toFloat(text(r.value))) + from r in source(xml, "sample_xml/nano_sensors.xml", {"data"}) + where text(r.station) = "GHCND:US000000002" + and toInt(substring(text(r.date), 0, 4)) = 2002 + and text(r.dataType) = "PRCP" +) / 10 +; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql new file mode 100644 index 0000000..68fa926 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql @@ -0,0 +1,6 @@ +max( + select (toFloat(text(r.value))) + from r in source(xml, "sample_xml/nano_sensors.xml", {"data"}) + where text(r.dataType) = "TMAX" +) / 10 +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql new file mode 100644 
index 0000000..467d318 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql @@ -0,0 +1,8 @@ +select (sensors) +from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}), + stations in source(xml, "sample_xml/nano_stations.xml", {"station"}), + l in stations.locationLabels +where text(stations.id) = text(sensors.station) + and text(sensors.date) = "2002-02-02T00:00:00.000" + and text(l.displayName) = "State 1" +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql new file mode 100644 index 0000000..c95d7d8 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql @@ -0,0 +1,11 @@ +min( + select (toFloat(text(sensors.value))) + from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}), + stations in source(xml, "sample_xml/nano_stations.xml", {"station"}), + l in stations.locationLabels + where text(stations.id) = text(sensors.station) + and toInt(substring(text(sensors.date), 0, 4)) = 2001 + and text(sensors.dataType) = "TMIN" + and text(l.id) = "FIPS:US" +) / 10 +; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql new file mode 100644 index 0000000..8989e48 --- /dev/null +++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql @@ -0,0 +1,11 @@ +select (n, d, v) +from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}), + d in sensors.date, + v in sensors.value, + stations in source(xml, "sample_xml/nano_stations.xml", {"station"}), + n in stations.displayName, + l in stations.locationLabels +where text(stations.id) = text(sensors.station) + and toInt(substring(text(d), 0, 4)) = 2002 + and text(sensors.dataType) = "TMAX" +; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql new file mode 100644 index 0000000..ac28716 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql @@ -0,0 +1,10 @@ +avg( + select (toFloat(text(rtmax.value))-toFloat(text(rtmin.value))) + from rtmax in source(xml, "sample_xml/nano_sensors.xml", {"data"}), + rtmin in source(xml, "sample_xml/nano_sensors.xml", {"data"}) + where text(rtmax.date) = text(rtmin.date) + and text(rtmax.station) = text(rtmin.station) + and text(rtmax.dataType) = "TMAX" + and text(rtmin.dataType) = "TMIN" +) / 10 +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh index b82f0be..0852d86 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh +++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh @@ -26,7 +26,7 @@ # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03 # REPEAT=5 -FRAME_SIZE=10000 +FRAME_SIZE=$((8*1024)) BUFFER_SIZE=$((32*1024*1024)) if [ -z "${1}" ] @@ -46,15 +46,20 @@ do log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log" log_base_path=$(dirname ${j/queries/query_logs}) mkdir -p ${log_base_path} - time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 - #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 + time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 echo "Buffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file} - #echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file} + echo "Frame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file} fi; done -SUBJECT="Benchmark Tests Finished" -EMAIL="[email protected]" -/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM -Completed all tests in folder ${1}. -EOM \ No newline at end of file +if which programname >/dev/null; +then + echo "Sending out e-mail notification." + SUBJECT="Benchmark Tests Finished" + EMAIL="[email protected]" + /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM + Completed all tests in folder ${1}. + EOM +else + echo "No mail command to use." 
+fi; http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh old mode 100644 new mode 100755 index 6c19713..5c27266 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh @@ -25,8 +25,9 @@ # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138" # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03 # +CLUSTER="uci" REPEAT=5 -FRAME_SIZE=10000 +FRAME_SIZE=$((8*1024)) BUFFER_SIZE=$((32*1024*1024)) if [ -z "${1}" ] @@ -43,7 +44,7 @@ fi # Run queries for the specified number of nodes. 
echo "Starting ${2} cluster nodes" -python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${2}nodes.xml -a start +python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a start # wait for cluster to finish setting up sleep 5 @@ -63,19 +64,24 @@ do log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log" log_base_path=$(dirname ${j/queries/query_logs}) mkdir -p ${log_base_path} - #time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 - time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 + time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file} - #echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file} + echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file} fi; fi; done # Stop cluster. -python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${2}nodes.xml -a stop +python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a stop -SUBJECT="Benchmark Cluster Tests Finished" -EMAIL="[email protected]" -/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM -Completed all tests in folder ${1} for a ${2} node cluster. -EOM \ No newline at end of file +if which programname >/dev/null; +then + echo "Sending out e-mail notification." 
+ SUBJECT="Benchmark Cluster Tests Finished" + EMAIL="[email protected]" + /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM + Completed all tests in folder ${1} for a ${2} node cluster using ${HOSTNAME}. + EOM +else + echo "No mail command to use." +fi; http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh old mode 100644 new mode 100755 index 7bef3cb..d5b8dc5 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh @@ -19,26 +19,33 @@ DATASET="dataset-hcn-d2" cluster_ip=${1} +base_weather_folder=${2} -for n in 4 +for n in 7 6 5 3 4 2 1 0 do #for t in "batch_scale_out" "speed_up" for t in "batch_scale_out" #for t in "speed_up" do - for p in 0 + for p in 2 1 0 do for c in 4 do echo " ==== node ${n} test ${t} partition ${p} cores ${c} ====" - sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh weather_data/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}" + sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}" done done done done -SUBJECT="Benchmark Group Tests Finished" -EMAIL="[email protected]" -/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM -Completed all tests in the predefined group for ${DATASET}. -EOM \ No newline at end of file +if which programname >/dev/null; +then + echo "Sending out e-mail notification." 
+ SUBJECT="Benchmark Group Tests Finished" + EMAIL="[email protected]" + /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM + Completed all tests in the predefined group for ${DATASET}. + EOM +else + echo "No mail command to use." +fi; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh old mode 100644 new mode 100755 index 1fa58dd..a6788be --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh @@ -29,8 +29,14 @@ do time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done; done -SUBJECT="MRQL Tests Finished (${DATASET})" -EMAIL="[email protected]" -/bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM -Completed all MRQL tests on ${DATASET}. -EOM \ No newline at end of file +if which programname >/dev/null; +then + echo "Sending out e-mail notification." + SUBJECT="MRQL Tests Finished (${DATASET})" + EMAIL="[email protected]" + /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM + Completed all MRQL tests on ${DATASET}. + EOM +else + echo "No mail command to use." 
+fi; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py index 3b0f9b3..c013d8e 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py @@ -92,19 +92,17 @@ class WeatherBenchmark: def print_local_partition_schemes(self, test): node_index = 0 - virtual_partitions = get_local_virtual_partitions(self.partitions) - virtual_partitions_per_disk = virtual_partitions / len(self.base_paths) + virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions) for p in self.partitions: scheme = self.get_local_partition_scheme(test, p) - self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index) + self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index) def print_cluster_partition_schemes(self, test): node_index = self.get_current_node_index() - virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions) - virtual_partitions_per_disk = virtual_partitions / len(self.base_paths) + virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions) for p in self.partitions: scheme = self.get_cluster_partition_scheme(test, p) - self.print_partition_schemes(virtual_partitions, scheme, test, p, node_index) + self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index) def print_partition_schemes(self, virtual_partitions, scheme, test, partitions, node_id): print @@ -128,7 +126,7 @@ class WeatherBenchmark: def get_local_partition_scheme(self, test, partition): scheme = [] - 
virtual_partitions = get_local_virtual_partitions(self.partitions) + virtual_partitions = get_local_virtual_disk_partitions(self.partitions) data_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths) link_base_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test) @@ -156,28 +154,27 @@ class WeatherBenchmark: return scheme = [] - local_virtual_partitions = get_local_virtual_partitions(self.partitions) - virtual_partitions = get_cluster_virtual_partitions(self.nodes, self.partitions) - virtual_partitions_per_disk = virtual_partitions / len(self.base_paths) - data_schemes = get_partition_scheme(node_index, virtual_partitions, self.base_paths) - link_base_schemes = get_cluster_link_scheme(len(self.nodes), virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test) + virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions) + data_schemes = get_disk_partition_scheme(node_index, virtual_disk_partitions, self.base_paths) + link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test) # Match link paths to real data paths. for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes: # Prep if test == "speed_up": - group_size = virtual_partitions_per_disk / (link_node + 1) + group_size = virtual_disk_partitions / (link_node + 1) / partition elif test == "batch_scale_out": - group_size = virtual_partitions_per_disk / len(self.nodes) + group_size = virtual_disk_partitions / len(self.nodes) / partition else: print "Unknown test." 
return - node_offset = group_size * (node_index * partition) + + node_offset = group_size * node_index * partition node_offset += group_size * link_index has_data = True if link_node < node_index: has_data = False - + # Make links for date_node, data_disk, data_virtual, data_index, data_path in data_schemes: if has_data and data_disk == link_disk \ @@ -295,7 +292,7 @@ class WeatherBenchmark: prepare_path(query_path, reset) # Copy query files. - partition_paths = get_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes") + partition_paths = get_disk_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes") self.copy_and_replace_query(query_path, partition_paths) def copy_local_query_files(self, test, reset): @@ -313,7 +310,7 @@ class WeatherBenchmark: prepare_path(query_path, reset) # Copy query files. - partition_paths = get_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test) + partition_paths = get_disk_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test) self.copy_and_replace_query(query_path, partition_paths) def copy_and_replace_query(self, query_path, replacement_list): @@ -339,15 +336,15 @@ class WeatherBenchmark: for line in fileinput.input(query_path + query_file, True): sys.stdout.write(line.replace("/dataCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/dataCollection")) - def get_number_of_slices(self): + def get_number_of_slices_per_disk(self): if len(self.dataset.get_tests()) == 0: print "No test has been defined in config file." else: for test in self.dataset.get_tests(): if test in self.BENCHMARK_LOCAL_TESTS: - return get_local_virtual_partitions(self.partitions) + return get_local_virtual_disk_partitions(self.partitions) elif test in self.BENCHMARK_CLUSTER_TESTS: - return get_cluster_virtual_partitions(self.nodes, self.partitions) + return get_cluster_virtual_disk_partitions(self.nodes, self.partitions) else: print "Unknown test." 
exit() @@ -355,7 +352,7 @@ class WeatherBenchmark: def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"): link_paths = [] for n in range(0, nodes): - new_link_path = get_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes") + new_link_path = get_disk_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes") link_paths.extend(new_link_path) return link_paths @@ -368,12 +365,12 @@ def get_local_query_folder(disks, partitions): def get_cluster_query_path(base_paths, test, partition, nodes): return base_paths[0] + "queries/" + test + "/" + str(nodes) + "nodes/" + get_local_query_folder(len(base_paths), partition) + "/" -def get_cluster_virtual_partitions(nodes, partitions): - vp = get_local_virtual_partitions(partitions) - vn = calculate_partitions(range(len(nodes), 0, -1)) +def get_cluster_virtual_disk_partitions(nodes, partitions): + vp = get_local_virtual_disk_partitions(partitions) + vn = calculate_partitions(range(1, len(nodes)+1, 1)) return vp * vn -def get_local_virtual_partitions(partitions): +def get_local_virtual_disk_partitions(partitions): return calculate_partitions(partitions) def calculate_partitions(list): http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py index 8ac6d17..eeae25c 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py @@ -53,7 +53,7 @@ def main(argv): print ' -a Append the results to the progress file.' print ' -f (str) The file name of a specific station to process.' print ' * Helpful when testing a single stations XML file output.' 
- print ' -l (str) Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, inventory, statistics).' + print ' -l (str) Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, test_links, queries, inventory, statistics).' print ' -m (int) Limits the number of files created for each station.' print ' * Helpful when testing to make sure all elements are supported for each station.' print ' Alternate form: --max_station_files=(int)' @@ -203,7 +203,7 @@ def main(argv): benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), dataset, config.get_node_machine_list()) if section in ("all", "partition", "partition_scheme"): - slices = benchmark.get_number_of_slices() + slices = benchmark.get_number_of_slices_per_disk() print 'Processing the partition section (' + dataset.get_name() + ':d' + str(len(base_paths)) + ':s' + str(slices) + ').' data.reset() if section == "partition_scheme": http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py index b39f934..4877120 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py @@ -376,19 +376,28 @@ PARTITION_INDEX = 3 PARTITION_INDEX_PATH = 4 PARTITION_HEADER = ("Node", "Disk", "Virtual", "Index", "Path") -def get_partition_paths(node_id, partitions, base_paths, key="partitions"): +def get_partition_paths(node_id, partitions, base_paths, key="partitions"): partition_paths = [] for scheme in get_partition_scheme(node_id, 
partitions, base_paths, key): partition_paths.append(scheme[PARTITION_INDEX_PATH]) return partition_paths -def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"): - partition_scheme = [] +def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"): partitions_per_disk = virtual_partitions / len(base_paths) - for i in range(0, partitions_per_disk): + return get_disk_partition_scheme(node_id, partitions_per_disk, base_paths, key) + +def get_disk_partition_paths(node_id, partitions, base_paths, key="partitions"): + partition_paths = [] + for scheme in get_disk_partition_scheme(node_id, partitions, base_paths, key): + partition_paths.append(scheme[PARTITION_INDEX_PATH]) + return partition_paths + +def get_disk_partition_scheme(node_id, virtual_disk_partitions, base_paths, key="partitions"): + partition_scheme = [] + for i in range(0, virtual_disk_partitions): for j in range(0, len(base_paths)): - new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, partitions_per_disk, i) + "/" - partition_scheme.append((node_id, j, partitions_per_disk, i, new_partition_path)) + new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, virtual_disk_partitions, i) + "/" + partition_scheme.append((node_id, j, virtual_disk_partitions, i, new_partition_path)) return partition_scheme def get_partition_folder(disks, partitions, index): http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-benchmark/src/main/resources/util/merge_xml_files.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py index 8a6952b..9238a19 100644 --- a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py +++ b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py @@ -43,7 +43,7 @@ def main(argv): except getopt.GetoptError: print 'The file options 
for list_xml_files.py were not correctly specified.' print 'To see a full list of options try:' - print ' $ python list_xml_files.py -h' + print ' $ python merge_xml_files.py -f /path/to/folder -s new.xml -t sensors' sys.exit(2) for opt, arg in opts: if opt == '-h': http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java ---------------------------------------------------------------------- diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java index a0a5c5a..c513a72 100644 --- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java +++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java @@ -83,7 +83,6 @@ public class VXQuery { private static long sumSquaredTiming; private static long minTiming = Long.MAX_VALUE; private static long maxTiming = Long.MIN_VALUE; - private static byte TIMING_QUERIES_TO_IGNORE = 2; /** * Constructor to use command line options passed. 
@@ -124,9 +123,9 @@ public class VXQuery { if (opts.timing) { Date end = new Date(); timingMessage("Execution time: " + (end.getTime() - start.getTime()) + " ms"); - if (opts.repeatExec > TIMING_QUERIES_TO_IGNORE) { - long mean = sumTiming / (opts.repeatExec - TIMING_QUERIES_TO_IGNORE); - double sd = Math.sqrt(sumSquaredTiming / (opts.repeatExec - new Byte(TIMING_QUERIES_TO_IGNORE).doubleValue()) - mean * mean); + if (opts.repeatExec > opts.timingIgnoreQueries) { + long mean = sumTiming / (opts.repeatExec - opts.timingIgnoreQueries); + double sd = Math.sqrt(sumSquaredTiming / (opts.repeatExec - new Integer(opts.timingIgnoreQueries).doubleValue()) - mean * mean); timingMessage("Average execution time: " + mean + " ms"); timingMessage("Standard deviation: " + String.format( "%.4f", sd)); timingMessage("Coefficient of variation: " + String.format( "%.4f", (sd / mean))); @@ -290,7 +289,7 @@ public class VXQuery { if (opts.timing) { end = new Date(); long currentRun = end.getTime() - start.getTime(); - if ((i + 1) > TIMING_QUERIES_TO_IGNORE) { + if ((i + 1) > opts.timingIgnoreQueries) { sumTiming += currentRun; sumSquaredTiming += currentRun * currentRun; if (currentRun < minTiming) { @@ -434,22 +433,22 @@ public class VXQuery { */ private static class CmdLineOptions { @Option(name = "-available-processors", usage = "Number of available processors. 
(default java's available processors)") - public int availableProcessors = -1; + private int availableProcessors = -1; @Option(name = "-client-net-ip-address", usage = "IP Address of the ClusterController") - public String clientNetIpAddress = null; + private String clientNetIpAddress = null; @Option(name = "-client-net-port", usage = "Port of the ClusterController (default 1098)") - public int clientNetPort = 1098; + private int clientNetPort = 1098; @Option(name = "-local-node-controllers", usage = "Number of local node controllers (default 1)") - public int localNodeControllers = 1; + private int localNodeControllers = 1; @Option(name = "-frame-size", usage = "Frame size in bytes. (default 65536)") - public int frameSize = 65536; + private int frameSize = 65536; @Option(name = "-buffer-size", usage = "Disk read buffer size in bytes.") - public int bufferSize = -1; + private int bufferSize = -1; @Option(name = "-O", usage = "Optimization Level. Default: Full Optimization") private int optimizationLevel = Integer.MAX_VALUE; @@ -478,6 +477,9 @@ public class VXQuery { @Option(name = "-timing", usage = "Produce timing information") private boolean timing; + @Option(name = "-timing-ignore-queries", usage = "Ignore the first X number of queries.") + private int timingIgnoreQueries = 2; + @Option(name = "-x", usage = "Bind an external variable") private Map<String, String> bindings = new HashMap<String, String>(); http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java index 89968d5..8fdd1ec 100644 --- 
a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java @@ -105,7 +105,7 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO } } } else { - throw new HyracksDataException("Invalid directory parameter (" + throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":" + collectionDirectory.getAbsolutePath() + ") passed to collection."); } } http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java index 40a02ae..bc92ffc 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java @@ -68,8 +68,9 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String @Override public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(IDataSource<String> dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed, - IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, - JobSpecification jobSpec, Object implConfig) throws AlgebricksException { + List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema, + IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig) + throws AlgebricksException { VXQueryCollectionDataSource ds = (VXQueryCollectionDataSource) dataSource; if (sourceFileMap != null) { final int len = 
ds.getPartitions().length; @@ -123,23 +124,24 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String @Override public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime( IDataSource<String> dataSource, IOperatorSchema propagatedSchema, List<LogicalVariable> keys, - LogicalVariable payLoadVar, JobGenContext context, JobSpecification jobSpec) throws AlgebricksException { + LogicalVariable payLoadVar, List<LogicalVariable> additionalNonKeyFields, JobGenContext context, + JobSpecification jobSpec) throws AlgebricksException { return null; } @Override public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertRuntime(IDataSource<String> dataSource, IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List<LogicalVariable> keys, - LogicalVariable payLoadVar, RecordDescriptor recordDesc, JobGenContext context, JobSpecification jobSpec) - throws AlgebricksException { + LogicalVariable payLoadVar, List<LogicalVariable> additionalNonKeyFields, RecordDescriptor recordDesc, + JobGenContext context, JobSpecification jobSpec) throws AlgebricksException { return null; } @Override public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getDeleteRuntime(IDataSource<String> dataSource, IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List<LogicalVariable> keys, - LogicalVariable payLoadVar, RecordDescriptor recordDesc, JobGenContext context, JobSpecification jobSpec) - throws AlgebricksException { + LogicalVariable payLoadVar, List<LogicalVariable> additionalNonKeyFields, RecordDescriptor recordDesc, + JobGenContext context, JobSpecification jobSpec) throws AlgebricksException { return null; } @@ -147,8 +149,9 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexInsertRuntime( IDataSourceIndex<String, String> dataSource, IOperatorSchema propagatedSchema, 
IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, - List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr, RecordDescriptor recordDesc, - JobGenContext context, JobSpecification spec) throws AlgebricksException { + List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, + ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec) + throws AlgebricksException { return null; } @@ -156,8 +159,9 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexDeleteRuntime( IDataSourceIndex<String, String> dataSource, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, - List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr, RecordDescriptor recordDesc, - JobGenContext context, JobSpecification spec) throws AlgebricksException { + List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, + ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec) + throws AlgebricksException { return null; } http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java index 1d979b5..81b9191 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java @@ -20,6 +20,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; 
+import java.io.Reader; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -53,7 +54,7 @@ public class XMLParser { public XMLParser(boolean attachTypes, ITreeNodeIdProvider idProvider, String nodeId, ByteBuffer frame, FrameTupleAppender appender, List<Integer> childSeq, StaticContext staticContext) throws HyracksDataException { - bufferSize = Integer.parseInt(System.getProperty("vxquery.buffer_size")); + bufferSize = Integer.parseInt(System.getProperty("vxquery.buffer_size", "-1")); this.nodeId = nodeId; try { parser = XMLReaderFactory.createXMLReader(); @@ -76,13 +77,16 @@ public class XMLParser { public void parseDocument(File file, ArrayBackedValueStorage abvs) throws HyracksDataException { try { + Reader input; if (bufferSize > 0) { - in.setCharacterStream(new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize)); + input = new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize); } else { - in.setCharacterStream(new InputStreamReader(new FileInputStream(file))); + input = new InputStreamReader(new FileInputStream(file)); } + in.setCharacterStream(input); parser.parse(in); handler.writeDocument(abvs); + input.close(); } catch (FileNotFoundException e) { HyracksDataException hde = new VXQueryFileNotFoundException(e, file); hde.setNodeId(nodeId); @@ -101,13 +105,17 @@ public class XMLParser { public void parseElements(File file, IFrameWriter writer, FrameTupleAccessor fta, int tupleIndex) throws HyracksDataException { try { + Reader input; if (bufferSize > 0) { - in.setCharacterStream(new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize)); + input = new BufferedReader(new InputStreamReader(new FileInputStream(file)), bufferSize); +// System.err.println("buffer size: " + bufferSize); } else { - in.setCharacterStream(new InputStreamReader(new FileInputStream(file))); + input = new InputStreamReader(new FileInputStream(file)); } + in.setCharacterStream(input); 
handler.setupElementWriter(writer, fta, tupleIndex); parser.parse(in); + input.close(); } catch (FileNotFoundException e) { HyracksDataException hde = new VXQueryFileNotFoundException(e, file); hde.setNodeId(nodeId); http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-xtest/pom.xml ---------------------------------------------------------------------- diff --git a/vxquery-xtest/pom.xml b/vxquery-xtest/pom.xml index 320950b..762aa0b 100644 --- a/vxquery-xtest/pom.xml +++ b/vxquery-xtest/pom.xml @@ -67,11 +67,9 @@ </plugin> <plugin> <artifactId>maven-resources-plugin</artifactId> - <version>2.5</version> <executions> <execution> - <id>copy-scripts</id> - <!-- here the phase you need --> + <id>copy-xtest-scripts</id> <phase>package</phase> <goals> <goal>copy-resources</goal> @@ -85,14 +83,23 @@ </resources> </configuration> </execution> + <execution> + <id>copy-xtest-conf</id> + <phase>package</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>target/appassembler/conf</outputDirectory> + <resources> + <resource> + <directory>src/main/resources/conf</directory> + </resource> + </resources> + </configuration> + </execution> </executions> </plugin> - <!-- - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-site-plugin</artifactId> - </plugin> - --> <plugin> <artifactId>maven-antrun-plugin</artifactId> <executions> http://git-wip-us.apache.org/repos/asf/vxquery/blob/61b22a9c/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java ---------------------------------------------------------------------- diff --git a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java index b9f1d70..6b3fb4b 100644 --- a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java +++ b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunnerFactory.java @@ -23,12 
+23,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; import java.util.List; -import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.xml.namespace.QName; - import org.apache.vxquery.compiler.CompilerControlBlock; import org.apache.vxquery.compiler.algebricks.VXQueryGlobalDataFactory; import org.apache.vxquery.context.DynamicContext;
