http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq deleted file mode 100644 index 0827c45..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq +++ /dev/null @@ -1,25 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Filter Query :) -(: Find all readings for hurricane force wind warnings or extreme wind warnings. :) -(: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744 :) -(: meters per second). (Wind values are in tenths of a meter per second) :) -let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" -for $r in collection($collection)/dataCollection/data -where $r/dataType eq "AWND" and xs:decimal(fn:data($r/value)) gt 491.744 -return $r \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq deleted file mode 100644 index 0635618..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq +++ /dev/null @@ -1,30 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) -(: -XQuery Aggregate Query ----------------------- -Find the annual precipitation (PRCP) for Syracuse, NY using the airport -weather station (USW00014771) report for 1999. 
-:) -fn:sum( - let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r in collection($collection)/dataCollection/data - where $r/station eq "GHCND:USW00014771" - and $r/dataType eq "PRCP" - and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 1999 - return $r/value -) div 10 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq deleted file mode 100644 index c58b0a3..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq +++ /dev/null @@ -1,25 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Aggregate Query :) -(: Find the highest recorded temperature (TMAX) in Celsius. 
:) -fn:max( - let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r in collection($collection)/dataCollection/data - where $r/dataType eq "TMAX" - return $r/value -) div 10 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq deleted file mode 100644 index 5b7246d..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq +++ /dev/null @@ -1,30 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Find all the weather readings for Washington state for a specific day :) -(: 1976/7/4. 
:) -let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" -for $s in collection($station_collection)/stationCollection/station - -let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" -for $r in collection($sensor_collection)/dataCollection/data - -where $s/id eq $r/station - and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON")) - and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000") -return $r \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq deleted file mode 100644 index 6c7810a..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq +++ /dev/null @@ -1,27 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Count all the weather sensor readings on 1976-07-04. 
:) -count( - let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r in collection($sensor_collection)/dataCollection/data - - let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11)) - where $date eq xs:date("1976-07-04") - return $r -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq deleted file mode 100644 index 18e627a..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq +++ /dev/null @@ -1,25 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Count all the weather stations for Washington state. 
:) -count( - let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" - for $s in collection($station_collection)/stationCollection/station - where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON")) - return $s -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq deleted file mode 100644 index c95f3f5..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq +++ /dev/null @@ -1,33 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Aggregate Query :) -(: Find the lowest recorded temperature (TMIN) in the United States for :) -(: 2001. 
:) -fn:min( - let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" - for $s in collection($station_collection)/stationCollection/station - - let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r in collection($sensor_collection)/dataCollection/data - - where $s/id eq $r/station - and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US")) - and $r/dataType eq "TMIN" - and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001 - return $r/value -) div 10 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq deleted file mode 100644 index 8548742..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq +++ /dev/null @@ -1,28 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Aggregate Query :) -(: Count all sensor readings for TMIN in 2001. 
:) -count( - let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r in collection($sensor_collection)/dataCollection/data - - let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11)) - where $r/dataType eq "TMIN" - and fn:year-from-date($date) eq 2001 - return $r/value -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq deleted file mode 100644 index 6f3a6b8..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq +++ /dev/null @@ -1,25 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Aggregate Query :) -(: Count all stations in the United States. 
:) -count( - let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" - for $s in collection($station_collection)/stationCollection/station - where (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US")) - return $s -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq deleted file mode 100644 index 5c8ed54..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq +++ /dev/null @@ -1,30 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Find the highest recorded temperature (TMAX) for each station for each :) -(: day over the year 2000. 
:) -let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" -for $s in collection($station_collection)/stationCollection/station - -let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" -for $r in collection($sensor_collection)/dataCollection/data - -where $s/id eq $r/station - and $r/dataType eq "TMAX" - and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000 -return ($s/displayName, $r/date, $r/value) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq deleted file mode 100644 index 1938151..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq +++ /dev/null @@ -1,27 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Count max temperature (TMAX) readings for the year 2000. 
:) -count( - let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r in collection($sensor_collection)/dataCollection/data - - where $r/dataType eq "TMAX" - and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000 - return $r -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq deleted file mode 100644 index 3c1dc98..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq +++ /dev/null @@ -1,24 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Count all the stations. 
:) -count( - let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" - for $s in collection($station_collection)/stationCollection/station - return $s -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq deleted file mode 100644 index 5b1f2ac..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq +++ /dev/null @@ -1,33 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Self Join Query :) -(: Self join with all stations finding the difference in min and max :) -(: temperature and get the average. 
:) -fn:avg( - let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r_min in collection($sensor_collection_min)/dataCollection/data - - let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r_max in collection($sensor_collection_max)/dataCollection/data - - where $r_min/station eq $r_max/station - and $r_min/date eq $r_max/date - and $r_min/dataType eq "TMIN" - and $r_max/dataType eq "TMAX" - return $r_max/value - $r_min/value -) div 10 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq deleted file mode 100644 index a48cad5..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq +++ /dev/null @@ -1,26 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Count all the records for TMAX. 
:) -count( - let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r_max in collection($sensor_collection_max)/dataCollection/data - - where $r_max/dataType eq "TMAX" - return $r_max -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq deleted file mode 100644 index 4a72d0f..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq +++ /dev/null @@ -1,26 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Count all the records for TMIN. 
:) -count( - let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r_min in collection($sensor_collection_min)/dataCollection/data - - where $r_min/dataType eq "TMIN" - return $r_min -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq deleted file mode 100644 index 6fa981b..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq +++ /dev/null @@ -1,24 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Count all the weather sensor readings available. 
:) -count( - let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" - for $r in collection($sensor_collection)/dataCollection/data - return $r -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq deleted file mode 100644 index 1958ec6..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq +++ /dev/null @@ -1,24 +0,0 @@ -(: Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. :) - -(: XQuery Join Query :) -(: Count all the weather stations available. 
:) -count( - let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" - for $s in collection($station_collection)/stationCollection/station - return $s -) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md deleted file mode 100644 index 58bea51..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md +++ /dev/null @@ -1,51 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -Weather Data Conversion To XML -===================== - -# Introduction - -The NOAA has hosted DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY) -.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor -readings. Using the RSS feed as a template, the GHCN-DAILY historical -information is used to generate past RSS feed XML documents. 
The process allows -testing on a large set of information without having to continually monitor -the weather.gov site for all the weather details for years. - -# Detailed Description - -Detailed GHCN-DAILY information: -<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt> - -The process takes a save folder for the data. The folder contains several -folders: - - - all_xml_files (The generated xml files for a given package) - - downloads (All files taken from the NOAA HTTP site) - - dataset-[name] (all files related to a single dataset) - - -# Example commands - -Building - - -Partitioning -python weather_cli.py -x weather_example.xml - -Linking \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh deleted file mode 100755 index 632dbcb..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Run all the queries and save a log. -# First argument: Supply the folder which houses all the queries (recursive). -# Second argument: adds options to the VXQuery CLI. -# -# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ -# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138" -# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03 -# -REPEAT=5 -FRAME_SIZE=$((8*1024)) -BUFFER_SIZE=$((32*1024*1024)) -JOIN_HASH_SIZE=-1 - -if [ -z "${1}" ] -then - echo "Please supply a directory for query files to be found." - exit -fi - -export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties" - -for j in $(find ${1} -name '*q??.xq') -do - if [ -z "${3}" ] || [[ "${j}" =~ "${3}" ]] - then - date - echo "Running query: ${j}" - log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log" - log_base_path=$(dirname ${j/queries/query_logs}) - mkdir -p ${log_base_path} - time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 - echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file} - echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file} - echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file} - fi; -done - -if which programname >/dev/null; -then - echo "Sending out e-mail notification." - SUBJECT="Benchmark Tests Finished" - EMAIL="[email protected]" - /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM - Completed all tests in folder ${1}. - EOM -else - echo "No mail command to use." 
-fi; http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh deleted file mode 100755 index 98ab04b..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Run all the queries and save a log. -# First argument: Supply the folder which houses all the queries (recursive). -# Second argument: adds options to the VXQuery CLI. 
-# -# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ -# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138" -# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03 -# -CLUSTER="uci" -REPEAT=5 -FRAME_SIZE=$((8*1024)) -BUFFER_SIZE=$((32*1024*1024)) -#JOIN_HASH_SIZE=$((256*1024*1024)) -JOIN_HASH_SIZE=-1 - -if [ -z "${1}" ] -then - echo "Please supply a directory for query files to be found." - exit -fi - -if [ -z "${2}" ] -then - echo "Please the number of nodes (start at 0)." - exit -fi - -# Run queries for the specified number of nodes. -echo "Starting ${2} cluster nodes" -python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a start - -# wait for cluster to finish setting up -sleep 5 - -export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties" - -for j in $(find ${1} -name '*q??.xq') -do - # Only work with i nodes. - if [[ "${j}" =~ "${2}nodes" ]] - then - # Only run for specified queries. - if [ -z "${4}" ] || [[ "${j}" =~ "${4}" ]] - then - date - echo "Running query: ${j}" - log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log" - log_base_path=$(dirname ${j/queries/query_logs}) - mkdir -p ${log_base_path} - time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 - echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file} - echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file} - echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file} - fi; - fi; -done - -# Stop cluster. 
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a stop - -if which programname >/dev/null; -then - echo "Sending out e-mail notification." - SUBJECT="Benchmark Cluster Tests Finished" - EMAIL="[email protected]" - /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM - Completed all tests in folder ${1} for a ${2} node cluster using ${HOSTNAME}. - EOM -else - echo "No mail command to use." -fi; http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh deleted file mode 100755 index 58976b7..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -DATASET="dataset-hcn-d2" -cluster_ip=${1} -base_weather_folder=${2} - -for n in 7 6 5 3 4 2 1 0 -do - #for t in "batch_scale_out" "speed_up" - for t in "batch_scale_out" - #for t in "speed_up" - do - for p in 2 - do - for c in 4 - do - echo " ==== node ${n} test ${t} partition ${p} cores ${c} ====" - sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}" - done - done - done -done - -if which programname >/dev/null; -then - echo "Sending out e-mail notification." - SUBJECT="Benchmark Group Tests Finished" - EMAIL="[email protected]" - /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM - Completed all tests in the predefined group for ${DATASET}. - EOM -else - echo "No mail command to use." -fi; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh deleted file mode 100755 index a6788be..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -export JAVA_HOME=/home/ecarm002/java/jdk1.6.0_45 -REPEAT=${1} -DATASET="hcn" - -for n in `seq 0 7` -#for n in 0 -do - date - echo "Running q0${n} on ${DATASET} for MRQL." - time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done; -done - -if which programname >/dev/null; -then - echo "Sending out e-mail notification." - SUBJECT="MRQL Tests Finished (${DATASET})" - EMAIL="[email protected]" - /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM - Completed all MRQL tests on ${DATASET}. - EOM -else - echo "No mail command to use." -fi; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py deleted file mode 100644 index 8021b2c..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py +++ /dev/null @@ -1,377 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os.path -import linecache -import distutils.core -import fileinput -import socket - -from weather_config import * -from weather_data_files import * - -# Weather data files created to manage the conversion process. -# Allows partition and picking up where you left off. -# -# benchmark_name/ -# data/ -# queries/ -# logs/ -class WeatherBenchmark: - - DATA_LINKS_FOLDER = "data_links/" - LARGE_FILE_ROOT_TAG = WeatherDataFiles.LARGE_FILE_ROOT_TAG - QUERY_REPLACEMENT_KEY = "/tmp/1.0_partition_ghcnd_all_xml/" - QUERY_MASTER_FOLDER = "../queries/" - QUERY_FILE_LIST = [ - "q00.xq", - "q01.xq", - "q02.xq", - "q03.xq", - "q04.xq", - "q05.xq", - "q06.xq", - "q07.xq" - ] - QUERY_UTILITY_LIST = [ - "no_result.xq", - "sensor_count.xq", - "station_count.xq", - "q04_sensor.xq", - "q04_station.xq", - "q05_sensor.xq", - "q05_station.xq", - "q06_sensor.xq", - "q06_station.xq", - "q07_tmin.xq", - "q07_tmax.xq", - ] - BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"] - BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"] - QUERY_COLLECTIONS = ["sensors", "stations"] - - SEPERATOR = "|" - - def __init__(self, base_paths, partitions, dataset, nodes): - self.base_paths = base_paths - self.partitions = partitions - self.dataset = dataset - self.nodes = nodes - - def print_partition_scheme(self): - if (len(self.base_paths) == 0): - return - for test in self.dataset.get_tests(): - if test in 
self.BENCHMARK_LOCAL_TESTS: - self.print_local_partition_schemes(test) - elif test in self.BENCHMARK_CLUSTER_TESTS: - self.print_cluster_partition_schemes(test) - else: - print "Unknown test." - exit() - - def print_local_partition_schemes(self, test): - node_index = 0 - virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions) - for p in self.partitions: - scheme = self.get_local_partition_scheme(test, p) - self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index) - - def print_cluster_partition_schemes(self, test): - node_index = self.get_current_node_index() - virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions) - for p in self.partitions: - scheme = self.get_cluster_partition_scheme(test, p) - self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index) - - def print_partition_schemes(self, virtual_partitions, scheme, test, partitions, node_id): - print - print "---------------- Partition Scheme --------------------" - print " Test: " + test - print " Virtual Partitions: " + str(virtual_partitions) - print " Disks: " + str(len(self.base_paths)) - print " Partitions: " + str(partitions) - print " Node Id: " + str(node_id) - - if isinstance(scheme, (tuple, list, dict, set)) and len(scheme) > 0: - folder_length = len(scheme[0][3]) + 5 - row_format = "{:>5} {:>5} {:>5} {:<" + str(folder_length) + "} {:<" + str(folder_length) + "}" - HEADER = ("Disk", "Index", "Link", "Data Path", "Link Path") - print row_format.format(*HEADER) - for row in scheme: - print row_format.format(*row) - print - else: - print " Scheme is EMPTY." 
- - def get_local_partition_scheme(self, test, partition): - scheme = [] - virtual_partitions = get_local_virtual_disk_partitions(self.partitions) - data_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths) - link_base_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test) - - # Match link paths to real data paths. - group_size = len(data_schemes) / len(link_base_schemes) - for d in range(len(self.base_paths)): - offset = 0 - for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes: - if d == link_disk: - # Only consider a single disk at a time. - for data_node, data_disk, data_virtual, data_index, data_path in data_schemes: - if test == "local_speed_up" and data_disk == link_disk \ - and offset <= data_index and data_index < offset + group_size: - scheme.append([data_disk, data_index, link_index, data_path, link_path]) - elif test == "local_batch_scale_out" and data_disk == link_disk \ - and data_index == link_index: - scheme.append([data_disk, data_index, link_index, data_path, link_path]) - offset += group_size - return scheme - - def get_cluster_partition_scheme(self, test, partition): - node_index = self.get_current_node_index() - if node_index == -1: - print "Unknown host." - return - - scheme = [] - virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions) - data_schemes = get_disk_partition_scheme(node_index, virtual_disk_partitions, self.base_paths) - link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test) - - # Match link paths to real data paths. - for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes: - # Prep - if test == "speed_up": - group_size = virtual_disk_partitions / (link_node + 1) / partition - elif test == "batch_scale_out": - group_size = virtual_disk_partitions / len(self.nodes) / partition - else: - print "Unknown test." 
- return - - node_offset = group_size * node_index * partition - node_offset += group_size * link_index - has_data = True - if link_node < node_index: - has_data = False - - # Make links - for date_node, data_disk, data_virtual, data_index, data_path in data_schemes: - if has_data and data_disk == link_disk \ - and node_offset <= data_index and data_index < node_offset + group_size: - scheme.append([link_disk, data_index, link_index, data_path, link_path]) - scheme.append([link_disk, -1, link_index, "", link_path]) - return scheme - - def build_data_links(self, reset): - if (len(self.base_paths) == 0): - return - if reset: - shutil.rmtree(self.base_paths[0] + self.DATA_LINKS_FOLDER) - for test in self.dataset.get_tests(): - if test in self.BENCHMARK_LOCAL_TESTS: - for i in self.partitions: - scheme = self.get_local_partition_scheme(test, i) - self.build_data_links_scheme(scheme) - if 1 in self.partitions and len(self.base_paths) > 1: - scheme = self.build_data_links_local_zero_partition(test) - self.build_data_links_scheme(scheme) - elif test in self.BENCHMARK_CLUSTER_TESTS: - for i in self.partitions: - scheme = self.get_cluster_partition_scheme(test, i) - self.build_data_links_scheme(scheme) - if 1 in self.partitions and len(self.base_paths) > 1: - scheme = self.build_data_links_cluster_zero_partition(test) - self.build_data_links_scheme(scheme) - else: - print "Unknown test." - exit() - - def build_data_links_scheme(self, scheme): - '''Build all the data links based on the scheme information.''' - for (data_disk, data_index, partition, data_path, link_path) in scheme: - self.add_collection_links_for(data_path, link_path, data_index) - - def build_data_links_cluster_zero_partition(self, test): - '''Build a scheme for all data in one symbolically linked folder. 
(0 partition)''' - scheme = [] - link_base_schemes = get_cluster_link_scheme(len(self.nodes), 1, self.base_paths, self.DATA_LINKS_FOLDER + test) - for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes: - new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test + "/" + str(link_node) + "nodes") - scheme.append([0, link_disk, 0, link_path, new_link_path]) - return scheme - - def build_data_links_local_zero_partition(self, test): - '''Build a scheme for all data in one symbolically linked folder. (0 partition)''' - scheme = [] - index = 0 - link_base_schemes = get_partition_scheme(0, 1, self.base_paths, self.DATA_LINKS_FOLDER + test) - for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes: - if test == "local_batch_scale_out" and index > 0: - continue - new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test) - scheme.append([0, index, 0, link_path, new_link_path]) - index += 1 - return scheme - - def get_zero_partition_path(self, node, key): - '''Return a partition path for the zero partition.''' - base_path = self.base_paths[0] - new_link_path = get_partition_scheme(node, 1, [base_path], key)[0][PARTITION_INDEX_PATH] - return new_link_path.replace("p1", "p0") - - def get_current_node_index(self): - found = False - node_index = 0 - for machine in self.nodes: - if socket.gethostname().startswith(machine.get_node_name()): - found = True - break - node_index += 1 - - if found: - return node_index - else: - return -1 - - def add_collection_links_for(self, real_path, link_path, index): - for collection in self.QUERY_COLLECTIONS: - collection_path = link_path + collection + "/" - collection_index = collection_path + "index" + str(index) - if not os.path.isdir(collection_path): - os.makedirs(collection_path) - if index >= 0: - if os.path.islink(collection_index): - os.unlink(collection_index) - os.symlink(real_path + collection + "/", collection_index) - - 
def copy_query_files(self, reset): - for test in self.dataset.get_tests(): - if test in self.BENCHMARK_LOCAL_TESTS: - self.copy_local_query_files(test, reset) - elif test in self.BENCHMARK_CLUSTER_TESTS: - self.copy_cluster_query_files(test, reset) - else: - print "Unknown test." - exit() - - def copy_cluster_query_files(self, test, reset): - '''Determine the data_link path for cluster query files and copy with - new location for collection.''' - if 1 in self.partitions and len(self.base_paths) > 1: - for n in range(len(self.nodes)): - query_path = get_cluster_query_path(self.base_paths, test, 0, n) - prepare_path(query_path, reset) - - # Copy query files. - new_link_path = self.get_zero_partition_path(n, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes") - self.copy_and_replace_query(query_path, [new_link_path]) - for n in range(len(self.nodes)): - for p in self.partitions: - query_path = get_cluster_query_path(self.base_paths, test, p, n) - prepare_path(query_path, reset) - - # Copy query files. - partition_paths = get_disk_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes") - self.copy_and_replace_query(query_path, partition_paths) - - def copy_local_query_files(self, test, reset): - '''Determine the data_link path for local query files and copy with - new location for collection.''' - if 1 in self.partitions and len(self.base_paths) > 1: - query_path = get_local_query_path(self.base_paths, test, 0) - prepare_path(query_path, reset) - - # Copy query files. - new_link_path = self.get_zero_partition_path(0, self.DATA_LINKS_FOLDER + test) - self.copy_and_replace_query(query_path, [new_link_path]) - for p in self.partitions: - query_path = get_local_query_path(self.base_paths, test, p) - prepare_path(query_path, reset) - - # Copy query files. 
- partition_paths = get_disk_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test) - self.copy_and_replace_query(query_path, partition_paths) - - def copy_and_replace_query(self, query_path, replacement_list): - '''Copy the query files over to the query_path and replace the path - for the where the collection data is located.''' - for query_file in self.QUERY_FILE_LIST + self.QUERY_UTILITY_LIST: - shutil.copyfile(self.QUERY_MASTER_FOLDER + query_file, query_path + query_file) - - # Make a search replace for each collection. - for collection in self.QUERY_COLLECTIONS: - replacement_list_with_type = [] - for replace in replacement_list: - replacement_list_with_type.append(replace + collection) - - replace_string = self.SEPERATOR.join(replacement_list_with_type) - for line in fileinput.input(query_path + query_file, True): - sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + collection, replace_string)) - - # Make a search replace for partition type. - if self.dataset.get_partition_type() == "large_files": - for line in fileinput.input(query_path + query_file, True): - sys.stdout.write(line.replace("/stationCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/stationCollection")) - for line in fileinput.input(query_path + query_file, True): - sys.stdout.write(line.replace("/dataCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/dataCollection")) - - def get_number_of_slices_per_disk(self): - if len(self.dataset.get_tests()) == 0: - print "No test has been defined in config file." - else: - for test in self.dataset.get_tests(): - if test in self.BENCHMARK_LOCAL_TESTS: - return get_local_virtual_disk_partitions(self.partitions) - elif test in self.BENCHMARK_CLUSTER_TESTS: - return get_cluster_virtual_disk_partitions(self.nodes, self.partitions) - else: - print "Unknown test." 
- exit() - -def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"): - link_paths = [] - for n in range(0, nodes): - new_link_path = get_disk_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes") - link_paths.extend(new_link_path) - return link_paths - -def get_local_query_path(base_paths, test, partition): - return base_paths[0] + "queries/" + test + "/" + get_local_query_folder(len(base_paths), partition) + "/" - -def get_local_query_folder(disks, partitions): - return "d" + str(disks) + "_p" + str(partitions) - -def get_cluster_query_path(base_paths, test, partition, nodes): - return base_paths[0] + "queries/" + test + "/" + str(nodes) + "nodes/" + get_local_query_folder(len(base_paths), partition) + "/" - -def get_cluster_virtual_disk_partitions(nodes, partitions): - vp = get_local_virtual_disk_partitions(partitions) - vn = calculate_partitions(range(1, len(nodes)+1, 1)) - return vp * vn - -def get_local_virtual_disk_partitions(partitions): - return calculate_partitions(partitions) - -def calculate_partitions(list): - x = 1 - for i in list: - if x % i != 0: - if i % x == 0: - x = i - else: - x *= i - return x http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py deleted file mode 100644 index eeae25c..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py +++ /dev/null @@ -1,236 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys, getopt - -# Custom modules. -from weather_data_files import * -from weather_download_files import * -from weather_convert_to_xml import * -from weather_config import * -from weather_benchmark import * - -DEBUG_OUTPUT = False - -# -# Weather conversion for GHCN-DAILY files to xml. -# -# http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt -# -def main(argv): - append = False - max_records = 0 - process_file_name = "" - reset = False - section = "all" - token = "" - update = False - xml_config_path = "" - - try: - opts, args = getopt.getopt(argv, "af:hl:m:ruvw:x:", ["file=", "locality=", "max_station_files=", "web_service=", "xml_config="]) - except getopt.GetoptError: - print 'The file options for weather_cli.py were not correctly specified.' - print 'To see a full list of options try:' - print ' $ python weather_cli.py -h' - sys.exit(2) - for opt, arg in opts: - if opt == '-h': - print 'Converting weather daily files to xml options:' - print ' -a Append the results to the progress file.' - print ' -f (str) The file name of a specific station to process.' - print ' * Helpful when testing a single stations XML file output.' - print ' -l (str) Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, test_links, queries, inventory, statistics).' - print ' -m (int) Limits the number of files created for each station.' 
- print ' * Helpful when testing to make sure all elements are supported for each station.' - print ' Alternate form: --max_station_files=(int)' - print ' -r Reset the build process. (For one section or all sections depending on other parameters.)' - print ' -u Recalculate the file count and data size for each data source file.' - print ' -v Extra debug information.' - print ' -w (str) Downloads the station XML file form the web service.' - print ' -x (str) XML config file for weather data.' - sys.exit() - elif opt in ('-a', "--append"): - append = True - elif opt in ('-f', "--file"): - # check if file exists. - if os.path.exists(arg): - process_file_name = arg - else: - print 'Error: Argument must be a file name for --file (-f).' - sys.exit() - elif opt in ('-l', "--locality"): - if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "partition_scheme", "test_links", "queries", "inventory", "statistics"): - section = arg - else: - print 'Error: Argument must be a string for --locality (-l) and a valid locality.' - sys.exit() - elif opt in ('-m', "--max_station_files"): - if arg.isdigit(): - max_records = int(arg) - else: - print 'Error: Argument must be an integer for --max_station_files (-m).' - sys.exit() - elif opt == '-r': - reset = True - elif opt == '-u': - update = True - elif opt == '-v': - global DEBUG_OUTPUT - DEBUG_OUTPUT = True - elif opt == '-w': - # check if file exists. - if arg is not "": - token = arg - else: - print 'Error: Argument must be a string --web_service (-w).' - sys.exit() - elif opt in ('-x', "--xml_config"): - # check if file exists. - if os.path.exists(arg): - xml_config_path = arg - else: - print 'Error: Argument must be a xml file for --xml_config (-x).' - sys.exit() - - # Required fields to run the script. - if xml_config_path == "" or not os.path.exists(xml_config_path): - print 'Error: The xml config option must be supplied: --xml_config (-x).' 
- sys.exit() - config = WeatherConfig(xml_config_path) - - # Required fields to run the script. - if config.get_save_path() == "" or not os.path.exists(config.get_save_path()): - print 'Error: The save directory option must be supplied in the config file.' - sys.exit() - - # Set up downloads folder. - download_path = config.get_save_path() + "/downloads" - if section in ("all", "download"): - print 'Processing the download section.' - download = WeatherDownloadFiles(download_path) - download.download_ghcnd_files(reset) - download.download_mshr_files(reset) - - # Unzip the required file. - download.unzip_ghcnd_package(config.get_package(), reset) - download.unzip_mshr_files(reset) - - - # Create some basic paths for save files and references. - ghcnd_data_dly_path = download_path + '/' + config.get_package() + '/' + config.get_package() - xml_data_save_path = config.get_save_path() + '/all_xml_files/' - - # Make sure the xml folder is available. - if not os.path.isdir(xml_data_save_path): - os.makedirs(xml_data_save_path) - - # Set up the XML build objects. - convert = WeatherWebServiceMonthlyXMLFile(download_path, xml_data_save_path, DEBUG_OUTPUT) - progress_file = xml_data_save_path + "_data_progress.csv" - data = WeatherDataFiles(ghcnd_data_dly_path, progress_file) - if section in ("all", "progress_file"): - print 'Processing the progress_file section.' - options = list() - if append: - options.append('append') - if update: - options.append('recalculate') - if reset: - options.append('reset') - data.build_progress_file(options, convert) - - if section in ("all", "sensor_build"): - print 'Processing the sensor_build section.' 
- if process_file_name is not "": - # process a single file - if os.path.exists(process_file_name): - (file_count, data_size) = convert.process_sensor_file(process_file_name, max_records, 4) - data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size) - else: - data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_MISSING) - else: - # process directory - data.reset() - data.set_type("sensor") - data.set_data_reset(reset) - for file_name in data: - file_path = ghcnd_data_dly_path + '/' + file_name - if os.path.exists(file_path): - (file_count, data_size) = convert.process_sensor_file(file_path, max_records, 4) - data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size) - else: - data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_MISSING) - - if section in ("all", "station_build"): - print 'Processing the station_build section.' - data.reset() - data.set_type("station") - data.set_data_reset(reset) - if token is not "": - convert.set_token(token) - for file_name in data: - file_path = ghcnd_data_dly_path + '/' + file_name - if os.path.exists(file_path): - return_status = convert.process_station_file(file_path) - status = data.get_station_status(return_status) - data.update_file_station_status(file_name, status) - else: - data.update_file_station_status(file_name, WeatherDataFiles.DATA_FILE_MISSING) - - for dataset in config.get_dataset_list(): - # Set up the setting for each dataset. 
- dataset_folder = "/dataset-" + dataset.get_name() - progress_file = config.get_save_path() + dataset_folder + "/_data_progress.csv" - data = WeatherDataFiles(ghcnd_data_dly_path, progress_file) - - base_paths = [] - for paths in dataset.get_save_paths(): - base_paths.append(paths + dataset_folder + "/") - benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), dataset, config.get_node_machine_list()) - - if section in ("all", "partition", "partition_scheme"): - slices = benchmark.get_number_of_slices_per_disk() - print 'Processing the partition section (' + dataset.get_name() + ':d' + str(len(base_paths)) + ':s' + str(slices) + ').' - data.reset() - if section == "partition_scheme": - benchmark.print_partition_scheme() - else: - if dataset.get_partition_type() == "large_files": - data.build_to_n_partition_files(xml_data_save_path, slices, base_paths, reset) - else: - data.copy_to_n_partitions(xml_data_save_path, slices, base_paths, reset) - - if section in ("all", "test_links"): - # TODO determine current node - print 'Processing the test links section (' + dataset.get_name() + ').' - benchmark.print_partition_scheme() - benchmark.build_data_links(reset) - - if section in ("all", "queries"): - print 'Processing the queries section (' + dataset.get_name() + ').' - benchmark.copy_query_files(reset) - - if section in ("inventory"): - print 'Processing the inventory section.' - convert.process_inventory_file() - -# if section in ("statistics"): -# print 'Processing the statistics section.' 
-# data.print_progress_file_stats(convert) - -if __name__ == "__main__": - main(sys.argv[1:]) http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py deleted file mode 100644 index 80607b8..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from xml.dom.minidom import parse - -class WeatherConfig: - def __init__(self, config_xml_file): - self.config_xml_file = config_xml_file - - self.config = parse(self.config_xml_file) - - def get_save_path(self): - return self.get_text(self.config.getElementsByTagName("save_path")[0]) - - def get_package(self): - return self.get_text(self.config.getElementsByTagName("package")[0]) - - def get_node_machine_list(self): - nodes = [] - for node in self.config.getElementsByTagName("node"): - id = self.get_node_name(node) - ip = self.get_node_ip(node) - nodes.append(Machine(id, ip)) - return nodes - - def get_dataset_list(self): - nodes = [] - for node in self.config.getElementsByTagName("dataset"): - name = self.get_dataset_name(node) - save_paths = self.get_dataset_save_paths(node) - partition_type = self.get_dataset_partition_type(node) - partitions = self.get_dataset_partitions(node) - tests = self.get_dataset_tests(node) - nodes.append(Dataset(name, save_paths, partition_type, partitions, tests)) - return nodes - - - # -------------------------------------------------------------------------- - # Node Specific Functions - # -------------------------------------------------------------------------- - def get_node_ip(self, node): - return self.get_text(node.getElementsByTagName("cluster_ip")[0]) - - def get_node_name(self, node): - return self.get_text(node.getElementsByTagName("id")[0]) - - - # -------------------------------------------------------------------------- - # Dataset Specific Functions - # -------------------------------------------------------------------------- - def get_dataset_name(self, node): - return self.get_text(node.getElementsByTagName("name")[0]) - - def get_dataset_save_paths(self, node): - paths = [] - for item in node.getElementsByTagName("save_path"): - paths.append(self.get_text(item)) - return paths - - def get_dataset_partition_type(self, node): - return self.get_text(node.getElementsByTagName("partition_type")[0]) - - def 
get_dataset_partitions(self, node): - paths = [] - for item in node.getElementsByTagName("partitions_per_path"): - paths.append(int(self.get_text(item))) - return paths - - def get_dataset_tests(self, node): - tests = [] - for item in node.getElementsByTagName("test"): - tests.append(self.get_text(item)) - return tests - - def get_text(self, xml_node): - rc = [] - for node in xml_node.childNodes: - if node.nodeType == node.TEXT_NODE: - rc.append(node.data) - return ''.join(rc) - -class Machine: - def __init__(self, id, ip): - self.id = id - self.ip = ip - - def get_node_name(self): - return self.id - - def get_node_ip(self): - return self.ip - - def __repr__(self): - return self.id + "(" + self.ip + ")" - -class Dataset: - def __init__(self, name, save_paths, partition_type, partitions, tests): - self.name = name - self.save_paths = save_paths - self.partitions = partitions - self.partition_type = partition_type - self.tests = tests - - def get_name(self): - return self.name - - def get_save_paths(self): - return self.save_paths - - def get_partitions(self): - return self.partitions - - def get_partition_type(self): - return self.partition_type - - def get_tests(self): - return self.tests - - def __repr__(self): - return self.name + ":" + str(self.save_paths) + ":" + str(self.partitions) - http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py deleted file mode 100644 index 04fff52..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor 
# license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base URL that every name in FILE_NAMES is relative to.
BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'

# Files required for a build.
FILE_NAMES = [
    'ghcnd-countries.txt',
    'ghcnd-inventory.txt',
    'ghcnd-states.txt',
    'ghcnd-stations.txt',
    'ghcnd-version.txt',
    'ghcnd_all.tar.gz',
    'ghcnd_gsn.tar.gz',
    'ghcnd_hcn.tar.gz',
    'readme.txt',
    'status.txt',
]

# Positions inside one field description: [name, start, end, type].
# NOTE(review): start/end look like 1-based inclusive column numbers (ID spans
# 1-11, and each day below consumes 8 columns) -- confirm against the readers.
FIELD_INDEX_NAME = 0
FIELD_INDEX_START = 1
FIELD_INDEX_END = 2
FIELD_INDEX_TYPE = 3

# Positions of the four leading entries of DLY_FIELDS.
DLY_FIELD_ID = 0
DLY_FIELD_YEAR = 1
DLY_FIELD_MONTH = 2
DLY_FIELD_ELEMENT = 3

# Presumably the index of the first per-day entry in DLY_FIELDS and the number
# of entries per day (both are 4 in the table built below) -- verify with callers.
DLY_FIELD_DAY_OFFSET = 4
DLY_FIELD_DAY_FIELDS = 4

# Row layout of a daily record: four leading fields ...
DLY_FIELDS = [
    ['ID', 1, 11, 'Character'],
    ['YEAR', 12, 15, 'Integer'],
    ['MONTH', 16, 17, 'Integer'],
    ['ELEMENT', 18, 21, 'Character'],
]

# ... followed by VALUE/MFLAG/QFLAG/SFLAG for each of days 1..31, 8 columns per day.
for i in range(1, 32):
    start = 22 + 8 * (i - 1)
    DLY_FIELDS.extend([
        ['VALUE' + str(i), start + 0, start + 4, 'Integer'],
        ['MFLAG' + str(i), start + 5, start + 5, 'Character'],
        ['QFLAG' + str(i), start + 6, start + 6, 'Character'],
        ['SFLAG' + str(i), start + 7, start + 7, 'Character'],
    ])

# Row layout of the stations file, keyed by field name.
STATIONS_FIELDS = {
    'ID': ['ID', 1, 11, 'Character'],
    'LATITUDE': ['LATITUDE', 13, 20, 'Real'],
    'LONGITUDE': ['LONGITUDE', 22, 30, 'Real'],
    'ELEVATION': ['ELEVATION', 32, 37, 'Real'],
    'STATE': ['STATE', 39, 40, 'Character'],
    'NAME': ['NAME', 42, 71, 'Character'],
    'GSNFLAG': ['GSNFLAG', 73, 75, 'Character'],
    'HCNFLAG': ['HCNFLAG', 77, 79, 'Character'],
    'WMOID': ['WMOID', 81, 85, 'Character'],
}

# Row layout of the countries file, keyed by field name.
COUNTRIES_FIELDS = {
    'CODE': ['CODE', 1, 2, 'Character'],
    'NAME': ['NAME', 4, 50, 'Character'],
}

# Row layout of the states file, keyed by field name.
STATES_FIELDS = {
    'CODE': ['CODE', 1, 2, 'Character'],
    'NAME': ['NAME', 4, 50, 'Character'],
}

# Row layout of the inventory file, keyed by field name.
INVENTORY_FIELDS = {
    'ID': ['ID', 1, 11, 'Character'],
    'LATITUDE': ['LATITUDE', 13, 20, 'Real'],
    'LONGITUDE': ['LONGITUDE', 22, 30, 'Real'],
    'ELEMENT': ['ELEMENT', 32, 35, 'Character'],
    'FIRSTYEAR': ['FIRSTYEAR', 37, 40, 'Integer'],
    'LASTYEAR': ['LASTYEAR', 42, 45, 'Integer'],
}

# http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
# ----------------------------------------------------------------------
# diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
# deleted file mode 100644
# index 7b1434f..0000000
# --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
# +++ /dev/null
# @@ -1,78 +0,0 @@
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# List of required files for a build.
MSHR_URLS = [
    'ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt',
    'http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip',
]

# Positions inside one field description: [name, start, end, type].
MSHR_FIELD_INDEX_NAME = 0
MSHR_FIELD_INDEX_START = 1
MSHR_FIELD_INDEX_END = 2
MSHR_FIELD_INDEX_TYPE = 3

# Row layout of an MSHR record, keyed by field name. Each value is a list
# [name, start, end, type] built from the table below.
# NOTE(review): the type string for OBS_ENV carries a trailing space
# ('X(40) '); it is kept as-is -- confirm whether any consumer compares it.
MSHR_FIELDS = dict(
    (field_name, [field_name, first, last, picture])
    for field_name, first, last, picture in (
        ('SOURCE_ID', 1, 20, 'X(20)'),
        ('SOURCE', 22, 31, 'X(10)'),
        ('BEGIN_DATE', 33, 40, 'YYYYMMDD'),
        ('END_DATE', 42, 49, 'YYYYMMDD'),
        ('STATION_STATUS', 51, 70, 'X(20)'),
        ('NCDCSTN_ID', 72, 91, 'X(20)'),
        ('ICAO_ID', 93, 112, 'X(20)'),
        ('WBAN_ID', 114, 133, 'X(20)'),
        ('FAA_ID', 135, 154, 'X(20)'),
        ('NWSLI_ID', 156, 175, 'X(20)'),
        ('WMO_ID', 177, 196, 'X(20)'),
        ('COOP_ID', 198, 217, 'X(20)'),
        ('TRANSMITTAL_ID', 219, 238, 'X(20)'),
        ('GHCND_ID', 240, 259, 'X(20)'),
        ('NAME_PRINCIPAL', 261, 360, 'X(100)'),
        ('NAME_PRINCIPAL_SHORT', 362, 391, 'X(30)'),
        ('NAME_COOP', 393, 492, 'X(100)'),
        ('NAME_COOP_SHORT', 494, 523, 'X(30)'),
        ('NAME_PUBLICATION', 525, 624, 'X(100)'),
        ('NAME_ALIAS', 626, 725, 'X(100)'),
        ('NWS_CLIM_DIV', 727, 736, 'X(10)'),
        ('NWS_CLIM_DIV_NAME', 738, 777, 'X(40)'),
        ('STATE_PROV', 779, 788, 'X(10)'),
        ('COUNTY', 790, 839, 'X(50)'),
        ('NWS_ST_CODE', 841, 842, 'X(2)'),
        ('FIPS_COUNTRY_CODE', 844, 845, 'X(2)'),
        ('FIPS_COUNTRY_NAME', 847, 946, 'X(100)'),
        ('NWS_REGION', 948, 977, 'X(30)'),
        ('NWS_WFO', 979, 988, 'X(10)'),
        ('ELEV_GROUND', 990, 1029, 'X(40)'),
        ('ELEV_GROUND_UNIT', 1031, 1050, 'X(20)'),
        ('ELEV_BAROM', 1052, 1091, 'X(40)'),
        ('ELEV_BAROM_UNIT', 1093, 1112, 'X(20)'),
        ('ELEV_AIR', 1114, 1153, 'X(40)'),
        ('ELEV_AIR_UNIT', 1155, 1174, 'X(20)'),
        ('ELEV_ZERODAT', 1176, 1215, 'X(40)'),
        ('ELEV_ZERODAT_UNIT', 1217, 1236, 'X(20)'),
        ('ELEV_UNK', 1238, 1277, 'X(40)'),
        ('ELEV_UNK_UNIT', 1279, 1298, 'X(20)'),
        ('LAT_DEC', 1300, 1319, 'X(20)'),
        ('LON_DEC', 1321, 1340, 'X(20)'),
        ('LAT_LON_PRECISION', 1342, 1351, 'X(10)'),
        ('RELOCATION', 1353, 1414, 'X(62)'),
        ('UTC_OFFSET', 1416, 1431, '9(16)'),
        ('OBS_ENV', 1433, 1472, 'X(40) '),
        ('PLATFORM', 1474, 1573, 'X(100)'),
    )
)
