Repository: vxquery Updated Branches: refs/heads/prestonc/site_update 1ddb81e48 -> 57d1bfbfb refs/heads/site f87bc5b4f -> 9bbd1cdab
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py new file mode 100644 index 0000000..5db090a --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py @@ -0,0 +1,554 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import textwrap +from datetime import date +import os +from collections import OrderedDict + +# Custom modules. 
+from weather_config_ghcnd import * +from weather_config_mshr import * +from weather_download_files import * + +class WeatherConvertToXML: + + STATES = OrderedDict({ + 'AK': 'Alaska', + 'AL': 'Alabama', + 'AR': 'Arkansas', + 'AS': 'American Samoa', + 'AZ': 'Arizona', + 'CA': 'California', + 'CO': 'Colorado', + 'CT': 'Connecticut', + 'DC': 'District of Columbia', + 'DE': 'Delaware', + 'FL': 'Florida', + 'GA': 'Georgia', + 'GU': 'Guam', + 'HI': 'Hawaii', + 'IA': 'Iowa', + 'ID': 'Idaho', + 'IL': 'Illinois', + 'IN': 'Indiana', + 'KS': 'Kansas', + 'KY': 'Kentucky', + 'LA': 'Louisiana', + 'MA': 'Massachusetts', + 'MD': 'Maryland', + 'ME': 'Maine', + 'MI': 'Michigan', + 'MN': 'Minnesota', + 'MO': 'Missouri', + 'MP': 'Northern Mariana Islands', + 'MS': 'Mississippi', + 'MT': 'Montana', + 'NA': 'National', + 'NC': 'North Carolina', + 'ND': 'North Dakota', + 'NE': 'Nebraska', + 'NH': 'New Hampshire', + 'NJ': 'New Jersey', + 'NM': 'New Mexico', + 'NV': 'Nevada', + 'NY': 'New York', + 'OH': 'Ohio', + 'OK': 'Oklahoma', + 'OR': 'Oregon', + 'PA': 'Pennsylvania', + 'PR': 'Puerto Rico', + 'RI': 'Rhode Island', + 'SC': 'South Carolina', + 'SD': 'South Dakota', + 'TN': 'Tennessee', + 'TX': 'Texas', + 'UT': 'Utah', + 'VA': 'Virginia', + 'VI': 'Virgin Islands', + 'VT': 'Vermont', + 'WA': 'Washington', + 'WI': 'Wisconsin', + 'WV': 'West Virginia', + 'WY': 'Wyoming' + }) + + MONTHS = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December" + ] + + token = "" + + def __init__(self, base_path, save_path, debug_output): + self.save_path = save_path + self.debug_output = debug_output + + # Extra support files. + self.ghcnd_countries = base_path + '/ghcnd-countries.txt' + self.ghcnd_inventory = base_path + '/ghcnd-inventory.txt' + self.ghcnd_states = base_path + '/ghcnd-states.txt' + self.ghcnd_stations = base_path + '/ghcnd-stations.txt' + + # MSHR support files. 
+ self.mshr_stations = base_path + '/mshr_enhanced_201402.txt' + + def set_token(self, token): + self.token = token + + def get_field_from_definition(self, row, field_definition): + return row[(field_definition[FIELD_INDEX_START] - 1):field_definition[FIELD_INDEX_END]] + + def get_field(self, fields_array, row, index): + return row[(fields_array[index][FIELD_INDEX_START] - 1):fields_array[index][FIELD_INDEX_END]] + + def get_dly_field(self, row, index): + return self.get_field(DLY_FIELDS, row, index) + + def print_row_files(self, row): + for field in DLY_FIELDS: + print str(field[FIELD_INDEX_NAME]) + " = '" + row[(field[FIELD_INDEX_START] - 1):field[FIELD_INDEX_END]] + "'" + + def save_file(self, filename, contents): + file = open(filename, 'w') + file.write(contents) + file.close() + return filename + + def get_folder_size(self, folder_name): + total_size = 0 + for dirpath, dirnames, filenames in os.walk(folder_name): + for f in filenames: + fp = os.path.join(dirpath, f) + total_size += os.path.getsize(fp) + return total_size + + def process_one_month_sensor_set(self, records, page): + # Default + return 0 + + def process_station_data(self, row): + # Default + return 0 + + def get_base_folder(self, station_id, data_type="sensors"): + return build_base_save_folder(self.save_path, station_id, data_type) + + def process_inventory_file(self): + print "Processing inventory file" + file_stream = open(self.ghcnd_inventory, 'r') + + csv_header = ['ID', 'SENSORS', 'SENSORS_COUNT', 'MAX_YEARS', 'TOTAL_YEARS_FOR_ALL_SENSORS'] + row = file_stream.readline() + csv_inventory = {} + for row in file_stream: + id = self.get_field_from_definition(row, INVENTORY_FIELDS['ID']) + sensor_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ELEMENT']) + start = int(self.get_field_from_definition(row, INVENTORY_FIELDS['FIRSTYEAR'])) + end = int(self.get_field_from_definition(row, INVENTORY_FIELDS['LASTYEAR'])) + if id in csv_inventory: + new_count = str(int(csv_inventory[id][2]) + 
1) + new_max = str(max(int(csv_inventory[id][3]), (end - start))) + new_total = str(int(csv_inventory[id][3]) + end - start) + csv_inventory[id] = [id, (csv_inventory[id][1] + "," + sensor_id), new_count, new_max, new_total] + else: + csv_inventory[id] = [id, sensor_id, str(1), str(end - start), str(end - start)] + + path = self.save_path + "/inventory.csv" + self.save_csv_file(path, csv_inventory, csv_header) + + def save_csv_file(self, path, csv_inventory, header): + csv_content = "|".join(header) + "\n" + for row_id in csv_inventory: + csv_content += "|".join(csv_inventory[row_id]) + "\n" + self.save_file(path, csv_content) + + + def process_station_file(self, file_name): + print "Processing station file: " + file_name + file_stream = open(file_name, 'r') + + row = file_stream.readline() + return self.process_station_data(row) + + def process_sensor_file(self, file_name, max_files, sensor_max=99): + print "Processing sensor file: " + file_name + file_stream = open(file_name, 'r') + + month_last = 0 + year_last = 0 + records = [] + page = 0 + sensor_count = 0 + + file_count = 0 + for row in file_stream: + month = self.get_dly_field(row, DLY_FIELD_MONTH) + year = self.get_dly_field(row, DLY_FIELD_YEAR) + + if (month_last != 0 and year_last != 0) and (sensor_count >= sensor_max or month != month_last or year != year_last): + # process set + file_count += self.process_one_month_sensor_set(records, page) + records = [] + if sensor_count >= sensor_max and month == month_last and year == year_last: + # start a new page. + page += 1 + else: + # start over. + page = 0 + sensor_count = 0 + + records.append(row) + sensor_count += 1 + if max_files != 0 and file_count >= max_files: + # Stop creating more files after the max is reached. 
+ break + + month_last = month + year_last = year + + station_id = self.get_dly_field(records[0], DLY_FIELD_ID) + data_size = self.get_folder_size(self.get_base_folder(station_id) + "/" + station_id) + print "Created " + str(file_count) + " XML files for a data size of " + str(data_size) + "." + + return (file_count, data_size) + + def convert_c2f(self, c): + return (9 / 5 * c) + 32 + + def default_xml_web_service_start(self): + field_xml = "" + field_xml += "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" + return field_xml + + def default_xml_data_start(self, total_records): + field_xml = "" + field_xml += "<dataCollection pageCount=\"1\" totalCount=\"" + str(total_records) + "\">\n" + return field_xml + + def default_xml_station_start(self): + field_xml = "" + field_xml = "<stationCollection pageSize=\"100\" pageCount=\"1\" totalCount=\"1\">\n" + return field_xml + + def default_xml_field_date(self, report_date, indent=2): + field_xml = "" + field_xml += self.get_indent_space(indent) + "<date>" + str(report_date.year) + "-" + str(report_date.month).zfill(2) + "-" + str(report_date.day).zfill(2) + "T00:00:00.000</date>\n" + return field_xml + + def default_xml_mshr_station_additional(self, station_id): + """The web service station data is generate from the MSHR data supplemented with GHCN-Daily.""" + station_mshr_row = "" + stations_mshr_file = open(self.mshr_stations, 'r') + for line in stations_mshr_file: + if station_id == self.get_field_from_definition(line, MSHR_FIELDS['GHCND_ID']).strip(): + station_mshr_row = line + break + + if station_mshr_row == "": + return "" + + additional_xml = "" + + county = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['COUNTY']).strip() + if county != "": + additional_xml += self.default_xml_location_labels("CNTY", "FIPS:-9999", county) + + country_code = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip() + country_name = 
self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip() + if country_code != "" and country_name != "": + additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:" + country_code, country_name) + + return additional_xml + + def default_xml_location_labels(self, type, id, display_name): + label_xml = "" + label_xml += self.default_xml_start_tag("locationLabels", 2) + label_xml += self.default_xml_element("type", type, 3) + label_xml += self.default_xml_element("id", id, 3) + label_xml += self.default_xml_element("displayName", display_name, 3) + label_xml += self.default_xml_end_tag("locationLabels", 2) + return label_xml + + + def default_xml_web_service_station(self, station_id): + """The web service station data is generate from available historical sources.""" + station_ghcnd_row = "" + stations_ghcnd_file = open(self.ghcnd_stations, 'r') + for line in stations_ghcnd_file: + if station_id == self.get_field_from_definition(line, STATIONS_FIELDS['ID']): + station_ghcnd_row = line + break + + xml_station = "" + xml_station += self.default_xml_start_tag("station", 1) + + xml_station += self.default_xml_element("id", "GHCND:" + station_id, 2) + xml_station += self.default_xml_element("displayName", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['NAME']).strip(), 2) + xml_station += self.default_xml_element("latitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LATITUDE']).strip(), 2) + xml_station += self.default_xml_element("longitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LONGITUDE']).strip(), 2) + + elevation = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['ELEVATION']).strip() + if elevation != "-999.9": + xml_station += self.default_xml_element("elevation", elevation, 2) + + state_code = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['STATE']).strip() + if state_code != "" and state_code in self.STATES: + 
xml_station += self.default_xml_location_labels("ST", "FIPS:" + str(self.STATES.keys().index(state_code)), self.STATES[state_code]) + + # Add the MSHR data to the station generated information. + xml_station += self.default_xml_mshr_station_additional(station_id) + + xml_station += self.default_xml_end_tag("station", 1) + return xml_station + + def default_xml_day_reading_as_field(self, row, day): + day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS) + value = self.get_dly_field(row, day_index); + if value == "-9999": + return "" + + field_xml = "" + field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT) + if field_id in ("MDTN", "MDTX", "MNPN", "MXPN", "TMAX", "TMIN", "TOBS",): + # Add both the celcius and fahrenheit temperatures. + celcius = float(value) / 10 + field_xml += " <" + field_id + "_c>" + str(celcius) + "</" + field_id + "_c>\n" + fahrenheit = self.convert_c2f(celcius) + field_xml += " <" + field_id + "_f>" + str(fahrenheit) + "</" + field_id + "_f>\n" + elif field_id in ("AWND", "EVAP", "PRCP", "THIC", "WESD", "WESF", "WSF1", "WSF2", "WSF5", "WSFG", "WSFI", "WSFM",): + # Field values that are in tenths. + converted_value = float(value) / 10 + field_xml += " <" + field_id + ">" + str(converted_value) + "</" + field_id + ">\n" + elif field_id in ("ACMC", "ACMH", "ACSC", "ACSH", "PSUN",): + # Fields is a percentage. + field_xml += " <" + field_id + ">" + value.strip() + "</" + field_id + ">\n" + elif field_id in ("FMTM", "PGTM",): + # Fields is a time value HHMM. + field_xml += " <" + field_id + ">" + value.strip() + "</" + field_id + ">\n" + elif field_id in ("DAEV", "DAPR", "DASF", "DATN", "DATX", "DAWM", "DWPR", "FRGB", "FRGT", "FRTH", "GAHT", "MDSF", "MDWM", "MDEV", "MDPR", "SNOW", "SNWD", "TSUN", "WDF1", "WDF2", "WDF5", "WDFG", "WDFI", "WDFM", "WDMV",): + # Fields with no alternation needed. 
+ field_xml += " <" + field_id + ">" + value.strip() + "</" + field_id + ">\n" + else: + field_xml += " <unknown>" + field_id + "</unknown>\n" + + # print field_xml + return field_xml + + def default_xml_day_reading(self, row, day, indent=2): + day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS) + value = self.get_dly_field(row, day_index); + mflag = self.get_dly_field(row, day_index + 1); + qflag = self.get_dly_field(row, day_index + 2); + sflag = self.get_dly_field(row, day_index + 3); + + if value == "-9999": + return "" + + indent_space = self.get_indent_space(indent) + field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT) + station_id = "GHCND:" + self.get_dly_field(row, DLY_FIELD_ID) + + field_xml = "" + field_xml += indent_space + "<dataType>" + field_id + "</dataType>\n" + field_xml += indent_space + "<station>" + station_id + "</station>\n" + field_xml += indent_space + "<value>" + value.strip() + "</value>\n" + field_xml += indent_space + "<attributes>\n" + field_xml += indent_space + indent_space + "<attribute>" + mflag.strip() + "</attribute>\n" + field_xml += indent_space + indent_space + "<attribute>" + qflag.strip() + "</attribute>\n" + field_xml += indent_space + indent_space + "<attribute>" + sflag.strip() + "</attribute>\n" + field_xml += indent_space + indent_space + "<attribute></attribute>\n" + field_xml += indent_space + "</attributes>\n" + + # print field_xml + return field_xml + + def default_xml_end(self): + return textwrap.dedent("""\ + </ghcnd_observation>""") + + def default_xml_data_end(self): + return self.default_xml_end_tag("dataCollection", 0) + + def default_xml_station_end(self): + return self.default_xml_end_tag("stationCollection", 0) + + def default_xml_element(self, tag, data, indent=1): + return self.get_indent_space(indent) + "<" + tag + ">" + data + "</" + tag + ">\n" + + def default_xml_start_tag(self, tag, indent=1): + return self.get_indent_space(indent) + "<" + tag + ">\n" + + def 
default_xml_end_tag(self, tag, indent=1): + return self.get_indent_space(indent) + "</" + tag + ">\n" + + def get_indent_space(self, indent): + return (" " * (4 * indent)) + + +class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML): + """The web service class details how to create files similar to the NOAA web service.""" + skip_downloading = False + # Station data + def process_station_data(self, row): + """Adds a single station record file either from downloading the data or generating a similar record.""" + station_id = self.get_dly_field(row, DLY_FIELD_ID) + download = 0 + if self.token is not "" and not self.skip_downloading: + download = self.download_station_data(station_id, self.token, True) + if download == 0: + self.skip_downloading = True + + # If not downloaded, generate. + if download != 0: + return download + else: + # Information for each daily file. + station_xml_file = self.default_xml_web_service_start() + station_xml_file += self.default_xml_station_start() + station_xml_file += self.default_xml_web_service_station(station_id) + station_xml_file += self.default_xml_station_end() + + # Remove white space. + station_xml_file = station_xml_file.replace("\n", ""); + station_xml_file = station_xml_file.replace(self.get_indent_space(1), ""); + + # Make sure the station folder is available. + ghcnd_xml_station_path = self.get_base_folder(station_id, "stations") + if not os.path.isdir(ghcnd_xml_station_path): + os.makedirs(ghcnd_xml_station_path) + + # Save XML string to disk. + save_file_name = ghcnd_xml_station_path + station_id + ".xml" + save_file_name = self.save_file(save_file_name, station_xml_file) + + if save_file_name is not "": + if self.debug_output: + print "Wrote file: " + save_file_name + return 1 + else: + return 0 + + # Station data + def download_station_data(self, station_id, token, reset=False): + """Downloads the station data from the web service.""" + import time + time.sleep(2) + # Make sure the station folder is available. 
+ ghcnd_xml_station_path = self.get_base_folder(station_id, "stations") + if not os.path.isdir(ghcnd_xml_station_path): + os.makedirs(ghcnd_xml_station_path) + + # Build download URL. + url = "http://www.ncdc.noaa.gov/cdo-services/services/datasets/GHCND/stations/GHCND:" + station_id + ".xml?token=" + token + url_file = urllib.urlopen(url) + station_xml_file = "" + while (True): + line = url_file.readline() + if not line: + break + station_xml_file += line + + if station_xml_file.find("<cdoError>") != -1: + if self.debug_output: + print "Error in station download" + return 0 + + # Save XML string to disk. + save_file_name = ghcnd_xml_station_path + station_id + ".xml" + save_file_name = self.save_file(save_file_name, station_xml_file) + + if save_file_name is not "": + if self.debug_output: + print "Wrote file: " + save_file_name + return 2 + else: + return 0 + + # Sensor data + def process_one_month_sensor_set(self, records, page): + """Generates records for a station using the web service xml layout.""" + found_data = False + year = int(self.get_dly_field(records[0], DLY_FIELD_YEAR)) + month = int(self.get_dly_field(records[0], DLY_FIELD_MONTH)) + + station_id = self.get_dly_field(records[0], DLY_FIELD_ID) + + # Information for each daily file. + count = 0 + daily_xml_file = "" + + for day in range(1, 32): + try: + # TODO find out what is a valid python date range? 1889? + # Attempt to see if this is valid date. 
+ report_date = date(year, month, day) + + for record in records: + record_xml_snip = self.default_xml_day_reading(record, report_date.day) + if record_xml_snip is not "": + daily_xml_file += self.default_xml_start_tag("data") + daily_xml_file += self.default_xml_field_date(report_date) + daily_xml_file += record_xml_snip + daily_xml_file += self.default_xml_end_tag("data") + found_data = True + count += 1 + + except ValueError: + pass + + daily_xml_file = self.default_xml_web_service_start() + self.default_xml_data_start(count) + daily_xml_file + self.default_xml_data_end() + daily_xml_file = daily_xml_file.replace("\n", ""); + daily_xml_file = daily_xml_file.replace(self.get_indent_space(1), ""); + + if not found_data: + return 0 + + # Make sure the station folder is available. + ghcnd_xml_station_path = self.get_base_folder(station_id) + "/" + station_id + "/" + str(report_date.year) + "/" + if not os.path.isdir(ghcnd_xml_station_path): + os.makedirs(ghcnd_xml_station_path) + + # Save XML string to disk. 
+ save_file_name = ghcnd_xml_station_path + build_sensor_save_filename(station_id, report_date, page) + save_file_name = self.save_file(save_file_name, daily_xml_file) + + if save_file_name is not "": + if self.debug_output: + print "Wrote file: " + save_file_name + return 1 + else: + return 0 + +def build_base_save_folder(save_path, station_id, data_type="sensors"): + # Default + station_prefix = station_id[:3] + return save_path + data_type + "/" + station_prefix + "/" + +def build_sensor_save_filename(station_id, report_date, page): + # Default + return station_id + "_" + str(report_date.year).zfill(4) + str(report_date.month).zfill(2) + "_" + str(page) + ".xml" + http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py new file mode 100644 index 0000000..c8b0fa5 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py @@ -0,0 +1,406 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import glob +import os.path +import linecache +import distutils.core + +from weather_convert_to_xml import * +from collections import OrderedDict + +# Weather data files created to manage the conversion process. +# Allows partition and picking up where you left off. +class WeatherDataFiles: + + LARGE_FILE_ROOT_TAG = "root" + + INDEX_DATA_FILE_NAME = 0 + INDEX_DATA_SENSORS_STATUS = 1 + INDEX_DATA_STATION_STATUS = 2 + INDEX_DATA_FILE_COUNT = 3 + INDEX_DATA_FOLDER_DATA = 4 + + DATA_FILE_START_INDEX = 0 + DATA_FILE_EXTENSION = ".dly" + DATA_FILE_MISSING = "missing" + DATA_FILE_INITIAL = "initialized" + DATA_FILE_DOWNLOADED = "downloaded" + DATA_FILE_GENERATED = "generated" + SEPERATOR = "," + + type = "sensor" + data_reset = False + + def __init__(self, base_path, progress_file_name="/tmp/_weather_data.csv"): + self.base_path = base_path + + self.progress_file_name = progress_file_name + + self.current = self.DATA_FILE_START_INDEX + self.progress_data = [] + + def get_file_list_iterator(self): + """Return the list of files one at a time.""" + return glob.iglob(self.base_path + "/*" + self.DATA_FILE_EXTENSION) + + # Save Functions + def build_progress_file(self, options, convert): + if not os.path.isfile(self.progress_file_name) or 'reset' in options: + # Build a new file. 
+ file = open(self.progress_file_name, 'w') + contents = self.get_default_progress_file_csv() + file.write(contents) + file.close() + elif 'append' in options or 'recalculate' in options: + self.open_progress_data() + row_count = len(self.progress_data) + for row in range(0, row_count): + row_contents = self.progress_data[row].rsplit(self.SEPERATOR) + file_name = row_contents[self.INDEX_DATA_FILE_NAME] + if self.get_file_row(file_name) < 0 and 'append' in options: + self.progress_data.append(self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)) + elif 'recalculate' in options: + # The folder is hard coded + station_id = os.path.basename(file_name).split('.')[0] + folder_name = convert.get_base_folder(station_id) + if os.path.exists(folder_name): + row_contents = self.progress_data[row].rsplit(self.SEPERATOR) + sensor_status = row_contents[self.INDEX_DATA_SENSORS_STATUS] + station_status = row_contents[self.INDEX_DATA_STATION_STATUS] + file_count = self.get_file_count(folder_name) + data_size = self.get_folder_size(folder_name) + self.progress_data[row] = self.get_progress_csv_row(file_name, sensor_status, station_status, file_count, data_size) + else: + self.progress_data[row] = self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL) + # Save file + self.close_progress_data(True) + self.reset() + + def copy_to_n_partitions(self, save_path, partitions, base_paths, reset): + """Once the initial data has been generated, the data can be copied into a set number of partitions. """ + if (len(base_paths) == 0): + return + + # Initialize the partition paths. + partition_paths = get_disk_partition_paths(0, partitions, base_paths) + for path in partition_paths: + # Make sure the xml folder is available. 
+ prepare_path(path, reset) + + import fnmatch + import os + + # copy stations and sensors into each partition + current_sensor_partition = 0 + current_station_partition = 0 + self.open_progress_data() + row_count = len(self.progress_data) + for row in range(0, row_count): + row_contents = self.progress_data[row].rsplit(self.SEPERATOR) + file_name = row_contents[self.INDEX_DATA_FILE_NAME] + station_id = os.path.basename(file_name).split('.')[0] + + # Copy sensor files + type = "sensors" + file_path = build_base_save_folder(save_path, station_id, type) + station_id + for root, dirnames, filenames in os.walk(file_path): + for filename in fnmatch.filter(filenames, '*.xml'): + xml_path = os.path.join(root, filename) + new_file_base = build_base_save_folder(partition_paths[current_sensor_partition], station_id, type) + station_id + if not os.path.isdir(new_file_base): + os.makedirs(new_file_base) + shutil.copyfile(xml_path, new_file_base + "/" + filename) + current_sensor_partition += 1 + if current_sensor_partition >= len(partition_paths): + current_sensor_partition = 0 + + # Copy station files + type = "stations" + file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml" + new_file_base = build_base_save_folder(partition_paths[current_station_partition], station_id, type) + new_file_path = new_file_base + station_id + ".xml" + if os.path.isfile(file_path): + if not os.path.isdir(new_file_base): + os.makedirs(new_file_base) + shutil.copyfile(file_path, new_file_path) + current_station_partition += 1 + if current_station_partition >= len(partition_paths): + current_station_partition = 0 + + def build_to_n_partition_files(self, save_path, partitions, base_paths, reset): + """Once the initial data has been generated, the data can be divided into partitions + and stored in single files. 
+ """ + if (len(base_paths) == 0): + return + + XML_START = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>" + + partition_paths = get_disk_partition_paths(0, partitions, base_paths) + + import fnmatch + import os + + for path in partition_paths: + prepare_path(path, reset) + + # Initialize the partition paths. + types = ["sensors", "stations"] + for type in types: + partition_files = [] + for path in partition_paths: + # Make sure the xml folder is available. + prepare_path(path + type + "/", False) + partition_files.append(open(path + type + "/partition.xml", 'w')) + partition_files[-1].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n") + + # copy into each partition + current_partition = 0 + self.open_progress_data() + row_count = len(self.progress_data) + for row in range(0, row_count): + row_contents = self.progress_data[row].rsplit(self.SEPERATOR) + file_name = row_contents[self.INDEX_DATA_FILE_NAME] + station_id = os.path.basename(file_name).split('.')[0] + + # Copy files + if type == "sensors": + file_path = build_base_save_folder(save_path, station_id, type) + station_id + for root, dirnames, filenames in os.walk(file_path): + for filename in fnmatch.filter(filenames, '*.xml'): + xml_path = os.path.join(root, filename) + xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n" + partition_files[current_partition].write(xml_data) + current_partition += 1 + if current_partition >= len(partition_files): + current_partition = 0 + elif type == "stations": + file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml" + xml_path = os.path.join(root, file_path) + xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n" + partition_files[current_partition].write(xml_data) + current_partition += 1 + if current_partition >= len(partition_paths): + current_partition = 0 + + for row in range(0, len(partition_paths)): + partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n") + 
# NOTE(review): the following are methods of the weather progress/data-file
# tracking class; the class header (with SEPERATOR, INDEX_DATA_*, DATA_FILE_*
# constants) is outside this chunk.  Each method takes ``self`` explicitly.

def get_file_row(self, file_name):
    """Return the index of the progress row for file_name, or -1 if absent."""
    for i, row in enumerate(self.progress_data):
        if row.startswith(file_name):
            return i
    return -1

def get_default_progress_file_csv(self):
    """Build the initial progress CSV with every file marked unprocessed."""
    rows = []
    for path in self.get_file_list_iterator():
        file_name = os.path.basename(path)
        rows.append(self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL))
    return "".join(rows)

def print_progress_file_stats(self, convert):
    """Print summary statistics from the progress CSV and the on-disk folders.

    ``convert`` supplies get_base_folder(station_id) for locating a station's
    output folder on disk.
    """
    sensor_count_missing = 0
    sensor_count = 0
    file_count = 0
    data_size = 0

    sensor_count_actual = 0
    file_count_actual = 0
    data_size_actual = 0

    station_count_missing = 0
    station_count_generated = 0
    station_count_downloaded = 0

    self.open_progress_data()
    for row in self.progress_data:
        row_contents = row.split(self.SEPERATOR)
        if int(row_contents[self.INDEX_DATA_FILE_COUNT]) != -1 and int(row_contents[self.INDEX_DATA_FOLDER_DATA]) != -1:
            sensor_count += 1
            file_count += int(row_contents[self.INDEX_DATA_FILE_COUNT])
            data_size += int(row_contents[self.INDEX_DATA_FOLDER_DATA])
        else:
            sensor_count_missing += 1

        # BUG FIX: the original used two independent ``if`` statements here,
        # so a "generated" row also fell through to the ``else`` branch and
        # was additionally counted as missing.  Use one if/elif/else chain.
        station_status = row_contents[self.INDEX_DATA_STATION_STATUS]
        if station_status == "generated":
            station_count_generated += 1
        elif station_status == "downloaded":
            station_count_downloaded += 1
        else:
            station_count_missing += 1

        file_name = row_contents[self.INDEX_DATA_FILE_NAME]
        station_id = os.path.basename(file_name).split('.')[0]
        folder_name = convert.get_base_folder(station_id)
        if os.path.exists(folder_name):
            sensor_count_actual += 1
            file_count_actual += self.get_file_count(folder_name)
            data_size_actual += self.get_folder_size(folder_name)

    print("Progress File:\t" + self.progress_file_name + "\n")

    print("CSV DETAILS OF PROCESSED SENSORS")
    print("Number of stations:\t" + "{:,}".format(sensor_count))
    print("Number of files:\t" + "{:,}".format(file_count))
    print("Data size:\t\t" + "{:,}".format(data_size) + " Bytes\n")

    print("CSV DETAILS OF unPROCESSED SENSORS")
    print("Number of stations:\t" + "{:,}".format(sensor_count_missing) + "\n")

    print("CSV DETAILS OF PROCESSED STATIONS")
    print("Generated:\t\t" + "{:,}".format(station_count_generated))
    print("Downloaded:\t\t" + "{:,}".format(station_count_downloaded))
    print("Missing:\t\t" + "{:,}".format(station_count_missing) + "\n")

    print("FOLDER DETAILS")
    print("Number of stations:\t" + "{:,}".format(sensor_count_actual))
    print("Number of files:\t" + "{:,}".format(file_count_actual))
    print("Data size:\t\t" + "{:,}".format(data_size_actual) + " Bytes\n")

def get_progress_csv_row(self, file_name, sensors_status, station_status, file_count=-1, data_size=-1):
    """Format one newline-terminated CSV progress row."""
    return self.SEPERATOR.join(
        [file_name, sensors_status, station_status, str(file_count), str(data_size)]) + "\n"

def update_file_sensor_status(self, file_name, sensors_status, file_count=-1, data_size=-1):
    """Update the sensor columns of file_name's row and persist the CSV."""
    for row in range(len(self.progress_data)):
        if self.progress_data[row].startswith(file_name):
            # Preserve the existing station status column.
            station_status = self.progress_data[row].split(self.SEPERATOR)[self.INDEX_DATA_STATION_STATUS]
            self.progress_data[row] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
            break

    # Save the file.
    self.close_progress_data(True)

def update_file_station_status(self, file_name, station_status):
    """Update the station status of file_name's row and persist the CSV."""
    for row in range(len(self.progress_data)):
        if self.progress_data[row].startswith(file_name):
            # Preserve the existing sensor columns.
            row_contents = self.progress_data[row].split(self.SEPERATOR)
            sensors_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
            file_count = int(row_contents[self.INDEX_DATA_FILE_COUNT])
            data_size = int(row_contents[self.INDEX_DATA_FOLDER_DATA])
            self.progress_data[row] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
            break

    # Save the file.
    self.close_progress_data(True)

def get_file_count(self, folder_name):
    """Count regular files under folder_name, recursively."""
    count = 0
    for _dirpath, _dirnames, filenames in os.walk(folder_name):
        count += len(filenames)
    return count

def get_folder_size(self, folder_name):
    """Total size in bytes of all files under folder_name, recursively."""
    total_size = 0
    for dirpath, _dirnames, filenames in os.walk(folder_name):
        for f in filenames:
            total_size += os.path.getsize(os.path.join(dirpath, f))
    return total_size

def get_station_status(self, return_value):
    """Map a station-build return code to its status string (2=downloaded, 1=generated)."""
    if return_value == 2:
        return self.DATA_FILE_DOWNLOADED
    elif return_value == 1:
        return self.DATA_FILE_GENERATED
    return self.DATA_FILE_MISSING

def open_progress_data(self):
    """Load the progress CSV into memory as a list of lines."""
    with open(self.progress_file_name, 'r') as fh:
        self.progress_data = fh.readlines()

def close_progress_data(self, force=False):
    """Write the in-memory progress rows back to disk (if any, or if forced)."""
    if len(self.progress_data) > 0 or force:
        with open(self.progress_file_name, 'w') as fh:
            fh.writelines(self.progress_data)

def reset(self):
    """Flush pending rows and restart iteration from the first data row."""
    self.close_progress_data()

    self.current = self.DATA_FILE_START_INDEX
    self.open_progress_data()

def set_type(self, type):
    # Iteration mode: "sensor" or "station".
    self.type = type

def set_data_reset(self, data_reset):
    # When true, already-processed rows are yielded again.
    self.data_reset = data_reset

# Iterator protocol: yields file names whose status still needs processing.
def __iter__(self):
    return self

def next(self):
    """Return the next file name to process, honoring type and data_reset."""
    while True:
        # Find a row that has not been processed yet.
        if self.current >= len(self.progress_data):
            raise StopIteration
        row = self.progress_data[self.current]
        self.current += 1
        columns = row.split(self.SEPERATOR)
        if self.type == "sensor" and (columns[self.INDEX_DATA_SENSORS_STATUS].strip() != self.DATA_FILE_GENERATED or self.data_reset):
            break
        elif self.type == "station" and (columns[self.INDEX_DATA_STATION_STATUS].strip() != self.DATA_FILE_DOWNLOADED or self.data_reset):
            break
    return columns[self.INDEX_DATA_FILE_NAME]

# Python 3 iterator protocol alias (backward compatible with the py2 name).
__next__ = next
# Index values of each field in a partition scheme tuple.
PARTITION_INDEX_NODE = 0
PARTITION_INDEX_DISK = 1
PARTITION_INDEX_VIRTUAL = 2
PARTITION_INDEX = 3
PARTITION_INDEX_PATH = 4
PARTITION_HEADER = ("Node", "Disk", "Virtual", "Index", "Path")

def get_disk_partition_paths(node_id, partitions, base_paths, key="partitions"):
    """Return only the path component of each partition scheme tuple."""
    return [scheme[PARTITION_INDEX_PATH]
            for scheme in get_disk_partition_scheme(node_id, partitions, base_paths, key)]

def get_disk_partition_scheme(node_id, virtual_disk_partitions, base_paths, key="partitions"):
    """Build a (node, disk, virtual, index, path) tuple for every disk/partition pair.

    Tuples are ordered partition-major: all disks for partition 0, then
    partition 1, and so on (matches the original nesting).
    """
    partition_scheme = []
    for i in range(virtual_disk_partitions):
        for j, base in enumerate(base_paths):
            new_partition_path = base + key + "/" + get_partition_folder(j, virtual_disk_partitions, i) + "/"
            partition_scheme.append((node_id, j, virtual_disk_partitions, i, new_partition_path))
    return partition_scheme

def get_partition_folder(disks, partitions, index):
    """Folder name encoding disk id, partition count, and partition index."""
    return "d" + str(disks) + "_p" + str(partitions) + "_i" + str(index)

def prepare_path(path, reset):
    """Ensure the directory is available. If reset, then it is a brand new directory."""
    # BUG FIX (robustness): this module never imports shutil directly; the
    # original only worked because ``from weather_download_files import *``
    # happened to leak the name.  Import it explicitly here.
    import shutil
    if os.path.isdir(path) and reset:
        shutil.rmtree(path)

    if not os.path.isdir(path):
        os.makedirs(path)

def file_get_contents(filename):
    """Return the entire contents of filename as a string."""
    with open(filename) as f:
        return f.read()
# Configuration names (FILE_NAMES, BASE_DOWNLOAD_URL, MSHR_URLS) come from the
# weather_config_ghcnd / weather_config_mshr star imports at the top of file.

class WeatherDownloadFiles:
    """Downloads and unpacks the NOAA GHCN-Daily and MSHR support files."""

    def __init__(self, save_path):
        self.save_path = save_path

        if not os.path.isdir(save_path):
            os.makedirs(save_path)

    def download_ghcnd_files(self, reset=False):
        """Download the complete GHCND file list."""
        for file_name in FILE_NAMES:
            url = BASE_DOWNLOAD_URL + file_name
            self.download_file(url, reset)

    def download_mshr_files(self, reset=False):
        """Download every MSHR url."""
        for url in MSHR_URLS:
            self.download_file(url, reset)

    def download_file(self, url, reset=False):
        """Download the file into save_path, unless it already exists."""
        file_name = self.save_path + "/" + url.split('/')[-1]

        if not os.path.isfile(file_name) or reset:
            print("Downloading: " + url)
            urllib.urlretrieve(url, file_name, report_download_status)
            print("")

    def unzip_ghcnd_package(self, package, reset=False):
        """Extract <package>.tar.gz into save_path, unless already extracted."""
        file_name = self.save_path + "/" + package + ".tar.gz"
        unzipped_path = self.save_path + "/" + package

        if os.path.isdir(unzipped_path) and reset:
            shutil.rmtree(unzipped_path)

        if not os.path.isdir(unzipped_path):
            print("Unzipping: " + file_name)
            # BUG FIX: the original never closed the tarfile handle.
            tar_file = tarfile.open(file_name, 'r:gz')
            try:
                tar_file.extractall(unzipped_path)
            finally:
                tar_file.close()

    def unzip_mshr_files(self, reset=False):
        """Extract every downloaded MSHR zip archive into save_path."""
        for url in MSHR_URLS:
            if url.endswith('.zip'):
                file_name = self.save_path + "/" + url.split('/')[-1]
                print("Unzipping: " + file_name)
                with zipfile.ZipFile(file_name, 'r') as myzip:
                    myzip.extractall(self.save_path)

def report_download_status(count, block, size):
    """urlretrieve progress hook: redraw a 50-character progress bar in place."""
    line_size = 50
    sys.stdout.write("\b" * line_size)
    sys.stdout.write(get_report_line(float(count) * block / size, line_size))

def get_report_line(percentage, line_size):
    """Return a bar of '=' (done) and '-' (remaining) of length line_size."""
    report = ""
    for i in range(line_size):
        if float(i) / line_size < percentage:
            report += "="
        else:
            report += "-"
    return report

def download_file_save_as(url, new_file_name, reset=False):
    """Download url to new_file_name, unless the target already exists."""
    if not os.path.isfile(new_file_name) or reset:
        print("Downloading: " + url)
        urllib.urlretrieve(url, new_file_name, report_download_status)
        print("")
def main(argv):
    """Build collection.xml listing every *.xml (up to 4 levels deep) under a folder.

    Usage: build_saxon_collection_xml.py -f <folder>
    The hrefs written are relative to the supplied folder.
    """
    xml_folder = ""

    # Get the base folder.
    try:
        opts, args = getopt.getopt(argv, "f:h", ["folder="])
    except getopt.GetoptError:
        print('The file options for build_saxon_collection_xml.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python build_saxon_collection_xml.py -h')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    -f        The base folder to create collection XML file.')
            sys.exit()
        elif opt in ('-f', "--folder"):
            # Check if the folder exists.
            if os.path.exists(arg):
                xml_folder = arg
            else:
                print('Error: Argument must be a folder name for --folder (-f).')
                sys.exit()

    # Required fields to run the script.
    if xml_folder == "" or not os.path.exists(xml_folder):
        print('Error: The folder path option must be supplied: --folder (-f).')
        sys.exit()

    # Find all XML files in the folder, searching each directory level in turn.
    doc_entries = []
    for depth in range(1, 5):
        search_pattern = xml_folder + ('/*' * depth) + '.xml'
        for file_path in glob.iglob(search_pattern):
            doc_entries.append('<doc href="' + file_path.replace(xml_folder, '') + '"/>')
    collection_xml = "<collection>" + "".join(doc_entries) + "</collection>"

    # Create the collection XML file.
    with open('collection.xml', 'w') as out:
        out.write(collection_xml)

if __name__ == "__main__":
    main(sys.argv[1:])
def main(argv):
    """Compare two files of names line-by-line and report the set differences.

    Usage: diff_xml_files.py --f1 <file> --f2 <file>
    """
    f1 = ""
    f2 = ""

    # Get the two file arguments.
    try:
        opts, args = getopt.getopt(argv, "h", ["f1=", "f2="])
    except getopt.GetoptError:
        # BUG FIX: the original messages referred to build_saxon_collection_xml.py.
        print('The file options for diff_xml_files.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python diff_xml_files.py -h')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    --f1        The first file to compare.')
            print('    --f2        The second file to compare.')
            sys.exit()
        # BUG FIX: the original tested ``opt in ('--f1')`` — a string, not a
        # tuple — i.e. a substring test that only worked by accident.
        elif opt == '--f1':
            if os.path.exists(arg):
                f1 = arg
            else:
                print('Error: Argument must be a file name for --f1.')
                sys.exit()
        elif opt == '--f2':
            if os.path.exists(arg):
                f2 = arg
            else:
                print('Error: Argument must be a file name for --f2.')
                sys.exit()

    # Required fields to run the script.
    if f1 == "" or not os.path.exists(f1):
        print('Error: The file path option must be supplied: --f1.')
        sys.exit()
    if f2 == "" or not os.path.exists(f2):
        print('Error: The file path option must be supplied: --f2.')
        sys.exit()

    with open(f1) as f:
        content_f1 = f.readlines()
    set_f1 = set(content_f1)

    with open(f2) as f:
        content_f2 = f.readlines()
    set_f2 = set(content_f2)

    missing_in_f1 = set_f2.difference(set_f1)
    missing_in_f2 = set_f1.difference(set_f2)
    found_in_both = set_f1.intersection(set_f2)

    print("")
    print("Missing files in " + f1)
    for f1_name in missing_in_f1:
        print("   + " + f1_name.strip())

    print("")
    print("Missing files in " + f2)
    for f2_name in missing_in_f2:
        print("   + " + f2_name.strip())

    # "diff set vs list" is the number of duplicate lines within each file.
    offset = 40
    print("")
    print("XML Summary")
    print((" - Found in both:").ljust(offset) + str(len(found_in_both)))
    print((" - " + f1 + " diff set vs list:").ljust(offset) + str(len(content_f1) - len(set_f1)))
    print((" - " + f2 + " diff set vs list:").ljust(offset) + str(len(content_f2) - len(set_f2)))
    print((" - " + f1 + " missing:").ljust(offset) + str(len(missing_in_f1)))
    print((" - " + f2 + " missing:").ljust(offset) + str(len(missing_in_f2)))

if __name__ == "__main__":
    main(sys.argv[1:])
SEARCH_STRING = 'Average execution time:'

def find_files(directory, pattern):
    """Yield (root, basename) for every file under directory matching pattern."""
    for root, dirs, files in os.walk(directory):
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                yield (root, basename)

def main(argv):
    """Collect 'Average execution time' lines from *.log files into a CSV.

    Usage: find_averages_in_logs.py -f <log folder> -s <save file> [-t type]
    """
    log_folder = ""
    save_file = ""
    data_type = ""

    # Get the base folder and save file.
    try:
        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
    except getopt.GetoptError:
        print('The file options for find_averages_in_logs.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python find_averages_in_logs.py -h')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    -f        The base folder to search for log files.')
            print('    -s        The save file.')
            sys.exit()
        elif opt in ('-f', "--folder"):
            # Check if the folder exists.
            if os.path.exists(arg):
                log_folder = arg
            else:
                print('Error: Argument must be a folder name for --folder (-f).')
                sys.exit()
        elif opt in ('-s', "--save_file"):
            save_file = arg
        elif opt in ('-t', "--data_type"):
            data_type = arg

    # Required fields to run the script.
    if log_folder == "" or not os.path.exists(log_folder):
        print('Error: The folder path option must be supplied: --folder (-f).')
        sys.exit()
    if save_file == "":
        print('Error: The save file option must be supplied: --save_file (-s).')
        sys.exit()

    with open(save_file, 'w') as outfile:
        csvfile = csv.writer(outfile)
        for path, filename in find_files(log_folder, '*.log'):
            with open(path + "/" + filename) as infile:
                folders = path.replace(log_folder, "")
                for line in infile:
                    if line.startswith(SEARCH_STRING):
                        # Line shape: "Average execution time: <value> ..."
                        time_split = line.split(" ")
                        # NOTE(review): assumes log names have at least three
                        # dot-separated parts (name.run_id.log) — TODO confirm.
                        name_split = filename.split(".")
                        folder_split = folders.split("/")

                        # Build data row: folder components, name, time, run id.
                        row = folder_split
                        row.append(name_split[0])
                        row.append(time_split[3])
                        row.append(name_split[2])
                        csvfile.writerow(row)

if __name__ == "__main__":
    main(sys.argv[1:])
def find_files(directory, pattern):
    """Yield (root, basename) for every file under directory matching pattern."""
    for root, dirs, files in os.walk(directory):
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                yield (root, basename)

def main(argv):
    """Write the basename of every *.xml under a folder to list_xml.csv.

    Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \\; > list_xml.csv
    """
    xml_folder = ""

    # Get the base folder.
    try:
        opts, args = getopt.getopt(argv, "f:h", ["folder="])
    except getopt.GetoptError:
        print('The file options for list_xml_files.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python list_xml_files.py -h')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    -f        The base folder to build XML file list.')
            sys.exit()
        elif opt in ('-f', "--folder"):
            # Check if the folder exists.
            if os.path.exists(arg):
                xml_folder = arg
            else:
                print('Error: Argument must be a folder name for --folder (-f).')
                sys.exit()

    # Required fields to run the script.
    if xml_folder == "" or not os.path.exists(xml_folder):
        print('Error: The folder path option must be supplied: --folder (-f).')
        sys.exit()

    # Write one basename per line.
    names = [filename + "\n" for _path, filename in find_files(xml_folder, '*.xml')]
    with open('list_xml.csv', 'w') as out:
        out.writelines(names)

if __name__ == "__main__":
    main(sys.argv[1:])
XML_PREFIX = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root>' + "\n"
XML_SUFFIX = '</root>' + "\n"

def find_files(directory, pattern):
    """Yield (root, basename) for matching files under directory (follows symlinks)."""
    for root, dirs, files in os.walk(directory, followlinks=True):
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                yield (root, basename)

def main(argv):
    """Merge all *.xml files of a given data type into one rooted XML document.

    Usage: merge_xml_files.py -f <folder> -s <save file> -t <data type>
    Only documents whose path contains the data type are merged; each
    document's own root wrapper lines are stripped.
    """
    xml_folder = ""
    save_file = ""
    data_type = ""

    # Get the base folder, save file, and data type.
    try:
        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
    except getopt.GetoptError:
        # BUG FIX: the original message referred to list_xml_files.py.
        print('The file options for merge_xml_files.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python merge_xml_files.py -f /path/to/folder -s new.xml -t sensors')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    -f        The base folder to build XML file list.')
            print('    -s        The save file.')
            sys.exit()
        elif opt in ('-f', "--folder"):
            # Check if the folder exists.
            if os.path.exists(arg):
                xml_folder = arg
            else:
                print('Error: Argument must be a folder name for --folder (-f).')
                sys.exit()
        elif opt in ('-s', "--save_file"):
            save_file = arg
        elif opt in ('-t', "--data_type"):
            data_type = arg

    # Required fields to run the script.
    if xml_folder == "" or not os.path.exists(xml_folder):
        print('Error: The folder path option must be supplied: --folder (-f).')
        sys.exit()
    if save_file == "":
        print('Error: The save file option must be supplied: --save_file (-s).')
        sys.exit()

    with open(save_file, 'w') as outfile:
        outfile.write(XML_PREFIX)
        for path, filename in find_files(xml_folder, '*.xml'):
            # Only merge documents found under the requested data type path.
            if data_type in path:
                with open(path + "/" + filename) as infile:
                    for line in infile:
                        # Skip each document's own root wrapper lines.
                        if line != XML_PREFIX and line != XML_SUFFIX:
                            outfile.write(line)
        outfile.write(XML_SUFFIX)

if __name__ == "__main__":
    main(sys.argv[1:])
:) + +(: XQuery Function List :) +(: VXQuery function list in csv with arguments and return types :) +let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml" +let $r := + for $f in fn:doc($list)/functions/function + let $pl := + for $p in $f/param + return $p/@type + return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',') +return fn:string-join($r , '|') \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq new file mode 100644 index 0000000..f485807 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq @@ -0,0 +1,27 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
:) + +(: XQuery Function List :) +(: VXQuery function list in csv with arguments and return types :) +let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml" +let $r := + for $f in fn:doc($list)/operators/operator + let $pl := + for $p in $f/param + return $p/@type + return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',') +return fn:string-join($r , '|') \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java ---------------------------------------------------------------------- diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java index 8451bd5..c0ca612 100644 --- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java +++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java @@ -375,6 +375,7 @@ public class VXQuery { ccConfig.clientNetPort = 39000; ccConfig.clusterNetIpAddress = "127.0.0.1"; ccConfig.clusterNetPort = 39001; + ccConfig.httpPort = 39002; ccConfig.profileDumpPeriod = 10000; cc = new ClusterControllerService(ccConfig); cc.start(); http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/pom.xml ---------------------------------------------------------------------- diff --git a/vxquery-server/pom.xml b/vxquery-server/pom.xml index ef8f348..6c99712 100644 --- a/vxquery-server/pom.xml +++ b/vxquery-server/pom.xml @@ -47,6 +47,10 @@ <configuration> <programs> <program> + <mainClass>org.apache.vxquery.cli.VXQueryClusterShutdown</mainClass> + <name>vxqueryshutdown</name> + </program> + <program> <mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass> <name>vxquerycc</name> </program> http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_actions.py 
---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/scripts/cluster_actions.py b/vxquery-server/src/main/resources/scripts/cluster_actions.py index a7cda17..deeee33 100644 --- a/vxquery-server/src/main/resources/scripts/cluster_actions.py +++ b/vxquery-server/src/main/resources/scripts/cluster_actions.py @@ -62,6 +62,10 @@ class ClusterActions: time.sleep(5) self.start_all_ncs() + def stop_cluster(self): + machine = self.ci.get_master_node_machine() + self.stop_cc_and_all_ncs(machine) + def stop(self): self.stop_all_ncs() time.sleep(2) @@ -109,16 +113,22 @@ class ClusterActions: def start_cc(self, machine): print "Start Cluster Controller." - print " " + machine.get_id() + " " + machine.get_ip() + ":" + machine.get_port() - command = "./vxquery-server/target/appassembler/bin/startcc.sh " + machine.get_ip() + " \"" + machine.get_port() + "\" \"" + machine.get_java_opts() + "\"" + print " " + machine.get_id() + " " + machine.get_client_ip() + ":" + machine.get_client_port() + command = "./vxquery-server/target/appassembler/bin/startcc.sh " + machine.get_client_ip() + " \"" + machine.get_client_port() + "\" \"" + machine.get_java_opts() + "\"" self.run_remote_command(machine.get_username(), machine.get_id(), command) def start_nc(self, machine, cc): print "Start Node Controller." print " " + machine.get_id() + " " + machine.get_ip() - command = "./vxquery-server/target/appassembler/bin/startnc.sh " + machine.get_id() + " " + machine.get_ip() + " " + cc.get_ip() + " \"" + cc.get_port() + "\" \"" + machine.get_java_opts() + "\"" + command = "./vxquery-server/target/appassembler/bin/startnc.sh " + machine.get_id() + " " + machine.get_ip() + " " + cc.get_client_ip() + " \"" + cc.get_client_port() + "\" \"" + machine.get_java_opts() + "\"" self.run_remote_command(machine.get_username(), machine.get_id(), command) + def stop_cc_and_all_ncs(self, machine): + print "Stop Cluster and Node Controllers." 
+ print " " + machine.get_id() + " " + machine.get_client_ip() + ":" + machine.get_client_port() + command = "./vxquery-server/target/appassembler/bin/stopcluster.sh " + machine.get_client_ip() + " \"" + machine.get_client_port() + "\" \"" + machine.get_java_opts() + "\"" + self.run_remote_command(machine.get_username(), machine.get_id(), command) + def stop_cc(self, machine): print "Stop Cluster Controller." print " " + machine.get_id() + " " + machine.get_ip() http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_cli.py ---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/scripts/cluster_cli.py b/vxquery-server/src/main/resources/scripts/cluster_cli.py index 089ad08..bd5efa6 100644 --- a/vxquery-server/src/main/resources/scripts/cluster_cli.py +++ b/vxquery-server/src/main/resources/scripts/cluster_cli.py @@ -39,10 +39,10 @@ def main(argv): sys.exit() elif opt in ('-a', "--action"): # check if file exists. - if arg in ('deploy', 'start', 'stop'): + if arg in ('deploy', 'start', 'stop', 'kill'): action = arg else: - print 'Error: Argument must be a string ("deploy", "start", or "stop") for --action (-a).' + print 'Error: Argument must be a string ("deploy", "start", "stop", or "kill") for --action (-a).' sys.exit() elif opt in ('-c', "--cluster"): # check if file exists. 
@@ -72,6 +72,8 @@ def main(argv): if action == 'start': cluster.start() elif action == 'stop': + cluster.stop_cluster() + elif action == 'kill': cluster.stop() elif action == 'deploy': if deploy_path != "": http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_information.py ---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/scripts/cluster_information.py b/vxquery-server/src/main/resources/scripts/cluster_information.py index 677204b..94b231d 100644 --- a/vxquery-server/src/main/resources/scripts/cluster_information.py +++ b/vxquery-server/src/main/resources/scripts/cluster_information.py @@ -32,12 +32,13 @@ class ClusterInformation: master_node = self.config.getElementsByTagName("master_node")[0] id = NodeXmlReader.get_cluster_id(master_node) ip = NodeXmlReader.get_cluster_ip(master_node) - port = NodeXmlReader.get_cluster_port(master_node) + client_ip = NodeXmlReader.get_client_ip(master_node) + client_port = NodeXmlReader.get_client_port(master_node) java_opts = NodeXmlReader.get_java_opts(master_node) if java_opts is "": java_opts = self.get_java_opts() username = self.get_username() - return Machine(id, ip, username, port, java_opts) + return Machine(id, ip, username, client_ip, client_port, java_opts) def get_node_machine_list(self): nodes = [] @@ -48,7 +49,7 @@ class ClusterInformation: java_opts = NodeXmlReader.get_java_opts(node) if java_opts is "": java_opts = self.get_java_opts() - nodes.append(Machine(id, ip, username, "", java_opts)) + nodes.append(Machine(id, ip, username, "", "", java_opts)) return nodes class NodeXmlReader(object): @@ -64,8 +65,12 @@ class NodeXmlReader(object): return get_tag_text(node, "cluster_ip") @staticmethod - def get_cluster_port(node): - return get_tag_text(node, "cluster_port") + def get_client_ip(node): + return get_tag_text(node, "client_ip") + + @staticmethod + def get_client_port(node): + return 
get_tag_text(node, "client_port") @staticmethod def get_java_opts(node): @@ -90,11 +95,12 @@ class Machine: log_path = "" port = "" - def __init__(self, id, ip, username, port="", java_opts=""): + def __init__(self, id, ip, username, client_ip="", client_port="", java_opts=""): self.id = id self.ip = ip self.username = username - self.port = port + self.client_ip = client_ip + self.client_port = client_port self.java_opts = java_opts def get_id(self): @@ -106,8 +112,11 @@ class Machine: def get_java_opts(self): return self.java_opts - def get_port(self): - return self.port + def get_client_ip(self): + return self.client_ip + + def get_client_port(self): + return self.client_port def get_username(self): return self.username http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/startcc.sh ---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/scripts/startcc.sh b/vxquery-server/src/main/resources/scripts/startcc.sh index 002055c..dd1e5be 100755 --- a/vxquery-server/src/main/resources/scripts/startcc.sh +++ b/vxquery-server/src/main/resources/scripts/startcc.sh @@ -23,7 +23,7 @@ CCHOST=$1 CCPORT=$2 J_OPTS=$3 -#Export JAVA_HOME +# Export JAVA_HOME export JAVA_HOME=${JAVA_HOME} # java opts added parameters @@ -43,8 +43,8 @@ mkdir -p ${CCLOGS_DIR} CC_OPTIONS=" -client-net-ip-address ${CCHOST} -cluster-net-ip-address ${CCHOST} " if [ ! 
-z "${CCPORT}" ] then - CC_OPTIONS=" ${CC_OPTIONS} -cluster-net-port ${CCPORT} " + CC_OPTIONS=" ${CC_OPTIONS} -client-net-port ${CCPORT} " fi -#Launch hyracks cc script without toplogy +# Launch hyracks cc script without toplogy ${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxquerycc ${CC_OPTIONS} &> ${CCLOGS_DIR}/cc_$(date +%Y%m%d%H%M).log & http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/startnc.sh ---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/scripts/startnc.sh b/vxquery-server/src/main/resources/scripts/startnc.sh index c2bda3c..260512e 100755 --- a/vxquery-server/src/main/resources/scripts/startnc.sh +++ b/vxquery-server/src/main/resources/scripts/startnc.sh @@ -25,7 +25,7 @@ CCHOST=$3 CCPORT=$4 J_OPTS=$5 -#Set JAVA_HOME +# Set JAVA_HOME export JAVA_HOME=$JAVA_HOME # java opts added parameters @@ -49,5 +49,5 @@ then fi -#Launch hyracks nc +# Launch hyracks nc ${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxquerync ${NC_OPTIONS} &> ${NCLOGS_DIR}/nc_$(date +%Y%m%d%H%M).log & http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopcc.sh ---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/scripts/stopcc.sh b/vxquery-server/src/main/resources/scripts/stopcc.sh index 3290ec6..f2b6883 100755 --- a/vxquery-server/src/main/resources/scripts/stopcc.sh +++ b/vxquery-server/src/main/resources/scripts/stopcc.sh @@ -21,8 +21,7 @@ hostname USER=$1 -#Kill process -#Kill process +# Kill process PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=vxquerycc'|awk '{print $2}'` if [ "$PID" == "" ]; then http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopcluster.sh ---------------------------------------------------------------------- diff --git 
a/vxquery-server/src/main/resources/scripts/stopcluster.sh b/vxquery-server/src/main/resources/scripts/stopcluster.sh new file mode 100755 index 0000000..238da7f --- /dev/null +++ b/vxquery-server/src/main/resources/scripts/stopcluster.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +CCHOST=$1 +CCPORT=$2 +J_OPTS=$3 + +# Export JAVA_HOME +export JAVA_HOME=${JAVA_HOME} + +# java opts added parameters +if [ ! -z "${J_OPTS}" ] +then + JAVA_OPTS="${JAVA_OPTS} ${J_OPTS}" + export JAVA_OPTS +fi + +VXQUERY_HOME=`pwd` +CCLOGS_DIR=${VXQUERY_HOME}/logs + +# logs dir +mkdir -p ${CCLOGS_DIR} + +# Set up the options for the cc. +CC_OPTIONS=" -client-net-ip-address ${CCHOST} " +if [ ! 
-z "${CCPORT}" ] +then + CC_OPTIONS=" ${CC_OPTIONS} -client-net-port ${CCPORT} " +fi + +# Launch hyracks cc script without toplogy +echo "${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &" +${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log & http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopnc.sh ---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/scripts/stopnc.sh b/vxquery-server/src/main/resources/scripts/stopnc.sh index 56ffc66..8f29de5 100755 --- a/vxquery-server/src/main/resources/scripts/stopnc.sh +++ b/vxquery-server/src/main/resources/scripts/stopnc.sh @@ -21,7 +21,7 @@ hostname USER=$1 -#Kill process +# Kill process PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=vxquerync'|awk '{print $2}'` if [ "$PID" == "" ]; then
