Control: tags -1 patch

This should fix this (and, as the filename suggests, also re-adds some older fixes that seem to have been dropped by mistake between -1.1 and -2), but it has not been tested: the relevant tests aren't run by default, and there might conceivably be legal issues around ulmo's tests.
Description: Don't fail on malformed or changed test data
CDEC has malformed lines that pandas 1.4+ errors out on (I'm not sure why earlier pandas didn't do the same); GHCN has simply changed at the source. Author: Rebecca N. Palmer <rebecca_pal...@zoho.com> (but upstream independently came up with the on_bad_lines part) Bug-Debian: https://bugs.debian.org/1017573 https://bugs.debian.org/1044057 Forwarded: partly no, partly not-needed, partly https://github.com/ulmo-dev/ulmo/pull/214 --- a/test/cdec_historical_test.py +++ b/test/cdec_historical_test.py @@ -9,7 +9,7 @@ def test_get_stations(): stations_file = 'cdec/historical/all_stations.csv' with test_util.mocked_urls(stations_file): stations = ulmo.cdec.historical.get_stations() - assert 2000 < len(stations) + assert 1900 < len(stations) assert u'PRA' in stations.index --- a/test/ghcn_daily_test.py +++ b/test/ghcn_daily_test.py @@ -10,12 +10,12 @@ import test_util test_stations = [ { 'country': 'US', - 'elevation': 286.5, + 'elevation': 325.8, 'gsn_flag': 'GSN', 'hcn_flag': 'HCN', 'id': 'USW00003870', - 'latitude': 34.8831, - 'longitude': -82.2203, + 'latitude': 34.8833, + 'longitude': -82.2197, 'name': 'GREER', 'network': 'W', 'network_id': '00003870', --- a/ulmo/cdec/historical/core.py +++ b/ulmo/cdec/historical/core.py @@ -74,9 +74,9 @@ def get_stations(): # I haven't found a better list of stations, seems pretty janky # to just have them in a file, and not sure if/when it is updated. 
url = 'http://cdec.water.ca.gov/misc/all_stations.csv' - # the csv is malformed, so some rows think there are 7 fields - col_names = ['id','meta_url','name','num','lat','lon','junk'] - df = pd.read_csv(url, names=col_names, header=None, quotechar="'",index_col=0) + # the csv is malformed, so some rows think there are 7-8 fields + col_names = ['id','meta_url','name','num','lat','lon'] + df = pd.read_csv(url, names=col_names, header=None, quotechar="'",index_col=0,on_bad_lines='skip') return df @@ -170,7 +170,7 @@ def get_station_sensors(station_ids=None, sensor_ids=None, resolutions=None): sensor_list.columns = ['sensor_id', 'variable', 'resolution','timerange'] except: sensor_list.columns = ['variable', 'sensor_id', 'resolution', 'varcode', 'method', 'timerange'] - sensor_list[['variable', 'units']] = sensor_list.variable.str.split(',', 1, expand=True) + sensor_list[['variable', 'units']] = sensor_list.variable.str.split(',', n=1, expand=True) sensor_list.resolution = sensor_list.resolution.str.strip('()') station_sensors[station_id] = _limit_sensor_list(sensor_list, sensor_ids, resolutions)