# HG changeset patch # User Natacha Da Riba # Date 1673027646 -3600 # Fri Jan 06 18:54:06 2023 +0100 # Node ID 8375e51a1e627c60d2621b8e497c45a552e1fdfe # Parent 2bebf06d135f5976e6f21731cb76ff77b2001c14 era5: add scraper diff --git a/meteo_scraper/cli.py b/meteo_scraper/cli.py --- a/meteo_scraper/cli.py +++ b/meteo_scraper/cli.py @@ -1,6 +1,7 @@ import click from inireader import reader +import pandas as pd from sqlalchemy import create_engine from tshistory.api import timeseries from rework import ( @@ -16,6 +17,13 @@ from meteo_scraper.gfs_scraper import ( get_last_run_data ) +from meteo_scraper.era5_scraper import ( + get_historical_data, + get_last_month_data +) +from meteo_scraper.utils import ( + RESTRICTED_WINDOWS, +) CONFIG = reader('meteo.cfg') @@ -58,6 +66,14 @@ rule='0 27 4,10,16,22 * * *' ) + api.prepare( + engine, + 'era5_last_data', + domain='meteo', + rule='0 05 22 * * *', + inputdata={'parameter': 'temperature'} + ) + @meteo.command(name='scrap-ecmwf-hres') @click.option( @@ -92,3 +108,39 @@ def scrap_ecmwf_ens_fcst(): tsa = timeseries(CONFIG['uri']['dburi']) get_last_run_data(tsa) + + +@meteo.command(name='save-era5-lastdata') +@click.argument('weather-parameter') +@click.option( + "--zone" +) +def era5_last_data(weather_parameter, zone=None): + tsa = timeseries(CONFIG['uri']['dburi']) + if zone is None: + zones = RESTRICTED_WINDOWS.keys() + else: + zones = [zone] + + for zone in zones: + get_last_month_data(tsa, zone, weather_parameter) + + +@meteo.command(name='save-era5-histdata') +@click.argument('weather-parameter') +@click.argument('from-date') +@click.argument('to-date') +@click.option( + "--zone" +) +def era5_historical_data(weather_parameter, from_date, to_date, zone=None): + tsa = timeseries(CONFIG['uri']['dburi']) + from_date = pd.to_datetime(from_date) + to_date = pd.to_datetime(to_date) + if zone is None: + zones = RESTRICTED_WINDOWS.keys() + else: + zones = [zone] + + for zone in zones: + get_historical_data(tsa, zone, 
weather_parameter, from_date, to_date) diff --git a/meteo_scraper/era5_scraper.py b/meteo_scraper/era5_scraper.py new file mode 100644 --- /dev/null +++ b/meteo_scraper/era5_scraper.py @@ -0,0 +1,139 @@ +import numpy as np +import pandas as pd + +import metview as mv +import cdsapi +from shapely.geometry import Polygon +from matplotlib.path import Path as mplt_Path + +from meteo_scraper.utils import ( + load_zones, + RESTRICTED_WINDOWS, +) + +c = cdsapi.Client() + +PARAMETERS = { + 'temperature': ('2t', 'K'), + '10m_u_wind': ('10u', 'm.s-1'), + '10m_v_wind': ('10v', 'm.s-1'), + 'total_precipitation': ('tp', 'kg.m-2'), + 'surface_solar_radiation_downwards': ('ssrd', 'J.m-2'), +} + + +def construct_zone_mask(zone, longitudes, latitudes): + gridpoints = np.vstack((longitudes, latitudes)).T + geo_df = load_zones([zone]) + + if isinstance(geo_df['geometry'][0], Polygon): + polygon = geo_df['geometry'][0] + coords = polygon.exterior.coords.xy + x = coords[0] + y = coords[1] + + xy = np.vstack((x, y)).T + + path = mplt_Path(xy) + + mask = path.contains_points(gridpoints) + return mask + + # many polygons + polygons = list(geo_df['geometry'][0].geoms) + mask = np.full(np.shape(latitudes), False) + for polygon in polygons: + coords = polygon.exterior.coords.xy + x = coords[0] + y = coords[1] + + xy = np.vstack((x, y)).T + + path = mplt_Path(xy) + + inside = path.contains_points(gridpoints) + mask = np.where(inside==True, inside, mask) + + return mask + + +def get_and_save_era5(tsa, zone, param, days, months, years): + c.retrieve( + 'reanalysis-era5-single-levels', + { + 'product_type': 'reanalysis', + 'variable': PARAMETERS[param][0], + 'year': years, + 'month': months, + 'day': days, + 'time': [ + '00:00', '01:00', '02:00', + '03:00', '04:00', '05:00', + '06:00', '07:00', '08:00', + '09:00', '10:00', '11:00', + '12:00', '13:00', '14:00', + '15:00', '16:00', '17:00', + '18:00', '19:00', '20:00', + '21:00', '22:00', '23:00', + ], + 'area': RESTRICTED_WINDOWS[zone], + 
'format': 'grib', + }, + f'{zone}.grib') + + gribfile = mv.read(source=f'{zone}.grib') + gt = gribfile[PARAMETERS[param][0]] + values = mv.values(gt) + + latitudes = gribfile.latitudes()[0] + longitudes = gribfile.longitudes()[0] + index = np.unique(mv.valid_date(gt)) + + mask_zone = construct_zone_mask(zone, longitudes, latitudes) + + val_zone = values[:, mask_zone] + val_mean_zone = np.nanmean(val_zone, axis=1) # here we use nanmean because there is no data over oceans (nan) + ts = pd.Series(data=val_mean_zone, index = index) + ts = ts.tz_localize('utc') + + seriesname = f'meteo.area.{zone.lower()}.{PARAMETERS[param][0]}.{PARAMETERS[param][1].lower()}.era5.obs.h' + tsa.update(seriesname, ts, author='scraper-era5') + + +def get_last_month_data(tsa, zone, param): + last_day = pd.Timestamp.today() - pd.Timedelta(days=5) + years = [str(last_day.year)] + months = ["{:02d}".format(last_day.month)] + days = np.arange(1, last_day.day + 1) + days = ["%02d" % x for x in days] + + get_and_save_era5(tsa, zone, param, days, months, years) + + +def get_historical_data(tsa, zone, param, from_date, to_date): + last_day = pd.Timestamp.today() - pd.Timedelta(days=31) + to_date = min(to_date, last_day) #this will prevent missing data crash from era5 + + days = [ + '01', '02', '03', '04', '05', '06', + '07', '08', '09', '10', '11', '12', + '13', '14', '15', '16', '17', '18', + '19', '20', '21', '22', '23', '24', + '25', '26', '27', '28', '29', '30', '31' + ] + + if to_date.year == from_date.year: + years = [str(to_date.year)] + months = list(np.arange(from_date.month, to_date.month +1)) + months = ["%02d" % x for x in months] + get_and_save_era5(tsa, zone, param, days, months, years) + else : + months = [ + '01', '02', '03', '04', '05', '06', + '07', '08', '09', '10', '11', '12', + ] + for year in list(np.arange(from_date.year, to_date.year +1)): + if year == last_day.year: + months = list(np.arange(1, last_day.month +1)) + months = ["%02d" % x for x in months] + 
get_and_save_era5(tsa, zone, param, days, months, [str(year)]) diff --git a/meteo_scraper/task.py b/meteo_scraper/task.py --- a/meteo_scraper/task.py +++ b/meteo_scraper/task.py @@ -14,6 +14,13 @@ from meteo_scraper.gfs_scraper import ( get_last_run_data ) +from meteo_scraper.era5_scraper import ( + get_historical_data, + get_last_month_data +) +from meteo_scraper.utils import ( + RESTRICTED_WINDOWS, +) CONFIG = reader('meteo.cfg') @@ -126,3 +133,57 @@ ) with task.capturelogs(std=True): get_last_run_data(tsa) + + +@task( + domain='meteo', + inputs=( + rio.string('parameter', required=True), + rio.string('zone', required=False), + ), +) +def era5_last_data(task): + tsa = apimaker( + {'db': {'uri': str(task.engine.url)}, + 'sources': {} + } + ) + with task.capturelogs(std=True): + inputs = task.input + if 'zone' not in inputs: + zones = RESTRICTED_WINDOWS.keys() + else: + zones = [inputs['zone']] + for zone in zones: + get_last_month_data(tsa, zone, inputs['parameter']) + + +@task( + domain='meteo', + inputs=( + rio.string('parameter', required=True), + rio.string('fromdate', required=True), + rio.string('todate', required=True), + rio.string('zone', required=False) + ), +) +def era5_historical_data(task): + tsa = apimaker( + {'db': {'uri': str(task.engine.url)}, + 'sources': {} + } + ) + with task.capturelogs(std=True): + inputs = task.input + if 'zone' not in inputs: + zones = RESTRICTED_WINDOWS.keys() + else: + zones = [inputs['zone']] + for zone in zones: + get_historical_data( + tsa, + zone, + inputs['parameter'], + inputs['fromdate'], + inputs['todate'] + ) diff --git a/meteo_scraper/utils.py b/meteo_scraper/utils.py --- a/meteo_scraper/utils.py +++ b/meteo_scraper/utils.py @@ -17,6 +17,48 @@ 'PL', 'PT', 'RO', 'RS', 'SE_1', 'SE_2', 'SE_3', 'SE_4', 'SI', 'SK' ] +RESTRICTED_WINDOWS = { + 'AT': [49.12, 9.35, 46.28, 17.4], + 'BE': [51.59, 2.40, 49.46, 6.57], + 'BG': [44.39, 22.03, 41.15, 29.12], + 'CH': [47.70, 5.95, 45.80, 10.47], + 'CZ': [51.13, 11.92, 48.51, 
19.16], + 'DE_LU': [54.85, 5.85, 47.26, 15.03], + 'DK_1': [58.10, 7.88, 54.46, 13.09], + 'DK_2': [58.10, 7.88, 54.46, 13.09], + 'EE': [59.73, 21.58, 57.42, 28.45], + 'ES': [43.77, -9.32, 35.99, 3.30], + 'FI': [70.32, 20.09, 59.83, 31.98], + 'FR': [51, -4.75, 42.36, 8.12], + 'GR': [41.93, 18.97, 34.68, 26.85], + 'HR': [46.67, 13.25, 42.26, 19.64], + 'HU': [46.69, 15.89, 45.61, 23.25], + 'IT_CNOR': [45.14, 8.88, 40.17, 15.44], + 'IT_CSUD': [45.14, 8.88, 40.17, 15.44], + 'IT_NORD': [47.27, 6.47, 41.72, 14.33], + 'IT_SARD': [41.32, 7.89, 38.80, 9.87], + 'IT_SICI': [38.46, 12.17, 36.54, 15.65], + 'IT_SUD': [42.60, 12.15, 37.71, 18.71], + 'LT': [56.59, 20.80, 53.85, 27.00], + 'LV': [58.22, 20.60, 55.59, 28.48], + 'NL': [53.44, 3.34, 50.72, 7.24], + 'NO_1': [65.23, 3.93, 57.80, 13.84], + 'NO_2': [65.23, 3.93, 57.80, 13.84], + 'NO_3': [65.23, 3.93, 57.80, 13.84], + 'NO_4': [71.56, 10.25, 63.23, 32.67], + 'NO_5': [65.23, 3.93, 57.80, 13.84], + 'PL': [55.07, 13.78, 48.85, 24.50], + 'PT': [42.25, -9.76, 36.80, -5.99], + 'RO': [48.48, 20.07, 43.52, 29.93], + 'RS': [46.36, 18.52, 42.11, 23.14], + 'SE_1': [69.35, 9.65, 55.08, 24.74], + 'SE_2': [69.35, 9.65, 55.08, 24.74], + 'SE_3': [69.35, 9.65, 55.08, 24.74], + 'SE_4': [69.35, 9.65, 55.08, 24.74], + 'SI': [46.96, 13.29, 45.36, 16.69], + 'SK': [49.74, 16.71, 47.65, 22.63] +} + DATADIR = Path(__file__).parent / 'data' CITIES = pd.read_csv(DATADIR / 'cities_scraper_meteo.csv', index_col=0) diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -17,7 +17,8 @@ 'metview', 'scipy', 'geopandas', - 'Shapely' + 'Shapely', + 'cdsapi' ], entry_points={ 'console_scripts': [