M meteo_scraper/cli.py +17 -1
@@ 13,6 13,9 @@ from meteo_scraper.ecmwf_scraper import
get_last_ens_data,
get_last_hres_data
)
+from meteo_scraper.gfs_scraper import (
+    get_last_run_data
+)
CONFIG = reader('meteo.cfg')
@@ 48,6 51,13 @@ def setup_tasks():
inputdata={'filename': 'ecmwf_ens.grib'}
)
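+    # the 00/06/12/18 UTC GFS cycles are assumed to be available on NOMADS by these times (~4.5 h after each cycle)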
+    api.prepare(
+        engine,
+        'scrap_gfs_1p00',
+        domain='meteo',
+        rule='0 27 4,10,16,22 * * *'
+    )
+
@meteo.command(name='scrap-ecmwf-hres')
@click.option(
@@ 75,4 85,10 @@ def scrap_ecmwf_ens_fcst(filename='ecmwf
)
def ingest_ecmwf_ens_fcst(filename='ecmwf_ens.grib'):
tsa = timeseries(CONFIG['uri']['dburi'])
-    ens_ecmwfdata_to_refinery(tsa, filename)
\ No newline at end of file
+    ens_ecmwfdata_to_refinery(tsa, filename)
+
+
+@meteo.command(name='scrap-gfs-1p00')
+def scrap_gfs_1p00():
+    tsa = timeseries(CONFIG['uri']['dburi'])
+    get_last_run_data(tsa)
M meteo_scraper/ecmwf_scraper.py +7 -16
@@ 6,13 6,14 @@ import metview as mv
from ecmwf.opendata import Client
from meteo_scraper.utils import (
+ CITIES,
construct_zone_mask,
nearest_gridpoint,
+ ZONES
)
client = Client('ecmwf', beta=True)
-DATADIR = Path(__file__).parent / 'data'
# irregular timesteps of weather model outputs
STEPS = {
@@ 41,16 42,6 @@ PARAMETERS = {
PARAMS = ['2t', '10u', '10v', 'tp']
-ZONES = [
- 'AT', 'BE', 'BG', 'CH', 'CZ', 'DE_LU', 'DK_1', 'DK_2',
- 'EE', 'ES', 'FI', 'FR', 'GR', 'HR', 'HU', 'IT_CALA', 'IT_CNOR',
- 'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD',
- 'LT', 'LV', 'NL', 'NO_1', 'NO_2', 'NO_3', 'NO_4', 'NO_5',
- 'PL', 'PT', 'RO', 'RS', 'SE_1', 'SE_2', 'SE_3', 'SE_4', 'SI', 'SK'
-]
-
-CITIES = pd.read_csv(DATADIR / 'cities_scraper_meteo.csv', index_col=0)
-
def find_last_run(model):
return client.latest(
@@ 61,7 52,7 @@ def find_last_run(model):
def get_ens_data(filename_ens, date, time):
for param in PARAMS:
- filename_param = f'{param}_{filename_ens}'
+ filename_param = f'{param}_{filename_ens}'
client.retrieve(
date=date,
time=time,
@@ 112,7 103,7 @@ def exploit_grib_file_hres(tsa, gribfile
index = np.unique(mv.valid_date(gt))
for zone in ZONES:
- mask_zone = construct_zone_mask(zone, np.shape(values))
+ mask_zone = construct_zone_mask(zone, np.shape(values), model='ecmwf')
val_zone = values[:, mask_zone]
val_mean_zone = np.mean(val_zone, axis=1)
ts = pd.Series(data=val_mean_zone, index = index)
@@ 121,7 112,7 @@ def exploit_grib_file_hres(tsa, gribfile
tsa.replace(seriesname, ts, author='scraper-ecmwf') #replace
print(f'retrieving {param} for cities...')
for row in CITIES.iterrows():
- index_city = nearest_gridpoint(row[1].lng, row[1].lat)
+ index_city = nearest_gridpoint(row[1].lng, row[1].lat, 'ecmwf')
val_city = values[:, index_city]
ts = pd.Series(data=val_city, index = index)
@@ 138,7 129,7 @@ def exploit_grib_file_ens(tsa, gribfile,
index = np.unique(mv.valid_date(gt))
for zone in ZONES:
- mask_zone = construct_zone_mask(zone, np.shape(values))
+ mask_zone = construct_zone_mask(zone, np.shape(values), model='ecmwf')
val_zone = values[:, mask_zone]
val_mean_zone = np.mean(val_zone, axis=1)
list_ts = []
@@ 154,7 145,7 @@ def exploit_grib_file_ens(tsa, gribfile,
# print(f'retrieving {param} for cities...')
# for row in CITIES.iterrows():
# print(row)
- # index_city = nearest_gridpoint(row[1].lng, row[1].lat)
+ # index_city = nearest_gridpoint(row[1].lng, row[1].lat, 'ecmwf')
# val_city = values[:, index_city]
# list_ts = []
# for ens in range(51):
A => meteo_scraper/gfs_scraper.py +73 -0
@@ 0,0 1,73 @@
+import xarray as xr
+import numpy as np
+import pandas as pd
+from bs4 import BeautifulSoup
+import requests
+
+from meteo_scraper.utils import (
+ CITIES,
+ construct_zone_mask,
+ nearest_gridpoint,
+ ZONES
+)
+
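+# NOMADS GrADS data server (OpenDAP) index for the 1.00 deg GFS runs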
+GFS_DATA_URL = 'https://nomads.ncep.noaa.gov/dods/gfs_1p00'
+
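+# NOMADS variable name -> (parameter label, unit) used to build the series names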
+PARAMETERS = {
+ 'tmp2m': ('2t', 'K'),
+ 'dlwrfsfc': ('downrad_longwave', 'wm-2'),
+ 'dswrfsfc': ('downrad_shortwave', 'wm-2'),
+}
+
+
+def find_last_run():
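+    # each available date shows up as a 'dir' link (gfs<YYYYMMDD>) on the index page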
+ response = requests.get(GFS_DATA_URL)
+ soup = BeautifulSoup(response.content, 'html.parser')
+ all_hrefs = [a.get('href') for a in soup.find_all('a') if a.text == 'dir']
+ avail_dates = [url.split('/gfs')[-1] for url in all_hrefs]
+ last_date = pd.to_datetime(avail_dates[-1], format='%Y%m%d')
+
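+    # inside the date folder, each run exposes an 'info' link; drop the _anl analysis entries and keep the run hour (e.g. '06z') of the newest one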
+ url = f'{GFS_DATA_URL}/gfs{avail_dates[-1]}'
+ response = requests.get(url)
+ soup = BeautifulSoup(response.content, 'html.parser')
+ all_hrefs = [a.get('href') for a in soup.find_all('a') if a.text == 'info']
+ avail_runs = [url.split('/gfs')[-1] for url in all_hrefs]
+ avail_runs = [url.split('.info')[0] for url in avail_runs if '_anl.info' not in url]
+
+ last_run = avail_runs[-1].split('_')[-1]
+
+ return (last_date, last_run)
+
+
+def get_last_run_data(tsa):
+ date, run = find_last_run()
+ url = f'{GFS_DATA_URL}/gfs{date.strftime("%Y%m%d")}/gfs_1p00_{run}'
+
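+    # the run is opened remotely over OpenDAP; values should only be transferred when each variable is read below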
+ with xr.open_dataset(url) as ds:
+ for var, (param, unit) in PARAMETERS.items():
+ print(f'retrieving {param} for 1p00...')
+ da = ds[var]
+
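+            # flatten the 181x360 lat/lon grid so the 1-D zone masks and gridpoint indices apply (129 three-hourly steps)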
+ data = da.data
+            data = np.reshape(data, (129, 181 * 360))
+ time = da.time.data
+
+ for zone in ZONES:
+ print(f'retrieving {param} for {zone}...')
+ mask_zone = construct_zone_mask(zone, np.shape(data), model='1p00')
+
+ val_zone = data[:, mask_zone]
+ val_mean_zone = np.mean(val_zone, axis=1)
+ ts = pd.Series(data=val_mean_zone, index = time)
+ seriesname = f'meteo.area.{zone.lower()}.{param}.{unit.lower()}.gfs.1p00.fcst.3h'
+ tsa.replace(seriesname, ts, author='scraper-gfs')
+
+ print(f'retrieving {param} for cities...')
+ for row in CITIES.iterrows():
+ index_city = nearest_gridpoint(row[1].lng, row[1].lat, '1p00')
+ val_city = data[:, index_city]
+ ts = pd.Series(data=val_city, index = time)
+ seriesname = f"meteo.city.{(row[1].country).replace(' ', '').lower()}.{(row[1].city_ascii).replace(' ', '').lower()}.{param}.{unit.lower()}.gfs.1p00.fcst.3h"
+ tsa.replace(seriesname, ts, author='scraper-gfs')
+
M meteo_scraper/task.py +16 -0
@@ 11,6 11,9 @@ from meteo_scraper.ecmwf_scraper import
get_last_ens_data,
get_last_hres_data
)
+from meteo_scraper.gfs_scraper import (
+    get_last_run_data
+)
CONFIG = reader('meteo.cfg')
@@ 110,3 113,16 @@ def scrap_ecmwf_ens(task):
domain='meteo',
inputdata={'filename': filename}
)
+
+
+@task(
+ domain='meteo'
+)
+def scrap_gfs_1p00(task):
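+    # build a timeseries API handle bound to the task's own engine (no extra sources)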
+ tsa = apimaker(
+ {'db': {'uri': str(task.engine.url)},
+ 'sources': {}
+ }
+ )
+ with task.capturelogs(std=True):
+ get_last_run_data(tsa)
M meteo_scraper/utils.py +35 -6
@@ 9,15 9,45 @@ from matplotlib.path import Path as mplt
HERE = Path(__file__).parent
+ZONES = [
+ 'AT', 'BE', 'BG', 'CH', 'CZ', 'DE_LU', 'DK_1', 'DK_2',
+ 'EE', 'ES', 'FI', 'FR', 'GR', 'HR', 'HU', 'IT_CALA', 'IT_CNOR',
+ 'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD',
+ 'LT', 'LV', 'NL', 'NO_1', 'NO_2', 'NO_3', 'NO_4', 'NO_5',
+ 'PL', 'PT', 'RO', 'RS', 'SE_1', 'SE_2', 'SE_3', 'SE_4', 'SI', 'SK'
+]
+
+DATADIR = Path(__file__).parent / 'data'
+CITIES = pd.read_csv(DATADIR / 'cities_scraper_meteo.csv', index_col=0)
+
+
def load_zones(zones: list):
geojson = [gpd.read_file(HERE / 'data' / 'geojson' / f'{x}.geojson') for x in zones]
geo = pd.concat(geojson).set_index('zoneName').sort_index()
return geo
-def construct_zone_mask(zone, gt_dim):
- longitudes = np.tile(np.arange(-180, 180, 0.4), 451)
- latitudes = np.repeat(np.arange(90, -90.4, -0.4), 900)
+def construct_lat_lon(model):
+ if model == 'ecmwf':
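+        # ECMWF open-data 0.4 deg grid: 900 longitudes (-180 -> 179.6) x 451 latitudes (90 -> -90), longitude varying fastest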
+ longitudes = np.tile(np.arange(-180, 180, 0.4), 451)
+ latitudes = np.repeat(np.arange(90, -90.4, -0.4), 900)
+ return (longitudes, latitudes)
+
+ if model == '1p00':
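+        # GFS 1.00 deg grid as built here: 360 longitudes x 181 latitudes (-90 -> 90), east longitudes past 180 wrapped to negative values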
+ longitudes = np.tile(
+ np.concatenate((np.arange(0, 181, 1), np.arange(-179, 0, 1)), axis=None),
+ 181
+ )
+ latitudes = np.repeat(
+ np.concatenate((np.arange(-90, 0, 1), np.arange(0, 91, 1)), axis=None),
+ 360
+ )
+ return (longitudes, latitudes)
+
+
+def construct_zone_mask(zone, gt_dim, model):
+
+ (longitudes, latitudes) = construct_lat_lon(model)
assert np.shape(longitudes)[0] == gt_dim[1]
assert np.shape(latitudes)[0] == gt_dim[1]
@@ 57,9 87,8 @@ def construct_zone_mask(zone, gt_dim):
return mask
-def nearest_gridpoint(lon, lat):
- longitudes = np.tile(np.arange(-180, 180, 0.4), 451)
- latitudes = np.repeat(np.arange(90, -90.4, -0.4), 900)
+def nearest_gridpoint(lon, lat, model):
+ (longitudes, latitudes) = construct_lat_lon(model)
gridpoints = np.vstack((longitudes, latitudes)).T
_, index = spatial.KDTree(gridpoints).query([lon, lat])