gfs: add scraper
M meteo_scraper/cli.py +17 -1
@@ 13,6 13,9 @@ from meteo_scraper.ecmwf_scraper import 
     get_last_ens_data,
     get_last_hres_data
 )
+from meteo_scraper.gfs_scraper import (
+    get_last_run_data
+)
 
 CONFIG = reader('meteo.cfg')
 

@@ 48,6 51,13 @@ def setup_tasks():
             inputdata={'filename': 'ecmwf_ens.grib'}
         )
 
+    api.prepare(
+        engine,
+        'scrap_gfs_1p00',
+        domain='meteo',
+        rule='0 27 4,10,16,22 * * *'
+    )
+
 
 @meteo.command(name='scrap-ecmwf-hres')
 @click.option(

@@ 75,4 85,10 @@ def scrap_ecmwf_ens_fcst(filename='ecmwf
 )
 def ingest_ecmwf_ens_fcst(filename='ecmwf_ens.grib'):
     tsa = timeseries(CONFIG['uri']['dburi'])
-    ens_ecmwfdata_to_refinery(tsa, filename)
\ No newline at end of file
+    ens_ecmwfdata_to_refinery(tsa, filename)
+
+
+@meteo.command(name='scrap-gfs-1p00')
+def scrap_gfs_1p00():
+    tsa = timeseries(CONFIG['uri']['dburi'])
+    get_last_run_data(tsa)
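
For a quick local check, a minimal sketch of exercising the new command through click's test runner, using the `meteo` click group defined in `meteo_scraper/cli.py` (this bypasses the scheduler, but it will still read `meteo.cfg` and reach out to NOMADS like a normal run):

```python
# Hedged sketch, not part of the patch: invoke the new CLI entry point in-process.
from click.testing import CliRunner
from meteo_scraper.cli import meteo  # the click group the commands are attached to

runner = CliRunner()
result = runner.invoke(meteo, ['scrap-gfs-1p00'])
print(result.exit_code)
print(result.output)
```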

M meteo_scraper/ecmwf_scraper.py +7 -16
@@ 6,13 6,14 @@ import metview as mv
 from ecmwf.opendata import Client
 
 from meteo_scraper.utils import (
+    CITIES,
     construct_zone_mask,
     nearest_gridpoint,
+    ZONES
 )
 
 client = Client('ecmwf', beta=True)
 
-DATADIR = Path(__file__).parent / 'data'
 
 # irregular timesteps of weather model outputs
 STEPS = {

@@ 41,16 42,6 @@ PARAMETERS = {
 
 PARAMS = ['2t', '10u', '10v', 'tp']
 
-ZONES = [
-    'AT', 'BE', 'BG', 'CH', 'CZ', 'DE_LU', 'DK_1', 'DK_2',
-    'EE', 'ES', 'FI', 'FR', 'GR', 'HR', 'HU', 'IT_CALA', 'IT_CNOR',
-    'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD',
-    'LT', 'LV', 'NL', 'NO_1', 'NO_2', 'NO_3', 'NO_4', 'NO_5',
-    'PL', 'PT', 'RO', 'RS', 'SE_1', 'SE_2', 'SE_3', 'SE_4', 'SI', 'SK'
-]
-
-CITIES = pd.read_csv(DATADIR / 'cities_scraper_meteo.csv', index_col=0)
-
 
 def find_last_run(model):
     return client.latest(

@@ 61,7 52,7 @@ def find_last_run(model):
 
 def get_ens_data(filename_ens, date, time):
     for param in PARAMS:
-        filename_param = f'{param}_{filename_ens}' 
+        filename_param = f'{param}_{filename_ens}'
         client.retrieve(
             date=date,
             time=time,

@@ 112,7 103,7 @@ def exploit_grib_file_hres(tsa, gribfile
         index = np.unique(mv.valid_date(gt))
 
         for zone in ZONES:
-            mask_zone = construct_zone_mask(zone, np.shape(values))
+            mask_zone = construct_zone_mask(zone, np.shape(values), model='ecmwf')
             val_zone = values[:, mask_zone]
             val_mean_zone = np.mean(val_zone, axis=1)
             ts = pd.Series(data=val_mean_zone, index = index)

@@ 121,7 112,7 @@ def exploit_grib_file_hres(tsa, gribfile
             tsa.replace(seriesname, ts, author='scraper-ecmwf') #replace
         print(f'retrieving {param} for cities...')
         for row in CITIES.iterrows():
-            index_city = nearest_gridpoint(row[1].lng, row[1].lat)
+            index_city = nearest_gridpoint(row[1].lng, row[1].lat, 'ecmwf')
             val_city = values[:, index_city]
             ts = pd.Series(data=val_city, index = index)
 

@@ 138,7 129,7 @@ def exploit_grib_file_ens(tsa, gribfile,
     index = np.unique(mv.valid_date(gt))
 
     for zone in ZONES:
-        mask_zone = construct_zone_mask(zone, np.shape(values))
+        mask_zone = construct_zone_mask(zone, np.shape(values), model='ecmwf')
         val_zone = values[:, mask_zone]
         val_mean_zone = np.mean(val_zone, axis=1)
         list_ts = []

@@ 154,7 145,7 @@ def exploit_grib_file_ens(tsa, gribfile,
     # print(f'retrieving {param} for cities...')
     # for row in CITIES.iterrows():
     #     print(row)
-    #     index_city = nearest_gridpoint(row[1].lng, row[1].lat)
+    #     index_city = nearest_gridpoint(row[1].lng, row[1].lat, 'ecmwf')
     #     val_city = values[:, index_city]
     #     list_ts = []
     #     for ens in range(51):

A => meteo_scraper/gfs_scraper.py +73 -0
@@ 0,0 1,73 @@ 
+import xarray as xr
+import numpy as np
+import pandas as pd
+from bs4 import BeautifulSoup
+import requests
+
+from meteo_scraper.utils import (
+    CITIES,
+    construct_zone_mask,
+    nearest_gridpoint,
+    ZONES
+)
+
+GFS_DATA_URL = 'https://nomads.ncep.noaa.gov/dods/gfs_1p00'
+
+PARAMETERS = {
+    'tmp2m': ('2t', 'K'),
+    'dlwrfsfc': ('downrad_longwave', 'wm-2'),
+    'dswrfsfc': ('downrad_shortwave', 'wm-2'),
+}
+
+
+def find_last_run():
+    response = requests.get(GFS_DATA_URL)
+    soup = BeautifulSoup(response.content, 'html.parser')
+    all_hrefs = [a.get('href') for a in soup.find_all('a') if a.text == 'dir']
+    avail_dates = [href.split('/gfs')[-1] for href in all_hrefs]
+    last_date = pd.to_datetime(avail_dates[-1], format='%Y%m%d')
+
+    url = f'{GFS_DATA_URL}/gfs{avail_dates[-1]}'
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, 'html.parser')
+    all_hrefs = [a.get('href') for a in soup.find_all('a') if a.text == 'info']
+    avail_runs = [href.split('/gfs')[-1] for href in all_hrefs]
+    avail_runs = [run.split('.info')[0] for run in avail_runs if '_anl.info' not in run]
+
+    last_run = avail_runs[-1].split('_')[-1]
+
+    return (last_date, last_run)
+
+
+def get_last_run_data(tsa):
+    date, run = find_last_run()
+    url = f'{GFS_DATA_URL}/gfs{date.strftime("%Y%m%d")}/gfs_1p00_{run}'
+
+    with xr.open_dataset(url) as ds:
+        for var, (param, unit) in PARAMETERS.items():
+            print(f'retrieving {param} for 1p00...')
+            da = ds[var]
+
+            data = da.data
+            data = np.reshape(data, (data.shape[0], 181 * 360))  # flatten the 181x360 one-degree grid
+            time = da.time.data
+
+            for zone in ZONES:
+                print(f'retrieving {param} for {zone}...')
+                mask_zone = construct_zone_mask(zone, np.shape(data), model='1p00')
+
+                val_zone = data[:, mask_zone]
+                val_mean_zone = np.mean(val_zone, axis=1)
+                ts = pd.Series(data=val_mean_zone, index=time)
+                seriesname = f'meteo.area.{zone.lower()}.{param}.{unit.lower()}.gfs.1p00.fcst.3h'
+                tsa.replace(seriesname, ts, author='scraper-gfs')
+
+            print(f'retrieving {param} for cities...')
+            for row in CITIES.iterrows():
+                index_city = nearest_gridpoint(row[1].lng, row[1].lat, '1p00')
+                val_city = data[:, index_city]
+                ts = pd.Series(data=val_city, index=time)
+                seriesname = (
+                    f"meteo.city.{row[1].country.replace(' ', '').lower()}"
+                    f".{row[1].city_ascii.replace(' ', '').lower()}"
+                    f".{param}.{unit.lower()}.gfs.1p00.fcst.3h"
+                )
+                tsa.replace(seriesname, ts, author='scraper-gfs')
+
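
A note on the reshape above: the GFS 1p00 fields come out of xarray as (time, lat, lon) with 181 latitudes and 360 longitudes, and the C-order reshape makes longitude the fastest-varying axis. `construct_lat_lon('1p00')` mirrors that by repeating latitudes and tiling longitudes; a minimal sketch of the convention:

```python
# Hedged sketch of the flattening convention assumed by get_last_run_data
# and construct_lat_lon('1p00'); pure numpy, runnable standalone.
import numpy as np

lats = np.concatenate((np.arange(-90, 0, 1), np.arange(0, 91, 1)))    # 181 latitudes
lons = np.concatenate((np.arange(0, 181, 1), np.arange(-179, 0, 1)))  # 360 longitudes

cube = np.arange(2 * 181 * 360).reshape(2, 181, 360)  # dummy (time, lat, lon) cube
flat = np.reshape(cube, (cube.shape[0], 181 * 360))   # same reshape as the scraper

lat_idx, lon_idx = 139, 2           # an arbitrary grid point
flat_idx = lat_idx * 360 + lon_idx  # C-order flat index: longitude varies fastest
assert flat[0, flat_idx] == cube[0, lat_idx, lon_idx]
assert np.repeat(lats, 360)[flat_idx] == lats[lat_idx]  # latitudes are repeated
assert np.tile(lons, 181)[flat_idx] == lons[lon_idx]    # longitudes are tiled
```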

M meteo_scraper/task.py +16 -0
@@ 11,6 11,9 @@ from meteo_scraper.ecmwf_scraper import 
     get_last_ens_data,
     get_last_hres_data
 )
+from meteo_scraper.gfs_scraper import (
+    get_last_run_data
+)
 
 CONFIG = reader('meteo.cfg')
 

@@ 110,3 113,16 @@ def scrap_ecmwf_ens(task):
                 domain='meteo',
                 inputdata={'filename': filename}
             )
+
+
+@task(
+    domain='meteo'
+)
+def scrap_gfs_1p00(task):
+    tsa = apimaker(
+        {'db': {'uri': str(task.engine.url)},
+         'sources': {}
+         }
+    )
+    with task.capturelogs(std=True):
+        get_last_run_data(tsa)
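
For reference, the scheduled task writes one 3-hourly forecast series per zone and per city for each GFS parameter; a hedged sketch of the naming scheme, with example zone/city values only (the real lists come from `ZONES` and the cities CSV):

```python
# Hedged sketch of the series names produced by get_last_run_data;
# 'FR', 'France' and 'Paris' are illustrative values only.
param, unit = '2t', 'K'  # from PARAMETERS['tmp2m']
zone = 'FR'
area_name = f'meteo.area.{zone.lower()}.{param}.{unit.lower()}.gfs.1p00.fcst.3h'

country, city_ascii = 'France', 'Paris'
city_name = (
    f"meteo.city.{country.replace(' ', '').lower()}"
    f".{city_ascii.replace(' ', '').lower()}"
    f".{param}.{unit.lower()}.gfs.1p00.fcst.3h"
)
print(area_name)  # meteo.area.fr.2t.k.gfs.1p00.fcst.3h
print(city_name)  # meteo.city.france.paris.2t.k.gfs.1p00.fcst.3h
```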

M meteo_scraper/utils.py +35 -6
@@ 9,15 9,45 @@ from matplotlib.path import Path as mplt
 
 HERE = Path(__file__).parent
 
+ZONES = [
+    'AT', 'BE', 'BG', 'CH', 'CZ', 'DE_LU', 'DK_1', 'DK_2',
+    'EE', 'ES', 'FI', 'FR', 'GR', 'HR', 'HU', 'IT_CALA', 'IT_CNOR',
+    'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD',
+    'LT', 'LV', 'NL', 'NO_1', 'NO_2', 'NO_3', 'NO_4', 'NO_5',
+    'PL', 'PT', 'RO', 'RS', 'SE_1', 'SE_2', 'SE_3', 'SE_4', 'SI', 'SK'
+]
+
+DATADIR = Path(__file__).parent / 'data'
+CITIES = pd.read_csv(DATADIR / 'cities_scraper_meteo.csv', index_col=0)
+
+
 def load_zones(zones: list):
     geojson = [gpd.read_file(HERE / 'data' / 'geojson' / f'{x}.geojson') for x in zones]
     geo = pd.concat(geojson).set_index('zoneName').sort_index()
     return geo
 
 
-def construct_zone_mask(zone, gt_dim):
-    longitudes = np.tile(np.arange(-180, 180, 0.4), 451)
-    latitudes = np.repeat(np.arange(90, -90.4, -0.4), 900)
+def construct_lat_lon(model):
+    if model == 'ecmwf':
+        longitudes = np.tile(np.arange(-180, 180, 0.4), 451)
+        latitudes = np.repeat(np.arange(90, -90.4, -0.4), 900)
+        return (longitudes, latitudes)
+
+    if model == '1p00':
+        longitudes = np.tile(
+            np.concatenate((np.arange(0, 181, 1), np.arange(-179, 0, 1)), axis=None),
+            181
+        )
+        latitudes = np.repeat(
+            np.concatenate((np.arange(-90, 0, 1), np.arange(0, 91, 1)), axis=None),
+            360
+        )
+        return (longitudes, latitudes)
+
+    raise ValueError(f'unsupported model: {model}')
+
+
+def construct_zone_mask(zone, gt_dim, model):
+
+    (longitudes, latitudes) = construct_lat_lon(model)
 
     assert np.shape(longitudes)[0] == gt_dim[1]
     assert np.shape(latitudes)[0] == gt_dim[1]

@@ 57,9 87,8 @@ def construct_zone_mask(zone, gt_dim):
     return mask
 
 
-def nearest_gridpoint(lon, lat):
-    longitudes = np.tile(np.arange(-180, 180, 0.4), 451)
-    latitudes = np.repeat(np.arange(90, -90.4, -0.4), 900)
+def nearest_gridpoint(lon, lat, model):
+    (longitudes, latitudes) = construct_lat_lon(model)
     gridpoints = np.vstack((longitudes, latitudes)).T
 
     _, index = spatial.KDTree(gridpoints).query([lon, lat])
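
With the grids factored out, both helpers are easy to exercise on their own; a minimal usage sketch of the new `model` argument (assuming `nearest_gridpoint` returns the flat grid index, as the ECMWF code uses it):

```python
# Hedged usage sketch of the refactored helpers; 'ecmwf' selects the 0.4-degree
# open-data grid, '1p00' the GFS one-degree grid.
from meteo_scraper.utils import construct_lat_lon, nearest_gridpoint

lons, lats = construct_lat_lon('1p00')
assert lons.shape == lats.shape == (181 * 360,)

# Flat index of the one-degree grid point nearest to ~Paris (2.35E, 48.85N).
idx = nearest_gridpoint(2.35, 48.85, '1p00')
print(lons[idx], lats[idx])  # expected: 2 49 (the nearest one-degree node)
```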