M test/test_api.py +205 -0
@@ 188,3 188,208 @@ 2020-01-01 False
2020-01-02 False
2020-01-03 False
""", marker)
+
+
def test_infer_freq(tsx):
    """Exercise `edited(..., inferred_freq=True)` on a series with a hole.

    The stored series has one point per day from 2024-01-01 to
    2024-01-04 plus one on 2024-01-06.  With the option enabled, the
    returned series is expected to expose the missing day as NaN (with
    a False marker), and to follow the requested from/to bounds.
    """
    name = 'series_with_holes'
    days = (
        '2024-01-01',
        '2024-01-02',
        '2024-01-03',
        '2024-01-04',
        '2024-01-06',
    )
    source = pd.Series(
        [1, 2, 3, 4, 6],
        index=[pd.Timestamp(day) for day in days]
    )
    tsx.update(name, source, 'test')

    def regular(**bounds):
        # every remaining read goes through the inferred-frequency option
        return tsx.edited(name, inferred_freq=True, **bounds)

    # default read: only the five stored points
    series, flags = tsx.edited(name)
    assert len(series) == 5

    # no bounds: the hole is materialized between first and last points
    series, flags = regular()

    assert_df("""
2024-01-01 1.0
2024-01-02 2.0
2024-01-03 3.0
2024-01-04 4.0
2024-01-05 NaN
2024-01-06 6.0
""", series)

    assert_df("""
2024-01-01 False
2024-01-02 False
2024-01-03 False
2024-01-04 False
2024-01-05 False
2024-01-06 False
""", flags)

    # upper bound after the last stored point
    series, flags = regular(
        to_value_date=pd.Timestamp('2024-01-08 12:00:00')
    )
    assert_df("""
2024-01-01 1.0
2024-01-02 2.0
2024-01-03 3.0
2024-01-04 4.0
2024-01-05 NaN
2024-01-06 6.0
2024-01-07 NaN
2024-01-08 NaN
""", series)
    assert len(flags) == 8

    # upper bound falling inside the hole
    series, flags = regular(
        to_value_date=pd.Timestamp('2024-01-05 12:00:00')
    )
    assert_df("""
2024-01-01 1.0
2024-01-02 2.0
2024-01-03 3.0
2024-01-04 4.0
2024-01-05 NaN
""", series)
    assert len(flags) == 5

    # lower bound before the first stored point
    series, flags = regular(
        from_value_date=pd.Timestamp('2023-12-30 18:00:00')
    )
    assert_df("""
2023-12-31 NaN
2024-01-01 1.0
2024-01-02 2.0
2024-01-03 3.0
2024-01-04 4.0
2024-01-05 NaN
2024-01-06 6.0
""", series)
    assert len(flags) == 7

    # lower bound after the first stored point
    series, flags = regular(
        from_value_date=pd.Timestamp('2024-01-01 18:00:00')
    )
    assert_df("""
2024-01-02 2.0
2024-01-03 3.0
2024-01-04 4.0
2024-01-05 NaN
2024-01-06 6.0
""", series)
    assert len(flags) == 5

    # both bounds outside of the stored range
    series, flags = regular(
        from_value_date=pd.Timestamp('2023-12-30 18:00:00'),
        to_value_date=pd.Timestamp('2024-01-07 12:00:00')
    )
    assert_df("""
2023-12-31 NaN
2024-01-01 1.0
2024-01-02 2.0
2024-01-03 3.0
2024-01-04 4.0
2024-01-05 NaN
2024-01-06 6.0
2024-01-07 NaN
""", series)

    assert len(flags) == 8

    # both bounds inside of the stored range: two points come back
    series, flags = regular(
        from_value_date=pd.Timestamp('2024-01-02 12:00:00'),
        to_value_date=pd.Timestamp('2024-01-05 12:00:00')
    )
    assert_df("""
2024-01-03 3.0
2024-01-04 4.0
""", series)

    assert len(flags) == 2

    # window containing a single stored point
    series, flags = regular(
        from_value_date=pd.Timestamp('2024-01-04 12:00:00'),
        to_value_date=pd.Timestamp('2024-01-07 12:00:00')
    )
    assert_df("""
2024-01-06 6.0
""", series)

    assert len(flags) == 1
+
+
def test_infer_freq_tz(tsx):
    """Mix tz-naive and tz-aware series with tz-naive and tz-aware bounds.

    The inferred-frequency index is derived from both the request
    bounds and the series index, so each naive/aware combination
    checked here must yield the same number of points.
    """
    days = (
        '2024-01-01',
        '2024-01-02',
        '2024-01-03',
        '2024-01-04',
        '2024-01-06',
    )
    values = [1, 2, 3, 4, 6]

    naive_series = pd.Series(
        values,
        index=[pd.Timestamp(day) for day in days]
    )
    tsx.update('series_with_holes_naive', naive_series, 'test')

    aware_series = pd.Series(
        values,
        index=[pd.Timestamp(day, tz='UTC') for day in days]
    )
    tsx.update('series_with_holes_tz_aware', aware_series, 'test')

    from_naive = pd.Timestamp('2024-01-01 12:00:00')
    to_naive = pd.Timestamp('2024-01-07 12:00:00')

    from_tz_aware = pd.Timestamp('2024-01-01 12:00:00', tz='CET')
    to_tz_aware = pd.Timestamp('2024-01-07 12:00:00', tz='CET')

    # (series name, lower bound, upper bound), in the order they are checked
    combinations = (
        ('series_with_holes_naive', from_tz_aware, to_tz_aware),
        ('series_with_holes_tz_aware', from_tz_aware, to_tz_aware),
        ('series_with_holes_tz_aware', from_naive, to_naive),
    )
    for name, lower, upper in combinations:
        series, _markers = tsx.edited(
            name,
            from_value_date=lower,
            to_value_date=upper,
            inferred_freq=True,
        )
        assert len(series) == 6
M tshistory_supervision/api.py +4 -0
@@ 14,6 14,7 @@ def edited(self, name: str,
revision_date: Optional[pd.Timestamp]=None,
from_value_date: Optional[pd.Timestamp]=None,
to_value_date: Optional[pd.Timestamp]=None,
+ inferred_freq: Optional[bool]=False,
_keep_nans: bool=False) -> Tuple[pd.Series, pd.Series]:
"""
Returns the base series and a second boolean series whose entries
@@ 28,6 29,7 @@ def edited(self, name: str,
revision_date=revision_date,
from_value_date=from_value_date,
to_value_date=to_value_date,
+ inferred_freq=inferred_freq,
_keep_nans=_keep_nans
)
@@ 46,6 48,7 @@ def edited(self, # noqa: F811
revision_date=None,
from_value_date=None,
to_value_date=None,
+ inferred_freq=False,
_keep_nans=False):
source = self._findsourcefor(name)
@@ 56,6 59,7 @@ def edited(self, # noqa: F811
revision_date,
from_value_date,
to_value_date,
+ inferred_freq,
_keep_nans
)
M tshistory_supervision/http.py +8 -0
@@ 54,6 54,10 @@ edited.add_argument(
help='override from/to_value_date'
)
# Boolean query parameters must be parsed with `inputs.boolean` (as done for
# `_keep_nans`): a plain `type=bool` calls bool() on the raw string, so any
# non-empty value, including 'false' or '0', would be read as True.
edited.add_argument(
    'inferred_freq', type=inputs.boolean, default=False,
    help='re-index series on a inferred frequency'
)
+edited.add_argument(
'_keep_nans', type=inputs.boolean, default=False,
help='keep erasure information'
)
@@ 100,6 104,7 @@ class supervision_httpapi(httpapi):
revision_date=args.insertion_date,
from_value_date=fvd,
to_value_date=tvd,
+ inferred_freq=args.get('inferred_freq'),
_keep_nans=args._keep_nans
)
metadata = tsa.metadata(args.name, all=True)
@@ 146,6 151,7 @@ class supervision_httpclient(httpclient)
revision_date=None,
from_value_date=None,
to_value_date=None,
+ inferred_freq=False,
_keep_nans=False):
args = {
'name': name,
@@ 158,6 164,8 @@ class supervision_httpclient(httpclient)
args['from_value_date'] = strft(from_value_date)
if to_value_date:
args['to_value_date'] = strft(to_value_date)
+ if inferred_freq:
+ args['inferred_freq'] = inferred_freq
res = self.session.get(
f'{self.uri}/series/supervision', params=args
)
M tshistory_supervision/tsio.py +108 -6
@@ 1,10 1,15 @@
import pandas as pd
import numpy as np
-from tshistory.util import tx, diff
+from tshistory.util import (
+ compatible_date,
+ infer_freq,
+ diff,
+ tx
+)
from tshistory.tsio import timeseries as basets
-from tshistory_supervision import api # trigger registration # noqa: F401
+from tshistory_supervision import api # noqa
def join_index(ts1, ts2):
@@ 17,6 22,66 @@ def join_index(ts1, ts2):
return ts1.index.union(ts2.index)
def extended(inferred_freq, ts, from_value_date, to_value_date):
    """Re-index `ts` on a regular grid built from its inferred frequency.

    The series is returned untouched when `inferred_freq` is falsy or
    when it has fewer than three points.  Otherwise the step returned
    by `infer_freq` is used to build a new index, and `ts` is
    re-indexed on it (positions absent from `ts` come back as NaN):

    * no bounds: from the first to the last point of `ts`
    * only `to_value_date`: from the first point up to that bound
    * only `from_value_date`: from the last point back to that bound
    * both bounds: from the first point forward to `to_value_date`
      and backward to `from_value_date`

    The bounds are first passed through `compatible_date` together
    with the timezone of the series index.
    """
    if not inferred_freq or len(ts) < 3:
        return ts

    head, tail = ts.index[0], ts.index[-1]
    step = infer_freq(ts)[0]
    series_tz = head.tz
    to_value_date = compatible_date(series_tz, to_value_date)
    from_value_date = compatible_date(series_tz, from_value_date)

    has_lower = from_value_date is not None
    has_upper = to_value_date is not None

    if has_lower and not has_upper:
        # only a lower bound: walk backward from the last point
        grid = pd.date_range(
            start=tail,
            end=from_value_date,
            freq=-step
        ).sort_values()
        return ts.reindex(grid)

    # every other case walks forward from the first point
    grid = pd.date_range(
        start=head,
        end=to_value_date if has_upper else tail,
        freq=step
    )
    if has_lower:
        # both bounds: complete the index backward from the first point
        backward = pd.date_range(
            start=head,
            end=from_value_date,
            freq=-step
        )
        grid = grid.union(backward).sort_values()
    return ts.reindex(grid)
+
+
def fill_markers(markers):
    """Return `markers` as a pure boolean series, with holes set to False.

    When a point is created by the infer-freq option, the associated
    marker is missing (NaN); it must be set to False since such a
    point is not a manual edition.

    The cast is explicit: a boolean series that received NaN through
    a reindex is held with the object dtype, and getting a bool dtype
    back out of `fillna` alone relies on an implicit downcast that
    pandas has deprecated.
    """
    return markers.fillna(False).astype(bool)
+
+
class timeseries(basets):
"""This class refines the base `tshistory.timeseries` by adding a
specific workflow on top of it.
@@ 223,6 288,7 @@ class timeseries(basets):
@tx
def get_ts_marker(self, cn, name, revision_date=None,
from_value_date=None, to_value_date=None,
+ inferred_freq=False,
_keep_nans=False):
table = self._series_to_tablename(cn, name)
if table is None:
@@ 239,6 305,12 @@ class timeseries(basets):
# because of a revision_date
return None, None
+ def finish(edited):
+ keep_nans = _keep_nans or inferred_freq
+ if not keep_nans:
+ return edited.dropna()
+ return edited
+
supervision = self.supervision_status(cn, name)
if supervision in ('unsupervised', 'handcrafted'):
flags = pd.Series(
@@ 247,7 319,23 @@ class timeseries(basets):
dtype=np.dtype('bool')
)
flags.name = name
- return edited.dropna(), flags
+ edited = finish(edited)
+ return (
+ extended(
+ inferred_freq,
+ edited,
+ from_value_date,
+ to_value_date
+ ),
+ fill_markers(
+ extended(
+ inferred_freq,
+ flags,
+ from_value_date,
+ to_value_date
+ )
+ )
+ )
upstreamtsh = self.upstream
upstream = upstreamtsh.get(
@@ 272,6 360,20 @@ class timeseries(basets):
mask_manual[manual.index] = True
mask_manual.name = name
- if not _keep_nans:
- edited = edited.dropna()
- return edited, mask_manual
+ edited = finish(edited)
+ return (
+ extended(
+ inferred_freq,
+ edited,
+ from_value_date,
+ to_value_date
+ ),
+ fill_markers(
+ extended(
+ inferred_freq,
+ mask_manual,
+ from_value_date,
+ to_value_date
+ )
+ )
+ )