M test/test_api.py +32 -0
@@ 421,6 421,38 @@ def test_inferred_freq(tsx):
)
+def test_erasure(tsx):
+ tsx.delete('erasure')
+
+ ts = pd.Series(
+ [np.nan, np.nan],
+ index=pd.date_range(
+ utcdt(2024, 1, 1),
+ freq='h',
+ periods=2
+ )
+ )
+
+ diff = tsx.update('erasure', ts, 'Babar')
+ assert not tsx.insertion_dates('erasure')
+
+ diff = tsx.update('erasure', ts, 'Babar', keepnans=True)
+ assert len(tsx.insertion_dates('erasure')) == 1
+
+ ts = pd.Series(
+ [np.nan, 1, 2],
+ index=pd.date_range(
+ utcdt(2024, 1, 1),
+ freq='h',
+ periods=3
+ )
+ )
+
+    tsx.update('erasure', ts, 'Babar')
+    # default keepnans=False: the leading nan is dropped, two real points land
+    assert len(tsx.get('erasure')) == 2
+
+
def test_log(tsx):
for name in ('log-me',):
tsx.delete(name)
M test/test_http.py +1 -0
@@ 417,6 417,7 @@ def test_get_nans(http):
'series': util.tojson(ts),
'author': 'Babar',
'insertion_date': utcdt(2024, 1, 1),
+ 'keepnans': json.dumps(True),
'tzaware': util.tzaware_series(ts)
})
M test/test_tsio.py +28 -14
@@ 501,7 501,7 @@ 2024-01-01 01:00:00+00:00 NaN
2024-01-01 02:00:00+00:00 3.0
""", ts2)
- diff = tsh.update(engine, ts2, 'na-in-hole', 'Babar')
+ diff = tsh.update(engine, ts2, 'na-in-hole', 'Babar', keepnans=True)
assert_df("""
2024-01-01 01:00:00+00:00 NaN
""", diff)
@@ 795,6 795,16 @@ 2024-04-03 00:00:00+00:00 2024-04-01 00
'Babar',
insertion_date=pd.Timestamp('2024-4-4', tz='utc')
)
+ assert not len(diff)
+
+ diff = tsh.update(
+ engine,
+ erasets,
+ 'hist-withfullnans',
+ 'Babar',
+ insertion_date=pd.Timestamp('2024-4-4', tz='utc'),
+ keepnans=True
+ )
assert_df("""
2024-04-03 00:00:00+00:00 NaN
2024-04-04 00:00:00+00:00 NaN
@@ 836,7 846,8 @@ 2024-04-04 00:00:00+00:00 2024-04-01 00
erasets,
'hist-withfullnans',
'Babar',
- insertion_date=pd.Timestamp('2024-4-5', tz='utc')
+ insertion_date=pd.Timestamp('2024-4-5', tz='utc'),
+ keepnans=True
)
assert_df("""
2024-04-06 00:00:00+00:00 NaN
@@ 1007,7 1018,7 @@ def test_infer_freq(engine, tsh):
def test_point_deletion(engine, tsh):
ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
ts_begin.iloc[-1] = np.nan
- tsh.update(engine, ts_begin, 'ts_del', 'test')
+ tsh.update(engine, ts_begin, 'ts_del', 'test', keepnans=True)
_, ts = Postgres(engine, tsh, 'ts_del').find()
assert ts.iloc[-3] == 8.0
@@ 1015,7 1026,7 @@ def test_point_deletion(engine, tsh):
ts_begin.iloc[0] = np.nan
ts_begin.iloc[3] = np.nan
- tsh.update(engine, ts_begin, 'ts_del', 'test')
+ tsh.update(engine, ts_begin, 'ts_del', 'test', keepnans=True)
assert_df("""
2010-01-02 1.0
@@ 1061,7 1072,7 @@ 2010-01-10 9.0
ts_string.iloc[4] = None
ts_string.iloc[5] = None
- tsh.update(engine, ts_string, 'ts_string_del', 'test')
+ tsh.update(engine, ts_string, 'ts_string_del', 'test', keepnans=True)
assert_df("""
2010-01-01 machin
2010-01-02 machin
@@ 1101,7 1112,7 @@ 2010-01-10 machin
ts_string[ts_string.index] = np.nan
with pytest.raises(ValueError):
- tsh.update(engine, ts_string, 'ts_string_del', 'test')
+ tsh.update(engine, ts_string, 'ts_string_del', 'test', keepnans=True)
def test_nan_first(engine, tsh):
@@ 1177,7 1188,7 @@ 2010-01-15 NaN
ts_begin.iloc[:] = np.nan
with pytest.raises(ValueError):
- tsh.update(engine, ts_begin, 'ts_full_del', 'test')
+ tsh.update(engine, ts_begin, 'ts_full_del', 'test', keepnans=True)
ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
tsh.update(engine, ts_end, 'ts_full_del', 'test')
@@ 1191,7 1202,7 @@ 2010-01-15 NaN
index=ts_begin.index)
with pytest.raises(ValueError):
- tsh.update(engine, ts_begin, 'ts_full_del_str', 'test')
+ tsh.update(engine, ts_begin, 'ts_full_del_str', 'test', keepnans=True)
ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'], dtype='object')
tsh.update(engine, ts_end, 'ts_full_del_str', 'test')
@@ 1214,7 1225,8 @@ def test_deletion_over_horizon(engine, t
)
tsh.update(engine, ts, name, 'Celeste',
- insertion_date=idate.replace(day=2))
+ insertion_date=idate.replace(day=2),
+ keepnans=True)
ival = tsh.interval(engine, name)
assert ival.left == datetime(2018, 1, 1)
assert ival.right == datetime(2018, 1, 2)
@@ 1224,7 1236,8 @@ def test_deletion_over_horizon(engine, t
index=pd.date_range(datetime(2017, 12, 30), freq='D', periods=3)
)
tsh.update(engine, ts, name, 'Arthur',
- insertion_date=idate.replace(day=3))
+ insertion_date=idate.replace(day=3),
+ keepnans=True)
ival = tsh.interval(engine, name)
assert ival.left == datetime(2018, 1, 2)
assert ival.right == datetime(2018, 1, 2)
@@ 1467,7 1480,8 @@ 2015-01-22 00:00:00+00:00 2015-01-20 00
if idx == 2:
serie.iloc[-1] = np.nan
tsh.update(engine, serie, 'with_na', 'arnaud',
- insertion_date=idate)
+ insertion_date=idate,
+ keepnans=True)
# the value at 2015-01-22 is hidden by the inserted nan
assert_df("""
@@ 1550,7 1564,7 @@ def test_add_na(engine, tsh):
ts_nan[[True] * len(ts_nan)] = np.nan
ts_nan = pd.concat([ts_begin, ts_nan])
- diff = tsh.update(engine, ts_nan, 'ts_add_na', 'test')
+ diff = tsh.update(engine, ts_nan, 'ts_add_na', 'test', keepnans=True)
assert len(diff) == 5
result = tsh.get(engine, 'ts_add_na')
@@ 2588,7 2602,7 @@ def test_na_at_boundaries(engine, tsh):
ts = pd.Series([np.nan] * 3 + [3] * 5 + [np.nan] * 2,
index=pd.date_range(start=datetime(2010, 1, 10),
freq='D', periods=10))
- tsh.update(engine, ts, 'test_nan', 'test')
+ tsh.update(engine, ts, 'test_nan', 'test', keepnans=True)
result = tsh.get(engine, 'test_nan')
assert_df("""
2010-01-13 3.0
@@ 2644,7 2658,7 @@ 2010-01-19 NaN
ts = pd.Series([np.nan] * 4 + [5] * 3 + [np.nan] * 3,
index=pd.date_range(start=datetime(2010, 1, 10),
freq='D', periods=10))
- tsh.update(engine, ts, 'test_nan', 'test')
+ tsh.update(engine, ts, 'test_nan', 'test', keepnans=True)
result = tsh.get(engine, 'test_nan', _keep_nans=True)
assert_df("""
2010-01-10 NaN
M tshistory/api.py +7 -1
@@ 110,6 110,7 @@ class mainsource:
author: str,
metadata: Optional[dict]=None,
insertion_date: Optional[datetime]=None,
+ keepnans: Optional[bool]=False,
**kw) -> Optional[pd.Series]:
"""Update a series named by <name> with the input pandas series.
@@ 121,7 122,7 @@ class mainsource:
no change, None is returned and no new version is created.
New points are added, changed points are changed,
- points with NaN are considered to be _erased_.
+ points with NaN are _erased_ when `keepnans` is true, dropped otherwise.
The `author` is mandatory.
The `metadata` dictionary allows to associate any metadata
@@ 130,6 131,10 @@ class mainsource:
It is possible to force an `insertion_date`, which can only be
higher than the previous `insertion_date`.
+ If the `keepnans` flag is true, nan values are treated as point
+ erasures. If false (the default), a dropna() is applied to the
+ series before the actual update.
+
.. highlight:: python
.. code-block:: python
@@ 168,6 173,7 @@ class mainsource:
author,
metadata=metadata,
insertion_date=insertion_date,
+ keepnans=keepnans,
**kw
)
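At the api level the flag reads as follows; a minimal usage sketch, assuming a
mainsource handle named `tsa` and a series named 'my-series' (both hypothetical):

    import numpy as np
    import pandas as pd

    ts = pd.Series(
        [1.0, np.nan, 3.0],
        index=pd.date_range('2024-1-1', freq='h', periods=3, tz='UTC')
    )

    # default behaviour: the nan is dropped, only the two real points are written
    tsa.update('my-series', ts, 'Babar')

    # keepnans=True: the nan is kept and erases any stored point at that stamp
    tsa.update('my-series', ts, 'Babar', keepnans=True)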
M tshistory/http/client.py +4 -1
@@ 147,6 147,7 @@ class httpclient:
def _insert(self, name, series, author,
metadata=None,
insertion_date=None,
+ keepnans=False,
supervision=False,
replace=False):
guard_insert(
@@ 158,6 159,7 @@ class httpclient:
'name': name,
'author': author,
'insertion_date': insertion_date.isoformat() if insertion_date else None,
+ 'keepnans': keepnans,
'tzaware': meta['tzaware'],
'replace': replace,
'supervision': supervision,
@@ 184,11 186,12 @@ class httpclient:
@unwraperror
def update(self, name, series, author,
- metadata=None, insertion_date=None, manual=False):
+ metadata=None, insertion_date=None, keepnans=False, manual=False):
return self._insert(
name, series, author,
metadata=metadata,
insertion_date=insertion_date,
+ keepnans=keepnans,
supervision=manual
)
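The http client just forwards the flag as one more form field; a sketch, assuming
an `httpclient` instance already pointing at a running server (the uri below is
hypothetical):

    client = httpclient('http://my-tshistory-server')
    client.update(
        'my-series', ts, 'Babar',
        insertion_date=pd.Timestamp('2024-1-1', tz='UTC'),
        keepnans=True  # sent as a boolean form field, parsed by inputs.boolean server side
    )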
M tshistory/http/server.py +11 -0
@@ 64,6 64,10 @@ update.add_argument(
help='insertion date can be forced'
)
update.add_argument(
+ 'keepnans', type=inputs.boolean, default=False,
+    help='if true, treat nans as point erasures (default: drop them)'
+)
+update.add_argument(
'tzaware', type=inputs.boolean, default=True,
help='tzaware series'
)
@@ 684,6 688,7 @@ class httpapi:
args.name, series, args.author,
metadata=args.metadata,
insertion_date=args.insertion_date,
+ keepnans=args.keepnans,
manual=args.supervision
)
except ValueError as err:
@@ 694,6 699,12 @@ class httpapi:
+        if diff is None:
+            # nothing happened
+            # a possible cause is sending nans without the erasure
+            # flag at series creation time
+            return no_content()
+
if args.tzaware and args.tzone != 'UTC':
diff.index = diff.index.tz_convert(args.tzone)
return series_response(
args.format,
diff,
M tshistory/migrate.py +8 -0
@@ 216,6 216,14 @@ def migrate_add_diffstart_diffend(engine
f'alter table "{namespace}.revision"."{tablename}" '
f'alter column diffend set not null'
)
+ cn.execute(
+ f'alter table "{namespace}.revision"."{tablename}" '
+ f'alter column tsstart drop not null'
+ )
+ cn.execute(
+ f'alter table "{namespace}.revision"."{tablename}" '
+ f'alter column tsend drop not null'
+ )
def partition(alist, size):
for i in range(0, len(alist), size):
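The migration only relaxes the two not-null constraints on already deployed
revision tables. A quick sanity check that it ran, as a sketch (the dsn and the
default 'tsh' namespace are assumptions):

    from sqlalchemy import create_engine, text

    engine = create_engine('postgresql://localhost/mydb')
    with engine.connect() as cn:
        rows = cn.execute(text(
            "select column_name, is_nullable "
            "from information_schema.columns "
            "where table_schema = 'tsh.revision' "
            "and column_name in ('tsstart', 'tsend')"
        )).fetchall()
    # after the migration every row should report is_nullable = 'YES'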
M tshistory/series.sql +2 -2
@@ 13,8 13,8 @@ create index on "{namespace}.snapshot"."
create table "{namespace}.revision"."{tablename}" (
id serial primary key,
- tsstart timestamp not null, -- whole series index min
- tsend timestamp not null, -- whole series index max
+ tsstart timestamp, -- whole series index min
+ tsend timestamp, -- whole series index max
diffstart timestamptz not null, -- diff index min
diffend timestamptz not null, -- diff index max
snapshot integer references "{namespace}.snapshot"."{tablename}"(id),
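Making tsstart/tsend nullable follows directly from the new semantics: with
keepnans=True a revision may carry nothing but nans, so the stored series can end
up with no real value and no value-bearing bounds to record. A tiny illustration
of that degenerate case:

    import numpy as np
    import pandas as pd

    ts = pd.Series(
        [np.nan, np.nan],
        index=pd.date_range('2024-1-1', freq='h', periods=2, tz='UTC')
    )
    assert ts.dropna().empty  # no real value -> no min/max for tsstart/tsend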
M tshistory/tsio.py +10 -5
@@ 73,6 73,7 @@ class timeseries:
def update(self, cn, updatets, name, author,
metadata=None,
insertion_date=None,
+ keepnans=False,
**k):
"""Create a new revision of a given time series
with update semantics:
@@ 109,6 110,9 @@ class timeseries:
'datetime' in str(updatets.index.dtype) and not
isinstance(updatets.index, pd.MultiIndex))
+ if not keepnans:
+ updatets = updatets.dropna()
+
if tablename is None:
seriesmeta = self._series_initial_meta(cn, name, updatets)
return self._create(cn, updatets, name, author, seriesmeta,
@@ 879,12 883,10 @@ class timeseries:
def _create(self, cn, newts, name, author, seriesmeta,
metadata=None, insertion_date=None):
- start, end = start_end(newts, notz=False)
- if start is None:
- assert end is None
- # this is just full of nans
+ if not len(newts):
return None
+ start, end = start_end(newts, notz=False)
# at creation time we take an exclusive lock to avoid
# a deadlock on created tables against the changeset-series fk
cn.execute(
@@ 899,7 901,7 @@ class timeseries:
start, end = start_end(newts)
self._new_revision(
- cn, name, head, start, end, start, end,
+ cn, name, head, start, end, newts.index.min(), newts.index.max(),
author, insertion_date, metadata
)
@@ 909,6 911,9 @@ class timeseries:
def _update(self, cn, newts, name, author,
metadata=None, insertion_date=None):
+ if not len(newts):
+ return empty_series(self.tzaware(cn, name))
+
self._validate(cn, newts, name)
snapshot = self.storageclass(cn, self, name)
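The whole feature hinges on the early branch added to timeseries.update. The
sketch below restates it as a standalone helper (hypothetical name, not part of
the library) just to make the two paths explicit:

    import pandas as pd

    def prepare_for_update(updatets: pd.Series, keepnans: bool = False) -> pd.Series:
        # keepnans=False (default): nans never reach the diff computation,
        # so they can neither create nor erase stored points
        if not keepnans:
            return updatets.dropna()
        # keepnans=True: nans flow through and act as point erasures downstream
        return updatets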