api/update: make erasure update explicit

We introduce a `keepnans` keyword whose default value is False.

By default, series given to .update are stripped of their NaNs,
unless we explicitly state (with `keepnans=True`) that the NaNs must
be kept, e.g. for point erasure purposes.

The flag is not only for erasure: with it, NaNs are also accepted
when they apply to points that previously had no value.

Why would one want "pure" NaNs?  A series can also be used as a patch
for another series (as in the formula `priority` operator) and NaNs
can make sense in this context (they will indeed erase data).

Because of these changes, tsstart and tsend can now be null in
series.sql; the migration drops their `not null` constraint on
existing revision tables accordingly.
M test/test_api.py +29 -0
@@ 421,6 421,35 @@ def test_inferred_freq(tsx):
     )
 
 
+def test_erasure(tsx):
+    tsx.delete('erasure')
+
+    ts = pd.Series(
+        [np.nan, np.nan],
+        index=pd.date_range(
+            utcdt(2024, 1, 1),
+            freq='h',
+            periods=2
+        )
+    )
+
+    diff = tsx.update('erasure', ts, 'Babar')
+    assert not tsx.insertion_dates('erasure')
+
+    diff = tsx.update('erasure', ts, 'Babar', keepnans=True)
+    assert len(tsx.insertion_dates('erasure')) == 1
+
+    ts = pd.Series(
+        [np.nan, 1, 2],
+        index=pd.date_range(
+            utcdt(2024, 1, 1),
+            freq='h',
+            periods=3
+        )
+    )
+
+
+
 def test_log(tsx):
     for name in ('log-me',):
         tsx.delete(name)

          
M test/test_http.py +1 -0
@@ 417,6 417,7 @@ def test_get_nans(http):
         'series': util.tojson(ts),
         'author': 'Babar',
         'insertion_date': utcdt(2024, 1, 1),
+        'keepnans': json.dumps(True),
         'tzaware': util.tzaware_series(ts)
     })
 

          
M test/test_tsio.py +28 -14
@@ 501,7 501,7 @@ 2024-01-01 01:00:00+00:00    NaN
 2024-01-01 02:00:00+00:00    3.0
 """, ts2)
 
-    diff = tsh.update(engine, ts2, 'na-in-hole', 'Babar')
+    diff = tsh.update(engine, ts2, 'na-in-hole', 'Babar', keepnans=True)
     assert_df("""
 2024-01-01 01:00:00+00:00   NaN
 """, diff)

          
@@ 795,6 795,16 @@ 2024-04-03 00:00:00+00:00  2024-04-01 00
         'Babar',
         insertion_date=pd.Timestamp('2024-4-4', tz='utc')
     )
+    assert not len(diff)
+
+    diff = tsh.update(
+        engine,
+        erasets,
+        'hist-withfullnans',
+        'Babar',
+        insertion_date=pd.Timestamp('2024-4-4', tz='utc'),
+        keepnans=True
+    )
     assert_df("""
 2024-04-03 00:00:00+00:00   NaN
 2024-04-04 00:00:00+00:00   NaN

          
@@ 836,7 846,8 @@ 2024-04-04 00:00:00+00:00  2024-04-01 00
         erasets,
         'hist-withfullnans',
         'Babar',
-        insertion_date=pd.Timestamp('2024-4-5', tz='utc')
+        insertion_date=pd.Timestamp('2024-4-5', tz='utc'),
+        keepnans=True
     )
     assert_df("""
 2024-04-06 00:00:00+00:00   NaN

          
@@ 1007,7 1018,7 @@ def test_infer_freq(engine, tsh):
 def test_point_deletion(engine, tsh):
     ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
     ts_begin.iloc[-1] = np.nan
-    tsh.update(engine, ts_begin, 'ts_del', 'test')
+    tsh.update(engine, ts_begin, 'ts_del', 'test', keepnans=True)
 
     _, ts = Postgres(engine, tsh, 'ts_del').find()
     assert ts.iloc[-3] == 8.0

          
@@ 1015,7 1026,7 @@ def test_point_deletion(engine, tsh):
     ts_begin.iloc[0] = np.nan
     ts_begin.iloc[3] = np.nan
 
-    tsh.update(engine, ts_begin, 'ts_del', 'test')
+    tsh.update(engine, ts_begin, 'ts_del', 'test', keepnans=True)
 
     assert_df("""
 2010-01-02    1.0

          
@@ 1061,7 1072,7 @@ 2010-01-10     9.0
     ts_string.iloc[4] = None
     ts_string.iloc[5] = None
 
-    tsh.update(engine, ts_string, 'ts_string_del', 'test')
+    tsh.update(engine, ts_string, 'ts_string_del', 'test', keepnans=True)
     assert_df("""
 2010-01-01    machin
 2010-01-02    machin

          
@@ 1101,7 1112,7 @@ 2010-01-10    machin
 
     ts_string[ts_string.index] = np.nan
     with pytest.raises(ValueError):
-        tsh.update(engine, ts_string, 'ts_string_del', 'test')
+        tsh.update(engine, ts_string, 'ts_string_del', 'test', keepnans=True)
 
 
 def test_nan_first(engine, tsh):

          
@@ 1177,7 1188,7 @@ 2010-01-15     NaN
 
     ts_begin.iloc[:] = np.nan
     with pytest.raises(ValueError):
-        tsh.update(engine, ts_begin, 'ts_full_del', 'test')
+        tsh.update(engine, ts_begin, 'ts_full_del', 'test', keepnans=True)
 
     ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
     tsh.update(engine, ts_end, 'ts_full_del', 'test')

          
@@ 1191,7 1202,7 @@ 2010-01-15     NaN
                          index=ts_begin.index)
 
     with pytest.raises(ValueError):
-        tsh.update(engine, ts_begin, 'ts_full_del_str', 'test')
+        tsh.update(engine, ts_begin, 'ts_full_del_str', 'test', keepnans=True)
 
     ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'], dtype='object')
     tsh.update(engine, ts_end, 'ts_full_del_str', 'test')

          
@@ 1214,7 1225,8 @@ def test_deletion_over_horizon(engine, t
     )
 
     tsh.update(engine, ts, name, 'Celeste',
-               insertion_date=idate.replace(day=2))
+               insertion_date=idate.replace(day=2),
+               keepnans=True)
     ival = tsh.interval(engine, name)
     assert ival.left == datetime(2018, 1, 1)
     assert ival.right == datetime(2018, 1, 2)

          
@@ 1224,7 1236,8 @@ def test_deletion_over_horizon(engine, t
         index=pd.date_range(datetime(2017, 12, 30), freq='D', periods=3)
     )
     tsh.update(engine, ts, name, 'Arthur',
-               insertion_date=idate.replace(day=3))
+               insertion_date=idate.replace(day=3),
+               keepnans=True)
     ival = tsh.interval(engine, name)
     assert ival.left == datetime(2018, 1, 2)
     assert ival.right == datetime(2018, 1, 2)

          
@@ 1467,7 1480,8 @@ 2015-01-22 00:00:00+00:00  2015-01-20 00
         if idx == 2:
             serie.iloc[-1] = np.nan
         tsh.update(engine, serie, 'with_na', 'arnaud',
-                   insertion_date=idate)
+                   insertion_date=idate,
+                   keepnans=True)
 
     # the value at 2015-01-22 is hidden by the inserted nan
     assert_df("""

          
@@ 1550,7 1564,7 @@ def test_add_na(engine, tsh):
     ts_nan[[True] * len(ts_nan)] = np.nan
     ts_nan = pd.concat([ts_begin, ts_nan])
 
-    diff = tsh.update(engine, ts_nan, 'ts_add_na', 'test')
+    diff = tsh.update(engine, ts_nan, 'ts_add_na', 'test', keepnans=True)
     assert len(diff) == 5
 
     result = tsh.get(engine, 'ts_add_na')

          
@@ 2588,7 2602,7 @@ def test_na_at_boundaries(engine, tsh):
     ts = pd.Series([np.nan] * 3 + [3] * 5 + [np.nan] * 2,
                    index=pd.date_range(start=datetime(2010, 1, 10),
                                        freq='D', periods=10))
-    tsh.update(engine, ts, 'test_nan', 'test')
+    tsh.update(engine, ts, 'test_nan', 'test', keepnans=True)
     result = tsh.get(engine, 'test_nan')
     assert_df("""
 2010-01-13    3.0

          
@@ 2644,7 2658,7 @@ 2010-01-19    NaN
     ts = pd.Series([np.nan] * 4 + [5] * 3 + [np.nan] * 3,
                    index=pd.date_range(start=datetime(2010, 1, 10),
                                        freq='D', periods=10))
-    tsh.update(engine, ts, 'test_nan', 'test')
+    tsh.update(engine, ts, 'test_nan', 'test', keepnans=True)
     result = tsh.get(engine, 'test_nan', _keep_nans=True)
     assert_df("""
 2010-01-10    NaN

          
M tshistory/api.py +7 -1
@@ 110,6 110,7 @@ class mainsource:
                author: str,
                metadata: Optional[dict]=None,
                insertion_date: Optional[datetime]=None,
+               keepnans: Optional[bool]=False,
                **kw) -> Optional[pd.Series]:
         """Update a series named by <name> with the input pandas series.
 

          
@@ 121,7 122,7 @@ class mainsource:
         no change, None is returned and no new version is created.
 
         New points are added, changed points are changed,
-        points with NaN are considered to be _erased_.
+        points with NaN are _erased_ if `keepnans` is true.
 
         The `author` is mandatory.
         The `metadata` dictionary allows to associate any metadata

          
@@ 130,6 131,10 @@ class mainsource:
         It is possible to force an `insertion_date`, which can only be
         higher than the previous `insertion_date`.
 
+        If the `keepnans` flag is true, the NaN values are treated as
+        erasures. If false (the default), a dropna() call is made on
+        the series before the actual update.
+
         .. highlight:: python
         .. code-block:: python
 

          
@@ 168,6 173,7 @@ class mainsource:
                 author,
                 metadata=metadata,
                 insertion_date=insertion_date,
+                keepnans=keepnans,
                 **kw
             )
 

          
M tshistory/http/client.py +4 -1
@@ 147,6 147,7 @@ class httpclient:
     def _insert(self, name, series, author,
                 metadata=None,
                 insertion_date=None,
+                keepnans=False,
                 supervision=False,
                 replace=False):
         guard_insert(

          
@@ 158,6 159,7 @@ class httpclient:
             'name': name,
             'author': author,
             'insertion_date': insertion_date.isoformat() if insertion_date else None,
+            'keepnans': keepnans,
             'tzaware': meta['tzaware'],
             'replace': replace,
             'supervision': supervision,

          
@@ 184,11 186,12 @@ class httpclient:
 
     @unwraperror
     def update(self, name, series, author,
-               metadata=None, insertion_date=None, manual=False):
+               metadata=None, insertion_date=None, keepnans=False, manual=False):
         return self._insert(
             name, series, author,
             metadata=metadata,
             insertion_date=insertion_date,
+            keepnans=keepnans,
             supervision=manual
         )
 

          
M tshistory/http/server.py +11 -0
@@ 64,6 64,10 @@ update.add_argument(
     help='insertion date can be forced'
 )
 update.add_argument(
+    'keepnans', type=inputs.boolean, default=False,
+    help='treat nans as point erasure or drop them'
+)
+update.add_argument(
     'tzaware', type=inputs.boolean, default=True,
     help='tzaware series'
 )

          
@@ 684,6 688,7 @@ class httpapi:
                             args.name, series, args.author,
                             metadata=args.metadata,
                             insertion_date=args.insertion_date,
+                            keepnans=args.keepnans,
                             manual=args.supervision
                         )
                 except ValueError as err:

          
@@ 694,6 699,12 @@ class httpapi:
                 if args.tzaware and args.tzone != 'UTC':
                     diff.index = diff.index.tz_convert(args.tzone)
 
+                if diff is None:
+                    # nothing happened
+                    # possible cause is sending nans without erasure flag
+                    # on creation
+                    return no_content()
+
                 return series_response(
                     args.format,
                     diff,

          
M tshistory/migrate.py +8 -0
@@ 216,6 216,14 @@ def migrate_add_diffstart_diffend(engine
             f'alter table "{namespace}.revision"."{tablename}" '
             f'alter column diffend set not null'
         )
+        cn.execute(
+            f'alter table "{namespace}.revision"."{tablename}" '
+            f'alter column tsstart drop not null'
+        )
+        cn.execute(
+            f'alter table "{namespace}.revision"."{tablename}" '
+            f'alter column tsend drop not null'
+        )
 
     def partition(alist, size):
         for i in range(0, len(alist), size):

          
M tshistory/series.sql +2 -2
@@ 13,8 13,8 @@ create index on "{namespace}.snapshot"."
 
 create table "{namespace}.revision"."{tablename}" (
   id serial primary key,
-  tsstart timestamp not null, -- whole series index min
-  tsend timestamp not null,   -- whole series index max
+  tsstart timestamp, -- whole series index min
+  tsend timestamp,   -- whole series index max
   diffstart timestamptz not null, -- diff index min
   diffend timestamptz not null,   -- diff index min
   snapshot integer references "{namespace}.snapshot"."{tablename}"(id),

          
M tshistory/tsio.py +10 -5
@@ 73,6 73,7 @@ class timeseries:
     def update(self, cn, updatets, name, author,
                metadata=None,
                insertion_date=None,
+               keepnans=False,
                **k):
         """Create a new revision of a given time series
         with update semantics:

          
@@ 109,6 110,9 @@ class timeseries:
                 'datetime' in str(updatets.index.dtype) and not
                 isinstance(updatets.index, pd.MultiIndex))
 
+        if not keepnans:
+            updatets = updatets.dropna()
+
         if tablename is None:
             seriesmeta = self._series_initial_meta(cn, name, updatets)
             return self._create(cn, updatets, name, author, seriesmeta,

          
@@ 879,12 883,10 @@ class timeseries:
 
     def _create(self, cn, newts, name, author, seriesmeta,
                 metadata=None, insertion_date=None):
-        start, end = start_end(newts, notz=False)
-        if start is None:
-            assert end is None
-            # this is just full of nans
+        if not len(newts):
             return None
 
+        start, end = start_end(newts, notz=False)
         # at creation time we take an exclusive lock to avoid
         # a deadlock on created tables against the changeset-series fk
         cn.execute(

          
@@ 899,7 901,7 @@ class timeseries:
         start, end = start_end(newts)
 
         self._new_revision(
-            cn, name, head, start, end, start, end,
+            cn, name, head, start, end, newts.index.min(), newts.index.max(),
             author, insertion_date, metadata
         )
 

          
@@ 909,6 911,9 @@ class timeseries:
 
     def _update(self, cn, newts, name, author,
                 metadata=None, insertion_date=None):
+        if not len(newts):
+            return empty_series(self.tzaware(cn, name))
+
         self._validate(cn, newts, name)
 
         snapshot = self.storageclass(cn, self, name)