c6a0fb38630a draft — Arnaud Campeas[arnaud.campeas@pythonian.fr] tip 2 months ago
inferred_freq: use the union of the new and old indexes to keep irregular values
2 files changed, 94 insertions(+), 1 deletions(-)

M test/test_api.py
M tshistory/util.py
M test/test_api.py +4 -1
@@ 278,15 278,18 @@ 2024-01-06 06:00:00+00:00    5.0
 
     # when called with the option 'inferred_freq'
     # new index entries are created (good)
-    # but some values disappeared (bad)
+    # and the old ones are also kept (also good)
     ts = tsx.get('irregular_freq', inferred_freq=True)
     assert_df("""
 2024-01-01 00:00:00+00:00    0.0
 2024-01-02 00:00:00+00:00    1.0
 2024-01-03 00:00:00+00:00    2.0
 2024-01-04 00:00:00+00:00    NaN
+2024-01-04 06:00:00+00:00    3.0
 2024-01-05 00:00:00+00:00    NaN
+2024-01-05 06:00:00+00:00    4.0
 2024-01-06 00:00:00+00:00    NaN
+2024-01-06 06:00:00+00:00    5.0
 """, ts)
 
 

          
M tshistory/util.py +90 -0
@@ 448,6 448,96 @@ def parse_delta(td):
         minutes=int(minutes), seconds=int(seconds)
     )
 
+    old_index = ts.index
+    freq = infer_freq(ts)[0]
+    tzaware = ts_start.tz is not None
+    to_value_date = compatible_date(tzaware, to_value_date)
+    from_value_date = compatible_date(tzaware, from_value_date)
+
+    if from_value_date is None and to_value_date is None:
+        new_index = pd.date_range(
+            start=ts_start,
+            end=ts_end,
+            freq=freq
+        )
+        return ts.reindex(new_index.union(old_index))
+
+    if from_value_date is None:
+        new_index = pd.date_range(
+            start=ts_start,
+            end=to_value_date,
+            freq=freq
+        )
+        return ts.reindex(new_index.union(old_index))
+
+    if to_value_date is None:
+        new_index = pd.date_range(
+            start=ts_end,
+            end=from_value_date,
+            freq=-freq
+        ).sort_values()
+        return ts.reindex(new_index.union(old_index))
+
+    # we have to build the index in two parts
+    new_index = pd.date_range(
+        start=ts_start,
+        end=to_value_date,
+        freq=freq
+    )
+    complement = pd.date_range(
+        start=ts_start,
+        end=from_value_date,
+        freq=-freq
+    )
+    new_index = new_index.union(complement).sort_values()
+    return ts.reindex(new_index.union(old_index))
+
+
+def guard_insert(newts, name, author, metadata, insertion_date):
+    assert len(name), 'Name is an empty string'
+    assert isinstance(author, str), 'Author is not a string'
+    assert metadata is None or isinstance(metadata, dict), (
+        f'Bad format for metadata ({repr(metadata)})'
+    )
+    assert (insertion_date is None or
+            isinstance(insertion_date, datetime)), 'Bad format for insertion date'
+    assert isinstance(newts, pd.Series), 'Not a pd.Series'
+    index = newts.index
+    assert isinstance(index, pd.DatetimeIndex), 'You must provide a DatetimeIndex'
+    assert not index.duplicated().any(), 'There are some duplicates in the index'
+
+    assert index.notna().all(), 'The index contains NaT entries'
+    if index.tz is not None:
+        newts.index = index.tz_convert('UTC')
+    if not index.is_monotonic_increasing:
+        newts = newts.sort_index()
+
+    return num2float(newts)
+
+
+def guard_query_dates(*dates):
+    assert all(
+        isinstance(dt, datetime)
+        for dt in filter(None, dates)
+    ), 'all query dates must be datetime-compatible objects'
+
+
+# timedelta (de)serialisation
+
+def delta_isoformat(td):
+    return f'P{td.days}DT0H0M{td.seconds}S'
+
+
+_DELTA = re.compile('P(.*)DT(.*)H(.*)M(.*)S')
+def parse_delta(td):
+    match = _DELTA.match(td)
+    if not match:
+        raise Exception(f'unparseable time delta `{td}`')
+    days, hours, minutes, seconds = match.groups()
+    return pd.Timedelta(
+        days=int(days), hours=int(hours),
+        minutes=int(minutes), seconds=int(seconds)
+    )
 
 # metadata