Skip to content

Commit

Permalink
334 unexpected timezones in case of different timezones in components…
Browse files Browse the repository at this point in the history
… and times (#336)

* added extra timezone assertions

* make timezone of times instead of components leading in hatyan.prediction()

* allow for UTC+1 timezone next to pytz(60) timezone in write_dia_ts and write_dia_HWLW

* allow tzaware comp with tznaive times for backwards compatibility

* updated whatsnew

* simplified logging

* solved to_numeric() and logger.warn() deprecationwarnings
  • Loading branch information
veenstrajelmer authored Sep 4, 2024
1 parent 7465b2a commit 4df6e7f
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 33 deletions.
1 change: 1 addition & 0 deletions docs/whats-new.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

### Fix
- repaired support for equidistant multiblock diafiles with varying timesteps in [#314](https://github.com/Deltares/hatyan/pull/314)
- fixed passing of timezones in case of mixed timezones in comp and times in `hatyan.prediction()` in [#336](https://github.com/Deltares/hatyan/pull/336)


## 2.8.0 (2024-05-08)
Expand Down
50 changes: 35 additions & 15 deletions hatyan/analysis_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,17 +425,32 @@ def prediction_singleperiod(comp:pd.DataFrame, times:pd.DatetimeIndex, hatyan_se
raise TypeError(f'times argument can be of type pd.DatetimeIndex or slice, not {type(times)}')
times_pred_all_pdDTI = times

# localize times datetimeindex, first convert times to tzone of components, then drop timezone
if times_pred_all_pdDTI.tz is not None:
tzone_pred = times_pred_all_pdDTI.tz

if tzone_pred is None and tzone_comp is not None:
times_pred_all_pdDTI = times_pred_all_pdDTI.tz_localize(tzone_comp)
tzone_pred = tzone_comp
logger.warning("provided times are timezone-naive and provided components are "
"timezone-aware. The times are being interpreted as if they would "
f"have the same timezone as the components: {tzone_comp}")
tzone_convert = True
elif tzone_pred is None and tzone_comp is None:
tzone_convert = False
elif tzone_pred is not None and tzone_comp is not None:
tzone_convert = True
else:
raise ValueError("provided times are timezone-aware and components are timezone-naive, "
"this cannot be processed.")

# remove timezone from prediction times: first convert times to tzone of components, then make timezone naive
if tzone_convert:
times_pred_all_pdDTI = times_pred_all_pdDTI.tz_convert(tzone_comp)
times_pred_all_pdDTI = times_pred_all_pdDTI.tz_localize(None)

message = (f'components used = {len(comp)}\n'
f'tstart = {times_pred_all_pdDTI[0].strftime("%Y-%m-%d %H:%M:%S")}\n'
f'tstop = {times_pred_all_pdDTI[-1].strftime("%Y-%m-%d %H:%M:%S")}')
if hasattr(times_pred_all_pdDTI,'freq'):
message += f'\ntimestep = {times_pred_all_pdDTI.freq}'
logger.info(message)
logger.info(f'components used = {len(comp)}\n'
f'tstart = {times_pred_all_pdDTI[0].strftime("%Y-%m-%d %H:%M:%S")}\n'
f'tstop = {times_pred_all_pdDTI[-1].strftime("%Y-%m-%d %H:%M:%S")}\n'
f'timestep = {times_pred_all_pdDTI.freq}')

# middle of analysis period (2 July in case of 1 Jan - 1 Jan), as in hatyan.
dood_date_mid = times_pred_all_pdDTI[[len(times_pred_all_pdDTI)//2]]
Expand Down Expand Up @@ -477,8 +492,10 @@ def prediction_singleperiod(comp:pd.DataFrame, times:pd.DatetimeIndex, hatyan_se
ts_prediction_pd = pd.DataFrame({'values': ht_res},index=times_pred_all_pdDTI)
logger.info('PREDICTION finished')

# add timezone to timeseries again
ts_prediction_pd.index = ts_prediction_pd.index.tz_localize(tzone_comp)
# add timezone to prediction: first interpret times as tzone of components, then convert to timezone of prediction
if tzone_convert:
ts_prediction_pd = ts_prediction_pd.tz_localize(tzone_comp)
ts_prediction_pd = ts_prediction_pd.tz_convert(tzone_pred)

return ts_prediction_pd

Expand All @@ -487,8 +504,9 @@ def prediction_singleperiod(comp:pd.DataFrame, times:pd.DatetimeIndex, hatyan_se
def prediction(comp, times=None, timestep=None):
"""
generates a tidal prediction from a set of components A and phi values.
The component set has the same timezone as the timeseries used to create it,
therefore the resulting prediction will also be in that original timezone.
The component set has the same timezone as the timeseries used to create it.
If times is timezone-naive the resulting prediction will be in component timezone.
If times is timezone-aware the resulting prediction will be converted to that timezone.
If a components dataframe contains multiple column levels (multiple periods),
the prediction is a concatenation of predictions of all periods (based on the respective A/phi values).
Expand Down Expand Up @@ -537,15 +555,17 @@ def prediction(comp, times=None, timestep=None):
comp_oneyear = comp.loc[:,(slice(None),period_dt)]
comp_oneyear.columns = comp_oneyear.columns.droplevel(1)
if period_dt.freqstr in ['A-DEC','Y-DEC']: #year frequency
tstart = pd.Timestamp(period_dt.year,1,1)
tstop = pd.Timestamp(period_dt.year+1,1,1) - pd.Timedelta(tstep)
tstart = pd.Timestamp(period_dt.year, 1, 1)
tstop = pd.Timestamp(period_dt.year+1, 1, 1) - pd.Timedelta(tstep)
elif period_dt.freqstr in ['M']: #month frequency
tstart = period_dt.to_timestamp()
tstop = period_dt.to_timestamp() + pd.Timedelta(days=period_dt.days_in_month) - pd.Timedelta(tstep)
else:
raise Exception(f'unknown freqstr: {period_dt.freqstr}')
# generate date range and do prediction
times_pred = pd.date_range(start=tstart, end=tstop, freq=tstep, unit="us")
metadata_comp = metadata_from_obj(comp)
tzone_comp = metadata_comp.pop('tzone', None)
times_pred = pd.date_range(start=tstart, end=tstop, freq=tstep, unit="us", tz=tzone_comp)
ts_prediction_oneperiod = prediction_singleperiod(comp=comp_oneyear, times=times_pred, hatyan_settings=hatyan_settings)
ts_prediction_perperiod_list.append(ts_prediction_oneperiod)
ts_prediction = pd.concat(ts_prediction_perperiod_list)
Expand Down
4 changes: 2 additions & 2 deletions hatyan/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,7 @@ def write_dia_ts(ts, filename, headerformat='dia'):
if quantity != 'WATHTBRKD': #TODO: remove this after hardcoding in this function is fixed
raise ValueError(f'write_dia() expects quantity WATHTBRKD, but {quantity} was provided.')
tzone = ts.index.tz
if tzone != pytz.FixedOffset(60):
if tzone not in [pytz.FixedOffset(60), dt.timezone(dt.timedelta(seconds=3600))]:
raise ValueError(f'write_dia() expects tzone pytz.FixedOffset(60) (since tzone is not defined in dia-header), but {tzone} was provided.')

if vertref == 'NAP':
Expand Down Expand Up @@ -888,7 +888,7 @@ def write_dia_HWLW(ts_ext, filename, headerformat='dia'):
if quantity != 'WATHTBRKD': #TODO: remove this after hardcoding in this function is fixed
raise ValueError(f'write_dia() expects quantity WATHTBRKD, but {quantity} was provided.')
tzone = ts_ext.index.tz
if tzone != pytz.FixedOffset(60):
if tzone not in [pytz.FixedOffset(60), dt.timezone(dt.timedelta(seconds=3600))]:
raise ValueError(f'write_dia() expects tzone pytz.FixedOffset(60) (since tzone is not defined in dia-header), but {tzone} was provided.')

if vertref == 'NAP':
Expand Down
4 changes: 2 additions & 2 deletions tests/examples/export_freq_v0uf_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ def get_hatyan55_values(file_hatyan55):
drop_idx = drop_idx+list(range(x,y+1))

hatyan55_freq = hatyan55_freq_raw.drop(drop_idx).reset_index(drop=True)
hatyan55_freq = hatyan55_freq.apply(pd.to_numeric,errors='ignore')
hatyan55_freq = hatyan55_freq.set_index('NAAM')
hatyan55_freq = hatyan55_freq.apply(pd.to_numeric)
hatyan55_freq.index.name=None

#######################
Expand All @@ -89,7 +89,7 @@ def get_hatyan55_values(file_hatyan55):
drop_idx = drop_idx+list(range(x,y+1))

hatyan55_v0uf_1y = hatyan55_v0uf_raw_1y.drop(drop_idx).reset_index(drop=True)
hatyan55_v0uf_1y = hatyan55_v0uf_1y.apply(pd.to_numeric,errors='ignore')
hatyan55_v0uf_1y = hatyan55_v0uf_1y.apply(pd.to_numeric)
hatyan55_v0uf_1y.index = hatyan55_freq.index

hatyan55_v0u[year] = hatyan55_v0uf_1y['VU-FAKTOR']
Expand Down
77 changes: 63 additions & 14 deletions tests/test_analysis_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,30 +506,77 @@ def test_prediction_comp_and_times_different_timezones():
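it is possible to supply a timezone via times, but the comp dataframe also contains a timezone already.
The timezone of times is leading: the prediction is computed in the timezone of the components and then converted to the timezone of times.
From the below test, each prediction therefore carries the timezone of the supplied times (naive, UTC+01:00 or UTC+00:00), while the predicted values at matching instants are equal.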
https://github.com/Deltares/hatyan/issues/334
"""

current_station = 'VLISSGN'

const_list = hatyan.get_const_list_hatyan('year') # 94 constituents

file_data_comp0 = os.path.join(dir_testdata,f'{current_station}_obs1.txt')
file_comp = os.path.join(dir_testdata,f'{current_station}_obs1.txt')
ts_meas = hatyan.read_dia(filename=file_comp, station=current_station)

times_pred1 = slice("2019-01-01","2020-01-01", "10min")
times_pred2 = pd.date_range("2019-01-01","2020-01-01", freq="10min", unit="us", tz="UTC+02:00")

ts_measurements_group0 = hatyan.read_dia(filename=file_data_comp0, station=current_station)
comp_naive = hatyan.analysis(ts=ts_meas, const_list=const_list)
comp_naive.attrs["tzone"] = None
comp_met = hatyan.analysis(ts=ts_meas, const_list=const_list)

comp_frommeasurements_avg_group0 = hatyan.analysis(ts=ts_measurements_group0, const_list=const_list)
times_naive = slice("2019-01-01","2020-01-01", "10min")
times_met = pd.date_range("2019-01-01","2020-01-01", freq="10min", tz="UTC+01:00")
times_utc = pd.date_range("2019-01-01","2020-01-01", freq="10min", tz="UTC+00:00")

#prediction and validation
ts_prediction1 = hatyan.prediction(comp=comp_frommeasurements_avg_group0, times=times_pred1)
ts_prediction2 = hatyan.prediction(comp=comp_frommeasurements_avg_group0, times=times_pred2)
pred_naive = hatyan.prediction(comp=comp_naive, times=times_naive)
pred_met = hatyan.prediction(comp=comp_met, times=times_met)
pred_utc = hatyan.prediction(comp=comp_met, times=times_utc)

assert ts_prediction1.index.tz == pytz.FixedOffset(60)
assert ts_prediction2.index.tz == pytz.FixedOffset(60)
assert ts_prediction1.index[0] == pd.Timestamp('2019-01-01 00:00:00 +01:00')
assert ts_prediction2.index[0] == pd.Timestamp('2018-12-31 23:00:00 +01:00')
assert ((ts_prediction1-ts_prediction2).dropna()["values"] < 1e-9).all()
assert pred_naive.index.tz is None
assert pred_naive.index[0] == pd.Timestamp('2019-01-01 00:00:00')
assert pred_met.index[0] == pd.Timestamp('2019-01-01 00:00:00+0100')
assert pred_met.index.tz == dt.timezone(dt.timedelta(seconds=3600))
assert pred_met.index[0] == pd.Timestamp('2019-01-01 00:00:00+0100')
assert pred_utc.index.tz == dt.timezone.utc
assert pred_utc.index[0] == pd.Timestamp('2019-01-01 00:00:00+0000')
assert ((pred_naive - pred_met.tz_localize(None)).dropna()["values"] < 1e-9).all()
assert ((pred_utc - pred_met).dropna()["values"] < 1e-9).all()


@pytest.mark.unittest
def test_prediction_times_tznaive_comp_tzaware(caplog):
    """
    Predicting with timezone-naive times and timezone-aware components
    should log a warning stating that the times are interpreted in the
    timezone of the components.
    https://github.com/Deltares/hatyan/issues/334
    """
    # minimal component set with the metadata attributes set by hand
    comp = pd.DataFrame(
        {"A": [1, 0.5, 0.2], "phi_deg": [10, 15, 20]},
        index=["M2", "M4", "S2"],
    )
    comp.attrs.update(
        nodalfactors=True,
        fu_alltimes=True,
        xfac=False,
        source="schureman",
        tzone="UTC+01:00",
    )

    # no tz argument, so this DatetimeIndex is timezone-naive
    times_naive = pd.date_range("2020-01-01", "2020-01-02", freq="10min")
    hatyan.prediction(comp, times=times_naive)

    expected_warning = (
        "provided times are timezone-naive and provided components are "
        "timezone-aware. The times are being interpreted as if they would "
        "have the same timezone as the components: UTC+01:00"
    )
    assert expected_warning in caplog.text


@pytest.mark.unittest
def test_prediction_raise_mixed_tznaive_tzaware():
    """
    Predicting with timezone-aware times and timezone-naive components
    should raise a ValueError with an explanatory message.
    https://github.com/Deltares/hatyan/issues/334
    """
    # minimal component set, explicitly marked as timezone-naive
    comp = pd.DataFrame(
        {"A": [1, 0.5, 0.2], "phi_deg": [10, 15, 20]},
        index=["M2", "M4", "S2"],
    )
    comp.attrs.update(
        nodalfactors=True,
        fu_alltimes=True,
        xfac=False,
        source="schureman",
        tzone=None,
    )

    # the "+00:00" offsets make this DatetimeIndex timezone-aware
    times_aware = pd.date_range(
        "2020-01-01 00:00 +00:00",
        "2020-01-02 00:00 +00:00",
        freq="10min",
    )
    expected_message = (
        "provided times are timezone-aware and components are timezone-naive, "
        "this cannot be processed."
    )

    with pytest.raises(ValueError) as excinfo:
        hatyan.prediction(comp, times=times_aware)
    # checked after the with-block: inside it, the line would be unreachable
    assert expected_message in str(excinfo.value)


@pytest.mark.unittest
Expand Down Expand Up @@ -571,6 +618,7 @@ def test_prediction_perperiod_month():
assert np.allclose(ts_pred_atonce["values"].values[:10], expected_atonce)
assert np.allclose(ts_pred_allmonths["values"].values[:10], expected_allmonths)
assert len(ts_pred_atonce) == len(ts_pred_allmonths)
assert ts_pred_allmonths.index.tz == pytz.FixedOffset(60)


@pytest.mark.unittest
Expand All @@ -594,6 +642,7 @@ def test_prediction_perperiod_year():
assert np.allclose(ts_pred_atonce["values"].values[:10], expected_atonce)
assert np.allclose(ts_pred_allyears["values"].values[:10], expected_allyears)
assert len(ts_pred_atonce) == len(ts_pred_allyears)
assert ts_pred_allyears.index.tz == pytz.FixedOffset(60)


@pytest.mark.unittest
Expand Down

0 comments on commit 4df6e7f

Please sign in to comment.