Skip to content

Commit 4a992a9

Browse files
authored
Merge pull request #129 from dh-tech/feature/uncertain-numbers
Implement UnInt and UnDelta for uncertain date durations
2 parents b91fe3c + 02da2b0 commit 4a992a9

File tree

5 files changed

+376
-27
lines changed

5 files changed

+376
-27
lines changed

src/undate/converters/calendars/gregorian.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ class GregorianDateConverter(BaseCalendarConverter):
1313
#: calendar
1414
calendar_name: str = "Gregorian"
1515

16-
#: known non-leap year
16+
#: arbitrary known non-leap year
1717
NON_LEAP_YEAR: int = 2022
18+
#: arbitrary known leap year
19+
LEAP_YEAR: int = 2024
1820

1921
def min_month(self) -> int:
2022
"""First month for the Gregorian calendar."""
@@ -38,6 +40,7 @@ def max_day(self, year: int, month: int) -> int:
3840
_, max_day = monthrange(year, month)
3941
else:
4042
# if year and month are unknown, return maximum possible
43+
# TODO: should this return an IntervalRange?
4144
max_day = 31
4245

4346
return max_day

src/undate/date.py

Lines changed: 141 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from enum import IntEnum
2+
from dataclasses import dataclass, replace
3+
import operator
24

35
# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
4-
from typing import Optional, Union
6+
from typing import Optional, Union, Iterable
57

68
import numpy as np
79

@@ -29,6 +31,144 @@ def days(self) -> int:
2931
return int(self.astype("datetime64[D]").astype("int"))
3032

3133

34+
@dataclass
35+
class UnInt:
36+
"""An uncertain integer intended for use with uncertain durations (:class:`UnDelta`),
37+
to convey a range of possible integer values between an upper
38+
and lower bound (both inclusive). Supports comparison, addition and subtraction,
39+
checking if a value is included in the range, and iterating over numbers
40+
included in the range.
41+
"""
42+
43+
lower: int
44+
upper: int
45+
46+
def __post_init__(self):
47+
# validate that lower value is less than upper
48+
if not self.lower < self.upper:
49+
raise ValueError(
50+
f"Lower value ({self.lower}) must be less than upper ({self.upper})"
51+
)
52+
53+
def __iter__(self) -> Iterable:
54+
# yield all integers in range from lower to upper, inclusive
55+
yield from range(self.lower, self.upper + 1)
56+
57+
def __gt__(self, other: object) -> bool:
58+
match other:
59+
case int():
60+
return self.lower > other
61+
case UnInt():
62+
return self.lower > other.upper
63+
case _:
64+
return NotImplemented
65+
66+
def __lt__(self, other: object) -> bool:
67+
match other:
68+
case int():
69+
return self.upper < other
70+
case UnInt():
71+
return self.upper < other.lower
72+
case _:
73+
return NotImplemented
74+
75+
def __contains__(self, other: object) -> bool:
76+
match other:
77+
case int():
78+
return other >= self.lower and other <= self.upper
79+
case UnInt():
80+
return other.lower >= self.lower and other.upper <= self.upper
81+
case _:
82+
# unsupported type: return false
83+
return False
84+
85+
def _replace_with(self, other_lower, other_upper, op):
86+
"""Create and return a new instance of UnInt using the specified
87+
operator (e.g. add, subtract) and other values to modify the values in
88+
the current UnInt instance."""
89+
return replace(
90+
self, lower=op(self.lower, other_lower), upper=op(self.upper, other_upper)
91+
)
92+
93+
def __add__(self, other: object) -> "UnInt":
94+
match other:
95+
case int():
96+
# increase both values by the added amount
97+
add_values = (other, other)
98+
case UnInt():
99+
# add other lower value to current lower and other upper
100+
# to current upper to include the largest range of possible values
101+
# (when calculating with uncertain values, the uncertainty increases)
102+
add_values = (other.lower, other.upper)
103+
case _:
104+
return NotImplemented
105+
106+
return self._replace_with(*add_values, operator.add)
107+
108+
def __sub__(self, other) -> "UnInt":
109+
match other:
110+
case int():
111+
# decrease both values by the subtracted amount
112+
sub_values = (other, other)
113+
case UnInt():
114+
# to determine the largest range of possible values,
115+
# subtract the other upper value from current lower
116+
# and other lower value from current upper
117+
sub_values = (other.upper, other.lower)
118+
case _:
119+
return NotImplemented
120+
121+
return self._replace_with(*sub_values, operator.sub)
122+
123+
124+
@dataclass
125+
class UnDelta:
126+
"""
127+
An uncertain timedelta, for durations where the number of days is uncertain.
128+
Initialize with a list of possible durations in days as integers, which are used
129+
to calculate a value for duration in :attr:`days` as an
130+
instance of :class:`UnInt`.
131+
"""
132+
133+
# NOTE: we will probably need other timedelta-like logic here besides days...
134+
135+
#: possible durations days, as an instance of :class:`UnInt`
136+
days: UnInt
137+
138+
def __init__(self, *days: int):
139+
if len(days) < 2:
140+
raise ValueError(
141+
"Must specify at least two values for an uncertain duration"
142+
)
143+
self.days = UnInt(min(days), max(days))
144+
145+
def __repr__(self):
146+
# customize string representation for simpler notation; default
147+
# specifies full UnInt initialization with upper and lower keywords
148+
return f"{self.__class__.__name__}(days=[{self.days.lower},{self.days.upper}])"
149+
150+
def __eq__(self, other: object) -> bool:
151+
# is an uncertain duration ever *equal* another, even if the values are the same?
152+
# for now, make the assumption that we only want identity equality
153+
# and not value equality; perhaps in future we can revisit
154+
# or add functions to check value equality / equivalence / similarity
155+
return other is self
156+
157+
def __lt__(self, other: object) -> bool:
158+
match other:
159+
case Timedelta() | UnDelta():
160+
return self.days < other.days
161+
case _:
162+
return NotImplemented
163+
164+
def __gt__(self, other: object) -> bool:
165+
match other:
166+
case Timedelta() | UnDelta():
167+
return self.days > other.days
168+
case _:
169+
return NotImplemented
170+
171+
32172
#: timedelta for single day
33173
ONE_DAY = Timedelta(1) # ~ equivalent to datetime.timedelta(days=1)
34174
#: timedelta for a single year (non-leap year)

src/undate/undate.py

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from typing import Dict, Optional, Union
2121

2222
from undate.converters.base import BaseDateConverter
23-
from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta
23+
from undate.date import ONE_DAY, Date, DatePrecision, Timedelta, UnDelta
2424

2525

2626
class Calendar(StrEnum):
@@ -459,13 +459,14 @@ def _get_date_part(self, part: str) -> Optional[str]:
459459
value = self.initial_values.get(part)
460460
return str(value) if value else None
461461

462-
def duration(self) -> Timedelta:
462+
def duration(self) -> Timedelta | UnDelta:
463463
"""What is the duration of this date?
464464
Calculate based on earliest and latest date within range,
465465
taking into account the precision of the date even if not all
466466
parts of the date are known. Note that durations are inclusive
467467
(i.e., a closed interval) and include both the earliest and latest
468-
date rather than the difference between them."""
468+
date rather than the difference between them. Returns a :class:`undate.date.Timedelta` when
469+
possible, and an :class:`undate.date.UnDelta` when the duration is uncertain."""
469470

470471
# if precision is a single day, duration is one day
471472
# no matter when it is or what else is known
@@ -476,25 +477,51 @@ def duration(self) -> Timedelta:
476477
# calculate month duration within a single year (not min/max)
477478
if self.precision == DatePrecision.MONTH:
478479
latest = self.latest
480+
# if year is unknown, calculate month duration in
481+
# leap year and non-leap year, in case length varies
479482
if not self.known_year:
480-
# if year is unknown, calculate month duration in
481-
# a single year
482-
latest = Date(self.earliest.year, self.latest.month, self.latest.day)
483-
484-
# latest = datetime.date(
485-
# self.earliest.year, self.latest.month, self.latest.day
486-
# )
487-
delta = latest - self.earliest + ONE_DAY
488-
# month duration can't ever be more than 31 days
489-
# (could we ever know if it's smaller?)
490-
491-
# if granularity == month but not known month, duration = 31
492-
if delta.astype(int) > 31:
493-
return ONE_MONTH_MAX
494-
return delta
483+
# TODO: should leap-year specific logic shift to the calendars,
484+
# since it works differently depending on the calendar?
485+
possible_years = [
486+
self.calendar_converter.LEAP_YEAR,
487+
self.calendar_converter.NON_LEAP_YEAR,
488+
]
489+
# TODO: handle partially known years like 191X,
490+
# switch to representative years (depends on calendar)
491+
# (to be implemented as part of ambiguous year duration)
492+
else:
493+
# otherwise, get possible durations for all possible months
494+
# for a known year
495+
possible_years = [self.earliest.year]
496+
497+
# for every possible month and year, get max days for that month,
498+
possible_max_days = set()
499+
# appease mypy, which says month values could be None here;
500+
# Date object allows optional month, but earliest/latest initialization
501+
# should always be day-precision dates
502+
if self.earliest.month is not None and self.latest.month is not None:
503+
for possible_month in range(self.earliest.month, self.latest.month + 1):
504+
for year in possible_years:
505+
possible_max_days.add(
506+
self.calendar_converter.max_day(year, possible_month)
507+
)
508+
509+
# if there is more than one possible value for month length,
510+
# whether due to leap year / non-leap year or ambiguous month,
511+
# return an uncertain delta
512+
if len(possible_max_days) > 1:
513+
return UnDelta(*possible_max_days)
514+
515+
# otherwise, calculate timedelta normally based on maximum day
516+
max_day = list(possible_max_days)[0]
517+
latest = Date(self.earliest.year, self.earliest.month, max_day)
518+
519+
return latest - self.earliest + ONE_DAY
520+
521+
# TODO: handle year precision + unknown/partially known year
522+
# (will be handled in separate branch)
495523

496524
# otherwise, calculate based on earliest/latest range
497-
498525
# subtract earliest from latest and add a day to count start day
499526
return self.latest - self.earliest + ONE_DAY
500527

0 commit comments

Comments
 (0)