Skip to content

Commit 3560b00

Browse files
committed
1 parent 3a51c41 commit 3560b00

File tree

1 file changed

+88
-6
lines changed

1 file changed

+88
-6
lines changed

babel/dates.py

Lines changed: 88 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME):
718718
return parse_pattern(format).apply(time, locale)
719719

720720

721-
def format_skeleton(skeleton, datetime=None, tzinfo=None, locale=LC_TIME):
721+
def format_skeleton(skeleton, datetime=None, tzinfo=None, fuzzy=True, locale=LC_TIME):
722722
r"""Return a time and/or date formatted according to the given pattern.
723723
724724
The skeletons are defined in the CLDR data and provide more flexibility
@@ -731,6 +731,9 @@ def format_skeleton(skeleton, datetime=None, tzinfo=None, locale=LC_TIME):
731731
u'dim. 1 avr.'
732732
>>> format_skeleton('MMMEd', t, locale='en')
733733
u'Sun, Apr 1'
734+
>>> format_skeleton('yMMd', t, locale='fi') # yMMd is not in the Finnish locale; yMd gets used
735+
u'1.4.2007'
736+
734737
735738
After the skeleton is resolved to a pattern `format_datetime` is called so
736739
all timezone processing etc is the same as for that.
@@ -739,9 +742,13 @@ def format_skeleton(skeleton, datetime=None, tzinfo=None, locale=LC_TIME):
739742
:param datetime: the ``time`` or ``datetime`` object; if `None`, the current
740743
time in UTC is used
741744
:param tzinfo: the time-zone to apply to the time for display
745+
:param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
746+
close enough to it.
742747
:param locale: a `Locale` object or a locale identifier
743748
"""
744749
locale = Locale.parse(locale)
750+
if fuzzy and skeleton not in locale.datetime_skeletons:
751+
skeleton = match_skeleton(skeleton, locale.datetime_skeletons)
745752
format = locale.datetime_skeletons[skeleton]
746753
return format_datetime(datetime, format, tzinfo, locale)
747754

@@ -873,7 +880,7 @@ def _format_fallback_interval(start, end, skeleton, tzinfo, locale):
873880
)
874881

875882

876-
def format_interval(start, end, skeleton, tzinfo=None, locale=LC_TIME):
883+
def format_interval(start, end, skeleton, tzinfo=None, fuzzy=True, locale=LC_TIME):
877884
"""
878885
Format an interval between two instants according to the locale's rules.
879886
@@ -896,6 +903,8 @@ def format_interval(start, end, skeleton, tzinfo=None, locale=LC_TIME):
896903
:param end: Second instant (datetime/date/time)
897904
:param skeleton: The "skeleton format" to use for formatting.
898905
:param tzinfo: tzinfo to use (if none is already attached)
906+
:param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
907+
close enough to it.
899908
:param locale: A locale object or identifier.
900909
:return: Formatted interval
901910
"""
@@ -911,16 +920,21 @@ def format_interval(start, end, skeleton, tzinfo=None, locale=LC_TIME):
911920
# > starting in the current locale and then following the locale fallback
912921
# > chain up to, but not including root.
913922

914-
if skeleton not in locale.interval_formats:
923+
interval_formats = locale.interval_formats
924+
925+
if skeleton not in interval_formats:
915926
# > If no match was found from the previous step, check what the closest
916927
# > match is in the fallback locale chain, as in availableFormats. That
917928
# > is, this allows for adjusting the string value field's width,
918929
# > including adjusting between "MMM" and "MMMM", and using different
919930
# > variants of the same field, such as 'v' and 'z'.
920-
# TODO: Implement closest-match instead of immediately falling back
921-
return _format_fallback_interval(start, end, skeleton, tzinfo, locale)
931+
if fuzzy:
932+
skeleton = match_skeleton(skeleton, interval_formats)
933+
if not skeleton: # Still no match whatsoever?
934+
# > Otherwise, format the start and end datetime using the fallback pattern.
935+
return _format_fallback_interval(start, end, skeleton, tzinfo, locale)
922936

923-
skel_formats = locale.interval_formats[skeleton]
937+
skel_formats = interval_formats[skeleton]
924938

925939
start_fmt = DateTimeFormat(start, locale=locale)
926940
end_fmt = DateTimeFormat(end, locale=locale)
@@ -1444,3 +1458,71 @@ def split_interval_pattern(pattern):
14441458
parts[-1].append((tok_type, tok_value))
14451459

14461460
return [untokenize_pattern(tokens) for tokens in parts]
1461+
1462+
1463+
def match_skeleton(skeleton, options, allow_different_fields=False):
    """
    Pick, from `options`, the datetime skeleton that is closest to `skeleton`.

    The comparison follows the skeleton matching rules of the TR35 document:
    every candidate is scored field by field and the lowest score wins.

    >>> match_skeleton('yMMd', ('yMd', 'yMMMd'))
    'yMd'

    >>> match_skeleton('yMMd', ('jyMMd',), allow_different_fields=True)
    'jyMMd'

    >>> bool(match_skeleton('yMMd', ('qyMMd',), allow_different_fields=False))
    False

    :param skeleton: The skeleton to match
    :type skeleton: str
    :param options: An iterable of other skeletons to match against
    :type options: Iterable[str]
    :param allow_different_fields: If false (the default), a candidate is
                                   rejected as soon as it has a field the
                                   requested skeleton lacks, or the other way
                                   around. If true, such a candidate is kept
                                   but heavily penalised.
    :type allow_different_fields: bool
    :return: The closest skeleton match, or if no match was found, None.
    :rtype: str|None
    """

    # TODO: maybe implement pattern expansion?

    # Based on the implementation in
    # http://source.icu-project.org/repos/icu/icu4j/trunk/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java

    # Drop falsy entries (a mapping such as `interval_formats` may carry a
    # None key) and order the rest so that ties are resolved deterministically.
    candidates = [candidate for candidate in options if candidate]
    candidates.sort()

    # No candidate knows about 'z': look for the 'v' variant instead.
    if 'z' in skeleton and all('z' not in candidate for candidate in candidates):
        skeleton = skeleton.replace('z', 'v')

    wanted_widths = dict(tok[1] for tok in tokenize_pattern(skeleton) if tok[0] == "field")
    closest = None
    closest_distance = None

    for candidate in candidates:
        offered_widths = dict(tok[1] for tok in tokenize_pattern(candidate) if tok[0] == "field")
        penalty = 0

        for char in PATTERN_CHARS:
            wanted = wanted_widths.get(char, 0)
            offered = offered_widths.get(char, 0)
            if wanted == offered:
                continue
            if wanted == 0 or offered == 0:
                # The field exists on one side only.
                if not allow_different_fields:
                    break  # Candidate rejected; the `else` below is skipped.
                penalty += 0x1000  # Magic weight constant for "entirely different fields"
            elif char == 'M' and (wanted > 2) != (offered > 2):
                penalty += 0x100  # Magic weight for "text turns into a number"
            else:
                penalty += abs(wanted - offered)
        else:
            # Reached only when the candidate survived every field check.
            if closest is None or penalty < closest_distance:
                closest = candidate
                closest_distance = penalty

            if penalty == 0:  # Found a perfect match!
                break

    return closest

0 commit comments

Comments
 (0)