@@ -718,7 +718,7 @@ def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME):
718718 return parse_pattern (format ).apply (time , locale )
719719
720720
721- def format_skeleton (skeleton , datetime = None , tzinfo = None , locale = LC_TIME ):
721+ def format_skeleton (skeleton , datetime = None , tzinfo = None , fuzzy = True , locale = LC_TIME ):
722722 r"""Return a time and/or date formatted according to the given pattern.
723723
724724 The skeletons are defined in the CLDR data and provide more flexibility
@@ -731,6 +731,9 @@ def format_skeleton(skeleton, datetime=None, tzinfo=None, locale=LC_TIME):
731731 u'dim. 1 avr.'
732732 >>> format_skeleton('MMMEd', t, locale='en')
733733 u'Sun, Apr 1'
734+ >>> format_skeleton('yMMd', t, locale='fi') # yMMd is not in the Finnish locale; yMd gets used
735+ u'1.4.2007'
736+
734737
735738 After the skeleton is resolved to a pattern `format_datetime` is called so
736739 all timezone processing etc is the same as for that.
@@ -739,9 +742,13 @@ def format_skeleton(skeleton, datetime=None, tzinfo=None, locale=LC_TIME):
739742 :param datetime: the ``time`` or ``datetime`` object; if `None`, the current
740743 time in UTC is used
741744 :param tzinfo: the time-zone to apply to the time for display
745+ :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
746+ close enough to it.
742747 :param locale: a `Locale` object or a locale identifier
743748 """
744749 locale = Locale .parse (locale )
750+ if fuzzy and skeleton not in locale .datetime_skeletons :
751+ skeleton = match_skeleton (skeleton , locale .datetime_skeletons )
745752 format = locale .datetime_skeletons [skeleton ]
746753 return format_datetime (datetime , format , tzinfo , locale )
747754
@@ -873,7 +880,7 @@ def _format_fallback_interval(start, end, skeleton, tzinfo, locale):
873880 )
874881
875882
876- def format_interval (start , end , skeleton , tzinfo = None , locale = LC_TIME ):
883+ def format_interval (start , end , skeleton , tzinfo = None , fuzzy = True , locale = LC_TIME ):
877884 """
878885 Format an interval between two instants according to the locale's rules.
879886
@@ -896,6 +903,8 @@ def format_interval(start, end, skeleton, tzinfo=None, locale=LC_TIME):
896903 :param end: Second instant (datetime/date/time)
897904 :param skeleton: The "skeleton format" to use for formatting.
898905 :param tzinfo: tzinfo to use (if none is already attached)
906+ :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
907+ close enough to it.
899908 :param locale: A locale object or identifier.
900909 :return: Formatted interval
901910 """
@@ -911,16 +920,21 @@ def format_interval(start, end, skeleton, tzinfo=None, locale=LC_TIME):
911920 # > starting in the current locale and then following the locale fallback
912921 # > chain up to, but not including root.
913922
914- if skeleton not in locale .interval_formats :
923+ interval_formats = locale .interval_formats
924+
925+ if skeleton not in interval_formats :
915926 # > If no match was found from the previous step, check what the closest
916927 # > match is in the fallback locale chain, as in availableFormats. That
917928 # > is, this allows for adjusting the string value field's width,
918929 # > including adjusting between "MMM" and "MMMM", and using different
919930 # > variants of the same field, such as 'v' and 'z'.
920- # TODO: Implement closest-match instead of immediately falling back
921- return _format_fallback_interval (start , end , skeleton , tzinfo , locale )
931+ if fuzzy :
932+ skeleton = match_skeleton (skeleton , interval_formats )
933+ if not skeleton : # Still no match whatsoever?
934+ # > Otherwise, format the start and end datetime using the fallback pattern.
935+ return _format_fallback_interval (start , end , skeleton , tzinfo , locale )
922936
923- skel_formats = locale . interval_formats [skeleton ]
937+ skel_formats = interval_formats [skeleton ]
924938
925939 start_fmt = DateTimeFormat (start , locale = locale )
926940 end_fmt = DateTimeFormat (end , locale = locale )
@@ -1444,3 +1458,71 @@ def split_interval_pattern(pattern):
14441458 parts [- 1 ].append ((tok_type , tok_value ))
14451459
14461460 return [untokenize_pattern (tokens ) for tokens in parts ]
1461+
1462+
def match_skeleton(skeleton, options, allow_different_fields=False):
    """
    Pick the option that most closely resembles the given datetime skeleton.

    Closeness is scored per pattern field, following the rules outlined in
    the TR35 document; the option with the lowest total score wins.

    >>> match_skeleton('yMMd', ('yMd', 'yMMMd'))
    'yMd'

    >>> match_skeleton('yMMd', ('jyMMd',), allow_different_fields=True)
    'jyMMd'

    >>> bool(match_skeleton('yMMd', ('qyMMd',), allow_different_fields=False))
    False

    :param skeleton: The skeleton to match
    :type skeleton: str
    :param options: An iterable of other skeletons to match against
    :type options: Iterable[str]
    :param allow_different_fields: When false (the default), an option is
                                   rejected outright if it contains a field
                                   the skeleton lacks, or lacks a field the
                                   skeleton contains.  When true, such an
                                   option is kept but heavily penalised.
    :type allow_different_fields: bool
    :return: The closest skeleton match, or if no match was found, None.
    :rtype: str|None
    """

    # TODO: maybe implement pattern expansion?

    # Based on the implementation in
    # http://source.icu-project.org/repos/icu/icu4j/trunk/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java

    # Drop falsy entries (a None key may be present when `interval_formats`
    # is passed in) and order the rest so that ties resolve deterministically.
    candidates = [candidate for candidate in options if candidate]
    candidates.sort()

    # No candidate knows about 'z'; retry the comparison with 'v' instead.
    if 'z' in skeleton and all('z' not in candidate for candidate in candidates):
        skeleton = skeleton.replace('z', 'v')

    wanted_widths = dict(tok[1] for tok in tokenize_pattern(skeleton) if tok[0] == "field")

    def score(candidate):
        # Total distance between `candidate` and the requested skeleton,
        # or None when the candidate has to be rejected altogether.
        candidate_widths = dict(tok[1] for tok in tokenize_pattern(candidate) if tok[0] == "field")
        total = 0
        for char in PATTERN_CHARS:
            wanted = wanted_widths.get(char, 0)
            offered = candidate_widths.get(char, 0)
            if wanted == offered:
                continue
            if offered == 0 or wanted == 0:
                if not allow_different_fields:  # This one is not okay
                    return None
                total += 0x1000  # Magic weight constant for "entirely different fields"
            elif char == 'M' and (wanted > 2) != (offered > 2):
                total += 0x100  # Magic weight for "text turns into a number"
            else:
                total += abs(wanted - offered)
        return total

    best_skeleton = None
    best_distance = None
    for candidate in candidates:
        distance = score(candidate)
        if distance is None:  # Rejected because of "allow_different_fields"
            continue

        if best_skeleton is None or distance < best_distance:
            best_skeleton, best_distance = candidate, distance

        if distance == 0:  # Found a perfect match!
            break

    return best_skeleton
0 commit comments