From eccd2151f82ae27fad7e5181f0920a056cb36616 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 31 Oct 2017 15:57:41 -0400 Subject: Initial implementation of isoparse --- dateutil/parser/isoparser.py | 147 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 dateutil/parser/isoparser.py (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py new file mode 100644 index 0000000..36d852e --- /dev/null +++ b/dateutil/parser/isoparser.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +from datetime import datetime, timedelta +from dateutil import tz + +import re + +class Isoparser(object): + def __init__(self, sep='T'): + if len(sep) != 1: + raise ValueError('Separator must be a single character') + + self._sep = sep + + def _read_int(self, cstr, pos, comp_len): + out = int(cstr[pos:pos + comp_len]) + return out, pos + comp_len + + def isoparse(self, dt_str): + dt_str = getattr(dt_str, 'read', lambda: dt_str)() + try: + return self.isoparse_quick(dt_str) + except ValueError as e: + raise + + _ISO_LENGTHS = (4, 2, 2, 2, 2, 2) # Lengths of ISO components + _MICROSECOND_END_REGEX = re.compile('[-+Z]+') + def isoparse_quick(self, dt_str): + """ + This handles the most common subset of ISO-8601 date times, with the + following formats: + + - ``YYYY`` + - ``YYYYMM`` + - ``YYYY-MM`` + - ``YYYYMMDD`` + - ``YYYY-MM-DD`` + - ``YYYYMMDDTHH`` + - ``YYYYMMDDTHHMM`` + - ``YYYY-MM-DDTHH`` + - ``YYYYMMDDTHH:MM`` + - ``YYYYMMDDTHHMMSS`` + - ``YYYY-MM-DDTHHMM`` + - ``YYYY-MM-DDTHH:MM`` + - ``YYYYMMDDTHH:MM:SS`` + - ``YYYY-MM-DDTHHMMSS`` + - ``YYYYMMDDTHHMMSS.fff`` + - ``YYYY-MM-DDTHH:MM:SS`` + - ``YYYYMMDDTHH:MM:SS.fff`` + - ``YYYY-MM-DDTHHMMSS.fff`` + - ``YYYYMMDDTHHMMSS.ffffff`` + - ``YYYY-MM-DDTHH:MM:SS.fff`` + - ``YYYYMMDDTHH:MM:SS.ffffff`` + - ``YYYY-MM-DDTHHMMSS.ffffff`` + - ``YYYY-MM-DDTHH:MM:SS.ffffff`` + + Additionally, anything with a specified time may also have a time zone + with the forms: + + - `Z` + - `±HH:MM` + - `±HHMM` + - `±HH` + """ + len_str = len(dt_str) + + if len_str < 4: + raise ValueError('ISO string too short') + + # Parse the year first + components = [1, 1, 1, 0, 0, 0, 0, None] + pos = 0 + comp = -1 + sep = '-' + has_sep = len_str > 4 and dt_str[4] == sep + + while pos < len_str and comp <= 7: + comp += 1 + + if comp == 3: + # After component 2 has been processed, check for the separators + if dt_str[pos] != self._sep: + raise ValueError('Invalid separator in ISO string') + + pos += 1 + sep = ':' + has_sep = len_str > pos + 2 and dt_str[pos + 2] == sep + + if has_sep and comp in {1, 2, 4, 5} and dt_str[pos] == sep: + pos += 1 + + if dt_str[pos] in '+-Z': + components[-1] = self.process_tzstr(dt_str[pos:]) + pos = len_str + break + + if comp <= 5: + # First 5 components just read an integer + components[comp], pos = self._read_int(dt_str, pos, + self._ISO_LENGTHS[comp]) + continue + + if comp == 6: + # Parse the microseconds portion + if dt_str[pos] != '.': + continue + + pos += 1 + us_str = self._MICROSECOND_END_REGEX.split(dt_str[pos:pos+6], 1)[0] + + components[comp] = int(us_str) * 10**(6 - len(us_str)) + pos += len(us_str) + + if pos < len_str: + raise ValueError('String contains unknown ISO components') + + return datetime(*components) + + @classmethod + def process_tzstr(cls, tzstr, zero_as_utc=True): + if tzstr == 'Z': + return tz.tzutc() + + if 6 < len(tzstr) < 3: + raise ValueError('Time zone offset must be 1 or 3-6 characters') + + if tzstr[0] == '-': + mult = -1 + elif tzstr[0] == '+': + mult = 1 + else: + raise ValueError('Time zone offset requires sign') + + hours = int(tzstr[1:3]) + if len(tzstr) == 3: + minutes = 0 + else: + minutes = int(tzstr[(4 if tzstr[3] == ':' else 3):]) + + if zero_as_utc and hours == 0 and minutes == 0: + return tz.tzutc() + else: + return tz.tzoffset(None, mult * timedelta(hours=hours, + minutes=minutes)) + +DEFAULT_ISOPARSER = Isoparser() +def isoparse(dt_str): + return DEFAULT_ISOPARSER.isoparse(dt_str) \ No newline at end of file -- cgit v1.2.3 From a928b3bffcd8c89acaa24bca71762316689b60ff Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 31 Oct 2017 16:47:02 -0400 Subject: Refactor out date and time parsing --- dateutil/parser/isoparser.py | 122 ++++++++++++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 35 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 36d852e..b8aa28f 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from datetime import datetime, timedelta +from datetime import datetime, timedelta, time from dateutil import tz import re @@ -18,13 +18,13 @@ class Isoparser(object): def isoparse(self, dt_str): dt_str = getattr(dt_str, 'read', lambda: dt_str)() try: - return self.isoparse_quick(dt_str) - except ValueError as e: + return self.isoparse_common(dt_str) + except ValueError: raise _ISO_LENGTHS = (4, 2, 2, 2, 2, 2) # Lengths of ISO components _MICROSECOND_END_REGEX = re.compile('[-+Z]+') - def isoparse_quick(self, dt_str): + def isoparse_common(self, dt_str): """ This handles the most common subset of ISO-8601 date times, with the following formats: @@ -61,62 +61,114 @@ class Isoparser(object): - `±HHMM` - `±HH` """ + # Parse the year first + components, pos = self._parse_isodate_common(dt_str) + if len(dt_str) > pos: + if dt_str[pos] == self._sep: + components += self._parse_isotime(dt_str[pos + 1:]) + else: + raise ValueError('String contains unknown ISO components') + + return datetime(*components) + + def isoparse_uncommon(self, dt_str): + """ + This handles the uncommon subset of ISO-8601 datetime formats, including + + - ``--MM-DD`` + - ``--MMDD`` + - ``YYYY-Www`` + - ``YYYYWww`` + - ``YYYY-Www-D`` + - ``YYYYWwwD`` + - ``YYYY-DDD`` + - ``YYYYDDD`` + """ + + raise NotImplementedError + + @classmethod + def parse_isotime(cls, timestr): + return time(*cls._parse_isotime(timestr)) + + @classmethod + def _parse_isodate_common(cls, dt_str): len_str = len(dt_str) + components = [1, 1, 1] + pos = 0 if len_str < 4: raise ValueError('ISO string too short') - # Parse the year first - components = [1, 1, 1, 0, 0, 0, 0, None] - pos = 0 - comp = -1 - sep = '-' - has_sep = len_str > 4 and dt_str[4] == sep + # Year + components[0] = int(dt_str[0:4]) + pos = 4 + if pos >= len_str: + return components, pos - while pos < len_str and comp <= 7: - comp += 1 + has_sep = dt_str[pos] == '-' + if has_sep: + pos += 1 - if comp == 3: - # After component 2 has been processed, check for the separators - if dt_str[pos] != self._sep: - raise ValueError('Invalid separator in ISO string') + # Month + components[1] = int(dt_str[pos:pos + 2]) + pos += 2 - pos += 1 - sep = ':' - has_sep = len_str > pos + 2 and dt_str[pos + 2] == sep + if pos >= len_str: + return components, pos - if has_sep and comp in {1, 2, 4, 5} and dt_str[pos] == sep: - pos += 1 + if has_sep: + if dt_str[pos] != '-': + raise ValueError('Invalid separator in ISO string') + pos += 1 + + # Day + components[2] = int(dt_str[pos:pos + 2]) + return components, pos + 2 + + @classmethod + def _parse_isotime(cls, timestr): + len_str = len(timestr) + components = [0, 0, 0, 0, None] + pos = 0 + comp = -1 - if dt_str[pos] in '+-Z': - components[-1] = self.process_tzstr(dt_str[pos:]) + has_sep = len_str >= 3 and timestr[2] == ':' + + while pos < len_str and comp < 5: + comp += 1 + + if timestr[pos] in '-+Z': + components[-1] = cls.parse_tzstr(timestr[pos:]) pos = len_str break - if comp <= 5: - # First 5 components just read an integer - components[comp], pos = self._read_int(dt_str, pos, - self._ISO_LENGTHS[comp]) - continue + if comp < 3: + # Hour, minute, second + components[comp] = int(timestr[pos:pos + 2]) + pos += 2 + if has_sep and pos < len_str and timestr[pos] == ':': + pos += 1 - if comp == 6: - # Parse the microseconds portion - if dt_str[pos] != '.': + if comp == 3: + # Microsecond + if timestr[pos] != '.': continue pos += 1 - us_str = self._MICROSECOND_END_REGEX.split(dt_str[pos:pos+6], 1)[0] + us_str = cls._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6], + 1)[0] components[comp] = int(us_str) * 10**(6 - len(us_str)) pos += len(us_str) if pos < len_str: - raise ValueError('String contains unknown ISO components') + raise ValueError('Unused components in ISO string') - return datetime(*components) + return components @classmethod - def process_tzstr(cls, tzstr, zero_as_utc=True): + def parse_tzstr(cls, tzstr, zero_as_utc=True): if tzstr == 'Z': return tz.tzutc() -- cgit v1.2.3 From e077d05ab640d97b0ca426e9142cb8d81dacaf2a Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 1 Nov 2017 11:18:26 -0400 Subject: Support uncommon cases, add documentation, refactor --- dateutil/parser/isoparser.py | 339 ++++++++++++++++++++++++++++++++----------- 1 file changed, 251 insertions(+), 88 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index b8aa28f..330ae5d 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -1,68 +1,109 @@ # -*- coding: utf-8 -*- -from datetime import datetime, timedelta, time +from datetime import datetime, timedelta, time, date +import calendar from dateutil import tz import re + class Isoparser(object): - def __init__(self, sep='T'): + def __init__(self, sep='T', default_year=None): + """ + :param sep: + A single character that separates date and time portions + + :param default_year: + The default year to be used as the basis for parsing the uncommon + no-year date formats. + """ if len(sep) != 1: raise ValueError('Separator must be a single character') self._sep = sep + if default_year is not None: + if not 1 <= default_year <= 9999: + raise ValueError('Year must be in [1, 9999]') - def _read_int(self, cstr, pos, comp_len): - out = int(cstr[pos:pos + comp_len]) - return out, pos + comp_len - - def isoparse(self, dt_str): - dt_str = getattr(dt_str, 'read', lambda: dt_str)() - try: - return self.isoparse_common(dt_str) - except ValueError: - raise + self._default_year = default_year + else: + self._default_year = datetime.now().year - _ISO_LENGTHS = (4, 2, 2, 2, 2, 2) # Lengths of ISO components - _MICROSECOND_END_REGEX = re.compile('[-+Z]+') - def isoparse_common(self, dt_str): + def isoparse(self, dt_str, common_only=False): """ - This handles the most common subset of ISO-8601 date times, with the - following formats: + Parse an ISO-8601 datetime string into a :py:`datetime.datetime`. + + An ISO-8601 datetime string consists of a date portion, followed + optionally by a time portion - the date and time portions are separated + by a single character separator, which is ``T`` in the official + standard. + + Supported date formats are: + + Common: - ``YYYY`` - - ``YYYYMM`` - - ``YYYY-MM`` - - ``YYYYMMDD`` - - ``YYYY-MM-DD`` - - ``YYYYMMDDTHH`` - - ``YYYYMMDDTHHMM`` - - ``YYYY-MM-DDTHH`` - - ``YYYYMMDDTHH:MM`` - - ``YYYYMMDDTHHMMSS`` - - ``YYYY-MM-DDTHHMM`` - - ``YYYY-MM-DDTHH:MM`` - - ``YYYYMMDDTHH:MM:SS`` - - ``YYYY-MM-DDTHHMMSS`` - - ``YYYYMMDDTHHMMSS.fff`` - - ``YYYY-MM-DDTHH:MM:SS`` - - ``YYYYMMDDTHH:MM:SS.fff`` - - ``YYYY-MM-DDTHHMMSS.fff`` - - ``YYYYMMDDTHHMMSS.ffffff`` - - ``YYYY-MM-DDTHH:MM:SS.fff`` - - ``YYYYMMDDTHH:MM:SS.ffffff`` - - ``YYYY-MM-DDTHHMMSS.ffffff`` - - ``YYYY-MM-DDTHH:MM:SS.ffffff`` - - Additionally, anything with a specified time may also have a time zone - with the forms: - - - `Z` + - ``YYYY-MM`` or ``YYYYMM`` + - ``YYYY-MM-DD`` or `YYYYMMDD`` + + Uncommon: + + - ``--MM-DD`` or ``--MMDD`` - Year unspecified + - ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to 0) + - ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day + + The ISO week and day numbering follows the same logic as + :py:`datetime.date.isocalendar`. + + Supported time formats are: + + - ``hh`` + - ``hh:mm`` or ``hhmm`` + - ``hh:mm:ss`` or `hhmmss`` + - ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits) + + Midnight is a special case for `hh`, as the standard supports both + 00:00 and 24:00 as a representation. + + .. caution:: + + Support for fractional components other than seconds is part of the + ISO-8601 standard, but is not currently implemented in this parser. + + Supported time zone offset formats are: + + - `Z` (UTC) - `±HH:MM` - `±HHMM` - `±HH` + + Offsets will be represented as :class:`dateutil.tz.tzoffset` objects, + with the exception of UTC, which will be represented as + :class:`dateutil.tz.tzutc`. Time zone offsets equivalent to UTC (such + as `+00:00`) will also be represented as :class:`dateutil.tz.tzutc`. + + :param dt_str: + A string or stream containing only an ISO-8601 datetime string + + :param common_only: + If true, parsing the uncommon formats will throw an error. + + :return: + Returns a :py:`datetime.datetime` representing the string. + Unspecified components default to their lowest value, with the + exception of year, which will use the value passed to the + ``default_year`` parameter of the method's bound + :class:`dateutil.parser.isoparser.Isoparser` instance. If that + would produce an invalid date (e.g. ``'--02-29'`` parsed with a + non-leap-year default date), the default will be the last leap + year to occur before the default year. """ - # Parse the year first - components, pos = self._parse_isodate_common(dt_str) + dt_str = getattr(dt_str, 'read', lambda: dt_str)() + + if common_only: + components, pos = self._parse_isodate_common(dt_str) + else: + components, pos = self._parse_isodate(dt_str) + if len(dt_str) > pos: if dt_str[pos] == self._sep: components += self._parse_isotime(dt_str[pos + 1:]) @@ -71,28 +112,82 @@ class Isoparser(object): return datetime(*components) - def isoparse_uncommon(self, dt_str): - """ - This handles the uncommon subset of ISO-8601 datetime formats, including - - - ``--MM-DD`` - - ``--MMDD`` - - ``YYYY-Www`` - - ``YYYYWww`` - - ``YYYY-Www-D`` - - ``YYYYWwwD`` - - ``YYYY-DDD`` - - ``YYYYDDD`` + def parse_isodate(self, datestr): """ + Parse the date portion of an ISO strin. + + :param datestr: + The string portion of an ISO string, without a separator - raise NotImplementedError + :return: + Returns a :class:`datetime.date` object + """ + components, pos = self._parse_isodate(datestr) + return date(*components) @classmethod def parse_isotime(cls, timestr): + """ + Parse the time portion of an ISO string. + + :param timestr: + The time portion of an ISO string, without a separator + + :return: + Returns a :class:`datetime.time` object + """ return time(*cls._parse_isotime(timestr)) @classmethod - def _parse_isodate_common(cls, dt_str): + def parse_tzstr(cls, tzstr, zero_as_utc=True): + """ + Parse a valid ISO time zone string. + + See :func:`Isoparser.isoparse` for details on supported formats. + + :param tzstr: + A string representing an ISO time zone offset + + :param zero_as_utc: + Whether to return :class:`dateutil.tz.tzutc` for zero-offset zones + + :return: + Returns :class:`dateutil.tz.tzoffset` for offsets and + :class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is + specified) offsets equivalent to UTC. + """ + if tzstr == 'Z': + return tz.tzutc() + + if 6 < len(tzstr) < 3: + raise ValueError('Time zone offset must be 1 or 3-6 characters') + + if tzstr[0] == '-': + mult = -1 + elif tzstr[0] == '+': + mult = 1 + else: + raise ValueError('Time zone offset requires sign') + + hours = int(tzstr[1:3]) + if len(tzstr) == 3: + minutes = 0 + else: + minutes = int(tzstr[(4 if tzstr[3] == ':' else 3):]) + + if zero_as_utc and hours == 0 and minutes == 0: + return tz.tzutc() + else: + return tz.tzoffset(None, mult * timedelta(hours=hours, + minutes=minutes)) + + def _parse_isodate(self, dt_str): + try: + return self._parse_isodate_common(dt_str) + except ValueError: + return self._parse_isodate_uncommon(dt_str) + + def _parse_isodate_common(self, dt_str): len_str = len(dt_str) components = [1, 1, 1] @@ -126,6 +221,94 @@ class Isoparser(object): components[2] = int(dt_str[pos:pos + 2]) return components, pos + 2 + def _parse_isodate_uncommon(self, dt_str): + if dt_str[0:2] == '--': + # --MM-DD or --MMDD + month = int(dt_str[2:4]) + pos = 4 + (dt_str[4] == '-') + day = int(dt_str[pos:pos + 2]) + year = self._default_year + + if month == 2 and day == 29: + # Calcualtes the latest leap year + year -= year % 4 + if (year % 400) and not (year % 100): + year -= 4 + + return [year, month, day], pos + 2 + + # All other uncommon ISO formats start with the year + year = int(dt_str[0:4]) + + pos = 4 + dt_str[4] == '-' # Skip '-' if it's there + if dt_str[pos] == 'W': + # YYYY-?Www-?D? + pos += 1 + weekno = dt_str[pos:pos + 2] + pos += 2 + + dayno = 1 + if len(dt_str) > pos: + if dt_str[pos] == '-': + # YYYY-W + if dt_str[4] != '-': + raise ValueError('Inconsistent use of dash separator') + pos += 1 + + dayno = dt_str[pos] + pos += 1 + + base_date = self._calculate_weekdate(year, weekno, dayno) + else: + # YYYYDDD or YYYY-DDD + ordinal_day = int(dt_str[pos:pos + 3]) + pos += 3 + + if ordinal_day > 365 + calendar.isleap(year): + raise ValueError('Invalid ordinal day' + + ' {} for year {}'.format(ordinal_day, year)) + + base_date = date(year, 1, 1) + timedelta(days=ordinal_day) + + components = [base_date.year, base_date.month, base_date.day] + return components, pos + + @classmethod + def _calculate_weekdate(cls, year, week, day): + """ + Calculate the day of corresponding to the ISO year-week-day calendar. + + This function is effectively the inverse of + :func:`datetime.date.isocalendar`. + + :param year: + The year in the ISO calendar + + :param week: + The week in the ISO calendar - range is [1, 53] + + :param day: + The day in the ISO calendar - range is [1 (MON), 7 (SUN)] + + :return: + Returns a :class:`datetime.date` + """ + if not 0 < week < 54: + raise ValueError('Invalid week: {}'.format(week)) + + if not 0 < day < 8: # Range is 1-7 + raise ValueError('Invalid weekday: {}'.format(day)) + + # Get week 1 for the specific year: + jan_4 = date(year, 1, 4) # Week 1 always has January 4th in it + week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1) + + # Now add the specific number of weeks and days to get what we want + week_offset = (week - 1) * 7 + (day - 1) + return week_1 + timedelta(days=week_offset) + + _MICROSECOND_END_REGEX = re.compile('[-+Z]+') + @classmethod def _parse_isotime(cls, timestr): len_str = len(timestr) @@ -139,6 +322,7 @@ class Isoparser(object): comp += 1 if timestr[pos] in '-+Z': + # Detect time zone boundary components[-1] = cls.parse_tzstr(timestr[pos:]) pos = len_str break @@ -165,35 +349,14 @@ class Isoparser(object): if pos < len_str: raise ValueError('Unused components in ISO string') - return components - - @classmethod - def parse_tzstr(cls, tzstr, zero_as_utc=True): - if tzstr == 'Z': - return tz.tzutc() - - if 6 < len(tzstr) < 3: - raise ValueError('Time zone offset must be 1 or 3-6 characters') + if components[0] == 24: + # Standard supports 00:00 and 24:00 as representations of midnight + if any(component != 0 for component in components[1:4]): + raise ValueError('Hour may only be 24 at 24:00:00.000') + components[0] = 0 - if tzstr[0] == '-': - mult = -1 - elif tzstr[0] == '+': - mult = 1 - else: - raise ValueError('Time zone offset requires sign') - - hours = int(tzstr[1:3]) - if len(tzstr) == 3: - minutes = 0 - else: - minutes = int(tzstr[(4 if tzstr[3] == ':' else 3):]) + return components - if zero_as_utc and hours == 0 and minutes == 0: - return tz.tzutc() - else: - return tz.tzoffset(None, mult * timedelta(hours=hours, - minutes=minutes)) DEFAULT_ISOPARSER = Isoparser() -def isoparse(dt_str): - return DEFAULT_ISOPARSER.isoparse(dt_str) \ No newline at end of file +isoparse = DEFAULT_ISOPARSER.isoparse -- cgit v1.2.3 From 9a135d017792b9f5ff87a0f916dc5eb65f51ec47 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 1 Nov 2017 13:34:29 -0400 Subject: Add fix for ordinal dates --- dateutil/parser/isoparser.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 330ae5d..074aef2 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -206,6 +206,9 @@ class Isoparser(object): pos += 1 # Month + if len_str - pos < 2: + raise ValueError('Invalid common month') + components[1] = int(dt_str[pos:pos + 2]) pos += 2 @@ -218,6 +221,8 @@ class Isoparser(object): pos += 1 # Day + if len_str - pos < 2: + raise ValueError('Invalid common day') components[2] = int(dt_str[pos:pos + 2]) return components, pos + 2 @@ -240,11 +245,11 @@ class Isoparser(object): # All other uncommon ISO formats start with the year year = int(dt_str[0:4]) - pos = 4 + dt_str[4] == '-' # Skip '-' if it's there + pos = 4 + (dt_str[4] == '-') # Skip '-' if it's there if dt_str[pos] == 'W': # YYYY-?Www-?D? pos += 1 - weekno = dt_str[pos:pos + 2] + weekno = int(dt_str[pos:pos + 2]) pos += 2 dayno = 1 @@ -255,7 +260,7 @@ class Isoparser(object): raise ValueError('Inconsistent use of dash separator') pos += 1 - dayno = dt_str[pos] + dayno = int(dt_str[pos]) pos += 1 base_date = self._calculate_weekdate(year, weekno, dayno) @@ -264,11 +269,11 @@ class Isoparser(object): ordinal_day = int(dt_str[pos:pos + 3]) pos += 3 - if ordinal_day > 365 + calendar.isleap(year): + if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)): raise ValueError('Invalid ordinal day' + ' {} for year {}'.format(ordinal_day, year)) - base_date = date(year, 1, 1) + timedelta(days=ordinal_day) + base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1) components = [base_date.year, base_date.month, base_date.day] return components, pos -- cgit v1.2.3 From 6fca9a55cbe3f4d8ced17989ca3e3c757172fcf0 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 1 Nov 2017 13:45:36 -0400 Subject: Tweak parser/isoparser documentation --- dateutil/parser/__init__.py | 3 +++ dateutil/parser/isoparser.py | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/__init__.py b/dateutil/parser/__init__.py index 2d6b1a3..abe7b41 100644 --- a/dateutil/parser/__init__.py +++ b/dateutil/parser/__init__.py @@ -3,7 +3,10 @@ from ._parser import parse, parser, parserinfo from ._parser import DEFAULTPARSER, DEFAULTTZPARSER from ._parser import InvalidDateError, InvalidDatetimeError, InvalidTimeError +from .isoparser import Isoparser, isoparse + __all__ = ['parse', 'parser', 'parserinfo', + 'isoparse', 'Isoparser', 'InvalidDatetimeError', 'InvalidDateError', 'InvalidTimeError'] diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 074aef2..50a8564 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -1,10 +1,17 @@ # -*- coding: utf-8 -*- +""" +This module offers a parser for ISO-8601 strings + +It is intended to support all valid date, time and datetime formats per the +ISO-8601 specification, with a stricter mode for the most common subset. +""" from datetime import datetime, timedelta, time, date import calendar from dateutil import tz import re +__all__ = ["isoparse", "Isoparser"] class Isoparser(object): def __init__(self, sep='T', default_year=None): @@ -30,7 +37,7 @@ class Isoparser(object): def isoparse(self, dt_str, common_only=False): """ - Parse an ISO-8601 datetime string into a :py:`datetime.datetime`. + Parse an ISO-8601 datetime string into a :class:`datetime.datetime`. An ISO-8601 datetime string consists of a date portion, followed optionally by a time portion - the date and time portions are separated @@ -52,7 +59,7 @@ class Isoparser(object): - ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day The ISO week and day numbering follows the same logic as - :py:`datetime.date.isocalendar`. + :func:`datetime.date.isocalendar`. Supported time formats are: @@ -88,11 +95,11 @@ class Isoparser(object): If true, parsing the uncommon formats will throw an error. :return: - Returns a :py:`datetime.datetime` representing the string. + Returns a :class:`datetime.datetime` representing the string. Unspecified components default to their lowest value, with the exception of year, which will use the value passed to the ``default_year`` parameter of the method's bound - :class:`dateutil.parser.isoparser.Isoparser` instance. If that + :class:`Isoparser` instance. If that would produce an invalid date (e.g. ``'--02-29'`` parsed with a non-leap-year default date), the default will be the last leap year to occur before the default year. @@ -114,7 +121,7 @@ class Isoparser(object): def parse_isodate(self, datestr): """ - Parse the date portion of an ISO strin. + Parse the date portion of an ISO string. :param datestr: The string portion of an ISO string, without a separator -- cgit v1.2.3 From 561d31ce9d1a3cc6940480c82f643c20d0961afc Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 1 Nov 2017 16:59:47 -0400 Subject: Make time zone string parser more strict --- dateutil/parser/isoparser.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 50a8564..c658a1b 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -166,8 +166,8 @@ class Isoparser(object): if tzstr == 'Z': return tz.tzutc() - if 6 < len(tzstr) < 3: - raise ValueError('Time zone offset must be 1 or 3-6 characters') + if len(tzstr) not in {3, 5, 6}: + raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters') if tzstr[0] == '-': mult = -1 @@ -185,6 +185,12 @@ class Isoparser(object): if zero_as_utc and hours == 0 and minutes == 0: return tz.tzutc() else: + if minutes > 59: + raise ValueError('Invalid minutes in time zone offset') + + if hours > 23: + raise ValueError('Invalid hours in time zone offset') + return tz.tzoffset(None, mult * timedelta(hours=hours, minutes=minutes)) -- cgit v1.2.3 From c8964ad107c23af4478ec1363e2c81d2216d3cc1 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sun, 5 Nov 2017 15:26:27 -0500 Subject: Add test for too-short ISO string --- dateutil/parser/isoparser.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index c658a1b..0e2025f 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -240,6 +240,9 @@ class Isoparser(object): return components, pos + 2 def _parse_isodate_uncommon(self, dt_str): + if len(dt_str) < 4: + raise ValueError('ISO string too short') + if dt_str[0:2] == '--': # --MM-DD or --MMDD month = int(dt_str[2:4]) -- cgit v1.2.3 From 396b89803dcef174eca50cde283e2e40da211cf4 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 1 Nov 2017 17:10:54 -0400 Subject: Make restrictions on sep stricter --- dateutil/parser/isoparser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 0e2025f..77043b4 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -13,6 +13,7 @@ import re __all__ = ["isoparse", "Isoparser"] + class Isoparser(object): def __init__(self, sep='T', default_year=None): """ @@ -23,8 +24,9 @@ class Isoparser(object): The default year to be used as the basis for parsing the uncommon no-year date formats. """ - if len(sep) != 1: - raise ValueError('Separator must be a single character') + if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'): + raise ValueError('Separator must be a single, non-numeric ' + 'ASCII character') self._sep = sep if default_year is not None: -- cgit v1.2.3 From a06ab966bc997401156b88e076278a2955c9897a Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 6 Nov 2017 10:35:08 -0500 Subject: Add decorator to coerce to ASCII --- dateutil/parser/isoparser.py | 74 ++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 23 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 77043b4..e43bc87 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -9,11 +9,34 @@ from datetime import datetime, timedelta, time, date import calendar from dateutil import tz +from functools import wraps + import re +import six __all__ = ["isoparse", "Isoparser"] +def _takes_ascii(f): + @wraps(f) + def func(self, str_in, *args, **kwargs): + # If it's a stream, read the whole thing + str_in = getattr(str_in, 'read', lambda: str_in)() + + # If it's unicode, turn it into bytes, since ISO-8601 only covers ASCII + if isinstance(str_in, six.text_type): + # ASCII is the same in UTF-8 + try: + str_in = str_in.encode('ascii') + except UnicodeEncodeError as e: + msg = 'ISO-8601 strings contain only ASCII characters' + six.raise_from(ValueError(msg), e) + + return f(self, str_in, *args, **kwargs) + + return func + + class Isoparser(object): def __init__(self, sep='T', default_year=None): """ @@ -28,7 +51,7 @@ class Isoparser(object): raise ValueError('Separator must be a single, non-numeric ' 'ASCII character') - self._sep = sep + self._sep = sep.encode('ascii') if default_year is not None: if not 1 <= default_year <= 9999: raise ValueError('Year must be in [1, 9999]') @@ -37,6 +60,7 @@ class Isoparser(object): else: self._default_year = datetime.now().year + @_takes_ascii def isoparse(self, dt_str, common_only=False): """ Parse an ISO-8601 datetime string into a :class:`datetime.datetime`. @@ -106,21 +130,20 @@ class Isoparser(object): non-leap-year default date), the default will be the last leap year to occur before the default year. """ - dt_str = getattr(dt_str, 'read', lambda: dt_str)() - if common_only: components, pos = self._parse_isodate_common(dt_str) else: components, pos = self._parse_isodate(dt_str) if len(dt_str) > pos: - if dt_str[pos] == self._sep: + if dt_str[pos:pos + 1] == self._sep: components += self._parse_isotime(dt_str[pos + 1:]) else: raise ValueError('String contains unknown ISO components') return datetime(*components) + @_takes_ascii def parse_isodate(self, datestr): """ Parse the date portion of an ISO string. @@ -134,6 +157,7 @@ class Isoparser(object): components, pos = self._parse_isodate(datestr) return date(*components) + @_takes_ascii @classmethod def parse_isotime(cls, timestr): """ @@ -165,15 +189,15 @@ class Isoparser(object): :class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is specified) offsets equivalent to UTC. """ - if tzstr == 'Z': + if tzstr == b'Z': return tz.tzutc() if len(tzstr) not in {3, 5, 6}: raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters') - if tzstr[0] == '-': + if tzstr[0:1] == b'-': mult = -1 - elif tzstr[0] == '+': + elif tzstr[0:1] == b'+': mult = 1 else: raise ValueError('Time zone offset requires sign') @@ -182,7 +206,7 @@ class Isoparser(object): if len(tzstr) == 3: minutes = 0 else: - minutes = int(tzstr[(4 if tzstr[3] == ':' else 3):]) + minutes = int(tzstr[(4 if tzstr[3] == cls._TIME_SEP else 3):]) if zero_as_utc and hours == 0 and minutes == 0: return tz.tzutc() @@ -196,6 +220,12 @@ class Isoparser(object): return tz.tzoffset(None, mult * timedelta(hours=hours, minutes=minutes)) + # Constants + _MICROSECOND_END_REGEX = re.compile(b'[-+Z]+') + _DATE_SEP = ord(b'-') + _TIME_SEP = ord(b':') + _MICRO_SEP = ord(b'.') + def _parse_isodate(self, dt_str): try: return self._parse_isodate_common(dt_str) @@ -216,7 +246,7 @@ class Isoparser(object): if pos >= len_str: return components, pos - has_sep = dt_str[pos] == '-' + has_sep = dt_str[pos] == self._DATE_SEP if has_sep: pos += 1 @@ -231,7 +261,7 @@ class Isoparser(object): return components, pos if has_sep: - if dt_str[pos] != '-': + if dt_str[pos] != self._DATE_SEP: raise ValueError('Invalid separator in ISO string') pos += 1 @@ -245,10 +275,10 @@ class Isoparser(object): if len(dt_str) < 4: raise ValueError('ISO string too short') - if dt_str[0:2] == '--': + if dt_str[0:2] == b'--': # --MM-DD or --MMDD month = int(dt_str[2:4]) - pos = 4 + (dt_str[4] == '-') + pos = 4 + (dt_str[4] == self._DATE_SEP) day = int(dt_str[pos:pos + 2]) year = self._default_year @@ -263,8 +293,8 @@ class Isoparser(object): # All other uncommon ISO formats start with the year year = int(dt_str[0:4]) - pos = 4 + (dt_str[4] == '-') # Skip '-' if it's there - if dt_str[pos] == 'W': + pos = 4 + (dt_str[4] == self._DATE_SEP) # Skip '-' if it's there + if dt_str[pos] == ord(b'W'): # YYYY-?Www-?D? pos += 1 weekno = int(dt_str[pos:pos + 2]) @@ -272,13 +302,13 @@ class Isoparser(object): dayno = 1 if len(dt_str) > pos: - if dt_str[pos] == '-': + if dt_str[pos] == self._DATE_SEP: # YYYY-W - if dt_str[4] != '-': + if dt_str[4] != self._DATE_SEP: raise ValueError('Inconsistent use of dash separator') pos += 1 - dayno = int(dt_str[pos]) + dayno = int(dt_str[pos:pos + 1]) pos += 1 base_date = self._calculate_weekdate(year, weekno, dayno) @@ -330,8 +360,6 @@ class Isoparser(object): week_offset = (week - 1) * 7 + (day - 1) return week_1 + timedelta(days=week_offset) - _MICROSECOND_END_REGEX = re.compile('[-+Z]+') - @classmethod def _parse_isotime(cls, timestr): len_str = len(timestr) @@ -339,12 +367,12 @@ class Isoparser(object): pos = 0 comp = -1 - has_sep = len_str >= 3 and timestr[2] == ':' + has_sep = len_str >= 3 and timestr[2] == cls._TIME_SEP while pos < len_str and comp < 5: comp += 1 - if timestr[pos] in '-+Z': + if timestr[pos:pos + 1] in b'-+Z': # Detect time zone boundary components[-1] = cls.parse_tzstr(timestr[pos:]) pos = len_str @@ -354,12 +382,12 @@ class Isoparser(object): # Hour, minute, second components[comp] = int(timestr[pos:pos + 2]) pos += 2 - if has_sep and pos < len_str and timestr[pos] == ':': + if has_sep and pos < len_str and timestr[pos] == cls._TIME_SEP: pos += 1 if comp == 3: # Microsecond - if timestr[pos] != '.': + if timestr[pos] != cls._MICRO_SEP: continue pos += 1 -- cgit v1.2.3 From b8a1a8315fe428929e0ffd95453e08ff80711285 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 6 Nov 2017 10:51:28 -0500 Subject: Refactor a public interface out of _parse_tzstr --- dateutil/parser/isoparser.py | 67 ++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 31 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index e43bc87..46bce83 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -171,6 +171,7 @@ class Isoparser(object): """ return time(*cls._parse_isotime(timestr)) + @_takes_ascii @classmethod def parse_tzstr(cls, tzstr, zero_as_utc=True): """ @@ -189,36 +190,7 @@ class Isoparser(object): :class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is specified) offsets equivalent to UTC. """ - if tzstr == b'Z': - return tz.tzutc() - - if len(tzstr) not in {3, 5, 6}: - raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters') - - if tzstr[0:1] == b'-': - mult = -1 - elif tzstr[0:1] == b'+': - mult = 1 - else: - raise ValueError('Time zone offset requires sign') - - hours = int(tzstr[1:3]) - if len(tzstr) == 3: - minutes = 0 - else: - minutes = int(tzstr[(4 if tzstr[3] == cls._TIME_SEP else 3):]) - - if zero_as_utc and hours == 0 and minutes == 0: - return tz.tzutc() - else: - if minutes > 59: - raise ValueError('Invalid minutes in time zone offset') - - if hours > 23: - raise ValueError('Invalid hours in time zone offset') - - return tz.tzoffset(None, mult * timedelta(hours=hours, - minutes=minutes)) + return cls._parse_tzstr(tzstr, zero_as_utc=zero_as_utc) # Constants _MICROSECOND_END_REGEX = re.compile(b'[-+Z]+') @@ -374,7 +346,7 @@ class Isoparser(object): if timestr[pos:pos + 1] in b'-+Z': # Detect time zone boundary - components[-1] = cls.parse_tzstr(timestr[pos:]) + components[-1] = cls._parse_tzstr(timestr[pos:]) pos = len_str break @@ -408,6 +380,39 @@ class Isoparser(object): return components + @classmethod + def _parse_tzstr(cls, tzstr, zero_as_utc=True): + if tzstr == b'Z': + return tz.tzutc() + + if len(tzstr) not in {3, 5, 6}: + raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters') + + if tzstr[0:1] == b'-': + mult = -1 + elif tzstr[0:1] == b'+': + mult = 1 + else: + raise ValueError('Time zone offset requires sign') + + hours = int(tzstr[1:3]) + if len(tzstr) == 3: + minutes = 0 + else: + minutes = int(tzstr[(4 if tzstr[3] == cls._TIME_SEP else 3):]) + + if zero_as_utc and hours == 0 and minutes == 0: + return tz.tzutc() + else: + if minutes > 59: + raise ValueError('Invalid minutes in time zone offset') + + if hours > 23: + raise ValueError('Invalid hours in time zone offset') + + return tz.tzoffset(None, mult * timedelta(hours=hours, + minutes=minutes)) + DEFAULT_ISOPARSER = Isoparser() isoparse = DEFAULT_ISOPARSER.isoparse -- cgit v1.2.3 From e49b89cea2ee7950e0ff895f206832160527c732 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 6 Nov 2017 11:06:24 -0500 Subject: Put @_takes_ascii decorator after @classmethod --- dateutil/parser/isoparser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 46bce83..4ade587 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -157,8 +157,8 @@ class Isoparser(object): components, pos = self._parse_isodate(datestr) return date(*components) - @_takes_ascii @classmethod + @_takes_ascii def parse_isotime(cls, timestr): """ Parse the time portion of an ISO string. @@ -171,8 +171,8 @@ class Isoparser(object): """ return time(*cls._parse_isotime(timestr)) - @_takes_ascii @classmethod + @_takes_ascii def parse_tzstr(cls, tzstr, zero_as_utc=True): """ Parse a valid ISO time zone string. -- cgit v1.2.3 From 93cdfaf79c6f130a3356413825ee615908b883d8 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 7 Nov 2017 11:37:32 -0500 Subject: Fix byte slicing behavior for 2 and 3 --- dateutil/parser/isoparser.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 4ade587..2ef2afc 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -194,9 +194,9 @@ class Isoparser(object): # Constants _MICROSECOND_END_REGEX = re.compile(b'[-+Z]+') - _DATE_SEP = ord(b'-') - _TIME_SEP = ord(b':') - _MICRO_SEP = ord(b'.') + _DATE_SEP = b'-' + _TIME_SEP = b':' + _MICRO_SEP = b'.' def _parse_isodate(self, dt_str): try: @@ -218,7 +218,7 @@ class Isoparser(object): if pos >= len_str: return components, pos - has_sep = dt_str[pos] == self._DATE_SEP + has_sep = dt_str[pos:pos + 1] == self._DATE_SEP if has_sep: pos += 1 @@ -233,7 +233,7 @@ class Isoparser(object): return components, pos if has_sep: - if dt_str[pos] != self._DATE_SEP: + if dt_str[pos:pos + 1] != self._DATE_SEP: raise ValueError('Invalid separator in ISO string') pos += 1 @@ -250,7 +250,7 @@ class Isoparser(object): if dt_str[0:2] == b'--': # --MM-DD or --MMDD month = int(dt_str[2:4]) - pos = 4 + (dt_str[4] == self._DATE_SEP) + pos = 4 + (dt_str[4:5] == self._DATE_SEP) day = int(dt_str[pos:pos + 2]) year = self._default_year @@ -265,8 +265,8 @@ class Isoparser(object): # All other uncommon ISO formats start with the year year = int(dt_str[0:4]) - pos = 4 + (dt_str[4] == self._DATE_SEP) # Skip '-' if it's there - if dt_str[pos] == ord(b'W'): + pos = 4 + (dt_str[4:5] == self._DATE_SEP) # Skip '-' if it's there + if dt_str[pos:pos + 1] == b'W': # YYYY-?Www-?D? pos += 1 weekno = int(dt_str[pos:pos + 2]) @@ -274,9 +274,9 @@ class Isoparser(object): dayno = 1 if len(dt_str) > pos: - if dt_str[pos] == self._DATE_SEP: + if dt_str[pos:pos + 1] == self._DATE_SEP: # YYYY-W - if dt_str[4] != self._DATE_SEP: + if dt_str[4:5] != self._DATE_SEP: raise ValueError('Inconsistent use of dash separator') pos += 1 @@ -339,7 +339,7 @@ class Isoparser(object): pos = 0 comp = -1 - has_sep = len_str >= 3 and timestr[2] == cls._TIME_SEP + has_sep = len_str >= 3 and timestr[2:3] == cls._TIME_SEP while pos < len_str and comp < 5: comp += 1 @@ -354,12 +354,13 @@ class Isoparser(object): # Hour, minute, second components[comp] = int(timestr[pos:pos + 2]) pos += 2 - if has_sep and pos < len_str and timestr[pos] == cls._TIME_SEP: + if (has_sep and pos < len_str and + timestr[pos:pos + 1] == cls._TIME_SEP): pos += 1 if comp == 3: # Microsecond - if timestr[pos] != cls._MICRO_SEP: + if timestr[pos:pos + 1] != cls._MICRO_SEP: continue pos += 1 @@ -399,7 +400,7 @@ class Isoparser(object): if len(tzstr) == 3: minutes = 0 else: - minutes = int(tzstr[(4 if tzstr[3] == cls._TIME_SEP else 3):]) + minutes = int(tzstr[(4 if tzstr[3:4] == cls._TIME_SEP else 3):]) if zero_as_utc and hours == 0 and minutes == 0: return tz.tzutc() @@ -410,8 +411,7 @@ class Isoparser(object): if hours > 23: raise ValueError('Invalid hours in time zone offset') - return tz.tzoffset(None, mult * timedelta(hours=hours, - minutes=minutes)) + return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60) DEFAULT_ISOPARSER = Isoparser() -- cgit v1.2.3 From 34410949dcaf8492b84b403810d879600389362e Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 10 Nov 2017 17:28:21 -0500 Subject: Ensure parse_isodate consumes entire string --- dateutil/parser/isoparser.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 2ef2afc..d43c556 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -155,6 +155,9 @@ class Isoparser(object): Returns a :class:`datetime.date` object """ components, pos = self._parse_isodate(datestr) + if pos < len(datestr): + raise ValueError('String contains unknown ISO ' + + 'components: {}'.format(datestr)) return date(*components) @classmethod -- cgit v1.2.3 From 52e72944e0c49bb7e93094dfbf9a667bb621a866 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 10 Nov 2017 18:16:24 -0500 Subject: Add exception on too-short ISO times --- dateutil/parser/isoparser.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index d43c556..5fd63ed 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -342,6 +342,9 @@ class Isoparser(object): pos = 0 comp = -1 + if len(timestr) < 2: + raise ValueError('ISO time too short') + has_sep = len_str >= 3 and timestr[2:3] == cls._TIME_SEP while pos < len_str and comp < 5: -- cgit v1.2.3 From d65738c34fe730648ea472794b268765062dcb9a Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 5 Dec 2017 16:22:12 +0000 Subject: Drop all classmethods in favor of instance methods --- dateutil/parser/isoparser.py | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 5fd63ed..f21359e 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -160,9 +160,8 @@ class Isoparser(object): 'components: {}'.format(datestr)) return date(*components) - @classmethod @_takes_ascii - def parse_isotime(cls, timestr): + def parse_isotime(self, timestr): """ Parse the time portion of an ISO string. @@ -172,11 +171,10 @@ class Isoparser(object): :return: Returns a :class:`datetime.time` object """ - return time(*cls._parse_isotime(timestr)) + return time(*self._parse_isotime(timestr)) - @classmethod @_takes_ascii - def parse_tzstr(cls, tzstr, zero_as_utc=True): + def parse_tzstr(self, tzstr, zero_as_utc=True): """ Parse a valid ISO time zone string. @@ -193,7 +191,7 @@ class Isoparser(object): :class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is specified) offsets equivalent to UTC. """ - return cls._parse_tzstr(tzstr, zero_as_utc=zero_as_utc) + return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc) # Constants _MICROSECOND_END_REGEX = re.compile(b'[-+Z]+') @@ -301,8 +299,7 @@ class Isoparser(object): components = [base_date.year, base_date.month, base_date.day] return components, pos - @classmethod - def _calculate_weekdate(cls, year, week, day): + def _calculate_weekdate(self, year, week, day): """ Calculate the day of corresponding to the ISO year-week-day calendar. @@ -335,8 +332,7 @@ class Isoparser(object): week_offset = (week - 1) * 7 + (day - 1) return week_1 + timedelta(days=week_offset) - @classmethod - def _parse_isotime(cls, timestr): + def _parse_isotime(self, timestr): len_str = len(timestr) components = [0, 0, 0, 0, None] pos = 0 @@ -345,14 +341,14 @@ class Isoparser(object): if len(timestr) < 2: raise ValueError('ISO time too short') - has_sep = len_str >= 3 and timestr[2:3] == cls._TIME_SEP + has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP while pos < len_str and comp < 5: comp += 1 if timestr[pos:pos + 1] in b'-+Z': # Detect time zone boundary - components[-1] = cls._parse_tzstr(timestr[pos:]) + components[-1] = self._parse_tzstr(timestr[pos:]) pos = len_str break @@ -361,17 +357,17 @@ class Isoparser(object): components[comp] = int(timestr[pos:pos + 2]) pos += 2 if (has_sep and pos < len_str and - timestr[pos:pos + 1] == cls._TIME_SEP): + timestr[pos:pos + 1] == self._TIME_SEP): pos += 1 if comp == 3: # Microsecond - if timestr[pos:pos + 1] != cls._MICRO_SEP: + if timestr[pos:pos + 1] != self._MICRO_SEP: continue pos += 1 - us_str = cls._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6], - 1)[0] + us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6], + 1)[0] components[comp] = int(us_str) * 10**(6 - len(us_str)) pos += len(us_str) @@ -387,8 +383,7 @@ class Isoparser(object): return components - @classmethod - def _parse_tzstr(cls, tzstr, zero_as_utc=True): + def _parse_tzstr(self, tzstr, zero_as_utc=True): if tzstr == b'Z': return tz.tzutc() @@ -406,7 +401,7 @@ class Isoparser(object): if len(tzstr) == 3: minutes = 0 else: - minutes = int(tzstr[(4 if tzstr[3:4] == cls._TIME_SEP else 3):]) + minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):]) if zero_as_utc and hours == 0 and minutes == 0: return tz.tzutc() -- cgit v1.2.3 From afd05a4bcb8fe1c7a5e29f6d85c95ef048b41c2e Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 5 Dec 2017 16:31:09 +0000 Subject: Rename Isoparser to isoparser --- dateutil/parser/__init__.py | 4 ++-- dateutil/parser/isoparser.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/__init__.py b/dateutil/parser/__init__.py index abe7b41..e407320 100644 --- a/dateutil/parser/__init__.py +++ b/dateutil/parser/__init__.py @@ -3,10 +3,10 @@ from ._parser import parse, parser, parserinfo from ._parser import DEFAULTPARSER, DEFAULTTZPARSER from ._parser import InvalidDateError, InvalidDatetimeError, InvalidTimeError -from .isoparser import Isoparser, isoparse +from .isoparser import isoparser, isoparse __all__ = ['parse', 'parser', 'parserinfo', - 'isoparse', 'Isoparser', + 'isoparse', 'isoparser', 'InvalidDatetimeError', 'InvalidDateError', 'InvalidTimeError'] diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index f21359e..6a3d0c6 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -14,7 +14,7 @@ from functools import wraps import re import six -__all__ = ["isoparse", "Isoparser"] +__all__ = ["isoparse", "isoparser"] def _takes_ascii(f): @@ -37,7 +37,7 @@ def _takes_ascii(f): return func -class Isoparser(object): +class isoparser(object): def __init__(self, sep='T', default_year=None): """ :param sep: @@ -125,7 +125,7 @@ class Isoparser(object): Unspecified components default to their lowest value, with the exception of year, which will use the value passed to the ``default_year`` parameter of the method's bound - :class:`Isoparser` instance. If that + :class:`isoparser` instance. If that would produce an invalid date (e.g. ``'--02-29'`` parsed with a non-leap-year default date), the default will be the last leap year to occur before the default year. @@ -178,7 +178,7 @@ class Isoparser(object): """ Parse a valid ISO time zone string. - See :func:`Isoparser.isoparse` for details on supported formats. + See :func:`isoparser.isoparse` for details on supported formats. :param tzstr: A string representing an ISO time zone offset @@ -415,5 +415,5 @@ class Isoparser(object): return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60) -DEFAULT_ISOPARSER = Isoparser() +DEFAULT_ISOPARSER = isoparser() isoparse = DEFAULT_ISOPARSER.isoparse -- cgit v1.2.3 From 5bf007f68a67c9fdcb0e4f0b668d4b39a7f70170 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 5 Dec 2017 16:32:51 +0000 Subject: Drop common_only option for initial API --- dateutil/parser/isoparser.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 6a3d0c6..c2f91d5 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -3,7 +3,7 @@ This module offers a parser for ISO-8601 strings It is intended to support all valid date, time and datetime formats per the -ISO-8601 specification, with a stricter mode for the most common subset. +ISO-8601 specification. """ from datetime import datetime, timedelta, time, date import calendar @@ -61,7 +61,7 @@ class isoparser(object): self._default_year = datetime.now().year @_takes_ascii - def isoparse(self, dt_str, common_only=False): + def isoparse(self, dt_str): """ Parse an ISO-8601 datetime string into a :class:`datetime.datetime`. @@ -117,9 +117,6 @@ class isoparser(object): :param dt_str: A string or stream containing only an ISO-8601 datetime string - :param common_only: - If true, parsing the uncommon formats will throw an error. - :return: Returns a :class:`datetime.datetime` representing the string. Unspecified components default to their lowest value, with the @@ -130,10 +127,7 @@ class isoparser(object): non-leap-year default date), the default will be the last leap year to occur before the default year. """ - if common_only: - components, pos = self._parse_isodate_common(dt_str) - else: - components, pos = self._parse_isodate(dt_str) + components, pos = self._parse_isodate(dt_str) if len(dt_str) > pos: if dt_str[pos:pos + 1] == self._sep: -- cgit v1.2.3 From fcc616087952c41dc8f14370fd4f19072444397e Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 6 Dec 2017 11:41:56 +0000 Subject: Drop support for no-year ISO datetimes --- dateutil/parser/isoparser.py | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index c2f91d5..cbb8c1e 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -38,27 +38,16 @@ def _takes_ascii(f): class isoparser(object): - def __init__(self, sep='T', default_year=None): + def __init__(self, sep='T'): """ :param sep: A single character that separates date and time portions - - :param default_year: - The default year to be used as the basis for parsing the uncommon - no-year date formats. """ if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'): raise ValueError('Separator must be a single, non-numeric ' 'ASCII character') self._sep = sep.encode('ascii') - if default_year is not None: - if not 1 <= default_year <= 9999: - raise ValueError('Year must be in [1, 9999]') - - self._default_year = default_year - else: - self._default_year = datetime.now().year @_takes_ascii def isoparse(self, dt_str): @@ -80,7 +69,6 @@ class isoparser(object): Uncommon: - - ``--MM-DD`` or ``--MMDD`` - Year unspecified - ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to 0) - ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day @@ -119,13 +107,7 @@ class isoparser(object): :return: Returns a :class:`datetime.datetime` representing the string. - Unspecified components default to their lowest value, with the - exception of year, which will use the value passed to the - ``default_year`` parameter of the method's bound - :class:`isoparser` instance. If that - would produce an invalid date (e.g. ``'--02-29'`` parsed with a - non-leap-year default date), the default will be the last leap - year to occur before the default year. + Unspecified components default to their lowest value. """ components, pos = self._parse_isodate(dt_str) @@ -242,22 +224,7 @@ class isoparser(object): if len(dt_str) < 4: raise ValueError('ISO string too short') - if dt_str[0:2] == b'--': - # --MM-DD or --MMDD - month = int(dt_str[2:4]) - pos = 4 + (dt_str[4:5] == self._DATE_SEP) - day = int(dt_str[pos:pos + 2]) - year = self._default_year - - if month == 2 and day == 29: - # Calcualtes the latest leap year - year -= year % 4 - if (year % 400) and not (year % 100): - year -= 4 - - return [year, month, day], pos + 2 - - # All other uncommon ISO formats start with the year + # All ISO formats start with the year year = int(dt_str[0:4]) pos = 4 + (dt_str[4:5] == self._DATE_SEP) # Skip '-' if it's there -- cgit v1.2.3 From a3bd1285130c75d819e64cc0be04cef440af1ac1 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 6 Dec 2017 15:33:35 +0000 Subject: Isoparser style tweaks --- dateutil/parser/isoparser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index cbb8c1e..b5271ea 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -29,7 +29,7 @@ def _takes_ascii(f): try: str_in = str_in.encode('ascii') except UnicodeEncodeError as e: - msg = 'ISO-8601 strings contain only ASCII characters' + msg = 'ISO-8601 strings should contain only ASCII characters' six.raise_from(ValueError(msg), e) return f(self, str_in, *args, **kwargs) @@ -185,7 +185,6 @@ class isoparser(object): len_str = len(dt_str) components = [1, 1, 1] - pos = 0 if len_str < 4: raise ValueError('ISO string too short') -- cgit v1.2.3 From f8f802b4b18ab5f5a33f7002953cafd821e8d409 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 6 Dec 2017 16:32:44 +0000 Subject: Fix unreachable code and loose validation --- dateutil/parser/isoparser.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index b5271ea..89aa3ec 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -226,7 +226,9 @@ class isoparser(object): # All ISO formats start with the year year = int(dt_str[0:4]) - pos = 4 + (dt_str[4:5] == self._DATE_SEP) # Skip '-' if it's there + has_sep = dt_str[4:5] == self._DATE_SEP + + pos = 4 + has_sep # Skip '-' if it's there if dt_str[pos:pos + 1] == b'W': # YYYY-?Www-?D? pos += 1 @@ -235,11 +237,10 @@ class isoparser(object): dayno = 1 if len(dt_str) > pos: - if dt_str[pos:pos + 1] == self._DATE_SEP: - # YYYY-W - if dt_str[4:5] != self._DATE_SEP: - raise ValueError('Inconsistent use of dash separator') - pos += 1 + if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep: + raise ValueError('Inconsistent use of dash separator') + + pos += has_sep dayno = int(dt_str[pos:pos + 1]) pos += 1 @@ -247,6 +248,9 @@ class isoparser(object): base_date = self._calculate_weekdate(year, weekno, dayno) else: # YYYYDDD or YYYY-DDD + if len(dt_str) - pos < 3: + raise ValueError('Invalid ordinal day') + ordinal_day = int(dt_str[pos:pos + 3]) pos += 3 -- cgit v1.2.3 From 9bf7a3c995f5d9aab3d54b9833adb18b7e75bf4d Mon Sep 17 00:00:00 2001 From: Alex Chamberlain Date: Wed, 6 Dec 2017 21:15:30 +0000 Subject: Fixup strong emphasis warning. --- dateutil/parser/_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/_parser.py b/dateutil/parser/_parser.py index 6e1c7a8..3c62636 100644 --- a/dateutil/parser/_parser.py +++ b/dateutil/parser/_parser.py @@ -572,7 +572,7 @@ class parser(object): This parameter is ignored if ``ignoretz`` is set. - :param **kwargs: + :param \\*\\*kwargs: Keyword arguments as passed to ``_parse()``. :return: -- cgit v1.2.3 From bf46565029a373e5506573d75a526a69b1cfe35b Mon Sep 17 00:00:00 2001 From: Alex Chamberlain Date: Wed, 6 Dec 2017 21:21:12 +0000 Subject: Fixup parser.__doc__ --- dateutil/parser/__init__.py | 2 ++ dateutil/parser/_parser.py | 1 + 2 files changed, 3 insertions(+) (limited to 'dateutil/parser') diff --git a/dateutil/parser/__init__.py b/dateutil/parser/__init__.py index e407320..2cc195a 100644 --- a/dateutil/parser/__init__.py +++ b/dateutil/parser/__init__.py @@ -3,6 +3,8 @@ from ._parser import parse, parser, parserinfo from ._parser import DEFAULTPARSER, DEFAULTTZPARSER from ._parser import InvalidDateError, InvalidDatetimeError, InvalidTimeError +from ._parser import __doc__ + from .isoparser import isoparser, isoparse __all__ = ['parse', 'parser', 'parserinfo', diff --git a/dateutil/parser/_parser.py b/dateutil/parser/_parser.py index 3c62636..541cdfc 100644 --- a/dateutil/parser/_parser.py +++ b/dateutil/parser/_parser.py @@ -6,6 +6,7 @@ most known formats to represent a date and/or time. This module attempts to be forgiving with regards to unlikely input formats, returning a datetime object even for dates which are ambiguous. If an element of a date/time stamp is omitted, the following rules are applied: + - If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is specified. -- cgit v1.2.3 From 04254e3c4718b9b2ee96fa38a5d67c8104989592 Mon Sep 17 00:00:00 2001 From: Bernat Gabor Date: Wed, 6 Dec 2017 23:50:52 +0000 Subject: add document generation to tox, and trigger automatically at CI (also check links in documentation to avoid having dangling pointers to the internet) --- dateutil/parser/_parser.py | 2 +- dateutil/parser/isoparser.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/_parser.py b/dateutil/parser/_parser.py index 541cdfc..2291f39 100644 --- a/dateutil/parser/_parser.py +++ b/dateutil/parser/_parser.py @@ -22,7 +22,7 @@ Additional resources about date/time string formats can be found below: - `A summary of the international standard date and time notation `_ - `W3C Date and Time Formats `_ -- `Time Formats (Planetary Rings Node) `_ +- `Time Formats (Planetary Rings Node) `_ - `CPAN ParseDate module `_ - `Java SimpleDateFormat Class diff --git a/dateutil/parser/isoparser.py b/dateutil/parser/isoparser.py index 89aa3ec..1550c3a 100644 --- a/dateutil/parser/isoparser.py +++ b/dateutil/parser/isoparser.py @@ -65,7 +65,7 @@ class isoparser(object): - ``YYYY`` - ``YYYY-MM`` or ``YYYYMM`` - - ``YYYY-MM-DD`` or `YYYYMMDD`` + - ``YYYY-MM-DD`` or ``YYYYMMDD`` Uncommon: @@ -79,7 +79,7 @@ class isoparser(object): - ``hh`` - ``hh:mm`` or ``hhmm`` - - ``hh:mm:ss`` or `hhmmss`` + - ``hh:mm:ss`` or ``hhmmss`` - ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits) Midnight is a special case for `hh`, as the standard supports both -- cgit v1.2.3 From 3b43612c35c7d0918ae96d132a698f7e19c7cdff Mon Sep 17 00:00:00 2001 From: Mateusz Dziedzic Date: Thu, 7 Dec 2017 15:14:37 +0000 Subject: Fix issue #427, all tests passing --- dateutil/parser/_parser.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'dateutil/parser') diff --git a/dateutil/parser/_parser.py b/dateutil/parser/_parser.py index 2291f39..82be944 100644 --- a/dateutil/parser/_parser.py +++ b/dateutil/parser/_parser.py @@ -42,6 +42,8 @@ from io import StringIO import six from six import binary_type, integer_types, text_type +from decimal import Decimal + from .. import relativedelta from .. import tz @@ -873,7 +875,7 @@ class parser(object): def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy): # Token is a number value_repr = tokens[idx] - value = float(value_repr) + value = Decimal(value_repr) len_li = len(value_repr) len_l = len(tokens) @@ -932,7 +934,7 @@ class parser(object): elif idx + 2 < len_l and tokens[idx + 1] == ':': # HH:MM[:SS[.ss]] res.hour = int(value) - value = float(tokens[idx + 2]) # TODO: try/except for this? + value = Decimal(tokens[idx + 2]) # TODO: try/except for this? (res.minute, res.second) = self._parse_min_sec(value) if idx + 4 < len_l and tokens[idx + 3] == ':': @@ -1032,7 +1034,9 @@ class parser(object): return hms_idx def _assign_hms(self, res, value_repr, hms): - value = float(value_repr) + # See GH issue #427, fixing float rounding + value = Decimal(value_repr) + if hms == 0: # Hour res.hour = int(value) -- cgit v1.2.3