Make tz.tzstr fail if an invalid GNU tz string is provided

author: Pablo Galindo Salgado <pablogsal@gmail.com> 2017-12-10 15:24:00 +0000
committer: Pablo Galindo Salgado <pablogsal@gmail.com> 2017-12-10 15:35:56 +0000
commit: 54341f74e4c4a8cdc3580aa0eb161e5e5ed95e4d (patch)
tree: 9b1fea82fde96317d88cad0fdbbfb76fee7bf728
parent: cab4149e318bbf7a9e54ced436ad1c80c7da0568 (diff)
download: dateutil-54341f74e4c4a8cdc3580aa0eb161e5e5ed95e4d.tar.gz
3 files changed, 59 insertions, 9 deletions
diff --git a/dateutil/parser/_parser.py b/dateutil/parser/_parser.py
index 62ace3d..3708106 100644
--- a/dateutil/parser/_parser.py
+++ b/dateutil/parser/_parser.py
@@ -622,7 +622,7 @@ class parser(object):
     class _result(_resultbase):
         __slots__ = ["year", "month", "day", "weekday",
                      "hour", "minute", "second", "microsecond",
-                     "tzname", "tzoffset", "ampm"]
+                     "tzname", "tzoffset", "ampm", "unused_tokens"]
 
     def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
                fuzzy_with_tokens=False):
@@ -1314,6 +1314,7 @@ class _tzparser(object):
     def parse(self, tzstr):
         res = self._result()
         l = _timelex.split(tzstr)
+        used_tokens = [False] * len(l)
         try:
 
             len_l = len(l)
@@ -1329,9 +1330,13 @@ class _tzparser(object):
                     if not res.stdabbr:
                         offattr = "stdoffset"
                         res.stdabbr = "".join(l[i:j])
+                        for ii in range(j):
+                            used_tokens[ii] = True
                     else:
                         offattr = "dstoffset"
                         res.dstabbr = "".join(l[i:j])
+                        for ii in range(j):
+                            used_tokens[ii] = True
                     i = j
                     if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                        "0123456789")):
@@ -1339,6 +1344,7 @@ class _tzparser(object):
                             # Yes, that's right.  See the TZ variable
                             # documentation.
                             signal = (1, -1)[l[i] == '+']
+                            used_tokens[i] = True
                             i += 1
                         else:
                             signal = -1
@@ -1352,6 +1358,7 @@ class _tzparser(object):
                             setattr(res, offattr,
                                     (int(l[i]) * 3600 +
                                      int(l[i + 2]) * 60) * signal)
+                            used_tokens[i] = True
                             i += 2
                         elif len_li <= 2:
                             # -[0]3
@@ -1359,12 +1366,14 @@ class _tzparser(object):
                                     int(l[i][:2]) * 3600 * signal)
                         else:
                             return None
+                        used_tokens[i] = True
                         i += 1
                     if res.dstabbr:
                         break
                 else:
                     break
 
+
             if i < len_l:
                 for j in range(i, len_l):
                     if l[j] == ';':
@@ -1385,21 +1394,26 @@ class _tzparser(object):
                     i += 2
                     if l[i] == '-':
                         value = int(l[i + 1]) * -1
+                        used_tokens[i] = True
                         i += 1
                     else:
                         value = int(l[i])
+                    used_tokens[i] = True
                     i += 2
                     if value:
                         x.week = value
                         x.weekday = (int(l[i]) - 1) % 7
                     else:
                         x.day = int(l[i])
+                    used_tokens[i] = True
                     i += 2
                     x.time = int(l[i])
+                    used_tokens[i] = True
                     i += 2
                 if i < len_l:
                     if l[i] in ('-', '+'):
                         signal = (-1, 1)[l[i] == "+"]
+                        used_tokens[i] = True
                         i += 1
                     else:
                         signal = 1
@@ -1411,29 +1425,37 @@ class _tzparser(object):
                 for x in (res.start, res.end):
                     if l[i] == 'J':
                         # non-leap year day (1 based)
+                        used_tokens[i] = True
                         i += 1
                         x.jyday = int(l[i])
                     elif l[i] == 'M':
                         # month[-.]week[-.]weekday
+                        used_tokens[i] = True
                         i += 1
                         x.month = int(l[i])
+                        used_tokens[i] = True
                         i += 1
                         assert l[i] in ('-', '.')
+                        used_tokens[i] = True
                         i += 1
                         x.week = int(l[i])
                         if x.week == 5:
                             x.week = -1
+                        used_tokens[i] = True
                         i += 1
                         assert l[i] in ('-', '.')
+                        used_tokens[i] = True
                         i += 1
                         x.weekday = (int(l[i]) - 1) % 7
                     else:
                         # year day (zero based)
                         x.yday = int(l[i]) + 1
 
+                    used_tokens[i] = True
                     i += 1
 
                     if i < len_l and l[i] == '/':
+                        used_tokens[i] = True
                         i += 1
                         # start time
                         len_li = len(l[i])
@@ -1444,8 +1466,10 @@ class _tzparser(object):
                         elif i + 1 < len_l and l[i + 1] == ':':
                             # -03:00
                             x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
+                            used_tokens[i] = True
                             i += 2
                             if i + 1 < len_l and l[i + 1] == ':':
+                                used_tokens[i] = True
                                 i += 2
                                 x.time += int(l[i])
                         elif len_li <= 2:
@@ -1453,6 +1477,7 @@ class _tzparser(object):
                             x.time = (int(l[i][:2]) * 3600)
                         else:
                             return None
+                        used_tokens[i] = True
                         i += 1
 
                     assert i == len_l or l[i] == ','
@@ -1464,6 +1489,7 @@ class _tzparser(object):
         except (IndexError, ValueError, AssertionError):
             return None
 
+        res.unused_tokens = not {token for token,is_used in zip(l,used_tokens) if not is_used}.issubset({",",":"})
         return res
 
 
diff --git a/dateutil/test/test_tz.py b/dateutil/test/test_tz.py
index a285e7d..1ef7d5c 100644
--- a/dateutil/test/test_tz.py
+++ b/dateutil/test/test_tz.py
@@ -1146,6 +1146,30 @@ class TZStrTest(unittest.TestCase, TzFoldMixin):
 
         return tz.tzstr(tzname_map[tzname])
 
+    def test_valid_GNU_time_zone(self):
+        # From https://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html
+        # Smoke test: passes if tz.tzstr() accepts each string without raising.
+        tz.tzstr("EST+5EDT,M3.2.0/2,M11.1.0/2")  # explicit '+' on the offset
+        tz.tzstr("WART4WARST,J1,J365/25")  # Jn day rules, time field above 24
+        tz.tzstr("IST-2IDT,M3.4.4/26,M10.5.0")  # negative offset
+        tz.tzstr("WGT3WGST,M3.5.0/2,M10.5.0/1")  # unsigned offset
+
+    def test_invalid_GNU_time_zones(self):  # each malformed string must raise
+        with pytest.raises(ValueError):  # comma-separated numeric rules
+            tz.tzstr("EST5EDT,4,1,0,7200,10,-1,0,7200,3600")
+        with pytest.raises(ValueError):  # arbitrary junk
+            tz.tzstr("hdfiughdfuig,dfughdfuigpu87ñ::")
+        with pytest.raises(ValueError):  # junk with a leading comma
+            tz.tzstr(",dfughdfuigpu87ñ::")
+        with pytest.raises(ValueError):  # stray "-1:" before the name
+            tz.tzstr("-1:WART4WARST,J1,J365/25")
+        with pytest.raises(ValueError):  # negative time after "/"
+            tz.tzstr("WART4WARST,J1,J365/-25")
+        with pytest.raises(ValueError):  # negative weekday in an M rule
+            tz.tzstr("IST-2IDT,M3.4.-1/26,M10.5.0")
+        with pytest.raises(ValueError):  # commas, not dots, inside the M rules
+            tz.tzstr("IST-2IDT,M3,2000,1/26,M10,5,0")
+
     def testStrStart1(self):
         self.assertEqual(datetime(2003, 4, 6, 1, 59,
                                   tzinfo=tz.tzstr("EST5EDT")).tzname(), "EST")
@@ -1161,14 +1185,14 @@ class TZStrTest(unittest.TestCase, TzFoldMixin):
         self.assertEqual(end.tzname(), "EST")
 
     def testStrStart2(self):
-        s = "EST5EDT,4,0,6,7200,10,0,26,7200,3600"
+        s = "EST5EDT,M4.1.0,M10.5.0"  # 1st Sun in Apr, last Sun in Oct
         self.assertEqual(datetime(2003, 4, 6, 1, 59,
                                   tzinfo=tz.tzstr(s)).tzname(), "EST")
         self.assertEqual(datetime(2003, 4, 6, 2, 00,
                                   tzinfo=tz.tzstr(s)).tzname(), "EDT")
 
     def testStrEnd2(self):
-        s = "EST5EDT,4,0,6,7200,10,0,26,7200,3600"
+        s = "EST5EDT,M4.1.0,M10.5.7"
         self.assertEqual(datetime(2003, 10, 26, 0, 59,
                                   tzinfo=tz.tzstr(s)).tzname(), "EDT")
 
@@ -1177,14 +1201,14 @@ class TZStrTest(unittest.TestCase, TzFoldMixin):
         self.assertEqual(end.tzname(), "EST")
 
     def testStrStart3(self):
-        s = "EST5EDT,4,1,0,7200,10,-1,0,7200,3600"
+        s = "EST5EDT,M4.1.0,M10.5.0"  # 1st Sun in Apr, last Sun in Oct
         self.assertEqual(datetime(2003, 4, 6, 1, 59,
                                   tzinfo=tz.tzstr(s)).tzname(), "EST")
         self.assertEqual(datetime(2003, 4, 6, 2, 00,
                                   tzinfo=tz.tzstr(s)).tzname(), "EDT")
 
     def testStrEnd3(self):
-        s = "EST5EDT,4,1,0,7200,10,-1,0,7200,3600"
+        s = "EST5EDT,M4.1.0,M10.5.7"
         self.assertEqual(datetime(2003, 10, 26, 0, 59,
                                   tzinfo=tz.tzstr(s)).tzname(), "EDT")
 
@@ -1208,14 +1232,14 @@ class TZStrTest(unittest.TestCase, TzFoldMixin):
         self.assertEqual(end.tzname(), "EST")
 
     def testStrStart5(self):
-        s = "EST5EDT4,95/02:00:00,298/02:00"
+        s = "EST5EDT,M4.1.0,M10.5.0"  # 1st Sun in Apr, last Sun in Oct
         self.assertEqual(datetime(2003, 4, 6, 1, 59,
                                   tzinfo=tz.tzstr(s)).tzname(), "EST")
         self.assertEqual(datetime(2003, 4, 6, 2, 00,
                                   tzinfo=tz.tzstr(s)).tzname(), "EDT")
 
     def testStrEnd5(self):
-        s = "EST5EDT4,95/02:00:00,298/02"
+        s = "EST5EDT,M4.1.0,M10.5.7"
         self.assertEqual(datetime(2003, 10, 26, 0, 59,
                                   tzinfo=tz.tzstr(s)).tzname(), "EDT")
         end = tz.enfold(datetime(2003, 10, 26, 1, 00,
@@ -1253,7 +1277,7 @@ class TZStrTest(unittest.TestCase, TzFoldMixin):
     def testStrCmp2(self):
         # TODO: This is parsing the default arguments.
         self.assertEqual(tz.tzstr("EST5EDT"),
-                         tz.tzstr("EST5EDT,4,1,0,7200,10,-1,0,7200,3600"))
+                         tz.tzstr("EST5EDT,M4.1.0,M10.5.0"))
 
     def testStrInequality(self):
         TZS1 = tz.tzstr('EST5EDT4')
diff --git a/dateutil/tz/tz.py b/dateutil/tz/tz.py
index 39e19c7..44f58db 100644
--- a/dateutil/tz/tz.py
+++ b/dateutil/tz/tz.py
@@ -993,7 +993,7 @@ class tzstr(tzrange):
         self._s = s
 
         res = parser._parsetz(s)
-        if res is None:
+        if res is None or res.unused_tokens:
             raise ValueError("unknown string format")
 
         # Here we break the compatibility with the TZ variable handling.
author	Pablo Galindo Salgado <pablogsal@gmail.com>	2017-12-10 15:24:00 +0000
committer	Pablo Galindo Salgado <pablogsal@gmail.com>	2017-12-10 15:35:56 +0000
commit	54341f74e4c4a8cdc3580aa0eb161e5e5ed95e4d (patch)
tree	9b1fea82fde96317d88cad0fdbbfb76fee7bf728
parent	cab4149e318bbf7a9e54ced436ad1c80c7da0568 (diff)
download	dateutil-54341f74e4c4a8cdc3580aa0eb161e5e5ed95e4d.tar.gz