1
# -*- coding:iso-8859-1 -*-
3
Copyright (c) 2003 Gustavo Niemeyer <niemeyer@conectiva.com>
5
This module offers extensions to the standard Python 2.3+ datetime module.
8
__author__ = "Gustavo Niemeyer <niemeyer@conectiva.com>"
9
__license__ = "PSF License"
20
__all__ = ["parse", "parserinfo"]
24
# http://www.cl.cam.ac.uk/~mgk25/iso-time.html
25
# http://www.iso.ch/iso/en/prods-services/popstds/datesandtime.html
26
# http://www.w3.org/TR/NOTE-datetime
27
# http://ringmaster.arc.nasa.gov/tools/time_formats.html
28
# http://search.cpan.org/author/MUIR/Time-modules-2003.0211/lib/Time/ParseDate.pm
29
# http://stein.cshl.org/jade/distrib/docs/java.text.SimpleDateFormat.html
32
from cStringIO import StringIO
34
from StringIO import StringIO
37
def __init__(self, instream):
38
if isinstance(instream, basestring):
39
instream = StringIO(instream)
40
self.instream = instream
41
self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
42
'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
43
'��������������������������������'
44
'������������������������������')
45
self.numchars = '0123456789'
46
self.whitespace = ' \t\r\n'
53
return self.tokenstack.pop(0)
57
wordchars = self.wordchars
58
numchars = self.numchars
59
whitespace = self.whitespace
62
nextchar = self.charstack.pop(0)
64
nextchar = self.instream.read(1)
70
if nextchar in wordchars:
72
elif nextchar in numchars:
74
elif nextchar in whitespace:
81
if nextchar in wordchars:
87
self.charstack.append(nextchar)
90
if nextchar in numchars:
96
self.charstack.append(nextchar)
100
if nextchar == '.' or nextchar in wordchars:
102
elif nextchar in numchars and token[-1] == '.':
106
self.charstack.append(nextchar)
109
if nextchar == '.' or nextchar in numchars:
111
elif nextchar in wordchars and token[-1] == '.':
115
self.charstack.append(nextchar)
117
if (state in ('a.', '0.') and
118
(seenletters or token.count('.') > 1 or token[-1] == '.')):
122
self.tokenstack.append('.')
124
self.tokenstack.append(tok)
131
token = self.get_token()
138
split = classmethod(split)
140
class _resultbase(object):
143
for attr in self.__slots__:
144
setattr(self, attr, None)
146
def _repr(self, classname):
148
for attr in self.__slots__:
149
value = getattr(self, attr)
150
if value is not None:
151
l.append("%s=%s" % (attr, `value`))
152
return "%s(%s)" % (classname, ", ".join(l))
155
return self._repr(self.__class__.__name__)
159
# m from a.m/p.m, t from ISO T separator
160
JUMP = [" ", ".", ",", ";", "-", "/", "'",
161
"at", "on", "and", "ad", "m", "t", "of",
162
"st", "nd", "rd", "th"]
164
WEEKDAYS = [("Mon", "Monday"),
166
("Wed", "Wednesday"),
171
MONTHS = [("Jan", "January"),
179
("Sep", "September"),
183
HMS = [("h", "hour", "hours"),
184
("m", "minute", "minutes"),
185
("s", "second", "seconds")]
188
UTCZONE = ["UTC", "GMT", "Z"]
192
def __init__(self, dayfirst=False, yearfirst=False):
    """Build the lookup tables and record the date-ordering preferences.

    Each class-level vocabulary (JUMP, WEEKDAYS, MONTHS, HMS, AMPM,
    UTCZONE, PERTAIN) is passed through ``self._convert`` and the result
    is stored on the instance under the matching underscore-prefixed
    attribute.

    dayfirst  -- stored unchanged as ``self.dayfirst``
    yearfirst -- stored unchanged as ``self.yearfirst``
    """
    self._jump = self._convert(self.JUMP)
    self._weekdays = self._convert(self.WEEKDAYS)
    self._months = self._convert(self.MONTHS)
    self._hms = self._convert(self.HMS)
    self._ampm = self._convert(self.AMPM)
    self._utczone = self._convert(self.UTCZONE)
    self._pertain = self._convert(self.PERTAIN)

    self.dayfirst = dayfirst
    self.yearfirst = yearfirst

    # Cache the current year and the first year of its century; both are
    # read by convertyear().  Floor division keeps the century an exact
    # integer even when "/" means true division (Python 3, or
    # ``from __future__ import division``), where year/100*100 would
    # simply give back the year as a float.
    self._year = time.localtime().tm_year
    self._century = self._year // 100 * 100
207
def _convert(self, lst):
209
for i in range(len(lst)):
211
if isinstance(v, tuple):
218
def jump(self, name):
    """Return True when the lower-cased *name* is present in ``self._jump``."""
    lowered = name.lower()
    return lowered in self._jump
221
def weekday(self, name):
224
return self._weekdays[name.lower()]
229
def month(self, name):
232
return self._months[name.lower()]+1
239
return self._hms[name.lower()]
243
def ampm(self, name):
245
return self._ampm[name.lower()]
249
def pertain(self, name):
    """Return True when the lower-cased *name* is present in ``self._pertain``."""
    table = self._pertain
    return name.lower() in table
252
def utczone(self, name):
    """Return True when the lower-cased *name* is present in ``self._utczone``."""
    candidate = name.lower()
    return candidate in self._utczone
255
def tzoffset(self, name):
256
if name in self._utczone:
258
return self.TZOFFSET.get(name)
260
def convertyear(self, year):
262
year += self._century
263
if abs(year-self._year) >= 50:
264
if year < self._year:
270
def validate(self, res):
273
res.year = self.convertyear(res.year)
274
if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
277
elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
284
def __init__(self, parserinfo=parserinfo):
    """Create a parser bound to a freshly built info object.

    parserinfo -- callable invoked with no arguments; whatever it
                  returns is stored as ``self.info``.
    """
    info = parserinfo()
    self.info = info
287
def parse(self, timestr, default=None,
288
ignoretz=False, tzinfos=None,
291
default = datetime.datetime.now().replace(hour=0, minute=0,
292
second=0, microsecond=0)
293
res = self._parse(timestr, **kwargs)
295
raise ValueError, "unknown string format"
297
for attr in ["year", "month", "day", "hour",
298
"minute", "second", "microsecond"]:
299
value = getattr(res, attr)
300
if value is not None:
302
ret = default.replace(**repl)
303
if res.weekday is not None and not res.day:
304
ret = ret+relativedelta.relativedelta(weekday=res.weekday)
306
if callable(tzinfos) or tzinfos and res.tzname in tzinfos:
307
if callable(tzinfos):
308
tzdata = tzinfos(res.tzname, res.tzoffset)
310
tzdata = tzinfos.get(res.tzname)
311
if isinstance(tzdata, datetime.tzinfo):
313
elif isinstance(tzdata, basestring):
314
tzinfo = tz.tzstr(tzdata)
315
elif isinstance(tzdata, int):
316
tzinfo = tz.tzoffset(res.tzname, tzdata)
318
raise ValueError, "offset must be tzinfo subclass, " \
319
"tz string, or int offset"
320
ret = ret.replace(tzinfo=tzinfo)
321
elif res.tzname and res.tzname in time.tzname:
322
ret = ret.replace(tzinfo=tz.tzlocal())
323
elif res.tzoffset == 0:
324
ret = ret.replace(tzinfo=tz.tzutc())
326
ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
329
class _result(_resultbase):
    # Date, time and timezone fields that a parse can fill in.
    __slots__ = [
        "year",
        "month",
        "day",
        "weekday",
        "hour",
        "minute",
        "second",
        "microsecond",
        "tzname",
        "tzoffset",
    ]
334
def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False):
337
dayfirst = info.dayfirst
338
if yearfirst is None:
339
yearfirst = info.yearfirst
341
l = _timelex.split(timestr)
344
# year/month/day list
347
# Index of the month string in ymd
354
# Check if it's a number
359
if value is not None:
363
if (len(ymd) == 3 and len_li in (2, 4)
364
and (i >= len_l or l[i] != ':')):
367
res.hour = int(s[:2])
369
res.minute = int(s[2:])
370
elif len_li == 6 or (len_li > 6 and l[i-1].find('.') == 6):
371
# YYMMDD or HHMMSS[.ss]
373
if not ymd and l[i-1].find('.') == -1:
374
ymd.append(info.convertyear(int(s[:2])))
375
ymd.append(int(s[2:4]))
376
ymd.append(int(s[4:]))
378
# 19990101T235959[.59]
379
res.hour = int(s[:2])
380
res.minute = int(s[2:4])
382
res.second = int(value)
384
res.microsecond = int(1000000*(value%1))
388
ymd.append(int(s[:4]))
389
ymd.append(int(s[4:6]))
390
ymd.append(int(s[6:]))
391
elif len_li in (12, 14):
394
ymd.append(int(s[:4]))
395
ymd.append(int(s[4:6]))
396
ymd.append(int(s[6:8]))
397
res.hour = int(s[8:10])
398
res.minute = int(s[10:12])
400
res.second = int(s[12:])
401
elif ((i < len_l and info.hms(l[i]) is not None) or
402
(i+1 < len_l and l[i] == ' ' and
403
info.hms(l[i+1]) is not None)):
404
# HH[ ]h or MM[ ]m or SS[.ss][ ]s
410
res.hour = int(value)
412
res.minute = int(60*(value%1))
414
res.minute = int(value)
416
res.second = int(60*(value%1))
418
res.second = int(value)
420
res.microsecond = int(1000000*(value%1))
422
if i >= len_l or idx == 2:
433
newidx = info.hms(l[i])
434
if newidx is not None:
436
elif i+1 < len_l and l[i] == ':':
438
res.hour = int(value)
441
res.minute = int(value)
443
res.second = int(60*(value%1))
445
if i < len_l and l[i] == ':':
446
value = float(l[i+1])
447
res.second = int(value)
449
res.microsecond = int(1000000*(value%1))
451
elif i < len_l and l[i] in ('-', '/', '.'):
453
ymd.append(int(value))
455
if i < len_l and not info.jump(l[i]):
458
ymd.append(int(l[i]))
461
value = info.month(l[i])
462
if value is not None:
469
if i < len_l and l[i] == sep:
470
# We have three members
472
value = info.month(l[i])
473
if value is not None:
478
ymd.append(int(l[i]))
480
elif i >= len_l or info.jump(l[i]):
481
if i+1 < len_l and info.ampm(l[i+1]) is not None:
483
res.hour = int(value)
484
if res.hour < 12 and info.ampm(l[i+1]) == 1:
486
elif res.hour == 12 and info.ampm(l[i+1]) == 0:
491
ymd.append(int(value))
493
elif info.ampm(l[i]) is not None:
495
res.hour = int(value)
496
if res.hour < 12 and info.ampm(l[i]) == 1:
498
elif res.hour == 12 and info.ampm(l[i]) == 0:
508
value = info.weekday(l[i])
509
if value is not None:
515
value = info.month(l[i])
516
if value is not None:
522
if l[i] in ('-', '/'):
526
ymd.append(int(l[i]))
528
if i < len_l and l[i] == sep:
531
ymd.append(int(l[i]))
533
elif (i+3 < len_l and l[i] == l[i+2] == ' '
534
and info.pertain(l[i+1])):
536
# In this case, 01 is clearly year
543
# Convert it here to become unambiguous
544
ymd.append(info.convertyear(value))
549
value = info.ampm(l[i])
550
if value is not None:
551
if value == 1 and res.hour < 12:
553
elif value == 0 and res.hour == 12:
558
# Check for a timezone name
559
if (res.hour is not None and len(l[i]) <= 5 and
560
res.tzname is None and res.tzoffset is None and
561
not [x for x in l[i] if x not in string.ascii_uppercase]):
563
res.tzoffset = info.tzoffset(res.tzname)
566
# Check for something like GMT+3, or BRST+3. Notice
567
# that it doesn't mean "I am 3 hours after GMT", but
568
# "my time +3 is GMT". If found, we reverse the
569
# logic so that timezone parsing code will get it
571
if i < len_l and l[i] in ('+', '-'):
572
l[i] = ('+', '-')[l[i] == '+']
574
if info.utczone(res.tzname):
575
# With something like GMT+3, the timezone
581
# Check for a numbered timezone
582
if res.hour is not None and l[i] in ('+', '-'):
583
signal = (-1,1)[l[i] == '+']
588
res.tzoffset = int(l[i][:2])*3600+int(l[i][2:])*60
589
elif i+1 < len_l and l[i+1] == ':':
591
res.tzoffset = int(l[i])*3600+int(l[i+2])*60
595
res.tzoffset = int(l[i][:2])*3600
599
res.tzoffset *= signal
601
# Look for a timezone name between parentheses
603
info.jump(l[i]) and l[i+1] == '(' and l[i+3] == ')' and
604
3 <= len(l[i+2]) <= 5 and
605
not [x for x in l[i+2]
606
if x not in string.ascii_uppercase]):
613
if not (info.jump(l[i]) or fuzzy):
618
# Process year/month/day
621
# More than three members!?
623
elif len_ymd == 1 or (mstridx != -1 and len_ymd == 2):
624
# One member, or two members with a month string
626
res.month = ymd[mstridx]
628
if len_ymd > 1 or mstridx == -1:
634
# Two members with numbers
637
res.year, res.month = ymd
640
res.month, res.year = ymd
641
elif dayfirst and ymd[1] <= 12:
643
res.day, res.month = ymd
646
res.month, res.day = ymd
650
res.month, res.day, res.year = ymd
652
if ymd[0] > 31 or (yearfirst and ymd[2] <= 31):
654
res.year, res.month, res.day = ymd
657
# Give precedence to day-first, since
658
# two-digit years are usually hand-written.
659
res.day, res.month, res.year = ymd
664
res.day, res.year, res.month = ymd
667
res.year, res.day, res.month = ymd
670
(yearfirst and ymd[1] <= 12 and ymd[2] <= 31):
672
res.year, res.month, res.day = ymd
673
elif ymd[0] > 12 or (dayfirst and ymd[1] <= 12):
675
res.day, res.month, res.year = ymd
678
res.month, res.day, res.year = ymd
680
except (IndexError, ValueError, AssertionError):
683
if not info.validate(res):
687
DEFAULTPARSER = parser()
688
def parse(timestr, parserinfo=None, **kwargs):
690
return parser(parserinfo).parse(timestr, **kwargs)
692
return DEFAULTPARSER.parse(timestr, **kwargs)
696
class _result(_resultbase):
698
__slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
701
class _attr(_resultbase):
    # Fields held by each of the ``start`` and ``end`` objects that are
    # created from this class further down.
    __slots__ = [
        "month",
        "week",
        "weekday",
        "yday",
        "jyday",
        "day",
        "time",
    ]
706
return self._repr("")
709
_resultbase.__init__(self)
710
self.start = self._attr()
711
self.end = self._attr()
713
def parse(self, tzstr):
715
l = _timelex.split(tzstr)
724
while j < len_l and not [x for x in l[j]
725
if x in "0123456789:,-+"]:
729
offattr = "stdoffset"
730
res.stdabbr = "".join(l[i:j])
732
offattr = "dstoffset"
733
res.dstabbr = "".join(l[i:j])
736
(l[i] in ('+', '-') or l[i][0] in "0123456789")):
737
if l[i] in ('+', '-'):
738
signal = (1,-1)[l[i] == '+']
745
setattr(res, offattr,
746
(int(l[i][:2])*3600+int(l[i][2:])*60)*signal)
747
elif i+1 < len_l and l[i+1] == ':':
749
setattr(res, offattr,
750
(int(l[i])*3600+int(l[i+2])*60)*signal)
754
setattr(res, offattr,
755
int(l[i][:2])*3600*signal)
765
for j in range(i, len_l):
766
if l[j] == ';': l[j] = ','
774
elif (8 <= l.count(',') <= 9 and
775
not [y for x in l[i:] if x != ','
776
for y in x if y not in "0123456789"]):
777
# GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
778
for x in (res.start, res.end):
782
value = int(l[i+1])*-1
789
x.weekday = (int(l[i])-1)%7
796
if l[i] in ('-','+'):
797
signal = (-1,1)[l[i] == "+"]
801
res.dstoffset = (res.stdoffset+int(l[i]))*signal
802
elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
803
not [y for x in l[i:] if x not in (',','/','J','M',
805
for y in x if y not in "0123456789"]):
806
for x in (res.start, res.end):
808
# non-leap year day (1 based)
812
# month[-.]week[-.]weekday
816
assert l[i] in ('-', '.')
822
assert l[i] in ('-', '.')
824
x.weekday = (int(l[i])-1)%7
826
# year day (zero based)
831
if i < len_l and l[i] == '/':
837
x.time = (int(l[i][:2])*3600+int(l[i][2:])*60)
838
elif i+1 < len_l and l[i+1] == ':':
840
x.time = int(l[i])*3600+int(l[i+2])*60
842
if i+1 < len_l and l[i+1] == ':':
847
x.time = (int(l[i][:2])*3600)
852
assert i == len_l or l[i] == ','
858
except (IndexError, ValueError, AssertionError):
863
DEFAULTTZPARSER = _tzparser()
865
return DEFAULTTZPARSER.parse(tzstr)