~bzr-pqm/bzr/bzr.dev : contents of common.py at revision 0.5.88

~bzr-pqm/bzr/bzr.dev : (revision 0.5.88)

#!/usr/bin/env python
"""\
Common entries, like strings, etc, for the changeset reading + writing code.
"""

import bzrlib

# Prefix of the first changeset header line; get_header() appends the
# dotted version number directly after it.
header_str = 'Bazaar-NG changeset v'
# Changeset format version; get_header() joins the parts with '.'.
version = (0, 0, 5)

def get_header():
    """Return the lines that open a changeset.

    :return: A two element list: ``header_str`` immediately followed by
             the dotted form of ``version``, and then an empty string.
    """
    dotted_version = '.'.join(map(str, version))
    return [header_str + dotted_version, '']

def canonicalize_revision(branch, revnos):
    """Resolve a loose revision request into an (old, new) pair of
    revision ids.

    Either id can be None if there is no associated revision.

    :param branch:  Object used to resolve the revisions; its
                    last_patch(), lookup_revision() and get_revision()
                    methods are called.
    :param revnos:  A list of revisions to lookup, should be at most 2
                    long. Only the first two entries are looked at.
    :return: (old, new)
    """
    # Pad the request out to an explicit (old, new) pair of specs.
    # With a single entry we want the changeset between that entry
    # and its base (assumed to be parents[0]).
    if len(revnos) == 0:
        old_spec, new_spec = None, None
    elif len(revnos) == 1:
        old_spec, new_spec = None, revnos[0]
    else:
        old_spec, new_spec = revnos[0], revnos[1]

    # No explicit target means "whatever the branch has last".
    if new_spec is not None:
        new = branch.lookup_revision(new_spec)
    else:
        new = branch.last_patch()

    if old_spec is not None:
        return branch.lookup_revision(old_spec), new

    # No explicit base: fall back to the first parent of the target,
    # when there is a target and it has any parents at all.
    if new is None:
        return None, new
    parents = branch.get_revision(new).parents
    if len(parents) == 0:
        return None, new
    return parents[0].revision_id, new

class ChangesetTree(object):
    """Intended to take a base tree and re-create the final tree
    described by a changeset.

    As written, construction only looks up the base tree and stores
    it as ``base_tree``.
    """

    def __init__(self, branch, changeset_info):
        """Remember the inputs and build the tree description.

        :param branch:  Where information is acquired from (and
                        updated); its revision_tree() method is called.
        :param changeset_info:  Information about a given changeset;
                                its ``base`` attribute identifies the
                                base revision.
        """
        self.branch = branch
        self.changeset_info = changeset_info

        self._build_tree()

    def _build_tree(self):
        """Populate ``base_tree`` with the branch's tree for the base
        revision named by the changeset_info object.
        """
        base_revision = self.changeset_info.base
        self.base_tree = self.branch.revision_tree(base_revision)
        
def guess_text_id(tree, file_id, rev_id, kind, modified=True):
    """Return the estimated text_id for a given file.

    The idea is that in general the text_id should identify the last
    revision which modified the file.

    :param tree: This should be the base tree for a changeset, since that
                 is all the target has for guessing. Only its
                 ``inventory`` is consulted, and only when the file was
                 not modified.
    :param file_id: The file id to guess the text_id for.
    :param rev_id: The target revision id
    :param kind: The kind of the entry; 'directory' entries have no
                 text, so None is returned for them.
    :param modified: Was the file modified between base and target?
    :return: None for directories, ``file_id + '-' + rev_id`` for
             modified files, otherwise the text_id recorded in the
             tree's inventory.
    :raises BzrError: if the file is unmodified but is not present in
                      the tree's inventory.
    """
    if kind == 'directory':
        return None
    if modified:
        # If the file was modified in an intermediate stage
        # (not in the final target), this won't be correct
        # but it is our best guess.
        # TODO: In the current code, text-ids are randomly generated
        # using the filename as the base. In the future they will
        # probably follow this format.
        return file_id + '-' + rev_id
    # The file was not actually modified in this changeset
    # so the text_id should be equal to its previous value
    if file_id not in tree.inventory:
        # Imported here because only this error path needs it.
        from bzrlib.errors import BzrError
        raise BzrError('Unable to generate text_id for file_id {%s}'
            ', file does not exist in tree.' % file_id)
    # This is the last known text_id for this file
    # so assume that it is being used.
    return tree.inventory[file_id].text_id

def encode(s):
    """Encode a string as UTF-8 for use in a changeset.

    No escaping is performed; the text is only converted to UTF-8.

    Note: It can be either a normal, or a unicode string
    NOTE(review): under Python 2 a normal (byte) string is implicitly
    decoded as ASCII before being encoded, so a byte string holding
    non-ASCII data would presumably raise -- confirm against callers.

    >>> encode(u'abcdefg')
    'abcdefg'
    >>> encode(u'a b\\tc\\nd\\\\e')
    'a b\\tc\\nd\\\\e'
    >>> encode('a b\\tc\\nd\\e')
    'a b\\tc\\nd\\\\e'
    >>> encode(u'\\u1234\\u0020')
    '\\xe1\\x88\\xb4 '
    >>> encode('abcdefg')
    'abcdefg'
    >>> encode(u'')
    ''
    >>> encode('')
    ''
    """
    utf8_text = s.encode('utf-8')
    return utf8_text

def decode(s):
    """Reverse encode(): interpret s as UTF-8 and return the
    resulting unicode string.

    >>> decode('abcdefg')
    u'abcdefg'
    >>> decode('a b\\tc\\nd\\\\e')
    u'a b\\tc\\nd\\\\e'
    >>> decode('\\xe1\\x88\\xb4 ')
    u'\\u1234 '
    >>> for s in ('test', 'strings'):
    ...   if decode(encode(s)) != s:
    ...     print 'Failed: %r' % s # There should be no failures

    """
    unicode_text = s.decode('utf-8')
    return unicode_text

def format_highres_date(t, offset=0):
    """Format a date, such that it includes higher precision in the
    seconds field.

    The result looks like 'Thu 2005-06-30 12:38:52.350850105 -0500':
    the wall-clock time in the requested timezone, nine fractional
    digits of seconds, and the timezone written as +HHMM or -HHMM.

    :param t:   Fractional seconds since the epoch (UTC). It is shifted
                by ``offset`` before being broken down into fields.
    :type t: float
    :param offset:  The timezone offset in integer seconds, positive
                    for zones ahead of UTC. None is treated as 0.
    :type offset: int
    :return: The formatted date string.

    Example: format_highres_date(time.time(), -time.timezone)
    this will return a date stamp for right now,
    formatted for the local timezone.

    >>> from bzrlib.osutils import format_date
    >>> format_date(1120153132.350850105, 0)
    'Thu 2005-06-30 17:38:52 +0000'
    >>> format_highres_date(1120153132.350850105, 0)
    'Thu 2005-06-30 17:38:52.350850105 +0000'
    >>> format_date(1120153132.350850105, -5*3600)
    'Thu 2005-06-30 12:38:52 -0500'
    >>> format_highres_date(1120153132.350850105, -5*3600)
    'Thu 2005-06-30 12:38:52.350850105 -0500'
    >>> format_highres_date(1120153132.350850105, 7200)
    'Thu 2005-06-30 19:38:52.350850105 +0200'
    >>> format_highres_date(1120153132.350850105, -19800)
    'Thu 2005-06-30 12:08:52.350850105 -0530'
    """
    import math
    import time
    assert isinstance(t, float)

    # This has to be formatted for "original" date, so that the
    # revision XML entry will be reproduced faithfully.
    if offset is None:
        offset = 0

    # Split into whole seconds and a fraction in [0, 1). floor() rather
    # than int() keeps the fraction non-negative before the epoch.
    whole_seconds = math.floor(t)
    fraction = '%.9f' % (t - whole_seconds)
    if fraction[0] == '1':
        # The fraction rounded up to a full second at 9 digits; carry
        # it into the seconds field instead of silently dropping it.
        whole_seconds += 1
        fraction = '0.000000000'
    tt = time.gmtime(whole_seconds + offset)

    # Format the zone from the magnitude of the offset. Floor division
    # and modulo on a negative offset would turn -05:30 into -0630.
    if offset < 0:
        sign = '-'
    else:
        sign = '+'
    hours, remainder = divmod(abs(offset), 3600)
    minutes = remainder // 60

    return (time.strftime("%a %Y-%m-%d %H:%M:%S", tt)
            + fraction[1:] # Get the high-res seconds, but ignore the 0
            + ' %s%02d%02d' % (sign, hours, minutes))

def unpack_highres_date(date):
    """This takes the high-resolution date stamp, and
    converts it back into the tuple (timestamp, timezone)
    Where timestamp is in real UTC since epoch seconds, and timezone is an integer
    number of seconds offset.

    :param date: A date formatted by format_highres_date
    :type date: string
    :return: (timestamp, offset) with timestamp a float and offset an
             integer number of seconds (negative for zones behind UTC).
    :raises ValueError: if the string has no '.' introducing the
                        fractional seconds. Malformed fields also
                        surface as ValueError from the parsing calls.

    >>> import time, random
    >>> unpack_highres_date('Thu 2005-06-30 12:38:52.350850105 -0500')
    (1120153132.3508501, -18000)
    >>> unpack_highres_date('Thu 2005-06-30 17:38:52.350850105 +0000')
    (1120153132.3508501, 0)
    >>> unpack_highres_date('Thu 2005-06-30 19:38:52.350850105 +0200')
    (1120153132.3508501, 7200)
    >>> unpack_highres_date('Thu 2005-06-30 23:08:52.350850105 +0530')
    (1120153132.3508501, 19800)
    >>> from bzrlib.osutils import local_time_offset
    >>> t = time.time()
    >>> o = local_time_offset()
    >>> t2, o2 = unpack_highres_date(format_highres_date(t, o))
    >>> t == t2
    True
    >>> o == o2
    True
    >>> for count in xrange(500):
    ...   t += random.random()*24*3600*365*2 - 24*3600*364 # Random time within +/- 1 year
    ...   o = random.randint(-12,12)*3600 # Random timezone
    ...   date = format_highres_date(t, o)
    ...   t2, o2 = unpack_highres_date(date)
    ...   if t != t2 or o != o2:
    ...      print 'Failed on date %r, %s,%s diff:%s' % (date, t, o, t2-t)
    ...      break

    """
    import time
    import calendar
    # Up until the first period is a datestamp that is generated
    # as normal from time.strftime, so use time.strptime to
    # parse it
    dot_loc = date.find('.')
    if dot_loc == -1:
        raise ValueError('Date string does not contain high-precision seconds: %r' % date)
    base_time = time.strptime(date[:dot_loc], "%a %Y-%m-%d %H:%M:%S")
    fract_seconds, offset = date[dot_loc:].split()
    fract_seconds = float(fract_seconds)

    # The zone is written as [+-]HHMM. Split the magnitude into hours
    # and minutes and reapply the sign afterwards, so negative and
    # non-whole-hour zones convert correctly (minutes are * 60).
    zone = int(offset)
    if zone < 0:
        sign = -1
    else:
        sign = 1
    hours, minutes = divmod(abs(zone), 100)
    offset = sign * (hours * 3600 + minutes * 60)

    # time.mktime returns localtime, but calendar.timegm returns UTC time
    timestamp = calendar.timegm(base_time)
    timestamp -= offset
    # Add back in the fractional seconds
    timestamp += fract_seconds
    return (timestamp, offset)

# When executed as a script, run the doctests embedded in this
# module's docstrings.
if __name__ == '__main__':
    import doctest
    doctest.testmod()


0.5.7 by John Arbash Meinel Added a bunch more information about changesets. Can now read back in all of the meta information.	1	#!/usr/bin/env python
	2	"""\
	3	Common entries, like strings, etc, for the changeset reading + writing code.
	4	"""
	5
0.5.79 by John Arbash Meinel Added common to the set of tests, fixed a problem with time conversions.	6	import bzrlib
	7
0.5.57 by John Arbash Meinel Simplified the header, only output base if it is not the expected one.	8	header_str = 'Bazaar-NG changeset v'
0.5.7 by John Arbash Meinel Added a bunch more information about changesets. Can now read back in all of the meta information.	9	version = (0, 0, 5)
	10
	11	def get_header():
	12	return [
	13	header_str + '.'.join([str(v) for v in version]),
	14	''
	15	]
	16
0.5.36 by John Arbash Meinel Updated so that read_changeset is able to parse the output	17	def canonicalize_revision(branch, revnos):
	18	"""Turn some sort of revision information into a single
	19	set of from-to revision ids.
	20
	21	A revision id can be None if there is no associated revison.
	22
	23	:param revnos: A list of revisions to lookup, should be at most 2 long
	24	:return: (old, new)
	25	"""
	26	# If only 1 entry is given, then we assume we want just the
	27	# changeset between that entry and it's base (we assume parents[0])
	28	if len(revnos) == 0:
	29	revnos = [None, None]
	30	elif len(revnos) == 1:
	31	revnos = [None, revnos[0]]
	32
	33	if revnos[1] is None:
	34	new = branch.last_patch()
	35	else:
	36	new = branch.lookup_revision(revnos[1])
	37	if revnos[0] is None:
0.5.58 by John Arbash Meinel Fixed a bug in the case that there are no revision committed yet.	38	if new is None:
	39	old = None
	40	else:
0.5.59 by John Arbash Meinel Several fixes for handling the case where you are doing a changeset against revno=0 (Null base)	41	oldrev = branch.get_revision(new)
	42	if len(oldrev.parents) == 0:
	43	old = None
	44	else:
	45	old = oldrev.parents[0].revision_id
0.5.36 by John Arbash Meinel Updated so that read_changeset is able to parse the output	46	else:
	47	old = branch.lookup_revision(revnos[0])
	48
	49	return old, new
	50
0.5.39 by John Arbash Meinel (broken) Working on changing the processing to use a ChangesetTree.	51	class ChangesetTree(object):
	52	"""This class is designed to take a base tree, and re-create
	53	a final tree based on the information contained within a
	54	changeset.
	55	"""
	56
	57	def __init__(self, branch, changeset_info):
	58	"""Initialize this ChangesetTree.
	59
	60	:param branch: This is where information will be acquired
	61	and updated.
	62	:param changeset_info: Information about a given changeset,
	63	so that we can identify the base,
	64	and other information.
	65	"""
	66	self.branch = branch
	67	self.changeset_info = changeset_info
	68
	69	self._build_tree()
	70
	71	def _build_tree(self):
	72	"""Build the final description of the tree, based on
	73	the changeset_info object.
	74	"""
0.5.40 by John Arbash Meinel Added some highres formatting of datestamps.	75	self.base_tree = self.branch.revision_tree(self.changeset_info.base)
0.5.39 by John Arbash Meinel (broken) Working on changing the processing to use a ChangesetTree.	76
0.5.81 by John Arbash Meinel Cleaning up from pychecker.	77	def guess_text_id(tree, file_id, rev_id, kind, modified=True):
0.5.55 by John Arbash Meinel Lots of updates. Using a minimized annotations for changesets.	78	"""This returns the estimated text_id for a given file.
	79	The idea is that in general the text_id should be the id last
	80	revision which modified the file.
	81
	82	:param tree: This should be the base tree for a changeset, since that
	83	is all the target has for guessing.
	84	:param file_id: The file id to guess the text_id for.
	85	:param rev_id: The target revision id
	86	:param modified: Was the file modified between base and target?
	87	"""
	88	from bzrlib.errors import BzrError
0.5.81 by John Arbash Meinel Cleaning up from pychecker.	89	if kind == 'directory':
0.5.81 by John Arbash Meinel Cleaning up from pychecker.	90	return None
0.5.55 by John Arbash Meinel Lots of updates. Using a minimized annotations for changesets.	91	if modified:
	92	# If the file was modified in an intermediate stage
	93	# (not in the final target), this won't be correct
	94	# but it is our best guess.
	95	# TODO: In the current code, text-ids are randomly generated
	96	# using the filename as the base. In the future they will
	97	# probably follow this format.
	98	return file_id + '-' + rev_id
	99	# The file was not actually modified in this changeset
	100	# so the text_id should be equal to it's previous value
	101	if not file_id in tree.inventory:
	102	raise BzrError('Unable to generate text_id for file_id {%s}'
	103	', file does not exist in tree.' % file_id)
	104	# This is the last known text_id for this file
	105	# so assume that it is being used.
0.5.81 by John Arbash Meinel Cleaning up from pychecker.	106	return tree.inventory[file_id].text_id
0.5.55 by John Arbash Meinel Lots of updates. Using a minimized annotations for changesets.	107
	108	def encode(s):
	109	"""Take a unicode string, and make sure to escape it for
	110	use in a changeset.
	111
	112	Note: It can be either a normal, or a unicode string
	113
	114	>>> encode(u'abcdefg')
	115	'abcdefg'
0.5.79 by John Arbash Meinel Added common to the set of tests, fixed a problem with time conversions.	116	>>> encode(u'a b\\tc\\nd\\\\e')
0.5.87 by John Arbash Meinel Handling international characters, added more test cases.	117	'a b\\tc\\nd\\\\e'
0.5.79 by John Arbash Meinel Added common to the set of tests, fixed a problem with time conversions.	118	>>> encode('a b\\tc\\nd\\e')
0.5.87 by John Arbash Meinel Handling international characters, added more test cases.	119	'a b\\tc\\nd\\\\e'
0.5.55 by John Arbash Meinel Lots of updates. Using a minimized annotations for changesets.	120	>>> encode(u'\\u1234\\u0020')
0.5.87 by John Arbash Meinel Handling international characters, added more test cases.	121	'\\xe1\\x88\\xb4 '
0.5.55 by John Arbash Meinel Lots of updates. Using a minimized annotations for changesets.	122	>>> encode('abcdefg')
	123	'abcdefg'
	124	>>> encode(u'')
	125	''
	126	>>> encode('')
	127	''
	128	"""
0.5.87 by John Arbash Meinel Handling international characters, added more test cases.	129	return s.encode('utf-8')
0.5.55 by John Arbash Meinel Lots of updates. Using a minimized annotations for changesets.	130
	131	def decode(s):
	132	"""Undo the encode operation, returning a unicode string.
	133
	134	>>> decode('abcdefg')
	135	u'abcdefg'
0.5.87 by John Arbash Meinel Handling international characters, added more test cases.	136	>>> decode('a b\\tc\\nd\\\\e')
0.5.79 by John Arbash Meinel Added common to the set of tests, fixed a problem with time conversions.	137	u'a b\\tc\\nd\\\\e'
0.5.87 by John Arbash Meinel Handling international characters, added more test cases.	138	>>> decode('\\xe1\\x88\\xb4 ')
0.5.55 by John Arbash Meinel Lots of updates. Using a minimized annotations for changesets.	139	u'\\u1234 '
	140	>>> for s in ('test', 'strings'):
	141	... if decode(encode(s)) != s:
	142	... print 'Failed: %r' % s # There should be no failures
	143
	144	"""
0.5.87 by John Arbash Meinel Handling international characters, added more test cases.	145	return s.decode('utf-8')
0.5.55 by John Arbash Meinel Lots of updates. Using a minimized annotations for changesets.	146
0.5.39 by John Arbash Meinel (broken) Working on changing the processing to use a ChangesetTree.	147	def format_highres_date(t, offset=0):
	148	"""Format a date, such that it includes higher precision in the
	149	seconds field.
	150
0.5.40 by John Arbash Meinel Added some highres formatting of datestamps.	151	:param t: The local time in fractional seconds since the epoch
0.5.39 by John Arbash Meinel (broken) Working on changing the processing to use a ChangesetTree.	152	:type t: float
	153	:param offset: The timezone offset in integer seconds
	154	:type offset: int
	155
0.5.40 by John Arbash Meinel Added some highres formatting of datestamps.	156	Example: format_highres_date(time.time(), -time.timezone)
	157	this will return a date stamp for right now,
	158	formatted for the local timezone.
	159
0.5.39 by John Arbash Meinel (broken) Working on changing the processing to use a ChangesetTree.	160	>>> from bzrlib.osutils import format_date
	161	>>> format_date(1120153132.350850105, 0)
	162	'Thu 2005-06-30 17:38:52 +0000'
	163	>>> format_highres_date(1120153132.350850105, 0)
	164	'Thu 2005-06-30 17:38:52.350850105 +0000'
	165	>>> format_date(1120153132.350850105, -5*3600)
	166	'Thu 2005-06-30 12:38:52 -0500'
	167	>>> format_highres_date(1120153132.350850105, -5*3600)
	168	'Thu 2005-06-30 12:38:52.350850105 -0500'
0.5.40 by John Arbash Meinel Added some highres formatting of datestamps.	169	>>> format_highres_date(1120153132.350850105, 7200)
	170	'Thu 2005-06-30 19:38:52.350850105 +0200'
0.5.39 by John Arbash Meinel (broken) Working on changing the processing to use a ChangesetTree.	171	"""
	172	import time
	173	assert isinstance(t, float)
	174
	175	# This has to be formatted for "original" date, so that the
	176	# revision XML entry will be reproduced faithfully.
	177	if offset == None:
	178	offset = 0
	179	tt = time.gmtime(t + offset)
	180
	181	return (time.strftime("%a %Y-%m-%d %H:%M:%S", tt)
	182	+ ('%.9f' % (t - int(t)))[1:] # Get the high-res seconds, but ignore the 0
	183	+ ' %+03d%02d' % (offset / 3600, (offset / 60) % 60))
	184
	185	def unpack_highres_date(date):
	186	"""This takes the high-resolution date stamp, and
	187	converts it back into the tuple (timestamp, timezone)
0.5.79 by John Arbash Meinel Added common to the set of tests, fixed a problem with time conversions.	188	Where timestamp is in real UTC since epoch seconds, and timezone is an integer
0.5.39 by John Arbash Meinel (broken) Working on changing the processing to use a ChangesetTree.	189	number of seconds offset.
	190
	191	:param date: A date formated by format_highres_date
	192	:type date: string
	193
0.5.40 by John Arbash Meinel Added some highres formatting of datestamps.	194	>>> import time, random
	195	>>> unpack_highres_date('Thu 2005-06-30 12:38:52.350850105 -0500')
	196	(1120153132.3508501, -18000)
	197	>>> unpack_highres_date('Thu 2005-06-30 17:38:52.350850105 +0000')
	198	(1120153132.3508501, 0)
	199	>>> unpack_highres_date('Thu 2005-06-30 19:38:52.350850105 +0200')
	200	(1120153132.3508501, 7200)
	201	>>> from bzrlib.osutils import local_time_offset
	202	>>> t = time.time()
	203	>>> o = local_time_offset()
	204	>>> t2, o2 = unpack_highres_date(format_highres_date(t, o))
	205	>>> t == t2
	206	True
	207	>>> o == o2
	208	True
	209	>>> for count in xrange(500):
	210	... t += random.random()*24*3600*365*2 - 24*3600*364 # Random time within +/- 1 year
	211	... o = random.randint(-12,12)*3600 # Random timezone
	212	... date = format_highres_date(t, o)
	213	... t2, o2 = unpack_highres_date(date)
	214	... if t != t2 or o != o2:
	215	... print 'Failed on date %r, %s,%s diff:%s' % (date, t, o, t2-t)
0.5.79 by John Arbash Meinel Added common to the set of tests, fixed a problem with time conversions.	216	... break
0.5.40 by John Arbash Meinel Added some highres formatting of datestamps.	217
0.5.39 by John Arbash Meinel (broken) Working on changing the processing to use a ChangesetTree.	218	"""
0.5.79 by John Arbash Meinel Added common to the set of tests, fixed a problem with time conversions.	219	import time, calendar
0.5.40 by John Arbash Meinel Added some highres formatting of datestamps.	220	# Up until the first period is a datestamp that is generated
	221	# as normal from time.strftime, so use time.strptime to
	222	# parse it
	223	dot_loc = date.find('.')
	224	if dot_loc == -1:
	225	raise ValueError('Date string does not contain high-precision seconds: %r' % date)
	226	base_time = time.strptime(date[:dot_loc], "%a %Y-%m-%d %H:%M:%S")
	227	fract_seconds, offset = date[dot_loc:].split()
	228	fract_seconds = float(fract_seconds)
	229	offset = int(offset)
	230	offset = int(offset / 100) * 3600 + offset % 100
	231
0.5.79 by John Arbash Meinel Added common to the set of tests, fixed a problem with time conversions.	232	# time.mktime returns localtime, but calendar.timegm returns UTC time
	233	timestamp = calendar.timegm(base_time)
	234	timestamp -= offset
0.5.40 by John Arbash Meinel Added some highres formatting of datestamps.	235	# Add back in the fractional seconds
	236	timestamp += fract_seconds
	237	return (timestamp, offset)
0.5.39 by John Arbash Meinel (broken) Working on changing the processing to use a ChangesetTree.	238
	239	if __name__ == '__main__':
	240	import doctest
	241	doctest.testmod()
0.5.40 by John Arbash Meinel Added some highres formatting of datestamps.	242