~bzr-pqm/bzr/bzr.dev : contents of bzrlib/generate

~bzr-pqm/bzr/bzr.dev : (revision 2495.4.4)

# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Common code for generating file or revision ids."""

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import time
import unicodedata

from bzrlib import (
    config,
    errors,
    osutils,
    )
""")

from bzrlib import (
    lazy_regex,
    )

# These patterns match every character that is NOT allowed in a generated id.
# The generators below substitute each match with '' so unwanted characters
# are simply dropped rather than escaped.
# For file ids only word characters (\w) and '.' are kept.
_file_id_chars_re = lazy_regex.lazy_compile(r'[^\w.]')
# For revision ids '-', '+' and '@' are kept in addition to \w and '.'.
_rev_id_chars_re = lazy_regex.lazy_compile(r'[^-\w.+@]')
# Shared "-<date>-<random>-" part of every file id suffix. It stays None until
# _next_id_suffix() builds it on its first call.
_gen_file_id_suffix = None
# Number of suffixes handed out so far; _next_id_suffix() increments it on
# every call and appends the new value to the shared suffix.
_gen_file_id_serial = 0


def _next_id_suffix():
    """Return the next file id suffix, of the form '-<date>-<random>-<serial>'.

    The '-<date>-<random>-' part is built only once, on the first call, from
    osutils.compact_date(time.time()) and 16 characters obtained from
    osutils.rand_chars(). It is then cached in the module-level
    _gen_file_id_suffix and reused by every later call in this process.

    Each call increments the module-level serial number and appends it to the
    cached part, so successive suffixes differ only in their trailing number.

    :return: The suffix to append to a file id prefix.
    """
    # XXX TODO: bzrlib.add.smart_add should call workingtree.add() so that the
    # id randomness does not have to be hoisted out of the inner loop like
    # this.
    # XXX TODO: since the random part is global to the process, the thread-id
    # should be added before the serial number.
    # XXX TODO: jam 20061102 It would be good to rebuild the cached part every
    #           100 or 1000 calls, or once time.time() has advanced by a
    #           certain amount. time.time() shouldn't be terribly expensive to
    #           call, and long-lived processes would then stop using the same
    #           suffix forever.
    global _gen_file_id_suffix, _gen_file_id_serial
    if _gen_file_id_suffix is None:
        date_part = osutils.compact_date(time.time())
        random_part = osutils.rand_chars(16)
        _gen_file_id_suffix = "-%s-%s-" % (date_part, random_part)
    _gen_file_id_serial += 1
    serial_text = str(_gen_file_id_serial)
    return _gen_file_id_suffix + serial_text


def gen_file_id(name):
    """Build a new file id from the basename 'name'.

    All of the uniqueness comes from _next_id_suffix(); the part derived from
    'name' only makes the id easier to recognise.

    :param name: The basename of the file the id is for.
    :return: The cleaned-up name prefix followed by a fresh suffix.
    """
    # Lowercase first, so the id never needs escaping on case-insensitive
    # filesystems (ids differing only in case would otherwise clash there).
    lowered = name.lower()
    # Drop everything except word characters and '.', keeping the id portable.
    cleaned = str(_file_id_chars_re.sub('', lowered))
    # Strip leading dots so the id is not treated as a hidden file.
    visible = cleaned.lstrip('.')
    # Keep at most 20 characters; long filenames also break on some
    # filesystems.
    prefix = visible[:20]
    return prefix + _next_id_suffix()


def gen_root_id():
    """Return a new tree-root file id.

    This is gen_file_id() applied to the fixed basename 'tree_root', so the
    result is 'tree_root' followed by a fresh suffix from _next_id_suffix().
    """
    return gen_file_id('tree_root')


def gen_revision_id(username, timestamp=None):
    """Build a new revision id of the form '<user>-<date>-<random>'.

    :param username: This is the value returned by config.username(), which is
        typically a real name followed by an email address. When
        config.extract_email_address() finds an email address, only that is
        used; if it raises errors.NoEmailInUsername the whole username is
        used instead. Either way the value is lowercased, spaces become '_',
        and every character other than word characters, '-', '.', '+' and
        '@' is removed.
    :param timestamp: The time to embed in the id, handed to
        osutils.compact_date(). When None (the default), time.time() is used.
    :return: A new revision id, encoded as utf8.
    """
    try:
        identity = config.extract_email_address(username)
    except errors.NoEmailInUsername:
        identity = username

    flattened = identity.lower().replace(' ', '_')
    identity = _rev_id_chars_re.sub('', flattened)

    # This gives 36^16 ~= 2^82.7 ~= 83 bits of entropy
    random_part = osutils.rand_chars(16)

    if timestamp is None:
        timestamp = time.time()
    date_part = osutils.compact_date(timestamp)

    pieces = (identity, date_part, random_part)
    return u'-'.join(pieces).encode('utf8')

2116.4.1 by John Arbash Meinel Update file and revision id generators.	1	# Copyright (C) 2006 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	"""Common code for generating file or revision ids."""
	18
	19	from bzrlib.lazy_import import lazy_import
	20	lazy_import(globals(), """
	21	import time
	22	import unicodedata
	23
	24	from bzrlib import (
	25	config,
	26	errors,
	27	osutils,
	28	)
	29	""")
	30
	31	from bzrlib import (
	32	lazy_regex,
	33	)
	34
	35	# the regex removes any weird characters; we don't escape them
	36	# but rather just pull them out
	37	_file_id_chars_re = lazy_regex.lazy_compile(r'[^\w.]')
	38	_rev_id_chars_re = lazy_regex.lazy_compile(r'[^-\w.+@]')
	39	_gen_file_id_suffix = None
	40	_gen_file_id_serial = 0
	41
	42
	43	def _next_id_suffix():
	44	"""Create a new file id suffix that is reasonably unique.
	45
	46	On the first call we combine the current time with 64 bits of randomness to
	47	give a highly probably globally unique number. Then each call in the same
	48	process adds 1 to a serial number we append to that unique value.
	49	"""
	50	# XXX TODO: change bzrlib.add.smart_add to call workingtree.add() rather
	51	# than having to move the id randomness out of the inner loop like this.
	52	# XXX TODO: for the global randomness this uses we should add the thread-id
	53	# before the serial #.
	54	# XXX TODO: jam 20061102 I think it would be good to reset every 100 or
	55	# 1000 calls, or perhaps if time.time() increases by a certain
	56	# amount. time.time() shouldn't be terribly expensive to call,
	57	# and it means that long-lived processes wouldn't use the same
	58	# suffix forever.
	59	global _gen_file_id_suffix, _gen_file_id_serial
	60	if _gen_file_id_suffix is None:
	61	_gen_file_id_suffix = "-%s-%s-" % (osutils.compact_date(time.time()),
	62	osutils.rand_chars(16))
	63	_gen_file_id_serial += 1
	64	return _gen_file_id_suffix + str(_gen_file_id_serial)
65
66
67	def gen_file_id(name):
68	"""Return new file id for the basename 'name'.
69
70	The uniqueness is supplied from _next_id_suffix.
71	"""
72	# The real randomness is in the _next_id_suffix, the
73	# rest of the identifier is just to be nice.
74	# So we:
75	# 1) Remove non-ascii word characters to keep the ids portable
76	# 2) squash to lowercase, so the file id doesn't have to
77	# be escaped (case insensitive filesystems would bork for ids
78	# that only differ in case without escaping).
79	# 3) truncate the filename to 20 chars. Long filenames also bork on some
80	# filesystems
81	# 4) Removing starting '.' characters to prevent the file ids from
82	# being considered hidden.
2294.1.10 by John Arbash Meinel Switch all apis over to utf8 file ids. All tests pass	83	ascii_word_only = str(_file_id_chars_re.sub('', name.lower()))
2116.4.1 by John Arbash Meinel Update file and revision id generators.	84	short_no_dots = ascii_word_only.lstrip('.')[:20]
	85	return short_no_dots + _next_id_suffix()
	86
	87
	88	def gen_root_id():
	89	"""Return a new tree-root file id."""
	90	return gen_file_id('tree_root')
	91
	92
	93	def gen_revision_id(username, timestamp=None):
	94	"""Return new revision-id.
	95
	96	:param username: This is the value returned by config.username(), which is
	97	typically a real name, followed by an email address. If found, we will
	98	use just the email address portion. Otherwise we flatten the real name,
	99	and use that.
	100	:return: A new revision id.
	101	"""
	102	try:
	103	user_or_email = config.extract_email_address(username)
	104	except errors.NoEmailInUsername:
	105	user_or_email = username
	106
	107	user_or_email = user_or_email.lower()
	108	user_or_email = user_or_email.replace(' ', '_')
	109	user_or_email = _rev_id_chars_re.sub('', user_or_email)
	110
	111	# This gives 36^16 ~= 2^82.7 ~= 83 bits of entropy
	112	unique_chunk = osutils.rand_chars(16)
	113
	114	if timestamp is None:
	115	timestamp = time.time()
	116
2249.5.13 by John Arbash Meinel Finish auditing Repository, and fix generate_ids to always generate utf8 ids.	117	rev_id = u'-'.join((user_or_email,
	118	osutils.compact_date(timestamp),
	119	unique_chunk))
	120	return rev_id.encode('utf8')