~bzr-pqm/bzr/bzr.dev

5752.3.8 by John Arbash Meinel
Merge bzr.dev 5764 to resolve release-notes (aka NEWS) conflicts
1
# Copyright (C) 2006, 2007, 2009, 2010, 2011 Canonical Ltd
2116.4.1 by John Arbash Meinel
Update file and revision id generators.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2116.4.1 by John Arbash Meinel
Update file and revision id generators.
16
17
"""Common code for generating file or revision ids."""
18
19
from bzrlib.lazy_import import lazy_import
20
lazy_import(globals(), """
21
import time
22
23
from bzrlib import (
24
    config,
25
    errors,
26
    osutils,
27
    )
28
""")
29
30
from bzrlib import (
31
    lazy_regex,
32
    )
33
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
34
# State shared across calls to _next_id_suffix(): the per-process suffix
# (built lazily, hence None until first use) and the running serial number
# appended to it.
_gen_file_id_serial = 0
_gen_file_id_suffix = None

# Ids are sanitised by deleting unwanted characters outright; nothing is
# escaped.  Each pattern below matches the characters to delete.
_file_id_chars_re = lazy_regex.lazy_compile(r'[^\w.]')
_rev_id_chars_re = lazy_regex.lazy_compile(r'[^-\w.+@]')
40
41
42
def _next_id_suffix():
    """Return a suffix that makes a generated file id reasonably unique.

    The first call in a process builds a base string out of the compacted
    current time and 16 random characters, and caches it in the module-level
    ``_gen_file_id_suffix``.  Every call (the first one included) then
    increments the module-level serial counter and appends its value to that
    cached base.

    :return: The cached base string followed by the new serial number.
    """
    # XXX TODO: change bzrlib.add.smart_add_tree to call workingtree.add()
    #           rather than having to move the id randomness out of the inner
    #           loop like this.
    # XXX TODO: for the global randomness this uses we should add the
    #           thread-id before the serial #.
    # XXX TODO: jam 20061102 I think it would be good to reset every 100 or
    #           1000 calls, or perhaps if time.time() increases by a certain
    #           amount. time.time() shouldn't be terribly expensive to call,
    #           and it means that long-lived processes wouldn't use the same
    #           suffix forever.
    global _gen_file_id_suffix, _gen_file_id_serial
    if _gen_file_id_suffix is None:
        # Built once per process; later calls only differ by serial number.
        stamp = osutils.compact_date(time.time())
        noise = osutils.rand_chars(16)
        _gen_file_id_suffix = "-%s-%s-" % (stamp, noise)
    _gen_file_id_serial += 1
    serial_text = str(_gen_file_id_serial)
    return _gen_file_id_suffix + serial_text
64
65
66
def gen_file_id(name):
    """Build a new file id from the basename 'name'.

    Only the suffix obtained from _next_id_suffix() provides uniqueness; the
    leading part derived from 'name' is there purely for readability.

    :param name: Basename of the file the id is being generated for.
    :return: The cleaned-up, shortened name followed by the unique suffix.
    """
    # The readable prefix is prepared in four steps:
    #  a) lowercase it, so ids never differ only by case (case insensitive
    #     filesystems would bork on such ids unless they were escaped);
    #  b) delete everything that is not a word character or a '.', to keep
    #     the ids portable;
    #  c) drop leading '.' characters so the id is not considered hidden;
    #  d) keep at most 20 characters, since long names also bork on some
    #     filesystems.
    lowered = name.lower()
    cleaned = str(_file_id_chars_re.sub('', lowered))
    visible = cleaned.lstrip('.')
    prefix = visible[:20]
    return prefix + _next_id_suffix()
85
86
87
def gen_root_id():
    """Generate a fresh file id for the root of a tree.

    This is simply gen_file_id() applied to the fixed basename 'tree_root'.
    """
    root_basename = 'tree_root'
    return gen_file_id(root_basename)
90
91
92
def gen_revision_id(username, timestamp=None):
    """Build a new revision id for a commit by 'username'.

    The id has three parts joined by '-': the normalised committer identity,
    the compacted timestamp, and a chunk of 16 random characters.

    :param username: The username of the committer, in the format returned by
        config.username().  This is typically a real name, followed by an
        email address.  When an email address can be extracted, only that is
        used; otherwise the whole string is flattened and used instead.
    :param timestamp: Time to embed in the id, in whatever form
        osutils.compact_date() accepts.  When None, time.time() is used.
    :return: A new revision id, encoded as utf8.
    """
    try:
        identity = config.extract_email_address(username)
    except errors.NoEmailInUsername:
        # No email present: fall back to the raw username.
        identity = username

    # Lowercase, turn spaces into underscores, then delete every character
    # the revision-id pattern does not allow.
    identity = _rev_id_chars_re.sub('', identity.lower().replace(' ', '_'))

    # This gives 36^16 ~= 2^82.7 ~= 83 bits of entropy
    unique_chunk = osutils.rand_chars(16)

    if timestamp is None:
        timestamp = time.time()

    pieces = (identity, osutils.compact_date(timestamp), unique_chunk)
    return u'-'.join(pieces).encode('utf8')