~bzr-pqm/bzr/bzr.dev

5365.5.25 by John Arbash Meinel
Merge bzr.dev 5387 in prep for NEWS
1
# Copyright (C) 2006, 2007, 2009, 2010 Canonical Ltd
2116.4.1 by John Arbash Meinel
Update file and revision id generators.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2116.4.1 by John Arbash Meinel
Update file and revision id generators.
16
17
"""Common code for generating file or revision ids."""
18
19
from bzrlib.lazy_import import lazy_import
20
lazy_import(globals(), """
21
import time
22
import unicodedata
23
24
from bzrlib import (
25
    config,
26
    errors,
27
    osutils,
28
    )
29
""")
30
31
from bzrlib import (
32
    lazy_regex,
33
    )
34
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
35
# the regex removes any weird characters; we don't escape them
2116.4.1 by John Arbash Meinel
Update file and revision id generators.
36
# but rather just pull them out
37
# Matches any character that is neither a word character nor '.'.
_file_id_chars_re = lazy_regex.lazy_compile(r'[^\w.]')
# Matches any character that is neither a word character nor one of "-.+@".
_rev_id_chars_re = lazy_regex.lazy_compile(r'[^-\w.+@]')
# State used by _next_id_suffix(): the suffix shared by the file ids it
# produces (built on first use) and the serial number appended to it.
_gen_file_id_suffix, _gen_file_id_serial = None, 0
41
42
43
def _next_id_suffix():
    """Return a fresh, reasonably unique suffix for a file id.

    The first call builds a shared prefix of the form "-<date>-<random>-"
    from the compacted current time and 16 characters obtained from
    osutils.rand_chars. Every call, the first included, then increments a
    module-level serial number and returns the prefix followed by that
    number.
    """
    # XXX TODO: bzrlib.add.smart_add_tree should call workingtree.add()
    #           instead, so the id randomness would not need to be hoisted
    #           out of the inner loop like this.
    # XXX TODO: since this relies on process-global randomness, the
    #           thread-id should be added before the serial number.
    # XXX TODO: jam 20061102 it would be good to reset the prefix every 100
    #           or 1000 calls, or when time.time() has advanced by some
    #           amount. time.time() shouldn't be terribly expensive to
    #           call, and long-lived processes would then not use the same
    #           suffix forever.
    global _gen_file_id_suffix, _gen_file_id_serial
    if _gen_file_id_suffix is None:
        date_part = osutils.compact_date(time.time())
        random_part = osutils.rand_chars(16)
        _gen_file_id_suffix = "-%s-%s-" % (date_part, random_part)
    _gen_file_id_serial += 1
    serial_text = str(_gen_file_id_serial)
    return _gen_file_id_suffix + serial_text
65
66
67
def gen_file_id(name):
    """Return a new file id derived from the basename 'name'.

    Only the suffix from _next_id_suffix makes the id unique; the leading
    part taken from the name is there purely for readability.

    :param name: The basename of the file the id is for.
    :return: The cleaned-up, shortened name followed by a unique suffix.
    """
    # The name is lowercased so that ids never differ only by case (which
    # would need escaping on case insensitive filesystems).
    lowered = name.lower()
    # Anything that is not a word character or '.' is dropped rather than
    # escaped, to keep the ids portable.
    cleaned = str(_file_id_chars_re.sub('', lowered))
    # Leading dots go first, so the id is not treated as a hidden file...
    visible = cleaned.lstrip('.')
    # ...and only then is the result cut to 20 characters, because long
    # filenames also cause trouble on some filesystems.
    stem = visible[:20]
    return stem + _next_id_suffix()
86
87
88
def gen_root_id():
    """Return a new file id for the root of a tree.

    This is an ordinary generated file id whose name part is 'tree_root'.
    """
    root_basename = 'tree_root'
    return gen_file_id(root_basename)
91
92
93
def gen_revision_id(username, timestamp=None):
    """Return a new revision id.

    :param username: The username of the committer, in the format returned
        by config.username().  This is typically a real name followed by
        an email address.  When an email address can be extracted, only it
        is used; otherwise the whole username is flattened and used.
    :param timestamp: The time to embed in the id. Defaults to the current
        time (time.time()) when None.
    :return: A new revision id, encoded as utf8.
    """
    try:
        identity = config.extract_email_address(username)
    except errors.NoEmailInUsername:
        identity = username

    # Lowercase, turn spaces into underscores, then drop every character
    # the revision-id regex does not allow.
    identity = _rev_id_chars_re.sub('', identity.lower().replace(' ', '_'))

    # This gives 36^16 ~= 2^82.7 ~= 83 bits of entropy
    random_part = osutils.rand_chars(16)

    if timestamp is None:
        timestamp = time.time()
    date_part = osutils.compact_date(timestamp)

    pieces = (identity, date_part, random_part)
    return u'-'.join(pieces).encode('utf8')