~bzr-pqm/bzr/bzr.dev

2116.4.1 by John Arbash Meinel
Update file and revision id generators.
1
# Copyright (C) 2006 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Common code for generating file or revision ids."""
18
19
from bzrlib.lazy_import import lazy_import
20
lazy_import(globals(), """
21
import time
22
import unicodedata
23
24
from bzrlib import (
25
    config,
26
    errors,
27
    osutils,
28
    )
29
""")
30
31
from bzrlib import (
32
    lazy_regex,
33
    )
34
35
# Module-level state for _next_id_suffix(): the suffix is built lazily on the
# first call (None means "not generated yet") and the serial counts how many
# suffixes have been handed out.
_gen_file_id_serial = 0
_gen_file_id_suffix = None

# Unwanted characters are not escaped, they are simply deleted: each pattern
# matches everything *outside* the set of characters allowed in that id kind.
_file_id_chars_re = lazy_regex.lazy_compile(r'[^\w.]')
_rev_id_chars_re = lazy_regex.lazy_compile(r'[^-\w.+@]')
41
42
43
def _next_id_suffix():
    """Return the next reasonably-unique suffix for a file id.

    The first call builds a per-process base of the form
    ``-<compact date>-<16 random chars>-`` and caches it in the module
    global ``_gen_file_id_suffix``.  Every call (including the first) then
    bumps the module-level serial counter and returns the cached base with
    that serial number appended.

    :return: The cached base suffix followed by the new serial number.
    """
    # XXX TODO: bzrlib.add.smart_add should call workingtree.add() so the id
    #           randomness does not have to be hoisted out of the inner loop
    #           like this.
    # XXX TODO: since the random part is shared process-wide, the thread-id
    #           should be inserted before the serial number.
    # XXX TODO: jam 20061102 consider regenerating the base every 100 or 1000
    #           calls, or once time.time() has advanced by some amount.
    #           time.time() should be cheap to call, and it would stop
    #           long-lived processes from reusing one suffix forever.
    global _gen_file_id_suffix, _gen_file_id_serial
    if _gen_file_id_suffix is None:
        stamp = osutils.compact_date(time.time())
        random_part = osutils.rand_chars(16)
        _gen_file_id_suffix = "-%s-%s-" % (stamp, random_part)
    _gen_file_id_serial += 1
    serial_text = str(_gen_file_id_serial)
    return _gen_file_id_suffix + serial_text
65
66
67
def gen_file_id(name):
    """Build a new file id from the basename 'name'.

    All of the uniqueness comes from _next_id_suffix(); the leading part
    derived from ``name`` only exists to make the id recognisable.

    :param name: Basename of the file the id is being generated for.
    :return: A cleaned-up prefix of ``name`` followed by a unique suffix.
    """
    # The readable prefix is produced in these steps:
    #   a) lowercase the name, so ids never differ only by case (which would
    #      need escaping on case-insensitive filesystems);
    #   b) delete every character matched by _file_id_chars_re, i.e. anything
    #      that is not a word character or '.', to keep the ids portable;
    #   c) strip leading '.' characters so the id is not treated as hidden;
    #   d) keep at most 20 characters, because long names also cause trouble
    #      on some filesystems.
    lowered = name.lower()
    cleaned = str(_file_id_chars_re.sub('', lowered))
    prefix = cleaned.lstrip('.')
    prefix = prefix[:20]
    return prefix + _next_id_suffix()
86
87
88
def gen_root_id():
    """Generate a fresh file id for the root of a tree.

    :return: The result of gen_file_id() for the fixed basename 'tree_root'.
    """
    root_basename = 'tree_root'
    return gen_file_id(root_basename)
91
92
93
def gen_revision_id(username, timestamp=None):
    """Build a new revision id.

    The id has the form ``<user>-<compact date>-<16 random chars>``.

    :param username: The value returned by config.username(), typically a
        real name followed by an email address.  When an email address can
        be extracted only that part is used; otherwise the whole username is
        flattened and used instead.
    :param timestamp: Time to embed in the id; when None the current
        time.time() is used.
    :return: A new revision id, encoded as utf8.
    """
    try:
        identity = config.extract_email_address(username)
    except errors.NoEmailInUsername:
        # No email present: fall back to the full username.
        identity = username

    # Normalise: lowercase, spaces become underscores, then everything that
    # _rev_id_chars_re matches (characters not allowed in an id) is removed.
    identity = _rev_id_chars_re.sub('', identity.lower().replace(' ', '_'))

    # 16 chars at 36^16 ~= 2^82.7, i.e. roughly 83 bits of entropy.
    random_part = osutils.rand_chars(16)

    if timestamp is None:
        timestamp = time.time()

    pieces = (identity, osutils.compact_date(timestamp), random_part)
    return u'-'.join(pieces).encode('utf8')