2116.4.1
by John Arbash Meinel
Update file and revision id generators. |
1 |
# Copyright (C) 2006 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Common code for generating file or revision ids."""
|
|
18 |
||
19 |
from bzrlib.lazy_import import lazy_import |
|
20 |
lazy_import(globals(), """ |
|
21 |
import time
|
|
22 |
import unicodedata
|
|
23 |
||
24 |
from bzrlib import (
|
|
25 |
config,
|
|
26 |
errors,
|
|
27 |
osutils,
|
|
28 |
)
|
|
29 |
""") |
|
30 |
||
31 |
from bzrlib import ( |
|
32 |
lazy_regex, |
|
33 |
)
|
|
34 |
||
35 |
# the regex removes any weird characters; we don't escape them
# but rather just pull them out
# File ids: strip everything that is not a word character or '.'.
_file_id_chars_re = lazy_regex.lazy_compile(r'[^\w.]')
# Revision ids: additionally allow '-', '+' and '@' to survive.
_rev_id_chars_re = lazy_regex.lazy_compile(r'[^-\w.+@]')
# Suffix shared by the file ids generated in this process; it stays None
# until _next_id_suffix() builds it on its first call.
_gen_file_id_suffix = None
# Serial number that _next_id_suffix() increments on every call.
_gen_file_id_serial = 0
|
41 |
||
42 |
||
43 |
def _next_id_suffix():
    """Return a fresh, reasonably unique suffix for a file id.

    The first call in a process builds a base suffix out of the compact form
    of the current time and 16 random characters, which makes it very likely
    to be globally unique.  Every call, the first one included, then bumps a
    module-level serial number and appends it to that base suffix.
    """
    # XXX TODO: change bzrlib.add.smart_add to call workingtree.add() rather
    #       than having to move the id randomness out of the inner loop like
    #       this.
    # XXX TODO: for the global randomness this uses we should add the
    #       thread-id before the serial #.
    # XXX TODO: jam 20061102 I think it would be good to reset every 100 or
    #       1000 calls, or perhaps if time.time() increases by a certain
    #       amount. time.time() shouldn't be terribly expensive to call,
    #       and it means that long-lived processes wouldn't use the same
    #       suffix forever.
    global _gen_file_id_suffix, _gen_file_id_serial
    if _gen_file_id_suffix is None:
        # Built once per process: "-<compact date>-<random chars>-"
        date_part = osutils.compact_date(time.time())
        random_part = osutils.rand_chars(16)
        _gen_file_id_suffix = "-%s-%s-" % (date_part, random_part)
    _gen_file_id_serial = _gen_file_id_serial + 1
    return "%s%d" % (_gen_file_id_suffix, _gen_file_id_serial)
|
65 |
||
66 |
||
67 |
def gen_file_id(name):
    """Build a new file id from the basename 'name'.

    Only the suffix obtained from _next_id_suffix makes the id unique; the
    leading part derived from 'name' is there purely for readability.

    :param name: The basename of the file the id is generated for.
    :return: The cleaned-up, shortened name followed by the unique suffix.
    """
    # The name-derived prefix is massaged as follows:
    # 1) Lowercase it, so the file id doesn't have to be escaped (case
    #    insensitive filesystems would bork for ids that only differ in
    #    case without escaping).
    # 2) Drop every character that is not an ascii word character or '.',
    #    to keep the ids portable.
    # 3) Strip leading '.' characters so the file ids are not considered
    #    hidden.
    # 4) Truncate to 20 chars. Long filenames also bork on some filesystems.
    lowered = name.lower()
    cleaned = str(_file_id_chars_re.sub('', lowered))
    without_leading_dots = cleaned.lstrip('.')
    prefix = without_leading_dots[:20]
    return prefix + _next_id_suffix()
|
86 |
||
87 |
||
88 |
def gen_root_id():
    """Generate a new file id for the root of a tree.

    :return: The result of gen_file_id() for the fixed basename 'tree_root'.
    """
    root_id = gen_file_id('tree_root')
    return root_id
|
91 |
||
92 |
||
93 |
def gen_revision_id(username, timestamp=None):
    """Generate a new revision id.

    :param username: The value returned by config.username(), typically a
        real name followed by an email address. When an email address can be
        extracted only that part is used; otherwise the whole username is
        flattened and used instead.
    :param timestamp: The time to embed in the id. When None, the current
        time (time.time()) is used.
    :return: A new revision id, encoded as utf8.
    """
    try:
        identity = config.extract_email_address(username)
    except errors.NoEmailInUsername:
        identity = username

    # Flatten the identity: lowercase, spaces become underscores, and any
    # character the revision-id regex rejects is simply removed.
    flattened = identity.lower().replace(' ', '_')
    user_or_email = _rev_id_chars_re.sub('', flattened)

    # This gives 36^16 ~= 2^82.7 ~= 83 bits of entropy
    unique_chunk = osutils.rand_chars(16)

    if timestamp is None:
        timestamp = time.time()

    pieces = (user_or_email, osutils.compact_date(timestamp), unique_chunk)
    return u'-'.join(pieces).encode('utf8')