15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
17
import stat, os, sha, time
18
from binascii import b2a_qp, a2b_qp
19
20
from trace import mutter
20
21
from errors import BzrError, BzrCheckError
66
67
The SHA-1 is stored in memory as a hexdigest.
68
File names and file-ids are written out with non-ascii or whitespace
69
characters given as python-style unicode escapes. (file-ids shouldn't
70
contain weird characters, but it might happen.)
69
File names are written out as the quoted-printable encoding of their
73
73
# order of fields returned by fingerprint()
106
102
fs.st_ctime, fs.st_ino, fs.st_dev)
111
return s.encode('unicode_escape') \
112
.replace('\n', '\\u000a') \
113
.replace(' ', '\\u0020') \
114
.replace('\r', '\\u000d')
117
def _write_cache(basedir, entries):
105
def _write_cache(basedir, entry_iter, dangerfiles):
118
106
from atomicfile import AtomicFile
120
108
cachefn = os.path.join(basedir, '.bzr', 'stat-cache')
121
109
outf = AtomicFile(cachefn, 'wb')
122
outf.write(CACHE_HEADER + '\n')
124
for entry in entries:
111
for entry in entry_iter:
125
112
if len(entry) != 8:
126
113
raise ValueError("invalid statcache entry tuple %r" % entry)
127
outf.write(safe_quote(entry[0])) # file id
129
outf.write(entry[1]) # hex sha1
131
outf.write(safe_quote(entry[2])) # name
115
if entry[SC_FILE_ID] in dangerfiles:
116
continue # changed too recently
117
outf.write(entry[0]) # file id
119
outf.write(entry[1]) # hex sha1
121
outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name
132
122
for nf in entry[3:]:
133
123
outf.write(' %d' % nf)
138
128
if not outf.closed:
142
def _try_write_cache(basedir, entries):
144
return _write_cache(basedir, entries)
146
mutter("cannot update statcache in %s: %s" % (basedir, e))
148
mutter("cannot update statcache in %s: %s" % (basedir, e))
152
132
def load_cache(basedir):
157
sha_re = re.compile(r'[a-f0-9]{40}')
160
138
cachefn = os.path.join(basedir, '.bzr', 'stat-cache')
161
cachefile = open(cachefn, 'rb')
139
cachefile = open(cachefn, 'r')
165
line1 = cachefile.readline().rstrip('\r\n')
166
if line1 != CACHE_HEADER:
167
mutter('cache header marker not found at top of %s' % cachefn)
170
143
for l in cachefile:
173
file_id = f[0].decode('unicode_escape')
174
147
if file_id in cache:
175
raise BzrCheckError("duplicated file_id in cache: {%s}" % file_id)
178
if len(text_sha) != 40 or not sha_re.match(text_sha):
179
raise BzrCheckError("invalid file SHA-1 in cache: %r" % text_sha)
148
raise BzrError("duplicated file_id in cache: {%s}" % file_id)
181
path = f[2].decode('unicode_escape')
150
path = a2b_qp(f[2]).decode('utf-8')
182
151
if path in seen_paths:
183
152
raise BzrCheckError("duplicated path in cache: %r" % path)
184
seen_paths[path] = True
186
entry = (file_id, text_sha, path) + tuple([long(x) for x in f[3:]])
155
entry = (file_id, f[1], path) + tuple([long(x) for x in f[3:]])
187
156
if len(entry) != 8:
188
157
raise ValueError("invalid statcache entry tuple %r" % entry)
210
179
flush -- discard any previous cache and recalculate from scratch.
213
# load the existing cache; use information there to find a list of
214
# files ordered by inode, which is alleged to be the fastest order
217
to_update = _files_from_inventory(inv)
183
# TODO: It's supposed to be faster to stat the files in order by inum.
184
# We don't directly know the inum of the files of course but we do
185
# know where they were last sighted, so we can sort by that.
219
187
assert isinstance(flush, bool)
223
191
cache = load_cache(basedir)
227
for file_id, path in to_update:
229
by_inode.append((cache[file_id][SC_INO], file_id, path))
231
without_inode.append((file_id, path))
234
to_update = [a[1:] for a in by_inode] + without_inode
236
stat_cnt = missing_cnt = new_cnt = hardcheck = change_cnt = 0
238
# dangerfiles have been recently touched and can't be committed to
239
# a persistent cache yet, but they are returned to the caller.
192
return _update_cache_from_list(basedir, cache, _files_from_inventory(inv))
196
def _update_cache_from_list(basedir, cache, to_update):
197
"""Update and return the cache for given files.
199
cache -- Previously cached values to be validated.
201
to_update -- Sequence of (file_id, path) pairs to check.
206
stat_cnt = missing_cnt = hardcheck = change_cnt = 0
208
# files that have been recently touched and can't be
209
# committed to a persistent cache yet.
242
212
now = int(time.time())
244
214
## mutter('update statcache under %r' % basedir)
278
246
mutter('statcache: statted %d files, read %d files, %d changed, %d dangerous, '
279
'%d deleted, %d new, '
281
% (stat_cnt, hardcheck, change_cnt, len(dangerfiles),
282
missing_cnt, new_cnt, len(cache)))
248
% (stat_cnt, hardcheck, change_cnt, len(dangerfiles), len(cache)))
285
251
mutter('updating on-disk statcache')
288
safe_cache = cache.copy()
289
for file_id in dangerfiles:
290
del safe_cache[file_id]
294
_try_write_cache(basedir, safe_cache.itervalues())
252
_write_cache(basedir, cache.itervalues(), dangerfiles)