15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
17
import stat, os, sha, time
18
from binascii import b2a_qp, a2b_qp
20
19
from trace import mutter
21
20
from errors import BzrError, BzrCheckError
67
66
The SHA-1 is stored in memory as a hexdigest.
69
File names are written out as the quoted-printable encoding of their
68
File names and file-ids are written out with non-ascii or whitespace
69
characters given as python-style unicode escapes. (file-ids shouldn't
70
contain weird characters, but it might happen.)
73
73
# order of fields returned by fingerprint()
102
106
fs.st_ctime, fs.st_ino, fs.st_dev)
105
def _write_cache(basedir, entry_iter, dangerfiles):
111
return s.encode('unicode_escape') \
112
.replace('\n', '\\u000a') \
113
.replace(' ', '\\u0020') \
114
.replace('\r', '\\u000d')
117
def _write_cache(basedir, entries):
106
118
from atomicfile import AtomicFile
108
120
cachefn = os.path.join(basedir, '.bzr', 'stat-cache')
109
121
outf = AtomicFile(cachefn, 'wb')
122
outf.write(CACHE_HEADER + '\n')
111
for entry in entry_iter:
124
for entry in entries:
112
125
if len(entry) != 8:
113
126
raise ValueError("invalid statcache entry tuple %r" % entry)
115
if entry[SC_FILE_ID] in dangerfiles:
116
continue # changed too recently
117
outf.write(entry[0]) # file id
119
outf.write(entry[1]) # hex sha1
121
outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name
127
outf.write(safe_quote(entry[0])) # file id
129
outf.write(entry[1]) # hex sha1
131
outf.write(safe_quote(entry[2])) # name
122
132
for nf in entry[3:]:
123
133
outf.write(' %d' % nf)
128
138
if not outf.closed:
142
def _try_write_cache(basedir, entries):
144
return _write_cache(basedir, entries)
146
mutter("cannot update statcache in %s: %s" % (basedir, e))
148
mutter("cannot update statcache in %s: %s" % (basedir, e))
132
152
def load_cache(basedir):
157
sha_re = re.compile(r'[a-f0-9]{40}')
138
160
cachefn = os.path.join(basedir, '.bzr', 'stat-cache')
139
cachefile = open(cachefn, 'r')
161
cachefile = open(cachefn, 'rb')
165
line1 = cachefile.readline().rstrip('\r\n')
166
if line1 != CACHE_HEADER:
167
mutter('cache header marker not found at top of %s' % cachefn)
143
170
for l in cachefile:
173
file_id = f[0].decode('unicode_escape')
147
174
if file_id in cache:
148
raise BzrError("duplicated file_id in cache: {%s}" % file_id)
175
raise BzrCheckError("duplicated file_id in cache: {%s}" % file_id)
178
if len(text_sha) != 40 or not sha_re.match(text_sha):
179
raise BzrCheckError("invalid file SHA-1 in cache: %r" % text_sha)
150
path = a2b_qp(f[2]).decode('utf-8')
181
path = f[2].decode('unicode_escape')
151
182
if path in seen_paths:
152
183
raise BzrCheckError("duplicated path in cache: %r" % path)
184
seen_paths[path] = True
155
entry = (file_id, f[1], path) + tuple([long(x) for x in f[3:]])
186
entry = (file_id, text_sha, path) + tuple([long(x) for x in f[3:]])
156
187
if len(entry) != 8:
157
188
raise ValueError("invalid statcache entry tuple %r" % entry)
179
210
flush -- discard any previous cache and recalculate from scratch.
213
# load the existing cache; use information there to find a list of
214
# files ordered by inode, which is alleged to be the fastest order
183
# TODO: It's supposed to be faster to stat the files in order by inum.
184
# We don't directly know the inum of the files of course but we do
185
# know where they were last sighted, so we can sort by that.
217
to_update = _files_from_inventory(inv)
187
219
assert isinstance(flush, bool)
191
223
cache = load_cache(basedir)
192
return _update_cache_from_list(basedir, cache, _files_from_inventory(inv))
196
def _update_cache_from_list(basedir, cache, to_update):
197
"""Update and return the cache for given files.
199
cache -- Previously cached values to be validated.
201
to_update -- Sequence of (file_id, path) pairs to check.
206
stat_cnt = missing_cnt = hardcheck = change_cnt = 0
208
# files that have been recently touched and can't be
209
# committed to a persistent cache yet.
227
for file_id, path in to_update:
229
by_inode.append((cache[file_id][SC_INO], file_id, path))
231
without_inode.append((file_id, path))
234
to_update = [a[1:] for a in by_inode] + without_inode
236
stat_cnt = missing_cnt = new_cnt = hardcheck = change_cnt = 0
238
# dangerfiles have been recently touched and can't be committed to
239
# a persistent cache yet, but they are returned to the caller.
212
242
now = int(time.time())
214
244
## mutter('update statcache under %r' % basedir)
246
278
mutter('statcache: statted %d files, read %d files, %d changed, %d dangerous, '
279
'%d deleted, %d new, '
248
% (stat_cnt, hardcheck, change_cnt, len(dangerfiles), len(cache)))
281
% (stat_cnt, hardcheck, change_cnt, len(dangerfiles),
282
missing_cnt, new_cnt, len(cache)))
251
285
mutter('updating on-disk statcache')
252
_write_cache(basedir, cache.itervalues(), dangerfiles)
288
safe_cache = cache.copy()
289
for file_id in dangerfiles:
290
del safe_cache[file_id]
294
_try_write_cache(basedir, safe_cache.itervalues())