# (C) 2005 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Perhaps have a way to stat all the files in inode order, and
# then remember that they're all fresh for the lifetime of the object?

# TODO: Keep track of whether there are in-memory updates that need to
# be flushed to disk.

# TODO: Perhaps return more details on the file to avoid statting it
# again: nonexistent, file type, size, etc.

# Version marker that forms the first line of the on-disk cache file.
# A cache file whose first line differs from this string is discarded
# when it is read back.
CACHE_HEADER = "### bzr hashcache v5\n"
# NOTE(review): the listing this was recovered from had lost its
# indentation and its short lines; the try/except and the two early
# returns below were restored from context -- confirm against the
# pristine file.
def _fingerprint(abspath):
    """Return a stat-based fingerprint of the file at abspath.

    The fingerprint is the tuple (size, mtime, ctime, inode, device), with
    both timestamps truncated to whole seconds.

    Returns None when the path cannot be stat'ed (for example because it
    is missing) or when it names a directory.  lstat() is used, so a
    symlink is fingerprinted itself rather than through its target.
    """
    # Local imports: the function does not depend on module-level imports.
    import os
    import stat

    try:
        fs = os.lstat(abspath)
    except OSError:
        # might be missing, etc
        return None

    if stat.S_ISDIR(fs.st_mode):
        return None

    # we discard any high precision because it's not reliable; perhaps we
    # could do better on some systems?
    # int() replaces the Python-2-only long(); it promotes to an
    # arbitrary-precision integer when needed and compares equal to long.
    return (fs.st_size, int(fs.st_mtime),
            int(fs.st_ctime), fs.st_ino, fs.st_dev)
# NOTE(review): recovered from a listing that had lost its indentation and
# every short line (method headers for clear/write/read, counter
# initialisation, if/else/try/return lines).  Those were restored from the
# surviving statements and docstrings -- confirm names and details against
# the pristine file.
class HashCache(object):
    """Cache for looking up file SHA-1.

    Files are considered to match the cached value if the fingerprint
    of the file has not changed. This includes its mtime, ctime,
    device number, inode number, and size. This should catch
    modifications or replacement of the file by a new one.

    This may not catch modifications that do not change the file's
    size and that occur within the resolution window of the
    timestamps. To handle this we specifically do not cache files
    which have changed since the start of the present second, since
    they could undetectably change again.

    This scheme may fail if the machine's clock steps backwards.

    This does not canonicalize the paths passed in; that should be
    done by the caller.

    _cache
        Indexed by path, points to a two-tuple of the SHA-1 of the file
        and its fingerprint.

    stat_count
        number of times files have been statted

    hit_count
        number of times files have been retrieved from the cache, avoiding a
        re-read

    miss_count
        number of misses (times files have been completely re-read)

    danger_count
        number of times a file was hashed but not cached because one of
        its timestamps was not older than the current second

    needs_write
        true when the in-memory cache has changed since it was last read
        from or written to the cache file
    """
    needs_write = False

    def __init__(self, basedir):
        """Create an empty cache for the tree rooted at basedir."""
        self.basedir = basedir
        self.hit_count = 0
        self.miss_count = 0
        self.stat_count = 0
        self.danger_count = 0
        self._cache = {}

    def cache_file_name(self):
        """Return the path of the cache file below basedir."""
        import os
        return os.path.join(self.basedir, '.bzr', 'stat-cache')

    def clear(self):
        """Discard all cached information.

        This does not reset the counters."""
        if self._cache:
            self.needs_write = True
            self._cache = {}

    def get_sha1(self, path):
        """Return the hex SHA-1 of the contents of the file at path.

        path is joined onto basedir to locate the file and is also the
        cache key.  Returns None when the file has no fingerprint.

        XXX: If the file does not exist or is not a plain file???
        """
        import os
        import time
        from bzrlib.osutils import sha_file
        from bzrlib.trace import mutter

        abspath = os.path.join(self.basedir, path)
        fp = _fingerprint(abspath)
        c = self._cache.get(path)
        if c:
            cache_sha1, cache_fp = c
        else:
            cache_sha1, cache_fp = None, None

        self.stat_count += 1

        if not fp:
            # no fingerprint, so there is nothing to hash
            return None
        elif cache_fp and (cache_fp == fp):
            self.hit_count += 1
            return cache_sha1

        self.miss_count += 1
        # Close the file explicitly instead of leaving the handle to the
        # garbage collector.
        f = open(abspath, 'rb')
        try:
            digest = sha_file(f)
        finally:
            f.close()

        now = int(time.time())
        if fp[1] >= now or fp[2] >= now:
            # changed too recently; can't be cached. we can
            # return the result and it could possibly be cached
            # next time.
            self.danger_count += 1
            if cache_fp:
                mutter("remove outdated entry for %s" % path)
                self.needs_write = True
                del self._cache[path]
        elif (fp != cache_fp) or (digest != cache_sha1):
            mutter("update entry for %s" % path)
            mutter(" %r" % (fp,))
            mutter(" %r" % (cache_fp,))
            self.needs_write = True
            self._cache[path] = (digest, fp)

        return digest

    def write(self):
        """Write contents of cache to file.

        Each entry becomes one line: the utf-8 path, the separator '// ',
        the hex SHA-1 and then the fingerprint fields, space separated.
        """
        from atomicfile import AtomicFile

        outf = AtomicFile(self.cache_file_name(), 'wb')
        try:
            outf.write(CACHE_HEADER)

            for path, c in self._cache.items():
                # '//' is the path terminator in the file format
                assert '//' not in path, path
                outf.write(path.encode('utf-8'))
                outf.write('// ')
                # hex sha1 first, then the integer fingerprint fields;
                # written explicitly rather than through print's
                # soft-space handling
                fields = [c[0]] + ["%d" % fld for fld in c[1]]
                outf.write(' '.join(fields) + '\n')

            outf.commit()
            self.needs_write = False
        finally:
            # NOTE(review): assumes AtomicFile exposes commit(), abort()
            # and closed -- confirm against atomicfile.
            if not outf.closed:
                outf.abort()

    def read(self):
        """Reinstate cache from file.

        Overwrites existing cache.

        If the cache file has the wrong version marker, this just clears
        the cache."""
        import sys
        from bzrlib.trace import mutter, warning

        self._cache = {}

        fn = self.cache_file_name()
        try:
            inf = open(fn, 'rb')
        except IOError:
            # sys.exc_info() keeps this parseable by old and new
            # interpreters alike
            mutter("failed to open %s: %s" % (fn, sys.exc_info()[1]))
            return

        try:
            hdr = inf.readline()
            if hdr != CACHE_HEADER:
                mutter('cache header marker not found at top of %s; discarding cache'
                       % fn)
                return

            for l in inf:
                pos = l.find('// ')
                if pos < 0:
                    # no path terminator: skip the line, keep reading
                    warning("bad line in hashcache: %r" % l)
                    continue

                path = l[:pos].decode('utf-8')
                if path in self._cache:
                    warning('duplicated path %r in cache' % path)
                    continue

                pos += 3
                fields = l[pos:].split(' ')
                # sha1 plus the five fingerprint fields
                if len(fields) != 6:
                    warning("bad line in hashcache: %r" % l)
                    continue

                sha1 = fields[0]
                if len(sha1) != 40:
                    warning("bad sha1 in hashcache: %r" % sha1)
                    continue

                try:
                    fp = tuple(map(int, fields[1:]))
                except ValueError:
                    warning("bad line in hashcache: %r" % l)
                    continue

                self._cache[path] = (sha1, fp)
        finally:
            inf.close()

        self.needs_write = False