1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005, 2006 by Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30
30
CACHE_HEADER = "### bzr hashcache v5\n"
32
32
import os, stat, time
34
from bzrlib.osutils import sha_file, sha_string, pathjoin, safe_unicode
35
from bzrlib.osutils import sha_file, pathjoin, safe_unicode
35
36
from bzrlib.trace import mutter, warning
36
37
from bzrlib.atomicfile import AtomicFile
37
38
from bzrlib.errors import BzrError
41
42
FP_CTIME_COLUMN = 2
45
def _fingerprint(abspath):
47
fs = os.lstat(abspath)
49
# might be missing, etc
52
if stat.S_ISDIR(fs.st_mode):
55
# we discard any high precision because it's not reliable; perhaps we
56
# could do better on some systems?
57
return (fs.st_size, long(fs.st_mtime),
58
long(fs.st_ctime), fs.st_ino, fs.st_dev, fs.st_mode)
46
61
class HashCache(object):
82
97
def __init__(self, root, cache_file_name, mode=None):
83
98
"""Create a hash cache in base dir, and set the file mode to mode."""
84
99
self.root = safe_unicode(root)
85
self.root_utf8 = self.root.encode('utf8') # where is the filesystem encoding ?
86
100
self.hit_count = 0
87
101
self.miss_count = 0
88
102
self.stat_count = 0
118
132
for inum, path, cache_entry in prep:
119
133
abspath = pathjoin(self.root, path)
120
fp = self._fingerprint(abspath)
134
fp = _fingerprint(abspath)
121
135
self.stat_count += 1
123
137
cache_fp = cache_entry[1]
128
142
self.needs_write = True
129
143
del self._cache[path]
131
def get_sha1(self, path, stat_value=None):
146
def get_sha1(self, path):
132
147
"""Return the sha1 of a file.
134
if path.__class__ is str:
135
abspath = pathjoin(self.root_utf8, path)
137
abspath = pathjoin(self.root, path)
149
abspath = pathjoin(self.root, path)
138
150
self.stat_count += 1
139
file_fp = self._fingerprint(abspath, stat_value)
151
file_fp = _fingerprint(abspath)
142
154
# not a regular file or not existing
152
164
cache_sha1, cache_fp = None, None
154
166
if cache_fp == file_fp:
155
## mutter("hashcache hit for %s %r -> %s", path, file_fp, cache_sha1)
156
## mutter("now = %s", time.time())
157
167
self.hit_count += 1
158
168
return cache_sha1
160
170
self.miss_count += 1
162
173
mode = file_fp[FP_MODE_COLUMN]
163
174
if stat.S_ISREG(mode):
164
digest = self._really_sha1_file(abspath)
175
digest = sha_file(file(abspath, 'rb', buffering=65000))
165
176
elif stat.S_ISLNK(mode):
166
digest = sha_string(os.readlink(abspath))
177
digest = sha.new(os.readlink(abspath)).hexdigest()
168
179
raise BzrError("file %r: unknown file stat mode: %o"%(abspath,mode))
170
# window of 3 seconds to allow for 2s resolution on windows,
171
# unsynchronized file servers, etc.
172
cutoff = self._cutoff_time()
173
if file_fp[FP_MTIME_COLUMN] >= cutoff \
174
or file_fp[FP_CTIME_COLUMN] >= cutoff:
181
now = int(time.time())
182
if file_fp[FP_MTIME_COLUMN] >= now or file_fp[FP_CTIME_COLUMN] >= now:
175
183
# changed too recently; can't be cached. we can
176
184
# return the result and it could possibly be cached
183
191
# need to let sufficient time elapse before we may cache this entry
184
192
# again. If we didn't do this, then, for example, a very quick 1
185
193
# byte replacement in the file might go undetected.
186
## mutter('%r modified too recently; not caching', path)
187
self.danger_count += 1
194
self.danger_count += 1
189
196
self.removed_count += 1
190
197
self.needs_write = True
191
198
del self._cache[path]
193
## mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
194
## path, time.time(), file_fp[FP_MTIME_COLUMN],
195
## file_fp[FP_CTIME_COLUMN])
196
200
self.update_count += 1
197
201
self.needs_write = True
198
202
self._cache[path] = (digest, file_fp)
201
def _really_sha1_file(self, abspath):
    """Calculate the SHA1 of a file by reading the full text.

    :param abspath: Path of the file to hash; passed unchanged to file().
    :return: Whatever sha_file() returns for the opened file
        (presumably the hex SHA1 digest -- confirm against
        bzrlib.osutils.sha_file).
    """
    f = file(abspath, 'rb', buffering=65000)
    try:
        return sha_file(f)
    finally:
        # Close explicitly instead of relying on garbage collection, so the
        # descriptor is released even if hashing raises.
        f.close()
206
206
"""Write contents of cache to file."""
207
207
outf = AtomicFile(self.cache_file_name(), 'wb', new_mode=self._mode)
209
outf.write(CACHE_HEADER)
209
print >>outf, CACHE_HEADER,
211
211
for path, c in self._cache.iteritems():
212
line_info = [path.encode('utf-8'), '// ', c[0], ' ']
213
line_info.append(' '.join([str(fld) for fld in c[1]]))
214
line_info.append('\n')
215
outf.write(''.join(line_info))
212
assert '//' not in path, path
213
outf.write(path.encode('utf-8'))
215
print >>outf, c[0], # hex sha1
217
print >>outf, "%d" % fld,
217
220
self.needs_write = False
218
## mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
219
## self.cache_file_name(), self.hit_count, self.miss_count,
221
## self.danger_count, self.update_count)
221
mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
222
self.cache_file_name(), self.hit_count, self.miss_count,
224
self.danger_count, self.update_count)
226
230
"""Reinstate cache from file.
270
275
self._cache[path] = (sha1, fp)
272
277
self.needs_write = False
274
def _cutoff_time(self):
275
"""Return cutoff time.
277
Files modified more recently than this time are at risk of being
278
undetectably modified and so can't be cached.
280
return int(time.time()) - 3
282
def _fingerprint(self, abspath, stat_value=None):
283
if stat_value is None:
285
stat_value = os.lstat(abspath)
287
# might be missing, etc
289
if stat.S_ISDIR(stat_value.st_mode):
291
# we discard any high precision because it's not reliable; perhaps we
292
# could do better on some systems?
293
return (stat_value.st_size, long(stat_value.st_mtime),
294
long(stat_value.st_ctime), stat_value.st_ino,
295
stat_value.st_dev, stat_value.st_mode)