1
# (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
import stat, os, sha, time
18
from binascii import b2a_qp, a2b_qp
20
from trace import mutter
23
# file fingerprints are: (size, mtime, ctime, ino, dev); the path is
# stored next to them in the cache entry, not in the fingerprint itself.
25
# if this is the same for this file as in the previous revision, we
26
# assume the content is the same and the SHA-1 is the same.
28
# This is stored in a fingerprint file that also contains the file-id
29
# and the content SHA-1.
31
# Thus for any given file we can quickly get the SHA-1, either from
# the cache or, if the cache is out of date, by re-reading the file.
34
# At the moment this is stored in a simple textfile; it might be nice
35
# to use a tdb instead.
40
# build a new cache from scratch
41
# load cache, incrementally update it
43
# TODO: Have a paranoid mode where we always compare the texts and
44
# always recalculate the digest, to trap modification without stat
45
# change and SHA collisions.
49
def fingerprint(path, abspath):
    """Return the stat fingerprint of the file at abspath, or None.

    The fingerprint is the tuple (size, mtime, ctime, ino, dev) taken
    from os.lstat, so a symlink is described itself rather than its
    target.  None is returned when the path cannot be statted or when
    it is a directory.

    path is accepted for the benefit of callers but is not used here.
    """
    try:
        fs = os.lstat(abspath)
    except OSError:
        # might be missing, etc
        return None

    if stat.S_ISDIR(fs.st_mode):
        # directories have no content to hash, so no fingerprint
        return None

    return (fs.st_size, fs.st_mtime,
            fs.st_ctime, fs.st_ino, fs.st_dev)
63
def write_cache(branch, entry_iter):
    """Write cache entries to the branch's 'work-cache' control file.

    entry_iter yields tuples of the form
    (file_id, sha1, path, size, mtime, ctime, ino, dev).  Each entry
    becomes one line of space-separated fields; the path is
    quoted-printable encoded with quotetabs enabled so that it cannot
    contain a literal space or tab.

    The data is written to 'work-cache.tmp' and then renamed over
    'work-cache', so readers never see a half-written cache.

    NOTE: '%d' drops any fractional part of the timestamp fields.
    """
    outf = branch.controlfile('work-cache.tmp', 'wt')
    try:
        for entry in entry_iter:
            outf.write(entry[0] + ' ' + entry[1] + ' ')
            outf.write(b2a_qp(entry[2], True))
            outf.write(' %d %d %d %d %d\n' % entry[3:])
    finally:
        # Close before renaming so everything buffered reaches the file.
        outf.close()

    os.rename(branch.controlfilename('work-cache.tmp'),
              branch.controlfilename('work-cache'))
76
def load_cache(branch):
    """Read the branch's 'work-cache' control file into a dictionary.

    Each line is expected to hold space-separated fields: file_id,
    SHA-1 hex digest, quoted-printable path, then integer stat fields
    (the layout written by write_cache).

    Returns a dict mapping file_id to
    (file_id, sha1, path, size, mtime, ctime, ino, dev).

    Raises BzrError if the same file_id appears on more than one line.
    """
    # NOTE(review): BzrError is not imported in the visible part of this
    # file -- confirm it is in scope.
    cache = {}

    try:
        cachefile = branch.controlfile('work-cache', 'rt')
    except IOError:
        # Assumes controlfile raises IOError, like open(), when no cache
        # has been written yet -- TODO confirm.  The cache is only an
        # optimisation, so start from an empty one.
        return cache

    try:
        for line in cachefile:
            f = line.split(' ')
            file_id = f[0]
            if file_id in cache:
                raise BzrError("duplicated file_id in cache: {%s}" % file_id)
            cache[file_id] = (f[0], f[1], a2b_qp(f[2])) + tuple([long(x) for x in f[3:]])
    finally:
        cachefile.close()

    return cache
95
def _files_from_inventory(inv):
    """Generate a (file_id, path) pair for each entry of inv.

    Entries are produced in the order inv.iter_entries() yields them.
    """
    for entry_path, entry in inv.iter_entries():
        yield (entry.file_id, entry_path)
102
def build_cache(branch):
    """Build a new stat cache for the branch from scratch.

    Reads the branch's working inventory and passes every entry it
    lists to _update_cache_from_list, starting from an empty cache.
    Returns the result of _update_cache_from_list, as update_cache does.
    """
    inv = branch.read_working_inventory()

    # Start empty: nothing from a previously written cache is reused.
    cache = {}
    return _update_cache_from_list(branch, cache, _files_from_inventory(inv))
110
def update_cache(branch, inv):
    """Load the branch's stored cache and refresh it for the entries of inv.

    Returns the result of _update_cache_from_list.
    """
    # TODO: It's supposed to be faster to stat the files in order by inum.
    # We don't directly know the inum of the files of course but we do
    # know where they were last sighted, so we can sort by that.

    stored = load_cache(branch)
    pending = _files_from_inventory(inv)
    return _update_cache_from_list(branch, stored, pending)
120
def _update_cache_from_list(branch, cache, to_update):
    """Update the cache to have info on the named files.

    to_update is a sequence of (file_id, path) pairs.

    cache is a dict mapping file_id to
    (file_id, sha1, path, size, mtime, ctime, ino, dev); it is modified
    in place, written back with write_cache, and returned.

    A file is re-read and re-hashed only when its stat fingerprint
    differs from the cached one.
    """
    hardcheck = dirty = 0
    for file_id, path in to_update:
        fap = branch.abspath(path)
        # fingerprint takes (path, abspath); the absolute path is the
        # one that gets statted.
        fp = fingerprint(path, fap)
        cacheentry = cache.get(file_id)

        if fp is None:  # not here
            # Nothing to hash; drop any stale entry so the written cache
            # does not keep describing a file that cannot be statted.
            if cacheentry:
                del cache[file_id]
                dirty += 1
            continue

        if cacheentry and (cacheentry[3:] == fp):
            continue  # all stat fields unchanged

        hardcheck += 1

        infile = file(fap, 'rb')
        try:
            dig = sha.new(infile.read()).hexdigest()
        finally:
            infile.close()

        if cacheentry is None or dig != cacheentry[1]:
            # if there was no previous entry for this file, or if the
            # SHA has changed, then update the cache
            cacheentry = (file_id, dig, path) + fp
            cache[file_id] = cacheentry
            dirty += 1

    mutter('work cache: read %d files, %d changed' % (hardcheck, dirty))

    write_cache(branch, cache.itervalues())

    return cache