~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/hashcache.py

Committer: Martin Pool
Date: 2005-07-11 03:16:29 UTC
Revision ID: mbp@sourcefrog.net-20050711031629-924ff7343d55103c

- faster weave extraction

files added:
bzrlib/hashcache.py

bzrlib/merge3.py

bzrlib/meta_store.py

bzrlib/plugin.py

bzrlib/selftest

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/versioning.py

bzrlib/upgrade.py

bzrlib/weave.py

bzrlib/weavefile.py

contrib/pwclient.full

contrib/pwk

effbot

effbot/__init__.py

effbot/org

effbot/org/__init__.py

effbot/org/gzip_consumer.py

effbot/org/http_client.py

effbot/org/http_manager.py

patches/ndiff.patch

patches/plugins-no-plugins.patch

plugins

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testsweet.py

tools

tools/convertfile.py

tools/convertinv.py

tools/testweave.py

files removed:
bzrlib/statcache.py

files renamed:
bzrlib/selftest.py => bzrlib/selftest/__init__.py

bzrlib/whitebox.py => bzrlib/selftest/whitebox.py

files modified:
.bzrignore

NEWS

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revision.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/upload-bzr.dev

doc/formats.txt

testbzr

Show diffs side-by-side

added added

removed removed

bzrlib/hashcache.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Perhaps have a way to stat all the files in inode order, and

# then remember that they're all fresh for the lifetime of the object?

# TODO: Keep track of whether there are in-memory updates that need to

# be flushed.

# TODO: Perhaps return more details on the file to avoid statting it

# again: nonexistent, file type, size, etc

# Version marker for the on-disk cache file.  HashCache.write() emits it
# as the first line, and HashCache.read() discards any cache file whose
# first line is not exactly this string.
CACHE_HEADER = "### bzr hashcache v5\n"

def _fingerprint(abspath):
    """Return a tuple describing the current state of the file at abspath.

    The path is examined with os.lstat, so a symbolic link is described
    itself rather than followed.

    Returns None if the path cannot be statted (for example because it
    is missing) or if it is a directory.  Otherwise returns the tuple
    (size, mtime, ctime, inode number, device number), with both
    timestamps truncated to whole seconds.
    """
    import os
    import stat

    try:
        fs = os.lstat(abspath)
    except OSError:
        # might be missing, etc
        return None
    if stat.S_ISDIR(fs.st_mode):
        return None

    # we discard any high precision because it's not reliable; perhaps we
    # could do better on some systems?
    #
    # int() is used rather than long(): it truncates the same way, yields
    # values that compare equal to longs, and promotes to a long by itself
    # when the value is too large, so it also works where the long builtin
    # does not exist.
    return (fs.st_size, int(fs.st_mtime),
            int(fs.st_ctime), fs.st_ino, fs.st_dev)

class HashCache(object):
    """Cache for looking up file SHA-1.

    Files are considered to match the cached value if the fingerprint
    of the file has not changed.  This includes its mtime, ctime,
    device number, inode number, and size.  This should catch
    modifications or replacement of the file by a new one.

    This may not catch modifications that do not change the file's
    size and that occur within the resolution window of the
    timestamps.  To handle this we specifically do not cache files
    which have changed since the start of the present second, since
    they could undetectably change again.

    This scheme may fail if the machine's clock steps backwards.
    Don't do that.

    This does not canonicalize the paths passed in; that should be
    done by the caller.

    _cache
        Indexed by path, points to a two-tuple of the SHA-1 of the file
        and its fingerprint.

    stat_count
        number of times files have been statted

    hit_count
        number of times files have been retrieved from the cache, avoiding a
        re-read

    miss_count
        number of misses (times files have been completely re-read)

    danger_count
        number of misses where the file's mtime or ctime was not older
        than the current second, so the result could not be cached

    needs_write
        set when an entry is added, replaced or discarded in memory;
        reset by a successful write() and by a read() that gets past
        the header check
    """
    needs_write = False

    def __init__(self, basedir):
        """Create an empty cache for files located under basedir."""
        self.basedir = basedir
        self.hit_count = 0
        self.miss_count = 0
        self.stat_count = 0
        self.danger_count = 0
        self._cache = {}

    def cache_file_name(self):
        """Return the path of the on-disk cache file inside basedir."""
        import os.path
        return os.path.join(self.basedir, '.bzr', 'stat-cache')

    def clear(self):
        """Discard all cached information.

        This does not reset the counters."""
        if self._cache:
            # Only a non-empty cache makes the on-disk copy stale.
            self.needs_write = True
            self._cache = {}

    def get_sha1(self, path):
        """Return the hex SHA-1 of the contents of the file at path.

        path is joined onto basedir to locate the file, and is also the
        key under which the result is cached.

        Returns None when the file has no fingerprint, i.e. it could not
        be statted or it is a directory.

        XXX: If the file does not exist or is not a plain file???
        """
        import os, time
        from bzrlib.osutils import sha_file
        from bzrlib.trace import mutter

        abspath = os.path.join(self.basedir, path)
        fp = _fingerprint(abspath)
        c = self._cache.get(path)
        if c:
            cache_sha1, cache_fp = c
        else:
            cache_sha1, cache_fp = None, None

        self.stat_count += 1

        if not fp:
            # not a regular file
            return None
        elif cache_fp and (cache_fp == fp):
            self.hit_count += 1
            return cache_sha1
        else:
            self.miss_count += 1
            # Close the file explicitly rather than leaving the handle
            # open until it happens to be garbage collected.
            f = open(abspath, 'rb')
            try:
                digest = sha_file(f)
            finally:
                f.close()

            now = int(time.time())
            if fp[1] >= now or fp[2] >= now:
                # changed too recently; can't be cached.  we can
                # return the result and it could possibly be cached
                # next time.
                self.danger_count += 1
                if cache_fp:
                    mutter("remove outdated entry for %s" % path)
                    self.needs_write = True
                    del self._cache[path]
            elif (fp != cache_fp) or (digest != cache_sha1):
                mutter("update entry for %s" % path)
                mutter(" %r" % (fp,))
                mutter(" %r" % (cache_fp,))
                self.needs_write = True
                self._cache[path] = (digest, fp)

            return digest

    def write(self):
        """Write contents of cache to file.

        The file starts with CACHE_HEADER, followed by one line per
        entry of the form

            <utf-8 path>// <sha1> <fingerprint fields separated by spaces>

        The output is committed only if every entry was written;
        otherwise it is aborted.  needs_write is cleared on success.
        """
        from atomicfile import AtomicFile

        outf = AtomicFile(self.cache_file_name(), 'wb')
        try:
            outf.write(CACHE_HEADER)

            for path, c in self._cache.items():
                # '//' separates the path from the rest of the line, so
                # it must not occur inside the path itself.
                assert '//' not in path, path
                outf.write(path.encode('utf-8'))
                outf.write('// ')
                columns = ["%s" % c[0]]         # hex sha1
                columns.extend(["%d" % fld for fld in c[1]])
                outf.write(' '.join(columns))
                outf.write('\n')

            outf.commit()
            self.needs_write = False
        finally:
            if not outf.closed:
                outf.abort()

    def read(self):
        """Reinstate cache from file.

        Overwrites existing cache.

        If the cache file cannot be opened, or has the wrong version
        marker, this just clears the cache.

        Lines that cannot be parsed, and repeated paths, are reported
        with a warning and skipped.
        """
        import sys
        from bzrlib.trace import mutter, warning

        self._cache = {}

        fn = self.cache_file_name()
        try:
            inf = open(fn, 'rb')
        except IOError:
            # Fetch the exception through sys.exc_info() so that this
            # clause has the same spelling on old and new interpreters.
            e = sys.exc_info()[1]
            mutter("failed to open %s: %s" % (fn, e))
            return

        try:
            hdr = inf.readline()
            if hdr != CACHE_HEADER:
                mutter('cache header marker not found at top of %s; discarding cache'
                       % fn)
                return

            for l in inf:
                pos = l.find('// ')
                if pos < 0:
                    # No path separator: treat it like any other
                    # malformed line instead of aborting the whole load.
                    warning("bad line in hashcache: %r" % l)
                    continue

                path = l[:pos].decode('utf-8')
                if path in self._cache:
                    warning('duplicated path %r in cache' % path)
                    continue

                pos += 3
                fields = l[pos:].split(' ')
                if len(fields) != 6:
                    warning("bad line in hashcache: %r" % l)
                    continue

                sha1 = fields[0]
                if len(sha1) != 40:
                    warning("bad sha1 in hashcache: %r" % sha1)
                    continue

                try:
                    fp = tuple([int(fld) for fld in fields[1:]])
                except ValueError:
                    # A fingerprint field that is not an integer.
                    warning("bad line in hashcache: %r" % l)
                    continue

                self._cache[path] = (sha1, fp)
        finally:
            inf.close()

        self.needs_write = False

238

239

240

241

Older »