~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/hashcache.py

Committer: Martin Pool
Date: 2005-07-07 10:13:43 UTC
mfrom: (0.1.95)
Revision ID: mbp@sourcefrog.net-20050707101342-aa2ef950004cb467

todo

files added:
bzrlib/statcache.py

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

files removed:
HACKING

Makefile

bzrlib/builtins.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/intset.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/plugins/__init__.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/shellcomplete.py

bzrlib/ui.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/weave.py

bzrlib/weavefile.py

contrib/emacs

contrib/emacs/bzr-mode.el

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches/pending-merge.patch

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/history2weaves.py

tools/http_client.py

tools/testweave.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
bzrlib/util/effbot/ => effbot/

bzrlib/util/elementtree/ => elementtree/

bzrlib/plugins/ => plugins/

bzrlib/util/urlgrabber/ => urlgrabber/

files modified:
.bzrignore

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/status.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/pwk

doc/index.txt

doc/todo-from-arch.txt

setup.py

testsweet.py

Show diffs side-by-side

added added

removed removed

bzrlib/hashcache.py

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Up-front, stat all files in order and remove those which are deleted or

# out-of-date. Don't actually re-read them until they're needed. That ought

# to bring all the inodes into core so that future stats to them are fast, and

# it preserves the nice property that any caller will always get up-to-date

# data except in unavoidable cases.

# TODO: Perhaps return more details on the file to avoid statting it

# again: nonexistent, file type, size, etc

CACHE_HEADER = "### bzr hashcache v5\n"

import os, stat, time

from bzrlib.osutils import sha_file

from bzrlib.trace import mutter, warning

def _fingerprint(abspath):

import os, stat

try:

fs = os.lstat(abspath)

except OSError:

if stat.S_ISDIR(fs.st_mode):

return None

# we discard any high precision because it's not reliable; perhaps we

# could do better on some systems?

return (fs.st_size, long(fs.st_mtime),

long(fs.st_ctime), fs.st_ino, fs.st_dev)

return (fs.st_size, fs.st_mtime,

fs.st_ctime, fs.st_ino, fs.st_dev)

class HashCache(object):

This does not canonicalize the paths passed in; that should be

done by the caller.

_cache

Indexed by path, points to a two-tuple of the SHA-1 of the file

and its fingerprint.

cache_sha1

Indexed by path, gives the SHA-1 of the file.

validator

Indexed by path, gives the fingerprint of the file last time it was read.

stat_count

number of times files have been statted

miss_count

number of misses (times files have been completely re-read)

"""

needs_write = False

def __init__(self, basedir):

self.basedir = basedir

self.hit_count = 0

self.miss_count = 0

self.stat_count = 0

self.danger_count = 0

self.removed_count = 0

self.update_count = 0

self._cache = {}

100

def cache_file_name(self):

101

return os.sep.join([self.basedir, '.bzr', 'stat-cache'])

102

103

self.cache_sha1 = {}

self.validator = {}

104

105

106

def clear(self):

107

"""Discard all cached information.

108

109

This does not reset the counters."""

110

if self._cache:

111

self.needs_write = True

112

self._cache = {}

113

114

115

def scan(self):

116

"""Scan all files and remove entries where the cache entry is obsolete.

117

118

Obsolete entries are those where the file has been modified or deleted

119

since the entry was inserted.

120

"""

121

prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]

122

prep.sort()

123

124

for inum, path, cache_entry in prep:

125

abspath = os.sep.join([self.basedir, path])

126

fp = _fingerprint(abspath)

127

self.stat_count += 1

128

129

cache_fp = cache_entry[1]

130

131

if (not fp) or (cache_fp != fp):

132

# not here or not a regular file anymore

133

self.removed_count += 1

134

self.needs_write = True

135

del self._cache[path]

136

"""Discard all cached information."""

self.validator = {}

self.cache_sha1 = {}

137

138

139

def get_sha1(self, path):

140

"""Return the sha1 of a file.

"""Return the hex SHA-1 of the contents of the file at path.

XXX: If the file does not exist or is not a plain file???

141

"""

142

abspath = os.sep.join([self.basedir, path])

import os, time

from bzrlib.osutils import sha_file

abspath = os.path.join(self.basedir, path)

fp = _fingerprint(abspath)

cache_fp = self.validator.get(path)

100

143

101

self.stat_count += 1

144

file_fp = _fingerprint(abspath)

145

146

if not file_fp:

147

# not a regular file or not existing

148

if path in self._cache:

149

self.removed_count += 1

150

self.needs_write = True

151

del self._cache[path]

152

return None

153

154

if path in self._cache:

155

cache_sha1, cache_fp = self._cache[path]

156

else:

157

cache_sha1, cache_fp = None, None

158

159

if cache_fp == file_fp:

102

103

if not fp:

104

# not a regular file

105

return None

106

elif cache_fp and (cache_fp == fp):

160

107

self.hit_count += 1

161

return cache_sha1

162

163

self.miss_count += 1

164

digest = sha_file(file(abspath, 'rb', buffering=65000))

165

166

now = int(time.time())

167

if file_fp[1] >= now or file_fp[2] >= now:

168

# changed too recently; can't be cached. we can

169

# return the result and it could possibly be cached

170

# next time.

171

self.danger_count += 1

172

if cache_fp:

173

self.removed_count += 1

174

self.needs_write = True

175

del self._cache[path]

108

return self.cache_sha1[path]

176

109

else:

177

self.update_count += 1

178

self.needs_write = True

179

self._cache[path] = (digest, file_fp)

180

181

return digest

182

183

184

185

186

def write(self):

187

"""Write contents of cache to file."""

188

from atomicfile import AtomicFile

189

190

outf = AtomicFile(self.cache_file_name(), 'wb')

191

try:

192

print >>outf, CACHE_HEADER,

193

194

for path, c in self._cache.iteritems():

195

assert '//' not in path, path

196

outf.write(path.encode('utf-8'))

197

outf.write('// ')

198

print >>outf, c[0], # hex sha1

199

for fld in c[1]:

200

print >>outf, "%d" % fld,

201

print >>outf

202

203

outf.commit()

204

self.needs_write = False

205

finally:

206

if not outf.closed:

207

outf.abort()

208

209

210

211

def read(self):

212

"""Reinstate cache from file.

213

214

Overwrites existing cache.

215

216

If the cache file has the wrong version marker, this just clears

217

the cache."""

218

self._cache = {}

219

220

fn = self.cache_file_name()

221

try:

222

inf = file(fn, 'rb', buffering=65000)

223

except IOError, e:

224

mutter("failed to open %s: %s" % (fn, e))

225

return

226

227

228

hdr = inf.readline()

229

if hdr != CACHE_HEADER:

230

mutter('cache header marker not found at top of %s; discarding cache'

231

% fn)

232

return

233

234

for l in inf:

235

pos = l.index('// ')

236

path = l[:pos].decode('utf-8')

237

if path in self._cache:

238

warning('duplicated path %r in cache' % path)

239

continue

240

241

pos += 3

242

fields = l[pos:].split(' ')

243

if len(fields) != 6:

244

warning("bad line in hashcache: %r" % l)

245

continue

246

247

sha1 = fields[0]

248

if len(sha1) != 40:

249

warning("bad sha1 in hashcache: %r" % sha1)

250

continue

251

252

fp = tuple(map(long, fields[1:]))

253

254

self._cache[path] = (sha1, fp)

255

256

self.needs_write = False

257

258

259

260

110

self.miss_count += 1

111

digest = sha_file(file(abspath, 'rb'))

112

113

now = int(time.time())

114

if fp[1] >= now or fp[2] >= now:

115

# changed too recently; can't be cached. we can

116

# return the result and it could possibly be cached

117

# next time.

118

self.danger_count += 1

119

if cache_fp:

120

del self.validator[path]

121

del self.cache_sha1[path]

122

else:

123

self.validator[path] = fp

124

self.cache_sha1[path] = digest

125

126

return digest

127

Older »