~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/hashcache.py

Committer: John Arbash Meinel
Date: 2005-12-28 19:47:54 UTC
mto: (1185.50.36 bzr-jam-integration)
mto: This revision was merged to the branch mainline in revision 1536.
Revision ID: john@arbash-meinel.com-20051228194754-27fd0b38f0b9a7f2

bzr --version prints branch nickname.

files added:
HACKING

INSTALL

Makefile

NEWS.developers

bzr_man.py

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/clone.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/delta.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/identitymap.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/lsprof.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/store

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_parent.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_remove.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revision_info.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/transactions.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

tools

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
bzrlib/mdiff.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/statcache.py

testbzr

files renamed:
bzrlib/store.py => bzrlib/store/__init__.py

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

contrib/bash/bzr => contrib/bash/bzr.simple

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/merge.py *

bzrlib/merge_core.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/add-bzr-to-baz

contrib/zsh/_bzr

doc/formats.txt

doc/index.txt

doc/random.txt

doc/todo-from-arch.txt

setup.py *

Show diffs side-by-side

added added

removed removed

bzrlib/hashcache.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Up-front, stat all files in order and remove those which are deleted or

# out-of-date. Don't actually re-read them until they're needed. That ought

# to bring all the inodes into core so that future stats to them are fast, and

# it preserves the nice property that any caller will always get up-to-date

# data except in unavoidable cases.

# TODO: Perhaps return more details on the file to avoid statting it

# again: nonexistent, file type, size, etc

# TODO: Perhaps use a Python pickle instead of a text file; might be faster.

# First line of the on-disk cache file.  HashCache.write() emits it and
# HashCache.read() discards the cache when the file's first line differs.
CACHE_HEADER = "### bzr hashcache v5\n"

import os, stat, time

import sha

from bzrlib.osutils import sha_file

from bzrlib.trace import mutter, warning

from bzrlib.atomicfile import AtomicFile

from bzrlib.osutils import pathjoin

# Index of st_mode within the fingerprint tuple built by _fingerprint().
FP_MODE_COLUMN = 5

def _fingerprint(abspath):
    """Return a stat-based fingerprint for the file at abspath.

    The result is the tuple
    (size, mtime, ctime, inode, device, mode), with both timestamps
    truncated to whole seconds.  None is returned when the path cannot
    be lstat'ed (for example because it is missing) or when it is a
    directory.
    """
    try:
        info = os.lstat(abspath)
    except OSError:
        # the path may simply not exist; there is nothing to fingerprint
        return None

    file_mode = info.st_mode
    if stat.S_ISDIR(file_mode):
        return None

    # Sub-second precision is dropped because it is not reliable; some
    # systems could perhaps do better.
    whole_mtime = long(info.st_mtime)
    whole_ctime = long(info.st_ctime)
    return (info.st_size, whole_mtime, whole_ctime,
            info.st_ino, info.st_dev, file_mode)

class HashCache(object):

"""Cache for looking up file SHA-1.

Files are considered to match the cached value if the fingerprint

of the file has not changed. This includes its mtime, ctime,

device number, inode number, and size. This should catch

modifications or replacement of the file by a new one.

This may not catch modifications that do not change the file's

size and that occur within the resolution window of the

timestamps. To handle this we specifically do not cache files

which have changed since the start of the present second, since

they could undetectably change again.

This scheme may fail if the machine's clock steps backwards.

Don't do that.

This does not canonicalize the paths passed in; that should be

done by the caller.

_cache

Indexed by path, points to a two-tuple of the SHA-1 of the file.

and its fingerprint.

stat_count

number of times files have been statted

hit_count

number of times files have been retrieved from the cache, avoiding a

re-read

miss_count

number of misses (times files have been completely re-read)

"""

needs_write = False

def __init__(self, basedir):

self.basedir = basedir

self.hit_count = 0

self.miss_count = 0

self.stat_count = 0

100

self.danger_count = 0

101

self.removed_count = 0

102

self.update_count = 0

103

self._cache = {}

104

105

def cache_file_name(self):

106

# FIXME: duplicate path logic here, this should be

107

# something like 'branch.controlfile'.

108

return pathjoin(self.basedir, '.bzr', 'stat-cache')

109

110

def clear(self):

111

"""Discard all cached information.

112

113

This does not reset the counters."""

114

if self._cache:

115

self.needs_write = True

116

self._cache = {}

117

118

119

def scan(self):

120

"""Scan all files and remove entries where the cache entry is obsolete.

121

122

Obsolete entries are those where the file has been modified or deleted

123

since the entry was inserted.

124

"""

125

prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]

126

prep.sort()

127

128

for inum, path, cache_entry in prep:

129

abspath = pathjoin(self.basedir, path)

130

fp = _fingerprint(abspath)

131

self.stat_count += 1

132

133

cache_fp = cache_entry[1]

134

135

if (not fp) or (cache_fp != fp):

136

# not here or not a regular file anymore

137

self.removed_count += 1

138

self.needs_write = True

139

del self._cache[path]

140

141

142

def get_sha1(self, path):

143

"""Return the sha1 of a file.

144

"""

145

abspath = pathjoin(self.basedir, path)

146

self.stat_count += 1

147

file_fp = _fingerprint(abspath)

148

149

if not file_fp:

150

# not a regular file or not existing

151

if path in self._cache:

152

self.removed_count += 1

153

self.needs_write = True

154

del self._cache[path]

155

return None

156

157

if path in self._cache:

158

cache_sha1, cache_fp = self._cache[path]

159

else:

160

cache_sha1, cache_fp = None, None

161

162

if cache_fp == file_fp:

163

self.hit_count += 1

164

return cache_sha1

165

166

self.miss_count += 1

167

168

169

mode = file_fp[FP_MODE_COLUMN]

170

if stat.S_ISREG(mode):

171

digest = sha_file(file(abspath, 'rb', buffering=65000))

172

elif stat.S_ISLNK(mode):

173

link_target = os.readlink(abspath)

174

digest = sha.new(os.readlink(abspath)).hexdigest()

175

else:

176

raise BzrError("file %r: unknown file stat mode: %o"%(abspath,mode))

177

178

now = int(time.time())

179

if file_fp[1] >= now or file_fp[2] >= now:

180

# changed too recently; can't be cached. we can

181

# return the result and it could possibly be cached

182

# next time.

183

self.danger_count += 1

184

if cache_fp:

185

self.removed_count += 1

186

self.needs_write = True

187

del self._cache[path]

188

else:

189

self.update_count += 1

190

self.needs_write = True

191

self._cache[path] = (digest, file_fp)

192

return digest

193

194

def write(self):

195

"""Write contents of cache to file."""

196

outf = AtomicFile(self.cache_file_name(), 'wb')

197

try:

198

print >>outf, CACHE_HEADER,

199

200

for path, c in self._cache.iteritems():

201

assert '//' not in path, path

202

outf.write(path.encode('utf-8'))

203

outf.write('// ')

204

print >>outf, c[0], # hex sha1

205

for fld in c[1]:

206

print >>outf, "%d" % fld,

207

print >>outf

208

209

outf.commit()

210

self.needs_write = False

211

finally:

212

if not outf.closed:

213

outf.abort()

214

215

def read(self):

216

"""Reinstate cache from file.

217

218

Overwrites existing cache.

219

220

If the cache file has the wrong version marker, this just clears

221

the cache."""

222

self._cache = {}

223

224

fn = self.cache_file_name()

225

try:

226

inf = file(fn, 'rb', buffering=65000)

227

except IOError, e:

228

mutter("failed to open %s: %s", fn, e)

229

# better write it now so it is valid

230

self.needs_write = True

231

return

232

233

234

hdr = inf.readline()

235

if hdr != CACHE_HEADER:

236

mutter('cache header marker not found at top of %s;'

237

' discarding cache', fn)

238

self.needs_write = True

239

return

240

241

for l in inf:

242

pos = l.index('// ')

243

path = l[:pos].decode('utf-8')

244

if path in self._cache:

245

warning('duplicated path %r in cache' % path)

246

continue

247

248

pos += 3

249

fields = l[pos:].split(' ')

250

if len(fields) != 7:

251

warning("bad line in hashcache: %r" % l)

252

continue

253

254

sha1 = fields[0]

255

if len(sha1) != 40:

256

warning("bad sha1 in hashcache: %r" % sha1)

257

continue

258

259

fp = tuple(map(long, fields[1:]))

260

261

self._cache[path] = (sha1, fp)

262

263

self.needs_write = False

264

265

266

267

Older »