~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/hashcache.py

Committer: Martin Pool
Date: 2005-07-11 04:53:07 UTC
Revision ID: mbp@sourcefrog.net-20050711045307-2b38378d043dc25c

- Refactor weave calculation of inclusions

files added:
bzrlib/changeset.py

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testbzr

testsweet.py

files removed:
BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS.developers

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/plugins/__init__.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/emacs

contrib/emacs/bzr-mode.el

generate_docs.py

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/util/effbot/ => effbot/

bzrlib/util/elementtree/ => elementtree/

bzrlib/plugins/ => plugins/

bzrlib/tests/test_weave.py => tools/testweave.py

bzrlib/util/urlgrabber/ => urlgrabber/

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/pwk

contrib/zsh/_bzr

setup.py *

tools/convertfile.py

Show diffs side-by-side

added added

removed removed

bzrlib/hashcache.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Up-front, stat all files in order and remove those which are deleted or

# out-of-date. Don't actually re-read them until they're needed. That ought

# to bring all the inodes into core so that future stats to them are fast, and

# it preserves the nice property that any caller will always get up-to-date

# data except in unavoidable cases.

# TODO: Perhaps have a way to stat all the files in inode order, and

# then remember that they're all fresh for the lifetime of the object?

# TODO: Keep track of whether there are in-memory updates that need to

# be flushed.

# TODO: Perhaps return more details on the file to avoid statting it

# again: nonexistent, file type, size, etc

# TODO: Perhaps use a Python pickle instead of a text file; might be faster.

CACHE_HEADER = "### bzr hashcache v5\n"

import os, stat, time

import sha

from bzrlib.osutils import sha_file, pathjoin, safe_unicode

from bzrlib.trace import mutter, warning

from bzrlib.atomicfile import AtomicFile

from bzrlib.errors import BzrError

FP_MTIME_COLUMN = 1

FP_CTIME_COLUMN = 2

FP_MODE_COLUMN = 5

def _fingerprint(abspath):

import os, stat

try:

fs = os.lstat(abspath)

except OSError:

# we discard any high precision because it's not reliable; perhaps we

# could do better on some systems?

return (fs.st_size, long(fs.st_mtime),

long(fs.st_ctime), fs.st_ino, fs.st_dev, fs.st_mode)

long(fs.st_ctime), fs.st_ino, fs.st_dev)

class HashCache(object):

"""

needs_write = False

def __init__(self, root, cache_file_name, mode=None):

"""Create a hash cache in base dir, and set the file mode to mode."""

self.root = safe_unicode(root)

def __init__(self, basedir):

self.basedir = basedir

100

self.hit_count = 0

101

self.miss_count = 0

102

self.stat_count = 0

103

self.danger_count = 0

104

self.removed_count = 0

105

self.update_count = 0

106

self._cache = {}

107

self._mode = mode

108

self._cache_file_name = safe_unicode(cache_file_name)

109

110

def cache_file_name(self):

111

return self._cache_file_name

import os.path

100

return os.path.join(self.basedir, '.bzr', 'stat-cache')

101

102

103

112

104

113

105

def clear(self):

114

106

"""Discard all cached information.

118

110

self.needs_write = True

119

111

self._cache = {}

120

112

121

def scan(self):

122

"""Scan all files and remove entries where the cache entry is obsolete.

123

124

Obsolete entries are those where the file has been modified or deleted

125

since the entry was inserted.

126

"""

127

# FIXME optimisation opportunity, on linux [and check other oses]:

128

# rather than iteritems order, stat in inode order.

129

prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]

130

prep.sort()

131

132

for inum, path, cache_entry in prep:

133

abspath = pathjoin(self.root, path)

134

fp = _fingerprint(abspath)

135

self.stat_count += 1

136

137

cache_fp = cache_entry[1]

138

139

if (not fp) or (cache_fp != fp):

140

# not here or not a regular file anymore

141

self.removed_count += 1

142

self.needs_write = True

143

del self._cache[path]

144

145

113

146

114

def get_sha1(self, path):

147

"""Return the sha1 of a file.

115

"""Return the hex SHA-1 of the contents of the file at path.

116

117

XXX: If the file does not exist or is not a plain file???

148

118

"""

149

abspath = pathjoin(self.root, path)

119

120

import os, time

121

from bzrlib.osutils import sha_file

122

from bzrlib.trace import mutter

123

124

abspath = os.path.join(self.basedir, path)

125

fp = _fingerprint(abspath)

126

c = self._cache.get(path)

127

if c:

128

cache_sha1, cache_fp = c

129

else:

130

cache_sha1, cache_fp = None, None

131

150

132

self.stat_count += 1

151

file_fp = _fingerprint(abspath)

152

153

if not file_fp:

154

# not a regular file or not existing

155

if path in self._cache:

156

self.removed_count += 1

157

self.needs_write = True

158

del self._cache[path]

159

return None

160

161

if path in self._cache:

162

cache_sha1, cache_fp = self._cache[path]

163

else:

164

cache_sha1, cache_fp = None, None

165

166

if cache_fp == file_fp:

133

134

if not fp:

135

# not a regular file

136

return None

137

elif cache_fp and (cache_fp == fp):

167

138

self.hit_count += 1

168

139

return cache_sha1

169

170

self.miss_count += 1

171

172

173

mode = file_fp[FP_MODE_COLUMN]

174

if stat.S_ISREG(mode):

175

digest = sha_file(file(abspath, 'rb', buffering=65000))

176

elif stat.S_ISLNK(mode):

177

digest = sha.new(os.readlink(abspath)).hexdigest()

178

140

else:

179

raise BzrError("file %r: unknown file stat mode: %o"%(abspath,mode))

141

self.miss_count += 1

142

digest = sha_file(file(abspath, 'rb'))

180

143

181

now = int(time.time())

182

if file_fp[FP_MTIME_COLUMN] >= now or file_fp[FP_CTIME_COLUMN] >= now:

183

# changed too recently; can't be cached. we can

184

# return the result and it could possibly be cached

185

# next time.

186

187

# the point is that we only want to cache when we are sure that any

188

# subsequent modifications of the file can be detected. If a

189

# modification neither changes the inode, the device, the size, nor

190

# the mode, then we can only distinguish it by time; therefore we

191

# need to let sufficient time elapse before we may cache this entry

192

# again. If we didn't do this, then, for example, a very quick 1

193

# byte replacement in the file might go undetected.

194

self.danger_count += 1

195

if cache_fp:

196

self.removed_count += 1

144

now = int(time.time())

145

if fp[1] >= now or fp[2] >= now:

146

# changed too recently; can't be cached. we can

147

# return the result and it could possibly be cached

148

# next time.

149

self.danger_count += 1

150

if cache_fp:

151

mutter("remove outdated entry for %s" % path)

152

self.needs_write = True

153

del self._cache[path]

154

elif (fp != cache_fp) or (digest != cache_sha1):

155

mutter("update entry for %s" % path)

156

mutter(" %r" % (fp,))

157

mutter(" %r" % (cache_fp,))

197

158

self.needs_write = True

198

del self._cache[path]

199

else:

200

self.update_count += 1

201

self.needs_write = True

202

self._cache[path] = (digest, file_fp)

203

return digest

204

159

self._cache[path] = (digest, fp)

160

161

return digest

162

163

164

205

165

def write(self):

206

166

"""Write contents of cache to file."""

207

outf = AtomicFile(self.cache_file_name(), 'wb', new_mode=self._mode)

167

from atomicfile import AtomicFile

168

169

outf = AtomicFile(self.cache_file_name(), 'wb')

208

170

try:

209

171

print >>outf, CACHE_HEADER,

210

172

216

178

for fld in c[1]:

217

179

print >>outf, "%d" % fld,

218

180

print >>outf

181

219

182

outf.commit()

220

183

self.needs_write = False

221

mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",

222

self.cache_file_name(), self.hit_count, self.miss_count,

223

self.stat_count,

224

self.danger_count, self.update_count)

225

184

finally:

226

185

if not outf.closed:

227

186

outf.abort()

187

188

228

189

229

190

def read(self):

230

191

"""Reinstate cache from file.

233

194

234

195

If the cache file has the wrong version marker, this just clears

235

196

the cache."""

197

from bzrlib.trace import mutter, warning

198

236

199

self._cache = {}

237

200

238

201

fn = self.cache_file_name()

239

202

try:

240

inf = file(fn, 'rb', buffering=65000)

203

inf = file(fn, 'rb')

241

204

except IOError, e:

242

mutter("failed to open %s: %s", fn, e)

243

# better write it now so it is valid

244

self.needs_write = True

205

mutter("failed to open %s: %s" % (fn, e))

245

206

return

246

207

247

208

248

209

hdr = inf.readline()

249

210

if hdr != CACHE_HEADER:

250

mutter('cache header marker not found at top of %s;'

251

' discarding cache', fn)

252

self.needs_write = True

211

mutter('cache header marker not found at top of %s; discarding cache'

212

% fn)

253

213

return

254

214

255

215

for l in inf:

261

221

262

222

pos += 3

263

223

fields = l[pos:].split(' ')

264

if len(fields) != 7:

224

if len(fields) != 6:

265

225

warning("bad line in hashcache: %r" % l)

266

226

continue

267

227

Older »