~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/hashcache.py

Committer: Martin Pool
Date: 2005-08-18 05:44:39 UTC
Revision ID: mbp@sourcefrog.net-20050818054439-ba0873b87a8c1671

- add code to run weave utility under profiler

files added:
bzrlib/changeset.py

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/plugins/checkperms

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

testsweet.py

files removed:
BRANCH.TODO

INSTALL

Makefile

NEWS.developers

bzrlib/annotate.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_status.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

doc/README.1st

doc/configuration.txt

doc/plugins.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/using_aliases.txt

generate_docs.py

patience-test.py

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/branch_implementations/test_branch.py => bzrlib/selftest/testbranch.py

bzrlib/tests/test_diff.py => bzrlib/selftest/testdiff.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_inv.py => bzrlib/selftest/testinv.py

bzrlib/tests/test_log.py => bzrlib/selftest/testlog.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/test_revision.py => bzrlib/selftest/testrevision.py

bzrlib/tests/test_revisionnamespaces.py => bzrlib/selftest/testrevisionnamespaces.py

bzrlib/tests/blackbox/test_status.py => bzrlib/selftest/teststatus.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/tests/test_weave.py => tools/testweave.py

doc/tutorial.txt => tutorial.txt

files modified:
.bzrignore

.rsyncexclude

HACKING

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/pwk

contrib/zsh/_bzr

setup.py *

tools/weavebench.py

Show diffs side-by-side

Legend: added

Legend: removed

bzrlib/hashcache.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# TODO: Perhaps return more details on the file to avoid statting it

# again: nonexistent, file type, size, etc

# TODO: Perhaps use a Python pickle instead of a text file; might be faster.

CACHE_HEADER = "### bzr hashcache v5\n"

import os, stat, time

import sha

from bzrlib.osutils import sha_file, pathjoin, safe_unicode

from bzrlib.osutils import sha_file

from bzrlib.trace import mutter, warning

from bzrlib.atomicfile import AtomicFile

from bzrlib.errors import BzrError

FP_MTIME_COLUMN = 1

FP_CTIME_COLUMN = 2

FP_MODE_COLUMN = 5

def _fingerprint(abspath):

try:

# we discard any high precision because it's not reliable; perhaps we

# could do better on some systems?

return (fs.st_size, long(fs.st_mtime),

long(fs.st_ctime), fs.st_ino, fs.st_dev, fs.st_mode)

long(fs.st_ctime), fs.st_ino, fs.st_dev)

class HashCache(object):

"""

needs_write = False

def __init__(self, root, cache_file_name, mode=None):

"""Create a hash cache in base dir, and set the file mode to mode."""

self.root = safe_unicode(root)

def __init__(self, basedir):

self.basedir = basedir

100

self.hit_count = 0

101

self.miss_count = 0

102

self.stat_count = 0

104

self.removed_count = 0

105

self.update_count = 0

106

self._cache = {}

107

self._mode = mode

108

self._cache_file_name = safe_unicode(cache_file_name)

109

110

100

def cache_file_name(self):

111

return self._cache_file_name

101

return os.sep.join([self.basedir, '.bzr', 'stat-cache'])

102

103

104

112

105

113

106

def clear(self):

114

107

"""Discard all cached information.

118

111

self.needs_write = True

119

112

self._cache = {}

120

113

114

121

115

def scan(self):

122

116

"""Scan all files and remove entries where the cache entry is obsolete.

123

117

124

118

Obsolete entries are those where the file has been modified or deleted

125

119

since the entry was inserted.

126

120

"""

127

# FIXME optimisation opportunity, on linux [and check other oses]:

128

# rather than iteritems order, stat in inode order.

129

121

prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]

130

122

prep.sort()

131

123

132

124

for inum, path, cache_entry in prep:

133

abspath = pathjoin(self.root, path)

125

abspath = os.sep.join([self.basedir, path])

134

126

fp = _fingerprint(abspath)

135

127

self.stat_count += 1

136

128

143

135

del self._cache[path]

144

136

145

137

138

146

139

def get_sha1(self, path):

147

140

"""Return the sha1 of a file.

148

141

"""

149

abspath = pathjoin(self.root, path)

142

abspath = os.sep.join([self.basedir, path])

150

143

self.stat_count += 1

151

144

file_fp = _fingerprint(abspath)

152

145

168

161

return cache_sha1

169

162

170

163

self.miss_count += 1

171

172

173

mode = file_fp[FP_MODE_COLUMN]

174

if stat.S_ISREG(mode):

175

digest = sha_file(file(abspath, 'rb', buffering=65000))

176

elif stat.S_ISLNK(mode):

177

digest = sha.new(os.readlink(abspath)).hexdigest()

178

else:

179

raise BzrError("file %r: unknown file stat mode: %o"%(abspath,mode))

164

digest = sha_file(file(abspath, 'rb', buffering=65000))

180

165

181

166

now = int(time.time())

182

if file_fp[FP_MTIME_COLUMN] >= now or file_fp[FP_CTIME_COLUMN] >= now:

167

if file_fp[1] >= now or file_fp[2] >= now:

183

168

# changed too recently; can't be cached. we can

184

169

# return the result and it could possibly be cached

185

170

# next time.

186

187

# the point is that we only want to cache when we are sure that any

188

# subsequent modifications of the file can be detected. If a

189

# modification neither changes the inode, the device, the size, nor

190

# the mode, then we can only distinguish it by time; therefore we

191

# need to let sufficient time elapse before we may cache this entry

192

# again. If we didn't do this, then, for example, a very quick 1

193

# byte replacement in the file might go undetected.

194

171

self.danger_count += 1

195

172

if cache_fp:

196

173

self.removed_count += 1

200

177

self.update_count += 1

201

178

self.needs_write = True

202

179

self._cache[path] = (digest, file_fp)

180

203

181

return digest

204

182

183

184

185

205

186

def write(self):

206

187

"""Write contents of cache to file."""

207

outf = AtomicFile(self.cache_file_name(), 'wb', new_mode=self._mode)

188

from atomicfile import AtomicFile

189

190

outf = AtomicFile(self.cache_file_name(), 'wb')

208

191

try:

209

192

print >>outf, CACHE_HEADER,

210

193

216

199

for fld in c[1]:

217

200

print >>outf, "%d" % fld,

218

201

print >>outf

202

219

203

outf.commit()

220

204

self.needs_write = False

221

mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",

222

self.cache_file_name(), self.hit_count, self.miss_count,

223

self.stat_count,

224

self.danger_count, self.update_count)

225

205

finally:

226

206

if not outf.closed:

227

207

outf.abort()

208

209

228

210

229

211

def read(self):

230

212

"""Reinstate cache from file.

239

221

try:

240

222

inf = file(fn, 'rb', buffering=65000)

241

223

except IOError, e:

242

mutter("failed to open %s: %s", fn, e)

243

# better write it now so it is valid

244

self.needs_write = True

224

mutter("failed to open %s: %s" % (fn, e))

245

225

return

246

226

247

227

248

228

hdr = inf.readline()

249

229

if hdr != CACHE_HEADER:

250

mutter('cache header marker not found at top of %s;'

251

' discarding cache', fn)

252

self.needs_write = True

230

mutter('cache header marker not found at top of %s; discarding cache'

231

% fn)

253

232

return

254

233

255

234

for l in inf:

261

240

262

241

pos += 3

263

242

fields = l[pos:].split(' ')

264

if len(fields) != 7:

243

if len(fields) != 6:

265

244

warning("bad line in hashcache: %r" % l)

266

245

continue

267

246

Older »