~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/cache.py

Committer: Martin Pool
Date: 2005-05-05 06:38:18 UTC
Revision ID: mbp@sourcefrog.net-20050505063818-3eb3260343878325

- do upload CHANGELOG to web server, even though it's autogenerated

files added:
bzrlib/cache.py

bzrlib/tests.py

files removed:
HACKING

Makefile

bzr-man.py

bzrlib/atomicfile.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/selftest

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/revfile-annotation.txt

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testsweet.py

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/history2weaves.py

tools/http_client.py

tools/testweave.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
contrib/bash/bzr.simple => contrib/bash/bzr

bzrlib/util/elementtree/ => elementtree/

bzrlib/util/urlgrabber/ => urlgrabber/

files modified:
.bzrignore

NEWS

README

TODO

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/mdiff.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/store.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/xml.py

contrib/add-bzr-to-baz

contrib/zsh/_bzr

doc/formats.txt

doc/index.txt

doc/revfile.txt

doc/tagging.txt

doc/todo-from-arch.txt

setup.py

testbzr

urlgrabber/keepalive.py

Show diffs side-by-side

added added

removed removed

bzrlib/cache.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import stat, os, sha, time

from binascii import b2a_qp, a2b_qp

from trace import mutter

# file fingerprints are: (size, mtime, ctime, ino, dev).

# if this is the same for this file as in the previous revision, we

# assume the content is the same and the SHA-1 is the same.

# This is stored in a fingerprint file that also contains the file-id

# and the content SHA-1.

# Thus for any given file we can quickly get the SHA-1, either from

# the cache or, if the cache is out of date, by re-reading the file.

# At the moment this is stored in a simple textfile; it might be nice

# to use a tdb instead.

# What we need:

# build a new cache from scratch

# load cache, incrementally update it

# TODO: Have a paranoid mode where we always compare the texts and

# always recalculate the digest, to trap modification without stat

# change and SHA collisions.

def fingerprint(path, abspath):
    """Return the stat fingerprint of the file at abspath.

    The fingerprint is the tuple (size, mtime, ctime, ino, dev) with every
    field converted to an integer.  write_cache stores these fields with
    '%d' and load_cache reads them back as integers, so a fractional
    mtime or ctime would never compare equal to its cached value and the
    file would be re-read on every update.

    path is not used here; the parameter is kept so that existing callers
    keep working.

    Returns None if abspath cannot be stat'ed (missing, etc) or if it is
    a directory.
    """
    try:
        fs = os.lstat(abspath)
    except OSError:
        # might be missing, etc
        return None

    if stat.S_ISDIR(fs.st_mode):
        return None

    return (int(fs.st_size), int(fs.st_mtime),
            int(fs.st_ctime), int(fs.st_ino), int(fs.st_dev))

def write_cache(branch, entry_iter):
    """Write cache entries to the branch's 'work-cache' control file.

    entry_iter yields tuples of
    (file_id, sha1, path, size, mtime, ctime, ino, dev).  Each entry
    becomes one space-separated line; the path is quoted-printable
    encoded (with tabs and spaces quoted) so that it cannot be mistaken
    for several fields.

    The entries are first written to 'work-cache.tmp', which is then
    renamed to 'work-cache'.
    """
    outf = branch.controlfile('work-cache.tmp', 'wt')
    try:
        for entry in entry_iter:
            # b2a_qp wraps long output with soft line breaks ('=' followed
            # by a newline).  Left in place they would split one entry
            # across several lines of the cache file.  A literal '=' is
            # always encoded as '=3D', so '=\n' can only be a soft break,
            # and the decoder discards soft breaks anyway.
            quoted_path = b2a_qp(entry[2], True).replace('=\n', '')
            outf.write(entry[0] + ' ' + entry[1] + ' ' + quoted_path)
            outf.write(' %d %d %d %d %d\n' % entry[3:])
    finally:
        # release the file handle even if an entry could not be written
        outf.close()

    os.rename(branch.controlfilename('work-cache.tmp'),
              branch.controlfilename('work-cache'))

def load_cache(branch):
    """Read the branch's 'work-cache' control file into a dictionary.

    Returns a dict mapping each file_id to the tuple
    (file_id, sha1, path, size, mtime, ctime, ino, dev), where path has
    been decoded from quoted-printable and the trailing fields are
    integers.  An empty dict is returned if the control file cannot be
    opened (IOError).

    Raises BzrError if the same file_id occurs more than once.
    """
    cache = {}

    try:
        cachefile = branch.controlfile('work-cache', 'rt')
    except IOError:
        return cache

    try:
        for line in cachefile:
            # drop the line terminator so the last field is a clean number
            f = line.rstrip('\n').split(' ')
            file_id = f[0]
            if file_id in cache:
                # BzrError is not imported at module level; import it here
                # so this branch raises the intended error rather than a
                # NameError.
                from errors import BzrError
                raise BzrError("duplicated file_id in cache: {%s}" % file_id)

            # int() promotes to long by itself when a value needs it
            cache[file_id] = ((f[0], f[1], a2b_qp(f[2]))
                              + tuple([int(x) for x in f[3:]]))
    finally:
        cachefile.close()

    return cache

def _files_from_inventory(inv):

for path, ie in inv.iter_entries():

if ie.kind != 'file':

continue

yield ie.file_id, path

100

101

102

def build_cache(branch):
    """Build a new cache from scratch for the branch's working inventory.

    Starts from an empty cache and fills it with an entry for every plain
    file listed in the working inventory.

    Returns the cache dictionary, as update_cache does, so the caller can
    use the result directly.
    """
    inv = branch.read_working_inventory()

    cache = {}
    return _update_cache_from_list(branch, cache, _files_from_inventory(inv))

107

108

109

110

def update_cache(branch, inv):
    """Bring the branch's saved cache up to date with the files in inv.

    Loads the existing cache, refreshes it for every plain file in inv,
    and returns the resulting cache dictionary.
    """
    # TODO: It's supposed to be faster to stat the files in order by inum.
    # We don't directly know the inum of the files of course but we do
    # know where they were last sighted, so we can sort by that.

    saved = load_cache(branch)
    candidates = _files_from_inventory(inv)
    return _update_cache_from_list(branch, saved, candidates)

117

118

119

120

def _update_cache_from_list(branch, cache, to_update):
    """Update the cache to have info on the named files.

    to_update is a sequence of (file_id, path) pairs.

    For each pair the file is fingerprinted.  If no fingerprint can be
    taken (the file is missing, or is a directory) any existing entry is
    removed.  If the fingerprint matches the cached one the entry is kept
    as is.  Otherwise the file is read and hashed, and the entry is
    replaced when there was none before or the SHA-1 differs.

    If any entry was added, replaced or removed, the whole cache is
    written out with write_cache.

    Returns the cache dictionary, which is modified in place.
    """
    hardcheck = dirty = 0
    for file_id, path in to_update:
        fap = branch.abspath(path)
        # fingerprint() takes (path, abspath) and stats its second
        # argument, so the absolute path must be passed last.
        fp = fingerprint(path, fap)
        cacheentry = cache.get(file_id)

        if fp is None: # not here
            if cacheentry:
                del cache[file_id]
                dirty += 1
            continue

        if cacheentry and (cacheentry[3:] == fp):
            continue                    # all stat fields unchanged

        hardcheck += 1

        # close the file explicitly rather than relying on the garbage
        # collector to do it
        inf = open(fap, 'rb')
        try:
            dig = sha.new(inf.read()).hexdigest()
        finally:
            inf.close()

        if cacheentry is None or dig != cacheentry[1]:
            # if there was no previous entry for this file, or if the
            # SHA has changed, then update the cache
            # NOTE(review): when only the stat fields changed, the old
            # fingerprint is kept, so the file is read again next time.
            cacheentry = (file_id, dig, path) + fp
            cache[file_id] = cacheentry
            dirty += 1

    mutter('work cache: read %d files, %d changed' % (hardcheck, dirty))

    if dirty:
        write_cache(branch, cache.itervalues())

    return cache

Older »