~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/statcache.py

Committer: Martin Pool
Date: 2005-05-17 06:56:16 UTC
Revision ID: mbp@sourcefrog.net-20050517065616-6f23381d6184a8aa

- add space for un-merged patches

files removed:
bzr-man.py

bzrlib/lock.py

bzrlib/meta_store.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/selftest

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/upgrade.py

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

effbot

effbot/__init__.py

effbot/org

effbot/org/__init__.py

effbot/org/gzip_consumer.py

effbot/org/http_client.py

effbot/org/http_manager.py

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

plugins/rsync

plugins/rsync/__init__.py

plugins/rsync/rsync_update.py

files renamed:
contrib/bash/bzr.simple => contrib/bash/bzr

files modified:
.bzrignore

NEWS

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/statcache.py

bzrlib/status.py

bzrlib/store.py

bzrlib/tree.py

bzrlib/xml.py

contrib/add-bzr-to-baz

doc/formats.txt

testbzr

urlgrabber/keepalive.py

Show diffs side-by-side

added

removed

bzrlib/statcache.py

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import stat, os, sha, time

from binascii import b2a_qp, a2b_qp

from trace import mutter

from errors import BzrError, BzrCheckError

to gradually adjust your clock or don't use bzr over the step.

At the moment this is stored in a simple textfile; it might be nice

to use a tdb instead to allow faster lookup by file-id.

to use a tdb instead.

The cache is represented as a map from file_id to a tuple of (file_id,

sha1, path, size, mtime, ctime, ino, dev).

The SHA-1 is stored in memory as a hexdigest.

This version of the file on disk has one line per record, and fields

separated by \0 characters.

File names are written out as the quoted-printable encoding of their

UTF-8 representation.

"""

# order of fields returned by fingerprint()

SC_DEV = 7

CACHE_HEADER = "### bzr statcache v4"

def fingerprint(abspath):

try:

fs = os.lstat(abspath)

105

102

fs.st_ctime, fs.st_ino, fs.st_dev)

106

103

107

104

108

109

def _write_cache(basedir, entries):

105

def _write_cache(basedir, entry_iter, dangerfiles):

110

106

from atomicfile import AtomicFile

111

107

112

108

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

113

109

outf = AtomicFile(cachefn, 'wb')

114

110

try:

115

outf.write(CACHE_HEADER + '\n')

116

117

for entry in entries:

111

for entry in entry_iter:

118

112

if len(entry) != 8:

119

113

raise ValueError("invalid statcache entry tuple %r" % entry)

120

outf.write(entry[0].encode('utf-8')) # file id

121

outf.write('\0')

122

outf.write(entry[1]) # hex sha1

123

outf.write('\0')

124

outf.write(entry[2].encode('utf-8')) # name

114

115

if entry[SC_FILE_ID] in dangerfiles:

116

continue # changed too recently

117

outf.write(entry[0]) # file id

118

outf.write(' ')

119

outf.write(entry[1]) # hex sha1

120

outf.write(' ')

121

outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name

125

122

for nf in entry[3:]:

126

outf.write('\0%d' % nf)

123

outf.write(' %d' % nf)

127

124

outf.write('\n')

128

125

129

126

outf.commit()

130

127

finally:

131

128

if not outf.closed:

132

129

outf.abort()

133

134

135

def _try_write_cache(basedir, entries):

136

try:

137

return _write_cache(basedir, entries)

138

except IOError, e:

139

mutter("cannot update statcache in %s: %s" % (basedir, e))

140

except OSError, e:

141

mutter("cannot update statcache in %s: %s" % (basedir, e))

142

143

130

144

131

145

132

def load_cache(basedir):

146

import re

133

from sets import Set

147

134

cache = {}

148

seen_paths = {}

149

from bzrlib.trace import warning

150

151

assert isinstance(basedir, basestring)

152

153

sha_re = re.compile(r'[a-f0-9]{40}')

135

seen_paths = Set()

154

136

155

137

try:

156

138

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

157

cachefile = open(cachefn, 'rb')

139

cachefile = open(cachefn, 'r')

158

140

except IOError:

159

141

return cache

160

161

line1 = cachefile.readline().rstrip('\r\n')

162

if line1 != CACHE_HEADER:

163

mutter('cache header marker not found at top of %s; discarding cache'

164

% cachefn)

165

return cache

166

142

167

143

for l in cachefile:

168

f = l.split('\0')

144

f = l.split(' ')

169

145

170

file_id = f[0].decode('utf-8')

146

file_id = f[0]

171

147

if file_id in cache:

172

warning("duplicated file_id in cache: {%s}" % file_id)

173

174

text_sha = f[1]

175

if len(text_sha) != 40 or not sha_re.match(text_sha):

176

raise BzrCheckError("invalid file SHA-1 in cache: %r" % text_sha)

148

raise BzrError("duplicated file_id in cache: {%s}" % file_id)

177

149

178

path = f[2].decode('utf-8')

150

path = a2b_qp(f[2]).decode('utf-8')

179

151

if path in seen_paths:

180

warning("duplicated path in cache: %r" % path)

181

seen_paths[path] = True

152

raise BzrCheckError("duplicated path in cache: %r" % path)

153

seen_paths.add(path)

182

154

183

entry = (file_id, text_sha, path) + tuple([long(x) for x in f[3:]])

155

entry = (file_id, f[1], path) + tuple([long(x) for x in f[3:]])

184

156

if len(entry) != 8:

185

157

raise ValueError("invalid statcache entry tuple %r" % entry)

186

158

189

161

190

162

191

163

164

192

165

def _files_from_inventory(inv):

193

166

for path, ie in inv.iter_entries():

194

167

if ie.kind != 'file':

206

179

flush -- discard any previous cache and recalculate from scratch.

207

180

"""

208

181

209

# load the existing cache; use information there to find a list of

210

# files ordered by inode, which is alleged to be the fastest order

211

# to stat the files.

212

182

213

to_update = _files_from_inventory(inv)

183

# TODO: It's supposed to be faster to stat the files in order by inum.

184

# We don't directly know the inum of the files of course but we do

185

# know where they were last sighted, so we can sort by that.

214

186

215

187

assert isinstance(flush, bool)

216

188

if flush:

217

189

cache = {}

218

190

else:

219

191

cache = load_cache(basedir)

220

221

by_inode = []

222

without_inode = []

223

for file_id, path in to_update:

224

if file_id in cache:

225

by_inode.append((cache[file_id][SC_INO], file_id, path))

226

else:

227

without_inode.append((file_id, path))

228

by_inode.sort()

229

230

to_update = [a[1:] for a in by_inode] + without_inode

231

232

stat_cnt = missing_cnt = new_cnt = hardcheck = change_cnt = 0

233

234

# dangerfiles have been recently touched and can't be committed to

235

# a persistent cache yet, but they are returned to the caller.

236

dangerfiles = []

192

return _update_cache_from_list(basedir, cache, _files_from_inventory(inv))

193

194

195

196

def _update_cache_from_list(basedir, cache, to_update):

197

"""Update and return the cache for given files.

198

199

cache -- Previously cached values to be validated.

200

201

to_update -- Sequence of (file_id, path) pairs to check.

202

"""

203

204

from sets import Set

205

206

stat_cnt = missing_cnt = hardcheck = change_cnt = 0

207

208

# files that have been recently touched and can't be

209

# committed to a persistent cache yet.

237

210

211

dangerfiles = Set()

238

212

now = int(time.time())

239

213

240

214

## mutter('update statcache under %r' % basedir)

251

225

change_cnt += 1

252

226

missing_cnt += 1

253

227

continue

254

elif not cacheentry:

255

new_cnt += 1

256

228

257

229

if (fp[FP_MTIME] >= now) or (fp[FP_CTIME] >= now):

258

dangerfiles.append(file_id)

230

dangerfiles.add(file_id)

259

231

260

232

if cacheentry and (cacheentry[3:] == fp):

261

233

continue # all stat fields unchanged

272

244

change_cnt += 1

273

245

274

246

mutter('statcache: statted %d files, read %d files, %d changed, %d dangerous, '

275

'%d deleted, %d new, '

276

247

'%d in cache'

277

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles),

278

missing_cnt, new_cnt, len(cache)))

248

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles), len(cache)))

279

249

280

250

if change_cnt:

281

251

mutter('updating on-disk statcache')

282

283

if dangerfiles:

284

safe_cache = cache.copy()

285

for file_id in dangerfiles:

286

del safe_cache[file_id]

287

else:

288

safe_cache = cache

289

290

_try_write_cache(basedir, safe_cache.itervalues())

252

_write_cache(basedir, cache.itervalues(), dangerfiles)

291

253

292

254

return cache

Older »