~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/statcache.py

Committer: Martin Pool
Date: 2005-05-19 08:31:06 UTC
Revision ID: mbp@sourcefrog.net-20050519083106-ebe71562d3bda4a7

- fix typo

files removed:
bzr-man.py

bzrlib/lock.py

bzrlib/progress.py

bzrlib/selftest.py

bzrlib/whitebox.py

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/upload-bzr.dev

patches/annotate3.patch

patches/annotate4.patch

patches/find-touching-from-seq.diff

patches/progress.diff

files renamed:
contrib/bash/bzr.simple => contrib/bash/bzr

files modified:
.bzrignore

NEWS

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/statcache.py

bzrlib/status.py

bzrlib/store.py

bzrlib/tree.py

contrib/add-bzr-to-baz

testbzr

Show diffs side-by-side

added added

removed removed

bzrlib/statcache.py

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import stat, os, sha, time

from binascii import b2a_qp, a2b_qp

from trace import mutter

from errors import BzrError, BzrCheckError

to gradually adjust your clock or don't use bzr over the step.

At the moment this is stored in a simple textfile; it might be nice

to use a tdb instead to allow faster lookup by file-id.

to use a tdb instead.

The cache is represented as a map from file_id to a tuple of (file_id,

sha1, path, size, mtime, ctime, ino, dev).

The SHA-1 is stored in memory as a hexdigest.

This version of the file on disk has one line per record, and fields

separated by \0 characters.

File names and file-ids are written out as the quoted-printable

encoding of their UTF-8 representation. (file-ids shouldn't contain

weird characters, but it might happen.)

"""

# order of fields returned by fingerprint()

CACHE_HEADER = "### bzr statcache v4"

CACHE_HEADER = "### bzr statcache v2"

def fingerprint(abspath):

105

107

fs.st_ctime, fs.st_ino, fs.st_dev)

106

108

107

109

108

109

def _write_cache(basedir, entries):

110

def _write_cache(basedir, entry_iter, dangerfiles):

110

111

from atomicfile import AtomicFile

111

112

113

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

113

114

outf = AtomicFile(cachefn, 'wb')

115

outf.write(CACHE_HEADER + '\n')

114

116

try:

115

outf.write(CACHE_HEADER + '\n')

116

117

for entry in entries:

117

for entry in entry_iter:

118

if len(entry) != 8:

119

raise ValueError("invalid statcache entry tuple %r" % entry)

120

outf.write(entry[0].encode('utf-8')) # file id

121

outf.write('\0')

122

outf.write(entry[1]) # hex sha1

123

outf.write('\0')

124

outf.write(entry[2].encode('utf-8')) # name

120

121

if entry[SC_FILE_ID] in dangerfiles:

122

continue # changed too recently

123

outf.write(b2a_qp(entry[0].encode('utf-8'))) # file id

124

outf.write(' ')

125

outf.write(entry[1]) # hex sha1

126

outf.write(' ')

127

outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name

125

128

for nf in entry[3:]:

126

outf.write('\0%d' % nf)

129

outf.write(' %d' % nf)

127

130

outf.write('\n')

128

131

129

132

outf.commit()

130

133

finally:

131

134

if not outf.closed:

132

135

outf.abort()

133

134

135

def _try_write_cache(basedir, entries):

136

try:

137

return _write_cache(basedir, entries)

138

except IOError, e:

139

mutter("cannot update statcache in %s: %s" % (basedir, e))

140

except OSError, e:

141

mutter("cannot update statcache in %s: %s" % (basedir, e))

142

143

136

144

137

145

138

def load_cache(basedir):

146

139

import re

147

140

cache = {}

148

141

seen_paths = {}

149

from bzrlib.trace import warning

150

151

assert isinstance(basedir, basestring)

152

142

153

143

sha_re = re.compile(r'[a-f0-9]{40}')

154

144

160

150

161

151

line1 = cachefile.readline().rstrip('\r\n')

162

152

if line1 != CACHE_HEADER:

163

mutter('cache header marker not found at top of %s; discarding cache'

164

% cachefn)

153

mutter('cache header marker not found at top of %s' % cachefn)

165

154

return cache

166

155

167

156

for l in cachefile:

168

f = l.split('\0')

157

f = l.split(' ')

169

158

170

file_id = f[0].decode('utf-8')

159

file_id = a2b_qp(f[0]).decode('utf-8')

171

160

if file_id in cache:

172

warning("duplicated file_id in cache: {%s}" % file_id)

161

raise BzrCheckError("duplicated file_id in cache: {%s}" % file_id)

173

162

174

163

text_sha = f[1]

175

164

if len(text_sha) != 40 or not sha_re.match(text_sha):

176

165

raise BzrCheckError("invalid file SHA-1 in cache: %r" % text_sha)

177

166

178

path = f[2].decode('utf-8')

167

path = a2b_qp(f[2]).decode('utf-8')

179

168

if path in seen_paths:

180

warning("duplicated path in cache: %r" % path)

169

raise BzrCheckError("duplicated path in cache: %r" % path)

181

170

seen_paths[path] = True

182

171

183

172

entry = (file_id, text_sha, path) + tuple([long(x) for x in f[3:]])

189

178

190

179

191

180

181

192

182

def _files_from_inventory(inv):

193

183

for path, ie in inv.iter_entries():

194

184

if ie.kind != 'file':

206

196

flush -- discard any previous cache and recalculate from scratch.

207

197

"""

208

198

209

# load the existing cache; use information there to find a list of

210

# files ordered by inode, which is alleged to be the fastest order

211

# to stat the files.

212

199

213

to_update = _files_from_inventory(inv)

200

# TODO: It's supposed to be faster to stat the files in order by inum.

201

# We don't directly know the inum of the files of course but we do

202

# know where they were last sighted, so we can sort by that.

214

203

215

204

assert isinstance(flush, bool)

216

205

if flush:

217

206

cache = {}

218

207

else:

219

208

cache = load_cache(basedir)

220

221

by_inode = []

222

without_inode = []

223

for file_id, path in to_update:

224

if file_id in cache:

225

by_inode.append((cache[file_id][SC_INO], file_id, path))

226

else:

227

without_inode.append((file_id, path))

228

by_inode.sort()

229

230

to_update = [a[1:] for a in by_inode] + without_inode

231

232

stat_cnt = missing_cnt = new_cnt = hardcheck = change_cnt = 0

233

234

# dangerfiles have been recently touched and can't be committed to

235

# a persistent cache yet, but they are returned to the caller.

236

dangerfiles = []

209

return _update_cache_from_list(basedir, cache, _files_from_inventory(inv))

210

211

212

213

def _update_cache_from_list(basedir, cache, to_update):

214

"""Update and return the cache for given files.

215

216

cache -- Previously cached values to be validated.

217

218

to_update -- Sequence of (file_id, path) pairs to check.

219

"""

220

stat_cnt = missing_cnt = hardcheck = change_cnt = 0

221

222

# dangerfiles have been recently touched and can't be

223

# committed to a persistent cache yet.

224

dangerfiles = {}

237

225

238

226

now = int(time.time())

239

227

251

239

change_cnt += 1

252

240

missing_cnt += 1

253

241

continue

254

elif not cacheentry:

255

new_cnt += 1

256

242

257

243

if (fp[FP_MTIME] >= now) or (fp[FP_CTIME] >= now):

258

dangerfiles.append(file_id)

244

dangerfiles[file_id] = True

259

245

260

246

if cacheentry and (cacheentry[3:] == fp):

261

247

continue # all stat fields unchanged

272

258

change_cnt += 1

273

259

274

260

mutter('statcache: statted %d files, read %d files, %d changed, %d dangerous, '

275

'%d deleted, %d new, '

276

261

'%d in cache'

277

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles),

278

missing_cnt, new_cnt, len(cache)))

262

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles), len(cache)))

279

263

280

264

if change_cnt:

281

265

mutter('updating on-disk statcache')

282

283

if dangerfiles:

284

safe_cache = cache.copy()

285

for file_id in dangerfiles:

286

del safe_cache[file_id]

287

else:

288

safe_cache = cache

289

290

_try_write_cache(basedir, safe_cache.itervalues())

266

_write_cache(basedir, cache.itervalues(), dangerfiles)

291

267

292

268

return cache

Older »