~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/statcache.py

Committer: Martin Pool
Date: 2005-05-25 03:27:02 UTC
Revision ID: mbp@sourcefrog.net-20050525032702-395f038adb33c235

- clean up statcache code
- stat files in order by inum
- report on added/deleted files

files added:
patches/symlink-support.patch

files modified:
NEWS

TODO

bzrlib/__init__.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/remotebranch.py

bzrlib/statcache.py

bzrlib/tree.py

testbzr

urlgrabber/keepalive.py

Show diffs side-by-side

added added

removed removed

bzrlib/statcache.py

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import stat, os, sha, time

from binascii import b2a_qp, a2b_qp

from trace import mutter

from errors import BzrError, BzrCheckError

The SHA-1 is stored in memory as a hexdigest.

File names are written out as the quoted-printable encoding of their

UTF-8 representation.

File names and file-ids are written out with non-ascii or whitespace

characters given as python-style unicode escapes. (file-ids shouldn't

contain weird characters, but it might happen.)

"""

# order of fields returned by fingerprint()

SC_DEV = 7

CACHE_HEADER = "### bzr statcache v3"

def fingerprint(abspath):

try:

fs = os.lstat(abspath)

102

106

fs.st_ctime, fs.st_ino, fs.st_dev)

103

107

104

108

105

def _write_cache(basedir, entry_iter, dangerfiles):

109

110

def safe_quote(s):

111

return s.encode('unicode_escape') \

112

.replace('\n', '\\u000a') \

113

.replace(' ', '\\u0020') \

114

.replace('\r', '\\u000d')

115

116

117

def _write_cache(basedir, entries):

106

118

from atomicfile import AtomicFile

107

119

108

120

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

109

121

outf = AtomicFile(cachefn, 'wb')

122

outf.write(CACHE_HEADER + '\n')

110

123

try:

111

for entry in entry_iter:

124

for entry in entries:

112

125

if len(entry) != 8:

113

126

raise ValueError("invalid statcache entry tuple %r" % entry)

114

115

if entry[SC_FILE_ID] in dangerfiles:

116

continue # changed too recently

117

outf.write(entry[0]) # file id

118

outf.write(' ')

119

outf.write(entry[1]) # hex sha1

120

outf.write(' ')

121

outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name

127

outf.write(safe_quote(entry[0])) # file id

128

outf.write(' ')

129

outf.write(entry[1]) # hex sha1

130

outf.write(' ')

131

outf.write(safe_quote(entry[2])) # name

122

132

for nf in entry[3:]:

123

133

outf.write(' %d' % nf)

124

134

outf.write('\n')

127

137

finally:

128

138

if not outf.closed:

129

139

outf.abort()

140

141

142

def _try_write_cache(basedir, entries):

143

try:

144

return _write_cache(basedir, entries)

145

except IOError, e:

146

mutter("cannot update statcache in %s: %s" % (basedir, e))

147

except OSError, e:

148

mutter("cannot update statcache in %s: %s" % (basedir, e))

149

130

150

131

151

132

152

def load_cache(basedir):

133

from sets import Set

153

import re

134

154

cache = {}

135

seen_paths = Set()

155

seen_paths = {}

156

157

sha_re = re.compile(r'[a-f0-9]{40}')

136

158

137

159

try:

138

160

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

139

cachefile = open(cachefn, 'r')

161

cachefile = open(cachefn, 'rb')

140

162

except IOError:

141

163

return cache

142

164

165

line1 = cachefile.readline().rstrip('\r\n')

166

if line1 != CACHE_HEADER:

167

mutter('cache header marker not found at top of %s' % cachefn)

168

return cache

169

143

170

for l in cachefile:

144

171

f = l.split(' ')

145

172

146

file_id = f[0]

173

file_id = f[0].decode('unicode_escape')

147

174

if file_id in cache:

148

raise BzrError("duplicated file_id in cache: {%s}" % file_id)

175

raise BzrCheckError("duplicated file_id in cache: {%s}" % file_id)

176

177

text_sha = f[1]

178

if len(text_sha) != 40 or not sha_re.match(text_sha):

179

raise BzrCheckError("invalid file SHA-1 in cache: %r" % text_sha)

149

180

150

path = a2b_qp(f[2]).decode('utf-8')

181

path = f[2].decode('unicode_escape')

151

182

if path in seen_paths:

152

183

raise BzrCheckError("duplicated path in cache: %r" % path)

153

seen_paths.add(path)

184

seen_paths[path] = True

154

185

155

entry = (file_id, f[1], path) + tuple([long(x) for x in f[3:]])

186

entry = (file_id, text_sha, path) + tuple([long(x) for x in f[3:]])

156

187

if len(entry) != 8:

157

188

raise ValueError("invalid statcache entry tuple %r" % entry)

158

189

179

210

flush -- discard any previous cache and recalculate from scratch.

180

211

"""

181

212

213

# load the existing cache; use information there to find a list of

214

# files ordered by inode, which is alleged to be the fastest order

215

# to stat the files.

182

216

183

# TODO: It's supposed to be faster to stat the files in order by inum.

184

# We don't directly know the inum of the files of course but we do

185

# know where they were last sighted, so we can sort by that.

217

to_update = _files_from_inventory(inv)

186

218

187

219

assert isinstance(flush, bool)

188

220

if flush:

189

221

cache = {}

190

222

else:

191

223

cache = load_cache(basedir)

192

return _update_cache_from_list(basedir, cache, _files_from_inventory(inv))

193

194

195

196

def _update_cache_from_list(basedir, cache, to_update):

197

"""Update and return the cache for given files.

198

199

cache -- Previously cached values to be validated.

200

201

to_update -- Sequence of (file_id, path) pairs to check.

202

"""

203

204

from sets import Set

205

206

stat_cnt = missing_cnt = hardcheck = change_cnt = 0

207

208

# files that have been recently touched and can't be

209

# committed to a persistent cache yet.

224

225

by_inode = []

226

without_inode = []

227

for file_id, path in to_update:

228

if file_id in cache:

229

by_inode.append((cache[file_id][SC_INO], file_id, path))

230

else:

231

without_inode.append((file_id, path))

232

by_inode.sort()

233

234

to_update = [a[1:] for a in by_inode] + without_inode

235

236

stat_cnt = missing_cnt = new_cnt = hardcheck = change_cnt = 0

237

238

# dangerfiles have been recently touched and can't be committed to

239

# a persistent cache yet, but they are returned to the caller.

240

dangerfiles = []

210

241

211

dangerfiles = Set()

212

242

now = int(time.time())

213

243

214

244

## mutter('update statcache under %r' % basedir)

225

255

change_cnt += 1

226

256

missing_cnt += 1

227

257

continue

258

elif not cacheentry:

259

new_cnt += 1

228

260

229

261

if (fp[FP_MTIME] >= now) or (fp[FP_CTIME] >= now):

230

dangerfiles.add(file_id)

262

dangerfiles.append(file_id)

231

263

232

264

if cacheentry and (cacheentry[3:] == fp):

233

265

continue # all stat fields unchanged

244

276

change_cnt += 1

245

277

246

278

mutter('statcache: statted %d files, read %d files, %d changed, %d dangerous, '

279

'%d deleted, %d new, '

247

280

'%d in cache'

248

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles), len(cache)))

281

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles),

282

missing_cnt, new_cnt, len(cache)))

249

283

250

284

if change_cnt:

251

285

mutter('updating on-disk statcache')

252

_write_cache(basedir, cache.itervalues(), dangerfiles)

286

287

if dangerfiles:

288

safe_cache = cache.copy()

289

for file_id in dangerfiles:

290

del safe_cache[file_id]

291

else:

292

safe_cache = cache

293

294

_try_write_cache(basedir, safe_cache.itervalues())

253

295

254

296

return cache

Older »