~bzr-pqm/bzr/bzr.dev : revision 542

15

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

16

17

import stat, os, sha, time

18

from binascii import b2a_qp, a2b_qp

18

19

20

from trace import mutter

20

21

from errors import BzrError, BzrCheckError

58

59

to gradually adjust your clock or don't use bzr over the step.

59

60

61

At the moment this is stored in a simple textfile; it might be nice

61

to use a tdb instead to allow faster lookup by file-id.

62

to use a tdb instead.

62

63

64

The cache is represented as a map from file_id to a tuple of (file_id,

64

65

sha1, path, size, mtime, ctime, ino, dev).

65

66

67

The SHA-1 is stored in memory as a hexdigest.

67

68

This version of the file on disk has one line per record, and fields

69

separated by \0 characters.

69

File names and file-ids are written out as the quoted-printable

70

encoding of their UTF-8 representation. (file-ids shouldn't contain

71

weird characters, but it might happen.)

70

72

"""

71

73

72

74

# order of fields returned by fingerprint()

88

90

89

91

90

92

91

CACHE_HEADER = "### bzr statcache v4"

93

CACHE_HEADER = "### bzr statcache v2"

92

94

93

95

94

96

def fingerprint(abspath):

105

107

fs.st_ctime, fs.st_ino, fs.st_dev)

106

108

107

109

108

109

def _write_cache(basedir, entries):

110

def _write_cache(basedir, entry_iter, dangerfiles):

110

111

from atomicfile import AtomicFile

111

112

113

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

113

114

outf = AtomicFile(cachefn, 'wb')

115

outf.write(CACHE_HEADER + '\n')

114

116

try:

115

outf.write(CACHE_HEADER + '\n')

116

117

for entry in entries:

117

for entry in entry_iter:

118

if len(entry) != 8:

119

raise ValueError("invalid statcache entry tuple %r" % entry)

120

outf.write(entry[0].encode('utf-8')) # file id

121

outf.write('\0')

122

outf.write(entry[1]) # hex sha1

123

outf.write('\0')

124

outf.write(entry[2].encode('utf-8')) # name

120

121

if entry[SC_FILE_ID] in dangerfiles:

122

continue # changed too recently

123

outf.write(b2a_qp(entry[0].encode('utf-8'))) # file id

124

outf.write(' ')

125

outf.write(entry[1]) # hex sha1

126

outf.write(' ')

127

outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name

125

128

for nf in entry[3:]:

126

outf.write('\0%d' % nf)

129

outf.write(' %d' % nf)

127

130

outf.write('\n')

128

131

129

132

outf.commit()

130

133

finally:

131

134

if not outf.closed:

132

135

outf.abort()

133

134

135

def _try_write_cache(basedir, entries):

136

try:

137

return _write_cache(basedir, entries)

138

except IOError, e:

139

mutter("cannot update statcache in %s: %s" % (basedir, e))

140

except OSError, e:

141

mutter("cannot update statcache in %s: %s" % (basedir, e))

142

143

136

144

137

145

138

def load_cache(basedir):

146

139

import re

147

140

cache = {}

148

141

seen_paths = {}

149

from bzrlib.trace import warning

150

142

151

143

sha_re = re.compile(r'[a-f0-9]{40}')

152

144

158

150

159

151

line1 = cachefile.readline().rstrip('\r\n')

160

152

if line1 != CACHE_HEADER:

161

mutter('cache header marker not found at top of %s; discarding cache'

162

% cachefn)

153

mutter('cache header marker not found at top of %s' % cachefn)

163

154

return cache

164

155

165

156

for l in cachefile:

166

f = l.split('\0')

157

f = l.split(' ')

167

158

168

file_id = f[0].decode('utf-8')

159

file_id = a2b_qp(f[0]).decode('utf-8')

169

160

if file_id in cache:

170

warning("duplicated file_id in cache: {%s}" % file_id)

161

raise BzrCheckError("duplicated file_id in cache: {%s}" % file_id)

171

162

172

163

text_sha = f[1]

173

164

if len(text_sha) != 40 or not sha_re.match(text_sha):

174

165

raise BzrCheckError("invalid file SHA-1 in cache: %r" % text_sha)

175

166

176

path = f[2].decode('utf-8')

167

path = a2b_qp(f[2]).decode('utf-8')

177

168

if path in seen_paths:

178

warning("duplicated path in cache: %r" % path)

169

raise BzrCheckError("duplicated path in cache: %r" % path)

179

170

seen_paths[path] = True

180

171

181

172

entry = (file_id, text_sha, path) + tuple([long(x) for x in f[3:]])

187

178

188

179

189

180

181

190

182

def _files_from_inventory(inv):

191

183

for path, ie in inv.iter_entries():

192

184

if ie.kind != 'file':

204

196

flush -- discard any previous cache and recalculate from scratch.

205

197

"""

206

198

207

# load the existing cache; use information there to find a list of

208

# files ordered by inode, which is alleged to be the fastest order

209

# to stat the files.

210

199

211

to_update = _files_from_inventory(inv)

200

# TODO: It's supposed to be faster to stat the files in order by inum.

201

# We don't directly know the inum of the files of course but we do

202

# know where they were last sighted, so we can sort by that.

212

203

213

204

assert isinstance(flush, bool)

214

205

if flush:

215

206

cache = {}

216

207

else:

217

208

cache = load_cache(basedir)

218

219

by_inode = []

220

without_inode = []

221

for file_id, path in to_update:

222

if file_id in cache:

223

by_inode.append((cache[file_id][SC_INO], file_id, path))

224

else:

225

without_inode.append((file_id, path))

226

by_inode.sort()

227

228

to_update = [a[1:] for a in by_inode] + without_inode

229

230

stat_cnt = missing_cnt = new_cnt = hardcheck = change_cnt = 0

231

232

# dangerfiles have been recently touched and can't be committed to

233

# a persistent cache yet, but they are returned to the caller.

234

dangerfiles = []

209

return _update_cache_from_list(basedir, cache, _files_from_inventory(inv))

210

211

212

213

def _update_cache_from_list(basedir, cache, to_update):

214

"""Update and return the cache for given files.

215

216

cache -- Previously cached values to be validated.

217

218

to_update -- Sequence of (file_id, path) pairs to check.

219

"""

220

stat_cnt = missing_cnt = hardcheck = change_cnt = 0

221

222

# dangerfiles have been recently touched and can't be

223

# committed to a persistent cache yet.

224

dangerfiles = {}

235

225

236

226

now = int(time.time())

237

227

249

239

change_cnt += 1

250

240

missing_cnt += 1

251

241

continue

252

elif not cacheentry:

253

new_cnt += 1

254

242

255

243

if (fp[FP_MTIME] >= now) or (fp[FP_CTIME] >= now):

256

dangerfiles.append(file_id)

244

dangerfiles[file_id] = True

257

245

258

246

if cacheentry and (cacheentry[3:] == fp):

259

247

continue # all stat fields unchanged

270

258

change_cnt += 1

271

259

272

260

mutter('statcache: statted %d files, read %d files, %d changed, %d dangerous, '

273

'%d deleted, %d new, '

274

261

'%d in cache'

275

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles),

276

missing_cnt, new_cnt, len(cache)))

262

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles), len(cache)))

277

263

278

264

if change_cnt:

279

265

mutter('updating on-disk statcache')

280

281

if dangerfiles:

282

safe_cache = cache.copy()

283

for file_id in dangerfiles:

284

del safe_cache[file_id]

285

else:

286

safe_cache = cache

287

288

_try_write_cache(basedir, safe_cache.itervalues())

266

_write_cache(basedir, cache.itervalues(), dangerfiles)

289

267

290

268

return cache