~bzr-pqm/bzr/bzr.dev : revision 520

15

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

16

17

import stat, os, sha, time

18

from binascii import b2a_qp, a2b_qp

18

19

20

from trace import mutter

20

21

from errors import BzrError, BzrCheckError

65

66

67

The SHA-1 is stored in memory as a hexdigest.

67

68

File names and file-ids are written out with non-ascii or whitespace

69

characters given as python-style unicode escapes. (file-ids shouldn't

70

contain weird characters, but it might happen.)

69

File names are written out as the quoted-printable encoding of their

70

UTF-8 representation.

71

"""

72

73

# order of fields returned by fingerprint()

88

SC_DEV = 7

89

90

91

92

CACHE_HEADER = "### bzr statcache v3"

93

94

95

91

def fingerprint(abspath):

96

92

try:

97

93

fs = os.lstat(abspath)

106

102

fs.st_ctime, fs.st_ino, fs.st_dev)

107

103

108

104

109

110

# safe_quote: return `s` in a form that contains no raw space characters.
#
# The string is first encoded with the 'unicode_escape' codec, and then any
# newline, space or carriage-return character left in the result is replaced
# by the literal text \u000a, \u0020 or \u000d respectively.
# NOTE(review): the codec presumably already escapes newline and carriage
# return itself, in which case only the space replacement has an effect --
# confirm before relying on the exact output form.
def safe_quote(s):

111

return s.encode('unicode_escape') \

112

.replace('\n', '\\u000a') \

113

.replace(' ', '\\u0020') \

114

.replace('\r', '\\u000d')

115

116

117

def _write_cache(basedir, entries):

105

def _write_cache(basedir, entry_iter, dangerfiles):

118

106

from atomicfile import AtomicFile

119

107

120

108

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

121

109

outf = AtomicFile(cachefn, 'wb')

122

outf.write(CACHE_HEADER + '\n')

123

110

try:

124

for entry in entries:

111

for entry in entry_iter:

125

112

if len(entry) != 8:

126

113

raise ValueError("invalid statcache entry tuple %r" % entry)

127

outf.write(safe_quote(entry[0])) # file id

128

outf.write(' ')

129

outf.write(entry[1]) # hex sha1

130

outf.write(' ')

131

outf.write(safe_quote(entry[2])) # name

114

115

if entry[SC_FILE_ID] in dangerfiles:

116

continue # changed too recently

117

outf.write(entry[0]) # file id

118

outf.write(' ')

119

outf.write(entry[1]) # hex sha1

120

outf.write(' ')

121

outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name

132

122

for nf in entry[3:]:

133

123

outf.write(' %d' % nf)

134

124

outf.write('\n')

137

127

finally:

138

128

if not outf.closed:

139

129

outf.abort()

140

141

142

# _try_write_cache: best-effort wrapper around _write_cache(basedir, entries).
#
# Returns whatever _write_cache returns.  If that call raises IOError or
# OSError, the error is not propagated: a "cannot update statcache" message
# naming `basedir` and the exception is passed to mutter(), and the function
# falls off the end (implicitly returning None).
def _try_write_cache(basedir, entries):

143

try:

144

return _write_cache(basedir, entries)

145

except IOError, e:

146

mutter("cannot update statcache in %s: %s" % (basedir, e))

147

except OSError, e:

148

mutter("cannot update statcache in %s: %s" % (basedir, e))

149

150

130

151

131

152

132

def load_cache(basedir):

153

import re

133

from sets import Set

154

134

cache = {}

155

seen_paths = {}

156

157

sha_re = re.compile(r'[a-f0-9]{40}')

135

seen_paths = Set()

158

136

159

137

try:

160

138

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

161

cachefile = open(cachefn, 'rb')

139

cachefile = open(cachefn, 'r')

162

140

except IOError:

163

141

return cache

164

165

line1 = cachefile.readline().rstrip('\r\n')

166

if line1 != CACHE_HEADER:

167

mutter('cache header marker not found at top of %s' % cachefn)

168

return cache

169

142

170

143

for l in cachefile:

171

144

f = l.split(' ')

172

145

173

file_id = f[0].decode('unicode_escape')

146

file_id = f[0]

174

147

if file_id in cache:

175

raise BzrCheckError("duplicated file_id in cache: {%s}" % file_id)

176

177

text_sha = f[1]

178

if len(text_sha) != 40 or not sha_re.match(text_sha):

179

raise BzrCheckError("invalid file SHA-1 in cache: %r" % text_sha)

148

raise BzrError("duplicated file_id in cache: {%s}" % file_id)

180

149

181

path = f[2].decode('unicode_escape')

150

path = a2b_qp(f[2]).decode('utf-8')

182

151

if path in seen_paths:

183

152

raise BzrCheckError("duplicated path in cache: %r" % path)

184

seen_paths[path] = True

153

seen_paths.add(path)

185

154

186

entry = (file_id, text_sha, path) + tuple([long(x) for x in f[3:]])

155

entry = (file_id, f[1], path) + tuple([long(x) for x in f[3:]])

187

156

if len(entry) != 8:

188

157

raise ValueError("invalid statcache entry tuple %r" % entry)

189

158

210

179

flush -- discard any previous cache and recalculate from scratch.

211

180

"""

212

181

213

# load the existing cache; use information there to find a list of

214

# files ordered by inode, which is alleged to be the fastest order

215

# to stat the files.

216

182

217

to_update = _files_from_inventory(inv)

183

# TODO: It's supposed to be faster to stat the files in order by inum.

184

# We don't directly know the inum of the files of course but we do

185

# know where they were last sighted, so we can sort by that.

218

186

219

187

assert isinstance(flush, bool)

220

188

if flush:

221

189

cache = {}

222

190

else:

223

191

cache = load_cache(basedir)

224

225

by_inode = []

226

without_inode = []

227

for file_id, path in to_update:

228

if file_id in cache:

229

by_inode.append((cache[file_id][SC_INO], file_id, path))

230

else:

231

without_inode.append((file_id, path))

232

by_inode.sort()

233

234

to_update = [a[1:] for a in by_inode] + without_inode

235

236

stat_cnt = missing_cnt = new_cnt = hardcheck = change_cnt = 0

237

238

# dangerfiles have been recently touched and can't be committed to

239

# a persistent cache yet, but they are returned to the caller.

240

dangerfiles = []

192

return _update_cache_from_list(basedir, cache, _files_from_inventory(inv))

193

194

195

196

def _update_cache_from_list(basedir, cache, to_update):

197

"""Update and return the cache for given files.

198

199

cache -- Previously cached values to be validated.

200

201

to_update -- Sequence of (file_id, path) pairs to check.

202

"""

203

204

from sets import Set

205

206

stat_cnt = missing_cnt = hardcheck = change_cnt = 0

207

208

# files that have been recently touched and can't be

209

# committed to a persistent cache yet.

241

210

211

dangerfiles = Set()

242

212

now = int(time.time())

243

213

244

214

## mutter('update statcache under %r' % basedir)

255

225

change_cnt += 1

256

226

missing_cnt += 1

257

227

continue

258

elif not cacheentry:

259

new_cnt += 1

260

228

261

229

if (fp[FP_MTIME] >= now) or (fp[FP_CTIME] >= now):

262

dangerfiles.append(file_id)

230

dangerfiles.add(file_id)

263

231

264

232

if cacheentry and (cacheentry[3:] == fp):

265

233

continue # all stat fields unchanged

276

244

change_cnt += 1

277

245

278

246

mutter('statcache: statted %d files, read %d files, %d changed, %d dangerous, '

279

'%d deleted, %d new, '

280

247

'%d in cache'

281

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles),

282

missing_cnt, new_cnt, len(cache)))

248

% (stat_cnt, hardcheck, change_cnt, len(dangerfiles), len(cache)))

283

249

284

250

if change_cnt:

285

251

mutter('updating on-disk statcache')

286

287

if dangerfiles:

288

safe_cache = cache.copy()

289

for file_id in dangerfiles:

290

del safe_cache[file_id]

291

else:

292

safe_cache = cache

293

294

_try_write_cache(basedir, safe_cache.itervalues())

252

_write_cache(basedir, cache.itervalues(), dangerfiles)

295

253

296

254

return cache