~bzr-pqm/bzr/bzr.dev

Committer: Martin Pool
Date: 2005-05-16 04:32:21 UTC
Revision ID: mbp@sourcefrog.net-20050516043221-12a9da66feb5d67b

clean up stat cache code:
- smarter UTF-8 and quopri encoding of file names
- check paths are not duplicated in cache
- check lines are well-formed
- more docs

files modified:
bzrlib/statcache.py

Show diffs side-by-side

added added

removed removed

bzrlib/statcache.py

from binascii import b2a_qp, a2b_qp

from trace import mutter

from errors import BzrError

from errors import BzrError, BzrCheckError

"""File stat cache to speed up tree comparisons.

This module gives a quick way to find the SHA-1 and related
information of a file in the working directory, without actually

reading and hashing the whole file.

Implementation

==============

Users of this module should not need to know about how this is

implemented, and in particular should not depend on the particular

data which is stored or its format.

This is done by maintaining a cache indexed by a file fingerprint of

(path, size, mtime, ctime, ino, dev) pointing to the SHA-1. If the

fingerprint has not changed, we assume the file content has not changed.

The cache is represented as a map from file_id to a tuple of (file_id,

sha1, path, size, mtime, ctime, ino, dev).

The SHA-1 is stored in memory as a hexdigest.

File names are written out as the quoted-printable encoding of their

UTF-8 representation.

"""

# order of fields returned by fingerprint()

FP_SIZE = 0

FP_MTIME = 1

FP_CTIME = 2

FP_INO = 3

FP_DEV = 4

# order of fields in the statcache file and in the in-memory map

SC_FILE_ID = 0

SC_SHA1 = 1

SC_PATH = 2

SC_SIZE = 3

SC_MTIME = 4

SC_CTIME = 5

SC_INO = 6

SC_DEV = 7

def fingerprint(abspath):

106

from atomicfile import AtomicFile

107

108

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

outf = AtomicFile(cachefn, 'wb', 'utf-8')

109

outf = AtomicFile(cachefn, 'wb')

110

try:

111

for entry in entry_iter:

if entry[0] in dangerfiles:

continue

outf.write(entry[0] + ' ' + entry[1] + ' ')

outf.write(b2a_qp(entry[2], True))

outf.write(' %d %d %d %d %d\n' % entry[3:])

112

if len(entry) != 8:

113

raise ValueError("invalid statcache entry tuple %r" % entry)

114

115

if entry[SC_FILE_ID] in dangerfiles:

116

continue # changed too recently

117

outf.write(entry[0]) # file id

118

outf.write(' ')

119

outf.write(entry[1]) # hex sha1

120

outf.write(' ')

121

outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name

122

for nf in entry[3:]:

123

outf.write(' %d' % nf)

124

outf.write('\n')

125

126

outf.commit()

127

finally:

102

130

103

131

104

132

def load_cache(basedir):

105

import codecs

106

133

from sets import Set

107

134

cache = {}

135

seen_paths = Set()

108

136

109

137

try:

110

138

cachefn = os.path.join(basedir, '.bzr', 'stat-cache')

111

cachefile = codecs.open(cachefn, 'r', 'utf-8')

139

cachefile = open(cachefn, 'r')

112

140

except IOError:

113

141

return cache

114

142

115

143

for l in cachefile:

116

144

f = l.split(' ')

145

117

146

file_id = f[0]

118

147

if file_id in cache:

119

148

raise BzrError("duplicated file_id in cache: {%s}" % file_id)

120

cache[file_id] = (f[0], f[1], a2b_qp(f[2])) + tuple([long(x) for x in f[3:]])

149

150

path = a2b_qp(f[2]).decode('utf-8')

151

if path in seen_paths:

152

raise BzrCheckError("duplicated path in cache: %r" % path)

153

seen_paths.add(path)

154

155

entry = (file_id, f[1], path) + tuple([long(x) for x in f[3:]])

156

if len(entry) != 8:

157

raise ValueError("invalid statcache entry tuple %r" % entry)

158

159

cache[file_id] = entry

121

160

return cache

122

161

123

162

Older »