~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_py.py

Committer: Canonical.com Patch Queue Manager
Date: 2007-07-20 19:48:22 UTC
mfrom: (2474.1.74 dirstate_pyrex)
Revision ID: pqm@pqm.ubuntu.com-20070720194822-smqttk05w6efypf0

(John Arbash Meinel) Implement DirState._read_dirblocks() in pyrex

files added:
bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/tests/test__dirstate_helpers.py

files modified:
.bzrignore

NEWS

bzrlib/benchmarks/__init__.py

bzrlib/dirstate.py

bzrlib/tests/__init__.py

bzrlib/tests/test_dirstate.py

bzrlib/workingtree_4.py

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/_dirstate_helpers_py.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Python implementations of Dirstate Helper functions."""

import os

# We cannot import the dirstate module, because it loads this module

# All we really need is the IN_MEMORY_MODIFIED constant

from bzrlib.dirstate import DirState

def _bisect_path_left_py(paths, path):

"""Return the index where to insert path into paths.

This uses the dirblock sorting. So all children in a directory come before

the children of children. For example::

b-c

d-e

a-a

a=c

Will be sorted as::

a-a

a=c

a/b

a/b-c

a/d

a/d-e

a/b/c

a/d/e

:param paths: A list of paths to search through

:param path: A single path to insert

:return: An offset where 'path' can be inserted.

:seealso: bisect.bisect_left

"""

hi = len(paths)

lo = 0

while lo < hi:

mid = (lo + hi) // 2

# Grab the dirname for the current dirblock

cur = paths[mid]

if _cmp_path_by_dirblock_py(cur, path) < 0:

lo = mid + 1

else:

hi = mid

return lo

def _bisect_path_right_py(paths, path):

"""Return the index where to insert path into paths.

This uses a path-wise comparison so we get::

a-b

a=b

a/b

Rather than::

a-b

a/b

a=b

:param paths: A list of paths to search through

:param path: A single path to insert

:return: An offset where 'path' can be inserted.

:seealso: bisect.bisect_right

"""

hi = len(paths)

lo = 0

while lo < hi:

mid = (lo+hi)//2

# Grab the dirname for the current dirblock

cur = paths[mid]

if _cmp_path_by_dirblock_py(path, cur) < 0:

hi = mid

else:

lo = mid + 1

100

return lo

101

102

103

def bisect_dirblock_py(dirblocks, dirname, lo=0, hi=None, cache={}):

104

"""Return the index where to insert dirname into the dirblocks.

105

106

The return value idx is such that all directories blocks in dirblock[:idx]

107

have names < dirname, and all blocks in dirblock[idx:] have names >=

108

dirname.

109

110

Optional args lo (default 0) and hi (default len(dirblocks)) bound the

111

slice of a to be searched.

112

"""

113

if hi is None:

114

hi = len(dirblocks)

115

try:

116

dirname_split = cache[dirname]

117

except KeyError:

118

dirname_split = dirname.split('/')

119

cache[dirname] = dirname_split

120

while lo < hi:

121

mid = (lo + hi) // 2

122

# Grab the dirname for the current dirblock

123

cur = dirblocks[mid][0]

124

try:

125

cur_split = cache[cur]

126

except KeyError:

127

cur_split = cur.split('/')

128

cache[cur] = cur_split

129

if cur_split < dirname_split: lo = mid + 1

130

else: hi = mid

131

return lo

132

133

134

def cmp_by_dirs_py(path1, path2):

135

"""Compare two paths directory by directory.

136

137

This is equivalent to doing::

138

139

cmp(path1.split('/'), path2.split('/'))

140

141

The idea is that you should compare path components separately. This

142

differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and

143

``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.

144

145

:param path1: first path

146

:param path2: second path

147

:return: positive number if ``path1`` comes first,

148

0 if paths are equal,

149

and negative number if ``path2`` sorts first

150

"""

151

if not isinstance(path1, str):

152

raise TypeError("'path1' must be a plain string, not %s: %r"

153

% (type(path1), path1))

154

if not isinstance(path2, str):

155

raise TypeError("'path2' must be a plain string, not %s: %r"

156

% (type(path2), path2))

157

return cmp(path1.split('/'), path2.split('/'))

158

159

160

def _cmp_path_by_dirblock_py(path1, path2):

161

"""Compare two paths based on what directory they are in.

162

163

This generates a sort order, such that all children of a directory are

164

sorted together, and grandchildren are in the same order as the

165

children appear. But all grandchildren come after all children.

166

167

:param path1: first path

168

:param path2: the second path

169

:return: positive number if ``path1`` comes first,

170

0 if paths are equal

171

and a negative number if ``path2`` sorts first

172

"""

173

if not isinstance(path1, str):

174

raise TypeError("'path1' must be a plain string, not %s: %r"

175

% (type(path1), path1))

176

if not isinstance(path2, str):

177

raise TypeError("'path2' must be a plain string, not %s: %r"

178

% (type(path2), path2))

179

dirname1, basename1 = os.path.split(path1)

180

key1 = (dirname1.split('/'), basename1)

181

dirname2, basename2 = os.path.split(path2)

182

key2 = (dirname2.split('/'), basename2)

183

return cmp(key1, key2)

184

185

186

def _read_dirblocks_py(state):

187

"""Read in the dirblocks for the given DirState object.

188

189

This is tightly bound to the DirState internal representation. It should be

190

thought of as a member function, which is only separated out so that we can

191

re-write it in pyrex.

192

193

:param state: A DirState object.

194

:return: None

195

"""

196

state._state_file.seek(state._end_of_header)

197

text = state._state_file.read()

198

# TODO: check the crc checksums. crc_measured = zlib.crc32(text)

199

200

fields = text.split('\0')

201

# Remove the last blank entry

202

trailing = fields.pop()

203

assert trailing == ''

204

# consider turning fields into a tuple.

205

206

# skip the first field which is the trailing null from the header.

207

cur = 1

208

# Each line now has an extra '\n' field which is not used

209

# so we just skip over it

210

# entry size:

211

# 3 fields for the key

212

# + number of fields per tree_data (5) * tree count

213

# + newline

214

num_present_parents = state._num_present_parents()

215

tree_count = 1 + num_present_parents

216

entry_size = state._fields_per_entry()

217

expected_field_count = entry_size * state._num_entries

218

field_count = len(fields)

219

# this checks our adjustment, and also catches file too short.

220

assert field_count - cur == expected_field_count, \

221

'field count incorrect %s != %s, entry_size=%s, '\

222

'num_entries=%s fields=%r' % (

223

field_count - cur, expected_field_count, entry_size,

224

state._num_entries, fields)

225

226

if num_present_parents == 1:

227

# Bind external functions to local names

228

_int = int

229

# We access all fields in order, so we can just iterate over

230

# them. Grab an straight iterator over the fields. (We use an

231

# iterator because we don't want to do a lot of additions, nor

232

# do we want to do a lot of slicing)

233

next = iter(fields).next

234

# Move the iterator to the current position

235

for x in xrange(cur):

236

next()

237

# The two blocks here are deliberate: the root block and the

238

# contents-of-root block.

239

state._dirblocks = [('', []), ('', [])]

240

current_block = state._dirblocks[0][1]

241

current_dirname = ''

242

append_entry = current_block.append

243

for count in xrange(state._num_entries):

244

dirname = next()

245

name = next()

246

file_id = next()

247

if dirname != current_dirname:

248

# new block - different dirname

249

current_block = []

250

current_dirname = dirname

251

state._dirblocks.append((current_dirname, current_block))

252

append_entry = current_block.append

253

# we know current_dirname == dirname, so re-use it to avoid

254

# creating new strings

255

entry = ((current_dirname, name, file_id),

256

[(# Current Tree

257

next(), # minikind

258

next(), # fingerprint

259

_int(next()), # size

260

next() == 'y', # executable

261

next(), # packed_stat or revision_id

262

263

( # Parent 1

264

next(), # minikind

265

next(), # fingerprint

266

_int(next()), # size

267

next() == 'y', # executable

268

next(), # packed_stat or revision_id

269

270

])

271

trailing = next()

272

assert trailing == '\n'

273

# append the entry to the current block

274

append_entry(entry)

275

state._split_root_dirblock_into_contents()

276

else:

277

fields_to_entry = state._get_fields_to_entry()

278

entries = [fields_to_entry(fields[pos:pos+entry_size])

279

for pos in xrange(cur, field_count, entry_size)]

280

state._entries_to_current_state(entries)

281

# To convert from format 2 => format 3

282

# state._dirblocks = sorted(state._dirblocks,

283

# key=lambda blk:blk[0].split('/'))

284

# To convert from format 3 => format 2

285

# state._dirblocks = sorted(state._dirblocks)

286

state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED

287

Older »