~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/weave.py

Committer: Martin Pool
Date: 2005-07-18 11:34:12 UTC
Revision ID: mbp@sourcefrog.net-20050718113412-7cced1933e2c6891

- various optimizations to weave add code

files modified:
bzrlib/weave.py

Show diffs side-by-side

added added

removed removed

bzrlib/weave.py

# making _extract build and return a list, rather than being a generator

# takes 37.94s

# with python -O, r923 does 2000 versions in 36.87s

# with optimizations to avoid mutating lists - 35.75! I guess copying

# all the elements every time costs more than the small manipulations.

# a surprisingly small change.

# r931, which avoids using a generator for extract, does 36.98s

# with memoized inclusions, takes 41.49s; not very good

# with slots, takes 37.35s; without takes 39.16, a bit surprising

# with the delta calculation mixed in with the add method, rather than

# separated, takes 36.78s

# with delta folded in and mutation of the list, 36.13s

# with all this and simplification of add code, 33s

# TODO: Perhaps have copy method for Weave instances?

# XXX: If we do weaves this way, will a merge still behave the same

148

168

_sha1s

149

169

List of hex SHA-1 of each version, or None if not recorded.

150

170

"""

171

172

## __slots__ = ['_l', '_v', '_sha1s']

173

151

174

def __init__(self):

152

175

self._l = []

153

176

self._v = []

175

198

176

199

text

177

200

Sequence of lines to be added in the new version."""

178

## self._check_versions(parents)

201

202

self._check_versions(parents)

179

203

## self._check_lines(text)

180

idx = len(self._v)

204

new_version = len(self._v)

181

205

182

206

import sha

183

207

s = sha.new()

184

for l in text:

185

s.update(l)

208

map(s.update, text)

186

209

sha1 = s.hexdigest()

187

210

del s

188

211

189

# TODO: It'd probably be faster to append things on to a new

190

# list rather than modifying the existing one, which is likely

191

# to cause a lot of copying.

192

193

if parents:

194

ancestors = self.inclusions(parents)

195

delta = self._delta(ancestors, text)

196

197

# offset gives the number of lines that have been inserted

198

# into the weave up to the current point; if the original edit instruction

199

# says to change line A then we actually change (A+offset)

200

offset = 0

201

202

for i1, i2, newlines in delta:

203

assert 0 <= i1

204

assert i1 <= i2

205

assert i2 <= len(self._l)

206

207

# the deletion and insertion are handled separately.

208

# first delete the region.

209

if i1 != i2:

210

self._l.insert(i1+offset, ('[', idx))

211

self._l.insert(i2+offset+1, (']', idx))

212

offset += 2

213

# is this OK???

214

215

if newlines:

216

# there may have been a deletion spanning up to

217

# i2; we want to insert after this region to make sure

218

# we don't destroy ourselves

219

i = i2 + offset

220

self._l[i:i] = [('{', idx)] \

221

+ newlines \

222

+ [('}', idx)]

223

offset += 2 + len(newlines)

224

225

self._addversion(parents)

226

else:

227

# special case; adding with no parents revision; can do this

228

# more quickly by just appending unconditionally

229

self._l.append(('{', idx))

230

self._l += text

231

self._l.append(('}', idx))

232

233

self._addversion(None)

234

212

# if we abort after here the weave will be corrupt

213

self._addversion(parents)

235

214

self._sha1s.append(sha1)

236

237

return idx

215

216

217

if not parents:

218

# special case; adding with no parents revision; can do

219

# this more quickly by just appending unconditionally.

220

# even more specially, if we're adding an empty text we

221

# need do nothing at all.

222

if text:

223

self._l.append(('{', new_version))

224

self._l.extend(text)

225

self._l.append(('}', new_version))

226

227

return new_version

228

229

if len(parents) == 1 and sha1 == self._sha1s[parents[0]]:

230

# special case: same as the single parent

231

return new_version

232

233

234

ancestors = self.inclusions(parents)

235

236

l = self._l

237

238

# basis a list of (origin, lineno, line)

239

basis_lineno = []

240

basis_lines = []

241

for origin, lineno, line in self._extract(ancestors):

242

basis_lineno.append(lineno)

243

basis_lines.append(line)

244

245

# another small special case: a merge, producing the same text as auto-merge

246

if text == basis_lines:

247

return new_version

248

249

# add a sentinal, because we can also match against the final line

250

basis_lineno.append(len(self._l))

251

252

# XXX: which line of the weave should we really consider

253

# matches the end of the file? the current code says it's the

254

# last line of the weave?

255

256

#print 'basis_lines:', basis_lines

257

#print 'new_lines: ', lines

258

259

from difflib import SequenceMatcher

260

s = SequenceMatcher(None, basis_lines, text)

261

262

# offset gives the number of lines that have been inserted

263

# into the weave up to the current point; if the original edit instruction

264

# says to change line A then we actually change (A+offset)

265

offset = 0

266

267

for tag, i1, i2, j1, j2 in s.get_opcodes():

268

# i1,i2 are given in offsets within basis_lines; we need to map them

269

# back to offsets within the entire weave

270

#print 'raw match', tag, i1, i2, j1, j2

271

if tag == 'equal':

272

continue

273

274

i1 = basis_lineno[i1]

275

i2 = basis_lineno[i2]

276

277

assert 0 <= i1 <= i2 <= len(old_l)

278

assert 0 <= j1 <= j2 <= len(text)

279

280

#print tag, i1, i2, j1, j2

281

282

# the deletion and insertion are handled separately.

283

# first delete the region.

284

if i1 != i2:

285

self._l.insert(i1+offset, ('[', new_version))

286

self._l.insert(i2+offset+1, (']', new_version))

287

offset += 2

288

289

if j1 != j2:

290

# there may have been a deletion spanning up to

291

# i2; we want to insert after this region to make sure

292

# we don't destroy ourselves

293

i = i2 + offset

294

self._l[i:i] = ([('{', new_version)]

295

+ text[j1:j2]

296

+ [('}', new_version)])

297

offset += 2 + (j2 - j1)

298

299

return new_version

238

300

239

301

240

302

def inclusions(self, versions):

510

572

If line1=line2, this is a pure insert; if newlines=[] this is a

511

573

pure delete. (Similar to difflib.)

512

574

"""

513

# basis a list of (origin, lineno, line)

514

basis_lineno = []

515

basis_lines = []

516

for origin, lineno, line in self._extract(included):

517

basis_lineno.append(lineno)

518

basis_lines.append(line)

519

520

# add a sentinal, because we can also match against the final line

521

basis_lineno.append(len(self._l))

522

523

# XXX: which line of the weave should we really consider

524

# matches the end of the file? the current code says it's the

525

# last line of the weave?

526

527

from difflib import SequenceMatcher

528

s = SequenceMatcher(None, basis_lines, lines)

529

530

# TODO: Perhaps return line numbers from composed weave as well?

531

532

for tag, i1, i2, j1, j2 in s.get_opcodes():

533

##print tag, i1, i2, j1, j2

534

535

if tag == 'equal':

536

continue

537

538

# i1,i2 are given in offsets within basis_lines; we need to map them

539

# back to offsets within the entire weave

540

real_i1 = basis_lineno[i1]

541

real_i2 = basis_lineno[i2]

542

543

assert 0 <= j1

544

assert j1 <= j2

545

assert j2 <= len(lines)

546

547

yield real_i1, real_i2, lines[j1:j2]

548

575

549

576

550

577

788

815

pb = ProgressBar()

789

816

w.check(pb)

790

817

pb.clear()

818

print '%d versions ok' % w.numversions()

791

819

792

820

elif cmd == 'inclusions':

793

821

w = readit()

Older »