~bzr-pqm/bzr/bzr.dev

Committer: Martin Pool
Date: 2005-07-11 07:25:42 UTC
Revision ID: mbp@sourcefrog.net-20050711072542-6b1917e90ffc20cf

- merge john's plugins-have-test_suite.patch:
- plugins can provide tests
- plugins command shows more useful information

files removed:
bzrlib/intset.py

bzrlib/selftest/testinv.py

bzrlib/selftest/teststatus.py

tools/weavemerge.sh

files modified:
README

TODO

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/hashcache.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge_core.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/whitebox.py

bzrlib/status.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

testsweet.py

tools/convertfile.py

tools/testweave.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/weave.py

"""Weave - storage of related text file versions"""

# before intset (r923) 2000 versions in 41.5s

# with intset (r926) 2000 versions in 93s !!!

# better to just use plain sets.

# making _extract build and return a list, rather than being a generator

# takes 37.94s

# with python -O, r923 does 2000 versions in 36.87s

# with optimizations to avoid mutating lists - 35.75! I guess copying

# all the elements every time costs more than the small manipulations.

# a surprisingly small change.

# r931, which avoids using a generator for extract, does 36.98s

# with memoized inclusions, takes 41.49s; not very good

# with slots, takes 37.35s; without takes 39.16, a bit surprising

# with the delta calculation mixed in with the add method, rather than

# separated, takes 36.78s

# with delta folded in and mutation of the list, 36.13s

# with all this and simplification of add code, 33s

# TODO: Perhaps have copy method for Weave instances?

# XXX: If we do weaves this way, will a merge still behave the same

# way if it's done in a different order? That's a pretty desirable

# property.

# TODO: How to write these to disk? One option is cPickle, which

# would be fast but less friendly to C, and perhaps not portable. Another is

# TODO: Nothing here so far assumes the lines are really \n newlines,

# rather than being split up in some other way. We could accommodate

# binaries, perhaps by naively splitting on \n or perhaps using

# something like a rolling checksum.

# TODO: Perhaps track SHA-1 in the header for protection? This would

# be redundant with it being stored in the inventory, but perhaps

# usefully so?

# TODO: Track version names as well as indexes.

# TODO: Probably do transitive expansion when specifying parents?

# TODO: Separate out some code to read and write weaves.

# TODO: End marker for each version so we can stop reading?

# TODO: Check that no insertion occurs inside a deletion that was

# active in the version of the insertion.

# TODO: In addition to the SHA-1 check, perhaps have some code that

# checks structural constraints of the weave: ie that insertions are

# properly nested, that there is no text outside of an insertion, that

# insertions or deletions are not repeated, etc.

# TODO: Parallel-extract that passes back each line along with a

# description of which revisions include it. Nice for checking all

# shas in parallel.

# TODO: Perhaps a special slower check() method that verifies more

# nesting constraints and the MD5 of each version?

try:

set

frozenset

except NameError:

from sets import Set, ImmutableSet

set = Set

frozenset = ImmutableSet

del Set, ImmutableSet

class WeaveError(Exception):

107

the version-id is used to reference it in the larger world.

108

109

The weave is represented as a list mixing edit instructions and

110

literal text. Each entry in _weave can be either a string (or

literal text. Each entry in _l can be either a string (or

111

unicode), or a tuple. If a string, it means that the given line

112

should be output in the currently active revisions.

113

100

151

138

should be no way to get an earlier version deleting a later

152

139

version.

153

140

154

_weave

155

Text of the weave; list of control instruction tuples and strings.

141

142

Text of the weave.

156

143

157

_parents

144

158

145

List of parents, indexed by version number.

159

146

It is only necessary to store the minimal set of parents for

160

147

each version; the parent's parents are implied.

162

149

_sha1s

163

150

List of hex SHA-1 of each version, or None if not recorded.

164

151

"""

165

166

__slots__ = ['_weave', '_parents', '_sha1s']

167

168

152

def __init__(self):

169

self._weave = []

170

self._parents = []

153

self._l = []

154

self._v = []

171

155

self._sha1s = []

172

156

173

157

174

158

def __eq__(self, other):

175

159

if not isinstance(other, Weave):

176

160

return False

177

return self._parents == other._parents \

178

and self._weave == other._weave

161

return self._v == other._v \

162

and self._l == other._l

179

163

180

164

181

165

def __ne__(self, other):

192

176

193

177

text

194

178

Sequence of lines to be added in the new version."""

195

196

self._check_versions(parents)

179

## self._check_versions(parents)

197

180

## self._check_lines(text)

198

new_version = len(self._parents)

181

idx = len(self._v)

199

182

200

183

import sha

201

184

s = sha.new()

202

map(s.update, text)

185

for l in text:

186

s.update(l)

203

187

sha1 = s.hexdigest()

204

188

del s

205

189

206

# if we abort after here the weave will be corrupt

207

self._parents.append(frozenset(parents))

190

if parents:

191

ancestors = self.inclusions(parents)

192

delta = self._delta(ancestors, text)

193

194

# offset gives the number of lines that have been inserted

195

# into the weave up to the current point; if the original edit instruction

196

# says to change line A then we actually change (A+offset)

197

offset = 0

198

199

for i1, i2, newlines in delta:

200

assert 0 <= i1

201

assert i1 <= i2

202

assert i2 <= len(self._l)

203

204

# the deletion and insertion are handled separately.

205

# first delete the region.

206

if i1 != i2:

207

self._l.insert(i1+offset, ('[', idx))

208

self._l.insert(i2+offset+1, (']', idx))

209

offset += 2

210

# is this OK???

211

212

if newlines:

213

# there may have been a deletion spanning up to

214

# i2; we want to insert after this region to make sure

215

# we don't destroy ourselves

216

i = i2 + offset

217

self._l[i:i] = [('{', idx)] \

218

+ newlines \

219

+ [('}', idx)]

220

offset += 2 + len(newlines)

221

222

self._addversion(parents)

223

else:

224

# special case; adding with no parents revision; can do this

225

# more quickly by just appending unconditionally

226

self._l.append(('{', idx))

227

self._l += text

228

self._l.append(('}', idx))

229

230

self._addversion(None)

231

208

232

self._sha1s.append(sha1)

209

210

211

if not parents:

212

# special case; adding with no parents revision; can do

213

# this more quickly by just appending unconditionally.

214

# even more specially, if we're adding an empty text we

215

# need do nothing at all.

216

if text:

217

self._weave.append(('{', new_version))

218

self._weave.extend(text)

219

self._weave.append(('}', new_version))

220

221

return new_version

222

223

if len(parents) == 1:

224

pv = list(parents)[0]

225

if sha1 == self._sha1s[pv]:

226

# special case: same as the single parent

227

return new_version

228

229

230

ancestors = self.inclusions(parents)

231

232

l = self._weave

233

234

# basis a list of (origin, lineno, line)

235

basis_lineno = []

236

basis_lines = []

237

for origin, lineno, line in self._extract(ancestors):

238

basis_lineno.append(lineno)

239

basis_lines.append(line)

240

241

# another small special case: a merge, producing the same text as auto-merge

242

if text == basis_lines:

243

return new_version

244

245

# add a sentinel, because we can also match against the final line

246

basis_lineno.append(len(self._weave))

247

248

# XXX: which line of the weave should we really consider

249

# matches the end of the file? the current code says it's the

250

# last line of the weave?

251

252

#print 'basis_lines:', basis_lines

253

#print 'new_lines: ', lines

254

255

from difflib import SequenceMatcher

256

s = SequenceMatcher(None, basis_lines, text)

257

258

# offset gives the number of lines that have been inserted

259

# into the weave up to the current point; if the original edit instruction

260

# says to change line A then we actually change (A+offset)

261

offset = 0

262

263

for tag, i1, i2, j1, j2 in s.get_opcodes():

264

# i1,i2 are given in offsets within basis_lines; we need to map them

265

# back to offsets within the entire weave

266

#print 'raw match', tag, i1, i2, j1, j2

267

if tag == 'equal':

268

continue

269

270

i1 = basis_lineno[i1]

271

i2 = basis_lineno[i2]

272

273

assert 0 <= j1 <= j2 <= len(text)

274

275

#print tag, i1, i2, j1, j2

276

277

# the deletion and insertion are handled separately.

278

# first delete the region.

279

if i1 != i2:

280

self._weave.insert(i1+offset, ('[', new_version))

281

self._weave.insert(i2+offset+1, (']', new_version))

282

offset += 2

283

284

if j1 != j2:

285

# there may have been a deletion spanning up to

286

# i2; we want to insert after this region to make sure

287

# we don't destroy ourselves

288

i = i2 + offset

289

self._weave[i:i] = ([('{', new_version)]

290

+ text[j1:j2]

291

+ [('}', new_version)])

292

offset += 2 + (j2 - j1)

293

294

return new_version

233

234

return idx

295

235

296

236

297

237

def inclusions(self, versions):

302

242

while v >= 0:

303

243

if v in i:

304

244

# include all its parents

305

i.update(self._parents[v])

245

i.update(self._v[v])

306

246

v -= 1

307

247

return i

308

248

except IndexError:

311

251

312

252

def minimal_parents(self, version):

313

253

"""Find the minimal set of parents for the version."""

314

included = self._parents[version]

254

included = self._v[version]

315

255

if not included:

316

256

return []

317

257

331

271

return mininc

332

272

333

273

274

def _addversion(self, parents):

275

if parents:

276

self._v.append(parents)

277

else:

278

self._v.append(frozenset())

279

334

280

335

281

def _check_lines(self, text):

336

282

if not isinstance(text, list):

347

293

"""Check everything in the sequence of indexes is valid"""

348

294

for i in indexes:

349

295

try:

350

self._parents[i]

296

self._v[i]

351

297

except IndexError:

352

298

raise IndexError("invalid version number %r" % i)

353

299

364

310

yield origin, text

365

311

366

312

367

def _walk(self):

368

"""Walk the weave.

369

370

Yields sequence of

371

(lineno, insert, deletes, text)

372

for each literal line.

373

"""

374

375

istack = []

376

dset = set()

377

378

lineno = 0 # line of weave, 0-based

379

380

for l in self._weave:

381

if isinstance(l, tuple):

382

c, v = l

383

isactive = None

384

if c == '{':

385

istack.append(v)

386

elif c == '}':

387

oldv = istack.pop()

388

elif c == '[':

389

assert v not in dset

390

dset.add(v)

391

elif c == ']':

392

dset.remove(v)

393

else:

394

raise WeaveFormatError('unexpected instruction %r'

395

% v)

396

else:

397

assert isinstance(l, basestring)

398

assert istack

399

yield lineno, istack[-1], dset, l

400

lineno += 1

401

402

403

404

313

def _extract(self, versions):

405

314

"""Yield annotation of lines in included set.

406

315

419

328

420

329

isactive = None

421

330

422

result = []

423

424

331

WFE = WeaveFormatError

425

332

426

for l in self._weave:

333

for l in self._l:

427

334

if isinstance(l, tuple):

428

335

c, v = l

429

336

isactive = None

447

354

if isactive is None:

448

355

isactive = (not dset) and istack and (istack[-1] in included)

449

356

if isactive:

450

result.append((istack[-1], lineno, l))

357

yield istack[-1], lineno, l

451

358

lineno += 1

452

359

453

360

if istack:

457

364

raise WFE("unclosed deletion blocks at end of weave",

458

365

dset)

459

366

460

return result

461

462

463

367

464

368

def get_iter(self, version):

465

369

"""Yield lines for the specified version."""

473

377

474

378

def mash_iter(self, included):

475

379

"""Return composed version of multiple included versions."""

380

included = frozenset(included)

476

381

for origin, lineno, text in self._extract(included):

477

382

yield text

478

383

479

384

480

385

def dump(self, to_file):

481

386

from pprint import pprint

482

print >>to_file, "Weave._weave = ",

483

pprint(self._weave, to_file)

484

print >>to_file, "Weave._parents = ",

485

pprint(self._parents, to_file)

387

print >>to_file, "Weave._l = ",

388

pprint(self._l, to_file)

389

print >>to_file, "Weave._v = ",

390

pprint(self._v, to_file)

486

391

487

392

488

393

489

394

def numversions(self):

490

l = len(self._parents)

395

l = len(self._v)

491

396

assert l == len(self._sha1s)

492

397

return l

493

398

494

399

495

def __len__(self):

496

return self.numversions()

497

498

499

400

def check(self, progress_bar=None):

500

401

# check no circular inclusions

501

402

for version in range(self.numversions()):

502

inclusions = list(self._parents[version])

403

inclusions = list(self._v[version])

503

404

if inclusions:

504

405

inclusions.sort()

505

406

if inclusions[-1] >= version:

565

466

If line1=line2, this is a pure insert; if newlines=[] this is a

566

467

pure delete. (Similar to difflib.)

567

468

"""

568

569

570

571

def plan_merge(self, ver_a, ver_b):

572

"""Return pseudo-annotation indicating how the two versions merge.

573

574

This is computed between versions a and b and their common

575

base.

576

577

Weave lines present in none of them are skipped entirely.

578

"""

579

inc_a = self.inclusions([ver_a])

580

inc_b = self.inclusions([ver_b])

581

inc_c = inc_a & inc_b

582

583

for lineno, insert, deleteset, line in self._walk():

584

if deleteset & inc_c:

585

# killed in parent; can't be in either a or b

586

# not relevant to our work

587

yield 'killed-base', line

588

elif insert in inc_c:

589

# was inserted in base

590

killed_a = bool(deleteset & inc_a)

591

killed_b = bool(deleteset & inc_b)

592

if killed_a and killed_b:

593

yield 'killed-both', line

594

elif killed_a:

595

yield 'killed-a', line

596

elif killed_b:

597

yield 'killed-b', line

598

else:

599

yield 'unchanged', line

600

elif insert in inc_a:

601

if deleteset & inc_a:

602

yield 'ghost-a', line

603

else:

604

# new in A; not in B

605

yield 'new-a', line

606

elif insert in inc_b:

607

if deleteset & inc_b:

608

yield 'ghost-b', line

609

else:

610

yield 'new-b', line

611

else:

612

# not in either revision

613

yield 'irrelevant', line

614

615

yield 'unchanged', '' # terminator

616

617

618

619

def weave_merge(self, plan):

620

lines_a = []

621

lines_b = []

622

ch_a = ch_b = False

623

624

for state, line in plan:

625

if state == 'unchanged' or state == 'killed-both':

626

# resync and flush any queued conflicting changes

627

if not lines_a and not lines_b:

628

pass

629

elif ch_a and not ch_b:

630

# one-sided change:

631

for l in lines_a: yield l

632

elif ch_b and not ch_a:

633

for l in lines_b: yield l

634

elif lines_a == lines_b:

635

for l in lines_a: yield l

636

else:

637

yield '<<<<\n'

638

for l in lines_a: yield l

639

yield '====\n'

640

for l in lines_b: yield l

641

yield '>>>>\n'

642

643

del lines_a[:]

644

del lines_b[:]

645

ch_a = ch_b = False

646

647

if state == 'unchanged':

648

if line:

649

yield line

650

elif state == 'killed-a':

651

ch_a = True

652

lines_b.append(line)

653

elif state == 'killed-b':

654

ch_b = True

655

lines_a.append(line)

656

elif state == 'new-a':

657

ch_a = True

658

lines_a.append(line)

659

elif state == 'new-b':

660

ch_b = True

661

lines_b.append(line)

662

else:

663

assert state in ('irrelevant', 'ghost-a', 'ghost-b', 'killed-base',

664

'killed-both'), \

665

state

666

667

668

669

670

671

672

673

def weave_info(w):

469

# basis a list of (origin, lineno, line)

470

basis_lineno = []

471

basis_lines = []

472

for origin, lineno, line in self._extract(included):

473

basis_lineno.append(lineno)

474

basis_lines.append(line)

475

476

# add a sentinel, because we can also match against the final line

477

basis_lineno.append(len(self._l))

478

479

# XXX: which line of the weave should we really consider

480

# matches the end of the file? the current code says it's the

481

# last line of the weave?

482

483

from difflib import SequenceMatcher

484

s = SequenceMatcher(None, basis_lines, lines)

485

486

# TODO: Perhaps return line numbers from composed weave as well?

487

488

for tag, i1, i2, j1, j2 in s.get_opcodes():

489

##print tag, i1, i2, j1, j2

490

491

if tag == 'equal':

492

continue

493

494

# i1,i2 are given in offsets within basis_lines; we need to map them

495

# back to offsets within the entire weave

496

real_i1 = basis_lineno[i1]

497

real_i2 = basis_lineno[i2]

498

499

assert 0 <= j1

500

assert j1 <= j2

501

assert j2 <= len(lines)

502

503

yield real_i1, real_i2, lines[j1:j2]

504

505

506

507

def weave_info(filename, out):

674

508

"""Show some text information about the weave."""

675

print '%6s %40s %20s' % ('ver', 'sha1', 'parents')

676

for i in (6, 40, 20):

677

print '-' * i,

678

679

for i in range(w.numversions()):

680

sha1 = w._sha1s[i]

681

print '%6d %40s %s' % (i, sha1, ' '.join(map(str, w._parents[i])))

682

683

684

685

def weave_stats(weave_file):

686

from bzrlib.progress import ProgressBar

687

from bzrlib.weavefile import read_weave

688

689

pb = ProgressBar()

690

691

wf = file(weave_file, 'rb')

509

from weavefile import read_weave

510

wf = file(filename, 'rb')

692

511

w = read_weave(wf)

693

512

# FIXME: doesn't work on pipes

694

513

weave_size = wf.tell()

514

print >>out, "weave file size %d bytes" % weave_size

515

print >>out, "weave contains %d versions" % len(w._v)

695

516

696

517

total = 0

697

vers = len(w)

698

for i in range(vers):

699

pb.update('checking sizes', i, vers)

700

for line in w.get_iter(i):

701

total += len(line)

702

703

pb.clear()

704

705

print 'versions %9d' % vers

706

print 'weave file %9d bytes' % weave_size

707

print 'total contents %9d bytes' % total

708

print 'compression ratio %9.2fx' % (float(total) / float(weave_size))

709

518

print '%6s %6s %8s %40s %20s' % ('ver', 'lines', 'bytes', 'sha1', 'parents')

519

for i in (6, 6, 8, 40, 20):

520

print '-' * i,

521

522

for i in range(len(w._v)):

523

text = w.get(i)

524

lines = len(text)

525

bytes = sum((len(a) for a in text))

526

sha1 = w._sha1s[i]

527

print '%6d %6d %8d %40s' % (i, lines, bytes, sha1),

528

for pv in w._v[i]:

529

print pv,

530

531

total += bytes

532

533

print >>out, "versions total %d bytes" % total

534

print >>out, "compression ratio %.3f" % (float(total)/float(weave_size))

710

535

711

536

712

537

def usage():

810

635

lasto = origin

811

636

812

637

elif cmd == 'info':

813

weave_info(readit())

814

815

elif cmd == 'stats':

816

weave_stats(argv[2])

638

weave_info(argv[2], sys.stdout)

817

639

818

640

elif cmd == 'check':

819

641

w = readit()

820

642

pb = ProgressBar()

821

643

w.check(pb)

822

644

pb.clear()

823

print '%d versions ok' % w.numversions()

824

645

825

646

elif cmd == 'inclusions':

826

647

w = readit()

828

649

829

650

elif cmd == 'parents':

830

651

w = readit()

831

print ' '.join(map(str, w._parents[int(argv[3])]))

832

833

elif cmd == 'plan-merge':

834

w = readit()

835

for state, line in w.plan_merge(int(argv[3]), int(argv[4])):

836

if line:

837

print '%14s | %s' % (state, line),

652

print ' '.join(map(str, w._v[int(argv[3])]))

838

653

839

654

elif cmd == 'merge':

840

w = readit()

841

p = w.plan_merge(int(argv[3]), int(argv[4]))

842

sys.stdout.writelines(w.weave_merge(p))

843

844

elif cmd == 'mash-merge':

845

655

if len(argv) != 5:

846

656

usage()

847

657

return 1

Older »