~bzr-pqm/bzr/bzr.dev : revision 2520.4.1

1

from bzrlib.lazy_import import lazy_import

2

3

lazy_import(globals(), """

4

import errno

5

import itertools

6

import os

7

from StringIO import StringIO

8

9

from bzrlib import (

10

patiencediff,

11

trace,

12

ui,

13

)

14

from bzrlib.util import bencode

15

""")

16

from bzrlib.tuned_gzip import GzipFile

17

18

19

def topo_iter(vf):
    """Iterate through the versions of vf in topological order.

    A version is yielded only once every parent reported by
    vf.get_parents() has been yielded.  Versions with no parents are
    yielded first, in the order vf.versions() returns them.  A version
    with a parent that is never yielded is itself never yielded.

    :param vf: An object providing versions() and get_parents(version_id).
    :return: A generator of version ids.
    """
    seen = set()
    descendants = {}
    roots = []
    # One pass builds both the parent->children map and the list of roots.
    for version_id in vf.versions():
        parent_ids = vf.get_parents(version_id)
        if len(parent_ids) == 0:
            roots.append(version_id)
        for parent_id in parent_ids:
            descendants.setdefault(parent_id, []).append(version_id)
    cur = roots
    while len(cur) > 0:
        # Candidates for the following round; may contain duplicates, which
        # the `seen` check below filters out.
        pending = []
        for version_id in cur:
            if version_id in seen:
                continue
            if not seen.issuperset(vf.get_parents(version_id)):
                # Some parent has not been emitted yet; the version is
                # queued again when that parent is processed.
                continue
            pending.extend(descendants.get(version_id, []))
            yield version_id
            seen.add(version_id)
        cur = pending

38

39

40

class MultiParent(object):
    """A text expressed as an ordered list of hunks.

    Each hunk is either a NewText (lines introduced by this text) or a
    ParentText (a run of lines copied from one of the parent texts).
    """

    def __init__(self, hunks=None):
        """Create a MultiParent.

        :param hunks: The list of hunks; a fresh empty list when None.
        """
        if hunks is not None:
            self.hunks = hunks
        else:
            self.hunks = []

    def __repr__(self):
        return "MultiParent(%r)" % self.hunks

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.hunks == other.hunks)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; without this, two equal
        # instances would still compare as "not equal".
        return not self.__eq__(other)

    @staticmethod
    def from_lines(text, parents=()):
        """Produce a MultiParent from a list of lines and parents

        :param text: The lines of the text to describe.
        :param parents: A sequence of parent texts, each a list of lines.
        :return: A MultiParent.  At each position the longest match offered
            by any parent is used; lines no parent matches become NewText.
        """
        def compare(parent):
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
                                                           text)
            return matcher.get_matching_blocks()

        block_iter = [iter(compare(p)) for p in parents]

        def next_block(p):
            try:
                return next(block_iter[p])
            except StopIteration:
                return None

        # The matching block currently under consideration for each parent.
        cur_block = [next_block(p) for p in range(len(block_iter))]
        diff = MultiParent([])
        new_text = NewText([])
        cur_line = 0
        while cur_line < len(text):
            best_match = None
            for p, block in enumerate(cur_block):
                if block is None:
                    continue
                i, j, n = block
                # Skip blocks that end at or before cur_line.  Using `<=`
                # (not `<`) matters: a block ending exactly at cur_line has
                # nothing left to offer, and the next block may start right
                # here.
                while j + n <= cur_line:
                    block = cur_block[p] = next_block(p)
                    if block is None:
                        break
                    i, j, n = block
                if block is None:
                    continue
                if j > cur_line:
                    continue
                # Trim the part of the block that precedes cur_line.  The
                # remainder is non-empty because j + n > cur_line here.
                offset = cur_line - j
                i += offset
                j = cur_line
                n -= offset
                if best_match is None or n > best_match.num_lines:
                    best_match = ParentText(p, i, j, n)
            if best_match is None:
                new_text.lines.append(text[cur_line])
                cur_line += 1
            else:
                # Flush accumulated new lines before the parent reference.
                if len(new_text.lines) > 0:
                    diff.hunks.append(new_text)
                    new_text = NewText([])
                diff.hunks.append(best_match)
                cur_line += best_match.num_lines
        if len(new_text.lines) > 0:
            diff.hunks.append(new_text)
        return diff

    @classmethod
    def from_texts(cls, text, parents=()):
        """Produce a MultiParent from a text and list of parent text"""
        return cls.from_lines(text.splitlines(True),
                              [p.splitlines(True) for p in parents])

    def to_patch(self):
        """Yield text lines for a patch"""
        for hunk in self.hunks:
            for line in hunk.to_patch():
                yield line

    def patch_len(self):
        """Return the length of the patch text."""
        return len(''.join(self.to_patch()))

    def zipped_patch_len(self):
        """Return the length of the gzip-compressed patch text."""
        return len(gzip_string(self.to_patch()))

    @staticmethod
    def from_patch(lines):
        """Produce a MultiParent from a sequence of lines

        :param lines: Patch lines as produced by splitting the patch text
            on newlines only (each item ends with a newline, except
            possibly the last).
        :return: A MultiParent.
        """
        line_iter = iter(lines)
        hunks = []
        for cur_line in line_iter:
            if cur_line[0] == 'i':
                num_lines = int(cur_line.split(' ')[1])
                hunk_lines = [next(line_iter) for x in range(num_lines)]
                # The newline ending the last inserted line is restored by
                # the bare newline line that follows, if there is one.  A
                # zero-line insert has no last line to adjust.
                if len(hunk_lines) > 0:
                    hunk_lines[-1] = hunk_lines[-1][:-1]
                hunks.append(NewText(hunk_lines))
            elif cur_line[0] == '\n':
                last_lines = hunks[-1].lines
                if len(last_lines) > 0:
                    last_lines[-1] += '\n'
            else:
                assert cur_line[0] == 'c', cur_line[0]
                parent, parent_pos, child_pos, num_lines =\
                    [int(v) for v in cur_line.split(' ')[1:]]
                hunks.append(ParentText(parent, parent_pos, child_pos,
                                        num_lines))
        return MultiParent(hunks)

    def range_iterator(self):
        """Iterate through the hunks, with range indicated

        kind is "new" or "parent".
        for "new", data is a list of lines.
        for "parent", data is (parent, parent_start, parent_end)
        :return: a generator of (start, end, kind, data)
        """
        start = 0
        for hunk in self.hunks:
            if isinstance(hunk, NewText):
                kind = 'new'
                end = start + len(hunk.lines)
                data = hunk.lines
            else:
                kind = 'parent'
                # Parent hunks carry their own position in the child text.
                start = hunk.child_pos
                end = start + hunk.num_lines
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
                        hunk.num_lines)
            yield start, end, kind, data
            start = end

    def num_lines(self):
        """Return the number of lines in the text this diff describes."""
        extra_n = 0
        # Count trailing new lines until a parent hunk, whose child_pos
        # gives the absolute position, is reached.
        for hunk in reversed(self.hunks):
            if isinstance(hunk, ParentText):
                return hunk.child_pos + hunk.num_lines + extra_n
            extra_n += len(hunk.lines)
        return extra_n

    def is_snapshot(self):
        """Return True if the diff is a single NewText hunk."""
        if len(self.hunks) != 1:
            return False
        return (isinstance(self.hunks[0], NewText))

190

191

192

class NewText(object):
    """The contents of text that is introduced by this text"""

    def __init__(self, lines):
        """Create a NewText.

        :param lines: The list of introduced lines (stored, not copied).
        """
        self.lines = lines

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (other.lines == self.lines)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__.
        return not self.__eq__(other)

    def __repr__(self):
        return 'NewText(%r)' % self.lines

    def to_patch(self):
        """Yield the patch form: an 'i <count>' header, the lines, then a
        terminating newline.
        """
        yield 'i %d\n' % len(self.lines)
        for line in self.lines:
            yield line
        yield '\n'

211

212

213

class ParentText(object):
    """A reference to text present in a parent text"""

    def __init__(self, parent, parent_pos, child_pos, num_lines):
        """Create a ParentText.

        :param parent: Index of the parent in the child's parent list.
        :param parent_pos: First referenced line in the parent text.
        :param child_pos: Position of the run in the child text.
        :param num_lines: Number of lines referenced.
        """
        self.parent = parent
        self.parent_pos = parent_pos
        self.child_pos = child_pos
        self.num_lines = num_lines

    def __repr__(self):
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
            ' %(num_lines)r)' % self.__dict__

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.__dict__ == other.__dict__)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__.
        return not self.__eq__(other)

    def to_patch(self):
        """Yield the single 'c ...' patch line describing this reference."""
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
            % self.__dict__

234

235

236

class BaseVersionedFile(object):
    """VersionedFile skeleton for MultiParent

    Subclasses supply storage through add_diff(diff, version_id, parent_ids)
    and get_diff(version_id).
    """

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        """Create an empty versioned file.

        :param snapshot_interval: Number of ancestor generations examined
            by do_snapshot(); None disables automatic snapshots.
        :param max_snapshots: If not None, do_snapshot() declines once this
            many snapshots exist.
        """
        self._lines = {}          # cache: version_id -> list of lines
        self._parents = {}        # version_id -> list of parent ids
        self._snapshots = set()   # version ids stored as full texts
        self.snapshot_interval = snapshot_interval
        self.max_snapshots = max_snapshots

    def versions(self):
        """Return an iterator over the known version ids."""
        return iter(self._parents)

    def do_snapshot(self, version_id, parent_ids):
        """Decide whether a new version should be stored as a snapshot.

        :param version_id: The version being added (not consulted).
        :param parent_ids: The parents of the version being added.
        :return: True for a version with no parents, or when walking back
            snapshot_interval generations through non-snapshot ancestors
            never runs out of ancestors; False otherwise.
        """
        if self.snapshot_interval is None:
            return False
        if self.max_snapshots is not None and\
                len(self._snapshots) == self.max_snapshots:
            return False
        if len(parent_ids) == 0:
            return True
        for ignored in range(self.snapshot_interval):
            if len(parent_ids) == 0:
                # Every line of ancestry reached a snapshot already.
                return False
            generation = parent_ids
            parent_ids = []
            for ancestor_id in generation:
                if ancestor_id not in self._snapshots:
                    parent_ids.extend(self._parents[ancestor_id])
        return True

    def add_version(self, lines, version_id, parent_ids,
                    force_snapshot=None, single_parent=False):
        """Add a version, stored as a snapshot or a multi-parent diff.

        :param lines: The lines of the new text.
        :param version_id: The id of the new version.
        :param parent_ids: The ids of its parents.
        :param force_snapshot: True/False to force the decision; None to
            let do_snapshot() decide.
        :param single_parent: If true, diff against the first parent only
            (all parent ids are still recorded).
        """
        if force_snapshot is None:
            do_snapshot = self.do_snapshot(version_id, parent_ids)
        else:
            do_snapshot = force_snapshot
        if do_snapshot:
            self._snapshots.add(version_id)
            diff = MultiParent([NewText(lines)])
        else:
            if single_parent:
                parent_lines = self.get_line_list(parent_ids[:1])
            else:
                parent_lines = self.get_line_list(parent_ids)
            diff = MultiParent.from_lines(lines, parent_lines)
            # A diff that shares nothing with its parents is a snapshot.
            if diff.is_snapshot():
                self._snapshots.add(version_id)
        self.add_diff(diff, version_id, parent_ids)
        self._lines[version_id] = lines

    def get_parents(self, version_id):
        """Return the recorded parent ids of version_id."""
        return self._parents[version_id]

    def make_snapshot(self, version_id):
        """Re-store an existing version as a full-text snapshot."""
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
        self.add_diff(snapdiff, version_id, self._parents[version_id])
        self._snapshots.add(version_id)

    def import_versionedfile(self, vf, snapshots, no_cache=True,
                             single_parent=False, verify=False):
        """Import all revisions of a versionedfile

        :param vf: The versionedfile to import
        :param snapshots: If provided, the revisions to make snapshots of.
            Otherwise, this will be auto-determined
        :param no_cache: If true, clear the cache after every add.
        :param single_parent: If true, omit all but one parent text, (but
            retain parent metadata).
        :param verify: If true, rebuild each revision after adding it and
            assert it matches what was added.  Requires no_cache.
        """
        assert no_cache or not verify
        revisions = set(vf.versions())
        total = len(revisions)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            # Repeated passes: a revision is added once all of its parents
            # have been.
            while len(revisions) > 0:
                added = set()
                for revision in revisions:
                    parents = vf.get_parents(revision)
                    if [p for p in parents if p not in self._parents] != []:
                        continue
                    lines = [a + ' ' + l for a, l in
                             vf.annotate_iter(revision)]
                    if snapshots is None:
                        force_snapshot = None
                    else:
                        force_snapshot = (revision in snapshots)
                    self.add_version(lines, revision, parents, force_snapshot,
                                     single_parent)
                    added.add(revision)
                    if no_cache:
                        self.clear_cache()
                        vf.clear_cache()
                        if verify:
                            assert lines == self.get_line_list([revision])[0]
                            self.clear_cache()
                    pb.update('Importing revisions',
                              (total - len(revisions)) + len(added), total)
                revisions = [r for r in revisions if r not in added]
        finally:
            pb.finished()

    def select_snapshots(self, vf):
        """Choose versions of vf to snapshot.

        A version is selected when it has no parents, or when its parents
        plus their accumulated build ancestors number more than
        snapshot_interval.

        :param vf: An object providing versions() and get_parents().
        :return: The set of selected version ids.
        """
        build_ancestors = {}
        snapshots = set()
        for version_id in topo_iter(vf):
            parents = vf.get_parents(version_id)
            if len(parents) == 0:
                snapshots.add(version_id)
                build_ancestors[version_id] = set()
                continue
            potential_build_ancestors = set(parents)
            for parent in parents:
                potential_build_ancestors.update(build_ancestors[parent])
            if len(potential_build_ancestors) > self.snapshot_interval:
                snapshots.add(version_id)
                build_ancestors[version_id] = set()
            else:
                build_ancestors[version_id] = potential_build_ancestors
        return snapshots

    def select_by_size(self, num):
        """Select snapshots for minimum output size

        :param num: The total number of snapshots wanted, including those
            that already exist.
        :return: Version ids taken from the tail of get_size_ranking();
            empty when enough snapshots already exist.
        """
        num -= len(self._snapshots)
        if num <= 0:
            # [-0:] would select the whole ranking, and a negative count
            # would select an arbitrary tail.
            return []
        new_snapshots = self.get_size_ranking()[-num:]
        return [v for n, v in new_snapshots]

    def get_size_ranking(self):
        """Rank non-snapshot versions by snapshot size minus diff size.

        :return: A list of (snapshot_len - diff_len, version_id), sorted
            ascending.
        """
        versions = []
        for version_id in self.versions():
            if version_id in self._snapshots:
                continue
            diff_len = self.get_diff(version_id).patch_len()
            snapshot_len = MultiParent([NewText(
                self.cache_version(version_id))]).patch_len()
            versions.append((snapshot_len - diff_len, version_id))
        versions.sort()
        return versions

    def import_diffs(self, vf):
        """Copy every diff, with its parent list, from vf into this file."""
        for version_id in vf.versions():
            self.add_diff(vf.get_diff(version_id), version_id,
                          vf._parents[version_id])

    def get_build_ranking(self):
        """Return all versions, greedily ordered by a build-cost score.

        Repeatedly picks the version maximising
        len(could_avoid) * len(referenced_by) and then discounts what that
        pick already covers.
        """
        could_avoid = {}
        referenced_by = {}
        for version_id in topo_iter(self):
            could_avoid[version_id] = set()
            if version_id not in self._snapshots:
                for parent_id in self._parents[version_id]:
                    could_avoid[version_id].update(could_avoid[parent_id])
                # NOTE(review): this adds every known version id (the keys
                # of self._parents), not just this version's parents --
                # confirm whether self._parents[version_id] was intended.
                could_avoid[version_id].update(self._parents)
                could_avoid[version_id].discard(version_id)
            for avoid_id in could_avoid[version_id]:
                referenced_by.setdefault(avoid_id, set()).add(version_id)
        available_versions = list(self.versions())
        ranking = []
        while len(available_versions) > 0:
            available_versions.sort(key=lambda x:
                                    len(could_avoid[x]) *
                                    len(referenced_by.get(x, [])))
            selected = available_versions.pop()
            ranking.append(selected)
            # A version nothing refers to has no entry; treat it as empty
            # rather than raising KeyError.
            selected_refs = referenced_by.get(selected, set())
            for version_id in selected_refs:
                could_avoid[version_id].difference_update(
                    could_avoid[selected])
            for version_id in could_avoid[selected]:
                referenced_by[version_id].difference_update(selected_refs)
        return ranking

    def clear_cache(self):
        """Discard all cached texts."""
        self._lines.clear()

    def get_line_list(self, version_ids):
        """Return the lines of each requested version, in order."""
        return [self.cache_version(v) for v in version_ids]

    def cache_version(self, version_id):
        """Return the lines of version_id, rebuilding and caching if needed."""
        try:
            return self._lines[version_id]
        except KeyError:
            pass
        lines = []
        reconstructor = _Reconstructor(self, self._lines,
                                       self._parents)
        reconstructor.reconstruct_version(lines, version_id)
        self._lines[version_id] = lines
        return lines

431

432

433

class MultiMemoryVersionedFile(BaseVersionedFile):
    """A BaseVersionedFile that keeps its diffs in an in-memory dict."""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        super(MultiMemoryVersionedFile, self).__init__(
            snapshot_interval, max_snapshots)
        # version_id -> MultiParent
        self._diffs = {}

    def add_diff(self, diff, version_id, parent_ids):
        """Record the diff and parent list for version_id."""
        self._parents[version_id] = parent_ids
        self._diffs[version_id] = diff

    def get_diff(self, version_id):
        """Return the diff stored for version_id."""
        stored = self._diffs[version_id]
        return stored

    def destroy(self):
        """Drop all stored diffs by rebinding to a fresh dict."""
        self._diffs = {}

448

449

450

class MultiVersionedFile(BaseVersionedFile):
    """A BaseVersionedFile stored on disk.

    Diffs are appended, gzip-compressed, to '<filename>.mpknit'; save()
    writes the index (parents, snapshots, diff offsets) to
    '<filename>.mpidx'.
    """

    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
        """Create a versioned file using filename as the path prefix."""
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        self._filename = filename
        # version_id -> (start offset, byte count) within the .mpknit file
        self._diff_offset = {}

    def get_diff(self, version_id):
        """Read and parse the stored diff for version_id."""
        start, count = self._diff_offset[version_id]
        infile = open(self._filename + '.mpknit', 'rb')
        try:
            infile.seek(start)
            sio = StringIO(infile.read(count))
        finally:
            infile.close()
        zip_file = GzipFile(None, mode='rb', fileobj=sio)
        try:
            # The first line is the 'version <id>' header; skip it.
            zip_file.readline()
            return MultiParent.from_patch(zip_file.readlines())
        finally:
            zip_file.close()

    def add_diff(self, diff, version_id, parent_ids):
        """Append the diff to the data file and record its location."""
        outfile = open(self._filename + '.mpknit', 'ab')
        try:
            # Seek to the end explicitly: on some platforms tell() on a
            # file opened for append reports 0 until the first write.
            outfile.seek(0, 2)
            start = outfile.tell()
            # Construct outside the inner try so a failed constructor is
            # not masked by close() on an unbound name.
            zip_file = GzipFile(None, mode='ab', fileobj=outfile)
            try:
                zip_file.writelines(itertools.chain(
                    ['version %s\n' % version_id], diff.to_patch()))
            finally:
                zip_file.close()
            end = outfile.tell()
        finally:
            outfile.close()
        self._diff_offset[version_id] = (start, end - start)
        self._parents[version_id] = parent_ids

    def destroy(self):
        """Delete the data and index files; missing files are ignored."""
        for suffix in ('.mpknit', '.mpidx'):
            try:
                os.unlink(self._filename + suffix)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise

    def save(self):
        """Write the index to '<filename>.mpidx'."""
        # Encode first so an encoding failure does not truncate the index.
        index = bencode.bencode(
            (self._parents, list(self._snapshots), self._diff_offset))
        outfile = open(self._filename + '.mpidx', 'wb')
        try:
            outfile.write(index)
        finally:
            outfile.close()

    def load(self):
        """Read the index from '<filename>.mpidx'."""
        infile = open(self._filename + '.mpidx', 'rb')
        try:
            index = infile.read()
        finally:
            infile.close()
        self._parents, snapshots, self._diff_offset = bencode.bdecode(index)
        self._snapshots = set(snapshots)

508

509

510

class _Reconstructor(object):

511

"""Build a text from the diffs, ancestry graph and cached lines"""

512

513

def __init__(self, diffs, lines, parents):

514

self.diffs = diffs

515

self.lines = lines

516

self.parents = parents

517

self.cursor = {}

518

519

def reconstruct(self, lines, parent_text, version_id):

520

"""Append the lines referred to by a ParentText to lines"""

521

parent_id = self.parents[version_id][parent_text.parent]

522

end = parent_text.parent_pos + parent_text.num_lines

523

return self._reconstruct(lines, parent_id, parent_text.parent_pos,

524

end)

525

526

def _reconstruct(self, lines, req_version_id, req_start, req_end):

527

"""Append lines for the requested version_id range"""

528

# stack of pending range requests

529

pending_reqs = [(req_version_id, req_start, req_end)]

530

while len(pending_reqs) > 0:

531

req_version_id, req_start, req_end = pending_reqs.pop()

532

# lazily allocate cursors for versions

533

try:

534

start, end, kind, data, iterator = self.cursor[req_version_id]

535

except KeyError:

536

iterator = self.diffs.get_diff(req_version_id).range_iterator()

537

start, end, kind, data = iterator.next()

538

if start > req_start:

539

iterator = self.diffs.get_diff(req_version_id).range_iterator()

540

start, end, kind, data = iterator.next()

541

542

# find the first hunk relevant to the request

543

while end <= req_start:

544

start, end, kind, data = iterator.next()

545

self.cursor[req_version_id] = start, end, kind, data, iterator

546

# if the hunk can't satisfy the whole request, split it in two,

547

# and leave the second half for later.

548

if req_end > end:

549

pending_reqs.append((req_version_id, end, req_end))

550

req_end = end

551

if kind == 'new':

552

lines.extend(data[req_start - start: (req_end - start)])

553

else:

554

# If the hunk is a ParentText, rewrite it as a range request

555

# for the parent, and make it the next pending request.

556

parent, parent_start, parent_end = data

557

new_version_id = self.parents[req_version_id][parent]

558

new_start = parent_start + req_start - start

559

new_end = parent_end + req_end - end

560

pending_reqs.append((new_version_id, new_start, new_end))

561

562

def reconstruct_version(self, lines, version_id):

563

length = self.diffs.get_diff(version_id).num_lines()

564

return self._reconstruct(lines, version_id, 0, length)

565

566

def gzip_string(lines):
    """Return the gzip-compressed form of the given lines.

    :param lines: An iterable of strings, written in order.
    :return: The compressed data as a string.
    """
    sio = StringIO()
    data_file = GzipFile(None, mode='wb', fileobj=sio)
    try:
        data_file.writelines(lines)
    finally:
        # Closing writes the gzip trailer; do it even if a write fails so
        # the stream is never left open.
        data_file.close()
    return sio.getvalue()