~bzr-pqm/bzr/bzr.dev : revision 5830.3.5

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

16

17

"""Repository formats built around versioned files."""

18

19

20

from bzrlib.lazy_import import lazy_import

21

lazy_import(globals(), """

22

from bzrlib import (

23

check,

24

debug,

25

fetch as _mod_fetch,

26

fifo_cache,

27

gpg,

28

graph,

29

inventory_delta,

30

lru_cache,

31

osutils,

32

revision as _mod_revision,

33

serializer as _mod_serializer,

34

static_tuple,

35

tsort,

36

ui,

37

versionedfile,

38

)

39

40

from bzrlib.recordcounter import RecordCounter

41

from bzrlib.revisiontree import InventoryRevisionTree

42

from bzrlib.testament import Testament

43

""")

44

45

from bzrlib import (

46

errors,

47

)

48

from bzrlib.decorators import (

49

needs_read_lock,

50

needs_write_lock,

51

only_raises,

52

)

53

from bzrlib.inventory import (

54

Inventory,

55

InventoryDirectory,

56

ROOT_ID,

57

entry_factory,

58

)

59

60

from bzrlib.repository import (

61

CommitBuilder,

62

InterRepository,

63

MetaDirRepository,

64

MetaDirRepositoryFormat,

65

Repository,

66

RepositoryFormat,

67

)

68

69

from bzrlib.trace import (

70

mutter,

71

)

72

73

74

class VersionedFileRepositoryFormat(RepositoryFormat):
    """Base class for all repository formats that are VersionedFiles-based."""

    supports_full_versioned_files = True

    # Should commit add an inventory, or an inventory delta, to the
    # repository?
    _commit_inv_deltas = True
    # What order should fetch operations request streams in?
    # The default is unordered as that is the cheapest for an origin to
    # provide.
    _fetch_order = 'unordered'
    # Does this repository format use deltas that can be fetched as-deltas?
    # (E.g. knits, where the knit deltas can be transplanted intact.)
    # We default to False, which will ensure that enough data to get
    # a full text out of any fetch stream will be grabbed.
    _fetch_uses_deltas = False

90

91

92

class VersionedFileCommitBuilder(CommitBuilder):
    """Commit builder implementation for versioned files based repositories.
    """

    # This commit builder supports the record_entry_contents interface.
    supports_record_entry_contents = True

    # The default CommitBuilder does not manage trees whose root is versioned.
    _versioned_root = False

101

102

def __init__(self, repository, parents, config, timestamp=None,
             timezone=None, committer=None, revprops=None,
             revision_id=None, lossy=False):
    """Set up the builder's bookkeeping on top of the base CommitBuilder.

    Every argument is forwarded, positionally and unchanged, to
    CommitBuilder.__init__; this method then initialises the state used
    to accumulate an inventory (or an inventory delta) for the commit.
    """
    super(VersionedFileCommitBuilder, self).__init__(
        repository, parents, config, timestamp, timezone, committer,
        revprops, revision_id, lossy)
    # memo'd check for no-op commits.
    self._any_changes = False
    # API compatibility, older code that used CommitBuilder did not call
    # .record_delete(), which means the delta that is computed would not be
    # valid. Callers that will call record_delete() should call
    # .will_record_deletes() to indicate that.
    self._recording_deletes = False
    # The delta is accumulated against the first parent; with no parents
    # at all it is accumulated against the null revision.
    try:
        self.basis_delta_revision = self.parents[0]
    except IndexError:
        self.basis_delta_revision = _mod_revision.NULL_REVISION
    self.new_inventory = Inventory(None)
    self._basis_delta = []
    heads_cache = graph.HeadsCache(repository.get_graph())
    self.__heads = heads_cache.heads

123

124

def will_record_deletes(self):
    """Announce that the caller is going to report deletions.

    Calling this switches on accumulation of an inventory delta. The
    resulting commit is only valid if every delete against the basis is
    then reported through builder.record_delete().
    """
    self._recording_deletes = True

132

133

def any_changes(self):
    """Report whether this commit has recorded any change so far.

    Merge-only changes count as changes. This is the core of the
    --unchanged detection in commit.

    :return: True if any changes have occurred.
    """
    changed = self._any_changes
    return changed

142

143

def _ensure_fallback_inventories(self):

144

"""Ensure that appropriate inventories are available.

145

146

This only applies to repositories that are stacked, and is about

147

enusring the stacking invariants. Namely, that for any revision that is

148

present, we either have all of the file content, or we have the parent

149

inventory and the delta file content.

150

"""

151

if not self.repository._fallback_repositories:

152

return

153

if not self.repository._format.supports_chks:

154

raise errors.BzrError("Cannot commit directly to a stacked branch"

155

" in pre-2a formats. See "

156

"https://bugs.launchpad.net/bzr/+bug/375013 for details.")

157

# This is a stacked repo, we need to make sure we have the parent

158

# inventories for the parents.

159

parent_keys = [(p,) for p in self.parents]

160

parent_map = self.repository.inventories._index.get_parent_map(parent_keys)

161

missing_parent_keys = set([pk for pk in parent_keys

162

if pk not in parent_map])

163

fallback_repos = list(reversed(self.repository._fallback_repositories))

164

missing_keys = [('inventories', pk[0])

165

for pk in missing_parent_keys]

166

resume_tokens = []

167

while missing_keys and fallback_repos:

168

fallback_repo = fallback_repos.pop()

169

source = fallback_repo._get_source(self.repository._format)

170

sink = self.repository._get_sink()

171

stream = source.get_stream_for_missing_keys(missing_keys)

172

missing_keys = sink.insert_stream_without_locking(stream,

173

self.repository._format)

174

if missing_keys:

175

raise errors.BzrError('Unable to fill in parent inventories for a'

176

' stacked branch')

177

178

def commit(self, message):
    """Record the revision and finish the write group.

    The message is validated, a Revision object is built from the
    builder's state and added to the repository together with the new
    inventory, fallback inventories are ensured, and the repository write
    group is committed.

    :return: The revision id of the recorded revision.
    """
    self._validate_unicode_text(message, 'commit message')
    new_revid = self._new_revision_id
    revision = _mod_revision.Revision(
        revision_id=new_revid,
        message=message,
        committer=self._committer,
        timestamp=self._timestamp,
        timezone=self._timezone,
        inventory_sha1=self.inv_sha1,
        properties=self._revprops)
    revision.parent_ids = self.parents
    repo = self.repository
    repo.add_revision(new_revid, revision, self.new_inventory, self._config)
    self._ensure_fallback_inventories()
    repo.commit_write_group()
    return new_revid

198

199

def abort(self):
    """Abandon the commit under construction.

    Aborts the write group on the repository.
    """
    repo = self.repository
    repo.abort_write_group()

203

204

def revision_tree(self):
    """Return the tree that was just committed.

    Call this after commit() to obtain a RevisionTree for the newly
    committed revision. It is preferred over Repository.revision_tree()
    because that may need to deserialize the inventory, whereas the
    builder normally still holds a copy in memory.
    """
    inv = self.new_inventory
    if inv is None:
        # No in-memory inventory was kept; load it from the repository
        # and keep it for later calls.
        inv = self.repository.get_inventory(self._new_revision_id)
        self.new_inventory = inv
    return InventoryRevisionTree(self.repository, inv,
                                 self._new_revision_id)

218

219

def finish_inventory(self):
    """Tell the builder that the inventory is finished.

    When no full inventory was built (self.new_inventory is None) the
    accumulated basis delta is stored via add_inventory_by_delta;
    otherwise the in-memory inventory is stamped with the new revision id
    and stored via add_inventory. Either way self.inv_sha1 is set from the
    repository's return value.

    :return: The inventory id in the repository, which can be used with
        repository.get_inventory.
    :raises AssertionError: if a full inventory was built but it has no
        root entry.
    """
    if self.new_inventory is None:
        # an inventory delta was accumulated without creating a new
        # inventory.
        basis_id = self.basis_delta_revision
        # We ignore the 'inventory' returned by add_inventory_by_delta
        # because self.new_inventory is used to hint to the rest of the
        # system what code path was taken
        self.inv_sha1, _ = self.repository.add_inventory_by_delta(
            basis_id, self._basis_delta, self._new_revision_id,
            self.parents)
    else:
        if self.new_inventory.root is None:
            # A root used to be synthesised here; that statement sat after
            # this raise and could never run, so it has been dropped.
            raise AssertionError('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.')
        self.new_inventory.revision_id = self._new_revision_id
        self.inv_sha1 = self.repository.add_inventory(
            self._new_revision_id,
            self.new_inventory,
            self.parents
            )
    return self._new_revision_id

247

248

def _check_root(self, ie, parent_invs, tree):

249

"""Helper for record_entry_contents.

250

251

:param ie: An entry being added.

252

:param parent_invs: The inventories of the parent revisions of the

253

commit.

254

:param tree: The tree that is being committed.

255

"""

256

# In this revision format, root entries have no knit or weave When

257

# serializing out to disk and back in root.revision is always

258

# _new_revision_id

259

ie.revision = self._new_revision_id

260

261

def _require_root_change(self, tree):
    """Enforce an appropriate root object change.

    This is called once when record_iter_changes is called, if and only if
    the root was not in the delta calculated by record_iter_changes.

    :param tree: The tree which is being committed.
    :raises errors.RootMissing: if the commit has no parents.
    """
    parent_count = len(self.parents)
    if parent_count == 0:
        raise errors.RootMissing()
    make_directory = entry_factory['directory']
    root_entry = make_directory(tree.path2id(''), '', None)
    root_entry.revision = self._new_revision_id
    # Old path and new path are both '': the root stays where it is.
    root_change = ('', '', root_entry.file_id, root_entry)
    self._basis_delta.append(root_change)

275

276

def _get_delta(self, ie, basis_inv, path):

277

"""Get a delta against the basis inventory for ie."""

278

if ie.file_id not in basis_inv:

279

# add

280

result = (None, path, ie.file_id, ie)

281

self._basis_delta.append(result)

282

return result

283

elif ie != basis_inv[ie.file_id]:

284

# common but altered

285

# TODO: avoid tis id2path call.

286

result = (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)

287

self._basis_delta.append(result)

288

return result

289

else:

290

# common, unaltered

291

return None

292

293

def _heads(self, file_id, revision_ids):

294

"""Calculate the graph heads for revision_ids in the graph of file_id.

295

296

This can use either a per-file graph or a global revision graph as we

297

have an identity relationship between the two graphs.

298

"""

299

return self.__heads(revision_ids)

300

301

def get_basis_delta(self):
    """Return the complete inventory delta versus the basis inventory.

    The delta is whatever record_delete and record_entry_contents have
    accumulated so far. The client must already have called
    will_record_deletes() to signal that it is producing a complete delta.

    :return: An inventory delta, suitable for use with apply_delta, or
        Repository.add_inventory_by_delta, etc.
    :raises AssertionError: if will_record_deletes() was never called.
    """
    if self._recording_deletes:
        return self._basis_delta
    raise AssertionError("recording deletes not activated.")

315

316

def record_delete(self, path, file_id):
    """Record that a delete occurred against a basis tree.

    This API is optional. When it is used, each call adds an item to the
    basis delta being accumulated by the commit builder and marks the
    commit as having changes. It may only be called after
    will_record_deletes() has told the builder a delta is being supplied.

    :param path: The path of the thing deleted.
    :param file_id: The file id that was deleted.
    :return: The (path, None, file_id, None) delta item that was recorded.
    :raises AssertionError: if will_record_deletes() was never called.
    """
    if self._recording_deletes:
        removal = (path, None, file_id, None)
        self._basis_delta.append(removal)
        self._any_changes = True
        return removal
    raise AssertionError("recording deletes not activated.")

333

334

def record_entry_contents(self, ie, parent_invs, path, tree,
                          content_summary):
    """Record the content of ie from tree into the commit if needed.

    Side effect: sets ie.revision when unchanged

    :param ie: An inventory entry present in the commit.
    :param parent_invs: The inventories of the parent revisions of the
        commit.
    :param path: The path the entry is at in the tree.
    :param tree: The tree which contains this entry and should be used to
        obtain content.
    :param content_summary: Summary data from the tree about the paths
        content - stat, length, exec, sha/link target. This is only
        accessed when the entry has a revision of None - that is when it is
        a candidate to commit.
    :return: A tuple (change_delta, version_recorded, fs_hash).
        change_delta is an inventory_delta change for this entry against
        the basis tree of the commit, or None if no change occurred against
        the basis tree.
        version_recorded is True if a new version of the entry has been
        recorded. For instance, committing a merge where a file was only
        changed on the other side will return (delta, False, None).
        fs_hash is either None, or the hash details for the path (currently
        a tuple of the contents sha1 and the statvalue returned by
        tree.get_file_with_stat()).
    :raises errors.RootMissing: if the new inventory has no root yet and
        ie is not a root entry (ie.parent_id is not None).
    :raises NotImplementedError: for a tree-reference when the repository
        format does not support them, or for an unknown kind.
    :raises ValueError: for a file whose executable summary is None.
    :raises AssertionError: if a carried-over entry claims to have been
        modified in this commit, or a nested tree has no reference
        revision in content_summary.
    """
    if self.new_inventory.root is None:
        if ie.parent_id is not None:
            raise errors.RootMissing()
        self._check_root(ie, parent_invs, tree)
    if ie.revision is None:
        kind = content_summary[0]
    else:
        # ie is carried over from a prior commit
        kind = ie.kind
    # XXX: repository specific check for nested tree support goes here - if
    # the repo doesn't want nested trees we skip it ?
    if (kind == 'tree-reference' and
        not self.repository._format.supports_tree_reference):
        # mismatch between commit builder logic and repository:
        # this needs the entry creation pushed down into the builder.
        raise NotImplementedError('Missing repository subtree support.')
    self.new_inventory.add(ie)

    # TODO: slow, take it out of the inner loop.
    try:
        basis_inv = parent_invs[0]
    except IndexError:
        basis_inv = Inventory(root_id=None)

    # ie.revision is always None if the InventoryEntry is considered
    # for committing. We may record the previous parents revision if the
    # content is actually unchanged against a sole head.
    if ie.revision is not None:
        if not self._versioned_root and path == '':
            # repositories that do not version the root set the root's
            # revision to the new commit even when no change occurs (more
            # specifically, they do not record a revision on the root; and
            # the rev id is assigned to the root during deserialisation -
            # this masks when a change may have occurred against the basis.
            # To match this we always issue a delta, because the revision
            # of the root will always be changing.
            if ie.file_id in basis_inv:
                delta = (basis_inv.id2path(ie.file_id), path,
                         ie.file_id, ie)
            else:
                # add
                delta = (None, path, ie.file_id, ie)
            self._basis_delta.append(delta)
            return delta, False, None
        else:
            # we don't need to commit this, because the caller already
            # determined that an existing revision of this file is
            # appropriate. If it's not being considered for committing then
            # it and all its parents to the root must be unaltered so
            # no-change against the basis.
            if ie.revision == self._new_revision_id:
                # The arguments must be interpolated with '%'; passing them
                # as a second positional argument leaves the placeholders
                # unexpanded in the error message.
                raise AssertionError("Impossible situation, a skipped "
                    "inventory entry (%r) claims to be modified in this "
                    "commit (%r)." % (ie, self._new_revision_id))
            return None, False, None
    # XXX: Friction: parent_candidates should return a list not a dict
    #      so that we don't have to walk the inventories again.
    parent_candidate_entries = ie.parent_candidates(parent_invs)
    head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
    # Collect the heads in parent order, each one only once.
    heads = []
    for inv in parent_invs:
        if ie.file_id in inv:
            old_rev = inv[ie.file_id].revision
            if old_rev in head_set:
                heads.append(old_rev)
                head_set.remove(old_rev)

    store = False
    # now we check to see if we need to write a new record to the
    # file-graph.
    # We write a new entry unless there is one head to the ancestors, and
    # the kind-derived content is unchanged.

    # Cheapest check first: no ancestors, or more than one head in the
    # ancestors, we write a new node.
    if len(heads) != 1:
        store = True
    if not store:
        # There is a single head, look it up for comparison
        parent_entry = parent_candidate_entries[heads[0]]
        # if the non-content specific data has changed, we'll be writing a
        # node:
        if (parent_entry.parent_id != ie.parent_id or
            parent_entry.name != ie.name):
            store = True
    # now we need to do content specific checks:
    if not store:
        # if the kind changed the content obviously has
        if kind != parent_entry.kind:
            store = True
    # Stat cache fingerprint feedback for the caller - None as we usually
    # don't generate one.
    fingerprint = None
    if kind == 'file':
        if content_summary[2] is None:
            raise ValueError("Files must not have executable = None")
        if not store:
            # We can't trust a check of the file length because of content
            # filtering...
            if (# if the exec bit has changed we have to store:
                parent_entry.executable != content_summary[2]):
                store = True
            elif parent_entry.text_sha1 == content_summary[3]:
                # all meta and content is unchanged (using a hash cache
                # hit to check the sha)
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False, None
            else:
                # Either there is only a hash change(no hash cache entry,
                # or same size content change), or there is no change on
                # this file at all.
                # Provide the parent's hash to the store layer, so that if
                # the content is unchanged we will not store a new node.
                nostore_sha = parent_entry.text_sha1
        if store:
            # We want to record a new node regardless of the presence or
            # absence of a content change in the file.
            nostore_sha = None
        ie.executable = content_summary[2]
        file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
        try:
            text = file_obj.read()
        finally:
            file_obj.close()
        try:
            ie.text_sha1, ie.text_size = self._add_text_to_weave(
                ie.file_id, text, heads, nostore_sha)
            # Let the caller know we generated a stat fingerprint.
            fingerprint = (ie.text_sha1, stat_value)
        except errors.ExistingContent:
            # Turns out that the file content was unchanged, and we were
            # only going to store a new node if it was changed. Carry over
            # the entry.
            ie.revision = parent_entry.revision
            ie.text_size = parent_entry.text_size
            ie.text_sha1 = parent_entry.text_sha1
            ie.executable = parent_entry.executable
            return self._get_delta(ie, basis_inv, path), False, None
    elif kind == 'directory':
        if not store:
            # all data is meta here, nothing specific to directory, so
            # carry over:
            ie.revision = parent_entry.revision
            return self._get_delta(ie, basis_inv, path), False, None
        self._add_text_to_weave(ie.file_id, '', heads, None)
    elif kind == 'symlink':
        current_link_target = content_summary[3]
        if not store:
            # symlink target is not generic metadata, check if it has
            # changed.
            if current_link_target != parent_entry.symlink_target:
                store = True
        if not store:
            # unchanged, carry over.
            ie.revision = parent_entry.revision
            ie.symlink_target = parent_entry.symlink_target
            return self._get_delta(ie, basis_inv, path), False, None
        ie.symlink_target = current_link_target
        self._add_text_to_weave(ie.file_id, '', heads, None)
    elif kind == 'tree-reference':
        if not store:
            if content_summary[3] != parent_entry.reference_revision:
                store = True
        if not store:
            # unchanged, carry over.
            ie.reference_revision = parent_entry.reference_revision
            ie.revision = parent_entry.revision
            return self._get_delta(ie, basis_inv, path), False, None
        ie.reference_revision = content_summary[3]
        if ie.reference_revision is None:
            raise AssertionError("invalid content_summary for nested tree: %r"
                % (content_summary,))
        self._add_text_to_weave(ie.file_id, '', heads, None)
    else:
        raise NotImplementedError('unknown kind')
    ie.revision = self._new_revision_id
    # The initial commit adds a root directory, but this in itself is not
    # a worthwhile commit.
    if (self.basis_delta_revision != _mod_revision.NULL_REVISION or
        path != ""):
        self._any_changes = True
    return self._get_delta(ie, basis_inv, path), True, fingerprint

546

547

def record_iter_changes(self, tree, basis_revision_id, iter_changes,
    _entry_factory=entry_factory):
    """Record a new tree via iter_changes.

    This is a generator: nothing is recorded until it is iterated, and the
    final bookkeeping (clearing self.new_inventory, updating
    self._any_changes and self.basis_delta_revision, and the root check)
    only happens once it has been run to exhaustion.

    :param tree: The tree to obtain text contents from for changed objects.
    :param basis_revision_id: The revision id of the tree the iter_changes
        has been generated against. Currently assumed to be the same
        as self.parents[0] - if it is not, errors may occur.
    :param iter_changes: An iter_changes iterator with the changes to apply
        to basis_revision_id. The iterator must not include any items with
        a current kind of None - missing items must be either filtered out
        or errored-on before record_iter_changes sees the item.
    :param _entry_factory: Private method to bind entry_factory locally for
        performance.
    :return: A generator of (file_id, relpath, fs_hash) tuples for use with
        tree._observed_sha1.
    """
    # Create an inventory delta based on deltas between all the parents and
    # deltas between all the parent inventories. We use inventory delta's
    # between the inventory objects because iter_changes masks
    # last-changed-field only changes.
    # Working data:
    # file_id -> change map, change is fileid, paths, changed, versioneds,
    # parents, names, kinds, executables
    merged_ids = {}
    # {file_id -> revision_id -> inventory entry, for entries in parent
    # trees that are not parents[0]
    parent_entries = {}
    ghost_basis = False
    try:
        revtrees = list(self.repository.revision_trees(self.parents))
    except errors.NoSuchRevision:
        # one or more ghosts, slow path.
        revtrees = []
        for revision_id in self.parents:
            try:
                revtrees.append(self.repository.revision_tree(revision_id))
            except errors.NoSuchRevision:
                if not revtrees:
                    # The first parent itself is a ghost: treat the null
                    # revision as the basis.
                    basis_revision_id = _mod_revision.NULL_REVISION
                    ghost_basis = True
                revtrees.append(self.repository.revision_tree(
                    _mod_revision.NULL_REVISION))
    # The basis inventory from a repository
    if revtrees:
        basis_inv = revtrees[0].inventory
    else:
        basis_inv = self.repository.revision_tree(
            _mod_revision.NULL_REVISION).inventory
    if len(self.parents) > 0:
        if basis_revision_id != self.parents[0] and not ghost_basis:
            raise Exception(
                "arbitrary basis parents not yet supported with merges")
        for revtree in revtrees[1:]:
            for change in revtree.inventory._make_delta(basis_inv):
                if change[1] is None:
                    # Not present in this parent.
                    continue
                if change[2] not in merged_ids:
                    if change[0] is not None:
                        basis_entry = basis_inv[change[2]]
                        merged_ids[change[2]] = [
                            # basis revid
                            basis_entry.revision,
                            # new tree revid
                            change[3].revision]
                        parent_entries[change[2]] = {
                            # basis parent
                            basis_entry.revision:basis_entry,
                            # this parent
                            change[3].revision:change[3],
                            }
                    else:
                        merged_ids[change[2]] = [change[3].revision]
                        parent_entries[change[2]] = {change[3].revision:change[3]}
                else:
                    merged_ids[change[2]].append(change[3].revision)
                    parent_entries[change[2]][change[3].revision] = change[3]
    else:
        merged_ids = {}
    # Setup the changes from the tree:
    # changes maps file_id -> (change, [parent revision_ids])
    changes= {}
    for change in iter_changes:
        # This probably looks up in basis_inv way too much.
        if change[1][0] is not None:
            head_candidate = [basis_inv[change[0]].revision]
        else:
            head_candidate = []
        changes[change[0]] = change, merged_ids.get(change[0],
            head_candidate)
    unchanged_merged = set(merged_ids) - set(changes)
    # Extend the changes dict with synthetic changes to record merges of
    # texts.
    for file_id in unchanged_merged:
        # Record a merged version of these items that did not change vs the
        # basis. This can be either identical parallel changes, or a revert
        # of a specific file after a merge. The recorded content will be
        # that of the current tree (which is the same as the basis), but
        # the per-file graph will reflect a merge.
        # NB:XXX: We are reconstructing path information we had, this
        # should be preserved instead.
        # inv delta  change: (file_id, (path_in_source, path_in_target),
        #   changed_content, versioned, parent, name, kind,
        #   executable)
        try:
            basis_entry = basis_inv[file_id]
        except errors.NoSuchId:
            # a change from basis->some_parents but file_id isn't in basis
            # so was new in the merge, which means it must have changed
            # from basis -> current, and as it hasn't the add was reverted
            # by the user. So we discard this change.
            pass
        else:
            change = (file_id,
                (basis_inv.id2path(file_id), tree.id2path(file_id)),
                False, (True, True),
                (basis_entry.parent_id, basis_entry.parent_id),
                (basis_entry.name, basis_entry.name),
                (basis_entry.kind, basis_entry.kind),
                (basis_entry.executable, basis_entry.executable))
            changes[file_id] = (change, merged_ids[file_id])
    # changes contains tuples with the change and a set of inventory
    # candidates for the file.
    # inv delta is:
    # old_path, new_path, file_id, new_inventory_entry
    seen_root = False # Is the root in the basis delta?
    inv_delta = self._basis_delta
    modified_rev = self._new_revision_id
    for change, head_candidates in changes.values():
        if change[3][1]: # versioned in target.
            # Several things may be happening here:
            # We may have a fork in the per-file graph
            #  - record a change with the content from tree
            # We may have a change against < all trees
            #  - carry over the tree that hasn't changed
            # We may have a change against all trees
            #  - record the change with the content from tree
            kind = change[6][1]
            file_id = change[0]
            entry = _entry_factory[kind](file_id, change[5][1],
                change[4][1])
            head_set = self._heads(change[0], set(head_candidates))
            heads = []
            # Preserve ordering.
            for head_candidate in head_candidates:
                if head_candidate in head_set:
                    heads.append(head_candidate)
                    head_set.remove(head_candidate)
            carried_over = False
            if len(heads) == 1:
                # Could be a carry-over situation:
                parent_entry_revs = parent_entries.get(file_id, None)
                if parent_entry_revs:
                    parent_entry = parent_entry_revs.get(heads[0], None)
                else:
                    parent_entry = None
                if parent_entry is None:
                    # The parent iter_changes was called against is the one
                    # that is the per-file head, so any change is relevant
                    # iter_changes is valid.
                    carry_over_possible = False
                else:
                    # could be a carry over situation
                    # A change against the basis may just indicate a merge,
                    # we need to check the content against the source of the
                    # merge to determine if it was changed after the merge
                    # or carried over.
                    if (parent_entry.kind != entry.kind or
                        parent_entry.parent_id != entry.parent_id or
                        parent_entry.name != entry.name):
                        # Metadata common to all entries has changed
                        # against per-file parent
                        carry_over_possible = False
                    else:
                        carry_over_possible = True
                    # per-type checks for changes against the parent_entry
                    # are done below.
            else:
                # Cannot be a carry-over situation
                carry_over_possible = False
            # Populate the entry in the delta
            if kind == 'file':
                # XXX: There is still a small race here: If someone reverts the content of a file
                # after iter_changes examines and decides it has changed,
                # we will unconditionally record a new version even if some
                # other process reverts it while commit is running (with
                # the revert happening after iter_changes did its
                # examination).
                if change[7][1]:
                    entry.executable = True
                else:
                    entry.executable = False
                if (carry_over_possible and
                    parent_entry.executable == entry.executable):
                    # Check the file length, content hash after reading
                    # the file.
                    nostore_sha = parent_entry.text_sha1
                else:
                    nostore_sha = None
                file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
                try:
                    text = file_obj.read()
                finally:
                    file_obj.close()
                try:
                    entry.text_sha1, entry.text_size = self._add_text_to_weave(
                        file_id, text, heads, nostore_sha)
                    yield file_id, change[1][1], (entry.text_sha1, stat_value)
                except errors.ExistingContent:
                    # No content change against a carry_over parent
                    # Perhaps this should also yield a fs hash update?
                    carried_over = True
                    entry.text_size = parent_entry.text_size
                    entry.text_sha1 = parent_entry.text_sha1
            elif kind == 'symlink':
                # Wants a path hint?
                entry.symlink_target = tree.get_symlink_target(file_id)
                if (carry_over_possible and
                    parent_entry.symlink_target == entry.symlink_target):
                    carried_over = True
                else:
                    self._add_text_to_weave(change[0], '', heads, None)
            elif kind == 'directory':
                if carry_over_possible:
                    carried_over = True
                else:
                    # Nothing to set on the entry.
                    # XXX: split into the Root and nonRoot versions.
                    if change[1][1] != '' or self.repository.supports_rich_root():
                        self._add_text_to_weave(change[0], '', heads, None)
            elif kind == 'tree-reference':
                if not self.repository._format.supports_tree_reference:
                    # This isn't quite sane as an error, but we shouldn't
                    # ever see this code path in practice: tree's don't
                    # permit references when the repo doesn't support tree
                    # references.
                    raise errors.UnsupportedOperation(tree.add_reference,
                        self.repository)
                reference_revision = tree.get_reference_revision(change[0])
                entry.reference_revision = reference_revision
                if (carry_over_possible and
                    parent_entry.reference_revision == reference_revision):
                    carried_over = True
                else:
                    self._add_text_to_weave(change[0], '', heads, None)
            else:
                raise AssertionError('unknown kind %r' % kind)
            if not carried_over:
                entry.revision = modified_rev
            else:
                entry.revision = parent_entry.revision
        else:
            # Not versioned in the target: recorded as a removal.
            entry = None
        new_path = change[1][1]
        inv_delta.append((change[1][0], new_path, change[0], entry))
        if new_path == '':
            seen_root = True
    # No full inventory is built on this code path; finish_inventory uses
    # the None to choose the add-by-delta route.
    self.new_inventory = None
    # The initial commit adds a root directory, but this in itself is not
    # a worthwhile commit.
    if ((len(inv_delta) > 0 and basis_revision_id != _mod_revision.NULL_REVISION) or
        (len(inv_delta) > 1 and basis_revision_id == _mod_revision.NULL_REVISION)):
        # This should perhaps be guarded by a check that the basis we
        # commit against is the basis for the commit and if not do a delta
        # against the basis.
        self._any_changes = True
    if not seen_root:
        # housekeeping root entry changes do not affect no-change commits.
        self._require_root_change(tree)
    self.basis_delta_revision = basis_revision_id

818

819

def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):
    """Add new_text to the repository texts for the revision being built.

    :param file_id: The file id the text is stored under.
    :param new_text: The text, handed unchanged to texts._add_text.
    :param parents: Revision ids from which the parent text keys are built.
    :param nostore_sha: Handed unchanged to texts._add_text.
    :return: The first two items of the texts._add_text result.
    """
    parent_keys = tuple((file_id, parent_id) for parent_id in parents)
    text_key = (file_id, self._new_revision_id)
    outcome = self.repository.texts._add_text(
        text_key, parent_keys, new_text,
        nostore_sha=nostore_sha, random_id=self.random_revid)
    return outcome[0:2]

824

825

826

class VersionedFileRootCommitBuilder(VersionedFileCommitBuilder):
    """Commit builder that actually records the root id."""

    # This builder versions the root entry properly.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents; this builder does nothing here.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
        return None

    def _require_root_change(self, tree):
        """Enforce an appropriate root object change.

        Called once when record_iter_changes is called, if and only if the
        root was not in the delta calculated by record_iter_changes.

        :param tree: The tree which is being committed.
        """
        # Versioned roots do not change unless the tree found a change, so
        # there is nothing to enforce.
        return None

850

851

852

class VersionedFileRepository(Repository):

853

"""Repository holding history for one or more branches.

854

855

The repository holds and retrieves historical information including

856

revisions and file history. It's normally accessed only by the Branch,

857

which views a particular line of development through that history.

858

859

The Repository builds on top of some byte storage facilities (the revisions,

860

signatures, inventories, texts and chk_bytes attributes) and a Transport,

861

which respectively provide byte storage and a means to access the (possibly

862

remote) disk.

863

864

The byte storage facilities are addressed via tuples, which we refer to

865

as 'keys' throughout the code base. Revision_keys, inventory_keys and

866

signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:

867

(file_id, revision_id). chk_bytes uses CHK keys - a 1-tuple with a single

868

byte string made up of a hash identifier and a hash value.

869

We use this interface because it allows low friction with the underlying

870

code that implements disk indices, network encoding and other parts of

871

bzrlib.

872

873

:ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing

874

the serialised revisions for the repository. This can be used to obtain

875

revision graph information or to access raw serialised revisions.

876

The result of trying to insert data into the repository via this store

877

is undefined: it should be considered read-only except for implementors

878

of repositories.

879

:ivar signatures: A bzrlib.versionedfile.VersionedFiles instance containing

880

the serialised signatures for the repository. This can be used to

881

obtain access to raw serialised signatures. The result of trying to

882

insert data into the repository via this store is undefined: it should

883

be considered read-only except for implementors of repositories.

884

:ivar inventories: A bzrlib.versionedfile.VersionedFiles instance containing

885

the serialised inventories for the repository. This can be used to

886

obtain unserialised inventories. The result of trying to insert data

887

into the repository via this store is undefined: it should be

888

considered read-only except for implementors of repositories.

889

:ivar texts: A bzrlib.versionedfile.VersionedFiles instance containing the

890

texts of files and directories for the repository. This can be used to

891

obtain file texts or file graphs. Note that Repository.iter_file_bytes

892

is usually a better interface for accessing file texts.

893

The result of trying to insert data into the repository via this store

894

is undefined: it should be considered read-only except for implementors

895

of repositories.

896

:ivar chk_bytes: A bzrlib.versionedfile.VersionedFiles instance containing

897

any data the repository chooses to store or have indexed by its hash.

898

The result of trying to insert data into the repository via this store

899

is undefined: it should be considered read-only except for implementors

900

of repositories.

901

:ivar _transport: Transport for file access to repository, typically

902

pointing to .bzr/repository.

903

"""

904

905

# What class to use for a CommitBuilder. Often it's simpler to change this

906

# in a Repository class subclass rather than to override

907

# get_commit_builder.

908

_commit_builder_class = VersionedFileCommitBuilder

909

910

def add_fallback_repository(self, repository):
    """Add a repository to use for looking up data not held locally.

    The fallback is validated before it is locked, so a rejected fallback
    is never left holding a read lock taken on our behalf.

    :param repository: A repository.
    :raises errors.UnstackableRepositoryFormat: If this repository's format
        does not support external lookups.
    """
    if not self._format.supports_external_lookups:
        raise errors.UnstackableRepositoryFormat(self._format, self.base)
    # This can raise, so it must happen before the fallback is locked;
    # otherwise the lock taken below would never be released.
    self._check_fallback_repository(repository)
    if self.is_locked():
        # This repository will call fallback.unlock() when we transition to
        # the unlocked state, so we make sure to increment the lock count
        repository.lock_read()
    self._fallback_repositories.append(repository)
    self.texts.add_fallback_versioned_files(repository.texts)
    self.inventories.add_fallback_versioned_files(repository.inventories)
    self.revisions.add_fallback_versioned_files(repository.revisions)
    self.signatures.add_fallback_versioned_files(repository.signatures)
    if self.chk_bytes is not None:
        self.chk_bytes.add_fallback_versioned_files(repository.chk_bytes)

929

930

@only_raises(errors.LockNotHeld, errors.LockBroken)
def unlock(self):
    """Unlock via the base class, then clear the inventory entry cache
    once control_files reports a lock count of zero.
    """
    super(VersionedFileRepository, self).unlock()
    no_locks_left = (self.control_files._lock_count == 0)
    if no_locks_left:
        self._inventory_entry_cache.clear()

935

936

def add_inventory(self, revision_id, inv, parents):
    """Add the inventory inv to the repository as revision_id.

    :param revision_id: The revision id the inventory is stored under.
    :param inv: The inventory; its revision_id must be None or equal to
        revision_id, and it must have a root.
    :param parents: The revision ids of the parents that revision_id
        is known to have and are in the repository already.

    :returns: The validator(which is a sha1 digest, though what is sha'd is
        repository format specific) of the serialized inventory.
    """
    if not self.is_in_write_group():
        raise AssertionError("%r not in write group" % (self,))
    _mod_revision.check_not_reserved_id(revision_id)
    # An inventory that already names a revision must name this one.
    if inv.revision_id is not None and inv.revision_id != revision_id:
        message = ("Mismatch between inventory revision"
                   " id and insertion revid (%r, %r)"
                   % (inv.revision_id, revision_id))
        raise AssertionError(message)
    if inv.root is None:
        raise errors.RootMissing()
    return self._add_inventory_checked(revision_id, inv, parents)

956

957

def _add_inventory_checked(self, revision_id, inv, parents):
    """Serialise inv and store it, the inputs having been checked already.

    This function can be overridden to allow different inventory styles.

    :seealso: add_inventory, for the contract.
    """
    serialised_lines = self._serializer.write_inventory_to_lines(inv)
    return self._inventory_add_lines(
        revision_id, parents, serialised_lines, check_content=False)

967

968

def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                           parents, basis_inv=None, propagate_caches=False):
    """Add a new inventory expressed as a delta against another revision.

    See the inventory developers documentation for the theory behind
    inventory deltas.

    :param basis_revision_id: The inventory id the delta was created
        against. (This does not have to be a direct parent.)
    :param delta: The inventory delta (see Inventory.apply_delta for
        details).
    :param new_revision_id: The revision id that the inventory is being
        added for.
    :param parents: The revision ids of the parents that revision_id is
        known to have and are in the repository already. These are supplied
        for repositories that depend on the inventory graph for revision
        graph access, as well as for those that pun ancestry with delta
        compression.
    :param basis_inv: The basis inventory if it is already known,
        otherwise None.
    :param propagate_caches: If True, the caches for this inventory are
        copied to and updated for the result if possible. (Not read by
        this implementation.)

    :returns: (validator, new_inv)
        The validator(which is a sha1 digest, though what is sha'd is
        repository format specific) of the serialized inventory, and the
        resulting inventory.
    """
    if not self.is_in_write_group():
        raise AssertionError("%r not in write group" % (self,))
    _mod_revision.check_not_reserved_id(new_revision_id)
    basis_tree = self.revision_tree(basis_revision_id)
    basis_tree.lock_read()
    try:
        # Note that this mutates the inventory of basis_tree, which not all
        # inventory implementations may support: A better idiom would be to
        # return a new inventory, but as there is no revision tree cache in
        # repository this is safe for now - RBC 20081013
        new_inv = basis_inv
        if new_inv is None:
            new_inv = basis_tree.inventory
        new_inv.apply_delta(delta)
        new_inv.revision_id = new_revision_id
        validator = self.add_inventory(new_revision_id, new_inv, parents)
        return (validator, new_inv)
    finally:
        basis_tree.unlock()

1014

1015

def _inventory_add_lines(self, revision_id, parents, lines,
                         check_content=True):
    """Store lines in the inventories store and return the inventory sha1.

    :param revision_id: Revision id used to build the inventory key.
    :param parents: Parent revision ids; each is wrapped into a 1-tuple key.
    :param lines: The serialised inventory lines.
    :param check_content: Passed through to inventories.add_lines.
    :return: The first item of the inventories.add_lines result.
    """
    inventory_key = (revision_id,)
    parent_keys = [(parent_id,) for parent_id in parents]
    added = self.inventories.add_lines(
        inventory_key, parent_keys, lines, check_content=check_content)
    sha1 = added[0]
    self.inventories._access.flush()
    return sha1

1023

1024

def add_revision(self, revision_id, rev, inv=None, config=None):
    """Add rev to the revision store as revision_id.

    :param revision_id: the revision id to use.
    :param rev: The revision object.
    :param inv: The inventory for the revision. if None, it will be looked
        up in the inventory storer
    :param config: If None no digital signature will be created.
        If supplied its signature_needed method will be used
        to determine if a signature should be made.
    """
    # TODO: jam 20070210 Shouldn't we check rev.revision_id and
    #       rev.parent_ids?
    _mod_revision.check_not_reserved_id(revision_id)
    if config is not None and config.signature_needed():
        if inv is None:
            inv = self.get_inventory(revision_id)
        tree = InventoryRevisionTree(self, inv, revision_id)
        plaintext = Testament(rev, tree).as_short_text()
        self.store_revision_signature(
            gpg.GPGStrategy(config), plaintext, revision_id)
    # Make sure an inventory is present for the revision, recording its
    # sha1 on the revision object either way.
    inventory_key = (revision_id,)
    if self.inventories.get_parent_map([inventory_key]):
        rev.inventory_sha1 = self.inventories.get_sha1s(
            [inventory_key])[inventory_key]
    elif inv is None:
        raise errors.WeaveRevisionNotPresent(revision_id,
                                             self.inventories)
    else:
        # yes, this is not suitable for adding with ghosts.
        rev.inventory_sha1 = self.add_inventory(revision_id, inv,
                                                rev.parent_ids)
    self._add_revision(rev)

1059

1060

def _add_revision(self, revision):
    """Serialise revision and add it to the revisions store.

    :param revision: The revision object; its revision_id and parent_ids
        provide the key and the parent keys.
    """
    serialised = self._serializer.write_revision_to_string(revision)
    revision_key = (revision.revision_id,)
    parent_keys = tuple([(parent_id,) for parent_id in revision.parent_ids])
    self.revisions.add_lines(
        revision_key, parent_keys, osutils.split_lines(serialised))

1065

1066

def _check_inventories(self, checker):
    """Check the inventories found from the revision scan.

    This is responsible for verifying the sha1 of inventories and
    creating a pending_keys set that covers data referenced by inventories.
    The work is done by _do_check_inventories under a nested progress bar
    that is always finished afterwards.
    """
    progress = ui.ui_factory.nested_progress_bar()
    try:
        self._do_check_inventories(checker, progress)
    finally:
        progress.finished()

1077

1078

def _do_check_inventories(self, checker, bar):
    """Helper for _check_inventories.

    Drains checker.pending_keys, checking inventories first and then, in
    rounds, whatever chk_bytes and texts keys the checks queue up. Problems
    are appended to checker._report_items.

    :param checker: Object providing pending_keys (a dict keyed by
        (kind,) + key tuples) and the _report_items list.
    :param bar: Progress bar to update.
    """
    keys = {'chk_bytes': set(), 'inventories': set(), 'texts': set()}
    kinds = ['chk_bytes', 'texts']
    bar.update("inventories", 0, 2)
    current_keys = checker.pending_keys
    checker.pending_keys = {}
    # Accumulate current checks.
    for key in current_keys:
        if key[0] != 'inventories' and key[0] not in kinds:
            checker._report_items.append('unknown key type %r' % (key,))
            # There is no bucket for an unknown kind: it has been reported,
            # so skip it instead of failing with a KeyError below.
            continue
        keys[key[0]].add(key[1:])
    if keys['inventories']:
        # NB: output order *should* be roughly sorted - topo or
        # inverse topo depending on repository - either way decent
        # to just delta against. However, pre-CHK formats didn't
        # try to optimise inventory layout on disk. As such the
        # pre-CHK code path does not use inventory deltas.
        last_object = None
        for record in self.inventories.check(keys=keys['inventories']):
            if record.storage_kind == 'absent':
                checker._report_items.append(
                    'Missing inventory {%s}' % (record.key,))
            else:
                last_object = self._check_record('inventories', record,
                    checker, last_object,
                    current_keys[('inventories',) + record.key])
        del keys['inventories']
    else:
        return
    bar.update("texts", 1)
    while (checker.pending_keys or keys['chk_bytes']
           or keys['texts']):
        # Something to check.
        current_keys = checker.pending_keys
        checker.pending_keys = {}
        # Accumulate current checks.
        for key in current_keys:
            if key[0] not in kinds:
                checker._report_items.append('unknown key type %r' % (key,))
                # The 'inventories' bucket is gone by now and unknown kinds
                # never had one; report and move on.
                continue
            keys[key[0]].add(key[1:])
        # Check the outermost kind only - inventories || chk_bytes || texts
        for kind in kinds:
            if keys[kind]:
                last_object = None
                for record in getattr(self, kind).check(keys=keys[kind]):
                    if record.storage_kind == 'absent':
                        checker._report_items.append(
                            'Missing %s {%s}' % (kind, record.key,))
                    else:
                        last_object = self._check_record(kind, record,
                            checker, last_object,
                            current_keys[(kind,) + record.key])
                keys[kind] = set()
                break

1134

1135

def _check_record(self, kind, record, checker, last_object, item_data):
    """Check a single record of the given kind from this repository.

    :param kind: 'inventories', 'chk_bytes' or 'texts'; anything else is
        reported as an unknown key type.
    :param record: The record to check.
    :param checker: Object whose _report_items list collects problems.
    :param last_object: The value returned by the previous call in the same
        run, or None; for inventories it selects delta-based checking.
    :param item_data: Data handed to _check_text for 'texts' records.
    :return: The deserialised inventory for 'inventories' records when the
        format has fast_deltas, otherwise None.
    """
    if kind == 'texts':
        self._check_text(record, checker, item_data)
        return None
    if kind == 'chk_bytes':
        # No code written to check chk_bytes for this repo format.
        checker._report_items.append(
            'unsupported key type chk_bytes for %s' % (record.key,))
        return None
    if kind != 'inventories':
        checker._report_items.append(
            'unknown key type %s for %s' % (kind, record.key))
        return None
    rev_id = record.key[0]
    inv = self._deserialise_inventory(rev_id,
                                      record.get_bytes_as('fulltext'))
    if last_object is None:
        # Nothing to delta against: check every entry.
        for _path, entry in inv.iter_entries():
            entry.check(checker, rev_id, inv)
    else:
        # Only check the entries the delta against last_object carries.
        for _old_path, _path, _file_id, entry in inv._make_delta(last_object):
            if entry is not None:
                entry.check(checker, rev_id, inv)
    if self._format.fast_deltas:
        return inv
    return None

1161

1162

def _check_text(self, record, checker, item_data):
    """Check a single text.

    The text is extracted and its sha1 compared with item_data[1]; a
    mismatch is appended to checker._report_items. Nothing is compared
    when item_data is false.
    """
    # Check it is extractable.
    # TODO: check length.
    storage_kind = record.storage_kind
    if storage_kind == 'chunked':
        chunks = record.get_bytes_as(storage_kind)
        sha1 = osutils.sha_strings(chunks)
        length = sum(len(chunk) for chunk in chunks)
    else:
        content = record.get_bytes_as('fulltext')
        sha1 = osutils.sha_string(content)
        length = len(content)
    if not item_data:
        return
    expected_sha1 = item_data[1]
    if sha1 == expected_sha1:
        return
    checker._report_items.append(
        'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
        (record.key, sha1, expected_sha1, item_data[2]))

1178

1179

def __init__(self, _format, a_bzrdir, control_files):
    """Instantiate a VersionedFileRepository.

    :param _format: The format of the repository on disk.
    :param a_bzrdir: The BzrDir of the repository.
    :param control_files: Control files to use for locking, etc.
    """
    # In the future we will have a single api for all stores for
    # getting file texts, inventories and revisions, then
    # this construct will accept instances of those things.
    super(VersionedFileRepository, self).__init__(
        _format, a_bzrdir, control_files)
    # Flags for tests.
    self._reconcile_does_inventory_gc = True
    self._reconcile_fixes_text_parents = False
    self._reconcile_backsup_inventory = True
    # An InventoryEntry cache, used during deserialization
    entry_cache_size = 10 * 1024
    self._inventory_entry_cache = fifo_cache.FIFOCache(entry_cache_size)
    # Is it safe to return inventory entries directly from the entry cache,
    # rather copying them?
    self._safe_to_return_from_cache = False

1200

1201

@needs_read_lock
def gather_stats(self, revid=None, committers=None):
    """See Repository.gather_stats().

    Adds a 'revisions' count to the base class result when the user
    transport is listable.
    """
    stats = super(VersionedFileRepository, self).gather_stats(
        revid, committers)
    # now gather global repository information
    # XXX: This is available for many repos regardless of listability.
    if not self.user_transport.listable():
        return stats
    # XXX: do we want to __define len__() ?
    # Maybe the versionedfiles object should provide a different
    # method to get the number of keys.
    revision_keys = self.revisions.keys()
    stats['revisions'] = len(revision_keys)
    # stats['size'] = t
    return stats

1214

1215

def get_commit_builder(self, branch, parents, config, timestamp=None,
                       timezone=None, committer=None, revprops=None,
                       revision_id=None, lossy=False):
    """Obtain a CommitBuilder for this repository.

    The builder is created from _commit_builder_class, after which a write
    group is started on this repository.

    :param branch: Branch to commit to.
    :param parents: Revision ids of the parents of the new revision.
    :param config: Configuration to use.
    :param timestamp: Optional timestamp recorded for commit.
    :param timezone: Optional timezone for timestamp.
    :param committer: Optional committer to set for commit.
    :param revprops: Optional dictionary of revision properties.
    :param revision_id: Optional revision id.
    :param lossy: Whether to discard data that can not be natively
        represented, when pushing to a foreign VCS
    """
    stacked = bool(self._fallback_repositories)
    if stacked and not self._format.supports_chks:
        raise errors.BzrError("Cannot commit directly to a stacked branch"
            " in pre-2a formats. See "
            "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
    builder = self._commit_builder_class(
        self, parents, config, timestamp, timezone, committer, revprops,
        revision_id, lossy)
    self.start_write_group()
    return builder

1240

1241

def get_missing_parent_inventories(self, check_for_missing_texts=True):
    """Return the keys of missing inventory parents for revisions added in
    this write group.

    A revision is not complete if the inventory delta for that revision
    cannot be calculated.  Therefore if the parent inventories of a
    revision are not present, the revision is incomplete, and e.g. cannot
    be streamed by a smart server.  This method finds missing inventory
    parents for revisions added in this write group.

    :param check_for_missing_texts: When False, every missing parent
        inventory is returned without looking at texts.
    :return: A set of ('inventories', rev_id) tuples, possibly empty.
    """
    if not self._format.supports_external_lookups:
        # This is only an issue for stacked repositories
        return set()
    if not self.is_in_write_group():
        raise AssertionError('not in a write group')

    # XXX: We assume that every added revision already has its
    # corresponding inventory, so we only check for parent inventories that
    # might be missing, rather than all inventories.
    missing_parents = set(self.revisions._index.get_missing_parents())
    missing_parents.discard(_mod_revision.NULL_REVISION)
    inventory_index = self.inventories._index
    present_inventories = inventory_index.get_parent_map(
        key[-1:] for key in missing_parents)
    missing_parents.difference_update(present_inventories)
    if not missing_parents:
        # No missing parent inventories.
        return set()
    if not check_for_missing_texts:
        return set(('inventories', rev_id)
                   for (rev_id,) in missing_parents)
    # Ok, now we have a list of missing inventories.  But these only matter
    # if the inventories that reference them are missing some texts they
    # appear to introduce.
    # XXX: Texts referenced by all added inventories need to be present,
    # but at the moment we're only checking for texts referenced by
    # inventories at the graph's edge.
    key_deps = self.revisions._index._key_dependencies
    key_deps.satisfy_refs_for_keys(present_inventories)
    referrers = frozenset(ref[0] for ref in key_deps.get_referrers())
    altered = self.fileids_altered_by_revision_ids(referrers)
    text_keys = set()
    for file_id, version_ids in altered.iteritems():
        text_keys.update(
            (file_id, version_id) for version_id in version_ids)
    present_texts = self.texts.get_parent_map(text_keys)
    text_keys.difference_update(present_texts)
    if not text_keys:
        # No texts are missing, so all revisions and their deltas are
        # reconstructable.
        return set()
    # Alternatively the text versions could be returned as the missing
    # keys, but this is likely to be less data.
    return set(('inventories', rev_id) for (rev_id,) in missing_parents)

1295

1296

@needs_read_lock
def has_revisions(self, revision_ids):
    """Probe to find out the presence of multiple revisions.

    :param revision_ids: An iterable of revision_ids. It is walked exactly
        once, so one-shot iterators are handled correctly.
    :return: A set of the revision_ids that were present.
    """
    # Build the keys and look for NULL_REVISION in a single pass: a second
    # pass over a one-shot iterator would see nothing.
    keys = []
    null_requested = False
    for rev_id in revision_ids:
        keys.append((rev_id,))
        if rev_id == _mod_revision.NULL_REVISION:
            null_requested = True
    parent_map = self.revisions.get_parent_map(keys)
    result = set()
    if null_requested:
        result.add(_mod_revision.NULL_REVISION)
    result.update([key[0] for key in parent_map])
    return result

1310

1311

@needs_read_lock
def get_revision_reconcile(self, revision_id):
    """'reconcile' helper routine that allows access to a revision always.

    This variant of get_revision does not cross check the weave graph
    against the revision one as get_revision does: but it should only
    be used by reconcile, or reconcile-alike commands that are correcting
    or testing the revision graph.
    """
    revisions = self._get_revisions([revision_id])
    return revisions[0]

1321

1322

@needs_read_lock
def get_revisions(self, revision_ids):
    """Get many revisions at once.

    Repositories that need to check data on every revision read should
    subclass this method.

    :param revision_ids: The revision ids to fetch.
    :return: Whatever _get_revisions returns for revision_ids.
    """
    revisions = self._get_revisions(revision_ids)
    return revisions

1330

1331

@needs_read_lock
def _get_revisions(self, revision_ids):
    """Core work logic to get many revisions without sanity checks.

    :param revision_ids: An iterable of revision ids. It is copied into a
        list first, so a one-shot iterator is handled correctly.
    :return: A list of revision objects, in the order of revision_ids.
    :raises errors.NoSuchRevision: If any requested revision is absent.
    """
    # revision_ids is walked twice (once to fetch, once to order the
    # result), so take a snapshot in case it is a one-shot iterator.
    revision_ids = list(revision_ids)
    revs = {}
    for revid, rev in self._iter_revisions(revision_ids):
        if rev is None:
            raise errors.NoSuchRevision(self, revid)
        revs[revid] = rev
    return [revs[revid] for revid in revision_ids]

1340

1341

def _iter_revisions(self, revision_ids):
    """Iterate over revision objects.

    :param revision_ids: An iterable of revisions to examine. None may be
        passed to request all revisions known to the repository. Note that
        not all repositories can find unreferenced revisions; for those
        repositories only referenced ones will be returned.
    :return: An iterator of (revid, revision) tuples. Absent revisions (
        those asked for but not available) are returned as (revid, None).
    :raises errors.InvalidRevisionId: If a supplied revision id is empty or
        not a string.
    """
    if revision_ids is None:
        revision_ids = self.all_revision_ids()
    else:
        # The ids are validated here and turned into keys below; snapshot
        # them so a one-shot iterator is not exhausted by the validation.
        revision_ids = list(revision_ids)
        for rev_id in revision_ids:
            if not rev_id or not isinstance(rev_id, basestring):
                raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
    keys = [(key,) for key in revision_ids]
    stream = self.revisions.get_record_stream(keys, 'unordered', True)
    for record in stream:
        revid = record.key[0]
        if record.storage_kind == 'absent':
            yield (revid, None)
        else:
            text = record.get_bytes_as('fulltext')
            rev = self._serializer.read_revision_from_string(text)
            yield (revid, rev)

1367

1368

@needs_write_lock
def add_signature_text(self, revision_id, signature):
    """Store a signature text for a revision.

    :param revision_id: Revision id of the revision
    :param signature: Signature text.
    """
    signature_key = (revision_id,)
    signature_lines = osutils.split_lines(signature)
    self.signatures.add_lines(signature_key, (), signature_lines)

1377

1378

def find_text_key_references(self):
    """Find the text key references within the repository.

    :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
        to whether they were referred to by the inventory of the
        revision_id that they contain. The inventory texts from all present
        revision ids are assessed to generate this report.
    """
    revision_keys = self.revisions.keys()
    inventories = self.inventories
    progress = ui.ui_factory.nested_progress_bar()
    try:
        inventory_lines = inventories.iter_lines_added_or_present_in_keys(
            revision_keys, pb=progress)
        return self._serializer._find_text_key_references(inventory_lines)
    finally:
        progress.finished()

1394

1395

def _inventory_xml_lines_for_keys(self, keys):
    """Get a line iterator of the sort needed for finding references.

    Not relevant for non-xml inventory repositories.

    Ghosts in keys are ignored.

    :param keys: The revision keys for the inventories to inspect.
    :return: An iterator over (inventory line, revid) for the fulltexts of
        all of the xml inventories specified by keys.
    """
    stream = self.inventories.get_record_stream(keys, 'unordered', True)
    for record in stream:
        if record.storage_kind == 'absent':
            # A ghost: there is no inventory text to read.
            continue
        chunks = record.get_bytes_as('chunked')
        revid = record.key[-1]
        for line in osutils.chunks_to_lines(chunks):
            yield line, revid

1414

1415

def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
                                            revision_keys):
    """Helper routine for fileids_altered_by_revision_ids.

    This performs the translation of xml lines to revision ids.

    :param line_iterator: An iterator of lines, origin_version_id
    :param revision_keys: The revision ids to filter for. This should be a
        set or other type which supports efficient __contains__ lookups, as
        the revision key from each parsed line will be looked up in the
        revision_keys filter.
    :return: a dictionary mapping altered file-ids to an iterable of
        revision_ids. Each altered file-ids has the exact revision_ids that
        altered it listed explicitly.
    """
    find_references = self._serializer._find_text_key_references
    seen = set(find_references(line_iterator).iterkeys())
    # Text keys already referenced by the parents' inventories were not
    # introduced by the revisions of interest.
    parent_keys = self._find_parent_keys_of_revisions(revision_keys)
    parent_lines = self._inventory_xml_lines_for_keys(parent_keys)
    parent_seen = set(find_references(parent_lines))
    altered = {}
    for text_key in seen - parent_seen:
        altered.setdefault(text_key[0], set()).add(text_key[-1])
    return altered

1441

1442

def _find_parent_keys_of_revisions(self, revision_keys):
    """Similar to _find_parent_ids_of_revisions, but used with keys.

    :param revision_keys: An iterable of revision_keys.
    :return: The parents of all revision_keys that are not already in
        revision_keys
    """
    parent_map = self.revisions.get_parent_map(revision_keys)
    parent_keys = set()
    for parents in parent_map.itervalues():
        parent_keys.update(parents)
    parent_keys.difference_update(revision_keys)
    parent_keys.discard(_mod_revision.NULL_REVISION)
    return parent_keys

1455

1456

def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
    """Find the file ids and versions affected by revisions.

    :param revision_ids: an iterable containing revision ids.
    :param _inv_weave: The inventory weave from this repository or None.
        If None, the inventory weave will be opened automatically.
    :return: a dictionary mapping altered file-ids to an iterable of
        revision_ids. Each altered file-ids has the exact revision_ids that
        altered it listed explicitly.
    """
    selected_keys = set((revision_id,) for revision_id in revision_ids)
    weave = _inv_weave or self.inventories
    inventory_lines = weave.iter_lines_added_or_present_in_keys(
        selected_keys, pb=None)
    return self._find_file_ids_from_xml_inventory_lines(
        inventory_lines, selected_keys)

1472

1473

def iter_files_bytes(self, desired_files):
    """Iterate through file versions.

    Files will not necessarily be returned in the order they occur in
    desired_files.  No specific order is guaranteed.

    Yields pairs of identifier, bytes_iterator.  identifier is an opaque
    value supplied by the caller as part of desired_files.  It should
    uniquely identify the file version in the caller's context.  (Examples:
    an index number or a TreeTransform trans_id.)

    bytes_iterator is an iterable of bytestrings for the file.  The
    kind of iterable and length of the bytestrings are unspecified, but for
    this implementation, it is a list of bytes produced by
    VersionedFile.get_record_stream().

    :param desired_files: a list of (file_id, revision_id, identifier)
        triples
    :raises errors.RevisionNotPresent: If a requested text is absent.
    """
    # Map each text key back to the identifier the caller supplied.
    identifiers = dict(
        ((file_id, revision_id), identifier)
        for file_id, revision_id, identifier in desired_files)
    stream = self.texts.get_record_stream(identifiers, 'unordered', True)
    for record in stream:
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        yield identifiers[record.key], record.get_bytes_as('chunked')

1499

1500

def _generate_text_key_index(self, text_key_references=None,
                             ancestors=None):
    """Generate a new text key index for the repository.

    This is an expensive function that will take considerable time to run.

    :param text_key_references: The result of find_text_key_references, or
        None to compute it here.
    :param ancestors: A parent map of the revisions to use, or None to
        compute one here from the graph over all revision ids.
    :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
        list of parents, also text keys. When a given key has no parents,
        the parents list will be [NULL_REVISION].
    """
    # All revisions, to find inventory parents.
    if ancestors is None:
        repo_graph = self.get_graph()
        ancestors = repo_graph.get_parent_map(self.all_revision_ids())
    if text_key_references is None:
        text_key_references = self.find_text_key_references()
    progress = ui.ui_factory.nested_progress_bar()
    try:
        return self._do_generate_text_key_index(
            ancestors, text_key_references, progress)
    finally:
        progress.finished()

1522

1523

def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
    """Compute the per-file parent graph for every referenced text key.

    Helper for _generate_text_key_index, split out to avoid deep nesting.

    :param ancestors: dict mapping revision id -> parent revision ids.
    :param text_key_references: dict mapping (file_id, revision_id) text
        keys to a flag; keys whose flag is false are recorded with
        [NULL_REVISION] as their parent list.
    :param pb: progress bar; updated once per processed text key.
    :return: dict mapping text key -> list of parent text keys, or
        [NULL_REVISION] when a key has no parents.
    """
    null_revision = _mod_revision.NULL_REVISION
    ordered_revisions = tsort.topo_sort(ancestors)
    keys_by_revision = dict(
        [(revision_id, set()) for revision_id in ordered_revisions])
    total_texts = len(text_key_references)
    unreferenced_keys = set()
    # A cache of the text keys to allow reuse; costs a dict of all the
    # keys, but saves a 2-tuple for every child of a given key.
    canonical_keys = {}
    for text_key, valid in text_key_references.iteritems():
        if valid:
            keys_by_revision[text_key[1]].add(text_key)
        else:
            unreferenced_keys.add(text_key)
        canonical_keys[text_key] = text_key
    del text_key_references

    text_index = {}
    # The per-file graph is backed by the text_index dict filled in below.
    text_graph = graph.Graph(graph.DictParentsProvider(text_index))
    # A cache of 10 inventories suffices for bzr.dev but may be too small
    # for large or very branchy trees. However, for 55K path trees it would
    # be easy to use too much memory. Ideally this would be gauged from the
    # available real memory, but that is always a tricky proposition.
    inventory_cache = lru_cache.LRUCache(10)

    def parent_inventory(parent_id):
        """Return the inventory of parent_id, loading it on a cache miss."""
        try:
            return inventory_cache[parent_id]
        except KeyError:
            inv = self.revision_tree(parent_id).inventory
            inventory_cache[parent_id] = inv
            return inv

    def candidate_for(file_id, parent_id):
        """Return the text key of file_id as seen from parent_id, or None."""
        direct_key = (file_id, parent_id)
        try:
            introduced_by_parent = direct_key in keys_by_revision[parent_id]
        except KeyError:
            # parent_id is a ghost: truncate the derived graph against it.
            return None
        if introduced_by_parent:
            return direct_key
        # Look at the parent commit's inventory to determine the possible
        # candidate in the per-file graph.
        inv = parent_inventory(parent_id)
        try:
            parent_entry = inv[file_id]
        except (KeyError, errors.NoSuchId):
            return None
        if parent_entry is None:
            return None
        return (file_id, parent_entry.revision)

    processed = 0
    pb.update("Calculating text parents", processed, total_texts)
    for revision_id in ordered_revisions:
        parent_ids = ancestors[revision_id]
        for text_key in keys_by_revision[revision_id]:
            pb.update("Calculating text parents", processed)
            processed += 1
            candidates = []
            for parent_id in parent_ids:
                parent_text_key = candidate_for(text_key[0], parent_id)
                if parent_text_key is not None:
                    candidates.append(canonical_keys[parent_text_key])
            # Keep only the heads, in the order the candidates were found.
            new_parents = sorted(text_graph.heads(candidates),
                                 key=candidates.index)
            if not new_parents:
                new_parents = [null_revision]
            text_index[text_key] = new_parents

    for text_key in unreferenced_keys:
        text_index[text_key] = [null_revision]
    return text_index

1606

1607

def item_keys_introduced_by(self, revision_ids, _files_pb=None):
    """Iterate over the keys of all data introduced by some revisions.

    The keys are ordered so that the corresponding items can be safely
    fetched and inserted in that order: file keys first, then the
    non-file keys.

    :param revision_ids: the revision ids whose data is wanted.
    :param _files_pb: optional progress bar handed to the file-key phase.
    :returns: An iterable producing tuples of (knit-kind, file-id,
        versions). knit-kind is one of 'file', 'inventory', 'signatures',
        'revisions'. file-id is None unless knit-kind is 'file'.
    """
    file_keys = self._find_file_keys_to_fetch(revision_ids, _files_pb)
    for item in file_keys:
        yield item
    # The progress bar is only needed for the file phase.
    del _files_pb
    non_file_keys = self._find_non_file_keys_to_fetch(revision_ids)
    for item in non_file_keys:
        yield item

1623

1624

def _find_file_keys_to_fetch(self, revision_ids, pb):
    """Yield a ("file", file_id, versions) tuple per altered file id.

    :param revision_ids: revisions whose altered file ids are wanted.
    :param pb: progress bar updated before each yield, or None.
    """
    # XXX: it's a bit weird to control the inventory weave caching in this
    # generator. Ideally the caching would be done in fetch.py I think. Or
    # maybe this generator should explicitly have the contract that it
    # should not be iterated until the previously yielded item has been
    # processed?
    altered = self.fileids_altered_by_revision_ids(
        revision_ids, self.inventories)
    total = len(altered)
    for index, (file_id, versions) in enumerate(altered.iteritems()):
        if pb is not None:
            pb.update("Fetch texts", index, total)
        yield ("file", file_id, versions)

1641

1642

def _find_non_file_keys_to_fetch(self, revision_ids):
    """Yield the inventory, signatures and revisions entries, in that order.

    :param revision_ids: the revision ids being fetched.
    """
    yield ("inventory", None, revision_ids)

    # XXX: Note ATM no callers actually pay attention to this return
    #      instead they just use the list of revision ids and ignore
    #      missing sigs. Consider removing this work entirely
    signature_keys = [(revision_id,) for revision_id in revision_ids]
    signed = set()
    for (revision_id,) in self.signatures.get_parent_map(signature_keys):
        signed.add(revision_id)
    signed.intersection_update(revision_ids)
    yield ("signatures", None, signed)

    yield ("revisions", None, revision_ids)

1659

1660

@needs_read_lock
def get_inventory(self, revision_id):
    """Return the Inventory object for a single revision id."""
    inventories = self.iter_inventories([revision_id])
    return inventories.next()

1664

1665

def iter_inventories(self, revision_ids, ordering=None):
    """Get many inventories by revision_ids.

    Some or all of the texts used in constructing the inventories are
    buffered in memory, but only a single inventory is parsed at a time.

    :param revision_ids: The expected revision ids of the inventories.
    :param ordering: optional ordering, e.g. 'topological'.  If not
        specified, the order of revision_ids will be preserved (by
        buffering if necessary).
    :return: An iterator of inventories.
    :raises ValueError: if None or NULL_REVISION is among revision_ids.
    """
    asks_for_null = (None in revision_ids) or (
        _mod_revision.NULL_REVISION in revision_ids)
    if asks_for_null:
        raise ValueError('cannot get null revision inventory')
    return self._iter_inventories(revision_ids, ordering)

1682

1683

def _iter_inventories(self, revision_ids, ordering):
    """Yield one deserialised inventory per serialised inventory text.

    Single-document based inventory iteration: each text produced by
    _iter_inventory_xmls is parsed on its own.
    """
    for xml_text, revision_id in self._iter_inventory_xmls(revision_ids,
                                                           ordering):
        yield self._deserialise_inventory(revision_id, xml_text)

1688

1689

def _iter_inventory_xmls(self, revision_ids, ordering):
    """Yield (serialised inventory text, revision_id) pairs.

    :param revision_ids: revision ids of the inventories to read.
    :param ordering: ordering passed to get_record_stream. When None the
        stream is read 'unordered' and results are buffered so that they
        come out in the order of revision_ids.
    :raises errors.NoSuchRevision: if the stream reports a record as
        'absent'.
    """
    order_as_requested = ordering is None
    if order_as_requested:
        ordering = 'unordered'
    keys = [(revision_id,) for revision_id in revision_ids]
    if not keys:
        return
    if order_as_requested:
        pending_keys = iter(keys)
        wanted_key = pending_keys.next()
    buffered = {}
    for record in self.inventories.get_record_stream(keys, ordering, True):
        if record.storage_kind == 'absent':
            raise errors.NoSuchRevision(self, record.key)
        chunks = record.get_bytes_as('chunked')
        if not order_as_requested:
            yield ''.join(chunks), record.key[-1]
            continue
        buffered[record.key] = chunks
        # Yield as many results as we can while preserving order.
        while wanted_key in buffered:
            yield ''.join(buffered.pop(wanted_key)), wanted_key[-1]
            try:
                wanted_key = pending_keys.next()
            except StopIteration:
                # We still want to fully consume the get_record_stream,
                # just in case it is not actually finished at this point
                wanted_key = None
                break

1724

1725

def _deserialise_inventory(self, revision_id, xml):
    """Transform the xml into an inventory object.

    :param revision_id: The expected revision id of the inventory.
    :param xml: A serialised inventory.
    :return: the inventory read by this repository's serializer.
    :raises AssertionError: if the parsed inventory carries a different
        revision id from the expected one.
    """
    inv = self._serializer.read_inventory_from_string(
        xml, revision_id,
        entry_cache=self._inventory_entry_cache,
        return_from_cache=self._safe_to_return_from_cache)
    if inv.revision_id == revision_id:
        return inv
    raise AssertionError('revision id mismatch %s != %s' % (
        inv.revision_id, revision_id))

1738

1739

def get_serializer_format(self):
    """Return the format_num of this repository's serializer."""
    serializer = self._serializer
    return serializer.format_num

1741

1742

@needs_read_lock
def _get_inventory_xml(self, revision_id):
    """Get the serialized inventory of revision_id as a string.

    :raises errors.HistoryMissing: if no inventory text is produced for
        revision_id.
    """
    texts = self._iter_inventory_xmls([revision_id], 'unordered')
    for xml_text, _found_revision_id in texts:
        # Only the first (and only requested) text is of interest.
        return xml_text
    raise errors.HistoryMissing(self, 'inventory', revision_id)

1751

1752

@needs_read_lock
def revision_tree(self, revision_id):
    """Return Tree for a revision on this branch.

    `revision_id` may be NULL_REVISION for the empty tree revision, in
    which case the tree wraps a new Inventory with no root.
    """
    revision_id = _mod_revision.ensure_null(revision_id)
    # TODO: refactor this to use an existing revision object
    # so we don't need to read it in twice.
    if revision_id == _mod_revision.NULL_REVISION:
        inv = Inventory(root_id=None)
        revision_id = _mod_revision.NULL_REVISION
    else:
        inv = self.get_inventory(revision_id)
    return InventoryRevisionTree(self, inv, revision_id)

1767

1768

def revision_trees(self, revision_ids):
    """Yield a Tree for each of the given revisions in this repository.

    :param revision_ids: a sequence of revision-ids;
      a revision-id may not be None or 'null:'
    """
    for inventory in self.iter_inventories(revision_ids):
        tree_revision_id = inventory.revision_id
        yield InventoryRevisionTree(self, inventory, tree_revision_id)

1777

1778

def _filtered_revision_trees(self, revision_ids, file_ids):
    """Yield Trees for revisions, each restricted to some files.

    :param revision_ids: a sequence of revision-ids;
      a revision-id may not be None or 'null:'
    :param file_ids: if not None, the result is filtered
      so that only those file-ids, their parents and their
      children are included.
    """
    for full_inventory in self.iter_inventories(revision_ids):
        # Should we introduce a FilteredRevisionTree class rather
        # than pre-filter the inventory here?
        narrowed = full_inventory.filter(file_ids)
        yield InventoryRevisionTree(self, narrowed, narrowed.revision_id)

1793

1794

def get_parent_map(self, revision_ids):
    """See graph.StackedParentsProvider.get_parent_map

    :raises ValueError: if None is among revision_ids.
    """
    null_revision = _mod_revision.NULL_REVISION
    parent_map = {}
    # The revisions index works in keys while this API works in revision
    # ids, so wrap on the way in and unwrap on the way out.
    wrapped_ids = []
    for revision_id in revision_ids:
        if revision_id is None:
            raise ValueError('get_parent_map(None) is not valid')
        if revision_id == null_revision:
            parent_map[revision_id] = ()
        else:
            wrapped_ids.append((revision_id,))
    found = self.revisions.get_parent_map(wrapped_ids)
    for (revision_id,), parent_keys in found.iteritems():
        if not parent_keys:
            parent_map[revision_id] = (null_revision,)
            continue
        parent_map[revision_id] = tuple(
            [parent_revid for (parent_revid,) in parent_keys])
    return parent_map

1815

1816

@needs_read_lock
def get_known_graph_ancestry(self, revision_ids):
    """Return the known graph for a set of revision ids and their ancestors.

    The revision ids are wrapped into interned StaticTuple keys for the
    revisions store, and the resulting graph is wrapped so that it is
    addressed by ids rather than keys.
    """
    make_key = static_tuple.StaticTuple
    keys = []
    for revision_id in revision_ids:
        keys.append(make_key(revision_id).intern())
    key_graph = self.revisions.get_known_graph_ancestry(keys)
    return graph.GraphThunkIdsToKeys(key_graph)

1824

1825

@needs_read_lock
def get_file_graph(self):
    """Return the graph walker for text revisions."""
    text_store = self.texts
    return graph.Graph(text_store)

1829

1830

def _get_versioned_file_checker(self, text_key_references=None,
                                ancestors=None):
    """Return an object suitable for checking versioned files.

    :param text_key_references: if non-None, an already built
        dictionary mapping text keys ((fileid, revision_id) tuples)
        to whether they were referred to by the inventory of the
        revision_id that they contain. If None, this will be
        calculated.
    :param ancestors: Optional result from
        self.get_graph().get_parent_map(self.all_revision_ids()) if already
        available.
    """
    checker = _VersionedFileChecker(
        self,
        text_key_references=text_key_references,
        ancestors=ancestors)
    return checker

1845

1846

@needs_read_lock
def has_signature_for_revision_id(self, revision_id):
    """Query for a revision signature for revision_id in the repository.

    :raises errors.NoSuchRevision: if the revision itself is not present.
    :return: True when the signatures store knows exactly one entry for
        revision_id.
    """
    if not self.has_revision(revision_id):
        raise errors.NoSuchRevision(self, revision_id)
    known = self.signatures.get_parent_map([(revision_id,)])
    return len(known) == 1

1854

1855

@needs_read_lock
def get_signature_text(self, revision_id):
    """Return the text for a signature.

    :raises errors.NoSuchRevision: if the signatures store reports the
        record for revision_id as absent.
    """
    signature_key = (revision_id,)
    records = self.signatures.get_record_stream(
        [signature_key], 'unordered', True)
    first_record = records.next()
    if first_record.storage_kind == 'absent':
        raise errors.NoSuchRevision(self, revision_id)
    return first_record.get_bytes_as('fulltext')

1864

1865

@needs_read_lock
def _check(self, revision_ids, callback_refs, check_repo):
    """Run a VersionedFileCheck over this repository and return it.

    :param revision_ids: not used by this implementation.
    :param callback_refs: passed to the checker's check() method.
    :param check_repo: passed to the VersionedFileCheck constructor.
    """
    checker = check.VersionedFileCheck(self, check_repo=check_repo)
    checker.check(callback_refs)
    return checker

1870

1871

def _find_inconsistent_revision_parents(self, revisions_iterator=None):
    """Find revisions with different parent lists in the revision object
    and in the index graph.

    :param revisions_iterator: None, or an iterator of (revid,
        Revision-or-None). This iterator controls the revisions checked.
        Entries whose Revision is None are skipped, since there is no
        revision object to compare the index against.
    :returns: an iterator yielding tuples of (revison-id, parents-in-index,
        parents-in-revision).
    :raises AssertionError: if the repository is not locked (raised when
        iteration starts, as this is a generator).
    """
    if not self.is_locked():
        raise AssertionError()
    vf = self.revisions
    if revisions_iterator is None:
        revisions_iterator = self._iter_revisions(None)
    for revid, revision in revisions_iterator:
        if revision is None:
            # Previously a bare 'pass', which fell through and failed with
            # AttributeError on revision.parent_ids below.
            continue
        parent_map = vf.get_parent_map([(revid,)])
        parents_according_to_index = tuple(parent[-1] for parent in
            parent_map[(revid,)])
        parents_according_to_revision = tuple(revision.parent_ids)
        if parents_according_to_index != parents_according_to_revision:
            yield (revid, parents_according_to_index,
                parents_according_to_revision)

1895

1896

def _check_for_inconsistent_revision_parents(self):
    """Raise BzrCheckError if any revision has inconsistent parents.

    All inconsistencies are collected first; the error is raised if at
    least one was found.
    """
    found = list(self._find_inconsistent_revision_parents())
    if not found:
        return
    raise errors.BzrCheckError(
        "Revision knit has inconsistent parents.")

1901

1902

def _get_sink(self):
    """Return a sink for streaming into this repository."""
    sink = StreamSink(self)
    return sink

1905

1906

def _get_source(self, to_format):
    """Return a source for streaming from this repository.

    :param to_format: the repository format the stream is destined for.
    """
    source = StreamSource(self, to_format)
    return source

1909

1910

1911

class MetaDirVersionedFileRepository(MetaDirRepository,
                                     VersionedFileRepository):
    """A meta-dir repository that works via versioned file objects."""

    def __init__(self, _format, a_bzrdir, control_files):
        """Hand all three arguments on, unchanged, to the base classes."""
        base = super(MetaDirVersionedFileRepository, self)
        base.__init__(_format, a_bzrdir, control_files)

1918

1919

1920

class MetaDirVersionedFileRepositoryFormat(MetaDirRepositoryFormat,
                                           VersionedFileRepositoryFormat):
    """Base class for metadir repository formats built on versioned files."""

1923

1924

1925

class StreamSink(object):

1926

"""An object that can insert a stream into a repository.

1927

1928

This interface handles the complexity of reserialising inventories and

1929

revisions from different formats, and allows unidirectional insertion into

1930

stacked repositories without looking for the missing basis parents

1931

beforehand.

1932

"""

1933

1934

def __init__(self, target_repo):

1935

self.target_repo = target_repo

1936

1937

def insert_stream(self, stream, src_format, resume_tokens):

1938

"""Insert a stream's content into the target repository.

1939

1940

:param src_format: a bzr repository format.

1941

1942

:return: a list of resume tokens and an iterable of keys additional

1943

items required before the insertion can be completed.

1944

"""

1945

self.target_repo.lock_write()

1946

try:

1947

if resume_tokens:

1948

self.target_repo.resume_write_group(resume_tokens)

1949

is_resume = True

1950

else:

1951

self.target_repo.start_write_group()

1952

is_resume = False

1953

try:

1954

# locked_insert_stream performs a commit|suspend.

1955

missing_keys = self.insert_stream_without_locking(stream,

1956

src_format, is_resume)

1957

if missing_keys:

1958

# suspend the write group and tell the caller what we is

1959

# missing. We know we can suspend or else we would not have

1960

# entered this code path. (All repositories that can handle

1961

# missing keys can handle suspending a write group).

1962

write_group_tokens = self.target_repo.suspend_write_group()

1963

return write_group_tokens, missing_keys

1964

hint = self.target_repo.commit_write_group()

1965

to_serializer = self.target_repo._format._serializer

1966

src_serializer = src_format._serializer

1967

if (to_serializer != src_serializer and

1968

self.target_repo._format.pack_compresses):

1969

self.target_repo.pack(hint=hint)

1970

return [], set()

1971

except:

1972

self.target_repo.abort_write_group(suppress_errors=True)

1973

raise

1974

finally:

1975

self.target_repo.unlock()

1976

1977

def insert_stream_without_locking(self, stream, src_format,

1978

is_resume=False):

1979

"""Insert a stream's content into the target repository.

1980

1981

This assumes that you already have a locked repository and an active

1982

write group.

1983

1984

:param src_format: a bzr repository format.

1985

:param is_resume: Passed down to get_missing_parent_inventories to

1986

indicate if we should be checking for missing texts at the same

1987

time.

1988

1989

:return: A set of keys that are missing.

1990

"""

1991

if not self.target_repo.is_write_locked():

1992

raise errors.ObjectNotLocked(self)

1993

if not self.target_repo.is_in_write_group():

1994

raise errors.BzrError('you must already be in a write group')

1995

to_serializer = self.target_repo._format._serializer

1996

src_serializer = src_format._serializer

1997

new_pack = None

1998

if to_serializer == src_serializer:

1999

# If serializers match and the target is a pack repository, set the

2000

# write cache size on the new pack. This avoids poor performance

2001

# on transports where append is unbuffered (such as

2002

# RemoteTransport). This is safe to do because nothing should read

2003

# back from the target repository while a stream with matching

2004

# serialization is being inserted.

2005

# The exception is that a delta record from the source that should

2006

# be a fulltext may need to be expanded by the target (see

2007

# test_fetch_revisions_with_deltas_into_pack); but we take care to

2008

# explicitly flush any buffered writes first in that rare case.

2009

try:

2010

new_pack = self.target_repo._pack_collection._new_pack

2011

except AttributeError:

2012

# Not a pack repository

2013

pass

2014

else:

2015

new_pack.set_write_cache_size(1024*1024)

2016

for substream_type, substream in stream:

2017

if 'stream' in debug.debug_flags:

2018

mutter('inserting substream: %s', substream_type)

2019

if substream_type == 'texts':

2020

self.target_repo.texts.insert_record_stream(substream)

2021

elif substream_type == 'inventories':

2022

if src_serializer == to_serializer:

2023

self.target_repo.inventories.insert_record_stream(

2024

substream)

2025

else:

2026

self._extract_and_insert_inventories(

2027

substream, src_serializer)

2028

elif substream_type == 'inventory-deltas':

2029

self._extract_and_insert_inventory_deltas(

2030

substream, src_serializer)

2031

elif substream_type == 'chk_bytes':

2032

# XXX: This doesn't support conversions, as it assumes the

2033

# conversion was done in the fetch code.

2034

self.target_repo.chk_bytes.insert_record_stream(substream)

2035

elif substream_type == 'revisions':

2036

# This may fallback to extract-and-insert more often than

2037

# required if the serializers are different only in terms of

2038

# the inventory.

2039

if src_serializer == to_serializer:

2040

self.target_repo.revisions.insert_record_stream(substream)

2041

else:

2042

self._extract_and_insert_revisions(substream,

2043

src_serializer)

2044

elif substream_type == 'signatures':

2045

self.target_repo.signatures.insert_record_stream(substream)

2046

else:

2047

raise AssertionError('kaboom! %s' % (substream_type,))

2048

# Done inserting data, and the missing_keys calculations will try to

2049

# read back from the inserted data, so flush the writes to the new pack

2050

# (if this is pack format).

2051

if new_pack is not None:

2052

new_pack._write_data('', flush=True)

2053

# Find all the new revisions (including ones from resume_tokens)

2054

missing_keys = self.target_repo.get_missing_parent_inventories(

2055

check_for_missing_texts=is_resume)

2056

try:

2057

for prefix, versioned_file in (

2058

('texts', self.target_repo.texts),

2059

('inventories', self.target_repo.inventories),

2060

('revisions', self.target_repo.revisions),

2061

('signatures', self.target_repo.signatures),

2062

('chk_bytes', self.target_repo.chk_bytes),

2063

):

2064

if versioned_file is None:

2065

continue

2066

# TODO: key is often going to be a StaticTuple object

2067

# I don't believe we can define a method by which

2068

# (prefix,) + StaticTuple will work, though we could

2069

# define a StaticTuple.sq_concat that would allow you to

2070

# pass in either a tuple or a StaticTuple as the second

2071

# object, so instead we could have:

2072

# StaticTuple(prefix) + key here...

2073

missing_keys.update((prefix,) + key for key in

2074

versioned_file.get_missing_compression_parent_keys())

2075

except NotImplementedError:

2076

# cannot even attempt suspending, and missing would have failed

2077

# during stream insertion.

2078

missing_keys = set()

2079

return missing_keys

2080

2081

def _extract_and_insert_inventory_deltas(self, substream, serializer):

2082

target_rich_root = self.target_repo._format.rich_root_data

2083

target_tree_refs = self.target_repo._format.supports_tree_reference

2084

for record in substream:

2085

# Insert the delta directly

2086

inventory_delta_bytes = record.get_bytes_as('fulltext')

2087

deserialiser = inventory_delta.InventoryDeltaDeserializer()

2088

try:

2089

parse_result = deserialiser.parse_text_bytes(

2090

inventory_delta_bytes)

2091

except inventory_delta.IncompatibleInventoryDelta, err:

2092

mutter("Incompatible delta: %s", err.msg)

2093

raise errors.IncompatibleRevision(self.target_repo._format)

2094

basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result

2095

revision_id = new_id

2096

parents = [key[0] for key in record.parents]

2097

self.target_repo.add_inventory_by_delta(

2098

basis_id, inv_delta, revision_id, parents)

2099

2100

def _extract_and_insert_inventories(self, substream, serializer,

2101

parse_delta=None):

2102

"""Generate a new inventory versionedfile in target, converting data.

2103

2104

The inventory is retrieved from the source, (deserializing it), and

2105

stored in the target (reserializing it in a different format).

2106

"""

2107

target_rich_root = self.target_repo._format.rich_root_data

2108

target_tree_refs = self.target_repo._format.supports_tree_reference

2109

for record in substream:

2110

# It's not a delta, so it must be a fulltext in the source

2111

# serializer's format.

2112

bytes = record.get_bytes_as('fulltext')

2113

revision_id = record.key[0]

2114

inv = serializer.read_inventory_from_string(bytes, revision_id)

2115

parents = [key[0] for key in record.parents]

2116

self.target_repo.add_inventory(revision_id, inv, parents)

2117

# No need to keep holding this full inv in memory when the rest of

2118

# the substream is likely to be all deltas.

2119

del inv

2120

2121

def _extract_and_insert_revisions(self, substream, serializer):

2122

for record in substream:

2123

bytes = record.get_bytes_as('fulltext')

2124

revision_id = record.key[0]

2125

rev = serializer.read_revision_from_string(bytes)

2126

if rev.revision_id != revision_id:

2127

raise AssertionError('wtf: %s != %s' % (rev, revision_id))

2128

self.target_repo.add_revision(revision_id, rev)

2129

2130

def finished(self):

2131

if self.target_repo._format._fetch_reconcile:

2132

self.target_repo.reconcile()

2133

2134

2135

class StreamSource(object):

2136

"""A source of a stream for fetching between repositories."""

2137

2138

def __init__(self, from_repository, to_format):
    """Create a StreamSource streaming from from_repository.

    :param from_repository: the repository the data is read from.
    :param to_format: the repository format the stream is produced for.
    """
    self.from_repository = from_repository
    self.to_format = to_format
    # A new RecordCounter is created for every source.
    self._record_counter = RecordCounter()

2143

2144

def delta_on_metadata(self):
    """Return True if deltas are permitted on metadata streams.

    That is on revisions and signatures. Deltas are permitted when the
    target format fetches using deltas and both sides share a serializer.
    """
    source_serializer = self.from_repository._format._serializer
    target_serializer = self.to_format._serializer
    uses_deltas = self.to_format._fetch_uses_deltas
    if not uses_deltas:
        return uses_deltas
    return source_serializer == target_serializer

2153

2154

def _fetch_revision_texts(self, revs):
    """Return the signatures and revisions substreams for revs.

    :param revs: revision ids to stream.
    :return: a list of two (substream name, record stream) pairs:
        'signatures' first, then 'revisions'.
    """
    keys = [(rev_id,) for rev_id in revs]
    fetch_order = self.to_format._fetch_order
    # fetch signatures first and then the revision texts
    # may need to be a InterRevisionStore call here.
    # A missing signature is just skipped.
    signature_records = self.from_repository.signatures.get_record_stream(
        keys, fetch_order, not self.to_format._fetch_uses_deltas)
    signatures = versionedfile.filter_absent(signature_records)
    # If a revision has a delta, this is actually expanded inside the
    # insert_record_stream code now, which is an alternate fix for
    # bug #261339
    revisions = self.from_repository.revisions.get_record_stream(
        keys, self.to_format._fetch_order, not self.delta_on_metadata())
    return [('signatures', signatures), ('revisions', revisions)]

2173

2174

def _generate_root_texts(self, revs):
    """Return the root text substreams, if any are needed.

    This will be called by get_stream between fetching weave texts and
    fetching the inventory weave. Root texts are only generated for a
    rich-root upgrade; otherwise an empty list is returned.
    """
    if not self._rich_root_upgrade():
        return []
    helper = _mod_fetch.Inter1and2Helper(self.from_repository)
    return helper.generate_root_texts(revs)

2183

2184

def get_stream(self, search):
    """Yield (substream name, substream) pairs for the searched revisions.

    The revisions named by search.get_keys() are topologically sorted and
    their data is streamed in the order reported by the source's
    item_keys_introduced_by.

    :param search: an object whose get_keys() returns the revision ids
        to stream.
    :raises AssertionError: on an unknown knit kind.
    """
    wanted = search.get_keys()
    revision_graph = self.from_repository.get_graph()
    revs = tsort.topo_sort(revision_graph.get_parent_map(wanted))
    text_keys = []
    for knit_kind, file_id, revisions in (
            self.from_repository.item_keys_introduced_by(revs)):
        if knit_kind == "file":
            # Accumulate file texts
            text_keys.extend(
                [(file_id, revision) for revision in revisions])
            continue
        if knit_kind == "signatures":
            # Nothing to do here; this will be taken care of when
            # _fetch_revision_texts happens.
            continue
        if knit_kind == "inventory":
            # Now copy the file texts.
            text_stream = self.from_repository.texts.get_record_stream(
                text_keys, self.to_format._fetch_order,
                not self.to_format._fetch_uses_deltas)
            yield ('texts', text_stream)
            # Cause an error if a text occurs after we have done the
            # copy.
            text_keys = None
            # Before we process the inventory we generate the root
            # texts (if necessary) so that the inventories references
            # will be valid.
            for root_text in self._generate_root_texts(revs):
                yield root_text
            # we fetch only the referenced inventories because we do not
            # know for unselected inventories whether all their required
            # texts are present in the other repository - it could be
            # corrupt.
            for inventory_info in self._get_inventory_stream(revs):
                yield inventory_info
        elif knit_kind == "revisions":
            for revision_info in self._fetch_revision_texts(revs):
                yield revision_info
        else:
            raise AssertionError("Unknown knit kind %r" % knit_kind)

2228

2229

def get_stream_for_missing_keys(self, missing_keys):
    """Yield substreams that supply the given missing keys.

    :param missing_keys: tuples whose first element names the versioned
        file ('texts', 'revisions', 'inventories', 'chk_bytes' or
        'signatures') and whose remaining elements form the key.
    :raises AssertionError: if any revisions key is requested, or keys
        are requested from a versioned file the source does not have.
    """
    # missing keys can only occur when we are byte copying and not
    # translating (because translation means we don't send
    # unreconstructable deltas ever).
    kinds = ('texts', 'revisions', 'inventories', 'chk_bytes', 'signatures')
    keys_by_kind = {}
    for kind in kinds:
        keys_by_kind[kind] = set()
    for key in missing_keys:
        keys_by_kind[key[0]].add(key[1:])
    if keys_by_kind['revisions']:
        # If we allowed copying revisions at this point, we could end up
        # copying a revision without copying its required texts: a
        # violation of the requirements for repository integrity.
        raise AssertionError(
            'cannot copy revisions to fill in missing deltas %s' % (
                keys_by_kind['revisions'],))
    for substream_kind, wanted in keys_by_kind.iteritems():
        vf = getattr(self.from_repository, substream_kind)
        if vf is None and wanted:
            raise AssertionError(
                "cannot fill in keys for a versioned file we don't"
                " have: %s needs %s" % (substream_kind, wanted))
        if not wanted:
            # No need to stream something we don't have
            continue
        if substream_kind == 'inventories':
            # Some missing keys are genuinely ghosts, filter those out.
            present = self.from_repository.inventories.get_parent_map(wanted)
            revs = [key[0] for key in present]
            # Get the inventory stream more-or-less as we do for the
            # original stream; there's no reason to assume that records
            # direct from the source will be suitable for the sink.  (Think
            # e.g. 2a -> 1.9-rich-root).
            for info in self._get_inventory_stream(revs, missing=True):
                yield info
            continue

        # Ask for full texts always so that we don't need more round trips
        # after this stream.
        # Some of the missing keys are genuinely ghosts, so filter absent
        # records. The Sink is responsible for doing another check to
        # ensure that ghosts don't introduce missing data for future
        # fetches.
        records = vf.get_record_stream(
            wanted, self.to_format._fetch_order, True)
        yield substream_kind, versionedfile.filter_absent(records)

2278

2279

def inventory_fetch_order(self):
    """Return the ordering in which inventories should be streamed.

    A rich-root upgrade forces 'topological'; otherwise the target
    format's _fetch_order is used.
    """
    if not self._rich_root_upgrade():
        return self.to_format._fetch_order
    return 'topological'

2284

2285

def _rich_root_upgrade(self):

2286

return (not self.from_repository._format.rich_root_data and

2287

self.to_format.rich_root_data)

2288

2289

def _get_inventory_stream(self, revision_ids, missing=False):

2290

from_format = self.from_repository._format

2291

if (from_format.supports_chks and self.to_format.supports_chks and

2292

from_format.network_name() == self.to_format.network_name()):

2293

raise AssertionError(

2294

"this case should be handled by GroupCHKStreamSource")

2295

elif 'forceinvdeltas' in debug.debug_flags:

2296

return self._get_convertable_inventory_stream(revision_ids,

2297

delta_versus_null=missing)

2298

elif from_format.network_name() == self.to_format.network_name():

2299

# Same format.

2300

return self._get_simple_inventory_stream(revision_ids,

2301

missing=missing)

2302

elif (not from_format.supports_chks and not self.to_format.supports_chks

2303

and from_format._serializer == self.to_format._serializer):

2304

# Essentially the same format.

2305

return self._get_simple_inventory_stream(revision_ids,

2306

missing=missing)

2307

else:

2308

# Any time we switch serializations, we want to use an

2309

# inventory-delta based approach.

2310

return self._get_convertable_inventory_stream(revision_ids,

2311

delta_versus_null=missing)

2312

2313

def _get_simple_inventory_stream(self, revision_ids, missing=False):

2314

# NB: This currently reopens the inventory weave in source;

2315

# using a single stream interface instead would avoid this.

2316

from_weave = self.from_repository.inventories

2317

if missing:

2318

delta_closure = True

2319

else:

2320

delta_closure = not self.delta_on_metadata()

2321

yield ('inventories', from_weave.get_record_stream(

2322

[(rev_id,) for rev_id in revision_ids],

2323

self.inventory_fetch_order(), delta_closure))

2324

2325

def _get_convertable_inventory_stream(self, revision_ids,

2326

delta_versus_null=False):

2327

# The two formats are sufficiently different that there is no fast

2328

# path, so we need to send just inventorydeltas, which any

2329

# sufficiently modern client can insert into any repository.

2330

# The StreamSink code expects to be able to

2331

# convert on the target, so we need to put bytes-on-the-wire that can

2332

# be converted. That means inventory deltas (if the remote is <1.19,

2333

# RemoteStreamSink will fallback to VFS to insert the deltas).

2334

yield ('inventory-deltas',

2335

self._stream_invs_as_deltas(revision_ids,

2336

delta_versus_null=delta_versus_null))

2337

2338

def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
    """Generate serialised inventory deltas for the given revision ids.

    Inventories are read from the source repository in 'topological'
    order.  Each one is diffed against the parent giving the shortest
    delta among those parents whose inventory this call has already
    emitted; when there is no such parent the delta is made against the
    NULL_REVISION inventory.

    :param revision_ids: The list of inventories to transmit
    :param delta_versus_null: Don't try to find a minimal delta for this
        entry, instead compute the delta versus the NULL_REVISION. This
        effectively streams a complete inventory. Used for stuff like
        filling in missing parents, etc.
    :return: A generator of versionedfile.FulltextContentFactory objects
        whose text is the serialised delta.
    """
    source_repo = self.from_repository
    inventory_keys = [(revision_id,) for revision_id in revision_ids]
    parent_map = source_repo.inventories.get_parent_map(inventory_keys)
    # XXX: possibly repos could implement a more efficient iter_inv_deltas
    # method...
    inventories = source_repo.iter_inventories(revision_ids, 'topological')
    source_format = source_repo._format
    null_revision = _mod_revision.NULL_REVISION
    already_sent = set([null_revision])
    recent_inventories = lru_cache.LRUCache(50)
    null_inventory = source_repo.revision_tree(null_revision).inventory
    # XXX: ideally the rich-root/tree-refs flags would be per-revision, not
    # per-repo (e.g.  streaming a non-rich-root revision out of a rich-root
    # repo back into a non-rich-root repo ought to be allowed)
    serializer = inventory_delta.InventoryDeltaSerializer(
        versioned_root=source_format.rich_root_data,
        tree_references=source_format.supports_tree_reference)
    for inv in inventories:
        key = (inv.revision_id,)
        parent_keys = parent_map.get(key, ())
        best_delta = None
        if parent_keys and not delta_versus_null:
            # The caller did not ask for complete inventories and we have
            # some parents that we can delta against.  Make a delta against
            # each parent so that we can find the smallest.
            for parent_key in parent_keys:
                parent_id = parent_key[0]
                if parent_id not in already_sent:
                    # We don't know that the remote side has this basis, so
                    # we can't use it.
                    continue
                if parent_id == null_revision:
                    parent_inv = null_inventory
                else:
                    parent_inv = recent_inventories.get(parent_id, None)
                    if parent_inv is None:
                        parent_inv = source_repo.get_inventory(parent_id)
                candidate = inv._make_delta(parent_inv)
                # Strictly shorter only: on a tie the earlier parent wins.
                if best_delta is None or len(candidate) < len(best_delta):
                    best_delta = candidate
                    basis_id = parent_id
        if best_delta is None:
            # Either none of the parents ended up being suitable, or we
            # were asked to delta against NULL
            basis_id = null_revision
            best_delta = inv._make_delta(null_inventory)
        already_sent.add(inv.revision_id)
        recent_inventories[inv.revision_id] = inv
        delta_lines = serializer.delta_to_lines(
            basis_id, key[-1], best_delta)
        yield versionedfile.FulltextContentFactory(
            key, parent_keys, None, ''.join(delta_lines))

2401

2402

2403

class _VersionedFileChecker(object):

2404

2405

def __init__(self, repository, text_key_references=None, ancestors=None):

2406

self.repository = repository

2407

self.text_index = self.repository._generate_text_key_index(

2408

text_key_references=text_key_references, ancestors=ancestors)

2409

2410

def calculate_file_version_parents(self, text_key):

2411

"""Calculate the correct parents for a file version according to

2412

the inventories.

2413

"""

2414

parent_keys = self.text_index[text_key]

2415

if parent_keys == [_mod_revision.NULL_REVISION]:

2416

return ()

2417

return tuple(parent_keys)

2418

2419

def check_file_version_parents(self, texts, progress_bar=None):

2420

"""Check the parents stored in a versioned file are correct.

2421

2422

It also detects file versions that are not referenced by their

2423

corresponding revision's inventory.

2424

2425

:returns: A tuple of (wrong_parents, dangling_file_versions).

2426

wrong_parents is a dict mapping {revision_id: (stored_parents,

2427

correct_parents)} for each revision_id where the stored parents

2428

are not correct. dangling_file_versions is a set of (file_id,

2429

revision_id) tuples for versions that are present in this versioned

2430

file, but not used by the corresponding inventory.

2431

"""

2432

local_progress = None

2433

if progress_bar is None:

2434

local_progress = ui.ui_factory.nested_progress_bar()

2435

progress_bar = local_progress

2436

try:

2437

return self._check_file_version_parents(texts, progress_bar)

2438

finally:

2439

if local_progress:

2440

local_progress.finished()

2441

2442

def _check_file_version_parents(self, texts, progress_bar):

2443

"""See check_file_version_parents."""

2444

wrong_parents = {}

2445

self.file_ids = set([file_id for file_id, _ in

2446

self.text_index.iterkeys()])

2447

# text keys is now grouped by file_id

2448

n_versions = len(self.text_index)

2449

progress_bar.update('loading text store', 0, n_versions)

2450

parent_map = self.repository.texts.get_parent_map(self.text_index)

2451

# On unlistable transports this could well be empty/error...

2452

text_keys = self.repository.texts.keys()

2453

unused_keys = frozenset(text_keys) - set(self.text_index)

2454

for num, key in enumerate(self.text_index.iterkeys()):

2455

progress_bar.update('checking text graph', num, n_versions)

2456

correct_parents = self.calculate_file_version_parents(key)

2457

try:

2458

knit_parents = parent_map[key]

2459

except errors.RevisionNotPresent:

2460

# Missing text!

2461

knit_parents = None

2462

if correct_parents != knit_parents:

2463

wrong_parents[key] = (knit_parents, correct_parents)

2464

return wrong_parents, unused_keys

2465

2466

2467

class InterDifferingSerializer(InterRepository):
    """InterRepository optimiser that fetches revisions in batches by
    inserting inventory deltas, file texts, signatures and revisions into
    the target one write group at a time.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Return None: no repository format is nominated for testing."""
        return None

    @staticmethod
    def is_compatible(source, target):
        """Tell whether this optimiser may be used for source -> target.

        :return: True or False.  See the inline comments for each rule.
        """
        if not source._format.supports_full_versioned_files:
            return False
        if not target._format.supports_full_versioned_files:
            return False
        # This is redundant with format.check_conversion_target(), however that
        # raises an exception, and we just want to say "False" as in we won't
        # support converting between these formats.
        if 'IDS_never' in debug.debug_flags:
            return False
        if source.supports_rich_root() and not target.supports_rich_root():
            return False
        if (source._format.supports_tree_reference
            and not target._format.supports_tree_reference):
            return False
        if target._fallback_repositories and target._format.supports_chks:
            # IDS doesn't know how to copy CHKs for the parent inventories it
            # adds to stacked repos.
            return False
        if 'IDS_always' in debug.debug_flags:
            return True
        # Only use this code path for local source and target.  IDS does far
        # too much IO (both bandwidth and roundtrips) over a network.
        if not source.bzrdir.transport.base.startswith('file:///'):
            return False
        if not target.bzrdir.transport.base.startswith('file:///'):
            return False
        return True

    def _get_trees(self, revision_ids, cache):
        """Collect (revision_id, tree) pairs for the given revisions.

        Trees are taken from cache when present; otherwise they are read
        from the source and stored in cache.  Revisions for which the
        source raises NoSuchRevision are left out.

        :param revision_ids: The revision ids to look up.
        :param cache: A mapping of revision_id -> RevisionTree, updated
            in place.
        :return: A list of (revision_id, tree) tuples.
        """
        possible_trees = []
        for rev_id in revision_ids:
            if rev_id in cache:
                possible_trees.append((rev_id, cache[rev_id]))
            else:
                # Not cached, but inventory might be present anyway.
                try:
                    tree = self.source.revision_tree(rev_id)
                except errors.NoSuchRevision:
                    # Nope, parent is ghost.
                    pass
                else:
                    cache[rev_id] = tree
                    possible_trees.append((rev_id, tree))
        return possible_trees

    def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
        """Get the best delta and base for this revision.

        :param tree: The tree to build a delta for.
        :param parent_ids: Unused.
        :param possible_trees: A non-empty list of (basis_id, basis_tree)
            candidates.
        :return: (basis_id, delta)
        """
        deltas = []
        # Generate deltas against each tree, to find the shortest.
        for basis_id, basis_tree in possible_trees:
            delta = tree.inventory._make_delta(basis_tree.inventory)
            deltas.append((len(delta), basis_id, delta))
        deltas.sort()
        return deltas[0][1:]

    def _fetch_parent_invs_for_stacking(self, parent_map, cache):
        """Find all parent revisions that are absent, but for which the
        inventory is present, and copy those inventories.

        This is necessary to preserve correctness when the source is stacked
        without fallbacks configured.  (Note that in cases like upgrade the
        source may be not have _fallback_repositories even though it is
        stacked.)

        :param parent_map: A mapping of revision_id -> parent revision ids.
        :param cache: A mapping of revision_id -> RevisionTree; each copied
            parent tree is stored in it.
        """
        parent_revs = set()
        for parents in parent_map.values():
            parent_revs.update(parents)
        present_parents = self.source.get_parent_map(parent_revs)
        absent_parents = set(parent_revs).difference(present_parents)
        parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
            (rev_id,) for rev_id in absent_parents)
        parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
        for parent_tree in self.source.revision_trees(parent_inv_ids):
            current_revision_id = parent_tree.get_revision_id()
            parents_parents_keys = parent_invs_keys_for_stacking[
                (current_revision_id,)]
            parents_parents = [key[-1] for key in parents_parents_keys]
            # These inventories are always inserted as a delta against the
            # NULL_REVISION tree.
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
            delta = parent_tree.inventory._make_delta(basis_tree.inventory)
            self.target.add_inventory_by_delta(
                basis_id, delta, current_revision_id, parents_parents)
            cache[current_revision_id] = parent_tree

    def _fetch_batch(self, revision_ids, basis_id, cache):
        """Fetch across a few revisions.

        :param revision_ids: The revisions to copy
        :param basis_id: The revision_id of a tree that must be in cache, used
            as a basis for delta when no other base is available
        :param cache: A cache of RevisionTrees that we can use.
        :return: The revision_id of the last converted tree. The RevisionTree
            for it will be in cache
        """
        # Walk though all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        root_keys_to_create = set()
        text_keys = set()
        pending_deltas = []
        pending_revisions = []
        parent_map = self.source.get_parent_map(revision_ids)
        self._fetch_parent_invs_for_stacking(parent_map, cache)
        # This flag is cleared again below; on error the caller
        # (_fetch_all_revisions) clears it.
        self.source._safe_to_return_from_cache = True
        for tree in self.source.revision_trees(revision_ids):
            # Find a inventory delta for this revision.
            # Find text entries that need to be copied, too.
            current_revision_id = tree.get_revision_id()
            parent_ids = parent_map.get(current_revision_id, ())
            parent_trees = self._get_trees(parent_ids, cache)
            possible_trees = list(parent_trees)
            if len(possible_trees) == 0:
                # There either aren't any parents, or the parents are ghosts,
                # so just use the last converted tree.
                possible_trees.append((basis_id, cache[basis_id]))
            basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
                                                           possible_trees)
            revision = self.source.get_revision(current_revision_id)
            pending_deltas.append((basis_id, delta,
                current_revision_id, revision.parent_ids))
            if self._converting_to_rich_root:
                self._revision_id_to_root_id[current_revision_id] = \
                    tree.get_root_id()
            # Determine which texts are in present in this revision but not in
            # any of the available parents.
            texts_possibly_new_in_tree = set()
            for old_path, new_path, file_id, entry in delta:
                if new_path is None:
                    # This file_id isn't present in the new rev
                    continue
                if not new_path:
                    # This is the root
                    if not self.target.supports_rich_root():
                        # The target doesn't support rich root, so we don't
                        # copy
                        continue
                    if self._converting_to_rich_root:
                        # This can't be copied normally, we have to insert
                        # it specially
                        root_keys_to_create.add((file_id, entry.revision))
                        continue
                texts_possibly_new_in_tree.add((file_id, entry.revision))
            # Drop every text that one of the candidate basis trees already
            # has at the same revision.  The candidate's id is not needed
            # here, so it is not bound to basis_id.
            for _candidate_id, basis_tree in possible_trees:
                basis_inv = basis_tree.inventory
                for file_key in list(texts_possibly_new_in_tree):
                    file_id, file_revision = file_key
                    try:
                        entry = basis_inv[file_id]
                    except errors.NoSuchId:
                        continue
                    if entry.revision == file_revision:
                        texts_possibly_new_in_tree.remove(file_key)
            text_keys.update(texts_possibly_new_in_tree)
            pending_revisions.append(revision)
            cache[current_revision_id] = tree
            basis_id = current_revision_id
        self.source._safe_to_return_from_cache = False
        # Copy file texts
        from_texts = self.source.texts
        to_texts = self.target.texts
        if root_keys_to_create:
            root_stream = _mod_fetch._new_root_data_stream(
                root_keys_to_create, self._revision_id_to_root_id, parent_map,
                self.source)
            to_texts.insert_record_stream(root_stream)
        to_texts.insert_record_stream(from_texts.get_record_stream(
            text_keys, self.target._format._fetch_order,
            not self.target._format._fetch_uses_deltas))
        # insert inventory deltas
        for delta in pending_deltas:
            self.target.add_inventory_by_delta(*delta)
        if self.target._fallback_repositories:
            # Make sure this stacked repository has all the parent inventories
            # for the new revisions that we are about to insert.  We do this
            # before adding the revisions so that no revision is added until
            # all the inventories it may depend on are added.
            # Note that this is overzealous, as we may have fetched these in an
            # earlier batch.
            parent_ids = set()
            revision_ids = set()
            for revision in pending_revisions:
                revision_ids.add(revision.revision_id)
                parent_ids.update(revision.parent_ids)
            parent_ids.difference_update(revision_ids)
            parent_ids.discard(_mod_revision.NULL_REVISION)
            parent_map = self.source.get_parent_map(parent_ids)
            # we iterate over parent_map and not parent_ids because we don't
            # want to try copying any revision which is a ghost
            for parent_tree in self.source.revision_trees(parent_map):
                current_revision_id = parent_tree.get_revision_id()
                parents_parents = parent_map[current_revision_id]
                possible_trees = self._get_trees(parents_parents, cache)
                if len(possible_trees) == 0:
                    # There either aren't any parents, or the parents are
                    # ghosts, so just use the last converted tree.
                    possible_trees.append((basis_id, cache[basis_id]))
                basis_id, delta = self._get_delta_for_revision(parent_tree,
                        parents_parents, possible_trees)
                self.target.add_inventory_by_delta(
                    basis_id, delta, current_revision_id, parents_parents)
        # insert signatures and revisions
        for revision in pending_revisions:
            try:
                signature = self.source.get_signature_text(
                    revision.revision_id)
                self.target.add_signature_text(revision.revision_id,
                    signature)
            except errors.NoSuchRevision:
                # No signature to copy for this revision.
                pass
            self.target.add_revision(revision.revision_id, revision)
        return basis_id

    def _fetch_all_revisions(self, revision_ids, pb):
        """Fetch everything for the list of revisions.

        Revisions are copied in batches of 100, each inside its own write
        group on the target; a failing batch aborts its write group and
        re-raises.

        :param revision_ids: The list of revisions to fetch. Must be in
            topological order.
        :param pb: A ProgressTask
        :return: None
        """
        basis_id, basis_tree = self._get_basis(revision_ids[0])
        batch_size = 100
        cache = lru_cache.LRUCache(100)
        cache[basis_id] = basis_tree
        del basis_tree # We don't want to hang on to it here
        hints = []

        for offset in range(0, len(revision_ids), batch_size):
            self.target.start_write_group()
            try:
                pb.update('Transferring revisions', offset,
                          len(revision_ids))
                batch = revision_ids[offset:offset+batch_size]
                basis_id = self._fetch_batch(batch, basis_id, cache)
            except:
                # Bare except on purpose: any failure must clear the cache
                # flag and abort the write group before being re-raised.
                self.source._safe_to_return_from_cache = False
                self.target.abort_write_group()
                raise
            else:
                hint = self.target.commit_write_group()
                if hint:
                    hints.extend(hint)
        if hints and self.target._format.pack_compresses:
            self.target.pack(hint=hints)
        pb.update('Transferring revisions', len(revision_ids),
                  len(revision_ids))

    @needs_write_lock
    def fetch(self, revision_id=None, find_ghosts=False,
            fetch_spec=None):
        """See InterRepository.fetch().

        :return: (number of revisions fetched, 0)
        """
        if fetch_spec is not None:
            revision_ids = fetch_spec.get_keys()
        else:
            revision_ids = None
        ui.ui_factory.warn_experimental_format_fetch(self)
        if (not self.source.supports_rich_root()
            and self.target.supports_rich_root()):
            self._converting_to_rich_root = True
            self._revision_id_to_root_id = {}
        else:
            self._converting_to_rich_root = False
        # See <https://launchpad.net/bugs/456077> asking for a warning here
        if self.source._format.network_name() != self.target._format.network_name():
            ui.ui_factory.show_user_warning('cross_format_fetch',
                from_format=self.source._format,
                to_format=self.target._format)
        if revision_ids is None:
            if revision_id:
                search_revision_ids = [revision_id]
            else:
                search_revision_ids = None
            revision_ids = self.target.search_missing_revision_ids(self.source,
                revision_ids=search_revision_ids,
                find_ghosts=find_ghosts).get_keys()
        if not revision_ids:
            return 0, 0
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        if not revision_ids:
            return 0, 0
        # Walk though all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            self._fetch_all_revisions(revision_ids, pb)
        finally:
            pb.finished()
        return len(revision_ids), 0

    def _get_basis(self, first_revision_id):
        """Get a revision and tree which exists in the target.

        This assumes that first_revision_id is selected for transmission
        because all other ancestors are already present. If we can't find an
        ancestor we fall back to NULL_REVISION since we know that is safe.

        :return: (basis_id, basis_tree)
        """
        first_rev = self.source.get_revision(first_revision_id)
        try:
            basis_id = first_rev.parent_ids[0]
            # only valid as a basis if the target has it
            self.target.get_revision(basis_id)
            # Try to get a basis tree - if it's a ghost it will hit the
            # NoSuchRevision case.
            basis_tree = self.source.revision_tree(basis_id)
        except (IndexError, errors.NoSuchRevision):
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
        return basis_id, basis_tree

2807

2808

2809

class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        # Imported here rather than at module level, as in the original.
        from bzrlib.repofmt import knitrepo
        test_format = knitrepo.RepositoryFormatKnit3()
        return test_format

    @staticmethod
    def is_compatible(source, target):
        """Tell whether source and target share a model and both formats
        support full versioned files.

        :return: The first false value among the three checks, otherwise
            the value of the last check.
        """
        same_model = InterRepository._same_model(source, target)
        if not same_model:
            return same_model
        source_has_vf = source._format.supports_full_versioned_files
        if not source_has_vf:
            return source_has_vf
        return target._format.supports_full_versioned_files

2831

2832

2833

# Register InterDifferingSerializer as an InterRepository optimiser.
InterRepository.register_optimiser(InterDifferingSerializer)

2834

# Register InterSameDataRepository as an InterRepository optimiser.
InterRepository.register_optimiser(InterSameDataRepository)

2835

2836

2837

def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples.  The signature
    may be None.

    Everything is installed inside a single write group, which is aborted
    (and the error re-raised) if anything fails.

    :param repository: The repository to install into.
    :param iterable: The (revision, tree, signature) tuples to install.
    :param num_revisions: Total passed to pb.update; may be None.
    :param pb: Optional progress bar, updated after each revision.
    """
    repository.start_write_group()
    try:
        inventory_cache = lru_cache.LRUCache(10)
        installed = 0
        for item in iterable:
            revision, revision_tree, signature = item
            _install_revision(repository, revision, revision_tree, signature,
                              inventory_cache)
            installed += 1
            if pb is not None:
                pb.update('Transferring revisions', installed, num_revisions)
    except:
        # Bare except on purpose: whatever went wrong, abort and re-raise.
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()

2856

2857

2858

def _install_revision(repository, rev, revision_tree, signature,

2859

inventory_cache):

2860

"""Install all revision data into a repository."""

2861

present_parents = []

2862

parent_trees = {}

2863

for p_id in rev.parent_ids:

2864

if repository.has_revision(p_id):

2865

present_parents.append(p_id)

2866

parent_trees[p_id] = repository.revision_tree(p_id)

2867

else:

2868

parent_trees[p_id] = repository.revision_tree(

2869

_mod_revision.NULL_REVISION)

2870

2871

inv = revision_tree.inventory

2872

entries = inv.iter_entries()

2873

# backwards compatibility hack: skip the root id.

2874

if not repository.supports_rich_root():

2875

path, root = entries.next()

2876

if root.revision != rev.revision_id:

2877

raise errors.IncompatibleRevision(repr(repository))

2878

text_keys = {}

2879

for path, ie in entries:

2880

text_keys[(ie.file_id, ie.revision)] = ie

2881

text_parent_map = repository.texts.get_parent_map(text_keys)

2882

missing_texts = set(text_keys) - set(text_parent_map)

2883

# Add the texts that are not already present

2884

for text_key in missing_texts:

2885

ie = text_keys[text_key]

2886

text_parents = []

2887

# FIXME: TODO: The following loop overlaps/duplicates that done by

2888

# commit to determine parents. There is a latent/real bug here where

2889

# the parents inserted are not those commit would do - in particular

2890

# they are not filtered by heads(). RBC, AB

2891

for revision, tree in parent_trees.iteritems():

2892

if ie.file_id not in tree:

2893

continue

2894

parent_id = tree.get_file_revision(ie.file_id)

2895

if parent_id in text_parents:

2896

continue

2897

text_parents.append((ie.file_id, parent_id))

2898

lines = revision_tree.get_file(ie.file_id).readlines()

2899

repository.texts.add_lines(text_key, text_parents, lines)

2900

try:

2901

# install the inventory

2902

if repository._format._commit_inv_deltas and len(rev.parent_ids):

2903

# Cache this inventory

2904

inventory_cache[rev.revision_id] = inv

2905

try:

2906

basis_inv = inventory_cache[rev.parent_ids[0]]

2907

except KeyError:

2908

repository.add_inventory(rev.revision_id, inv, present_parents)

2909

else:

2910

delta = inv._make_delta(basis_inv)

2911

repository.add_inventory_by_delta(rev.parent_ids[0], delta,

2912

rev.revision_id, present_parents)

2913

else:

2914

repository.add_inventory(rev.revision_id, inv, present_parents)

2915

except errors.RevisionAlreadyPresent:

2916

pass

2917

if signature is not None:

2918

repository.add_signature_text(rev.revision_id, signature)

2919

repository.add_revision(rev.revision_id, rev, inv)

2920

2921

2922

def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository.

    Calls install_revisions with a single (rev, revision_tree, None)
    tuple, i.e. without a signature.
    """
    unsigned_revision = (rev, revision_tree, None)
    install_revisions(repository, [unsigned_revision])