~bzr-pqm/bzr/bzr.dev : revision 5844

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

16

17

"""Repository formats built around versioned files."""

18

19

20

from bzrlib.lazy_import import lazy_import

21

lazy_import(globals(), """

22

from bzrlib import (

23

debug,

24

fetch as _mod_fetch,

25

fifo_cache,

26

gpg,

27

graph,

28

inventory_delta,

29

lru_cache,

30

osutils,

31

revision as _mod_revision,

32

serializer as _mod_serializer,

33

static_tuple,

34

tsort,

35

ui,

36

versionedfile,

37

)

38

39

from bzrlib.recordcounter import RecordCounter

40

from bzrlib.revisiontree import InventoryRevisionTree

41

from bzrlib.testament import Testament

42

""")

43

44

from bzrlib import (

45

errors,

46

)

47

from bzrlib.decorators import (

48

needs_read_lock,

49

needs_write_lock,

50

only_raises,

51

)

52

from bzrlib.inventory import (

53

Inventory,

54

InventoryDirectory,

55

ROOT_ID,

56

entry_factory,

57

)

58

59

from bzrlib.repository import (

60

CommitBuilder,

61

InterRepository,

62

MetaDirRepository,

63

MetaDirRepositoryFormat,

64

Repository,

65

RepositoryFormat,

66

)

67

68

from bzrlib.trace import (

69

mutter,

70

)

71

72

73

class VersionedFileRepositoryFormat(RepositoryFormat):
    """Common base for every repository format built on VersionedFiles."""

    # --- fetch behaviour -------------------------------------------------
    # Whether this format stores deltas that a fetch can carry across
    # as-is (knits are the example: their deltas can be transplanted
    # intact).  The default of False makes a fetch stream grab enough data
    # to reconstruct a full text.
    _fetch_uses_deltas = False
    # The ordering fetch operations ask streams for.  'unordered' is the
    # default because it is the cheapest ordering for an origin to provide.
    _fetch_order = 'unordered'

    # --- commit behaviour ------------------------------------------------
    # Whether commit adds an inventory delta (True) or a whole inventory
    # (False) to the repository.
    _commit_inv_deltas = True

    supports_full_versioned_files = True

89

90

91

class VersionedFileCommitBuilder(CommitBuilder):

92

"""Commit builder implementation for versioned files based repositories.

93

"""

94

95

# this commit builder supports the record_entry_contents interface

96

supports_record_entry_contents = True

97

98

# the default CommitBuilder does not manage trees whose root is versioned.

99

_versioned_root = False

100

101

def __init__(self, repository, parents, config, timestamp=None,
             timezone=None, committer=None, revprops=None,
             revision_id=None, lossy=False):
    """Initialise the builder on top of CommitBuilder's own setup.

    All arguments are forwarded positionally, unchanged, to the base
    class initialiser; this method only adds the bookkeeping used by the
    versioned-files implementation.
    """
    super(VersionedFileCommitBuilder, self).__init__(
        repository, parents, config, timestamp, timezone, committer,
        revprops, revision_id, lossy)
    # Starts as an empty Inventory; record_iter_changes replaces it with
    # None when a delta is accumulated instead.
    self.new_inventory = Inventory(None)
    # Inventory delta against the basis, appended to as entries are
    # recorded.
    self._basis_delta = []
    heads_cache = graph.HeadsCache(repository.get_graph())
    self.__heads = heads_cache.heads
    # Memoised answer for the no-op commit check (see any_changes()).
    self._any_changes = False
    # API compatibility: older users of CommitBuilder never called
    # .record_delete(), so the delta computed for them would not be valid.
    # Callers that do call record_delete() announce it through
    # .will_record_deletes(), which flips this flag.
    self._recording_deletes = False

117

118

def will_record_deletes(self):
    """Announce that deletes will be reported to this builder.

    Turning this on enables accumulation of an inventory delta.  For the
    resulting commit to be valid, every delete against the basis MUST
    then be reported through builder.record_delete().

    The basis of the delta is recorded as the first parent, or as
    NULL_REVISION when there are no parents.
    """
    self._recording_deletes = True
    parents = self.parents
    try:
        first_parent = parents[0]
    except IndexError:
        # No parents at all: the delta is against the null revision.
        first_parent = _mod_revision.NULL_REVISION
    self.basis_delta_revision = first_parent

131

132

def any_changes(self):
    """Report whether any entries were changed.

    Merge-only changes count as changes.  This is the core of the
    --unchanged detection in commit.

    :return: True if any changes have occurred.
    """
    changed = self._any_changes
    return changed

141

142

def _ensure_fallback_inventories(self):
    """Ensure that appropriate inventories are available.

    This only applies to repositories that are stacked, and is about
    ensuring the stacking invariants.  Namely, that for any revision that
    is present, we either have all of the file content, or we have the
    parent inventory and the delta file content.

    :raises errors.BzrError: if the repository is stacked but its format
        does not support chks, or if some parent inventories are still
        missing after every fallback repository has been asked.
    """
    repo = self.repository
    if not repo._fallback_repositories:
        # Not stacked: nothing to do.
        return
    if not repo._format.supports_chks:
        raise errors.BzrError("Cannot commit directly to a stacked branch"
            " in pre-2a formats. See "
            "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
    # This is a stacked repo, we need to make sure we have the parent
    # inventories for the parents.
    parent_keys = [(p,) for p in self.parents]
    parent_map = repo.inventories._index.get_parent_map(parent_keys)
    missing_parent_keys = set([pk for pk in parent_keys
                               if pk not in parent_map])
    missing_keys = [('inventories', pk[0]) for pk in missing_parent_keys]
    # Ask each fallback in turn, in their listed order, stopping as soon
    # as nothing is missing any more.
    for fallback_repo in repo._fallback_repositories:
        if not missing_keys:
            break
        source = fallback_repo._get_source(repo._format)
        sink = repo._get_sink()
        stream = source.get_stream_for_missing_keys(missing_keys)
        # The sink reports whatever it still could not obtain.
        missing_keys = sink.insert_stream_without_locking(stream,
            repo._format)
    if missing_keys:
        raise errors.BzrError('Unable to fill in parent inventories for a'
                              ' stacked branch')

176

177

def commit(self, message):
    """Make the actual commit.

    The message is validated, a Revision object is built from the
    builder's state and added to the repository, fallback inventories are
    ensured, and finally the write group is committed.

    :param message: The commit message.
    :return: The revision id of the recorded revision.
    """
    self._validate_unicode_text(message, 'commit message')
    revision_fields = dict(
        timestamp=self._timestamp,
        timezone=self._timezone,
        committer=self._committer,
        message=message,
        inventory_sha1=self.inv_sha1,
        revision_id=self._new_revision_id,
        properties=self._revprops,
        )
    new_revision = _mod_revision.Revision(**revision_fields)
    new_revision.parent_ids = self.parents
    repo = self.repository
    repo.add_revision(self._new_revision_id, new_revision,
                      self.new_inventory, self._config)
    # Must happen before the write group is committed.
    self._ensure_fallback_inventories()
    repo.commit_write_group()
    return self._new_revision_id

197

198

def abort(self):
    """Abort the commit that is being built.

    This aborts the repository's write group.
    """
    repo = self.repository
    repo.abort_write_group()

202

203

def revision_tree(self):
    """Return the tree that was just committed.

    Call this after commit() to obtain a RevisionTree for the newly
    committed tree.  It is preferred over Repository.revision_tree()
    because that may have to deserialise the inventory, whereas the
    builder may already hold a copy in memory.

    If no inventory is held in memory, it is fetched from the repository
    once and remembered on the builder.
    """
    inv = self.new_inventory
    if inv is None:
        inv = self.repository.get_inventory(self._new_revision_id)
        self.new_inventory = inv
    return InventoryRevisionTree(self.repository, inv,
                                 self._new_revision_id)

217

218

def finish_inventory(self):
    """Tell the builder that the inventory is finished.

    Stores the inventory in the repository, either as a delta against
    basis_delta_revision (when new_inventory is None) or as a complete
    inventory, and remembers the resulting sha1 in self.inv_sha1.

    :return: The inventory id in the repository, which can be used with
        repository.get_inventory.
    :raises AssertionError: if a complete inventory was built but it has
        no root entry.
    """
    if self.new_inventory is None:
        # An inventory delta was accumulated without creating a new
        # inventory.
        basis_id = self.basis_delta_revision
        # We ignore the 'inventory' returned by add_inventory_by_delta
        # because self.new_inventory is used to hint to the rest of the
        # system what code path was taken.
        self.inv_sha1, _ = self.repository.add_inventory_by_delta(
            basis_id, self._basis_delta, self._new_revision_id,
            self.parents)
    else:
        if self.new_inventory.root is None:
            # The builder does not synthesise a root entry; callers must
            # have supplied one.
            raise AssertionError('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.')
        self.new_inventory.revision_id = self._new_revision_id
        self.inv_sha1 = self.repository.add_inventory(
            self._new_revision_id,
            self.new_inventory,
            self.parents
            )
    return self._new_revision_id

246

247

def _check_root(self, ie, parent_invs, tree):
    """Helper for record_entry_contents.

    :param ie: An entry being added.
    :param parent_invs: The inventories of the parent revisions of the
        commit.
    :param tree: The tree that is being committed.
    """
    # Root entries have no knit or weave in this revision format.  After
    # a round trip through serialisation to disk, root.revision always
    # comes back as _new_revision_id, so stamp it that way here.
    new_revid = self._new_revision_id
    ie.revision = new_revid

259

260

def _require_root_change(self, tree):
    """Enforce an appropriate root object change.

    Called once from record_iter_changes, and only when the root was not
    part of the delta that record_iter_changes calculated.  A directory
    entry for the tree root, stamped with the new revision id, is
    appended to the basis delta.

    :param tree: The tree which is being committed.
    :raises errors.RootMissing: if the commit has no parents.
    """
    if not len(self.parents):
        raise errors.RootMissing()
    make_directory = entry_factory['directory']
    root_entry = make_directory(tree.path2id(''), '', None)
    root_entry.revision = self._new_revision_id
    root_change = ('', '', root_entry.file_id, root_entry)
    self._basis_delta.append(root_change)

274

275

def _get_delta(self, ie, basis_inv, path):
    """Get a delta against the basis inventory for ie.

    When the entry is new or differs from the basis entry, the delta item
    is also appended to the accumulated basis delta.

    :return: The (old_path, new_path, file_id, entry) delta item, or None
        when the entry is present and unaltered in the basis.
    """
    file_id = ie.file_id
    if file_id not in basis_inv:
        # An add: there is no old path.
        old_path = None
    elif ie != basis_inv[file_id]:
        # Present in both, but altered.
        # TODO: avoid this id2path call.
        old_path = basis_inv.id2path(file_id)
    else:
        # Present in both and unaltered.
        return None
    item = (old_path, path, file_id, ie)
    self._basis_delta.append(item)
    return item

291

292

def _heads(self, file_id, revision_ids):
    """Calculate the graph heads for revision_ids in the graph of file_id.

    Either a per-file graph or a global revision graph may be used, since
    there is an identity relationship between the two graphs.  This
    implementation ignores file_id and delegates to the heads callable
    stored by __init__.
    """
    compute_heads = self.__heads
    return compute_heads(revision_ids)

299

300

def get_basis_delta(self):
    """Return the complete inventory delta versus the basis inventory.

    The delta is the one built up by the calls to record_delete and
    record_entry_contents.  The client must already have called
    will_record_deletes() to signal that it is generating a complete
    delta.

    :return: An inventory delta, suitable for use with apply_delta, or
        Repository.add_inventory_by_delta, etc.
    :raises AssertionError: if will_record_deletes() was never called.
    """
    if self._recording_deletes:
        return self._basis_delta
    raise AssertionError("recording deletes not activated.")

314

315

def record_delete(self, path, file_id):
    """Record that a delete occurred against a basis tree.

    This API is optional; using it adds items to the basis_delta that
    the commit builder accumulates.  It may only be called after
    will_record_deletes() has told the builder that a delta is being
    supplied.

    :param path: The path of the thing deleted.
    :param file_id: The file id that was deleted.
    :return: The delta item that was appended.
    :raises AssertionError: if will_record_deletes() was never called.
    """
    if not self._recording_deletes:
        raise AssertionError("recording deletes not activated.")
    # A delete has an old path but neither a new path nor a new entry.
    removal = (path, None, file_id, None)
    self._basis_delta.append(removal)
    self._any_changes = True
    return removal

332

333

def record_entry_contents(self, ie, parent_invs, path, tree,
                          content_summary):
    """Record the content of ie from tree into the commit if needed.

    Side effect: sets ie.revision when unchanged

    :param ie: An inventory entry present in the commit.
    :param parent_invs: The inventories of the parent revisions of the
        commit.
    :param path: The path the entry is at in the tree.
    :param tree: The tree which contains this entry and should be used to
        obtain content.
    :param content_summary: Summary data from the tree about the paths
        content - stat, length, exec, sha/link target. This is only
        accessed when the entry has a revision of None - that is when it
        is a candidate to commit.
    :return: A tuple (change_delta, version_recorded, fs_hash).
        change_delta is an inventory_delta change for this entry against
        the basis tree of the commit, or None if no change occurred
        against the basis tree.
        version_recorded is True if a new version of the entry has been
        recorded. For instance, committing a merge where a file was only
        changed on the other side will return (delta, False, None).
        fs_hash is either None, or the hash details for the path
        (currently a tuple of the contents sha1 and the statvalue
        returned by tree.get_file_with_stat()).
    :raises errors.RootMissing: if no root has been recorded yet and ie
        is not itself a root (it has a parent_id).
    :raises NotImplementedError: for a tree-reference when the repository
        format does not support them, or for an unknown kind.
    :raises ValueError: for a file whose content summary has
        executable = None.
    :raises AssertionError: if a carried-over entry claims to have been
        modified in this commit, or a nested tree has no reference
        revision.
    """
    if self.new_inventory.root is None:
        if ie.parent_id is not None:
            raise errors.RootMissing()
        self._check_root(ie, parent_invs, tree)
    if ie.revision is None:
        kind = content_summary[0]
    else:
        # ie is carried over from a prior commit
        kind = ie.kind
    # XXX: repository specific check for nested tree support goes here -
    # if the repo doesn't want nested trees we skip it ?
    if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
        # mismatch between commit builder logic and repository:
        # this needs the entry creation pushed down into the builder.
        raise NotImplementedError('Missing repository subtree support.')
    self.new_inventory.add(ie)

    # TODO: slow, take it out of the inner loop.
    try:
        basis_inv = parent_invs[0]
    except IndexError:
        basis_inv = Inventory(root_id=None)

    # ie.revision is always None if the InventoryEntry is considered
    # for committing. We may record the previous parents revision if the
    # content is actually unchanged against a sole head.
    if ie.revision is not None:
        if not self._versioned_root and path == '':
            # repositories that do not version the root set the root's
            # revision to the new commit even when no change occurs (more
            # specifically, they do not record a revision on the root; and
            # the rev id is assigned to the root during deserialisation -
            # this masks when a change may have occurred against the
            # basis.  To match this we always issue a delta, because the
            # revision of the root will always be changing.
            if ie.file_id in basis_inv:
                delta = (basis_inv.id2path(ie.file_id), path,
                         ie.file_id, ie)
            else:
                # add
                delta = (None, path, ie.file_id, ie)
            self._basis_delta.append(delta)
            return delta, False, None
        else:
            # we don't need to commit this, because the caller already
            # determined that an existing revision of this file is
            # appropriate. If it's not being considered for committing
            # then it and all its parents to the root must be unaltered so
            # no-change against the basis.
            if ie.revision == self._new_revision_id:
                # Interpolate the details into the message with '%'; a
                # comma here would hand the raw format string and the
                # tuple to AssertionError as two separate arguments.
                raise AssertionError("Impossible situation, a skipped "
                    "inventory entry (%r) claims to be modified in this "
                    "commit (%r)." % (ie, self._new_revision_id))
            return None, False, None
    # XXX: Friction: parent_candidates should return a list not a dict
    #      so that we don't have to walk the inventories again.
    parent_candidate_entries = ie.parent_candidates(parent_invs)
    head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
    # Collect the heads in parent order, each at most once.
    heads = []
    for inv in parent_invs:
        if ie.file_id in inv:
            old_rev = inv[ie.file_id].revision
            if old_rev in head_set:
                heads.append(old_rev)
                head_set.remove(old_rev)

    store = False
    # now we check to see if we need to write a new record to the
    # file-graph.
    # We write a new entry unless there is one head to the ancestors, and
    # the kind-derived content is unchanged.

    # Cheapest check first: no ancestors, or more than one head in the
    # ancestors, we write a new node.
    if len(heads) != 1:
        store = True
    if not store:
        # There is a single head, look it up for comparison
        parent_entry = parent_candidate_entries[heads[0]]
        # if the non-content specific data has changed, we'll be writing
        # a node:
        if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
            store = True
    # now we need to do content specific checks:
    if not store:
        # if the kind changed the content obviously has
        if kind != parent_entry.kind:
            store = True
    # Stat cache fingerprint feedback for the caller - None as we usually
    # don't generate one.
    fingerprint = None
    if kind == 'file':
        if content_summary[2] is None:
            raise ValueError("Files must not have executable = None")
        if not store:
            # We can't trust a check of the file length because of content
            # filtering...
            if parent_entry.executable != content_summary[2]:
                # if the exec bit has changed we have to store:
                store = True
            elif parent_entry.text_sha1 == content_summary[3]:
                # all meta and content is unchanged (using a hash cache
                # hit to check the sha)
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False, None
            else:
                # Either there is only a hash change(no hash cache entry,
                # or same size content change), or there is no change on
                # this file at all.
                # Provide the parent's hash to the store layer, so that if
                # the content is unchanged we will not store a new node.
                nostore_sha = parent_entry.text_sha1
        if store:
            # We want to record a new node regardless of the presence or
            # absence of a content change in the file.
            nostore_sha = None
        ie.executable = content_summary[2]
        file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
        try:
            text = file_obj.read()
        finally:
            file_obj.close()
        try:
            ie.text_sha1, ie.text_size = self._add_text_to_weave(
                ie.file_id, text, heads, nostore_sha)
            # Let the caller know we generated a stat fingerprint.
            fingerprint = (ie.text_sha1, stat_value)
        except errors.ExistingContent:
            # Turns out that the file content was unchanged, and we were
            # only going to store a new node if it was changed. Carry over
            # the entry.
            ie.revision = parent_entry.revision
            ie.text_size = parent_entry.text_size
            ie.text_sha1 = parent_entry.text_sha1
            ie.executable = parent_entry.executable
            return self._get_delta(ie, basis_inv, path), False, None
    elif kind == 'directory':
        if not store:
            # all data is meta here, nothing specific to directory, so
            # carry over:
            ie.revision = parent_entry.revision
            return self._get_delta(ie, basis_inv, path), False, None
        self._add_text_to_weave(ie.file_id, '', heads, None)
    elif kind == 'symlink':
        current_link_target = content_summary[3]
        if not store:
            # symlink target is not generic metadata, check if it has
            # changed.
            if current_link_target != parent_entry.symlink_target:
                store = True
        if not store:
            # unchanged, carry over.
            ie.revision = parent_entry.revision
            ie.symlink_target = parent_entry.symlink_target
            return self._get_delta(ie, basis_inv, path), False, None
        ie.symlink_target = current_link_target
        self._add_text_to_weave(ie.file_id, '', heads, None)
    elif kind == 'tree-reference':
        if not store:
            if content_summary[3] != parent_entry.reference_revision:
                store = True
        if not store:
            # unchanged, carry over.
            ie.reference_revision = parent_entry.reference_revision
            ie.revision = parent_entry.revision
            return self._get_delta(ie, basis_inv, path), False, None
        ie.reference_revision = content_summary[3]
        if ie.reference_revision is None:
            raise AssertionError("invalid content_summary for nested tree: %r"
                % (content_summary,))
        self._add_text_to_weave(ie.file_id, '', heads, None)
    else:
        raise NotImplementedError('unknown kind')
    # A new version of the entry was recorded in this commit.
    ie.revision = self._new_revision_id
    self._any_changes = True
    return self._get_delta(ie, basis_inv, path), True, fingerprint

541

542

def record_iter_changes(self, tree, basis_revision_id, iter_changes,
                        _entry_factory=entry_factory):
    """Record a new tree via iter_changes.

    :param tree: The tree to obtain text contents from for changed
        objects.
    :param basis_revision_id: The revision id of the tree the
        iter_changes has been generated against. Currently assumed to be
        the same as self.parents[0] - if it is not, errors may occur.
    :param iter_changes: An iter_changes iterator with the changes to
        apply to basis_revision_id. The iterator must not include any
        items with a current kind of None - missing items must be either
        filtered out or errored-on before record_iter_changes sees the
        item.
    :param _entry_factory: Private method to bind entry_factory locally
        for performance.
    :return: A generator of (file_id, relpath, fs_hash) tuples for use
        with tree._observed_sha1.
    """
    # The inventory delta is assembled from the tree's changes plus the
    # deltas between the parent inventories.  Inventory deltas between
    # the inventory objects are used because iter_changes masks
    # last-changed-field only changes.
    #
    # Working data:
    # file_id -> [revision ids], the per-file parent candidates gathered
    # from the merged (non-basis) parents.
    merged_revs = {}
    # file_id -> {revision_id -> inventory entry}, for entries found via
    # the parent trees that are not parents[0].
    merged_entries = {}
    repo = self.repository
    basis_is_ghost = False
    try:
        parent_trees = list(repo.revision_trees(self.parents))
    except errors.NoSuchRevision:
        # one or more ghosts, slow path: fetch the trees one at a time
        # and substitute the null tree for each ghost.
        parent_trees = []
        for parent_id in self.parents:
            try:
                parent_trees.append(repo.revision_tree(parent_id))
            except errors.NoSuchRevision:
                if not parent_trees:
                    # The first parent itself is the ghost.
                    basis_revision_id = _mod_revision.NULL_REVISION
                    basis_is_ghost = True
                parent_trees.append(repo.revision_tree(
                    _mod_revision.NULL_REVISION))
    # The basis inventory from a repository
    if parent_trees:
        basis_inv = parent_trees[0].inventory
    else:
        basis_inv = repo.revision_tree(
            _mod_revision.NULL_REVISION).inventory
    if len(self.parents) > 0:
        if basis_revision_id != self.parents[0] and not basis_is_ghost:
            raise Exception(
                "arbitrary basis parents not yet supported with merges")
        for merged_tree in parent_trees[1:]:
            for item in merged_tree.inventory._make_delta(basis_inv):
                # item is (old_path, new_path, file_id, entry)
                if item[1] is None:
                    # Not present in this parent.
                    continue
                fid = item[2]
                merged_entry = item[3]
                merged_rev = merged_entry.revision
                if fid in merged_revs:
                    merged_revs[fid].append(merged_rev)
                    merged_entries[fid][merged_rev] = merged_entry
                elif item[0] is None:
                    # Not in the basis: only this parent contributes.
                    merged_revs[fid] = [merged_rev]
                    merged_entries[fid] = {merged_rev: merged_entry}
                else:
                    basis_entry = basis_inv[fid]
                    # basis revid first, then this parent's revid
                    merged_revs[fid] = [basis_entry.revision, merged_rev]
                    merged_entries[fid] = {
                        basis_entry.revision: basis_entry,
                        merged_rev: merged_entry,
                        }
    # Setup the changes from the tree:
    # changes maps file_id -> (change, [parent revision_ids])
    changes = {}
    for change in iter_changes:
        # This probably looks up in basis_inv way too much.
        if change[1][0] is None:
            default_candidates = []
        else:
            default_candidates = [basis_inv[change[0]].revision]
        changes[change[0]] = (
            change, merged_revs.get(change[0], default_candidates))
    # Extend the changes dict with synthetic changes to record merges of
    # texts.
    for file_id in set(merged_revs) - set(changes):
        # Record a merged version of these items that did not change vs
        # the basis. This can be either identical parallel changes, or a
        # revert of a specific file after a merge. The recorded content
        # will be that of the current tree (which is the same as the
        # basis), but the per-file graph will reflect a merge.
        # NB:XXX: We are reconstructing path information we had, this
        # should be preserved instead.
        # inv delta  change: (file_id, (path_in_source, path_in_target),
        #   changed_content, versioned, parent, name, kind,
        #   executable)
        try:
            basis_entry = basis_inv[file_id]
        except errors.NoSuchId:
            # a change from basis->some_parents but file_id isn't in
            # basis so was new in the merge, which means it must have
            # changed from basis -> current, and as it hasn't the add was
            # reverted by the user. So we discard this change.
            continue
        synthetic = (
            file_id,
            (basis_inv.id2path(file_id), tree.id2path(file_id)),
            False, (True, True),
            (basis_entry.parent_id, basis_entry.parent_id),
            (basis_entry.name, basis_entry.name),
            (basis_entry.kind, basis_entry.kind),
            (basis_entry.executable, basis_entry.executable))
        changes[file_id] = (synthetic, merged_revs[file_id])
    # changes contains tuples with the change and a set of inventory
    # candidates for the file.
    # inv delta is:
    # old_path, new_path, file_id, new_inventory_entry
    root_in_delta = False
    inv_delta = self._basis_delta
    modified_rev = self._new_revision_id
    for change, head_candidates in changes.values():
        file_id = change[0]
        new_path = change[1][1]
        if not change[3][1]:
            # Not versioned in the target: recorded as a removal.
            entry = None
        else:
            # Several things may be happening here:
            # We may have a fork in the per-file graph
            #  - record a change with the content from tree
            # We may have a change against < all trees
            #  - carry over the tree that hasn't changed
            # We may have a change against all trees
            #  - record the change with the content from tree
            kind = change[6][1]
            entry = _entry_factory[kind](file_id, change[5][1],
                                         change[4][1])
            live_heads = self._heads(file_id, set(head_candidates))
            heads = []
            # Preserve the candidate ordering.
            for candidate in head_candidates:
                if candidate in live_heads:
                    heads.append(candidate)
                    live_heads.remove(candidate)
            carried_over = False
            carry_over_possible = False
            if len(heads) == 1:
                # Could be a carry-over situation:
                known_parents = merged_entries.get(file_id, None)
                if known_parents:
                    parent_entry = known_parents.get(heads[0], None)
                else:
                    parent_entry = None
                # With no parent entry, the parent iter_changes was
                # called against is the per-file head, so any change
                # iter_changes reported is relevant.  Otherwise a change
                # against the basis may just indicate a merge, and the
                # content must be checked against the source of the merge
                # to tell "changed after the merge" from "carried over".
                if parent_entry is not None:
                    # Carry-over is only possible when the metadata common
                    # to all entries matches the per-file parent; the
                    # per-type checks are done below.
                    carry_over_possible = not (
                        parent_entry.kind != entry.kind or
                        parent_entry.parent_id != entry.parent_id or
                        parent_entry.name != entry.name)
            # Populate the entry in the delta
            if kind == 'file':
                # XXX: There is still a small race here: if some other
                # process reverts the content of a file while commit is
                # running, after iter_changes examined it and decided it
                # had changed, we will unconditionally record a new
                # version.
                entry.executable = bool(change[7][1])
                if (carry_over_possible and
                        parent_entry.executable == entry.executable):
                    # Check the file length, content hash after reading
                    # the file.
                    nostore_sha = parent_entry.text_sha1
                else:
                    nostore_sha = None
                file_obj, stat_value = tree.get_file_with_stat(
                    file_id, new_path)
                try:
                    text = file_obj.read()
                finally:
                    file_obj.close()
                try:
                    entry.text_sha1, entry.text_size = (
                        self._add_text_to_weave(
                            file_id, text, heads, nostore_sha))
                    yield file_id, new_path, (entry.text_sha1, stat_value)
                except errors.ExistingContent:
                    # No content change against a carry_over parent
                    # Perhaps this should also yield a fs hash update?
                    carried_over = True
                    entry.text_size = parent_entry.text_size
                    entry.text_sha1 = parent_entry.text_sha1
            elif kind == 'symlink':
                # Wants a path hint?
                entry.symlink_target = tree.get_symlink_target(file_id)
                if (carry_over_possible and
                        parent_entry.symlink_target == entry.symlink_target):
                    carried_over = True
                else:
                    self._add_text_to_weave(file_id, '', heads, None)
            elif kind == 'directory':
                if carry_over_possible:
                    carried_over = True
                # Nothing to set on the entry.
                # XXX: split into the Root and nonRoot versions.
                elif new_path != '' or repo.supports_rich_root():
                    self._add_text_to_weave(file_id, '', heads, None)
            elif kind == 'tree-reference':
                if not repo._format.supports_tree_reference:
                    # This isn't quite sane as an error, but we shouldn't
                    # ever see this code path in practice: trees don't
                    # permit references when the repo doesn't support tree
                    # references.
                    raise errors.UnsupportedOperation(tree.add_reference,
                                                      repo)
                reference_revision = tree.get_reference_revision(file_id)
                entry.reference_revision = reference_revision
                if (carry_over_possible and
                        parent_entry.reference_revision
                        == reference_revision):
                    carried_over = True
                else:
                    self._add_text_to_weave(file_id, '', heads, None)
            else:
                raise AssertionError('unknown kind %r' % kind)
            if carried_over:
                entry.revision = parent_entry.revision
            else:
                entry.revision = modified_rev
        inv_delta.append((change[1][0], new_path, file_id, entry))
        if new_path == '':
            root_in_delta = True
    # A delta was accumulated, so no full inventory is kept.
    self.new_inventory = None
    if inv_delta:
        # This should perhaps be guarded by a check that the basis we
        # commit against is the basis for the commit and if not do a delta
        # against the basis.
        self._any_changes = True
    if not root_in_delta:
        # housekeeping root entry changes do not affect no-change commits.
        self._require_root_change(tree)
    self.basis_delta_revision = basis_revision_id

810

811

def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):

812

parent_keys = tuple([(file_id, parent) for parent in parents])

813

return self.repository.texts._add_text(

814

(file_id, self._new_revision_id), parent_keys, new_text,

815

nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]

816

817

818

class VersionedFileRootCommitBuilder(VersionedFileCommitBuilder):
    """Commit builder variant that records the root id.

    The two root hooks defined here have empty bodies: they only carry
    documentation and perform no work.
    """

    # Marks that this builder versions the root entry properly.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Root hook used by record_entry_contents; a no-op here.

        :param ie: The entry that is being added.
        :param parent_invs: Inventories of the parent revisions of the
            commit.
        :param tree: The tree being committed.
        """

    def _require_root_change(self, tree):
        """Hook for enforcing a root change; a no-op here.

        record_iter_changes calls this once, and only when the root was
        not part of the delta it calculated.

        :param tree: The tree being committed.
        """
        # versioned roots do not change unless the tree found a change.

842

843

844

class VersionedFileRepository(Repository):

845

"""Repository holding history for one or more branches.

846

847

The repository holds and retrieves historical information including

848

revisions and file history. It's normally accessed only by the Branch,

849

which views a particular line of development through that history.

850

851

The Repository builds on top of some byte storage facilities (the revisions,

852

signatures, inventories, texts and chk_bytes attributes) and a Transport,

853

which respectively provide byte storage and a means to access the (possibly

854

remote) disk.

855

856

The byte storage facilities are addressed via tuples, which we refer to

857

as 'keys' throughout the code base. Revision_keys, inventory_keys and

858

signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:

859

(file_id, revision_id). chk_bytes uses CHK keys - a 1-tuple with a single

860

byte string made up of a hash identifier and a hash value.

861

We use this interface because it allows low friction with the underlying

862

code that implements disk indices, network encoding and other parts of

863

bzrlib.

864

865

:ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing

866

the serialised revisions for the repository. This can be used to obtain

867

revision graph information or to access raw serialised revisions.

868

The result of trying to insert data into the repository via this store

869

is undefined: it should be considered read-only except for implementors

870

of repositories.

871

:ivar signatures: A bzrlib.versionedfile.VersionedFiles instance containing

872

the serialised signatures for the repository. This can be used to

873

obtain access to raw serialised signatures. The result of trying to

874

insert data into the repository via this store is undefined: it should

875

be considered read-only except for implementors of repositories.

876

:ivar inventories: A bzrlib.versionedfile.VersionedFiles instance containing

877

the serialised inventories for the repository. This can be used to

878

obtain unserialised inventories. The result of trying to insert data

879

into the repository via this store is undefined: it should be

880

considered read-only except for implementors of repositories.

881

:ivar texts: A bzrlib.versionedfile.VersionedFiles instance containing the

882

texts of files and directories for the repository. This can be used to

883

obtain file texts or file graphs. Note that Repository.iter_file_bytes

884

is usually a better interface for accessing file texts.

885

The result of trying to insert data into the repository via this store

886

is undefined: it should be considered read-only except for implementors

887

of repositories.

888

:ivar chk_bytes: A bzrlib.versionedfile.VersionedFiles instance containing

889

any data the repository chooses to store or have indexed by its hash.

890

The result of trying to insert data into the repository via this store

891

is undefined: it should be considered read-only except for implementors

892

of repositories.

893

:ivar _transport: Transport for file access to repository, typically

894

pointing to .bzr/repository.

895

"""

896

897

# What class to use for a CommitBuilder. Often it's simpler to change this

898

# in a Repository class subclass rather than to override

899

# get_commit_builder.

900

_commit_builder_class = VersionedFileCommitBuilder

901

902

def add_fallback_repository(self, repository):
    """Add a repository to use for looking up data not held locally.

    The fallback is appended to ``self._fallback_repositories`` and its
    texts, inventories, revisions and signatures (plus chk_bytes when this
    repository has them) are registered as fallback versioned files.

    :param repository: A repository.
    :raises errors.UnstackableRepositoryFormat: If this repository's
        format does not support external lookups.
    """
    if not self._format.supports_external_lookups:
        raise errors.UnstackableRepositoryFormat(self._format, self.base)
    took_lock = self.is_locked()
    if took_lock:
        # This repository will call fallback.unlock() when we transition to
        # the unlocked state, so we make sure to increment the lock count
        repository.lock_read()
    try:
        self._check_fallback_repository(repository)
    except:
        # Bare except on purpose: this is cleanup followed by a re-raise.
        # A rejected fallback is never recorded in _fallback_repositories,
        # so nothing else would release the read lock taken above.
        if took_lock:
            repository.unlock()
        raise
    self._fallback_repositories.append(repository)
    self.texts.add_fallback_versioned_files(repository.texts)
    self.inventories.add_fallback_versioned_files(repository.inventories)
    self.revisions.add_fallback_versioned_files(repository.revisions)
    self.signatures.add_fallback_versioned_files(repository.signatures)
    if self.chk_bytes is not None:
        self.chk_bytes.add_fallback_versioned_files(repository.chk_bytes)

921

922

@only_raises(errors.LockNotHeld, errors.LockBroken)
def unlock(self):
    """Unlock via the base class, then tidy the inventory entry cache.

    The cache is cleared only when the control files' lock count has
    dropped to zero after the base class unlock.
    """
    super(VersionedFileRepository, self).unlock()
    fully_unlocked = (self.control_files._lock_count == 0)
    if fully_unlocked:
        self._inventory_entry_cache.clear()

927

928

def add_inventory(self, revision_id, inv, parents):
    """Validate inv and add it to the repository as revision_id.

    :param revision_id: The revision id to store the inventory under.
    :param inv: The inventory; its revision_id must be None or equal to
        revision_id, and it must have a root.
    :param parents: The revision ids of the parents that revision_id
        is known to have and are in the repository already.
    :raises AssertionError: If no write group is active, or if the
        inventory's revision id disagrees with revision_id.
    :raises errors.RootMissing: If the inventory has no root.
    :returns: The validator(which is a sha1 digest, though what is sha'd is
        repository format specific) of the serialized inventory.
    """
    if not self.is_in_write_group():
        raise AssertionError("%r not in write group" % (self,))
    _mod_revision.check_not_reserved_id(revision_id)
    inv_revision_id = inv.revision_id
    if inv_revision_id is not None and inv_revision_id != revision_id:
        raise AssertionError(
            "Mismatch between inventory revision"
            " id and insertion revid (%r, %r)"
            % (inv_revision_id, revision_id))
    if inv.root is None:
        raise errors.RootMissing()
    return self._add_inventory_checked(revision_id, inv, parents)

948

949

def _add_inventory_checked(self, revision_id, inv, parents):

950

"""Add inv to the repository after checking the inputs.

951

952

This function can be overridden to allow different inventory styles.

953

954

:seealso: add_inventory, for the contract.

955

"""

956

inv_lines = self._serializer.write_inventory_to_lines(inv)

957

return self._inventory_add_lines(revision_id, parents,

958

inv_lines, check_content=False)

959

960

def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                           parents, basis_inv=None, propagate_caches=False):
    """Add a new inventory expressed as a delta against another revision.

    See the inventory developers documentation for the theory behind
    inventory deltas.

    :param basis_revision_id: The inventory id the delta was created
        against. (This does not have to be a direct parent.)
    :param delta: The inventory delta (see Inventory.apply_delta for
        details).
    :param new_revision_id: The revision id that the inventory is being
        added for.
    :param parents: The revision ids of the parents that revision_id is
        known to have and are in the repository already. These are supplied
        for repositories that depend on the inventory graph for revision
        graph access, as well as for those that pun ancestry with delta
        compression.
    :param basis_inv: The basis inventory if it is already known,
        otherwise None.
    :param propagate_caches: If True, the caches for this inventory are
        copied to and updated for the result if possible. (This
        implementation does not read the flag.)
    :raises AssertionError: If no write group is active.
    :returns: (validator, new_inv)
        The validator(which is a sha1 digest, though what is sha'd is
        repository format specific) of the serialized inventory, and the
        resulting inventory.
    """
    if not self.is_in_write_group():
        raise AssertionError("%r not in write group" % (self,))
    _mod_revision.check_not_reserved_id(new_revision_id)
    basis_tree = self.revision_tree(basis_revision_id)
    basis_tree.lock_read()
    try:
        # Note that this mutates the inventory of basis_tree, which not all
        # inventory implementations may support: A better idiom would be to
        # return a new inventory, but as there is no revision tree cache in
        # repository this is safe for now - RBC 20081013
        new_inv = basis_inv
        if new_inv is None:
            new_inv = basis_tree.inventory
        new_inv.apply_delta(delta)
        new_inv.revision_id = new_revision_id
        validator = self.add_inventory(new_revision_id, new_inv, parents)
        return (validator, new_inv)
    finally:
        basis_tree.unlock()

1006

1007

def _inventory_add_lines(self, revision_id, parents, lines,

1008

check_content=True):

1009

"""Store lines in inv_vf and return the sha1 of the inventory."""

1010

parents = [(parent,) for parent in parents]

1011

result = self.inventories.add_lines((revision_id,), parents, lines,

1012

check_content=check_content)[0]

1013

self.inventories._access.flush()

1014

return result

1015

1016

def add_revision(self, revision_id, rev, inv=None, config=None):
    """Add rev to the revision store as revision_id.

    Sets ``rev.inventory_sha1`` as a side effect: from the stored
    inventory when one is already present, otherwise from adding inv.

    :param revision_id: the revision id to use.
    :param rev: The revision object.
    :param inv: The inventory for the revision. if None, it will be looked
        up in the inventory storer
    :param config: If None no digital signature will be created.
        If supplied its signature_needed method will be used
        to determine if a signature should be made.
    :raises errors.WeaveRevisionNotPresent: If no inventory is stored for
        revision_id and inv is None.
    """
    # TODO: jam 20070210 Shouldn't we check rev.revision_id and
    #       rev.parent_ids?
    _mod_revision.check_not_reserved_id(revision_id)
    if config is not None and config.signature_needed():
        if inv is None:
            inv = self.get_inventory(revision_id)
        tree = InventoryRevisionTree(self, inv, revision_id)
        plaintext = Testament(rev, tree).as_short_text()
        self.store_revision_signature(
            gpg.GPGStrategy(config), plaintext, revision_id)
    # check inventory present
    inv_key = (revision_id,)
    if self.inventories.get_parent_map([inv_key]):
        rev.inventory_sha1 = self.inventories.get_sha1s([inv_key])[inv_key]
    elif inv is None:
        raise errors.WeaveRevisionNotPresent(revision_id,
                                             self.inventories)
    else:
        # yes, this is not suitable for adding with ghosts.
        rev.inventory_sha1 = self.add_inventory(revision_id, inv,
                                                rev.parent_ids)
    self._add_revision(rev)

1051

1052

def _add_revision(self, revision):
    """Serialise revision and store it in self.revisions.

    The record key is (revision.revision_id,), and each parent id is
    wrapped into a 1-tuple key.
    """
    serialised = self._serializer.write_revision_to_string(revision)
    revision_key = (revision.revision_id,)
    parent_keys = tuple((parent_id,) for parent_id in revision.parent_ids)
    self.revisions.add_lines(
        revision_key, parent_keys, osutils.split_lines(serialised))

1057

1058

def _check_inventories(self, checker):
    """Check the inventories found from the revision scan.

    This is responsible for verifying the sha1 of inventories and
    creating a pending_keys set that covers data referenced by inventories.
    The work is delegated to _do_check_inventories; this wrapper only owns
    the progress bar and makes sure it is finished.
    """
    progress = ui.ui_factory.nested_progress_bar()
    try:
        self._do_check_inventories(checker, progress)
    finally:
        progress.finished()

1069

1070

def _do_check_inventories(self, checker, bar):

1071

"""Helper for _check_inventories."""

1072

revno = 0

1073

keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}

1074

kinds = ['chk_bytes', 'texts']

1075

count = len(checker.pending_keys)

1076

bar.update("inventories", 0, 2)

1077

current_keys = checker.pending_keys

1078

checker.pending_keys = {}

1079

# Accumulate current checks.

1080

for key in current_keys:

1081

if key[0] != 'inventories' and key[0] not in kinds:

1082

checker._report_items.append('unknown key type %r' % (key,))

1083

keys[key[0]].add(key[1:])

1084

if keys['inventories']:

1085

# NB: output order *should* be roughly sorted - topo or

1086

# inverse topo depending on repository - either way decent

1087

# to just delta against. However, pre-CHK formats didn't

1088

# try to optimise inventory layout on disk. As such the

1089

# pre-CHK code path does not use inventory deltas.

1090

last_object = None

1091

for record in self.inventories.check(keys=keys['inventories']):

1092

if record.storage_kind == 'absent':

1093

checker._report_items.append(

1094

'Missing inventory {%s}' % (record.key,))

1095

else:

1096

last_object = self._check_record('inventories', record,

1097

checker, last_object,

1098

current_keys[('inventories',) + record.key])

1099

del keys['inventories']

1100

else:

1101

return

1102

bar.update("texts", 1)

1103

while (checker.pending_keys or keys['chk_bytes']

1104

or keys['texts']):

1105

# Something to check.

1106

current_keys = checker.pending_keys

1107

checker.pending_keys = {}

1108

# Accumulate current checks.

1109

for key in current_keys:

1110

if key[0] not in kinds:

1111

checker._report_items.append('unknown key type %r' % (key,))

1112

keys[key[0]].add(key[1:])

1113

# Check the outermost kind only - inventories || chk_bytes || texts

1114

for kind in kinds:

1115

if keys[kind]:

1116

last_object = None

1117

for record in getattr(self, kind).check(keys=keys[kind]):

1118

if record.storage_kind == 'absent':

1119

checker._report_items.append(

1120

'Missing %s {%s}' % (kind, record.key,))

1121

else:

1122

last_object = self._check_record(kind, record,

1123

checker, last_object, current_keys[(kind,) + record.key])

1124

keys[kind] = set()

1125

break

1126

1127

def _check_record(self, kind, record, checker, last_object, item_data):

1128

"""Check a single text from this repository."""

1129

if kind == 'inventories':

1130

rev_id = record.key[0]

1131

inv = self._deserialise_inventory(rev_id,

1132

record.get_bytes_as('fulltext'))

1133

if last_object is not None:

1134

delta = inv._make_delta(last_object)

1135

for old_path, path, file_id, ie in delta:

1136

if ie is None:

1137

continue

1138

ie.check(checker, rev_id, inv)

1139

else:

1140

for path, ie in inv.iter_entries():

1141

ie.check(checker, rev_id, inv)

1142

if self._format.fast_deltas:

1143

return inv

1144

elif kind == 'chk_bytes':

1145

# No code written to check chk_bytes for this repo format.

1146

checker._report_items.append(

1147

'unsupported key type chk_bytes for %s' % (record.key,))

1148

elif kind == 'texts':

1149

self._check_text(record, checker, item_data)

1150

else:

1151

checker._report_items.append(

1152

'unknown key type %s for %s' % (kind, record.key))

1153

1154

def _check_text(self, record, checker, item_data):
    """Check a single text by extracting it and comparing its sha1.

    :param record: The text record; read as 'chunked' when that is its
        storage kind, otherwise as a 'fulltext'.
    :param checker: Receives a report item on a sha1 mismatch.
    :param item_data: When truthy, item_data[1] is the expected sha1 and
        item_data[2] is named in the report as the referrer.
    """
    # Check it is extractable.
    # TODO: check length.
    storage_kind = record.storage_kind
    if storage_kind == 'chunked':
        chunks = record.get_bytes_as(storage_kind)
        sha1 = osutils.sha_strings(chunks)
        length = sum(map(len, chunks))
    else:
        content = record.get_bytes_as('fulltext')
        sha1 = osutils.sha_string(content)
        length = len(content)
    if not item_data:
        return
    expected_sha1 = item_data[1]
    if sha1 != expected_sha1:
        checker._report_items.append(
            'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
            (record.key, sha1, expected_sha1, item_data[2]))

1170

1171

def __init__(self, _format, a_bzrdir, control_files):
    """Instantiate a VersionedFileRepository.

    :param _format: The format of the repository on disk.
    :param a_bzrdir: The BzrDir of the repository.
    :param control_files: Control files to use for locking, etc.
    """
    # In the future we will have a single api for all stores for
    # getting file texts, inventories and revisions, then
    # this construct will accept instances of those things.
    super(VersionedFileRepository, self).__init__(
        _format, a_bzrdir, control_files)
    # for tests
    self._reconcile_does_inventory_gc = True
    self._reconcile_fixes_text_parents = False
    self._reconcile_backsup_inventory = True
    # An InventoryEntry cache, used during deserialization
    entry_cache_size = 10*1024
    self._inventory_entry_cache = fifo_cache.FIFOCache(entry_cache_size)
    # Is it safe to return inventory entries directly from the entry cache,
    # rather copying them?
    self._safe_to_return_from_cache = False

1192

1193

@needs_read_lock
def gather_stats(self, revid=None, committers=None):
    """See Repository.gather_stats().

    Adds a 'revisions' count to the base class result when the user
    transport is listable.
    """
    stats = super(VersionedFileRepository, self).gather_stats(
        revid, committers)
    # now gather global repository information
    # XXX: This is available for many repos regardless of listability.
    if not self.user_transport.listable():
        return stats
    # XXX: do we want to __define len__() ?
    # Maybe the versionedfiles object should provide a different
    # method to get the number of keys.
    stats['revisions'] = len(self.revisions.keys())
    # result['size'] = t
    return stats

1206

1207

1208

def get_commit_builder(self, branch, parents, config, timestamp=None,
                       timezone=None, committer=None, revprops=None,
                       revision_id=None, lossy=False):
    """Obtain a CommitBuilder for this repository.

    The builder is created from ``self._commit_builder_class`` and a write
    group is started before it is returned.

    :param branch: Branch to commit to.
    :param parents: Revision ids of the parents of the new revision.
    :param config: Configuration to use.
    :param timestamp: Optional timestamp recorded for commit.
    :param timezone: Optional timezone for timestamp.
    :param committer: Optional committer to set for commit.
    :param revprops: Optional dictionary of revision properties.
    :param revision_id: Optional revision id.
    :param lossy: Whether to discard data that can not be natively
        represented, when pushing to a foreign VCS
    :raises errors.BzrError: If this repository has fallback repositories
        and its format does not support chks.
    """
    if self._fallback_repositories:
        if not self._format.supports_chks:
            raise errors.BzrError("Cannot commit directly to a stacked branch"
                " in pre-2a formats. See "
                "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
    builder = self._commit_builder_class(
        self, parents, config, timestamp, timezone, committer, revprops,
        revision_id, lossy)
    self.start_write_group()
    return builder

1233

1234

def get_missing_parent_inventories(self, check_for_missing_texts=True):
    """Return the keys of missing inventory parents for revisions added in
    this write group.

    A revision is not complete if the inventory delta for that revision
    cannot be calculated.  Therefore if the parent inventories of a
    revision are not present, the revision is incomplete, and e.g. cannot
    be streamed by a smart server.  This method finds missing inventory
    parents for revisions added in this write group.

    :param check_for_missing_texts: When False, every missing parent
        inventory is returned without looking at texts. When True, they
        are returned only if some text introduced by a referring revision
        is also missing.
    :raises AssertionError: If no write group is active.
    :return: A set of ('inventories', revision_id) keys; empty when
        nothing is missing or the format has no external lookups.
    """
    if not self._format.supports_external_lookups:
        # This is only an issue for stacked repositories
        return set()
    if not self.is_in_write_group():
        raise AssertionError('not in a write group')

    # XXX: We assume that every added revision already has its
    # corresponding inventory, so we only check for parent inventories that
    # might be missing, rather than all inventories.
    parents = set(self.revisions._index.get_missing_parents())
    parents.discard(_mod_revision.NULL_REVISION)
    unstacked_inventories = self.inventories._index
    present_inventories = unstacked_inventories.get_parent_map(
        key[-1:] for key in parents)
    parents.difference_update(present_inventories)
    if not parents:
        # No missing parent inventories.
        return set()
    # parents is not modified past this point, so the answer can be built
    # once and returned from either exit below.
    missing_keys = set(('inventories', rev_id) for (rev_id,) in parents)
    if not check_for_missing_texts:
        return missing_keys
    # Ok, now we have a list of missing inventories.  But these only matter
    # if the inventories that reference them are missing some texts they
    # appear to introduce.
    # XXX: Texts referenced by all added inventories need to be present,
    # but at the moment we're only checking for texts referenced by
    # inventories at the graph's edge.
    key_deps = self.revisions._index._key_dependencies
    key_deps.satisfy_refs_for_keys(present_inventories)
    referrers = frozenset(r[0] for r in key_deps.get_referrers())
    file_ids = self.fileids_altered_by_revision_ids(referrers)
    missing_texts = set(
        (file_id, version_id)
        for file_id, version_ids in file_ids.iteritems()
        for version_id in version_ids)
    present_texts = self.texts.get_parent_map(missing_texts)
    missing_texts.difference_update(present_texts)
    if not missing_texts:
        # No texts are missing, so all revisions and their deltas are
        # reconstructable.
        return set()
    # Alternatively the text versions could be returned as the missing
    # keys, but this is likely to be less data.
    return missing_keys

1288

1289

@needs_read_lock
def has_revisions(self, revision_ids):
    """Probe to find out the presence of multiple revisions.

    NULL_REVISION counts as present whenever it is asked for.

    :param revision_ids: An iterable of revision_ids.
    :return: A set of the revision_ids that were present.
    """
    # revision_ids is walked twice (to build the keys and for the
    # NULL_REVISION membership test), so materialise it first: a one-shot
    # iterator would already be exhausted by the second walk.
    revision_ids = list(revision_ids)
    parent_map = self.revisions.get_parent_map(
        [(rev_id,) for rev_id in revision_ids])
    result = set()
    if _mod_revision.NULL_REVISION in revision_ids:
        result.add(_mod_revision.NULL_REVISION)
    result.update([key[0] for key in parent_map])
    return result

1303

1304

@needs_read_lock
def get_revision_reconcile(self, revision_id):
    """'reconcile' helper routine that allows access to a revision always.

    This variant of get_revision does not cross check the weave graph
    against the revision one as get_revision does: but it should only
    be used by reconcile, or reconcile-alike commands that are correcting
    or testing the revision graph.

    :param revision_id: The id of the single revision to fetch.
    :return: The first (only) item returned by _get_revisions.
    """
    revisions = self._get_revisions([revision_id])
    return revisions[0]

1314

1315

@needs_read_lock
def get_revisions(self, revision_ids):
    """Get many revisions at once.

    This implementation simply defers to _get_revisions. Repositories
    that need to check data on every revision read should subclass this
    method.
    """
    revisions = self._get_revisions(revision_ids)
    return revisions

1323

1324

@needs_read_lock
def _get_revisions(self, revision_ids):
    """Core work logic to get many revisions without sanity checks.

    :param revision_ids: An iterable of revision ids.
    :return: A list of revision objects in the order of revision_ids.
    :raises errors.NoSuchRevision: If any requested revision is absent.
    """
    # The ids are needed twice: to fetch the revisions and to put the
    # result back into request order. Materialise them so that a one-shot
    # iterable does not come back as a silently empty result.
    revision_ids = list(revision_ids)
    revs = {}
    for revid, rev in self._iter_revisions(revision_ids):
        if rev is None:
            raise errors.NoSuchRevision(self, revid)
        revs[revid] = rev
    return [revs[revid] for revid in revision_ids]

1333

1334

def _iter_revisions(self, revision_ids):

1335

"""Iterate over revision objects.

1336

1337

:param revision_ids: An iterable of revisions to examine. None may be

1338

passed to request all revisions known to the repository. Note that

1339

not all repositories can find unreferenced revisions; for those

1340

repositories only referenced ones will be returned.

1341

:return: An iterator of (revid, revision) tuples. Absent revisions (

1342

those asked for but not available) are returned as (revid, None).

1343

"""

1344

if revision_ids is None:

1345

revision_ids = self.all_revision_ids()

1346

else:

1347

for rev_id in revision_ids:

1348

if not rev_id or not isinstance(rev_id, basestring):

1349

raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)

1350

keys = [(key,) for key in revision_ids]

1351

stream = self.revisions.get_record_stream(keys, 'unordered', True)

1352

for record in stream:

1353

revid = record.key[0]

1354

if record.storage_kind == 'absent':

1355

yield (revid, None)

1356

else:

1357

text = record.get_bytes_as('fulltext')

1358

rev = self._serializer.read_revision_from_string(text)

1359

yield (revid, rev)

1360

1361

@needs_write_lock
def add_signature_text(self, revision_id, signature):
    """Store a signature text for a revision.

    The signature is split into lines and added to self.signatures under
    the key (revision_id,) with no parents.

    :param revision_id: Revision id of the revision
    :param signature: Signature text.
    """
    signature_key = (revision_id,)
    no_parents = ()
    self.signatures.add_lines(
        signature_key, no_parents, osutils.split_lines(signature))

1370

1371

def find_text_key_references(self):
    """Find the text key references within the repository.

    :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
        to whether they were referred to by the inventory of the
        revision_id that they contain. The inventory texts from all present
        revision ids are assessed to generate this report.
    """
    revision_keys = self.revisions.keys()
    inventories = self.inventories
    progress = ui.ui_factory.nested_progress_bar()
    try:
        inventory_lines = inventories.iter_lines_added_or_present_in_keys(
            revision_keys, pb=progress)
        return self._serializer._find_text_key_references(inventory_lines)
    finally:
        progress.finished()

1387

1388

def _inventory_xml_lines_for_keys(self, keys):
    """Get a line iterator of the sort needed for finding references.

    Not relevant for non-xml inventory repositories.

    Ghosts in keys are ignored.

    :param keys: The revision keys for the inventories to inspect.
    :return: An iterator over (inventory line, revid) for the fulltexts of
        all of the xml inventories specified by keys.
    """
    stream = self.inventories.get_record_stream(keys, 'unordered', True)
    for record in stream:
        if record.storage_kind == 'absent':
            # Ghost: there is no inventory text to walk.
            continue
        chunks = record.get_bytes_as('chunked')
        revid = record.key[-1]
        for line in osutils.chunks_to_lines(chunks):
            yield line, revid

1407

1408

def _find_file_ids_from_xml_inventory_lines(self, line_iterator,

1409

revision_keys):

1410

"""Helper routine for fileids_altered_by_revision_ids.

1411

1412

This performs the translation of xml lines to revision ids.

1413

1414

:param line_iterator: An iterator of lines, origin_version_id

1415

:param revision_keys: The revision ids to filter for. This should be a

1416

set or other type which supports efficient __contains__ lookups, as

1417

the revision key from each parsed line will be looked up in the

1418

revision_keys filter.

1419

:return: a dictionary mapping altered file-ids to an iterable of

1420

revision_ids. Each altered file-ids has the exact revision_ids that

1421

altered it listed explicitly.

1422

"""

1423

seen = set(self._serializer._find_text_key_references(

1424

line_iterator).iterkeys())

1425

parent_keys = self._find_parent_keys_of_revisions(revision_keys)

1426

parent_seen = set(self._serializer._find_text_key_references(

1427

self._inventory_xml_lines_for_keys(parent_keys)))

1428

new_keys = seen - parent_seen

1429

result = {}

1430

setdefault = result.setdefault

1431

for key in new_keys:

1432

setdefault(key[0], set()).add(key[-1])

1433

return result

1434

1435

def _find_parent_keys_of_revisions(self, revision_keys):

1436

"""Similar to _find_parent_ids_of_revisions, but used with keys.

1437

1438

:param revision_keys: An iterable of revision_keys.

1439

:return: The parents of all revision_keys that are not already in

1440

revision_keys

1441

"""

1442

parent_map = self.revisions.get_parent_map(revision_keys)

1443

parent_keys = set()

1444

map(parent_keys.update, parent_map.itervalues())

1445

parent_keys.difference_update(revision_keys)

1446

parent_keys.discard(_mod_revision.NULL_REVISION)

1447

return parent_keys

1448

1449

def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
    """Find the file ids and versions affected by revisions.

    :param revision_ids: an iterable containing revision ids.
    :param _inv_weave: The inventory weave from this repository or None.
        If None, self.inventories is used.
    :return: a dictionary mapping altered file-ids to an iterable of
        revision_ids. Each altered file-ids has the exact revision_ids that
        altered it listed explicitly.
    """
    selected_keys = set((revid,) for revid in revision_ids)
    inventories = _inv_weave or self.inventories
    line_iterator = inventories.iter_lines_added_or_present_in_keys(
        selected_keys, pb=None)
    return self._find_file_ids_from_xml_inventory_lines(
        line_iterator, selected_keys)

1465

1466

def iter_files_bytes(self, desired_files):
    """Iterate through file versions.

    Files will not necessarily be returned in the order they occur in
    desired_files.  No specific order is guaranteed.

    Yields pairs of identifier, bytes_iterator.  identifier is an opaque
    value supplied by the caller as part of desired_files.  It should
    uniquely identify the file version in the caller's context.  (Examples:
    an index number or a TreeTransform trans_id.)

    bytes_iterator is an iterable of bytestrings for the file.  The
    kind of iterable and length of the bytestrings are unspecified, but for
    this implementation, it is a list of bytes produced by
    VersionedFile.get_record_stream().

    :param desired_files: a list of (file_id, revision_id, identifier)
        triples
    :raises errors.RevisionNotPresent: If a requested text is absent.
    """
    # Map each text key back to the caller's identifier.
    text_keys = dict(
        ((file_id, revision_id), identifier)
        for file_id, revision_id, identifier in desired_files)
    stream = self.texts.get_record_stream(text_keys, 'unordered', True)
    for record in stream:
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        yield text_keys[record.key], record.get_bytes_as('chunked')

1492

1493

def _generate_text_key_index(self, text_key_references=None,
                             ancestors=None):
    """Generate a new text key index for the repository.

    This is an expensive function that will take considerable time to run.

    :param text_key_references: Text key references to use; computed with
        find_text_key_references when None.
    :param ancestors: Revision parent map to use; computed from the
        repository graph over all revision ids when None.
    :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
        list of parents, also text keys. When a given key has no parents,
        the parents list will be [NULL_REVISION].
    """
    # All revisions, to find inventory parents.
    if ancestors is None:
        revision_graph = self.get_graph()
        ancestors = revision_graph.get_parent_map(self.all_revision_ids())
    if text_key_references is None:
        text_key_references = self.find_text_key_references()
    progress = ui.ui_factory.nested_progress_bar()
    try:
        return self._do_generate_text_key_index(
            ancestors, text_key_references, progress)
    finally:
        progress.finished()

1515

1516

def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
    """Helper for _generate_text_key_index to avoid deep nesting.

    :param ancestors: a revision parent map; it is topologically sorted
        here and indexed by revision id to find each revision's parents.
    :param text_key_references: a mapping of text key to a flag; keys
        whose flag is false are recorded with [NULL_REVISION] as parents.
    :param pb: a progress bar, updated once per processed text key.
    :return: A dict mapping each text key to its list of parent text
        keys ([NULL_REVISION] when there are none).
    """
    revision_order = tsort.topo_sort(ancestors)
    invalid_keys = set()
    # revision id -> set of valid text keys introduced in that revision.
    revision_keys = {}
    for revision_id in revision_order:
        revision_keys[revision_id] = set()
    text_count = len(text_key_references)
    # a cache of the text keys to allow reuse; costs a dict of all the
    # keys, but saves a 2-tuple for every child of a given key.
    text_key_cache = {}
    for text_key, valid in text_key_references.iteritems():
        if not valid:
            invalid_keys.add(text_key)
        else:
            revision_keys[text_key[1]].add(text_key)
        text_key_cache[text_key] = text_key
    # Drop our reference to the (potentially large) input mapping.
    del text_key_references
    text_index = {}
    # The graph reads from text_index, so heads() sees the parents
    # recorded for keys processed earlier in the topological order.
    text_graph = graph.Graph(graph.DictParentsProvider(text_index))
    NULL_REVISION = _mod_revision.NULL_REVISION
    # Set a cache with a size of 10 - this suffices for bzr.dev but may be
    # too small for large or very branchy trees. However, for 55K path
    # trees, it would be easy to use too much memory trivially. Ideally we
    # could gauge this by looking at available real memory etc, but this is
    # always a tricky proposition.
    inventory_cache = lru_cache.LRUCache(10)
    batch_size = 10 # should be ~150MB on a 55K path tree
    # NOTE(review): relies on Python 2 integer division for '/'.
    batch_count = len(revision_order) / batch_size + 1
    processed_texts = 0
    pb.update("Calculating text parents", processed_texts, text_count)
    for offset in xrange(batch_count):
        to_query = revision_order[offset * batch_size:(offset + 1) *
            batch_size]
        if not to_query:
            break
        for revision_id in to_query:
            parent_ids = ancestors[revision_id]
            for text_key in revision_keys[revision_id]:
                pb.update("Calculating text parents", processed_texts)
                processed_texts += 1
                candidate_parents = []
                for parent_id in parent_ids:
                    # First guess: the same file id, modified in the
                    # parent revision itself.
                    parent_text_key = (text_key[0], parent_id)
                    try:
                        check_parent = parent_text_key not in \
                            revision_keys[parent_id]
                    except KeyError:
                        # the parent parent_id is a ghost:
                        check_parent = False
                        # truncate the derived graph against this ghost.
                        parent_text_key = None
                    if check_parent:
                        # look at the parent commit details inventories to
                        # determine possible candidates in the per file graph.
                        # TODO: cache here.
                        try:
                            inv = inventory_cache[parent_id]
                        except KeyError:
                            inv = self.revision_tree(parent_id).inventory
                            inventory_cache[parent_id] = inv
                        try:
                            parent_entry = inv[text_key[0]]
                        except (KeyError, errors.NoSuchId):
                            # The file id is not in the parent inventory.
                            parent_entry = None
                        if parent_entry is not None:
                            parent_text_key = (
                                text_key[0], parent_entry.revision)
                        else:
                            parent_text_key = None
                    if parent_text_key is not None:
                        candidate_parents.append(
                            text_key_cache[parent_text_key])
                parent_heads = text_graph.heads(candidate_parents)
                new_parents = list(parent_heads)
                # Order the heads by their position in candidate_parents.
                new_parents.sort(key=lambda x:candidate_parents.index(x))
                if new_parents == []:
                    new_parents = [NULL_REVISION]
                text_index[text_key] = new_parents

    for text_key in invalid_keys:
        text_index[text_key] = [NULL_REVISION]
    return text_index

1599

1600

def item_keys_introduced_by(self, revision_ids, _files_pb=None):
    """Iterate over the keys of all data introduced by some revisions.

    Keys are produced in an order in which the corresponding items can be
    safely fetched and inserted: file keys first, then the non-file keys.

    :param revision_ids: the revision ids whose data is wanted.
    :param _files_pb: optional progress bar passed to the file-key pass.
    :returns: An iterable producing tuples of (knit-kind, file-id,
        versions). knit-kind is one of 'file', 'inventory', 'signatures',
        'revisions'. file-id is None unless knit-kind is 'file'.
    """
    for file_item in self._find_file_keys_to_fetch(revision_ids,
                                                   _files_pb):
        yield file_item
    # The progress bar is only relevant to the file pass; let go of it.
    del _files_pb
    for other_item in self._find_non_file_keys_to_fetch(revision_ids):
        yield other_item

1616

1617

def _find_file_keys_to_fetch(self, revision_ids, pb):

1618

# XXX: it's a bit weird to control the inventory weave caching in this

1619

# generator. Ideally the caching would be done in fetch.py I think. Or

1620

# maybe this generator should explicitly have the contract that it

1621

# should not be iterated until the previously yielded item has been

1622

# processed?

1623

inv_w = self.inventories

1624

1625

# file ids that changed

1626

file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)

1627

count = 0

1628

num_file_ids = len(file_ids)

1629

for file_id, altered_versions in file_ids.iteritems():

1630

if pb is not None:

1631

pb.update("Fetch texts", count, num_file_ids)

1632

count += 1

1633

yield ("file", file_id, altered_versions)

1634

1635

def _find_non_file_keys_to_fetch(self, revision_ids):

1636

# inventory

1637

yield ("inventory", None, revision_ids)

1638

1639

# signatures

1640

# XXX: Note ATM no callers actually pay attention to this return

1641

# instead they just use the list of revision ids and ignore

1642

# missing sigs. Consider removing this work entirely

1643

revisions_with_signatures = set(self.signatures.get_parent_map(

1644

[(r,) for r in revision_ids]))

1645

revisions_with_signatures = set(

1646

[r for (r,) in revisions_with_signatures])

1647

revisions_with_signatures.intersection_update(revision_ids)

1648

yield ("signatures", None, revisions_with_signatures)

1649

1650

# revisions

1651

yield ("revisions", None, revision_ids)

1652

1653

@needs_read_lock
def get_inventory(self, revision_id):
    """Return the Inventory object for revision_id."""
    # iter_inventories handles the lookup; we only want its first item.
    inventories = self.iter_inventories([revision_id])
    return inventories.next()

1657

1658

def iter_inventories(self, revision_ids, ordering=None):
    """Get many inventories by revision_ids.

    This will buffer some or all of the texts used in constructing the
    inventories in memory, but will only parse a single inventory at a
    time.

    :param revision_ids: The expected revision ids of the inventories.
    :param ordering: optional ordering, e.g. 'topological'.  If not
        specified, the order of revision_ids will be preserved (by
        buffering if necessary).
    :return: An iterator of inventories.
    :raises ValueError: if None or NULL_REVISION is among revision_ids.
    """
    asks_for_null = None in revision_ids
    if not asks_for_null:
        asks_for_null = _mod_revision.NULL_REVISION in revision_ids
    if asks_for_null:
        raise ValueError('cannot get null revision inventory')
    return self._iter_inventories(revision_ids, ordering)

1675

1676

def _iter_inventories(self, revision_ids, ordering):

1677

"""single-document based inventory iteration."""

1678

inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)

1679

for text, revision_id in inv_xmls:

1680

yield self._deserialise_inventory(revision_id, text)

1681

1682

def _iter_inventory_xmls(self, revision_ids, ordering):

1683

if ordering is None:

1684

order_as_requested = True

1685

ordering = 'unordered'

1686

else:

1687

order_as_requested = False

1688

keys = [(revision_id,) for revision_id in revision_ids]

1689

if not keys:

1690

return

1691

if order_as_requested:

1692

key_iter = iter(keys)

1693

next_key = key_iter.next()

1694

stream = self.inventories.get_record_stream(keys, ordering, True)

1695

text_chunks = {}

1696

for record in stream:

1697

if record.storage_kind != 'absent':

1698

chunks = record.get_bytes_as('chunked')

1699

if order_as_requested:

1700

text_chunks[record.key] = chunks

1701

else:

1702

yield ''.join(chunks), record.key[-1]

1703

else:

1704

raise errors.NoSuchRevision(self, record.key)

1705

if order_as_requested:

1706

# Yield as many results as we can while preserving order.

1707

while next_key in text_chunks:

1708

chunks = text_chunks.pop(next_key)

1709

yield ''.join(chunks), next_key[-1]

1710

try:

1711

next_key = key_iter.next()

1712

except StopIteration:

1713

# We still want to fully consume the get_record_stream,

1714

# just in case it is not actually finished at this point

1715

next_key = None

1716

break

1717

1718

def _deserialise_inventory(self, revision_id, xml):

1719

"""Transform the xml into an inventory object.

1720

1721

:param revision_id: The expected revision id of the inventory.

1722

:param xml: A serialised inventory.

1723

"""

1724

result = self._serializer.read_inventory_from_string(xml, revision_id,

1725

entry_cache=self._inventory_entry_cache,

1726

return_from_cache=self._safe_to_return_from_cache)

1727

if result.revision_id != revision_id:

1728

raise AssertionError('revision id mismatch %s != %s' % (

1729

result.revision_id, revision_id))

1730

return result

1731

1732

def get_serializer_format(self):
    """Return the format_num attribute of this repository's serializer."""
    serializer = self._serializer
    return serializer.format_num

1734

1735

@needs_read_lock
def _get_inventory_xml(self, revision_id):
    """Return the serialised inventory of revision_id as a string.

    :raises errors.HistoryMissing: if no inventory text is produced.
    """
    xml_stream = self._iter_inventory_xmls([revision_id], 'unordered')
    try:
        first = xml_stream.next()
    except StopIteration:
        raise errors.HistoryMissing(self, 'inventory', revision_id)
    xml_text, _found_revision_id = first
    return xml_text

1744

1745

@needs_read_lock
def revision_tree(self, revision_id):
    """Return the Tree for a revision in this repository.

    `revision_id` may be NULL_REVISION for the empty tree revision.
    """
    revision_id = _mod_revision.ensure_null(revision_id)
    # TODO: refactor this to use an existing revision object
    # so we don't need to read it in twice.
    if revision_id == _mod_revision.NULL_REVISION:
        # The null revision has no stored inventory; use an empty one.
        empty_inventory = Inventory(root_id=None)
        return InventoryRevisionTree(self, empty_inventory,
                                     _mod_revision.NULL_REVISION)
    inv = self.get_inventory(revision_id)
    return InventoryRevisionTree(self, inv, revision_id)

1760

1761

def revision_trees(self, revision_ids):
    """Yield a Tree for each of the given revisions in this repository.

    :param revision_ids: a sequence of revision-ids;
      a revision-id may not be None or 'null:'
    """
    for inventory in self.iter_inventories(revision_ids):
        tree = InventoryRevisionTree(self, inventory, inventory.revision_id)
        yield tree

1770

1771

def _filtered_revision_trees(self, revision_ids, file_ids):
    """Yield Trees for revisions, restricted to some files.

    :param revision_ids: a sequence of revision-ids;
      a revision-id may not be None or 'null:'
    :param file_ids: if not None, the result is filtered
      so that only those file-ids, their parents and their
      children are included.
    """
    for full_inventory in self.iter_inventories(revision_ids):
        # Should we introduce a FilteredRevisionTree class rather
        # than pre-filter the inventory here?
        subset = full_inventory.filter(file_ids)
        yield InventoryRevisionTree(self, subset, subset.revision_id)

1786

1787

def get_parent_map(self, revision_ids):
    """See graph.StackedParentsProvider.get_parent_map

    :raises ValueError: if None is among revision_ids.
    """
    # The revisions index works in keys (1-tuples) while this API works
    # in plain revision ids, so wrap on the way in and unwrap on the way
    # out.
    parent_map = {}
    wanted_keys = []
    for revision_id in revision_ids:
        if revision_id == _mod_revision.NULL_REVISION:
            parent_map[revision_id] = ()
            continue
        if revision_id is None:
            raise ValueError('get_parent_map(None) is not valid')
        wanted_keys.append((revision_id ,))
    found = self.revisions.get_parent_map(wanted_keys)
    for (revision_id,), parent_keys in found.iteritems():
        if not parent_keys:
            # No recorded parents: report the null revision as parent.
            parent_map[revision_id] = (_mod_revision.NULL_REVISION,)
            continue
        parent_map[revision_id] = tuple(
            [parent_revid for (parent_revid,) in parent_keys])
    return parent_map

1808

1809

@needs_read_lock
def get_known_graph_ancestry(self, revision_ids):
    """Return the known graph for a set of revision ids and their ancestors.
    """
    make_key = static_tuple.StaticTuple
    revision_keys = []
    for revid in revision_ids:
        # Wrap each id as an interned 1-tuple key for the revisions index.
        revision_keys.append(make_key(revid).intern())
    keyed_graph = self.revisions.get_known_graph_ancestry(revision_keys)
    # Callers work with plain revision ids, so wrap the key-based graph.
    return graph.GraphThunkIdsToKeys(keyed_graph)

1817

1818

def _get_versioned_file_checker(self, text_key_references=None,
                                ancestors=None):
    """Return an object suitable for checking versioned files.

    :param text_key_references: if non-None, an already built
        dictionary mapping text keys ((fileid, revision_id) tuples)
        to whether they were referred to by the inventory of the
        revision_id that they contain. If None, this will be
        calculated.
    :param ancestors: Optional result from
        self.get_graph().get_parent_map(self.all_revision_ids()) if already
        available.
    """
    checker = _VersionedFileChecker(
        self,
        text_key_references=text_key_references,
        ancestors=ancestors)
    return checker

1833

1834

@needs_read_lock
def has_signature_for_revision_id(self, revision_id):
    """Report whether a signature is stored for revision_id.

    :raises errors.NoSuchRevision: if the revision itself is not present.
    """
    if not self.has_revision(revision_id):
        raise errors.NoSuchRevision(self, revision_id)
    signature_key = (revision_id,)
    found = self.signatures.get_parent_map([signature_key])
    return len(found) == 1

1842

1843

@needs_read_lock
def get_signature_text(self, revision_id):
    """Return the signature text stored for revision_id.

    :raises errors.NoSuchRevision: if the signature record is absent.
    """
    wanted = [(revision_id,)]
    records = self.signatures.get_record_stream(wanted, 'unordered', True)
    signature_record = records.next()
    if signature_record.storage_kind == 'absent':
        raise errors.NoSuchRevision(self, revision_id)
    return signature_record.get_bytes_as('fulltext')

1852

1853

def _find_inconsistent_revision_parents(self, revisions_iterator=None):

1854

"""Find revisions with different parent lists in the revision object

1855

and in the index graph.

1856

1857

:param revisions_iterator: None, or an iterator of (revid,

1858

Revision-or-None). This iterator controls the revisions checked.

1859

:returns: an iterator yielding tuples of (revison-id, parents-in-index,

1860

parents-in-revision).

1861

"""

1862

if not self.is_locked():

1863

raise AssertionError()

1864

vf = self.revisions

1865

if revisions_iterator is None:

1866

revisions_iterator = self._iter_revisions(None)

1867

for revid, revision in revisions_iterator:

1868

if revision is None:

1869

pass

1870

parent_map = vf.get_parent_map([(revid,)])

1871

parents_according_to_index = tuple(parent[-1] for parent in

1872

parent_map[(revid,)])

1873

parents_according_to_revision = tuple(revision.parent_ids)

1874

if parents_according_to_index != parents_according_to_revision:

1875

yield (revid, parents_according_to_index,

1876

parents_according_to_revision)

1877

1878

def _check_for_inconsistent_revision_parents(self):

1879

inconsistencies = list(self._find_inconsistent_revision_parents())

1880

if inconsistencies:

1881

raise errors.BzrCheckError(

1882

"Revision knit has inconsistent parents.")

1883

1884

def _get_sink(self):
    """Return a sink for streaming into this repository."""
    sink = StreamSink(self)
    return sink

1887

1888

def _get_source(self, to_format):
    """Return a source for streaming from this repository.

    :param to_format: the format handed to the StreamSource.
    """
    source = StreamSource(self, to_format)
    return source

1891

1892

1893

class MetaDirVersionedFileRepository(MetaDirRepository,
                                     VersionedFileRepository):
    """Repositories in a meta-dir, that work via versioned file objects."""

    def __init__(self, _format, a_bzrdir, control_files):
        """Initialise by delegating to the next class in the MRO."""
        parent = super(MetaDirVersionedFileRepository, self)
        parent.__init__(_format, a_bzrdir, control_files)

1900

1901

1902

class MetaDirVersionedFileRepositoryFormat(MetaDirRepositoryFormat,
                                           VersionedFileRepositoryFormat):
    """Base class for repository formats using versioned files in metadirs.

    It adds nothing of its own; it only combines its two base classes.
    """

1905

1906

1907

class StreamSink(object):
    """An object that can insert a stream into a repository.

    This interface handles the complexity of reserialising inventories and
    revisions from different formats, and allows unidirectional insertion into
    stacked repositories without looking for the missing basis parents
    beforehand.
    """

    def __init__(self, target_repo):
        """Create a sink inserting into target_repo."""
        self.target_repo = target_repo

    def insert_stream(self, stream, src_format, resume_tokens):
        """Insert a stream's content into the target repository.

        The target is write-locked for the duration of the call, and a
        write group is started (or resumed, when resume_tokens is
        non-empty). On any exception the write group is aborted.

        :param stream: an iterable of (substream_type, substream) pairs.
        :param src_format: a bzr repository format.
        :param resume_tokens: tokens of a suspended write group to resume,
            or an empty value to start a new write group.
        :return: a list of resume tokens and an iterable of keys additional
            items required before the insertion can be completed.
        """
        self.target_repo.lock_write()
        try:
            if resume_tokens:
                self.target_repo.resume_write_group(resume_tokens)
                is_resume = True
            else:
                self.target_repo.start_write_group()
                is_resume = False
            try:
                # insert_stream_without_locking only inserts; the
                # commit or suspend of the write group happens below.
                missing_keys = self.insert_stream_without_locking(stream,
                    src_format, is_resume)
                if missing_keys:
                    # suspend the write group and tell the caller what is
                    # missing. We know we can suspend or else we would not
                    # have entered this code path. (All repositories that
                    # can handle missing keys can handle suspending a write
                    # group).
                    write_group_tokens = \
                        self.target_repo.suspend_write_group()
                    return write_group_tokens, missing_keys
                hint = self.target_repo.commit_write_group()
                to_serializer = self.target_repo._format._serializer
                src_serializer = src_format._serializer
                if (to_serializer != src_serializer and
                    self.target_repo._format.pack_compresses):
                    self.target_repo.pack(hint=hint)
                return [], set()
            except:
                # Deliberately bare: whatever interrupted the insertion,
                # abort the write group and then re-raise unchanged.
                self.target_repo.abort_write_group(suppress_errors=True)
                raise
        finally:
            self.target_repo.unlock()

    def insert_stream_without_locking(self, stream, src_format,
                                      is_resume=False):
        """Insert a stream's content into the target repository.

        This assumes that you already have a locked repository and an active
        write group.

        :param stream: an iterable of (substream_type, substream) pairs.
        :param src_format: a bzr repository format.
        :param is_resume: Passed down to get_missing_parent_inventories to
            indicate if we should be checking for missing texts at the same
            time.

        :return: A set of keys that are missing.
        :raises errors.ObjectNotLocked: if the target is not write locked.
        :raises errors.BzrError: if the target is not in a write group.
        :raises AssertionError: on an unknown substream type.
        """
        if not self.target_repo.is_write_locked():
            raise errors.ObjectNotLocked(self)
        if not self.target_repo.is_in_write_group():
            raise errors.BzrError('you must already be in a write group')
        to_serializer = self.target_repo._format._serializer
        src_serializer = src_format._serializer
        new_pack = None
        if to_serializer == src_serializer:
            # If serializers match and the target is a pack repository, set
            # the write cache size on the new pack.  This avoids poor
            # performance on transports where append is unbuffered (such as
            # RemoteTransport).  This is safe to do because nothing should
            # read back from the target repository while a stream with
            # matching serialization is being inserted.
            # The exception is that a delta record from the source that
            # should be a fulltext may need to be expanded by the target (see
            # test_fetch_revisions_with_deltas_into_pack); but we take care to
            # explicitly flush any buffered writes first in that rare case.
            try:
                new_pack = self.target_repo._pack_collection._new_pack
            except AttributeError:
                # Not a pack repository
                pass
            else:
                new_pack.set_write_cache_size(1024*1024)
        for substream_type, substream in stream:
            if 'stream' in debug.debug_flags:
                mutter('inserting substream: %s', substream_type)
            if substream_type == 'texts':
                self.target_repo.texts.insert_record_stream(substream)
            elif substream_type == 'inventories':
                if src_serializer == to_serializer:
                    self.target_repo.inventories.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_inventories(
                        substream, src_serializer)
            elif substream_type == 'inventory-deltas':
                self._extract_and_insert_inventory_deltas(
                    substream, src_serializer)
            elif substream_type == 'chk_bytes':
                # XXX: This doesn't support conversions, as it assumes the
                #      conversion was done in the fetch code.
                self.target_repo.chk_bytes.insert_record_stream(substream)
            elif substream_type == 'revisions':
                # This may fallback to extract-and-insert more often than
                # required if the serializers are different only in terms of
                # the inventory.
                if src_serializer == to_serializer:
                    self.target_repo.revisions.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_revisions(substream,
                        src_serializer)
            elif substream_type == 'signatures':
                self.target_repo.signatures.insert_record_stream(substream)
            else:
                raise AssertionError('kaboom! %s' % (substream_type,))
        # Done inserting data, and the missing_keys calculations will try to
        # read back from the inserted data, so flush the writes to the new
        # pack (if this is pack format).
        if new_pack is not None:
            new_pack._write_data('', flush=True)
        # Find all the new revisions (including ones from resume_tokens)
        missing_keys = self.target_repo.get_missing_parent_inventories(
            check_for_missing_texts=is_resume)
        try:
            for prefix, versioned_file in (
                ('texts', self.target_repo.texts),
                ('inventories', self.target_repo.inventories),
                ('revisions', self.target_repo.revisions),
                ('signatures', self.target_repo.signatures),
                ('chk_bytes', self.target_repo.chk_bytes),
                ):
                if versioned_file is None:
                    continue
                # TODO: key is often going to be a StaticTuple object
                #       I don't believe we can define a method by which
                #       (prefix,) + StaticTuple will work, though we could
                #       define a StaticTuple.sq_concat that would allow you
                #       to pass in either a tuple or a StaticTuple as the
                #       second object, so instead we could have:
                #       StaticTuple(prefix) + key here...
                missing_keys.update((prefix,) + key for key in
                    versioned_file.get_missing_compression_parent_keys())
        except NotImplementedError:
            # cannot even attempt suspending, and missing would have failed
            # during stream insertion.
            missing_keys = set()
        return missing_keys

    def _extract_and_insert_inventory_deltas(self, substream, serializer):
        """Parse each inventory-delta record and add it to the target.

        :param substream: an iterable of records whose fulltext bytes are
            serialised inventory deltas.
        :param serializer: unused; kept for signature compatibility.
        :raises errors.IncompatibleRevision: if a delta cannot be parsed
            because it is incompatible.
        """
        for record in substream:
            # Insert the delta directly
            inventory_delta_bytes = record.get_bytes_as('fulltext')
            deserialiser = inventory_delta.InventoryDeltaDeserializer()
            try:
                parse_result = deserialiser.parse_text_bytes(
                    inventory_delta_bytes)
            except inventory_delta.IncompatibleInventoryDelta as err:
                mutter("Incompatible delta: %s", err.msg)
                raise errors.IncompatibleRevision(self.target_repo._format)
            # Only the basis, the new revision id and the delta are needed.
            (basis_id, revision_id, _rich_root, _tree_refs,
             inv_delta) = parse_result
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory_by_delta(
                basis_id, inv_delta, revision_id, parents)

    def _extract_and_insert_inventories(self, substream, serializer,
                                        parse_delta=None):
        """Generate a new inventory versionedfile in target, converting data.

        The inventory is retrieved from the source, (deserializing it), and
        stored in the target (reserializing it in a different format).

        :param substream: an iterable of inventory records.
        :param serializer: the source serializer used to read each record.
        :param parse_delta: unused; kept for signature compatibility.
        """
        for record in substream:
            # It's not a delta, so it must be a fulltext in the source
            # serializer's format.
            fulltext = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            inv = serializer.read_inventory_from_string(fulltext,
                                                        revision_id)
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory(revision_id, inv, parents)
            # No need to keep holding this full inv in memory when the rest
            # of the substream is likely to be all deltas.
            del inv

    def _extract_and_insert_revisions(self, substream, serializer):
        """Deserialise each revision record and add it to the target.

        :param substream: an iterable of revision records.
        :param serializer: the source serializer used to read each record.
        :raises AssertionError: if a parsed revision's id differs from the
            record key.
        """
        for record in substream:
            fulltext = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            rev = serializer.read_revision_from_string(fulltext)
            if rev.revision_id != revision_id:
                raise AssertionError('wtf: %s != %s' % (rev, revision_id))
            self.target_repo.add_revision(revision_id, rev)

    def finished(self):
        """Reconcile the target if its format asks for that after fetch."""
        if self.target_repo._format._fetch_reconcile:
            self.target_repo.reconcile()

2115

2116

2117

class StreamSource(object):

2118

"""A source of a stream for fetching between repositories."""

2119

2120

def __init__(self, from_repository, to_format):
    """Create a StreamSource streaming from from_repository.

    :param from_repository: the repository records are read from.
    :param to_format: the repository format the stream is produced for.
    """
    self._record_counter = RecordCounter()
    self.to_format = to_format
    self.from_repository = from_repository

2125

2126

def delta_on_metadata(self):
    """Return True if delta's are permitted on metadata streams.

    That is on revisions and signatures. Deltas are allowed only when
    the target format fetches with deltas and both sides share the same
    serializer.
    """
    source_serializer = self.from_repository._format._serializer
    destination_serializer = self.to_format._serializer
    uses_deltas = self.to_format._fetch_uses_deltas
    if not uses_deltas:
        return uses_deltas
    return source_serializer == destination_serializer

2135

2136

def _fetch_revision_texts(self, revs):
    """Return the signature and revision substreams for revs.

    :param revs: the revision ids to fetch.
    :return: a list of two (substream name, record stream) pairs:
        'signatures' first, then 'revisions'.
    """
    # fetch signatures first and then the revision texts
    # may need to be a InterRevisionStore call here.
    signature_vf = self.from_repository.signatures
    keys = [(rev_id,) for rev_id in revs]
    # A missing signature is just skipped.
    signature_stream = versionedfile.filter_absent(
        signature_vf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.to_format._fetch_uses_deltas))
    # If a revision has a delta, this is actually expanded inside the
    # insert_record_stream code now, which is an alternate fix for
    # bug #261339
    revision_vf = self.from_repository.revisions
    revision_stream = revision_vf.get_record_stream(
        keys,
        self.to_format._fetch_order,
        not self.delta_on_metadata())
    return [('signatures', signature_stream),
            ('revisions', revision_stream)]

2155

2156

def _generate_root_texts(self, revs):

2157

"""This will be called by get_stream between fetching weave texts and

2158

fetching the inventory weave.

2159

"""

2160

if self._rich_root_upgrade():

2161

return _mod_fetch.Inter1and2Helper(

2162

self.from_repository).generate_root_texts(revs)

2163

else:

2164

return []

2165

2166

def get_stream(self, search):
    """Yield (substream name, record stream) pairs for a search.

    File texts are accumulated and sent first, followed by any generated
    root texts, the inventories, and finally signatures and revisions.

    :param search: an object whose get_keys() gives the revision ids.
    :raises AssertionError: on an unknown knit kind.
    """
    phase = 'file'
    revs = search.get_keys()
    parent_map = self.from_repository.get_graph().get_parent_map(revs)
    revs = tsort.topo_sort(parent_map)
    text_keys = []
    for knit_kind, file_id, revisions in \
            self.from_repository.item_keys_introduced_by(revs):
        if knit_kind != phase:
            phase = knit_kind
            # Make a new progress bar for this phase
        if knit_kind == "file":
            # Accumulate file texts
            text_keys.extend(
                [(file_id, revision) for revision in revisions])
        elif knit_kind == "inventory":
            # Now copy the file texts.
            texts_vf = self.from_repository.texts
            texts_stream = texts_vf.get_record_stream(
                text_keys, self.to_format._fetch_order,
                not self.to_format._fetch_uses_deltas)
            yield ('texts', texts_stream)
            # Cause an error if a text occurs after we have done the
            # copy.
            text_keys = None
            # Before we process the inventory we generate the root
            # texts (if necessary) so that the inventories references
            # will be valid.
            for root_item in self._generate_root_texts(revs):
                yield root_item
            # we fetch only the referenced inventories because we do not
            # know for unselected inventories whether all their required
            # texts are present in the other repository - it could be
            # corrupt.
            for inventory_item in self._get_inventory_stream(revs):
                yield inventory_item
        elif knit_kind == "signatures":
            # Nothing to do here; this will be taken care of when
            # _fetch_revision_texts happens.
            pass
        elif knit_kind == "revisions":
            for metadata_item in self._fetch_revision_texts(revs):
                yield metadata_item
        else:
            raise AssertionError("Unknown knit kind %r" % knit_kind)

2210

2211

def get_stream_for_missing_keys(self, missing_keys):
    """Yield substreams that fill in the given missing keys.

    :param missing_keys: an iterable of tuples whose first element names
        the versioned file ('texts', 'revisions', 'inventories',
        'chk_bytes' or 'signatures') and whose remainder is the key.
    :raises AssertionError: if any revisions are requested, or if keys
        are requested for a versioned file the source does not have.
    """
    # missing keys can only occur when we are byte copying and not
    # translating (because translation means we don't send
    # unreconstructable deltas ever).
    keys_by_kind = {}
    for kind in ('texts', 'revisions', 'inventories', 'chk_bytes',
                 'signatures'):
        keys_by_kind[kind] = set()
    for key in missing_keys:
        keys_by_kind[key[0]].add(key[1:])
    if len(keys_by_kind['revisions']):
        # If we allowed copying revisions at this point, we could end up
        # copying a revision without copying its required texts: a
        # violation of the requirements for repository integrity.
        raise AssertionError(
            'cannot copy revisions to fill in missing deltas %s' % (
                keys_by_kind['revisions'],))
    for substream_kind, wanted in keys_by_kind.iteritems():
        vf = getattr(self.from_repository, substream_kind)
        if vf is None and wanted:
            raise AssertionError(
                "cannot fill in keys for a versioned file we don't"
                " have: %s needs %s" % (substream_kind, wanted))
        if not wanted:
            # No need to stream something we don't have
            continue
        if substream_kind == 'inventories':
            # Some missing keys are genuinely ghosts, filter those out.
            present = self.from_repository.inventories.get_parent_map(
                wanted)
            revs = [present_key[0] for present_key in present]
            # Get the inventory stream more-or-less as we do for the
            # original stream; there's no reason to assume that records
            # direct from the source will be suitable for the sink.  (Think
            # e.g. 2a -> 1.9-rich-root).
            for info in self._get_inventory_stream(revs, missing=True):
                yield info
            continue

        # Ask for full texts always so that we don't need more round trips
        # after this stream.
        # Some of the missing keys are genuinely ghosts, so filter absent
        # records. The Sink is responsible for doing another check to
        # ensure that ghosts don't introduce missing data for future
        # fetches.
        records = vf.get_record_stream(
            wanted, self.to_format._fetch_order, True)
        yield substream_kind, versionedfile.filter_absent(records)

2260

2261

def inventory_fetch_order(self):
    """Return the ordering to request for inventory records.

    A rich-root upgrade uses 'topological'; otherwise the target
    format's own _fetch_order is used.
    """
    if not self._rich_root_upgrade():
        return self.to_format._fetch_order
    return 'topological'

2266

2267

def _rich_root_upgrade(self):

2268

return (not self.from_repository._format.rich_root_data and

2269

self.to_format.rich_root_data)

2270

2271

def _get_inventory_stream(self, revision_ids, missing=False):
    """Pick the inventory streaming strategy for the two formats involved.

    :param revision_ids: The revisions whose inventories are wanted.
    :param missing: Passed on as ``missing`` to the simple stream, or as
        ``delta_versus_null`` to the inventory-delta stream.
    :return: The substream generator from either
        _get_simple_inventory_stream or _get_convertable_inventory_stream.
    :raises AssertionError: if both formats support CHKs and share the same
        network name.
    """
    source_format = self.from_repository._format
    target_format = self.to_format
    if (source_format.supports_chks and target_format.supports_chks
        and source_format.network_name() == target_format.network_name()):
        raise AssertionError(
            "this case should be handled by GroupCHKStreamSource")
    if 'forceinvdeltas' in debug.debug_flags:
        # Debug flag: always go through inventory deltas.
        return self._get_convertable_inventory_stream(
            revision_ids, delta_versus_null=missing)
    if source_format.network_name() == target_format.network_name():
        # Same format.
        return self._get_simple_inventory_stream(
            revision_ids, missing=missing)
    neither_uses_chks = not (source_format.supports_chks
                             or target_format.supports_chks)
    if (neither_uses_chks
        and source_format._serializer == target_format._serializer):
        # Essentially the same format.
        return self._get_simple_inventory_stream(
            revision_ids, missing=missing)
    # Any time we switch serializations, we want to use an
    # inventory-delta based approach.
    return self._get_convertable_inventory_stream(
        revision_ids, delta_versus_null=missing)

2294

2295

def _get_simple_inventory_stream(self, revision_ids, missing=False):
    """Yield a single ('inventories', record_stream) substream.

    The record stream is requested straight from the source repository's
    ``inventories`` store, in the order given by inventory_fetch_order().

    :param revision_ids: The revisions whose inventories are wanted.
    :param missing: When true, True is always passed as the third argument
        of get_record_stream; otherwise ``not self.delta_on_metadata()``
        is passed.
    """
    # NB: This currently reopens the inventory weave in source;
    # using a single stream interface instead would avoid this.
    inventories = self.from_repository.inventories
    want_delta_closure = True if missing else not self.delta_on_metadata()
    inventory_keys = [(rev_id,) for rev_id in revision_ids]
    record_stream = inventories.get_record_stream(
        inventory_keys, self.inventory_fetch_order(), want_delta_closure)
    yield ('inventories', record_stream)

2306

2307

def _get_convertable_inventory_stream(self, revision_ids,
                                      delta_versus_null=False):
    """Yield a single ('inventory-deltas', stream) substream.

    :param revision_ids: The revisions whose inventories are wanted.
    :param delta_versus_null: Forwarded unchanged to
        _stream_invs_as_deltas.
    """
    # The two formats are sufficiently different that there is no fast
    # path, so we need to send just inventorydeltas, which any
    # sufficiently modern client can insert into any repository.
    # The StreamSink code expects to be able to
    # convert on the target, so we need to put bytes-on-the-wire that can
    # be converted.  That means inventory deltas (if the remote is <1.19,
    # RemoteStreamSink will fallback to VFS to insert the deltas).
    delta_stream = self._stream_invs_as_deltas(
        revision_ids, delta_versus_null=delta_versus_null)
    yield ('inventory-deltas', delta_stream)

2319

2320

def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
    """Return a stream of inventory-deltas for the given rev ids.

    :param revision_ids: The list of inventories to transmit
    :param delta_versus_null: Don't try to find a minimal delta for this
        entry, instead compute the delta versus the NULL_REVISION. This
        effectively streams a complete inventory. Used for stuff like
        filling in missing parents, etc.
    :return: A generator of FulltextContentFactory records, one per
        inventory, whose text is the serialised inventory delta.
    """
    source_repo = self.from_repository
    inv_keys = [(rev_id,) for rev_id in revision_ids]
    parents_by_key = source_repo.inventories.get_parent_map(inv_keys)
    # XXX: possibly repos could implement a more efficient iter_inv_deltas
    # method...
    inv_iter = self.from_repository.iter_inventories(
        revision_ids, 'topological')
    source_format = source_repo._format
    null_id = _mod_revision.NULL_REVISION
    # Only inventories already emitted on this stream (plus NULL) are known
    # to be usable as a delta basis on the receiving side.
    already_sent = set([null_id])
    recent_invs = lru_cache.LRUCache(50)
    empty_inv = source_repo.revision_tree(null_id).inventory
    # XXX: ideally the rich-root/tree-refs flags would be per-revision, not
    # per-repo (e.g.  streaming a non-rich-root revision out of a rich-root
    # repo back into a non-rich-root repo ought to be allowed)
    serializer = inventory_delta.InventoryDeltaSerializer(
        versioned_root=source_format.rich_root_data,
        tree_references=source_format.supports_tree_reference)
    for inv in inv_iter:
        key = (inv.revision_id,)
        parent_keys = parents_by_key.get(key, ())
        # (basis revision id, delta) for every usable parent, in parent
        # order.
        candidates = []
        if parent_keys and not delta_versus_null:
            # The caller did not ask for complete inventories and we have
            # some parents that we can delta against.  Make a delta against
            # each parent so that we can find the smallest.
            for parent_key in parent_keys:
                parent_id = parent_key[0]
                if parent_id not in already_sent:
                    # We don't know that the remote side has this basis, so
                    # we can't use it.
                    continue
                if parent_id == null_id:
                    parent_inv = empty_inv
                else:
                    parent_inv = recent_invs.get(parent_id, None)
                if parent_inv is None:
                    parent_inv = source_repo.get_inventory(parent_id)
                candidates.append((parent_id, inv._make_delta(parent_inv)))
        if candidates:
            # min() keeps the first of equally short deltas.
            basis_id, delta = min(
                candidates, key=lambda pair: len(pair[1]))
        else:
            # Either none of the parents ended up being suitable, or we
            # were asked to delta against NULL
            basis_id = null_id
            delta = inv._make_delta(empty_inv)
        already_sent.add(inv.revision_id)
        recent_invs[inv.revision_id] = inv
        delta_text = ''.join(
            serializer.delta_to_lines(basis_id, key[-1], delta))
        yield versionedfile.FulltextContentFactory(
            key, parent_keys, None, delta_text)

2383

2384

2385

class _VersionedFileChecker(object):
    """Compare the parents stored for file texts with a computed index.

    The index of expected parents comes from the repository's
    ``_generate_text_key_index`` and is kept in ``self.text_index``.
    """

    def __init__(self, repository, text_key_references=None, ancestors=None):
        """Build the text key index for repository.

        :param repository: The repository to check.
        :param text_key_references: Forwarded to
            repository._generate_text_key_index.
        :param ancestors: Forwarded to repository._generate_text_key_index.
        """
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index(
            text_key_references=text_key_references, ancestors=ancestors)

    def calculate_file_version_parents(self, text_key):
        """Calculate the correct parents for a file version according to
        the inventories.

        :param text_key: A key present in self.text_index.
        :return: A tuple of parent keys; the empty tuple when the index
            records [NULL_REVISION] as the only parent.
        """
        parent_keys = self.text_index[text_key]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        return tuple(parent_keys)

    def check_file_version_parents(self, texts, progress_bar=None):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :param texts: Passed through to _check_file_version_parents.
        :param progress_bar: Progress bar to report on.  When None a nested
            progress bar is created for the duration of the call and
            finished afterwards.
        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {text_key: (stored_parents,
            correct_parents)} for each text key where the stored parents
            are not correct.  dangling_file_versions is a set of the keys
            that are present in the repository's texts, but not in the
            text index.
        """
        local_progress = None
        if progress_bar is None:
            local_progress = ui.ui_factory.nested_progress_bar()
            progress_bar = local_progress
        try:
            return self._check_file_version_parents(texts, progress_bar)
        finally:
            # Only finish a progress bar that was created here.
            if local_progress:
                local_progress.finished()

    def _check_file_version_parents(self, texts, progress_bar):
        """See check_file_version_parents.

        NOTE(review): ``texts`` is not read here; self.repository.texts is
        queried instead.
        """
        wrong_parents = {}
        self.file_ids = set([file_id for file_id, _ in
            self.text_index.iterkeys()])
        # text keys is now grouped by file_id
        n_versions = len(self.text_index)
        progress_bar.update('loading text store', 0, n_versions)
        parent_map = self.repository.texts.get_parent_map(self.text_index)
        # On unlistable transports this could well be empty/error...
        text_keys = self.repository.texts.keys()
        unused_keys = frozenset(text_keys) - set(self.text_index)
        for num, key in enumerate(self.text_index.iterkeys()):
            progress_bar.update('checking text graph', num, n_versions)
            correct_parents = self.calculate_file_version_parents(key)
            try:
                knit_parents = parent_map[key]
            except (KeyError, errors.RevisionNotPresent):
                # Missing text!  parent_map is a plain mapping that omits
                # absent keys, so the lookup fails with KeyError;
                # RevisionNotPresent is still accepted for compatibility.
                knit_parents = None
            if correct_parents != knit_parents:
                wrong_parents[key] = (knit_parents, correct_parents)
        return wrong_parents, unused_keys

2447

2448

2449

class InterDifferingSerializer(InterRepository):

2450

2451

@classmethod

2452

def _get_repo_format_to_test(self):
    """Return the repository format to test this optimiser with.

    This implementation always returns None.
    """
    # NOTE(review): the method is decorated with @classmethod, so ``self``
    # is actually the class here.
    return None

2454

2455

@staticmethod

2456

def is_compatible(source, target):
    """Decide whether this optimiser may be used for source -> target.

    :return: False when either format lacks full versioned files, when the
        'IDS_never' debug flag is set, when the target lacks rich-root or
        tree-reference support that the source has, or when the target is
        stacked and supports CHKs.  Otherwise True if the 'IDS_always'
        debug flag is set, else True only when both repositories'
        transport bases start with 'file:///'.
    """
    for repo in (source, target):
        if not repo._format.supports_full_versioned_files:
            return False
    # This is redundant with format.check_conversion_target(), however that
    # raises an exception, and we just want to say "False" as in we won't
    # support converting between these formats.
    flags = debug.debug_flags
    if 'IDS_never' in flags:
        return False
    loses_rich_root = (source.supports_rich_root()
                       and not target.supports_rich_root())
    if loses_rich_root:
        return False
    loses_tree_references = (source._format.supports_tree_reference
                             and not target._format.supports_tree_reference)
    if loses_tree_references:
        return False
    if target._fallback_repositories and target._format.supports_chks:
        # IDS doesn't know how to copy CHKs for the parent inventories it
        # adds to stacked repos.
        return False
    if 'IDS_always' in flags:
        return True

    def on_local_disk(repo):
        return repo.bzrdir.transport.base.startswith('file:///')

    # Only use this code path for local source and target.  IDS does far
    # too much IO (both bandwidth and roundtrips) over a network.
    return on_local_disk(source) and on_local_disk(target)

2484

2485

def _get_trees(self, revision_ids, cache):
    """Collect (revision_id, tree) pairs for the given revisions.

    Trees already in cache are reused; others are loaded from the source
    repository and stored into cache.  Revisions for which the source
    raises NoSuchRevision are left out of the result.

    :param revision_ids: Revision ids to look up, in the order wanted.
    :param cache: Mapping of revision id to tree; updated in place.
    :return: A list of (revision_id, tree) tuples.
    """
    found = []
    for rev_id in revision_ids:
        if rev_id in cache:
            found.append((rev_id, cache[rev_id]))
            continue
        # Not cached, but inventory might be present anyway.
        try:
            tree = self.source.revision_tree(rev_id)
        except errors.NoSuchRevision:
            # Nope, parent is ghost.
            continue
        cache[rev_id] = tree
        found.append((rev_id, tree))
    return found

2501

2502

def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
    """Get the best delta and base for this revision.

    A delta of tree's inventory is made against every tree in
    possible_trees and the shortest one wins; equally short deltas are
    ordered by basis id.

    :param tree: The tree whose inventory is being converted.
    :param parent_ids: Not used by this implementation; kept so existing
        callers keep working.
    :param possible_trees: A non-empty list of (basis_id, basis_tree).
    :return: (basis_id, delta)
    :raises IndexError: if possible_trees is empty.
    """
    deltas = []
    # Generate deltas against each tree, to find the shortest.
    for basis_id, basis_tree in possible_trees:
        delta = tree.inventory._make_delta(basis_tree.inventory)
        deltas.append((len(delta), basis_id, delta))
    # Order on (length, basis_id) only, so ties never fall through to
    # comparing the delta contents themselves.
    deltas.sort(key=lambda candidate: candidate[:2])
    return deltas[0][1:]

2529

2530

def _fetch_parent_invs_for_stacking(self, parent_map, cache):
    """Find all parent revisions that are absent, but for which the
    inventory is present, and copy those inventories.

    This is necessary to preserve correctness when the source is stacked
    without fallbacks configured.  (Note that in cases like upgrade the
    source may be not have _fallback_repositories even though it is
    stacked.)

    :param parent_map: Mapping of revision id to parent revision ids for
        the revisions about to be fetched.
    :param cache: Mapping of revision id to tree; every parent tree copied
        here is stored in it.
    """
    parent_revs = set()
    for parents in parent_map.values():
        parent_revs.update(parents)
    present_parents = self.source.get_parent_map(parent_revs)
    absent_parents = parent_revs.difference(present_parents)
    parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
        (rev_id,) for rev_id in absent_parents)
    parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
    # Every inventory is added as a delta against the same NULL_REVISION
    # tree, so that tree is loaded at most once, on the first iteration.
    basis_id = None
    basis_tree = None
    for parent_tree in self.source.revision_trees(parent_inv_ids):
        if basis_tree is None:
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
        current_revision_id = parent_tree.get_revision_id()
        parents_parents_keys = parent_invs_keys_for_stacking[
            (current_revision_id,)]
        parents_parents = [key[-1] for key in parents_parents_keys]
        delta = parent_tree.inventory._make_delta(basis_tree.inventory)
        self.target.add_inventory_by_delta(
            basis_id, delta, current_revision_id, parents_parents)
        cache[current_revision_id] = parent_tree

2558

2559

def _fetch_batch(self, revision_ids, basis_id, cache):
    """Fetch across a few revisions.

    The work is done in this order: inventory deltas and new text keys are
    computed for every revision, then file texts are inserted into the
    target, then the inventory deltas, then (for a stacked target) the
    inventories of parents outside this batch, and finally signatures and
    revisions.

    :param revision_ids: The revisions to copy
    :param basis_id: The revision_id of a tree that must be in cache, used
        as a basis for delta when no other base is available
    :param cache: A cache of RevisionTrees that we can use.
    :return: The revision_id of the last converted tree. The RevisionTree
        for it will be in cache
    """
    # Walk through all revisions; get inventory deltas, copy referenced
    # texts that delta references, insert the delta, revision and
    # signature.
    root_keys_to_create = set()
    text_keys = set()
    pending_deltas = []
    pending_revisions = []
    parent_map = self.source.get_parent_map(revision_ids)
    self._fetch_parent_invs_for_stacking(parent_map, cache)
    # This flag is set back to False after the loop below; it is not reset
    # here if the loop raises.
    self.source._safe_to_return_from_cache = True
    for tree in self.source.revision_trees(revision_ids):
        # Find an inventory delta for this revision.
        # Find text entries that need to be copied, too.
        current_revision_id = tree.get_revision_id()
        parent_ids = parent_map.get(current_revision_id, ())
        parent_trees = self._get_trees(parent_ids, cache)
        possible_trees = list(parent_trees)
        if len(possible_trees) == 0:
            # There either aren't any parents, or the parents are ghosts,
            # so just use the last converted tree.
            possible_trees.append((basis_id, cache[basis_id]))
        basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
                                                       possible_trees)
        revision = self.source.get_revision(current_revision_id)
        pending_deltas.append((basis_id, delta,
            current_revision_id, revision.parent_ids))
        if self._converting_to_rich_root:
            self._revision_id_to_root_id[current_revision_id] = \
                tree.get_root_id()
        # Determine which texts are present in this revision but not in
        # any of the available parents.
        texts_possibly_new_in_tree = set()
        for old_path, new_path, file_id, entry in delta:
            if new_path is None:
                # This file_id isn't present in the new rev
                continue
            if not new_path:
                # This is the root
                if not self.target.supports_rich_root():
                    # The target doesn't support rich root, so we don't
                    # copy
                    continue
                if self._converting_to_rich_root:
                    # This can't be copied normally, we have to insert
                    # it specially
                    root_keys_to_create.add((file_id, entry.revision))
                    continue
            # NOTE(review): ``kind`` is assigned but not read again in this
            # method.
            kind = entry.kind
            texts_possibly_new_in_tree.add((file_id, entry.revision))
        # Drop every candidate text whose (file_id, revision) already
        # appears in one of the basis trees.  This loop rebinds
        # ``basis_id``; it is set again at the end of the iteration.
        for basis_id, basis_tree in possible_trees:
            basis_inv = basis_tree.inventory
            # Iterate over a copy because entries are removed from the set.
            for file_key in list(texts_possibly_new_in_tree):
                file_id, file_revision = file_key
                try:
                    entry = basis_inv[file_id]
                except errors.NoSuchId:
                    continue
                if entry.revision == file_revision:
                    texts_possibly_new_in_tree.remove(file_key)
        text_keys.update(texts_possibly_new_in_tree)
        pending_revisions.append(revision)
        cache[current_revision_id] = tree
        basis_id = current_revision_id
    self.source._safe_to_return_from_cache = False
    # Copy file texts
    from_texts = self.source.texts
    to_texts = self.target.texts
    if root_keys_to_create:
        root_stream = _mod_fetch._new_root_data_stream(
            root_keys_to_create, self._revision_id_to_root_id, parent_map,
            self.source)
        to_texts.insert_record_stream(root_stream)
    to_texts.insert_record_stream(from_texts.get_record_stream(
        text_keys, self.target._format._fetch_order,
        not self.target._format._fetch_uses_deltas))
    # insert inventory deltas
    for delta in pending_deltas:
        self.target.add_inventory_by_delta(*delta)
    if self.target._fallback_repositories:
        # Make sure this stacked repository has all the parent inventories
        # for the new revisions that we are about to insert.  We do this
        # before adding the revisions so that no revision is added until
        # all the inventories it may depend on are added.
        # Note that this is overzealous, as we may have fetched these in an
        # earlier batch.
        # ``revision_ids`` and ``parent_map`` are rebound from here on.
        parent_ids = set()
        revision_ids = set()
        for revision in pending_revisions:
            revision_ids.add(revision.revision_id)
            parent_ids.update(revision.parent_ids)
        parent_ids.difference_update(revision_ids)
        parent_ids.discard(_mod_revision.NULL_REVISION)
        parent_map = self.source.get_parent_map(parent_ids)
        # we iterate over parent_map and not parent_ids because we don't
        # want to try copying any revision which is a ghost
        for parent_tree in self.source.revision_trees(parent_map):
            current_revision_id = parent_tree.get_revision_id()
            parents_parents = parent_map[current_revision_id]
            possible_trees = self._get_trees(parents_parents, cache)
            if len(possible_trees) == 0:
                # There either aren't any parents, or the parents are
                # ghosts, so just use the last converted tree.
                possible_trees.append((basis_id, cache[basis_id]))
            basis_id, delta = self._get_delta_for_revision(parent_tree,
                    parents_parents, possible_trees)
            self.target.add_inventory_by_delta(
                basis_id, delta, current_revision_id, parents_parents)
    # insert signatures and revisions
    for revision in pending_revisions:
        try:
            signature = self.source.get_signature_text(
                revision.revision_id)
            self.target.add_signature_text(revision.revision_id,
                signature)
        except errors.NoSuchRevision:
            # Presumably raised when the revision has no signature; the
            # revision is still added below.
            pass
        self.target.add_revision(revision.revision_id, revision)
    return basis_id

2687

2688

def _fetch_all_revisions(self, revision_ids, pb):
    """Fetch everything for the list of revisions.

    The revisions are copied in batches of 100, each batch inside its own
    write group on the target.  If a batch fails its write group is
    aborted and the error is re-raised.  Hints returned by the committed
    write groups are passed to target.pack() when the target format has
    ``pack_compresses`` set.

    :param revision_ids: The list of revisions to fetch. Must be in
        topological order, and must not be empty.
    :param pb: A ProgressTask
    :return: None
    """
    basis_id, basis_tree = self._get_basis(revision_ids[0])
    batch_size = 100
    cache = lru_cache.LRUCache(100)
    cache[basis_id] = basis_tree
    del basis_tree # We don't want to hang on to it here
    hints = []

    for offset in range(0, len(revision_ids), batch_size):
        self.target.start_write_group()
        try:
            pb.update('Transferring revisions', offset,
                      len(revision_ids))
            batch = revision_ids[offset:offset+batch_size]
            basis_id = self._fetch_batch(batch, basis_id, cache)
        except:
            # _fetch_batch sets this flag while it runs; clear it before
            # aborting so a failure does not leave it set.
            self.source._safe_to_return_from_cache = False
            self.target.abort_write_group()
            raise
        else:
            hint = self.target.commit_write_group()
            if hint:
                hints.extend(hint)
    if hints and self.target._format.pack_compresses:
        self.target.pack(hint=hints)
    pb.update('Transferring revisions', len(revision_ids),
              len(revision_ids))

2723

2724

@needs_write_lock

2725

def fetch(self, revision_id=None, find_ghosts=False,
          fetch_spec=None):
    """See InterRepository.fetch().

    :return: (number of revisions fetched, 0); (0, 0) when there is
        nothing to fetch.
    """
    wanted = None
    if fetch_spec is not None:
        wanted = fetch_spec.get_keys()
    ui.ui_factory.warn_experimental_format_fetch(self)
    upgrading_root = (not self.source.supports_rich_root()
                      and self.target.supports_rich_root())
    if upgrading_root:
        self._converting_to_rich_root = True
        self._revision_id_to_root_id = {}
    else:
        self._converting_to_rich_root = False
    # See <https://launchpad.net/bugs/456077> asking for a warning here
    source_format = self.source._format
    target_format = self.target._format
    if source_format.network_name() != target_format.network_name():
        ui.ui_factory.show_user_warning('cross_format_fetch',
            from_format=source_format,
            to_format=target_format)
    if wanted is None:
        # No explicit key set: ask the target what it is missing.
        search_revision_ids = None
        if revision_id:
            search_revision_ids = [revision_id]
        search_result = self.target.search_missing_revision_ids(
            self.source, revision_ids=search_revision_ids,
            find_ghosts=find_ghosts)
        wanted = search_result.get_keys()
    if not wanted:
        return 0, 0
    ordered_ids = tsort.topo_sort(
        self.source.get_graph().get_parent_map(wanted))
    if not ordered_ids:
        return 0, 0
    # Walk though all revisions; get inventory deltas, copy referenced
    # texts that delta references, insert the delta, revision and
    # signature.
    pb = ui.ui_factory.nested_progress_bar()
    try:
        self._fetch_all_revisions(ordered_ids, pb)
    finally:
        pb.finished()
    return len(ordered_ids), 0

2767

2768

def _get_basis(self, first_revision_id):
    """Get a revision and tree which exists in the target.

    This assumes that first_revision_id is selected for transmission
    because all other ancestors are already present. If we can't find an
    ancestor we fall back to NULL_REVISION since we know that is safe.

    The candidate is the first parent of first_revision_id; the tree is
    always read from the source repository.

    :return: (basis_id, basis_tree)
    """
    first_rev = self.source.get_revision(first_revision_id)
    try:
        candidate_id = first_rev.parent_ids[0]
        # only valid as a basis if the target has it
        self.target.get_revision(candidate_id)
        # Try to get a basis tree - if it's a ghost it will hit the
        # NoSuchRevision case.
        return candidate_id, self.source.revision_tree(candidate_id)
    except (IndexError, errors.NoSuchRevision):
        null_id = _mod_revision.NULL_REVISION
        return null_id, self.source.revision_tree(null_id)

2789

2790

2791

class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.

        :return: A new RepositoryFormatKnit3 instance.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit3
        return RepositoryFormatKnit3()

    @staticmethod
    def is_compatible(source, target):
        """Tell whether source and target can use this optimiser.

        :return: The first false value among: the result of
            InterRepository._same_model(source, target), the source
            format's ``supports_full_versioned_files`` and the target
            format's ``supports_full_versioned_files``; when all are true,
            the last of them.
        """
        verdict = InterRepository._same_model(source, target)
        if verdict:
            verdict = source._format.supports_full_versioned_files
        if verdict:
            verdict = target._format.supports_full_versioned_files
        return verdict

2813

2814

2815

InterRepository.register_optimiser(InterDifferingSerializer)

2816

InterRepository.register_optimiser(InterSameDataRepository)

2817

2818

2819

def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples.  The signature
    may be None.

    Everything is inserted inside one write group, which is aborted (and
    the error re-raised) if any revision fails to install.

    :param repository: The repository to install into.
    :param iterable: Iterable of (revision, revision_tree, signature).
    :param num_revisions: Total passed to pb.update; may be None.
    :param pb: Optional progress bar, updated after each revision.
    """
    repository.start_write_group()
    try:
        recent_inventories = lru_cache.LRUCache(10)
        installed = 0
        for revision, revision_tree, signature in iterable:
            _install_revision(repository, revision, revision_tree,
                              signature, recent_inventories)
            installed += 1
            if pb is not None:
                pb.update('Transferring revisions', installed,
                          num_revisions)
    except:
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()

2838

2839

2840

def _install_revision(repository, rev, revision_tree, signature,
                      inventory_cache):
    """Install all revision data into a repository.

    Missing file texts are added first, then the inventory, then the
    signature (if any) and finally the revision itself.

    :param repository: The repository to install into.
    :param rev: The revision object to install.
    :param revision_tree: The tree for rev; its inventory and file contents
        are read.
    :param signature: Signature text for rev, or None.
    :param inventory_cache: Mapping of revision id to inventory, used to
        find a basis when the inventory is added by delta; updated here.
    :raises errors.IncompatibleRevision: if the repository does not support
        rich roots and the root entry's revision differs from
        rev.revision_id.
    """
    present_parents = []
    parent_trees = {}
    for p_id in rev.parent_ids:
        if repository.has_revision(p_id):
            present_parents.append(p_id)
            parent_trees[p_id] = repository.revision_tree(p_id)
        else:
            # Absent parent: stand in the NULL_REVISION tree.
            parent_trees[p_id] = repository.revision_tree(
                _mod_revision.NULL_REVISION)

    inv = revision_tree.inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        path, root = entries.next()
        if root.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    text_keys = {}
    for path, ie in entries:
        text_keys[(ie.file_id, ie.revision)] = ie
    text_parent_map = repository.texts.get_parent_map(text_keys)
    missing_texts = set(text_keys) - set(text_parent_map)
    # Add the texts that are not already present
    for text_key in missing_texts:
        ie = text_keys[text_key]
        text_parents = []
        # FIXME: TODO: The following loop overlaps/duplicates that done by
        # commit to determine parents. There is a latent/real bug here where
        # the parents inserted are not those commit would do - in particular
        # they are not filtered by heads(). RBC, AB
        for tree in parent_trees.values():
            if ie.file_id not in tree:
                continue
            # Compare whole (file_id, revision) keys: text_parents holds
            # keys, so testing the bare revision id would never match and
            # duplicate parents would be recorded.
            parent_key = (ie.file_id, tree.get_file_revision(ie.file_id))
            if parent_key in text_parents:
                continue
            text_parents.append(parent_key)
        lines = revision_tree.get_file(ie.file_id).readlines()
        repository.texts.add_lines(text_key, text_parents, lines)
    try:
        # install the inventory
        if repository._format._commit_inv_deltas and len(rev.parent_ids):
            # Cache this inventory
            inventory_cache[rev.revision_id] = inv
            try:
                basis_inv = inventory_cache[rev.parent_ids[0]]
            except KeyError:
                # No cached basis to delta against: add the full inventory.
                repository.add_inventory(rev.revision_id, inv, present_parents)
            else:
                delta = inv._make_delta(basis_inv)
                repository.add_inventory_by_delta(rev.parent_ids[0], delta,
                    rev.revision_id, present_parents)
        else:
            repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)

2902

2903

2904

def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository.

    Convenience wrapper around install_revisions for one revision with no
    signature.
    """
    unsigned_record = (rev, revision_tree, None)
    install_revisions(repository, [unsigned_record])