# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from cStringIO import StringIO

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import cStringIO
import time

from bzrlib import (
    debug,
    deprecated_graph,
    errors,
    generate_ids,
    graph,
    osutils,
    revision as _mod_revision,
    ui,
    )
from bzrlib.bundle import serializer
from bzrlib.revisiontree import RevisionTree
from bzrlib.store.versioned import VersionedFileStore
from bzrlib.testament import Testament
""")
from bzrlib.decorators import needs_read_lock, needs_write_lock
from bzrlib.inter import InterObject
from bzrlib.inventory import (
    Inventory,
    InventoryDirectory,
    ROOT_ID,
    entry_factory,
    )
from bzrlib import registry
from bzrlib.symbol_versioning import (
    deprecated_method,
    )
from bzrlib.trace import (
    log_exception_quietly, note, mutter, mutter_callsite, warning)


# Old formats display a warning, but only once
_deprecation_warning_done = False


class CommitBuilder(object):
    """Provides an interface to build up a commit.

    This allows describing a tree to be committed without needing to
    know the internals of the format of the repository.
    """

    # all clients should supply tree roots.
    record_root_entry = True
    # the default CommitBuilder does not manage trees whose root is versioned.
    _versioned_root = False

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Initiate a CommitBuilder.

        :param repository: Repository to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        self._config = config

        if committer is None:
            self._committer = self._config.username()
        else:
            self._committer = committer

        self.new_inventory = Inventory(None)
        self._new_revision_id = revision_id
        self.parents = parents
        self.repository = repository

        self._revprops = {}
        if revprops is not None:
            self._validate_revprops(revprops)
            self._revprops.update(revprops)

        if timestamp is None:
            timestamp = time.time()
        # Restrict resolution to 1ms
        self._timestamp = round(timestamp, 3)

        if timezone is None:
            self._timezone = osutils.local_time_offset()
        else:
            self._timezone = int(timezone)

        self._generate_revision_if_needed()
        self.__heads = graph.HeadsCache(repository.get_graph()).heads
        self._basis_delta = []
        # API compatibility, older code that used CommitBuilder did not call
        # .record_delete(), which means the delta that is computed would not be
        # valid. Callers that will call record_delete() should call
        # .will_record_deletes() to indicate that.
        self._recording_deletes = False
        # memo'd check for no-op commits.
        self._any_changes = False

    def any_changes(self):
        """Return True if any entries were changed.

        This includes merge-only changes. It is the core for the --unchanged
        detection in commit.

        :return: True if any changes have occurred.
        """
        return self._any_changes

    def _validate_unicode_text(self, text, context):
        """Verify things like commit messages don't have bogus characters."""
        if '\r' in text:
            raise ValueError('Invalid value for %s: %r' % (context, text))

    def _validate_revprops(self, revprops):
        for key, value in revprops.iteritems():
            # We know that the XML serializers do not round trip '\r'
            # correctly, so refuse to accept them
            if not isinstance(value, basestring):
                raise ValueError('revision property (%s) is not a valid'
                                 ' (unicode) string: %r' % (key, value))
            self._validate_unicode_text(value,
                                        'revision property (%s)' % (key,))

    def commit(self, message):
        """Make the actual commit.

        :return: The revision id of the recorded revision.
        """
        self._validate_unicode_text(message, 'commit message')
        rev = _mod_revision.Revision(
                       timestamp=self._timestamp,
                       timezone=self._timezone,
                       committer=self._committer,
                       message=message,
                       inventory_sha1=self.inv_sha1,
                       revision_id=self._new_revision_id,
                       properties=self._revprops)
        rev.parent_ids = self.parents
        self.repository.add_revision(self._new_revision_id, rev,
            self.new_inventory, self._config)
        self.repository.commit_write_group()
        return self._new_revision_id
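
    # Illustrative sketch (not part of bzrlib): the expected calling sequence
    # for a builder obtained from Repository.get_commit_builder(), assuming
    # `repo`, `branch` and `config` are supplied by the caller:
    #
    #   repo.lock_write()
    #   repo.start_write_group()
    #   builder = repo.get_commit_builder(branch, parents=[], config=config)
    #   ... record entries via record_entry_contents()/record_iter_changes()
    #   builder.finish_inventory()
    #   new_revid = builder.commit('add initial files')  # commits write group
    #   repo.unlock()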

    def abort(self):
        """Abort the commit that is being built.
        """
        self.repository.abort_write_group()

    def revision_tree(self):
        """Return the tree that was just committed.

        After calling commit() this can be called to get a RevisionTree
        representing the newly committed tree. This is preferred to
        calling Repository.revision_tree() because that may require
        deserializing the inventory, while we already have a copy in
        memory.
        """
        if self.new_inventory is None:
            self.new_inventory = self.repository.get_inventory(
                self._new_revision_id)
        return RevisionTree(self.repository, self.new_inventory,
            self._new_revision_id)

    def finish_inventory(self):
        """Tell the builder that the inventory is finished.

        :return: The inventory id in the repository, which can be used with
            repository.get_inventory.
        """
        if self.new_inventory is None:
            # an inventory delta was accumulated without creating a new
            # inventory.
            basis_id = self.basis_delta_revision
            self.inv_sha1 = self.repository.add_inventory_by_delta(
                basis_id, self._basis_delta, self._new_revision_id,
                self.parents)
        else:
            if self.new_inventory.root is None:
                raise AssertionError('Root entry should be supplied to'
                    ' record_entry_contents, as of bzr 0.10.')
                self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
            self.new_inventory.revision_id = self._new_revision_id
            self.inv_sha1 = self.repository.add_inventory(
                self._new_revision_id,
                self.new_inventory,
                self.parents
                )
        return self._new_revision_id

    def _gen_revision_id(self):
        """Return new revision-id."""
        return generate_ids.gen_revision_id(self._config.username(),
                                            self._timestamp)

    def _generate_revision_if_needed(self):
        """Create a revision id if None was supplied.

        If the repository can not support user-specified revision ids
        they should override this function and raise CannotSetRevisionId
        if _new_revision_id is not None.

        :raises: CannotSetRevisionId
        """
        if self._new_revision_id is None:
            self._new_revision_id = self._gen_revision_id()
            self.random_revid = True
        else:
            self.random_revid = False

    def _heads(self, file_id, revision_ids):
        """Calculate the graph heads for revision_ids in the graph of file_id.

        This can use either a per-file graph or a global revision graph as we
        have an identity relationship between the two graphs.
        """
        return self.__heads(revision_ids)

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
        # In this revision format, root entries have no knit or weave. When
        # serializing out to disk and back in, root.revision is always
        # _new_revision_id.
        ie.revision = self._new_revision_id

    def _require_root_change(self, tree):
        """Enforce an appropriate root object change.

        This is called once when record_iter_changes is called, if and only if
        the root was not in the delta calculated by record_iter_changes.

        :param tree: The tree which is being committed.
        """
        # NB: if there are no parents then this method is not called, so no
        # need to guard on parents having length.
        entry = entry_factory['directory'](tree.path2id(''), '',
            None)
        entry.revision = self._new_revision_id
        self._basis_delta.append(('', '', entry.file_id, entry))

    def _get_delta(self, ie, basis_inv, path):
        """Get a delta against the basis inventory for ie."""
        if ie.file_id not in basis_inv:
            # add
            result = (None, path, ie.file_id, ie)
            self._basis_delta.append(result)
            return result
        elif ie != basis_inv[ie.file_id]:
            # common but altered
            # TODO: avoid this id2path call.
            result = (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
            self._basis_delta.append(result)
            return result
        else:
            # common, unaltered
            return None

    def get_basis_delta(self):
        """Return the complete inventory delta versus the basis inventory.

        This has been built up with the calls to record_delete and
        record_entry_contents. The client must have already called
        will_record_deletes() to indicate that they will be generating a
        complete delta.

        :return: An inventory delta, suitable for use with apply_delta, or
            Repository.add_inventory_by_delta, etc.
        """
        if not self._recording_deletes:
            raise AssertionError("recording deletes not activated.")
        return self._basis_delta
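
    # Illustrative sketch (not part of bzrlib): each element of the returned
    # delta is an (old_path, new_path, file_id, entry) tuple, e.g.
    #
    #   (None, 'doc/README', 'readme-file-id', <InventoryFile ...>)   # add
    #   ('a.txt', 'b.txt', 'a-file-id', <InventoryFile ...>)          # rename
    #   ('gone.txt', None, 'gone-file-id', None)                      # delete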

    def record_delete(self, path, file_id):
        """Record that a delete occurred against a basis tree.

        This is an optional API - when used it adds items to the basis_delta
        being accumulated by the commit builder. It cannot be called unless the
        method will_record_deletes() has been called to inform the builder that
        a delta is being supplied.

        :param path: The path of the thing deleted.
        :param file_id: The file id that was deleted.
        """
        if not self._recording_deletes:
            raise AssertionError("recording deletes not activated.")
        delta = (path, None, file_id, None)
        self._basis_delta.append(delta)
        self._any_changes = True

    def will_record_deletes(self):
        """Tell the commit builder that deletes are being notified.

        This enables the accumulation of an inventory delta; for the resulting
        commit to be valid, deletes against the basis MUST be recorded via
        builder.record_delete().
        """
        self._recording_deletes = True
        try:
            basis_id = self.parents[0]
        except IndexError:
            basis_id = _mod_revision.NULL_REVISION
        self.basis_delta_revision = basis_id
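
    # Illustrative sketch (not part of bzrlib): a client that wants
    # get_basis_delta() to be usable must opt in before recording deletes:
    #
    #   builder.will_record_deletes()
    #   builder.record_delete('old-file.txt', 'old-file-id')
    #   ...
    #   delta = builder.get_basis_delta()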

    def record_entry_contents(self, ie, parent_invs, path, tree,
        content_summary):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        :param content_summary: Summary data from the tree about the paths
            content - stat, length, exec, sha/link target. This is only
            accessed when the entry has a revision of None - that is when it is
            a candidate to commit.
        :return: A tuple (change_delta, version_recorded, fs_hash).
            change_delta is an inventory_delta change for this entry against
            the basis tree of the commit, or None if no change occurred against
            the basis tree.
            version_recorded is True if a new version of the entry has been
            recorded. For instance, committing a merge where a file was only
            changed on the other side will return (delta, False).
            fs_hash is either None, or the hash details for the path (currently
            a tuple of the contents sha1 and the statvalue returned by
            tree.get_file_with_stat()).
        """
        if self.new_inventory.root is None:
            if ie.parent_id is not None:
                raise errors.RootMissing()
            self._check_root(ie, parent_invs, tree)
        if ie.revision is None:
            kind = content_summary[0]
        else:
            # ie is carried over from a prior commit
            kind = ie.kind
        # XXX: repository specific check for nested tree support goes here - if
        # the repo doesn't want nested trees we skip it ?
        if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
            # mismatch between commit builder logic and repository:
            # this needs the entry creation pushed down into the builder.
            raise NotImplementedError('Missing repository subtree support.')
        self.new_inventory.add(ie)

        # TODO: slow, take it out of the inner loop.
        try:
            basis_inv = parent_invs[0]
        except IndexError:
            basis_inv = Inventory(root_id=None)

        # ie.revision is always None if the InventoryEntry is considered
        # for committing. We may record the previous parents revision if the
        # content is actually unchanged against a sole head.
        if ie.revision is not None:
            if not self._versioned_root and path == '':
                # repositories that do not version the root set the root's
                # revision to the new commit even when no change occurs (more
                # specifically, they do not record a revision on the root; and
                # the rev id is assigned to the root during deserialisation -
                # this masks when a change may have occurred against the basis.
                # To match this we always issue a delta, because the revision
                # of the root will always be changing.
                if ie.file_id in basis_inv:
                    delta = (basis_inv.id2path(ie.file_id), path,
                        ie.file_id, ie)
                else:
                    # add
                    delta = (None, path, ie.file_id, ie)
                self._basis_delta.append(delta)
                return delta, False, None
            else:
                # we don't need to commit this, because the caller already
                # determined that an existing revision of this file is
                # appropriate. If it's not being considered for committing then
                # it and all its parents to the root must be unaltered so
                # no-change against the basis.
                if ie.revision == self._new_revision_id:
                    raise AssertionError("Impossible situation, a skipped "
                        "inventory entry (%r) claims to be modified in this "
                        "commit (%r).", (ie, self._new_revision_id))
                return None, False, None
        # XXX: Friction: parent_candidates should return a list not a dict
        #      so that we don't have to walk the inventories again.
        parent_candidate_entries = ie.parent_candidates(parent_invs)
        head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
        heads = []
        for inv in parent_invs:
            if ie.file_id in inv:
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    heads.append(inv[ie.file_id].revision)
                    head_set.remove(inv[ie.file_id].revision)

        store = False
        # now we check to see if we need to write a new record to the
        # file-graph.
        # We write a new entry unless there is one head to the ancestors, and
        # the kind-derived content is unchanged.

        # Cheapest check first: no ancestors, or more than one head in the
        # ancestors, we write a new node.
        if len(heads) != 1:
            store = True
        if not store:
            # There is a single head, look it up for comparison
            parent_entry = parent_candidate_entries[heads[0]]
            # if the non-content specific data has changed, we'll be writing a
            # node:
            if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
                store = True
        # now we need to do content specific checks:
        if not store:
            # if the kind changed the content obviously has
            if kind != parent_entry.kind:
                store = True
        # Stat cache fingerprint feedback for the caller - None as we usually
        # don't generate one.
        fingerprint = None
        if kind == 'file':
            if content_summary[2] is None:
                raise ValueError("Files must not have executable = None")
            if not store:
                if (# if the file length changed we have to store:
                    parent_entry.text_size != content_summary[1] or
                    # if the exec bit has changed we have to store:
                    parent_entry.executable != content_summary[2]):
                    store = True
                elif parent_entry.text_sha1 == content_summary[3]:
                    # all meta and content is unchanged (using a hash cache
                    # hit to check the sha)
                    ie.revision = parent_entry.revision
                    ie.text_size = parent_entry.text_size
                    ie.text_sha1 = parent_entry.text_sha1
                    ie.executable = parent_entry.executable
                    return self._get_delta(ie, basis_inv, path), False, None
                else:
                    # Either there is only a hash change (no hash cache entry,
                    # or same size content change), or there is no change on
                    # this file at all.
                    # Provide the parent's hash to the store layer, so that if
                    # the content is unchanged we will not store a new node.
                    nostore_sha = parent_entry.text_sha1
            if store:
                # We want to record a new node regardless of the presence or
                # absence of a content change in the file.
                nostore_sha = None
            ie.executable = content_summary[2]
            file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
            try:
                text = file_obj.read()
            finally:
                file_obj.close()
            try:
                ie.text_sha1, ie.text_size = self._add_text_to_weave(
                    ie.file_id, text, heads, nostore_sha)
                # Let the caller know we generated a stat fingerprint.
                fingerprint = (ie.text_sha1, stat_value)
            except errors.ExistingContent:
                # Turns out that the file content was unchanged, and we were
                # only going to store a new node if it was changed. Carry over
                # the entry.
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False, None
        elif kind == 'directory':
            if not store:
                # all data is meta here, nothing specific to directory, so
                # carry over:
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False, None
            self._add_text_to_weave(ie.file_id, '', heads, None)
        elif kind == 'symlink':
            current_link_target = content_summary[3]
            if not store:
                # symlink target is not generic metadata, check if it has
                # changed.
                if current_link_target != parent_entry.symlink_target:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.revision = parent_entry.revision
                ie.symlink_target = parent_entry.symlink_target
                return self._get_delta(ie, basis_inv, path), False, None
            ie.symlink_target = current_link_target
            self._add_text_to_weave(ie.file_id, '', heads, None)
        elif kind == 'tree-reference':
            if not store:
                if content_summary[3] != parent_entry.reference_revision:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.reference_revision = parent_entry.reference_revision
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False, None
            ie.reference_revision = content_summary[3]
            self._add_text_to_weave(ie.file_id, '', heads, None)
        else:
            raise NotImplementedError('unknown kind')
        ie.revision = self._new_revision_id
        self._any_changes = True
        return self._get_delta(ie, basis_inv, path), True, fingerprint
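
    # Illustrative sketch (not part of bzrlib): interpreting the return value
    # of record_entry_contents for a modified file entry:
    #
    #   delta, version_recorded, fs_hash = builder.record_entry_contents(
    #       ie, parent_invs, path, tree, content_summary)
    #   # delta is None when nothing changed against the basis;
    #   # version_recorded is False when an existing text was carried over;
    #   # fs_hash, when not None, is (sha1, stat_value) for caching.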

    def record_iter_changes(self, tree, basis_revision_id, iter_changes,
        _entry_factory=entry_factory):
        """Record a new tree via iter_changes.

        :param tree: The tree to obtain text contents from for changed objects.
        :param basis_revision_id: The revision id of the tree the iter_changes
            has been generated against. Currently assumed to be the same
            as self.parents[0] - if it is not, errors may occur.
        :param iter_changes: An iter_changes iterator with the changes to apply
            to basis_revision_id. The iterator must not include any items with
            a current kind of None - missing items must be either filtered out
            or errored-on before record_iter_changes sees the item.
        :param _entry_factory: Private method to bind entry_factory locally for
            performance.
        :return: A generator of (file_id, relpath, fs_hash) tuples for use with
            tree._observed_sha1.
        """
        # Create an inventory delta based on deltas between all the parents and
        # deltas between all the parent inventories. We use inventory deltas
        # between the inventory objects because iter_changes masks
        # last-changed-field only changes.
        # Working data:
        # file_id -> change map, change is fileid, paths, changed, versioneds,
        # parents, names, kinds, executables
        merged_ids = {}
        # {file_id -> revision_id -> inventory entry, for entries in parent
        # trees that are not parents[0]
        parent_entries = {}
        ghost_basis = False
        try:
            revtrees = list(self.repository.revision_trees(self.parents))
        except errors.NoSuchRevision:
            # one or more ghosts, slow path.
            revtrees = []
            for revision_id in self.parents:
                try:
                    revtrees.append(self.repository.revision_tree(revision_id))
                except errors.NoSuchRevision:
                    if not revtrees:
                        basis_revision_id = _mod_revision.NULL_REVISION
                        ghost_basis = True
                    revtrees.append(self.repository.revision_tree(
                        _mod_revision.NULL_REVISION))
        # The basis inventory from a repository
        if revtrees:
            basis_inv = revtrees[0].inventory
        else:
            basis_inv = self.repository.revision_tree(
                _mod_revision.NULL_REVISION).inventory
        if len(self.parents) > 0:
            if basis_revision_id != self.parents[0] and not ghost_basis:
                raise Exception(
                    "arbitrary basis parents not yet supported with merges")
            for revtree in revtrees[1:]:
                for change in revtree.inventory._make_delta(basis_inv):
                    if change[1] is None:
                        # Not present in this parent.
                        continue
                    if change[2] not in merged_ids:
                        if change[0] is not None:
                            basis_entry = basis_inv[change[2]]
                            merged_ids[change[2]] = [
                                # basis revid
                                basis_entry.revision,
                                # new tree revid
                                change[3].revision]
                            parent_entries[change[2]] = {
                                # basis parent
                                basis_entry.revision:basis_entry,
                                # this parent
                                change[3].revision:change[3],
                                }
                        else:
                            merged_ids[change[2]] = [change[3].revision]
                            parent_entries[change[2]] = {change[3].revision:change[3]}
                    else:
                        merged_ids[change[2]].append(change[3].revision)
                        parent_entries[change[2]][change[3].revision] = change[3]
        # Setup the changes from the tree:
        # changes maps file_id -> (change, [parent revision_ids])
        changes = {}
        for change in iter_changes:
            # This probably looks up in basis_inv way too much.
            if change[1][0] is not None:
                head_candidate = [basis_inv[change[0]].revision]
            else:
                head_candidate = []
            changes[change[0]] = change, merged_ids.get(change[0],
                head_candidate)
        unchanged_merged = set(merged_ids) - set(changes)
        # Extend the changes dict with synthetic changes to record merges of
        # texts.
        for file_id in unchanged_merged:
            # Record a merged version of these items that did not change vs the
            # basis. This can be either identical parallel changes, or a revert
            # of a specific file after a merge. The recorded content will be
            # that of the current tree (which is the same as the basis), but
            # the per-file graph will reflect a merge.
            # NB:XXX: We are reconstructing path information we had, this
            # should be preserved instead.
            # inv delta change: (file_id, (path_in_source, path_in_target),
            # changed_content, versioned, parent, name, kind,
            # executable)
            try:
                basis_entry = basis_inv[file_id]
            except errors.NoSuchId:
                # a change from basis->some_parents but file_id isn't in basis
                # so was new in the merge, which means it must have changed
                # from basis -> current, and as it hasn't the add was reverted
                # by the user. So we discard this change.
                pass
            else:
                change = (file_id,
                    (basis_inv.id2path(file_id), tree.id2path(file_id)),
                    False, (True, True),
                    (basis_entry.parent_id, basis_entry.parent_id),
                    (basis_entry.name, basis_entry.name),
                    (basis_entry.kind, basis_entry.kind),
                    (basis_entry.executable, basis_entry.executable))
                changes[file_id] = (change, merged_ids[file_id])
        # changes contains tuples with the change and a set of inventory
        # candidates for the file.
        # inv delta is:
        # old_path, new_path, file_id, new_inventory_entry
        seen_root = False # Is the root in the basis delta?
        inv_delta = self._basis_delta
        modified_rev = self._new_revision_id
        for change, head_candidates in changes.values():
            if change[3][1]: # versioned in target.
                # Several things may be happening here:
                # We may have a fork in the per-file graph
                #  - record a change with the content from tree
                # We may have a change against < all trees
                #  - carry over the tree that hasn't changed
                # We may have a change against all trees
                #  - record the change with the content from tree
                kind = change[6][1]
                file_id = change[0]
                entry = _entry_factory[kind](file_id, change[5][1],
                    change[4][1])
                head_set = self._heads(change[0], set(head_candidates))
                heads = []
                # Preserve ordering.
                for head_candidate in head_candidates:
                    if head_candidate in head_set:
                        heads.append(head_candidate)
                        head_set.remove(head_candidate)
                carried_over = False
                if len(heads) == 1:
                    # Could be a carry-over situation:
                    parent_entry_revs = parent_entries.get(file_id, None)
                    if parent_entry_revs:
                        parent_entry = parent_entry_revs.get(heads[0], None)
                    else:
                        parent_entry = None
                    if parent_entry is None:
                        # The parent iter_changes was called against is the one
                        # that is the per-file head, so any change is relevant
                        # iter_changes is valid.
                        carry_over_possible = False
                    else:
                        # could be a carry over situation
                        # A change against the basis may just indicate a merge,
                        # we need to check the content against the source of the
                        # merge to determine if it was changed after the merge
                        # or carried over.
                        if (parent_entry.kind != entry.kind or
                            parent_entry.parent_id != entry.parent_id or
                            parent_entry.name != entry.name):
                            # Metadata common to all entries has changed
                            # against per-file parent
                            carry_over_possible = False
                        else:
                            carry_over_possible = True
                        # per-type checks for changes against the parent_entry
                        # are done below.
                else:
                    # Cannot be a carry-over situation
                    carry_over_possible = False
                # Populate the entry in the delta
                if kind == 'file':
                    # XXX: There is still a small race here: If someone reverts
                    # the content of a file after iter_changes examines and
                    # decides it has changed, we will unconditionally record a
                    # new version even if some other process reverts it while
                    # commit is running (with the revert happening after
                    # iter_changes did its examination).
                    if change[7][1]:
                        entry.executable = True
                    else:
                        entry.executable = False
                    if (carry_over_possible and
                        parent_entry.executable == entry.executable):
                        # Check the file length, content hash after reading
                        # the file.
                        nostore_sha = parent_entry.text_sha1
                    else:
                        nostore_sha = None
                    file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
                    try:
                        text = file_obj.read()
                    finally:
                        file_obj.close()
                    try:
                        entry.text_sha1, entry.text_size = self._add_text_to_weave(
                            file_id, text, heads, nostore_sha)
                        yield file_id, change[1][1], (entry.text_sha1, stat_value)
                    except errors.ExistingContent:
                        # No content change against a carry_over parent
                        # Perhaps this should also yield a fs hash update?
                        carried_over = True
                        entry.text_size = parent_entry.text_size
                        entry.text_sha1 = parent_entry.text_sha1
                elif kind == 'symlink':
                    entry.symlink_target = tree.get_symlink_target(file_id)
                    if (carry_over_possible and
                        parent_entry.symlink_target == entry.symlink_target):
                        carried_over = True
                    else:
                        self._add_text_to_weave(change[0], '', heads, None)
                elif kind == 'directory':
                    if carry_over_possible:
                        carried_over = True
                    else:
                        # Nothing to set on the entry.
                        # XXX: split into the Root and nonRoot versions.
                        if change[1][1] != '' or self.repository.supports_rich_root():
                            self._add_text_to_weave(change[0], '', heads, None)
                elif kind == 'tree-reference':
                    if not self.repository._format.supports_tree_reference:
                        # This isn't quite sane as an error, but we shouldn't
                        # ever see this code path in practice: trees don't
                        # permit references when the repo doesn't support tree
                        # references.
                        raise errors.UnsupportedOperation(tree.add_reference,
                            self.repository)
                    reference_revision = tree.get_reference_revision(change[0])
                    entry.reference_revision = reference_revision
                    if (carry_over_possible and
                        parent_entry.reference_revision == reference_revision):
                        carried_over = True
                    else:
                        self._add_text_to_weave(change[0], '', heads, None)
                else:
                    raise AssertionError('unknown kind %r' % kind)
                if not carried_over:
                    entry.revision = modified_rev
                else:
                    entry.revision = parent_entry.revision
            else:
                entry = None
            new_path = change[1][1]
            inv_delta.append((change[1][0], new_path, change[0], entry))
            if new_path == '':
                seen_root = True
        self.new_inventory = None
        if len(inv_delta):
            self._any_changes = True
        if not seen_root:
            # housekeeping root entry changes do not affect no-change commits.
            self._require_root_change(tree)
        self.basis_delta_revision = basis_revision_id
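
    # Illustrative sketch (not part of bzrlib): record_iter_changes is a
    # generator, so callers must exhaust it for the recording to happen,
    # e.g. (assuming `tree`, `basis_id` and `basis_tree` from the caller):
    #
    #   for file_id, relpath, fs_hash in builder.record_iter_changes(
    #           tree, basis_id, tree.iter_changes(basis_tree)):
    #       ...  # optionally feed fs_hash back to the tree's stat cache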

    def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):
        parent_keys = tuple([(file_id, parent) for parent in parents])
        return self.repository.texts._add_text(
            (file_id, self._new_revision_id), parent_keys, new_text,
            nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
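
    # Illustrative sketch (not part of bzrlib): texts are keyed by
    # (file_id, revision_id) pairs, so a text with two per-file parents gets
    # parent_keys like:
    #
    #   (('a-file-id', 'parent-rev-1'), ('a-file-id', 'parent-rev-2'))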


class RootCommitBuilder(CommitBuilder):
    """This commitbuilder actually records the root id"""

    # the root entry gets versioned properly by this builder.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """

    def _require_root_change(self, tree):
        """Enforce an appropriate root object change.

        This is called once when record_iter_changes is called, if and only if
        the root was not in the delta calculated by record_iter_changes.

        :param tree: The tree which is being committed.
        """
        # versioned roots do not change unless the tree found a change.


######################################################################

class Repository(object):
    """Repository holding history for one or more branches.
    """

    def copy_content_into(self, destination, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.
        """
        return InterRepository.get(self, destination).copy_content(revision_id)

    def commit_write_group(self):
        """Commit the contents accrued within the current write group.

        :seealso: start_write_group.

        :return: it may return an opaque hint that can be passed to 'pack'.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context %r and '
                'write group %r.' %
                (self.get_transaction(), self._write_group))
        result = self._commit_write_group()
        self._write_group = None
        return result

    def _commit_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called before the write group is considered to be
        finished and should ensure that all data handed to the repository
        for writing during the write group is safely committed (to the
        extent possible considering file system caching etc).
        """
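
    # Illustrative sketch (not part of bzrlib): the write group protocol that
    # brackets all data insertion, assuming `repo` is a write-locked
    # Repository:
    #
    #   repo.start_write_group()
    #   try:
    #       ...  # add revisions, inventories, texts
    #   except:
    #       repo.abort_write_group()
    #       raise
    #   else:
    #       repo.commit_write_group()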

    def suspend_write_group(self):
        raise errors.UnsuspendableWriteGroup(self)

    def get_missing_parent_inventories(self, check_for_missing_texts=True):
        """Return the keys of missing inventory parents for revisions added in
        this write group.

        A revision is not complete if the inventory delta for that revision
        cannot be calculated. Therefore if the parent inventories of a
        revision are not present, the revision is incomplete, and e.g. cannot
        be streamed by a smart server. This method finds missing inventory
        parents for revisions added in this write group.
        """
        if not self._format.supports_external_lookups:
            # This is only an issue for stacked repositories
            return set()
        if not self.is_in_write_group():
            raise AssertionError('not in a write group')

        # XXX: We assume that every added revision already has its
        # corresponding inventory, so we only check for parent inventories that
        # might be missing, rather than all inventories.
        parents = set(self.revisions._index.get_missing_parents())
        parents.discard(_mod_revision.NULL_REVISION)
        unstacked_inventories = self.inventories._index
        present_inventories = unstacked_inventories.get_parent_map(
            key[-1:] for key in parents)
        parents.difference_update(present_inventories)
        if len(parents) == 0:
            # No missing parent inventories.
            return set()
        if not check_for_missing_texts:
            return set(('inventories', rev_id) for (rev_id,) in parents)
        # Ok, now we have a list of missing inventories. But these only matter
        # if the inventories that reference them are missing some texts they
        # appear to introduce.
        # XXX: Texts referenced by all added inventories need to be present,
        # but at the moment we're only checking for texts referenced by
        # inventories at the graph's edge.
        key_deps = self.revisions._index._key_dependencies
        key_deps.add_keys(present_inventories)
        referrers = frozenset(r[0] for r in key_deps.get_referrers())
        file_ids = self.fileids_altered_by_revision_ids(referrers)
        missing_texts = set()
        for file_id, version_ids in file_ids.iteritems():
            missing_texts.update(
                (file_id, version_id) for version_id in version_ids)
        present_texts = self.texts.get_parent_map(missing_texts)
        missing_texts.difference_update(present_texts)
        if not missing_texts:
            # No texts are missing, so all revisions and their deltas are
            # reconstructable.
            return set()
        # Alternatively the text versions could be returned as the missing
        # keys, but this is likely to be less data.
        missing_keys = set(('inventories', rev_id) for (rev_id,) in parents)
        return missing_keys

    def refresh_data(self):
        """Re-read any data needed to synchronise with disk.

        This method is intended to be called after another repository instance
        (such as one used by a smart server) has inserted data into the
        repository. It may not be called during a write group, but may be
        called at any other time.
        """
        if self.is_in_write_group():
            raise errors.InternalBzrError(
                "May not refresh_data while in a write group.")
        self._refresh_data()

    def resume_write_group(self, tokens):
        if not self.is_write_locked():
            raise errors.NotWriteLocked(self)
        if self._write_group:
            raise errors.BzrError('already in a write group')
        self._resume_write_group(tokens)
        # so we can detect unlock/relock - the write group is now entered.
        self._write_group = self.get_transaction()

    def _resume_write_group(self, tokens):
        raise errors.UnsuspendableWriteGroup(self)

    def fetch(self, source, revision_id=None, pb=None, find_ghosts=False,
            fetch_spec=None):
        """Fetch the content required to construct revision_id from source.

        If revision_id is None and fetch_spec is None, then all content is
        copied.

        fetch() may not be used when the repository is in a write group -
        either finish the current write group before using fetch, or use
        fetch before starting the write group.

        :param find_ghosts: Find and copy revisions in the source that are
            ghosts in the target (and not reachable directly by walking out to
            the first-present revision in target from revision_id).
        :param revision_id: If specified, all the content needed for this
            revision ID will be copied to the target. Fetch will determine for
            itself which content needs to be copied.
        :param fetch_spec: If specified, a SearchResult or
            PendingAncestryResult that describes which revisions to copy. This
            allows copying multiple heads at once. Mutually exclusive with
            revision_id.
        """
        if fetch_spec is not None and revision_id is not None:
            raise AssertionError(
                "fetch_spec and revision_id are mutually exclusive.")
        if self.is_in_write_group():
            raise errors.InternalBzrError(
                "May not fetch while in a write group.")
        # fast path same-url fetch operations
        # TODO: lift out to somewhere common with RemoteRepository
        # <https://bugs.edge.launchpad.net/bzr/+bug/401646>
        if (self.has_same_location(source)
            and fetch_spec is None
            and self._has_same_fallbacks(source)):
            # check that last_revision is in 'from' and then return a
            # no-operation.
            if (revision_id is not None and
                not _mod_revision.is_null(revision_id)):
                self.get_revision(revision_id)
            return 0, []
        # if there is no specific appropriate InterRepository, this will get
        # the InterRepository base class, which raises an
        # IncompatibleRepositories when asked to fetch.
        inter = InterRepository.get(source, self)
        return inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts, fetch_spec=fetch_spec)
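
    # Illustrative sketch (not part of bzrlib): mirroring a single head from
    # another repository, assuming `repo` and `other_repo` are Repository
    # instances and `revid` names a revision present in `other_repo`:
    #
    #   repo.lock_write()
    #   try:
    #       repo.fetch(other_repo, revision_id=revid)
    #   finally:
    #       repo.unlock()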

    def create_bundle(self, target, base, fileobj, format=None):
        return serializer.write_bundle(self, target, base, fileobj, format)

    def get_commit_builder(self, branch, parents, config, timestamp=None,
                           timezone=None, committer=None, revprops=None,
                           revision_id=None):
        """Obtain a CommitBuilder for this repository.

        :param branch: Branch to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        result = self._commit_builder_class(self, parents, config,
            timestamp, timezone, committer, revprops, revision_id)
        self.start_write_group()
        return result

    def _create_sprouting_repo(self, a_bzrdir, shared):
        if a_bzrdir.root_transport.base == self.bzrdir.root_transport.base:
            # use target default format.
            dest_repo = a_bzrdir.create_repository()
        else:
            # Most control formats need the repository to be specifically
            # created, but on some old all-in-one formats it's not needed
            try:
                dest_repo = self._format.initialize(a_bzrdir, shared=shared)
            except errors.UninitializableFormat:
                dest_repo = a_bzrdir.open_repository()
        return dest_repo

    def _get_sink(self):
        """Return a sink for streaming into this repository."""
        return StreamSink(self)

    def _get_source(self, to_format):
        """Return a source for streaming from this repository."""
        return StreamSource(self, to_format)

    @needs_read_lock
    def has_revision(self, revision_id):
        """True if this repository has a copy of the revision."""
        return revision_id in self.has_revisions((revision_id,))

    def has_revisions(self, revision_ids):
        """Probe to find out the presence of multiple revisions.

        :param revision_ids: An iterable of revision_ids.
        :return: A set of the revision_ids that were present.
        """
        parent_map = self.revisions.get_parent_map(
            [(rev_id,) for rev_id in revision_ids])
        result = set()
        if _mod_revision.NULL_REVISION in revision_ids:
            result.add(_mod_revision.NULL_REVISION)
        result.update([key[0] for key in parent_map])
        return result

    def get_revision(self, revision_id):
        """Return the Revision object for a named revision."""
        return self.get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revision_reconcile(self, revision_id):
        """'reconcile' helper routine that allows access to a revision always.

        This variant of get_revision does not cross check the weave graph
        against the revision one as get_revision does: but it should only
        be used by reconcile, or reconcile-alike commands that are correcting
        or testing the revision graph.
        """
        return self._get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revisions(self, revision_ids):
        """Get many revisions at once.

        Repositories that need to check data on every revision read should
        subclass this method.
        """
        return self._get_revisions(revision_ids)

    def _get_revisions(self, revision_ids):
        """Core work logic to get many revisions without sanity checks."""
        revs = {}
        for revid, rev in self._iter_revisions(revision_ids):
            if rev is None:
                raise errors.NoSuchRevision(self, revid)
            revs[revid] = rev
        return [revs[revid] for revid in revision_ids]

    def _iter_revisions(self, revision_ids):
        """Iterate over revision objects.

        :param revision_ids: An iterable of revisions to examine. None may be
            passed to request all revisions known to the repository. Note that
            not all repositories can find unreferenced revisions; for those
            repositories only referenced ones will be returned.
        :return: An iterator of (revid, revision) tuples. Absent revisions (
            those asked for but not available) are returned as (revid, None).
        """
        if revision_ids is None:
            revision_ids = self.all_revision_ids()
        else:
            for rev_id in revision_ids:
                if not rev_id or not isinstance(rev_id, basestring):
                    raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
        keys = [(key,) for key in revision_ids]
        stream = self.revisions.get_record_stream(keys, 'unordered', True)
        for record in stream:
            revid = record.key[0]
            if record.storage_kind == 'absent':
                yield (revid, None)
            else:
                text = record.get_bytes_as('fulltext')
                rev = self._serializer.read_revision_from_string(text)
                yield (revid, rev)

    @needs_read_lock
    def get_revision_xml(self, revision_id):
        # TODO: jam 20070210 This shouldn't be necessary since get_revision
        #       would have already done it.
        # TODO: jam 20070210 Just use _serializer.write_revision_to_string()
        # TODO: this can't just be replaced by:
        # return self._serializer.write_revision_to_string(
        #     self.get_revision(revision_id))
        # as cStringIO preserves the encoding unlike write_revision_to_string
        # or some other call down the path.
        rev = self.get_revision(revision_id)
        rev_tmp = cStringIO.StringIO()
        # the current serializer..
        self._serializer.write_revision(rev, rev_tmp)
        rev_tmp.seek(0)
        return rev_tmp.getvalue()
def get_deltas_for_revisions(self, revisions):
1905
def get_deltas_for_revisions(self, revisions, specific_fileids=None):
611
1906
"""Produce a generator of revision deltas.
613
1908
Note that the input is a sequence of REVISIONS, not revision_ids.
614
1909
Trees will be held in memory until the generator exits.
615
1910
Each delta is relative to the revision's lefthand predecessor.
1912
:param specific_fileids: if not None, the result is filtered
1913
so that only those file-ids, their parents and their
1914
children are included.
1916
# Get the revision-ids of interest
617
1917
required_trees = set()
618
1918
for revision in revisions:
619
1919
required_trees.add(revision.revision_id)
620
1920
required_trees.update(revision.parent_ids[:1])
621
trees = dict((t.get_revision_id(), t) for
622
t in self.revision_trees(required_trees))
1922
# Get the matching filtered trees. Note that it's more
1923
# efficient to pass filtered trees to changes_from() rather
1924
# than doing the filtering afterwards. changes_from() could
1925
# arguably do the filtering itself but it's path-based, not
1926
# file-id based, so filtering before or afterwards is
1928
if specific_fileids is None:
1929
trees = dict((t.get_revision_id(), t) for
1930
t in self.revision_trees(required_trees))
1932
trees = dict((t.get_revision_id(), t) for
1933
t in self._filtered_revision_trees(required_trees,
1936
# Calculate the deltas
623
1937
for revision in revisions:
624
1938
if not revision.parent_ids:
625
old_tree = self.revision_tree(None)
1939
old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
627
1941
old_tree = trees[revision.parent_ids[0]]
628
1942
yield trees[revision.revision_id].changes_from(old_tree)
630
1944
@needs_read_lock
631
def get_revision_delta(self, revision_id):
1945
def get_revision_delta(self, revision_id, specific_fileids=None):
632
1946
"""Return the delta for one revision.
634
1948
The delta is relative to the left-hand predecessor of the
1951
:param specific_fileids: if not None, the result is filtered
1952
so that only those file-ids, their parents and their
1953
children are included.
637
1955
r = self.get_revision(revision_id)
638
return list(self.get_deltas_for_revisions([r]))[0]
640
def _check_revision_parents(self, revision, inventory):
641
"""Private to Repository and Fetch.
643
This checks the parentage of revision in an inventory weave for
644
consistency and is only applicable to inventory-weave-for-ancestry
645
using repository formats & fetchers.
647
weave_parents = inventory.get_parents(revision.revision_id)
648
weave_names = inventory.versions()
649
for parent_id in revision.parent_ids:
650
if parent_id in weave_names:
651
# this parent must not be a ghost.
652
if not parent_id in weave_parents:
654
raise errors.CorruptRepository(self)
1956
return list(self.get_deltas_for_revisions([r],
1957
specific_fileids=specific_fileids))[0]

    @needs_write_lock
    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        signature = gpg_strategy.sign(plaintext)
        self.add_signature_text(revision_id, signature)

    def add_signature_text(self, revision_id, signature):
        self.signatures.add_lines((revision_id,), (),
            osutils.split_lines(signature))

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
        """
        revision_keys = self.revisions.keys()
        w = self.inventories
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_text_key_references_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
        finally:
            pb.finished()
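
    # Illustrative sketch (not part of bzrlib): the returned mapping looks
    # like {(file_id, revision_id): bool}, e.g.
    #
    #   {('a-file-id', 'rev-1'): True, ('a-file-id', 'rev-2'): False}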

    def _find_text_key_references_from_xml_inventory_lines(self,
        line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        """
        if not self._serializer.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references_from_xml_inventory_lines only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines : we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.

    def item_keys_introduced_by(self, revision_ids, _files_pb=None):
        """Get an iterable listing the keys of all the data introduced by a set
        of revision_ids.

        The keys will be ordered so that the corresponding items can be safely
        fetched and inserted in that order.

        :returns: An iterable producing tuples of (knit-kind, file-id,
            versions). knit-kind is one of 'file', 'inventory', 'signatures',
            'revisions'. file-id is None unless knit-kind is 'file'.
        """
        for result in self._find_file_keys_to_fetch(revision_ids, _files_pb):
            yield result
        del _files_pb
        for result in self._find_non_file_keys_to_fetch(revision_ids):
            yield result

    def _find_file_keys_to_fetch(self, revision_ids, pb):
        # XXX: it's a bit weird to control the inventory weave caching in this
        # generator. Ideally the caching would be done in fetch.py I think. Or
        # maybe this generator should explicitly have the contract that it
        # should not be iterated until the previously yielded item has been
        # processed.
        inv_w = self.inventories

        # file ids that changed
        file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
        count = 0
        num_file_ids = len(file_ids)
        for file_id, altered_versions in file_ids.iteritems():
            if pb is not None:
                pb.update("fetch texts", count, num_file_ids)
            count += 1
            yield ("file", file_id, altered_versions)

    def _find_non_file_keys_to_fetch(self, revision_ids):
        # inventory
        yield ("inventory", None, revision_ids)

        # signatures
        # XXX: Note ATM no callers actually pay attention to this return
        #      instead they just use the list of revision ids and ignore
        #      missing sigs. Consider removing this work entirely
        revisions_with_signatures = set(self.signatures.get_parent_map(
            [(r,) for r in revision_ids]))
        revisions_with_signatures = set(
            [r for (r,) in revisions_with_signatures])
        revisions_with_signatures.intersection_update(revision_ids)
        yield ("signatures", None, revisions_with_signatures)

        # revisions
        yield ("revisions", None, revision_ids)

    @needs_read_lock
    def get_inventory_weave(self):
        return self.control_weaves.get_weave('inventory',
            self.get_transaction())

    @needs_read_lock
    def get_inventory(self, revision_id):
        """Get Inventory object by revision id."""
        return self.iter_inventories([revision_id]).next()

    def iter_inventories(self, revision_ids, ordering=None):
        """Get many inventories by revision_ids.

        This will buffer some or all of the texts used in constructing the
        inventories in memory, but will only parse a single inventory at a
        time.

        :param revision_ids: The expected revision ids of the inventories.
        :param ordering: optional ordering, e.g. 'topological'. If not
            specified, the order of revision_ids will be preserved (by
            buffering if necessary).
        :return: An iterator of inventories.
        """
        if ((None in revision_ids)
            or (_mod_revision.NULL_REVISION in revision_ids)):
            raise ValueError('cannot get null revision inventory')
        return self._iter_inventories(revision_ids, ordering)
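
    # Illustrative sketch (not part of bzrlib): results come back in the
    # order requested unless an explicit ordering is given, e.g. (assuming
    # `repo` holds revisions 'rev-1' and 'rev-2'):
    #
    #   for inv in repo.iter_inventories(['rev-2', 'rev-1']):
    #       print inv.revision_id        # 'rev-2', then 'rev-1'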

    def _iter_inventories(self, revision_ids, ordering):
        """single-document based inventory iteration."""
        inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
        for text, revision_id in inv_xmls:
            yield self.deserialise_inventory(revision_id, text)

    def _iter_inventory_xmls(self, revision_ids, ordering):
        if ordering is None:
            order_as_requested = True
            ordering = 'unordered'
        else:
            order_as_requested = False
        keys = [(revision_id,) for revision_id in revision_ids]
        if not keys:
            return
        if order_as_requested:
            key_iter = iter(keys)
            next_key = key_iter.next()
        stream = self.inventories.get_record_stream(keys, ordering, True)
        text_chunks = {}
        for record in stream:
            if record.storage_kind != 'absent':
                chunks = record.get_bytes_as('chunked')
                if order_as_requested:
                    text_chunks[record.key] = chunks
                else:
                    yield ''.join(chunks), record.key[-1]
            else:
                raise errors.NoSuchRevision(self, record.key)
            if order_as_requested:
                # Yield as many results as we can while preserving order.
                while next_key in text_chunks:
                    chunks = text_chunks.pop(next_key)
                    yield ''.join(chunks), next_key[-1]
                    try:
                        next_key = key_iter.next()
                    except StopIteration:
                        # We still want to fully consume the get_record_stream,
                        # just in case it is not actually finished at this point
                        next_key = None
                        break

    def deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        """
        result = self._serializer.read_inventory_from_string(xml, revision_id,
                    entry_cache=self._inventory_entry_cache)
        if result.revision_id != revision_id:
            raise AssertionError('revision id mismatch %s != %s' % (
                result.revision_id, revision_id))
        return result

    def serialise_inventory(self, inv):
        return self._serializer.write_inventory_to_string(inv)

    def _serialise_inventory_to_lines(self, inv):
        return self._serializer.write_inventory_to_lines(inv)

    def get_serializer_format(self):
        return self._serializer.format_num

    @needs_read_lock
    def get_inventory_xml(self, revision_id):
        """Get inventory XML as a string."""
        texts = self._iter_inventory_xmls([revision_id], 'unordered')
        try:
            text, revision_id = texts.next()
        except StopIteration:
            raise errors.HistoryMissing(self, 'inventory', revision_id)
        return text

    @needs_read_lock
    def get_inventory_sha1(self, revision_id):
        """Return the sha1 hash of the inventory entry
        """
        # TODO: jam 20070210 Shouldn't this be deprecated / removed?
        return self.get_revision(revision_id).inventory_sha1

    def get_revision_graph(self, revision_id=None):
        """Return a dictionary containing the revision graph.

        :param revision_id: The revision_id to get a graph from. If None, then
            the entire revision graph is returned. This is a deprecated mode of
            operation and will be removed in the future.
        :return: a dictionary of revision_id->revision_parents_list.
        """
        if 'evil' in debug.debug_flags:
            mutter_callsite(3,
                "get_revision_graph scales with size of history.")
        # special case NULL_REVISION
        if revision_id == _mod_revision.NULL_REVISION:
            return {}
        revision_id = osutils.safe_revision_id(revision_id)
        a_weave = self.get_inventory_weave()
        all_revisions = self._eliminate_revisions_not_present(
            a_weave.versions())
        entire_graph = dict([(node, tuple(a_weave.get_parents(node))) for
                             node in all_revisions])
        if revision_id is None:
            return entire_graph
        elif revision_id not in entire_graph:
            raise errors.NoSuchRevision(self, revision_id)
        else:
            # add what can be reached from revision_id
            result = {}
            pending = set([revision_id])
            while len(pending) > 0:
                node = pending.pop()
                result[node] = entire_graph[node]
                for revision_id in result[node]:
                    if revision_id not in result:
                        pending.add(revision_id)
            return result

    def get_revision_graph_with_ghosts(self, revision_ids=None):
        """Return a graph of the revisions with ghosts marked as applicable.

        :param revision_ids: an iterable of revisions to graph or None for all.
        :return: a Graph object with the graph reachable from revision_ids.
        """
        if 'evil' in debug.debug_flags:
            mutter_callsite(3,
                "get_revision_graph_with_ghosts scales with size of history.")
        result = deprecated_graph.Graph()
        if not revision_ids:
            pending = set(self.all_revision_ids())
            required = set([])
        else:
            pending = set(osutils.safe_revision_id(r) for r in revision_ids)
            # special case NULL_REVISION
            if _mod_revision.NULL_REVISION in pending:
                pending.remove(_mod_revision.NULL_REVISION)
            required = set(pending)
        done = set([])
        while len(pending):
            revision_id = pending.pop()
            try:
                rev = self.get_revision(revision_id)
            except errors.NoSuchRevision:
                if revision_id in required:
                    raise
                # a ghost
                result.add_ghost(revision_id)
                continue
            for parent_id in rev.parent_ids:
                # is this queued or done ?
                if (parent_id not in pending and
                    parent_id not in done):
                    # no, queue it.
                    pending.add(parent_id)
            result.add_node(revision_id, rev.parent_ids)
            done.add(revision_id)
        return result

    def _get_history_vf(self):
        """Get a versionedfile whose history graph reflects all revisions.

        For weave repositories, this is the inventory weave.
        """
        return self.get_inventory_weave()

    def get_rev_id_for_revno(self, revno, known_pair):
        """Return the revision id of a revno, given a later (revno, revid)
        pair in the same history.

        :return: if found (True, revid).  If the available history ran out
            before reaching the revno, then this returns
            (False, (closest_revno, closest_revid)).
        """
        known_revno, known_revid = known_pair
        partial_history = [known_revid]
        distance_from_known = known_revno - revno
        if distance_from_known < 0:
            raise ValueError(
                'requested revno (%d) is later than given known revno (%d)'
                % (revno, known_revno))
        try:
            _iter_for_revno(
                self, partial_history, stop_index=distance_from_known)
        except errors.RevisionNotPresent, err:
            if err.revision_id == known_revid:
                # The start revision (known_revid) wasn't found.
                raise
            # This is a stacked repository with no fallbacks, or there's a
            # left-hand ghost.  Either way, even though the revision named in
            # the error isn't in this repo, we know it's the next step in this
            # left-hand history.
            partial_history.append(err.revision_id)
        if len(partial_history) <= distance_from_known:
            # Didn't find enough history to get a revid for the revno.
            earliest_revno = known_revno - len(partial_history) + 1
            return (False, (earliest_revno, partial_history[-1]))
        if len(partial_history) - 1 > distance_from_known:
            raise AssertionError('_iter_for_revno returned too much history')
        return (True, partial_history[-1])
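
    # Example (hedged sketch; ``repo`` and the revids are illustrative names):
    # given that revno 10 is known to be 'rev-10-id', look up the revid for
    # revno 7 by walking at most 3 steps of left-hand history.
    #
    #   found, info = repo.get_rev_id_for_revno(7, (10, 'rev-10-id'))
    #   if found:
    #       revid = info                          # the revid for revno 7
    #   else:
    #       closest_revno, closest_revid = info   # history ran out early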

    def iter_reverse_revision_history(self, revision_id):
        """Iterate backwards through revision ids in the lefthand history

        :param revision_id: The revision id to start with.  All its lefthand
            ancestors will be traversed.
        """
        graph = self.get_graph()
        next_id = revision_id
        while True:
            if next_id in (None, _mod_revision.NULL_REVISION):
                return
            try:
                parents = graph.get_parent_map([next_id])[next_id]
            except KeyError:
                raise errors.RevisionNotPresent(next_id, self)
            yield next_id
            if len(parents) == 0:
                return
            else:
                next_id = parents[0]

    def _refresh_data(self):
        """Helper called from lock_* to ensure coherency with disk.

        The default implementation does nothing; it is however possible
        for repositories to maintain loaded indices across multiple locks
        by checking inside their implementation of this method to see
        whether their indices are still valid. This depends of course on
        the disk format being validatable in this manner. This method is
        also called by the refresh_data() public interface to cause a refresh
        to occur while in a write lock so that data inserted by a smart server
        push operation is visible on the client's instance of the physical
        repository.
        """

    @needs_read_lock
    def revision_tree(self, revision_id):
        """Return Tree for a revision on this branch.

        `revision_id` may be NULL_REVISION for the empty tree revision.
        """
        revision_id = _mod_revision.ensure_null(revision_id)
        # TODO: refactor this to use an existing revision object
        # so we don't need to read it in twice.
        if revision_id == _mod_revision.NULL_REVISION:
            return RevisionTree(self, Inventory(root_id=None),
                                _mod_revision.NULL_REVISION)
        else:
            inv = self.get_revision_inventory(revision_id)
            return RevisionTree(self, inv, revision_id)
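
    # Sketch of the NULL_REVISION special case above (illustrative only;
    # ``repo`` is an assumed name): the empty tree has no inventory entries.
    #
    #   tree = repo.revision_tree(_mod_revision.NULL_REVISION)
    #   list(tree.inventory.iter_entries())   # -> []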

    def revision_trees(self, revision_ids):
        """Return Trees for revisions in this repository.

        :param revision_ids: a sequence of revision-ids;
          a revision-id may not be None or 'null:'
        """
        inventories = self.iter_inventories(revision_ids)
        for inv in inventories:
            yield RevisionTree(self, inv, inv.revision_id)

    def _filtered_revision_trees(self, revision_ids, file_ids):
        """Return Tree for a revision on this branch with only some files.

        :param revision_ids: a sequence of revision-ids;
          a revision-id may not be None or 'null:'
        :param file_ids: if not None, the result is filtered
          so that only those file-ids, their parents and their
          children are included.
        """
        inventories = self.iter_inventories(revision_ids)
        for inv in inventories:
            # Should we introduce a FilteredRevisionTree class rather
            # than pre-filter the inventory here?
            filtered_inv = inv.filter(file_ids)
            yield RevisionTree(self, filtered_inv, filtered_inv.revision_id)

    @needs_read_lock
    def get_ancestry(self, revision_id, topo_sorted=True):
        """Return a list of revision-ids integrated by a revision.

        The first element of the list is always None, indicating the origin
        revision.  This might change when we have history horizons, or
        perhaps we should have a new API.

        This is topologically sorted.
        """
        if _mod_revision.is_null(revision_id):
            return [None]
        if not self.has_revision(revision_id):
            raise errors.NoSuchRevision(self, revision_id)
        graph = self.get_graph()
        keys = set()
        search = graph._make_breadth_first_searcher([revision_id])
        while True:
            try:
                found, ghosts = search.next_with_ghosts()
            except StopIteration:
                break
            keys.update(found)
        if _mod_revision.NULL_REVISION in keys:
            keys.remove(_mod_revision.NULL_REVISION)
        parent_map = graph.get_parent_map(keys)
        keys = tsort.topo_sort(parent_map)
        return [None] + list(keys)
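
    # Usage sketch (illustrative; ``repo`` and ``revid`` are assumed names):
    # the leading None stands in for the origin revision.
    #
    #   ancestry = repo.get_ancestry(revid)
    #   ancestry[0]    # always None
    #   ancestry[1:]   # revision ids, topologically sorted oldest-first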

    def pack(self, hint=None):
        """Compress the data within the repository.

        This operation only makes sense for some repository types. For other
        types it should be a no-op that just returns.

        This stub method does not require a lock, but subclasses should use
        @needs_write_lock as this is a long running call it is reasonable to
        implicitly lock for the user.

        :param hint: If not supplied, the whole repository is packed.
            If supplied, the repository may use the hint parameter as a
            hint for the parts of the repository to pack. A hint can be
            obtained from the result of commit_write_group(). Out of
            date hints are simply ignored, because concurrent operations
            can obsolete them rapidly.
        """
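
    # Illustrative flow for the hint parameter described above (assumed
    # names, a sketch rather than the canonical call sequence):
    # commit_write_group() may return a hint which can be fed back into
    # pack() so only the touched parts of the repository are repacked.
    #
    #   repo.start_write_group()
    #   ...                                   # insert data
    #   hint = repo.commit_write_group()
    #   if hint and repo._format.pack_compresses:
    #       repo.pack(hint=hint)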

    @needs_read_lock
    def print_file(self, file, revision_id):
        """Print `file` to stdout.

        FIXME RBC 20060125 as John Meinel points out this is a bad api
        - it writes to stdout, it assumes that that is valid etc. Fix
        by creating a new more flexible convenience function.
        """
        revision_id = osutils.safe_revision_id(revision_id)
        tree = self.revision_tree(revision_id)
        # use inventory as it was in that revision
        file_id = tree.inventory.path2id(file)
        if not file_id:
            # TODO: jam 20060427 Write a test for this code path
            #       it had a bug in it, and was raising the wrong
            #       exception.
            raise errors.BzrError("%r is not present in revision %s" % (file, revision_id))
        tree.print_file(file_id)

    def get_transaction(self):
        return self.control_files.get_transaction()

    def revision_parents(self, revision_id):
        revision_id = osutils.safe_revision_id(revision_id)
        return self.get_inventory_weave().parent_names(revision_id)

    def get_parent_map(self, revision_ids):
        """See graph.StackedParentsProvider.get_parent_map"""
        # revisions index works in keys; this just works in revisions
        # therefore wrap and unwrap
        query_keys = []
        result = {}
        for revision_id in revision_ids:
            if revision_id == _mod_revision.NULL_REVISION:
                result[revision_id] = ()
            elif revision_id is None:
                raise ValueError('get_parent_map(None) is not valid')
            else:
                query_keys.append((revision_id,))
        for ((revision_id,), parent_keys) in \
                self.revisions.get_parent_map(query_keys).iteritems():
            if parent_keys:
                result[revision_id] = tuple(parent_revid
                    for (parent_revid,) in parent_keys)
            else:
                result[revision_id] = (_mod_revision.NULL_REVISION,)
        return result
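
    # Result conventions in miniature (sketch; ``repo`` is an assumed name):
    #
    #   repo.get_parent_map(['a-ghost-id'])   # -> {}  (absent ids are omitted)
    #   repo.get_parent_map([_mod_revision.NULL_REVISION])   # -> {'null:': ()}
    #
    # A present revision with no parents reports NULL_REVISION as its sole
    # parent, matching the convention used by the graph code.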

    def _make_parents_provider(self):
        return self


class InterRepository(InterObject):
    """This class represents operations taking place between two repositories.

    Its instances have methods like copy_content and fetch, and contain
    references to the source and target repositories these operations can be
    carried out on.
    """

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
            fetch_spec=None):
        """Fetch the content required to construct revision_id.

        The content is copied from self.source to self.target.

        :param revision_id: if None all content is copied, if NULL_REVISION no
                            content is copied.
        :param pb: optional progress bar to use for progress reports. If not
                   provided a default one will be created.
        :return: None.
        """
        from bzrlib.fetch import RepoFetcher
        f = RepoFetcher(to_repository=self.target,
                               from_repository=self.source,
                               last_revision=revision_id,
                               fetch_spec=fetch_spec,
                               pb=pb, find_ghosts=find_ghosts)

    def _walk_to_common_revisions(self, revision_ids):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: A set of revision ids.
        """
        target_graph = self.target.get_graph()
        revision_ids = frozenset(revision_ids)
        missing_revs = set()
        source_graph = self.source.get_graph()
        # ensure we don't pay silly lookup costs.
        searcher = source_graph._make_breadth_first_searcher(revision_ids)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        searcher_exhausted = False
        while True:
            next_revs = set()
            ghosts = set()
            # Iterate the searcher until we have enough next_revs
            while len(next_revs) < self._walk_to_common_revisions_batch_size:
                try:
                    next_revs_part, ghosts_part = searcher.next_with_ghosts()
                    next_revs.update(next_revs_part)
                    ghosts.update(ghosts_part)
                except StopIteration:
                    searcher_exhausted = True
                    break
            # If there are ghosts in the source graph, and the caller asked for
            # them, make sure that they are present in the target.
            # We don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            ghosts_to_check = set(revision_ids.intersection(ghosts))
            revs_to_get = set(next_revs).union(ghosts_to_check)
            if revs_to_get:
                have_revs = set(target_graph.get_parent_map(revs_to_get))
                # we always have NULL_REVISION present.
                have_revs = have_revs.union(null_set)
                # Check if the target is missing any ghosts we need.
                ghosts_to_check.difference_update(have_revs)
                if ghosts_to_check:
                    # One of the caller's revision_ids is a ghost in both the
                    # source and the target.
                    raise errors.NoSuchRevision(
                        self.source, ghosts_to_check.pop())
                missing_revs.update(next_revs - have_revs)
                # Because we may have walked past the original stop point, make
                # sure everything is stopped
                stop_revs = searcher.find_seen_ancestors(have_revs)
                searcher.stop_searching_any(stop_revs)
            if searcher_exhausted:
                break
        return searcher.get_result()

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A bzrlib.graph.SearchResult.
        """
        # stop searching at found target revisions.
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)

    @staticmethod
    def _same_model(source, target):
        """True if source and target have the same data representation.

        Note: this is always called on the base class; overriding it in a
        subclass will have no effect.
        """
        try:
            InterRepository._assert_same_model(source, target)
            return True
        except errors.IncompatibleRepositories, e:
            return False

    @staticmethod
    def _assert_same_model(source, target):
        """Raise an exception if two repositories do not use the same model.
        """
        if source.supports_rich_root() != target.supports_rich_root():
            raise errors.IncompatibleRepositories(source, target,
                "different rich-root support")
        if source._serializer != target._serializer:
            raise errors.IncompatibleRepositories(source, target,
                "different serializers")


class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit3()

    @staticmethod
    def is_compatible(source, target):
        return InterRepository._same_model(source, target)

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This copies both the repository's revision data, and configuration information
        such as the make_working_trees setting.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # TODO: jam 20070210 This is fairly internal, so we should probably
        #       just assert that revision_id is not unicode.
        revision_id = osutils.safe_revision_id(revision_id)
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import GenericRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        # TODO: jam 20070210 This should be an assert, not a translate
        revision_id = osutils.safe_revision_id(revision_id)
        f = GenericRepoFetcher(to_repository=self.target,
                               from_repository=self.source,
                               last_revision=revision_id,
                               pb=pb)
        return f.count_copied, f.failed_revisions


class InterWeaveRepo(InterSameDataRepository):
    """Optimised code paths between Weave based repositories.

    This should be in bzrlib/repofmt/weaverepo.py but we have not yet
    implemented lazy inter-object optimisation.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import weaverepo
        return weaverepo.RepositoryFormat7()


class InterKnitRepo(InterSameDataRepository):
    """Optimised code paths between Knit based repositories."""

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Knit formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
        try:
            are_knits = (isinstance(source._format, RepositoryFormatKnit) and
                isinstance(target._format, RepositoryFormatKnit))
        except AttributeError:
            return False
        return are_knits and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import KnitRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        # TODO: jam 20070210 This should be an assert, not a translate
        revision_id = osutils.safe_revision_id(revision_id)
        f = KnitRepoFetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)
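
    # Usage sketch (names are illustrative): the SearchResult can be turned
    # back into a plain set of revision ids with get_keys().
    #
    #   inter = InterRepository.get(source_repo, target_repo)
    #   search = inter.search_missing_revision_ids(revision_id=tip,
    #       find_ghosts=False)
    #   missing = search.get_keys()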


class InterModel1and2(InterRepository):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        if not source.supports_rich_root() and target.supports_rich_root():
            return True
        else:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Model1toKnit2Fetcher
        # TODO: jam 20070210 This should be an assert, not a translate
        revision_id = osutils.safe_revision_id(revision_id)
        f = Model1toKnit2Fetcher(to_repository=self.target,
                                 from_repository=self.source,
                                 last_revision=revision_id,
                                 pb=pb)
        return f.count_copied, f.failed_revisions

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # TODO: jam 20070210 Internal, assert, don't translate
        revision_id = osutils.safe_revision_id(revision_id)
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)


class InterKnit1and2(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit1 source and Knit3 target"""
        try:
            from bzrlib.repofmt.knitrepo import RepositoryFormatKnit1, \
                RepositoryFormatKnit3
            return (isinstance(source._format, (RepositoryFormatKnit1)) and
                    isinstance(target._format, (RepositoryFormatKnit3)))
        except AttributeError:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Knit1to2Fetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        # TODO: jam 20070210 This should be an assert, not a translate
        revision_id = osutils.safe_revision_id(revision_id)
        f = Knit1to2Fetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb)
        return f.count_copied, f.failed_revisions


class InterRemoteRepository(InterRepository):
    """Code for converting between RemoteRepository objects.

    This just gets a non-remote repository from the RemoteRepository, and calls
    InterRepository.get again.
    """

    def __init__(self, source, target):
        if isinstance(source, remote.RemoteRepository):
            source._ensure_real()
            real_source = source._real_repository
        else:
            real_source = source
        if isinstance(target, remote.RemoteRepository):
            target._ensure_real()
            real_target = target._real_repository
        else:
            real_target = target
        self.real_inter = InterRepository.get(real_source, real_target)

    @staticmethod
    def is_compatible(source, target):
        if isinstance(source, remote.RemoteRepository):
            return True
        if isinstance(target, remote.RemoteRepository):
            return True
        return False

    def copy_content(self, revision_id=None):
        self.real_inter.copy_content(revision_id=revision_id)

    def fetch(self, revision_id=None, pb=None):
        self.real_inter.fetch(revision_id=revision_id, pb=pb)

    @classmethod
    def _get_repo_format_to_test(self):
        return None


class InterDifferingSerializer(InterRepository):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit2 source and Knit3 target"""
        # This is redundant with format.check_conversion_target(), however that
        # raises an exception, and we just want to say "False" as in we won't
        # support converting between these formats.
        if 'IDS_never' in debug.debug_flags:
            return False
        if source.supports_rich_root() and not target.supports_rich_root():
            return False
        if (source._format.supports_tree_reference
            and not target._format.supports_tree_reference):
            return False
        if target._fallback_repositories and target._format.supports_chks:
            # IDS doesn't know how to copy CHKs for the parent inventories it
            # adds to stacked repos.
            return False
        if 'IDS_always' in debug.debug_flags:
            return True
        # Only use this code path for local source and target.  IDS does far
        # too much IO (both bandwidth and roundtrips) over a network.
        if not source.bzrdir.transport.base.startswith('file:///'):
            return False
        if not target.bzrdir.transport.base.startswith('file:///'):
            return False
        return True

    def _get_trees(self, revision_ids, cache):
        possible_trees = []
        for rev_id in revision_ids:
            if rev_id in cache:
                possible_trees.append((rev_id, cache[rev_id]))
            else:
                # Not cached, but inventory might be present anyway.
                try:
                    tree = self.source.revision_tree(rev_id)
                except errors.NoSuchRevision:
                    # Nope, parent is ghost.
                    pass
                else:
                    cache[rev_id] = tree
                    possible_trees.append((rev_id, tree))
        return possible_trees

    def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
        """Get the best delta and base for this revision.

        :return: (basis_id, delta)
        """
        deltas = []
        # Generate deltas against each tree, to find the shortest.
        texts_possibly_new_in_tree = set()
        for basis_id, basis_tree in possible_trees:
            delta = tree.inventory._make_delta(basis_tree.inventory)
            for old_path, new_path, file_id, new_entry in delta:
                if new_path is None:
                    # This file_id isn't present in the new rev, so we don't
                    # care about it.
                    continue
                if not new_path:
                    # Rich roots are handled elsewhere...
                    continue
                kind = new_entry.kind
                if kind != 'directory' and kind != 'file':
                    # No text record associated with this inventory entry.
                    continue
                # This is a directory or file that has changed somehow.
                texts_possibly_new_in_tree.add((file_id, new_entry.revision))
            deltas.append((len(delta), basis_id, delta))
        deltas.sort()
        return deltas[0][1:]
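
    # Worked sketch (illustrative): with two candidate bases, the smaller
    # delta wins because ``deltas`` holds (len(delta), basis_id, delta)
    # tuples and is sorted before taking deltas[0][1:].
    #
    #   deltas = [(12, 'rev-a', delta_a), (3, 'rev-b', delta_b)]
    #   deltas.sort()
    #   basis_id, delta = deltas[0][1:]   # -> ('rev-b', delta_b)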

    def _fetch_parent_invs_for_stacking(self, parent_map, cache):
        """Find all parent revisions that are absent, but for which the
        inventory is present, and copy those inventories.

        This is necessary to preserve correctness when the source is stacked
        without fallbacks configured.  (Note that in cases like upgrade the
        source may not have _fallback_repositories even though it is
        stacked.)
        """
        parent_revs = set()
        for parents in parent_map.values():
            parent_revs.update(parents)
        present_parents = self.source.get_parent_map(parent_revs)
        absent_parents = set(parent_revs).difference(present_parents)
        parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
            (rev_id,) for rev_id in absent_parents)
        parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
        for parent_tree in self.source.revision_trees(parent_inv_ids):
            current_revision_id = parent_tree.get_revision_id()
            parents_parents_keys = parent_invs_keys_for_stacking[
                (current_revision_id,)]
            parents_parents = [key[-1] for key in parents_parents_keys]
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
            delta = parent_tree.inventory._make_delta(basis_tree.inventory)
            self.target.add_inventory_by_delta(
                basis_id, delta, current_revision_id, parents_parents)
            cache[current_revision_id] = parent_tree

    def _fetch_batch(self, revision_ids, basis_id, cache):
        """Fetch across a few revisions.

        :param revision_ids: The revisions to copy
        :param basis_id: The revision_id of a tree that must be in cache, used
            as a basis for delta when no other base is available
        :param cache: A cache of RevisionTrees that we can use.
        :return: The revision_id of the last converted tree. The RevisionTree
            for it will be in cache
        """
        # Walk though all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        root_keys_to_create = set()
        text_keys = set()
        pending_deltas = []
        pending_revisions = []
        parent_map = self.source.get_parent_map(revision_ids)
        self._fetch_parent_invs_for_stacking(parent_map, cache)
        for tree in self.source.revision_trees(revision_ids):
            # Find an inventory delta for this revision.
            # Find text entries that need to be copied, too.
            current_revision_id = tree.get_revision_id()
            parent_ids = parent_map.get(current_revision_id, ())
            parent_trees = self._get_trees(parent_ids, cache)
            possible_trees = list(parent_trees)
            if len(possible_trees) == 0:
                # There either aren't any parents, or the parents are ghosts,
                # so just use the last converted tree.
                possible_trees.append((basis_id, cache[basis_id]))
            basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
                                                           possible_trees)
            if self._converting_to_rich_root:
                self._revision_id_to_root_id[current_revision_id] = \
                    tree.get_root_id()
            # Determine which texts are present in this revision but not in
            # any of the available parents.
            texts_possibly_new_in_tree = set()
            for old_path, new_path, file_id, entry in delta:
                if new_path is None:
                    # This file_id isn't present in the new rev
                    continue
                if not new_path:
                    # This is the root
                    if not self.target.supports_rich_root():
                        # The target doesn't support rich root, so we don't
                        # copy
                        continue
                    if self._converting_to_rich_root:
                        # This can't be copied normally, we have to insert
                        # it specially
                        root_keys_to_create.add((file_id, entry.revision))
                        continue
                kind = entry.kind
                texts_possibly_new_in_tree.add((file_id, entry.revision))
            for basis_id, basis_tree in possible_trees:
                basis_inv = basis_tree.inventory
                for file_key in list(texts_possibly_new_in_tree):
                    file_id, file_revision = file_key
                    try:
                        entry = basis_inv[file_id]
                    except errors.NoSuchId:
                        continue
                    if entry.revision == file_revision:
                        texts_possibly_new_in_tree.remove(file_key)
            text_keys.update(texts_possibly_new_in_tree)
            revision = self.source.get_revision(current_revision_id)
            pending_deltas.append((basis_id, delta,
                current_revision_id, revision.parent_ids))
            pending_revisions.append(revision)
            cache[current_revision_id] = tree
            basis_id = current_revision_id
        # Copy file texts
        from_texts = self.source.texts
        to_texts = self.target.texts
        if root_keys_to_create:
            from bzrlib.fetch import _new_root_data_stream
            root_stream = _new_root_data_stream(
                root_keys_to_create, self._revision_id_to_root_id, parent_map,
                self.source)
            to_texts.insert_record_stream(root_stream)
        to_texts.insert_record_stream(from_texts.get_record_stream(
            text_keys, self.target._format._fetch_order,
            not self.target._format._fetch_uses_deltas))
        # insert inventory deltas
        for delta in pending_deltas:
            self.target.add_inventory_by_delta(*delta)
        if self.target._fallback_repositories:
            # Make sure this stacked repository has all the parent inventories
            # for the new revisions that we are about to insert.  We do this
            # before adding the revisions so that no revision is added until
            # all the inventories it may depend on are added.
            # Note that this is overzealous, as we may have fetched these in an
            # earlier batch too.
            parent_ids = set()
            revision_ids = set()
            for revision in pending_revisions:
                revision_ids.add(revision.revision_id)
                parent_ids.update(revision.parent_ids)
            parent_ids.difference_update(revision_ids)
            parent_ids.discard(_mod_revision.NULL_REVISION)
            parent_map = self.source.get_parent_map(parent_ids)
            # we iterate over parent_map and not parent_ids because we don't
            # want to try copying any revision which is a ghost
            for parent_tree in self.source.revision_trees(parent_map):
                current_revision_id = parent_tree.get_revision_id()
                parents_parents = parent_map[current_revision_id]
                possible_trees = self._get_trees(parents_parents, cache)
                if len(possible_trees) == 0:
                    # There either aren't any parents, or the parents are
                    # ghosts, so just use the last converted tree.
                    possible_trees.append((basis_id, cache[basis_id]))
                basis_id, delta = self._get_delta_for_revision(parent_tree,
                    parents_parents, possible_trees)
                self.target.add_inventory_by_delta(
                    basis_id, delta, current_revision_id, parents_parents)
        # insert signatures and revisions
        for revision in pending_revisions:
            try:
                signature = self.source.get_signature_text(
                    revision.revision_id)
                self.target.add_signature_text(revision.revision_id,
                    signature)
            except errors.NoSuchRevision:
                pass
            self.target.add_revision(revision.revision_id, revision)
        return basis_id

    def _fetch_all_revisions(self, revision_ids, pb):
        """Fetch everything for the list of revisions.

        :param revision_ids: The list of revisions to fetch. Must be in
            topological order.
        :param pb: A ProgressTask
        :return: None
        """
        basis_id, basis_tree = self._get_basis(revision_ids[0])
        batch_size = 100
        cache = lru_cache.LRUCache(100)
        cache[basis_id] = basis_tree
        del basis_tree # We don't want to hang on to it here
        hints = []
        for offset in range(0, len(revision_ids), batch_size):
            self.target.start_write_group()
            try:
                pb.update('Transferring revisions', offset,
                          len(revision_ids))
                batch = revision_ids[offset:offset+batch_size]
                basis_id = self._fetch_batch(batch, basis_id, cache)
            except:
                self.target.abort_write_group()
                raise
            else:
                hint = self.target.commit_write_group()
                if hint:
                    hints.extend(hint)
        if hints and self.target._format.pack_compresses:
            self.target.pack(hint=hints)
        pb.update('Transferring revisions', len(revision_ids),
                  len(revision_ids))
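
    # The batching protocol above in miniature (illustrative sketch): every
    # batch is bracketed by a write group, aborted on any error, so a failed
    # transfer never leaves a partially-committed batch behind.
    #
    #   self.target.start_write_group()
    #   try:
    #       basis_id = self._fetch_batch(batch, basis_id, cache)
    #   except:
    #       self.target.abort_write_group()
    #       raise
    #   else:
    #       hints.extend(self.target.commit_write_group() or [])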

    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
            fetch_spec=None):
        """See InterRepository.fetch()."""
        if fetch_spec is not None:
            raise AssertionError("Not implemented yet...")
        if (not self.source.supports_rich_root()
            and self.target.supports_rich_root()):
            self._converting_to_rich_root = True
            self._revision_id_to_root_id = {}
        else:
            self._converting_to_rich_root = False
        revision_ids = self.target.search_missing_revision_ids(self.source,
            revision_id, find_ghosts=find_ghosts).get_keys()
        if not revision_ids:
            return 0, 0
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        if not revision_ids:
            return 0, 0
        # Walk though all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            symbol_versioning.warn(
                symbol_versioning.deprecated_in((1, 14, 0))
                % "pb parameter to fetch()")
            my_pb = None
        try:
            self._fetch_all_revisions(revision_ids, pb)
        finally:
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0

    def _get_basis(self, first_revision_id):
        """Get a revision and tree which exists in the target.

        This assumes that first_revision_id is selected for transmission
        because all other ancestors are already present. If we can't find an
        ancestor we fall back to NULL_REVISION since we know that is safe.

        :return: (basis_id, basis_tree)
        """
        first_rev = self.source.get_revision(first_revision_id)
        try:
            basis_id = first_rev.parent_ids[0]
            # only valid as a basis if the target has it
            self.target.get_revision(basis_id)
            # Try to get a basis tree - if it's a ghost it will hit the
            # NoSuchRevision case.
            basis_tree = self.source.revision_tree(basis_id)
        except (IndexError, errors.NoSuchRevision):
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
        return basis_id, basis_tree


InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)
InterRepository.register_optimiser(InterWeaveRepo)
InterRepository.register_optimiser(InterKnitRepo)
InterRepository.register_optimiser(InterModel1and2)
InterRepository.register_optimiser(InterKnit1and2)
InterRepository.register_optimiser(InterRemoteRepository)


class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """

    def step(self, message):
        """Update the pb by a step."""
        self.count += 1
        self.pb.update(message, self.count, self.total)


class CommitBuilder(object):
    """Provides an interface to build up a commit.

    This allows describing a tree to be committed without needing to
    know the internals of the format of the repository.
    """

    record_root_entry = False

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Initiate a CommitBuilder.

        :param repository: Repository to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        self._config = config

        if committer is None:
            self._committer = self._config.username()
        else:
            assert isinstance(committer, basestring), type(committer)
            self._committer = committer

        self.new_inventory = Inventory(None)
        self._new_revision_id = osutils.safe_revision_id(revision_id)
        self.parents = parents
        self.repository = repository

        self._revprops = {}
        if revprops is not None:
            self._revprops.update(revprops)

        if timestamp is None:
            timestamp = time.time()
        # Restrict resolution to 1ms
        self._timestamp = round(timestamp, 3)

        if timezone is None:
            self._timezone = osutils.local_time_offset()
        else:
            self._timezone = int(timezone)

        self._generate_revision_if_needed()

    def commit(self, message):
        """Make the actual commit.

        :return: The revision id of the recorded revision.
        """
        rev = _mod_revision.Revision(
                       timestamp=self._timestamp,
                       timezone=self._timezone,
                       committer=self._committer,
                       message=message,
                       inventory_sha1=self.inv_sha1,
                       revision_id=self._new_revision_id,
                       properties=self._revprops)
        rev.parent_ids = self.parents
        self.repository.add_revision(self._new_revision_id, rev,
            self.new_inventory, self._config)
        self.repository.commit_write_group()
        return self._new_revision_id

    def abort(self):
        """Abort the commit that is being built.
        """
        self.repository.abort_write_group()

    def revision_tree(self):
        """Return the tree that was just committed.

        After calling commit() this can be called to get a RevisionTree
        representing the newly committed tree. This is preferred to
        calling Repository.revision_tree() because that may require
        deserializing the inventory, while we already have a copy in
        memory.
        """
        return RevisionTree(self.repository, self.new_inventory,
                            self._new_revision_id)

    def finish_inventory(self):
        """Tell the builder that the inventory is finished."""
        if self.new_inventory.root is None:
            symbol_versioning.warn('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.',
                DeprecationWarning, stacklevel=2)
            self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
        self.new_inventory.revision_id = self._new_revision_id
        self.inv_sha1 = self.repository.add_inventory(
            self._new_revision_id,
            self.new_inventory,
            self.parents)

    def _gen_revision_id(self):
        """Return new revision-id."""
        return generate_ids.gen_revision_id(self._config.username(),
                                            self._timestamp)

    def _generate_revision_if_needed(self):
        """Create a revision id if None was supplied.

        If the repository can not support user-specified revision ids
        they should override this function and raise CannotSetRevisionId
        if _new_revision_id is not None.

        :raises: CannotSetRevisionId
        """
        if self._new_revision_id is None:
            self._new_revision_id = self._gen_revision_id()

    def record_entry_contents(self, ie, parent_invs, path, tree):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        """
        if self.new_inventory.root is None and ie.parent_id is not None:
            symbol_versioning.warn('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.',
                DeprecationWarning, stacklevel=2)
            self.record_entry_contents(tree.inventory.root.copy(), parent_invs,
                                       '', tree)
        self.new_inventory.add(ie)

        # ie.revision is always None if the InventoryEntry is considered
        # for committing. ie.snapshot will record the correct revision
        # which may be the sole parent if it is untouched.
        if ie.revision is not None:
            return

        # In this revision format, root entries have no knit or weave
        if ie is self.new_inventory.root:
            # When serializing out to disk and back in
            # root.revision is always _new_revision_id
            ie.revision = self._new_revision_id
            return
        previous_entries = ie.find_previous_heads(
            parent_invs,
            self.repository.weave_store,
            self.repository.get_transaction())
        # we are creating a new revision for ie in the history store
        # and inventory.
        ie.snapshot(self._new_revision_id, path, previous_entries, tree, self)

    def modified_directory(self, file_id, file_parents):
        """Record the modification of a directory.

        :param file_id: The file_id of the directory to record.
        :param file_parents: The per-file parent revision ids.
        """
        self._add_text_to_weave(file_id, [], file_parents.keys())

    def modified_reference(self, file_id, file_parents):
        """Record the modification of a reference.

        :param file_id: The file_id of the link to record.
        :param file_parents: The per-file parent revision ids.
        """
        self._add_text_to_weave(file_id, [], file_parents.keys())

    def modified_file_text(self, file_id, file_parents,
                           get_content_byte_lines, text_sha1=None,
                           text_size=None):
        """Record the text of file file_id

        :param file_id: The file_id of the file to record the text of.
        :param file_parents: The per-file parent revision ids.
        :param get_content_byte_lines: A callable which will return the byte
            lines for the file.
        :param text_sha1: Optional SHA1 of the file contents.
        :param text_size: Optional size of the file contents.
        """
        # mutter('storing text of file {%s} in revision {%s} into %r',
        #        file_id, self._new_revision_id, self.repository.weave_store)
        # special case to avoid diffing on renames or
        # reparenting
        if (len(file_parents) == 1
            and text_sha1 == file_parents.values()[0].text_sha1
            and text_size == file_parents.values()[0].text_size):
            previous_ie = file_parents.values()[0]
            versionedfile = self.repository.weave_store.get_weave(file_id,
                self.repository.get_transaction())
            versionedfile.clone_text(self._new_revision_id,
                previous_ie.revision, file_parents.keys())
            return text_sha1, text_size
        else:
            new_lines = get_content_byte_lines()
            # TODO: Rather than invoking sha_strings here, _add_text_to_weave
            # should return the SHA1 and size
            self._add_text_to_weave(file_id, new_lines, file_parents.keys())
            return osutils.sha_strings(new_lines), \
                sum(map(len, new_lines))

    def modified_link(self, file_id, file_parents, link_target):
        """Record the presence of a symbolic link.

        :param file_id: The file_id of the link to record.
        :param file_parents: The per-file parent revision ids.
        :param link_target: Target location of this link.
        """
        self._add_text_to_weave(file_id, [], file_parents.keys())

    def _add_text_to_weave(self, file_id, new_lines, parents):
        versionedfile = self.repository.weave_store.get_weave_or_empty(
            file_id, self.repository.get_transaction())
        versionedfile.add_lines(self._new_revision_id, parents, new_lines)
        versionedfile.clear_cache()


class _CommitBuilder(CommitBuilder):
    """Temporary class so old CommitBuilders are detected properly

    Note: CommitBuilder works whether or not root entry is recorded.
    """

    record_root_entry = True


class RootCommitBuilder(CommitBuilder):
    """This commitbuilder actually records the root id"""

    record_root_entry = True

    def record_entry_contents(self, ie, parent_invs, path, tree):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        """
        assert self.new_inventory.root is not None or ie.parent_id is None
        self.new_inventory.add(ie)

        # ie.revision is always None if the InventoryEntry is considered
        # for committing. ie.snapshot will record the correct revision
        # which may be the sole parent if it is untouched.
        if ie.revision is not None:
            return

        previous_entries = ie.find_previous_heads(
            parent_invs,
            self.repository.weave_store,
            self.repository.get_transaction())
        # we are creating a new revision for ie in the history store
        # and inventory.
        ie.snapshot(self._new_revision_id, path, previous_entries, tree, self)


_unescape_map = {
    'apos':"'",
    'quot':'"',
    'amp':'&',
    'lt':'<',
    'gt':'>'
}


def _unescaper(match, _map=_unescape_map):
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            raise
        return unichr(int(code[1:])).encode('utf8')


_unescape_re = None


def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data."""
    global _unescape_re
    if _unescape_re is None:
        _unescape_re = re.compile('\&([^;]*);')
    return _unescape_re.sub(_unescaper, data)


class _VersionedFileChecker(object):

    def __init__(self, repository, text_key_references=None, ancestors=None):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index(
            text_key_references=text_key_references, ancestors=ancestors)

    def calculate_file_version_parents(self, text_key):
        """Calculate the correct parents for a file version according to
        the inventories.
        """
        parent_keys = self.text_index[text_key]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        return tuple(parent_keys)

    def check_file_version_parents(self, texts, progress_bar=None):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct.  dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this versioned
            file, but not used by the corresponding inventory.
        """
        local_progress = None
        if progress_bar is None:
            local_progress = ui.ui_factory.nested_progress_bar()
            progress_bar = local_progress
        try:
            return self._check_file_version_parents(texts, progress_bar)
        finally:
            if local_progress is not None:
                local_progress.finished()

    def _check_file_version_parents(self, texts, progress_bar):
        """See check_file_version_parents."""
        wrong_parents = {}
        self.file_ids = set([file_id for file_id, _ in
            self.text_index.iterkeys()])
        # text keys is now grouped by file_id
        n_versions = len(self.text_index)
        progress_bar.update('loading text store', 0, n_versions)
        parent_map = self.repository.texts.get_parent_map(self.text_index)
        # On unlistable transports this could well be empty/error...
        text_keys = self.repository.texts.keys()
        unused_keys = frozenset(text_keys) - set(self.text_index)
        for num, key in enumerate(self.text_index.iterkeys()):
            progress_bar.update('checking text graph', num, n_versions)
            correct_parents = self.calculate_file_version_parents(key)
            try:
                knit_parents = parent_map[key]
            except errors.RevisionNotPresent:
                # Missing text!
                knit_parents = None
            if correct_parents != knit_parents:
                wrong_parents[key] = (knit_parents, correct_parents)
        return wrong_parents, unused_keys
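
    # Interpreting the result (sketch; ``repo`` is an assumed name):
    #
    #   checker = _VersionedFileChecker(repo)
    #   wrong_parents, unused = checker.check_file_version_parents(repo.texts)
    #   for (file_id, revision_id), (stored, correct) in wrong_parents.items():
    #       note('%s:%s stored %r but should be %r',
    #            file_id, revision_id, stored, correct)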


def _old_get_graph(repository, revision_id):
    """DO NOT USE. That is all. I'm serious."""
    graph = repository.get_graph()
    revision_graph = dict(((key, value) for key, value in
        graph.iter_ancestry([revision_id]) if value is not None))
    return _strip_NULL_ghosts(revision_graph)


def _strip_NULL_ghosts(revision_graph):
    """Also don't use this. More compatibility code for unmigrated clients."""
    # Filter ghosts, and null:
    if _mod_revision.NULL_REVISION in revision_graph:
        del revision_graph[_mod_revision.NULL_REVISION]
    for key, parents in revision_graph.items():
        revision_graph[key] = tuple(parent for parent in parents if parent
            in revision_graph)
    return revision_graph
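
# Sketch of the stripping behaviour (illustrative):
#
#   g = {'A': ('null:',), 'B': ('A', 'ghost')}
#   _strip_NULL_ghosts(g)   # -> {'A': (), 'B': ('A',)}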


class StreamSink(object):
    """An object that can insert a stream into a repository.

    This interface handles the complexity of reserialising inventories and
    revisions from different formats, and allows unidirectional insertion into
    stacked repositories without looking for the missing basis parents
    beforehand.
    """

    def __init__(self, target_repo):
        self.target_repo = target_repo

    def insert_stream(self, stream, src_format, resume_tokens):
        """Insert a stream's content into the target repository.

        :param src_format: a bzr repository format.

        :return: a list of resume tokens and an iterable of keys additional
            items required before the insertion can be completed.
        """
        self.target_repo.lock_write()
        try:
            if resume_tokens:
                self.target_repo.resume_write_group(resume_tokens)
                is_resume = True
            else:
                self.target_repo.start_write_group()
                is_resume = False
            try:
                # locked_insert_stream performs a commit|suspend.
                return self._locked_insert_stream(stream, src_format, is_resume)
            except:
                self.target_repo.abort_write_group(suppress_errors=True)
                raise
        finally:
            self.target_repo.unlock()

    def _locked_insert_stream(self, stream, src_format, is_resume):
        to_serializer = self.target_repo._format._serializer
        src_serializer = src_format._serializer
        new_pack = None
        if to_serializer == src_serializer:
            # If serializers match and the target is a pack repository, set the
            # write cache size on the new pack.  This avoids poor performance
            # on transports where append is unbuffered (such as
            # RemoteTransport).  This is safe to do because nothing should read
            # back from the target repository while a stream with matching
            # serialization is being inserted.
            # The exception is that a delta record from the source that should
            # be a fulltext may need to be expanded by the target (see
            # test_fetch_revisions_with_deltas_into_pack); but we take care to
            # explicitly flush any buffered writes first in that rare case.
            try:
                new_pack = self.target_repo._pack_collection._new_pack
            except AttributeError:
                # Not a pack repository
                pass
            else:
                new_pack.set_write_cache_size(1024*1024)
        for substream_type, substream in stream:
            if 'stream' in debug.debug_flags:
                mutter('inserting substream: %s', substream_type)
            if substream_type == 'texts':
                self.target_repo.texts.insert_record_stream(substream)
            elif substream_type == 'inventories':
                if src_serializer == to_serializer:
                    self.target_repo.inventories.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_inventories(
                        substream, src_serializer)
            elif substream_type == 'inventory-deltas':
                self._extract_and_insert_inventory_deltas(
                    substream, src_serializer)
            elif substream_type == 'chk_bytes':
                # XXX: This doesn't support conversions, as it assumes the
                #      conversion was done in the fetch code.
                self.target_repo.chk_bytes.insert_record_stream(substream)
            elif substream_type == 'revisions':
                # This may fallback to extract-and-insert more often than
                # required if the serializers are different only in terms of
                # the inventory.
                if src_serializer == to_serializer:
                    self.target_repo.revisions.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_revisions(substream,
                        src_serializer)
            elif substream_type == 'signatures':
                self.target_repo.signatures.insert_record_stream(substream)
            else:
                raise AssertionError('kaboom! %s' % (substream_type,))
        # Done inserting data, and the missing_keys calculations will try to
        # read back from the inserted data, so flush the writes to the new pack
        # (if this is pack format).
        if new_pack is not None:
            new_pack._write_data('', flush=True)
        # Find all the new revisions (including ones from resume_tokens)
        missing_keys = self.target_repo.get_missing_parent_inventories(
            check_for_missing_texts=is_resume)
        try:
            for prefix, versioned_file in (
                ('texts', self.target_repo.texts),
                ('inventories', self.target_repo.inventories),
                ('revisions', self.target_repo.revisions),
                ('signatures', self.target_repo.signatures),
                ('chk_bytes', self.target_repo.chk_bytes),
                ):
                if versioned_file is None:
                    continue
                missing_keys.update((prefix,) + key for key in
                    versioned_file.get_missing_compression_parent_keys())
        except NotImplementedError:
            # cannot even attempt suspending, and missing would have failed
            # during stream insertion.
            missing_keys = set()
        else:
            if missing_keys:
                # suspend the write group and tell the caller what is
                # missing. We know we can suspend or else we would not have
                # entered this code path. (All repositories that can handle
                # missing keys can handle suspending a write group).
                write_group_tokens = self.target_repo.suspend_write_group()
                return write_group_tokens, missing_keys
        hint = self.target_repo.commit_write_group()
        if (to_serializer != src_serializer and
            self.target_repo._format.pack_compresses):
            self.target_repo.pack(hint=hint)
        return [], set()
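
    # The suspend/resume protocol in miniature (illustrative; ``sink``,
    # ``source`` and ``stream`` are assumed names): a first call may return
    # resume tokens plus the keys still missing, and a second call with
    # those tokens completes the suspended write group.
    #
    #   tokens, missing = sink.insert_stream(stream, src_format, [])
    #   if missing:
    #       extra = source.get_stream_for_missing_keys(missing)
    #       tokens, missing = sink.insert_stream(extra, src_format, tokens)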

    def _extract_and_insert_inventory_deltas(self, substream, serializer):
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # Insert the delta directly
            inventory_delta_bytes = record.get_bytes_as('fulltext')
            deserialiser = inventory_delta.InventoryDeltaDeserializer()
            try:
                parse_result = deserialiser.parse_text_bytes(
                    inventory_delta_bytes)
            except inventory_delta.IncompatibleInventoryDelta, err:
                trace.mutter("Incompatible delta: %s", err.msg)
                raise errors.IncompatibleRevision(self.target_repo._format)
            basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result
            revision_id = new_id
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory_by_delta(
                basis_id, inv_delta, revision_id, parents)

    def _extract_and_insert_inventories(self, substream, serializer):
        """Generate a new inventory versionedfile in target, converting data.

        The inventory is retrieved from the source, (deserializing it), and
        stored in the target (reserializing it in a different format).
        """
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # It's not a delta, so it must be a fulltext in the source
            # serializer's format.
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            inv = serializer.read_inventory_from_string(bytes, revision_id)
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory(revision_id, inv, parents)
            # No need to keep holding this full inv in memory when the rest of
            # the substream is likely to be all deltas.
            del inv

    def _extract_and_insert_revisions(self, substream, serializer):
        for record in substream:
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            rev = serializer.read_revision_from_string(bytes)
            if rev.revision_id != revision_id:
                raise AssertionError('wtf: %s != %s' % (rev, revision_id))
            self.target_repo.add_revision(revision_id, rev)

    def finished(self):
        if self.target_repo._format._fetch_reconcile:
            self.target_repo.reconcile()


class StreamSource(object):
    """A source of a stream for fetching between repositories."""

    def __init__(self, from_repository, to_format):
        """Create a StreamSource streaming from from_repository."""
        self.from_repository = from_repository
        self.to_format = to_format

    def delta_on_metadata(self):
        """Return True if deltas are permitted on metadata streams.

        That is on revisions and signatures.
        """
        src_serializer = self.from_repository._format._serializer
        target_serializer = self.to_format._serializer
        return (self.to_format._fetch_uses_deltas and
            src_serializer == target_serializer)
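
    # Decision table for delta_on_metadata() (sketch): deltas are only safe
    # on metadata when the bytes will not be reserialised on the way in.
    #
    #   _fetch_uses_deltas   serializers match   -> delta_on_metadata()
    #        True                 True                  True
    #        True                 False                 False
    #        False                either                False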
    def _fetch_revision_texts(self, revs):
        # fetch signatures first and then the revision texts
        # may need to be an InterRevisionStore call here.
        from_sf = self.from_repository.signatures
        # A missing signature is just skipped.
        keys = [(rev_id,) for rev_id in revs]
        signatures = versionedfile.filter_absent(from_sf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.to_format._fetch_uses_deltas))
        # If a revision has a delta, this is actually expanded inside the
        # insert_record_stream code now, which is an alternate fix for
        # bug #261339
        from_rf = self.from_repository.revisions
        revisions = from_rf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.delta_on_metadata())
        return [('signatures', signatures), ('revisions', revisions)]
4424
    def _generate_root_texts(self, revs):
        """This will be called by get_stream between fetching weave texts and
        fetching the inventory weave.
        """
        if self._rich_root_upgrade():
            import bzrlib.fetch
            return bzrlib.fetch.Inter1and2Helper(
                self.from_repository).generate_root_texts(revs)
        else:
            return []

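    # Root texts only need to be synthesized when upgrading from a
    # non-rich-root format to a rich-root one; see _rich_root_upgrade
    # below.
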
    def get_stream(self, search):
        phase = 'file'
        revs = search.get_keys()
        graph = self.from_repository.get_graph()
        revs = tsort.topo_sort(graph.get_parent_map(revs))
        data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
        text_keys = []
        for knit_kind, file_id, revisions in data_to_fetch:
            if knit_kind != phase:
                phase = knit_kind
                # Make a new progress bar for this phase
            if knit_kind == "file":
                # Accumulate file texts
                text_keys.extend([(file_id, revision) for revision in
                    revisions])
            elif knit_kind == "inventory":
                # Now copy the file texts.
                from_texts = self.from_repository.texts
                yield ('texts', from_texts.get_record_stream(
                    text_keys, self.to_format._fetch_order,
                    not self.to_format._fetch_uses_deltas))
                # Cause an error if a text occurs after we have done the
                # copy.
                text_keys = None
                # Before we process the inventory we generate the root
                # texts (if necessary) so that the inventories reference
                # them.
                for _ in self._generate_root_texts(revs):
                    yield _
                # We fetch only the referenced inventories because we do not
                # know for unselected inventories whether all their required
                # texts are present in the other repository - it could be
                # corrupt.
                for info in self._get_inventory_stream(revs):
                    yield info
            elif knit_kind == "signatures":
                # Nothing to do here; this will be taken care of when
                # _fetch_revision_texts happens.
                pass
            elif knit_kind == "revisions":
                for record in self._fetch_revision_texts(revs):
                    yield record
            else:
                raise AssertionError("Unknown knit kind %r" % knit_kind)

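    # For a typical fetch, get_stream therefore yields substreams in the
    # order: 'texts', any root-text substreams from _generate_root_texts,
    # the inventory substream(s), and finally 'signatures' and 'revisions'
    # (both produced by _fetch_revision_texts).
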
    def get_stream_for_missing_keys(self, missing_keys):
        # missing keys can only occur when we are byte copying and not
        # translating (because translation means we don't send
        # unreconstructable deltas ever).
        keys = {}
        keys['texts'] = set()
        keys['revisions'] = set()
        keys['inventories'] = set()
        keys['chk_bytes'] = set()
        keys['signatures'] = set()
        for key in missing_keys:
            keys[key[0]].add(key[1:])
        if len(keys['revisions']):
            # If we allowed copying revisions at this point, we could end up
            # copying a revision without copying its required texts: a
            # violation of the requirements for repository integrity.
            raise AssertionError(
                'cannot copy revisions to fill in missing deltas %s' % (
                    keys['revisions'],))
        for substream_kind, keys in keys.iteritems():
            vf = getattr(self.from_repository, substream_kind)
            if vf is None and keys:
                raise AssertionError(
                    "cannot fill in keys for a versioned file we don't"
                    " have: %s needs %s" % (substream_kind, keys))
            if not keys:
                # No need to stream something we don't have
                continue
            if substream_kind == 'inventories':
                # Some missing keys are genuinely ghosts, filter those out.
                present = self.from_repository.inventories.get_parent_map(keys)
                revs = [key[0] for key in present]
                # Get the inventory stream more-or-less as we do for the
                # original stream; there's no reason to assume that records
                # direct from the source will be suitable for the sink. (Think
                # e.g. 2a -> 1.9-rich-root).
                for info in self._get_inventory_stream(revs, missing=True):
                    yield info
                continue
            # Ask for full texts always so that we don't need more round trips
            # after this stream.
            # Some of the missing keys are genuinely ghosts, so filter absent
            # records. The Sink is responsible for doing another check to
            # ensure that ghosts don't introduce missing data for future
            # fetches.
            stream = versionedfile.filter_absent(vf.get_record_stream(keys,
                self.to_format._fetch_order, True))
            yield substream_kind, stream

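    # Each missing key is a tuple whose first element names the substream:
    # e.g. a missing text key ('texts', file_id, revision_id) is recorded
    # above as (file_id, revision_id) in keys['texts'].
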
    def inventory_fetch_order(self):
        if self._rich_root_upgrade():
            return 'topological'
        else:
            return self.to_format._fetch_order

    def _rich_root_upgrade(self):
        return (not self.from_repository._format.rich_root_data and
            self.to_format.rich_root_data)

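    # i.e. True only when streaming from a non-rich-root format into a
    # rich-root one, which is the case where root texts must be synthesized.
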
    def _get_inventory_stream(self, revision_ids, missing=False):
        from_format = self.from_repository._format
        if (from_format.supports_chks and self.to_format.supports_chks and
            from_format.network_name() == self.to_format.network_name()):
            raise AssertionError(
                "this case should be handled by GroupCHKStreamSource")
        elif 'forceinvdeltas' in debug.debug_flags:
            return self._get_convertable_inventory_stream(revision_ids,
                    delta_versus_null=missing)
        elif from_format.network_name() == self.to_format.network_name():
            # Same format.
            return self._get_simple_inventory_stream(revision_ids,
                    missing=missing)
        elif (not from_format.supports_chks and not self.to_format.supports_chks
                and from_format._serializer == self.to_format._serializer):
            # Essentially the same format.
            return self._get_simple_inventory_stream(revision_ids,
                    missing=missing)
        else:
            # Any time we switch serializations, we want to use an
            # inventory-delta based approach.
            return self._get_convertable_inventory_stream(revision_ids,
                    delta_versus_null=missing)

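    # Summary of the dispatch above:
    #   'forceinvdeltas' debug flag    -> inventory-deltas stream
    #   same network name              -> simple (byte-for-byte) stream
    #   same serializer, both non-chk  -> simple stream
    #   anything else                  -> inventory-deltas stream
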
    def _get_simple_inventory_stream(self, revision_ids, missing=False):
        # NB: This currently reopens the inventory weave in source;
        # using a single stream interface instead would avoid this.
        from_weave = self.from_repository.inventories
        if missing:
            delta_closure = True
        else:
            delta_closure = not self.delta_on_metadata()
        yield ('inventories', from_weave.get_record_stream(
            [(rev_id,) for rev_id in revision_ids],
            self.inventory_fetch_order(), delta_closure))

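    # The final argument to get_record_stream is include_delta_closure:
    # when True, the stream must let the sink reconstruct fulltexts without
    # further round trips, which is why missing-key fills always pass True.
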
    def _get_convertable_inventory_stream(self, revision_ids,
                                          delta_versus_null=False):
        # The source is using CHKs, but the target either doesn't or it has a
        # different serializer. The StreamSink code expects to be able to
        # convert on the target, so we need to put bytes-on-the-wire that can
        # be converted. That means inventory deltas (if the remote is <1.19,
        # RemoteStreamSink will fall back to VFS to insert the deltas).
        yield ('inventory-deltas',
           self._stream_invs_as_deltas(revision_ids,
                                       delta_versus_null=delta_versus_null))

    def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
        """Return a stream of inventory-deltas for the given rev ids.

        :param revision_ids: The list of inventories to transmit
        :param delta_versus_null: Don't try to find a minimal delta for this
            entry, instead compute the delta versus the NULL_REVISION. This
            effectively streams a complete inventory. Used for tasks such as
            filling in missing parents.
        """
        from_repo = self.from_repository
        revision_keys = [(rev_id,) for rev_id in revision_ids]
        parent_map = from_repo.inventories.get_parent_map(revision_keys)
        # XXX: possibly repos could implement a more efficient iter_inv_deltas
        # method...
        inventories = self.from_repository.iter_inventories(
            revision_ids, 'topological')
        format = from_repo._format
        invs_sent_so_far = set([_mod_revision.NULL_REVISION])
        inventory_cache = lru_cache.LRUCache(50)
        null_inventory = from_repo.revision_tree(
            _mod_revision.NULL_REVISION).inventory
        # XXX: ideally the rich-root/tree-refs flags would be per-revision, not
        # per-repo (e.g. streaming a non-rich-root revision out of a rich-root
        # repo back into a non-rich-root repo ought to be allowed)
        serializer = inventory_delta.InventoryDeltaSerializer(
            versioned_root=format.rich_root_data,
            tree_references=format.supports_tree_reference)
        for inv in inventories:
            key = (inv.revision_id,)
            parent_keys = parent_map.get(key, ())
            delta = None
            if not delta_versus_null and parent_keys:
                # The caller did not ask for complete inventories and we have
                # some parents that we can delta against. Make a delta against
                # each parent so that we can find the smallest.
                parent_ids = [parent_key[0] for parent_key in parent_keys]
                for parent_id in parent_ids:
                    if parent_id not in invs_sent_so_far:
                        # We don't know that the remote side has this basis, so
                        # we can't use it.
                        continue
                    if parent_id == _mod_revision.NULL_REVISION:
                        parent_inv = null_inventory
                    else:
                        parent_inv = inventory_cache.get(parent_id, None)
                        if parent_inv is None:
                            parent_inv = from_repo.get_inventory(parent_id)
                    candidate_delta = inv._make_delta(parent_inv)
                    if (delta is None or
                        len(delta) > len(candidate_delta)):
                        delta = candidate_delta
                        basis_id = parent_id
            if delta is None:
                # Either none of the parents ended up being suitable, or we
                # were asked to delta against NULL
                basis_id = _mod_revision.NULL_REVISION
                delta = inv._make_delta(null_inventory)
            invs_sent_so_far.add(inv.revision_id)
            inventory_cache[inv.revision_id] = inv
            delta_serialized = ''.join(
                serializer.delta_to_lines(basis_id, key[-1], delta))
            yield versionedfile.FulltextContentFactory(
                key, parent_keys, None, delta_serialized)

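# Note: the LRUCache(50) in _stream_invs_as_deltas bounds how many full
# inventories are kept around as candidate delta bases; a basis that has
# been evicted is simply re-read via get_inventory, trading extra reads
# for bounded memory use.
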
def _iter_for_revno(repo, partial_history_cache, stop_index=None,
                    stop_revision=None):
    """Extend the partial history to include a given index

    If a stop_index is supplied, stop when that index has been reached.
    If a stop_revision is supplied, stop when that revision is
    encountered. Otherwise, stop when the beginning of history is
    reached.

    :param stop_index: The index which should be present. When it is
        present, history extension will stop.
    :param stop_revision: The revision id which should be present. When
        it is encountered, history extension will stop.
    """
    start_revision = partial_history_cache[-1]
    iterator = repo.iter_reverse_revision_history(start_revision)
    try:
        # skip the last revision in the list
        iterator.next()
        while True:
            if (stop_index is not None and
                len(partial_history_cache) > stop_index):
                break
            if partial_history_cache[-1] == stop_revision:
                break
            revision_id = iterator.next()
            partial_history_cache.append(revision_id)
    except StopIteration: