# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
from cStringIO import StringIO

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib import (
    revision as _mod_revision,
    )
from bzrlib.bundle import serializer
from bzrlib.revisiontree import RevisionTree
from bzrlib.store.versioned import VersionedFileStore
from bzrlib.testament import Testament
""")
from bzrlib.decorators import needs_read_lock, needs_write_lock
from bzrlib.inter import InterObject
from bzrlib.inventory import (
    Inventory,
    InventoryDirectory,
    ROOT_ID,
    entry_factory,
    )
from bzrlib import registry
from bzrlib.symbol_versioning import (
from bzrlib.trace import (
    log_exception_quietly, note, mutter, mutter_callsite, warning)

# Old formats display a warning, but only once
_deprecation_warning_done = False


class CommitBuilder(object):
    """Provides an interface to build up a commit.

    This allows describing a tree to be committed without needing to
    know the internals of the format of the repository.
    """

    # all clients should supply tree roots.
    record_root_entry = True
    # the default CommitBuilder does not manage trees whose root is versioned.
    _versioned_root = False

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Initialize a CommitBuilder.

        :param repository: Repository to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        self._config = config

        if committer is None:
            self._committer = self._config.username()
        else:
            self._committer = committer

        self.new_inventory = Inventory(None)
        self._new_revision_id = revision_id
        self.parents = parents
        self.repository = repository

        self._revprops = {}
        if revprops is not None:
            self._validate_revprops(revprops)
            self._revprops.update(revprops)

        if timestamp is None:
            timestamp = time.time()
        # Restrict resolution to 1ms
        self._timestamp = round(timestamp, 3)

        if timezone is None:
            self._timezone = osutils.local_time_offset()
        else:
            self._timezone = int(timezone)

        self._generate_revision_if_needed()
        self.__heads = graph.HeadsCache(repository.get_graph()).heads
        self._basis_delta = []
        # API compatibility, older code that used CommitBuilder did not call
        # .record_delete(), which means the delta that is computed would not be
        # valid. Callers that will call record_delete() should call
        # .will_record_deletes() to indicate that.
        self._recording_deletes = False
        # memo'd check for no-op commits.
        self._any_changes = False

    def any_changes(self):
        """Return True if any entries were changed.

        This includes merge-only changes. It is the core for the --unchanged
        detection in commit.

        :return: True if any changes have occurred.
        """
        return self._any_changes

    def _validate_unicode_text(self, text, context):
        """Verify things like commit messages don't have bogus characters."""
        if '\r' in text:
            raise ValueError('Invalid value for %s: %r' % (context, text))

    def _validate_revprops(self, revprops):
        for key, value in revprops.iteritems():
            # We know that the XML serializers do not round trip '\r'
            # correctly, so refuse to accept them
            if not isinstance(value, basestring):
                raise ValueError('revision property (%s) is not a valid'
                                 ' (unicode) string: %r' % (key, value))
            self._validate_unicode_text(value,
                                        'revision property (%s)' % (key,))

    def commit(self, message):
        """Make the actual commit.

        :return: The revision id of the recorded revision.
        """
        self._validate_unicode_text(message, 'commit message')
        rev = _mod_revision.Revision(
                       timestamp=self._timestamp,
                       timezone=self._timezone,
                       committer=self._committer,
                       message=message,
                       inventory_sha1=self.inv_sha1,
                       revision_id=self._new_revision_id,
                       properties=self._revprops)
        rev.parent_ids = self.parents
        self.repository.add_revision(self._new_revision_id, rev,
            self.new_inventory, self._config)
        self.repository.commit_write_group()
        return self._new_revision_id
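
    # Illustrative sketch (not part of the class): the expected lifecycle of
    # a CommitBuilder, assuming a write-locked repository `repo` that is in a
    # write group, plus caller-supplied `branch`, `parents`, `config`, `tree`
    # and per-entry data:
    #
    #   builder = repo.get_commit_builder(branch, parents, config)
    #   for path, ie in entries_to_commit:
    #       builder.record_entry_contents(ie, parent_invs, path, tree,
    #           content_summary)
    #   builder.finish_inventory()
    #   rev_id = builder.commit('commit message')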

    def abort(self):
        """Abort the commit that is being built.
        """
        self.repository.abort_write_group()

    def revision_tree(self):
        """Return the tree that was just committed.

        After calling commit() this can be called to get a RevisionTree
        representing the newly committed tree. This is preferred to
        calling Repository.revision_tree() because that may require
        deserializing the inventory, while we already have a copy in
        memory.
        """
        if self.new_inventory is None:
            self.new_inventory = self.repository.get_inventory(
                self._new_revision_id)
        return RevisionTree(self.repository, self.new_inventory,
                            self._new_revision_id)

    def finish_inventory(self):
        """Tell the builder that the inventory is finished.

        :return: The inventory id in the repository, which can be used with
            repository.get_inventory.
        """
        if self.new_inventory is None:
            # an inventory delta was accumulated without creating a new
            # inventory.
            basis_id = self.basis_delta_revision
            self.inv_sha1 = self.repository.add_inventory_by_delta(
                basis_id, self._basis_delta, self._new_revision_id,
                self.parents)
        else:
            if self.new_inventory.root is None:
                raise AssertionError('Root entry should be supplied to'
                    ' record_entry_contents, as of bzr 0.10.')
                self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
            self.new_inventory.revision_id = self._new_revision_id
            self.inv_sha1 = self.repository.add_inventory(
                self._new_revision_id,
                self.new_inventory,
                self.parents
                )
        return self._new_revision_id

    def _gen_revision_id(self):
        """Return new revision-id."""
        return generate_ids.gen_revision_id(self._config.username(),
                                            self._timestamp)

    def _generate_revision_if_needed(self):
        """Create a revision id if None was supplied.

        If the repository cannot support user-specified revision ids
        they should override this function and raise CannotSetRevisionId
        if _new_revision_id is not None.

        :raises: CannotSetRevisionId
        """
        if self._new_revision_id is None:
            self._new_revision_id = self._gen_revision_id()
            self.random_revid = True
        else:
            self.random_revid = False

    def _heads(self, file_id, revision_ids):
        """Calculate the graph heads for revision_ids in the graph of file_id.

        This can use either a per-file graph or a global revision graph as we
        have an identity relationship between the two graphs.
        """
        return self.__heads(revision_ids)

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
        # In this revision format, root entries have no knit or weave. When
        # serializing out to disk and back in, root.revision is always
        # the new revision_id.
        ie.revision = self._new_revision_id

    def _require_root_change(self, tree):
        """Enforce an appropriate root object change.

        This is called once when record_iter_changes is called, if and only if
        the root was not in the delta calculated by record_iter_changes.

        :param tree: The tree which is being committed.
        """
        # NB: if there are no parents then this method is not called, so no
        # need to guard on parents having length.
        entry = entry_factory['directory'](tree.path2id(''), '',
            None)
        entry.revision = self._new_revision_id
        self._basis_delta.append(('', '', entry.file_id, entry))

    def _get_delta(self, ie, basis_inv, path):
        """Get a delta against the basis inventory for ie."""
        if ie.file_id not in basis_inv:
            # add
            result = (None, path, ie.file_id, ie)
            self._basis_delta.append(result)
            return result
        elif ie != basis_inv[ie.file_id]:
            # common but altered
            # TODO: avoid this id2path call.
            result = (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
            self._basis_delta.append(result)
            return result
        else:
            # common, unaltered
            return None

    def get_basis_delta(self):
        """Return the complete inventory delta versus the basis inventory.

        This has been built up with the calls to record_delete and
        record_entry_contents. The client must have already called
        will_record_deletes() to indicate that they will be generating a
        complete delta.

        :return: An inventory delta, suitable for use with apply_delta, or
            Repository.add_inventory_by_delta, etc.
        """
        if not self._recording_deletes:
            raise AssertionError("recording deletes not activated.")
        return self._basis_delta

    def record_delete(self, path, file_id):
        """Record that a delete occurred against a basis tree.

        This is an optional API - when used it adds items to the basis_delta
        being accumulated by the commit builder. It cannot be called unless the
        method will_record_deletes() has been called to inform the builder that
        a delta is being supplied.

        :param path: The path of the thing deleted.
        :param file_id: The file id that was deleted.
        """
        if not self._recording_deletes:
            raise AssertionError("recording deletes not activated.")
        delta = (path, None, file_id, None)
        self._basis_delta.append(delta)
        self._any_changes = True
        return delta

    def will_record_deletes(self):
        """Tell the commit builder that deletes are being notified.

        This enables the accumulation of an inventory delta; for the resulting
        commit to be valid, deletes against the basis MUST be recorded via
        builder.record_delete().
        """
        self._recording_deletes = True
        try:
            basis_id = self.parents[0]
        except IndexError:
            basis_id = _mod_revision.NULL_REVISION
        self.basis_delta_revision = basis_id
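
    # Illustrative sketch (assumed caller code, not part of the class): to
    # supply a delta, deletes must be announced before they are recorded:
    #
    #   builder.will_record_deletes()
    #   builder.record_delete('doomed-path', 'doomed-file-id')
    #   ...record the remaining entries...
    #   delta = builder.get_basis_delta()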

    def record_entry_contents(self, ie, parent_invs, path, tree,
        content_summary):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        :param content_summary: Summary data from the tree about the paths
            content - stat, length, exec, sha/link target. This is only
            accessed when the entry has a revision of None - that is when it is
            a candidate to commit.
        :return: A tuple (change_delta, version_recorded, fs_hash).
            change_delta is an inventory_delta change for this entry against
            the basis tree of the commit, or None if no change occurred against
            the basis tree.
            version_recorded is True if a new version of the entry has been
            recorded. For instance, committing a merge where a file was only
            changed on the other side will return (delta, False).
            fs_hash is either None, or the hash details for the path (currently
            a tuple of the contents sha1 and the statvalue returned by
            tree.get_file_with_stat()).
        """
        if self.new_inventory.root is None:
            if ie.parent_id is not None:
                raise errors.RootMissing()
            self._check_root(ie, parent_invs, tree)
        if ie.revision is None:
            kind = content_summary[0]
        else:
            # ie is carried over from a prior commit
            kind = ie.kind
        # XXX: repository specific check for nested tree support goes here - if
        # the repo doesn't want nested trees we skip it ?
        if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
            # mismatch between commit builder logic and repository:
            # this needs the entry creation pushed down into the builder.
            raise NotImplementedError('Missing repository subtree support.')
        self.new_inventory.add(ie)

        # TODO: slow, take it out of the inner loop.
        try:
            basis_inv = parent_invs[0]
        except IndexError:
            basis_inv = Inventory(root_id=None)

        # ie.revision is always None if the InventoryEntry is considered
        # for committing. We may record the previous parents revision if the
        # content is actually unchanged against a sole head.
        if ie.revision is not None:
            if not self._versioned_root and path == '':
                # repositories that do not version the root set the root's
                # revision to the new commit even when no change occurs (more
                # specifically, they do not record a revision on the root; and
                # the rev id is assigned to the root during deserialisation -
                # this masks when a change may have occurred against the basis.
                # To match this we always issue a delta, because the revision
                # of the root will always be changing.
                if ie.file_id in basis_inv:
                    delta = (basis_inv.id2path(ie.file_id), path,
                        ie.file_id, ie)
                else:
                    # add
                    delta = (None, path, ie.file_id, ie)
                self._basis_delta.append(delta)
                return delta, False, None
            else:
                # we don't need to commit this, because the caller already
                # determined that an existing revision of this file is
                # appropriate. If it's not being considered for committing then
                # it and all its parents to the root must be unaltered so
                # no-change against the basis.
                if ie.revision == self._new_revision_id:
                    raise AssertionError("Impossible situation, a skipped "
                        "inventory entry (%r) claims to be modified in this "
                        "commit (%r).", (ie, self._new_revision_id))
                return None, False, None
        # XXX: Friction: parent_candidates should return a list not a dict
        #      so that we don't have to walk the inventories again.
        parent_candidate_entries = ie.parent_candidates(parent_invs)
        head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
        heads = []
        for inv in parent_invs:
            if ie.file_id in inv:
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    heads.append(inv[ie.file_id].revision)
                    head_set.remove(inv[ie.file_id].revision)

        store = False
        # now we check to see if we need to write a new record to the
        # file-graph.
        # We write a new entry unless there is one head to the ancestors, and
        # the kind-derived content is unchanged.

        # Cheapest check first: no ancestors, or more than one head in the
        # ancestors, we write a new node.
        if len(heads) != 1:
            store = True
        if not store:
            # There is a single head, look it up for comparison
            parent_entry = parent_candidate_entries[heads[0]]
            # if the non-content specific data has changed, we'll be writing a
            # node:
            if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
                store = True
        # now we need to do content specific checks:
        if not store:
            # if the kind changed the content obviously has
            if kind != parent_entry.kind:
                store = True
        # Stat cache fingerprint feedback for the caller - None as we usually
        # don't generate one.
        fingerprint = None
        if kind == 'file':
            if content_summary[2] is None:
                raise ValueError("Files must not have executable = None")
            if not store:
                if (# if the file length changed we have to store:
                    parent_entry.text_size != content_summary[1] or
                    # if the exec bit has changed we have to store:
                    parent_entry.executable != content_summary[2]):
                    store = True
                elif parent_entry.text_sha1 == content_summary[3]:
                    # all meta and content is unchanged (using a hash cache
                    # hit to check the sha)
                    ie.revision = parent_entry.revision
                    ie.text_size = parent_entry.text_size
                    ie.text_sha1 = parent_entry.text_sha1
                    ie.executable = parent_entry.executable
                    return self._get_delta(ie, basis_inv, path), False, None
                else:
                    # Either there is only a hash change (no hash cache entry,
                    # or same size content change), or there is no change on
                    # this file at all.
                    # Provide the parent's hash to the store layer, so that if
                    # the content is unchanged we will not store a new node.
                    nostore_sha = parent_entry.text_sha1
            if store:
                # We want to record a new node regardless of the presence or
                # absence of a content change in the file.
                nostore_sha = None
            ie.executable = content_summary[2]
            file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
            try:
                lines = file_obj.readlines()
            finally:
                file_obj.close()
            try:
                ie.text_sha1, ie.text_size = self._add_text_to_weave(
                    ie.file_id, lines, heads, nostore_sha)
                # Let the caller know we generated a stat fingerprint.
                fingerprint = (ie.text_sha1, stat_value)
            except errors.ExistingContent:
                # Turns out that the file content was unchanged, and we were
                # only going to store a new node if it was changed. Carry over
                # the entry.
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False, None
        elif kind == 'directory':
            if not store:
                # all data is meta here, nothing specific to directory, so
                # carry over:
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False, None
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'symlink':
            current_link_target = content_summary[3]
            if not store:
                # symlink target is not generic metadata, check if it has
                # changed.
                if current_link_target != parent_entry.symlink_target:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.revision = parent_entry.revision
                ie.symlink_target = parent_entry.symlink_target
                return self._get_delta(ie, basis_inv, path), False, None
            ie.symlink_target = current_link_target
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'tree-reference':
            if not store:
                if content_summary[3] != parent_entry.reference_revision:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.reference_revision = parent_entry.reference_revision
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False, None
            ie.reference_revision = content_summary[3]
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        else:
            raise NotImplementedError('unknown kind')
        ie.revision = self._new_revision_id
        self._any_changes = True
        return self._get_delta(ie, basis_inv, path), True, fingerprint

    def record_iter_changes(self, tree, basis_revision_id, iter_changes,
        _entry_factory=entry_factory):
        """Record a new tree via iter_changes.

        :param tree: The tree to obtain text contents from for changed objects.
        :param basis_revision_id: The revision id of the tree the iter_changes
            has been generated against. Currently assumed to be the same
            as self.parents[0] - if it is not, errors may occur.
        :param iter_changes: An iter_changes iterator with the changes to apply
            to basis_revision_id. The iterator must not include any items with
            a current kind of None - missing items must be either filtered out
            or errored-on before record_iter_changes sees the item.
        :param _entry_factory: Private method to bind entry_factory locally for
            performance.
        :return: A generator of (file_id, relpath, fs_hash) tuples for use with
            tree._observed_sha1.
        """
        # Create an inventory delta based on deltas between all the parents and
        # deltas between all the parent inventories. We use inventory deltas
        # between the inventory objects because iter_changes masks
        # last-changed-field only changes.
        # Repository specific:
        # file_id -> change map, change is fileid, paths, changed, versioneds,
        # parents, names, kinds, executables
        merged_ids = {}
        # {file_id -> revision_id -> inventory entry, for entries in parent
        # trees that are not parents[0]
        parent_entries = {}
        ghost_basis = False
        try:
            revtrees = list(self.repository.revision_trees(self.parents))
        except errors.NoSuchRevision:
            # one or more ghosts, slow path.
            revtrees = []
            for revision_id in self.parents:
                try:
                    revtrees.append(self.repository.revision_tree(revision_id))
                except errors.NoSuchRevision:
                    if not revtrees:
                        basis_revision_id = _mod_revision.NULL_REVISION
                        ghost_basis = True
                    revtrees.append(self.repository.revision_tree(
                        _mod_revision.NULL_REVISION))
        # The basis inventory from a repository
        if revtrees:
            basis_inv = revtrees[0].inventory
        else:
            basis_inv = self.repository.revision_tree(
                _mod_revision.NULL_REVISION).inventory
        if len(self.parents) > 0:
            if basis_revision_id != self.parents[0] and not ghost_basis:
                raise Exception(
                    "arbitrary basis parents not yet supported with merges")
            for revtree in revtrees[1:]:
                for change in revtree.inventory._make_delta(basis_inv):
                    if change[1] is None:
                        # Not present in this parent.
                        continue
                    if change[2] not in merged_ids:
                        if change[0] is not None:
                            basis_entry = basis_inv[change[2]]
                            merged_ids[change[2]] = [
                                # basis revid
                                basis_entry.revision,
                                # new tree revid
                                change[3].revision]
                            parent_entries[change[2]] = {
                                # basis parent
                                basis_entry.revision:basis_entry,
                                # this parent
                                change[3].revision:change[3],
                                }
                        else:
                            merged_ids[change[2]] = [change[3].revision]
                            parent_entries[change[2]] = {change[3].revision:change[3]}
                    else:
                        merged_ids[change[2]].append(change[3].revision)
                        parent_entries[change[2]][change[3].revision] = change[3]
        # Setup the changes from the tree:
        # changes maps file_id -> (change, [parent revision_ids])
        changes = {}
        for change in iter_changes:
            # This probably looks up in basis_inv way too much.
            if change[1][0] is not None:
                head_candidate = [basis_inv[change[0]].revision]
            else:
                head_candidate = []
            changes[change[0]] = change, merged_ids.get(change[0],
                head_candidate)
        unchanged_merged = set(merged_ids) - set(changes)
        # Extend the changes dict with synthetic changes to record merges of
        # texts.
        for file_id in unchanged_merged:
            # Record a merged version of these items that did not change vs the
            # basis. This can be either identical parallel changes, or a revert
            # of a specific file after a merge. The recorded content will be
            # that of the current tree (which is the same as the basis), but
            # the per-file graph will reflect a merge.
            # NB:XXX: We are reconstructing path information we had, this
            # should be preserved instead.
            # inv delta change: (file_id, (path_in_source, path_in_target),
            #   changed_content, versioned, parent, name, kind,
            #   executable)
            try:
                basis_entry = basis_inv[file_id]
            except errors.NoSuchId:
                # a change from basis->some_parents but file_id isn't in basis
                # so was new in the merge, which means it must have changed
                # from basis -> current, and as it hasn't the add was reverted
                # by the user. So we discard this change.
                pass
            else:
                change = (file_id,
                    (basis_inv.id2path(file_id), tree.id2path(file_id)),
                    False, (True, True),
                    (basis_entry.parent_id, basis_entry.parent_id),
                    (basis_entry.name, basis_entry.name),
                    (basis_entry.kind, basis_entry.kind),
                    (basis_entry.executable, basis_entry.executable))
                changes[file_id] = (change, merged_ids[file_id])
        # changes contains tuples with the change and a set of inventory
        # candidates for the file.
        # inv delta is:
        # old_path, new_path, file_id, new_inventory_entry
        seen_root = False # Is the root in the basis delta?
        inv_delta = self._basis_delta
        modified_rev = self._new_revision_id
        for change, head_candidates in changes.values():
            if change[3][1]: # versioned in target.
                # Several things may be happening here:
                # We may have a fork in the per-file graph
                #  - record a change with the content from tree
                # We may have a change against < all trees
                #  - carry over the tree that hasn't changed
                # We may have a change against all trees
                #  - record the change with the content from tree
                kind = change[6][1]
                file_id = change[0]
                entry = _entry_factory[kind](file_id, change[5][1],
                    change[4][1])
                head_set = self._heads(change[0], set(head_candidates))
                heads = []
                # Preserve ordering.
                for head_candidate in head_candidates:
                    if head_candidate in head_set:
                        heads.append(head_candidate)
                        head_set.remove(head_candidate)
                carried_over = False
                if len(heads) == 1:
                    # Could be a carry-over situation:
                    parent_entry_revs = parent_entries.get(file_id, None)
                    if parent_entry_revs:
                        parent_entry = parent_entry_revs.get(heads[0], None)
                    else:
                        parent_entry = None
                    if parent_entry is None:
                        # The parent iter_changes was called against is the one
                        # that is the per-file head, so any change is relevant
                        # iter_changes is valid.
                        carry_over_possible = False
                    else:
                        # could be a carry over situation
                        # A change against the basis may just indicate a merge,
                        # we need to check the content against the source of the
                        # merge to determine if it was changed after the merge
                        # or carried over.
                        if (parent_entry.kind != entry.kind or
                            parent_entry.parent_id != entry.parent_id or
                            parent_entry.name != entry.name):
                            # Metadata common to all entries has changed
                            # against per-file parent
                            carry_over_possible = False
                        else:
                            carry_over_possible = True
                        # per-type checks for changes against the parent_entry
                        # are done below.
                else:
                    # Cannot be a carry-over situation
                    carry_over_possible = False
                # Populate the entry in the delta
                if kind == 'file':
                    # XXX: There is still a small race here: If someone reverts
                    # the content of a file after iter_changes examines and
                    # decides it has changed, we will unconditionally record a
                    # new version even if some other process reverts it while
                    # commit is running (with the revert happening after
                    # iter_changes did its examination).
                    if change[7][1]:
                        entry.executable = True
                    else:
                        entry.executable = False
                    if (carry_over_possible and
                        parent_entry.executable == entry.executable):
                        # Check the file length, content hash after reading
                        # the file.
                        nostore_sha = parent_entry.text_sha1
                    else:
                        nostore_sha = None
                    file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
                    try:
                        lines = file_obj.readlines()
                    finally:
                        file_obj.close()
                    try:
                        entry.text_sha1, entry.text_size = self._add_text_to_weave(
                            file_id, lines, heads, nostore_sha)
                        yield file_id, change[1][1], (entry.text_sha1, stat_value)
                    except errors.ExistingContent:
                        # No content change against a carry_over parent
                        # Perhaps this should also yield a fs hash update?
                        carried_over = True
                        entry.text_size = parent_entry.text_size
                        entry.text_sha1 = parent_entry.text_sha1
                elif kind == 'symlink':
                    entry.symlink_target = tree.get_symlink_target(file_id)
                    if (carry_over_possible and
                        parent_entry.symlink_target == entry.symlink_target):
                        carried_over = True
                    else:
                        self._add_text_to_weave(change[0], [], heads, None)
                elif kind == 'directory':
                    if carry_over_possible:
                        carried_over = True
                    else:
                        # Nothing to set on the entry.
                        # XXX: split into the Root and nonRoot versions.
                        if change[1][1] != '' or self.repository.supports_rich_root():
                            self._add_text_to_weave(change[0], [], heads, None)
                elif kind == 'tree-reference':
                    if not self.repository._format.supports_tree_reference:
                        # This isn't quite sane as an error, but we shouldn't
                        # ever see this code path in practice: trees don't
                        # permit references when the repo doesn't support tree
                        # references.
                        raise errors.UnsupportedOperation(tree.add_reference,
                            self.repository)
                    reference_revision = tree.get_reference_revision(change[0])
                    entry.reference_revision = reference_revision
                    if (carry_over_possible and
                        parent_entry.reference_revision == reference_revision):
                        carried_over = True
                    else:
                        self._add_text_to_weave(change[0], [], heads, None)
                else:
                    raise AssertionError('unknown kind %r' % kind)
                if not carried_over:
                    entry.revision = modified_rev
                else:
                    entry.revision = parent_entry.revision
            else:
                entry = None
            new_path = change[1][1]
            inv_delta.append((change[1][0], new_path, change[0], entry))
            if new_path == '':
                seen_root = True
        self.new_inventory = None
        if len(inv_delta):
            self._any_changes = True
        if not seen_root:
            # housekeeping root entry changes do not affect no-change commits.
            self._require_root_change(tree)
        self.basis_delta_revision = basis_revision_id

    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
        # Note: as we read the content directly from the tree, we know it's not
        # been turned into unicode or badly split - but a broken tree
        # implementation could give us bad output from readlines() so this is
        # not a guarantee of safety. What would be better is always checking
        # the content during test suite execution. RBC 20070912
        parent_keys = tuple((file_id, parent) for parent in parents)
        return self.repository.texts.add_lines(
            (file_id, self._new_revision_id), parent_keys, new_lines,
            nostore_sha=nostore_sha, random_id=self.random_revid,
            check_content=False)[0:2]


class RootCommitBuilder(CommitBuilder):
    """This commitbuilder actually records the root id"""

    # the root entry gets versioned properly by this builder.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """

    def _require_root_change(self, tree):
        """Enforce an appropriate root object change.

        This is called once when record_iter_changes is called, if and only if
        the root was not in the delta calculated by record_iter_changes.

        :param tree: The tree which is being committed.
        """
        # versioned roots do not change unless the tree found a change.


######################################################################
# Repositories


class Repository(object):
    """Repository holding history for one or more branches.

    The repository holds and retrieves historical information including
    revisions and file history. It's normally accessed only by the Branch,
    which views a particular line of development through that history.

    The Repository builds on top of some byte storage facilities (the
    revisions, signatures, inventories and texts attributes) and a Transport,
    which respectively provide byte storage and a means to access the
    (possibly remote) disk.

    The byte storage facilities are addressed via tuples, which we refer to
    as 'keys' throughout the code base. Revision_keys, inventory_keys and
    signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:
    (file_id, revision_id). We use this interface because it allows low
    friction with the underlying code that implements disk indices, network
    encoding and other parts of bzrlib.
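
    For example (illustrative, following the tuple shapes above)::

        revision_key = ('rev-id-1',)
        text_key = ('file-id-1', 'rev-id-1')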

    :ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing
        the serialised revisions for the repository. This can be used to obtain
        revision graph information or to access raw serialised revisions.
        The result of trying to insert data into the repository via this store
        is undefined: it should be considered read-only except for implementors
        of repositories.
    :ivar signatures: A bzrlib.versionedfile.VersionedFiles instance containing
        the serialised signatures for the repository. This can be used to
        obtain access to raw serialised signatures. The result of trying to
        insert data into the repository via this store is undefined: it should
        be considered read-only except for implementors of repositories.
    :ivar inventories: A bzrlib.versionedfile.VersionedFiles instance containing
        the serialised inventories for the repository. This can be used to
        obtain unserialised inventories. The result of trying to insert data
        into the repository via this store is undefined: it should be
        considered read-only except for implementors of repositories.
    :ivar texts: A bzrlib.versionedfile.VersionedFiles instance containing the
        texts of files and directories for the repository. This can be used to
        obtain file texts or file graphs. Note that Repository.iter_file_bytes
        is usually a better interface for accessing file texts.
        The result of trying to insert data into the repository via this store
        is undefined: it should be considered read-only except for implementors
        of repositories.
    :ivar _transport: Transport for file access to repository, typically
        pointing to .bzr/repository.
    """

    # What class to use for a CommitBuilder. Often it's simpler to change this
    # in a Repository class subclass rather than to override
    # get_commit_builder.
    _commit_builder_class = CommitBuilder
    # The search regex used by xml based repositories to determine what things
    # were changed in a single commit.
    _file_ids_altered_regex = lazy_regex.lazy_compile(
        r'file_id="(?P<file_id>[^"]+)"'
        r'.* revision="(?P<revision_id>[^"]+)"'
        )
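
    # For example (illustrative): an xml inventory line such as
    #   <file file_id="foo-id" name="foo" revision="rev-1"/>
    # matches with groups file_id='foo-id' and revision_id='rev-1'.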

    def abort_write_group(self, suppress_errors=False):
        """Abort the contents accrued within the current write group.

        :param suppress_errors: if true, abort_write_group will catch and log
            unexpected errors that happen during the abort, rather than
            allowing them to propagate. Defaults to False.

        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context and write group.')
        try:
            self._abort_write_group()
        except Exception, exc:
            self._write_group = None
            if not suppress_errors:
                raise
            mutter('abort_write_group failed')
            log_exception_quietly()
            note('bzr: ERROR (ignored): %s', exc)
        self._write_group = None

    def _abort_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called during abort before the write group is considered to be
        finished and should cleanup any internal state accrued during the write
        group. There is no requirement that data handed to the repository be
        *not* made available - this is not a rollback - but neither should any
        attempt be made to ensure that data added is fully committed. Abort is
        invoked when an error has occurred so further disk or network operations
        may not be possible or may error and if possible should not be
        attempted.
        """

    def add_fallback_repository(self, repository):
        """Add a repository to use for looking up data not held locally.

        :param repository: A repository.
        """
        if not self._format.supports_external_lookups:
            raise errors.UnstackableRepositoryFormat(self._format, self.base)
        self._check_fallback_repository(repository)
        self._fallback_repositories.append(repository)
        self.texts.add_fallback_versioned_files(repository.texts)
        self.inventories.add_fallback_versioned_files(repository.inventories)
        self.revisions.add_fallback_versioned_files(repository.revisions)
        self.signatures.add_fallback_versioned_files(repository.signatures)

    def _check_fallback_repository(self, repository):
        """Check that this repository can fallback to repository safely.

        Raise an error if not.

        :param repository: A repository to fallback to.
        """
        return InterRepository._assert_same_model(self, repository)

    def add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator (which is a sha1 digest, though what is sha'd
            is repository format specific) of the serialized inventory.
        """
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError()
        inv_lines = self._serialise_inventory_to_lines(inv)
        return self._inventory_add_lines(revision_id, parents,
            inv_lines, check_content=False)

    def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                               parents):
        """Add a new inventory expressed as a delta against another revision.

        :param basis_revision_id: The inventory id the delta was created
            against. (This does not have to be a direct parent.)
        :param delta: The inventory delta (see Inventory.apply_delta for
            details.)
        :param new_revision_id: The revision id that the inventory is being
            added for.
        :param parents: The revision ids of the parents that revision_id is
            known to have and are in the repository already. These are supplied
            for repositories that depend on the inventory graph for revision
            graph access, as well as for those that pun ancestry with delta
            compression.

        :returns: (validator, new_inv)
            The validator (which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory, and the
            resulting inventory.
        """
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(new_revision_id)
        basis_tree = self.revision_tree(basis_revision_id)
        basis_tree.lock_read()
        try:
            # Note that this mutates the inventory of basis_tree, which not all
            # inventory implementations may support: A better idiom would be to
            # return a new inventory, but as there is no revision tree cache in
            # repository this is safe for now - RBC 20081013
            basis_inv = basis_tree.inventory
            basis_inv.apply_delta(delta)
            basis_inv.revision_id = new_revision_id
            return (self.add_inventory(new_revision_id, basis_inv, parents),
                    basis_inv)
        finally:
            basis_tree.unlock()

    def _inventory_add_lines(self, revision_id, parents, lines,
        check_content=True):
        """Store lines in inv_vf and return the sha1 of the inventory."""
        parents = [(parent,) for parent in parents]
        return self.inventories.add_lines((revision_id,), parents, lines,
            check_content=check_content)[0]

    def add_revision(self, revision_id, rev, inv=None, config=None):
        """Add rev to the revision store as revision_id."""

        dest_repo = a_bzrdir.open_repository()
        return dest_repo

    def _get_sink(self):
        """Return a sink for streaming into this repository."""
        return StreamSink(self)

    def _get_source(self, to_format):
        """Return a source for streaming from this repository."""
        return StreamSource(self, to_format)

    @needs_read_lock
    def has_revision(self, revision_id):
        """True if this repository has a copy of the revision."""
        return revision_id in self.has_revisions((revision_id,))

    @needs_read_lock
    def has_revisions(self, revision_ids):
        """Probe to find out the presence of multiple revisions.

        :param revision_ids: An iterable of revision_ids.
        :return: A set of the revision_ids that were present.
        """
        parent_map = self.revisions.get_parent_map(
            [(rev_id,) for rev_id in revision_ids])
        result = set()
        if _mod_revision.NULL_REVISION in revision_ids:
            result.add(_mod_revision.NULL_REVISION)
        result.update([key[0] for key in parent_map])
        return result
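
    # Illustrative usage (assumed revision ids): probing several revisions at
    # once, where 'ghost-rev' is absent from the repository:
    #
    #   present = repo.has_revisions(['rev-1', 'ghost-rev'])
    #   # -> set(['rev-1'])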

    @needs_read_lock
    def get_revision(self, revision_id):
        """Return the Revision object for a named revision."""
        return self.get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revision_reconcile(self, revision_id):
        """'reconcile' helper routine that allows access to a revision always.

        This variant of get_revision does not cross check the weave graph
        against the revision one as get_revision does: but it should only
        be used by reconcile, or reconcile-alike commands that are correcting
        or testing the revision graph.
        """
        return self._get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revisions(self, revision_ids):
        """Get many revisions at once."""
        return self._get_revisions(revision_ids)

    @needs_read_lock
    def _get_revisions(self, revision_ids):
        """Core work logic to get many revisions without sanity checks."""
        for rev_id in revision_ids:
            if not rev_id or not isinstance(rev_id, basestring):
                raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
        keys = [(key,) for key in revision_ids]
        stream = self.revisions.get_record_stream(keys, 'unordered', True)
        revs = {}
        for record in stream:
            if record.storage_kind == 'absent':
                raise errors.NoSuchRevision(self, record.key[0])
            text = record.get_bytes_as('fulltext')
            rev = self._serializer.read_revision_from_string(text)
            revs[record.key[0]] = rev
        return [revs[revid] for revid in revision_ids]

    @needs_read_lock
    def get_revision_xml(self, revision_id):
        # TODO: jam 20070210 This shouldn't be necessary since get_revision
        #       would have already done it.
        # TODO: jam 20070210 Just use _serializer.write_revision_to_string()
        # TODO: this can't just be replaced by:
        # return self._serializer.write_revision_to_string(
        #     self.get_revision(revision_id))
        # as cStringIO preserves the encoding unlike write_revision_to_string
        # or some other call down the path.
        rev = self.get_revision(revision_id)
        rev_tmp = cStringIO.StringIO()
        # the current serializer..
        self._serializer.write_revision(rev, rev_tmp)
        rev_tmp.seek(0)
        return rev_tmp.getvalue()

    def get_deltas_for_revisions(self, revisions, specific_fileids=None):
"""Produce a generator of revision deltas.
1657
493
Note that the input is a sequence of REVISIONS, not revision_ids.
1658
494
Trees will be held in memory until the generator exits.
1659
495
Each delta is relative to the revision's lefthand predecessor.
1661
:param specific_fileids: if not None, the result is filtered
1662
so that only those file-ids, their parents and their
1663
children are included.
1665
# Get the revision-ids of interest
1666
497
required_trees = set()
1667
498
for revision in revisions:
1668
499
required_trees.add(revision.revision_id)
1669
500
required_trees.update(revision.parent_ids[:1])
1671
# Get the matching filtered trees. Note that it's more
1672
# efficient to pass filtered trees to changes_from() rather
1673
# than doing the filtering afterwards. changes_from() could
1674
# arguably do the filtering itself but it's path-based, not
1675
# file-id based, so filtering before or afterwards is
1677
if specific_fileids is None:
1678
trees = dict((t.get_revision_id(), t) for
1679
t in self.revision_trees(required_trees))
1681
trees = dict((t.get_revision_id(), t) for
1682
t in self._filtered_revision_trees(required_trees,
1685
# Calculate the deltas
501
trees = dict((t.get_revision_id(), t) for
502
t in self.revision_trees(required_trees))
1686
503
for revision in revisions:
1687
504
if not revision.parent_ids:
1688
old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
505
old_tree = self.revision_tree(None)
1690
507
old_tree = trees[revision.parent_ids[0]]
1691
508
yield trees[revision.revision_id].changes_from(old_tree)

    @needs_read_lock
    def get_revision_delta(self, revision_id, specific_fileids=None):
        """Return the delta for one revision.

        The delta is relative to the left-hand predecessor of the
        revision.

        :param specific_fileids: if not None, the result is filtered
            so that only those file-ids, their parents and their
            children are included.
        """
        r = self.get_revision(revision_id)
        return list(self.get_deltas_for_revisions([r],
            specific_fileids=specific_fileids))[0]

    @needs_write_lock
    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        signature = gpg_strategy.sign(plaintext)
        self.add_signature_text(revision_id, signature)

    @needs_write_lock
    def add_signature_text(self, revision_id, signature):
        self.signatures.add_lines((revision_id,), (),
            osutils.split_lines(signature))

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
        """
        revision_keys = self.revisions.keys()
        w = self.inventories
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_text_key_references_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
        finally:
            pb.finished()

    def _find_text_key_references_from_xml_inventory_lines(self,
        line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        """
        if not self._serializer.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references_from_xml_inventory_lines only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines : we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}

        # Move several functions to be local variables, since this is a loop
        # which is run a lot.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, line_key in line_iterator:
            match = search(line)
            if match is None:
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we don't
            # really want to have so many fulltexts that this matters anyway.
            # RBC 20071114.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            setdefault(key, False)
            if revision_id == line_key[-1]:
                result[key] = True
        return result

    def _inventory_xml_lines_for_keys(self, keys):
        """Get a line iterator of the sort needed for finding references.

        Not relevant for non-xml inventory repositories.

        Ghosts in revision_keys are ignored.

        :param revision_keys: The revision keys for the inventories to inspect.
        :return: An iterator over (inventory line, revid) for the fulltexts of
            all of the xml inventories specified by revision_keys.
        """
        stream = self.inventories.get_record_stream(keys, 'unordered', True)
        for record in stream:
            if record.storage_kind != 'absent':
                chunks = record.get_bytes_as('chunked')
                revid = record.key[-1]
                lines = osutils.chunks_to_lines(chunks)
                for line in lines:
                    yield line, revid

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_ids):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :param revision_ids: The revision ids to filter for. This should be a
            set or other type which supports efficient __contains__ lookups, as
            the revision id from each parsed line will be looked up in the
            revision_ids filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        seen = set(self._find_text_key_references_from_xml_inventory_lines(
                line_iterator).iterkeys())
        # Note that revision_ids are revision keys.
        parent_maps = self.revisions.get_parent_map(revision_ids)
        parents = set()
        map(parents.update, parent_maps.itervalues())
        parents.difference_update(revision_ids)
        parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
            self._inventory_xml_lines_for_keys(parents)))
        new_keys = seen - parent_seen
        result = {}
        setdefault = result.setdefault
        for key in new_keys:
            setdefault(key[0], set()).add(key[-1])
        return result

    def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
        """Find the file ids and versions affected by revisions.

        :param revisions: an iterable containing revision ids.
        :param _inv_weave: The inventory weave from this repository or None.
            If None, the inventory weave will be opened automatically.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        selected_keys = set((revid,) for revid in revision_ids)
        w = _inv_weave or self.inventories
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_file_ids_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_keys(
                    selected_keys, pb=pb),
                selected_keys)
        finally:
            pb.finished()

    def iter_files_bytes(self, desired_files):
        """Iterate through file versions.

        Files will not necessarily be returned in the order they occur in
        desired_files. No specific order is guaranteed.

        Yields pairs of identifier, bytes_iterator. identifier is an opaque
        value supplied by the caller as part of desired_files. It should
        uniquely identify the file version in the caller's context. (Examples:
        an index number or a TreeTransform trans_id.)

        bytes_iterator is an iterable of bytestrings for the file. The
        kind of iterable and length of the bytestrings are unspecified, but for
        this implementation, it is a list of bytes produced by
        VersionedFile.get_record_stream().

        :param desired_files: a list of (file_id, revision_id, identifier)
            triples
        """
        text_keys = {}
        for file_id, revision_id, callable_data in desired_files:
            text_keys[(file_id, revision_id)] = callable_data
        for record in self.texts.get_record_stream(text_keys, 'unordered', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self)
            yield text_keys[record.key], record.get_bytes_as('chunked')
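
    # Illustrative usage (assumed file and revision ids): fetch two file
    # texts, keyed by an opaque identifier chosen by the caller:
    #
    #   desired_files = [('file-id-1', 'rev-1', 'id-a'),
    #                    ('file-id-2', 'rev-1', 'id-b')]
    #   for identifier, chunks in repo.iter_files_bytes(desired_files):
    #       text = ''.join(chunks)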

    def _generate_text_key_index(self, text_key_references=None,
        ancestors=None):
        """Generate a new text key index for the repository.

        This is an expensive function that will take considerable time to run.

        :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
            list of parents, also text keys. When a given key has no parents,
            the parents list will be [NULL_REVISION].
        """
        # All revisions, to find inventory parents.
        if ancestors is None:
            graph = self.get_graph()
            ancestors = graph.get_parent_map(self.all_revision_ids())
        if text_key_references is None:
            text_key_references = self.find_text_key_references()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_generate_text_key_index(ancestors,
                text_key_references, pb)
        finally:
            pb.finished()
    def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
        """Helper for _generate_text_key_index to avoid deep nesting."""
        revision_order = tsort.topo_sort(ancestors)
        invalid_keys = set()
        revision_keys = {}
        for revision_id in revision_order:
            revision_keys[revision_id] = set()
        text_count = len(text_key_references)
        # a cache of the text keys to allow reuse; costs a dict of all the
        # keys, but saves a 2-tuple for every child of a given key.
        text_key_cache = {}
        for text_key, valid in text_key_references.iteritems():
            if not valid:
                invalid_keys.add(text_key)
            else:
                revision_keys[text_key[1]].add(text_key)
            text_key_cache[text_key] = text_key
        del text_key_references
        text_index = {}
        text_graph = graph.Graph(graph.DictParentsProvider(text_index))
        NULL_REVISION = _mod_revision.NULL_REVISION
        # Set a cache with a size of 10 - this suffices for bzr.dev but may be
        # too small for large or very branchy trees. However, for 55K path
        # trees, it would be easy to use too much memory trivially. Ideally we
        # could gauge this by looking at available real memory etc, but this is
        # always a tricky proposition.
        inventory_cache = lru_cache.LRUCache(10)
        batch_size = 10 # should be ~150MB on a 55K path tree
        batch_count = len(revision_order) / batch_size + 1
        processed_texts = 0
        pb.update("Calculating text parents", processed_texts, text_count)
        for offset in xrange(batch_count):
            to_query = revision_order[offset * batch_size:(offset + 1) *
                batch_size]
            if not to_query:
                break
            for rev_tree in self.revision_trees(to_query):
                revision_id = rev_tree.get_revision_id()
                parent_ids = ancestors[revision_id]
                for text_key in revision_keys[revision_id]:
                    pb.update("Calculating text parents", processed_texts)
                    processed_texts += 1
                    candidate_parents = []
                    for parent_id in parent_ids:
                        parent_text_key = (text_key[0], parent_id)
                        try:
                            check_parent = parent_text_key not in \
                                revision_keys[parent_id]
                        except KeyError:
                            # the parent parent_id is a ghost:
                            check_parent = False
                            # truncate the derived graph against this ghost.
                            parent_text_key = None
                        if check_parent:
                            # look at the parent commit details inventories to
                            # determine possible candidates in the per file graph.
                            try:
                                inv = inventory_cache[parent_id]
                            except KeyError:
                                inv = self.revision_tree(parent_id).inventory
                                inventory_cache[parent_id] = inv
                            parent_entry = inv._byid.get(text_key[0], None)
                            if parent_entry is not None:
                                parent_text_key = (
                                    text_key[0], parent_entry.revision)
                            else:
                                parent_text_key = None
                        if parent_text_key is not None:
                            candidate_parents.append(
                                text_key_cache[parent_text_key])
                    parent_heads = text_graph.heads(candidate_parents)
                    new_parents = list(parent_heads)
                    new_parents.sort(key=lambda x:candidate_parents.index(x))
                    if new_parents == []:
                        new_parents = [NULL_REVISION]
                    text_index[text_key] = new_parents
        for text_key in invalid_keys:
            text_index[text_key] = [NULL_REVISION]
        return text_index
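    # Shape sketch (illustrative, not from the original source): the computed
    # index maps (file_id, revision_id) text keys to their per-file parent
    # keys, for example:
    #
    #   {('f-id', 'rev-2'): [('f-id', 'rev-1')],
    #    ('f-id', 'rev-1'): [NULL_REVISION]}
    #
    # Keys that no inventory references are truncated to [NULL_REVISION].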
    def item_keys_introduced_by(self, revision_ids, _files_pb=None):
        """Get an iterable listing the keys of all the data introduced by a set
        of revision IDs.

        The keys will be ordered so that the corresponding items can be safely
        fetched and inserted in that order.

        :returns: An iterable producing tuples of (knit-kind, file-id,
            versions).  knit-kind is one of 'file', 'inventory', 'signatures',
            'revisions'.  file-id is None unless knit-kind is 'file'.
        """
        # XXX: it's a bit weird to control the inventory weave caching in this
        # generator. Ideally the caching would be done in fetch.py I think. Or
        # maybe this generator should explicitly have the contract that it
        # should not be iterated until the previously yielded item has been
        # processed.
        inv_w = self.inventories

        # file ids that changed
        file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
        count = 0
        num_file_ids = len(file_ids)
        for file_id, altered_versions in file_ids.iteritems():
            if _files_pb is not None:
                _files_pb.update("fetch texts", count, num_file_ids)
            count += 1
            yield ("file", file_id, altered_versions)
        # We're done with the files_pb. Note that it finished by the caller,
        # just as it was created by the caller.
        del _files_pb

        # inventory
        yield ("inventory", None, revision_ids)

        # signatures
        # XXX: Note ATM no callers actually pay attention to this return
        #      instead they just use the list of revision ids and ignore
        #      missing sigs. Consider removing this work entirely
        revisions_with_signatures = set(self.signatures.get_parent_map(
            [(r,) for r in revision_ids]))
        revisions_with_signatures = set(
            [r for (r,) in revisions_with_signatures])
        revisions_with_signatures.intersection_update(revision_ids)
        yield ("signatures", None, revisions_with_signatures)

        # revisions
        yield ("revisions", None, revision_ids)
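    # Consumption sketch (illustrative only; `source_repo` and the revision
    # ids are assumptions): a fetcher could drive the generator like this,
    # copying items in exactly the yielded order:
    #
    #   for knit_kind, file_id, versions in source_repo.item_keys_introduced_by(
    #           ['rev-1', 'rev-2']):
    #       if knit_kind == 'file':
    #           pass # copy the texts in `versions` for `file_id`
    #       else:
    #           pass # 'inventory', then 'signatures', then 'revisions'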
    @needs_read_lock
    def get_inventory(self, revision_id):
        """Get Inventory object by revision id."""
        return self.iter_inventories([revision_id]).next()
    def iter_inventories(self, revision_ids):
        """Get many inventories by revision_ids.

        This will buffer some or all of the texts used in constructing the
        inventories in memory, but will only parse a single inventory at a
        time.

        :param revision_ids: The expected revision ids of the inventories.
        :return: An iterator of inventories.
        """
        if ((None in revision_ids)
            or (_mod_revision.NULL_REVISION in revision_ids)):
            raise ValueError('cannot get null revision inventory')
        return self._iter_inventories(revision_ids)

    def _iter_inventories(self, revision_ids):
        """single-document based inventory iteration."""
        for text, revision_id in self._iter_inventory_xmls(revision_ids):
            yield self.deserialise_inventory(revision_id, text)

    def _iter_inventory_xmls(self, revision_ids):
        keys = [(revision_id,) for revision_id in revision_ids]
        stream = self.inventories.get_record_stream(keys, 'unordered', True)
        text_chunks = {}
        for record in stream:
            if record.storage_kind != 'absent':
                text_chunks[record.key] = record.get_bytes_as('chunked')
            else:
                raise errors.NoSuchRevision(self, record.key)
        for key in keys:
            chunks = text_chunks.pop(key)
            yield ''.join(chunks), key[-1]
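    # Usage sketch (illustrative, assuming `repo` is read-locked and both
    # revisions exist); None or NULL_REVISION entries raise ValueError before
    # any parsing happens:
    #
    #   for inv in repo.iter_inventories(['rev-1', 'rev-2']):
    #       print inv.revision_id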
    def deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        """
        result = self._serializer.read_inventory_from_string(xml, revision_id,
                    entry_cache=self._inventory_entry_cache)
        if result.revision_id != revision_id:
            raise AssertionError('revision id mismatch %s != %s' % (
                result.revision_id, revision_id))
        return result
    def serialise_inventory(self, inv):
        return self._serializer.write_inventory_to_string(inv)

    def _serialise_inventory_to_lines(self, inv):
        return self._serializer.write_inventory_to_lines(inv)

    def get_serializer_format(self):
        return self._serializer.format_num
    @needs_read_lock
    def get_inventory_xml(self, revision_id):
        """Get inventory XML as a string."""
        texts = self._iter_inventory_xmls([revision_id])
        try:
            text, revision_id = texts.next()
        except StopIteration:
            raise errors.HistoryMissing(self, 'inventory', revision_id)
        return text
    @needs_read_lock
    def get_inventory_sha1(self, revision_id):
        """Return the sha1 hash of the inventory entry
        """
        return self.get_revision(revision_id).inventory_sha1
    def iter_reverse_revision_history(self, revision_id):
        """Iterate backwards through revision ids in the lefthand history

        :param revision_id: The revision id to start with.  All its lefthand
            ancestors will be traversed.
        """
        graph = self.get_graph()
        next_id = revision_id
        while True:
            if next_id in (None, _mod_revision.NULL_REVISION):
                return
            yield next_id
            # Note: The following line may raise KeyError in the event of
            # truncated history. We decided not to have a try:except:raise
            # RevisionNotPresent here until we see a use for it, because of the
            # cost in an inner loop that is by its very nature O(history).
            # Robert Collins 20080326
            parents = graph.get_parent_map([next_id])[next_id]
            if len(parents) == 0:
                return
            else:
                next_id = parents[0]
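    # Walk sketch (illustrative): starting from an assumed tip 'rev-3' whose
    # lefthand ancestry is rev-1 <- rev-2 <- rev-3, this yields
    # 'rev-3', 'rev-2', 'rev-1' and stops at the revision with no parents:
    #
    #   history = list(repo.iter_reverse_revision_history('rev-3'))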
        reconciler.reconcile()
        return reconciler
    def _refresh_data(self):
        """Helper called from lock_* to ensure coherency with disk.

        The default implementation does nothing; it is however possible
        for repositories to maintain loaded indices across multiple locks
        by checking inside their implementation of this method to see
        whether their indices are still valid. This depends of course on
        the disk format being validatable in this manner. This method is
        also called by the refresh_data() public interface to cause a refresh
        to occur while in a write lock so that data inserted by a smart server
        push operation is visible on the client's instance of the physical
        repository.
        """
    @needs_read_lock
    def revision_tree(self, revision_id):
        """Return Tree for a revision on this branch.

        `revision_id` may be NULL_REVISION for the empty tree revision.
        """
        revision_id = _mod_revision.ensure_null(revision_id)
        # TODO: refactor this to use an existing revision object
        # so we don't need to read it in twice.
        if revision_id == _mod_revision.NULL_REVISION:
            return RevisionTree(self, Inventory(root_id=None),
                                _mod_revision.NULL_REVISION)
        else:
            inv = self.get_revision_inventory(revision_id)
            return RevisionTree(self, inv, revision_id)
    def revision_trees(self, revision_ids):
        """Return Trees for revisions in this repository.

        :param revision_ids: a sequence of revision-ids;
          a revision-id may not be None or 'null:'
        """
        inventories = self.iter_inventories(revision_ids)
        for inv in inventories:
            yield RevisionTree(self, inv, inv.revision_id)
    def _filtered_revision_trees(self, revision_ids, file_ids):
        """Return Tree for a revision on this branch with only some files.

        :param revision_ids: a sequence of revision-ids;
          a revision-id may not be None or 'null:'
        :param file_ids: if not None, the result is filtered
          so that only those file-ids, their parents and their
          children are included.
        """
        inventories = self.iter_inventories(revision_ids)
        for inv in inventories:
            # Should we introduce a FilteredRevisionTree class rather
            # than pre-filter the inventory here?
            filtered_inv = inv.filter(file_ids)
            yield RevisionTree(self, filtered_inv, filtered_inv.revision_id)
"""Return Tree for a revision on this branch.
812
`revision_id` may not be None or 'null:'"""
813
assert None not in revision_ids
814
assert _mod_revision.NULL_REVISION not in revision_ids
815
texts = self.get_inventory_weave().get_texts(revision_ids)
816
for text, revision_id in zip(texts, revision_ids):
817
inv = self.deserialise_inventory(revision_id, text)
818
yield RevisionTree(self, inv, revision_id)
2250
820
    @needs_read_lock
    def get_ancestry(self, revision_id, topo_sorted=True):
        """Return a list of revision-ids integrated by a revision.

        The first element of the list is always None, indicating the origin
        revision.  This might change when we have history horizons, or
        perhaps we should have a new API.

        This is topologically sorted.
        """
        if _mod_revision.is_null(revision_id):
            return [None]
        if not self.has_revision(revision_id):
            raise errors.NoSuchRevision(self, revision_id)
        graph = self.get_graph()
        keys = set()
        search = graph._make_breadth_first_searcher([revision_id])
        while True:
            try:
                found, ghosts = search.next_with_ghosts()
            except StopIteration:
                break
            keys.update(found)
        if _mod_revision.NULL_REVISION in keys:
            keys.remove(_mod_revision.NULL_REVISION)
        if topo_sorted:
            parent_map = graph.get_parent_map(keys)
            keys = tsort.topo_sort(parent_map)
        return [None] + list(keys)
"""Compress the data within the repository.
2283
This operation only makes sense for some repository types. For other
2284
types it should be a no-op that just returns.
2286
This stub method does not require a lock, but subclasses should use
2287
@needs_write_lock as this is a long running call its reasonable to
2288
implicitly lock for the user.
835
w = self.get_inventory_weave()
836
candidates = w.get_ancestry(revision_id)
837
return [None] + candidates # self._eliminate_revisions_not_present(candidates)
840
    @needs_read_lock
    def print_file(self, file, revision_id):
        """Print `file` to stdout.

        FIXME RBC 20060125 as John Meinel points out this is a bad api
        - it writes to stdout, it assumes that that is valid etc. Fix
        by creating a new more flexible convenience function.
        """
        tree = self.revision_tree(revision_id)
        # use inventory as it was in that revision
        file_id = tree.inventory.path2id(file)
        if not file_id:
            # TODO: jam 20060427 Write a test for this code path
            #       it had a bug in it, and was raising the wrong
            #       exception.
            raise errors.BzrError("%r is not present in revision %s" % (file, revision_id))
        tree.print_file(file_id)
    def get_transaction(self):
        return self.control_files.get_transaction()
    def get_parent_map(self, revision_ids):
        """See graph._StackedParentsProvider.get_parent_map"""
        # revisions index works in keys; this just works in revisions
        # therefore wrap and unwrap
        query_keys = []
        result = {}
        for revision_id in revision_ids:
            if revision_id == _mod_revision.NULL_REVISION:
                result[revision_id] = ()
            elif revision_id is None:
                raise ValueError('get_parent_map(None) is not valid')
            else:
                query_keys.append((revision_id ,))
        for ((revision_id,), parent_keys) in \
                self.revisions.get_parent_map(query_keys).iteritems():
            if parent_keys:
                result[revision_id] = tuple(parent_revid
                    for (parent_revid,) in parent_keys)
            else:
                result[revision_id] = (_mod_revision.NULL_REVISION,)
        return result
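    # Result-shape sketch (illustrative ids): ghosts are simply absent from
    # the result, NULL_REVISION maps to (), and a revision whose only parent
    # is the origin maps to (NULL_REVISION,):
    #
    #   repo.get_parent_map(['rev-2', 'null:', 'ghost-rev'])
    #   # -> {'rev-2': ('rev-1',), 'null:': ()}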
    def _make_parents_provider(self):
        return self

    def get_graph(self, other_repository=None):
        """Return the graph walker for this repository format"""
        parents_provider = self._make_parents_provider()
        if (other_repository is not None and
            not self.has_same_location(other_repository)):
            parents_provider = graph._StackedParentsProvider(
                [parents_provider, other_repository._make_parents_provider()])
        return graph.Graph(parents_provider)
    def _get_versioned_file_checker(self, text_key_references=None):
        """Return an object suitable for checking versioned files.

        :param text_key_references: if non-None, an already built
            dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. If None, this will be
            calculated.
        """
        return _VersionedFileChecker(self,
            text_key_references=text_key_references)
    def revision_ids_to_search_result(self, result_set):
        """Convert a set of revision ids to a graph SearchResult."""
        result_parents = set()
        for parents in self.get_graph().get_parent_map(
            result_set).itervalues():
            result_parents.update(parents)
        included_keys = result_set.intersection(result_parents)
        start_keys = result_set.difference(included_keys)
        exclude_keys = result_parents.difference(result_set)
        result = graph.SearchResult(start_keys, exclude_keys,
            len(result_set), result_set)
        return result
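    # Worked sketch (illustrative): for a history A -> B -> C and
    # result_set = set([B, C]), the parents seen are set([A, B]); B is both
    # present and a parent, so the search starts at C (start_keys), excludes
    # A (exclude_keys), and records a count of 2.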
    @needs_write_lock
    def set_make_working_trees(self, new_value):
        raise NotImplementedError(self.set_make_working_trees)
                            content is copied.
        :param pb: optional progress bar to use for progress reports. If not
            provided a default one will be created.
        :return: None.
        """
        from bzrlib.fetch import RepoFetcher
        f = RepoFetcher(to_repository=self.target,
                        from_repository=self.source,
                        last_revision=revision_id,
                        fetch_spec=fetch_spec,
                        pb=pb, find_ghosts=find_ghosts)
    def _walk_to_common_revisions(self, revision_ids):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: A set of revision ids.
        """
        target_graph = self.target.get_graph()
        revision_ids = frozenset(revision_ids)
        # Fast path for the case where all the revisions are already in the
        # target repo.
        # (Although this does incur an extra round trip for the
        # fairly common case where the target doesn't already have the revision
        # we're pushing.)
        if set(target_graph.get_parent_map(revision_ids)) == revision_ids:
            return graph.SearchResult(revision_ids, set(), 0, set())
        missing_revs = set()
        source_graph = self.source.get_graph()
        # ensure we don't pay silly lookup costs.
        searcher = source_graph._make_breadth_first_searcher(revision_ids)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        searcher_exhausted = False
        while True:
            next_revs = set()
            ghosts = set()
            # Iterate the searcher until we have enough next_revs
            while len(next_revs) < self._walk_to_common_revisions_batch_size:
                try:
                    next_revs_part, ghosts_part = searcher.next_with_ghosts()
                    next_revs.update(next_revs_part)
                    ghosts.update(ghosts_part)
                except StopIteration:
                    searcher_exhausted = True
                    break
            # If there are ghosts in the source graph, and the caller asked for
            # them, make sure that they are present in the target.
            # We don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            ghosts_to_check = set(revision_ids.intersection(ghosts))
            revs_to_get = set(next_revs).union(ghosts_to_check)
            if revs_to_get:
                have_revs = set(target_graph.get_parent_map(revs_to_get))
                # we always have NULL_REVISION present.
                have_revs = have_revs.union(null_set)
                # Check if the target is missing any ghosts we need.
                ghosts_to_check.difference_update(have_revs)
                if ghosts_to_check:
                    # One of the caller's revision_ids is a ghost in both the
                    # source and the target.
                    raise errors.NoSuchRevision(
                        self.source, ghosts_to_check.pop())
                missing_revs.update(next_revs - have_revs)
                # Because we may have walked past the original stop point, make
                # sure everything is stopped
                stop_revs = searcher.find_seen_ancestors(have_revs)
                searcher.stop_searching_any(stop_revs)
            if searcher_exhausted:
                break
        return searcher.get_result()
    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        These are returned in topological order.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A bzrlib.graph.SearchResult.
        """
        # stop searching at found target revisions.
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)
    @staticmethod
    def _same_model(source, target):
        """True if source and target have the same data representation.

        Note: this is always called on the base class; overriding it in a
        subclass will have no effect.
        """
        try:
            InterRepository._assert_same_model(source, target)
            return True
        except errors.IncompatibleRepositories, e:
            return False

    @staticmethod
    def _assert_same_model(source, target):
        """Raise an exception if two repositories do not use the same model.
        """
        if source.supports_rich_root() != target.supports_rich_root():
            raise errors.IncompatibleRepositories(source, target,
                "different rich-root support")
        if source._serializer != target._serializer:
            raise errors.IncompatibleRepositories(source, target,
                "different serializers")
class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit3()
    @staticmethod
    def is_compatible(source, target):
        return InterRepository._same_model(source, target)
    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This copies both the repository's revision data, and configuration information
        such as the make_working_trees setting.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)
class InterWeaveRepo(InterSameDataRepository):
    """Optimised code paths between Weave based repositories.

    This should be in bzrlib/repofmt/weaverepo.py but we have not yet
    implemented lazy inter-object optimisation.
    """
    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import weaverepo
        return weaverepo.RepositoryFormat7()
    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Knit formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
        try:
            are_knits = (isinstance(source._format, RepositoryFormatKnit) and
                isinstance(target._format, RepositoryFormatKnit))
        except AttributeError:
            return False
        return are_knits and InterRepository._same_model(source, target)
    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)
class InterPackRepo(InterSameDataRepository):
    """Optimised code paths between Pack based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import pack_repo
        return pack_repo.RepositoryFormatKnitPack1()
    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Pack formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        try:
            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                isinstance(target._format, RepositoryFormatPack))
        except AttributeError:
            return False
        return are_packs and InterRepository._same_model(source, target)
    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
            fetch_spec=None):
        """See InterRepository.fetch()."""
        if (len(self.source._fallback_repositories) > 0 or
            len(self.target._fallback_repositories) > 0):
            # The pack layer is not aware of fallback repositories, so when
            # fetching from a stacked repository or into a stacked repository
            # we use the generic fetch logic which uses the VersionedFiles
            # attributes on repository.
            from bzrlib.fetch import RepoFetcher
            fetcher = RepoFetcher(self.target, self.source, revision_id,
                                  pb, find_ghosts, fetch_spec=fetch_spec)
        if fetch_spec is not None:
            if len(list(fetch_spec.heads)) != 1:
                raise AssertionError(
                    "InterPackRepo.fetch doesn't support "
                    "fetching multiple heads yet.")
            revision_id = list(fetch_spec.heads)[0]
            fetch_spec = None
        if revision_id is None:
            # TODO:
            # everything to do - use pack logic
            # to fetch from all packs to one without
            # inventory parsing etc, IFF nothing to be copied is in the target.
            # till then:
            source_revision_ids = frozenset(self.source.all_revision_ids())
            revision_ids = source_revision_ids - \
                frozenset(self.target.get_parent_map(source_revision_ids))
            revision_keys = [(revid,) for revid in revision_ids]
            index = self.target._pack_collection.revision_index.combined_index
            present_revision_ids = set(item[1][0] for item in
                index.iter_entries(revision_keys))
            revision_ids = set(revision_ids) - present_revision_ids
            # implementing the TODO will involve:
            # - detecting when all of a pack is selected
            # - avoiding as much as possible pre-selection, so the
            # more-core routines such as create_pack_from_packs can filter in
            # a just-in-time fashion. (though having a HEADS list on a
            # repository might make this a lot easier, because we could
            # sensibly detect 'new revisions' without doing a full index scan.
        elif _mod_revision.is_null(revision_id):
            # nothing to do:
            return (0, [])
        else:
            try:
                revision_ids = self.search_missing_revision_ids(revision_id,
                    find_ghosts=find_ghosts).get_keys()
            except errors.NoSuchRevision:
                raise errors.InstallFailed([revision_id])
            if len(revision_ids) == 0:
                return (0, [])
        return self._pack(self.source, self.target, revision_ids)

    def _pack(self, source, target, revision_ids):
        from bzrlib.repofmt.pack_repo import Packer
        packs = source._pack_collection.all_packs()
        pack = Packer(self.target._pack_collection, packs, '.fetch',
            revision_ids).pack()
        if pack is not None:
            self.target._pack_collection._save_pack_names()
            copied_revs = pack.get_revision_count()
            # Trigger an autopack. This may duplicate effort as we've just done
            # a pack creation, but for now it is simpler to think about as
            # 'upload data, then repack if needed'.
            self.target._pack_collection.autopack()
            return (copied_revs, [])
        else:
            return (0, [])
    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids().

        :param find_ghosts: Find ghosts throughout the ancestry of
            revision_id.
        """
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        elif revision_id is not None:
            # Find ghosts: search for revisions pointing from one repository to
            # the other, and vice versa, anywhere in the history of revision_id.
            graph = self.target.get_graph(other_repository=self.source)
            searcher = graph._make_breadth_first_searcher([revision_id])
            found_ids = set()
            while True:
                try:
                    next_revs, ghosts = searcher.next_with_ghosts()
                except StopIteration:
                    break
                if revision_id in ghosts:
                    raise errors.NoSuchRevision(self.source, revision_id)
                found_ids.update(next_revs)
                found_ids.update(ghosts)
            found_ids = frozenset(found_ids)
            # Double query here: should be able to avoid this by changing the
            # graph api further.
            result_set = found_ids - frozenset(
                self.target.get_parent_map(found_ids))
        else:
            source_ids = self.source.all_revision_ids()
            # source_ids is the worst possible case we may need to pull.
            # now we want to filter source_ids against what we actually
            # have in target, but don't try to check for existence where we know
            # we do not have a revision as that would be pointless.
            target_ids = set(self.target.all_revision_ids())
            result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)
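    # Usage sketch (illustrative; `source` and `target` are assumed pack
    # repositories and 'tip-rev' a placeholder id):
    #
    #   inter = InterRepository.get(source, target)
    #   search = inter.search_missing_revision_ids('tip-rev', find_ghosts=True)
    #   missing = search.get_keys()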
class InterDifferingSerializer(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None
    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit2 source and Knit3 target"""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        # Ideally, we'd support fetching if the source had no tree references
        # even if it supported them...
        if (getattr(source._format, 'supports_tree_reference', False) and
            not getattr(target._format, 'supports_tree_reference', False)):
            return False
        return True
    def _get_delta_for_revision(self, tree, parent_ids, basis_id, cache):
        """Get the best delta and base for this revision.

        :return: (basis_id, delta)
        """
        possible_trees = [(parent_id, cache[parent_id])
                          for parent_id in parent_ids
                          if parent_id in cache]
        if len(possible_trees) == 0:
            # There either aren't any parents, or the parents aren't in the
            # cache, so just use the last converted tree
            possible_trees.append((basis_id, cache[basis_id]))
        deltas = []
        for basis_id, basis_tree in possible_trees:
            delta = tree.inventory._make_delta(basis_tree.inventory)
            deltas.append((len(delta), basis_id, delta))
        deltas.sort()
        return deltas[0][1:]
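    # Delta-shape sketch (illustrative): an inventory delta is a list of
    # (old_path, new_path, file_id, entry) tuples, and the shortest delta
    # against any cached parent tree wins. A single modified file looks like:
    #
    #   [('a.txt', 'a.txt', 'file-id-a', <updated InventoryFile entry>)]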
    def _fetch_batch(self, revision_ids, basis_id, cache):
        """Fetch across a few revisions.

        :param revision_ids: The revisions to copy
        :param basis_id: The revision_id of a tree that must be in cache, used
            as a basis for delta when no other base is available
        :param cache: A cache of RevisionTrees that we can use.
        :return: The revision_id of the last converted tree. The RevisionTree
            for it will be in cache
        """
        # Walk though all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        text_keys = set()
        pending_deltas = []
        pending_revisions = []
        parent_map = self.source.get_parent_map(revision_ids)
        for tree in self.source.revision_trees(revision_ids):
            current_revision_id = tree.get_revision_id()
            parent_ids = parent_map.get(current_revision_id, ())
            basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
                                                           basis_id, cache)
            # Find text entries that need to be copied
            for old_path, new_path, file_id, entry in delta:
                if new_path is not None:
                    if not (new_path or self.target.supports_rich_root()):
                        # We don't copy the text for the root node unless the
                        # target supports_rich_root.
                        continue
                    text_keys.add((file_id, entry.revision))
            revision = self.source.get_revision(current_revision_id)
            pending_deltas.append((basis_id, delta,
                current_revision_id, revision.parent_ids))
            pending_revisions.append(revision)
            cache[current_revision_id] = tree
            basis_id = current_revision_id
        # Copy file texts
        from_texts = self.source.texts
        to_texts = self.target.texts
        to_texts.insert_record_stream(from_texts.get_record_stream(
            text_keys, self.target._format._fetch_order,
            not self.target._format._fetch_uses_deltas))
        # insert inventory deltas
        for delta in pending_deltas:
            self.target.add_inventory_by_delta(*delta)
        # insert signatures and revisions
        for revision in pending_revisions:
            try:
                signature = self.source.get_signature_text(
                    revision.revision_id)
                self.target.add_signature_text(revision.revision_id,
                    signature)
            except errors.NoSuchRevision:
                pass
            self.target.add_revision(revision.revision_id, revision)
        return basis_id
    def _fetch_all_revisions(self, revision_ids, pb):
        """Fetch everything for the list of revisions.

        :param revision_ids: The list of revisions to fetch. Must be in
            topological order.
        :param pb: A ProgressBar
        :return: None
        """
        basis_id, basis_tree = self._get_basis(revision_ids[0])
        batch_size = 100
        cache = lru_cache.LRUCache(100)
        cache[basis_id] = basis_tree
        del basis_tree # We don't want to hang on to it here
        for offset in range(0, len(revision_ids), batch_size):
            self.target.start_write_group()
            try:
                pb.update('Transferring revisions', offset,
                          len(revision_ids))
                batch = revision_ids[offset:offset+batch_size]
                basis_id = self._fetch_batch(batch, basis_id, cache)
            except:
                self.target.abort_write_group()
                raise
            else:
                self.target.commit_write_group()
        pb.update('Transferring revisions', len(revision_ids),
                  len(revision_ids))
    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
            fetch_spec=None):
        """See InterRepository.fetch()."""
        if fetch_spec is not None:
            raise AssertionError("Not implemented yet...")
        revision_ids = self.target.search_missing_revision_ids(self.source,
            revision_id, find_ghosts=find_ghosts).get_keys()
        if not revision_ids:
            return 0, 0
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            symbol_versioning.warn(
                symbol_versioning.deprecated_in((1, 14, 0))
                % "pb parameter to fetch()")
            my_pb = None
        try:
            self._fetch_all_revisions(revision_ids, pb)
        finally:
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0
    def _get_basis(self, first_revision_id):
        """Get a revision and tree which exists in the target.

        This assumes that first_revision_id is selected for transmission
        because all other ancestors are already present. If we can't find an
        ancestor we fall back to NULL_REVISION since we know that is safe.

        :return: (basis_id, basis_tree)
        """
        first_rev = self.source.get_revision(first_revision_id)
        try:
            basis_id = first_rev.parent_ids[0]
            # only valid as a basis if the target has it
            self.target.get_revision(basis_id)
            # Try to get a basis tree - if its a ghost it will hit the
            # NoSuchRevision case.
            basis_tree = self.source.revision_tree(basis_id)
        except (IndexError, errors.NoSuchRevision):
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
        return basis_id, basis_tree
InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)
InterRepository.register_optimiser(InterWeaveRepo)
InterRepository.register_optimiser(InterKnitRepo)
InterRepository.register_optimiser(InterPackRepo)
class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """

    def step(self, message):
        """Update the pb by a step."""
        self.count += 1
        self.pb.update(message, self.count, self.total)


_unescape_map = {
    'apos': "'",
    'quot': '"',
    'amp': '&',
    'lt': '<',
    'gt': '>',
}


def _unescaper(match, _map=_unescape_map):
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            raise
        return unichr(int(code[1:])).encode('utf8')


_unescape_re = None


def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data."""
    global _unescape_re
    if _unescape_re is None:
        _unescape_re = re.compile('\&([^;]*);')
    return _unescape_re.sub(_unescaper, data)
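
# Illustrative sketch (not part of the original module): _unescape_xml turns
# the predefined XML entities back into characters, e.g.
#
#     _unescape_xml('a &amp; b &lt; c')   # -> 'a & b < c'
#     _unescape_xml('&#65;')              # -> 'A' (numeric references too)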


class _VersionedFileChecker(object):

    def __init__(self, repository, text_key_references=None):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index(
            text_key_references=text_key_references)

    def calculate_file_version_parents(self, text_key):
        """Calculate the correct parents for a file version according to
        the inventories.
        """
        parent_keys = self.text_index[text_key]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        return tuple(parent_keys)
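
    # Illustrative sketch (not part of the original module): text keys are
    # (file_id, revision_id) tuples, and a NULL_REVISION parent list maps to
    # the empty tuple, e.g. (assumed ids):
    #
    #     checker = _VersionedFileChecker(repo)
    #     checker.calculate_file_version_parents(('file-id', 'rev-2'))
    #     # -> ('rev-1',), or () for a first version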

    def check_file_version_parents(self, texts, progress_bar=None):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct. dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this
            versioned file, but not used by the corresponding inventory.
        """
        wrong_parents = {}
        self.file_ids = set([file_id for file_id, _ in
            self.text_index.iterkeys()])
        # text keys is now grouped by file_id
        n_weaves = len(self.file_ids)
        files_in_revisions = {}
        revisions_of_files = {}
        n_versions = len(self.text_index)
        if progress_bar is not None:
            progress_bar.update('loading text store', 0, n_versions)
        parent_map = self.repository.texts.get_parent_map(self.text_index)
        # On unlistable transports this could well be empty/error...
        text_keys = self.repository.texts.keys()
        unused_keys = frozenset(text_keys) - set(self.text_index)
        for num, key in enumerate(self.text_index.iterkeys()):
            if progress_bar is not None:
                progress_bar.update('checking text graph', num, n_versions)
            correct_parents = self.calculate_file_version_parents(key)
            try:
                knit_parents = parent_map[key]
            except errors.RevisionNotPresent:
                # Missing text: record no stored parents rather than crashing.
                knit_parents = None
            if correct_parents != knit_parents:
                wrong_parents[key] = (knit_parents, correct_parents)
        return wrong_parents, unused_keys
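
# Illustrative sketch (not part of the original module): a consistency check
# over a repository's text graph might look like this (assumed setup):
#
#     repo.lock_read()
#     try:
#         checker = _VersionedFileChecker(repo)
#         wrong, unused = checker.check_file_version_parents(repo.texts)
#         for key, (stored, correct) in wrong.iteritems():
#             print 'bad parents for %r: %r != %r' % (key, stored, correct)
#     finally:
#         repo.unlock()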


def _old_get_graph(repository, revision_id):
    """DO NOT USE. That is all. I'm serious."""
    graph = repository.get_graph()
    revision_graph = dict(((key, value) for key, value in
        graph.iter_ancestry([revision_id]) if value is not None))
    return _strip_NULL_ghosts(revision_graph)


def _strip_NULL_ghosts(revision_graph):
    """Also don't use this. More compatibility code for unmigrated clients."""
    # Filter ghosts, and null:
    if _mod_revision.NULL_REVISION in revision_graph:
        del revision_graph[_mod_revision.NULL_REVISION]
    for key, parents in revision_graph.items():
        revision_graph[key] = tuple(parent for parent in parents if parent
            in revision_graph)
    return revision_graph
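
# Illustrative sketch (not part of the original module): ghosts (parents not
# present as keys) and NULL_REVISION are dropped in place, e.g.:
#
#     g = {'rev-2': ('rev-1', 'ghost-1'), 'rev-1': ('null:',)}
#     _strip_NULL_ghosts(g)
#     # -> {'rev-2': ('rev-1',), 'rev-1': ()}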


class StreamSink(object):
    """An object that can insert a stream into a repository.

    This interface handles the complexity of reserialising inventories and
    revisions from different formats, and allows unidirectional insertion into
    stacked repositories without looking for the missing basis parents
    beforehand.
    """

    def __init__(self, target_repo):
        self.target_repo = target_repo

    def insert_stream(self, stream, src_format, resume_tokens):
        """Insert a stream's content into the target repository.

        :param src_format: a bzr repository format.

        :return: a list of resume tokens and an iterable of keys for
            additional items required before the insertion can be completed.
        """
        self.target_repo.lock_write()
        try:
            if resume_tokens:
                self.target_repo.resume_write_group(resume_tokens)
            else:
                self.target_repo.start_write_group()
            try:
                # locked_insert_stream performs a commit|suspend.
                return self._locked_insert_stream(stream, src_format)
            except:
                self.target_repo.abort_write_group(suppress_errors=True)
                raise
        finally:
            self.target_repo.unlock()
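
    # Illustrative sketch (not part of the original module): the caller loops
    # until no keys are missing, resuming with the returned tokens (assumed
    # 'sink' and 'source' objects):
    #
    #     tokens, missing = sink.insert_stream(stream, src_format, [])
    #     while missing:
    #         stream = source.get_stream_for_missing_keys(missing)
    #         tokens, missing = sink.insert_stream(stream, src_format, tokens)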

    def _locked_insert_stream(self, stream, src_format):
        to_serializer = self.target_repo._format._serializer
        src_serializer = src_format._serializer
        if to_serializer == src_serializer:
            # If serializers match and the target is a pack repository, set
            # the write cache size on the new pack. This avoids poor
            # performance on transports where append is unbuffered (such as
            # RemoteTransport). This is safe to do because nothing should
            # read back from the target repository while a stream with
            # matching serialization is being inserted.
            # The exception is that a delta record from the source that should
            # be a fulltext may need to be expanded by the target (see
            # test_fetch_revisions_with_deltas_into_pack); but we take care to
            # explicitly flush any buffered writes first in that rare case.
            try:
                new_pack = self.target_repo._pack_collection._new_pack
            except AttributeError:
                # Not a pack repository
                pass
            else:
                new_pack.set_write_cache_size(1024*1024)
        for substream_type, substream in stream:
            if substream_type == 'texts':
                self.target_repo.texts.insert_record_stream(substream)
            elif substream_type == 'inventories':
                if src_serializer == to_serializer:
                    self.target_repo.inventories.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_inventories(
                        substream, src_serializer)
            elif substream_type == 'revisions':
                # This may fallback to extract-and-insert more often than
                # required if the serializers are different only in terms of
                # the inventory.
                if src_serializer == to_serializer:
                    self.target_repo.revisions.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_revisions(substream,
                        src_serializer)
            elif substream_type == 'signatures':
                self.target_repo.signatures.insert_record_stream(substream)
            else:
                raise AssertionError('kaboom! %s' % (substream_type,))
        missing_keys = set()
        for prefix, versioned_file in (
                ('texts', self.target_repo.texts),
                ('inventories', self.target_repo.inventories),
                ('revisions', self.target_repo.revisions),
                ('signatures', self.target_repo.signatures),
                ):
            try:
                missing_keys.update((prefix,) + key for key in
                    versioned_file.get_missing_compression_parent_keys())
            except NotImplementedError:
                # cannot even attempt suspending, and missing would have
                # failed during stream insertion.
                missing_keys = set()
                break
        if missing_keys:
            # suspend the write group and tell the caller what is missing. We
            # know we can suspend or else we would not have entered this code
            # path. (All repositories that can handle missing keys can handle
            # suspending a write group).
            write_group_tokens = self.target_repo.suspend_write_group()
            return write_group_tokens, missing_keys
        self.target_repo.commit_write_group()
        return [], set()
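
    # Illustrative sketch (not part of the original module): a stream is an
    # iterable of (substream_type, record_stream) pairs, e.g.
    #
    #     stream = [('texts', text_records), ('inventories', inv_records),
    #               ('signatures', sig_records), ('revisions', rev_records)]
    #
    # where each *_records is an insert_record_stream-compatible record
    # iterator (assumed names).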

    def _extract_and_insert_inventories(self, substream, serializer):
        """Generate a new inventory versionedfile in target, converting data.

        The inventory is retrieved from the source (deserializing it), and
        stored in the target (reserializing it in a different format).
        """
        for record in substream:
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            inv = serializer.read_inventory_from_string(bytes, revision_id)
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory(revision_id, inv, parents)

    def _extract_and_insert_revisions(self, substream, serializer):
        for record in substream:
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            rev = serializer.read_revision_from_string(bytes)
            if rev.revision_id != revision_id:
                raise AssertionError(
                    'Mismatched revision id: %s != %s' % (rev, revision_id))
            self.target_repo.add_revision(revision_id, rev)

    def finished(self):
        if self.target_repo._format._fetch_reconcile:
            self.target_repo.reconcile()


class StreamSource(object):
    """A source of a stream for fetching between repositories."""

    def __init__(self, from_repository, to_format):
        """Create a StreamSource streaming from from_repository."""
        self.from_repository = from_repository
        self.to_format = to_format

    def delta_on_metadata(self):
        """Return True if deltas are permitted on metadata streams.

        That is on revisions and signatures.
        """
        src_serializer = self.from_repository._format._serializer
        target_serializer = self.to_format._serializer
        return (self.to_format._fetch_uses_deltas and
            src_serializer == target_serializer)

    def _fetch_revision_texts(self, revs):
        # fetch signatures first and then the revision texts
        # may need to be an InterRevisionStore call here.
        from_sf = self.from_repository.signatures
        # A missing signature is just skipped.
        keys = [(rev_id,) for rev_id in revs]
        signatures = versionedfile.filter_absent(from_sf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.to_format._fetch_uses_deltas))
        # If a revision has a delta, it is now expanded inside the
        # insert_record_stream code instead.
        from_rf = self.from_repository.revisions
        revisions = from_rf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.delta_on_metadata())
        return [('signatures', signatures), ('revisions', revisions)]

    def _generate_root_texts(self, revs):
        """This will be called by __fetch between fetching weave texts and
        fetching the inventory weave.

        Subclasses should override this if they need to generate root texts
        after fetching weave texts.
        """
        if self._rich_root_upgrade():
            import bzrlib.fetch
            return bzrlib.fetch.Inter1and2Helper(
                self.from_repository).generate_root_texts(revs)
        else:
            return []

    def get_stream(self, search):
        phase = 'file'
        revs = search.get_keys()
        graph = self.from_repository.get_graph()
        revs = list(graph.iter_topo_order(revs))
        data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
        text_keys = []
        for knit_kind, file_id, revisions in data_to_fetch:
            if knit_kind != phase:
                phase = knit_kind
                # Make a new progress bar for this phase
            if knit_kind == "file":
                # Accumulate file texts
                text_keys.extend([(file_id, revision) for revision in
                    revisions])
            elif knit_kind == "inventory":
                # Now copy the file texts.
                from_texts = self.from_repository.texts
                yield ('texts', from_texts.get_record_stream(
                    text_keys, self.to_format._fetch_order,
                    not self.to_format._fetch_uses_deltas))
                # Cause an error if a text occurs after we have done the
                # copy.
                text_keys = None
                # Before we process the inventory we generate the root
                # texts (if necessary) so that the inventories reference
                # them.
                for _ in self._generate_root_texts(revs):
                    yield _
                # NB: This currently reopens the inventory weave in source;
                # using a single stream interface instead would avoid this.
                from_weave = self.from_repository.inventories
                # we fetch only the referenced inventories because we do not
                # know for unselected inventories whether all their required
                # texts are present in the other repository - it could be
                # corrupt.
                yield ('inventories', from_weave.get_record_stream(
                    [(rev_id,) for rev_id in revs],
                    self.inventory_fetch_order(),
                    not self.delta_on_metadata()))
            elif knit_kind == "signatures":
                # Nothing to do here; this will be taken care of when
                # _fetch_revision_texts happens.
                pass
            elif knit_kind == "revisions":
                for record in self._fetch_revision_texts(revs):
                    yield record
            else:
                raise AssertionError("Unknown knit kind %r" % knit_kind)

    def get_stream_for_missing_keys(self, missing_keys):
        # missing keys can only occur when we are byte copying and not
        # translating (because translation means we don't send
        # unreconstructable deltas ever).
        keys = {}
        keys['texts'] = set()
        keys['revisions'] = set()
        keys['inventories'] = set()
        keys['signatures'] = set()
        for key in missing_keys:
            keys[key[0]].add(key[1:])
        if len(keys['revisions']):
            # If we allowed copying revisions at this point, we could end up
            # copying a revision without copying its required texts: a
            # violation of the requirements for repository integrity.
            raise AssertionError(
                'cannot copy revisions to fill in missing deltas %s' % (
                    keys['revisions'],))
        for substream_kind, substream_keys in keys.iteritems():
            vf = getattr(self.from_repository, substream_kind)
            # Ask for full texts always so that we don't need more round
            # trips after this stream.
            stream = vf.get_record_stream(substream_keys,
                self.to_format._fetch_order, True)
            yield substream_kind, stream
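
    # Illustrative sketch (not part of the original module): missing keys are
    # prefixed tuples as produced by _locked_insert_stream above, e.g.
    #
    #     missing = set([('texts', ('file-id', 'rev-1')),
    #                    ('inventories', ('rev-1',))])
    #     for kind, stream in source.get_stream_for_missing_keys(missing):
    #         pass  # feed each substream back into the sink
    #
    # 'file-id', 'rev-1' and 'source' are assumed names.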

    def inventory_fetch_order(self):
        if self._rich_root_upgrade():
            return 'topological'
        else:
            return self.to_format._fetch_order

    def _rich_root_upgrade(self):
        return (not self.from_repository._format.rich_root_data and
            self.to_format.rich_root_data)
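
    # Illustrative sketch (not part of the original module): e.g. fetching
    # from a non-rich-root format (rich_root_data=False) into a rich-root
    # format (rich_root_data=True) makes _rich_root_upgrade() return True,
    # which forces the 'topological' inventory_fetch_order() above.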