        self._require_root_change(tree)
        self.basis_delta_revision = basis_revision_id

    def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):
        parent_keys = tuple([(file_id, parent) for parent in parents])
        return self.repository.texts._add_text(
            (file_id, self._new_revision_id), parent_keys, new_text,
            nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
        # Note: as we read the content directly from the tree, we know it's
        # not been turned into unicode or badly split - but a broken tree
        # implementation could give us bad output from readlines() so this
        # is not a guarantee of safety. What would be better is always
        # checking the content during test suite execution. RBC 20070912
        parent_keys = tuple((file_id, parent) for parent in parents)
        return self.repository.texts.add_lines(
            (file_id, self._new_revision_id), parent_keys, new_lines,
            nostore_sha=nostore_sha, random_id=self.random_revid,
            check_content=False)[0:2]
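    # A minimal usage sketch (hypothetical ids and content), showing how a
    # commit builder records one file text; the [0:2] slice keeps just the
    # first two elements (the stored text's sha1 and length):
    #
    #   sha1, length = builder._add_text_to_weave(
    #       'file-id', ['hello\n', 'world\n'],
    #       parents=['parent-rev-id'], nostore_sha=None)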


class RootCommitBuilder(CommitBuilder):
        # The old API returned a list, should this actually be a set?
        return parent_map.keys()
    def _check_inventories(self, checker):
        """Check the inventories found from the revision scan.

        This is responsible for verifying the sha1 of inventories and
        creating a pending_keys set that covers data referenced by inventories.
        """
        bar = ui.ui_factory.nested_progress_bar()
        try:
            self._do_check_inventories(checker, bar)
        finally:
            bar.finished()
    def _do_check_inventories(self, checker, bar):
        """Helper for _check_inventories."""
        keys = {'chk_bytes': set(), 'inventories': set(), 'texts': set()}
        kinds = ['chk_bytes', 'texts']
        count = len(checker.pending_keys)
        bar.update("inventories", 0, 2)
        current_keys = checker.pending_keys
        checker.pending_keys = {}
        # Accumulate current checks.
        for key in current_keys:
            if key[0] != 'inventories' and key[0] not in kinds:
                checker._report_items.append('unknown key type %r' % (key,))
            keys[key[0]].add(key[1:])
        if keys['inventories']:
            # NB: output order *should* be roughly sorted - topo or
            # inverse topo depending on repository - either way decent
            # to just delta against. However, pre-CHK formats didn't
            # try to optimise inventory layout on disk. As such the
            # pre-CHK code path does not use inventory deltas.
            last_object = None
            for record in self.inventories.check(keys=keys['inventories']):
                if record.storage_kind == 'absent':
                    checker._report_items.append(
                        'Missing inventory {%s}' % (record.key,))
                else:
                    last_object = self._check_record('inventories', record,
                        checker, last_object,
                        current_keys[('inventories',) + record.key])
            del keys['inventories']
        else:
            return
        bar.update("texts", 1)
        while (checker.pending_keys or keys['chk_bytes']
               or keys['texts']):
            # Something to check.
            current_keys = checker.pending_keys
            checker.pending_keys = {}
            # Accumulate current checks.
            for key in current_keys:
                if key[0] not in kinds:
                    checker._report_items.append('unknown key type %r' % (key,))
                keys[key[0]].add(key[1:])
            # Check the outermost kind only - inventories || chk_bytes || texts
            for kind in kinds:
                if keys[kind]:
                    last_object = None
                    for record in getattr(self, kind).check(keys=keys[kind]):
                        if record.storage_kind == 'absent':
                            checker._report_items.append(
                                'Missing %s {%s}' % (kind, record.key))
                        else:
                            last_object = self._check_record(kind, record,
                                checker, last_object,
                                current_keys[(kind,) + record.key])
                    keys[kind] = set()
                    break
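    # A sketch of the key routing above (hypothetical values): pending_keys
    # maps keys to the item data that referenced them, keyed by store name
    # first, e.g.
    #
    #   checker.pending_keys = {
    #       ('inventories', 'rev-1'): ...,
    #       ('texts', 'file-id', 'rev-1'): ...,
    #   }
    #
    # The accumulation loop buckets these into keys['inventories'] =
    # set([('rev-1',)]) and keys['texts'] = set([('file-id', 'rev-1')])
    # before asking each store to check its own keys.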
    def _check_record(self, kind, record, checker, last_object, item_data):
        """Check a single text from this repository."""
        if kind == 'inventories':
            rev_id = record.key[0]
            inv = self.deserialise_inventory(rev_id,
                record.get_bytes_as('fulltext'))
            if last_object is not None:
                delta = inv._make_delta(last_object)
                for old_path, path, file_id, ie in delta:
                    if ie is None:
                        continue
                    ie.check(checker, rev_id, inv)
            else:
                for path, ie in inv.iter_entries():
                    ie.check(checker, rev_id, inv)
            if self._format.fast_deltas:
                return inv
        elif kind == 'chk_bytes':
            # No code written to check chk_bytes for this repo format.
            checker._report_items.append(
                'unsupported key type chk_bytes for %s' % (record.key,))
        elif kind == 'texts':
            self._check_text(record, checker, item_data)
        else:
            checker._report_items.append(
                'unknown key type %s for %s' % (kind, record.key))
        return None
    def _check_text(self, record, checker, item_data):
        """Check a single text."""
        # Check it is extractable.
        # TODO: check length.
        if record.storage_kind == 'chunked':
            chunks = record.get_bytes_as(record.storage_kind)
            sha1 = osutils.sha_strings(chunks)
            length = sum(map(len, chunks))
        else:
            content = record.get_bytes_as('fulltext')
            sha1 = osutils.sha_string(content)
            length = len(content)
        if item_data and sha1 != item_data[1]:
            checker._report_items.append(
                'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
                (record.key, sha1, item_data[1], item_data[2]))
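    # Both branches above compute the same digest: hashing chunks
    # incrementally is equivalent to hashing their concatenation, e.g.
    #
    #   osutils.sha_strings(['ab', 'cd']) == osutils.sha_string('abcd')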

    @staticmethod
    def create(a_bzrdir):
        """Construct the current default format repository in a_bzrdir."""

        present_inventories = unstacked_inventories.get_parent_map(
            key[-1:] for key in parents)
        parents.difference_update(present_inventories)
        if len(parents) == 0:
            # No missing parent inventories.
            return set()
        if not check_for_missing_texts:
            return set(('inventories', rev_id) for (rev_id,) in parents)
        # Ok, now we have a list of missing inventories. But these only matter
        # if the inventories that reference them are missing some texts they
        # appear to introduce.
        # XXX: Texts referenced by all added inventories need to be present,
        # but at the moment we're only checking for texts referenced by
        # inventories at the graph's edge.
        key_deps = self.revisions._index._key_dependencies
        key_deps.add_keys(present_inventories)
        referrers = frozenset(r[0] for r in key_deps.get_referrers())
        file_ids = self.fileids_altered_by_revision_ids(referrers)
        missing_texts = set()
        for file_id, version_ids in file_ids.iteritems():
            missing_texts.update(
                (file_id, version_id) for version_id in version_ids)
        present_texts = self.texts.get_parent_map(missing_texts)
        missing_texts.difference_update(present_texts)
        if not missing_texts:
            # No texts are missing, so all revisions and their deltas are
            # reconstructable.
            return set()
        # Alternatively the text versions could be returned as the missing
        # keys, but this is likely to be less data.
        missing_keys = set(('inventories', rev_id) for (rev_id,) in parents)
        return missing_keys
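    # A caller sketch (hypothetical revision id): the returned keys name
    # whole inventories, e.g. set([('inventories', 'parent-rev-id')]), so a
    # streaming sink can suspend its write group, fetch those inventories,
    # and resume before committing.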

    @needs_read_lock
    def get_revisions(self, revision_ids):
        """Get many revisions at once.

        Repositories that need to check data on every revision read should
        subclass this method.
        """
        return self._get_revisions(revision_ids)

    @needs_read_lock
    def _get_revisions(self, revision_ids):
        """Core work logic to get many revisions without sanity checks."""
        revs = {}
        for revid, rev in self._iter_revisions(revision_ids):
            if rev is None:
                raise errors.NoSuchRevision(self, revid)
            revs[revid] = rev
        return [revs[revid] for revid in revision_ids]

    def _iter_revisions(self, revision_ids):
        """Iterate over revision objects.

        :param revision_ids: An iterable of revisions to examine. None may be
            passed to request all revisions known to the repository. Note that
            not all repositories can find unreferenced revisions; for those
            repositories only referenced ones will be returned.
        :return: An iterator of (revid, revision) tuples. Absent revisions (
            those asked for but not available) are returned as (revid, None).
        """
        if revision_ids is None:
            revision_ids = self.all_revision_ids()
        else:
            for rev_id in revision_ids:
                if not rev_id or not isinstance(rev_id, basestring):
                    raise errors.InvalidRevisionId(
                        revision_id=rev_id, branch=self)
        keys = [(key,) for key in revision_ids]
        stream = self.revisions.get_record_stream(keys, 'unordered', True)
        for record in stream:
            revid = record.key[0]
            if record.storage_kind == 'absent':
                yield (revid, None)
            else:
                text = record.get_bytes_as('fulltext')
                rev = self._serializer.read_revision_from_string(text)
                yield (revid, rev)
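    # A usage sketch (hypothetical ids): absent revisions are yielded as
    # (revid, None) rather than raising, leaving the policy to the caller.
    #
    #   for revid, rev in repo._iter_revisions(['rev-1', 'ghost-rev']):
    #       if rev is None:
    #           print 'missing:', revid
    #       else:
    #           print revid, rev.message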

    @needs_read_lock
    def get_revision_xml(self, revision_id):
        # TODO: jam 20070210 This shouldn't be necessary since get_revision
                    yield line, revid

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_keys):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :param revision_keys: The revision keys to filter for. This should be a
            set or other type which supports efficient __contains__ lookups, as
            the revision key from each parsed line will be looked up in the
            revision_keys filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids
            that altered it listed explicitly.
        """
        seen = set(self._find_text_key_references_from_xml_inventory_lines(
                line_iterator).iterkeys())
        parent_keys = self._find_parent_keys_of_revisions(revision_keys)
        parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
            self._inventory_xml_lines_for_keys(parent_keys)))
        new_keys = seen - parent_seen
        result = {}
        setdefault = result.setdefault
        for key in new_keys:
            setdefault(key[0], set()).add(key[-1])
        return result
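    # The result maps each altered file id to the revisions that altered
    # it, e.g. (hypothetical ids):
    #
    #   {'file-a-id': set(['rev-2', 'rev-5']),
    #    'file-b-id': set(['rev-5'])}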

    def _find_parent_ids_of_revisions(self, revision_ids):
        """Find all parent ids that are mentioned in the revision graph.

        :return: set of revisions that are parents of revision_ids which are
            not part of revision_ids themselves
        """
        parent_map = self.get_parent_map(revision_ids)
        parent_ids = set()
        map(parent_ids.update, parent_map.itervalues())
        parent_ids.difference_update(revision_ids)
        parent_ids.discard(_mod_revision.NULL_REVISION)
        return parent_ids

    def _find_parent_keys_of_revisions(self, revision_keys):
        """Similar to _find_parent_ids_of_revisions, but used with keys.

        :param revision_keys: An iterable of revision_keys.
        :return: The parents of all revision_keys that are not already in
            revision_keys
        """
        parent_map = self.revisions.get_parent_map(revision_keys)
        parent_keys = set()
        map(parent_keys.update, parent_map.itervalues())
        parent_keys.difference_update(revision_keys)
        parent_keys.discard(_mod_revision.NULL_REVISION)
        return parent_keys
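    # A sketch of the two helpers above (hypothetical ids): the first works
    # on plain revision ids, the second on 1-tuple keys, but both exclude
    # the supplied revisions and NULL_REVISION.
    #
    #   repo._find_parent_ids_of_revisions(['rev-2'])
    #   # -> set(['rev-1'])
    #   repo._find_parent_keys_of_revisions([('rev-2',)])
    #   # -> set([('rev-1',)])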

    def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
        """Find the file ids and versions affected by revisions.

        return self.get_revision(revision_id).inventory_sha1

    def get_rev_id_for_revno(self, revno, known_pair):
        """Return the revision id of a revno, given a later (revno, revid)
        pair in the same history.

        :return: if found (True, revid).  If the available history ran out
            before reaching the revno, then this returns
            (False, (closest_revno, closest_revid)).
        """
        known_revno, known_revid = known_pair
        partial_history = [known_revid]
        distance_from_known = known_revno - revno
        if distance_from_known < 0:
            raise ValueError(
                'requested revno (%d) is later than given known revno (%d)'
                % (revno, known_revno))
        try:
            _iter_for_revno(
                self, partial_history, stop_index=distance_from_known)
        except errors.RevisionNotPresent, err:
            if err.revision_id == known_revid:
                # The start revision (known_revid) wasn't found.
                raise
            # This is a stacked repository with no fallbacks, or there's a
            # left-hand ghost.  Either way, even though the revision named
            # in the error isn't in this repo, we know it's the next step in
            # this left-hand history.
            partial_history.append(err.revision_id)
        if len(partial_history) <= distance_from_known:
            # Didn't find enough history to get a revid for the revno.
            earliest_revno = known_revno - len(partial_history) + 1
            return (False, (earliest_revno, partial_history[-1]))
        if len(partial_history) - 1 > distance_from_known:
            raise AssertionError('_iter_for_revno returned too much history')
        return (True, partial_history[-1])
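    # A usage sketch (hypothetical linear history rev-1..rev-10): ask for
    # revno 7 given the branch tip pair.
    #
    #   repo.get_rev_id_for_revno(7, (10, 'rev-10'))
    #   # -> (True, 'rev-7')
    #
    # If only revisions back to revno 8 are locally available, the fallback
    # signal lets the caller retry against a fallback repository:
    #
    #   # -> (False, (8, 'rev-8'))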

    def iter_reverse_revision_history(self, revision_id):
        """Iterate backwards through revision ids in the lefthand history

        return record.get_bytes_as('fulltext')

    @needs_read_lock
    def check(self, revision_ids=None, callback_refs=None, check_repo=True):
        """Check consistency of all history of given revision_ids.

        Different repository implementations should override _check().

        :param revision_ids: A non-empty list of revision_ids whose ancestry
            will be checked.  Typically the last revision_id of a branch.
        :param callback_refs: A dict of check-refs to resolve and callback
            the check/_check method on the items listed as wanting the ref.
        :param check_repo: If False do not check the repository contents, just
            calculate the data callback_refs requires and call them back.
        """
        return self._check(revision_ids, callback_refs=callback_refs,
            check_repo=check_repo)

    def _check(self, revision_ids, callback_refs, check_repo):
        result = check.Check(self, check_repo=check_repo)
        result.check(callback_refs)
        return result

    def _warn_if_deprecated(self):

        return self.source.revision_ids_to_search_result(result_set)


class InterPackRepo(InterSameDataRepository):
    """Optimised code paths between Pack based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import pack_repo
        return pack_repo.RepositoryFormatKnitPack6RichRoot()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Pack formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.

        InterPackRepo does not support CHK based repositories.
        """
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        from bzrlib.repofmt.groupcompress_repo import RepositoryFormatCHK1
        try:
            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                isinstance(target._format, RepositoryFormatPack))
            not_packs = (isinstance(source._format, RepositoryFormatCHK1) or
                isinstance(target._format, RepositoryFormatCHK1))
        except AttributeError:
            return False
        if not_packs or not are_packs:
            return False
        return InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
            fetch_spec=None):
        """See InterRepository.fetch()."""
        if (len(self.source._fallback_repositories) > 0 or
            len(self.target._fallback_repositories) > 0):
            # The pack layer is not aware of fallback repositories, so when
            # fetching from a stacked repository or into a stacked repository
            # we use the generic fetch logic which uses the VersionedFiles
            # attributes on repository.
            from bzrlib.fetch import RepoFetcher
            fetcher = RepoFetcher(self.target, self.source, revision_id,
                pb, find_ghosts, fetch_spec=fetch_spec)
        if fetch_spec is not None:
            if len(list(fetch_spec.heads)) != 1:
                raise AssertionError(
                    "InterPackRepo.fetch doesn't support "
                    "fetching multiple heads yet.")
            revision_id = list(fetch_spec.heads)[0]
            fetch_spec = None
        if revision_id is None:
            # everything to do - use pack logic
            # to fetch from all packs to one without
            # inventory parsing etc, IFF nothing to be copied is in the target.
            source_revision_ids = frozenset(self.source.all_revision_ids())
            revision_ids = source_revision_ids - \
                frozenset(self.target.get_parent_map(source_revision_ids))
            revision_keys = [(revid,) for revid in revision_ids]
            index = self.target._pack_collection.revision_index.combined_index
            present_revision_ids = set(item[1][0] for item in
                index.iter_entries(revision_keys))
            revision_ids = set(revision_ids) - present_revision_ids
            # implementing the TODO will involve:
            # - detecting when all of a pack is selected
            # - avoiding as much as possible pre-selection, so the
            # more-core routines such as create_pack_from_packs can filter in
            # a just-in-time fashion. (though having a HEADS list on a
            # repository might make this a lot easier, because we could
            # sensibly detect 'new revisions' without doing a full index scan.)
        elif _mod_revision.is_null(revision_id):
            # nothing to do:
            return (0, [])
        else:
            revision_ids = self.search_missing_revision_ids(revision_id,
                find_ghosts=find_ghosts).get_keys()
            if len(revision_ids) == 0:
                return (0, [])
        return self._pack(self.source, self.target, revision_ids)

    def _pack(self, source, target, revision_ids):
        from bzrlib.repofmt.pack_repo import Packer
        packs = source._pack_collection.all_packs()
        pack = Packer(self.target._pack_collection, packs, '.fetch',
            revision_ids).pack()
        if pack is not None:
            self.target._pack_collection._save_pack_names()
            copied_revs = pack.get_revision_count()
            # Trigger an autopack. This may duplicate effort as we've just done
            # a pack creation, but for now it is simpler to think about as
            # 'upload data, then repack if needed'.
            self.target._pack_collection.autopack()
            return (copied_revs, [])
        else:
            return (0, [])
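    # Judging from the (0, []) early returns above, the result is a
    # (copied_revision_count, failed_revisions) pair; a caller sketch
    # (hypothetical id):
    #
    #   copied, failures = inter.fetch(revision_id='rev-10')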

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids().

        :param find_ghosts: Find ghosts throughout the ancestry of
            revision_id.
        """
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        elif revision_id is not None:
            # Find ghosts: search for revisions pointing from one repository to
            # the other, and vice versa, anywhere in the history of revision_id.
            graph = self.target.get_graph(other_repository=self.source)
            searcher = graph._make_breadth_first_searcher([revision_id])
            found_ids = set()
            while True:
                try:
                    next_revs, ghosts = searcher.next_with_ghosts()
                except StopIteration:
                    break
                if revision_id in ghosts:
                    raise errors.NoSuchRevision(self.source, revision_id)
                found_ids.update(next_revs)
                found_ids.update(ghosts)
            found_ids = frozenset(found_ids)
            # Double query here: should be able to avoid this by changing the
            # graph api further.
            result_set = found_ids - frozenset(
                self.target.get_parent_map(found_ids))
        else:
            source_ids = self.source.all_revision_ids()
            # source_ids is the worst possible case we may need to pull.
            # now we want to filter source_ids against what we actually
            # have in target, but don't try to check for existence where we know
            # we do not have a revision as that would be pointless.
            target_ids = set(self.target.all_revision_ids())
            result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)
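    # A caller sketch (hypothetical id): the SearchResult wraps the set of
    # revisions present in source but absent from target, and get_keys()
    # (used by fetch() above) unpacks it.
    #
    #   result = inter.search_missing_revision_ids('rev-10', find_ghosts=True)
    #   missing = result.get_keys()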


class InterDifferingSerializer(InterRepository):

                self._revision_id_to_root_id[parent_id] = None
            else:
                parent_root_id = self._revision_id_to_root_id[parent_id]
            if root_id == parent_root_id:
                # With stacking we _might_ want to refer to a non-local
                # revision, but this code path only applies when we have the
                # full content available, so ghosts really are ghosts, not just
                # the edge of local data.
                parent_keys.append((parent_id,))
            else:
                # root_id may be in the parent anyway.
                try:
                    tree = self.source.revision_tree(parent_id)
                except errors.NoSuchRevision:
                    # ghost, can't refer to it.
                    pass
                else:
                    try:
                        parent_keys.append((tree.inventory[root_id].revision,))
                    except errors.NoSuchId:
                        # not in the tree
                        pass
        g = graph.Graph(self.source.revisions)
        heads = g.heads(parent_keys)
        selected_keys = []
        for key in parent_keys:
            if key in heads and key not in selected_keys:
                selected_keys.append(key)
        return tuple([(root_id,) + key for key in selected_keys])
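        # A sketch of the heads filtering above (hypothetical keys): if
        # ('rev-b',) descends from ('rev-a',), only the head survives,
        #
        #   g.heads([('rev-a',), ('rev-b',)])  # -> set([('rev-b',)])
        #
        # so the new root text gets one parent instead of a redundant pair.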

    def _new_root_data_stream(self, root_keys_to_create, parent_map):
        for root_key in root_keys_to_create:

                self.target_repo.signatures.insert_record_stream(substream)
            else:
                raise AssertionError('kaboom! %s' % (substream_type,))
        # Done inserting data, and the missing_keys calculations will try to
        # read back from the inserted data, so flush the writes to the new pack
        # (if this is pack format).
        if new_pack is not None:
            new_pack._write_data('', flush=True)
        # Find all the new revisions (including ones from resume_tokens)
        missing_keys = self.target_repo.get_missing_parent_inventories(
            check_for_missing_texts=is_resume)
        try:
            for prefix, versioned_file in (
                    ('texts', self.target_repo.texts),
                    ('inventories', self.target_repo.inventories),
                    ('revisions', self.target_repo.revisions),
                    ('signatures', self.target_repo.signatures),
                    ('chk_bytes', self.target_repo.chk_bytes),
                    ):
                if versioned_file is None:
                    continue
                missing_keys.update((prefix,) + key for key in
                    versioned_file.get_missing_compression_parent_keys())
        except NotImplementedError:

            yield versionedfile.FulltextContentFactory(
                key, parent_keys, None, as_bytes)


def _iter_for_revno(repo, partial_history_cache, stop_index=None,
                    stop_revision=None):
    """Extend the partial history to include a given index.

    If a stop_index is supplied, stop when that index has been reached.
    If a stop_revision is supplied, stop when that revision is
    encountered.  Otherwise, stop when the beginning of history is
    reached.

    :param stop_index: The index which should be present.  When it is
        present, history extension will stop.
    :param stop_revision: The revision id which should be present.  When
        it is encountered, history extension will stop.
    """
    start_revision = partial_history_cache[-1]
    iterator = repo.iter_reverse_revision_history(start_revision)
    try:
        # skip the last revision in the list
        iterator.next()
        while True:
            if (stop_index is not None and
                len(partial_history_cache) > stop_index):
                break
            if partial_history_cache[-1] == stop_revision:
                break
            revision_id = iterator.next()
            partial_history_cache.append(revision_id)
    except StopIteration:
        # No more history
        return
except StopIteration: