1146
1146
# The old API returned a list, should this actually be a set?
1147
1147
return parent_map.keys()
1149
def _check_inventories(self, checker):
1150
"""Check the inventories found from the revision scan.
1152
This is responsible for verifying the sha1 of inventories and
1153
creating a pending_keys set that covers data referenced by inventories.
1155
bar = ui.ui_factory.nested_progress_bar()
1157
self._do_check_inventories(checker, bar)
1161
def _do_check_inventories(self, checker, bar):
1162
"""Helper for _check_inventories."""
1164
keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}
1165
kinds = ['chk_bytes', 'texts']
1166
count = len(checker.pending_keys)
1167
bar.update("inventories", 0, 2)
1168
current_keys = checker.pending_keys
1169
checker.pending_keys = {}
1170
# Accumulate current checks.
1171
for key in current_keys:
1172
if key[0] != 'inventories' and key[0] not in kinds:
1173
checker._report_items.append('unknown key type %r' % (key,))
1174
keys[key[0]].add(key[1:])
1175
if keys['inventories']:
1176
# NB: output order *should* be roughly sorted - topo or
1177
# inverse topo depending on repository - either way decent
1178
# to just delta against. However, pre-CHK formats didn't
1179
# try to optimise inventory layout on disk. As such the
1180
# pre-CHK code path does not use inventory deltas.
1182
for record in self.inventories.check(keys=keys['inventories']):
1183
if record.storage_kind == 'absent':
1184
checker._report_items.append(
1185
'Missing inventory {%s}' % (record.key,))
1187
last_object = self._check_record('inventories', record,
1188
checker, last_object,
1189
current_keys[('inventories',) + record.key])
1190
del keys['inventories']
1193
bar.update("texts", 1)
1194
while (checker.pending_keys or keys['chk_bytes']
1196
# Something to check.
1197
current_keys = checker.pending_keys
1198
checker.pending_keys = {}
1199
# Accumulate current checks.
1200
for key in current_keys:
1201
if key[0] not in kinds:
1202
checker._report_items.append('unknown key type %r' % (key,))
1203
keys[key[0]].add(key[1:])
1204
# Check the outermost kind only - inventories || chk_bytes || texts
1208
for record in getattr(self, kind).check(keys=keys[kind]):
1209
if record.storage_kind == 'absent':
1210
checker._report_items.append(
1211
'Missing inventory {%s}' % (record.key,))
1213
last_object = self._check_record(kind, record,
1214
checker, last_object, current_keys[(kind,) + record.key])
1218
def _check_record(self, kind, record, checker, last_object, item_data):
1219
"""Check a single text from this repository."""
1220
if kind == 'inventories':
1221
rev_id = record.key[0]
1222
inv = self.deserialise_inventory(rev_id,
1223
record.get_bytes_as('fulltext'))
1224
if last_object is not None:
1225
delta = inv._make_delta(last_object)
1226
for old_path, path, file_id, ie in delta:
1229
ie.check(checker, rev_id, inv)
1231
for path, ie in inv.iter_entries():
1232
ie.check(checker, rev_id, inv)
1233
if self._format.fast_deltas:
1235
elif kind == 'chk_bytes':
1236
# No code written to check chk_bytes for this repo format.
1237
checker._report_items.append(
1238
'unsupported key type chk_bytes for %s' % (record.key,))
1239
elif kind == 'texts':
1240
self._check_text(record, checker, item_data)
1242
checker._report_items.append(
1243
'unknown key type %s for %s' % (kind, record.key))
1245
def _check_text(self, record, checker, item_data):
1246
"""Check a single text."""
1247
# Check it is extractable.
1248
# TODO: check length.
1249
if record.storage_kind == 'chunked':
1250
chunks = record.get_bytes_as(record.storage_kind)
1251
sha1 = osutils.sha_strings(chunks)
1252
length = sum(map(len, chunks))
1254
content = record.get_bytes_as('fulltext')
1255
sha1 = osutils.sha_string(content)
1256
length = len(content)
1257
if item_data and sha1 != item_data[1]:
1258
checker._report_items.append(
1259
'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
1260
(record.key, sha1, item_data[1], item_data[2]))
1150
1263
def create(a_bzrdir):
1151
1264
"""Construct the current default format repository in a_bzrdir."""
1715
1828
@needs_read_lock
1716
1829
def get_revisions(self, revision_ids):
1717
"""Get many revisions at once."""
1830
"""Get many revisions at once.
1832
Repositories that need to check data on every revision read should
1833
subclass this method.
1718
1835
return self._get_revisions(revision_ids)
1720
1837
@needs_read_lock
1721
1838
def _get_revisions(self, revision_ids):
1722
1839
"""Core work logic to get many revisions without sanity checks."""
1723
for rev_id in revision_ids:
1724
if not rev_id or not isinstance(rev_id, basestring):
1725
raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
1841
for revid, rev in self._iter_revisions(revision_ids):
1843
raise errors.NoSuchRevision(self, revid)
1845
return [revs[revid] for revid in revision_ids]
1847
def _iter_revisions(self, revision_ids):
1848
"""Iterate over revision objects.
1850
:param revision_ids: An iterable of revisions to examine. None may be
1851
passed to request all revisions known to the repository. Note that
1852
not all repositories can find unreferenced revisions; for those
1853
repositories only referenced ones will be returned.
1854
:return: An iterator of (revid, revision) tuples. Absent revisions (
1855
those asked for but not available) are returned as (revid, None).
1857
if revision_ids is None:
1858
revision_ids = self.all_revision_ids()
1860
for rev_id in revision_ids:
1861
if not rev_id or not isinstance(rev_id, basestring):
1862
raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
1726
1863
keys = [(key,) for key in revision_ids]
1727
1864
stream = self.revisions.get_record_stream(keys, 'unordered', True)
1729
1865
for record in stream:
1866
revid = record.key[0]
1730
1867
if record.storage_kind == 'absent':
1731
raise errors.NoSuchRevision(self, record.key[0])
1732
text = record.get_bytes_as('fulltext')
1733
rev = self._serializer.read_revision_from_string(text)
1734
revs[record.key[0]] = rev
1735
return [revs[revid] for revid in revision_ids]
1870
text = record.get_bytes_as('fulltext')
1871
rev = self._serializer.read_revision_from_string(text)
1737
1874
@needs_read_lock
1738
1875
def get_revision_xml(self, revision_id):
2562
2702
return record.get_bytes_as('fulltext')
2564
2704
@needs_read_lock
2565
def check(self, revision_ids=None):
2705
def check(self, revision_ids=None, callback_refs=None, check_repo=True):
2566
2706
"""Check consistency of all history of given revision_ids.
2568
2708
Different repository implementations should override _check().
2570
2710
:param revision_ids: A non-empty list of revision_ids whose ancestry
2571
2711
will be checked. Typically the last revision_id of a branch.
2712
:param callback_refs: A dict of check-refs to resolve and callback
2713
the check/_check method on the items listed as wanting the ref.
2715
:param check_repo: If False do not check the repository contents, just
2716
calculate the data callback_refs requires and call them back.
2573
return self._check(revision_ids)
2718
return self._check(revision_ids, callback_refs=callback_refs,
2719
check_repo=check_repo)
2575
def _check(self, revision_ids):
2576
result = check.Check(self)
2721
def _check(self, revision_ids, callback_refs, check_repo):
2722
result = check.Check(self, check_repo=check_repo)
2723
result.check(callback_refs)
2580
2726
def _warn_if_deprecated(self):