2189
2282
self._reconcile_fixes_text_parents = True
2190
2283
self._reconcile_backsup_inventory = False
2192
def _warn_if_deprecated(self):
2285
def _warn_if_deprecated(self, branch=None):
2193
2286
# This class isn't deprecated, but one sub-format is
2194
2287
if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
2195
from bzrlib import repository
2196
if repository._deprecation_warning_done:
2198
repository._deprecation_warning_done = True
2199
warning("Format %s for %s is deprecated - please use"
2200
" 'bzr upgrade --1.6.1-rich-root'"
2201
% (self._format, self.bzrdir.transport.base))
2288
super(KnitPackRepository, self)._warn_if_deprecated(branch)
2203
2290
def _abort_write_group(self):
2291
self.revisions._index._key_dependencies.clear()
2204
2292
self._pack_collection._abort_write_group()
2206
def _find_inconsistent_revision_parents(self):
2207
"""Find revisions with incorrectly cached parents.
2209
:returns: an iterator yielding tuples of (revision-id, parents-in-index,
2210
parents-in-revision).
2212
if not self.is_locked():
2213
raise errors.ObjectNotLocked(self)
2214
pb = ui.ui_factory.nested_progress_bar()
2217
revision_nodes = self._pack_collection.revision_index \
2218
.combined_index.iter_all_entries()
2219
index_positions = []
2220
# Get the cached index values for all revisions, and also the
2221
# location in each index of the revision text so we can perform
2223
for index, key, value, refs in revision_nodes:
2224
node = (index, key, value, refs)
2225
index_memo = self.revisions._index._node_to_position(node)
2226
assert index_memo[0] == index
2227
index_positions.append((index_memo, key[0],
2228
tuple(parent[0] for parent in refs[0])))
2229
pb.update("Reading revision index", 0, 0)
2230
index_positions.sort()
2232
pb.update("Checking cached revision graph", 0,
2233
len(index_positions))
2234
for offset in xrange(0, len(index_positions), 1000):
2235
pb.update("Checking cached revision graph", offset)
2236
to_query = index_positions[offset:offset + batch_size]
2239
rev_ids = [item[1] for item in to_query]
2240
revs = self.get_revisions(rev_ids)
2241
for revision, item in zip(revs, to_query):
2242
index_parents = item[2]
2243
rev_parents = tuple(revision.parent_ids)
2244
if index_parents != rev_parents:
2245
result.append((revision.revision_id, index_parents,
2294
def _get_source(self, to_format):
    """Return the stream source to use for ``to_format``.

    :param to_format: A format object exposing ``network_name()``.
    :return: A KnitPackStreamSource when ``to_format`` reports the same
        network name as this repository's own format; otherwise whatever
        the parent class's ``_get_source`` returns.
    """
    # Only identical network names get the same-format stream source.
    if to_format.network_name() == self._format.network_name():
        return KnitPackStreamSource(self, to_format)
    return super(KnitPackRepository, self)._get_source(to_format)
2251
2299
def _make_parents_provider(self):
    """Return a ``graph.CachingParentsProvider`` wrapping this repository."""
    return graph.CachingParentsProvider(self)
2352
2425
transaction = self._transaction
2353
2426
self._transaction = None
2354
2427
transaction.finish()
2355
for repo in self._fallback_repositories:
2358
2429
self.control_files.unlock()
2431
if not self.is_locked():
2359
2432
for repo in self._fallback_repositories:
2363
class CHKInventoryRepository(KnitPackRepository):
2364
"""subclass of KnitPackRepository that uses CHK based inventories."""
2366
def _add_inventory_checked(self, revision_id, inv, parents):
2367
"""Add inv to the repository after checking the inputs.
2369
This function can be overridden to allow different inventory styles.
2371
:seealso: add_inventory, for the contract.
2374
serializer = self._format._serializer
2375
result = CHKInventory.from_inventory(self.chk_bytes, inv,
2376
maximum_size=serializer.maximum_size,
2377
search_key_name=serializer.search_key_name)
2378
inv_lines = result.to_lines()
2379
return self._inventory_add_lines(revision_id, parents,
2380
inv_lines, check_content=False)
2382
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
2383
parents, basis_inv=None, propagate_caches=False):
2384
"""Add a new inventory expressed as a delta against another revision.
2386
:param basis_revision_id: The inventory id the delta was created
2388
:param delta: The inventory delta (see Inventory.apply_delta for
2390
:param new_revision_id: The revision id that the inventory is being
2392
:param parents: The revision ids of the parents that revision_id is
2393
known to have and are in the repository already. These are supplied
2394
for repositories that depend on the inventory graph for revision
2395
graph access, as well as for those that pun ancestry with delta
2397
:param basis_inv: The basis inventory if it is already known,
2399
:param propagate_caches: If True, the caches for this inventory are
2400
copied to and updated for the result if possible.
2402
:returns: (validator, new_inv)
2403
The validator(which is a sha1 digest, though what is sha'd is
2404
repository format specific) of the serialized inventory, and the
2405
resulting inventory.
2407
if basis_revision_id == _mod_revision.NULL_REVISION:
2408
return KnitPackRepository.add_inventory_by_delta(self,
2409
basis_revision_id, delta, new_revision_id, parents)
2410
if not self.is_in_write_group():
2411
raise AssertionError("%r not in write group" % (self,))
2412
_mod_revision.check_not_reserved_id(new_revision_id)
2413
basis_tree = self.revision_tree(basis_revision_id)
2414
basis_tree.lock_read()
2416
if basis_inv is None:
2417
basis_inv = basis_tree.inventory
2418
result = basis_inv.create_by_apply_delta(delta, new_revision_id,
2419
propagate_caches=propagate_caches)
2420
inv_lines = result.to_lines()
2421
return self._inventory_add_lines(new_revision_id, parents,
2422
inv_lines, check_content=False), result
2426
def _iter_inventories(self, revision_ids):
2427
"""Iterate over many inventory objects."""
2428
keys = [(revision_id,) for revision_id in revision_ids]
2429
stream = self.inventories.get_record_stream(keys, 'unordered', True)
2431
for record in stream:
2432
if record.storage_kind != 'absent':
2433
texts[record.key] = record.get_bytes_as('fulltext')
2435
raise errors.NoSuchRevision(self, record.key)
2437
yield CHKInventory.deserialise(self.chk_bytes, texts[key], key)
2439
def _iter_inventory_xmls(self, revision_ids):
2440
# Without a native 'xml' inventory, this method doesn't make sense, so
2441
# make it raise to trap naughty direct users.
2442
raise NotImplementedError(self._iter_inventory_xmls)
2444
def _find_revision_outside_set(self, revision_ids):
2445
revision_set = frozenset(revision_ids)
2446
for revid in revision_ids:
2447
parent_ids = self.get_parent_map([revid]).get(revid, ())
2448
for parent in parent_ids:
2449
if parent in revision_set:
2450
# Parent is not outside the set
2452
if parent not in self.get_parent_map([parent]):
2456
return _mod_revision.NULL_REVISION
2458
def _find_file_keys_to_fetch(self, revision_ids, pb):
2459
rich_root = self.supports_rich_root()
2460
revision_outside_set = self._find_revision_outside_set(revision_ids)
2461
if revision_outside_set == _mod_revision.NULL_REVISION:
2462
uninteresting_root_keys = set()
2464
uninteresting_inv = self.get_inventory(revision_outside_set)
2465
uninteresting_root_keys = set([uninteresting_inv.id_to_entry.key()])
2466
interesting_root_keys = set()
2467
for idx, inv in enumerate(self.iter_inventories(revision_ids)):
2468
interesting_root_keys.add(inv.id_to_entry.key())
2469
revision_ids = frozenset(revision_ids)
2470
file_id_revisions = {}
2471
bytes_to_info = CHKInventory._bytes_to_utf8name_key
2472
for records, items in chk_map.iter_interesting_nodes(self.chk_bytes,
2473
interesting_root_keys, uninteresting_root_keys,
2475
# This is cheating a bit to use the last grabbed 'inv', but it
2477
for name, bytes in items:
2478
(name_utf8, file_id, revision_id) = bytes_to_info(bytes)
2479
if not rich_root and name_utf8 == '':
2481
if revision_id in revision_ids:
2482
# Would we rather build this up into file_id => revision
2485
file_id_revisions[file_id].add(revision_id)
2487
file_id_revisions[file_id] = set([revision_id])
2488
for file_id, revisions in file_id_revisions.iteritems():
2489
yield ('file', file_id, revisions)
2491
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
2492
"""Find the file ids and versions affected by revisions.
2494
:param revisions: an iterable containing revision ids.
2495
:param _inv_weave: The inventory weave from this repository or None.
2496
If None, the inventory weave will be opened automatically.
2497
:return: a dictionary mapping altered file-ids to an iterable of
2498
revision_ids. Each altered file-ids has the exact revision_ids that
2499
altered it listed explicitly.
2501
rich_roots = self.supports_rich_root()
2503
pb = ui.ui_factory.nested_progress_bar()
2505
total = len(revision_ids)
2506
for pos, inv in enumerate(self.iter_inventories(revision_ids)):
2507
pb.update("Finding text references", pos, total)
2508
for entry in inv.iter_just_entries():
2509
if entry.revision != inv.revision_id:
2511
if not rich_roots and entry.file_id == inv.root_id:
2513
alterations = result.setdefault(entry.file_id, set([]))
2514
alterations.add(entry.revision)
2519
def find_text_key_references(self):
2520
"""Find the text key references within the repository.
2522
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
2523
to whether they were referred to by the inventory of the
2524
revision_id that they contain. The inventory texts from all present
2525
revision ids are assessed to generate this report.
2527
# XXX: Slow version but correct: rewrite as a series of delta
2528
# examinations/direct tree traversal. Note that that will require care
2529
# as a common node is reachable both from the inventory that added it,
2530
# and others afterwards.
2531
revision_keys = self.revisions.keys()
2533
rich_roots = self.supports_rich_root()
2534
pb = ui.ui_factory.nested_progress_bar()
2536
all_revs = self.all_revision_ids()
2537
total = len(all_revs)
2538
for pos, inv in enumerate(self.iter_inventories(all_revs)):
2539
pb.update("Finding text references", pos, total)
2540
for _, entry in inv.iter_entries():
2541
if not rich_roots and entry.file_id == inv.root_id:
2543
key = (entry.file_id, entry.revision)
2544
result.setdefault(key, False)
2545
if entry.revision == inv.revision_id:
2551
def _reconcile_pack(self, collection, packs, extension, revs, pb):
    """Run a CHK-aware reconcile pack.

    Builds a ``CHKReconcilePacker`` from ``collection``, ``packs``,
    ``extension`` and ``revs`` and returns the result of its ``pack()``
    call.

    :param pb: Passed straight through to ``packer.pack()``.
    :return: Whatever ``CHKReconcilePacker.pack(pb)`` returns.
    """
    packer = CHKReconcilePacker(collection, packs, extension, revs)
    return packer.pack(pb)
2556
class CHKReconcilePacker(ReconcilePacker):
2557
"""Subclass of ReconcilePacker for handling chk inventories."""
2559
def _process_inventory_lines(self, inv_lines):
2560
"""Generate a text key reference map rather for reconciling with."""
2561
repo = self._pack_collection.repo
2562
# XXX: This double-reads the inventories; but it works.
2563
refs = repo.find_text_key_references()
2564
self._text_refs = refs
2565
# during reconcile we:
2566
# - convert unreferenced texts to full texts
2567
# - correct texts which reference a text not copied to be full texts
2568
# - copy all others as-is but with corrected parents.
2569
# - so at this point we don't know enough to decide what becomes a full
2571
self._text_filter = None
2572
# Copy the selected inventory roots, extracting the CHK references
2574
pending_refs = set()
2575
for line, revid in inv_lines:
2576
if line.startswith('id_to_entry: '):
2577
pending_refs.add((line[13:],))
2579
pending_refs = self._copy_chks(pending_refs)
2436
class KnitPackStreamSource(StreamSource):
2437
"""A StreamSource used to transfer data between same-format KnitPack repos.
2439
This source assumes:
2440
1) Same serialization format for all objects
2441
2) Same root information
2442
3) XML format inventories
2443
4) Atomic inserts (so we can stream inventory texts before text
2448
def __init__(self, from_repository, to_format):
    """Set up the stream source.

    :param from_repository: Forwarded to the parent class initialiser.
    :param to_format: Forwarded to the parent class initialiser.
    """
    super(KnitPackStreamSource, self).__init__(from_repository, to_format)
    # No text keys are known yet; this starts out as None.
    self._text_keys = None
    # Ordering argument used when the text record stream is requested.
    self._text_fetch_order = 'unordered'
2453
def _get_filtered_inv_stream(self, revision_ids):
2454
from_repo = self.from_repository
2455
parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
2456
parent_keys = [(p,) for p in parent_ids]
2457
find_text_keys = from_repo._find_text_key_references_from_xml_inventory_lines
2458
parent_text_keys = set(find_text_keys(
2459
from_repo._inventory_xml_lines_for_keys(parent_keys)))
2460
content_text_keys = set()
2461
knit = KnitVersionedFiles(None, None)
2462
factory = KnitPlainFactory()
2463
def find_text_keys_from_content(record):
2464
if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
2465
raise ValueError("Unknown content storage kind for"
2466
" inventory text: %s" % (record.storage_kind,))
2467
# It's a knit record, it has a _raw_record field (even if it was
2468
# reconstituted from a network stream).
2469
raw_data = record._raw_record
2470
# read the entire thing
2471
revision_id = record.key[-1]
2472
content, _ = knit._parse_record(revision_id, raw_data)
2473
if record.storage_kind == 'knit-delta-gz':
2474
line_iterator = factory.get_linedelta_content(content)
2475
elif record.storage_kind == 'knit-ft-gz':
2476
line_iterator = factory.get_fulltext_content(content)
2477
content_text_keys.update(find_text_keys(
2478
[(line, revision_id) for line in line_iterator]))
2479
revision_keys = [(r,) for r in revision_ids]
2480
def _filtered_inv_stream():
2481
source_vf = from_repo.inventories
2482
stream = source_vf.get_record_stream(revision_keys,
2484
for record in stream:
2485
if record.storage_kind == 'absent':
2486
raise errors.NoSuchRevision(from_repo, record.key)
2487
find_text_keys_from_content(record)
2489
self._text_keys = content_text_keys - parent_text_keys
2490
return ('inventories', _filtered_inv_stream())
2492
def _get_text_stream(self):
2493
# Note: We know we don't have to handle adding root keys, because both
2494
# the source and target are the identical network name.
2495
text_stream = self.from_repository.texts.get_record_stream(
2496
self._text_keys, self._text_fetch_order, False)
2497
return ('texts', text_stream)
2499
def get_stream(self, search):
2500
revision_ids = search.get_keys()
2501
for stream_info in self._fetch_revision_texts(revision_ids):
2503
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
2504
yield self._get_filtered_inv_stream(revision_ids)
2505
yield self._get_text_stream()
2582
2509
class RepositoryFormatPack(MetaDirRepositoryFormat):
3087
2933
return ("Development repository format, currently the same as "
3088
2934
"1.6.1-subtree with B+Tree indices.\n")
3091
class RepositoryFormatPackDevelopment5(RepositoryFormatPack):
3092
"""A no-subtrees development repository.
3094
This format should be retained until the second release after bzr 1.13.
3096
This is pack-1.9 with CHKMap based inventories.
3099
repository_class = CHKInventoryRepository
3100
_commit_builder_class = PackCommitBuilder
3101
_serializer = chk_serializer.chk_serializer_parent_id
3102
supports_external_lookups = True
3103
# What index classes to use
3104
index_builder_class = BTreeBuilder
3105
index_class = BTreeGraphIndex
3106
supports_chks = True
3107
_commit_inv_deltas = True
3109
def _get_matching_bzrdir(self):
    """Return the bzrdir the format registry builds for 'development5'."""
    return bzrdir.format_registry.make_bzrdir('development5')
3112
def _ignore_setting_bzrdir(self, format):
3115
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
3117
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string(). The text is returned
    verbatim, including its trailing newline.
    """
    # This will need to be updated (at least replacing 1.13 with the target
    # bzr release) once we merge brisbane-core into bzr.dev, I've used
    # 'merge-bbc-dev4-to-bzr.dev' into comments at relevant places to make
    # them easily greppable. -- vila 2009016
    return "Bazaar development format 5 (needs bzr.dev from before 1.13)\n"
3125
def get_format_description(self):
    """Return the description text for this format.

    See RepositoryFormat.get_format_description(). The two adjacent
    literals are concatenated into a single newline-terminated string.
    """
    return ("Development repository format, currently the same as"
            " 1.9 with B+Trees and chk support.\n")
3130
def check_conversion_target(self, target_format):
3134
class RepositoryFormatPackDevelopment5Subtree(RepositoryFormatPack):
3135
# merge-bbc-dev4-to-bzr.dev
3136
"""A subtrees development repository.
3138
This format should be retained until the second release after bzr 1.13.
3140
1.9-subtree[as it might have been] with CHKMap based inventories.
3143
repository_class = CHKInventoryRepository
3144
_commit_builder_class = PackRootCommitBuilder
3145
rich_root_data = True
3146
supports_tree_reference = True
3147
_serializer = chk_serializer.chk_serializer_subtree_parent_id
3148
supports_external_lookups = True
3149
# What index classes to use
3150
index_builder_class = BTreeBuilder
3151
index_class = BTreeGraphIndex
3152
supports_chks = True
3153
_commit_inv_deltas = True
3155
def _get_matching_bzrdir(self):
    """Return the bzrdir the format registry builds for 'development5-subtree'."""
    return bzrdir.format_registry.make_bzrdir(
        'development5-subtree')
3159
def _ignore_setting_bzrdir(self, format):
3162
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
3164
def check_conversion_target(self, target_format):
    """Check that ``target_format`` is acceptable as a conversion target.

    :param target_format: The candidate format. Its ``rich_root_data``
        attribute and its optional ``supports_tree_reference`` attribute
        are inspected.
    :raises errors.BadConversionTarget: If ``rich_root_data`` is false,
        or if ``supports_tree_reference`` is false or not defined.
    :return: None when both checks pass.
    """
    if not target_format.rich_root_data:
        raise errors.BadConversionTarget(
            'Does not support rich root data.', target_format)
    # A format that does not define supports_tree_reference at all is
    # treated the same as one that sets it to False.
    if not getattr(target_format, 'supports_tree_reference', False):
        raise errors.BadConversionTarget(
            'Does not support nested trees', target_format)
3172
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string(). The two adjacent literals
    are concatenated into a single newline-terminated string.
    """
    # merge-bbc-dev4-to-bzr.dev
    return ("Bazaar development format 5 with subtree support"
            " (needs bzr.dev from before 1.13)\n")
3178
def get_format_description(self):
    """Return the description text for this format.

    See RepositoryFormat.get_format_description(). The two adjacent
    literals are concatenated into a single newline-terminated string.
    """
    return ("Development repository format, currently the same as"
            " 1.9-subtree with B+Tree and chk support.\n")
3184
class RepositoryFormatPackDevelopment5Hash16(RepositoryFormatPack):
3185
"""A no-subtrees development repository.
3187
This format should be retained until the second release after bzr 1.13.
3189
This is pack-1.9 with CHKMap based inventories with 16-way hash tries.
3192
repository_class = CHKInventoryRepository
3193
_commit_builder_class = PackCommitBuilder
3194
_serializer = chk_serializer.chk_serializer_16_parent_id
3195
supports_external_lookups = True
3196
# What index classes to use
3197
index_builder_class = BTreeBuilder
3198
index_class = BTreeGraphIndex
3199
supports_chks = True
3200
_commit_inv_deltas = True
3202
def _get_matching_bzrdir(self):
    """Return the bzrdir the format registry builds for 'development5-hash16'."""
    return bzrdir.format_registry.make_bzrdir('development5-hash16')
3205
def _ignore_setting_bzrdir(self, format):
3208
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
3210
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string(). The two adjacent literals
    are concatenated into a single newline-terminated string.
    """
    return ("Bazaar development format 5 hash 16"
            " (needs bzr.dev from before 1.13)\n")
3215
def get_format_description(self):
    """Return the description text for this format.

    See RepositoryFormat.get_format_description(). The two adjacent
    literals are concatenated into a single newline-terminated string.
    """
    return ("Development repository format, currently the same as"
            " 1.9 with B+Trees and chk support and 16-way hash tries\n")
3220
def check_conversion_target(self, target_format):
3224
class RepositoryFormatPackDevelopment5Hash255(RepositoryFormatPack):
3225
"""A no-subtrees development repository.
3227
This format should be retained until the second release after bzr 1.13.
3229
This is pack-1.9 with CHKMap based inventories with 255-way hash tries.
3232
repository_class = CHKInventoryRepository
3233
_commit_builder_class = PackCommitBuilder
3234
_serializer = chk_serializer.chk_serializer_255_parent_id
3235
supports_external_lookups = True
3236
# What index classes to use
3237
index_builder_class = BTreeBuilder
3238
index_class = BTreeGraphIndex
3239
supports_chks = True
3240
_commit_inv_deltas = True
3242
def _get_matching_bzrdir(self):
    """Return the bzrdir the format registry builds for 'development5-hash255'."""
    return bzrdir.format_registry.make_bzrdir('development5-hash255')
3245
def _ignore_setting_bzrdir(self, format):
3248
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
3250
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string(). The two adjacent literals
    are concatenated into a single newline-terminated string.
    """
    return ("Bazaar development format 5 hash 255"
            " (needs bzr.dev from before 1.13)\n")
3255
def get_format_description(self):
    """Return the description text for this format.

    See RepositoryFormat.get_format_description(). The two adjacent
    literals are concatenated into a single newline-terminated string.
    """
    return ("Development repository format, currently the same as"
            " 1.9 with B+Trees and chk support and 255-way hash tries\n")
3260
def check_conversion_target(self, target_format):