1
# Copyright (C) 2007-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Knit-based pack repository formats."""
19
from bzrlib.lazy_import import lazy_import
20
lazy_import(globals(), """
25
revision as _mod_revision,
31
from bzrlib.knit import (
41
from bzrlib.index import (
43
GraphIndexPrefixAdapter,
46
from bzrlib.repofmt.knitrepo import (
49
from bzrlib.repofmt.pack_repo import (
54
PackRootCommitBuilder,
55
RepositoryPackCollection,
57
from bzrlib.repository import (
62
class KnitPackRepository(PackRepository):
64
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
66
KnitRepository.__init__(self, _format, a_bzrdir, control_files,
67
_commit_builder_class, _serializer)
68
if self._format.supports_chks:
69
raise AssertionError("chk not supported")
70
index_transport = self._transport.clone('indices')
71
self._pack_collection = RepositoryPackCollection(self, self._transport,
73
self._transport.clone('upload'),
74
self._transport.clone('packs'),
75
_format.index_builder_class,
77
use_chk_index=self._format.supports_chks,
79
self.inventories = KnitVersionedFiles(
80
_KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
81
add_callback=self._pack_collection.inventory_index.add_callback,
82
deltas=True, parents=True, is_locked=self.is_locked),
83
data_access=self._pack_collection.inventory_index.data_access,
85
self.revisions = KnitVersionedFiles(
86
_KnitGraphIndex(self._pack_collection.revision_index.combined_index,
87
add_callback=self._pack_collection.revision_index.add_callback,
88
deltas=False, parents=True, is_locked=self.is_locked,
89
track_external_parent_refs=True),
90
data_access=self._pack_collection.revision_index.data_access,
92
self.signatures = KnitVersionedFiles(
93
_KnitGraphIndex(self._pack_collection.signature_index.combined_index,
94
add_callback=self._pack_collection.signature_index.add_callback,
95
deltas=False, parents=False, is_locked=self.is_locked),
96
data_access=self._pack_collection.signature_index.data_access,
98
self.texts = KnitVersionedFiles(
99
_KnitGraphIndex(self._pack_collection.text_index.combined_index,
100
add_callback=self._pack_collection.text_index.add_callback,
101
deltas=True, parents=True, is_locked=self.is_locked),
102
data_access=self._pack_collection.text_index.data_access,
104
self.chk_bytes = None
105
# True when the repository object is 'write locked' (as opposed to the
106
# physical lock only taken out around changes to the pack-names list.)
107
# Another way to represent this would be a decorator around the control
108
# files object that presents logical locks as physical ones - if this
109
# gets ugly consider that alternative design. RBC 20071011
110
self._write_lock_count = 0
111
self._transaction = None
113
self._reconcile_does_inventory_gc = True
114
self._reconcile_fixes_text_parents = True
115
self._reconcile_backsup_inventory = False
117
def _get_source(self, to_format):
    """Return the stream source to use when fetching into ``to_format``.

    A KnitPackStreamSource is used when ``to_format`` reports the same
    network name as this repository's format; otherwise the decision is
    delegated to PackRepository._get_source.
    """
    same_network_name = (
        to_format.network_name() == self._format.network_name())
    if not same_network_name:
        return PackRepository._get_source(self, to_format)
    return KnitPackStreamSource(self, to_format)
122
def _reconcile_pack(self, collection, packs, extension, revs, pb):
    """Run a KnitReconcilePacker over ``packs`` and return its result.

    :param collection: passed to KnitReconcilePacker as the pack collection.
    :param packs: the packs handed to the packer.
    :param extension: the suffix handed to the packer.
    :param revs: the revision ids handed to the packer.
    :param pb: progress bar passed to the packer's ``pack`` method.
    """
    return KnitReconcilePacker(collection, packs, extension, revs).pack(pb)
127
class RepositoryFormatKnitPack1(RepositoryFormatPack):
128
"""A no-subtrees parameterized Pack repository.
130
This format was introduced in 0.92.
133
repository_class = KnitPackRepository
134
_commit_builder_class = PackCommitBuilder
136
def _serializer(self):
    """Return the serializer used by this format (xml5.serializer_v5)."""
    serializer = xml5.serializer_v5
    return serializer
138
# What index classes to use
139
index_builder_class = InMemoryGraphIndex
140
index_class = GraphIndex
142
def _get_matching_bzrdir(self):
    """Return the bzrdir made by the format registry for 'pack-0.92'."""
    registry = bzrdir.format_registry
    return registry.make_bzrdir('pack-0.92')
145
def _ignore_setting_bzrdir(self, format):
148
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
150
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string().
    """
    format_string = "Bazaar pack repository format 1 (needs bzr 0.92)\n"
    return format_string
154
def get_format_description(self):
    """Return the human-readable description of this format.

    See RepositoryFormat.get_format_description().
    """
    description = "Packs containing knits without subtree support"
    return description
159
class RepositoryFormatKnitPack3(RepositoryFormatPack):
160
"""A subtrees parameterized Pack repository.
162
This repository format uses the xml7 serializer to get:
163
- support for recording full info about the tree root
164
- support for recording tree-references
166
This format was introduced in 0.92.
169
repository_class = KnitPackRepository
170
_commit_builder_class = PackRootCommitBuilder
171
rich_root_data = True
173
supports_tree_reference = True
175
def _serializer(self):
    """Return the serializer used by this format (xml7.serializer_v7)."""
    serializer = xml7.serializer_v7
    return serializer
177
# What index classes to use
178
index_builder_class = InMemoryGraphIndex
179
index_class = GraphIndex
181
def _get_matching_bzrdir(self):
182
return bzrdir.format_registry.make_bzrdir(
185
def _ignore_setting_bzrdir(self, format):
188
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
190
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string().
    """
    format_string = "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"
    return format_string
194
def get_format_description(self):
    """Return the human-readable description of this format.

    See RepositoryFormat.get_format_description().
    """
    # The trailing newline is part of the returned value and is kept as-is.
    description = "Packs containing knits with subtree support\n"
    return description
199
class RepositoryFormatKnitPack4(RepositoryFormatPack):
200
"""A rich-root, no subtrees parameterized Pack repository.
202
This repository format uses the xml6 serializer to get:
203
- support for recording full info about the tree root
205
This format was introduced in 1.0.
208
repository_class = KnitPackRepository
209
_commit_builder_class = PackRootCommitBuilder
210
rich_root_data = True
211
supports_tree_reference = False
213
def _serializer(self):
    """Return the serializer used by this format (xml6.serializer_v6)."""
    serializer = xml6.serializer_v6
    return serializer
215
# What index classes to use
216
index_builder_class = InMemoryGraphIndex
217
index_class = GraphIndex
219
def _get_matching_bzrdir(self):
220
return bzrdir.format_registry.make_bzrdir(
223
def _ignore_setting_bzrdir(self, format):
226
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
228
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string().
    """
    format_string = ("Bazaar pack repository format 1 with rich root"
                     " (needs bzr 1.0)\n")
    return format_string
233
def get_format_description(self):
    """Return the human-readable description of this format.

    See RepositoryFormat.get_format_description().
    """
    # The trailing newline is part of the returned value and is kept as-is.
    description = "Packs containing knits with rich root support\n"
    return description
238
class RepositoryFormatKnitPack5(RepositoryFormatPack):
239
"""Repository that supports external references to allow stacking.
243
Supports external lookups, which results in non-truncated ghosts after
244
reconcile compared to pack-0.92 formats.
247
repository_class = KnitPackRepository
248
_commit_builder_class = PackCommitBuilder
249
supports_external_lookups = True
250
# What index classes to use
251
index_builder_class = InMemoryGraphIndex
252
index_class = GraphIndex
255
def _serializer(self):
    """Return the serializer used by this format (xml5.serializer_v5)."""
    serializer = xml5.serializer_v5
    return serializer
258
def _get_matching_bzrdir(self):
    """Return the bzrdir made by the format registry for '1.6'."""
    registry = bzrdir.format_registry
    return registry.make_bzrdir('1.6')
261
def _ignore_setting_bzrdir(self, format):
264
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
266
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string().
    """
    format_string = "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"
    return format_string
270
def get_format_description(self):
    """Return the human-readable description of this format.

    See RepositoryFormat.get_format_description().
    """
    description = "Packs 5 (adds stacking support, requires bzr 1.6)"
    return description
275
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
276
"""A repository with rich roots and stacking.
278
New in release 1.6.1.
280
Supports stacking on other repositories, allowing data to be accessed
281
without being stored locally.
284
repository_class = KnitPackRepository
285
_commit_builder_class = PackRootCommitBuilder
286
rich_root_data = True
287
supports_tree_reference = False # no subtrees
288
supports_external_lookups = True
289
# What index classes to use
290
index_builder_class = InMemoryGraphIndex
291
index_class = GraphIndex
294
def _serializer(self):
    """Return the serializer used by this format (xml6.serializer_v6)."""
    serializer = xml6.serializer_v6
    return serializer
297
def _get_matching_bzrdir(self):
298
return bzrdir.format_registry.make_bzrdir(
301
def _ignore_setting_bzrdir(self, format):
304
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
306
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string().
    """
    format_string = "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"
    return format_string
310
def get_format_description(self):
    """Return the human-readable description of this format.

    See RepositoryFormat.get_format_description().
    """
    description = "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"
    return description
314
class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
315
"""A repository with rich roots and external references.
319
Supports external lookups, which results in non-truncated ghosts after
320
reconcile compared to pack-0.92 formats.
322
This format was deprecated because the serializer it uses accidentally
323
supported subtrees, when the format was not intended to. This meant that
324
someone could accidentally fetch from an incorrect repository.
327
repository_class = KnitPackRepository
328
_commit_builder_class = PackRootCommitBuilder
329
rich_root_data = True
330
supports_tree_reference = False # no subtrees
332
supports_external_lookups = True
333
# What index classes to use
334
index_builder_class = InMemoryGraphIndex
335
index_class = GraphIndex
338
def _serializer(self):
    """Return the serializer used by this format (xml7.serializer_v7)."""
    serializer = xml7.serializer_v7
    return serializer
341
def _get_matching_bzrdir(self):
342
matching = bzrdir.format_registry.make_bzrdir(
344
matching.repository_format = self
347
def _ignore_setting_bzrdir(self, format):
350
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
352
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string().
    """
    format_string = "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"
    return format_string
356
def get_format_description(self):
357
return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
360
def is_deprecated(self):
364
class RepositoryFormatKnitPack6(RepositoryFormatPack):
365
"""A repository with stacking and btree indexes,
366
without rich roots or subtrees.
368
This is equivalent to pack-1.6 with B+Tree indices.
371
repository_class = KnitPackRepository
372
_commit_builder_class = PackCommitBuilder
373
supports_external_lookups = True
374
# What index classes to use
375
index_builder_class = btree_index.BTreeBuilder
376
index_class = btree_index.BTreeGraphIndex
379
def _serializer(self):
    """Return the serializer used by this format (xml5.serializer_v5)."""
    serializer = xml5.serializer_v5
    return serializer
382
def _get_matching_bzrdir(self):
    """Return the bzrdir made by the format registry for '1.9'."""
    registry = bzrdir.format_registry
    return registry.make_bzrdir('1.9')
385
def _ignore_setting_bzrdir(self, format):
388
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
390
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string().
    """
    format_string = "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"
    return format_string
394
def get_format_description(self):
    """Return the human-readable description of this format.

    See RepositoryFormat.get_format_description().
    """
    description = "Packs 6 (uses btree indexes, requires bzr 1.9)"
    return description
399
class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
400
"""A repository with rich roots, no subtrees, stacking and btree indexes.
402
1.6-rich-root with B+Tree indices.
405
repository_class = KnitPackRepository
406
_commit_builder_class = PackRootCommitBuilder
407
rich_root_data = True
408
supports_tree_reference = False # no subtrees
409
supports_external_lookups = True
410
# What index classes to use
411
index_builder_class = btree_index.BTreeBuilder
412
index_class = btree_index.BTreeGraphIndex
415
def _serializer(self):
    """Return the serializer used by this format (xml6.serializer_v6)."""
    serializer = xml6.serializer_v6
    return serializer
418
def _get_matching_bzrdir(self):
419
return bzrdir.format_registry.make_bzrdir(
422
def _ignore_setting_bzrdir(self, format):
425
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
427
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string().
    """
    format_string = "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"
    return format_string
431
def get_format_description(self):
    """Return the human-readable description of this format.

    See RepositoryFormat.get_format_description().
    """
    description = "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
    return description
435
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
436
"""A subtrees development repository.
438
This format should be retained in 2.3, to provide an upgrade path from this
439
to RepositoryFormat2aSubtree. It can be removed in later releases.
441
1.6.1-subtree[as it might have been] with B+Tree indices.
444
repository_class = KnitPackRepository
445
_commit_builder_class = PackRootCommitBuilder
446
rich_root_data = True
448
supports_tree_reference = True
449
supports_external_lookups = True
450
# What index classes to use
451
index_builder_class = btree_index.BTreeBuilder
452
index_class = btree_index.BTreeGraphIndex
455
def _serializer(self):
    """Return the serializer used by this format (xml7.serializer_v7)."""
    serializer = xml7.serializer_v7
    return serializer
458
def _get_matching_bzrdir(self):
    """Return the bzrdir made by the registry for 'development5-subtree'."""
    registry = bzrdir.format_registry
    return registry.make_bzrdir('development5-subtree')
462
def _ignore_setting_bzrdir(self, format):
465
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
467
def get_format_string(self):
    """Return this format's format string.

    See RepositoryFormat.get_format_string().
    """
    format_string = ("Bazaar development format 2 with subtree support "
                     "(needs bzr.dev from before 1.8)\n")
    return format_string
472
def get_format_description(self):
    """Return the human-readable description of this format.

    See RepositoryFormat.get_format_description().
    """
    # The trailing newline is part of the returned value and is kept as-is.
    description = ("Development repository format, currently the same as "
                   "1.6.1-subtree with B+Tree indices.\n")
    return description
478
class KnitPackStreamSource(StreamSource):
479
"""A StreamSource used to transfer data between same-format KnitPack repos.
482
1) Same serialization format for all objects
483
2) Same root information
484
3) XML format inventories
485
4) Atomic inserts (so we can stream inventory texts before text
490
def __init__(self, from_repository, to_format):
    """Initialise the stream source.

    :param from_repository: forwarded unchanged to the parent constructor.
    :param to_format: forwarded unchanged to the parent constructor.
    """
    super(KnitPackStreamSource, self).__init__(from_repository, to_format)
    # Ordering requested from the texts store in _get_text_stream.
    self._text_fetch_order = 'unordered'
    # No text keys are known yet; _get_filtered_inv_stream assigns them.
    self._text_keys = None
495
def _get_filtered_inv_stream(self, revision_ids):
496
from_repo = self.from_repository
497
parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
498
parent_keys = [(p,) for p in parent_ids]
499
find_text_keys = from_repo._serializer._find_text_key_references
500
parent_text_keys = set(find_text_keys(
501
from_repo._inventory_xml_lines_for_keys(parent_keys)))
502
content_text_keys = set()
503
knit = KnitVersionedFiles(None, None)
504
factory = KnitPlainFactory()
505
def find_text_keys_from_content(record):
506
if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
507
raise ValueError("Unknown content storage kind for"
508
" inventory text: %s" % (record.storage_kind,))
509
# It's a knit record, it has a _raw_record field (even if it was
510
# reconstituted from a network stream).
511
raw_data = record._raw_record
512
# read the entire thing
513
revision_id = record.key[-1]
514
content, _ = knit._parse_record(revision_id, raw_data)
515
if record.storage_kind == 'knit-delta-gz':
516
line_iterator = factory.get_linedelta_content(content)
517
elif record.storage_kind == 'knit-ft-gz':
518
line_iterator = factory.get_fulltext_content(content)
519
content_text_keys.update(find_text_keys(
520
[(line, revision_id) for line in line_iterator]))
521
revision_keys = [(r,) for r in revision_ids]
522
def _filtered_inv_stream():
523
source_vf = from_repo.inventories
524
stream = source_vf.get_record_stream(revision_keys,
526
for record in stream:
527
if record.storage_kind == 'absent':
528
raise errors.NoSuchRevision(from_repo, record.key)
529
find_text_keys_from_content(record)
531
self._text_keys = content_text_keys - parent_text_keys
532
return ('inventories', _filtered_inv_stream())
534
def _get_text_stream(self):
    """Return a ``('texts', stream)`` pair for the collected text keys.

    The stream comes from the source repository's ``texts`` store, asked
    for ``self._text_keys`` in ``self._text_fetch_order`` order with the
    third argument to get_record_stream set to False.
    """
    # Root keys never need adding here: source and target share the
    # identical network name.
    source_texts = self.from_repository.texts
    record_stream = source_texts.get_record_stream(
        self._text_keys, self._text_fetch_order, False)
    return ('texts', record_stream)
541
def get_stream(self, search):
542
revision_ids = search.get_keys()
543
for stream_info in self._fetch_revision_texts(revision_ids):
545
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
546
yield self._get_filtered_inv_stream(revision_ids)
547
yield self._get_text_stream()
550
class KnitPacker(Packer):
551
"""Packer that works with knit packs."""
553
def __init__(self, pack_collection, packs, suffix, revision_ids=None,
555
super(KnitPacker, self).__init__(pack_collection, packs, suffix,
556
revision_ids=revision_ids,
557
reload_func=reload_func)
560
class KnitReconcilePacker(KnitPacker):
561
"""A packer which regenerates indices etc as it copies.
563
This is used by ``bzr reconcile`` to cause parent text pointers to be
567
def __init__(self, *args, **kwargs):
    """Initialise the packer, forwarding all arguments to the parent class."""
    super(KnitReconcilePacker, self).__init__(*args, **kwargs)
    # Starts False; set to True by _copy_text_texts and _use_pack when they
    # detect a change, and consulted by _use_pack for its return value.
    self._data_changed = False
571
def _process_inventory_lines(self, inv_lines):
    """Build the text key reference map used for reconciling.

    The map is produced by the repository serializer's
    ``_find_text_key_references`` from ``inv_lines`` and stored on
    ``self._text_refs``.
    """
    serializer = self._pack_collection.repo._serializer
    self._text_refs = serializer._find_text_key_references(inv_lines)
    # During reconcile we:
    #  - convert unreferenced texts to full texts
    #  - correct texts which reference a text not copied to be full texts
    #  - copy all others as-is but with corrected parents.
    # At this point we do not know enough to decide which texts those
    # are, so no text filter is set.
    self._text_filter = None
584
def _copy_text_texts(self):
585
"""generate what texts we should have and then copy."""
586
self.pb.update("Copying content texts", 3)
587
# we have three major tasks here:
588
# 1) generate the ideal index
589
repo = self._pack_collection.repo
590
ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
592
self.new_pack.revision_index.iter_all_entries()])
593
ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
594
# 2) generate a text_nodes list that contains all the deltas that can
595
# be used as-is, with corrected parents.
599
NULL_REVISION = _mod_revision.NULL_REVISION
600
text_index_map, text_nodes = self._get_text_nodes()
601
for node in text_nodes:
607
ideal_parents = tuple(ideal_index[node[1]])
609
discarded_nodes.append(node)
610
self._data_changed = True
612
if ideal_parents == (NULL_REVISION,):
614
if ideal_parents == node[3][0]:
616
ok_nodes.append(node)
617
elif ideal_parents[0:1] == node[3][0][0:1]:
618
# the left most parent is the same, or there are no parents
619
# today. Either way, we can preserve the representation as
620
# long as we change the refs to be inserted.
621
self._data_changed = True
622
ok_nodes.append((node[0], node[1], node[2],
623
(ideal_parents, node[3][1])))
624
self._data_changed = True
626
# Reinsert this text completely
627
bad_texts.append((node[1], ideal_parents))
628
self._data_changed = True
629
# we're finished with some data.
632
# 3) bulk copy the ok data
633
total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
634
list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
635
self.new_pack.text_index, readv_group_iter, total_items))
636
# 4) adhoc copy all the other texts.
637
# We have to topologically insert all texts otherwise we can fail to
638
# reconcile when parts of a single delta chain are preserved intact,
639
# and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
640
# reinserted, and if d3 has incorrect parents it will also be
641
# reinserted. If we insert d3 first, d2 is present (as it was bulk
642
# copied), so we will try to delta, but d2 is not currently able to be
643
# extracted because its basis d1 is not present. Topologically sorting
644
# addresses this. The following generates a sort for all the texts that
645
# are being inserted without having to reference the entire text key
646
# space (we only topo sort the revisions, which is smaller).
647
topo_order = tsort.topo_sort(ancestors)
648
rev_order = dict(zip(topo_order, range(len(topo_order))))
649
bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
650
transaction = repo.get_transaction()
651
file_id_index = GraphIndexPrefixAdapter(
652
self.new_pack.text_index,
654
add_nodes_callback=self.new_pack.text_index.add_nodes)
655
data_access = knit._DirectPackAccess(
656
{self.new_pack.text_index:self.new_pack.access_tuple()})
657
data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
658
self.new_pack.access_tuple())
659
output_texts = KnitVersionedFiles(
660
_KnitGraphIndex(self.new_pack.text_index,
661
add_callback=self.new_pack.text_index.add_nodes,
662
deltas=True, parents=True, is_locked=repo.is_locked),
663
data_access=data_access, max_delta_chain=200)
664
for key, parent_keys in bad_texts:
665
# We refer to the new pack to delta data being output.
666
# A possible improvement would be to catch errors on short reads
667
# and only flush then.
668
self.new_pack.flush()
670
for parent_key in parent_keys:
671
if parent_key[0] != key[0]:
672
# Graph parents must match the fileid
673
raise errors.BzrError('Mismatched key parent %r:%r' %
675
parents.append(parent_key[1])
676
text_lines = osutils.split_lines(repo.texts.get_record_stream(
677
[key], 'unordered', True).next().get_bytes_as('fulltext'))
678
output_texts.add_lines(key, parent_keys, text_lines,
679
random_id=True, check_content=False)
680
# 5) check that nothing inserted has a reference outside the keyspace.
681
missing_text_keys = self.new_pack.text_index._external_references()
682
if missing_text_keys:
683
raise errors.BzrCheckError('Reference to missing compression parents %r'
684
% (missing_text_keys,))
685
self._log_copied_texts()
687
def _use_pack(self, new_pack):
    """Decide whether ``new_pack`` should be used after reconciling.

    Overrides _use_pack so that a difference between the inventory keys in
    the collection's combined inventory index and those in ``new_pack``
    counts as changed data.

    :return: ``new_pack.data_inserted() and self._data_changed``.
    """
    # XXX: we might be better checking this at the copy time.
    combined_index = self._pack_collection.inventory_index.combined_index
    # entry[1] is the key of each index entry.
    keys_before = set(
        entry[1] for entry in combined_index.iter_all_entries())
    keys_after = set(
        entry[1] for entry in new_pack.inventory_index.iter_all_entries())
    if keys_after != keys_before:
        self._data_changed = True
    return new_pack.data_inserted() and self._data_changed