# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit-based pack repository formats."""

from bzrlib.lazy_import import lazy_import
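# lazy_import binds the names below into this module's namespace as proxies;
# the real modules are only imported on first use, which keeps 'bzr' startup
# cheap.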
lazy_import(globals(), """
from itertools import izip
import time

from bzrlib import (
    bzrdir,
    debug,
    errors,
    knit,
    osutils,
    pack,
    revision as _mod_revision,
    trace,
    tsort,
    ui,
    xml5,
    xml6,
    xml7,
    )
from bzrlib.knit import (
    _KnitGraphIndex,
    KnitPlainFactory,
    KnitVersionedFiles,
    )
""")

from bzrlib import (
    btree_index,
    )
from bzrlib.index import (
    CombinedGraphIndex,
    GraphIndex,
    GraphIndexPrefixAdapter,
    InMemoryGraphIndex,
    )
from bzrlib.repofmt.knitrepo import (
    KnitRepository,
    )
from bzrlib.repofmt.pack_repo import (
    _DirectPackAccess,
    NewPack,
    RepositoryFormatPack,
    ResumedPack,
    Packer,
    PackCommitBuilder,
    PackRepository,
    PackRootCommitBuilder,
    RepositoryPackCollection,
    )
from bzrlib.vf_repository import (
    StreamSource,
    )


class KnitPackRepository(PackRepository, KnitRepository):

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        PackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
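        # Revisions and signatures are stored as fulltexts (max_delta_chain=0
        # above): they do not delta-compress well and must stay cheap to read
        # individually. Inventories and file texts allow chains of up to 200
        # deltas before a fresh fulltext is inserted.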
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
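    # Reading _matchingbzrdir lazily constructs the bzrdir format whose
    # default repository format is this one; assignments to the attribute are
    # deliberately discarded by _ignore_setting_bzrdir. The same pattern is
    # used by every format class below.
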
    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"


class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"


class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees

    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree. It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "1.6.1-subtree with B+Tree indices.\n")


class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'
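
    # While the inventory substream is consumed, the text keys referenced by
    # each streamed inventory are collected, and keys already referenced by
    # the parent inventories are subtracted. What remains in self._text_keys
    # is exactly the set of file texts introduced by the streamed revisions,
    # which _get_text_stream then sends.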
    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]
        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', True)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because the
        # source and target have the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)
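
    # get_stream yields substreams in dependency order: revision texts first
    # (which also sets self._revision_keys), then the filtered inventory
    # stream (which computes self._text_keys as a side effect of being
    # consumed), and finally the file texts selected by self._text_keys.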
    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()


class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                         revision_ids=revision_ids,
                                         reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
        output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)
        finally:
            pb.finished()

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
        output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value in items:
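                # The index value is one byte of flags (the no-eol marker,
                # preserved in value[0]) followed by "<offset> <length>" into
                # the pack file; value[1:] is parsed here.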
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in \
                izip(reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in \
                izip(reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in fileid_revisions.iteritems():
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
        self._text_filter = text_filter

    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them. Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
                seen_packs.add(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n  to: %s',
                         old_names, new_names)
        self.packs = packs

    def _copy_revision_texts(self):
        # select revisions
        if self.revision_ids:
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                    a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack
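
    # For example, nodes drawn from two source packs A and B collapse into
    # one request group per pack, roughly:
    #   [(index_A, [(0, 100), (200, 50)], [node_a1, node_a2]),
    #    (index_B, [(0, 300)], [node_b1])]
    # so each source pack is read with a single readv call.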
    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)


class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None
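
    # Texts are classified against the ideal (reconciled) text-parent index:
    # nodes whose parents already match are bulk-copied, nodes that only need
    # their parent pointers rewritten keep their delta representation, and
    # texts whose delta basis would be wrong are reinserted as fulltexts in
    # topological order.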
    def _copy_text_texts(self):
        """generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            (_1, key, _2, refs) in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        # be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack for delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()
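
    # The reconciled pack is only kept if something actually changed: either
    # data was rewritten above, or the set of inventory keys differs from the
    # originals.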
    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests
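
    # open_pack is overridden so the new pack trades insertion speed for a
    # smaller result: set_optimize(for_size=True) asks each index builder to
    # spend more effort compacting its pages when the index is written out.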
    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack


class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker