# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit-based pack repository formats."""
19
from bzrlib.lazy_import import lazy_import
20
lazy_import(globals(), """
21
from itertools import izip
31
revision as _mod_revision,
39
from bzrlib.knit import (
49
from bzrlib.index import (
52
GraphIndexPrefixAdapter,
55
from bzrlib.repofmt.knitrepo import (
58
from bzrlib.repofmt.pack_repo import (
66
PackRootCommitBuilder,
67
RepositoryPackCollection,
69
from bzrlib.vf_repository import (
74
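

# Illustrative note (added commentary, not in the original file): the format
# classes in this module are normally reached through the bzrdir format
# registry rather than constructed directly; for example,
# bzrdir.format_registry.make_bzrdir('pack-0.92') returns a bzrdir format
# whose repository format is RepositoryFormatKnitPack1.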


class KnitPackRepository(PackRepository, KnitRepository):

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        PackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)
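

# Added commentary (not in the original file): the format classes below all
# follow one pattern. Each pairs KnitPackRepository with a serializer
# (xml5/xml6/xml7), a commit builder (plain or root-recording), and an index
# implementation (GraphIndex or btree_index), while class attributes advertise
# rich roots, tree references and stacking support. Only those knobs differ
# between formats.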


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"


class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"


class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees

    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree. It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "1.6.1-subtree with B+Tree indices.\n")


class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'
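
    # Added commentary (not in the original file): an inventory fetch must
    # also bring along any file texts it newly references. The method below
    # computes the text keys referenced by the requested inventories but not
    # by their parent inventories; that set difference
    # (content_text_keys - parent_text_keys) becomes self._text_keys, which
    # _get_text_stream later feeds to texts.get_record_stream().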
    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]
        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', False)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target have the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
                        self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()


class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                          revision_ids=revision_ids,
                                          reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
        output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)
        finally:
            pb.finished()
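
    # Added commentary (not in the original file): the index `value` decoded
    # in the copy loops below is the position string written by
    # _KnitGraphIndex: a one-byte flag followed by "offset length", e.g.
    # 'N0 123' for a no-eol record starting at byte 0 and 123 bytes long.
    # value[0] is carried across as the eol flag, while value[1:] locates the
    # raw bytes to readv() from the source pack.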
    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
                       output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in \
                izip(reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in \
                izip(reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in fileid_revisions.iteritems():
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
        self._text_filter = text_filter
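
    # Added commentary (not in the original file): _create_pack_from_packs
    # drives the copy methods below in a fixed order - revisions, then
    # inventories, then texts, then signatures - so that the inventory pass
    # can compute self._text_filter before any file texts are copied.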
    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them. Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
                seen_packs.add(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n to: %s',
                         old_names, new_names)
        self.packs = packs

    def _copy_revision_texts(self):
        # select revisions
        if self.revision_ids:
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                    a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack
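
    # Illustrative example (added commentary, not in the original file):
    # given index nodes (idx, keyA, ' 0 10', refsA) and
    # (idx, keyB, 'N10 20', refsB), the method below groups both under idx
    # and yields one item
    # (idx, [(0, 10), (10, 20)], [(keyA, ' ', refsA), (keyB, 'N', refsB)]),
    # i.e. a single readv per source pack covering all wanted records.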
    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)


class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                    self._data_changed = True
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""
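
    # Added commentary (not in the original file): writing revisions in
    # reversed topological order places children before their ancestors in
    # the new pack, so readers that start from recent revisions and walk
    # backwards tend to read the file roughly front to back.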
    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack


class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker