# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit-based pack repository formats."""
from __future__ import absolute_import

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from itertools import izip
import time

from bzrlib import (
    controldir, debug, errors, osutils, pack,
    revision as _mod_revision, trace, tsort, ui, xml5, xml6, xml7,
    )
from bzrlib.knit import (
    _KnitGraphIndex, KnitPlainFactory, KnitVersionedFiles,
    )
""")

from bzrlib import btree_index
from bzrlib.index import (
    CombinedGraphIndex, GraphIndex, GraphIndexPrefixAdapter,
    InMemoryGraphIndex,
    )
from bzrlib.repofmt.knitrepo import KnitRepository
from bzrlib.repofmt.pack_repo import (
    _DirectPackAccess, NewPack, Packer, PackCommitBuilder, PackRepository,
    PackRootCommitBuilder, RepositoryFormatPack, RepositoryPackCollection,
    ResumedPack,
    )
from bzrlib.vf_repository import StreamSource


class KnitPackRepository(PackRepository, KnitRepository):

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        PackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)
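
    # Illustrative note (not part of the original module): each of the four
    # KnitVersionedFiles attributes set up in __init__ pairs a _KnitGraphIndex
    # view over the combined pack indices with a data_access object that can
    # readv() byte ranges out of the pack files. A rough usage sketch,
    # assuming a read-locked repository `repo` and a hypothetical revision id:
    #
    #   stream = repo.revisions.get_record_stream(
    #       [('some-revision-id',)], 'unordered', True)
    #   for record in stream:
    #       raw = record.get_bytes_as('fulltext')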


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder

    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"
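
    # Illustrative note (not part of the original module): the byte string
    # returned by get_format_string() is what is stored in the repository's
    # format marker file on disk; opening a repository reads it back and looks
    # the format up in a registry to pick this class. A minimal sketch:
    #
    #   format = RepositoryFormatKnitPack1()
    #   format.get_format_string() == \
    #       "Bazaar pack repository format 1 (needs bzr 0.92)\n"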


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True

    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False

    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"


class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"


class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = controldir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree.  It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "1.6.1-subtree with B+Tree indices.\n")


class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]
        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', False)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())
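
    # Illustrative note (not part of the original module): the filtering above
    # is a set difference. Text keys already referenced by the parents'
    # inventories are assumed to exist in the target, so only keys first
    # introduced by the fetched revisions are kept, e.g.:
    #
    #   content_text_keys = set([('file-a', 'rev-1'), ('file-b', 'rev-2')])
    #   parent_text_keys = set([('file-a', 'rev-1')])
    #   self._text_keys == set([('file-b', 'rev-2')])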

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target have the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
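
    # Illustrative note (not part of the original module): get_stream() relies
    # on ordering - revision texts are yielded first, then the filtered
    # inventory stream (whose consumption populates self._text_keys), and only
    # then does _get_text_stream() know which text records to send.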


class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                         revision_ids=revision_ids,
                                         reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices
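
    # Illustrative sketch (not part of the original module), assuming a
    # collection with two packs:
    #
    #   pack_map, indices = self._pack_map_and_index_list('revision_index')
    #   # pack_map -> {<GraphIndex 1>: <Pack 1>, <GraphIndex 2>: <Pack 2>}
    #   # indices  -> [<GraphIndex 1>, <GraphIndex 2>]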

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
        output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)
        finally:
            pb.finished()

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
        output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in \
                izip(reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1
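
    # Illustrative note (not part of the original module): the index 'value'
    # string decoded above has the shape '<eol_flag><offset> <length>', where
    # value[0] is a one-character end-of-line flag and value[1:] holds the
    # byte offset and length of the record inside its pack file. A sketch,
    # with a hypothetical value:
    #
    #   value = 'N1634 88'
    #   bits = value[1:].split(' ')            # ['1634', '88']
    #   offset, length = int(bits[0]), int(bits[1])
    #   # ...and add_node() writes it back as eol_flag + "%d %d" % (pos, size)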

    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in \
                izip(reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1
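
    # Illustrative note (not part of the original module): the
    # len(references[-1]) == 0 test above distinguishes fulltexts from deltas.
    # The last reference list of a knit index node holds the compression
    # parent, so an empty list means the record is stored as a fulltext rather
    # than as a line delta against another text.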

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in fileid_revisions.iteritems():
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
        self._text_filter = text_filter

    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them.  Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n  to: %s',
                old_names, new_names)
        self.packs = packs
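
    # Illustrative sketch (not part of the original module), with hypothetical
    # packs P1..P3 where 'entries' touches only P3 and then P1:
    #
    #   self.packs == [P1, P2, P3]
    #   # after _update_pack_order(entries, index_to_pack_map):
    #   self.packs == [P3, P1, P2]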

    def _copy_revision_texts(self):
        # select revisions
        if self.revision_ids:
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing texts.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                    a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack
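
    # Illustrative note (not part of the original module): the pb.update()
    # calls above walk a single five-step progress bar - 0 open the pack,
    # 1 copy revisions, 2 copy inventories, 3 copy texts, 4 copy signatures,
    # 5 finish the pack.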

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result
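
    # Illustrative sketch (not part of the original module): two nodes living
    # in the same pack collapse into a single group, so one readv call can
    # fetch both, e.g. (with hypothetical keys and offsets):
    #
    #   total, groups = self._least_readv_node_readv(nodes)
    #   # total  == 2
    #   # groups == [(index, [(0, 100), (100, 60)],
    #   #             [(key1, eol1, refs1), (key2, eol2, refs2)])]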

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)


class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack for delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack


class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker
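
# Illustrative note (not part of the original module): RepositoryPackCollection
# reads these class attributes when creating or combining packs - NewPack and
# ResumedPack supply the pack objects, normal_packer_class handles ordinary
# repacks, and optimising_packer_class is used when a better disk layout is
# worth the extra time (e.g. an explicit 'bzr pack'). This is how the
# knit-specific behaviour above plugs into the generic pack machinery.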