516
472
if (self.add_callback is not None and
517
473
getattr(index, 'add_nodes', None) == self.add_callback):
518
474
self.add_callback = None
519
self.data_access.set_writer(None, None, (None, None))
522
class Packer(object):
523
"""Create a pack from packs."""
525
def __init__(self, pack_collection, packs, suffix, revision_ids=None):
528
:param pack_collection: A RepositoryPackCollection object where the
529
new pack is being written to.
530
:param packs: The packs to combine.
531
:param suffix: The suffix to use on the temporary files for the pack.
532
:param revision_ids: Revision ids to limit the pack to.
536
self.revision_ids = revision_ids
537
# The pack object we are creating.
539
self._pack_collection = pack_collection
540
# The index layer keys for the revisions being copied. None for 'all
542
self._revision_keys = None
543
# What text keys to copy. None for 'all texts'. This is set by
544
# _copy_inventory_texts
545
self._text_filter = None
548
def _extra_init(self):
549
"""A template hook to allow extending the constructor trivially."""
551
def pack(self, pb=None):
552
"""Create a new pack by reading data from other packs.
554
This does little more than a bulk copy of data. One key difference
555
is that data with the same item key across multiple packs is elided
556
from the output. The new pack is written into the current pack store
557
along with its indices, and the name added to the pack names. The
558
source packs are not altered and are not required to be in the current
561
:param pb: An optional progress bar to use. A nested bar is created if
563
:return: A Pack object, or None if nothing was copied.
565
# open a pack - using the same name as the last temporary file
566
# - which has already been flushed, so it's safe.
567
# XXX: - duplicate code warning with start_write_group; fix before
568
# considering 'done'.
569
if self._pack_collection._new_pack is not None:
570
raise errors.BzrError('call to create_pack_from_packs while '
571
'another pack is being written.')
572
if self.revision_ids is not None:
573
if len(self.revision_ids) == 0:
574
# silly fetch request.
577
self.revision_ids = frozenset(self.revision_ids)
578
self.revision_keys = frozenset((revid,) for revid in
581
self.pb = ui.ui_factory.nested_progress_bar()
585
return self._create_pack_from_packs()
591
"""Open a pack for the pack we are creating."""
592
return NewPack(self._pack_collection._upload_transport,
593
self._pack_collection._index_transport,
594
self._pack_collection._pack_transport, upload_suffix=self.suffix,
595
file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
597
def _copy_revision_texts(self):
598
"""Copy revision data to the new pack."""
600
if self.revision_ids:
601
revision_keys = [(revision_id,) for revision_id in self.revision_ids]
604
# select revision keys
605
revision_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
606
self.packs, 'revision_index')[0]
607
revision_nodes = self._pack_collection._index_contents(revision_index_map, revision_keys)
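# Each revision node is an (index, key, value, references) tuple drawn from
# the combined revision indices of the source packs; keys are 1-tuples of
# the form (revision_id,).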
608
# copy revision keys and adjust values
609
self.pb.update("Copying revision texts", 1)
610
total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
611
list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
612
self.new_pack.revision_index, readv_group_iter, total_items))
613
if 'pack' in debug.debug_flags:
614
mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
615
time.ctime(), self._pack_collection._upload_transport.base,
616
self.new_pack.random_name,
617
self.new_pack.revision_index.key_count(),
618
time.time() - self.new_pack.start_time)
619
self._revision_keys = revision_keys
621
def _copy_inventory_texts(self):
622
"""Copy the inventory texts to the new pack.
624
self._revision_keys is used to determine what inventories to copy.
626
Sets self._text_filter appropriately.
628
# select inventory keys
629
inv_keys = self._revision_keys # currently the same keyspace, and note that
630
# querying for keys here could introduce a bug where an inventory item
631
# is missed, so do not change it to query separately without cross
632
# checking like the text key check below.
633
inventory_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
634
self.packs, 'inventory_index')[0]
635
inv_nodes = self._pack_collection._index_contents(inventory_index_map, inv_keys)
636
# copy inventory keys and adjust values
637
# XXX: Should be a helper function to allow different inv representation
639
self.pb.update("Copying inventory texts", 2)
640
total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
641
# Only grab the output lines if we will be processing them
642
output_lines = bool(self.revision_ids)
643
inv_lines = self._copy_nodes_graph(inventory_index_map,
644
self.new_pack._writer, self.new_pack.inventory_index,
645
readv_group_iter, total_items, output_lines=output_lines)
646
if self.revision_ids:
647
self._process_inventory_lines(inv_lines)
649
# eat the iterator to cause it to execute.
651
self._text_filter = None
652
if 'pack' in debug.debug_flags:
653
mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
654
time.ctime(), self._pack_collection._upload_transport.base,
655
self.new_pack.random_name,
656
self.new_pack.inventory_index.key_count(),
657
time.time() - self.new_pack.start_time)
659
def _copy_text_texts(self):
661
text_index_map, text_nodes = self._get_text_nodes()
662
if self._text_filter is not None:
663
# We could return the keys copied as part of the return value from
664
# _copy_nodes_graph but this doesn't work all that well with the
665
# need to get line output too, so we check separately, and as we're
666
# going to buffer everything anyway, we check beforehand, which
667
# saves reading knit data over the wire when we know there are
669
text_nodes = set(text_nodes)
670
present_text_keys = set(_node[1] for _node in text_nodes)
671
missing_text_keys = set(self._text_filter) - present_text_keys
672
if missing_text_keys:
673
# TODO: raise a specific error that can handle many missing
675
a_missing_key = missing_text_keys.pop()
676
raise errors.RevisionNotPresent(a_missing_key[1],
678
# copy text keys and adjust values
679
self.pb.update("Copying content texts", 3)
680
total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
681
list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
682
self.new_pack.text_index, readv_group_iter, total_items))
683
self._log_copied_texts()
685
def _check_references(self):
686
"""Make sure our external refereneces are present."""
687
external_refs = self.new_pack._external_compression_parents_of_texts()
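# external_refs is the set of (file_id, revision_id) text keys that are
# compression parents of texts in the new pack but are not themselves
# stored in it; they must be present in the wider repository text index.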
689
index = self._pack_collection.text_index.combined_index
690
found_items = list(index.iter_entries(external_refs))
691
if len(found_items) != len(external_refs):
692
found_keys = set(k for idx, k, refs, value in found_items)
693
missing_items = external_refs - found_keys
694
missing_file_id, missing_revision_id = missing_items.pop()
695
raise errors.RevisionNotPresent(missing_revision_id,
698
def _create_pack_from_packs(self):
699
self.pb.update("Opening pack", 0, 5)
700
self.new_pack = self.open_pack()
701
new_pack = self.new_pack
702
# buffer data - we won't be reading-back during the pack creation and
703
# this makes a significant difference on sftp pushes.
704
new_pack.set_write_cache_size(1024*1024)
705
if 'pack' in debug.debug_flags:
706
plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
707
for a_pack in self.packs]
708
if self.revision_ids is not None:
709
rev_count = len(self.revision_ids)
712
mutter('%s: create_pack: creating pack from source packs: '
713
'%s%s %s revisions wanted %s t=0',
714
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
715
plain_pack_list, rev_count)
716
self._copy_revision_texts()
717
self._copy_inventory_texts()
718
self._copy_text_texts()
719
# select signature keys
720
signature_filter = self._revision_keys # same keyspace
721
signature_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
722
self.packs, 'signature_index')[0]
723
signature_nodes = self._pack_collection._index_contents(signature_index_map,
725
# copy signature keys and adjust values
726
self.pb.update("Copying signature texts", 4)
727
self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
728
new_pack.signature_index)
729
if 'pack' in debug.debug_flags:
730
mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
731
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
732
new_pack.signature_index.key_count(),
733
time.time() - new_pack.start_time)
734
self._check_references()
735
if not self._use_pack(new_pack):
738
self.pb.update("Finishing pack", 5)
740
self._pack_collection.allocate(new_pack)
743
def _copy_nodes(self, nodes, index_map, writer, write_index):
744
"""Copy knit nodes between packs with no graph references."""
745
pb = ui.ui_factory.nested_progress_bar()
747
return self._do_copy_nodes(nodes, index_map, writer,
752
def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):
753
# for record verification
754
knit = KnitVersionedFiles(None, None)
755
# plan a readv on each source pack:
757
nodes = sorted(nodes)
758
# how to map this into knit.py - or knit.py into this?
759
# we don't want the typical knit logic, we want grouping by pack
760
# at this point - perhaps a helper library for the following code
761
# duplication points?
763
for index, key, value in nodes:
764
if index not in request_groups:
765
request_groups[index] = []
766
request_groups[index].append((key, value))
768
pb.update("Copied record", record_index, len(nodes))
769
for index, items in request_groups.iteritems():
770
pack_readv_requests = []
771
for key, value in items:
772
# ---- KnitGraphIndex.get_position
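# The index value is '<flag>offset length': the first byte carries the
# eol flag and the remainder gives the byte offset and length of the
# record in the pack (the same layout recreated by add_node below).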
773
bits = value[1:].split(' ')
774
offset, length = int(bits[0]), int(bits[1])
775
pack_readv_requests.append((offset, length, (key, value[0])))
776
# linear scan up the pack
777
pack_readv_requests.sort()
779
transport, path = index_map[index]
780
reader = pack.make_readv_reader(transport, path,
781
[offset[0:2] for offset in pack_readv_requests])
782
for (names, read_func), (_1, _2, (key, eol_flag)) in \
783
izip(reader.iter_records(), pack_readv_requests):
784
raw_data = read_func(None)
785
# check the header only
786
df, _ = knit._parse_record_header(key, raw_data)
788
pos, size = writer.add_bytes_record(raw_data, names)
789
write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
790
pb.update("Copied record", record_index)
793
def _copy_nodes_graph(self, index_map, writer, write_index,
794
readv_group_iter, total_items, output_lines=False):
795
"""Copy knit nodes between packs.
797
:param output_lines: Return lines present in the copied data as
798
an iterator of line,version_id.
800
pb = ui.ui_factory.nested_progress_bar()
802
for result in self._do_copy_nodes_graph(index_map, writer,
803
write_index, output_lines, pb, readv_group_iter, total_items):
806
# Python 2.4 does not permit try:finally: in a generator.
812
def _do_copy_nodes_graph(self, index_map, writer, write_index,
813
output_lines, pb, readv_group_iter, total_items):
814
# for record verification
815
knit = KnitVersionedFiles(None, None)
816
# for line extraction when requested (inventories only)
818
factory = KnitPlainFactory()
820
pb.update("Copied record", record_index, total_items)
821
for index, readv_vector, node_vector in readv_group_iter:
823
transport, path = index_map[index]
824
reader = pack.make_readv_reader(transport, path, readv_vector)
825
for (names, read_func), (key, eol_flag, references) in \
826
izip(reader.iter_records(), node_vector):
827
raw_data = read_func(None)
829
# read the entire thing
830
content, _ = knit._parse_record(key[-1], raw_data)
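# references[-1] holds the compression-parent references for this record:
# an empty tuple means the record is a fulltext, otherwise it is a
# line-delta against that parent.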
831
if len(references[-1]) == 0:
832
line_iterator = factory.get_fulltext_content(content)
834
line_iterator = factory.get_linedelta_content(content)
835
for line in line_iterator:
838
# check the header only
839
df, _ = knit._parse_record_header(key, raw_data)
841
pos, size = writer.add_bytes_record(raw_data, names)
842
write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
843
pb.update("Copied record", record_index)
846
def _get_text_nodes(self):
847
text_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
848
self.packs, 'text_index')[0]
849
return text_index_map, self._pack_collection._index_contents(text_index_map,
852
def _least_readv_node_readv(self, nodes):
853
"""Generate request groups for nodes using the least readv's.
855
:param nodes: An iterable of graph index nodes.
856
:return: Total node count and an iterator of the data needed to perform
857
readvs to obtain the data for nodes. Each item yielded by the
858
iterator is a tuple with:
859
index, readv_vector, node_vector. readv_vector is a list ready to
860
hand to the transport readv method, and node_vector is a list of
861
(key, eol_flag, references) for the node retrieved by the
862
matching readv_vector.
864
# group by pack so we do one readv per pack
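# Illustrative shape of one yielded group (names as in the docstring):
#   (index, [(offset1, len1), (offset2, len2)],
#           [(key1, value1[0], refs1), (key2, value2[0], refs2)])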
865
nodes = sorted(nodes)
868
for index, key, value, references in nodes:
869
if index not in request_groups:
870
request_groups[index] = []
871
request_groups[index].append((key, value, references))
873
for index, items in request_groups.iteritems():
874
pack_readv_requests = []
875
for key, value, references in items:
876
# ---- KnitGraphIndex.get_position
877
bits = value[1:].split(' ')
878
offset, length = int(bits[0]), int(bits[1])
879
pack_readv_requests.append(
880
((offset, length), (key, value[0], references)))
881
# linear scan up the pack to maximise range combining.
882
pack_readv_requests.sort()
883
# split out the readv and the node data.
884
pack_readv = [readv for readv, node in pack_readv_requests]
885
node_vector = [node for readv, node in pack_readv_requests]
886
result.append((index, pack_readv, node_vector))
889
def _log_copied_texts(self):
890
if 'pack' in debug.debug_flags:
891
mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
892
time.ctime(), self._pack_collection._upload_transport.base,
893
self.new_pack.random_name,
894
self.new_pack.text_index.key_count(),
895
time.time() - self.new_pack.start_time)
897
def _process_inventory_lines(self, inv_lines):
898
"""Use up the inv_lines generator and setup a text key filter."""
899
repo = self._pack_collection.repo
900
fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
901
inv_lines, self.revision_keys)
903
for fileid, file_revids in fileid_revisions.iteritems():
904
text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
905
self._text_filter = text_filter
907
def _revision_node_readv(self, revision_nodes):
908
"""Return the total revisions and the readv's to issue.
910
:param revision_nodes: The revision index contents for the packs being
911
incorporated into the new pack.
912
:return: As per _least_readv_node_readv.
914
return self._least_readv_node_readv(revision_nodes)
916
def _use_pack(self, new_pack):
917
"""Return True if new_pack should be used.
919
:param new_pack: The pack that has just been created.
920
:return: True if the pack should be used.
922
return new_pack.data_inserted()
925
class OptimisingPacker(Packer):
926
"""A packer which spends more time to create better disk layouts."""
928
def _revision_node_readv(self, revision_nodes):
929
"""Return the total revisions and the readv's to issue.
931
This sort places revisions in topological order with the ancestors after the children.
934
:param revision_nodes: The revision index contents for the packs being
935
incorporated into the new pack.
936
:return: As per _least_readv_node_readv.
938
# build an ancestors dict
941
for index, key, value, references in revision_nodes:
942
ancestors[key] = references[0]
943
by_key[key] = (index, value, references)
944
order = tsort.topo_sort(ancestors)
946
# Single IO is pathological, but it will work as a starting point.
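# Emitting requests in reversed topological order places children before
# their ancestors in the new pack, one readv per revision for now.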
948
for key in reversed(order):
949
index, value, references = by_key[key]
950
# ---- KnitGraphIndex.get_position
951
bits = value[1:].split(' ')
952
offset, length = int(bits[0]), int(bits[1])
954
(index, [(offset, length)], [(key, value[0], references)]))
955
# TODO: combine requests in the same index that are in ascending order.
956
return total, requests
959
class ReconcilePacker(Packer):
960
"""A packer which regenerates indices etc as it copies.
962
This is used by ``bzr reconcile`` to cause parent text pointers to be
966
def _extra_init(self):
967
self._data_changed = False
969
def _process_inventory_lines(self, inv_lines):
970
"""Generate a text key reference map rather for reconciling with."""
971
repo = self._pack_collection.repo
972
refs = repo._find_text_key_references_from_xml_inventory_lines(
974
self._text_refs = refs
975
# during reconcile we:
976
# - convert unreferenced texts to full texts
977
# - correct texts which reference a text not copied to be full texts
978
# - copy all others as-is but with corrected parents.
979
# - so at this point we don't know enough to decide what becomes a full
981
self._text_filter = None
983
def _copy_text_texts(self):
984
"""generate what texts we should have and then copy."""
985
self.pb.update("Copying content texts", 3)
986
# we have three major tasks here:
987
# 1) generate the ideal index
988
repo = self._pack_collection.repo
989
ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
991
self.new_pack.revision_index.iter_all_entries()])
992
ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
993
# 2) generate a text_nodes list that contains all the deltas that can
994
# be used as-is, with corrected parents.
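# In summary: nodes whose recorded parents already match the ideal index
# are kept as-is (ok_nodes); nodes that share only the left-most parent
# keep their delta but get corrected references; everything else is queued
# in bad_texts to be re-inserted as full texts, and texts absent from the
# ideal index are discarded.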
998
NULL_REVISION = _mod_revision.NULL_REVISION
999
text_index_map, text_nodes = self._get_text_nodes()
1000
for node in text_nodes:
1006
ideal_parents = tuple(ideal_index[node[1]])
1008
discarded_nodes.append(node)
1009
self._data_changed = True
1011
if ideal_parents == (NULL_REVISION,):
1013
if ideal_parents == node[3][0]:
1015
ok_nodes.append(node)
1016
elif ideal_parents[0:1] == node[3][0][0:1]:
1017
# the left most parent is the same, or there are no parents
1018
# today. Either way, we can preserve the representation as
1019
# long as we change the refs to be inserted.
1020
self._data_changed = True
1021
ok_nodes.append((node[0], node[1], node[2],
1022
(ideal_parents, node[3][1])))
1023
self._data_changed = True
1025
# Reinsert this text completely
1026
bad_texts.append((node[1], ideal_parents))
1027
self._data_changed = True
1028
# we're finished with some data.
1031
# 3) bulk copy the ok data
1032
total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
1033
list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
1034
self.new_pack.text_index, readv_group_iter, total_items))
1035
# 4) ad-hoc copy all the other texts.
1036
# We have to topologically insert all texts otherwise we can fail to
1037
# reconcile when parts of a single delta chain are preserved intact,
1038
# and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
1039
# reinserted, and if d3 has incorrect parents it will also be
1040
# reinserted. If we insert d3 first, d2 is present (as it was bulk
1041
# copied), so we will try to delta, but d2 is not currently able to be
1042
# extracted because its basis d1 is not present. Topologically sorting
1043
# addresses this. The following generates a sort for all the texts that
1044
# are being inserted without having to reference the entire text key
1045
# space (we only topo sort the revisions, which is smaller).
1046
topo_order = tsort.topo_sort(ancestors)
1047
rev_order = dict(zip(topo_order, range(len(topo_order))))
1048
bad_texts.sort(key=lambda key:rev_order[key[0][1]])
1049
transaction = repo.get_transaction()
1050
file_id_index = GraphIndexPrefixAdapter(
1051
self.new_pack.text_index,
1053
add_nodes_callback=self.new_pack.text_index.add_nodes)
1054
data_access = _DirectPackAccess(
1055
{self.new_pack.text_index:self.new_pack.access_tuple()})
1056
data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
1057
self.new_pack.access_tuple())
1058
output_texts = KnitVersionedFiles(
1059
_KnitGraphIndex(self.new_pack.text_index,
1060
add_callback=self.new_pack.text_index.add_nodes,
1061
deltas=True, parents=True, is_locked=repo.is_locked),
1062
data_access=data_access, max_delta_chain=200)
1063
for key, parent_keys in bad_texts:
1064
# Output deltas may refer to data already written to the new pack.
1065
# A possible improvement would be to catch errors on short reads
1066
# and only flush then.
1067
self.new_pack.flush()
1069
for parent_key in parent_keys:
1070
if parent_key[0] != key[0]:
1071
# Graph parents must match the fileid
1072
raise errors.BzrError('Mismatched key parent %r:%r' %
1074
parents.append(parent_key[1])
1075
text_lines = split_lines(repo.texts.get_record_stream(
1076
[key], 'unordered', True).next().get_bytes_as('fulltext'))
1077
output_texts.add_lines(key, parent_keys, text_lines,
1078
random_id=True, check_content=False)
1079
# 5) check that nothing inserted has a reference outside the keyspace.
1080
missing_text_keys = self.new_pack._external_compression_parents_of_texts()
1081
if missing_text_keys:
1082
raise errors.BzrError('Reference to missing compression parents %r'
1083
% (missing_text_keys,))
1084
self._log_copied_texts()
1086
def _use_pack(self, new_pack):
1087
"""Override _use_pack to check for reconcile having changed content."""
1088
# XXX: we might be better checking this at the copy time.
1089
original_inventory_keys = set()
1090
inv_index = self._pack_collection.inventory_index.combined_index
1091
for entry in inv_index.iter_all_entries():
1092
original_inventory_keys.add(entry[1])
1093
new_inventory_keys = set()
1094
for entry in new_pack.inventory_index.iter_all_entries():
1095
new_inventory_keys.add(entry[1])
1096
if new_inventory_keys != original_inventory_keys:
1097
self._data_changed = True
1098
return new_pack.data_inserted() and self._data_changed
class RepositoryPackCollection(object):
1102
"""Management of packs within a repository.
1104
:ivar _names: map of {pack_name: (index_size,)}
479
"""Management of packs within a repository."""
1107
481
def __init__(self, repo, transport, index_transport, upload_transport,
1108
482
pack_transport):
1209
594
self._execute_pack_operations(pack_operations)
1212
def _execute_pack_operations(self, pack_operations, _packer_class=Packer):
597
def create_pack_from_packs(self, packs, suffix, revision_ids=None):
598
"""Create a new pack by reading data from other packs.
600
This does little more than a bulk copy of data. One key difference
601
is that data with the same item key across multiple packs is elided
602
from the output. The new pack is written into the current pack store
603
along with its indices, and the name added to the pack names. The
604
source packs are not altered and are not required to be in the current
607
:param packs: An iterable of Packs to combine.
608
:param revision_ids: Either None, to copy all data, or a list
609
of revision_ids to limit the copied data to the data they
611
:return: A Pack object, or None if nothing was copied.
613
# open a pack - using the same name as the last temporary file
614
# - which has already been flushed, so it's safe.
615
# XXX: - duplicate code warning with start_write_group; fix before
616
# considering 'done'.
617
if self._new_pack is not None:
618
raise errors.BzrError('call to create_pack_from_packs while '
619
'another pack is being written.')
620
if revision_ids is not None and len(revision_ids) == 0:
621
# silly fetch request.
623
new_pack = NewPack(self._upload_transport, self._index_transport,
624
self._pack_transport, upload_suffix=suffix)
625
# buffer data - we won't be reading-back during the pack creation and
626
# this makes a significant difference on sftp pushes.
627
new_pack.set_write_cache_size(1024*1024)
628
if 'pack' in debug.debug_flags:
629
plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
631
if revision_ids is not None:
632
rev_count = len(revision_ids)
635
mutter('%s: create_pack: creating pack from source packs: '
636
'%s%s %s revisions wanted %s t=0',
637
time.ctime(), self._upload_transport.base, new_pack.random_name,
638
plain_pack_list, rev_count)
641
revision_keys = [(revision_id,) for revision_id in revision_ids]
645
# select revision keys
646
revision_index_map = self._packs_list_to_pack_map_and_index_list(
647
packs, 'revision_index')[0]
648
revision_nodes = self._index_contents(revision_index_map, revision_keys)
649
# copy revision keys and adjust values
650
list(self._copy_nodes_graph(revision_nodes, revision_index_map,
651
new_pack._writer, new_pack.revision_index))
652
if 'pack' in debug.debug_flags:
653
mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
654
time.ctime(), self._upload_transport.base, new_pack.random_name,
655
new_pack.revision_index.key_count(),
656
time.time() - new_pack.start_time)
657
# select inventory keys
658
inv_keys = revision_keys # currently the same keyspace, and note that
659
# querying for keys here could introduce a bug where an inventory item
660
# is missed, so do not change it to query separately without cross
661
# checking like the text key check below.
662
inventory_index_map = self._packs_list_to_pack_map_and_index_list(
663
packs, 'inventory_index')[0]
664
inv_nodes = self._index_contents(inventory_index_map, inv_keys)
665
# copy inventory keys and adjust values
666
# XXX: Should be a helper function to allow different inv representation
668
inv_lines = self._copy_nodes_graph(inv_nodes, inventory_index_map,
669
new_pack._writer, new_pack.inventory_index, output_lines=True)
671
fileid_revisions = self.repo._find_file_ids_from_xml_inventory_lines(
672
inv_lines, revision_ids)
674
for fileid, file_revids in fileid_revisions.iteritems():
676
[(fileid, file_revid) for file_revid in file_revids])
678
# eat the iterator to cause it to execute.
681
if 'pack' in debug.debug_flags:
682
mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
683
time.ctime(), self._upload_transport.base, new_pack.random_name,
684
new_pack.inventory_index.key_count(),
685
time.time() - new_pack.start_time)
687
text_index_map = self._packs_list_to_pack_map_and_index_list(
688
packs, 'text_index')[0]
689
text_nodes = self._index_contents(text_index_map, text_filter)
690
if text_filter is not None:
691
# We could return the keys copied as part of the return value from
692
# _copy_nodes_graph but this doesn't work all that well with the
693
# need to get line output too, so we check separately, and as we're
694
# going to buffer everything anyway, we check beforehand, which
695
# saves reading knit data over the wire when we know there are
697
text_nodes = set(text_nodes)
698
present_text_keys = set(_node[1] for _node in text_nodes)
699
missing_text_keys = set(text_filter) - present_text_keys
700
if missing_text_keys:
701
# TODO: raise a specific error that can handle many missing
703
a_missing_key = missing_text_keys.pop()
704
raise errors.RevisionNotPresent(a_missing_key[1],
706
# copy text keys and adjust values
707
list(self._copy_nodes_graph(text_nodes, text_index_map,
708
new_pack._writer, new_pack.text_index))
709
if 'pack' in debug.debug_flags:
710
mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
711
time.ctime(), self._upload_transport.base, new_pack.random_name,
712
new_pack.text_index.key_count(),
713
time.time() - new_pack.start_time)
714
# select signature keys
715
signature_filter = revision_keys # same keyspace
716
signature_index_map = self._packs_list_to_pack_map_and_index_list(
717
packs, 'signature_index')[0]
718
signature_nodes = self._index_contents(signature_index_map,
720
# copy signature keys and adjust values
721
self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
722
new_pack.signature_index)
723
if 'pack' in debug.debug_flags:
724
mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
725
time.ctime(), self._upload_transport.base, new_pack.random_name,
726
new_pack.signature_index.key_count(),
727
time.time() - new_pack.start_time)
728
if not new_pack.data_inserted():
732
self.allocate(new_pack)
735
def _execute_pack_operations(self, pack_operations):
1213
736
"""Execute a series of pack operations.
1215
738
:param pack_operations: A list of [revision_count, packs_to_combine].
1216
:param _packer_class: The class of packer to use (default: Packer).
1219
741
for revision_count, packs in pack_operations:
1220
742
# we may have no-ops from the setup logic
1221
743
if len(packs) == 0:
1223
_packer_class(self, packs, '.autopack').pack()
745
# have a progress bar?
746
self.create_pack_from_packs(packs, '.autopack')
1224
747
for pack in packs:
1225
748
self._remove_pack_from_memory(pack)
1226
749
# record the newly available packs and stop advertising the old
1228
self._save_pack_names(clear_obsolete_packs=True)
751
self._save_pack_names()
1229
752
# Move the old packs out of the way now they are no longer referenced.
1230
753
for revision_count, packs in pack_operations:
1231
754
self._obsolete_packs(packs)
1646
1247
self._save_pack_names()
1648
1249
self._new_pack.abort()
1649
self._new_pack = None
1650
1250
self.repo._text_knit = None
1253
class KnitPackRevisionStore(KnitRevisionStore):
1254
"""An object to adapt access from RevisionStore's to use KnitPacks.
1256
This class works by replacing the original RevisionStore.
1257
We need to do this because the KnitPackRevisionStore is less
1258
isolated in its layering - it uses services from the repo.
1261
def __init__(self, repo, transport, revisionstore):
1262
"""Create a KnitPackRevisionStore on repo with revisionstore.
1264
This will store its state in the Repository, use the
1265
indices to provide a KnitGraphIndex,
1266
and at the end of transactions write new indices.
1268
KnitRevisionStore.__init__(self, revisionstore.versioned_file_store)
1270
self._serializer = revisionstore._serializer
1271
self.transport = transport
1273
def get_revision_file(self, transaction):
1274
"""Get the revision versioned file object."""
1275
if getattr(self.repo, '_revision_knit', None) is not None:
1276
return self.repo._revision_knit
1277
self.repo._pack_collection.ensure_loaded()
1278
add_callback = self.repo._pack_collection.revision_index.add_callback
1279
# setup knit specific objects
1280
knit_index = KnitGraphIndex(
1281
self.repo._pack_collection.revision_index.combined_index,
1282
add_callback=add_callback)
1283
self.repo._revision_knit = knit.KnitVersionedFile(
1284
'revisions', self.transport.clone('..'),
1285
self.repo.control_files._file_mode,
1286
create=False, access_mode=self.repo._access_mode(),
1287
index=knit_index, delta=False, factory=knit.KnitPlainFactory(),
1288
access_method=self.repo._pack_collection.revision_index.knit_access)
1289
return self.repo._revision_knit
1291
def get_signature_file(self, transaction):
1292
"""Get the signature versioned file object."""
1293
if getattr(self.repo, '_signature_knit', None) is not None:
1294
return self.repo._signature_knit
1295
self.repo._pack_collection.ensure_loaded()
1296
add_callback = self.repo._pack_collection.signature_index.add_callback
1297
# setup knit specific objects
1298
knit_index = KnitGraphIndex(
1299
self.repo._pack_collection.signature_index.combined_index,
1300
add_callback=add_callback, parents=False)
1301
self.repo._signature_knit = knit.KnitVersionedFile(
1302
'signatures', self.transport.clone('..'),
1303
self.repo.control_files._file_mode,
1304
create=False, access_mode=self.repo._access_mode(),
1305
index=knit_index, delta=False, factory=knit.KnitPlainFactory(),
1306
access_method=self.repo._pack_collection.signature_index.knit_access)
1307
return self.repo._signature_knit
1310
class KnitPackTextStore(VersionedFileStore):
1311
"""Presents a TextStore abstraction on top of packs.
1313
This class works by replacing the original VersionedFileStore.
1314
We need to do this because the KnitPackRevisionStore is less
1315
isolated in its layering - it uses services from the repo and shares them
1316
with all the data written in a single write group.
1319
def __init__(self, repo, transport, weavestore):
1320
"""Create a KnitPackTextStore on repo with weavestore.
1322
This will store its state in the Repository, use the
1323
indices FileNames to provide a KnitGraphIndex,
1324
and at the end of transactions write new indices.
1326
# don't call base class constructor - it's not suitable.
1327
# no transient data stored in the transaction
1329
self._precious = False
1331
self.transport = transport
1332
self.weavestore = weavestore
1333
# XXX for check() which isn't updated yet
1334
self._transport = weavestore._transport
1336
def get_weave_or_empty(self, file_id, transaction):
1337
"""Get a 'Knit' backed by the .tix indices.
1339
The transaction parameter is ignored.
1341
self.repo._pack_collection.ensure_loaded()
1342
add_callback = self.repo._pack_collection.text_index.add_callback
1343
# setup knit specific objects
1344
file_id_index = GraphIndexPrefixAdapter(
1345
self.repo._pack_collection.text_index.combined_index,
1346
(file_id, ), 1, add_nodes_callback=add_callback)
1347
knit_index = KnitGraphIndex(file_id_index,
1348
add_callback=file_id_index.add_nodes,
1349
deltas=True, parents=True)
1350
return knit.KnitVersionedFile('text:' + file_id,
1351
self.transport.clone('..'),
1354
access_method=self.repo._pack_collection.text_index.knit_access,
1355
factory=knit.KnitPlainFactory())
1357
get_weave = get_weave_or_empty
1360
"""Generate a list of the fileids inserted, for use by check."""
1361
self.repo._pack_collection.ensure_loaded()
1363
for index, key, value, refs in \
1364
self.repo._pack_collection.text_index.combined_index.iter_all_entries():
1369
class InventoryKnitThunk(object):
1370
"""An object to manage thunking get_inventory_weave to pack based knits."""
1372
def __init__(self, repo, transport):
1373
"""Create an InventoryKnitThunk for repo at transport.
1375
This will store its state in the Repository, use the
1376
indices FileNames to provide a KnitGraphIndex,
1377
and at the end of transactions write a new index.
1380
self.transport = transport
1382
def get_weave(self):
1383
"""Get a 'Knit' that contains inventory data."""
1384
self.repo._pack_collection.ensure_loaded()
1385
add_callback = self.repo._pack_collection.inventory_index.add_callback
1386
# setup knit specific objects
1387
knit_index = KnitGraphIndex(
1388
self.repo._pack_collection.inventory_index.combined_index,
1389
add_callback=add_callback, deltas=True, parents=True)
1390
return knit.KnitVersionedFile(
1391
'inventory', self.transport.clone('..'),
1392
self.repo.control_files._file_mode,
1393
create=False, access_mode=self.repo._access_mode(),
1394
index=knit_index, delta=True, factory=knit.KnitPlainFactory(),
1395
access_method=self.repo._pack_collection.inventory_index.knit_access)
1653
1398
class KnitPackRepository(KnitRepository):
1654
"""Repository with knit objects stored inside pack containers.
1656
The layering for a KnitPackRepository is:
1658
Graph | HPSS | Repository public layer |
1659
===================================================
1660
Tuple based apis below, string based, and key based apis above
1661
---------------------------------------------------
1663
Provides .texts, .revisions etc
1664
This adapts the N-tuple keys to physical knit records which only have a
1665
single string identifier (for historical reasons), which in older formats
1666
was always the revision_id, and in the mapped code for packs is always
1667
the last element of key tuples.
1668
---------------------------------------------------
1670
A separate GraphIndex is used for each of the
1671
texts/inventories/revisions/signatures contained within each individual
1672
pack file. The GraphIndex layer works in N-tuples and is unaware of any
1674
===================================================
1678
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
1399
"""Experimental graph-knit using repository."""
1401
def __init__(self, _format, a_bzrdir, control_files, _revision_store,
1402
control_store, text_store, _commit_builder_class, _serializer):
1680
1403
KnitRepository.__init__(self, _format, a_bzrdir, control_files,
1681
_commit_builder_class, _serializer)
1682
index_transport = self._transport.clone('indices')
1683
self._pack_collection = RepositoryPackCollection(self, self._transport,
1404
_revision_store, control_store, text_store, _commit_builder_class,
1406
index_transport = control_files._transport.clone('indices')
1407
self._pack_collection = RepositoryPackCollection(self, control_files._transport,
1684
1408
index_transport,
1685
self._transport.clone('upload'),
1686
self._transport.clone('packs'))
1687
self.inventories = KnitVersionedFiles(
1688
_KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
1689
add_callback=self._pack_collection.inventory_index.add_callback,
1690
deltas=True, parents=True, is_locked=self.is_locked),
1691
data_access=self._pack_collection.inventory_index.data_access,
1692
max_delta_chain=200)
1693
self.revisions = KnitVersionedFiles(
1694
_KnitGraphIndex(self._pack_collection.revision_index.combined_index,
1695
add_callback=self._pack_collection.revision_index.add_callback,
1696
deltas=False, parents=True, is_locked=self.is_locked),
1697
data_access=self._pack_collection.revision_index.data_access,
1699
self.signatures = KnitVersionedFiles(
1700
_KnitGraphIndex(self._pack_collection.signature_index.combined_index,
1701
add_callback=self._pack_collection.signature_index.add_callback,
1702
deltas=False, parents=False, is_locked=self.is_locked),
1703
data_access=self._pack_collection.signature_index.data_access,
1705
self.texts = KnitVersionedFiles(
1706
_KnitGraphIndex(self._pack_collection.text_index.combined_index,
1707
add_callback=self._pack_collection.text_index.add_callback,
1708
deltas=True, parents=True, is_locked=self.is_locked),
1709
data_access=self._pack_collection.text_index.data_access,
1710
max_delta_chain=200)
1409
control_files._transport.clone('upload'),
1410
control_files._transport.clone('packs'))
1411
self._revision_store = KnitPackRevisionStore(self, index_transport, self._revision_store)
1412
self.weave_store = KnitPackTextStore(self, index_transport, self.weave_store)
1413
self._inv_thunk = InventoryKnitThunk(self, index_transport)
1711
1414
# True when the repository object is 'write locked' (as opposed to the
1712
1415
# physical lock only taken out around changes to the pack-names list.)
1713
1416
# Another way to represent this would be a decorator around the control
2017
1727
def get_format_description(self):
2018
1728
"""See RepositoryFormat.get_format_description()."""
2019
1729
return "Packs containing knits with subtree support\n"
2022
class RepositoryFormatKnitPack4(RepositoryFormatPack):
2023
"""A rich-root, no subtrees parameterized Pack repository.
2025
This repository format uses the xml6 serializer to get:
2026
- support for recording full info about the tree root
2028
This format was introduced in 1.0.
2031
repository_class = KnitPackRepository
2032
_commit_builder_class = PackRootCommitBuilder
2033
rich_root_data = True
2034
supports_tree_reference = False
2035
_serializer = xml6.serializer_v6
2037
def _get_matching_bzrdir(self):
2038
return bzrdir.format_registry.make_bzrdir(
2041
def _ignore_setting_bzrdir(self, format):
2044
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2046
def check_conversion_target(self, target_format):
2047
if not target_format.rich_root_data:
2048
raise errors.BadConversionTarget(
2049
'Does not support rich root data.', target_format)
2051
def get_format_string(self):
2052
"""See RepositoryFormat.get_format_string()."""
2053
return ("Bazaar pack repository format 1 with rich root"
2054
" (needs bzr 1.0)\n")
2056
def get_format_description(self):
2057
"""See RepositoryFormat.get_format_description()."""
2058
return "Packs containing knits with rich root support\n"
2061
class RepositoryFormatKnitPack5(RepositoryFormatPack):
2062
"""Repository that supports external references to allow stacking.
2066
Supports external lookups, which results in non-truncated ghosts after
2067
reconcile compared to pack-0.92 formats.
2070
repository_class = KnitPackRepository
2071
_commit_builder_class = PackCommitBuilder
2072
_serializer = xml5.serializer_v5
2073
supports_external_lookups = True
2075
def _get_matching_bzrdir(self):
2076
return bzrdir.format_registry.make_bzrdir('development1')
2078
def _ignore_setting_bzrdir(self, format):
2081
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2083
def get_format_string(self):
2084
"""See RepositoryFormat.get_format_string()."""
2085
return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"
2087
def get_format_description(self):
2088
"""See RepositoryFormat.get_format_description()."""
2089
return "Packs 5 (adds stacking support, requires bzr 1.6)"
2091
def check_conversion_target(self, target_format):
2095
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
2096
"""A repository with rich roots and stacking.
2098
New in release 1.6.1.
2100
Supports stacking on other repositories, allowing data to be accessed
2101
without being stored locally.
2104
repository_class = KnitPackRepository
2105
_commit_builder_class = PackRootCommitBuilder
2106
rich_root_data = True
2107
supports_tree_reference = False # no subtrees
2108
_serializer = xml6.serializer_v6
2109
supports_external_lookups = True
2111
def _get_matching_bzrdir(self):
2112
return bzrdir.format_registry.make_bzrdir(
2115
def _ignore_setting_bzrdir(self, format):
2118
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2120
def check_conversion_target(self, target_format):
2121
if not target_format.rich_root_data:
2122
raise errors.BadConversionTarget(
2123
'Does not support rich root data.', target_format)
2125
def get_format_string(self):
2126
"""See RepositoryFormat.get_format_string()."""
2127
return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"
2129
def get_format_description(self):
2130
return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"
2133
class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
2134
"""A repository with rich roots and external references.
2138
Supports external lookups, which results in non-truncated ghosts after
2139
reconcile compared to pack-0.92 formats.
2141
This format was deprecated because the serializer it uses accidentally
2142
supported subtrees, when the format was not intended to. This meant that
2143
someone could accidentally fetch from an incorrect repository.
2146
repository_class = KnitPackRepository
2147
_commit_builder_class = PackRootCommitBuilder
2148
rich_root_data = True
2149
supports_tree_reference = False # no subtrees
2150
_serializer = xml7.serializer_v7
2152
supports_external_lookups = True
2154
def _get_matching_bzrdir(self):
2155
return bzrdir.format_registry.make_bzrdir(
2156
'development1-subtree')
2158
def _ignore_setting_bzrdir(self, format):
2161
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2163
def check_conversion_target(self, target_format):
2164
if not target_format.rich_root_data:
2165
raise errors.BadConversionTarget(
2166
'Does not support rich root data.', target_format)
2168
def get_format_string(self):
2169
"""See RepositoryFormat.get_format_string()."""
2170
return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"
2172
def get_format_description(self):
2173
return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
2177
class RepositoryFormatPackDevelopment1(RepositoryFormatPack):
2178
"""A no-subtrees development repository.
2180
This format should be retained until the second release after bzr 1.5.
2182
Supports external lookups, which results in non-truncated ghosts after
2183
reconcile compared to pack-0.92 formats.
2186
repository_class = KnitPackRepository
2187
_commit_builder_class = PackCommitBuilder
2188
_serializer = xml5.serializer_v5
2189
supports_external_lookups = True
2191
def _get_matching_bzrdir(self):
2192
return bzrdir.format_registry.make_bzrdir('development1')
2194
def _ignore_setting_bzrdir(self, format):
2197
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2199
def get_format_string(self):
2200
"""See RepositoryFormat.get_format_string()."""
2201
return "Bazaar development format 1 (needs bzr.dev from before 1.6)\n"
2203
def get_format_description(self):
2204
"""See RepositoryFormat.get_format_description()."""
2205
return ("Development repository format, currently the same as "
2206
"pack-0.92 with external reference support.\n")
2208
def check_conversion_target(self, target_format):
2212
class RepositoryFormatPackDevelopment1Subtree(RepositoryFormatPack):
2213
"""A subtrees development repository.
2215
This format should be retained until the second release after bzr 1.5.
2217
Supports external lookups, which results in non-truncated ghosts after
2218
reconcile compared to pack-0.92 formats.
2221
repository_class = KnitPackRepository
2222
_commit_builder_class = PackRootCommitBuilder
2223
rich_root_data = True
2224
supports_tree_reference = True
2225
_serializer = xml7.serializer_v7
2226
supports_external_lookups = True
2228
def _get_matching_bzrdir(self):
2229
return bzrdir.format_registry.make_bzrdir(
2230
'development1-subtree')
2232
def _ignore_setting_bzrdir(self, format):
2235
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2237
def check_conversion_target(self, target_format):
2238
if not target_format.rich_root_data:
2239
raise errors.BadConversionTarget(
2240
'Does not support rich root data.', target_format)
2241
if not getattr(target_format, 'supports_tree_reference', False):
2242
raise errors.BadConversionTarget(
2243
'Does not support nested trees', target_format)
2245
def get_format_string(self):
2246
"""See RepositoryFormat.get_format_string()."""
2247
return ("Bazaar development format 1 with subtree support "
2248
"(needs bzr.dev from before 1.6)\n")
2250
def get_format_description(self):
2251
"""See RepositoryFormat.get_format_description()."""
2252
return ("Development repository format, currently the same as "
2253
"pack-0.92-subtree with external reference support.\n")