16
16
# You should have received a copy of the GNU General Public License
17
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
20
"""Versioned text file storage api."""
23
from cStringIO import StringIO
26
from zlib import adler32
28
22
from bzrlib.lazy_import import lazy_import
29
23
lazy_import(globals(), """
32
25
from bzrlib import (
45
from bzrlib.graph import DictParentsProvider, Graph, StackedParentsProvider
33
from bzrlib.graph import Graph
46
34
from bzrlib.transport.memory import MemoryTransport
48
from bzrlib.registry import Registry
37
from cStringIO import StringIO
39
from bzrlib.inter import InterObject
40
from bzrlib.symbol_versioning import *
49
41
from bzrlib.textmerge import TextMerge
50
from bzrlib import bencode
53
adapter_registry = Registry()
54
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
55
'DeltaPlainToFullText')
56
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
58
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
59
'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
60
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
61
'bzrlib.knit', 'DeltaAnnotatedToFullText')
62
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
63
'bzrlib.knit', 'FTAnnotatedToUnannotated')
64
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
65
'bzrlib.knit', 'FTAnnotatedToFullText')
66
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
67
# 'bzrlib.knit', 'FTAnnotatedToChunked')
70
class ContentFactory(object):
71
"""Abstract interface for insertion and retrieval from a VersionedFile.
73
:ivar sha1: None, or the sha1 of the content fulltext.
74
:ivar storage_kind: The native storage kind of this factory. One of
75
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
76
'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
77
'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
78
:ivar key: The key of this content. Each key is a tuple with a single
80
:ivar parents: A tuple of parent keys for self.key. If the object has
81
no parent information, None (as opposed to () for an empty list of
86
"""Create a ContentFactory."""
88
self.storage_kind = None
93
class ChunkedContentFactory(ContentFactory):
94
"""Static data content factory.
96
This takes a 'chunked' list of strings. The only requirement on 'chunked' is
97
that ''.join(lines) becomes a valid fulltext. A tuple of a single string
98
satisfies this, as does a list of lines.
100
:ivar sha1: None, or the sha1 of the content fulltext.
101
:ivar storage_kind: The native storage kind of this factory. Always
103
:ivar key: The key of this content. Each key is a tuple with a single
105
:ivar parents: A tuple of parent keys for self.key. If the object has
106
no parent information, None (as opposed to () for an empty list of
110
def __init__(self, key, parents, sha1, chunks):
111
"""Create a ContentFactory."""
113
self.storage_kind = 'chunked'
115
self.parents = parents
116
self._chunks = chunks
118
def get_bytes_as(self, storage_kind):
119
if storage_kind == 'chunked':
121
elif storage_kind == 'fulltext':
122
return ''.join(self._chunks)
123
raise errors.UnavailableRepresentation(self.key, storage_kind,
127
class FulltextContentFactory(ContentFactory):
128
"""Static data content factory.
130
This takes a fulltext when created and just returns that during
131
get_bytes_as('fulltext').
133
:ivar sha1: None, or the sha1 of the content fulltext.
134
:ivar storage_kind: The native storage kind of this factory. Always
136
:ivar key: The key of this content. Each key is a tuple with a single
138
:ivar parents: A tuple of parent keys for self.key. If the object has
139
no parent information, None (as opposed to () for an empty list of
143
def __init__(self, key, parents, sha1, text):
144
"""Create a ContentFactory."""
146
self.storage_kind = 'fulltext'
148
self.parents = parents
151
def get_bytes_as(self, storage_kind):
152
if storage_kind == self.storage_kind:
154
elif storage_kind == 'chunked':
156
raise errors.UnavailableRepresentation(self.key, storage_kind,
160
class AbsentContentFactory(ContentFactory):
161
"""A placeholder content factory for unavailable texts.
164
:ivar storage_kind: 'absent'.
165
:ivar key: The key of this content. Each key is a tuple with a single
170
def __init__(self, key):
171
"""Create a ContentFactory."""
173
self.storage_kind = 'absent'
177
def get_bytes_as(self, storage_kind):
178
raise ValueError('A request was made for key: %s, but that'
179
' content is not available, and the calling'
180
' code does not handle if it is missing.'
184
class AdapterFactory(ContentFactory):
185
"""A content factory to adapt between key prefix's."""
187
def __init__(self, key, parents, adapted):
188
"""Create an adapter factory instance."""
190
self.parents = parents
191
self._adapted = adapted
193
def __getattr__(self, attr):
194
"""Return a member from the adapted object."""
195
if attr in ('key', 'parents'):
196
return self.__dict__[attr]
198
return getattr(self._adapted, attr)
201
def filter_absent(record_stream):
202
"""Adapt a record stream to remove absent records."""
203
for record in record_stream:
204
if record.storage_kind != 'absent':
208
class _MPDiffGenerator(object):
209
"""Pull out the functionality for generating mp_diffs."""
211
def __init__(self, vf, keys):
213
# This is the order the keys were requested in
214
self.ordered_keys = tuple(keys)
215
# keys + their parents, what we need to compute the diffs
216
self.needed_keys = ()
217
# Map from key: mp_diff
219
# Map from key: parents_needed (may have ghosts)
221
# Parents that aren't present
222
self.ghost_parents = ()
223
# Map from parent_key => number of children for this text
225
# Content chunks that are cached while we still need them
228
def _find_needed_keys(self):
229
"""Find the set of keys we need to request.
231
This includes all the original keys passed in, and the non-ghost
232
parents of those keys.
234
:return: (needed_keys, refcounts)
235
needed_keys is the set of all texts we need to extract
236
refcounts is a dict of {key: num_children} letting us know when we
237
no longer need to cache a given parent text
239
# All the keys and their parents
240
needed_keys = set(self.ordered_keys)
241
parent_map = self.vf.get_parent_map(needed_keys)
242
self.parent_map = parent_map
243
# TODO: Should we be using a different construct here? I think this
244
# uses difference_update internally, and we expect the result to
246
missing_keys = needed_keys.difference(parent_map)
248
raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
249
# Parents that might be missing. They are allowed to be ghosts, but we
250
# should check for them
252
setdefault = refcounts.setdefault
254
for child_key, parent_keys in parent_map.iteritems():
256
# parent_keys may be None if a given VersionedFile claims to
257
# not support graph operations.
259
just_parents.update(parent_keys)
260
needed_keys.update(parent_keys)
261
for p in parent_keys:
262
refcounts[p] = setdefault(p, 0) + 1
263
just_parents.difference_update(parent_map)
264
# Remove any parents that are actually ghosts from the needed set
265
self.present_parents = set(self.vf.get_parent_map(just_parents))
266
self.ghost_parents = just_parents.difference(self.present_parents)
267
needed_keys.difference_update(self.ghost_parents)
268
self.needed_keys = needed_keys
269
self.refcounts = refcounts
270
return needed_keys, refcounts
272
def _compute_diff(self, key, parent_lines, lines):
273
"""Compute a single mp_diff, and store it in self._diffs"""
274
if len(parent_lines) > 0:
275
# XXX: _extract_blocks is not usefully defined anywhere...
276
# It was meant to extract the left-parent diff without
277
# having to recompute it for Knit content (pack-0.92,
278
# etc). That seems to have regressed somewhere
279
left_parent_blocks = self.vf._extract_blocks(key,
280
parent_lines[0], lines)
282
left_parent_blocks = None
283
diff = multiparent.MultiParent.from_lines(lines,
284
parent_lines, left_parent_blocks)
285
self.diffs[key] = diff
287
def _process_one_record(self, key, this_chunks):
289
if key in self.parent_map:
290
# This record should be ready to diff, since we requested
291
# content in 'topological' order
292
parent_keys = self.parent_map.pop(key)
293
# If a VersionedFile claims 'no-graph' support, then it may return
294
# None for any parent request, so we replace it with an empty tuple
295
if parent_keys is None:
298
for p in parent_keys:
299
# Alternatively we could check p not in self.needed_keys, but
300
# ghost_parents should be tiny versus huge
301
if p in self.ghost_parents:
303
refcount = self.refcounts[p]
304
if refcount == 1: # Last child reference
305
self.refcounts.pop(p)
306
parent_chunks = self.chunks.pop(p)
308
self.refcounts[p] = refcount - 1
309
parent_chunks = self.chunks[p]
310
p_lines = osutils.chunks_to_lines(parent_chunks)
311
# TODO: Should we cache the line form? We did the
312
# computation to get it, but storing it this way will
313
# be less memory efficient...
314
parent_lines.append(p_lines)
316
lines = osutils.chunks_to_lines(this_chunks)
317
# Since we needed the lines, we'll go ahead and cache them this way
319
self._compute_diff(key, parent_lines, lines)
321
# Is this content required for any more children?
322
if key in self.refcounts:
323
self.chunks[key] = this_chunks
325
def _extract_diffs(self):
326
needed_keys, refcounts = self._find_needed_keys()
327
for record in self.vf.get_record_stream(needed_keys,
328
'topological', True):
329
if record.storage_kind == 'absent':
330
raise errors.RevisionNotPresent(record.key, self.vf)
331
self._process_one_record(record.key,
332
record.get_bytes_as('chunked'))
334
def compute_diffs(self):
335
self._extract_diffs()
336
dpop = self.diffs.pop
337
return [dpop(k) for k in self.ordered_keys]
340
44
class VersionedFile(object):
341
45
"""Versioned text file storage.
343
47
A versioned file manages versions of line-based text files,
344
48
keeping track of the originating version for each line.
684
521
unchanged Alive in both a and b (possibly created in both)
685
522
new-a Created in a
686
523
new-b Created in b
687
ghost-a Killed in a, unborn in b
524
ghost-a Killed in a, unborn in b
688
525
ghost-b Killed in b, unborn in a
689
526
irrelevant Not in either revision
691
528
raise NotImplementedError(VersionedFile.plan_merge)
693
530
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
694
531
b_marker=TextMerge.B_MARKER):
695
532
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
698
class RecordingVersionedFilesDecorator(object):
699
"""A minimal versioned files that records calls made on it.
701
Only enough methods have been added to support tests using it to date.
703
:ivar calls: A list of the calls made; can be reset at any time by
707
def __init__(self, backing_vf):
708
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
710
:param backing_vf: The versioned file to answer all methods.
712
self._backing_vf = backing_vf
715
def add_lines(self, key, parents, lines, parent_texts=None,
716
left_matching_blocks=None, nostore_sha=None, random_id=False,
718
self.calls.append(("add_lines", key, parents, lines, parent_texts,
719
left_matching_blocks, nostore_sha, random_id, check_content))
720
return self._backing_vf.add_lines(key, parents, lines, parent_texts,
721
left_matching_blocks, nostore_sha, random_id, check_content)
724
self._backing_vf.check()
726
def get_parent_map(self, keys):
727
self.calls.append(("get_parent_map", copy(keys)))
728
return self._backing_vf.get_parent_map(keys)
730
def get_record_stream(self, keys, sort_order, include_delta_closure):
731
self.calls.append(("get_record_stream", list(keys), sort_order,
732
include_delta_closure))
733
return self._backing_vf.get_record_stream(keys, sort_order,
734
include_delta_closure)
736
def get_sha1s(self, keys):
737
self.calls.append(("get_sha1s", copy(keys)))
738
return self._backing_vf.get_sha1s(keys)
740
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
741
self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
742
return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)
745
self.calls.append(("keys",))
746
return self._backing_vf.keys()
749
class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
750
"""A VF that records calls, and returns keys in specific order.
752
:ivar calls: A list of the calls made; can be reset at any time by
756
def __init__(self, backing_vf, key_priority):
757
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
759
:param backing_vf: The versioned file to answer all methods.
760
:param key_priority: A dictionary defining what order keys should be
761
returned from an 'unordered' get_record_stream request.
762
Keys with lower priority are returned first, keys not present in
763
the map get an implicit priority of 0, and are returned in
764
lexicographical order.
766
RecordingVersionedFilesDecorator.__init__(self, backing_vf)
767
self._key_priority = key_priority
769
def get_record_stream(self, keys, sort_order, include_delta_closure):
770
self.calls.append(("get_record_stream", list(keys), sort_order,
771
include_delta_closure))
772
if sort_order == 'unordered':
774
return (self._key_priority.get(key, 0), key)
775
# Use a defined order by asking for the keys one-by-one from the
777
for key in sorted(keys, key=sort_key):
778
for record in self._backing_vf.get_record_stream([key],
779
'unordered', include_delta_closure):
782
for record in self._backing_vf.get_record_stream(keys, sort_order,
783
include_delta_closure):
787
class KeyMapper(object):
788
"""KeyMappers map between keys and underlying partitioned storage."""
791
"""Map key to an underlying storage identifier.
793
:param key: A key tuple e.g. ('file-id', 'revision-id').
794
:return: An underlying storage identifier, specific to the partitioning
797
raise NotImplementedError(self.map)
799
def unmap(self, partition_id):
800
"""Map a partitioned storage id back to a key prefix.
802
:param partition_id: The underlying partition id.
803
:return: As much of a key (or prefix) as is derivable from the partition
806
raise NotImplementedError(self.unmap)
809
class ConstantMapper(KeyMapper):
810
"""A key mapper that maps to a constant result."""
812
def __init__(self, result):
813
"""Create a ConstantMapper which will return result for all maps."""
814
self._result = result
817
"""See KeyMapper.map()."""
821
class URLEscapeMapper(KeyMapper):
822
"""Base class for use with transport backed storage.
824
This provides a map and unmap wrapper that respectively url escape and
825
unescape their outputs and inputs.
829
"""See KeyMapper.map()."""
830
return urllib.quote(self._map(key))
832
def unmap(self, partition_id):
833
"""See KeyMapper.unmap()."""
834
return self._unmap(urllib.unquote(partition_id))
837
class PrefixMapper(URLEscapeMapper):
838
"""A key mapper that extracts the first component of a key.
840
This mapper is for use with a transport based backend.
844
"""See KeyMapper.map()."""
847
def _unmap(self, partition_id):
848
"""See KeyMapper.unmap()."""
849
return (partition_id,)
852
class HashPrefixMapper(URLEscapeMapper):
853
"""A key mapper that combines the first component of a key with a hash.
855
This mapper is for use with a transport based backend.
859
"""See KeyMapper.map()."""
860
prefix = self._escape(key[0])
861
return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
863
def _escape(self, prefix):
864
"""No escaping needed here."""
867
def _unmap(self, partition_id):
868
"""See KeyMapper.unmap()."""
869
return (self._unescape(osutils.basename(partition_id)),)
871
def _unescape(self, basename):
872
"""No unescaping needed for HashPrefixMapper."""
876
class HashEscapedPrefixMapper(HashPrefixMapper):
877
"""Combines the escaped first component of a key with a hash.
879
This mapper is for use with a transport based backend.
882
_safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
884
def _escape(self, prefix):
885
"""Turn a key element into a filesystem safe string.
887
This is similar to a plain urllib.quote, except
888
it uses specific safe characters, so that it doesn't
889
have to translate a lot of valid file ids.
891
# @ does not get escaped. This is because it is a valid
892
# filesystem character we use all the time, and it looks
893
# a lot better than seeing %40 all the time.
894
r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
898
def _unescape(self, basename):
899
"""Escaped names are easily unescaped by urlutils."""
900
return urllib.unquote(basename)
903
def make_versioned_files_factory(versioned_file_factory, mapper):
904
"""Create a ThunkedVersionedFiles factory.
906
This will create a callable which when called creates a
907
ThunkedVersionedFiles on a transport, using mapper to access individual
908
versioned files, and versioned_file_factory to create each individual file.
910
def factory(transport):
911
return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
916
class VersionedFiles(object):
917
"""Storage for many versioned files.
919
This object allows a single keyspace for accessing the history graph and
920
contents of named bytestrings.
922
Currently no implementation allows the graph of different key prefixes to
923
intersect, but the API does allow such implementations in the future.
925
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
926
may have a different length key-size, but that size will be constant for
927
all texts added to or retrieved from it. For instance, bzrlib uses
928
instances with a key-size of 2 for storing user files in a repository, with
929
the first element the fileid, and the second the version of that file.
931
The use of tuples allows a single code base to support several different
932
uses with only the mapping logic changing from instance to instance.
935
def add_lines(self, key, parents, lines, parent_texts=None,
936
left_matching_blocks=None, nostore_sha=None, random_id=False,
938
"""Add a text to the store.
940
:param key: The key tuple of the text to add. If the last element is
941
None, a CHK string will be generated during the addition.
942
:param parents: The parents key tuples of the text to add.
943
:param lines: A list of lines. Each line must be a bytestring. And all
944
of them except the last must be terminated with \n and contain no
945
other \n's. The last line may either contain no \n's or a single
946
terminating \n. If the lines list does meet this constraint the add
947
routine may error or may succeed - but you will be unable to read
948
the data back accurately. (Checking the lines have been split
949
correctly is expensive and extremely unlikely to catch bugs so it
950
is not done at runtime unless check_content is True.)
951
:param parent_texts: An optional dictionary containing the opaque
952
representations of some or all of the parents of version_id to
953
allow delta optimisations. VERY IMPORTANT: the texts must be those
954
returned by add_lines or data corruption can be caused.
955
:param left_matching_blocks: a hint about which areas are common
956
between the text and its left-hand-parent. The format is
957
the SequenceMatcher.get_matching_blocks format.
958
:param nostore_sha: Raise ExistingContent and do not add the lines to
959
the versioned file if the digest of the lines matches this.
960
:param random_id: If True a random id has been selected rather than
961
an id determined by some deterministic process such as a converter
962
from a foreign VCS. When True the backend may choose not to check
963
for uniqueness of the resulting key within the versioned file, so
964
this should only be done when the result is expected to be unique
966
:param check_content: If True, the lines supplied are verified to be
967
bytestrings that are correctly formed lines.
968
:return: The text sha1, the number of bytes in the text, and an opaque
969
representation of the inserted version which can be provided
970
back to future add_lines calls in the parent_texts dictionary.
972
raise NotImplementedError(self.add_lines)
974
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
975
"""Add a text to the store.
977
This is a private function for use by CommitBuilder.
979
:param key: The key tuple of the text to add. If the last element is
980
None, a CHK string will be generated during the addition.
981
:param parents: The parents key tuples of the text to add.
982
:param text: A string containing the text to be committed.
983
:param nostore_sha: Raise ExistingContent and do not add the lines to
984
the versioned file if the digest of the lines matches this.
985
:param random_id: If True a random id has been selected rather than
986
an id determined by some deterministic process such as a converter
987
from a foreign VCS. When True the backend may choose not to check
988
for uniqueness of the resulting key within the versioned file, so
989
this should only be done when the result is expected to be unique
991
:param check_content: If True, the lines supplied are verified to be
992
bytestrings that are correctly formed lines.
993
:return: The text sha1, the number of bytes in the text, and an opaque
994
representation of the inserted version which can be provided
995
back to future _add_text calls in the parent_texts dictionary.
997
# The default implementation just thunks over to .add_lines(),
998
# inefficient, but it works.
999
return self.add_lines(key, parents, osutils.split_lines(text),
1000
nostore_sha=nostore_sha,
1001
random_id=random_id,
1004
def add_mpdiffs(self, records):
1005
"""Add mpdiffs to this VersionedFile.
1007
Records should be iterables of version, parents, expected_sha1,
1008
mpdiff. mpdiff should be a MultiParent instance.
1011
mpvf = multiparent.MultiMemoryVersionedFile()
1013
for version, parent_ids, expected_sha1, mpdiff in records:
1014
versions.append(version)
1015
mpvf.add_diff(mpdiff, version, parent_ids)
1016
needed_parents = set()
1017
for version, parent_ids, expected_sha1, mpdiff in records:
1018
needed_parents.update(p for p in parent_ids
1019
if not mpvf.has_version(p))
1020
# It seems likely that adding all the present parents as fulltexts can
1021
# easily exhaust memory.
1022
chunks_to_lines = osutils.chunks_to_lines
1023
for record in self.get_record_stream(needed_parents, 'unordered',
1025
if record.storage_kind == 'absent':
1027
mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
1029
for (key, parent_keys, expected_sha1, mpdiff), lines in\
1030
zip(records, mpvf.get_line_list(versions)):
1031
if len(parent_keys) == 1:
1032
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
1033
mpvf.get_diff(parent_keys[0]).num_lines()))
1035
left_matching_blocks = None
1036
version_sha1, _, version_text = self.add_lines(key,
1037
parent_keys, lines, vf_parents,
1038
left_matching_blocks=left_matching_blocks)
1039
if version_sha1 != expected_sha1:
1040
raise errors.VersionedFileInvalidChecksum(version)
1041
vf_parents[key] = version_text
1043
def annotate(self, key):
1044
"""Return a list of (version-key, line) tuples for the text of key.
1046
:raise RevisionNotPresent: If the key is not present.
1048
raise NotImplementedError(self.annotate)
1050
def check(self, progress_bar=None):
1051
"""Check this object for integrity.
1053
:param progress_bar: A progress bar to output as the check progresses.
1054
:param keys: Specific keys within the VersionedFiles to check. When
1055
this parameter is not None, check() becomes a generator as per
1056
get_record_stream. The difference to get_record_stream is that
1057
more or deeper checks will be performed.
1058
:return: None, or if keys was supplied a generator as per
1061
raise NotImplementedError(self.check)
1064
def check_not_reserved_id(version_id):
1065
revision.check_not_reserved_id(version_id)
1067
def clear_cache(self):
1068
"""Clear whatever caches this VersionedFile holds.
1070
This is generally called after an operation has been performed, when we
1071
don't expect to be using this versioned file again soon.
1074
def _check_lines_not_unicode(self, lines):
1075
"""Check that lines being added to a versioned file are not unicode."""
1077
if line.__class__ is not str:
1078
raise errors.BzrBadParameterUnicode("lines")
1080
def _check_lines_are_lines(self, lines):
1081
"""Check that the lines really are full lines without inline EOL."""
1083
if '\n' in line[:-1]:
1084
raise errors.BzrBadParameterContainsNewline("lines")
1086
def get_known_graph_ancestry(self, keys):
1087
"""Get a KnownGraph instance with the ancestry of keys."""
1088
# most basic implementation is a loop around get_parent_map
1092
this_parent_map = self.get_parent_map(pending)
1093
parent_map.update(this_parent_map)
1095
map(pending.update, this_parent_map.itervalues())
1096
pending = pending.difference(parent_map)
1097
kg = _mod_graph.KnownGraph(parent_map)
1100
def get_parent_map(self, keys):
1101
"""Get a map of the parents of keys.
1103
:param keys: The keys to look up parents for.
1104
:return: A mapping from keys to parents. Absent keys are absent from
1107
raise NotImplementedError(self.get_parent_map)
1109
def get_record_stream(self, keys, ordering, include_delta_closure):
1110
"""Get a stream of records for keys.
1112
:param keys: The keys to include.
1113
:param ordering: Either 'unordered' or 'topological'. A topologically
1114
sorted stream has compression parents strictly before their
1116
:param include_delta_closure: If True then the closure across any
1117
compression parents will be included (in the opaque data).
1118
:return: An iterator of ContentFactory objects, each of which is only
1119
valid until the iterator is advanced.
1121
raise NotImplementedError(self.get_record_stream)
1123
def get_sha1s(self, keys):
1124
"""Get the sha1's of the texts for the given keys.
1126
:param keys: The names of the keys to lookup
1127
:return: a dict from key to sha1 digest. Keys of texts which are not
1128
present in the store are not present in the returned
1131
raise NotImplementedError(self.get_sha1s)
1133
has_key = index._has_key_from_parent_map
1135
def get_missing_compression_parent_keys(self):
1136
"""Return an iterable of keys of missing compression parents.
1138
Check this after calling insert_record_stream to find out if there are
1139
any missing compression parents. If there are, the records that
1140
depend on them are not able to be inserted safely. The precise
1141
behaviour depends on the concrete VersionedFiles class in use.
1143
Classes that do not support this will raise NotImplementedError.
1145
raise NotImplementedError(self.get_missing_compression_parent_keys)
1147
def insert_record_stream(self, stream):
1148
"""Insert a record stream into this container.
1150
:param stream: A stream of records to insert.
1152
:seealso VersionedFile.get_record_stream:
1154
raise NotImplementedError
1156
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1157
"""Iterate over the lines in the versioned files from keys.
1159
This may return lines from other keys. Each item the returned
1160
iterator yields is a tuple of a line and a text version that that line
1161
is present in (not introduced in).
1163
Ordering of results is in whatever order is most suitable for the
1164
underlying storage format.
1166
If a progress bar is supplied, it may be used to indicate progress.
1167
The caller is responsible for cleaning up progress bars (because this
1171
* Lines are normalised by the underlying store: they will all have \n
1173
* Lines are returned in arbitrary order.
1175
:return: An iterator over (line, key).
1177
raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
1180
"""Return a iterable of the keys for all the contained texts."""
1181
raise NotImplementedError(self.keys)
1183
def make_mpdiffs(self, keys):
1184
"""Create multiparent diffs for specified keys."""
1185
generator = _MPDiffGenerator(self, keys)
1186
return generator.compute_diffs()
1188
def get_annotator(self):
1189
return annotate.Annotator(self)
1191
missing_keys = index._missing_keys_from_parent_map
1193
def _extract_blocks(self, version_id, source, target):
1197
class ThunkedVersionedFiles(VersionedFiles):
1198
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1200
This object allows a single keyspace for accessing the history graph and
1201
contents of named bytestrings.
1203
Currently no implementation allows the graph of different key prefixes to
1204
intersect, but the API does allow such implementations in the future.
1207
def __init__(self, transport, file_factory, mapper, is_locked):
1208
"""Create a ThunkedVersionedFiles."""
1209
self._transport = transport
1210
self._file_factory = file_factory
1211
self._mapper = mapper
1212
self._is_locked = is_locked
1214
def add_lines(self, key, parents, lines, parent_texts=None,
1215
left_matching_blocks=None, nostore_sha=None, random_id=False,
1216
check_content=True):
1217
"""See VersionedFiles.add_lines()."""
1218
path = self._mapper.map(key)
1219
version_id = key[-1]
1220
parents = [parent[-1] for parent in parents]
1221
vf = self._get_vf(path)
1224
return vf.add_lines_with_ghosts(version_id, parents, lines,
1225
parent_texts=parent_texts,
1226
left_matching_blocks=left_matching_blocks,
1227
nostore_sha=nostore_sha, random_id=random_id,
1228
check_content=check_content)
1229
except NotImplementedError:
1230
return vf.add_lines(version_id, parents, lines,
1231
parent_texts=parent_texts,
1232
left_matching_blocks=left_matching_blocks,
1233
nostore_sha=nostore_sha, random_id=random_id,
1234
check_content=check_content)
1235
except errors.NoSuchFile:
1236
# parent directory may be missing, try again.
1237
self._transport.mkdir(osutils.dirname(path))
1239
return vf.add_lines_with_ghosts(version_id, parents, lines,
1240
parent_texts=parent_texts,
1241
left_matching_blocks=left_matching_blocks,
1242
nostore_sha=nostore_sha, random_id=random_id,
1243
check_content=check_content)
1244
except NotImplementedError:
1245
return vf.add_lines(version_id, parents, lines,
1246
parent_texts=parent_texts,
1247
left_matching_blocks=left_matching_blocks,
1248
nostore_sha=nostore_sha, random_id=random_id,
1249
check_content=check_content)
1251
def annotate(self, key):
1252
"""Return a list of (version-key, line) tuples for the text of key.
1254
:raise RevisionNotPresent: If the key is not present.
1257
path = self._mapper.map(prefix)
1258
vf = self._get_vf(path)
1259
origins = vf.annotate(key[-1])
1261
for origin, line in origins:
1262
result.append((prefix + (origin,), line))
1265
def check(self, progress_bar=None, keys=None):
1266
"""See VersionedFiles.check()."""
1267
# XXX: This is over-enthusiastic but as we only thunk for Weaves today
1268
# this is tolerable. Ideally we'd pass keys down to check() and
1269
# have the older VersiondFile interface updated too.
1270
for prefix, vf in self._iter_all_components():
1272
if keys is not None:
1273
return self.get_record_stream(keys, 'unordered', True)
1275
def get_parent_map(self, keys):
1276
"""Get a map of the parents of keys.
1278
:param keys: The keys to look up parents for.
1279
:return: A mapping from keys to parents. Absent keys are absent from
1282
prefixes = self._partition_keys(keys)
1284
for prefix, suffixes in prefixes.items():
1285
path = self._mapper.map(prefix)
1286
vf = self._get_vf(path)
1287
parent_map = vf.get_parent_map(suffixes)
1288
for key, parents in parent_map.items():
1289
result[prefix + (key,)] = tuple(
1290
prefix + (parent,) for parent in parents)
1293
def _get_vf(self, path):
1294
if not self._is_locked():
1295
raise errors.ObjectNotLocked(self)
1296
return self._file_factory(path, self._transport, create=True,
1297
get_scope=lambda:None)
1299
def _partition_keys(self, keys):
1300
"""Turn keys into a dict of prefix:suffix_list."""
1303
prefix_keys = result.setdefault(key[:-1], [])
1304
prefix_keys.append(key[-1])
1307
def _get_all_prefixes(self):
1308
# Identify all key prefixes.
1309
# XXX: A bit hacky, needs polish.
1310
if type(self._mapper) == ConstantMapper:
1311
paths = [self._mapper.map(())]
1315
for quoted_relpath in self._transport.iter_files_recursive():
1316
path, ext = os.path.splitext(quoted_relpath)
1318
paths = list(relpaths)
1319
prefixes = [self._mapper.unmap(path) for path in paths]
1320
return zip(paths, prefixes)
1322
def get_record_stream(self, keys, ordering, include_delta_closure):
1323
"""See VersionedFiles.get_record_stream()."""
1324
# Ordering will be taken care of by each partitioned store; group keys
1327
for prefix, suffixes, vf in self._iter_keys_vf(keys):
1328
suffixes = [(suffix,) for suffix in suffixes]
1329
for record in vf.get_record_stream(suffixes, ordering,
1330
include_delta_closure):
1331
if record.parents is not None:
1332
record.parents = tuple(
1333
prefix + parent for parent in record.parents)
1334
record.key = prefix + record.key
1337
def _iter_keys_vf(self, keys):
1338
prefixes = self._partition_keys(keys)
1340
for prefix, suffixes in prefixes.items():
1341
path = self._mapper.map(prefix)
1342
vf = self._get_vf(path)
1343
yield prefix, suffixes, vf
1345
def get_sha1s(self, keys):
1346
"""See VersionedFiles.get_sha1s()."""
1348
for prefix,suffixes, vf in self._iter_keys_vf(keys):
1349
vf_sha1s = vf.get_sha1s(suffixes)
1350
for suffix, sha1 in vf_sha1s.iteritems():
1351
sha1s[prefix + (suffix,)] = sha1
1354
def insert_record_stream(self, stream):
1355
"""Insert a record stream into this container.
1357
:param stream: A stream of records to insert.
1359
:seealso VersionedFile.get_record_stream:
1361
for record in stream:
1362
prefix = record.key[:-1]
1363
key = record.key[-1:]
1364
if record.parents is not None:
1365
parents = [parent[-1:] for parent in record.parents]
1368
thunk_record = AdapterFactory(key, parents, record)
1369
path = self._mapper.map(prefix)
1370
# Note that this parses the file many times; we can do better but
1371
# as this only impacts weaves in terms of performance, it is
1373
vf = self._get_vf(path)
1374
vf.insert_record_stream([thunk_record])
1376
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1377
"""Iterate over the lines in the versioned files from keys.
1379
This may return lines from other keys. Each item the returned
1380
iterator yields is a tuple of a line and a text version that that line
1381
is present in (not introduced in).
1383
Ordering of results is in whatever order is most suitable for the
1384
underlying storage format.
1386
If a progress bar is supplied, it may be used to indicate progress.
1387
The caller is responsible for cleaning up progress bars (because this
1391
* Lines are normalised by the underlying store: they will all have \n
1393
* Lines are returned in arbitrary order.
1395
:return: An iterator over (line, key).
1397
for prefix, suffixes, vf in self._iter_keys_vf(keys):
1398
for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
1399
yield line, prefix + (version,)
1401
def _iter_all_components(self):
1402
for path, prefix in self._get_all_prefixes():
1403
yield prefix, self._get_vf(path)
1406
"""See VersionedFiles.keys()."""
1408
for prefix, vf in self._iter_all_components():
1409
for suffix in vf.versions():
1410
result.add(prefix + (suffix,))
1414
class _PlanMergeVersionedFile(VersionedFiles):
535
class _PlanMergeVersionedFile(object):
1415
536
"""A VersionedFile for uncommitted and committed texts.
1417
538
It is intended to allow merges to be planned with working tree texts.
1418
It implements only the small part of the VersionedFiles interface used by
539
It implements only the small part of the VersionedFile interface used by
1419
540
PlanMerge. It falls back to multiple versionedfiles for data not stored in
1420
541
_PlanMergeVersionedFile itself.
1422
:ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
1423
queried for missing texts.
1426
def __init__(self, file_id):
1427
"""Create a _PlanMergeVersionedFile.
544
def __init__(self, file_id, fallback_versionedfiles=None):
1429
:param file_id: Used with _PlanMerge code which is not yet fully
1430
tuple-keyspace aware.
547
:param file_id: Used when raising exceptions.
548
:param fallback_versionedfiles: If supplied, the set of fallbacks to
549
use. Otherwise, _PlanMergeVersionedFile.fallback_versionedfiles
550
can be appended to later.
1432
552
self._file_id = file_id
1433
# fallback locations
1434
self.fallback_versionedfiles = []
1435
# Parents for locally held keys.
553
if fallback_versionedfiles is None:
554
self.fallback_versionedfiles = []
556
self.fallback_versionedfiles = fallback_versionedfiles
1436
557
self._parents = {}
1437
# line data for locally held keys.
1438
558
self._lines = {}
1439
# key lookup providers
1440
self._providers = [DictParentsProvider(self._parents)]
1442
560
def plan_merge(self, ver_a, ver_b, base=None):
1443
561
"""See VersionedFile.plan_merge"""
1444
562
from bzrlib.merge import _PlanMerge
1445
563
if base is None:
1446
return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
1447
old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
1448
new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
564
return _PlanMerge(ver_a, ver_b, self).plan_merge()
565
old_plan = list(_PlanMerge(ver_a, base, self).plan_merge())
566
new_plan = list(_PlanMerge(ver_a, ver_b, self).plan_merge())
1449
567
return _PlanMerge._subtract_plans(old_plan, new_plan)
1451
569
def plan_lca_merge(self, ver_a, ver_b, base=None):
1452
570
from bzrlib.merge import _PlanLCAMerge
1454
new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
571
graph = self._get_graph()
572
new_plan = _PlanLCAMerge(ver_a, ver_b, self, graph).plan_merge()
1455
573
if base is None:
1457
old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
575
old_plan = _PlanLCAMerge(ver_a, base, self, graph).plan_merge()
1458
576
return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))
1460
def add_lines(self, key, parents, lines):
1461
"""See VersionedFiles.add_lines
578
def add_lines(self, version_id, parents, lines):
579
"""See VersionedFile.add_lines
1463
Lines are added locally, not to fallback versionedfiles. Also, ghosts
1464
are permitted. Only reserved ids are permitted.
581
Lines are added locally, not fallback versionedfiles. Also, ghosts are
582
permitted. Only reserved ids are permitted.
1466
if type(key) is not tuple:
1467
raise TypeError(key)
1468
if not revision.is_reserved_id(key[-1]):
584
if not revision.is_reserved_id(version_id):
1469
585
raise ValueError('Only reserved ids may be used')
1470
586
if parents is None:
1471
587
raise ValueError('Parents may not be None')
1472
588
if lines is None:
1473
589
raise ValueError('Lines may not be None')
1474
self._parents[key] = tuple(parents)
1475
self._lines[key] = lines
590
self._parents[version_id] = tuple(parents)
591
self._lines[version_id] = lines
1477
def get_record_stream(self, keys, ordering, include_delta_closure):
1480
if key in self._lines:
1481
lines = self._lines[key]
1482
parents = self._parents[key]
1484
yield ChunkedContentFactory(key, parents, None, lines)
593
def get_lines(self, version_id):
594
"""See VersionedFile.get_ancestry"""
595
lines = self._lines.get(version_id)
596
if lines is not None:
1485
598
for versionedfile in self.fallback_versionedfiles:
1486
for record in versionedfile.get_record_stream(
1487
pending, 'unordered', True):
1488
if record.storage_kind == 'absent':
600
return versionedfile.get_lines(version_id)
601
except errors.RevisionNotPresent:
604
raise errors.RevisionNotPresent(version_id, self._file_id)
606
def get_ancestry(self, version_id, topo_sorted=False):
607
"""See VersionedFile.get_ancestry.
609
Note that this implementation assumes that if a VersionedFile can
610
answer get_ancestry at all, it can give an authoritative answer. In
611
fact, ghosts can invalidate this assumption. But it's good enough
612
99% of the time, and far cheaper/simpler.
614
Also note that the results of this version are never topologically
615
sorted, and are a set.
618
raise ValueError('This implementation does not provide sorting')
619
parents = self._parents.get(version_id)
621
for vf in self.fallback_versionedfiles:
623
return vf.get_ancestry(version_id, topo_sorted=False)
624
except errors.RevisionNotPresent:
1491
pending.remove(record.key)
627
raise errors.RevisionNotPresent(version_id, self._file_id)
628
ancestry = set([version_id])
629
for parent in parents:
630
ancestry.update(self.get_ancestry(parent, topo_sorted=False))
633
def get_parent_map(self, version_ids):
634
"""See VersionedFile.get_parent_map"""
636
pending = set(version_ids)
637
for key in version_ids:
639
result[key] = self._parents[key]
642
pending = pending - set(result.keys())
643
for versionedfile in self.fallback_versionedfiles:
644
parents = versionedfile.get_parent_map(pending)
645
result.update(parents)
646
pending = pending - set(parents.keys())
1495
# report absent entries
1497
yield AbsentContentFactory(key)
1499
def get_parent_map(self, keys):
1500
"""See VersionedFiles.get_parent_map"""
1501
# We create a new provider because a fallback may have been added.
1502
# If we make fallbacks private we can update a stack list and avoid
1503
# object creation thrashing.
1506
if revision.NULL_REVISION in keys:
1507
keys.remove(revision.NULL_REVISION)
1508
result[revision.NULL_REVISION] = ()
1509
self._providers = self._providers[:1] + self.fallback_versionedfiles
1511
StackedParentsProvider(self._providers).get_parent_map(keys))
1512
for key, parents in result.iteritems():
1514
result[key] = (revision.NULL_REVISION,)
651
def _get_graph(self):
652
from bzrlib.graph import (
655
_StackedParentsProvider,
657
from bzrlib.repofmt.knitrepo import _KnitParentsProvider
658
parent_providers = [DictParentsProvider(self._parents)]
659
for vf in self.fallback_versionedfiles:
660
parent_providers.append(_KnitParentsProvider(vf))
661
return Graph(_StackedParentsProvider(parent_providers))
1518
664
class PlanWeaveMerge(TextMerge):
1519
665
"""Weave merge that takes a plan as its input.
1521
667
This exists so that VersionedFile.plan_merge is implementable.
1522
668
Most callers will want to use WeaveMerge instead.
1578
724
elif state == 'conflicted-b':
1579
725
ch_b = ch_a = True
1580
726
lines_b.append(line)
1581
elif state == 'killed-both':
1582
# This counts as a change, even though there is no associated
1586
if state not in ('irrelevant', 'ghost-a', 'ghost-b',
1588
raise AssertionError(state)
728
assert state in ('irrelevant', 'ghost-a', 'ghost-b',
729
'killed-base', 'killed-both'), state
1589
730
for struct in outstanding_struct():
1592
def base_from_plan(self):
1593
"""Construct a BASE file from the plan text."""
1595
for state, line in self.plan:
1596
if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1597
# If unchanged, then this line is straight from base. If a or b
1598
# or both killed the line, then it *used* to be in base.
1599
base_lines.append(line)
1601
if state not in ('killed-base', 'irrelevant',
1602
'ghost-a', 'ghost-b',
1604
'conflicted-a', 'conflicted-b'):
1605
# killed-base, irrelevant means it doesn't apply
1606
# ghost-a/ghost-b are harder to say for sure, but they
1607
# aren't in the 'inc_c' which means they aren't in the
1608
# shared base of a & b. So we don't include them. And
1609
# obviously if the line is newly inserted, it isn't in base
1611
# If 'conflicted-a' or b, then it is new vs one base, but
1612
# old versus another base. However, if we make it present
1613
# in the base, it will be deleted from the target, and it
1614
# seems better to get a line doubled in the merge result,
1615
# rather than have it deleted entirely.
1616
# Example, each node is the 'text' at that point:
1624
# There was a criss-cross conflict merge. Both sides
1625
# include the other, but put themselves first.
1626
# Weave marks this as a 'clean' merge, picking OTHER over
1627
# THIS. (Though the details depend on order inserted into
1629
# LCA generates a plan:
1630
# [('unchanged', M),
1631
# ('conflicted-b', b),
1633
# ('conflicted-a', b),
1635
# If you mark 'conflicted-*' as part of BASE, then a 3-way
1636
# merge tool will cleanly generate "MaN" (as BASE vs THIS
1637
# removes one 'b', and BASE vs OTHER removes the other)
1638
# If you include neither, 3-way creates a clean "MbabN" as
1639
# THIS adds one 'b', and OTHER does too.
1640
# It seems that having the line 2 times is better than
1641
# having it omitted. (Easier to manually delete than notice
1642
# it needs to be added.)
1643
raise AssertionError('Unknown state: %s' % (state,))
1647
734
class WeaveMerge(PlanWeaveMerge):
1648
735
"""Weave merge that takes a VersionedFile and two versions as its input."""
1650
def __init__(self, versionedfile, ver_a, ver_b,
737
def __init__(self, versionedfile, ver_a, ver_b,
1651
738
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
1652
739
plan = versionedfile.plan_merge(ver_a, ver_b)
1653
740
PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1656
class VirtualVersionedFiles(VersionedFiles):
1657
"""Dummy implementation for VersionedFiles that uses other functions for
1658
obtaining fulltexts and parent maps.
1660
This is always on the bottom of the stack and uses string keys
1661
(rather than tuples) internally.
1664
def __init__(self, get_parent_map, get_lines):
1665
"""Create a VirtualVersionedFiles.
1667
:param get_parent_map: Same signature as Repository.get_parent_map.
1668
:param get_lines: Should return lines for specified key or None if
1671
super(VirtualVersionedFiles, self).__init__()
1672
self._get_parent_map = get_parent_map
1673
self._get_lines = get_lines
1675
def check(self, progressbar=None):
1676
"""See VersionedFiles.check.
1678
:note: Always returns True for VirtualVersionedFiles.
1682
def add_mpdiffs(self, records):
1683
"""See VersionedFiles.mpdiffs.
1685
:note: Not implemented for VirtualVersionedFiles.
1687
raise NotImplementedError(self.add_mpdiffs)
1689
def get_parent_map(self, keys):
1690
"""See VersionedFiles.get_parent_map."""
1691
return dict([((k,), tuple([(p,) for p in v]))
1692
for k,v in self._get_parent_map([k for (k,) in keys]).iteritems()])
1694
def get_sha1s(self, keys):
1695
"""See VersionedFiles.get_sha1s."""
1698
lines = self._get_lines(k)
1699
if lines is not None:
1700
if not isinstance(lines, list):
1701
raise AssertionError
1702
ret[(k,)] = osutils.sha_strings(lines)
1705
def get_record_stream(self, keys, ordering, include_delta_closure):
1706
"""See VersionedFiles.get_record_stream."""
1707
for (k,) in list(keys):
1708
lines = self._get_lines(k)
1709
if lines is not None:
1710
if not isinstance(lines, list):
1711
raise AssertionError
1712
yield ChunkedContentFactory((k,), None,
1713
sha1=osutils.sha_strings(lines),
1716
yield AbsentContentFactory((k,))
1718
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1719
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
1720
for i, (key,) in enumerate(keys):
1722
pb.update("Finding changed lines", i, len(keys))
1723
for l in self._get_lines(key):
1727
class NoDupeAddLinesDecorator(object):
1728
"""Decorator for a VersionedFiles that skips doing an add_lines if the key
1732
def __init__(self, store):
1735
def add_lines(self, key, parents, lines, parent_texts=None,
1736
left_matching_blocks=None, nostore_sha=None, random_id=False,
1737
check_content=True):
1738
"""See VersionedFiles.add_lines.
1740
This implementation may return None as the third element of the return
1741
value when the original store wouldn't.
1744
raise NotImplementedError(
1745
"NoDupeAddLinesDecorator.add_lines does not implement the "
1746
"nostore_sha behaviour.")
1748
sha1 = osutils.sha_strings(lines)
1749
key = ("sha1:" + sha1,)
1752
if key in self._store.get_parent_map([key]):
1753
# This key has already been inserted, so don't do it again.
1755
sha1 = osutils.sha_strings(lines)
1756
return sha1, sum(map(len, lines)), None
1757
return self._store.add_lines(key, parents, lines,
1758
parent_texts=parent_texts,
1759
left_matching_blocks=left_matching_blocks,
1760
nostore_sha=nostore_sha, random_id=random_id,
1761
check_content=check_content)
1763
def __getattr__(self, name):
1764
return getattr(self._store, name)
1767
def network_bytes_to_kind_and_offset(network_bytes):
1768
"""Strip of a record kind from the front of network_bytes.
1770
:param network_bytes: The bytes of a record.
1771
:return: A tuple (storage_kind, offset_of_remaining_bytes)
1773
line_end = network_bytes.find('\n')
1774
storage_kind = network_bytes[:line_end]
1775
return storage_kind, line_end + 1
1778
class NetworkRecordStream(object):
1779
"""A record_stream which reconstitures a serialised stream."""
1781
def __init__(self, bytes_iterator):
1782
"""Create a NetworkRecordStream.
1784
:param bytes_iterator: An iterator of bytes. Each item in this
1785
iterator should have been obtained from a record_streams'
1786
record.get_bytes_as(record.storage_kind) call.
1788
self._bytes_iterator = bytes_iterator
1789
self._kind_factory = {
1790
'fulltext': fulltext_network_to_record,
1791
'groupcompress-block': groupcompress.network_block_to_records,
1792
'knit-ft-gz': knit.knit_network_to_record,
1793
'knit-delta-gz': knit.knit_network_to_record,
1794
'knit-annotated-ft-gz': knit.knit_network_to_record,
1795
'knit-annotated-delta-gz': knit.knit_network_to_record,
1796
'knit-delta-closure': knit.knit_delta_closure_to_records,
1802
:return: An iterator as per VersionedFiles.get_record_stream().
1804
for bytes in self._bytes_iterator:
1805
storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
1806
for record in self._kind_factory[storage_kind](
1807
storage_kind, bytes, line_end):
1811
def fulltext_network_to_record(kind, bytes, line_end):
1812
"""Convert a network fulltext record to record."""
1813
meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
1814
record_meta = bytes[line_end+4:line_end+4+meta_len]
1815
key, parents = bencode.bdecode_as_tuple(record_meta)
1816
if parents == 'nil':
1818
fulltext = bytes[line_end+4+meta_len:]
1819
return [FulltextContentFactory(key, parents, None, fulltext)]
1822
def _length_prefix(bytes):
1823
return struct.pack('!L', len(bytes))
1826
def record_to_fulltext_bytes(record):
1827
if record.parents is None:
1830
parents = record.parents
1831
record_meta = bencode.bencode((record.key, parents))
1832
record_content = record.get_bytes_as('fulltext')
1833
return "fulltext\n%s%s%s" % (
1834
_length_prefix(record_meta), record_meta, record_content)
1837
def sort_groupcompress(parent_map):
1838
"""Sort and group the keys in parent_map into groupcompress order.
1840
groupcompress is defined (currently) as reverse-topological order, grouped
1843
:return: A sorted-list of keys
1845
# gc-optimal ordering is approximately reverse topological,
1846
# properly grouped by file-id.
1848
for item in parent_map.iteritems():
1850
if isinstance(key, str) or len(key) == 1:
743
class InterVersionedFile(InterObject):
744
"""This class represents operations taking place between two VersionedFiles.
746
Its instances have methods like join, and contain
747
references to the source and target versionedfiles these operations can be
750
Often we will provide convenience methods on 'versionedfile' which carry out
751
operations with another versionedfile - they will always forward to
752
InterVersionedFile.get(other).method_name(parameters).
756
"""The available optimised InterVersionedFile types."""
758
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
759
"""Integrate versions from self.source into self.target.
761
If version_ids is None all versions from source should be
762
incorporated into this versioned file.
764
Must raise RevisionNotPresent if any of the specified versions
765
are not present in the other file's history unless ignore_missing is
766
supplied in which case they are silently skipped.
769
# - if the target is empty, just add all the versions from
770
# source to target, otherwise:
771
# - make a temporary versioned file of type target
772
# - insert the source content into it one at a time
774
if not self.target.versions():
777
# Make a new target-format versioned file.
778
temp_source = self.target.create_empty("temp", MemoryTransport())
780
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
781
graph = Graph(self.source)
782
search = graph._make_breadth_first_searcher(version_ids)
783
transitive_ids = set()
784
map(transitive_ids.update, list(search))
785
parent_map = self.source.get_parent_map(transitive_ids)
786
order = tsort.topo_sort(parent_map.items())
787
pb = ui.ui_factory.nested_progress_bar()
1855
per_prefix_map[prefix].append(item)
1857
per_prefix_map[prefix] = [item]
1860
for prefix in sorted(per_prefix_map):
1861
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
790
# TODO for incremental cross-format work:
791
# make a versioned file with the following content:
792
# all revisions we have been asked to join
793
# all their ancestors that are *not* in target already.
794
# the immediate parents of the above two sets, with
795
# empty parent lists - these versions are in target already
796
# and the incorrect version data will be ignored.
797
# TODO: for all ancestors that are present in target already,
798
# check them for consistent data, this requires moving sha1 from
800
# TODO: remove parent texts when they are not relevant any more for
801
# memory pressure reduction. RBC 20060313
802
# pb.update('Converting versioned data', 0, len(order))
804
for index, version in enumerate(order):
805
pb.update('Converting versioned data', index, total)
806
_, _, parent_text = target.add_lines(version,
808
self.source.get_lines(version),
809
parent_texts=parent_texts)
810
parent_texts[version] = parent_text
812
# this should hit the native code path for target
813
if target is not self.target:
814
return self.target.join(temp_source,
824
def _get_source_version_ids(self, version_ids, ignore_missing):
825
"""Determine the version ids to be used from self.source.
827
:param version_ids: The caller-supplied version ids to check. (None
828
for all). If None is in version_ids, it is stripped.
829
:param ignore_missing: if True, remove missing ids from the version
830
list. If False, raise RevisionNotPresent on
831
a missing version id.
832
:return: A set of version ids.
834
if version_ids is None:
835
# None cannot be in source.versions
836
return set(self.source.versions())
839
return set(self.source.versions()).intersection(set(version_ids))
841
new_version_ids = set()
842
for version in version_ids:
845
if not self.source.has_version(version):
846
raise errors.RevisionNotPresent(version, str(self.source))
848
new_version_ids.add(version)
849
return new_version_ids