    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
#     'bzrlib.knit', 'FTAnnotatedToChunked')


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',

        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
    satisfies this, as does a list of lines.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, chunks):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return ''.join(self._chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
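
# Example (illustrative, not part of the module): any sequence of strings
# whose concatenation is the fulltext satisfies the 'chunked' contract, so
# a round-trip through the constructor shown above looks like:
#
#   factory = ChunkedContentFactory(('rev-id',), (), None,
#                                   ['line 1\n', 'line 2\n'])
#   factory.get_bytes_as('fulltext')           # => 'line 1\nline 2\n'
#   ''.join(factory.get_bytes_as('chunked'))   # => 'line 1\nline 2\n'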


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.

        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)
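
    # Illustrative: for a revision C whose ancestry includes a ghost G
    # (known about but not present), get_ancestry_with_ghosts(['C']) returns
    # G alongside the present revisions, with no marker distinguishing it.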

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        unchanged  Alive in both a and b (possibly created in both)
        new-a      Created in a
        new-b      Created in b
        ghost-a    Killed in a, unborn in b
        ghost-b    Killed in b, unborn in a
        irrelevant Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
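
    # Illustrative sketch: plan_merge yields (state, line) pairs using the
    # states documented above; weave_merge folds such a plan into merged
    # lines (the names 'rev-a'/'rev-b' are hypothetical):
    #
    #   plan = vf.plan_merge('rev-a', 'rev-b')
    #   merged_lines = vf.weave_merge(plan)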

        return self._backing_vf.keys()


class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf, key_priority):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                        'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                    include_delta_closure):
                yield record
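
# Example (illustrative; backing_vf is any VersionedFiles): keys absent from
# key_priority sort first with an implicit priority of 0, then lower
# priorities, with ties broken lexicographically:
#
#   vf = OrderingVersionedFilesDecorator(backing_vf, {('b',): 1, ('a',): 2})
#   [r.key for r in vf.get_record_stream([('a',), ('b',), ('c',)],
#                                        'unordered', False)]
#   # => [('c',), ('b',), ('a',)]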


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

            if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(needed_parents, 'unordered',
            True):
            if record.storage_kind == 'absent':
                continue
            mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
                record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in \
            zip(records, mpvf.get_line_list(versions)):
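
        # Illustrative: osutils.chunks_to_lines re-splits arbitrary chunks on
        # '\n' boundaries, e.g. chunks_to_lines(['a\nb', '\n']) == ['a\n', 'b\n'],
        # so a 'chunked' payload can feed any API that expects lines.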

        has_key = index._has_key_from_parent_map

    def get_missing_compression_parent_keys(self):
        """Return an iterable of keys of missing compression parents.

        Check this after calling insert_record_stream to find out if there are
        any missing compression parents. If there are, the records that
        depend on them are not able to be inserted safely. The precise
        behaviour depends on the concrete VersionedFiles class in use.

        Classes that do not support this will raise NotImplementedError.
        """
        raise NotImplementedError(self.get_missing_compression_parent_keys)
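
    # Illustrative calling pattern implied by the docstring above:
    #
    #   vf.insert_record_stream(stream)
    #   missing = vf.get_missing_compression_parent_keys()
    #   if missing:
    #       fetch_and_insert(missing)   # hypothetical recovery step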

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :seealso VersionedFile.get_record_stream:

        ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
        knit_keys.difference_update(ghosts)
        lines = {}
        chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(knit_keys, 'topological', True):
            lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
            # line_block_dict = {}
            # for parent, blocks in record.extract_line_blocks():
            #   line_blocks[parent] = blocks


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)


class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """
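
    # Illustrative (the constructor is not shown in this excerpt): in bzrlib
    # this class is built from the two callables it delegates to, e.g.
    #
    #   vvf = VirtualVersionedFiles(my_get_parent_map, my_get_lines)
    #   list(vvf.get_record_stream([('some-key',)], 'unordered', True))
    #
    # where my_get_parent_map and my_get_lines are hypothetical helpers.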

            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                        sha1=osutils.sha_strings(lines),
                        chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("iterating texts", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
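
# Example (illustrative): the storage kind is everything before the first
# newline, and the offset points just past it:
#
#   network_bytes_to_kind_and_offset('fulltext\n<rest of record>')
#   # => ('fulltext', 9)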


class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_stream's
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {'knit-ft-gz':knit.knit_network_to_record,
            'knit-delta-gz':knit.knit_network_to_record,
            'knit-annotated-ft-gz':knit.knit_network_to_record,
            'knit-annotated-delta-gz':knit.knit_network_to_record,
            'knit-delta-closure':knit.knit_delta_closure_to_records,
            'fulltext':fulltext_network_to_record,
            }

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record
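
    # Illustrative: reconstituting records serialised for the network, where
    # 'serialised' is a hypothetical iterable of get_bytes_as(storage_kind)
    # payloads, and assuming the reading method above is read() as in bzrlib:
    #
    #   for record in NetworkRecordStream(serialised).read():
    #       process(record)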


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
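
# Illustrative round-trip of the "fulltext" wire format defined above:
#
#   bytes = record_to_fulltext_bytes(record)
#   # layout: 'fulltext\n' + 4-byte !L length of the bencoded (key, parents)
#   # metadata + that metadata + the raw fulltext
#   kind, offset = network_bytes_to_kind_and_offset(bytes)
#   factory, = fulltext_network_to_record(kind, bytes, offset)
#   # factory.key == record.key; parents 'nil' decodes back to None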