16
16
# You should have received a copy of the GNU General Public License
17
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
20
"""Versioned text file storage api."""
22
22
from copy import copy
23
23
from cStringIO import StringIO
26
26
from zlib import adler32
28
28
from bzrlib.lazy_import import lazy_import
29
29
lazy_import(globals(), """
31
32
from bzrlib import (
39
from bzrlib.graph import DictParentsProvider, Graph, _StackedParentsProvider
45
from bzrlib.graph import DictParentsProvider, Graph, StackedParentsProvider
40
46
from bzrlib.transport.memory import MemoryTransport
42
48
from bzrlib.inter import InterObject
43
49
from bzrlib.registry import Registry
44
50
from bzrlib.symbol_versioning import *
45
51
from bzrlib.textmerge import TextMerge
52
from bzrlib import bencode
48
55
adapter_registry = Registry()
58
65
'bzrlib.knit', 'FTAnnotatedToUnannotated')
59
66
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
60
67
'bzrlib.knit', 'FTAnnotatedToFullText')
68
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
69
# 'bzrlib.knit', 'FTAnnotatedToChunked')
63
72
class ContentFactory(object):
64
73
"""Abstract interface for insertion and retrieval from a VersionedFile.
66
75
:ivar sha1: None, or the sha1 of the content fulltext.
67
76
:ivar storage_kind: The native storage kind of this factory. One of
68
77
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
83
92
self.parents = None
95
class ChunkedContentFactory(ContentFactory):
96
"""Static data content factory.
98
This takes a 'chunked' list of strings. The only requirement on 'chunked' is
99
that ''.join(lines) becomes a valid fulltext. A tuple of a single string
100
satisfies this, as does a list of lines.
102
:ivar sha1: None, or the sha1 of the content fulltext.
103
:ivar storage_kind: The native storage kind of this factory. Always
105
:ivar key: The key of this content. Each key is a tuple with a single
107
:ivar parents: A tuple of parent keys for self.key. If the object has
108
no parent information, None (as opposed to () for an empty list of
112
def __init__(self, key, parents, sha1, chunks):
113
"""Create a ContentFactory."""
115
self.storage_kind = 'chunked'
117
self.parents = parents
118
self._chunks = chunks
120
def get_bytes_as(self, storage_kind):
121
if storage_kind == 'chunked':
123
elif storage_kind == 'fulltext':
124
return ''.join(self._chunks)
125
raise errors.UnavailableRepresentation(self.key, storage_kind,
86
129
class FulltextContentFactory(ContentFactory):
87
130
"""Static data content factory.
89
132
This takes a fulltext when created and just returns that during
90
133
get_bytes_as('fulltext').
92
135
:ivar sha1: None, or the sha1 of the content fulltext.
93
136
:ivar storage_kind: The native storage kind of this factory. Always
418
469
if isinstance(version_ids, basestring):
419
470
version_ids = [version_ids]
420
471
raise NotImplementedError(self.get_ancestry)
422
473
def get_ancestry_with_ghosts(self, version_ids):
    """Return a list of all ancestors of given version(s).

    This will not include the null revision.

    Must raise RevisionNotPresent if any of the given versions are
    not present in file history.

    Ghosts that are known about will be included in ancestry list,
    but are not explicitly marked.

    :param version_ids: The version(s) whose ancestry is requested.
    :raises NotImplementedError: Always, in this implementation; the
        bound method is passed as the exception argument.
    """
    raise NotImplementedError(self.get_ancestry_with_ghosts)
434
485
def get_parent_map(self, version_ids):
435
486
"""Get a map of the parents of version_ids.
561
612
return self._backing_vf.keys()
615
class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
616
"""A VF that records calls, and returns keys in specific order.
618
:ivar calls: A list of the calls made; can be reset at any time by
622
def __init__(self, backing_vf, key_priority):
623
"""Create an OrderingVersionedFilesDecorator decorating backing_vf.
625
:param backing_vf: The versioned file to answer all methods.
626
:param key_priority: A dictionary defining what order keys should be
627
returned from an 'unordered' get_record_stream request.
628
Keys with lower priority are returned first, keys not present in
629
the map get an implicit priority of 0, and are returned in
630
lexicographical order.
632
RecordingVersionedFilesDecorator.__init__(self, backing_vf)
633
self._key_priority = key_priority
635
def get_record_stream(self, keys, sort_order, include_delta_closure):
636
self.calls.append(("get_record_stream", list(keys), sort_order,
637
include_delta_closure))
638
if sort_order == 'unordered':
640
return (self._key_priority.get(key, 0), key)
641
# Use a defined order by asking for the keys one-by-one from the
643
for key in sorted(keys, key=sort_key):
644
for record in self._backing_vf.get_record_stream([key],
645
'unordered', include_delta_closure):
648
for record in self._backing_vf.get_record_stream(keys, sort_order,
649
include_delta_closure):
564
653
class KeyMapper(object):
565
654
"""KeyMappers map between keys and underlying partitioned storage."""
748
838
raise NotImplementedError(self.add_lines)
840
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
841
"""Add a text to the store.
843
This is a private function for use by CommitBuilder.
845
:param key: The key tuple of the text to add. If the last element is
846
None, a CHK string will be generated during the addition.
847
:param parents: The parents key tuples of the text to add.
848
:param text: A string containing the text to be committed.
849
:param nostore_sha: Raise ExistingContent and do not add the lines to
850
the versioned file if the digest of the lines matches this.
851
:param random_id: If True a random id has been selected rather than
852
an id determined by some deterministic process such as a converter
853
from a foreign VCS. When True the backend may choose not to check
854
for uniqueness of the resulting key within the versioned file, so
855
this should only be done when the result is expected to be unique
857
:param check_content: If True, the lines supplied are verified to be
858
bytestrings that are correctly formed lines.
859
:return: The text sha1, the number of bytes in the text, and an opaque
860
representation of the inserted version which can be provided
861
back to future _add_text calls in the parent_texts dictionary.
863
# The default implementation just thunks over to .add_lines(),
864
# inefficient, but it works.
865
return self.add_lines(key, parents, osutils.split_lines(text),
866
nostore_sha=nostore_sha,
750
870
def add_mpdiffs(self, records):
751
871
"""Add mpdiffs to this VersionedFile.
765
885
if not mpvf.has_version(p))
766
886
# It seems likely that adding all the present parents as fulltexts can
767
887
# easily exhaust memory.
768
split_lines = osutils.split_lines
888
chunks_to_lines = osutils.chunks_to_lines
769
889
for record in self.get_record_stream(needed_parents, 'unordered',
771
891
if record.storage_kind == 'absent':
773
mpvf.add_version(split_lines(record.get_bytes_as('fulltext')),
893
mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
775
895
for (key, parent_keys, expected_sha1, mpdiff), lines in\
776
896
zip(records, mpvf.get_line_list(versions)):
847
990
raise NotImplementedError(self.get_sha1s)
992
has_key = index._has_key_from_parent_map
994
def get_missing_compression_parent_keys(self):
    """Return an iterable of keys of missing compression parents.

    Check this after calling insert_record_stream to find out if there are
    any missing compression parents.  If there are, the records that
    depend on them are not able to be inserted safely.  The precise
    behaviour depends on the concrete VersionedFiles class in use.

    Classes that do not support this will raise NotImplementedError.

    :raises NotImplementedError: Always, in this implementation; the
        bound method is passed as the exception argument.
    """
    raise NotImplementedError(self.get_missing_compression_parent_keys)
849
1006
def insert_record_stream(self, stream):
850
1007
"""Insert a record stream into this container.
852
:param stream: A stream of records to insert.
1009
:param stream: A stream of records to insert.
854
1011
:seealso VersionedFile.get_record_stream:
994
1153
result.append((prefix + (origin,), line))
997
def check(self, progress_bar=None):
1156
def get_annotator(self):
    """Return an annotate.Annotator constructed over this object."""
    return annotate.Annotator(self)
1159
def check(self, progress_bar=None, keys=None):
998
1160
"""See VersionedFiles.check()."""
1161
# XXX: This is over-enthusiastic but as we only thunk for Weaves today
1162
# this is tolerable. Ideally we'd pass keys down to check() and
1163
# have the older VersionedFile interface updated too.
999
1164
for prefix, vf in self._iter_all_components():
1166
if keys is not None:
1167
return self.get_record_stream(keys, 'unordered', True)
1002
1169
def get_parent_map(self, keys):
1003
1170
"""Get a map of the parents of keys.
1317
1487
class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        """Build the merge plan from versionedfile and initialise the base.

        :param versionedfile: Object providing plan_merge(ver_a, ver_b).
        :param ver_a: First version passed to plan_merge.
        :param ver_b: Second version passed to plan_merge.
        :param a_marker: Forwarded unchanged to PlanWeaveMerge.__init__.
        :param b_marker: Forwarded unchanged to PlanWeaveMerge.__init__.
        """
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1326
1496
class VirtualVersionedFiles(VersionedFiles):
1327
"""Dummy implementation for VersionedFiles that uses other functions for
1497
"""Dummy implementation for VersionedFiles that uses other functions for
1328
1498
obtaining fulltexts and parent maps.
1330
This is always on the bottom of the stack and uses string keys
1500
This is always on the bottom of the stack and uses string keys
1331
1501
(rather than tuples) internally.
1379
1549
if lines is not None:
1380
1550
if not isinstance(lines, list):
1381
1551
raise AssertionError
1382
yield FulltextContentFactory((k,), None,
1552
yield ChunkedContentFactory((k,), None,
1383
1553
sha1=osutils.sha_strings(lines),
1384
text=''.join(lines))
1386
1556
yield AbsentContentFactory((k,))
1558
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1559
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
1560
for i, (key,) in enumerate(keys):
1562
pb.update("Finding changed lines", i, len(keys))
1563
for l in self._get_lines(key):
1567
def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    The record kind is everything before the first newline; the returned
    offset points at the first byte after that newline.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    # NOTE: find() returns -1 when there is no newline, in which case the
    # kind is network_bytes minus its last byte and the offset is 0.
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
1578
class NetworkRecordStream(object):
1579
"""A record_stream which reconstitutes a serialised stream."""
1581
def __init__(self, bytes_iterator):
1582
"""Create a NetworkRecordStream.
1584
:param bytes_iterator: An iterator of bytes. Each item in this
1585
iterator should have been obtained from a record_streams'
1586
record.get_bytes_as(record.storage_kind) call.
1588
self._bytes_iterator = bytes_iterator
1589
self._kind_factory = {
1590
'fulltext': fulltext_network_to_record,
1591
'groupcompress-block': groupcompress.network_block_to_records,
1592
'knit-ft-gz': knit.knit_network_to_record,
1593
'knit-delta-gz': knit.knit_network_to_record,
1594
'knit-annotated-ft-gz': knit.knit_network_to_record,
1595
'knit-annotated-delta-gz': knit.knit_network_to_record,
1596
'knit-delta-closure': knit.knit_delta_closure_to_records,
1602
:return: An iterator as per VersionedFiles.get_record_stream().
1604
for bytes in self._bytes_iterator:
1605
storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
1606
for record in self._kind_factory[storage_kind](
1607
storage_kind, bytes, line_end):
1611
def fulltext_network_to_record(kind, bytes, line_end):
1612
"""Convert a network fulltext record to record."""
1613
meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
1614
record_meta = bytes[line_end+4:line_end+4+meta_len]
1615
key, parents = bencode.bdecode_as_tuple(record_meta)
1616
if parents == 'nil':
1618
fulltext = bytes[line_end+4+meta_len:]
1619
return [FulltextContentFactory(key, parents, None, fulltext)]
1622
def _length_prefix(bytes):
1623
return struct.pack('!L', len(bytes))
1626
def record_to_fulltext_bytes(record):
1627
if record.parents is None:
1630
parents = record.parents
1631
record_meta = bencode.bencode((record.key, parents))
1632
record_content = record.get_bytes_as('fulltext')
1633
return "fulltext\n%s%s%s" % (
1634
_length_prefix(record_meta), record_meta, record_content)
1637
def sort_groupcompress(parent_map):
1638
"""Sort and group the keys in parent_map into groupcompress order.
1640
groupcompress is defined (currently) as reverse-topological order, grouped
1643
:return: A sorted-list of keys
1645
# gc-optimal ordering is approximately reverse topological,
1646
# properly grouped by file-id.
1648
for item in parent_map.iteritems():
1650
if isinstance(key, str) or len(key) == 1:
1655
per_prefix_map[prefix].append(item)
1657
per_prefix_map[prefix] = [item]
1660
for prefix in sorted(per_prefix_map):
1661
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))