# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Versioned text file storage api."""
from copy import copy
from cStringIO import StringIO
import struct

from zlib import adler32
from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib import (
    annotate,
    errors,
    groupcompress,
    index,
    knit,
    multiparent,
    osutils,
    tsort,
    )
from bzrlib.graph import DictParentsProvider, Graph, StackedParentsProvider
from bzrlib.transport.memory import MemoryTransport
""")
from bzrlib.inter import InterObject
from bzrlib.registry import Registry
from bzrlib.symbol_versioning import *
from bzrlib.textmerge import TextMerge
from bzrlib import bencode
adapter_registry = Registry()
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
'bzrlib.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
# 'bzrlib.knit', 'FTAnnotatedToChunked')
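
# A hedged usage sketch (not a stable API): callers look up an adapter by a
# (source storage kind, target kind) pair and apply it to a record. The basis
# argument given to the factory (None here) and the get_bytes() call follow
# the adapters in bzrlib.knit; treat the exact signatures as assumptions of
# this sketch, as they have varied between releases:
#   adapter_factory = adapter_registry.get(('knit-annotated-ft-gz', 'fulltext'))
#   adapter = adapter_factory(None)
#   fulltext = adapter.get_bytes(record)
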
class ContentFactory(object):
"""Abstract interface for insertion and retrieval from a VersionedFile.
:ivar sha1: None, or the sha1 of the content fulltext.
:ivar storage_kind: The native storage kind of this factory. One of
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None

class ChunkedContentFactory(ContentFactory):
"""Static data content factory.
This takes a 'chunked' list of strings. The only requirement on 'chunked' is
    that ''.join(chunked) becomes a valid fulltext. A tuple of a single string
satisfies this, as does a list of lines.
:ivar sha1: None, or the sha1 of the content fulltext.
:ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

def __init__(self, key, parents, sha1, chunks):
        """Create a ChunkedContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'chunked'
        self.key = key
self.parents = parents
self._chunks = chunks
def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
elif storage_kind == 'fulltext':
return ''.join(self._chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
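
    # A minimal illustration with hypothetical key and content values:
    #   factory = ChunkedContentFactory(('file-id', 'rev-1'), (), None,
    #       ['line one\n', 'line two\n'])
    #   factory.get_bytes_as('chunked')  -> ['line one\n', 'line two\n']
    #   factory.get_bytes_as('fulltext') -> 'line one\nline two\n'
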
class FulltextContentFactory(ContentFactory):
"""Static data content factory.
This takes a fulltext when created and just returns that during
get_bytes_as('fulltext').
:ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
if isinstance(version_ids, basestring):
version_ids = [version_ids]
raise NotImplementedError(self.get_ancestry)
def get_ancestry_with_ghosts(self, version_ids):
"""Return a list of all ancestors of given version(s). This
will not include the null revision.
Must raise RevisionNotPresent if any of the given versions are
not present in file history.
Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
raise NotImplementedError(self.get_ancestry_with_ghosts)
def get_parent_map(self, version_ids):
"""Get a map of the parents of version_ids.
return self._backing_vf.keys()
class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
"""A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

def __init__(self, backing_vf, key_priority):
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
:param backing_vf: The versioned file to answer all methods.
:param key_priority: A dictionary defining what order keys should be
returned from an 'unordered' get_record_stream request.
Keys with lower priority are returned first, keys not present in
the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
RecordingVersionedFilesDecorator.__init__(self, backing_vf)
self._key_priority = key_priority
def get_record_stream(self, keys, sort_order, include_delta_closure):
self.calls.append(("get_record_stream", list(keys), sort_order,
include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf.
for key in sorted(keys, key=sort_key):
for record in self._backing_vf.get_record_stream([key],
                    'unordered', include_delta_closure):
                    yield record
        else:
for record in self._backing_vf.get_record_stream(keys, sort_order,
                include_delta_closure):
                yield record
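
    # A hedged illustration with hypothetical keys: given
    # key_priority = {('b',): 1}, an 'unordered' request for
    # [('a',), ('b',), ('c',)] yields ('a',) then ('c',) (implicit priority 0,
    # lexicographical tie-break) and finally ('b',).
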
class KeyMapper(object):
"""KeyMappers map between keys and underlying partitioned storage."""
raise NotImplementedError(self.add_lines)
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
"""Add a text to the store.
This is a private function for use by CommitBuilder.
:param key: The key tuple of the text to add. If the last element is
None, a CHK string will be generated during the addition.
:param parents: The parents key tuples of the text to add.
:param text: A string containing the text to be committed.
:param nostore_sha: Raise ExistingContent and do not add the lines to
the versioned file if the digest of the lines matches this.
:param random_id: If True a random id has been selected rather than
an id determined by some deterministic process such as a converter
from a foreign VCS. When True the backend may choose not to check
for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
:return: The text sha1, the number of bytes in the text, and an opaque
representation of the inserted version which can be provided
            back to future _add_text calls in the parent_texts dictionary.
        """
# The default implementation just thunks over to .add_lines(),
# inefficient, but it works.
return self.add_lines(key, parents, osutils.split_lines(text),
            nostore_sha=nostore_sha,
            random_id=random_id,
            check_content=True)

def add_mpdiffs(self, records):
"""Add mpdiffs to this VersionedFile.
        needed_parents.update(p for p in parent_ids
            if not mpvf.has_version(p))
# It seems likely that adding all the present parents as fulltexts can
# easily exhaust memory.
chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(needed_parents, 'unordered',
            True):
            if record.storage_kind == 'absent':
                continue
            mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
                record.key, [])
for (key, parent_keys, expected_sha1, mpdiff), lines in\
zip(records, mpvf.get_line_list(versions)):
raise NotImplementedError(self.get_sha1s)
has_key = index._has_key_from_parent_map
def get_missing_compression_parent_keys(self):
"""Return an iterable of keys of missing compression parents.
Check this after calling insert_record_stream to find out if there are
any missing compression parents. If there are, the records that
depend on them are not able to be inserted safely. The precise
behaviour depends on the concrete VersionedFiles class in use.
        Classes that do not support this will raise NotImplementedError.
        """
raise NotImplementedError(self.get_missing_compression_parent_keys)
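
    # A hedged usage sketch: after insert_record_stream() on an implementation
    # that supports it, callers can check for texts whose compression basis
    # was absent from the stream (the keys below are whatever key tuples the
    # stream used):
    #   vf.insert_record_stream(stream)
    #   for key in vf.get_missing_compression_parent_keys():
    #       ...  # fetch and insert the missing basis texts before relying on vf
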
def insert_record_stream(self, stream):
"""Insert a record stream into this container.
:param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

result.append((prefix + (origin,), line))
def get_annotator(self):
return annotate.Annotator(self)
def check(self, progress_bar=None, keys=None):
"""See VersionedFiles.check()."""
# XXX: This is over-enthusiastic but as we only thunk for Weaves today
# this is tolerable. Ideally we'd pass keys down to check() and
        # have the older VersionedFile interface updated too.
        for prefix, vf in self._iter_all_components():
            vf.check()
if keys is not None:
return self.get_record_stream(keys, 'unordered', True)
def get_parent_map(self, keys):
"""Get a map of the parents of keys.
class WeaveMerge(PlanWeaveMerge):
"""Weave merge that takes a VersionedFile and two versions as its input."""
def __init__(self, versionedfile, ver_a, ver_b,
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
plan = versionedfile.plan_merge(ver_a, ver_b)
PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
class VirtualVersionedFiles(VersionedFiles):
"""Dummy implementation for VersionedFiles that uses other functions for
obtaining fulltexts and parent maps.
This is always on the bottom of the stack and uses string keys
(rather than tuples) internally.
if lines is not None:
if not isinstance(lines, list):
raise AssertionError
yield ChunkedContentFactory((k,), None,
sha1=osutils.sha_strings(lines),
                        chunks=lines)
            else:
yield AbsentContentFactory((k,))
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)

def network_bytes_to_kind_and_offset(network_bytes):
"""Strip of a record kind from the front of network_bytes.
:param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
line_end = network_bytes.find('\n')
storage_kind = network_bytes[:line_end]
return storage_kind, line_end + 1
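
# For example, given a hypothetical payload 'fulltext\n<rest of record>' this
# returns ('fulltext', 9): the storage kind is everything before the first
# newline, and the offset points at the byte just after it.
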
class NetworkRecordStream(object):
"""A record_stream which reconstitures a serialised stream."""
def __init__(self, bytes_iterator):
"""Create a NetworkRecordStream.
:param bytes_iterator: An iterator of bytes. Each item in this
iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
for bytes in self._bytes_iterator:
storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
for record in self._kind_factory[storage_kind](
                storage_kind, bytes, line_end):
                yield record

def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to a record object."""
meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
record_meta = bytes[line_end+4:line_end+4+meta_len]
key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
fulltext = bytes[line_end+4+meta_len:]
return [FulltextContentFactory(key, parents, None, fulltext)]
def _length_prefix(bytes):
return struct.pack('!L', len(bytes))
def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
parents = record.parents
record_meta = bencode.bencode((record.key, parents))
record_content = record.get_bytes_as('fulltext')
return "fulltext\n%s%s%s" % (
_length_prefix(record_meta), record_meta, record_content)
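
# Wire-format sketch for a hypothetical record with key ('k',) and no
# parents: record_to_fulltext_bytes() produces
#   'fulltext\n' + struct.pack('!L', len(meta)) + meta + fulltext
# where meta is bencode.bencode((('k',), 'nil')); fulltext_network_to_record()
# is the inverse, mapping the 'nil' marker back to parents=None.
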
def sort_groupcompress(parent_map):
"""Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted list of keys.
    """
# gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
per_prefix_map[prefix].append(item)
        except KeyError:
per_prefix_map[prefix] = [item]

    present_keys = []
for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
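
# A hedged illustration with hypothetical two-element keys of the form
# (file_id, revision_id): given
#   parent_map = {('f1', 'r2'): [('f1', 'r1')], ('f1', 'r1'): [],
#                 ('f2', 'r1'): []}
# keys are grouped by prefix and each group is emitted newest-first, giving
# [('f1', 'r2'), ('f1', 'r1'), ('f2', 'r1')].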