# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Versioned text file storage api."""

from copy import copy
from cStringIO import StringIO
from zlib import adler32

from bzrlib.lazy_import import lazy_import

    def get_ancestry(self, version_ids, topo_sorted=True):
        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        :param version_ids: The version ids to look up parents for.
        :return: A mapping from version id to parents. Absent version ids are
            absent from the mapping.
        """
    def plan_merge(self, ver_a, ver_b):
        """Return pseudo-annotation indicating how the two versions merge.

        Legend:
        killed-base Dead in base revision
        killed-both Killed in each revision
        killed-a    Killed in a
        killed-b    Killed in b
        unchanged   Alive in both a and b (possibly created in both)
        new-a       Created in a
        new-b       Created in b
        ghost-a     Killed in a, unborn in b
        ghost-b     Killed in b, unborn in a
        irrelevant  Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
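
    # Illustrative sketch of how a plan produced by plan_merge() is consumed
    # (hand-built plan; states as in the legend above, lines hypothetical):
    #
    #   plan = [('unchanged', 'common\n'),
    #           ('new-a', 'only in a\n'),
    #           ('new-b', 'only in b\n')]
    #   merged = PlanWeaveMerge(plan).merge_lines()[0]
    #
    # The 'new-a'/'new-b' region has no agreed text, so the merged output
    # wraps those lines in the A/B conflict markers.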

        raise NotImplementedError(self.add_lines)

    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
        """Add a text to the store.

        This is a private function for use by CommitBuilder.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param text: A string containing the text to be committed.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future _add_text calls in the parent_texts dictionary.
        """
        # The default implementation just thunks over to .add_lines(),
        # inefficient, but it works.
        return self.add_lines(key, parents, osutils.split_lines(text),
                              nostore_sha=nostore_sha,
                              random_id=random_id)
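
    # Illustrative usage (hypothetical key): the default thunk above makes
    #   sha1, num_bytes, _ = vfiles._add_text(('file-id', 'rev-1'), [],
    #                                         'one\ntwo\n')
    # equivalent to calling add_lines() with ['one\n', 'two\n'].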

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile."""

    def annotate(self, version_id):
        """Return a list of (version-id, line) tuples for version_id."""
        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None):
        """Check this object for integrity.

        :param progress_bar: A progress bar to output as the check progresses.
        :param keys: Specific keys within the VersionedFiles to check. When
            this parameter is not None, check() becomes a generator as per
            get_record_stream. The difference to get_record_stream is that
            more or deeper checks will be performed.
        :return: None, or if keys was supplied a generator as per
            get_record_stream.
        """
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def clear_cache(self):
        """Clear whatever caches this VersionedFile holds.

        This is generally called after an operation has been performed, when we
        don't expect to be using this versioned file again soon.
        """

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        # most basic implementation is a loop around get_parent_map
        pending = set(keys)
        parent_map = {}
        while pending:
            this_parent_map = self.get_parent_map(pending)
            parent_map.update(this_parent_map)
            pending = set()
            map(pending.update, this_parent_map.itervalues())
            pending = pending.difference(parent_map)
        kg = _mod_graph.KnownGraph(parent_map)
        return kg
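
    # Illustrative usage (hypothetical 'vfiles' and key): walking the whole
    # ancestry of one text and asking graph questions about it:
    #   kg = vfiles.get_known_graph_ancestry([('file-id', 'rev-99')])
    #   merge_order = kg.merge_sort(('file-id', 'rev-99'))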

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """

        has_key = index._has_key_from_parent_map

    def get_missing_compression_parent_keys(self):
        """Return an iterable of keys of missing compression parents.

        Check this after calling insert_record_stream to find out if there are
        any missing compression parents. If there are, the records that
        depend on them are not able to be inserted safely. The precise
        behaviour depends on the concrete VersionedFiles class in use.

        Classes that do not support this will raise NotImplementedError.
        """
        raise NotImplementedError(self.get_missing_compression_parent_keys)
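
    # Illustrative usage (hypothetical 'vfiles' and 'stream'): after inserting
    # a possibly-incomplete stream, poll for compression parents that still
    # need to be fetched and inserted:
    #   vfiles.insert_record_stream(stream)
    #   missing = vfiles.get_missing_compression_parent_keys()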

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

            result.append((prefix + (origin,), line))

    def get_annotator(self):
        return annotate.Annotator(self)

    def check(self, progress_bar=None, keys=None):
        """See VersionedFiles.check()."""
        # XXX: This is over-enthusiastic but as we only thunk for Weaves today
        # this is tolerable. Ideally we'd pass keys down to check() and
        # have the older VersionedFile interface updated too.
        for prefix, vf in self._iter_all_components():
            vf.check()
        if keys is not None:
            return self.get_record_stream(keys, 'unordered', True)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                                 'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base

                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
            a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)


class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off the record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
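
# For example (illustrative): for the wire form 'fulltext\n<body>' this
# returns ('fulltext', 9), and network_bytes[9:] is the record body.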


class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_stream's
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record
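

# Illustrative sketch (hypothetical 'records' source): reconstituting records
# that were serialised for the wire:
#   wire_chunks = [r.get_bytes_as(r.storage_kind) for r in records]
#   for record in NetworkRecordStream(iter(wire_chunks)).read():
#       ...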


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
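

# Illustrative round trip through the helpers above (the key is hypothetical;
# FulltextContentFactory is defined earlier in this module):
def _demo_fulltext_round_trip():
    record = FulltextContentFactory(('file-id', 'rev-1'), (), None, 'hello\n')
    wire_bytes = record_to_fulltext_bytes(record)
    # kind is 'fulltext'; offset points at the length-prefixed metadata.
    kind, offset = network_bytes_to_kind_and_offset(wire_bytes)
    return fulltext_network_to_record(kind, wire_bytes, offset)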


def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the prefix of the key.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
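

# Illustrative sketch (hypothetical keys): keys are grouped by their prefix,
# and each group comes out in reverse topological (newest-first) order:
def _demo_sort_groupcompress():
    parent_map = {
        ('f1', 'rev-a'): (),
        ('f1', 'rev-b'): (('f1', 'rev-a'),),
        ('f2', 'rev-x'): (),
    }
    # Expected: [('f1', 'rev-b'), ('f1', 'rev-a'), ('f2', 'rev-x')]
    return sort_groupcompress(parent_map)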