# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Versioned text file storage api."""
from bzrlib.graph import DictParentsProvider, Graph, StackedParentsProvider
from bzrlib.transport.memory import MemoryTransport
from bzrlib.inter import InterObject
from bzrlib.registry import Registry
from bzrlib.symbol_versioning import *
from bzrlib.textmerge import TextMerge
from bzrlib import bencode

adapter_registry = Registry()
def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add. If the last element is
        None, a CHK string will be generated during the addition.
    :param parents: The parents key tuples of the text to add.
    :param lines: A list of lines. Each line must be a bytestring. And all
        of them except the last must be terminated with \n and contain no
        other \n's. The last line may either contain no \n's or a single
        terminating \n.
    """
    raise NotImplementedError(self.add_lines)
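# Illustrative sketch only (not part of the original module). 'vf' stands for
# any concrete VersionedFiles implementation; the key and line values below
# are hypothetical.
def _example_add_lines(vf):
    # Every line except the last must end in '\n'; the last may omit it.
    sha1, num_bytes, _ = vf.add_lines(
        ('file-id', 'rev-1'),                      # key tuple for the new text
        [],                                        # parent key tuples (none here)
        ['first line\n', 'last line, no newline'])
    return sha1, num_bytes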
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """Add a text to the store.

    This is a private function for use by CommitBuilder.

    :param key: The key tuple of the text to add. If the last element is
        None, a CHK string will be generated during the addition.
    :param parents: The parents key tuples of the text to add.
    :param text: A string containing the text to be committed.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: The text sha1, the number of bytes in the text, and an opaque
        representation of the inserted version which can be provided
        back to future _add_text calls in the parent_texts dictionary.
    """
    # The default implementation just thunks over to .add_lines(),
    # inefficient, but it works.
    return self.add_lines(key, parents, osutils.split_lines(text),
                          nostore_sha=nostore_sha,
                          random_id=random_id,
                          check_content=True)
def add_mpdiffs(self, records):
    """Add mpdiffs to this VersionedFile."""

def annotate(self, key):
    raise NotImplementedError(self.annotate)
def check(self, progress_bar=None, keys=None):
    """Check this object for integrity.

    :param progress_bar: A progress bar to output as the check progresses.
    :param keys: Specific keys within the VersionedFiles to check. When
        this parameter is not None, check() becomes a generator as per
        get_record_stream. The difference to get_record_stream is that
        more or deeper checks will be performed.
    :return: None, or if keys was supplied a generator as per
        get_record_stream.
    """
    raise NotImplementedError(self.check)
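# Illustrative sketch only (not part of the original module). 'vf' stands for
# any concrete VersionedFiles implementation; the key below is hypothetical.
def _example_check(vf):
    # With keys supplied, check() behaves like get_record_stream(): the
    # caller consumes the returned records while the deeper checks run.
    for record in vf.check(keys=[('file-id', 'rev-1')]):
        record.get_bytes_as(record.storage_kind)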
def check_not_reserved_id(version_id):
    revision.check_not_reserved_id(version_id)
def clear_cache(self):
    """Clear whatever caches this VersionedFile holds.

    This is generally called after an operation has been performed, when we
    don't expect to be using this versioned file again soon.
    """
def _check_lines_not_unicode(self, lines):
    """Check that lines being added to a versioned file are not unicode."""
    for line in lines:
        if line.__class__ is not str:
            raise errors.BzrBadParameterUnicode("lines")

def _check_lines_are_lines(self, lines):
    """Check that the lines really are full lines without inline EOL."""
    for line in lines:
        if '\n' in line[:-1]:
            raise errors.BzrBadParameterContainsNewline("lines")
def get_known_graph_ancestry(self, keys):
    """Get a KnownGraph instance with the ancestry of keys."""
    # most basic implementation is a loop around get_parent_map
    pending = set(keys)
    parent_map = {}
    while pending:
        this_parent_map = self.get_parent_map(pending)
        parent_map.update(this_parent_map)
        # queue up the parents we have not resolved yet, then drop any
        # that are already in the accumulated map
        pending = set()
        map(pending.update, this_parent_map.itervalues())
        pending = pending.difference(parent_map)
    kg = _mod_graph.KnownGraph(parent_map)
    return kg
def get_parent_map(self, keys):
    """Get a map of the parents of keys."""
    raise NotImplementedError(self.get_parent_map)

result.append((prefix + (origin,), line))
def get_annotator(self):
    return annotate.Annotator(self)

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check()."""
    # XXX: This is over-enthusiastic but as we only thunk for Weaves today
    # this is tolerable. Ideally we'd pass keys down to check() and
    # have the older VersionedFile interface updated too.
    for prefix, vf in self._iter_all_components():
        vf.check()
    if keys is not None:
        return self.get_record_stream(keys, 'unordered', True)

def get_parent_map(self, keys):
    """Get a map of the parents of keys."""
    elif state == 'conflicted-b':
        ch_b = ch_a = True
        lines_b.append(line)
    elif state == 'killed-both':
        # This counts as a change, even though there is no associated
        # line
        ch_b = ch_a = True
    else:
        if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                'killed-base'):
            raise AssertionError(state)
for struct in outstanding_struct():
    yield struct
def base_from_plan(self):
    """Construct a BASE file from the plan text."""
    base_lines = []
    for state, line in self.plan:
        if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
            # If unchanged, then this line is straight from base. If a or b
            # or both killed the line, then it *used* to be in base.
            base_lines.append(line)
        else:
            if state not in ('killed-base', 'irrelevant',
                             'ghost-a', 'ghost-b',
                             'new-a', 'new-b',
                             'conflicted-a', 'conflicted-b'):
                # killed-base, irrelevant means it doesn't apply
                # ghost-a/ghost-b are harder to say for sure, but they
                # aren't in the 'inc_c' which means they aren't in the
                # shared base of a & b. So we don't include them. And
                # obviously if the line is newly inserted, it isn't in base
                # If 'conflicted-a' or b, then it is new vs one base, but
                # old versus another base. However, if we make it present
                # in the base, it will be deleted from the target, and it
                # seems better to get a line doubled in the merge result,
                # rather than have it deleted entirely.
                # Example, each node is the 'text' at that point:
                #           MN
                #          /   \
                #        MaN   MbN
                #         |  X  |
                #        MabN MbaN
                #          \   /
                #           ???
                # There was a criss-cross conflict merge. Both sides
                # include the other, but put themselves first.
                # Weave marks this as a 'clean' merge, picking OTHER over
                # THIS. (Though the details depend on order inserted into
                # weave, etc.)
                # LCA generates a plan:
                # [('unchanged', M),
                #  ('conflicted-b', b),
                #  ('unchanged', a),
                #  ('conflicted-a', b),
                #  ('unchanged', N)]
                # If you mark 'conflicted-*' as part of BASE, then a 3-way
                # merge tool will cleanly generate "MaN" (as BASE vs THIS
                # removes one 'b', and BASE vs OTHER removes the other)
                # If you include neither, 3-way creates a clean "MbabN" as
                # THIS adds one 'b', and OTHER does too.
                # It seems that having the line 2 times is better than
                # having it omitted. (Easier to manually delete than notice
                # it needs to be added.)
                raise AssertionError('Unknown state: %s' % (state,))
    return base_lines
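# Illustrative sketch only (not part of the original module): the criss-cross
# plan from the comment above, written out as data. Constructing the merger
# object is elided; the point is the plan/BASE relationship.
_example_plan = [
    ('unchanged', 'M\n'),
    ('conflicted-b', 'b\n'),
    ('unchanged', 'a\n'),
    ('conflicted-a', 'b\n'),
    ('unchanged', 'N\n'),
]
# Because the 'conflicted-*' lines are deliberately left out of BASE,
# base_from_plan() on this plan yields ['M\n', 'a\n', 'N\n'], so a 3-way merge
# sees both sides adding a 'b' and produces "MbabN" rather than silently
# dropping one of them.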
class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""


def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions()."""
    for i, (key,) in enumerate(keys):
        if pb is not None:
            pb.update("Finding changed lines", i, len(keys))
        for l in self._get_lines(key):
            yield (l, key)
        record.get_bytes_as(record.storage_kind) call.
    """
    self._bytes_iterator = bytes_iterator
    self._kind_factory = {
        'fulltext': fulltext_network_to_record,
        'groupcompress-block': groupcompress.network_block_to_records,
        'knit-ft-gz': knit.knit_network_to_record,
        'knit-delta-gz': knit.knit_network_to_record,
        'knit-annotated-ft-gz': knit.knit_network_to_record,
        'knit-annotated-delta-gz': knit.knit_network_to_record,
        'knit-delta-closure': knit.knit_delta_closure_to_records,
        }

def read(self):
def record_to_fulltext_bytes(record):
    record_meta = bencode.bencode((record.key, record.parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
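# Illustrative sketch only (not part of the original module): the framing
# produced above is a "fulltext\n" header, a length prefix for the bencoded
# metadata, the metadata itself, then the raw text bytes. Assuming the length
# prefix is a 4-byte big-endian integer, a hypothetical decoder could split a
# payload back apart like this:
import struct

def _example_parse_fulltext(payload):
    kind, rest = payload.split('\n', 1)          # strip the storage-kind line
    meta_len = struct.unpack('!L', rest[:4])[0]  # assumed 4-byte length prefix
    record_meta = rest[4:4 + meta_len]
    record_content = rest[4 + meta_len:]
    return kind, record_meta, record_content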
def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
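# Illustrative sketch only (not part of the original module): keys sharing a
# prefix (e.g. a file-id) are grouped together, and each group is emitted in
# reverse-topological order, children before their parents.
_example_parent_map = {
    ('file-a', 'rev-2'): (('file-a', 'rev-1'),),
    ('file-a', 'rev-1'): (),
    ('file-b', 'rev-1'): (),
}
# Expected shape of sort_groupcompress(_example_parent_map):
#   [('file-a', 'rev-2'), ('file-a', 'rev-1'), ('file-b', 'rev-1')]
# i.e. all 'file-a' keys first (newest to oldest), then the 'file-b' group.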