1
# Copyright (C) 2006-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Versioned text file storage api."""
20
from cStringIO import StringIO
23
from zlib import adler32
25
from bzrlib.lazy_import import lazy_import
26
lazy_import(globals(), """
43
from bzrlib.registry import Registry
44
from bzrlib.textmerge import TextMerge
47
adapter_registry = Registry()
48
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
49
'DeltaPlainToFullText')
50
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
52
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
53
'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
54
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
55
'bzrlib.knit', 'DeltaAnnotatedToFullText')
56
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
57
'bzrlib.knit', 'FTAnnotatedToUnannotated')
58
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
59
'bzrlib.knit', 'FTAnnotatedToFullText')
60
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
61
# 'bzrlib.knit', 'FTAnnotatedToChunked')
64
class ContentFactory(object):
65
"""Abstract interface for insertion and retrieval from a VersionedFile.
67
:ivar sha1: None, or the sha1 of the content fulltext.
68
:ivar storage_kind: The native storage kind of this factory. One of
69
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
70
'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
71
'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
72
:ivar key: The key of this content. Each key is a tuple with a single
74
:ivar parents: A tuple of parent keys for self.key. If the object has
75
no parent information, None (as opposed to () for an empty list of
80
"""Create a ContentFactory."""
82
self.storage_kind = None
87
class ChunkedContentFactory(ContentFactory):
88
"""Static data content factory.
90
This takes a 'chunked' list of strings. The only requirement on 'chunked' is
91
that ''.join(lines) becomes a valid fulltext. A tuple of a single string
92
satisfies this, as does a list of lines.
94
:ivar sha1: None, or the sha1 of the content fulltext.
95
:ivar storage_kind: The native storage kind of this factory. Always
97
:ivar key: The key of this content. Each key is a tuple with a single
99
:ivar parents: A tuple of parent keys for self.key. If the object has
100
no parent information, None (as opposed to () for an empty list of
104
def __init__(self, key, parents, sha1, chunks):
105
"""Create a ContentFactory."""
107
self.storage_kind = 'chunked'
109
self.parents = parents
110
self._chunks = chunks
112
def get_bytes_as(self, storage_kind):
113
if storage_kind == 'chunked':
115
elif storage_kind == 'fulltext':
116
return ''.join(self._chunks)
117
raise errors.UnavailableRepresentation(self.key, storage_kind,
121
class FulltextContentFactory(ContentFactory):
122
"""Static data content factory.
124
This takes a fulltext when created and just returns that during
125
get_bytes_as('fulltext').
127
:ivar sha1: None, or the sha1 of the content fulltext.
128
:ivar storage_kind: The native storage kind of this factory. Always
130
:ivar key: The key of this content. Each key is a tuple with a single
132
:ivar parents: A tuple of parent keys for self.key. If the object has
133
no parent information, None (as opposed to () for an empty list of
137
def __init__(self, key, parents, sha1, text):
138
"""Create a ContentFactory."""
140
self.storage_kind = 'fulltext'
142
self.parents = parents
145
def get_bytes_as(self, storage_kind):
146
if storage_kind == self.storage_kind:
148
elif storage_kind == 'chunked':
150
raise errors.UnavailableRepresentation(self.key, storage_kind,
154
class AbsentContentFactory(ContentFactory):
155
"""A placeholder content factory for unavailable texts.
158
:ivar storage_kind: 'absent'.
159
:ivar key: The key of this content. Each key is a tuple with a single
164
def __init__(self, key):
165
"""Create a ContentFactory."""
167
self.storage_kind = 'absent'
171
def get_bytes_as(self, storage_kind):
172
raise ValueError('A request was made for key: %s, but that'
173
' content is not available, and the calling'
174
' code does not handle if it is missing.'
178
class AdapterFactory(ContentFactory):
179
"""A content factory to adapt between key prefix's."""
181
def __init__(self, key, parents, adapted):
182
"""Create an adapter factory instance."""
184
self.parents = parents
185
self._adapted = adapted
187
def __getattr__(self, attr):
188
"""Return a member from the adapted object."""
189
if attr in ('key', 'parents'):
190
return self.__dict__[attr]
192
return getattr(self._adapted, attr)
195
def filter_absent(record_stream):
196
"""Adapt a record stream to remove absent records."""
197
for record in record_stream:
198
if record.storage_kind != 'absent':
202
class _MPDiffGenerator(object):
203
"""Pull out the functionality for generating mp_diffs."""
205
def __init__(self, vf, keys):
207
# This is the order the keys were requested in
208
self.ordered_keys = tuple(keys)
209
# keys + their parents, what we need to compute the diffs
210
self.needed_keys = ()
211
# Map from key: mp_diff
213
# Map from key: parents_needed (may have ghosts)
215
# Parents that aren't present
216
self.ghost_parents = ()
217
# Map from parent_key => number of children for this text
219
# Content chunks that are cached while we still need them
222
def _find_needed_keys(self):
223
"""Find the set of keys we need to request.
225
This includes all the original keys passed in, and the non-ghost
226
parents of those keys.
228
:return: (needed_keys, refcounts)
229
needed_keys is the set of all texts we need to extract
230
refcounts is a dict of {key: num_children} letting us know when we
231
no longer need to cache a given parent text
233
# All the keys and their parents
234
needed_keys = set(self.ordered_keys)
235
parent_map = self.vf.get_parent_map(needed_keys)
236
self.parent_map = parent_map
237
# TODO: Should we be using a different construct here? I think this
238
# uses difference_update internally, and we expect the result to
240
missing_keys = needed_keys.difference(parent_map)
242
raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
243
# Parents that might be missing. They are allowed to be ghosts, but we
244
# should check for them
246
setdefault = refcounts.setdefault
248
for child_key, parent_keys in parent_map.iteritems():
250
# parent_keys may be None if a given VersionedFile claims to
251
# not support graph operations.
253
just_parents.update(parent_keys)
254
needed_keys.update(parent_keys)
255
for p in parent_keys:
256
refcounts[p] = setdefault(p, 0) + 1
257
just_parents.difference_update(parent_map)
258
# Remove any parents that are actually ghosts from the needed set
259
self.present_parents = set(self.vf.get_parent_map(just_parents))
260
self.ghost_parents = just_parents.difference(self.present_parents)
261
needed_keys.difference_update(self.ghost_parents)
262
self.needed_keys = needed_keys
263
self.refcounts = refcounts
264
return needed_keys, refcounts
266
def _compute_diff(self, key, parent_lines, lines):
267
"""Compute a single mp_diff, and store it in self._diffs"""
268
if len(parent_lines) > 0:
269
# XXX: _extract_blocks is not usefully defined anywhere...
270
# It was meant to extract the left-parent diff without
271
# having to recompute it for Knit content (pack-0.92,
272
# etc). That seems to have regressed somewhere
273
left_parent_blocks = self.vf._extract_blocks(key,
274
parent_lines[0], lines)
276
left_parent_blocks = None
277
diff = multiparent.MultiParent.from_lines(lines,
278
parent_lines, left_parent_blocks)
279
self.diffs[key] = diff
281
def _process_one_record(self, key, this_chunks):
283
if key in self.parent_map:
284
# This record should be ready to diff, since we requested
285
# content in 'topological' order
286
parent_keys = self.parent_map.pop(key)
287
# If a VersionedFile claims 'no-graph' support, then it may return
288
# None for any parent request, so we replace it with an empty tuple
289
if parent_keys is None:
292
for p in parent_keys:
293
# Alternatively we could check p not in self.needed_keys, but
294
# ghost_parents should be tiny versus huge
295
if p in self.ghost_parents:
297
refcount = self.refcounts[p]
298
if refcount == 1: # Last child reference
299
self.refcounts.pop(p)
300
parent_chunks = self.chunks.pop(p)
302
self.refcounts[p] = refcount - 1
303
parent_chunks = self.chunks[p]
304
p_lines = osutils.chunks_to_lines(parent_chunks)
305
# TODO: Should we cache the line form? We did the
306
# computation to get it, but storing it this way will
307
# be less memory efficient...
308
parent_lines.append(p_lines)
310
lines = osutils.chunks_to_lines(this_chunks)
311
# Since we needed the lines, we'll go ahead and cache them this way
313
self._compute_diff(key, parent_lines, lines)
315
# Is this content required for any more children?
316
if key in self.refcounts:
317
self.chunks[key] = this_chunks
319
def _extract_diffs(self):
320
needed_keys, refcounts = self._find_needed_keys()
321
for record in self.vf.get_record_stream(needed_keys,
322
'topological', True):
323
if record.storage_kind == 'absent':
324
raise errors.RevisionNotPresent(record.key, self.vf)
325
self._process_one_record(record.key,
326
record.get_bytes_as('chunked'))
328
def compute_diffs(self):
329
self._extract_diffs()
330
dpop = self.diffs.pop
331
return [dpop(k) for k in self.ordered_keys]
334
class VersionedFile(object):
335
"""Versioned text file storage.
337
A versioned file manages versions of line-based text files,
338
keeping track of the originating version for each line.
340
To clients the "lines" of the file are represented as a list of
341
strings. These strings will typically have terminal newline
342
characters, but this is not required. In particular files commonly
343
do not have a newline at the end of the file.
345
Texts are identified by a version-id string.
349
def check_not_reserved_id(version_id):
350
revision.check_not_reserved_id(version_id)
352
def copy_to(self, name, transport):
353
"""Copy this versioned file to name on transport."""
354
raise NotImplementedError(self.copy_to)
356
def get_record_stream(self, versions, ordering, include_delta_closure):
357
"""Get a stream of records for versions.
359
:param versions: The versions to include. Each version is a tuple
361
:param ordering: Either 'unordered' or 'topological'. A topologically
362
sorted stream has compression parents strictly before their
364
:param include_delta_closure: If True then the closure across any
365
compression parents will be included (in the data content of the
366
stream, not in the emitted records). This guarantees that
367
'fulltext' can be used successfully on every record.
368
:return: An iterator of ContentFactory objects, each of which is only
369
valid until the iterator is advanced.
371
raise NotImplementedError(self.get_record_stream)
373
def has_version(self, version_id):
374
"""Returns whether version is present."""
375
raise NotImplementedError(self.has_version)
377
def insert_record_stream(self, stream):
378
"""Insert a record stream into this versioned file.
380
:param stream: A stream of records to insert.
382
:seealso VersionedFile.get_record_stream:
384
raise NotImplementedError
386
def add_lines(self, version_id, parents, lines, parent_texts=None,
387
left_matching_blocks=None, nostore_sha=None, random_id=False,
389
"""Add a single text on top of the versioned file.
391
Must raise RevisionAlreadyPresent if the new version is
392
already present in file history.
394
Must raise RevisionNotPresent if any of the given parents are
395
not present in file history.
397
:param lines: A list of lines. Each line must be a bytestring. And all
398
of them except the last must be terminated with \n and contain no
399
other \n's. The last line may either contain no \n's or a single
400
terminated \n. If the lines list does meet this constraint the add
401
routine may error or may succeed - but you will be unable to read
402
the data back accurately. (Checking the lines have been split
403
correctly is expensive and extremely unlikely to catch bugs so it
404
is not done at runtime unless check_content is True.)
405
:param parent_texts: An optional dictionary containing the opaque
406
representations of some or all of the parents of version_id to
407
allow delta optimisations. VERY IMPORTANT: the texts must be those
408
returned by add_lines or data corruption can be caused.
409
:param left_matching_blocks: a hint about which areas are common
410
between the text and its left-hand-parent. The format is
411
the SequenceMatcher.get_matching_blocks format.
412
:param nostore_sha: Raise ExistingContent and do not add the lines to
413
the versioned file if the digest of the lines matches this.
414
:param random_id: If True a random id has been selected rather than
415
an id determined by some deterministic process such as a converter
416
from a foreign VCS. When True the backend may choose not to check
417
for uniqueness of the resulting key within the versioned file, so
418
this should only be done when the result is expected to be unique
420
:param check_content: If True, the lines supplied are verified to be
421
bytestrings that are correctly formed lines.
422
:return: The text sha1, the number of bytes in the text, and an opaque
423
representation of the inserted version which can be provided
424
back to future add_lines calls in the parent_texts dictionary.
426
self._check_write_ok()
427
return self._add_lines(version_id, parents, lines, parent_texts,
428
left_matching_blocks, nostore_sha, random_id, check_content)
430
def _add_lines(self, version_id, parents, lines, parent_texts,
431
left_matching_blocks, nostore_sha, random_id, check_content):
432
"""Helper to do the class specific add_lines."""
433
raise NotImplementedError(self.add_lines)
435
def add_lines_with_ghosts(self, version_id, parents, lines,
436
parent_texts=None, nostore_sha=None, random_id=False,
437
check_content=True, left_matching_blocks=None):
438
"""Add lines to the versioned file, allowing ghosts to be present.
440
This takes the same parameters as add_lines and returns the same.
442
self._check_write_ok()
443
return self._add_lines_with_ghosts(version_id, parents, lines,
444
parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
446
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
447
nostore_sha, random_id, check_content, left_matching_blocks):
448
"""Helper to do class specific add_lines_with_ghosts."""
449
raise NotImplementedError(self.add_lines_with_ghosts)
451
def check(self, progress_bar=None):
452
"""Check the versioned file for integrity."""
453
raise NotImplementedError(self.check)
455
def _check_lines_not_unicode(self, lines):
456
"""Check that lines being added to a versioned file are not unicode."""
458
if line.__class__ is not str:
459
raise errors.BzrBadParameterUnicode("lines")
461
def _check_lines_are_lines(self, lines):
462
"""Check that the lines really are full lines without inline EOL."""
464
if '\n' in line[:-1]:
465
raise errors.BzrBadParameterContainsNewline("lines")
467
def get_format_signature(self):
468
"""Get a text description of the data encoding in this file.
472
raise NotImplementedError(self.get_format_signature)
474
def make_mpdiffs(self, version_ids):
475
"""Create multiparent diffs for specified versions."""
476
# XXX: Can't use _MPDiffGenerator just yet. This is because version_ids
477
# is a list of strings, not keys. And while self.get_record_stream
478
# is supported, it takes *keys*, while self.get_parent_map() takes
480
knit_versions = set()
481
knit_versions.update(version_ids)
482
parent_map = self.get_parent_map(version_ids)
483
for version_id in version_ids:
485
knit_versions.update(parent_map[version_id])
487
raise errors.RevisionNotPresent(version_id, self)
488
# We need to filter out ghosts, because we can't diff against them.
489
knit_versions = set(self.get_parent_map(knit_versions).keys())
490
lines = dict(zip(knit_versions,
491
self._get_lf_split_line_list(knit_versions)))
493
for version_id in version_ids:
494
target = lines[version_id]
496
parents = [lines[p] for p in parent_map[version_id] if p in
499
# I don't know how this could ever trigger.
500
# parent_map[version_id] was already triggered in the previous
501
# for loop, and lines[p] has the 'if p in knit_versions' check,
502
# so we again won't have a KeyError.
503
raise errors.RevisionNotPresent(version_id, self)
505
left_parent_blocks = self._extract_blocks(version_id,
508
left_parent_blocks = None
509
diffs.append(multiparent.MultiParent.from_lines(target, parents,
513
def _extract_blocks(self, version_id, source, target):
516
def add_mpdiffs(self, records):
517
"""Add mpdiffs to this VersionedFile.
519
Records should be iterables of version, parents, expected_sha1,
520
mpdiff. mpdiff should be a MultiParent instance.
522
# Does this need to call self._check_write_ok()? (IanC 20070919)
524
mpvf = multiparent.MultiMemoryVersionedFile()
526
for version, parent_ids, expected_sha1, mpdiff in records:
527
versions.append(version)
528
mpvf.add_diff(mpdiff, version, parent_ids)
529
needed_parents = set()
530
for version, parent_ids, expected_sha1, mpdiff in records:
531
needed_parents.update(p for p in parent_ids
532
if not mpvf.has_version(p))
533
present_parents = set(self.get_parent_map(needed_parents).keys())
534
for parent_id, lines in zip(present_parents,
535
self._get_lf_split_line_list(present_parents)):
536
mpvf.add_version(lines, parent_id, [])
537
for (version, parent_ids, expected_sha1, mpdiff), lines in\
538
zip(records, mpvf.get_line_list(versions)):
539
if len(parent_ids) == 1:
540
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
541
mpvf.get_diff(parent_ids[0]).num_lines()))
543
left_matching_blocks = None
545
_, _, version_text = self.add_lines_with_ghosts(version,
546
parent_ids, lines, vf_parents,
547
left_matching_blocks=left_matching_blocks)
548
except NotImplementedError:
549
# The vf can't handle ghosts, so add lines normally, which will
550
# (reasonably) fail if there are ghosts in the data.
551
_, _, version_text = self.add_lines(version,
552
parent_ids, lines, vf_parents,
553
left_matching_blocks=left_matching_blocks)
554
vf_parents[version] = version_text
555
sha1s = self.get_sha1s(versions)
556
for version, parent_ids, expected_sha1, mpdiff in records:
557
if expected_sha1 != sha1s[version]:
558
raise errors.VersionedFileInvalidChecksum(version)
560
def get_text(self, version_id):
561
"""Return version contents as a text string.
563
Raises RevisionNotPresent if version is not present in
566
return ''.join(self.get_lines(version_id))
567
get_string = get_text
569
def get_texts(self, version_ids):
570
"""Return the texts of listed versions as a list of strings.
572
Raises RevisionNotPresent if version is not present in
575
return [''.join(self.get_lines(v)) for v in version_ids]
577
def get_lines(self, version_id):
578
"""Return version contents as a sequence of lines.
580
Raises RevisionNotPresent if version is not present in
583
raise NotImplementedError(self.get_lines)
585
def _get_lf_split_line_list(self, version_ids):
586
return [StringIO(t).readlines() for t in self.get_texts(version_ids)]
588
def get_ancestry(self, version_ids, topo_sorted=True):
589
"""Return a list of all ancestors of given version(s). This
590
will not include the null revision.
592
This list will not be topologically sorted if topo_sorted=False is
595
Must raise RevisionNotPresent if any of the given versions are
596
not present in file history."""
597
if isinstance(version_ids, basestring):
598
version_ids = [version_ids]
599
raise NotImplementedError(self.get_ancestry)
601
def get_ancestry_with_ghosts(self, version_ids):
602
"""Return a list of all ancestors of given version(s). This
603
will not include the null revision.
605
Must raise RevisionNotPresent if any of the given versions are
606
not present in file history.
608
Ghosts that are known about will be included in ancestry list,
609
but are not explicitly marked.
611
raise NotImplementedError(self.get_ancestry_with_ghosts)
613
def get_parent_map(self, version_ids):
614
"""Get a map of the parents of version_ids.
616
:param version_ids: The version ids to look up parents for.
617
:return: A mapping from version id to parents.
619
raise NotImplementedError(self.get_parent_map)
621
def get_parents_with_ghosts(self, version_id):
622
"""Return version names for parents of version_id.
624
Will raise RevisionNotPresent if version_id is not present
627
Ghosts that are known about will be included in the parent list,
628
but are not explicitly marked.
631
return list(self.get_parent_map([version_id])[version_id])
633
raise errors.RevisionNotPresent(version_id, self)
635
def annotate(self, version_id):
636
"""Return a list of (version-id, line) tuples for version_id.
638
:raise RevisionNotPresent: If the given version is
639
not present in file history.
641
raise NotImplementedError(self.annotate)
643
def iter_lines_added_or_present_in_versions(self, version_ids=None,
645
"""Iterate over the lines in the versioned file from version_ids.
647
This may return lines from other versions. Each item the returned
648
iterator yields is a tuple of a line and a text version that that line
649
is present in (not introduced in).
651
Ordering of results is in whatever order is most suitable for the
652
underlying storage format.
654
If a progress bar is supplied, it may be used to indicate progress.
655
The caller is responsible for cleaning up progress bars (because this
658
NOTES: Lines are normalised: they will all have \n terminators.
659
Lines are returned in arbitrary order.
661
:return: An iterator over (line, version_id).
663
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
665
def plan_merge(self, ver_a, ver_b):
666
"""Return pseudo-annotation indicating how the two versions merge.
668
This is computed between versions a and b and their common
671
Weave lines present in none of them are skipped entirely.
674
killed-base Dead in base revision
675
killed-both Killed in each revision
678
unchanged Alive in both a and b (possibly created in both)
681
ghost-a Killed in a, unborn in b
682
ghost-b Killed in b, unborn in a
683
irrelevant Not in either revision
685
raise NotImplementedError(VersionedFile.plan_merge)
687
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
688
b_marker=TextMerge.B_MARKER):
689
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
692
class RecordingVersionedFilesDecorator(object):
693
"""A minimal versioned files that records calls made on it.
695
Only enough methods have been added to support tests using it to date.
697
:ivar calls: A list of the calls made; can be reset at any time by
701
def __init__(self, backing_vf):
702
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
704
:param backing_vf: The versioned file to answer all methods.
706
self._backing_vf = backing_vf
709
def add_lines(self, key, parents, lines, parent_texts=None,
710
left_matching_blocks=None, nostore_sha=None, random_id=False,
712
self.calls.append(("add_lines", key, parents, lines, parent_texts,
713
left_matching_blocks, nostore_sha, random_id, check_content))
714
return self._backing_vf.add_lines(key, parents, lines, parent_texts,
715
left_matching_blocks, nostore_sha, random_id, check_content)
718
self._backing_vf.check()
720
def get_parent_map(self, keys):
721
self.calls.append(("get_parent_map", copy(keys)))
722
return self._backing_vf.get_parent_map(keys)
724
def get_record_stream(self, keys, sort_order, include_delta_closure):
725
self.calls.append(("get_record_stream", list(keys), sort_order,
726
include_delta_closure))
727
return self._backing_vf.get_record_stream(keys, sort_order,
728
include_delta_closure)
730
def get_sha1s(self, keys):
731
self.calls.append(("get_sha1s", copy(keys)))
732
return self._backing_vf.get_sha1s(keys)
734
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
735
self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
736
return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)
739
self.calls.append(("keys",))
740
return self._backing_vf.keys()
743
class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
744
"""A VF that records calls, and returns keys in specific order.
746
:ivar calls: A list of the calls made; can be reset at any time by
750
def __init__(self, backing_vf, key_priority):
751
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
753
:param backing_vf: The versioned file to answer all methods.
754
:param key_priority: A dictionary defining what order keys should be
755
returned from an 'unordered' get_record_stream request.
756
Keys with lower priority are returned first, keys not present in
757
the map get an implicit priority of 0, and are returned in
758
lexicographical order.
760
RecordingVersionedFilesDecorator.__init__(self, backing_vf)
761
self._key_priority = key_priority
763
def get_record_stream(self, keys, sort_order, include_delta_closure):
764
self.calls.append(("get_record_stream", list(keys), sort_order,
765
include_delta_closure))
766
if sort_order == 'unordered':
768
return (self._key_priority.get(key, 0), key)
769
# Use a defined order by asking for the keys one-by-one from the
771
for key in sorted(keys, key=sort_key):
772
for record in self._backing_vf.get_record_stream([key],
773
'unordered', include_delta_closure):
776
for record in self._backing_vf.get_record_stream(keys, sort_order,
777
include_delta_closure):
781
class KeyMapper(object):
782
"""KeyMappers map between keys and underlying partitioned storage."""
785
"""Map key to an underlying storage identifier.
787
:param key: A key tuple e.g. ('file-id', 'revision-id').
788
:return: An underlying storage identifier, specific to the partitioning
791
raise NotImplementedError(self.map)
793
def unmap(self, partition_id):
794
"""Map a partitioned storage id back to a key prefix.
796
:param partition_id: The underlying partition id.
797
:return: As much of a key (or prefix) as is derivable from the partition
800
raise NotImplementedError(self.unmap)
803
class ConstantMapper(KeyMapper):
804
"""A key mapper that maps to a constant result."""
806
def __init__(self, result):
807
"""Create a ConstantMapper which will return result for all maps."""
808
self._result = result
811
"""See KeyMapper.map()."""
815
class URLEscapeMapper(KeyMapper):
816
"""Base class for use with transport backed storage.
818
This provides a map and unmap wrapper that respectively url escape and
819
unescape their outputs and inputs.
823
"""See KeyMapper.map()."""
824
return urllib.quote(self._map(key))
826
def unmap(self, partition_id):
827
"""See KeyMapper.unmap()."""
828
return self._unmap(urllib.unquote(partition_id))
831
class PrefixMapper(URLEscapeMapper):
832
"""A key mapper that extracts the first component of a key.
834
This mapper is for use with a transport based backend.
838
"""See KeyMapper.map()."""
841
def _unmap(self, partition_id):
842
"""See KeyMapper.unmap()."""
843
return (partition_id,)
846
class HashPrefixMapper(URLEscapeMapper):
847
"""A key mapper that combines the first component of a key with a hash.
849
This mapper is for use with a transport based backend.
853
"""See KeyMapper.map()."""
854
prefix = self._escape(key[0])
855
return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
857
def _escape(self, prefix):
858
"""No escaping needed here."""
861
def _unmap(self, partition_id):
862
"""See KeyMapper.unmap()."""
863
return (self._unescape(osutils.basename(partition_id)),)
865
def _unescape(self, basename):
866
"""No unescaping needed for HashPrefixMapper."""
870
class HashEscapedPrefixMapper(HashPrefixMapper):
871
"""Combines the escaped first component of a key with a hash.
873
This mapper is for use with a transport based backend.
876
_safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
878
def _escape(self, prefix):
879
"""Turn a key element into a filesystem safe string.
881
This is similar to a plain urllib.quote, except
882
it uses specific safe characters, so that it doesn't
883
have to translate a lot of valid file ids.
885
# @ does not get escaped. This is because it is a valid
886
# filesystem character we use all the time, and it looks
887
# a lot better than seeing %40 all the time.
888
r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
892
def _unescape(self, basename):
893
"""Escaped names are easily unescaped by urlutils."""
894
return urllib.unquote(basename)
897
def make_versioned_files_factory(versioned_file_factory, mapper):
898
"""Create a ThunkedVersionedFiles factory.
900
This will create a callable which when called creates a
901
ThunkedVersionedFiles on a transport, using mapper to access individual
902
versioned files, and versioned_file_factory to create each individual file.
904
def factory(transport):
905
return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
910
class VersionedFiles(object):
911
"""Storage for many versioned files.
913
This object allows a single keyspace for accessing the history graph and
914
contents of named bytestrings.
916
Currently no implementation allows the graph of different key prefixes to
917
intersect, but the API does allow such implementations in the future.
919
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
920
may have a different length key-size, but that size will be constant for
921
all texts added to or retrieved from it. For instance, bzrlib uses
922
instances with a key-size of 2 for storing user files in a repository, with
923
the first element the fileid, and the second the version of that file.
925
The use of tuples allows a single code base to support several different
926
uses with only the mapping logic changing from instance to instance.
928
:ivar _immediate_fallback_vfs: For subclasses that support stacking,
929
this is a list of other VersionedFiles immediately underneath this
930
one. They may in turn each have further fallbacks.
933
def add_lines(self, key, parents, lines, parent_texts=None,
934
left_matching_blocks=None, nostore_sha=None, random_id=False,
936
"""Add a text to the store.
938
:param key: The key tuple of the text to add. If the last element is
939
None, a CHK string will be generated during the addition.
940
:param parents: The parents key tuples of the text to add.
941
:param lines: A list of lines. Each line must be a bytestring. And all
942
of them except the last must be terminated with \n and contain no
943
other \n's. The last line may either contain no \n's or a single
944
terminating \n. If the lines list does meet this constraint the add
945
routine may error or may succeed - but you will be unable to read
946
the data back accurately. (Checking the lines have been split
947
correctly is expensive and extremely unlikely to catch bugs so it
948
is not done at runtime unless check_content is True.)
949
:param parent_texts: An optional dictionary containing the opaque
950
representations of some or all of the parents of version_id to
951
allow delta optimisations. VERY IMPORTANT: the texts must be those
952
returned by add_lines or data corruption can be caused.
953
:param left_matching_blocks: a hint about which areas are common
954
between the text and its left-hand-parent. The format is
955
the SequenceMatcher.get_matching_blocks format.
956
:param nostore_sha: Raise ExistingContent and do not add the lines to
957
the versioned file if the digest of the lines matches this.
958
:param random_id: If True a random id has been selected rather than
959
an id determined by some deterministic process such as a converter
960
from a foreign VCS. When True the backend may choose not to check
961
for uniqueness of the resulting key within the versioned file, so
962
this should only be done when the result is expected to be unique
964
:param check_content: If True, the lines supplied are verified to be
965
bytestrings that are correctly formed lines.
966
:return: The text sha1, the number of bytes in the text, and an opaque
967
representation of the inserted version which can be provided
968
back to future add_lines calls in the parent_texts dictionary.
970
raise NotImplementedError(self.add_lines)
972
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
973
"""Add a text to the store.
975
This is a private function for use by VersionedFileCommitBuilder.
977
:param key: The key tuple of the text to add. If the last element is
978
None, a CHK string will be generated during the addition.
979
:param parents: The parents key tuples of the text to add.
980
:param text: A string containing the text to be committed.
981
:param nostore_sha: Raise ExistingContent and do not add the lines to
982
the versioned file if the digest of the lines matches this.
983
:param random_id: If True a random id has been selected rather than
984
an id determined by some deterministic process such as a converter
985
from a foreign VCS. When True the backend may choose not to check
986
for uniqueness of the resulting key within the versioned file, so
987
this should only be done when the result is expected to be unique
989
:param check_content: If True, the lines supplied are verified to be
990
bytestrings that are correctly formed lines.
991
:return: The text sha1, the number of bytes in the text, and an opaque
992
representation of the inserted version which can be provided
993
back to future _add_text calls in the parent_texts dictionary.
995
# The default implementation just thunks over to .add_lines(),
996
# inefficient, but it works.
997
return self.add_lines(key, parents, osutils.split_lines(text),
998
nostore_sha=nostore_sha,
1002
def add_mpdiffs(self, records):
1003
"""Add mpdiffs to this VersionedFile.
1005
Records should be iterables of version, parents, expected_sha1,
1006
mpdiff. mpdiff should be a MultiParent instance.
1009
mpvf = multiparent.MultiMemoryVersionedFile()
1011
for version, parent_ids, expected_sha1, mpdiff in records:
1012
versions.append(version)
1013
mpvf.add_diff(mpdiff, version, parent_ids)
1014
needed_parents = set()
1015
for version, parent_ids, expected_sha1, mpdiff in records:
1016
needed_parents.update(p for p in parent_ids
1017
if not mpvf.has_version(p))
1018
# It seems likely that adding all the present parents as fulltexts can
1019
# easily exhaust memory.
1020
chunks_to_lines = osutils.chunks_to_lines
1021
for record in self.get_record_stream(needed_parents, 'unordered',
1023
if record.storage_kind == 'absent':
1025
mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
1027
for (key, parent_keys, expected_sha1, mpdiff), lines in\
1028
zip(records, mpvf.get_line_list(versions)):
1029
if len(parent_keys) == 1:
1030
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
1031
mpvf.get_diff(parent_keys[0]).num_lines()))
1033
left_matching_blocks = None
1034
version_sha1, _, version_text = self.add_lines(key,
1035
parent_keys, lines, vf_parents,
1036
left_matching_blocks=left_matching_blocks)
1037
if version_sha1 != expected_sha1:
1038
raise errors.VersionedFileInvalidChecksum(version)
1039
vf_parents[key] = version_text
1041
def annotate(self, key):
1042
"""Return a list of (version-key, line) tuples for the text of key.
1044
:raise RevisionNotPresent: If the key is not present.
1046
raise NotImplementedError(self.annotate)
1048
def check(self, progress_bar=None):
1049
"""Check this object for integrity.
1051
:param progress_bar: A progress bar to output as the check progresses.
1052
:param keys: Specific keys within the VersionedFiles to check. When
1053
this parameter is not None, check() becomes a generator as per
1054
get_record_stream. The difference to get_record_stream is that
1055
more or deeper checks will be performed.
1056
:return: None, or if keys was supplied a generator as per
1059
raise NotImplementedError(self.check)
1062
def check_not_reserved_id(version_id):
1063
revision.check_not_reserved_id(version_id)
1065
def clear_cache(self):
1066
"""Clear whatever caches this VersionedFile holds.
1068
This is generally called after an operation has been performed, when we
1069
don't expect to be using this versioned file again soon.
1072
def _check_lines_not_unicode(self, lines):
1073
"""Check that lines being added to a versioned file are not unicode."""
1075
if line.__class__ is not str:
1076
raise errors.BzrBadParameterUnicode("lines")
1078
def _check_lines_are_lines(self, lines):
1079
"""Check that the lines really are full lines without inline EOL."""
1081
if '\n' in line[:-1]:
1082
raise errors.BzrBadParameterContainsNewline("lines")
1084
def get_known_graph_ancestry(self, keys):
1085
"""Get a KnownGraph instance with the ancestry of keys."""
1086
# most basic implementation is a loop around get_parent_map
1090
this_parent_map = self.get_parent_map(pending)
1091
parent_map.update(this_parent_map)
1093
map(pending.update, this_parent_map.itervalues())
1094
pending = pending.difference(parent_map)
1095
kg = _mod_graph.KnownGraph(parent_map)
1098
def get_parent_map(self, keys):
1099
"""Get a map of the parents of keys.
1101
:param keys: The keys to look up parents for.
1102
:return: A mapping from keys to parents. Absent keys are absent from
1105
raise NotImplementedError(self.get_parent_map)
1107
def get_record_stream(self, keys, ordering, include_delta_closure):
1108
"""Get a stream of records for keys.
1110
:param keys: The keys to include.
1111
:param ordering: Either 'unordered' or 'topological'. A topologically
1112
sorted stream has compression parents strictly before their
1114
:param include_delta_closure: If True then the closure across any
1115
compression parents will be included (in the opaque data).
1116
:return: An iterator of ContentFactory objects, each of which is only
1117
valid until the iterator is advanced.
1119
raise NotImplementedError(self.get_record_stream)
1121
def get_sha1s(self, keys):
1122
"""Get the sha1's of the texts for the given keys.
1124
:param keys: The names of the keys to lookup
1125
:return: a dict from key to sha1 digest. Keys of texts which are not
1126
present in the store are not present in the returned
1129
raise NotImplementedError(self.get_sha1s)
1131
has_key = index._has_key_from_parent_map
1133
def get_missing_compression_parent_keys(self):
1134
"""Return an iterable of keys of missing compression parents.
1136
Check this after calling insert_record_stream to find out if there are
1137
any missing compression parents. If there are, the records that
1138
depend on them are not able to be inserted safely. The precise
1139
behaviour depends on the concrete VersionedFiles class in use.
1141
Classes that do not support this will raise NotImplementedError.
1143
raise NotImplementedError(self.get_missing_compression_parent_keys)
1145
def insert_record_stream(self, stream):
1146
"""Insert a record stream into this container.
1148
:param stream: A stream of records to insert.
1150
:seealso VersionedFile.get_record_stream:
1152
raise NotImplementedError
1154
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1155
"""Iterate over the lines in the versioned files from keys.
1157
This may return lines from other keys. Each item the returned
1158
iterator yields is a tuple of a line and a text version that that line
1159
is present in (not introduced in).
1161
Ordering of results is in whatever order is most suitable for the
1162
underlying storage format.
1164
If a progress bar is supplied, it may be used to indicate progress.
1165
The caller is responsible for cleaning up progress bars (because this
1169
* Lines are normalised by the underlying store: they will all have \n
1171
* Lines are returned in arbitrary order.
1173
:return: An iterator over (line, key).
1175
raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
1178
"""Return a iterable of the keys for all the contained texts."""
1179
raise NotImplementedError(self.keys)
1181
def make_mpdiffs(self, keys):
1182
"""Create multiparent diffs for specified keys."""
1183
generator = _MPDiffGenerator(self, keys)
1184
return generator.compute_diffs()
1186
def get_annotator(self):
1187
return annotate.Annotator(self)
1189
missing_keys = index._missing_keys_from_parent_map
1191
def _extract_blocks(self, version_id, source, target):
1194
def _transitive_fallbacks(self):
1195
"""Return the whole stack of fallback versionedfiles.
1197
This VersionedFiles may have a list of fallbacks, but it doesn't
1198
necessarily know about the whole stack going down, and it can't know
1199
at open time because they may change after the objects are opened.
1202
for a_vfs in self._immediate_fallback_vfs:
1203
all_fallbacks.append(a_vfs)
1204
all_fallbacks.extend(a_vfs._transitive_fallbacks())
1205
return all_fallbacks
1208
class ThunkedVersionedFiles(VersionedFiles):
1209
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1211
This object allows a single keyspace for accessing the history graph and
1212
contents of named bytestrings.
1214
Currently no implementation allows the graph of different key prefixes to
1215
intersect, but the API does allow such implementations in the future.
1218
def __init__(self, transport, file_factory, mapper, is_locked):
1219
"""Create a ThunkedVersionedFiles."""
1220
self._transport = transport
1221
self._file_factory = file_factory
1222
self._mapper = mapper
1223
self._is_locked = is_locked
1225
def add_lines(self, key, parents, lines, parent_texts=None,
1226
left_matching_blocks=None, nostore_sha=None, random_id=False,
1227
check_content=True):
1228
"""See VersionedFiles.add_lines()."""
1229
path = self._mapper.map(key)
1230
version_id = key[-1]
1231
parents = [parent[-1] for parent in parents]
1232
vf = self._get_vf(path)
1235
return vf.add_lines_with_ghosts(version_id, parents, lines,
1236
parent_texts=parent_texts,
1237
left_matching_blocks=left_matching_blocks,
1238
nostore_sha=nostore_sha, random_id=random_id,
1239
check_content=check_content)
1240
except NotImplementedError:
1241
return vf.add_lines(version_id, parents, lines,
1242
parent_texts=parent_texts,
1243
left_matching_blocks=left_matching_blocks,
1244
nostore_sha=nostore_sha, random_id=random_id,
1245
check_content=check_content)
1246
except errors.NoSuchFile:
1247
# parent directory may be missing, try again.
1248
self._transport.mkdir(osutils.dirname(path))
1250
return vf.add_lines_with_ghosts(version_id, parents, lines,
1251
parent_texts=parent_texts,
1252
left_matching_blocks=left_matching_blocks,
1253
nostore_sha=nostore_sha, random_id=random_id,
1254
check_content=check_content)
1255
except NotImplementedError:
1256
return vf.add_lines(version_id, parents, lines,
1257
parent_texts=parent_texts,
1258
left_matching_blocks=left_matching_blocks,
1259
nostore_sha=nostore_sha, random_id=random_id,
1260
check_content=check_content)
1262
def annotate(self, key):
1263
"""Return a list of (version-key, line) tuples for the text of key.
1265
:raise RevisionNotPresent: If the key is not present.
1268
path = self._mapper.map(prefix)
1269
vf = self._get_vf(path)
1270
origins = vf.annotate(key[-1])
1272
for origin, line in origins:
1273
result.append((prefix + (origin,), line))
1276
def check(self, progress_bar=None, keys=None):
1277
"""See VersionedFiles.check()."""
1278
# XXX: This is over-enthusiastic but as we only thunk for Weaves today
1279
# this is tolerable. Ideally we'd pass keys down to check() and
1280
# have the older VersiondFile interface updated too.
1281
for prefix, vf in self._iter_all_components():
1283
if keys is not None:
1284
return self.get_record_stream(keys, 'unordered', True)
1286
def get_parent_map(self, keys):
1287
"""Get a map of the parents of keys.
1289
:param keys: The keys to look up parents for.
1290
:return: A mapping from keys to parents. Absent keys are absent from
1293
prefixes = self._partition_keys(keys)
1295
for prefix, suffixes in prefixes.items():
1296
path = self._mapper.map(prefix)
1297
vf = self._get_vf(path)
1298
parent_map = vf.get_parent_map(suffixes)
1299
for key, parents in parent_map.items():
1300
result[prefix + (key,)] = tuple(
1301
prefix + (parent,) for parent in parents)
1304
def _get_vf(self, path):
1305
if not self._is_locked():
1306
raise errors.ObjectNotLocked(self)
1307
return self._file_factory(path, self._transport, create=True,
1308
get_scope=lambda:None)
1310
def _partition_keys(self, keys):
1311
"""Turn keys into a dict of prefix:suffix_list."""
1314
prefix_keys = result.setdefault(key[:-1], [])
1315
prefix_keys.append(key[-1])
1318
def _get_all_prefixes(self):
1319
# Identify all key prefixes.
1320
# XXX: A bit hacky, needs polish.
1321
if type(self._mapper) == ConstantMapper:
1322
paths = [self._mapper.map(())]
1326
for quoted_relpath in self._transport.iter_files_recursive():
1327
path, ext = os.path.splitext(quoted_relpath)
1329
paths = list(relpaths)
1330
prefixes = [self._mapper.unmap(path) for path in paths]
1331
return zip(paths, prefixes)
1333
def get_record_stream(self, keys, ordering, include_delta_closure):
1334
"""See VersionedFiles.get_record_stream()."""
1335
# Ordering will be taken care of by each partitioned store; group keys
1338
for prefix, suffixes, vf in self._iter_keys_vf(keys):
1339
suffixes = [(suffix,) for suffix in suffixes]
1340
for record in vf.get_record_stream(suffixes, ordering,
1341
include_delta_closure):
1342
if record.parents is not None:
1343
record.parents = tuple(
1344
prefix + parent for parent in record.parents)
1345
record.key = prefix + record.key
1348
def _iter_keys_vf(self, keys):
1349
prefixes = self._partition_keys(keys)
1351
for prefix, suffixes in prefixes.items():
1352
path = self._mapper.map(prefix)
1353
vf = self._get_vf(path)
1354
yield prefix, suffixes, vf
1356
def get_sha1s(self, keys):
1357
"""See VersionedFiles.get_sha1s()."""
1359
for prefix,suffixes, vf in self._iter_keys_vf(keys):
1360
vf_sha1s = vf.get_sha1s(suffixes)
1361
for suffix, sha1 in vf_sha1s.iteritems():
1362
sha1s[prefix + (suffix,)] = sha1
1365
def insert_record_stream(self, stream):
1366
"""Insert a record stream into this container.
1368
:param stream: A stream of records to insert.
1370
:seealso VersionedFile.get_record_stream:
1372
for record in stream:
1373
prefix = record.key[:-1]
1374
key = record.key[-1:]
1375
if record.parents is not None:
1376
parents = [parent[-1:] for parent in record.parents]
1379
thunk_record = AdapterFactory(key, parents, record)
1380
path = self._mapper.map(prefix)
1381
# Note that this parses the file many times; we can do better but
1382
# as this only impacts weaves in terms of performance, it is
1384
vf = self._get_vf(path)
1385
vf.insert_record_stream([thunk_record])
1387
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1388
"""Iterate over the lines in the versioned files from keys.
1390
This may return lines from other keys. Each item the returned
1391
iterator yields is a tuple of a line and a text version that that line
1392
is present in (not introduced in).
1394
Ordering of results is in whatever order is most suitable for the
1395
underlying storage format.
1397
If a progress bar is supplied, it may be used to indicate progress.
1398
The caller is responsible for cleaning up progress bars (because this
1402
* Lines are normalised by the underlying store: they will all have \n
1404
* Lines are returned in arbitrary order.
1406
:return: An iterator over (line, key).
1408
for prefix, suffixes, vf in self._iter_keys_vf(keys):
1409
for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
1410
yield line, prefix + (version,)
1412
def _iter_all_components(self):
1413
for path, prefix in self._get_all_prefixes():
1414
yield prefix, self._get_vf(path)
1417
"""See VersionedFiles.keys()."""
1419
for prefix, vf in self._iter_all_components():
1420
for suffix in vf.versions():
1421
result.add(prefix + (suffix,))
1425
class VersionedFilesWithFallbacks(VersionedFiles):
1427
def without_fallbacks(self):
1428
"""Return a clone of this object without any fallbacks configured."""
1429
raise NotImplementedError(self.without_fallbacks)
1431
def add_fallback_versioned_files(self, a_versioned_files):
1432
"""Add a source of texts for texts not present in this knit.
1434
:param a_versioned_files: A VersionedFiles object.
1436
raise NotImplementedError(self.add_fallback_versioned_files)
1438
def get_known_graph_ancestry(self, keys):
1439
"""Get a KnownGraph instance with the ancestry of keys."""
1440
parent_map, missing_keys = self._index.find_ancestry(keys)
1441
for fallback in self._transitive_fallbacks():
1442
if not missing_keys:
1444
(f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
1446
parent_map.update(f_parent_map)
1447
missing_keys = f_missing_keys
1448
kg = _mod_graph.KnownGraph(parent_map)
1452
class _PlanMergeVersionedFile(VersionedFiles):
1453
"""A VersionedFile for uncommitted and committed texts.
1455
It is intended to allow merges to be planned with working tree texts.
1456
It implements only the small part of the VersionedFiles interface used by
1457
PlanMerge. It falls back to multiple versionedfiles for data not stored in
1458
_PlanMergeVersionedFile itself.
1460
:ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
1461
queried for missing texts.
1464
def __init__(self, file_id):
1465
"""Create a _PlanMergeVersionedFile.
1467
:param file_id: Used with _PlanMerge code which is not yet fully
1468
tuple-keyspace aware.
1470
self._file_id = file_id
1471
# fallback locations
1472
self.fallback_versionedfiles = []
1473
# Parents for locally held keys.
1475
# line data for locally held keys.
1477
# key lookup providers
1478
self._providers = [_mod_graph.DictParentsProvider(self._parents)]
1480
def plan_merge(self, ver_a, ver_b, base=None):
1481
"""See VersionedFile.plan_merge"""
1482
from bzrlib.merge import _PlanMerge
1484
return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
1485
old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
1486
new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
1487
return _PlanMerge._subtract_plans(old_plan, new_plan)
1489
def plan_lca_merge(self, ver_a, ver_b, base=None):
1490
from bzrlib.merge import _PlanLCAMerge
1491
graph = _mod_graph.Graph(self)
1492
new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
1495
old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
1496
return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))
1498
def add_lines(self, key, parents, lines):
1499
"""See VersionedFiles.add_lines
1501
Lines are added locally, not to fallback versionedfiles. Also, ghosts
1502
are permitted. Only reserved ids are permitted.
1504
if type(key) is not tuple:
1505
raise TypeError(key)
1506
if not revision.is_reserved_id(key[-1]):
1507
raise ValueError('Only reserved ids may be used')
1509
raise ValueError('Parents may not be None')
1511
raise ValueError('Lines may not be None')
1512
self._parents[key] = tuple(parents)
1513
self._lines[key] = lines
1515
def get_record_stream(self, keys, ordering, include_delta_closure):
1518
if key in self._lines:
1519
lines = self._lines[key]
1520
parents = self._parents[key]
1522
yield ChunkedContentFactory(key, parents, None, lines)
1523
for versionedfile in self.fallback_versionedfiles:
1524
for record in versionedfile.get_record_stream(
1525
pending, 'unordered', True):
1526
if record.storage_kind == 'absent':
1529
pending.remove(record.key)
1533
# report absent entries
1535
yield AbsentContentFactory(key)
1537
def get_parent_map(self, keys):
1538
"""See VersionedFiles.get_parent_map"""
1539
# We create a new provider because a fallback may have been added.
1540
# If we make fallbacks private we can update a stack list and avoid
1541
# object creation thrashing.
1544
if revision.NULL_REVISION in keys:
1545
keys.remove(revision.NULL_REVISION)
1546
result[revision.NULL_REVISION] = ()
1547
self._providers = self._providers[:1] + self.fallback_versionedfiles
1549
_mod_graph.StackedParentsProvider(
1550
self._providers).get_parent_map(keys))
1551
for key, parents in result.iteritems():
1553
result[key] = (revision.NULL_REVISION,)
1557
class PlanWeaveMerge(TextMerge):
1558
"""Weave merge that takes a plan as its input.
1560
This exists so that VersionedFile.plan_merge is implementable.
1561
Most callers will want to use WeaveMerge instead.
1564
def __init__(self, plan, a_marker=TextMerge.A_MARKER,
1565
b_marker=TextMerge.B_MARKER):
1566
TextMerge.__init__(self, a_marker, b_marker)
1567
self.plan = list(plan)
1569
def _merge_struct(self):
1574
def outstanding_struct():
1575
if not lines_a and not lines_b:
1577
elif ch_a and not ch_b:
1580
elif ch_b and not ch_a:
1582
elif lines_a == lines_b:
1585
yield (lines_a, lines_b)
1587
# We previously considered either 'unchanged' or 'killed-both' lines
1588
# to be possible places to resynchronize. However, assuming agreement
1589
# on killed-both lines may be too aggressive. -- mbp 20060324
1590
for state, line in self.plan:
1591
if state == 'unchanged':
1592
# resync and flush queued conflicts changes if any
1593
for struct in outstanding_struct():
1599
if state == 'unchanged':
1602
elif state == 'killed-a':
1604
lines_b.append(line)
1605
elif state == 'killed-b':
1607
lines_a.append(line)
1608
elif state == 'new-a':
1610
lines_a.append(line)
1611
elif state == 'new-b':
1613
lines_b.append(line)
1614
elif state == 'conflicted-a':
1616
lines_a.append(line)
1617
elif state == 'conflicted-b':
1619
lines_b.append(line)
1620
elif state == 'killed-both':
1621
# This counts as a change, even though there is no associated
1625
if state not in ('irrelevant', 'ghost-a', 'ghost-b',
1627
raise AssertionError(state)
1628
for struct in outstanding_struct():
1631
def base_from_plan(self):
1632
"""Construct a BASE file from the plan text."""
1634
for state, line in self.plan:
1635
if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1636
# If unchanged, then this line is straight from base. If a or b
1637
# or both killed the line, then it *used* to be in base.
1638
base_lines.append(line)
1640
if state not in ('killed-base', 'irrelevant',
1641
'ghost-a', 'ghost-b',
1643
'conflicted-a', 'conflicted-b'):
1644
# killed-base, irrelevant means it doesn't apply
1645
# ghost-a/ghost-b are harder to say for sure, but they
1646
# aren't in the 'inc_c' which means they aren't in the
1647
# shared base of a & b. So we don't include them. And
1648
# obviously if the line is newly inserted, it isn't in base
1650
# If 'conflicted-a' or b, then it is new vs one base, but
1651
# old versus another base. However, if we make it present
1652
# in the base, it will be deleted from the target, and it
1653
# seems better to get a line doubled in the merge result,
1654
# rather than have it deleted entirely.
1655
# Example, each node is the 'text' at that point:
1663
# There was a criss-cross conflict merge. Both sides
1664
# include the other, but put themselves first.
1665
# Weave marks this as a 'clean' merge, picking OTHER over
1666
# THIS. (Though the details depend on order inserted into
1668
# LCA generates a plan:
1669
# [('unchanged', M),
1670
# ('conflicted-b', b),
1672
# ('conflicted-a', b),
1674
# If you mark 'conflicted-*' as part of BASE, then a 3-way
1675
# merge tool will cleanly generate "MaN" (as BASE vs THIS
1676
# removes one 'b', and BASE vs OTHER removes the other)
1677
# If you include neither, 3-way creates a clean "MbabN" as
1678
# THIS adds one 'b', and OTHER does too.
1679
# It seems that having the line 2 times is better than
1680
# having it omitted. (Easier to manually delete than notice
1681
# it needs to be added.)
1682
raise AssertionError('Unknown state: %s' % (state,))
1686
class WeaveMerge(PlanWeaveMerge):
1687
"""Weave merge that takes a VersionedFile and two versions as its input."""
1689
def __init__(self, versionedfile, ver_a, ver_b,
1690
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
1691
plan = versionedfile.plan_merge(ver_a, ver_b)
1692
PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1695
class VirtualVersionedFiles(VersionedFiles):
1696
"""Dummy implementation for VersionedFiles that uses other functions for
1697
obtaining fulltexts and parent maps.
1699
This is always on the bottom of the stack and uses string keys
1700
(rather than tuples) internally.
1703
def __init__(self, get_parent_map, get_lines):
1704
"""Create a VirtualVersionedFiles.
1706
:param get_parent_map: Same signature as Repository.get_parent_map.
1707
:param get_lines: Should return lines for specified key or None if
1710
super(VirtualVersionedFiles, self).__init__()
1711
self._get_parent_map = get_parent_map
1712
self._get_lines = get_lines
1714
def check(self, progressbar=None):
1715
"""See VersionedFiles.check.
1717
:note: Always returns True for VirtualVersionedFiles.
1721
def add_mpdiffs(self, records):
1722
"""See VersionedFiles.mpdiffs.
1724
:note: Not implemented for VirtualVersionedFiles.
1726
raise NotImplementedError(self.add_mpdiffs)
1728
def get_parent_map(self, keys):
1729
"""See VersionedFiles.get_parent_map."""
1730
return dict([((k,), tuple([(p,) for p in v]))
1731
for k,v in self._get_parent_map([k for (k,) in keys]).iteritems()])
1733
def get_sha1s(self, keys):
1734
"""See VersionedFiles.get_sha1s."""
1737
lines = self._get_lines(k)
1738
if lines is not None:
1739
if not isinstance(lines, list):
1740
raise AssertionError
1741
ret[(k,)] = osutils.sha_strings(lines)
1744
def get_record_stream(self, keys, ordering, include_delta_closure):
1745
"""See VersionedFiles.get_record_stream."""
1746
for (k,) in list(keys):
1747
lines = self._get_lines(k)
1748
if lines is not None:
1749
if not isinstance(lines, list):
1750
raise AssertionError
1751
yield ChunkedContentFactory((k,), None,
1752
sha1=osutils.sha_strings(lines),
1755
yield AbsentContentFactory((k,))
1757
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1758
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
1759
for i, (key,) in enumerate(keys):
1761
pb.update("Finding changed lines", i, len(keys))
1762
for l in self._get_lines(key):
1766
class NoDupeAddLinesDecorator(object):
1767
"""Decorator for a VersionedFiles that skips doing an add_lines if the key
1771
def __init__(self, store):
1774
def add_lines(self, key, parents, lines, parent_texts=None,
1775
left_matching_blocks=None, nostore_sha=None, random_id=False,
1776
check_content=True):
1777
"""See VersionedFiles.add_lines.
1779
This implementation may return None as the third element of the return
1780
value when the original store wouldn't.
1783
raise NotImplementedError(
1784
"NoDupeAddLinesDecorator.add_lines does not implement the "
1785
"nostore_sha behaviour.")
1787
sha1 = osutils.sha_strings(lines)
1788
key = ("sha1:" + sha1,)
1791
if key in self._store.get_parent_map([key]):
1792
# This key has already been inserted, so don't do it again.
1794
sha1 = osutils.sha_strings(lines)
1795
return sha1, sum(map(len, lines)), None
1796
return self._store.add_lines(key, parents, lines,
1797
parent_texts=parent_texts,
1798
left_matching_blocks=left_matching_blocks,
1799
nostore_sha=nostore_sha, random_id=random_id,
1800
check_content=check_content)
1802
def __getattr__(self, name):
1803
return getattr(self._store, name)
1806
def network_bytes_to_kind_and_offset(network_bytes):
1807
"""Strip of a record kind from the front of network_bytes.
1809
:param network_bytes: The bytes of a record.
1810
:return: A tuple (storage_kind, offset_of_remaining_bytes)
1812
line_end = network_bytes.find('\n')
1813
storage_kind = network_bytes[:line_end]
1814
return storage_kind, line_end + 1
1817
class NetworkRecordStream(object):
1818
"""A record_stream which reconstitures a serialised stream."""
1820
def __init__(self, bytes_iterator):
1821
"""Create a NetworkRecordStream.
1823
:param bytes_iterator: An iterator of bytes. Each item in this
1824
iterator should have been obtained from a record_streams'
1825
record.get_bytes_as(record.storage_kind) call.
1827
self._bytes_iterator = bytes_iterator
1828
self._kind_factory = {
1829
'fulltext': fulltext_network_to_record,
1830
'groupcompress-block': groupcompress.network_block_to_records,
1831
'knit-ft-gz': knit.knit_network_to_record,
1832
'knit-delta-gz': knit.knit_network_to_record,
1833
'knit-annotated-ft-gz': knit.knit_network_to_record,
1834
'knit-annotated-delta-gz': knit.knit_network_to_record,
1835
'knit-delta-closure': knit.knit_delta_closure_to_records,
1841
:return: An iterator as per VersionedFiles.get_record_stream().
1843
for bytes in self._bytes_iterator:
1844
storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
1845
for record in self._kind_factory[storage_kind](
1846
storage_kind, bytes, line_end):
1850
def fulltext_network_to_record(kind, bytes, line_end):
1851
"""Convert a network fulltext record to record."""
1852
meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
1853
record_meta = bytes[line_end+4:line_end+4+meta_len]
1854
key, parents = bencode.bdecode_as_tuple(record_meta)
1855
if parents == 'nil':
1857
fulltext = bytes[line_end+4+meta_len:]
1858
return [FulltextContentFactory(key, parents, None, fulltext)]
1861
def _length_prefix(bytes):
1862
return struct.pack('!L', len(bytes))
1865
def record_to_fulltext_bytes(record):
1866
if record.parents is None:
1869
parents = record.parents
1870
record_meta = bencode.bencode((record.key, parents))
1871
record_content = record.get_bytes_as('fulltext')
1872
return "fulltext\n%s%s%s" % (
1873
_length_prefix(record_meta), record_meta, record_content)
1876
def sort_groupcompress(parent_map):
1877
"""Sort and group the keys in parent_map into groupcompress order.
1879
groupcompress is defined (currently) as reverse-topological order, grouped
1882
:return: A sorted-list of keys
1884
# gc-optimal ordering is approximately reverse topological,
1885
# properly grouped by file-id.
1887
for item in parent_map.iteritems():
1889
if isinstance(key, str) or len(key) == 1:
1894
per_prefix_map[prefix].append(item)
1896
per_prefix_map[prefix] = [item]
1899
for prefix in sorted(per_prefix_map):
1900
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
1904
class _KeyRefs(object):
1906
def __init__(self, track_new_keys=False):
1907
# dict mapping 'key' to 'set of keys referring to that key'
1910
# set remembering all new keys
1911
self.new_keys = set()
1913
self.new_keys = None
1919
self.new_keys.clear()
1921
def add_references(self, key, refs):
1922
# Record the new references
1923
for referenced in refs:
1925
needed_by = self.refs[referenced]
1927
needed_by = self.refs[referenced] = set()
1929
# Discard references satisfied by the new key
1932
def get_new_keys(self):
1933
return self.new_keys
1935
def get_unsatisfied_refs(self):
1936
return self.refs.iterkeys()
1938
def _satisfy_refs_for_key(self, key):
1942
# No keys depended on this key. That's ok.
1945
def add_key(self, key):
1946
# satisfy refs for key, and remember that we've seen this key.
1947
self._satisfy_refs_for_key(key)
1948
if self.new_keys is not None:
1949
self.new_keys.add(key)
1951
def satisfy_refs_for_keys(self, keys):
1953
self._satisfy_refs_for_key(key)
1955
def get_referrers(self):
1957
for referrers in self.refs.itervalues():
1958
result.update(referrers)