# Copyright (C) 2006-2010 Canonical Ltd
#
# Authors:
#   Johan Rydberg <jrydberg@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA


"""Versioned text file storage api."""

from copy import copy
from cStringIO import StringIO
import os
import struct
from zlib import adler32

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import urllib

from bzrlib import (
    annotate,
    errors,
    graph as _mod_graph,
    groupcompress,
    index,
    knit,
    multiparent,
    osutils,
    revision,
    tsort,
    )
""")
from bzrlib.graph import DictParentsProvider, Graph, StackedParentsProvider
from bzrlib.transport.memory import MemoryTransport
from bzrlib.registry import Registry
from bzrlib.symbol_versioning import *
from bzrlib.textmerge import TextMerge
from bzrlib import bencode


adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'bzrlib.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
#     'bzrlib.knit', 'FTAnnotatedToChunked')
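
# Illustrative sketch, not part of the original module: an adapter is looked
# up by a (source storage_kind, target storage_kind) pair and applied to a
# record. `versioned_files` and `record` below are assumed to exist, with
# record.storage_kind == 'knit-delta-gz' and its delta basis available:
#
#   adapter_factory = adapter_registry.get(('knit-delta-gz', 'fulltext'))
#   adapter = adapter_factory(versioned_files)
#   fulltext = adapter.get_bytes(record)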


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
    satisfies this, as does a list of lines.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, chunks):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return ''.join(self._chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
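
# Illustrative sketch, not part of the original module: building a static
# chunked factory and asking for its fulltext. The key and parent below are
# invented for the example:
#
#   factory = ChunkedContentFactory(('rev-2',), (('rev-1',),), None,
#       ['first line\n', 'second line\n'])
#   factory.get_bytes_as('fulltext')   # -> 'first line\nsecond line\n'
#   factory.get_bytes_as('chunked')    # -> the chunk list unchanged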


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, text):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'fulltext'
        self.key = key
        self.parents = parents
        self._text = text

    def get_bytes_as(self, storage_kind):
        if storage_kind == self.storage_kind:
            return self._text
        elif storage_kind == 'chunked':
            return [self._text]
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)


class AbsentContentFactory(ContentFactory):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None

    def get_bytes_as(self, storage_kind):
        raise ValueError('A request was made for key: %s, but that'
                         ' content is not available, and the calling'
                         ' code does not handle if it is missing.'
                         % (self.key,))


class AdapterFactory(ContentFactory):
    """A content factory to adapt between key prefixes."""

    def __init__(self, key, parents, adapted):
        """Create an adapter factory instance."""
        self.key = key
        self.parents = parents
        self._adapted = adapted

    def __getattr__(self, attr):
        """Return a member from the adapted object."""
        if attr in ('key', 'parents'):
            return self.__dict__[attr]
        else:
            return getattr(self._adapted, attr)


def filter_absent(record_stream):
    """Adapt a record stream to remove absent records."""
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
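
# Illustrative sketch, not part of the original module: filter_absent is
# useful when copying between stores that may be missing some texts.
# `source_vf`, `target_vf` and `keys` are assumed here:
#
#   stream = source_vf.get_record_stream(keys, 'unordered', True)
#   target_vf.insert_record_stream(filter_absent(stream))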


class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self._diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
            parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
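
# Illustrative sketch, not part of the original module: make_mpdiffs on a
# VersionedFiles instance drives this generator, returning one
# multiparent.MultiParent per requested key, in request order:
#
#   diffs = _MPDiffGenerator(vf, [('rev-2',), ('rev-3',)]).compute_diffs()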


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.

    To clients the "lines" of the file are represented as a list of
    strings. These strings will typically have terminal newline
    characters, but this is not required.  In particular files commonly
    do not have a newline at the end of the file.

    Texts are identified by a version-id string.
    """

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def copy_to(self, name, transport):
        """Copy this versioned file to name on transport."""
        raise NotImplementedError(self.copy_to)

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the data content of the
            stream, not in the emitted records). This guarantees that
            'fulltext' can be used successfully on every record.
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def has_version(self, version_id):
        """Returns whether version is present."""
        raise NotImplementedError(self.has_version)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def add_lines(self, version_id, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a single text on top of the versioned file.

        Must raise RevisionAlreadyPresent if the new version is
        already present in file history.

        Must raise RevisionNotPresent if any of the given parents are
        not present in file history.

        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        self._check_write_ok()
        return self._add_lines(version_id, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """Helper to do the class specific add_lines."""
        raise NotImplementedError(self.add_lines)
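
    # Illustrative sketch, not part of the original module: a typical
    # add_lines call against a concrete implementation. The version ids and
    # lines are invented:
    #
    #   sha1, num_bytes, opaque = vf.add_lines('rev-2', ['rev-1'],
    #       ['hello\n', 'world\n'])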

    def add_lines_with_ghosts(self, version_id, parents, lines,
        parent_texts=None, nostore_sha=None, random_id=False,
        check_content=True, left_matching_blocks=None):
        """Add lines to the versioned file, allowing ghosts to be present.

        This takes the same parameters as add_lines and returns the same.
        """
        self._check_write_ok()
        return self._add_lines_with_ghosts(version_id, parents, lines,
            parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
        nostore_sha, random_id, check_content, left_matching_blocks):
        """Helper to do class specific add_lines_with_ghosts."""
        raise NotImplementedError(self.add_lines_with_ghosts)

    def check(self, progress_bar=None):
        """Check the versioned file for integrity."""
        raise NotImplementedError(self.check)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_format_signature(self):
        """Get a text description of the data encoding in this file.

        :since: 0.90
        """
        raise NotImplementedError(self.get_format_signature)

    def make_mpdiffs(self, version_ids):
        """Create multiparent diffs for specified versions."""
        # XXX: Can't use _MPDiffGenerator just yet. This is because version_ids
        #      is a list of strings, not keys. And while self.get_record_stream
        #      is supported, it takes *keys*, while self.get_parent_map() takes
        #      strings... *sigh*
        knit_versions = set()
        knit_versions.update(version_ids)
        parent_map = self.get_parent_map(version_ids)
        for version_id in version_ids:
            try:
                knit_versions.update(parent_map[version_id])
            except KeyError:
                raise errors.RevisionNotPresent(version_id, self)
        # We need to filter out ghosts, because we can't diff against them.
        knit_versions = set(self.get_parent_map(knit_versions).keys())
        lines = dict(zip(knit_versions,
            self._get_lf_split_line_list(knit_versions)))
        diffs = []
        for version_id in version_ids:
            target = lines[version_id]
            try:
                parents = [lines[p] for p in parent_map[version_id] if p in
                    knit_versions]
            except KeyError:
                # I don't know how this could ever trigger.
                # parent_map[version_id] was already triggered in the previous
                # for loop, and lines[p] has the 'if p in knit_versions' check,
                # so we again won't have a KeyError.
                raise errors.RevisionNotPresent(version_id, self)
            if len(parents) > 0:
                left_parent_blocks = self._extract_blocks(version_id,
                                                          parents[0], target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target, parents,
                         left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        # Does this need to call self._check_write_ok()? (IanC 20070919)
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        present_parents = set(self.get_parent_map(needed_parents).keys())
        for parent_id, lines in zip(present_parents,
                                    self._get_lf_split_line_list(present_parents)):
            mpvf.add_version(lines, parent_id, [])
        for (version, parent_ids, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_ids) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_ids[0]).num_lines()))
            else:
                left_matching_blocks = None
            try:
                _, _, version_text = self.add_lines_with_ghosts(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            except NotImplementedError:
                # The vf can't handle ghosts, so add lines normally, which will
                # (reasonably) fail if there are ghosts in the data.
                _, _, version_text = self.add_lines(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            vf_parents[version] = version_text
        sha1s = self.get_sha1s(versions)
        for version, parent_ids, expected_sha1, mpdiff in records:
            if expected_sha1 != sha1s[version]:
                raise errors.VersionedFileInvalidChecksum(version)

    def get_text(self, version_id):
        """Return version contents as a text string.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return ''.join(self.get_lines(version_id))
    get_string = get_text

    def get_texts(self, version_ids):
        """Return the texts of listed versions as a list of strings.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return [''.join(self.get_lines(v)) for v in version_ids]

    def get_lines(self, version_id):
        """Return version contents as a sequence of lines.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        raise NotImplementedError(self.get_lines)

    def _get_lf_split_line_list(self, version_ids):
        return [StringIO(t).readlines() for t in self.get_texts(version_ids)]

    def get_ancestry(self, version_ids, topo_sorted=True):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        This list will not be topologically sorted if topo_sorted=False is
        passed.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history."""
        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        :param version_ids: The version ids to look up parents for.
        :return: A mapping from version id to parents.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_parents_with_ghosts(self, version_id):
        """Return version names for parents of version_id.

        Will raise RevisionNotPresent if version_id is not present
        in the history.

        Ghosts that are known about will be included in the parent list,
        but are not explicitly marked.
        """
        try:
            return list(self.get_parent_map([version_id])[version_id])
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)

    def annotate(self, version_id):
        """Return a list of (version-id, line) tuples for version_id.

        :raise RevisionNotPresent: If the given version is
            not present in file history.
        """
        raise NotImplementedError(self.annotate)

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """Iterate over the lines in the versioned file from version_ids.

        This may return lines from other versions. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES: Lines are normalised: they will all have \n terminators.
               Lines are returned in arbitrary order.

        :return: An iterator over (line, version_id).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_versions)

    def plan_merge(self, ver_a, ver_b):
        """Return pseudo-annotation indicating how the two versions merge.

        This is computed between versions a and b and their common
        base.

        Weave lines present in none of them are skipped entirely.

        Legend:
        killed-base Dead in base revision
        killed-both Killed in each revision
        killed-a    Killed in a
        killed-b    Killed in b
        unchanged   Alive in both a and b (possibly created in both)
        new-a       Created in a
        new-b       Created in b
        ghost-a     Killed in a, unborn in b
        ghost-b     Killed in b, unborn in a
        irrelevant  Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
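
# Illustrative sketch, not part of the original module: a weave merge built
# from a plan. `vf` is assumed to be a VersionedFile holding both versions:
#
#   plan = vf.plan_merge('rev-a', 'rev-b')
#   merged_lines = PlanWeaveMerge(plan).merge_lines()[0]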


class RecordingVersionedFilesDecorator(object):
    """A minimal versioned files that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to its value.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        self.calls = []

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        self.calls.append(("add_lines", key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content))
        return self._backing_vf.add_lines(key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)

    def check(self):
        self._backing_vf.check()

    def get_parent_map(self, keys):
        self.calls.append(("get_parent_map", copy(keys)))
        return self._backing_vf.get_parent_map(keys)

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        return self._backing_vf.get_record_stream(keys, sort_order,
            include_delta_closure)

    def get_sha1s(self, keys):
        self.calls.append(("get_sha1s", copy(keys)))
        return self._backing_vf.get_sha1s(keys)

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
        return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)

    def keys(self):
        self.calls.append(("keys",))
        return self._backing_vf.keys()


class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to its value.
    """

    def __init__(self, backing_vf, key_priority):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                                'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                            include_delta_closure):
                yield record
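
# Illustrative sketch, not part of the original module: pinning a
# deterministic 'unordered' order in tests. The keys and priorities are
# invented:
#
#   vf = OrderingVersionedFilesDecorator(backing_vf,
#       {('rev-1',): 2, ('rev-2',): 1})
#   # an 'unordered' get_record_stream now yields ('rev-2',) first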


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        :param key: A key tuple e.g. ('file-id', 'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            mechanism.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the partition
            id.
        """
        raise NotImplementedError(self.unmap)


class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        return self._result


class URLEscapeMapper(KeyMapper):
    """Base class for use with transport backed storage.

    This provides a map and unmap wrapper that respectively url escape and
    unescape their outputs and inputs.
    """

    def map(self, key):
        """See KeyMapper.map()."""
        return urllib.quote(self._map(key))

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return self._unmap(urllib.unquote(partition_id))


class PrefixMapper(URLEscapeMapper):
    """A key mapper that extracts the first component of a key.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        return key[0]

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id,)


class HashPrefixMapper(URLEscapeMapper):
    """A key mapper that combines the first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        prefix = self._escape(key[0])
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

    def _escape(self, prefix):
        """No escaping needed here."""
        return prefix

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (self._unescape(osutils.basename(partition_id)),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        return basename
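
# Illustrative sketch, not part of the original module: how a
# HashPrefixMapper partitions keys (the hash byte shown is made up):
#
#   mapper = HashPrefixMapper()
#   mapper.map(('file-id', 'rev-1'))   # -> e.g. '30/file-id'
#   mapper.unmap('30/file-id')         # -> ('file-id',)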


class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urllib.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
             for c in prefix]
        return ''.join(r)

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urlutils."""
        return urllib.unquote(basename)


def make_versioned_files_factory(versioned_file_factory, mapper):
    """Create a ThunkedVersionedFiles factory.

    This will create a callable which when called creates a
    ThunkedVersionedFiles on a transport, using mapper to access individual
    versioned files, and versioned_file_factory to create each individual file.
    """
    def factory(transport):
        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
            lambda:True)
    return factory
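
# Illustrative sketch, not part of the original module: wiring a factory up
# to a transport. `weave_file_factory` and `transport` are assumed here:
#
#   factory = make_versioned_files_factory(weave_file_factory, PrefixMapper())
#   vfs = factory(transport)   # a ThunkedVersionedFiles over per-file stores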


class VersionedFiles(object):
    """Storage for many versioned files.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.

    The keyspace is expressed via simple tuples. Any instance of VersionedFiles
    may have a different length key-size, but that size will be constant for
    all texts added to or retrieved from it. For instance, bzrlib uses
    instances with a key-size of 2 for storing user files in a repository, with
    the first element the fileid, and the second the version of that file.

    The use of tuples allows a single code base to support several different
    uses with only the mapping logic changing from instance to instance.
    """

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a text to the store.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        raise NotImplementedError(self.add_lines)

    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
        """Add a text to the store.

        This is a private function for use by CommitBuilder.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param text: A string containing the text to be committed.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future _add_text calls in the parent_texts dictionary.
        """
        # The default implementation just thunks over to .add_lines(),
        # inefficient, but it works.
        return self.add_lines(key, parents, osutils.split_lines(text),
                              nostore_sha=nostore_sha,
                              random_id=random_id,
                              check_content=True)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(needed_parents, 'unordered',
            True):
            if record.storage_kind == 'absent':
                continue
            mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
                record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_keys) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_keys[0]).num_lines()))
            else:
                left_matching_blocks = None
            version_sha1, _, version_text = self.add_lines(key,
                parent_keys, lines, vf_parents,
                left_matching_blocks=left_matching_blocks)
            if version_sha1 != expected_sha1:
                raise errors.VersionedFileInvalidChecksum(version)
            vf_parents[key] = version_text

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None, keys=None):
        """Check this object for integrity.

        :param progress_bar: A progress bar to output as the check progresses.
        :param keys: Specific keys within the VersionedFiles to check. When
            this parameter is not None, check() becomes a generator as per
            get_record_stream. The difference to get_record_stream is that
            more or deeper checks will be performed.
        :return: None, or if keys was supplied a generator as per
            get_record_stream.
        """
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def clear_cache(self):
        """Clear whatever caches this VersionedFile holds.

        This is generally called after an operation has been performed, when we
        don't expect to be using this versioned file again soon.
        """

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        # most basic implementation is a loop around get_parent_map
        pending = set(keys)
        parent_map = {}
        while pending:
            this_parent_map = self.get_parent_map(pending)
            parent_map.update(this_parent_map)
            pending = set()
            map(pending.update, this_parent_map.itervalues())
            pending = pending.difference(parent_map)
        kg = _mod_graph.KnownGraph(parent_map)
        return kg

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def get_sha1s(self, keys):
        """Get the sha1's of the texts for the given keys.

        :param keys: The names of the keys to lookup
        :return: a dict from key to sha1 digest. Keys of texts which are not
            present in the store are not present in the returned
            dictionary.
        """
        raise NotImplementedError(self.get_sha1s)

    has_key = index._has_key_from_parent_map

    def get_missing_compression_parent_keys(self):
        """Return an iterable of keys of missing compression parents.

        Check this after calling insert_record_stream to find out if there are
        any missing compression parents.  If there are, the records that
        depend on them are not able to be inserted safely. The precise
        behaviour depends on the concrete VersionedFiles class in use.

        Classes that do not support this will raise NotImplementedError.
        """
        raise NotImplementedError(self.get_missing_compression_parent_keys)

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_keys)

    def keys(self):
        """Return an iterable of the keys for all the contained texts."""
        raise NotImplementedError(self.keys)

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()

    def get_annotator(self):
        return annotate.Annotator(self)

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None, keys=None):
        """See VersionedFiles.check()."""
        # XXX: This is over-enthusiastic but as we only thunk for Weaves today
        # this is tolerable. Ideally we'd pass keys down to check() and
        # have the older VersionedFile interface updated too.
        for prefix, vf in self._iter_all_components():
            vf.check()
        if keys is not None:
            return self.get_record_stream(keys, 'unordered', True)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in parent_map.items():
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
            get_scope=lambda:None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

    def _get_all_prefixes(self):
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by prefix.
        keys = sorted(keys)
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                include_delta_closure):
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in vf_sha1s.iteritems():
                sha1s[prefix + (suffix,)] = sha1
        return sha1s

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # tolerable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        for path, prefix in self._get_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        from bzrlib.merge import _PlanLCAMerge
        graph = Graph(self)
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if type(key) is not tuple:
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines

    def get_record_stream(self, keys, ordering, include_delta_closure):
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield ChunkedContentFactory(key, parents, None, lines)
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        keys = set(keys)
        result = {}
        if revision.NULL_REVISION in keys:
            keys.remove(revision.NULL_REVISION)
            result[revision.NULL_REVISION] = ()
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result.update(
            StackedParentsProvider(self._providers).get_parent_map(keys))
        for key, parents in result.iteritems():
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = list(plan)

    def _merge_struct(self):
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change:
                yield(lines_a,)
            elif ch_b and not ch_a:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield(lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize. However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base

                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)


class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def __init__(self, get_parent_map, get_lines):
        """Create a VirtualVersionedFiles.

        :param get_parent_map: Same signature as Repository.get_parent_map.
        :param get_lines: Should return lines for specified key or None if
                          not available.
        """
        super(VirtualVersionedFiles, self).__init__()
        self._get_parent_map = get_parent_map
        self._get_lines = get_lines

    def check(self, progressbar=None):
        """See VersionedFiles.check.

        :note: Always returns True for VirtualVersionedFiles.
        """
        return True

    def add_mpdiffs(self, records):
        """See VersionedFiles.mpdiffs.

        :note: Not implemented for VirtualVersionedFiles.
        """
        raise NotImplementedError(self.add_mpdiffs)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map."""
        return dict([((k,), tuple([(p,) for p in v]))
            for k, v in self._get_parent_map([k for (k,) in keys]).iteritems()])

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s."""
        ret = {}
        for (k,) in keys:
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                ret[(k,)] = osutils.sha_strings(lines)
        return ret

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream."""
        for (k,) in list(keys):
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                        sha1=osutils.sha_strings(lines),
                        chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)


class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    would already exist.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                parent_texts=parent_texts,
                left_matching_blocks=left_matching_blocks,
                nostore_sha=nostore_sha, random_id=random_id,
                check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
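
# Illustrative sketch, not part of the original module: wrapping a store so
# that re-adding identical content becomes a no-op. `backing_store` is
# assumed; a key ending in None is replaced by a sha1-based key:
#
#   store = NoDupeAddLinesDecorator(backing_store)
#   store.add_lines((None,), (), ['line\n'])   # inserts
#   store.add_lines((None,), (), ['line\n'])   # skipped, key already present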


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1


class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
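
# Illustrative sketch, not part of the original module: the fulltext network
# encoding round-trips through the helpers above. `record` is assumed to be
# a ContentFactory that can yield a 'fulltext':
#
#   wire_bytes = record_to_fulltext_bytes(record)
#   kind, offset = network_bytes_to_kind_and_offset(wire_bytes)
#   # kind == 'fulltext'; offset points past the header line
#   [rebuilt] = fulltext_network_to_record(kind, wire_bytes, offset)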


def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys