1
# Copyright (C) 2006-2010 Canonical Ltd
4
# Johan Rydberg <jrydberg@gnu.org>
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
11
# This program is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
"""Versioned text file storage api."""
23
from cStringIO import StringIO
26
from zlib import adler32
28
from bzrlib.lazy_import import lazy_import
29
lazy_import(globals(), """
45
from bzrlib.graph import DictParentsProvider, Graph, StackedParentsProvider
46
from bzrlib.transport.memory import MemoryTransport
48
from bzrlib.registry import Registry
49
from bzrlib.textmerge import TextMerge
50
from bzrlib import bencode
53
adapter_registry = Registry()
54
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
55
'DeltaPlainToFullText')
56
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
58
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
59
'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
60
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
61
'bzrlib.knit', 'DeltaAnnotatedToFullText')
62
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
63
'bzrlib.knit', 'FTAnnotatedToUnannotated')
64
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
65
'bzrlib.knit', 'FTAnnotatedToFullText')
66
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
67
# 'bzrlib.knit', 'FTAnnotatedToChunked')
70
class ContentFactory(object):
71
"""Abstract interface for insertion and retrieval from a VersionedFile.
73
:ivar sha1: None, or the sha1 of the content fulltext.
74
:ivar storage_kind: The native storage kind of this factory. One of
75
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
76
'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
77
'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
78
:ivar key: The key of this content. Each key is a tuple with a single
80
:ivar parents: A tuple of parent keys for self.key. If the object has
81
no parent information, None (as opposed to () for an empty list of
86
"""Create a ContentFactory."""
88
self.storage_kind = None
93
class ChunkedContentFactory(ContentFactory):
94
"""Static data content factory.
96
This takes a 'chunked' list of strings. The only requirement on 'chunked' is
97
that ''.join(lines) becomes a valid fulltext. A tuple of a single string
98
satisfies this, as does a list of lines.
100
:ivar sha1: None, or the sha1 of the content fulltext.
101
:ivar storage_kind: The native storage kind of this factory. Always
103
:ivar key: The key of this content. Each key is a tuple with a single
105
:ivar parents: A tuple of parent keys for self.key. If the object has
106
no parent information, None (as opposed to () for an empty list of
110
def __init__(self, key, parents, sha1, chunks):
111
"""Create a ContentFactory."""
113
self.storage_kind = 'chunked'
115
self.parents = parents
116
self._chunks = chunks
118
def get_bytes_as(self, storage_kind):
119
if storage_kind == 'chunked':
121
elif storage_kind == 'fulltext':
122
return ''.join(self._chunks)
123
raise errors.UnavailableRepresentation(self.key, storage_kind,
127
class FulltextContentFactory(ContentFactory):
128
"""Static data content factory.
130
This takes a fulltext when created and just returns that during
131
get_bytes_as('fulltext').
133
:ivar sha1: None, or the sha1 of the content fulltext.
134
:ivar storage_kind: The native storage kind of this factory. Always
136
:ivar key: The key of this content. Each key is a tuple with a single
138
:ivar parents: A tuple of parent keys for self.key. If the object has
139
no parent information, None (as opposed to () for an empty list of
143
def __init__(self, key, parents, sha1, text):
144
"""Create a ContentFactory."""
146
self.storage_kind = 'fulltext'
148
self.parents = parents
151
def get_bytes_as(self, storage_kind):
152
if storage_kind == self.storage_kind:
154
elif storage_kind == 'chunked':
156
raise errors.UnavailableRepresentation(self.key, storage_kind,
160
class AbsentContentFactory(ContentFactory):
161
"""A placeholder content factory for unavailable texts.
164
:ivar storage_kind: 'absent'.
165
:ivar key: The key of this content. Each key is a tuple with a single
170
def __init__(self, key):
171
"""Create a ContentFactory."""
173
self.storage_kind = 'absent'
177
def get_bytes_as(self, storage_kind):
178
raise ValueError('A request was made for key: %s, but that'
179
' content is not available, and the calling'
180
' code does not handle if it is missing.'
184
class AdapterFactory(ContentFactory):
185
"""A content factory to adapt between key prefix's."""
187
def __init__(self, key, parents, adapted):
188
"""Create an adapter factory instance."""
190
self.parents = parents
191
self._adapted = adapted
193
def __getattr__(self, attr):
194
"""Return a member from the adapted object."""
195
if attr in ('key', 'parents'):
196
return self.__dict__[attr]
198
return getattr(self._adapted, attr)
201
def filter_absent(record_stream):
202
"""Adapt a record stream to remove absent records."""
203
for record in record_stream:
204
if record.storage_kind != 'absent':
208
class _MPDiffGenerator(object):
209
"""Pull out the functionality for generating mp_diffs."""
211
def __init__(self, vf, keys):
213
# This is the order the keys were requested in
214
self.ordered_keys = tuple(keys)
215
# keys + their parents, what we need to compute the diffs
216
self.needed_keys = ()
217
# Map from key: mp_diff
219
# Map from key: parents_needed (may have ghosts)
221
# Parents that aren't present
222
self.ghost_parents = ()
223
# Map from parent_key => number of children for this text
225
# Content chunks that are cached while we still need them
228
def _find_needed_keys(self):
229
"""Find the set of keys we need to request.
231
This includes all the original keys passed in, and the non-ghost
232
parents of those keys.
234
:return: (needed_keys, refcounts)
235
needed_keys is the set of all texts we need to extract
236
refcounts is a dict of {key: num_children} letting us know when we
237
no longer need to cache a given parent text
239
# All the keys and their parents
240
needed_keys = set(self.ordered_keys)
241
parent_map = self.vf.get_parent_map(needed_keys)
242
self.parent_map = parent_map
243
# TODO: Should we be using a different construct here? I think this
244
# uses difference_update internally, and we expect the result to
246
missing_keys = needed_keys.difference(parent_map)
248
raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
249
# Parents that might be missing. They are allowed to be ghosts, but we
250
# should check for them
252
setdefault = refcounts.setdefault
254
for child_key, parent_keys in parent_map.iteritems():
256
# parent_keys may be None if a given VersionedFile claims to
257
# not support graph operations.
259
just_parents.update(parent_keys)
260
needed_keys.update(parent_keys)
261
for p in parent_keys:
262
refcounts[p] = setdefault(p, 0) + 1
263
just_parents.difference_update(parent_map)
264
# Remove any parents that are actually ghosts from the needed set
265
self.present_parents = set(self.vf.get_parent_map(just_parents))
266
self.ghost_parents = just_parents.difference(self.present_parents)
267
needed_keys.difference_update(self.ghost_parents)
268
self.needed_keys = needed_keys
269
self.refcounts = refcounts
270
return needed_keys, refcounts
272
def _compute_diff(self, key, parent_lines, lines):
273
"""Compute a single mp_diff, and store it in self._diffs"""
274
if len(parent_lines) > 0:
275
# XXX: _extract_blocks is not usefully defined anywhere...
276
# It was meant to extract the left-parent diff without
277
# having to recompute it for Knit content (pack-0.92,
278
# etc). That seems to have regressed somewhere
279
left_parent_blocks = self.vf._extract_blocks(key,
280
parent_lines[0], lines)
282
left_parent_blocks = None
283
diff = multiparent.MultiParent.from_lines(lines,
284
parent_lines, left_parent_blocks)
285
self.diffs[key] = diff
287
def _process_one_record(self, key, this_chunks):
289
if key in self.parent_map:
290
# This record should be ready to diff, since we requested
291
# content in 'topological' order
292
parent_keys = self.parent_map.pop(key)
293
# If a VersionedFile claims 'no-graph' support, then it may return
294
# None for any parent request, so we replace it with an empty tuple
295
if parent_keys is None:
298
for p in parent_keys:
299
# Alternatively we could check p not in self.needed_keys, but
300
# ghost_parents should be tiny versus huge
301
if p in self.ghost_parents:
303
refcount = self.refcounts[p]
304
if refcount == 1: # Last child reference
305
self.refcounts.pop(p)
306
parent_chunks = self.chunks.pop(p)
308
self.refcounts[p] = refcount - 1
309
parent_chunks = self.chunks[p]
310
p_lines = osutils.chunks_to_lines(parent_chunks)
311
# TODO: Should we cache the line form? We did the
312
# computation to get it, but storing it this way will
313
# be less memory efficient...
314
parent_lines.append(p_lines)
316
lines = osutils.chunks_to_lines(this_chunks)
317
# Since we needed the lines, we'll go ahead and cache them this way
319
self._compute_diff(key, parent_lines, lines)
321
# Is this content required for any more children?
322
if key in self.refcounts:
323
self.chunks[key] = this_chunks
325
def _extract_diffs(self):
326
needed_keys, refcounts = self._find_needed_keys()
327
for record in self.vf.get_record_stream(needed_keys,
328
'topological', True):
329
if record.storage_kind == 'absent':
330
raise errors.RevisionNotPresent(record.key, self.vf)
331
self._process_one_record(record.key,
332
record.get_bytes_as('chunked'))
334
def compute_diffs(self):
335
self._extract_diffs()
336
dpop = self.diffs.pop
337
return [dpop(k) for k in self.ordered_keys]
340
class VersionedFile(object):
341
"""Versioned text file storage.
343
A versioned file manages versions of line-based text files,
344
keeping track of the originating version for each line.
346
To clients the "lines" of the file are represented as a list of
347
strings. These strings will typically have terminal newline
348
characters, but this is not required. In particular files commonly
349
do not have a newline at the end of the file.
351
Texts are identified by a version-id string.
355
def check_not_reserved_id(version_id):
356
revision.check_not_reserved_id(version_id)
358
def copy_to(self, name, transport):
359
"""Copy this versioned file to name on transport."""
360
raise NotImplementedError(self.copy_to)
362
def get_record_stream(self, versions, ordering, include_delta_closure):
363
"""Get a stream of records for versions.
365
:param versions: The versions to include. Each version is a tuple
367
:param ordering: Either 'unordered' or 'topological'. A topologically
368
sorted stream has compression parents strictly before their
370
:param include_delta_closure: If True then the closure across any
371
compression parents will be included (in the data content of the
372
stream, not in the emitted records). This guarantees that
373
'fulltext' can be used successfully on every record.
374
:return: An iterator of ContentFactory objects, each of which is only
375
valid until the iterator is advanced.
377
raise NotImplementedError(self.get_record_stream)
379
def has_version(self, version_id):
380
"""Returns whether version is present."""
381
raise NotImplementedError(self.has_version)
383
def insert_record_stream(self, stream):
384
"""Insert a record stream into this versioned file.
386
:param stream: A stream of records to insert.
388
:seealso VersionedFile.get_record_stream:
390
raise NotImplementedError
392
def add_lines(self, version_id, parents, lines, parent_texts=None,
393
left_matching_blocks=None, nostore_sha=None, random_id=False,
395
"""Add a single text on top of the versioned file.
397
Must raise RevisionAlreadyPresent if the new version is
398
already present in file history.
400
Must raise RevisionNotPresent if any of the given parents are
401
not present in file history.
403
:param lines: A list of lines. Each line must be a bytestring. And all
404
of them except the last must be terminated with \n and contain no
405
other \n's. The last line may either contain no \n's or a single
406
terminated \n. If the lines list does meet this constraint the add
407
routine may error or may succeed - but you will be unable to read
408
the data back accurately. (Checking the lines have been split
409
correctly is expensive and extremely unlikely to catch bugs so it
410
is not done at runtime unless check_content is True.)
411
:param parent_texts: An optional dictionary containing the opaque
412
representations of some or all of the parents of version_id to
413
allow delta optimisations. VERY IMPORTANT: the texts must be those
414
returned by add_lines or data corruption can be caused.
415
:param left_matching_blocks: a hint about which areas are common
416
between the text and its left-hand-parent. The format is
417
the SequenceMatcher.get_matching_blocks format.
418
:param nostore_sha: Raise ExistingContent and do not add the lines to
419
the versioned file if the digest of the lines matches this.
420
:param random_id: If True a random id has been selected rather than
421
an id determined by some deterministic process such as a converter
422
from a foreign VCS. When True the backend may choose not to check
423
for uniqueness of the resulting key within the versioned file, so
424
this should only be done when the result is expected to be unique
426
:param check_content: If True, the lines supplied are verified to be
427
bytestrings that are correctly formed lines.
428
:return: The text sha1, the number of bytes in the text, and an opaque
429
representation of the inserted version which can be provided
430
back to future add_lines calls in the parent_texts dictionary.
432
self._check_write_ok()
433
return self._add_lines(version_id, parents, lines, parent_texts,
434
left_matching_blocks, nostore_sha, random_id, check_content)
436
def _add_lines(self, version_id, parents, lines, parent_texts,
437
left_matching_blocks, nostore_sha, random_id, check_content):
438
"""Helper to do the class specific add_lines."""
439
raise NotImplementedError(self.add_lines)
441
def add_lines_with_ghosts(self, version_id, parents, lines,
442
parent_texts=None, nostore_sha=None, random_id=False,
443
check_content=True, left_matching_blocks=None):
444
"""Add lines to the versioned file, allowing ghosts to be present.
446
This takes the same parameters as add_lines and returns the same.
448
self._check_write_ok()
449
return self._add_lines_with_ghosts(version_id, parents, lines,
450
parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
452
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
453
nostore_sha, random_id, check_content, left_matching_blocks):
454
"""Helper to do class specific add_lines_with_ghosts."""
455
raise NotImplementedError(self.add_lines_with_ghosts)
457
def check(self, progress_bar=None):
458
"""Check the versioned file for integrity."""
459
raise NotImplementedError(self.check)
461
def _check_lines_not_unicode(self, lines):
462
"""Check that lines being added to a versioned file are not unicode."""
464
if line.__class__ is not str:
465
raise errors.BzrBadParameterUnicode("lines")
467
def _check_lines_are_lines(self, lines):
468
"""Check that the lines really are full lines without inline EOL."""
470
if '\n' in line[:-1]:
471
raise errors.BzrBadParameterContainsNewline("lines")
473
def get_format_signature(self):
474
"""Get a text description of the data encoding in this file.
478
raise NotImplementedError(self.get_format_signature)
480
def make_mpdiffs(self, version_ids):
481
"""Create multiparent diffs for specified versions."""
482
# XXX: Can't use _MPDiffGenerator just yet. This is because version_ids
483
# is a list of strings, not keys. And while self.get_record_stream
484
# is supported, it takes *keys*, while self.get_parent_map() takes
486
knit_versions = set()
487
knit_versions.update(version_ids)
488
parent_map = self.get_parent_map(version_ids)
489
for version_id in version_ids:
491
knit_versions.update(parent_map[version_id])
493
raise errors.RevisionNotPresent(version_id, self)
494
# We need to filter out ghosts, because we can't diff against them.
495
knit_versions = set(self.get_parent_map(knit_versions).keys())
496
lines = dict(zip(knit_versions,
497
self._get_lf_split_line_list(knit_versions)))
499
for version_id in version_ids:
500
target = lines[version_id]
502
parents = [lines[p] for p in parent_map[version_id] if p in
505
# I don't know how this could ever trigger.
506
# parent_map[version_id] was already triggered in the previous
507
# for loop, and lines[p] has the 'if p in knit_versions' check,
508
# so we again won't have a KeyError.
509
raise errors.RevisionNotPresent(version_id, self)
511
left_parent_blocks = self._extract_blocks(version_id,
514
left_parent_blocks = None
515
diffs.append(multiparent.MultiParent.from_lines(target, parents,
519
def _extract_blocks(self, version_id, source, target):
522
def add_mpdiffs(self, records):
523
"""Add mpdiffs to this VersionedFile.
525
Records should be iterables of version, parents, expected_sha1,
526
mpdiff. mpdiff should be a MultiParent instance.
528
# Does this need to call self._check_write_ok()? (IanC 20070919)
530
mpvf = multiparent.MultiMemoryVersionedFile()
532
for version, parent_ids, expected_sha1, mpdiff in records:
533
versions.append(version)
534
mpvf.add_diff(mpdiff, version, parent_ids)
535
needed_parents = set()
536
for version, parent_ids, expected_sha1, mpdiff in records:
537
needed_parents.update(p for p in parent_ids
538
if not mpvf.has_version(p))
539
present_parents = set(self.get_parent_map(needed_parents).keys())
540
for parent_id, lines in zip(present_parents,
541
self._get_lf_split_line_list(present_parents)):
542
mpvf.add_version(lines, parent_id, [])
543
for (version, parent_ids, expected_sha1, mpdiff), lines in\
544
zip(records, mpvf.get_line_list(versions)):
545
if len(parent_ids) == 1:
546
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
547
mpvf.get_diff(parent_ids[0]).num_lines()))
549
left_matching_blocks = None
551
_, _, version_text = self.add_lines_with_ghosts(version,
552
parent_ids, lines, vf_parents,
553
left_matching_blocks=left_matching_blocks)
554
except NotImplementedError:
555
# The vf can't handle ghosts, so add lines normally, which will
556
# (reasonably) fail if there are ghosts in the data.
557
_, _, version_text = self.add_lines(version,
558
parent_ids, lines, vf_parents,
559
left_matching_blocks=left_matching_blocks)
560
vf_parents[version] = version_text
561
sha1s = self.get_sha1s(versions)
562
for version, parent_ids, expected_sha1, mpdiff in records:
563
if expected_sha1 != sha1s[version]:
564
raise errors.VersionedFileInvalidChecksum(version)
566
def get_text(self, version_id):
567
"""Return version contents as a text string.
569
Raises RevisionNotPresent if version is not present in
572
return ''.join(self.get_lines(version_id))
573
get_string = get_text
575
def get_texts(self, version_ids):
576
"""Return the texts of listed versions as a list of strings.
578
Raises RevisionNotPresent if version is not present in
581
return [''.join(self.get_lines(v)) for v in version_ids]
583
def get_lines(self, version_id):
584
"""Return version contents as a sequence of lines.
586
Raises RevisionNotPresent if version is not present in
589
raise NotImplementedError(self.get_lines)
591
def _get_lf_split_line_list(self, version_ids):
592
return [StringIO(t).readlines() for t in self.get_texts(version_ids)]
594
def get_ancestry(self, version_ids, topo_sorted=True):
595
"""Return a list of all ancestors of given version(s). This
596
will not include the null revision.
598
This list will not be topologically sorted if topo_sorted=False is
601
Must raise RevisionNotPresent if any of the given versions are
602
not present in file history."""
603
if isinstance(version_ids, basestring):
604
version_ids = [version_ids]
605
raise NotImplementedError(self.get_ancestry)
607
def get_ancestry_with_ghosts(self, version_ids):
608
"""Return a list of all ancestors of given version(s). This
609
will not include the null revision.
611
Must raise RevisionNotPresent if any of the given versions are
612
not present in file history.
614
Ghosts that are known about will be included in ancestry list,
615
but are not explicitly marked.
617
raise NotImplementedError(self.get_ancestry_with_ghosts)
619
def get_parent_map(self, version_ids):
620
"""Get a map of the parents of version_ids.
622
:param version_ids: The version ids to look up parents for.
623
:return: A mapping from version id to parents.
625
raise NotImplementedError(self.get_parent_map)
627
def get_parents_with_ghosts(self, version_id):
628
"""Return version names for parents of version_id.
630
Will raise RevisionNotPresent if version_id is not present
633
Ghosts that are known about will be included in the parent list,
634
but are not explicitly marked.
637
return list(self.get_parent_map([version_id])[version_id])
639
raise errors.RevisionNotPresent(version_id, self)
641
def annotate(self, version_id):
642
"""Return a list of (version-id, line) tuples for version_id.
644
:raise RevisionNotPresent: If the given version is
645
not present in file history.
647
raise NotImplementedError(self.annotate)
649
def iter_lines_added_or_present_in_versions(self, version_ids=None,
651
"""Iterate over the lines in the versioned file from version_ids.
653
This may return lines from other versions. Each item the returned
654
iterator yields is a tuple of a line and a text version that that line
655
is present in (not introduced in).
657
Ordering of results is in whatever order is most suitable for the
658
underlying storage format.
660
If a progress bar is supplied, it may be used to indicate progress.
661
The caller is responsible for cleaning up progress bars (because this
664
NOTES: Lines are normalised: they will all have \n terminators.
665
Lines are returned in arbitrary order.
667
:return: An iterator over (line, version_id).
669
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
671
def plan_merge(self, ver_a, ver_b):
672
"""Return pseudo-annotation indicating how the two versions merge.
674
This is computed between versions a and b and their common
677
Weave lines present in none of them are skipped entirely.
680
killed-base Dead in base revision
681
killed-both Killed in each revision
684
unchanged Alive in both a and b (possibly created in both)
687
ghost-a Killed in a, unborn in b
688
ghost-b Killed in b, unborn in a
689
irrelevant Not in either revision
691
raise NotImplementedError(VersionedFile.plan_merge)
693
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
694
b_marker=TextMerge.B_MARKER):
695
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
698
class RecordingVersionedFilesDecorator(object):
699
"""A minimal versioned files that records calls made on it.
701
Only enough methods have been added to support tests using it to date.
703
:ivar calls: A list of the calls made; can be reset at any time by
707
def __init__(self, backing_vf):
708
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
710
:param backing_vf: The versioned file to answer all methods.
712
self._backing_vf = backing_vf
715
def add_lines(self, key, parents, lines, parent_texts=None,
716
left_matching_blocks=None, nostore_sha=None, random_id=False,
718
self.calls.append(("add_lines", key, parents, lines, parent_texts,
719
left_matching_blocks, nostore_sha, random_id, check_content))
720
return self._backing_vf.add_lines(key, parents, lines, parent_texts,
721
left_matching_blocks, nostore_sha, random_id, check_content)
724
self._backing_vf.check()
726
def get_parent_map(self, keys):
727
self.calls.append(("get_parent_map", copy(keys)))
728
return self._backing_vf.get_parent_map(keys)
730
def get_record_stream(self, keys, sort_order, include_delta_closure):
731
self.calls.append(("get_record_stream", list(keys), sort_order,
732
include_delta_closure))
733
return self._backing_vf.get_record_stream(keys, sort_order,
734
include_delta_closure)
736
def get_sha1s(self, keys):
737
self.calls.append(("get_sha1s", copy(keys)))
738
return self._backing_vf.get_sha1s(keys)
740
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
741
self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
742
return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)
745
self.calls.append(("keys",))
746
return self._backing_vf.keys()
749
class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
750
"""A VF that records calls, and returns keys in specific order.
752
:ivar calls: A list of the calls made; can be reset at any time by
756
def __init__(self, backing_vf, key_priority):
757
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
759
:param backing_vf: The versioned file to answer all methods.
760
:param key_priority: A dictionary defining what order keys should be
761
returned from an 'unordered' get_record_stream request.
762
Keys with lower priority are returned first, keys not present in
763
the map get an implicit priority of 0, and are returned in
764
lexicographical order.
766
RecordingVersionedFilesDecorator.__init__(self, backing_vf)
767
self._key_priority = key_priority
769
def get_record_stream(self, keys, sort_order, include_delta_closure):
770
self.calls.append(("get_record_stream", list(keys), sort_order,
771
include_delta_closure))
772
if sort_order == 'unordered':
774
return (self._key_priority.get(key, 0), key)
775
# Use a defined order by asking for the keys one-by-one from the
777
for key in sorted(keys, key=sort_key):
778
for record in self._backing_vf.get_record_stream([key],
779
'unordered', include_delta_closure):
782
for record in self._backing_vf.get_record_stream(keys, sort_order,
783
include_delta_closure):
787
class KeyMapper(object):
788
"""KeyMappers map between keys and underlying partitioned storage."""
791
"""Map key to an underlying storage identifier.
793
:param key: A key tuple e.g. ('file-id', 'revision-id').
794
:return: An underlying storage identifier, specific to the partitioning
797
raise NotImplementedError(self.map)
799
def unmap(self, partition_id):
800
"""Map a partitioned storage id back to a key prefix.
802
:param partition_id: The underlying partition id.
803
:return: As much of a key (or prefix) as is derivable from the partition
806
raise NotImplementedError(self.unmap)
809
class ConstantMapper(KeyMapper):
810
"""A key mapper that maps to a constant result."""
812
def __init__(self, result):
813
"""Create a ConstantMapper which will return result for all maps."""
814
self._result = result
817
"""See KeyMapper.map()."""
821
class URLEscapeMapper(KeyMapper):
822
"""Base class for use with transport backed storage.
824
This provides a map and unmap wrapper that respectively url escape and
825
unescape their outputs and inputs.
829
"""See KeyMapper.map()."""
830
return urllib.quote(self._map(key))
832
def unmap(self, partition_id):
833
"""See KeyMapper.unmap()."""
834
return self._unmap(urllib.unquote(partition_id))
837
class PrefixMapper(URLEscapeMapper):
838
"""A key mapper that extracts the first component of a key.
840
This mapper is for use with a transport based backend.
844
"""See KeyMapper.map()."""
847
def _unmap(self, partition_id):
848
"""See KeyMapper.unmap()."""
849
return (partition_id,)
852
class HashPrefixMapper(URLEscapeMapper):
853
"""A key mapper that combines the first component of a key with a hash.
855
This mapper is for use with a transport based backend.
859
"""See KeyMapper.map()."""
860
prefix = self._escape(key[0])
861
return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
863
def _escape(self, prefix):
864
"""No escaping needed here."""
867
def _unmap(self, partition_id):
868
"""See KeyMapper.unmap()."""
869
return (self._unescape(osutils.basename(partition_id)),)
871
def _unescape(self, basename):
872
"""No unescaping needed for HashPrefixMapper."""
876
class HashEscapedPrefixMapper(HashPrefixMapper):
877
"""Combines the escaped first component of a key with a hash.
879
This mapper is for use with a transport based backend.
882
_safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
884
def _escape(self, prefix):
885
"""Turn a key element into a filesystem safe string.
887
This is similar to a plain urllib.quote, except
888
it uses specific safe characters, so that it doesn't
889
have to translate a lot of valid file ids.
891
# @ does not get escaped. This is because it is a valid
892
# filesystem character we use all the time, and it looks
893
# a lot better than seeing %40 all the time.
894
r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
898
def _unescape(self, basename):
899
"""Escaped names are easily unescaped by urlutils."""
900
return urllib.unquote(basename)
903
def make_versioned_files_factory(versioned_file_factory, mapper):
904
"""Create a ThunkedVersionedFiles factory.
906
This will create a callable which when called creates a
907
ThunkedVersionedFiles on a transport, using mapper to access individual
908
versioned files, and versioned_file_factory to create each individual file.
910
def factory(transport):
911
return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
916
class VersionedFiles(object):
917
"""Storage for many versioned files.
919
This object allows a single keyspace for accessing the history graph and
920
contents of named bytestrings.
922
Currently no implementation allows the graph of different key prefixes to
923
intersect, but the API does allow such implementations in the future.
925
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
926
may have a different length key-size, but that size will be constant for
927
all texts added to or retrieved from it. For instance, bzrlib uses
928
instances with a key-size of 2 for storing user files in a repository, with
929
the first element the fileid, and the second the version of that file.
931
The use of tuples allows a single code base to support several different
932
uses with only the mapping logic changing from instance to instance.
935
def add_lines(self, key, parents, lines, parent_texts=None,
936
left_matching_blocks=None, nostore_sha=None, random_id=False,
938
"""Add a text to the store.
940
:param key: The key tuple of the text to add. If the last element is
941
None, a CHK string will be generated during the addition.
942
:param parents: The parents key tuples of the text to add.
943
:param lines: A list of lines. Each line must be a bytestring. And all
944
of them except the last must be terminated with \n and contain no
945
other \n's. The last line may either contain no \n's or a single
946
terminating \n. If the lines list does meet this constraint the add
947
routine may error or may succeed - but you will be unable to read
948
the data back accurately. (Checking the lines have been split
949
correctly is expensive and extremely unlikely to catch bugs so it
950
is not done at runtime unless check_content is True.)
951
:param parent_texts: An optional dictionary containing the opaque
952
representations of some or all of the parents of version_id to
953
allow delta optimisations. VERY IMPORTANT: the texts must be those
954
returned by add_lines or data corruption can be caused.
955
:param left_matching_blocks: a hint about which areas are common
956
between the text and its left-hand-parent. The format is
957
the SequenceMatcher.get_matching_blocks format.
958
:param nostore_sha: Raise ExistingContent and do not add the lines to
959
the versioned file if the digest of the lines matches this.
960
:param random_id: If True a random id has been selected rather than
961
an id determined by some deterministic process such as a converter
962
from a foreign VCS. When True the backend may choose not to check
963
for uniqueness of the resulting key within the versioned file, so
964
this should only be done when the result is expected to be unique
966
:param check_content: If True, the lines supplied are verified to be
967
bytestrings that are correctly formed lines.
968
:return: The text sha1, the number of bytes in the text, and an opaque
969
representation of the inserted version which can be provided
970
back to future add_lines calls in the parent_texts dictionary.
972
raise NotImplementedError(self.add_lines)
974
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
975
"""Add a text to the store.
977
This is a private function for use by CommitBuilder.
979
:param key: The key tuple of the text to add. If the last element is
980
None, a CHK string will be generated during the addition.
981
:param parents: The parents key tuples of the text to add.
982
:param text: A string containing the text to be committed.
983
:param nostore_sha: Raise ExistingContent and do not add the lines to
984
the versioned file if the digest of the lines matches this.
985
:param random_id: If True a random id has been selected rather than
986
an id determined by some deterministic process such as a converter
987
from a foreign VCS. When True the backend may choose not to check
988
for uniqueness of the resulting key within the versioned file, so
989
this should only be done when the result is expected to be unique
991
:param check_content: If True, the lines supplied are verified to be
992
bytestrings that are correctly formed lines.
993
:return: The text sha1, the number of bytes in the text, and an opaque
994
representation of the inserted version which can be provided
995
back to future _add_text calls in the parent_texts dictionary.
997
# The default implementation just thunks over to .add_lines(),
998
# inefficient, but it works.
999
return self.add_lines(key, parents, osutils.split_lines(text),
1000
nostore_sha=nostore_sha,
1001
random_id=random_id,
1004
def add_mpdiffs(self, records):
1005
"""Add mpdiffs to this VersionedFile.
1007
Records should be iterables of version, parents, expected_sha1,
1008
mpdiff. mpdiff should be a MultiParent instance.
1011
mpvf = multiparent.MultiMemoryVersionedFile()
1013
for version, parent_ids, expected_sha1, mpdiff in records:
1014
versions.append(version)
1015
mpvf.add_diff(mpdiff, version, parent_ids)
1016
needed_parents = set()
1017
for version, parent_ids, expected_sha1, mpdiff in records:
1018
needed_parents.update(p for p in parent_ids
1019
if not mpvf.has_version(p))
1020
# It seems likely that adding all the present parents as fulltexts can
1021
# easily exhaust memory.
1022
chunks_to_lines = osutils.chunks_to_lines
1023
for record in self.get_record_stream(needed_parents, 'unordered',
1025
if record.storage_kind == 'absent':
1027
mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
1029
for (key, parent_keys, expected_sha1, mpdiff), lines in\
1030
zip(records, mpvf.get_line_list(versions)):
1031
if len(parent_keys) == 1:
1032
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
1033
mpvf.get_diff(parent_keys[0]).num_lines()))
1035
left_matching_blocks = None
1036
version_sha1, _, version_text = self.add_lines(key,
1037
parent_keys, lines, vf_parents,
1038
left_matching_blocks=left_matching_blocks)
1039
if version_sha1 != expected_sha1:
1040
raise errors.VersionedFileInvalidChecksum(version)
1041
vf_parents[key] = version_text
1043
def annotate(self, key):
1044
"""Return a list of (version-key, line) tuples for the text of key.
1046
:raise RevisionNotPresent: If the key is not present.
1048
raise NotImplementedError(self.annotate)
1050
def check(self, progress_bar=None):
1051
"""Check this object for integrity.
1053
:param progress_bar: A progress bar to output as the check progresses.
1054
:param keys: Specific keys within the VersionedFiles to check. When
1055
this parameter is not None, check() becomes a generator as per
1056
get_record_stream. The difference to get_record_stream is that
1057
more or deeper checks will be performed.
1058
:return: None, or if keys was supplied a generator as per
1061
raise NotImplementedError(self.check)
1064
def check_not_reserved_id(version_id):
1065
revision.check_not_reserved_id(version_id)
1067
def clear_cache(self):
1068
"""Clear whatever caches this VersionedFile holds.
1070
This is generally called after an operation has been performed, when we
1071
don't expect to be using this versioned file again soon.
1074
def _check_lines_not_unicode(self, lines):
1075
"""Check that lines being added to a versioned file are not unicode."""
1077
if line.__class__ is not str:
1078
raise errors.BzrBadParameterUnicode("lines")
1080
def _check_lines_are_lines(self, lines):
1081
"""Check that the lines really are full lines without inline EOL."""
1083
if '\n' in line[:-1]:
1084
raise errors.BzrBadParameterContainsNewline("lines")
1086
def get_known_graph_ancestry(self, keys):
1087
"""Get a KnownGraph instance with the ancestry of keys."""
1088
# most basic implementation is a loop around get_parent_map
1092
this_parent_map = self.get_parent_map(pending)
1093
parent_map.update(this_parent_map)
1095
map(pending.update, this_parent_map.itervalues())
1096
pending = pending.difference(parent_map)
1097
kg = _mod_graph.KnownGraph(parent_map)
1100
def get_parent_map(self, keys):
1101
"""Get a map of the parents of keys.
1103
:param keys: The keys to look up parents for.
1104
:return: A mapping from keys to parents. Absent keys are absent from
1107
raise NotImplementedError(self.get_parent_map)
1109
def get_record_stream(self, keys, ordering, include_delta_closure):
1110
"""Get a stream of records for keys.
1112
:param keys: The keys to include.
1113
:param ordering: Either 'unordered' or 'topological'. A topologically
1114
sorted stream has compression parents strictly before their
1116
:param include_delta_closure: If True then the closure across any
1117
compression parents will be included (in the opaque data).
1118
:return: An iterator of ContentFactory objects, each of which is only
1119
valid until the iterator is advanced.
1121
raise NotImplementedError(self.get_record_stream)
1123
def get_sha1s(self, keys):
1124
"""Get the sha1's of the texts for the given keys.
1126
:param keys: The names of the keys to lookup
1127
:return: a dict from key to sha1 digest. Keys of texts which are not
1128
present in the store are not present in the returned
1131
raise NotImplementedError(self.get_sha1s)
1133
has_key = index._has_key_from_parent_map
1135
def get_missing_compression_parent_keys(self):
1136
"""Return an iterable of keys of missing compression parents.
1138
Check this after calling insert_record_stream to find out if there are
1139
any missing compression parents. If there are, the records that
1140
depend on them are not able to be inserted safely. The precise
1141
behaviour depends on the concrete VersionedFiles class in use.
1143
Classes that do not support this will raise NotImplementedError.
1145
raise NotImplementedError(self.get_missing_compression_parent_keys)
1147
def insert_record_stream(self, stream):
1148
"""Insert a record stream into this container.
1150
:param stream: A stream of records to insert.
1152
:seealso VersionedFile.get_record_stream:
1154
raise NotImplementedError
1156
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1157
"""Iterate over the lines in the versioned files from keys.
1159
This may return lines from other keys. Each item the returned
1160
iterator yields is a tuple of a line and a text version that that line
1161
is present in (not introduced in).
1163
Ordering of results is in whatever order is most suitable for the
1164
underlying storage format.
1166
If a progress bar is supplied, it may be used to indicate progress.
1167
The caller is responsible for cleaning up progress bars (because this
1171
* Lines are normalised by the underlying store: they will all have \n
1173
* Lines are returned in arbitrary order.
1175
:return: An iterator over (line, key).
1177
raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
1180
"""Return a iterable of the keys for all the contained texts."""
1181
raise NotImplementedError(self.keys)
1183
def make_mpdiffs(self, keys):
1184
"""Create multiparent diffs for specified keys."""
1185
generator = _MPDiffGenerator(self, keys)
1186
return generator.compute_diffs()
1188
def get_annotator(self):
1189
return annotate.Annotator(self)
1191
missing_keys = index._missing_keys_from_parent_map
1193
def _extract_blocks(self, version_id, source, target):
1197
class ThunkedVersionedFiles(VersionedFiles):
1198
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1200
This object allows a single keyspace for accessing the history graph and
1201
contents of named bytestrings.
1203
Currently no implementation allows the graph of different key prefixes to
1204
intersect, but the API does allow such implementations in the future.
1207
def __init__(self, transport, file_factory, mapper, is_locked):
1208
"""Create a ThunkedVersionedFiles."""
1209
self._transport = transport
1210
self._file_factory = file_factory
1211
self._mapper = mapper
1212
self._is_locked = is_locked
1214
def add_lines(self, key, parents, lines, parent_texts=None,
1215
left_matching_blocks=None, nostore_sha=None, random_id=False,
1216
check_content=True):
1217
"""See VersionedFiles.add_lines()."""
1218
path = self._mapper.map(key)
1219
version_id = key[-1]
1220
parents = [parent[-1] for parent in parents]
1221
vf = self._get_vf(path)
1224
return vf.add_lines_with_ghosts(version_id, parents, lines,
1225
parent_texts=parent_texts,
1226
left_matching_blocks=left_matching_blocks,
1227
nostore_sha=nostore_sha, random_id=random_id,
1228
check_content=check_content)
1229
except NotImplementedError:
1230
return vf.add_lines(version_id, parents, lines,
1231
parent_texts=parent_texts,
1232
left_matching_blocks=left_matching_blocks,
1233
nostore_sha=nostore_sha, random_id=random_id,
1234
check_content=check_content)
1235
except errors.NoSuchFile:
1236
# parent directory may be missing, try again.
1237
self._transport.mkdir(osutils.dirname(path))
1239
return vf.add_lines_with_ghosts(version_id, parents, lines,
1240
parent_texts=parent_texts,
1241
left_matching_blocks=left_matching_blocks,
1242
nostore_sha=nostore_sha, random_id=random_id,
1243
check_content=check_content)
1244
except NotImplementedError:
1245
return vf.add_lines(version_id, parents, lines,
1246
parent_texts=parent_texts,
1247
left_matching_blocks=left_matching_blocks,
1248
nostore_sha=nostore_sha, random_id=random_id,
1249
check_content=check_content)
1251
def annotate(self, key):
1252
"""Return a list of (version-key, line) tuples for the text of key.
1254
:raise RevisionNotPresent: If the key is not present.
1257
path = self._mapper.map(prefix)
1258
vf = self._get_vf(path)
1259
origins = vf.annotate(key[-1])
1261
for origin, line in origins:
1262
result.append((prefix + (origin,), line))
1265
def check(self, progress_bar=None, keys=None):
1266
"""See VersionedFiles.check()."""
1267
# XXX: This is over-enthusiastic but as we only thunk for Weaves today
1268
# this is tolerable. Ideally we'd pass keys down to check() and
1269
# have the older VersiondFile interface updated too.
1270
for prefix, vf in self._iter_all_components():
1272
if keys is not None:
1273
return self.get_record_stream(keys, 'unordered', True)
1275
def get_parent_map(self, keys):
1276
"""Get a map of the parents of keys.
1278
:param keys: The keys to look up parents for.
1279
:return: A mapping from keys to parents. Absent keys are absent from
1282
prefixes = self._partition_keys(keys)
1284
for prefix, suffixes in prefixes.items():
1285
path = self._mapper.map(prefix)
1286
vf = self._get_vf(path)
1287
parent_map = vf.get_parent_map(suffixes)
1288
for key, parents in parent_map.items():
1289
result[prefix + (key,)] = tuple(
1290
prefix + (parent,) for parent in parents)
1293
def _get_vf(self, path):
1294
if not self._is_locked():
1295
raise errors.ObjectNotLocked(self)
1296
return self._file_factory(path, self._transport, create=True,
1297
get_scope=lambda:None)
1299
def _partition_keys(self, keys):
1300
"""Turn keys into a dict of prefix:suffix_list."""
1303
prefix_keys = result.setdefault(key[:-1], [])
1304
prefix_keys.append(key[-1])
1307
def _get_all_prefixes(self):
1308
# Identify all key prefixes.
1309
# XXX: A bit hacky, needs polish.
1310
if type(self._mapper) == ConstantMapper:
1311
paths = [self._mapper.map(())]
1315
for quoted_relpath in self._transport.iter_files_recursive():
1316
path, ext = os.path.splitext(quoted_relpath)
1318
paths = list(relpaths)
1319
prefixes = [self._mapper.unmap(path) for path in paths]
1320
return zip(paths, prefixes)
1322
def get_record_stream(self, keys, ordering, include_delta_closure):
1323
"""See VersionedFiles.get_record_stream()."""
1324
# Ordering will be taken care of by each partitioned store; group keys
1327
for prefix, suffixes, vf in self._iter_keys_vf(keys):
1328
suffixes = [(suffix,) for suffix in suffixes]
1329
for record in vf.get_record_stream(suffixes, ordering,
1330
include_delta_closure):
1331
if record.parents is not None:
1332
record.parents = tuple(
1333
prefix + parent for parent in record.parents)
1334
record.key = prefix + record.key
1337
def _iter_keys_vf(self, keys):
1338
prefixes = self._partition_keys(keys)
1340
for prefix, suffixes in prefixes.items():
1341
path = self._mapper.map(prefix)
1342
vf = self._get_vf(path)
1343
yield prefix, suffixes, vf
1345
def get_sha1s(self, keys):
1346
"""See VersionedFiles.get_sha1s()."""
1348
for prefix,suffixes, vf in self._iter_keys_vf(keys):
1349
vf_sha1s = vf.get_sha1s(suffixes)
1350
for suffix, sha1 in vf_sha1s.iteritems():
1351
sha1s[prefix + (suffix,)] = sha1
1354
def insert_record_stream(self, stream):
1355
"""Insert a record stream into this container.
1357
:param stream: A stream of records to insert.
1359
:seealso VersionedFile.get_record_stream:
1361
for record in stream:
1362
prefix = record.key[:-1]
1363
key = record.key[-1:]
1364
if record.parents is not None:
1365
parents = [parent[-1:] for parent in record.parents]
1368
thunk_record = AdapterFactory(key, parents, record)
1369
path = self._mapper.map(prefix)
1370
# Note that this parses the file many times; we can do better but
1371
# as this only impacts weaves in terms of performance, it is
1373
vf = self._get_vf(path)
1374
vf.insert_record_stream([thunk_record])
1376
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1377
"""Iterate over the lines in the versioned files from keys.
1379
This may return lines from other keys. Each item the returned
1380
iterator yields is a tuple of a line and a text version that that line
1381
is present in (not introduced in).
1383
Ordering of results is in whatever order is most suitable for the
1384
underlying storage format.
1386
If a progress bar is supplied, it may be used to indicate progress.
1387
The caller is responsible for cleaning up progress bars (because this
1391
* Lines are normalised by the underlying store: they will all have \n
1393
* Lines are returned in arbitrary order.
1395
:return: An iterator over (line, key).
1397
for prefix, suffixes, vf in self._iter_keys_vf(keys):
1398
for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
1399
yield line, prefix + (version,)
1401
def _iter_all_components(self):
1402
for path, prefix in self._get_all_prefixes():
1403
yield prefix, self._get_vf(path)
1406
"""See VersionedFiles.keys()."""
1408
for prefix, vf in self._iter_all_components():
1409
for suffix in vf.versions():
1410
result.add(prefix + (suffix,))
1414
class _PlanMergeVersionedFile(VersionedFiles):
1415
"""A VersionedFile for uncommitted and committed texts.
1417
It is intended to allow merges to be planned with working tree texts.
1418
It implements only the small part of the VersionedFiles interface used by
1419
PlanMerge. It falls back to multiple versionedfiles for data not stored in
1420
_PlanMergeVersionedFile itself.
1422
:ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
1423
queried for missing texts.
1426
def __init__(self, file_id):
1427
"""Create a _PlanMergeVersionedFile.
1429
:param file_id: Used with _PlanMerge code which is not yet fully
1430
tuple-keyspace aware.
1432
self._file_id = file_id
1433
# fallback locations
1434
self.fallback_versionedfiles = []
1435
# Parents for locally held keys.
1437
# line data for locally held keys.
1439
# key lookup providers
1440
self._providers = [DictParentsProvider(self._parents)]
1442
def plan_merge(self, ver_a, ver_b, base=None):
1443
"""See VersionedFile.plan_merge"""
1444
from bzrlib.merge import _PlanMerge
1446
return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
1447
old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
1448
new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
1449
return _PlanMerge._subtract_plans(old_plan, new_plan)
1451
def plan_lca_merge(self, ver_a, ver_b, base=None):
1452
from bzrlib.merge import _PlanLCAMerge
1454
new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
1457
old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
1458
return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))
1460
def add_lines(self, key, parents, lines):
1461
"""See VersionedFiles.add_lines
1463
Lines are added locally, not to fallback versionedfiles. Also, ghosts
1464
are permitted. Only reserved ids are permitted.
1466
if type(key) is not tuple:
1467
raise TypeError(key)
1468
if not revision.is_reserved_id(key[-1]):
1469
raise ValueError('Only reserved ids may be used')
1471
raise ValueError('Parents may not be None')
1473
raise ValueError('Lines may not be None')
1474
self._parents[key] = tuple(parents)
1475
self._lines[key] = lines
1477
def get_record_stream(self, keys, ordering, include_delta_closure):
1480
if key in self._lines:
1481
lines = self._lines[key]
1482
parents = self._parents[key]
1484
yield ChunkedContentFactory(key, parents, None, lines)
1485
for versionedfile in self.fallback_versionedfiles:
1486
for record in versionedfile.get_record_stream(
1487
pending, 'unordered', True):
1488
if record.storage_kind == 'absent':
1491
pending.remove(record.key)
1495
# report absent entries
1497
yield AbsentContentFactory(key)
1499
def get_parent_map(self, keys):
1500
"""See VersionedFiles.get_parent_map"""
1501
# We create a new provider because a fallback may have been added.
1502
# If we make fallbacks private we can update a stack list and avoid
1503
# object creation thrashing.
1506
if revision.NULL_REVISION in keys:
1507
keys.remove(revision.NULL_REVISION)
1508
result[revision.NULL_REVISION] = ()
1509
self._providers = self._providers[:1] + self.fallback_versionedfiles
1511
StackedParentsProvider(self._providers).get_parent_map(keys))
1512
for key, parents in result.iteritems():
1514
result[key] = (revision.NULL_REVISION,)
1518
class PlanWeaveMerge(TextMerge):
1519
"""Weave merge that takes a plan as its input.
1521
This exists so that VersionedFile.plan_merge is implementable.
1522
Most callers will want to use WeaveMerge instead.
1525
def __init__(self, plan, a_marker=TextMerge.A_MARKER,
1526
b_marker=TextMerge.B_MARKER):
1527
TextMerge.__init__(self, a_marker, b_marker)
1528
self.plan = list(plan)
1530
def _merge_struct(self):
1535
def outstanding_struct():
1536
if not lines_a and not lines_b:
1538
elif ch_a and not ch_b:
1541
elif ch_b and not ch_a:
1543
elif lines_a == lines_b:
1546
yield (lines_a, lines_b)
1548
# We previously considered either 'unchanged' or 'killed-both' lines
1549
# to be possible places to resynchronize. However, assuming agreement
1550
# on killed-both lines may be too aggressive. -- mbp 20060324
1551
for state, line in self.plan:
1552
if state == 'unchanged':
1553
# resync and flush queued conflicts changes if any
1554
for struct in outstanding_struct():
1560
if state == 'unchanged':
1563
elif state == 'killed-a':
1565
lines_b.append(line)
1566
elif state == 'killed-b':
1568
lines_a.append(line)
1569
elif state == 'new-a':
1571
lines_a.append(line)
1572
elif state == 'new-b':
1574
lines_b.append(line)
1575
elif state == 'conflicted-a':
1577
lines_a.append(line)
1578
elif state == 'conflicted-b':
1580
lines_b.append(line)
1581
elif state == 'killed-both':
1582
# This counts as a change, even though there is no associated
1586
if state not in ('irrelevant', 'ghost-a', 'ghost-b',
1588
raise AssertionError(state)
1589
for struct in outstanding_struct():
1592
def base_from_plan(self):
1593
"""Construct a BASE file from the plan text."""
1595
for state, line in self.plan:
1596
if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1597
# If unchanged, then this line is straight from base. If a or b
1598
# or both killed the line, then it *used* to be in base.
1599
base_lines.append(line)
1601
if state not in ('killed-base', 'irrelevant',
1602
'ghost-a', 'ghost-b',
1604
'conflicted-a', 'conflicted-b'):
1605
# killed-base, irrelevant means it doesn't apply
1606
# ghost-a/ghost-b are harder to say for sure, but they
1607
# aren't in the 'inc_c' which means they aren't in the
1608
# shared base of a & b. So we don't include them. And
1609
# obviously if the line is newly inserted, it isn't in base
1611
# If 'conflicted-a' or b, then it is new vs one base, but
1612
# old versus another base. However, if we make it present
1613
# in the base, it will be deleted from the target, and it
1614
# seems better to get a line doubled in the merge result,
1615
# rather than have it deleted entirely.
1616
# Example, each node is the 'text' at that point:
1624
# There was a criss-cross conflict merge. Both sides
1625
# include the other, but put themselves first.
1626
# Weave marks this as a 'clean' merge, picking OTHER over
1627
# THIS. (Though the details depend on order inserted into
1629
# LCA generates a plan:
1630
# [('unchanged', M),
1631
# ('conflicted-b', b),
1633
# ('conflicted-a', b),
1635
# If you mark 'conflicted-*' as part of BASE, then a 3-way
1636
# merge tool will cleanly generate "MaN" (as BASE vs THIS
1637
# removes one 'b', and BASE vs OTHER removes the other)
1638
# If you include neither, 3-way creates a clean "MbabN" as
1639
# THIS adds one 'b', and OTHER does too.
1640
# It seems that having the line 2 times is better than
1641
# having it omitted. (Easier to manually delete than notice
1642
# it needs to be added.)
1643
raise AssertionError('Unknown state: %s' % (state,))
1647
class WeaveMerge(PlanWeaveMerge):
1648
"""Weave merge that takes a VersionedFile and two versions as its input."""
1650
def __init__(self, versionedfile, ver_a, ver_b,
1651
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
1652
plan = versionedfile.plan_merge(ver_a, ver_b)
1653
PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1656
class VirtualVersionedFiles(VersionedFiles):
1657
"""Dummy implementation for VersionedFiles that uses other functions for
1658
obtaining fulltexts and parent maps.
1660
This is always on the bottom of the stack and uses string keys
1661
(rather than tuples) internally.
1664
def __init__(self, get_parent_map, get_lines):
1665
"""Create a VirtualVersionedFiles.
1667
:param get_parent_map: Same signature as Repository.get_parent_map.
1668
:param get_lines: Should return lines for specified key or None if
1671
super(VirtualVersionedFiles, self).__init__()
1672
self._get_parent_map = get_parent_map
1673
self._get_lines = get_lines
1675
def check(self, progressbar=None):
1676
"""See VersionedFiles.check.
1678
:note: Always returns True for VirtualVersionedFiles.
1682
def add_mpdiffs(self, records):
1683
"""See VersionedFiles.mpdiffs.
1685
:note: Not implemented for VirtualVersionedFiles.
1687
raise NotImplementedError(self.add_mpdiffs)
1689
def get_parent_map(self, keys):
1690
"""See VersionedFiles.get_parent_map."""
1691
return dict([((k,), tuple([(p,) for p in v]))
1692
for k,v in self._get_parent_map([k for (k,) in keys]).iteritems()])
1694
def get_sha1s(self, keys):
1695
"""See VersionedFiles.get_sha1s."""
1698
lines = self._get_lines(k)
1699
if lines is not None:
1700
if not isinstance(lines, list):
1701
raise AssertionError
1702
ret[(k,)] = osutils.sha_strings(lines)
1705
def get_record_stream(self, keys, ordering, include_delta_closure):
1706
"""See VersionedFiles.get_record_stream."""
1707
for (k,) in list(keys):
1708
lines = self._get_lines(k)
1709
if lines is not None:
1710
if not isinstance(lines, list):
1711
raise AssertionError
1712
yield ChunkedContentFactory((k,), None,
1713
sha1=osutils.sha_strings(lines),
1716
yield AbsentContentFactory((k,))
1718
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1719
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
1720
for i, (key,) in enumerate(keys):
1722
pb.update("Finding changed lines", i, len(keys))
1723
for l in self._get_lines(key):
1727
class NoDupeAddLinesDecorator(object):
1728
"""Decorator for a VersionedFiles that skips doing an add_lines if the key
1732
def __init__(self, store):
1735
def add_lines(self, key, parents, lines, parent_texts=None,
1736
left_matching_blocks=None, nostore_sha=None, random_id=False,
1737
check_content=True):
1738
"""See VersionedFiles.add_lines.
1740
This implementation may return None as the third element of the return
1741
value when the original store wouldn't.
1744
raise NotImplementedError(
1745
"NoDupeAddLinesDecorator.add_lines does not implement the "
1746
"nostore_sha behaviour.")
1748
sha1 = osutils.sha_strings(lines)
1749
key = ("sha1:" + sha1,)
1752
if key in self._store.get_parent_map([key]):
1753
# This key has already been inserted, so don't do it again.
1755
sha1 = osutils.sha_strings(lines)
1756
return sha1, sum(map(len, lines)), None
1757
return self._store.add_lines(key, parents, lines,
1758
parent_texts=parent_texts,
1759
left_matching_blocks=left_matching_blocks,
1760
nostore_sha=nostore_sha, random_id=random_id,
1761
check_content=check_content)
1763
def __getattr__(self, name):
1764
return getattr(self._store, name)
1767
def network_bytes_to_kind_and_offset(network_bytes):
1768
"""Strip of a record kind from the front of network_bytes.
1770
:param network_bytes: The bytes of a record.
1771
:return: A tuple (storage_kind, offset_of_remaining_bytes)
1773
line_end = network_bytes.find('\n')
1774
storage_kind = network_bytes[:line_end]
1775
return storage_kind, line_end + 1
1778
class NetworkRecordStream(object):
1779
"""A record_stream which reconstitures a serialised stream."""
1781
def __init__(self, bytes_iterator):
1782
"""Create a NetworkRecordStream.
1784
:param bytes_iterator: An iterator of bytes. Each item in this
1785
iterator should have been obtained from a record_streams'
1786
record.get_bytes_as(record.storage_kind) call.
1788
self._bytes_iterator = bytes_iterator
1789
self._kind_factory = {
1790
'fulltext': fulltext_network_to_record,
1791
'groupcompress-block': groupcompress.network_block_to_records,
1792
'knit-ft-gz': knit.knit_network_to_record,
1793
'knit-delta-gz': knit.knit_network_to_record,
1794
'knit-annotated-ft-gz': knit.knit_network_to_record,
1795
'knit-annotated-delta-gz': knit.knit_network_to_record,
1796
'knit-delta-closure': knit.knit_delta_closure_to_records,
1802
:return: An iterator as per VersionedFiles.get_record_stream().
1804
for bytes in self._bytes_iterator:
1805
storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
1806
for record in self._kind_factory[storage_kind](
1807
storage_kind, bytes, line_end):
1811
def fulltext_network_to_record(kind, bytes, line_end):
1812
"""Convert a network fulltext record to record."""
1813
meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
1814
record_meta = bytes[line_end+4:line_end+4+meta_len]
1815
key, parents = bencode.bdecode_as_tuple(record_meta)
1816
if parents == 'nil':
1818
fulltext = bytes[line_end+4+meta_len:]
1819
return [FulltextContentFactory(key, parents, None, fulltext)]
1822
def _length_prefix(bytes):
1823
return struct.pack('!L', len(bytes))
1826
def record_to_fulltext_bytes(record):
1827
if record.parents is None:
1830
parents = record.parents
1831
record_meta = bencode.bencode((record.key, parents))
1832
record_content = record.get_bytes_as('fulltext')
1833
return "fulltext\n%s%s%s" % (
1834
_length_prefix(record_meta), record_meta, record_content)
1837
def sort_groupcompress(parent_map):
1838
"""Sort and group the keys in parent_map into groupcompress order.
1840
groupcompress is defined (currently) as reverse-topological order, grouped
1843
:return: A sorted-list of keys
1845
# gc-optimal ordering is approximately reverse topological,
1846
# properly grouped by file-id.
1848
for item in parent_map.iteritems():
1850
if isinstance(key, str) or len(key) == 1:
1855
per_prefix_map[prefix].append(item)
1857
per_prefix_map[prefix] = [item]
1860
for prefix in sorted(per_prefix_map):
1861
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))