1
# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
4
# Johan Rydberg <jrydberg@gnu.org>
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
11
# This program is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
"""Versioned text file storage api."""
23
from cStringIO import StringIO
26
from zlib import adler32
28
from bzrlib.lazy_import import lazy_import
29
lazy_import(globals(), """
43
from bzrlib.graph import DictParentsProvider, Graph, _StackedParentsProvider
44
from bzrlib.transport.memory import MemoryTransport
46
from bzrlib.inter import InterObject
47
from bzrlib.registry import Registry
48
from bzrlib.symbol_versioning import *
49
from bzrlib.textmerge import TextMerge
50
from bzrlib.util import bencode
53
adapter_registry = Registry()
54
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
55
'DeltaPlainToFullText')
56
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
58
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
59
'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
60
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
61
'bzrlib.knit', 'DeltaAnnotatedToFullText')
62
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
63
'bzrlib.knit', 'FTAnnotatedToUnannotated')
64
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
65
'bzrlib.knit', 'FTAnnotatedToFullText')
66
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
67
# 'bzrlib.knit', 'FTAnnotatedToChunked')
70
class ContentFactory(object):
71
"""Abstract interface for insertion and retrieval from a VersionedFile.
73
:ivar sha1: None, or the sha1 of the content fulltext.
74
:ivar storage_kind: The native storage kind of this factory. One of
75
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
76
'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
77
'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
78
:ivar key: The key of this content. Each key is a tuple with a single
80
:ivar parents: A tuple of parent keys for self.key. If the object has
81
no parent information, None (as opposed to () for an empty list of
86
"""Create a ContentFactory."""
88
self.storage_kind = None
93
class ChunkedContentFactory(ContentFactory):
94
"""Static data content factory.
96
This takes a 'chunked' list of strings. The only requirement on 'chunked' is
97
that ''.join(lines) becomes a valid fulltext. A tuple of a single string
98
satisfies this, as does a list of lines.
100
:ivar sha1: None, or the sha1 of the content fulltext.
101
:ivar storage_kind: The native storage kind of this factory. Always
103
:ivar key: The key of this content. Each key is a tuple with a single
105
:ivar parents: A tuple of parent keys for self.key. If the object has
106
no parent information, None (as opposed to () for an empty list of
110
def __init__(self, key, parents, sha1, chunks):
111
"""Create a ContentFactory."""
113
self.storage_kind = 'chunked'
115
self.parents = parents
116
self._chunks = chunks
118
def get_bytes_as(self, storage_kind):
119
if storage_kind == 'chunked':
121
elif storage_kind == 'fulltext':
122
return ''.join(self._chunks)
123
raise errors.UnavailableRepresentation(self.key, storage_kind,
127
class FulltextContentFactory(ContentFactory):
128
"""Static data content factory.
130
This takes a fulltext when created and just returns that during
131
get_bytes_as('fulltext').
133
:ivar sha1: None, or the sha1 of the content fulltext.
134
:ivar storage_kind: The native storage kind of this factory. Always
136
:ivar key: The key of this content. Each key is a tuple with a single
138
:ivar parents: A tuple of parent keys for self.key. If the object has
139
no parent information, None (as opposed to () for an empty list of
143
def __init__(self, key, parents, sha1, text):
144
"""Create a ContentFactory."""
146
self.storage_kind = 'fulltext'
148
self.parents = parents
151
def get_bytes_as(self, storage_kind):
152
if storage_kind == self.storage_kind:
154
elif storage_kind == 'chunked':
156
raise errors.UnavailableRepresentation(self.key, storage_kind,
160
class AbsentContentFactory(ContentFactory):
161
"""A placeholder content factory for unavailable texts.
164
:ivar storage_kind: 'absent'.
165
:ivar key: The key of this content. Each key is a tuple with a single
170
def __init__(self, key):
171
"""Create a ContentFactory."""
173
self.storage_kind = 'absent'
178
class AdapterFactory(ContentFactory):
179
"""A content factory to adapt between key prefix's."""
181
def __init__(self, key, parents, adapted):
182
"""Create an adapter factory instance."""
184
self.parents = parents
185
self._adapted = adapted
187
def __getattr__(self, attr):
188
"""Return a member from the adapted object."""
189
if attr in ('key', 'parents'):
190
return self.__dict__[attr]
192
return getattr(self._adapted, attr)
195
def filter_absent(record_stream):
196
"""Adapt a record stream to remove absent records."""
197
for record in record_stream:
198
if record.storage_kind != 'absent':
202
class VersionedFile(object):
203
"""Versioned text file storage.
205
A versioned file manages versions of line-based text files,
206
keeping track of the originating version for each line.
208
To clients the "lines" of the file are represented as a list of
209
strings. These strings will typically have terminal newline
210
characters, but this is not required. In particular files commonly
211
do not have a newline at the end of the file.
213
Texts are identified by a version-id string.
217
def check_not_reserved_id(version_id):
218
revision.check_not_reserved_id(version_id)
220
def copy_to(self, name, transport):
    """Copy this versioned file to ``name`` on ``transport``.

    The base class supplies no implementation.

    :param name: Name to copy to; not used by this implementation.
    :param transport: Transport to copy onto; not used by this
        implementation.
    :raises NotImplementedError: Always; the bound ``copy_to`` method is
        passed as the exception argument.
    """
    unimplemented = self.copy_to
    raise NotImplementedError(unimplemented)
224
def get_record_stream(self, versions, ordering, include_delta_closure):
225
"""Get a stream of records for versions.
227
:param versions: The versions to include. Each version is a tuple
229
:param ordering: Either 'unordered' or 'topological'. A topologically
230
sorted stream has compression parents strictly before their
232
:param include_delta_closure: If True then the closure across any
233
compression parents will be included (in the data content of the
234
stream, not in the emitted records). This guarantees that
235
'fulltext' can be used successfully on every record.
236
:return: An iterator of ContentFactory objects, each of which is only
237
valid until the iterator is advanced.
239
raise NotImplementedError(self.get_record_stream)
241
def has_version(self, version_id):
    """Report whether ``version_id`` is present.

    The base class supplies no implementation.

    :param version_id: The version to look for; not used by this
        implementation.
    :raises NotImplementedError: Always; the bound ``has_version`` method
        is passed as the exception argument.
    """
    unimplemented = self.has_version
    raise NotImplementedError(unimplemented)
245
def insert_record_stream(self, stream):
246
"""Insert a record stream into this versioned file.
248
:param stream: A stream of records to insert.
250
:seealso VersionedFile.get_record_stream:
252
raise NotImplementedError
254
def add_lines(self, version_id, parents, lines, parent_texts=None,
255
left_matching_blocks=None, nostore_sha=None, random_id=False,
257
"""Add a single text on top of the versioned file.
259
Must raise RevisionAlreadyPresent if the new version is
260
already present in file history.
262
Must raise RevisionNotPresent if any of the given parents are
263
not present in file history.
265
:param lines: A list of lines. Each line must be a bytestring. And all
266
of them except the last must be terminated with \n and contain no
267
other \n's. The last line may either contain no \n's or a single
268
terminated \n. If the lines list does meet this constraint the add
269
routine may error or may succeed - but you will be unable to read
270
the data back accurately. (Checking the lines have been split
271
correctly is expensive and extremely unlikely to catch bugs so it
272
is not done at runtime unless check_content is True.)
273
:param parent_texts: An optional dictionary containing the opaque
274
representations of some or all of the parents of version_id to
275
allow delta optimisations. VERY IMPORTANT: the texts must be those
276
returned by add_lines or data corruption can be caused.
277
:param left_matching_blocks: a hint about which areas are common
278
between the text and its left-hand-parent. The format is
279
the SequenceMatcher.get_matching_blocks format.
280
:param nostore_sha: Raise ExistingContent and do not add the lines to
281
the versioned file if the digest of the lines matches this.
282
:param random_id: If True a random id has been selected rather than
283
an id determined by some deterministic process such as a converter
284
from a foreign VCS. When True the backend may choose not to check
285
for uniqueness of the resulting key within the versioned file, so
286
this should only be done when the result is expected to be unique
288
:param check_content: If True, the lines supplied are verified to be
289
bytestrings that are correctly formed lines.
290
:return: The text sha1, the number of bytes in the text, and an opaque
291
representation of the inserted version which can be provided
292
back to future add_lines calls in the parent_texts dictionary.
294
self._check_write_ok()
295
return self._add_lines(version_id, parents, lines, parent_texts,
296
left_matching_blocks, nostore_sha, random_id, check_content)
298
def _add_lines(self, version_id, parents, lines, parent_texts,
               left_matching_blocks, nostore_sha, random_id, check_content):
    """Class-specific part of ``add_lines``.

    None of the arguments are used by this implementation.

    :raises NotImplementedError: Always. The exception argument is the
        bound public ``add_lines`` method, not ``_add_lines``.
    """
    public_entry_point = self.add_lines
    raise NotImplementedError(public_entry_point)
303
def add_lines_with_ghosts(self, version_id, parents, lines,
304
parent_texts=None, nostore_sha=None, random_id=False,
305
check_content=True, left_matching_blocks=None):
306
"""Add lines to the versioned file, allowing ghosts to be present.
308
This takes the same parameters as add_lines and returns the same.
310
self._check_write_ok()
311
return self._add_lines_with_ghosts(version_id, parents, lines,
312
parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
314
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
                           nostore_sha, random_id, check_content,
                           left_matching_blocks):
    """Class-specific part of ``add_lines_with_ghosts``.

    None of the arguments are used by this implementation.

    :raises NotImplementedError: Always. The exception argument is the
        bound public ``add_lines_with_ghosts`` method.
    """
    public_entry_point = self.add_lines_with_ghosts
    raise NotImplementedError(public_entry_point)
319
def check(self, progress_bar=None):
    """Check the versioned file for integrity.

    The base class supplies no implementation.

    :param progress_bar: Optional; not used by this implementation.
    :raises NotImplementedError: Always; the bound ``check`` method is
        passed as the exception argument.
    """
    unimplemented = self.check
    raise NotImplementedError(unimplemented)
323
def _check_lines_not_unicode(self, lines):
324
"""Check that lines being added to a versioned file are not unicode."""
326
if line.__class__ is not str:
327
raise errors.BzrBadParameterUnicode("lines")
329
def _check_lines_are_lines(self, lines):
330
"""Check that the lines really are full lines without inline EOL."""
332
if '\n' in line[:-1]:
333
raise errors.BzrBadParameterContainsNewline("lines")
335
def get_format_signature(self):
336
"""Get a text description of the data encoding in this file.
340
raise NotImplementedError(self.get_format_signature)
342
def make_mpdiffs(self, version_ids):
343
"""Create multiparent diffs for specified versions."""
344
knit_versions = set()
345
knit_versions.update(version_ids)
346
parent_map = self.get_parent_map(version_ids)
347
for version_id in version_ids:
349
knit_versions.update(parent_map[version_id])
351
raise errors.RevisionNotPresent(version_id, self)
352
# We need to filter out ghosts, because we can't diff against them.
353
knit_versions = set(self.get_parent_map(knit_versions).keys())
354
lines = dict(zip(knit_versions,
355
self._get_lf_split_line_list(knit_versions)))
357
for version_id in version_ids:
358
target = lines[version_id]
360
parents = [lines[p] for p in parent_map[version_id] if p in
363
# I don't know how this could ever trigger.
364
# parent_map[version_id] was already triggered in the previous
365
# for loop, and lines[p] has the 'if p in knit_versions' check,
366
# so we again won't have a KeyError.
367
raise errors.RevisionNotPresent(version_id, self)
369
left_parent_blocks = self._extract_blocks(version_id,
372
left_parent_blocks = None
373
diffs.append(multiparent.MultiParent.from_lines(target, parents,
377
def _extract_blocks(self, version_id, source, target):
380
def add_mpdiffs(self, records):
381
"""Add mpdiffs to this VersionedFile.
383
Records should be iterables of version, parents, expected_sha1,
384
mpdiff. mpdiff should be a MultiParent instance.
386
# Does this need to call self._check_write_ok()? (IanC 20070919)
388
mpvf = multiparent.MultiMemoryVersionedFile()
390
for version, parent_ids, expected_sha1, mpdiff in records:
391
versions.append(version)
392
mpvf.add_diff(mpdiff, version, parent_ids)
393
needed_parents = set()
394
for version, parent_ids, expected_sha1, mpdiff in records:
395
needed_parents.update(p for p in parent_ids
396
if not mpvf.has_version(p))
397
present_parents = set(self.get_parent_map(needed_parents).keys())
398
for parent_id, lines in zip(present_parents,
399
self._get_lf_split_line_list(present_parents)):
400
mpvf.add_version(lines, parent_id, [])
401
for (version, parent_ids, expected_sha1, mpdiff), lines in\
402
zip(records, mpvf.get_line_list(versions)):
403
if len(parent_ids) == 1:
404
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
405
mpvf.get_diff(parent_ids[0]).num_lines()))
407
left_matching_blocks = None
409
_, _, version_text = self.add_lines_with_ghosts(version,
410
parent_ids, lines, vf_parents,
411
left_matching_blocks=left_matching_blocks)
412
except NotImplementedError:
413
# The vf can't handle ghosts, so add lines normally, which will
414
# (reasonably) fail if there are ghosts in the data.
415
_, _, version_text = self.add_lines(version,
416
parent_ids, lines, vf_parents,
417
left_matching_blocks=left_matching_blocks)
418
vf_parents[version] = version_text
419
sha1s = self.get_sha1s(versions)
420
for version, parent_ids, expected_sha1, mpdiff in records:
421
if expected_sha1 != sha1s[version]:
422
raise errors.VersionedFileInvalidChecksum(version)
424
def get_text(self, version_id):
425
"""Return version contents as a text string.
427
Raises RevisionNotPresent if version is not present in
430
return ''.join(self.get_lines(version_id))
431
get_string = get_text
433
def get_texts(self, version_ids):
434
"""Return the texts of listed versions as a list of strings.
436
Raises RevisionNotPresent if version is not present in
439
return [''.join(self.get_lines(v)) for v in version_ids]
441
def get_lines(self, version_id):
442
"""Return version contents as a sequence of lines.
444
Raises RevisionNotPresent if version is not present in
447
raise NotImplementedError(self.get_lines)
449
def _get_lf_split_line_list(self, version_ids):
    """Return the texts of ``version_ids`` as lists of lines.

    Each text returned by ``self.get_texts(version_ids)`` is wrapped in a
    ``StringIO`` and split with ``readlines()``.

    :param version_ids: Passed unchanged to ``self.get_texts``.
    :return: A list with one list of lines per text, in the order
        ``get_texts`` produced them.
    """
    split_texts = []
    for text in self.get_texts(version_ids):
        split_texts.append(StringIO(text).readlines())
    return split_texts
452
def get_ancestry(self, version_ids, topo_sorted=True):
453
"""Return a list of all ancestors of given version(s). This
454
will not include the null revision.
456
This list will not be topologically sorted if topo_sorted=False is
459
Must raise RevisionNotPresent if any of the given versions are
460
not present in file history."""
461
if isinstance(version_ids, basestring):
462
version_ids = [version_ids]
463
raise NotImplementedError(self.get_ancestry)
465
def get_ancestry_with_ghosts(self, version_ids):
466
"""Return a list of all ancestors of given version(s). This
467
will not include the null revision.
469
Must raise RevisionNotPresent if any of the given versions are
470
not present in file history.
472
Ghosts that are known about will be included in ancestry list,
473
but are not explicitly marked.
475
raise NotImplementedError(self.get_ancestry_with_ghosts)
477
def get_parent_map(self, version_ids):
478
"""Get a map of the parents of version_ids.
480
:param version_ids: The version ids to look up parents for.
481
:return: A mapping from version id to parents.
483
raise NotImplementedError(self.get_parent_map)
485
def get_parents_with_ghosts(self, version_id):
486
"""Return version names for parents of version_id.
488
Will raise RevisionNotPresent if version_id is not present
491
Ghosts that are known about will be included in the parent list,
492
but are not explicitly marked.
495
return list(self.get_parent_map([version_id])[version_id])
497
raise errors.RevisionNotPresent(version_id, self)
499
def annotate(self, version_id):
500
"""Return a list of (version-id, line) tuples for version_id.
502
:raise RevisionNotPresent: If the given version is
503
not present in file history.
505
raise NotImplementedError(self.annotate)
507
def iter_lines_added_or_present_in_versions(self, version_ids=None,
509
"""Iterate over the lines in the versioned file from version_ids.
511
This may return lines from other versions. Each item the returned
512
iterator yields is a tuple of a line and a text version that that line
513
is present in (not introduced in).
515
Ordering of results is in whatever order is most suitable for the
516
underlying storage format.
518
If a progress bar is supplied, it may be used to indicate progress.
519
The caller is responsible for cleaning up progress bars (because this
522
NOTES: Lines are normalised: they will all have \n terminators.
523
Lines are returned in arbitrary order.
525
:return: An iterator over (line, version_id).
527
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
529
def plan_merge(self, ver_a, ver_b):
530
"""Return pseudo-annotation indicating how the two versions merge.
532
This is computed between versions a and b and their common
535
Weave lines present in none of them are skipped entirely.
538
killed-base Dead in base revision
539
killed-both Killed in each revision
542
unchanged Alive in both a and b (possibly created in both)
545
ghost-a Killed in a, unborn in b
546
ghost-b Killed in b, unborn in a
547
irrelevant Not in either revision
549
raise NotImplementedError(VersionedFile.plan_merge)
551
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                b_marker=TextMerge.B_MARKER):
    """Run a ``PlanWeaveMerge`` over ``plan`` and return its first result.

    :param plan: Passed to ``PlanWeaveMerge`` unchanged.
    :param a_marker: Passed to ``PlanWeaveMerge``; defaults to
        ``TextMerge.A_MARKER``.
    :param b_marker: Passed to ``PlanWeaveMerge``; defaults to
        ``TextMerge.B_MARKER``.
    :return: Element 0 of what ``merge_lines()`` returns.
    """
    merger = PlanWeaveMerge(plan, a_marker, b_marker)
    merge_result = merger.merge_lines()
    return merge_result[0]
556
class RecordingVersionedFilesDecorator(object):
557
"""A minimal versioned files that records calls made on it.
559
Only enough methods have been added to support tests using it to date.
561
:ivar calls: A list of the calls made; can be reset at any time by
565
def __init__(self, backing_vf):
566
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
568
:param backing_vf: The versioned file to answer all methods.
570
self._backing_vf = backing_vf
573
def add_lines(self, key, parents, lines, parent_texts=None,
574
left_matching_blocks=None, nostore_sha=None, random_id=False,
576
self.calls.append(("add_lines", key, parents, lines, parent_texts,
577
left_matching_blocks, nostore_sha, random_id, check_content))
578
return self._backing_vf.add_lines(key, parents, lines, parent_texts,
579
left_matching_blocks, nostore_sha, random_id, check_content)
582
self._backing_vf.check()
584
def get_parent_map(self, keys):
    """Log the call in ``self.calls`` and delegate to the backing vf.

    :param keys: Keys to look up. A ``copy()`` of them is stored in the
        call record; the original object is what gets forwarded.
    :return: Whatever ``self._backing_vf.get_parent_map(keys)`` returns.
    """
    record_call = self.calls.append
    record_call(("get_parent_map", copy(keys)))
    backing = self._backing_vf
    return backing.get_parent_map(keys)
588
def get_record_stream(self, keys, sort_order, include_delta_closure):
    """Log the call in ``self.calls`` and delegate to the backing vf.

    :param keys: Keys to stream. They are materialised with ``list()`` for
        the call record; the original ``keys`` object is what gets
        forwarded to the backing versioned file.
    :param sort_order: Recorded and forwarded unchanged.
    :param include_delta_closure: Recorded and forwarded unchanged.
    :return: Whatever ``self._backing_vf.get_record_stream`` returns.
    """
    record_call = self.calls.append
    logged_keys = list(keys)
    record_call(("get_record_stream", logged_keys, sort_order,
                 include_delta_closure))
    backing = self._backing_vf
    return backing.get_record_stream(keys, sort_order,
                                     include_delta_closure)
594
def get_sha1s(self, keys):
    """Log the call in ``self.calls`` and delegate to the backing vf.

    :param keys: Keys to look up. A ``copy()`` of them is stored in the
        call record; the original object is what gets forwarded.
    :return: Whatever ``self._backing_vf.get_sha1s(keys)`` returns.
    """
    record_call = self.calls.append
    record_call(("get_sha1s", copy(keys)))
    backing = self._backing_vf
    return backing.get_sha1s(keys)
598
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Log the call in ``self.calls`` and delegate to the backing vf.

    :param keys: Keys to iterate lines for. A ``copy()`` of them is stored
        in the call record; the original object is what gets forwarded.
    :param pb: Forwarded as the ``pb`` keyword argument; it is not part of
        the call record.
    :return: Whatever the backing versioned file's
        ``iter_lines_added_or_present_in_keys`` returns.
    """
    record_call = self.calls.append
    record_call(("iter_lines_added_or_present_in_keys", copy(keys)))
    backing = self._backing_vf
    return backing.iter_lines_added_or_present_in_keys(keys, pb=pb)
603
self.calls.append(("keys",))
604
return self._backing_vf.keys()
607
class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
608
"""A VF that records calls, and returns keys in specific order.
610
:ivar calls: A list of the calls made; can be reset at any time by
614
def __init__(self, backing_vf, key_priority):
615
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
617
:param backing_vf: The versioned file to answer all methods.
618
:param key_priority: A dictionary defining what order keys should be
619
returned from an 'unordered' get_record_stream request.
620
Keys with lower priority are returned first, keys not present in
621
the map get an implicit priority of 0, and are returned in
622
lexicographical order.
624
RecordingVersionedFilesDecorator.__init__(self, backing_vf)
625
self._key_priority = key_priority
627
def get_record_stream(self, keys, sort_order, include_delta_closure):
628
self.calls.append(("get_record_stream", list(keys), sort_order,
629
include_delta_closure))
630
if sort_order == 'unordered':
632
return (self._key_priority.get(key, 0), key)
633
# Use a defined order by asking for the keys one-by-one from the
635
for key in sorted(keys, key=sort_key):
636
for record in self._backing_vf.get_record_stream([key],
637
'unordered', include_delta_closure):
640
for record in self._backing_vf.get_record_stream(keys, sort_order,
641
include_delta_closure):
645
class KeyMapper(object):
646
"""KeyMappers map between keys and underlying partitioned storage."""
649
"""Map key to an underlying storage identifier.
651
:param key: A key tuple e.g. ('file-id', 'revision-id').
652
:return: An underlying storage identifier, specific to the partitioning
655
raise NotImplementedError(self.map)
657
def unmap(self, partition_id):
658
"""Map a partitioned storage id back to a key prefix.
660
:param partition_id: The underlying partition id.
661
:return: As much of a key (or prefix) as is derivable from the partition
664
raise NotImplementedError(self.unmap)
667
class ConstantMapper(KeyMapper):
668
"""A key mapper that maps to a constant result."""
670
def __init__(self, result):
    """Remember the constant this mapper hands back.

    :param result: Stored as ``self._result``.
    """
    self._result = result
675
"""See KeyMapper.map()."""
679
class URLEscapeMapper(KeyMapper):
680
"""Base class for use with transport backed storage.
682
This provides a map and unmap wrapper that respectively url escape and
683
unescape their outputs and inputs.
687
"""See KeyMapper.map()."""
688
return urllib.quote(self._map(key))
690
def unmap(self, partition_id):
    """See KeyMapper.unmap().

    URL-unquotes ``partition_id`` and passes the result to
    ``self._unmap``.

    :param partition_id: The URL-quoted partition id.
    :return: Whatever ``self._unmap`` returns for the unquoted id.
    """
    inner_unmap = self._unmap
    unquoted_id = urllib.unquote(partition_id)
    return inner_unmap(unquoted_id)
695
class PrefixMapper(URLEscapeMapper):
696
"""A key mapper that extracts the first component of a key.
698
This mapper is for use with a transport based backend.
702
"""See KeyMapper.map()."""
705
def _unmap(self, partition_id):
    """See KeyMapper.unmap().

    :param partition_id: The partition id.
    :return: A 1-tuple containing ``partition_id`` itself.
    """
    key_prefix = (partition_id,)
    return key_prefix
710
class HashPrefixMapper(URLEscapeMapper):
711
"""A key mapper that combines the first component of a key with a hash.
713
This mapper is for use with a transport based backend.
717
"""See KeyMapper.map()."""
718
prefix = self._escape(key[0])
719
return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
721
def _escape(self, prefix):
722
"""No escaping needed here."""
725
def _unmap(self, partition_id):
    """See KeyMapper.unmap().

    Takes ``osutils.basename(partition_id)`` and passes it through
    ``self._unescape``.

    :param partition_id: The partition id.
    :return: A 1-tuple holding the unescaped basename.
    """
    unescape = self._unescape
    final_component = osutils.basename(partition_id)
    return (unescape(final_component),)
729
def _unescape(self, basename):
730
"""No unescaping needed for HashPrefixMapper."""
734
class HashEscapedPrefixMapper(HashPrefixMapper):
735
"""Combines the escaped first component of a key with a hash.
737
This mapper is for use with a transport based backend.
740
_safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
742
def _escape(self, prefix):
743
"""Turn a key element into a filesystem safe string.
745
This is similar to a plain urllib.quote, except
746
it uses specific safe characters, so that it doesn't
747
have to translate a lot of valid file ids.
749
# @ does not get escaped. This is because it is a valid
750
# filesystem character we use all the time, and it looks
751
# a lot better than seeing %40 all the time.
752
r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
756
def _unescape(self, basename):
    """Undo the escaping of a name by URL-unquoting it.

    :param basename: The escaped name.
    :return: The result of ``urllib.unquote(basename)``.
    """
    unquoted = urllib.unquote(basename)
    return unquoted
761
def make_versioned_files_factory(versioned_file_factory, mapper):
762
"""Create a ThunkedVersionedFiles factory.
764
This will create a callable which when called creates a
765
ThunkedVersionedFiles on a transport, using mapper to access individual
766
versioned files, and versioned_file_factory to create each individual file.
768
def factory(transport):
769
return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
774
class VersionedFiles(object):
775
"""Storage for many versioned files.
777
This object allows a single keyspace for accessing the history graph and
778
contents of named bytestrings.
780
Currently no implementation allows the graph of different key prefixes to
781
intersect, but the API does allow such implementations in the future.
783
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
784
may have a different length key-size, but that size will be constant for
785
all texts added to or retrieved from it. For instance, bzrlib uses
786
instances with a key-size of 2 for storing user files in a repository, with
787
the first element the fileid, and the second the version of that file.
789
The use of tuples allows a single code base to support several different
790
uses with only the mapping logic changing from instance to instance.
793
def add_lines(self, key, parents, lines, parent_texts=None,
794
left_matching_blocks=None, nostore_sha=None, random_id=False,
796
"""Add a text to the store.
798
:param key: The key tuple of the text to add. If the last element is
799
None, a CHK string will be generated during the addition.
800
:param parents: The parents key tuples of the text to add.
801
:param lines: A list of lines. Each line must be a bytestring. And all
802
of them except the last must be terminated with \n and contain no
803
other \n's. The last line may either contain no \n's or a single
804
terminating \n. If the lines list does meet this constraint the add
805
routine may error or may succeed - but you will be unable to read
806
the data back accurately. (Checking the lines have been split
807
correctly is expensive and extremely unlikely to catch bugs so it
808
is not done at runtime unless check_content is True.)
809
:param parent_texts: An optional dictionary containing the opaque
810
representations of some or all of the parents of version_id to
811
allow delta optimisations. VERY IMPORTANT: the texts must be those
812
returned by add_lines or data corruption can be caused.
813
:param left_matching_blocks: a hint about which areas are common
814
between the text and its left-hand-parent. The format is
815
the SequenceMatcher.get_matching_blocks format.
816
:param nostore_sha: Raise ExistingContent and do not add the lines to
817
the versioned file if the digest of the lines matches this.
818
:param random_id: If True a random id has been selected rather than
819
an id determined by some deterministic process such as a converter
820
from a foreign VCS. When True the backend may choose not to check
821
for uniqueness of the resulting key within the versioned file, so
822
this should only be done when the result is expected to be unique
824
:param check_content: If True, the lines supplied are verified to be
825
bytestrings that are correctly formed lines.
826
:return: The text sha1, the number of bytes in the text, and an opaque
827
representation of the inserted version which can be provided
828
back to future add_lines calls in the parent_texts dictionary.
830
raise NotImplementedError(self.add_lines)
832
def add_mpdiffs(self, records):
833
"""Add mpdiffs to this VersionedFile.
835
Records should be iterables of version, parents, expected_sha1,
836
mpdiff. mpdiff should be a MultiParent instance.
839
mpvf = multiparent.MultiMemoryVersionedFile()
841
for version, parent_ids, expected_sha1, mpdiff in records:
842
versions.append(version)
843
mpvf.add_diff(mpdiff, version, parent_ids)
844
needed_parents = set()
845
for version, parent_ids, expected_sha1, mpdiff in records:
846
needed_parents.update(p for p in parent_ids
847
if not mpvf.has_version(p))
848
# It seems likely that adding all the present parents as fulltexts can
849
# easily exhaust memory.
850
chunks_to_lines = osutils.chunks_to_lines
851
for record in self.get_record_stream(needed_parents, 'unordered',
853
if record.storage_kind == 'absent':
855
mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
857
for (key, parent_keys, expected_sha1, mpdiff), lines in\
858
zip(records, mpvf.get_line_list(versions)):
859
if len(parent_keys) == 1:
860
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
861
mpvf.get_diff(parent_keys[0]).num_lines()))
863
left_matching_blocks = None
864
version_sha1, _, version_text = self.add_lines(key,
865
parent_keys, lines, vf_parents,
866
left_matching_blocks=left_matching_blocks)
867
if version_sha1 != expected_sha1:
868
raise errors.VersionedFileInvalidChecksum(version)
869
vf_parents[key] = version_text
871
def annotate(self, key):
872
"""Return a list of (version-key, line) tuples for the text of key.
874
:raise RevisionNotPresent: If the key is not present.
876
raise NotImplementedError(self.annotate)
878
def check(self, progress_bar=None):
    """Check this object for integrity.

    The base class supplies no implementation.

    :param progress_bar: Optional; not used by this implementation.
    :raises NotImplementedError: Always; the bound ``check`` method is
        passed as the exception argument.
    """
    unimplemented = self.check
    raise NotImplementedError(unimplemented)
883
def check_not_reserved_id(version_id):
884
revision.check_not_reserved_id(version_id)
886
def _check_lines_not_unicode(self, lines):
887
"""Check that lines being added to a versioned file are not unicode."""
889
if line.__class__ is not str:
890
raise errors.BzrBadParameterUnicode("lines")
892
def _check_lines_are_lines(self, lines):
893
"""Check that the lines really are full lines without inline EOL."""
895
if '\n' in line[:-1]:
896
raise errors.BzrBadParameterContainsNewline("lines")
898
def get_parent_map(self, keys):
899
"""Get a map of the parents of keys.
901
:param keys: The keys to look up parents for.
902
:return: A mapping from keys to parents. Absent keys are absent from
905
raise NotImplementedError(self.get_parent_map)
907
def get_record_stream(self, keys, ordering, include_delta_closure):
908
"""Get a stream of records for keys.
910
:param keys: The keys to include.
911
:param ordering: Either 'unordered' or 'topological'. A topologically
912
sorted stream has compression parents strictly before their
914
:param include_delta_closure: If True then the closure across any
915
compression parents will be included (in the opaque data).
916
:return: An iterator of ContentFactory objects, each of which is only
917
valid until the iterator is advanced.
919
raise NotImplementedError(self.get_record_stream)
921
def get_sha1s(self, keys):
922
"""Get the sha1's of the texts for the given keys.
924
:param keys: The names of the keys to lookup
925
:return: a dict from key to sha1 digest. Keys of texts which are not
926
present in the store are not present in the returned
929
raise NotImplementedError(self.get_sha1s)
931
has_key = index._has_key_from_parent_map
933
def get_missing_compression_parent_keys(self):
934
"""Return an iterable of keys of missing compression parents.
936
Check this after calling insert_record_stream to find out if there are
937
any missing compression parents. If there are, the records that
938
depend on them are not able to be inserted safely. The precise
939
behaviour depends on the concrete VersionedFiles class in use.
941
Classes that do not support this will raise NotImplementedError.
943
raise NotImplementedError(self.get_missing_compression_parent_keys)
945
def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFile.get_record_stream:
    :raise NotImplementedError: Always, in this base implementation.
    """
    # Pass the bound method, as the sibling abstract methods do, so the
    # error identifies which operation is unimplemented.
    raise NotImplementedError(self.insert_record_stream)
954
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.

    :param keys: The keys whose lines should be iterated.
    :param pb: Optional progress bar.
    :return: An iterator over (line, key).
    :raise NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
978
def keys(self):
    """Return an iterable of the keys for all the contained texts.

    :raise NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self.keys)
981
def make_mpdiffs(self, keys):
    """Create multiparent diffs for specified keys.

    :param keys: The keys to build diffs for.
    :return: A list of multiparent diffs, one per key, in the order the
        keys were supplied.
    :raise RevisionNotPresent: If any requested key has no entry in the
        parent map.
    """
    # Remember the caller's order; the set forms below are for membership
    # tests and set arithmetic only.
    keys_order = tuple(keys)
    keys = frozenset(keys)
    knit_keys = set(keys)
    parent_map = self.get_parent_map(keys)
    for parent_keys in parent_map.itervalues():
        if parent_keys:
            knit_keys.update(parent_keys)
    missing_keys = keys - set(parent_map)
    if missing_keys:
        raise errors.RevisionNotPresent(list(missing_keys)[0], self)
    # We need to filter out ghosts, because we can't diff against them.
    maybe_ghosts = knit_keys - keys
    ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
    knit_keys.difference_update(ghosts)
    lines = {}
    chunks_to_lines = osutils.chunks_to_lines
    for record in self.get_record_stream(knit_keys, 'topological', True):
        lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
        # line_block_dict = {}
        # for parent, blocks in record.extract_line_blocks():
        #   line_blocks[parent] = blocks
        # line_blocks[record.key] = line_block_dict
    diffs = []
    for key in keys_order:
        target = lines[key]
        parents = parent_map[key] or []
        # Note that filtering knit_keys can lead to a parent difference
        # between the creation and the application of the mpdiff.
        parent_lines = [lines[p] for p in parents if p in knit_keys]
        if len(parent_lines) > 0:
            left_parent_blocks = self._extract_blocks(key, parent_lines[0],
                target)
        else:
            left_parent_blocks = None
        diffs.append(multiparent.MultiParent.from_lines(target,
            parent_lines, left_parent_blocks))
    return diffs
1021
# missing_keys is not implemented here; it is bound to the shared helper
# index._missing_keys_from_parent_map.
missing_keys = index._missing_keys_from_parent_map
1023
def _extract_blocks(self, version_id, source, target):
1027
class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.

    Keys are tuples: everything but the last element is the prefix (mapped
    to a path by the mapper), and the last element is the version id inside
    the per-path VersionedFile.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles.

        :param transport: Transport passed to file_factory for each path.
        :param file_factory: Callable invoked as
            file_factory(path, transport, create=True, get_scope=...).
        :param mapper: Object providing map() (and unmap()) between key
            prefixes and paths.
        :param is_locked: Callable returning true when access is allowed.
        """
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)

        def insert():
            # Prefer the ghost-aware entry point; fall back to plain
            # add_lines for versioned files that do not implement it.
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

        try:
            return insert()
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            return insert()

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        # Re-attach the prefix so callers see full tuple keys.
        return [(prefix + (origin,), line) for origin, line in origins]

    def check(self, progress_bar=None):
        """See VersionedFiles.check()."""
        for prefix, vf in self._iter_all_components():
            vf.check()

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in parent_map.items():
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        """Return the versioned file at path, refusing if not locked."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
            get_scope=lambda:None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

    def _get_all_prefixes(self):
        """Return (path, prefix) pairs for every versioned file present."""
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                # Several files may share a stem; collapse them.
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by partition.
        keys = sorted(keys)
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                include_delta_closure):
                # Records come back keyed by suffix tuples; widen both the
                # parents and the key back into this object's keyspace.
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        """Yield (prefix, suffixes, vf) for each prefix found in keys."""
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in vf_sha1s.iteritems():
                sha1s[prefix + (suffix,)] = sha1
        return sha1s

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # tolerable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \\n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        """Yield (prefix, vf) for every versioned file present."""
        for path, prefix in self._get_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result
1239
class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge.  It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        # With a base, plan both merges and subtract the base plan.
        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        """Plan a merge of ver_a and ver_b using _PlanLCAMerge.

        When base is given, the plan of ver_a against base is subtracted
        from the plan of ver_a against ver_b.
        """
        from bzrlib.merge import _PlanLCAMerge
        graph = Graph(self)
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles.  Also, ghosts
        are permitted.  Only reserved ids are permitted.

        :raise TypeError: If key is not a tuple.
        :raise ValueError: If the id is not reserved, or parents or lines
            is None.
        """
        if type(key) is not tuple:
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Yield records for keys: local texts first, then fallbacks.

        Keys found nowhere are reported last as absent records. The
        ordering and include_delta_closure arguments are not consulted;
        fallbacks are always queried 'unordered' with the delta closure.
        """
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield ChunkedContentFactory(key, parents, None, lines)
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        keys = set(keys)
        result = {}
        if revision.NULL_REVISION in keys:
            keys.remove(revision.NULL_REVISION)
            result[revision.NULL_REVISION] = ()
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result.update(
            _StackedParentsProvider(self._providers).get_parent_map(keys))
        # Entries with no parents are reported as children of NULL_REVISION.
        # Only existing keys are reassigned, so the dict size is stable
        # while iterating.
        for key, parents in result.iteritems():
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result
1343
class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        """Create a PlanWeaveMerge.

        :param plan: An iterable of (state, line) pairs.
        :param a_marker: Marker passed through to TextMerge.
        :param b_marker: Marker passed through to TextMerge.
        """
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = plan

    def _merge_struct(self):
        """Yield merge structures computed from the plan.

        Each yielded item is a 1-tuple of lines (no conflict) or a 2-tuple
        (lines_a, lines_b) for a conflicting region.

        :raise AssertionError: If the plan contains an unknown state.
        """
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            # Flush whatever has been queued since the last resync point.
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change:
                yield (lines_a,)
            elif ch_b and not ch_a:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize.  However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base', 'killed-both'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct
1414
class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        """Create a WeaveMerge.

        :param versionedfile: Object providing plan_merge(ver_a, ver_b).
        :param ver_a: First version to merge.
        :param ver_b: Second version to merge.
        :param a_marker: Marker passed through to PlanWeaveMerge.
        :param b_marker: Marker passed through to PlanWeaveMerge.
        """
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1423
class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def __init__(self, get_parent_map, get_lines):
        """Create a VirtualVersionedFiles.

        :param get_parent_map: Same signature as Repository.get_parent_map.
        :param get_lines: Should return lines for specified key or None if
                          not available.
        """
        super(VirtualVersionedFiles, self).__init__()
        self._get_parent_map = get_parent_map
        self._get_lines = get_lines

    def check(self, progressbar=None):
        """See VersionedFiles.check.

        :note: Always returns True for VirtualVersionedFiles.
        """
        return True

    def add_mpdiffs(self, records):
        """See VersionedFiles.mpdiffs.

        :note: Not implemented for VirtualVersionedFiles.
        """
        raise NotImplementedError(self.add_mpdiffs)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map."""
        # Unwrap the 1-tuples for the string-keyed callback, then wrap both
        # the keys and their parents back into 1-tuples.
        return dict([((k,), tuple([(p,) for p in v]))
            for k,v in self._get_parent_map([k for (k,) in keys]).iteritems()])

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s."""
        ret = {}
        for (k,) in keys:
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                ret[(k,)] = osutils.sha_strings(lines)
        return ret

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream."""
        for (k,) in list(keys):
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                        sha1=osutils.sha_strings(lines),
                        chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)
1494
def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    The storage kind is everything before the first newline; the offset
    points just past that newline.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
1505
class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        # Maps each storage kind to the function that decodes it; each is
        # called as factory(storage_kind, bytes, line_end) and returns an
        # iterable of records.
        self._kind_factory = {'knit-ft-gz':knit.knit_network_to_record,
            'knit-delta-gz':knit.knit_network_to_record,
            'knit-annotated-ft-gz':knit.knit_network_to_record,
            'knit-annotated-delta-gz':knit.knit_network_to_record,
            'knit-delta-closure':knit.knit_delta_closure_to_records,
            'fulltext':fulltext_network_to_record,
            'groupcompress-block':groupcompress.network_block_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for network_bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(
                network_bytes)
            for record in self._kind_factory[storage_kind](
                storage_kind, network_bytes, line_end):
                yield record
1537
def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record.

    After the storage-kind line the layout is: a 4-byte big-endian length,
    that many bytes of bencoded (key, parents) metadata, then the fulltext.

    :param kind: The storage kind (not consulted here).
    :param bytes: The complete network record.
    :param line_end: Offset just past the storage-kind line.
    :return: A one-element list holding a FulltextContentFactory.
    """
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    # 'nil' is the wire marker for "no parent information".
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]
1548
def _length_prefix(bytes):
1549
return struct.pack('!L', len(bytes))
1552
def record_to_fulltext_bytes(record):
    """Serialise record as a 'fulltext' network record.

    The output is the line "fulltext", a 4-byte length prefix, the
    bencoded (key, parents) metadata, then the record's fulltext.

    :param record: A record providing key, parents and
        get_bytes_as('fulltext').
    :return: The serialised bytes.
    """
    # 'nil' is the wire marker for a record with no parent information.
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
1563
def sort_groupcompress(parent_map):
1564
"""Sort and group the keys in parent_map into groupcompress order.
1566
groupcompress is defined (currently) as reverse-topological order, grouped
1569
:return: A sorted-list of keys
1571
# gc-optimal ordering is approximately reverse topological,
1572
# properly grouped by file-id.
1574
for item in parent_map.iteritems():
1576
if isinstance(key, str) or len(key) == 1:
1581
per_prefix_map[prefix].append(item)
1583
per_prefix_map[prefix] = [item]
1586
for prefix in sorted(per_prefix_map):
1587
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))