37
from bzrlib.graph import Graph
38
33
from bzrlib.transport.memory import MemoryTransport
36
from cStringIO import StringIO
40
38
from bzrlib.inter import InterObject
41
from bzrlib.registry import Registry
42
from bzrlib.symbol_versioning import *
43
39
from bzrlib.textmerge import TextMerge
46
adapter_registry = Registry()
47
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
48
'DeltaPlainToFullText')
49
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
51
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
52
'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
53
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
54
'bzrlib.knit', 'DeltaAnnotatedToFullText')
55
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
56
'bzrlib.knit', 'FTAnnotatedToUnannotated')
57
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
58
'bzrlib.knit', 'FTAnnotatedToFullText')
61
class ContentFactory(object):
62
"""Abstract interface for insertion and retrieval from a VersionedFile.
64
:ivar sha1: None, or the sha1 of the content fulltext.
65
:ivar storage_kind: The native storage kind of this factory. One of
66
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
67
'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
68
'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
69
:ivar key: The key of this content. Each key is a tuple with a single
71
:ivar parents: A tuple of parent keys for self.key. If the object has
72
no parent information, None (as opposed to () for an empty list of
77
"""Create a ContentFactory."""
79
self.storage_kind = None
84
class AbsentContentFactory(object):
85
"""A placeholder content factory for unavailable texts.
88
:ivar storage_kind: 'absent'.
89
:ivar key: The key of this content. Each key is a tuple with a single
94
def __init__(self, key):
95
"""Create a ContentFactory."""
97
self.storage_kind = 'absent'
102
def filter_absent(record_stream):
103
"""Adapt a record stream to remove absent records."""
104
for record in record_stream:
105
if record.storage_kind != 'absent':
109
42
class VersionedFile(object):
110
43
"""Versioned text file storage.
128
65
"""Copy this versioned file to name on transport."""
129
66
raise NotImplementedError(self.copy_to)
131
def get_record_stream(self, versions, ordering, include_delta_closure):
132
"""Get a stream of records for versions.
69
"""Return a unsorted list of versions."""
70
raise NotImplementedError(self.versions)
134
:param versions: The versions to include. Each version is a tuple
136
:param ordering: Either 'unordered' or 'topological'. A topologically
137
sorted stream has compression parents strictly before their
139
:param include_delta_closure: If True then the closure across any
140
compression parents will be included (in the opaque data).
141
:return: An iterator of ContentFactory objects, each of which is only
142
valid until the iterator is advanced.
144
raise NotImplementedError(self.get_record_stream)
72
def has_ghost(self, version_id):
    """Return whether version_id is present as a ghost.

    Not implemented here: the call raises unconditionally.

    :param version_id: The version to look up.
    :raise NotImplementedError: Always; the exception argument is this
        bound method.
    """
    raise NotImplementedError(self.has_ghost)
146
76
def has_version(self, version_id):
    """Return whether version_id is present.

    Not implemented here: the call raises unconditionally.

    :param version_id: The version to look up.
    :raise NotImplementedError: Always; the exception argument is this
        bound method.
    """
    raise NotImplementedError(self.has_version)
150
def insert_record_stream(self, stream):
151
"""Insert a record stream into this versioned file.
153
:param stream: A stream of records to insert.
155
:seealso VersionedFile.get_record_stream:
157
raise NotImplementedError
159
80
def add_lines(self, version_id, parents, lines, parent_texts=None,
160
81
left_matching_blocks=None, nostore_sha=None, random_id=False,
161
82
check_content=True):
208
129
def add_lines_with_ghosts(self, version_id, parents, lines,
209
130
parent_texts=None, nostore_sha=None, random_id=False,
210
check_content=True, left_matching_blocks=None):
211
132
"""Add lines to the versioned file, allowing ghosts to be present.
213
134
This takes the same parameters as add_lines and returns the same.
215
136
self._check_write_ok()
216
137
return self._add_lines_with_ghosts(version_id, parents, lines,
217
parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
138
parent_texts, nostore_sha, random_id, check_content)
219
140
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
220
nostore_sha, random_id, check_content, left_matching_blocks):
141
nostore_sha, random_id, check_content):
221
142
"""Helper to do class specific add_lines_with_ghosts."""
222
143
raise NotImplementedError(self.add_lines_with_ghosts)
237
158
if '\n' in line[:-1]:
238
159
raise errors.BzrBadParameterContainsNewline("lines")
161
def _check_write_ok(self):
162
"""Is the versioned file marked as 'finished' ? Raise if it is."""
164
raise errors.OutSideTransaction()
165
if self._access_mode != 'w':
166
raise errors.ReadOnlyObjectDirtiedError(self)
168
def enable_cache(self):
169
"""Tell this versioned file that it should cache any data it reads.
171
This is advisory, implementations do not have to support caching.
175
def clear_cache(self):
176
"""Remove any data cached in the versioned file object.
178
This only needs to be supported if caches are supported
182
def clone_text(self, new_version_id, old_version_id, parents):
    """Add an identical text to old_version_id as new_version_id.

    The write check runs first, then the work is delegated to the
    _clone_text helper and its result is returned unchanged.

    :param new_version_id: The version id to create.
    :param old_version_id: The existing version whose text is copied.
    :param parents: The parents to record for new_version_id.
    :raise RevisionNotPresent: Must be raised if the old version or any
        of the parents are not present in file history.
    :raise RevisionAlreadyPresent: Must be raised if the new version is
        already present in file history.
    """
    self._check_write_ok()
    return self._clone_text(new_version_id, old_version_id, parents)
193
def _clone_text(self, new_version_id, old_version_id, parents):
194
"""Helper function to do the _clone_text work."""
195
raise NotImplementedError(self.clone_text)
197
def create_empty(self, name, transport, mode=None):
198
"""Create a new versioned file of this exact type.
200
:param name: the file name
201
:param transport: the transport
202
:param mode: optional file mode.
204
raise NotImplementedError(self.create_empty)
240
206
def get_format_signature(self):
241
207
"""Get a text description of the data encoding in this file.
247
213
def make_mpdiffs(self, version_ids):
248
214
"""Create multiparent diffs for specified versions."""
249
215
knit_versions = set()
250
knit_versions.update(version_ids)
251
parent_map = self.get_parent_map(version_ids)
252
216
for version_id in version_ids:
254
knit_versions.update(parent_map[version_id])
256
raise RevisionNotPresent(version_id, self)
257
# We need to filter out ghosts, because we can't diff against them.
258
knit_versions = set(self.get_parent_map(knit_versions).keys())
217
knit_versions.add(version_id)
218
knit_versions.update(self.get_parents(version_id))
259
219
lines = dict(zip(knit_versions,
260
220
self._get_lf_split_line_list(knit_versions)))
262
222
for version_id in version_ids:
263
223
target = lines[version_id]
265
parents = [lines[p] for p in parent_map[version_id] if p in
268
raise RevisionNotPresent(version_id, self)
224
parents = [lines[p] for p in self.get_parents(version_id)]
269
225
if len(parents) > 0:
270
226
left_parent_blocks = self._extract_blocks(version_id,
271
227
parents[0], target)
295
251
for version, parent_ids, expected_sha1, mpdiff in records:
296
252
needed_parents.update(p for p in parent_ids
297
253
if not mpvf.has_version(p))
298
present_parents = set(self.get_parent_map(needed_parents).keys())
299
for parent_id, lines in zip(present_parents,
300
self._get_lf_split_line_list(present_parents)):
254
for parent_id, lines in zip(needed_parents,
255
self._get_lf_split_line_list(needed_parents)):
301
256
mpvf.add_version(lines, parent_id, [])
302
257
for (version, parent_ids, expected_sha1, mpdiff), lines in\
303
258
zip(records, mpvf.get_line_list(versions)):
306
261
mpvf.get_diff(parent_ids[0]).num_lines()))
308
263
left_matching_blocks = None
310
_, _, version_text = self.add_lines_with_ghosts(version,
311
parent_ids, lines, vf_parents,
312
left_matching_blocks=left_matching_blocks)
313
except NotImplementedError:
314
# The vf can't handle ghosts, so add lines normally, which will
315
# (reasonably) fail if there are ghosts in the data.
316
_, _, version_text = self.add_lines(version,
317
parent_ids, lines, vf_parents,
318
left_matching_blocks=left_matching_blocks)
264
_, _, version_text = self.add_lines(version, parent_ids, lines,
265
vf_parents, left_matching_blocks=left_matching_blocks)
319
266
vf_parents[version] = version_text
320
267
for (version, parent_ids, expected_sha1, mpdiff), sha1 in\
321
268
zip(records, self.get_sha1s(versions)):
322
269
if expected_sha1 != sha1:
323
270
raise errors.VersionedFileInvalidChecksum(version)
272
def get_sha1(self, version_id):
273
"""Get the stored sha1 sum for the given revision.
275
:param version_id: The name of the version to lookup
277
raise NotImplementedError(self.get_sha1)
325
279
def get_sha1s(self, version_ids):
326
280
"""Get the stored sha1 sums for the given revisions.
382
340
but are not explicitly marked.
384
342
raise NotImplementedError(self.get_ancestry_with_ghosts)
386
def get_parent_map(self, version_ids):
387
"""Get a map of the parents of version_ids.
389
:param version_ids: The version ids to look up parents for.
390
:return: A mapping from version id to parents.
392
raise NotImplementedError(self.get_parent_map)
344
def get_graph(self, version_ids=None):
345
"""Return a graph from the versioned file.
347
Ghosts are not listed or referenced in the graph.
348
:param version_ids: Versions to select.
349
None means retrieve all versions.
351
if version_ids is None:
352
return dict(self.iter_parents(self.versions()))
354
pending = set(version_ids)
356
this_iteration = pending
358
for version, parents in self.iter_parents(this_iteration):
359
result[version] = parents
360
for parent in parents:
366
def get_graph_with_ghosts(self):
367
"""Return a graph for the entire versioned file.
369
Ghosts are referenced in parents list but are not
372
raise NotImplementedError(self.get_graph_with_ghosts)
374
def get_parents(self, version_id):
375
"""Return version names for parents of a version.
377
Must raise RevisionNotPresent if version is not present in
380
raise NotImplementedError(self.get_parents)
394
382
def get_parents_with_ghosts(self, version_id):
395
383
"""Return version names for parents of version_id.
400
388
Ghosts that are known about will be included in the parent list,
401
389
but are not explicitly marked.
404
return list(self.get_parent_map([version_id])[version_id])
406
raise errors.RevisionNotPresent(version_id, self)
391
raise NotImplementedError(self.get_parents_with_ghosts)
393
def annotate_iter(self, version_id):
394
"""Yield list of (version-id, line) pairs for the specified
397
Must raise RevisionNotPresent if the given version is
398
not present in file history.
400
raise NotImplementedError(self.annotate_iter)
408
402
def annotate(self, version_id):
409
"""Return a list of (version-id, line) tuples for version_id.
411
:raise RevisionNotPresent: If the given version is
412
not present in file history.
414
raise NotImplementedError(self.annotate)
416
@deprecated_method(one_five)
403
return list(self.annotate_iter(version_id))
417
405
def join(self, other, pb=None, msg=None, version_ids=None,
418
406
ignore_missing=False):
419
407
"""Integrate versions from other into this versioned file.
455
443
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
445
def iter_parents(self, version_ids):
446
"""Iterate through the parents for many version ids.
448
:param version_ids: An iterable yielding version_ids.
449
:return: An iterator that yields (version_id, parents). Requested
450
version_ids not present in the versioned file are simply skipped.
451
The order is undefined, allowing for different optimisations in
452
the underlying implementation.
454
for version_id in version_ids:
456
yield version_id, tuple(self.get_parents(version_id))
457
except errors.RevisionNotPresent:
460
def transaction_finished(self):
461
"""The transaction that this file was opened in has finished.
463
This records self.finished = True and should cause all mutating
457
468
def plan_merge(self, ver_a, ver_b):
458
469
"""Return pseudo-annotation indicating how the two versions merge.
481
492
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
484
class RecordingVersionedFileDecorator(object):
485
"""A minimal versioned file that records calls made on it.
487
Only enough methods have been added to support tests using it to date.
489
:ivar calls: A list of the calls made; can be reset at any time by
493
def __init__(self, backing_vf):
494
"""Create a RecordingVersionedFileDecorator decorating backing_vf.
496
:param backing_vf: The versioned file to answer all methods.
498
self._backing_vf = backing_vf
501
def get_lines(self, version_ids):
    """Record the call, then answer it from the backing versioned file.

    A ("get_lines", version_ids) tuple is appended to self.calls before
    the request is forwarded.

    :param version_ids: Passed through unchanged to the backing file.
    :return: Whatever the backing file's get_lines returns.
    """
    self.calls.append(("get_lines", version_ids))
    return self._backing_vf.get_lines(version_ids)
506
495
class _PlanMergeVersionedFile(object):
507
496
"""A VersionedFile for uncommitted and committed texts.
601
590
ancestry.update(self.get_ancestry(parent, topo_sorted=False))
604
def get_parent_map(self, version_ids):
605
"""See VersionedFile.get_parent_map"""
607
pending = set(version_ids)
608
for key in version_ids:
593
def get_parents(self, version_id):
594
"""See VersionedFile.get_parents"""
595
parents = self._parents.get(version_id)
596
if parents is not None:
598
for versionedfile in self.fallback_versionedfiles:
610
result[key] = self._parents[key]
613
pending = pending - set(result.keys())
614
for versionedfile in self.fallback_versionedfiles:
615
parents = versionedfile.get_parent_map(pending)
616
result.update(parents)
617
pending = pending - set(parents.keys())
600
return versionedfile.get_parents(version_id)
601
except errors.RevisionNotPresent:
604
raise errors.RevisionNotPresent(version_id, self._file_id)
622
606
def _get_graph(self):
623
607
from bzrlib.graph import (
736
720
are not present in the other file's history unless ignore_missing is
737
721
supplied in which case they are silently skipped.
724
# - if the target is empty, just add all the versions from
725
# source to target, otherwise:
726
# - make a temporary versioned file of type target
727
# - insert the source content into it one at a time
729
if not self.target.versions():
732
# Make a new target-format versioned file.
733
temp_source = self.target.create_empty("temp", MemoryTransport())
740
735
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
741
graph = Graph(self.source)
742
search = graph._make_breadth_first_searcher(version_ids)
743
transitive_ids = set()
744
map(transitive_ids.update, list(search))
745
parent_map = self.source.get_parent_map(transitive_ids)
746
order = tsort.topo_sort(parent_map.items())
736
graph = self.source.get_graph(version_ids)
737
order = tsort.topo_sort(graph.items())
747
738
pb = ui.ui_factory.nested_progress_bar()
748
739
parent_texts = {}
763
754
total = len(order)
764
755
for index, version in enumerate(order):
765
756
pb.update('Converting versioned data', index, total)
766
if version in target:
768
757
_, _, parent_text = target.add_lines(version,
758
self.source.get_parents(version),
770
759
self.source.get_lines(version),
771
760
parent_texts=parent_texts)
772
761
parent_texts[version] = parent_text
763
# this should hit the native code path for target
764
if target is not self.target:
765
return self.target.join(temp_source,
801
799
new_version_ids.add(version)
802
800
return new_version_ids
805
class KeyMapper(object):
806
"""KeyMappers map between keys and underlying paritioned storage."""
809
"""Map key to an underlying storage identifier.
811
:param key: A key tuple e.g. ('file-id', 'revision-id').
812
:return: An underlying storage identifier, specific to the partitioning
816
def unmap(self, partition_id):
817
"""Map a partitioned storage id back to a key prefix.
819
:param partition_id: The underlying partition id.
820
:return: As much of a key (or prefix) as is derivable from the parition
825
class ConstantMapper(KeyMapper):
826
"""A key mapper that maps to a constant result."""
828
def __init__(self, result):
829
"""Create a ConstantMapper which will return result for all maps."""
830
self._result = result
833
"""See KeyMapper.map()."""
837
class PrefixMapper(KeyMapper):
838
"""A key mapper that extracts the first component of a key."""
841
"""See KeyMapper.map()."""
844
def unmap(self, partition_id):
    """See KeyMapper.unmap().

    :param partition_id: The underlying partition id.
    :return: A one-element tuple containing partition_id unchanged.
    """
    return (partition_id,)
849
class HashPrefixMapper(KeyMapper):
850
"""A key mapper that combines the first component of a key with a hash."""
853
"""See KeyMapper.map()."""
854
prefix = self._escape(key[0])
855
return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
857
def _escape(self, prefix):
858
"""No escaping needed here."""
861
def unmap(self, partition_id):
    """See KeyMapper.unmap().

    Only the basename of partition_id is used; it is passed through
    self._unescape before being wrapped in a tuple.

    :param partition_id: The underlying partition id.
    :return: A one-element tuple holding the unescaped basename.
    """
    return (self._unescape(osutils.basename(partition_id)),)
865
def _unescape(self, basename):
866
"""No unescaping needed for HashPrefixMapper."""
870
class HashEscapedPrefixMapper(HashPrefixMapper):
871
"""Combines the escaped first component of a key with a hash."""
873
_safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
875
def _escape(self, prefix):
876
"""Turn a key element into a filesystem safe string.
878
This is similar to a plain urllib.quote, except
879
it uses specific safe characters, so that it doesn't
880
have to translate a lot of valid file ids.
882
# @ does not get escaped. This is because it is a valid
883
# filesystem character we use all the time, and it looks
884
# a lot better than seeing %40 all the time.
885
r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
889
def _unescape(self, basename):
    """Reverse the escaping of a basename using urllib.unquote.

    :param basename: The escaped name to decode.
    :return: The result of urllib.unquote(basename).
    """
    return urllib.unquote(basename)