133
133
class KnitContent(object):
134
134
"""Content of a knit version to which deltas can be applied."""
136
def __init__(self, lines):
139
def annotate_iter(self):
140
"""Yield tuples of (origin, text) for each content line."""
141
return iter(self._lines)
143
136
def annotate(self):
144
137
"""Return a list of (origin, text) tuples."""
145
138
return list(self.annotate_iter())
158
151
def line_delta(self, new_lines):
159
152
return list(self.line_delta_iter(new_lines))
162
return [text for origin, text in self._lines]
165
return KnitContent(self._lines[:])
168
155
def get_line_delta_blocks(knit_delta, source, target):
169
156
"""Extract SequenceMatcher.get_matching_blocks() from a knit delta"""
191
178
yield s_pos + (target_len - t_pos), target_len, 0
194
class _KnitFactory(object):
195
"""Base factory for creating content objects."""
197
def make(self, lines, version_id):
198
num_lines = len(lines)
199
return KnitContent(zip([version_id] * num_lines, lines))
202
class KnitAnnotateFactory(_KnitFactory):
181
class AnnotatedKnitContent(KnitContent):
182
"""Annotated content."""
184
def __init__(self, lines):
187
def annotate_iter(self):
188
"""Yield tuples of (origin, text) for each content line."""
189
return iter(self._lines)
191
def strip_last_line_newline(self):
192
line = self._lines[-1][1].rstrip('\n')
193
self._lines[-1] = (self._lines[-1][0], line)
196
return [text for origin, text in self._lines]
199
return AnnotatedKnitContent(self._lines[:])
202
class PlainKnitContent(KnitContent):
203
"""Unannotated content.
205
When annotate[_iter] is called on this content, the same version is reported
206
for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent
210
def __init__(self, lines, version_id):
212
self._version_id = version_id
214
def annotate_iter(self):
215
"""Yield tuples of (origin, text) for each content line."""
216
for line in self._lines:
217
yield self._version_id, line
220
return PlainKnitContent(self._lines[:], self._version_id)
222
def strip_last_line_newline(self):
223
self._lines[-1] = self._lines[-1].rstrip('\n')
229
class KnitAnnotateFactory(object):
203
230
"""Factory for creating annotated Content objects."""
234
def make(self, lines, version_id):
235
num_lines = len(lines)
236
return AnnotatedKnitContent(zip([version_id] * num_lines, lines))
207
238
def parse_fulltext(self, content, version_id):
208
239
"""Convert fulltext to internal representation
217
248
# Figure out a way to not require the overhead of turning the
218
249
# list back into tuples.
219
250
lines = [tuple(line.split(' ', 1)) for line in content]
220
return KnitContent(lines)
251
return AnnotatedKnitContent(lines)
222
253
def parse_line_delta_iter(self, lines):
223
254
return iter(self.parse_line_delta(lines))
295
326
return content.annotate_iter()
298
class KnitPlainFactory(_KnitFactory):
329
class KnitPlainFactory(object):
299
330
"""Factory for creating plain Content objects."""
301
332
annotated = False
334
def make(self, lines, version_id):
335
return PlainKnitContent(lines, version_id)
303
337
def parse_fulltext(self, content, version_id):
304
338
"""This parses an unannotated fulltext.
315
349
header = lines[cur]
317
351
start, end, c = [int(n) for n in header.split(',')]
318
yield start, end, c, zip([version_id] * c, lines[cur:cur+c])
352
yield start, end, c, lines[cur:cur+c]
321
355
def parse_line_delta(self, lines, version_id):
347
381
for start, end, c, lines in delta:
348
382
out.append('%d,%d,%d\n' % (start, end, c))
349
out.extend([text for origin, text in lines])
352
386
def annotate_iter(self, knit, version_id):
453
487
return fulltext_size > delta_size
455
def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
456
"""See VersionedFile._add_delta()."""
457
self._check_add(version_id, []) # should we check the lines ?
458
self._check_versions_present(parents)
462
for parent in parents:
463
if not self.has_version(parent):
464
ghosts.append(parent)
466
present_parents.append(parent)
468
if delta_parent is None:
469
# reconstitute as full text.
470
assert len(delta) == 1 or len(delta) == 0
472
assert delta[0][0] == 0
473
assert delta[0][1] == 0, delta[0][1]
474
return super(KnitVersionedFile, self)._add_delta(version_id,
485
options.append('no-eol')
487
if delta_parent is not None:
488
# determine the current delta chain length.
489
# To speed the extract of texts the delta chain is limited
490
# to a fixed number of deltas. This should minimize both
491
# I/O and the time spend applying deltas.
492
# The window was changed to a maximum of 200 deltas, but also added
493
# was a check that the total compressed size of the deltas is
494
# smaller than the compressed size of the fulltext.
495
if not self._check_should_delta([delta_parent]):
496
# We don't want a delta here, just do a normal insertion.
497
return super(KnitVersionedFile, self)._add_delta(version_id,
504
options.append('line-delta')
505
store_lines = self.factory.lower_line_delta(delta)
507
access_memo = self._data.add_record(version_id, digest, store_lines)
508
self._index.add_version(version_id, options, access_memo, parents)
510
489
def _add_raw_records(self, records, data):
511
490
"""Add all the records 'records' with data pre-joined in 'data'.
641
620
"""Get a delta for constructing version from some other version."""
642
621
version_id = osutils.safe_revision_id(version_id)
643
622
self.check_not_reserved_id(version_id)
644
if not self.has_version(version_id):
645
raise RevisionNotPresent(version_id, self.filename)
647
623
parents = self.get_parents(version_id)
649
625
parent = parents[0]
841
817
def _get_content(self, version_id, parent_texts={}):
842
818
"""Returns a content object that makes up the specified
844
if not self.has_version(version_id):
845
raise RevisionNotPresent(version_id, self.filename)
847
820
cached_version = parent_texts.get(version_id, None)
848
821
if cached_version is not None:
822
if not self.has_version(version_id):
823
raise RevisionNotPresent(version_id, self.filename)
849
824
return cached_version
851
826
text_map, contents_map = self._get_content_maps([version_id])
855
830
"""Check that all specified versions are present."""
856
831
self._index.check_versions_present(version_ids)
858
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
833
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
859
835
"""See VersionedFile.add_lines_with_ghosts()."""
860
836
self._check_add(version_id, lines)
861
return self._add(version_id, lines[:], parents, self.delta, parent_texts)
837
return self._add(version_id, lines[:], parents, self.delta,
838
parent_texts, None, nostore_sha)
863
840
def _add_lines(self, version_id, parents, lines, parent_texts,
864
left_matching_blocks=None):
841
left_matching_blocks, nostore_sha):
865
842
"""See VersionedFile.add_lines."""
866
843
self._check_add(version_id, lines)
867
844
self._check_versions_present(parents)
868
845
return self._add(version_id, lines[:], parents, self.delta,
869
parent_texts, left_matching_blocks)
846
parent_texts, left_matching_blocks, nostore_sha)
871
848
def _check_add(self, version_id, lines):
872
849
"""check that version_id and lines are safe to add."""
875
852
if contains_whitespace(version_id):
876
853
raise InvalidRevisionId(version_id, self.filename)
877
854
self.check_not_reserved_id(version_id)
855
# Technically this is a case of Look Before You Leap, but:
856
# - for knits this saves wasted space in the error case
857
# - for packs this avoids dead space in the pack
858
# - it also avoids undetected poisoning attacks.
859
# - its 1.5% of total commit time, so ignore it unless it becomes a
878
861
if self.has_version(version_id):
879
862
raise RevisionAlreadyPresent(version_id, self.filename)
880
863
self._check_lines_not_unicode(lines)
881
864
self._check_lines_are_lines(lines)
883
866
def _add(self, version_id, lines, parents, delta, parent_texts,
884
left_matching_blocks=None):
867
left_matching_blocks, nostore_sha):
885
868
"""Add a set of lines on top of version specified by parents.
887
870
If delta is true, compress the text as a line-delta against
929
914
delta = self._check_should_delta(present_parents)
931
916
assert isinstance(version_id, str)
932
lines = self.factory.make(lines, version_id)
917
content = self.factory.make(lines, version_id)
933
918
if delta or (self.factory.annotated and len(present_parents) > 0):
934
919
# Merge annotations from parent texts if so is needed.
935
delta_hunks = self._merge_annotations(lines, present_parents,
920
delta_hunks = self._merge_annotations(content, present_parents,
936
921
parent_texts, delta, self.factory.annotated,
937
922
left_matching_blocks)
941
926
store_lines = self.factory.lower_line_delta(delta_hunks)
943
928
options.append('fulltext')
944
store_lines = self.factory.lower_fulltext(lines)
929
store_lines = self.factory.lower_fulltext(content)
946
931
access_memo = self._data.add_record(version_id, digest, store_lines)
947
932
self._index.add_version(version_id, options, access_memo, parents)
948
return digest, text_length, lines
933
return digest, text_length, content
950
935
def check(self, progress_bar=None):
951
936
"""See VersionedFile.check()."""
1035
1020
elif method == 'line-delta':
1036
1021
delta = self.factory.parse_line_delta(data, version_id)
1037
1022
content = content.copy()
1038
content._lines = self._apply_delta(content._lines,
1023
content._lines = self._apply_delta(content._lines,
1040
1025
content_map[component_id] = content
1042
1027
if 'no-eol' in self._index.get_options(version_id):
1043
1028
content = content.copy()
1044
line = content._lines[-1][1].rstrip('\n')
1045
content._lines[-1] = (content._lines[-1][0], line)
1029
content.strip_last_line_newline()
1046
1030
final_content[version_id] = content
1048
1032
# digest here is the digest from the last applied component.
1049
1033
text = content.text()
1050
1034
if sha_strings(text) != digest:
1051
raise KnitCorrupt(self.filename,
1035
raise KnitCorrupt(self.filename,
1052
1036
'sha-1 does not match %s' % version_id)
1054
text_map[version_id] = text
1055
return text_map, final_content
1038
text_map[version_id] = text
1039
return text_map, final_content
1057
1041
def iter_lines_added_or_present_in_versions(self, version_ids=None,