128
128
def line_delta_iter(self, new_lines):
    """Generate a line-based delta from this content to new_lines.

    :param new_lines: a content object of the same kind as self; its
        text() lines are compared against self.text().
    :return: an iterator of (ofrom, oto, length, data) tuples, where
        ofrom/oto index the old text, length is the number of new
        lines, and data is the slice of new_lines._lines to insert.
    """
    new_texts = new_lines.text()
    old_texts = self.text()
    s = KnitSequenceMatcher(None, old_texts, new_texts)
    for tag, i1, i2, j1, j2 in s.get_opcodes():
        # NOTE(review): 'equal' opcodes carry no delta content, so they
        # are skipped; this line was elided in the diff artifact this
        # block was recovered from — confirm against upstream history.
        if tag == 'equal':
            continue
        # ofrom, oto, length, data
        yield i1, i2, j2 - j1, new_lines._lines[j1:j2]
139
139
def line_delta(self, new_lines):
    """Return the delta to new_lines as a list.

    Convenience wrapper that materializes line_delta_iter().
    """
    return list(self.line_delta_iter(new_lines))
308
308
self.writable = (access_mode == 'w')
309
309
self.delta = delta
311
self._max_delta_chain = 200
311
313
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
312
314
access_mode, create=create, file_mode=file_mode,
313
315
create_parent_dir=create_parent_dir, delay_create=delay_create,
321
323
return '%s(%s)' % (self.__class__.__name__,
322
324
self.transport.abspath(self.filename))
326
def _check_should_delta(self, first_parents):
327
"""Iterate back through the parent listing, looking for a fulltext.
329
This is used when we want to decide whether to add a delta or a new
330
fulltext. It searches for _max_delta_chain parents. When it finds a
331
fulltext parent, it sees if the total size of the deltas leading up to
332
it is large enough to indicate that we want a new full text anyway.
334
Return True if we should create a new delta, False if we should use a
339
delta_parents = first_parents
340
for count in xrange(self._max_delta_chain):
341
parent = delta_parents[0]
342
method = self._index.get_method(parent)
343
pos, size = self._index.get_position(parent)
344
if method == 'fulltext':
348
delta_parents = self._index.get_parents(parent)
350
# We couldn't find a fulltext, so we must create a new one
353
return fulltext_size > delta_size
324
355
def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
325
356
"""See VersionedFile._add_delta()."""
326
357
self._check_add(version_id, []) # should we check the lines ?
358
389
# To speed the extract of texts the delta chain is limited
359
390
# to a fixed number of deltas. This should minimize both
360
391
# I/O and the time spend applying deltas.
362
delta_parents = [delta_parent]
364
parent = delta_parents[0]
365
method = self._index.get_method(parent)
366
if method == 'fulltext':
368
delta_parents = self._index.get_parents(parent)
370
if method == 'line-delta':
371
# did not find a fulltext in the delta limit.
372
# just do a normal insertion.
392
# The window was changed to a maximum of 200 deltas, but also added
393
# was a check that the total compressed size of the deltas is
394
# smaller than the compressed size of the fulltext.
395
if not self._check_should_delta([delta_parent]):
396
# We don't want a delta here, just do a normal insertion.
373
397
return super(KnitVersionedFile, self)._add_delta(version_id,
524
548
for parent_id in parents:
525
549
merge_content = self._get_content(parent_id, parent_texts)
526
seq = KnitSequenceMatcher(None, merge_content.text(), content.text())
550
seq = patiencediff.PatienceSequenceMatcher(
551
None, merge_content.text(), content.text())
527
552
if delta_seq is None:
528
553
# setup a delta seq to reuse.
540
565
reference_content = self._get_content(parents[0], parent_texts)
541
566
new_texts = content.text()
542
567
old_texts = reference_content.text()
543
delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
568
delta_seq = patiencediff.PatienceSequenceMatcher(
569
None, old_texts, new_texts)
544
570
return self._make_line_delta(delta_seq, content)
546
572
def _make_line_delta(self, delta_seq, new_content):
667
693
# To speed the extract of texts the delta chain is limited
668
694
# to a fixed number of deltas. This should minimize both
669
695
# I/O and the time spend applying deltas.
671
delta_parents = present_parents
673
parent = delta_parents[0]
674
method = self._index.get_method(parent)
675
if method == 'fulltext':
677
delta_parents = self._index.get_parents(parent)
679
if method == 'line-delta':
696
delta = self._check_should_delta(present_parents)
682
698
lines = self.factory.make(lines, version_id)
683
699
if delta or (self.factory.annotated and len(present_parents) > 0):
824
840
data_pos, length = self._index.get_position(version_id)
825
841
version_id_records.append((version_id, data_pos, length))
828
843
total = len(version_id_records)
829
pb.update('Walking content.', count, total)
830
for version_id, data, sha_value in \
831
self._data.read_records_iter(version_id_records):
832
pb.update('Walking content.', count, total)
844
for version_idx, (version_id, data, sha_value) in \
845
enumerate(self._data.read_records_iter(version_id_records)):
846
pb.update('Walking content.', version_idx, total)
833
847
method = self._index.get_method(version_id)
834
848
version_idx = self._index.lookup(version_id)
835
849
assert method in ('fulltext', 'line-delta')
1256
1269
encode_utf8 = cache_utf8.encode
1257
for version_id, options, pos, size, parents in versions:
1258
line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
1262
self._version_list_to_index(parents))
1263
assert isinstance(line, str), \
1264
'content must be utf-8 encoded: %r' % (line,)
1266
if not self._need_to_create:
1267
self._transport.append_bytes(self._filename, ''.join(lines))
1270
sio.write(self.HEADER)
1271
sio.writelines(lines)
1273
self._transport.put_file_non_atomic(self._filename, sio,
1274
create_parent_dir=self._create_parent_dir,
1275
mode=self._file_mode,
1276
dir_mode=self._dir_mode)
1277
self._need_to_create = False
1279
# cache after writing, so that a failed write leads to missing cache
1280
# entries not extra ones. XXX TODO: RBC 20060502 in the event of a
1281
# failure, reload the index or flush it or some such, to prevent
1282
# writing records that did complete twice.
1283
for version_id, options, pos, size, parents in versions:
1284
self._cache_version(version_id, options, pos, size, parents)
1270
orig_history = self._history[:]
1271
orig_cache = self._cache.copy()
1274
for version_id, options, pos, size, parents in versions:
1275
line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
1279
self._version_list_to_index(parents))
1280
assert isinstance(line, str), \
1281
'content must be utf-8 encoded: %r' % (line,)
1283
self._cache_version(version_id, options, pos, size, parents)
1284
if not self._need_to_create:
1285
self._transport.append_bytes(self._filename, ''.join(lines))
1288
sio.write(self.HEADER)
1289
sio.writelines(lines)
1291
self._transport.put_file_non_atomic(self._filename, sio,
1292
create_parent_dir=self._create_parent_dir,
1293
mode=self._file_mode,
1294
dir_mode=self._dir_mode)
1295
self._need_to_create = False
1297
# If any problems happen, restore the original values and re-raise
1298
self._history = orig_history
1299
self._cache = orig_cache
1286
1302
def has_version(self, version_id):
    """True if the version is in the index."""
    # Membership test against the in-memory cache only; no transport I/O.
    return version_id in self._cache