512
516
diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))
513
517
return diff_hunks
515
def _get_component_versions(self, version_id):
516
basis = self.basis_knit
523
if basis and basis._index.has_version(cursor):
525
basis_versions.append(cursor)
526
method = picked_knit._index.get_method(cursor)
527
needed_versions.append((method, cursor))
528
if method == 'fulltext':
530
cursor = picked_knit.get_parents(cursor)[0]
531
return needed_versions, basis_versions
533
def _get_component_positions(self, version_id):
534
needed_versions, basis_versions = \
535
self._get_component_versions(version_id)
536
assert len(basis_versions) == 0
538
for method, comp_id in needed_versions:
539
data_pos, data_size = self._index.get_position(comp_id)
540
positions.append((method, comp_id, data_pos, data_size))
543
def _get_components(self, version_id):
544
"""Return a list of (version_id, method, data) tuples that
545
make up the version specified by version_id of the knit.
547
The components should be applied in the order of the returned
550
The basis knit will be used to the largest extent possible
551
since it is assumed that access to it is faster.
519
def _get_components_positions(self, version_ids):
520
"""Produce a map of position data for the components of versions.
522
This data is intended to be used for retrieving the knit records.
524
A dict of version_id to (method, data_pos, data_size, next) is
526
method is the way referenced data should be applied.
527
data_pos is the position of the data in the knit.
528
data_size is the size of the data in the knit.
529
next is the build-parent of the version, or None for fulltexts.
554
# 4168 calls in 14912, 2289 internal
555
# 4168 in 9711 to read_records
556
# 52554 in 1250 to get_parents
557
# 170166 in 865 to list.append
559
# needed_versions holds a list of (method, version_id) of
560
# versions that need to be fetched to construct the final
561
# version of the file.
563
# basis_versions is a list of versions that need to be
564
# fetched but exist in the basis knit.
566
needed_versions, basis_versions = \
567
self._get_component_versions(version_id)
571
assert False, "I am broken"
572
basis = self.basis_knit
574
for comp_id in basis_versions:
575
data_pos, data_size = basis._index.get_data_position(comp_id)
576
records.append((comp_id, data_pos, data_size))
577
components.update(basis._data.read_records(records))
580
for comp_id in [vid for method, vid in needed_versions
581
if vid not in basis_versions]:
582
data_pos, data_size = self._index.get_position(comp_id)
583
records.append((comp_id, data_pos, data_size))
584
components.update(self._data.read_records(records))
586
# read_records returns a mapping with the version id as
587
# index and the value as data. The order the components need
588
# to be applied is held by needed_versions (reversed).
590
for method, comp_id in reversed(needed_versions):
591
out.append((comp_id, method, components[comp_id]))
532
for version_id in version_ids:
535
while cursor is not None and cursor not in component_data:
537
method = picked_knit._index.get_method(cursor)
538
if method == 'fulltext':
541
next = picked_knit.get_parents(cursor)[0]
542
data_pos, data_size = self._index.get_position(cursor)
543
component_data[cursor] = (method, data_pos, data_size, next)
545
return component_data
595
547
def _get_content(self, version_id, parent_texts={}):
596
548
"""Returns a content object that makes up the specified
605
557
if self.basis_knit and version_id in self.basis_knit:
606
558
return self.basis_knit._get_content(version_id)
609
components = self._get_components(version_id)
610
for component_id, method, (data, digest) in components:
611
version_idx = self._index.lookup(component_id)
612
if method == 'fulltext':
613
assert content is None
614
content = self.factory.parse_fulltext(data, version_idx)
615
elif method == 'line-delta':
616
delta = self.factory.parse_line_delta(data, version_idx)
617
content._lines = self._apply_delta(content._lines, delta)
619
if 'no-eol' in self._index.get_options(version_id):
620
line = content._lines[-1][1].rstrip('\n')
621
content._lines[-1] = (content._lines[-1][0], line)
623
# digest here is the digest from the last applied component.
624
if sha_strings(content.text()) != digest:
625
raise KnitCorrupt(self.filename, 'sha-1 does not match %s' % version_id)
560
text_map, contents_map = self._get_content_maps([version_id])
561
return contents_map[version_id]
629
563
def _check_versions_present(self, version_ids):
630
564
"""Check that all specified versions are present."""
743
677
"""See VersionedFile.get_lines()."""
744
678
return self.get_line_list([version_id])[0]
746
def _get_version_components(self, position_map):
748
for version_id, positions in position_map.iteritems():
749
for method, comp_id, position, size in positions:
750
records.append((comp_id, position, size))
751
record_map = self._data.read_records(records)
754
for version_id, positions in position_map.iteritems():
756
for method, comp_id, position, size in positions:
757
data, digest = record_map[comp_id]
758
components.append((comp_id, method, data, digest))
759
component_map[version_id] = components
680
def _get_record_map(self, version_ids):
681
"""Produce a dictionary of knit records.
683
The keys are version_ids, the values are tuples of (method, content,
685
method is the way the content should be applied.
686
content is a KnitContent object.
687
digest is the SHA1 digest of this version id after all steps are done
688
next is the build-parent of the version, i.e. the leftmost ancestor.
689
If the method is fulltext, next will be None.
691
position_map = self._get_components_positions(version_ids)
692
# c = component_id, m = method, p = position, s = size, n = next
693
records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]
695
for component_id, content, digest in\
696
self._data.read_records_iter(records):
697
method, position, size, next = position_map[component_id]
698
record_map[component_id] = method, content, digest, next
762
702
def get_text(self, version_id):
763
703
"""See VersionedFile.get_text"""
769
709
def get_line_list(self, version_ids):
770
710
"""Return the texts of listed versions as a list of strings."""
711
text_map, content_map = self._get_content_maps(version_ids)
712
return [text_map[v] for v in version_ids]
714
def _get_content_maps(self, version_ids):
715
"""Produce maps of text and KnitContents
717
:return: (text_map, content_map) where text_map contains the texts for
718
the requested versions and content_map contains the KnitContents.
719
Both dicts take version_ids as their keys.
772
721
for version_id in version_ids:
773
722
if not self.has_version(version_id):
774
723
raise RevisionNotPresent(version_id, self.filename)
775
position_map[version_id] = \
776
self._get_component_positions(version_id)
778
version_components = self._get_version_components(position_map).items()
724
record_map = self._get_record_map(version_ids)
781
for version_id, components in version_components:
729
for version_id in version_ids:
732
while cursor is not None:
733
method, data, digest, next = record_map[cursor]
734
components.append((cursor, method, data, digest))
735
if cursor in content_map:
783
740
for component_id, method, data, digest in reversed(components):
784
version_idx = self._index.lookup(component_id)
785
if method == 'fulltext':
786
assert content is None
787
content = self.factory.parse_fulltext(data, version_idx)
788
elif method == 'line-delta':
789
delta = self.factory.parse_line_delta(data, version_idx)
790
content._lines = self._apply_delta(content._lines, delta)
741
if component_id in content_map:
742
content = content_map[component_id]
744
version_idx = self._index.lookup(component_id)
745
if method == 'fulltext':
746
assert content is None
747
content = self.factory.parse_fulltext(data, version_idx)
748
elif method == 'line-delta':
749
delta = self.factory.parse_line_delta(data[:],
751
content = content.copy()
752
content._lines = self._apply_delta(content._lines,
754
content_map[component_id] = content
792
756
if 'no-eol' in self._index.get_options(version_id):
757
content = content.copy()
793
758
line = content._lines[-1][1].rstrip('\n')
794
759
content._lines[-1] = (content._lines[-1][0], line)
760
final_content[version_id] = content
796
762
# digest here is the digest from the last applied component.
797
if sha_strings(content.text()) != digest:
763
text = content.text()
764
if sha_strings(text) != digest:
798
765
raise KnitCorrupt(self.filename,
799
766
'sha-1 does not match %s' % version_id)
801
text_map[version_id] = content.text()
802
return [text_map[v] for v in version_ids]
768
text_map[version_id] = text
769
return text_map, final_content
804
771
def iter_lines_added_or_present_in_versions(self, version_ids=None):
805
772
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
1444
1411
# 4168 calls to readv in 1411
1445
1412
# 4168 calls to parse_record in 2880
1414
needed_records = set()
1448
1415
for version_id, pos, size in records:
1449
1416
if version_id not in self._records:
1450
needed_records.append((version_id, pos, size))
1417
needed_records.add((version_id, pos, size))
1419
# turn our set into a list, sorted by file position
1420
needed_records = sorted(needed_records, key=operator.itemgetter(1))
1452
1422
if len(needed_records):
1453
needed_records.sort(key=operator.itemgetter(1))
1454
1423
# We take it that the transport optimizes the fetching as well
1455
1424
# as possible (i.e., reads contiguous ranges).
1456
1425
response = self._transport.readv(self._filename,