        # move the copied index into place
        transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

    def create_empty(self, name, transport, mode=None):
        return KnitVersionedFile(name, transport, factory=self.factory,
            delta=self.delta, create=True)

    def _fix_parents(self, version_id, new_parents):
        """Fix the parents list for version.

        This is done by appending a new version to the index
        with identical data except for the parents list.
        The parents list must be a superset of the current list.
        """
        current_values = self._index._cache[version_id]
        assert set(current_values[4]).difference(set(new_parents)) == set()
        self._index.add_version(version_id,
                                current_values[1],
                                current_values[2],
                                current_values[3],
                                new_parents)

    def get_data_stream(self, required_versions):
        """Get a data stream for the specified versions.

        Versions may be returned in any order, not necessarily the order
        specified.  They are returned in a partial order by compression
        parent, so that the deltas can be applied as the data stream is
        inserted; however note that compression parents will not be sent
        unless they were specifically requested, as the client may already
        have them.

        :param required_versions: The exact set of versions to be extracted.
            Unlike some other knit methods, this is not used to generate a
            transitive closure, rather it is used precisely as given.

        :returns: format_signature, list of (version, options, length, parents),
            reader_callable.
        """
        required_version_set = frozenset(required_versions)
        version_index = {}
        # list of revisions that can just be sent without waiting for their
        # compression parent
        ready_to_send = []
        # map from revision to the children based on it
        deferred = {}
        # first, read all relevant index data, enough to sort into the right
        # order to return
        for version_id in required_versions:
            options = self._index.get_options(version_id)
            parents = self._index.get_parents_with_ghosts(version_id)
            index_memo = self._index.get_position(version_id)
            version_index[version_id] = (index_memo, options, parents)
            if ('line-delta' in options
                and parents[0] in required_version_set):
                # must wait until the parent has been sent
                deferred.setdefault(parents[0], []). \
                    append(version_id)
            else:
                # either a fulltext, or a delta whose parent the client did
                # not ask for and presumably already has
                ready_to_send.append(version_id)
        # build a list of results to return, plus instructions for data to
        # read from the file
        copy_queue_records = []
        temp_version_list = []
        while ready_to_send:
            # XXX: pushing and popping lists may be a bit inefficient
            version_id = ready_to_send.pop(0)
            (index_memo, options, parents) = version_index[version_id]
            copy_queue_records.append((version_id, index_memo))
            none, data_pos, data_size = index_memo
            temp_version_list.append((version_id, options, data_size,
                parents))
            if version_id in deferred:
                # now we can send all the children of this revision - we could
                # put them in anywhere, but we hope that sending them soon
                # after the fulltext will give good locality in the receiver
                ready_to_send[:0] = deferred.pop(version_id)
        assert len(deferred) == 0, \
            "Still have compressed child versions waiting to be sent"
        # XXX: The stream format is such that we cannot stream it - we have to
        # know the length of all the data a-priori.
        raw_datum = []
        result_version_list = []
        for (version_id, raw_data), \
            (version_id2, options, _, parents) in \
            izip(self._data.read_records_iter_raw(copy_queue_records),
                 temp_version_list):
            assert version_id == version_id2, \
                'logic error, inconsistent results'
            raw_datum.append(raw_data)
            result_version_list.append(
                (version_id, options, len(raw_data), parents))
        # provide a callback to get data incrementally.
        pseudo_file = StringIO(''.join(raw_datum))
        def read(length):
            if length is None:
                return pseudo_file.read()
            else:
                return pseudo_file.read(length)
        return (self.get_format_signature(), result_version_list, read)

    def _extract_blocks(self, version_id, source, target):
        if self._index.get_method(version_id) != 'line-delta':
            return None
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)

    def get_delta(self, version_id):
        """Get a delta for constructing version from some other version."""
        self.check_not_reserved_id(version_id)
        parents = self.get_parent_map([version_id])[version_id]
        if len(parents):
            parent = parents[0]
        else:
            parent = None
        index_memo = self._index.get_position(version_id)
        data, sha1 = self._data.read_records(((version_id, index_memo),))[version_id]
        noeol = 'no-eol' in self._index.get_options(version_id)
        if 'fulltext' == self._index.get_method(version_id):
            new_content = self.factory.parse_fulltext(data, version_id)
            if parent is not None:
                reference_content = self._get_content(parent)
                old_texts = reference_content.text()
            else:
                old_texts = []
            new_texts = new_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                             new_texts)
            return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
        else:
            delta = self.factory.parse_line_delta(data, version_id)
            return parent, sha1, noeol, delta
    def get_format_signature(self):
        """See VersionedFile.get_format_signature()."""
        if self.factory.annotated:
            annotated_part = "annotated"
        else:
            annotated_part = "plain"
        return "knit-%s" % (annotated_part,)
    @deprecated_method(one_four)
    def get_graph_with_ghosts(self):
        """See VersionedFile.get_graph_with_ghosts()."""
        return self.get_parent_map(self.versions())
    def get_sha1(self, version_id):
        return self.get_sha1s([version_id])[0]

    def get_sha1s(self, version_ids):
        """See VersionedFile.get_sha1()."""
        record_map = self._get_record_map(version_ids)
        # record entry 2 is the 'digest'.
        return [record_map[v][2] for v in version_ids]
    @staticmethod
    def get_suffixes():
        """See VersionedFile.get_suffixes()."""
        return [DATA_SUFFIX, INDEX_SUFFIX]
    @deprecated_method(one_four)
    def has_ghost(self, version_id):
        """True if there is a ghost reference in the file to version_id."""
        # maybe we have it
        if self.has_version(version_id):
            return False
        # optimisable if needed by memoising the _ghosts set.
        items = self.get_parent_map(self.versions())
        for parents in items.itervalues():
            for parent in parents:
                if parent == version_id and parent not in items:
                    return True
        return False
    def insert_data_stream(self, (format, data_list, reader_callable)):
        """Insert knit records from a data stream into this knit.

        If a version in the stream is already present in this knit, it will not
        be inserted a second time. It will be checked for consistency with the
        stored version however, and may cause a KnitCorrupt error to be raised
        if the data in the stream disagrees with the already stored data.

        :seealso: get_data_stream
        """
        if format != self.get_format_signature():
            if 'knit' in debug.debug_flags:
                trace.mutter(
                    'incompatible format signature inserting to %r', self)
            source = self._knit_from_datastream(
                (format, data_list, reader_callable))
            self.join(source)
            return

        for version_id, options, length, parents in data_list:
            if self.has_version(version_id):
                # First check: the list of parents.
                my_parents = self.get_parents_with_ghosts(version_id)
                if tuple(my_parents) != tuple(parents):
                    # XXX: KnitCorrupt is not quite the right exception here.
                    raise KnitCorrupt(
                        self.filename,
                        'parents list %r from data stream does not match '
                        'already recorded parents %r for %s'
                        % (parents, my_parents, version_id))

                # Also check the SHA-1 of the fulltext this content will
                # produce.
                raw_data = reader_callable(length)
                my_fulltext_sha1 = self.get_sha1(version_id)
                df, rec = self._data._parse_record_header(version_id, raw_data)
                stream_fulltext_sha1 = rec[3]
                if my_fulltext_sha1 != stream_fulltext_sha1:
                    # Actually, we don't know if it's this knit that's corrupt,
                    # or the data stream we're trying to insert.
                    raise KnitCorrupt(
                        self.filename, 'sha-1 does not match %s' % version_id)
            else:
                if 'line-delta' in options:
                    # Make sure that this knit record is actually useful: a
                    # line-delta is no use unless we have its parent.
                    # Fetching from a broken repository with this problem
                    # shouldn't break the target repository.
                    #
                    # See https://bugs.launchpad.net/bzr/+bug/164443
                    if not self._index.has_version(parents[0]):
                        raise KnitCorrupt(
                            self.filename,
                            'line-delta from stream '
                            'for version %s '
                            'references '
                            'missing parent %s\n'
                            'Try running "bzr check" '
                            'on the source repository, and "bzr reconcile" '
                            'if necessary.' %
                            (version_id, parents[0]))
                self._add_raw_records(
                    [(version_id, options, parents, length)],
                    reader_callable(length))
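    # Illustrative sketch (not from the original source): a typical round
    # trip between get_data_stream() and insert_data_stream().  The knit
    # instances named here are hypothetical and shown only to make the
    # stream tuple's shape concrete.
    #
    #   format, data_list, read = source_knit.get_data_stream(version_ids)
    #   # data_list holds (version_id, options, length, parents) tuples;
    #   # read(length) returns the next 'length' bytes of raw record data.
    #   target_knit.insert_data_stream((format, data_list, read))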
    def _knit_from_datastream(self, (format, data_list, reader_callable)):
        """Create a knit object from a data stream.

        This method exists to allow conversion of data streams that do not
        match the signature of this knit. Generally it will be slower and use
        more memory to use this method to insert data, but it will work.

        :seealso: get_data_stream for details on datastreams.
        :return: A knit versioned file which can be used to join the datastream
            into self.
        """
        if format == "knit-plain":
            factory = KnitPlainFactory()
        elif format == "knit-annotated":
            factory = KnitAnnotateFactory()
        else:
            raise errors.KnitDataStreamUnknown(format)
        index = _StreamIndex(data_list, self._index)
        access = _StreamAccess(reader_callable, index, self, factory)
        return KnitVersionedFile(self.filename, self.transport,
            factory=factory, index=index, access_method=access)
    def versions(self):
        """See VersionedFile.versions."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "versions scales with size of history")
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "has_version is a LBYL scenario")
        return self._index.has_version(version_id)

    __contains__ = has_version
    def _merge_annotations(self, content, parents, parent_texts={},
                           delta=None, annotated=None,
                           left_matching_blocks=None):
        """Merge annotations for content.  This is done by comparing
        the annotations based on changes to the text.
        """
        if left_matching_blocks is not None:
            delta_seq = diff._PrematchedMatcher(left_matching_blocks)
        else:
            delta_seq = None
        if annotated:
            for parent_id in parents:
                merge_content = self._get_content(parent_id, parent_texts)
                if (parent_id == parents[0] and delta_seq is not None):
                    seq = delta_seq
                else:
                    seq = patiencediff.PatienceSequenceMatcher(
                        None, merge_content.text(), content.text())
                for i, j, n in seq.get_matching_blocks():
                    if n == 0:
                        continue
                    # this appears to copy (origin, text) pairs across to the
                    # new content for any line that matches the last-checked
                    # parent.
                    content._lines[j:j+n] = merge_content._lines[i:i+n]
        if delta:
            if delta_seq is None:
                reference_content = self._get_content(parents[0], parent_texts)
                new_texts = content.text()
                old_texts = reference_content.text()
                delta_seq = patiencediff.PatienceSequenceMatcher(
                    None, old_texts, new_texts)
            return self._make_line_delta(delta_seq, content)
            raise RevisionNotPresent(version_id, self._filename)


class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys, - 1-tuples.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))
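    # Illustrative sketch (an assumption, not part of the original module):
    # nodes yielded by _get_entries() above follow the GraphIndex shape
    # (index, key, value, node_refs), e.g. for a delta-compressed record
    #   (graph_index, ('rev-2',), 'N1423 78', ((('rev-1',),), (('rev-1',),)))
    # where the value string packs the no-eol flag, byte position and size,
    # and the two reference lists are (parents, compression_parent).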
    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # don't ask for ghosties - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph.iterkeys()
        return [key[0] for key in result_keys]
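    # Illustrative sketch (assumption): the graph built above maps each key
    # to its non-ghost parent keys, e.g.
    #   {('rev-2',): [('rev-1',)], ('rev-1',): []}
    # topo_sort() then returns keys parents-first, so get_ancestry(['rev-2'])
    # would return ['rev-1', 'rev-2'] once the 1-tuple keys are unwrapped.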
    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]
    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(index_memo, compression_parent,
                                       parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        result = {}
        entries = self._get_entries(self._version_ids_to_keys(version_ids), True)
        for entry in entries:
            version_id = self._keys_to_version_ids((entry[1],))[0]
            if not self._parents:
                parents = ()
            else:
                parents = self._keys_to_version_ids(entry[3][0])
            if not self._deltas:
                compression_parent = None
            else:
                compression_parent_key = self._compression_parent(entry)
                if compression_parent_key:
                    compression_parent = self._keys_to_version_ids(
                        (compression_parent_key,))[0]
                else:
                    compression_parent = None
            noeol = (entry[2][0] == 'N')
            if compression_parent:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[version_id] = (self._node_to_position(entry),
                                  compression_parent, parents,
                                  (method, noeol))
        return result
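    # Illustrative sketch (assumption): for a delta-compressed record the
    # mapping built by get_build_details() looks roughly like
    #   {'rev-2': ((index, 1423, 78),        # index_memo for read_records
    #              'rev-1',                  # compression parent
    #              ('rev-1',),               # logical parents
    #              ('line-delta', False))}   # (method, noeol) record_details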
    def _compression_parent(self, an_entry):
        # return the key that an_entry is compressed against, or None
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        assert len(compression_parents) == 1
        return compression_parents[0]
    def _get_method(self, node):
        if not self._deltas:
            return 'fulltext'
        if self._compression_parent(node):
            return 'line-delta'
        else:
            return 'fulltext'

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions
    def get_versions(self):
        """Get all the versions in the file. Not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(version_id)
        return self._node_to_position(node)

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        return self._get_method(self._get_node(version_id))

    def _get_node(self, version_id):
        try:
            return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]
        except IndexError:
            raise RevisionNotPresent(version_id, self)

    def get_options(self, version_id):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        node = self._get_node(version_id)
        options = [self._get_method(node)]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parent_map(self, version_ids):
        """Passed through to by KnitVersionedFile.get_parent_map."""
        nodes = self._get_entries(self._version_ids_to_keys(version_ids))
        result = {}
        if self._parents:
            for node in nodes:
                result[node[1][0]] = self._keys_to_version_ids(node[3][0])
        else:
            for node in nodes:
                result[node[1][0]] = ()
        return result

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        try:
            return self.get_parent_map([version_id])[version_id]
        except KeyError:
            raise RevisionNotPresent(version_id, self)

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
            (version_id, options, pos, size, parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if (value, node_refs) != keys[key]:
                    raise KnitCorrupt(self, "inconsistent details in add_versions"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)
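    # Illustrative sketch (assumption): with parents and delta compression
    # enabled, add_versions() hands _add_callback() entries shaped like
    #   (('rev-2',), 'N1423 78', ((('rev-1',),), (('rev-1',),)))
    # i.e. a 1-tuple key, a value string of no-eol flag plus "pos size",
    # and a (parents, compression-parent) pair of reference lists.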
    def _version_ids_to_keys(self, version_ids):
        return set((version_id, ) for version_id in version_ids)
class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
        _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if not self._need_to_create:
            base = self._transport.append_bytes(self._filename, raw_data)
        else:
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            base = 0
        result = []
        for size in sizes:
            result.append((None, base, size))
            base += size
        return result

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                              mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            pass
        return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]
        for pos, data in self._transport.readv(self._filename, read_vector):
            yield data
class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, (transport, packname)):
        """Set a writer to use for adding data."""
        if index is not None:
            self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index
class _StreamAccess(object):
    """A Knit Access object that provides data from a datastream.

    It also provides a fallback to present as unannotated data, annotated data
    from a *backing* access object.

    This is triggered by an index_memo which is pointing to a different index
    than this was constructed with, and is used to allow extracting full
    unannotated texts for insertion into annotated knits.
    """

    def __init__(self, reader_callable, stream_index, backing_knit,
        orig_factory):
        """Create a _StreamAccess object.

        :param reader_callable: The reader_callable from the datastream.
            This is called to buffer all the data immediately, for
            random access.
        :param stream_index: The index the data stream this provides access to
            which will be present in native index_memo's.
        :param backing_knit: The knit object that will provide access to
            annotated texts which are not available in the stream, so as to
            create unannotated texts.
        :param orig_factory: The original content factory used to generate the
            stream. This is used for checking whether the thunk code for
            supporting _copy_texts will generate the correct form of data.
        """
        self.data = reader_callable(None)
        self.stream_index = stream_index
        self.backing_knit = backing_knit
        self.orig_factory = orig_factory

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for records.

        :param memos_for_retrieval: An iterable of memos from the
            _StreamIndex object identifying bytes to read; for these classes
            they are (from_backing_knit, index, start, end) and can point to
            either the backing knit or streamed data.
        :return: An iterator yielding a byte string for each record in
            memos_for_retrieval.
        """
        # use a generator for memory friendliness
        for from_backing_knit, version_id, start, end in memos_for_retrieval:
            if not from_backing_knit:
                assert version_id is self.stream_index
                yield self.data[start:end]
                continue
            # we have been asked to thunk. This thunking only occurs when
            # we are obtaining plain texts from an annotated backing knit
            # so that _copy_texts will work.
            # We could improve performance here by scanning for where we need
            # to do this and using get_line_list, then interleaving the output
            # as desired. However, for now, this is sufficient.
            if self.orig_factory.__class__ != KnitPlainFactory:
                raise errors.KnitCorrupt(
                    self, 'Bad thunk request %r cannot be backed by %r' %
                        (version_id, self.orig_factory))
            lines = self.backing_knit.get_lines(version_id)
            line_bytes = ''.join(lines)
            digest = sha_string(line_bytes)
            # the packed form of the fulltext always has a trailing newline,
            # even if the actual text does not, unless the file is empty.  the
            # record options including the noeol flag are passed through by
            # _StreamIndex, so this is safe.
            if lines:
                if lines[-1][-1] != '\n':
                    lines[-1] = lines[-1] + '\n'
                    line_bytes += '\n'
            # We want plain data, because we expect to thunk only to allow text
            # extraction.
            size, bytes = self.backing_knit._data._record_to_data(version_id,
                digest, lines, line_bytes)
            yield bytes
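# Illustrative sketch (assumption): memos handed to
# _StreamAccess.get_raw_records() take the two forms produced by
# _StreamIndex.get_position() below:
#   (False, stream_index, start, end)  -> a slice of the buffered stream data
#   (True, version_id, None, None)     -> thunk a plain fulltext out of the
#                                         backing (annotated) knit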
class _StreamIndex(object):
    """A Knit Index object that uses the data map from a datastream."""

    def __init__(self, data_list, backing_index):
        """Create a _StreamIndex object.

        :param data_list: The data_list from the datastream.
        :param backing_index: The index which will supply values for nodes
            referenced outside of this stream.
        """
        self.data_list = data_list
        self.backing_index = backing_index
        self._by_version = {}
        pos = 0
        for key, options, length, parents in data_list:
            self._by_version[key] = options, (pos, pos + length), parents
            pos += length

    def get_ancestry(self, versions, topo_sorted):
        """Get an ancestry list for versions."""
        if topo_sorted:
            # Not needed for basic joins
            raise NotImplementedError(self.get_ancestry)
        # get a graph of all the mentioned versions:
        # Little ugly - basically copied from KnitIndex, but don't want to
        # accidentally incorporate too much of that index's code.
        ancestry = set()
        pending = set(versions)
        cache = self._by_version
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][2] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in ancestry])
            ancestry.add(version)
        return list(ancestry)
    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(index_memo, compression_parent,
                                       parents, record_details).
            index_memo
                opaque memo that can be passed to _StreamAccess.read_records
                to extract the raw data; for these classes it is
                (from_backing_knit, index, start, end)
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        result = {}
        for version_id in version_ids:
            try:
                method = self.get_method(version_id)
            except errors.RevisionNotPresent:
                # ghosts are omitted
                continue
            parent_ids = self.get_parents_with_ghosts(version_id)
            noeol = ('no-eol' in self.get_options(version_id))
            index_memo = self.get_position(version_id)
            from_backing_knit = index_memo[0]
            if from_backing_knit:
                # texts retrieved from the backing knit are always full texts
                method = 'fulltext'
            if method == 'fulltext':
                compression_parent = None
            else:
                compression_parent = parent_ids[0]
            result[version_id] = (index_memo, compression_parent,
                                  parent_ids, (method, noeol))
        return result

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self.get_options(version_id)
        if 'fulltext' in options:
            return 'fulltext'
        elif 'line-delta' in options:
            return 'line-delta'
        else:
            raise errors.KnitIndexUnknownMethod(self, options)
    def get_options(self, version_id):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        try:
            return self._by_version[version_id][0]
        except KeyError:
            options = list(self.backing_index.get_options(version_id))
            if 'fulltext' in options:
                pass
            elif 'line-delta' in options:
                # Texts from the backing knit are always returned from the stream
                # as full texts
                options.remove('line-delta')
                options.append('fulltext')
            else:
                raise errors.KnitIndexUnknownMethod(self, options)
            return tuple(options)

    def get_parent_map(self, version_ids):
        """Passed through to by KnitVersionedFile.get_parent_map."""
        result = {}
        pending_ids = set()
        for version_id in version_ids:
            try:
                result[version_id] = self._by_version[version_id][2]
            except KeyError:
                pending_ids.add(version_id)
        result.update(self.backing_index.get_parent_map(pending_ids))
        return result

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        try:
            return self.get_parent_map([version_id])[version_id]
        except KeyError:
            raise RevisionNotPresent(version_id, self)

    def get_position(self, version_id):
        """Return details needed to access the version.

        _StreamAccess has the data as a big array, so we return slice
        coordinates into that (as index_memo's are opaque outside the
        index and matching access class).

        :return: a tuple (from_backing_knit, index, start, end) that can
            be passed e.g. to get_raw_records.
            If from_backing_knit is False, index will be self, otherwise it
            will be a version id.
        """
        try:
            start, end = self._by_version[version_id][1]
            return False, self, start, end
        except KeyError:
            # Signal to the access object to handle this from the backing knit.
            return (True, version_id, None, None)

    def get_versions(self):
        """Get all the versions in the stream."""
        return self._by_version.keys()

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        result = []
        for version in version_ids:
            try:
                result.append((version, self._by_version[version][2]))
            except KeyError:
                pass
        return result
class _KnitData(object):
    """Manage extraction of data from a KnitAccess, caching and decompressing.

    The KnitData class provides the logic for parsing and using knit records,
    making use of an access method for the low level read and write operations.
    """

    def __init__(self, access):
        """Create a KnitData object.

        :param access: The access method to use. Access methods such as
            _KnitAccess manage the insertion of raw records and the subsequent
            retrieval of the same.
        """
        self._access = access
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount


InterVersionedFile.register_optimiser(WeaveToKnit)


# Deprecated, use PatienceSequenceMatcher instead
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher


def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.
    """
    annotator = _KnitAnnotator(knit)
    return iter(annotator.annotate(revision_id))
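# Illustrative sketch (assumption): annotate_knit() yields
# (origin_revision_id, line) pairs for the requested version, e.g.
#   for origin, line in annotate_knit(knit, 'rev-2'):
#       print origin, line,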
class _KnitAnnotator(object):
    """Build up the annotations for a text."""

    def __init__(self, knit):
        self._knit = knit

        # Content objects, differs from fulltexts because of how final newlines
        # are treated by knits. the content objects here will always have a
        # final newline
        self._fulltext_contents = {}

        # Annotated lines of specific revisions
        self._annotated_lines = {}

        # Track the raw data for nodes that we could not process yet.
        # This maps the revision_id of the base to a list of children that will
        # annotated from it.
        self._pending_children = {}

        # Nodes which cannot be extracted
        self._ghosts = set()

        # Track how many children this node has, so we know if we need to keep
        # it
        self._annotate_children = {}
        self._compression_children = {}

        self._all_build_details = {}
        # The children => parent revision_id graph
        self._revision_id_graph = {}

        self._heads_provider = None

        self._nodes_to_keep_annotations = set()
        self._generations_until_keep = 100

    def set_generations_until_keep(self, value):
        """Set the number of generations before caching a node.

        Setting this to -1 will cache every merge node, setting this higher
        will cache fewer nodes.
        """
        self._generations_until_keep = value

    def _add_fulltext_content(self, revision_id, content_obj):
        self._fulltext_contents[revision_id] = content_obj
        # TODO: jam 20080305 It might be good to check the sha1digest here
        return content_obj.text()

    def _check_parents(self, child, nodes_to_annotate):
        """Check if all parents have been processed.

        :param child: A tuple of (rev_id, parents, raw_content)
        :param nodes_to_annotate: If child is ready, add it to
            nodes_to_annotate, otherwise put it back in self._pending_children
        """
        for parent_id in child[1]:
            if (parent_id not in self._annotated_lines):
                # This parent is present, but another parent is missing
                self._pending_children.setdefault(parent_id,
                                                  []).append(child)
                break
        else:
            # This one is ready to be processed
            nodes_to_annotate.append(child)

    def _add_annotation(self, revision_id, fulltext, parent_ids,
                        left_matching_blocks=None):
        """Add an annotation entry.

        All parents should already have been annotated.
        :return: A list of children that now have their parents satisfied.
        """
        a = self._annotated_lines
        annotated_parent_lines = [a[p] for p in parent_ids]
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
            fulltext, revision_id, left_matching_blocks,
            heads_provider=self._get_heads_provider()))
        self._annotated_lines[revision_id] = annotated_lines
        for p in parent_ids:
            ann_children = self._annotate_children[p]
            ann_children.remove(revision_id)
            if (not ann_children
                and p not in self._nodes_to_keep_annotations):
                del self._annotated_lines[p]
                del self._all_build_details[p]
                if p in self._fulltext_contents:
                    del self._fulltext_contents[p]
        # Now that we've added this one, see if there are any pending
        # deltas to be done, certainly this parent is finished
        nodes_to_annotate = []
        for child in self._pending_children.pop(revision_id, []):
            self._check_parents(child, nodes_to_annotate)
        return nodes_to_annotate

    def _get_build_graph(self, revision_id):
        """Get the graphs for building texts and annotations.

        The data you need for creating a full text may be different than the
        data you need to annotate that text. (At a minimum, you need both
        parents to create an annotation, but only need 1 parent to generate the
        fulltext.)

        :return: A list of (revision_id, index_memo) records, suitable for
            passing to read_records_iter to start reading in the raw data from
            the pack file.
        """
        if revision_id in self._annotated_lines:
            # Nothing to do
            return []
        pending = set([revision_id])
        records = []
        generation = 0
        kept_generation = 0
        while pending:
            # get all pending nodes
            generation += 1
            this_iteration = pending
            build_details = self._knit._index.get_build_details(this_iteration)
            self._all_build_details.update(build_details)
            # new_nodes = self._knit._index._get_entries(this_iteration)
            pending = set()
            for rev_id, details in build_details.iteritems():
                (index_memo, compression_parent, parents,
                 record_details) = details
                self._revision_id_graph[rev_id] = parents
                records.append((rev_id, index_memo))
                # Do we actually need to check _annotated_lines?
                pending.update(p for p in parents
                                 if p not in self._all_build_details)
                if compression_parent:
                    self._compression_children.setdefault(compression_parent,
                        []).append(rev_id)
                for parent in parents:
                    self._annotate_children.setdefault(parent,
                        []).append(rev_id)
                num_gens = generation - kept_generation
                if ((num_gens >= self._generations_until_keep)
                    and len(parents) > 1):
                    kept_generation = generation
                    self._nodes_to_keep_annotations.add(rev_id)

            missing_versions = this_iteration.difference(build_details.keys())
            self._ghosts.update(missing_versions)
            for missing_version in missing_versions:
                # add a key, no parents
                self._revision_id_graph[missing_version] = ()
                pending.discard(missing_version) # don't look for it
        # XXX: This should probably be a real exception, as it is a data
        #      inconsistency
        assert not self._ghosts.intersection(self._compression_children), \
            "We cannot have nodes which have a compression parent of a ghost."
        # Clean out anything that depends on a ghost so that we don't wait for
        # the ghost to show up
        for node in self._ghosts:
            if node in self._annotate_children:
                # We won't be building this node
                del self._annotate_children[node]
        # Generally we will want to read the records in reverse order, because
        # we find the parent nodes after the children
        records.reverse()
        return records
    def _annotate_records(self, records):
        """Build the annotations for the listed records."""
        # We iterate in the order read, rather than a strict order requested
        # However, process what we can, and put off to the side things that
        # still need parents, cleaning them up when those parents are
        # processed.
        for (rev_id, record,
             digest) in self._knit._data.read_records_iter(records):
            if rev_id in self._annotated_lines:
                continue
            parent_ids = self._revision_id_graph[rev_id]
            parent_ids = [p for p in parent_ids if p not in self._ghosts]
            details = self._all_build_details[rev_id]
            (index_memo, compression_parent, parents,
             record_details) = details
            nodes_to_annotate = []
            # TODO: Remove the punning between compression parents, and
            #       parent_ids, we should be able to do this without assuming
            #       they are the same.
            if len(parent_ids) == 0:
                # There are no parents for this node, so just add it
                # TODO: This probably needs to be decoupled
                assert compression_parent is None
                fulltext_content, delta = self._knit.factory.parse_record(
                    rev_id, record, record_details, None)
                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                    parent_ids, left_matching_blocks=None))
            else:
                child = (rev_id, parent_ids, record)
                # Check if all the parents are present
                self._check_parents(child, nodes_to_annotate)
            while nodes_to_annotate:
                # Should we use a queue here instead of a stack?
                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
                (index_memo, compression_parent, parents,
                 record_details) = self._all_build_details[rev_id]
                if compression_parent is not None:
                    comp_children = self._compression_children[compression_parent]
                    assert rev_id in comp_children
                    # If there is only 1 child, it is safe to reuse this
                    # content
                    reuse_content = (len(comp_children) == 1
                        and compression_parent not in
                            self._nodes_to_keep_annotations)
                    if reuse_content:
                        # Remove it from the cache since it will be changing
                        parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
                        # Make sure to copy the fulltext since it might be
                        # modified
                        parent_fulltext = list(parent_fulltext_content.text())
                    else:
                        parent_fulltext_content = self._fulltext_contents[compression_parent]
                        parent_fulltext = parent_fulltext_content.text()
                    comp_children.remove(rev_id)
                    fulltext_content, delta = self._knit.factory.parse_record(
                        rev_id, record, record_details,
                        parent_fulltext_content,
                        copy_base_content=(not reuse_content))
                    fulltext = self._add_fulltext_content(rev_id,
                                                          fulltext_content)
                    blocks = KnitContent.get_line_delta_blocks(delta,
                            parent_fulltext, fulltext)
                else:
                    fulltext_content = self._knit.factory.parse_fulltext(
                        record, rev_id)
                    fulltext = self._add_fulltext_content(rev_id,
                        fulltext_content)
                    blocks = None
                nodes_to_annotate.extend(
                    self._add_annotation(rev_id, fulltext, parent_ids,
                                     left_matching_blocks=blocks))

    def _get_heads_provider(self):
        """Create a heads provider for resolving ancestry issues."""
        if self._heads_provider is not None:
            return self._heads_provider
        parent_provider = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        graph_obj = _mod_graph.Graph(parent_provider)
        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
        self._heads_provider = head_cache
        return head_cache

    def annotate(self, revision_id):
        """Return the annotated fulltext at the given revision.

        :param revision_id: The revision id for this file
        """
        records = self._get_build_graph(revision_id)
        if revision_id in self._ghosts:
            raise errors.RevisionNotPresent(revision_id, self._knit)
        self._annotate_records(records)
        return self._annotated_lines[revision_id]


try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data