        return KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)

    def get_data_stream(self, required_versions):
        """Get a data stream for the specified versions.

        Versions may be returned in any order, not necessarily the order
        specified. They are returned in a partial order by compression
        parent, so that the deltas can be applied as the data stream is
        inserted; however note that compression parents will not be sent
        unless they were specifically requested, as the client may already
        have them.

        :param required_versions: The exact set of versions to be extracted.
            Unlike some other knit methods, this is not used to generate a
            transitive closure, rather it is used precisely as given.

        :returns: format_signature, list of (version, options, length, parents),
            reader_callable.
        """
        required_version_set = frozenset(required_versions)
        version_index = {}
        # list of revisions that can just be sent without waiting for their
        # compression parent
        ready_to_send = []
        # map from revision to the children based on it
        deferred = {}
        # first, read all relevant index data, enough to sort into the right
        # order to return
        for version_id in required_versions:
            options = self._index.get_options(version_id)
            parents = self._index.get_parents_with_ghosts(version_id)
            index_memo = self._index.get_position(version_id)
            version_index[version_id] = (index_memo, options, parents)
            if ('line-delta' in options
                and parents[0] in required_version_set):
                # must wait until the parent has been sent
                deferred.setdefault(parents[0], []).append(version_id)
            else:
                # either a fulltext, or a delta whose parent the client did
                # not ask for and presumably already has
                ready_to_send.append(version_id)
        # build a list of results to return, plus instructions for data to
        # read from the file
        copy_queue_records = []
        temp_version_list = []
        while ready_to_send:
            # XXX: pushing and popping lists may be a bit inefficient
            version_id = ready_to_send.pop(0)
            (index_memo, options, parents) = version_index[version_id]
            copy_queue_records.append((version_id, index_memo))
            none, data_pos, data_size = index_memo
            temp_version_list.append((version_id, options, data_size,
                parents))
            if version_id in deferred:
                # now we can send all the children of this revision - we could
                # put them in anywhere, but we hope that sending them soon
                # after the fulltext will give good locality in the receiver
                ready_to_send[:0] = deferred.pop(version_id)
        assert len(deferred) == 0, \
            "Still have compressed child versions waiting to be sent"
        # XXX: The stream format is such that we cannot stream it - we have to
        # know the length of all the data a-priori.
        raw_datum = []
        result_version_list = []
        for (version_id, raw_data), \
            (version_id2, options, _, parents) in \
            izip(self._data.read_records_iter_raw(copy_queue_records),
                 temp_version_list):
            assert version_id == version_id2, \
                'logic error, inconsistent results'
            raw_datum.append(raw_data)
            result_version_list.append(
                (version_id, options, len(raw_data), parents))
        # provide a callback to get data incrementally.
        pseudo_file = StringIO(''.join(raw_datum))
        def read(length):
            if length is None:
                return pseudo_file.read()
            else:
                return pseudo_file.read(length)
        return (self.get_format_signature(), result_version_list, read)
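
    # A sketch of the tuple returned above, assuming two versions were
    # requested from an annotated knit and 'rev-2' is a delta against
    # 'rev-1' (the ids and lengths are illustrative only):
    #
    #   ('knit-annotated',
    #    [('rev-1', ['fulltext'], 120, []),
    #     ('rev-2', ['line-delta'], 40, ['rev-1'])],
    #    read)
    #
    # read(None) returns all remaining bytes; read(n) returns the next n.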

    def _extract_blocks(self, version_id, source, target):
        if self._index.get_method(version_id) != 'line-delta':
            return None
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)

    def get_delta(self, version_id):
        """Get a delta for constructing version from some other version."""
        self.check_not_reserved_id(version_id)
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        parents = self.get_parents(version_id)
        if len(parents):
            parent = parents[0]
        else:
            parent = None
        index_memo = self._index.get_position(version_id)
        data, sha1 = self._data.read_records(((version_id, index_memo),))[version_id]
        noeol = 'no-eol' in self._index.get_options(version_id)
        if 'fulltext' == self._index.get_method(version_id):
            new_content = self.factory.parse_fulltext(data, version_id)
            if parent is not None:
                reference_content = self._get_content(parent)
                old_texts = reference_content.text()
            else:
                old_texts = []
            new_texts = new_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                             new_texts)
            return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
        else:
            delta = self.factory.parse_line_delta(data, version_id)
            return parent, sha1, noeol, delta
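
    # A sketch of the value returned by get_delta, assuming the knit
    # line-delta hunk layout of (start, end, count, lines); ``kvf`` is a
    # hypothetical KnitVersionedFile instance, not part of this module:
    #
    #   parent, sha1, noeol, delta = kvf.get_delta('rev-2')
    #   # parent -> 'rev-1' (or None for a root version)
    #   # delta  -> [(0, 1, 1, [...]), ...] where the hunk lines are
    #   #           (origin, text) pairs for annotated knits and plain
    #   #           strings for plain knits.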

    def get_format_signature(self):
        """See VersionedFile.get_format_signature()."""
        if self.factory.annotated:
            annotated_part = "annotated"
        else:
            annotated_part = "plain"
        return "knit-%s" % (annotated_part,)

    def get_graph_with_ghosts(self):
        """See VersionedFile.get_graph_with_ghosts()."""
        graph_items = self._index.get_graph()
        return dict(graph_items)

    def insert_data_stream(self, (format, data_list, reader_callable)):
        """Insert knit records from a data stream into this knit.

        If a version in the stream is already present in this knit, it will
        not be inserted a second time. It will be checked for consistency
        with the stored version however, and may cause a KnitCorrupt error
        to be raised if the data in the stream disagrees with the already
        stored data.

        :seealso: get_data_stream
        """
        if format != self.get_format_signature():
            if 'knit' in debug.debug_flags:
                trace.mutter(
                    'incompatible format signature inserting to %r', self)
            source = self._knit_from_datastream(
                (format, data_list, reader_callable))
            self.join(source)
            return

        for version_id, options, length, parents in data_list:
            if self.has_version(version_id):
                # First check: the list of parents.
                my_parents = self.get_parents_with_ghosts(version_id)
                if tuple(my_parents) != tuple(parents):
                    # XXX: KnitCorrupt is not quite the right exception here.
                    raise KnitCorrupt(
                        self.filename,
                        'parents list %r from data stream does not match '
                        'already recorded parents %r for %s'
                        % (parents, my_parents, version_id))

                # Also check the SHA-1 of the fulltext this content will
                # produce.
                raw_data = reader_callable(length)
                my_fulltext_sha1 = self.get_sha1(version_id)
                df, rec = self._data._parse_record_header(version_id, raw_data)
                stream_fulltext_sha1 = rec[3]
                if my_fulltext_sha1 != stream_fulltext_sha1:
                    # Actually, we don't know if it's this knit that's corrupt,
                    # or the data stream we're trying to insert.
                    raise KnitCorrupt(
                        self.filename, 'sha-1 does not match %s' % version_id)
            else:
                if 'line-delta' in options:
                    # Make sure that this knit record is actually useful: a
                    # line-delta is no use unless we have its parent.
                    # Fetching from a broken repository with this problem
                    # shouldn't break the target repository.
                    #
                    # See https://bugs.launchpad.net/bzr/+bug/164443
                    if not self._index.has_version(parents[0]):
                        raise KnitCorrupt(
                            self.filename,
                            'line-delta from stream '
                            'for version %s '
                            'references '
                            'missing parent %s\n'
                            'Try running "bzr check" '
                            'on the source repository, and "bzr reconcile" '
                            'if necessary.' %
                            (version_id, parents[0]))
                self._add_raw_records(
                    [(version_id, options, parents, length)],
                    reader_callable(length))
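
    # A minimal usage sketch for the two stream methods above, assuming two
    # compatible knits named ``source`` and ``target`` (both names are
    # illustrative, not part of this module):
    #
    #   versions = source.versions()
    #   stream = source.get_data_stream(versions)
    #   target.insert_data_stream(stream)
    #
    # The stream tuple unpacks as (format, data_list, reader_callable);
    # records already present in ``target`` are verified rather than
    # inserted a second time.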

    def _knit_from_datastream(self, (format, data_list, reader_callable)):
        """Create a knit object from a data stream.

        This method exists to allow conversion of data streams that do not
        match the signature of this knit. Generally it will be slower and use
        more memory to use this method to insert data, but it will work.

        :seealso: get_data_stream for details on datastreams.
        :return: A knit versioned file which can be used to join the
            datastream into self.
        """
        if format == "knit-plain":
            factory = KnitPlainFactory()
        elif format == "knit-annotated":
            factory = KnitAnnotateFactory()
        else:
            raise errors.KnitDataStreamUnknown(format)
        index = _StreamIndex(data_list, self._index)
        access = _StreamAccess(reader_callable, index, self, factory)
        return KnitVersionedFile(self.filename, self.transport,
            factory=factory, index=index, access_method=access)

    def versions(self):
        """See VersionedFile.versions."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "versions scales with size of history")
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "has_version is a LBYL scenario")
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents, parent_texts={},
                           delta=None, annotated=None,
                           left_matching_blocks=None):
        """Merge annotations for content. This is done by comparing
        the annotations based on changes to the text.
        """
        if left_matching_blocks is not None:
            delta_seq = diff._PrematchedMatcher(left_matching_blocks)
        else:
            delta_seq = None
        if annotated:
            for parent_id in parents:
                merge_content = self._get_content(parent_id, parent_texts)
                if (parent_id == parents[0] and delta_seq is not None):
                    seq = delta_seq
                else:
                    seq = patiencediff.PatienceSequenceMatcher(
                        None, merge_content.text(), content.text())
                for i, j, n in seq.get_matching_blocks():
                    if n == 0:
                        continue
                    # this appears to copy (origin, text) pairs across to the
                    # new content for any line that matches the last-checked
                    # parent.
                    content._lines[j:j+n] = merge_content._lines[i:i+n]
        if delta:
            if delta_seq is None:
                reference_content = self._get_content(parents[0], parent_texts)
                new_texts = content.text()
                old_texts = reference_content.text()
                delta_seq = patiencediff.PatienceSequenceMatcher(
                    None, old_texts, new_texts)
            return self._make_line_delta(delta_seq, content)


class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knit parents; if not, do not record
            parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys; 1-tuples.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        return set([
            node[1] for node in self._get_entries(version_ids)])
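
    # A sketch of the key convention used throughout this class: version ids
    # are wrapped into 1-tuples to form GraphIndex keys, and unwrapped on the
    # way back out (see _version_ids_to_keys and _keys_to_version_ids below):
    #
    #   'rev-1'  <->  ('rev-1',)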

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # don't ask for ghosts - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph.iterkeys()
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(index_memo, compression_parent,
                                       parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        result = {}
        entries = self._get_entries(self._version_ids_to_keys(version_ids), True)
        for entry in entries:
            version_id = self._keys_to_version_ids((entry[1],))[0]
            if not self._parents:
                parents = ()
            else:
                parents = self._keys_to_version_ids(entry[3][0])
            if not self._deltas:
                compression_parent = None
            else:
                compression_parent_key = self._compression_parent(entry)
                if compression_parent_key:
                    compression_parent = self._keys_to_version_ids(
                        (compression_parent_key,))[0]
                else:
                    compression_parent = None
            noeol = (entry[2][0] == 'N')
            if compression_parent:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[version_id] = (self._node_to_position(entry),
                                  compression_parent, parents,
                                  (method, noeol))
        return result
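
    # A sketch of one returned mapping entry, under the assumption that
    # 'rev-2' is stored as a line-delta against 'rev-1' (ids and offsets
    # are illustrative only):
    #
    #   result['rev-2'] == ((index, 120, 40),        # index_memo
    #                       'rev-1',                  # compression_parent
    #                       ('rev-1',),               # parents
    #                       ('line-delta', False))    # (method, noeol)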

    def _compression_parent(self, an_entry):
        # return the key that an_entry is compressed against, or None
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        assert len(compression_parents) == 1
        return compression_parents[0]

    def _get_method(self, node):
        if not self._deltas:
            return 'fulltext'
        if self._compression_parent(node):
            return 'line-delta'
        else:
            return 'fulltext'

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. Not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(version_id)
        return self._node_to_position(node)

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        return self._get_method(self._get_node(version_id))

    def _get_node(self, version_id):
        try:
            return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]
        except IndexError:
            raise RevisionNotPresent(version_id, self)
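
    # A sketch of the index value format decoded by _node_to_position and
    # written by add_versions below: one flag byte ('N' for no-eol, ' '
    # otherwise) followed by "<offset> <size>", e.g.:
    #
    #   'N0 120'   ->  (node_index, 0, 120)    # no-eol record
    #   ' 120 45'  ->  (node_index, 120, 45)   # record ending in a newline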

    def get_options(self, version_id):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        node = self._get_node(version_id)
        options = [self._get_method(node)]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
            (version_id, options, access_memo, parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if (value, node_refs) != keys[key]:
                    raise KnitCorrupt(self, "inconsistent details in add_versions"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)
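
    # A sketch of the node_refs shapes produced above, assuming an index
    # with parents=True and a record whose sole parent is 'rev-1':
    #
    #   fulltext in a delta index:    ((('rev-1',),), ())
    #   line-delta in a delta index:  ((('rev-1',),), (('rev-1',),))
    #   any record, non-delta index:  ((('rev-1',),),)
    #
    # With parents=False the reference lists are dropped and node_refs is ().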

    def _version_ids_to_keys(self, version_ids):
        return set((version_id, ) for version_id in version_ids)


class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
        _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if not self._need_to_create:
            base = self._transport.append_bytes(self._filename, raw_data)
        else:
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            base = 0
        result = []
        for size in sizes:
            result.append((None, base, size))
            base += size
        return result
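
    # A sketch of the memos returned above: appending two records of 10 and
    # 20 bytes to a .knit whose current length is 100 would yield
    #
    #   [(None, 100, 10), (None, 110, 20)]
    #
    # i.e. (index, pos, length) with index always None for this access method.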

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            pass
        return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]
        for pos, data in self._transport.readv(self._filename, read_vector):
            yield data


class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data record.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)
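
    # A sketch of the first-pass grouping above: consecutive memos that share
    # an index are batched into a single readv request, so
    #
    #   [(idx_a, 0, 10), (idx_a, 10, 20), (idx_b, 0, 5)]
    #
    # becomes [(idx_a, [(0, 10), (10, 20)]), (idx_b, [(0, 5)])]. Only
    # adjacent runs are merged; interleaved indices start new groups.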

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, (transport, packname)):
        """Set a writer to use for adding data."""
        if index is not None:
            self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index


class _StreamAccess(object):
    """A Knit Access object that provides data from a datastream.

    It also provides a fallback that presents annotated data from a
    *backing* access object as unannotated data.

    This is triggered by an index_memo which is pointing to a different index
    than this was constructed with, and is used to allow extracting full
    unannotated texts for insertion into annotated knits.
    """

    def __init__(self, reader_callable, stream_index, backing_knit,
        orig_factory):
        """Create a _StreamAccess object.

        :param reader_callable: The reader_callable from the datastream.
            This is called to buffer all the data immediately, for
            random access.
        :param stream_index: The index the data stream this provides access to
            which will be present in native index_memo's.
        :param backing_knit: The knit object that will provide access to
            annotated texts which are not available in the stream, so as to
            create unannotated texts.
        :param orig_factory: The original content factory used to generate the
            stream. This is used for checking whether the thunk code for
            supporting _copy_texts will generate the correct form of data.
        """
        self.data = reader_callable(None)
        self.stream_index = stream_index
        self.backing_knit = backing_knit
        self.orig_factory = orig_factory

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (thunk_flag,
            index, start, end) memo for retrieving the bytes.
        :return: An iterator over the bytes of the records.
        """
        # use a generator for memory friendliness
        for thunk_flag, version_id, start, end in memos_for_retrieval:
            if version_id is self.stream_index:
                yield self.data[start:end]
                continue
            # we have been asked to thunk. This thunking only occurs when
            # we are obtaining plain texts from an annotated backing knit
            # so that _copy_texts will work.
            # We could improve performance here by scanning for where we need
            # to do this and using get_line_list, then interleaving the output
            # as desired. However, for now, this is sufficient.
            if self.orig_factory.__class__ != KnitPlainFactory:
                raise errors.KnitCorrupt(
                    self, 'Bad thunk request %r' % version_id)
            lines = self.backing_knit.get_lines(version_id)
            line_bytes = ''.join(lines)
            digest = sha_string(line_bytes)
            if lines:
                if lines[-1][-1] != '\n':
                    lines[-1] = lines[-1] + '\n'
                    line_bytes += '\n'
            orig_options = list(self.backing_knit._index.get_options(version_id))
            if 'fulltext' not in orig_options:
                if 'line-delta' not in orig_options:
                    raise errors.KnitCorrupt(self,
                        'Unknown compression method %r' % orig_options)
                orig_options.remove('line-delta')
                orig_options.append('fulltext')
            # We want plain data, because we expect to thunk only to allow text
            # extraction.
            size, bytes = self.backing_knit._data._record_to_data(version_id,
                digest, lines, line_bytes)
            yield bytes


class _StreamIndex(object):
    """A Knit Index object that uses the data map from a datastream."""

    def __init__(self, data_list, backing_index):
        """Create a _StreamIndex object.

        :param data_list: The data_list from the datastream.
        :param backing_index: The index which will supply values for nodes
            referenced outside of this stream.
        """
        self.data_list = data_list
        self.backing_index = backing_index
        self._by_version = {}
        pos = 0
        for key, options, length, parents in data_list:
            self._by_version[key] = options, (pos, pos + length), parents
            pos += length
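
    # A sketch of the offset bookkeeping above: records are laid out
    # back-to-back in the buffered stream, so a data_list of
    #
    #   [('rev-1', ['fulltext'], 100, []),
    #    ('rev-2', ['line-delta'], 40, ['rev-1'])]
    #
    # maps 'rev-1' to byte range (0, 100) and 'rev-2' to (100, 140).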

    def get_ancestry(self, versions, topo_sorted):
        """Get an ancestry list for versions."""
        if topo_sorted:
            # Not needed for basic joins
            raise NotImplementedError(self.get_ancestry)
        # get a graph of all the mentioned versions:
        # Little ugly - basically copied from KnitIndex, but don't want to
        # accidentally incorporate too much of that index's code.
        ancestry = set()
        pending = set(versions)
        cache = self._by_version
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][2] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in ancestry])
            ancestry.add(version)
        return list(ancestry)

    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(index_memo, compression_parent,
                                       parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        result = {}
        for version_id in version_ids:
            try:
                method = self.get_method(version_id)
            except errors.RevisionNotPresent:
                # ghosts are omitted
                continue
            parent_ids = self.get_parents_with_ghosts(version_id)
            noeol = ('no-eol' in self.get_options(version_id))
            if method == 'fulltext':
                compression_parent = None
            else:
                compression_parent = parent_ids[0]
            index_memo = self.get_position(version_id)
            result[version_id] = (index_memo, compression_parent,
                                  parent_ids, (method, noeol))
        return result

    def get_method(self, version_id):
        """Return compression method of specified version."""
        try:
            options = self._by_version[version_id][0]
        except KeyError:
            # Strictly speaking this should check in the backing knit, but
            # until we have a test to discriminate, this will do.
            return self.backing_index.get_method(version_id)
        if 'fulltext' in options:
            return 'fulltext'
        elif 'line-delta' in options:
            return 'line-delta'
        else:
            raise errors.KnitIndexUnknownMethod(self, options)

    def get_options(self, version_id):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        try:
            return self._by_version[version_id][0]
        except KeyError:
            return self.backing_index.get_options(version_id)

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        try:
            return self._by_version[version_id][2]
        except KeyError:
            return self.backing_index.get_parents_with_ghosts(version_id)

    def get_position(self, version_id):
        """Return details needed to access the version.

        _StreamAccess has the data as a big array, so we return slice
        coordinates into that (as index_memo's are opaque outside the
        index and matching access class).

        :return: a tuple (thunk_flag, index, start, end). If thunk_flag is
            False, index will be self, otherwise it will be a version id.
        """
        try:
            start, end = self._by_version[version_id][1]
            return False, self, start, end
        except KeyError:
            # Signal to the access object to handle this from the backing knit.
            return (True, version_id, None, None)

    def get_versions(self):
        """Get all the versions in the stream."""
        return self._by_version.keys()

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        result = []
        for version in version_ids:
            try:
                result.append((version, self._by_version[version][2]))
            except KeyError:
                pass
        return result


class _KnitData(object):
    """Manage extraction of data from a KnitAccess, caching and decompressing.

    The KnitData class provides the logic for parsing and using knit records,
    making use of an access method for the low level read and write operations.
    """

    def __init__(self, access):
        """Create a KnitData object.

        :param access: The access method to use. Access methods such as
            _KnitAccess manage the insertion of raw records and the subsequent
            retrieval of the same.
        """
        self._access = access
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount


InterVersionedFile.register_optimiser(WeaveToKnit)


# Deprecated, use PatienceSequenceMatcher instead
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher


def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.
    """
    annotator = _KnitAnnotator(knit)
    return iter(annotator.annotate(revision_id))
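

# A sketch of how annotate_knit is used, assuming ``kvf`` is a hypothetical
# KnitVersionedFile holding the text history of one file:
#
#   for origin, line in annotate_knit(kvf, 'rev-2'):
#       print origin, line,
#
# Each yielded pair is (revision_id that introduced the line, line text),
# as assembled by _KnitAnnotator below.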


class _KnitAnnotator(object):
    """Build up the annotations for a text."""

    def __init__(self, knit):
        self._knit = knit

        # Content objects, differs from fulltexts because of how final newlines
        # are treated by knits. the content objects here will always have a
        # final newline
        self._fulltext_contents = {}

        # Annotated lines of specific revisions
        self._annotated_lines = {}

        # Track the raw data for nodes that we could not process yet.
        # This maps the revision_id of the base to a list of children that will
        # be annotated from it.
        self._pending_children = {}

        # Nodes which cannot be extracted
        self._ghosts = set()

        # Track how many children this node has, so we know if we need to keep
        # it
        self._annotate_children = {}
        self._compression_children = {}

        self._all_build_details = {}
        # The children => parent revision_id graph
        self._revision_id_graph = {}

        self._heads_provider = None

        self._nodes_to_keep_annotations = set()
        self._generations_until_keep = 100

    def set_generations_until_keep(self, value):
        """Set the number of generations before caching a node.

        Setting this to -1 will cache every merge node, setting this higher
        will cache fewer nodes.
        """
        self._generations_until_keep = value

    def _add_fulltext_content(self, revision_id, content_obj):
        self._fulltext_contents[revision_id] = content_obj
        # TODO: jam 20080305 It might be good to check the sha1digest here
        return content_obj.text()

    def _check_parents(self, child, nodes_to_annotate):
        """Check if all parents have been processed.

        :param child: A tuple of (rev_id, parents, raw_content)
        :param nodes_to_annotate: If child is ready, add it to
            nodes_to_annotate, otherwise put it back in self._pending_children
        """
        for parent_id in child[1]:
            if (parent_id not in self._annotated_lines):
                # This parent is not yet annotated, so defer the child until
                # it is.
                self._pending_children.setdefault(parent_id,
                                                  []).append(child)
                break
        else:
            # This one is ready to be processed
            nodes_to_annotate.append(child)

    def _add_annotation(self, revision_id, fulltext, parent_ids,
                        left_matching_blocks=None):
        """Add an annotation entry.

        All parents should already have been annotated.
        :return: A list of children that now have their parents satisfied.
        """
        a = self._annotated_lines
        annotated_parent_lines = [a[p] for p in parent_ids]
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
                          fulltext, revision_id, left_matching_blocks,
                          heads_provider=self._get_heads_provider()))
        self._annotated_lines[revision_id] = annotated_lines
        for p in parent_ids:
            ann_children = self._annotate_children[p]
            ann_children.remove(revision_id)
            if (not ann_children
                and p not in self._nodes_to_keep_annotations):
                del self._annotated_lines[p]
                del self._all_build_details[p]
                if p in self._fulltext_contents:
                    del self._fulltext_contents[p]
        # Now that we've added this one, see if there are any pending
        # deltas to be done, certainly this parent is finished
        nodes_to_annotate = []
        for child in self._pending_children.pop(revision_id, []):
            self._check_parents(child, nodes_to_annotate)
        return nodes_to_annotate

    def _get_build_graph(self, revision_id):
        """Get the graphs for building texts and annotations.

        The data you need for creating a full text may be different than the
        data you need to annotate that text. (At a minimum, you need both
        parents to create an annotation, but only need 1 parent to generate
        the fulltext.)

        :return: A list of (revision_id, index_memo) records, suitable for
            passing to read_records_iter to start reading in the raw data from
            the pack file.
        """
        if revision_id in self._annotated_lines:
            # Nothing to do
            return []
        pending = set([revision_id])
        records = []
        generation = 0
        kept_generation = 0
        while pending:
            # get all pending nodes
            generation += 1
            this_iteration = pending
            build_details = self._knit._index.get_build_details(this_iteration)
            self._all_build_details.update(build_details)
            # new_nodes = self._knit._index._get_entries(this_iteration)
            pending = set()
            for rev_id, details in build_details.iteritems():
                (index_memo, compression_parent, parents,
                 record_details) = details
                self._revision_id_graph[rev_id] = parents
                records.append((rev_id, index_memo))
                # Do we actually need to check _annotated_lines?
                pending.update(p for p in parents
                                 if p not in self._all_build_details)
                if compression_parent:
                    self._compression_children.setdefault(compression_parent,
                        []).append(rev_id)
                if parents:
                    for parent in parents:
                        self._annotate_children.setdefault(parent,
                            []).append(rev_id)
                    num_gens = generation - kept_generation
                    if ((num_gens >= self._generations_until_keep)
                        and len(parents) > 1):
                        kept_generation = generation
                        self._nodes_to_keep_annotations.add(rev_id)

            missing_versions = this_iteration.difference(build_details.keys())
            self._ghosts.update(missing_versions)
            for missing_version in missing_versions:
                # add a key, no parents
                self._revision_id_graph[missing_version] = ()
                pending.discard(missing_version) # don't look for it
        # XXX: This should probably be a real exception, as it is a data
        #      inconsistency
        assert not self._ghosts.intersection(self._compression_children), \
            "We cannot have nodes which have a compression parent of a ghost."
        # Cleanout anything that depends on a ghost so that we don't wait for
        # the ghost to show up
        for node in self._ghosts:
            if node in self._annotate_children:
                # We won't be building this node
                del self._annotate_children[node]
        # Generally we will want to read the records in reverse order, because
        # we find the parent nodes after the children
        records.reverse()
        return records

    def _annotate_records(self, records):
        """Build the annotations for the listed records."""
        # We iterate in the order read, rather than a strict order requested
        # However, process what we can, and put off to the side things that
        # still need parents, cleaning them up when those parents are
        # processed.
        for (rev_id, record,
             digest) in self._knit._data.read_records_iter(records):
            if rev_id in self._annotated_lines:
                continue
            parent_ids = self._revision_id_graph[rev_id]
            parent_ids = [p for p in parent_ids if p not in self._ghosts]
            details = self._all_build_details[rev_id]
            (index_memo, compression_parent, parents,
             record_details) = details
            nodes_to_annotate = []
            # TODO: Remove the punning between compression parents and
            #       parent_ids; we should be able to do this without
            #       conflating the two.
            if len(parent_ids) == 0:
                # There are no parents for this node, so just add it
                # TODO: This probably needs to be decoupled
                assert compression_parent is None
                fulltext_content, delta = self._knit.factory.parse_record(
                    rev_id, record, record_details, None)
                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                    parent_ids, left_matching_blocks=None))
            else:
                child = (rev_id, parent_ids, record)
                # Check if all the parents are present
                self._check_parents(child, nodes_to_annotate)
            while nodes_to_annotate:
                # Should we use a queue here instead of a stack?
                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
                (index_memo, compression_parent, parents,
                 record_details) = self._all_build_details[rev_id]
                if compression_parent is not None:
                    comp_children = self._compression_children[compression_parent]
                    assert rev_id in comp_children
                    # If there is only 1 child, it is safe to reuse this
                    # content
                    reuse_content = (len(comp_children) == 1
                        and compression_parent not in
                            self._nodes_to_keep_annotations)
                    if reuse_content:
                        # Remove it from the cache since it will be changing
                        parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
                        # Make sure to copy the fulltext since it might be
                        # modified
                        parent_fulltext = list(parent_fulltext_content.text())
                    else:
                        parent_fulltext_content = self._fulltext_contents[compression_parent]
                        parent_fulltext = parent_fulltext_content.text()
                    comp_children.remove(rev_id)
                    fulltext_content, delta = self._knit.factory.parse_record(
                        rev_id, record, record_details,
                        parent_fulltext_content,
                        copy_base_content=(not reuse_content))
                    fulltext = self._add_fulltext_content(rev_id,
                                                          fulltext_content)
                    blocks = KnitContent.get_line_delta_blocks(delta,
                            parent_fulltext, fulltext)
                else:
                    fulltext_content = self._knit.factory.parse_fulltext(
                        record, rev_id)
                    fulltext = self._add_fulltext_content(rev_id,
                                                          fulltext_content)
                    blocks = None
                nodes_to_annotate.extend(
                    self._add_annotation(rev_id, fulltext, parent_ids,
                                         left_matching_blocks=blocks))

    def _get_heads_provider(self):
        """Create a heads provider for resolving ancestry issues."""
        if self._heads_provider is not None:
            return self._heads_provider
        parent_provider = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        graph_obj = _mod_graph.Graph(parent_provider)
        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
        self._heads_provider = head_cache
        return head_cache

    def annotate(self, revision_id):
        """Return the annotated fulltext at the given revision.

        :param revision_id: The revision id for this file
        """
        records = self._get_build_graph(revision_id)
        if revision_id in self._ghosts:
            raise errors.RevisionNotPresent(revision_id, self._knit)
        self._annotate_records(records)
        return self._annotated_lines[revision_id]


try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data