3830.3.12
by Martin Pool
Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks |
1 |
# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
16 |
|
17 |
"""Knit versionedfile implementation.
|
|
18 |
||
19 |
A knit is a versioned file implementation that supports efficient append only
|
|
20 |
updates.
|
|
1563.2.6
by Robert Collins
Start check tests for knits (pending), and remove dead code. |
21 |
|
22 |
Knit file layout:
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
23 |
lifeless: the data file is made up of "delta records". each delta record has a delta header
|
24 |
that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of
|
|
25 |
the -expanded data- (ie, the delta applied to the parent). the delta also ends with a
|
|
1563.2.6
by Robert Collins
Start check tests for knits (pending), and remove dead code. |
26 |
end-marker; simply "end VERSION"
|
27 |
||
28 |
delta can be line or full contents.
|
|
29 |
... the 8's there are the index number of the annotation.
|
|
30 |
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
|
|
31 |
59,59,3
|
|
32 |
8
|
|
33 |
8 if ie.executable:
|
|
34 |
8 e.set('executable', 'yes')
|
|
35 |
130,130,2
|
|
36 |
8 if elt.get('executable') == 'yes':
|
|
37 |
8 ie.executable = True
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
38 |
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad
|
1563.2.6
by Robert Collins
Start check tests for knits (pending), and remove dead code. |
39 |
|
40 |
||
41 |
what's in an index:
|
|
42 |
09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
|
|
43 |
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
|
|
44 |
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
|
|
45 |
09:33 < lifeless> right
|
|
46 |
09:33 < jrydberg> lifeless: the position and size is the range in the data file
|
|
47 |
||
48 |
||
49 |
so the index sequence is the dictionary compressed sequence number used
|
|
50 |
in the deltas to provide line annotation
|
|
51 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
52 |
"""
|
53 |
||
1563.2.6
by Robert Collins
Start check tests for knits (pending), and remove dead code. |
54 |
|
1563.2.11
by Robert Collins
Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis. |
55 |
from cStringIO import StringIO |
4398.8.8
by John Arbash Meinel
Respond to Andrew's review comments. |
56 |
from itertools import izip |
1756.2.17
by Aaron Bentley
Fixes suggested by John Meinel |
57 |
import operator |
1563.2.6
by Robert Collins
Start check tests for knits (pending), and remove dead code. |
58 |
import os |
3789.2.1
by John Arbash Meinel
_DirectPackAccess can now raise RetryWithNewPacks when we think something has happened. |
59 |
import sys |
1594.2.19
by Robert Collins
More coalescing tweaks, and knit feedback. |
60 |
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
61 |
from bzrlib.lazy_import import lazy_import |
62 |
lazy_import(globals(), """ |
|
63 |
from bzrlib import (
|
|
2770.1.1
by Aaron Bentley
Initial implmentation of plain knit annotation |
64 |
annotate,
|
3535.5.1
by John Arbash Meinel
cleanup a few imports to be lazily loaded. |
65 |
debug,
|
66 |
diff,
|
|
3224.1.10
by John Arbash Meinel
Introduce the heads_provider for reannotate. |
67 |
graph as _mod_graph,
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
68 |
index as _mod_index,
|
2998.2.2
by John Arbash Meinel
implement a faster path for copying from packs back to knits. |
69 |
lru_cache,
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
70 |
pack,
|
3535.5.1
by John Arbash Meinel
cleanup a few imports to be lazily loaded. |
71 |
progress,
|
2745.1.2
by Robert Collins
Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly. |
72 |
trace,
|
3224.5.1
by Andrew Bennetts
Lots of assorted hackery to reduce the number of imports for common operations. Improves 'rocks', 'st' and 'help' times by ~50ms on my laptop. |
73 |
tsort,
|
3535.5.1
by John Arbash Meinel
cleanup a few imports to be lazily loaded. |
74 |
tuned_gzip,
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
75 |
)
|
76 |
""") |
|
1911.2.3
by John Arbash Meinel
Moving everything into a new location so that we can cache more than just revision ids |
77 |
from bzrlib import ( |
78 |
errors, |
|
2249.5.12
by John Arbash Meinel
Change the APIs for VersionedFile, Store, and some of Repository into utf-8 |
79 |
osutils, |
2104.4.2
by John Arbash Meinel
Small cleanup and NEWS entry about fixing bug #65714 |
80 |
patiencediff, |
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
81 |
)
|
82 |
from bzrlib.errors import ( |
|
83 |
FileExists, |
|
84 |
NoSuchFile, |
|
85 |
KnitError, |
|
86 |
InvalidRevisionId, |
|
87 |
KnitCorrupt, |
|
88 |
KnitHeaderError, |
|
89 |
RevisionNotPresent, |
|
90 |
RevisionAlreadyPresent, |
|
3787.1.1
by Robert Collins
Embed the failed text in sha1 knit errors. |
91 |
SHA1KnitCorrupt, |
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
92 |
)
|
93 |
from bzrlib.osutils import ( |
|
94 |
contains_whitespace, |
|
95 |
contains_linebreaks, |
|
2850.1.1
by Robert Collins
* ``KnitVersionedFile.add*`` will no longer cache added records even when |
96 |
sha_string, |
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
97 |
sha_strings, |
3350.3.8
by Robert Collins
Basic stream insertion, no fast path yet for knit to knit. |
98 |
split_lines, |
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
99 |
)
|
3350.3.3
by Robert Collins
Functional get_record_stream interface tests covering full interface. |
100 |
from bzrlib.versionedfile import ( |
3350.3.12
by Robert Collins
Generate streams with absent records. |
101 |
AbsentContentFactory, |
3350.3.8
by Robert Collins
Basic stream insertion, no fast path yet for knit to knit. |
102 |
adapter_registry, |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
103 |
ConstantMapper, |
3350.3.3
by Robert Collins
Functional get_record_stream interface tests covering full interface. |
104 |
ContentFactory, |
3890.2.1
by John Arbash Meinel
Start working on a ChunkedContentFactory. |
105 |
ChunkedContentFactory, |
4111.1.1
by Robert Collins
Add a groupcompress sort order. |
106 |
sort_groupcompress, |
3350.3.3
by Robert Collins
Functional get_record_stream interface tests covering full interface. |
107 |
VersionedFile, |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
108 |
VersionedFiles, |
3350.3.3
by Robert Collins
Functional get_record_stream interface tests covering full interface. |
109 |
)
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
110 |
|
111 |
||
112 |
# TODO: Split out code specific to this format into an associated object.
|
|
113 |
||
114 |
# TODO: Can we put in some kind of value to check that the index and data
|
|
115 |
# files belong together?
|
|
116 |
||
1759.2.1
by Jelmer Vernooij
Fix some types (found using aspell). |
117 |
# TODO: accommodate binaries, perhaps by storing a byte count
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
118 |
|
119 |
# TODO: function to check whole file
|
|
120 |
||
121 |
# TODO: atomically append data, then measure backwards from the cursor
|
|
122 |
# position after writing to work out where it was located. we may need to
|
|
123 |
# bypass python file buffering.
|
|
124 |
||
125 |
DATA_SUFFIX = '.knit' |
|
126 |
INDEX_SUFFIX = '.kndx' |
|
4039.3.2
by John Arbash Meinel
Batch get_record_stream(fulltexts) into 5MB requests. |
127 |
_STREAM_MIN_BUFFER_SIZE = 5*1024*1024 |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
128 |
|
129 |
||
3350.3.4
by Robert Collins
Finish adapters for annotated knits to unannotated knits and full texts. |
130 |
class KnitAdapter(object):
    """Common base for the adapters that convert knit records."""

    def __init__(self, basis_vf):
        """Set up the helper objects shared by every knit adapter.

        :param basis_vf: The versioned file that basis texts of deltas are
            read from. Adapters that never need a basis text may be given
            None.
        """
        self._basis_vf = basis_vf
        # One content factory per on-disk record flavour.
        self._plain_factory = KnitPlainFactory()
        self._annotate_factory = KnitAnnotateFactory()
        # Built with both constructor arguments as None; the adapters in
        # this file only call its record parsing/serialising helpers.
        self._data = KnitVersionedFiles(None, None)
3350.3.4
by Robert Collins
Finish adapters for annotated knits to unannotated knits and full texts. |
143 |
|
144 |
||
145 |
class FTAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated fulltext knit record into a plain one."""

    def get_bytes(self, factory):
        """Re-serialise factory's record with the annotations removed.

        :param factory: A content factory whose _raw_record attribute holds
            the annotated knit record bytes.
        :return: The record bytes produced by _record_to_data for the
            unannotated text.
        """
        raw_record = factory._raw_record
        header, body = self._data._parse_record_unchecked(raw_record)
        # header[1] / header[3] are presumably the version id and digest
        # fields of the record header described in the module docstring.
        version_id = header[1]
        annotated = self._annotate_factory.parse_fulltext(body, version_id)
        _size, record_bytes = self._data._record_to_data(
            (version_id,), header[3], annotated.text())
        return record_bytes
155 |
||
156 |
||
157 |
class DeltaAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated delta knit record into a plain delta."""

    def get_bytes(self, factory):
        """Re-serialise factory's delta record with annotations removed.

        :param factory: A content factory whose _raw_record attribute holds
            the annotated knit delta record bytes.
        :return: The record bytes produced by _record_to_data for the
            unannotated delta.
        """
        raw_record = factory._raw_record
        header, body = self._data._parse_record_unchecked(raw_record)
        # header[1] / header[3] are presumably the version id and digest
        # fields of the record header described in the module docstring.
        version_id = header[1]
        # plain=True asks the annotated parser for a delta in plain form,
        # which is then lowered with the plain factory.
        plain_delta = self._annotate_factory.parse_line_delta(
            body, version_id, plain=True)
        lowered = self._plain_factory.lower_line_delta(plain_delta)
        _size, record_bytes = self._data._record_to_data(
            (version_id,), header[3], lowered)
        return record_bytes
169 |
||
170 |
||
171 |
class FTAnnotatedToFullText(KnitAdapter):
    """Adapter turning an annotated fulltext knit record into a full text."""

    def get_bytes(self, factory):
        """Return the full text held in factory's annotated record.

        :param factory: A content factory providing _raw_record, key and
            _build_details for an annotated fulltext record.
        :return: The text as a single string.
        """
        raw_record = factory._raw_record
        _header, body = self._data._parse_record_unchecked(raw_record)
        # No basis content is passed (None): a fulltext record stands alone.
        content, _delta = self._annotate_factory.parse_record(
            factory.key[-1], body, factory._build_details, None)
        text_lines = content.text()
        return ''.join(text_lines)
|
181 |
||
182 |
||
183 |
class DeltaAnnotatedToFullText(KnitAdapter):
    """Adapter turning an annotated delta knit record into a full text."""

    def get_bytes(self, factory):
        """Rebuild the full text for factory's annotated delta record.

        The compression parent (factory.parents[0]) is fetched from the
        basis versioned file and the delta is applied on top of it.

        :param factory: A content factory providing _raw_record, parents
            and _build_details for an annotated delta record.
        :return: The text as a single string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        raw_record = factory._raw_record
        header, body = self._data._parse_record_unchecked(raw_record)
        version_id = header[1]
        delta = self._annotate_factory.parse_line_delta(
            body, version_id, plain=True)
        parent_key = factory.parents[0]
        stream = self._basis_vf.get_record_stream(
            [parent_key], 'unordered', True)
        basis_record = stream.next()
        if basis_record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(parent_key, self._basis_vf)
        basis_lines = osutils.chunks_to_lines(
            basis_record.get_bytes_as('chunked'))
        # The record is annotated but the basis is plain, so the delta
        # (parsed in plain form above) is applied by hand.
        new_content = PlainKnitContent(basis_lines, parent_key)
        new_content.apply_delta(delta, version_id)
        new_content._should_strip_eol = factory._build_details[1]
        return ''.join(new_content.text())
|
205 |
||
206 |
||
3350.3.5
by Robert Collins
Create adapters from plain compressed knit content. |
207 |
class FTPlainToFullText(KnitAdapter):
    """Adapter turning a plain fulltext knit record into a full text."""

    def get_bytes(self, factory):
        """Return the full text held in factory's plain record.

        :param factory: A content factory providing _raw_record, key and
            _build_details for a plain fulltext record.
        :return: The text as a single string.
        """
        raw_record = factory._raw_record
        _header, body = self._data._parse_record_unchecked(raw_record)
        # No basis content is passed (None): a fulltext record stands alone.
        content, _delta = self._plain_factory.parse_record(
            factory.key[-1], body, factory._build_details, None)
        text_lines = content.text()
        return ''.join(text_lines)
|
217 |
||
218 |
||
219 |
class DeltaPlainToFullText(KnitAdapter):
    """Adapter turning a plain delta knit record into a full text."""

    def get_bytes(self, factory):
        """Rebuild the full text for factory's plain delta record.

        The compression parent (factory.parents[0]) is fetched from the
        basis versioned file and handed to the plain factory as the basis
        content for the record.

        :param factory: A content factory providing _raw_record, parents
            and _build_details for a plain delta record.
        :return: The text as a single string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        raw_record = factory._raw_record
        header, body = self._data._parse_record_unchecked(raw_record)
        version_id = header[1]
        # NOTE(review): the parsed delta is not used below; parse_record is
        # handed the raw body and presumably parses it again - confirm
        # before dropping this call.
        self._plain_factory.parse_line_delta(body, version_id)
        parent_key = factory.parents[0]
        # XXX: string splitting overhead.
        stream = self._basis_vf.get_record_stream(
            [parent_key], 'unordered', True)
        basis_record = stream.next()
        if basis_record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(parent_key, self._basis_vf)
        basis_lines = osutils.chunks_to_lines(
            basis_record.get_bytes_as('chunked'))
        basis_content = PlainKnitContent(basis_lines, parent_key)
        # Both the record and the basis are plain, so the plain factory can
        # build the new content from the basis directly.
        content, _delta = self._plain_factory.parse_record(
            version_id, body, factory._build_details, basis_content)
        return ''.join(content.text())
|
241 |
||
242 |
||
3350.3.3
by Robert Collins
Functional get_record_stream interface tests covering full interface. |
243 |
class KnitContentFactory(ContentFactory):
    """ContentFactory for a single record streamed out of a knit.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
            annotated, knit=None, network_bytes=None):
        """Create a KnitContentFactory for key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional object asked for lines/text (get_lines /
            get_text) when no direct conversion is available.
        :param network_bytes: None to calculate the network bytes on demand,
            not-none if they are already known.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        # The storage kind is 'knit-[annotated-]{delta,ft}-gz'.
        if build_details[0] == 'line-delta':
            suffix = 'delta-gz'
        else:
            suffix = 'ft-gz'
        if annotated:
            suffix = 'annotated-' + suffix
        self.storage_kind = 'knit-' + suffix
        self._raw_record = raw_record
        self._network_bytes = network_bytes
        self._build_details = build_details
        self._knit = knit

    def _create_network_bytes(self):
        """Serialise this record for the network and cache the result.

        The layout is: storage kind, key and parents on one line each,
        then a one character no-eol flag immediately followed by the raw
        record. The result is stored in self._network_bytes.
        """
        # storage_kind, key, parents, Noeol, raw_record
        if self.parents is None:
            parent_bytes = 'None:'
        else:
            # Key elements are NUL separated, parent keys tab separated.
            parent_bytes = '\t'.join(
                ['\x00'.join(parent) for parent in self.parents])
        noeol = ' '
        if self._build_details[1]:
            noeol = 'N'
        fields = (self.storage_kind, '\x00'.join(self.key), parent_bytes,
            noeol, self._raw_record)
        self._network_bytes = "%s\n%s\n%s\n%s%s" % fields

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        :param storage_kind: This record's own storage kind, 'chunked' or
            'fulltext'.
        :return: For the record's own storage kind, the network
            serialisation (created on first use). For 'fulltext' a string
            and for 'chunked' a list of strings.
        :raises errors.UnavailableRepresentation: If the request cannot be
            satisfied from the record or the optional knit.
        """
        if storage_kind == self.storage_kind:
            if self._network_bytes is None:
                self._create_network_bytes()
            return self._network_bytes
        wants_text = storage_kind in ('chunked', 'fulltext')
        if wants_text and '-ft-' in self.storage_kind:
            # Fulltext records convert directly through the adapter
            # registered for (own storage kind, 'fulltext'); no basis
            # versioned file is needed, hence None.
            make_adapter = adapter_registry.get(
                (self.storage_kind, 'fulltext'))
            text = make_adapter(None).get_bytes(self)
            if storage_kind == 'chunked':
                return [text]
            return text
        if wants_text and self._knit is not None:
            # Not redundant with direct conversion above - that only handles
            # fulltext cases.
            if storage_kind == 'chunked':
                return self._knit.get_lines(self.key[0])
            return self._knit.get_text(self.key[0])
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
|
3350.3.3
by Robert Collins
Functional get_record_stream interface tests covering full interface. |
321 |
|
322 |
||
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
323 |
class LazyKnitContentFactory(ContentFactory):
    """A ContentFactory yielding either a full text or the wire form.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, generator, first):
        """Create a LazyKnitContentFactory.

        :param key: The key of the record.
        :param parents: The parents of the record.
        :param generator: A _ContentMapGenerator containing the record for
            this key.
        :param first: Is this the first content object returned from
            generator? If it is, its storage kind is knit-delta-closure,
            otherwise it is knit-delta-closure-ref.
        """
        self.key = key
        self.parents = parents
        self.sha1 = None
        self._generator = generator
        self._first = first
        if first:
            self.storage_kind = "knit-delta-closure"
        else:
            self.storage_kind = "knit-delta-closure-ref"

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        :param storage_kind: This record's own storage kind, 'chunked' or
            'fulltext'.
        :return: For the record's own storage kind, the generator's wire
            bytes if this is the first record and '' otherwise. For
            'chunked' the text chunks, for 'fulltext' those chunks joined.
        :raises errors.UnavailableRepresentation: For any other kind.
        """
        if storage_kind == self.storage_kind:
            if not self._first:
                # all the keys etc are contained in the bytes returned in
                # the first record.
                return ''
            return self._generator._wire_bytes()
        if storage_kind not in ('chunked', 'fulltext'):
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                self.storage_kind)
        text_chunks = self._generator._get_one_work(self.key).text()
        if storage_kind == 'chunked':
            return text_chunks
        return ''.join(text_chunks)
|
365 |
||
366 |
||
367 |
def knit_delta_closure_to_records(storage_kind, bytes, line_end):
    """Convert a network record to an iterator over stream records.

    :param storage_kind: The storage kind of the record.
        Must be 'knit-delta-closure'.
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset into bytes, handed unchanged to
        _NetworkContentMapGenerator together with bytes.
    :return: The result of the generator's get_record_stream().
    """
    return _NetworkContentMapGenerator(bytes, line_end).get_record_stream()
|
376 |
||
377 |
||
4005.3.2
by Robert Collins
First passing NetworkRecordStream test - a fulltext from any record type which isn't a chunked or fulltext can be serialised and deserialised successfully. |
378 |
def knit_network_to_record(storage_kind, bytes, line_end):
    """Convert a network record to a record object.

    Starting at line_end, the bytes are read as a key line, a parents
    line, a one character no-eol flag and then the raw knit record.

    :param storage_kind: The storage kind of the record.
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset in bytes at which the key line starts.
    :return: A one element list holding the KnitContentFactory built from
        the record.
    """
    # Key line: NUL separated key elements.
    key_end = bytes.find('\n', line_end)
    key = tuple(bytes[line_end:key_end].split('\x00'))
    # Parents line: 'None:' or tab separated keys.
    parents_start = key_end + 1
    parents_end = bytes.find('\n', parents_start)
    parent_line = bytes[parents_start:parents_end]
    if parent_line == 'None:':
        parents = None
    else:
        parent_keys = []
        for segment in parent_line.split('\t'):
            # Empty segments (e.g. from an empty parents line) are skipped.
            if segment:
                parent_keys.append(tuple(segment.split('\x00')))
        parents = tuple(parent_keys)
    # A single flag character precedes the raw record: 'N' means no-eol.
    flag_pos = parents_end + 1
    noeol = bytes[flag_pos] == 'N'
    if 'ft' in storage_kind:
        method = 'fulltext'
    else:
        method = 'line-delta'
    build_details = (method, noeol)
    raw_record = bytes[flag_pos + 1:]
    annotated = 'annotated' in storage_kind
    # The sha1 is not carried in the network form, so None is passed; the
    # already serialised bytes are kept to avoid rebuilding them.
    record = KnitContentFactory(key, parents, build_details, None,
        raw_record, annotated, network_bytes=bytes)
    return [record]
|
4005.3.2
by Robert Collins
First passing NetworkRecordStream test - a fulltext from any record type which isn't a chunked or fulltext can be serialised and deserialised successfully. |
408 |
|
409 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
410 |
class KnitContent(object):
    """In-memory content of one knit version, onto which deltas are applied.

    The text is kept as a list of newline-terminated lines together with a
    flag (_should_strip_eol) recording whether the final newline is really
    part of the text, because that matches the on-disk knit representation.
    """

    def __init__(self):
        # Subclasses store the lines; here we only start out assuming the
        # trailing newline is genuine.
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Turn this content into new_version_id by applying delta.

        Must be provided by subclasses.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta leading from this content to new_lines.

        :param new_lines: Another content object; its text() is diffed
            against ours and its _lines supply the replacement data.
        :return: An iterator of (old_start, old_end, new_count, new_data)
            tuples, one for every non-equal opcode.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_lo, old_hi, new_lo, new_hi = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                yield (old_lo, old_hi, new_hi - new_lo,
                       new_lines._lines[new_lo:new_hi])

    def line_delta(self, new_lines):
        """Return line_delta_iter(new_lines) as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta.

        :param knit_delta: Iterable of (start, end, count, lines) hunks.
        :param source: The lines the delta applies to.
        :param target: The lines the delta produces.
        :return: An iterator of (source_pos, target_pos, length) tuples,
            finishing with a zero-length sentinel block.
        """
        def trimmed(src_at, tgt_at, length):
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so the last
            # line of each run is compared and dropped when it differs.
            if length > 0:
                last = length - 1
                if source[src_at + last] != target[tgt_at + last]:
                    return last
            return length

        target_len = len(target)
        src_at = 0
        tgt_at = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_at
            run = trimmed(src_at, tgt_at, gap)
            if run > 0:
                yield src_at, tgt_at, run
            tgt_at += inserted + gap
            src_at = hunk_end
        tail = target_len - tgt_at
        run = trimmed(src_at, tgt_at, tail)
        if run > 0:
            yield src_at, tgt_at, run
        yield src_at + tail, target_len, 0
464 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
465 |
|
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
466 |
class AnnotatedKnitContent(KnitContent):
    """Annotated content.

    Every entry of _lines is an (origin, text) tuple.
    """

    def __init__(self, lines):
        """Create content over lines.

        :param lines: A list of (origin, text) tuples. The list is kept by
            reference and is modified in place by apply_delta().
        """
        KnitContent.__init__(self)
        self._lines = lines

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        lines = self._lines[:]
        if self._should_strip_eol:
            # Only the copy is altered; the stored lines keep their newline.
            origin, last_line = lines[-1]
            lines[-1] = (origin, last_line.rstrip('\n'))
        return lines

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        :param delta: Iterable of (start, end, count, delta_lines) hunks
            whose positions refer to the content before the delta.
        :param new_version_id: Unused here; the annotations are carried by
            the delta lines themselves.
        """
        offset = 0
        lines = self._lines
        for start, end, count, delta_lines in delta:
            lines[offset+start:offset+end] = delta_lines
            # Later hunks must be shifted by what this one added or removed.
            offset = offset + (start - end) + count

    def text(self):
        """Return the text lines without their annotations.

        :raises KnitCorrupt: If a stored line cannot be unpacked into an
            (origin, text) pair.
        """
        try:
            lines = [text for origin, text in self._lines]
        except ValueError:
            # most commonly (only?) caused by the internal form of the knit
            # missing annotation information because of a bug - see thread
            # around 20071015
            #
            # The exception is fetched through sys.exc_info() because the
            # "except ValueError, e" spelling is a syntax error on Python 3
            # and "except ... as e" is one before Python 2.6; this form
            # parses on both.
            import sys
            e = sys.exc_info()[1]
            raise KnitCorrupt(self,
                "line in annotated knit missing annotation information: %s"
                % (e,))
        if self._should_strip_eol:
            lines[-1] = lines[-1].rstrip('\n')
        return lines

    def copy(self):
        """Return a new AnnotatedKnitContent over a copy of the line list."""
        return AnnotatedKnitContent(self._lines[:])
|
505 |
||
506 |
||
507 |
class PlainKnitContent(KnitContent):
    """Unannotated content.

    No per-line origin is stored, so annotate() attributes every line to
    the single version id this object currently represents. Annotating a
    PlainKnitContent is therefore rarely useful.
    """

    def __init__(self, lines, version_id):
        """Create content over lines for version_id.

        :param lines: The list of text lines; kept by reference and edited
            in place by apply_delta().
        :param version_id: The version reported for every line.
        """
        KnitContent.__init__(self)
        self._version_id = version_id
        self._lines = lines

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        origin = self._version_id
        return [(origin, text) for text in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        target = self._lines
        # Running difference between positions in the original text (which
        # the hunks refer to) and positions in the list being edited.
        shift = 0
        for start, end, count, replacement in delta:
            target[shift + start:shift + end] = replacement
            shift = shift + (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the line list."""
        duplicate_lines = self._lines[:]
        return PlainKnitContent(duplicate_lines, self._version_id)

    def text(self):
        """Return the text lines.

        The stored list itself is returned unless the final newline has to
        be stripped, in which case a modified copy is returned instead.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped
|
542 |
||
543 |
||
544 |
class _KnitFactory(object):
    """Behaviour shared by the knit content factories."""

    def parse_record(self, version_id, record, record_details,
                     base_content, copy_base_content=True):
        """Turn one stored record into a full content object.

        :param version_id: The official version id for this content
        :param record: The data returned by read_records_iter()
        :param record_details: The (method, noeol) pair returned by
            get_build_details
        :param base_content: The content to apply a 'line-delta' record to;
            it is not consulted for any other method, so None is fine then
        :param copy_base_content: If True a copy of base_content receives
            the delta, otherwise base_content is modified in place
        :return: (content, delta) A Content object and the parsed
            line-delta, which is None when the record was not a line-delta
        """
        method, noeol = record_details
        if method != 'line-delta':
            content = self.parse_fulltext(record, version_id)
            delta = None
        else:
            content = base_content
            if copy_base_content:
                content = base_content.copy()
            delta = self.parse_line_delta(record, version_id)
            content.apply_delta(delta, version_id)
        content._should_strip_eol = noeol
        return content, delta
|
576 |
||
577 |
||
578 |
class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return AnnotatedKnitContent attributing every line to version_id."""
        return AnnotatedKnitContent([(version_id, line) for line in lines])

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\\n
        internal representation is of the format:
        (revid, plaintext)
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over parse_line_delta(lines, version_id).

        :param version_id: Passed through to parse_line_delta(). It is
            optional so that the historical one-argument call keeps its
            shape; that call used to fail with TypeError because the
            required argument was never forwarded.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: Not used by this implementation.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        # Headers come from the for loops below, content lines are pulled
        # from the same iterator in between.
        next_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, key):
        """Return the (origin, line) annotations of key as stored in knit.

        For a tuple key, each stored origin is expanded to a full key by
        prepending key's prefix (everything but its last element).
        """
        content = knit._get_content(key)
        # adjust for the fact that serialised annotations are only key suffixes
        # for this factory.
        if type(key) is tuple:
            prefix = key[:-1]
            origins = content.annotate()
            result = []
            for origin, line in origins:
                result.append((prefix + (origin,), line))
            return result
        else:
            # XXX: This smells a bit.  Why would key ever be a non-tuple here?
            # Aren't keys defined to be tuples?  -- spiv 20080618
            return content.annotate()
2770.1.1
by Aaron Bentley
Initial implmentation of plain knit annotation |
698 |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
699 |
|
3224.1.15
by John Arbash Meinel
Finish removing method and noeol from general knowledge, |
700 |
class KnitPlainFactory(_KnitFactory):
    """Factory producing unannotated (plain) Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Return PlainKnitContent holding lines for version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        The stored lines are used as they are; they are simply wrapped in
        the content object that make() builds for version_id.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, hunk_lines) for every hunk in lines.

        :param lines: An indexable sequence in which each 'start,end,count'
            header is followed by count content lines.
        :param version_id: Not used by this implementation.
        """
        total = len(lines)
        position = 0
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            first = position + 1
            past_last = first + count
            yield start, end, count, lines[first:past_last]
            position = past_last

    def parse_line_delta(self, lines, version_id):
        """Return parse_line_delta_iter(lines, version_id) as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version_id)]

    def get_fulltext_content(self, lines):
        """Return an iterator over the content lines of a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines held in a line delta.

        The hunk headers are read for their line count and then dropped;
        none of the positional information is returned.
        """
        stream = iter(lines)
        fetch = stream.next
        for header in stream:
            # The third header field says how many content lines follow.
            remaining = int(header.split(',')[2])
            for _unused in xrange(remaining):
                yield fetch()

    def lower_fulltext(self, content):
        """Return the serializable form of a fulltext: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize delta as header lines, each followed by its content."""
        serialized = []
        for start, end, count, hunk_lines in delta:
            header = '%d,%d,%d\n' % (start, end, count)
            serialized.append(header)
            serialized.extend(hunk_lines)
        return serialized

    def annotate(self, knit, key):
        """Return the annotation of key computed by a _KnitAnnotator."""
        return _KnitAnnotator(knit).annotate_flat(key)
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
760 |
|
761 |
||
762 |
||
763 |
def make_file_factory(annotated, mapper):
    """Return a callable that builds a file based KnitVersionedFiles.

    The result is only functional enough to run interface tests; it makes
    no attempt to provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A function taking a transport and returning a
        KnitVersionedFiles whose index and data access both use that
        transport together with mapper.
    """
    def factory(transport):
        # NOTE(review): the three callables are positional _KndxIndex
        # arguments whose meaning is not visible here - confirm against
        # _KndxIndex.__init__ before changing them.
        kndx = _KndxIndex(
            transport, mapper, lambda: None, lambda: True, lambda: True)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(kndx, key_access, annotated=annotated)
    return factory
|
777 |
||
778 |
||
779 |
def make_pack_factory(graph, delta, keylength):
    """Return a callable that builds a pack based VersionedFiles.

    The result is only functional enough to run interface tests; it makes
    no attempt to provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A function taking a transport and returning a
        KnitVersionedFiles that writes to a pack named 'newpack' on it.
        The open stream and the container writer are attached to the
        result as its ``stream`` and ``writer`` attributes.
    """
    def factory(transport):
        pack_name = 'newpack'
        # One reference list for the graph and one more for deltas.
        reference_lists = 0
        for enabled in (graph, delta):
            if enabled:
                reference_lists += 1
        if delta:
            chain_limit = 200
        else:
            chain_limit = 0
        in_memory_index = _mod_index.InMemoryGraphIndex(
            reference_lists=reference_lists, key_elements=keylength)
        write_stream = transport.open_write_stream(pack_name)
        container = pack.ContainerWriter(write_stream.write)
        container.begin()
        knit_index = _KnitGraphIndex(
            in_memory_index, lambda: True, parents=(graph or delta),
            deltas=delta, add_callback=in_memory_index.add_nodes)
        pack_access = _DirectPackAccess({})
        pack_access.set_writer(
            container, in_memory_index, (transport, pack_name))
        knit = KnitVersionedFiles(
            knit_index, pack_access, max_delta_chain=chain_limit)
        # Kept on the result so the caller can end the container and close
        # the stream once it is done.
        knit.stream = write_stream
        knit.writer = container
        return knit
    return factory
|
814 |
||
815 |
||
816 |
def cleanup_pack_knit(versioned_files):
    """Finish the pack behind a make_pack_factory() result and close it.

    :param versioned_files: An object carrying the ``writer`` and ``stream``
        attributes that make_pack_factory() attaches to what it returns.
    """
    try:
        # make_pack_factory() builds the writer around stream.write, so the
        # writer has to be ended while the stream can still take data;
        # closing the stream first leaves end() writing to a closed stream.
        versioned_files.writer.end()
    finally:
        # Release the stream even when ending the writer fails.
        versioned_files.stream.close()
|
819 |
||
820 |
||
4039.3.5
by John Arbash Meinel
Add direct tests for _get_total_build_size. |
821 |
def _get_total_build_size(self, keys, positions):
    """Work out how many bytes are needed to build the given keys.

    (This is a free-standing helper because _KnitGraphIndex and _KndxIndex
    behave the same here without sharing a base class.)

    :param keys: Keys that we want to build
    :param positions: dict of {key, (info, index_memo, comp_parent)} (such
        as returned by _get_components_positions)
    :return: The sum of index_memo[2] over the requested keys and, through
        their compression parents, everything they are built from that is
        present in positions
    """
    memo_by_key = {}
    pending = keys
    while pending:
        parents_to_visit = set()
        for key in pending:
            if key in positions:
                _info, memo, parent = positions[key]
                memo_by_key[key] = memo
                if parent not in memo_by_key:
                    parents_to_visit.add(parent)
            # Keys absent from positions are ignored; this is mostly for
            # the 'stacked' case, where the data comes from a fallback.
        pending = parents_to_visit
    total = 0
    for memo in memo_by_key.values():
        total += memo[2]
    return total
|
848 |
||
849 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
850 |
class KnitVersionedFiles(VersionedFiles): |
851 |
"""Storage for many versioned files using knit compression.
|
|
852 |
||
853 |
Backend storage is managed by indices and data objects.
|
|
3582.1.14
by Martin Pool
Clearer comments about KnitVersionedFile stacking |
854 |
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
855 |
:ivar _index: A _KnitGraphIndex or similar that can describe the
|
856 |
parents, graph, compression and data location of entries in this
|
|
857 |
KnitVersionedFiles. Note that this is only the index for
|
|
3582.1.16
by Martin Pool
Review feedback and news entry |
858 |
*this* vfs; if there are fallbacks they must be queried separately.
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
859 |
"""
|
860 |
||
861 |
def __init__(self, index, data_access, max_delta_chain=200,
             annotated=False, reload_func=None):
    """Create a KnitVersionedFiles with index and data_access.

    :param index: The index for the knit data.
    :param data_access: The access object to store and retrieve knit
        records.
    :param max_delta_chain: The maximum number of deltas to permit during
        insertion. Set to 0 to prohibit the use of deltas.
    :param annotated: Set to True to cause annotations to be calculated and
        stored during insertion.
    :param reload_func: A function that can be called if we think we need
        to reload the pack listing and try again. See
        'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.
    """
    self._index = index
    self._access = data_access
    self._max_delta_chain = max_delta_chain
    # The content factory decides whether annotations are stored.
    factory_class = KnitPlainFactory
    if annotated:
        factory_class = KnitAnnotateFactory
    self._factory = factory_class()
    # Filled in later by add_fallback_versioned_files().
    self._fallback_vfs = []
    self._reload_func = reload_func
3350.8.1
by Robert Collins
KnitVersionedFiles.add_fallback_versioned_files exists. |
885 |
|
3702.1.1
by Martin Pool
Add repr for KnitVersionedFiles |
886 |
def __repr__(self): |
887 |
return "%s(%r, %r)" % ( |
|
888 |
self.__class__.__name__, |
|
889 |
self._index, |
|
890 |
self._access) |
|
891 |
||
3350.8.1
by Robert Collins
KnitVersionedFiles.add_fallback_versioned_files exists. |
892 |
def add_fallback_versioned_files(self, a_versioned_files):
    """Register an additional source for texts missing from this knit.

    :param a_versioned_files: A VersionedFiles object; it is appended to
        the end of this knit's list of fallbacks.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
898 |
|
899 |
def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """See VersionedFiles.add_lines()."""
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    # Callers may pass None when there is no graph data; kndx indexes
    # cannot store that directly, so an empty tuple is recorded instead.
    graph_parents = parents
    if graph_parents is None:
        graph_parents = ()
    joined_text = ''.join(lines)
    return self._add(key, lines, graph_parents, parent_texts,
                     left_matching_blocks, nostore_sha, random_id,
                     line_bytes=joined_text)
|
914 |
||
4398.8.6
by John Arbash Meinel
Switch the api from VF.add_text to VF._add_text and trim some extra 'features'. |
915 |
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False): |
4398.9.1
by Matt Nordhoff
Update _add_text docstrings that still referred to add_text. |
916 |
"""See VersionedFiles._add_text()."""
|
4398.8.3
by John Arbash Meinel
Rewrite some of the internals of KnitVersionedFiles._add() |
917 |
self._index._check_write_ok() |
918 |
self._check_add(key, None, random_id, check_content=False) |
|
919 |
if text.__class__ is not str: |
|
4398.8.5
by John Arbash Meinel
Fix a few more cases where we were adding a list rather than an empty string. |
920 |
raise errors.BzrBadParameterUnicode("text") |
4398.8.3
by John Arbash Meinel
Rewrite some of the internals of KnitVersionedFiles._add() |
921 |
if parents is None: |
922 |
# The caller might pass None if there is no graph data, but kndx
|
|
923 |
# indexes can't directly store that, so we give them
|
|
924 |
# an empty tuple instead.
|
|
925 |
parents = () |
|
926 |
return self._add(key, None, parents, |
|
4398.8.6
by John Arbash Meinel
Switch the api from VF.add_text to VF._add_text and trim some extra 'features'. |
927 |
None, None, nostore_sha, random_id, |
4398.8.3
by John Arbash Meinel
Rewrite some of the internals of KnitVersionedFiles._add() |
928 |
line_bytes=text) |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
929 |
|
930 |
def _add(self, key, lines, parents, parent_texts,
         left_matching_blocks, nostore_sha, random_id,
         line_bytes):
    """Add a set of lines on top of version specified by parents.

    Any versions not present will be converted into ghosts.

    :param lines: A list of strings where each one is a single line (has a
        single newline at the end of the string). This is optional (callers
        can pass None) and is kept in its position for backwards
        compatibility. When given, ''.join(lines) must equal line_bytes.
    :param line_bytes: A single string containing the content.
    :return: A tuple (digest, text_length, content) where digest is
        sha_string(line_bytes), text_length is len(line_bytes) and content
        is the object built by the factory for the stored lines.
    :raises errors.ExistingContent: if nostore_sha equals the digest of
        line_bytes.
    :raises TypeError: if any key element is not a str (the last element
        may also be None, in which case 'sha1:' + digest is used).

    Both lines and line_bytes are accepted because different routes bring
    the values to this function, and for memory efficiency we do not want
    to split/join on demand.
    """
    # First find out whether this is content the caller does not want
    # stored at all.
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    if parent_texts is None:
        parent_texts = {}
    # A single query ascertains parent presence; we only compress against
    # parents held in this same kvf.
    present_parent_map = self._index.get_parent_map(parents)
    present_parents = [parent for parent in parents
                       if parent in present_parent_map]

    # Currently we can only compress against the left most present parent.
    if present_parents and present_parents[0] == parents[0]:
        # To speed the extraction of texts the delta chain is limited to a
        # fixed number of deltas, which bounds both the I/O and the time
        # spent applying deltas.
        delta = self._check_should_delta(present_parents[0])
    else:
        delta = False

    text_length = len(line_bytes)
    options = []
    # line_bytes is never modified to add a newline; a missing final
    # newline is tracked via no_eol. 'lines' *is* modified, because
    # newline-terminated lines are what the Content code needs.
    no_eol = bool(line_bytes) and line_bytes[-1] != '\n'
    if lines is None:
        lines = osutils.split_lines(line_bytes)
    elif no_eol:
        # Copy so that the caller's list is not altered below.
        lines = lines[:]
    if no_eol:
        options.append('no-eol')
        # Replace the last line with one that ends in a final newline.
        lines[-1] = lines[-1] + '\n'

    key_prefix, last_element = key[:-1], key[-1]
    for element in key_prefix:
        if type(element) is not str:
            raise TypeError("key contains non-strings: %r" % (key,))
    if last_element is None:
        key = key_prefix + ('sha1:' + digest,)
    elif type(last_element) is not str:
        raise TypeError("key contains non-strings: %r" % (key,))
    # Knit hunks are still last-element only
    version_id = key[-1]
    content = self._factory.make(lines, version_id)
    if no_eol:
        # Hint to the content object that its text() call should strip
        # the EOL.
        content._should_strip_eol = True
    if delta or (self._factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self._factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        record_args = (self._factory.lower_line_delta(delta_hunks),)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self._factory.__class__ is KnitPlainFactory:
            # Hand over the already joined bytes, saving iteration time in
            # _record_to_data.
            dense_lines = [line_bytes]
            if no_eol:
                dense_lines.append('\n')
            record_args = (lines, dense_lines)
        else:
            # Get mixed annotation + content and feed it into the
            # serialiser.
            record_args = (self._factory.lower_fulltext(content),)
    size, raw_record = self._record_to_data(key, digest, *record_args)

    access_memo = self._access.add_raw_records([(key, size)], raw_record)[0]
    index_entry = (key, options, access_memo, parents)
    self._index.add_records((index_entry,), random_id=random_id)
    return digest, text_length, content
|
1040 |
||
1041 |
def annotate(self, key):
    """See VersionedFiles.annotate."""
    factory = self._factory
    return factory.annotate(self, key)
|
1044 |
||
4454.3.65
by John Arbash Meinel
Tests that VF implementations support .get_annotator() |
1045 |
def get_annotator(self):
    """Return a new _KnitAnnotator constructed around this object."""
    annotator = _KnitAnnotator(self)
    return annotator
|
1047 |
||
4332.3.26
by Robert Collins
Allow passing keys to check to VersionedFile.check(). |
1048 |
def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check()."""
    if keys is not None:
        # At the moment, checking specific keys does no extra work over
        # get_record_stream.
        return self.get_record_stream(keys, 'unordered', True)
    return self._logical_check()
|
1055 |
||
1056 |
def _logical_check(self): |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1057 |
# This doesn't actually test extraction of everything, but that will
|
1058 |
# impact 'bzr check' substantially, and needs to be integrated with
|
|
1059 |
# care. However, it does check for the obvious problem of a delta with
|
|
1060 |
# no basis.
|
|
3517.4.14
by Martin Pool
KnitVersionedFiles.check should just check its own keys then recurse into fallbacks |
1061 |
keys = self._index.keys() |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1062 |
parent_map = self.get_parent_map(keys) |
1063 |
for key in keys: |
|
1064 |
if self._index.get_method(key) != 'fulltext': |
|
1065 |
compression_parent = parent_map[key][0] |
|
1066 |
if compression_parent not in parent_map: |
|
1067 |
raise errors.KnitCorrupt(self, |
|
1068 |
"Missing basis parent %s for %s" % ( |
|
1069 |
compression_parent, key)) |
|
3517.4.14
by Martin Pool
KnitVersionedFiles.check should just check its own keys then recurse into fallbacks |
1070 |
for fallback_vfs in self._fallback_vfs: |
1071 |
fallback_vfs.check() |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1072 |
|
1073 |
def _check_add(self, key, lines, random_id, check_content): |
|
1074 |
"""check that version_id and lines are safe to add."""
|
|
3350.6.10
by Martin Pool
VersionedFiles review cleanups |
1075 |
version_id = key[-1] |
4241.4.1
by Ian Clatworthy
add sha generation support to versionedfiles |
1076 |
if version_id is not None: |
1077 |
if contains_whitespace(version_id): |
|
1078 |
raise InvalidRevisionId(version_id, self) |
|
1079 |
self.check_not_reserved_id(version_id) |
|
3350.6.11
by Martin Pool
Review cleanups and documentation from Robert's mail on 2080618 |
1080 |
# TODO: If random_id==False and the key is already present, we should
|
1081 |
# probably check that the existing content is identical to what is
|
|
1082 |
# being inserted, and otherwise raise an exception. This would make
|
|
1083 |
# the bundle code simpler.
|
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1084 |
if check_content: |
1085 |
self._check_lines_not_unicode(lines) |
|
1086 |
self._check_lines_are_lines(lines) |
|
1087 |
||
1088 |
def _check_header(self, key, line): |
|
1089 |
rec = self._split_header(line) |
|
1090 |
self._check_header_version(rec, key[-1]) |
|
1091 |
return rec |
|
1092 |
||
1093 |
def _check_header_version(self, rec, version_id): |
|
1094 |
"""Checks the header version on original format knit records.
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
1095 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1096 |
These have the last component of the key embedded in the record.
|
1097 |
"""
|
|
1098 |
if rec[1] != version_id: |
|
1099 |
raise KnitCorrupt(self, |
|
1100 |
'unexpected version, wanted %r, got %r' % (version_id, rec[1])) |
|
1101 |
||
1102 |
def _check_should_delta(self, parent): |
|
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
1103 |
"""Iterate back through the parent listing, looking for a fulltext.
|
1104 |
||
1105 |
This is used when we want to decide whether to add a delta or a new
|
|
1106 |
fulltext. It searches for _max_delta_chain parents. When it finds a
|
|
1107 |
fulltext parent, it sees if the total size of the deltas leading up to
|
|
1108 |
it is large enough to indicate that we want a new full text anyway.
|
|
1109 |
||
1110 |
Return True if we should create a new delta, False if we should use a
|
|
1111 |
full text.
|
|
1112 |
"""
|
|
1113 |
delta_size = 0 |
|
1114 |
fulltext_size = None |
|
2147.1.2
by John Arbash Meinel
Simplify the knit max-chain detection code. |
1115 |
for count in xrange(self._max_delta_chain): |
3350.8.9
by Robert Collins
define behaviour for add_lines with stacked storage. |
1116 |
try: |
3582.1.14
by Martin Pool
Clearer comments about KnitVersionedFile stacking |
1117 |
# Note that this only looks in the index of this particular
|
1118 |
# KnitVersionedFiles, not in the fallbacks. This ensures that
|
|
1119 |
# we won't store a delta spanning physical repository
|
|
1120 |
# boundaries.
|
|
3915.3.1
by John Arbash Meinel
As part of _check_should_delta, use the get_build_details api. |
1121 |
build_details = self._index.get_build_details([parent]) |
1122 |
parent_details = build_details[parent] |
|
3973.1.1
by John Arbash Meinel
Trivially fix a bug in _check_should_delta when a parent is not present. |
1123 |
except (RevisionNotPresent, KeyError), e: |
3915.3.1
by John Arbash Meinel
As part of _check_should_delta, use the get_build_details api. |
1124 |
# Some basis is not locally present: always fulltext
|
3350.8.9
by Robert Collins
define behaviour for add_lines with stacked storage. |
1125 |
return False |
3915.3.1
by John Arbash Meinel
As part of _check_should_delta, use the get_build_details api. |
1126 |
index_memo, compression_parent, _, _ = parent_details |
1127 |
_, _, size = index_memo |
|
1128 |
if compression_parent is None: |
|
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
1129 |
fulltext_size = size |
1130 |
break
|
|
1131 |
delta_size += size |
|
3350.6.11
by Martin Pool
Review cleanups and documentation from Robert's mail on 2080618 |
1132 |
# We don't explicitly check for presence because this is in an
|
1133 |
# inner loop, and if it's missing it'll fail anyhow.
|
|
3915.3.1
by John Arbash Meinel
As part of _check_should_delta, use the get_build_details api. |
1134 |
parent = compression_parent |
2147.1.2
by John Arbash Meinel
Simplify the knit max-chain detection code. |
1135 |
else: |
1136 |
# We couldn't find a fulltext, so we must create a new one
|
|
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
1137 |
return False |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1138 |
# Simple heuristic - if the total I/O wold be greater as a delta than
|
1139 |
# the originally installed fulltext, we create a new fulltext.
|
|
2147.1.2
by John Arbash Meinel
Simplify the knit max-chain detection code. |
1140 |
return fulltext_size > delta_size |
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
1141 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1142 |
def _build_details_to_components(self, build_details): |
1143 |
"""Convert a build_details tuple to a position tuple."""
|
|
1144 |
# record_details, access_memo, compression_parent
|
|
1145 |
return build_details[3], build_details[0], build_details[1] |
|
1146 |
||
3350.6.10
by Martin Pool
VersionedFiles review cleanups |
1147 |
def _get_components_positions(self, keys, allow_missing=False): |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1148 |
"""Produce a map of position data for the components of keys.
|
1149 |
||
1150 |
This data is intended to be used for retrieving the knit records.
|
|
1151 |
||
1152 |
A dict of key to (record_details, index_memo, next, parents) is
|
|
1153 |
returned.
|
|
1154 |
method is the way referenced data should be applied.
|
|
1155 |
index_memo is the handle to pass to the data access to actually get the
|
|
1156 |
data
|
|
1157 |
next is the build-parent of the version, or None for fulltexts.
|
|
1158 |
parents is the version_ids of the parents of this version
|
|
1159 |
||
3350.6.10
by Martin Pool
VersionedFiles review cleanups |
1160 |
:param allow_missing: If True do not raise an error on a missing component,
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1161 |
just ignore it.
|
1162 |
"""
|
|
1163 |
component_data = {} |
|
1164 |
pending_components = keys |
|
1165 |
while pending_components: |
|
1166 |
build_details = self._index.get_build_details(pending_components) |
|
1167 |
current_components = set(pending_components) |
|
1168 |
pending_components = set() |
|
1169 |
for key, details in build_details.iteritems(): |
|
1170 |
(index_memo, compression_parent, parents, |
|
1171 |
record_details) = details |
|
1172 |
method = record_details[0] |
|
1173 |
if compression_parent is not None: |
|
1174 |
pending_components.add(compression_parent) |
|
1175 |
component_data[key] = self._build_details_to_components(details) |
|
1176 |
missing = current_components.difference(build_details) |
|
3350.6.10
by Martin Pool
VersionedFiles review cleanups |
1177 |
if missing and not allow_missing: |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1178 |
raise errors.RevisionNotPresent(missing.pop(), self) |
1179 |
return component_data |
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
1180 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1181 |
def _get_content(self, key, parent_texts={}): |
1182 |
"""Returns a content object that makes up the specified
|
|
1183 |
version."""
|
|
1184 |
cached_version = parent_texts.get(key, None) |
|
1185 |
if cached_version is not None: |
|
1186 |
# Ensure the cache dict is valid.
|
|
1187 |
if not self.get_parent_map([key]): |
|
1188 |
raise RevisionNotPresent(key, self) |
|
1189 |
return cached_version |
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
1190 |
generator = _VFContentMapGenerator(self, [key]) |
1191 |
return generator._get_content(key) |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1192 |
|
4593.5.20
by John Arbash Meinel
Expose KnownGraph off of VersionedFiles |
1193 |
def get_known_graph_ancestry(self, keys):
    """Get a KnownGraph instance with the ancestry of keys.

    The ancestry is looked up in this knit's index first; keys it reports
    as missing are then sought in each fallback's index in turn, stopping
    as soon as nothing is missing.
    """
    parent_map, still_missing = self._index.find_ancestry(keys)
    for fallback in self._fallback_vfs:
        if not still_missing:
            break
        found = fallback._index.find_ancestry(still_missing)
        (fallback_parent_map, still_missing) = found
        parent_map.update(fallback_parent_map)
    return _mod_graph.KnownGraph(parent_map)
|
1205 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1206 |
def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the combined mapping is wanted; the per-source results that
    # come with it are dropped.
    map_and_sources = self._get_parent_map_with_sources(keys)
    return map_and_sources[0]
3350.8.6
by Robert Collins
get_record_stream stacking for delta access. |
1214 |
|
3350.8.14
by Robert Collins
Review feedback. |
1215 |
def _get_parent_map_with_sources(self, keys): |
3350.8.6
by Robert Collins
get_record_stream stacking for delta access. |
1216 |
"""Get a map of the parents of keys.
|
1217 |
||
1218 |
:param keys: The keys to look up parents for.
|
|
1219 |
:return: A tuple. The first element is a mapping from keys to parents.
|
|
1220 |
Absent keys are absent from the mapping. The second element is a
|
|
1221 |
list with the locations each key was found in. The first element
|
|
1222 |
is the in-this-knit parents, the second the first fallback source,
|
|
1223 |
and so on.
|
|
1224 |
"""
|
|
3350.8.2
by Robert Collins
stacked get_parent_map. |
1225 |
result = {} |
1226 |
sources = [self._index] + self._fallback_vfs |
|
3350.8.6
by Robert Collins
get_record_stream stacking for delta access. |
1227 |
source_results = [] |
3350.8.2
by Robert Collins
stacked get_parent_map. |
1228 |
missing = set(keys) |
1229 |
for source in sources: |
|
1230 |
if not missing: |
|
1231 |
break
|
|
1232 |
new_result = source.get_parent_map(missing) |
|
3350.8.6
by Robert Collins
get_record_stream stacking for delta access. |
1233 |
source_results.append(new_result) |
3350.8.2
by Robert Collins
stacked get_parent_map. |
1234 |
result.update(new_result) |
1235 |
missing.difference_update(set(new_result)) |
|
3350.8.6
by Robert Collins
get_record_stream stacking for delta access. |
1236 |
return result, source_results |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1237 |
|
3350.8.3
by Robert Collins
VF.get_sha1s needed changing to be stackable. |
1238 |
def _get_record_map(self, keys, allow_missing=False): |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1239 |
"""Produce a dictionary of knit records.
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
1240 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1241 |
:return: {key:(record, record_details, digest, next)}
|
1242 |
record
|
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
1243 |
data returned from read_records (a KnitContentobject)
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1244 |
record_details
|
1245 |
opaque information to pass to parse_record
|
|
1246 |
digest
|
|
1247 |
SHA1 digest of the full text after all steps are done
|
|
1248 |
next
|
|
1249 |
build-parent of the version, i.e. the leftmost ancestor.
|
|
1250 |
Will be None if the record is not a delta.
|
|
3350.8.3
by Robert Collins
VF.get_sha1s needed changing to be stackable. |
1251 |
:param keys: The keys to build a map for
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
1252 |
:param allow_missing: If some records are missing, rather than
|
3350.8.3
by Robert Collins
VF.get_sha1s needed changing to be stackable. |
1253 |
error, just return the data that could be generated.
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1254 |
"""
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
1255 |
raw_map = self._get_record_map_unparsed(keys, |
1256 |
allow_missing=allow_missing) |
|
1257 |
return self._raw_map_to_record_map(raw_map) |
|
1258 |
||
1259 |
def _raw_map_to_record_map(self, raw_map): |
|
1260 |
"""Parse the contents of _get_record_map_unparsed.
|
|
4032.1.1
by John Arbash Meinel
Merge the removal of all trailing whitespace, and resolve conflicts. |
1261 |
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
1262 |
:return: see _get_record_map.
|
1263 |
"""
|
|
1264 |
result = {} |
|
1265 |
for key in raw_map: |
|
1266 |
data, record_details, next = raw_map[key] |
|
1267 |
content, digest = self._parse_record(key[-1], data) |
|
1268 |
result[key] = content, record_details, digest, next |
|
1269 |
return result |
|
1270 |
||
1271 |
def _get_record_map_unparsed(self, keys, allow_missing=False): |
|
1272 |
"""Get the raw data for reconstructing keys without parsing it.
|
|
4032.1.1
by John Arbash Meinel
Merge the removal of all trailing whitespace, and resolve conflicts. |
1273 |
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
1274 |
:return: A dict suitable for parsing via _raw_map_to_record_map.
|
1275 |
key-> raw_bytes, (method, noeol), compression_parent
|
|
1276 |
"""
|
|
3789.2.11
by John Arbash Meinel
KnitVersionedFile.get_record_stream now retries *and* fails correctly. |
1277 |
# This retries the whole request if anything fails. Potentially we
|
1278 |
# could be a bit more selective. We could track the keys whose records
|
|
1279 |
# we have successfully found, and then only request the new records
|
|
1280 |
# from there. However, _get_components_positions grabs the whole build
|
|
1281 |
# chain, which means we'll likely try to grab the same records again
|
|
4005.3.7
by Robert Collins
Review feedback. |
1282 |
# anyway. Also, can the build chains change as part of a pack
|
3789.2.11
by John Arbash Meinel
KnitVersionedFile.get_record_stream now retries *and* fails correctly. |
1283 |
# operation? We wouldn't want to end up with a broken chain.
|
3789.2.10
by John Arbash Meinel
The first function for KnitVersionedFiles can now retry on request. |
1284 |
while True: |
1285 |
try: |
|
1286 |
position_map = self._get_components_positions(keys, |
|
1287 |
allow_missing=allow_missing) |
|
3789.2.11
by John Arbash Meinel
KnitVersionedFile.get_record_stream now retries *and* fails correctly. |
1288 |
# key = component_id, r = record_details, i_m = index_memo,
|
1289 |
# n = next
|
|
3789.2.10
by John Arbash Meinel
The first function for KnitVersionedFiles can now retry on request. |
1290 |
records = [(key, i_m) for key, (r, i_m, n) |
3789.2.11
by John Arbash Meinel
KnitVersionedFile.get_record_stream now retries *and* fails correctly. |
1291 |
in position_map.iteritems()] |
4039.3.1
by John Arbash Meinel
Group records to read by pack file and sort by offset. |
1292 |
# Sort by the index memo, so that we request records from the
|
1293 |
# same pack file together, and in forward-sorted order
|
|
1294 |
records.sort(key=operator.itemgetter(1)) |
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
1295 |
raw_record_map = {} |
1296 |
for key, data in self._read_records_iter_unchecked(records): |
|
3789.2.10
by John Arbash Meinel
The first function for KnitVersionedFiles can now retry on request. |
1297 |
(record_details, index_memo, next) = position_map[key] |
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
1298 |
raw_record_map[key] = data, record_details, next |
1299 |
return raw_record_map |
|
3789.2.10
by John Arbash Meinel
The first function for KnitVersionedFiles can now retry on request. |
1300 |
except errors.RetryWithNewPacks, e: |
1301 |
self._access.reload_or_raise(e) |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1302 |
|
4039.3.6
by John Arbash Meinel
Turn _split_by_prefix into a classmethod, and add direct tests. |
1303 |
@classmethod
|
1304 |
def _split_by_prefix(cls, keys): |
|
3763.4.1
by John Arbash Meinel
Possible fix for bug #269456. |
1305 |
"""For the given keys, split them up based on their prefix.
|
1306 |
||
1307 |
To keep memory pressure somewhat under control, split the
|
|
1308 |
requests back into per-file-id requests, otherwise "bzr co"
|
|
1309 |
extracts the full tree into memory before writing it to disk.
|
|
1310 |
This should be revisited if _get_content_maps() can ever cross
|
|
1311 |
file-id boundaries.
|
|
1312 |
||
4039.3.6
by John Arbash Meinel
Turn _split_by_prefix into a classmethod, and add direct tests. |
1313 |
The keys for a given file_id are kept in the same relative order.
|
1314 |
Ordering between file_ids is not, though prefix_order will return the
|
|
1315 |
order that the key was first seen.
|
|
4039.3.2
by John Arbash Meinel
Batch get_record_stream(fulltexts) into 5MB requests. |
1316 |
|
3763.4.1
by John Arbash Meinel
Possible fix for bug #269456. |
1317 |
:param keys: An iterable of key tuples
|
4039.3.2
by John Arbash Meinel
Batch get_record_stream(fulltexts) into 5MB requests. |
1318 |
:return: (split_map, prefix_order)
|
1319 |
split_map A dictionary mapping prefix => keys
|
|
1320 |
prefix_order The order that we saw the various prefixes
|
|
3763.4.1
by John Arbash Meinel
Possible fix for bug #269456. |
1321 |
"""
|
1322 |
split_by_prefix = {} |
|
4039.3.2
by John Arbash Meinel
Batch get_record_stream(fulltexts) into 5MB requests. |
1323 |
prefix_order = [] |
3763.4.1
by John Arbash Meinel
Possible fix for bug #269456. |
1324 |
for key in keys: |
1325 |
if len(key) == 1: |
|
4039.3.2
by John Arbash Meinel
Batch get_record_stream(fulltexts) into 5MB requests. |
1326 |
prefix = '' |
1327 |
else: |
|
1328 |
prefix = key[0] |
|
1329 |
||
1330 |
if prefix in split_by_prefix: |
|
1331 |
split_by_prefix[prefix].append(key) |
|
1332 |
else: |
|
1333 |
split_by_prefix[prefix] = [key] |
|
1334 |
prefix_order.append(prefix) |
|
1335 |
return split_by_prefix, prefix_order |
|
1336 |
||
4039.3.7
by John Arbash Meinel
Some direct tests for _group_keys_for_io |
1337 |
def _group_keys_for_io(self, keys, non_local_keys, positions,
                       _min_buffer_size=_STREAM_MIN_BUFFER_SIZE):
    """For the given keys, group them into 'best-sized' requests.

    The idea is to avoid making 1 request per file, but to never try to
    unpack an entire 1.5GB source tree in a single pass.  Also when
    possible, we should try to group requests to the same pack file
    together.

    Keys are first split by prefix (see _split_by_prefix); all keys that
    share a prefix always land in the same group.  Prefixes are added to the
    current group in first-seen order, and the group is closed as soon as
    its accumulated build size exceeds _min_buffer_size.

    :param keys: An iterable of key tuples to group.
    :param non_local_keys: An iterable of key tuples; each is attached to
        the group that holds its prefix.  Non-local keys whose prefix does
        not occur in keys are not returned.
    :param positions: Passed to self._index._get_total_build_size to size
        each prefix's keys.
    :param _min_buffer_size: Size threshold above which a group is closed.
    :return: list of (keys, non_local) tuples that indicate what keys
        should be fetched next.  keys is a list, non_local is a set.
    """
    # TODO: Ideally we would group on 2 factors.  We want to extract texts
    #       from the same pack file together, and we want to extract all
    #       the texts for a given build-chain together.  Ultimately it
    #       probably needs a better global view.
    prefix_split_keys, prefix_order = self._split_by_prefix(keys)
    prefix_split_non_local_keys, _ = self._split_by_prefix(non_local_keys)
    cur_keys = []
    cur_non_local = set()
    cur_size = 0
    result = []
    for prefix in prefix_order:
        prefix_keys = prefix_split_keys[prefix]
        non_local = prefix_split_non_local_keys.get(prefix, [])

        cur_size += self._index._get_total_build_size(prefix_keys, positions)
        cur_keys.extend(prefix_keys)
        cur_non_local.update(non_local)
        if cur_size > _min_buffer_size:
            # This group is big enough; start a fresh one.
            result.append((cur_keys, cur_non_local))
            cur_keys = []
            cur_non_local = set()
            cur_size = 0
    if cur_keys:
        # Flush the final, under-sized group.
        result.append((cur_keys, cur_non_local))
    return result
|
3763.4.1
by John Arbash Meinel
Possible fix for bug #269456. |
1379 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1380 |
def get_record_stream(self, keys, ordering, include_delta_closure): |
1381 |
"""Get a stream of records for keys.
|
|
1382 |
||
1383 |
:param keys: The keys to include.
|
|
3350.3.3
by Robert Collins
Functional get_record_stream interface tests covering full interface. |
1384 |
:param ordering: Either 'unordered' or 'topological'. A topologically
|
1385 |
sorted stream has compression parents strictly before their
|
|
1386 |
children.
|
|
1387 |
:param include_delta_closure: If True then the closure across any
|
|
1388 |
compression parents will be included (in the opaque data).
|
|
1389 |
:return: An iterator of ContentFactory objects, each of which is only
|
|
1390 |
valid until the iterator is advanced.
|
|
1391 |
"""
|
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1392 |
# keys might be a generator
|
1393 |
keys = set(keys) |
|
3350.8.6
by Robert Collins
get_record_stream stacking for delta access. |
1394 |
if not keys: |
1395 |
return
|
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1396 |
if not self._index.has_graph: |
4111.1.1
by Robert Collins
Add a groupcompress sort order. |
1397 |
# Cannot sort when no graph has been stored.
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1398 |
ordering = 'unordered' |
3789.2.1
by John Arbash Meinel
_DirectPackAccess can now raise RetryWithNewPacks when we think something has happened. |
1399 |
|
1400 |
remaining_keys = keys |
|
1401 |
while True: |
|
1402 |
try: |
|
1403 |
keys = set(remaining_keys) |
|
1404 |
for content_factory in self._get_remaining_record_stream(keys, |
|
1405 |
ordering, include_delta_closure): |
|
1406 |
remaining_keys.discard(content_factory.key) |
|
1407 |
yield content_factory |
|
1408 |
return
|
|
1409 |
except errors.RetryWithNewPacks, e: |
|
3789.2.11
by John Arbash Meinel
KnitVersionedFile.get_record_stream now retries *and* fails correctly. |
1410 |
self._access.reload_or_raise(e) |
3789.2.1
by John Arbash Meinel
_DirectPackAccess can now raise RetryWithNewPacks when we think something has happened. |
1411 |
|
1412 |
def _get_remaining_record_stream(self, keys, ordering,
                                 include_delta_closure):
    """Yield the records for keys; the 'retry' portion of get_record_stream.

    :param keys: The set of keys to stream.
    :param ordering: 'unordered', 'topological' or 'groupcompress'; any
        other value raises AssertionError.
    :param include_delta_closure: If True, records are produced by
        _VFContentMapGenerator in batches chosen by _group_keys_for_io.
        Otherwise raw records are read from this object for the keys found
        in its own parent map, and the other keys are requested from the
        matching fallback versioned files.
    :return: An iterator of content factories.  Keys that are missing from
        the global parent map are yielded first, as AbsentContentFactory
        objects.
    """
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
            for key, details in build_details.iteritems())
    # Keys without a position are not the only absent ones: a basis
    # component may be missing when the delta closure is requested.
    # XXX: We should not ever need to examine remote sources because we do
    # not permit deltas across versioned files boundaries.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict.  key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                needed_from_fallback.add(key)
                continue
            result = True
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering in ('topological', 'groupcompress'):
        if ordering == 'topological':
            # Global topological sort
            present_keys = tsort.topo_sort(global_map)
        else:
            present_keys = sort_groupcompress(global_map)
        # Now group by source: consecutive keys from the same parent map
        # share one (source, keys) entry.
        source_keys = []
        current_source = None
        for key in present_keys:
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                ' "unordered", "groupcompress" or "topological" not: %r'
                % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        for batch_keys, batch_non_local in self._group_keys_for_io(
                present_keys, non_local_keys, positions):
            generator = _VFContentMapGenerator(self, batch_keys,
                                               batch_non_local,
                                               global_map,
                                               ordering=ordering)
            for record in generator.get_record_stream():
                yield record
    else:
        # Sources are told apart by identity everywhere else in this
        # function, so look the fallback up by identity too: list.index()
        # compares the parent maps by equality and would confuse two maps
        # that happen to be equal (e.g. both empty).
        source_ids = [id(parent_map) for parent_map in parent_maps]
        for source, source_key_list in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1])
                           for key in source_key_list]
                for key, raw_data in \
                        self._read_records_iter_unchecked(records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                        record_details, None, raw_data,
                        self._factory.annotated, None)
            else:
                # parent_maps[0] belongs to this object, so parent_maps[n]
                # pairs with _fallback_vfs[n - 1].
                vf = self._fallback_vfs[source_ids.index(id(source)) - 1]
                for record in vf.get_record_stream(source_key_list,
                                                   ordering,
                                                   include_delta_closure):
                    yield record
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1530 |
|
1531 |
def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    Digests are taken from this object's own record map first; keys still
    unanswered are then asked of each fallback versioned file in turn,
    stopping as soon as nothing is missing.

    :param keys: An iterable of keys.
    :return: A dict mapping each key that could be found to its digest.
    """
    missing = set(keys)
    record_map = self._get_record_map(missing, allow_missing=True)
    # The record map may hold keys that were not asked for; keep only the
    # requested ones.  Record entry 2 is the 'digest'.
    result = dict((key, details[2])
                  for key, details in record_map.iteritems()
                  if key in missing)
    missing.difference_update(result)
    for source in self._fallback_vfs:
        if not missing:
            break
        fallback_sha1s = source.get_sha1s(missing)
        result.update(fallback_sha1s)
        missing.difference_update(fallback_sha1s)
    return result
|
3052.2.2
by Robert Collins
* Operations pulling data from a smart server where the underlying |
1549 |
|
3350.3.8
by Robert Collins
Basic stream insertion, no fast path yet for knit to knit. |
1550 |
def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Records whose storage kind is a knit kind this object can store (or
    cheaply adapt) are written as raw bytes; 'chunked' records and
    everything else are inserted through add_lines() as full texts.

    Index entries for delta records whose compression parent is not yet
    in this object's own index are buffered until that parent arrives in
    the stream. Entries still buffered when the stream ends are handed
    to the index with missing_compression_parents=True.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: if the stream contains a record whose
        storage_kind is 'absent'.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are created lazily and cached in 'adapters' for the
        # duration of this call.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # Storage kinds that are deltas, i.e. that have a compression parent.
    delta_types = set()
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
        delta_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    #
    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.
    # NOTE(review): the checks below only look at the compression parent
    # (parents[0] of a delta record), not at every parent, so the sentence
    # above looks stale - confirm and reword.
    #
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        kind = record.storage_kind
        if kind.startswith('knit-') and kind.endswith('-gz'):
            # Check that the ID in the header of the raw knit bytes matches
            # the record metadata.
            raw_data = record._raw_record
            df, rec = self._parse_record_header(record.key, raw_data)
            df.close()
        # Set to True when this record's index entry had to be buffered.
        buffered = False
        parents = record.parents
        if record.storage_kind in delta_types:
            # TODO: eventually the record itself should track
            #       compression_parent
            compression_parent = parents[0]
        else:
            compression_parent = None
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types)
              and (compression_parent is None
                   or not self._fallback_vfs
                   or self._index.has_key(compression_parent)
                   or not self.has_key(compression_parent))):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks.  In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.
            #
            # TODO: self.has_key is somewhat redundant with
            # self._index.has_key; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            if record.storage_kind not in native_types:
                # Prefer adapting to a delta; fall back to a fulltext
                # adapter when no delta adapter is registered.
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record)
            else:
                # It's a knit record, it has a _raw_record field (even if
                # it was reconstituted from a network stream).
                bytes = record._raw_record
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by having the kndx index support raise on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(bytes))], bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            if 'fulltext' not in options:
                # Not a fulltext, so we need to make sure the compression
                # parent will also be present.
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                #
                # They're required to be physically in this
                # KnitVersionedFiles, not in a fallback.
                if not self._index.has_key(compression_parent):
                    pending = buffered_index_entries.setdefault(
                        compression_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'chunked':
            self.add_lines(record.key, parents,
                osutils.chunks_to_lines(record.get_bytes_as('chunked')))
        else:
            # Not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            self._access.flush()
            try:
                # Try getting a fulltext directly from the record.
                bytes = record.get_bytes_as('fulltext')
            except errors.UnavailableRepresentation:
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record)
            lines = split_lines(bytes)
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                # Overlapping content is not an error; the existing text
                # is kept.
                pass
        # Add any records whose basis parent is now available.
        if not buffered:
            # Work list: each key added to the index may in turn unblock
            # the buffered entries that were waiting on it.
            added_keys = [record.key]
            while added_keys:
                key = added_keys.pop(0)
                if key in buffered_index_entries:
                    index_entries = buffered_index_entries[key]
                    self._index.add_records(index_entries)
                    added_keys.extend(
                        [index_entry[0] for index_entry in index_entries])
                    del buffered_index_entries[key]
    if buffered_index_entries:
        # There were index entries buffered at the end of the stream,
        # So these need to be added (if the index supports holding such
        # entries for later insertion)
        for key in buffered_index_entries:
            index_entries = buffered_index_entries[key]
            self._index.add_records(index_entries,
                missing_compression_parents=True)
|
4009.3.2
by Andrew Bennetts
Add test_insert_record_stream_delta_missing_basis_can_be_added_later. |
1717 |
|
1718 |
def get_missing_compression_parent_keys(self):
    """Return an iterable of keys of missing compression parents.

    Call this once insert_record_stream has finished to learn whether any
    compression parents are still absent. When some are, the records
    built on top of them cannot be inserted safely. For atomic
    KnitVersionedFiles built on packs, the transaction should be aborted
    or suspended - commit will fail at this point. Nonatomic knits will
    error earlier because they have no staging area to put pending
    entries into.
    """
    # The index is the component that tracks unresolved parents.
    index = self._index
    return index.get_missing_compression_parents()
3350.3.8
by Robert Collins
Basic stream insertion, no fast path yet for knit to knit. |
1729 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1730 |
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version in which that
    line is present (not necessarily the one that introduced it).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    Keys not found locally are looked up in the fallback versioned
    files, one fallback at a time.

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.
     * If a requested key did not change any lines (or didn't have any
       lines), it may not be mentioned at all in the result.

    :param keys: The keys to walk; copied into a set internally.
    :param pb: Progress bar supplied by caller.
    :return: An iterator over (line, key).
    :raises RevisionNotPresent: if some keys were not found locally and
        there are no fallback versioned files to consult.
    """
    if pb is None:
        pb = progress.DummyProgress()
    keys = set(keys)
    total = len(keys)
    done = False
    # Retry loop: on RetryWithNewPacks the access layer is asked to
    # reload (or re-raise) and the walk restarts with the keys that
    # have not been yielded yet.
    while not done:
        try:
            # we don't care about inclusions, the caller cares.
            # but we need to setup a list of records to visit.
            # we need key, position, length
            key_records = []
            build_details = self._index.get_build_details(keys)
            for key, details in build_details.iteritems():
                if key in keys:
                    key_records.append((key, details[0]))
            records_iter = enumerate(self._read_records_iter(key_records))
            for (key_idx, (key, data, sha_value)) in records_iter:
                pb.update('Walking content', key_idx, total)
                compression_parent = build_details[key][1]
                if compression_parent is None:
                    # fulltext
                    line_iterator = self._factory.get_fulltext_content(data)
                else:
                    # Delta
                    line_iterator = self._factory.get_linedelta_content(data)
                # Now that we are yielding the data for this key, remove it
                # from the list
                keys.remove(key)
                # XXX: It might be more efficient to yield (key,
                # line_iterator) in the future. However for now, this is a
                # simpler change to integrate into the rest of the
                # codebase. RBC 20071110
                for line in line_iterator:
                    yield line, key
            done = True
        except errors.RetryWithNewPacks, e:
            self._access.reload_or_raise(e)
    # If there are still keys we've not yet found, we look in the fallback
    # vfs, and hope to find them there.  Note that if the keys are found
    # but had no changes or no content, the fallback may not return
    # anything.
    if keys and not self._fallback_vfs:
        # XXX: strictly the second parameter is meant to be the file id
        # but it's not easily accessible here.
        raise RevisionNotPresent(keys, repr(self))
    for source in self._fallback_vfs:
        if not keys:
            break
        # Keys this fallback reported, so later fallbacks are not asked
        # for them again.
        source_keys = set()
        for line, key in source.iter_lines_added_or_present_in_keys(keys):
            source_keys.add(key)
            yield line, key
        keys.difference_update(source_keys)
    pb.update('Walking content', total, total)
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1808 |
|
1809 |
def _make_line_delta(self, delta_seq, new_content): |
|
1810 |
"""Generate a line delta from delta_seq and new_content."""
|
|
1811 |
diff_hunks = [] |
|
1812 |
for op in delta_seq.get_opcodes(): |
|
1813 |
if op[0] == 'equal': |
|
1814 |
continue
|
|
1815 |
diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]])) |
|
1816 |
return diff_hunks |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1817 |
|
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
1818 |
def _merge_annotations(self, content, parents, parent_texts={}, |
2520.4.140
by Aaron Bentley
Use matching blocks from mpdiff for knit delta creation |
1819 |
delta=None, annotated=None, |
1820 |
left_matching_blocks=None): |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1821 |
"""Merge annotations for content and generate deltas.
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
1822 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1823 |
This is done by comparing the annotations based on changes to the text
|
1824 |
and generating a delta on the resulting full texts. If annotations are
|
|
1825 |
not being created then a simple delta is created.
|
|
1596.2.27
by Robert Collins
Note potential improvements in knit adds. |
1826 |
"""
|
2520.4.146
by Aaron Bentley
Avoid get_matching_blocks for un-annotated text |
1827 |
if left_matching_blocks is not None: |
1828 |
delta_seq = diff._PrematchedMatcher(left_matching_blocks) |
|
1829 |
else: |
|
1830 |
delta_seq = None |
|
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
1831 |
if annotated: |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1832 |
for parent_key in parents: |
1833 |
merge_content = self._get_content(parent_key, parent_texts) |
|
1834 |
if (parent_key == parents[0] and delta_seq is not None): |
|
2520.4.146
by Aaron Bentley
Avoid get_matching_blocks for un-annotated text |
1835 |
seq = delta_seq |
2520.4.140
by Aaron Bentley
Use matching blocks from mpdiff for knit delta creation |
1836 |
else: |
1837 |
seq = patiencediff.PatienceSequenceMatcher( |
|
1838 |
None, merge_content.text(), content.text()) |
|
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
1839 |
for i, j, n in seq.get_matching_blocks(): |
1840 |
if n == 0: |
|
1841 |
continue
|
|
3460.2.1
by Robert Collins
* Inserting a bundle which changes the contents of a file with no trailing |
1842 |
# this copies (origin, text) pairs across to the new
|
1843 |
# content for any line that matches the last-checked
|
|
2520.4.146
by Aaron Bentley
Avoid get_matching_blocks for un-annotated text |
1844 |
# parent.
|
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
1845 |
content._lines[j:j+n] = merge_content._lines[i:i+n] |
3350.6.10
by Martin Pool
VersionedFiles review cleanups |
1846 |
# XXX: Robert says the following block is a workaround for a
|
1847 |
# now-fixed bug and it can probably be deleted. -- mbp 20080618
|
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1848 |
if content._lines and content._lines[-1][1][-1] != '\n': |
1849 |
# The copied annotation was from a line without a trailing EOL,
|
|
1850 |
# reinstate one for the content object, to ensure correct
|
|
1851 |
# serialization.
|
|
1852 |
line = content._lines[-1][1] + '\n' |
|
1853 |
content._lines[-1] = (content._lines[-1][0], line) |
|
1596.2.36
by Robert Collins
add a get_delta api to versioned_file. |
1854 |
if delta: |
2520.4.146
by Aaron Bentley
Avoid get_matching_blocks for un-annotated text |
1855 |
if delta_seq is None: |
1596.2.36
by Robert Collins
add a get_delta api to versioned_file. |
1856 |
reference_content = self._get_content(parents[0], parent_texts) |
1857 |
new_texts = content.text() |
|
1858 |
old_texts = reference_content.text() |
|
2104.4.2
by John Arbash Meinel
Small cleanup and NEWS entry about fixing bug #65714 |
1859 |
delta_seq = patiencediff.PatienceSequenceMatcher( |
2100.2.1
by wang
Replace python's difflib by patiencediff because the worst case |
1860 |
None, old_texts, new_texts) |
1596.2.36
by Robert Collins
add a get_delta api to versioned_file. |
1861 |
return self._make_line_delta(delta_seq, content) |
1862 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1863 |
def _parse_record(self, version_id, data): |
1864 |
"""Parse an original format knit record.
|
|
1865 |
||
1866 |
These have the last element of the key only present in the stored data.
|
|
1867 |
"""
|
|
1868 |
rec, record_contents = self._parse_record_unchecked(data) |
|
1869 |
self._check_header_version(rec, version_id) |
|
1870 |
return record_contents, rec[3] |
|
1871 |
||
1872 |
def _parse_record_header(self, key, raw_data): |
|
1873 |
"""Parse a record header for consistency.
|
|
1874 |
||
1875 |
:return: the header and the decompressor stream.
|
|
1876 |
as (stream, header_record)
|
|
1877 |
"""
|
|
3535.5.1
by John Arbash Meinel
cleanup a few imports to be lazily loaded. |
1878 |
df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data)) |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1879 |
try: |
1880 |
# Current serialise
|
|
1881 |
rec = self._check_header(key, df.readline()) |
|
1882 |
except Exception, e: |
|
1883 |
raise KnitCorrupt(self, |
|
1884 |
"While reading {%s} got %s(%s)" |
|
1885 |
% (key, e.__class__.__name__, str(e))) |
|
1886 |
return df, rec |
|
1887 |
||
1888 |
def _parse_record_unchecked(self, data): |
|
1889 |
# profiling notes:
|
|
1890 |
# 4168 calls in 2880 217 internal
|
|
1891 |
# 4168 calls to _parse_record_header in 2121
|
|
1892 |
# 4168 calls to readlines in 330
|
|
3535.5.1
by John Arbash Meinel
cleanup a few imports to be lazily loaded. |
1893 |
df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(data)) |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1894 |
try: |
1895 |
record_contents = df.readlines() |
|
1896 |
except Exception, e: |
|
1897 |
raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" % |
|
1898 |
(data, e.__class__.__name__, str(e))) |
|
1899 |
header = record_contents.pop(0) |
|
1900 |
rec = self._split_header(header) |
|
1901 |
last_line = record_contents.pop() |
|
1902 |
if len(record_contents) != int(rec[2]): |
|
1903 |
raise KnitCorrupt(self, |
|
1904 |
'incorrect number of lines %s != %s' |
|
1905 |
' for version {%s} %s' |
|
1906 |
% (len(record_contents), int(rec[2]), |
|
1907 |
rec[1], record_contents)) |
|
1908 |
if last_line != 'end %s\n' % rec[1]: |
|
1909 |
raise KnitCorrupt(self, |
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
1910 |
'unexpected version end line %r, wanted %r' |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1911 |
% (last_line, rec[1])) |
1912 |
df.close() |
|
1913 |
return rec, record_contents |
|
1914 |
||
1915 |
def _read_records_iter(self, records): |
|
1916 |
"""Read text records from data file and yield result.
|
|
1917 |
||
1918 |
The result will be returned in whatever is the fastest to read.
|
|
1919 |
Not by the order requested. Also, multiple requests for the same
|
|
1920 |
record will only yield 1 response.
|
|
1921 |
:param records: A list of (key, access_memo) entries
|
|
1922 |
:return: Yields (key, contents, digest) in the order
|
|
1923 |
read, not the order requested
|
|
1924 |
"""
|
|
1925 |
if not records: |
|
1926 |
return
|
|
1927 |
||
1928 |
# XXX: This smells wrong, IO may not be getting ordered right.
|
|
1929 |
needed_records = sorted(set(records), key=operator.itemgetter(1)) |
|
1930 |
if not needed_records: |
|
1931 |
return
|
|
1932 |
||
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
1933 |
# The transport optimizes the fetching as well
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1934 |
# (ie, reads continuous ranges.)
|
1935 |
raw_data = self._access.get_raw_records( |
|
1936 |
[index_memo for key, index_memo in needed_records]) |
|
1937 |
||
1938 |
for (key, index_memo), data in \ |
|
1939 |
izip(iter(needed_records), raw_data): |
|
1940 |
content, digest = self._parse_record(key[-1], data) |
|
1941 |
yield key, content, digest |
|
1942 |
||
1943 |
def _read_records_iter_raw(self, records): |
|
1944 |
"""Read text records from data file and yield raw data.
|
|
1945 |
||
1946 |
This unpacks enough of the text record to validate the id is
|
|
1947 |
as expected but thats all.
|
|
1948 |
||
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
1949 |
Each item the iterator yields is (key, bytes,
|
1950 |
expected_sha1_of_full_text).
|
|
1951 |
"""
|
|
1952 |
for key, data in self._read_records_iter_unchecked(records): |
|
1953 |
# validate the header (note that we can only use the suffix in
|
|
1954 |
# current knit records).
|
|
1955 |
df, rec = self._parse_record_header(key, data) |
|
1956 |
df.close() |
|
1957 |
yield key, data, rec[3] |
|
1958 |
||
1959 |
def _read_records_iter_unchecked(self, records): |
|
1960 |
"""Read text records from data file and yield raw data.
|
|
1961 |
||
1962 |
No validation is done.
|
|
1963 |
||
1964 |
Yields tuples of (key, data).
|
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1965 |
"""
|
1966 |
# setup an iterator of the external records:
|
|
1967 |
# uses readv so nice and fast we hope.
|
|
1968 |
if len(records): |
|
1969 |
# grab the disk data needed.
|
|
1970 |
needed_offsets = [index_memo for key, index_memo |
|
1971 |
in records] |
|
1972 |
raw_records = self._access.get_raw_records(needed_offsets) |
|
1973 |
||
1974 |
for key, index_memo in records: |
|
1975 |
data = raw_records.next() |
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
1976 |
yield key, data |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1977 |
|
1978 |
def _record_to_data(self, key, digest, lines, dense_lines=None): |
|
1979 |
"""Convert key, digest, lines into a raw data block.
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
1980 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1981 |
:param key: The key of the record. Currently keys are always serialised
|
1982 |
using just the trailing component.
|
|
1983 |
:param dense_lines: The bytes of lines but in a denser form. For
|
|
1984 |
instance, if lines is a list of 1000 bytestrings each ending in \n,
|
|
1985 |
dense_lines may be a list with one line in it, containing all the
|
|
1986 |
1000's lines and their \n's. Using dense_lines if it is already
|
|
1987 |
known is a win because the string join to create bytes in this
|
|
1988 |
function spends less time resizing the final string.
|
|
1989 |
:return: (len, a StringIO instance with the raw data ready to read.)
|
|
1990 |
"""
|
|
4398.8.3
by John Arbash Meinel
Rewrite some of the internals of KnitVersionedFiles._add() |
1991 |
chunks = ["version %s %d %s\n" % (key[-1], len(lines), digest)] |
1992 |
chunks.extend(dense_lines or lines) |
|
1993 |
chunks.append("end %s\n" % key[-1]) |
|
1994 |
for chunk in chunks: |
|
4398.8.8
by John Arbash Meinel
Respond to Andrew's review comments. |
1995 |
if type(chunk) is not str: |
4398.8.3
by John Arbash Meinel
Rewrite some of the internals of KnitVersionedFiles._add() |
1996 |
raise AssertionError( |
1997 |
'data must be plain bytes was %s' % type(chunk)) |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
1998 |
if lines and lines[-1][-1] != '\n': |
1999 |
raise ValueError('corrupt lines value %r' % lines) |
|
4398.8.3
by John Arbash Meinel
Rewrite some of the internals of KnitVersionedFiles._add() |
2000 |
compressed_bytes = tuned_gzip.chunks_to_gzip(chunks) |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2001 |
return len(compressed_bytes), compressed_bytes |
2002 |
||
2003 |
def _split_header(self, line): |
|
2004 |
rec = line.split() |
|
2005 |
if len(rec) != 4: |
|
2006 |
raise KnitCorrupt(self, |
|
2007 |
'unexpected number of elements in record header') |
|
2008 |
return rec |
|
2009 |
||
2010 |
def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the keys of the index together with the keys of
        every fallback versioned files object.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    # The local index comes first, followed by each fallback in turn.
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys
|
2019 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2020 |
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
2021 |
class _ContentMapGenerator(object): |
2022 |
"""Generate texts or expose raw deltas for a set of texts."""
|
|
2023 |
||
4537.3.1
by John Arbash Meinel
Start working on tests that get_record_stream gives reasonable results w/ stacking. |
2024 |
def __init__(self, ordering='unordered'): |
2025 |
self._ordering = ordering |
|
2026 |
||
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
2027 |
def _get_content(self, key): |
2028 |
"""Get the content object for key."""
|
|
4005.3.7
by Robert Collins
Review feedback. |
2029 |
# Note that _get_content is only called when the _ContentMapGenerator
|
2030 |
# has been constructed with just one key requested for reconstruction.
|
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
2031 |
if key in self.nonlocal_keys: |
2032 |
record = self.get_record_stream().next() |
|
2033 |
# Create a content object on the fly
|
|
2034 |
lines = osutils.chunks_to_lines(record.get_bytes_as('chunked')) |
|
2035 |
return PlainKnitContent(lines, record.key) |
|
2036 |
else: |
|
2037 |
# local keys we can ask for directly
|
|
2038 |
return self._get_one_work(key) |
|
2039 |
||
2040 |
def get_record_stream(self): |
|
2041 |
"""Get a record stream for the keys requested during __init__."""
|
|
2042 |
for record in self._work(): |
|
2043 |
yield record |
|
2044 |
||
2045 |
def _work(self): |
|
2046 |
"""Produce maps of text and KnitContents as dicts.
|
|
4032.1.1
by John Arbash Meinel
Merge the removal of all trailing whitespace, and resolve conflicts. |
2047 |
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
2048 |
:return: (text_map, content_map) where text_map contains the texts for
|
2049 |
the requested versions and content_map contains the KnitContents.
|
|
2050 |
"""
|
|
2051 |
# NB: By definition we never need to read remote sources unless texts
|
|
2052 |
# are requested from them: we don't delta across stores - and we
|
|
2053 |
# explicitly do not want to to prevent data loss situations.
|
|
2054 |
if self.global_map is None: |
|
2055 |
self.global_map = self.vf.get_parent_map(self.keys) |
|
2056 |
nonlocal_keys = self.nonlocal_keys |
|
2057 |
||
2058 |
missing_keys = set(nonlocal_keys) |
|
2059 |
# Read from remote versioned file instances and provide to our caller.
|
|
2060 |
for source in self.vf._fallback_vfs: |
|
2061 |
if not missing_keys: |
|
2062 |
break
|
|
2063 |
# Loop over fallback repositories asking them for texts - ignore
|
|
2064 |
# any missing from a particular fallback.
|
|
2065 |
for record in source.get_record_stream(missing_keys, |
|
4537.3.1
by John Arbash Meinel
Start working on tests that get_record_stream gives reasonable results w/ stacking. |
2066 |
self._ordering, True): |
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
2067 |
if record.storage_kind == 'absent': |
2068 |
# Not in thie particular stream, may be in one of the
|
|
2069 |
# other fallback vfs objects.
|
|
2070 |
continue
|
|
2071 |
missing_keys.remove(record.key) |
|
2072 |
yield record |
|
2073 |
||
4454.2.1
by John Arbash Meinel
Don't populate self._raw_record_map in _work, it was done in __init__ |
2074 |
if self._raw_record_map is None: |
2075 |
raise AssertionError('_raw_record_map should have been filled') |
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
2076 |
first = True |
2077 |
for key in self.keys: |
|
2078 |
if key in self.nonlocal_keys: |
|
2079 |
continue
|
|
2080 |
yield LazyKnitContentFactory(key, self.global_map[key], self, first) |
|
2081 |
first = False |
|
2082 |
||
2083 |
def _get_one_work(self, requested_key): |
|
2084 |
# Now, if we have calculated everything already, just return the
|
|
2085 |
# desired text.
|
|
2086 |
if requested_key in self._contents_map: |
|
2087 |
return self._contents_map[requested_key] |
|
4005.3.7
by Robert Collins
Review feedback. |
2088 |
# To simplify things, parse everything at once - code that wants one text
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
2089 |
# probably wants them all.
|
2090 |
# FUTURE: This function could be improved for the 'extract many' case
|
|
2091 |
# by tracking each component and only doing the copy when the number of
|
|
2092 |
# children than need to apply delta's to it is > 1 or it is part of the
|
|
2093 |
# final output.
|
|
2094 |
multiple_versions = len(self.keys) != 1 |
|
2095 |
if self._record_map is None: |
|
2096 |
self._record_map = self.vf._raw_map_to_record_map( |
|
2097 |
self._raw_record_map) |
|
2098 |
record_map = self._record_map |
|
2099 |
# raw_record_map is key:
|
|
4032.1.1
by John Arbash Meinel
Merge the removal of all trailing whitespace, and resolve conflicts. |
2100 |
# Have read and parsed records at this point.
|
4005.3.6
by Robert Collins
Support delta_closure=True with NetworkRecordStream to transmit deltas over the wire when full text extraction is required on the far end. |
2101 |
for key in self.keys: |
2102 |
if key in self.nonlocal_keys: |
|
2103 |
# already handled
|
|
2104 |
continue
|
|
2105 |
components = [] |
|
2106 |
cursor = key |
|
2107 |
while cursor is not None: |
|
2108 |
try: |
|
2109 |
record, record_details, digest, next = record_map[cursor] |
|
2110 |
except KeyError: |
|
2111 |
raise RevisionNotPresent(cursor, self) |
|
2112 |
components.append((cursor, record, record_details, digest)) |
|
2113 |
cursor = next |
|
2114 |
if cursor in self._contents_map: |
|
2115 |
# no need to plan further back
|
|
2116 |
components.append((cursor, None, None, None)) |
|
2117 |
break
|
|
2118 |
||
2119 |
content = None |
|
2120 |
for (component_id, record, record_details, |
|
2121 |
digest) in reversed(components): |
|
2122 |
if component_id in self._contents_map: |
|
2123 |
content = self._contents_map[component_id] |
|
2124 |
else: |
|
2125 |
content, delta = self._factory.parse_record(key[-1], |
|
2126 |
record, record_details, content, |
|
2127 |
copy_base_content=multiple_versions) |
|
2128 |
if multiple_versions: |
|
2129 |
self._contents_map[component_id] = content |
|
2130 |
||
2131 |
# digest here is the digest from the last applied component.
|
|
2132 |
text = content.text() |
|
2133 |
actual_sha = sha_strings(text) |
|
2134 |
if actual_sha != digest: |
|
2135 |
raise SHA1KnitCorrupt(self, actual_sha, digest, key, text) |
|
2136 |
if multiple_versions: |
|
2137 |
return self._contents_map[requested_key] |
|
2138 |
else: |
|
2139 |
return content |
|
2140 |
||
2141 |
def _wire_bytes(self): |
|
2142 |
"""Get the bytes to put on the wire for 'key'.
|
|
2143 |
||
2144 |
The first collection of bytes asked for returns the serialised
|
|
2145 |
raw_record_map and the additional details (key, parent) for key.
|
|
2146 |
Subsequent calls return just the additional details (key, parent).
|
|
2147 |
The wire storage_kind given for the first key is 'knit-delta-closure',
|
|
2148 |
For subsequent keys it is 'knit-delta-closure-ref'.
|
|
2149 |
||
2150 |
:param key: A key from the content generator.
|
|
2151 |
:return: Bytes to put on the wire.
|
|
2152 |
"""
|
|
2153 |
lines = [] |
|
2154 |
# kind marker for dispatch on the far side,
|
|
2155 |
lines.append('knit-delta-closure') |
|
2156 |
# Annotated or not
|
|
2157 |
if self.vf._factory.annotated: |
|
2158 |
lines.append('annotated') |
|
2159 |
else: |
|
2160 |
lines.append('') |
|
2161 |
# then the list of keys
|
|
2162 |
lines.append('\t'.join(['\x00'.join(key) for key in self.keys |
|
2163 |
if key not in self.nonlocal_keys])) |
|
2164 |
# then the _raw_record_map in serialised form:
|
|
2165 |
map_byte_list = [] |
|
2166 |
# for each item in the map:
|
|
2167 |
# 1 line with key
|
|
2168 |
# 1 line with parents if the key is to be yielded (None: for None, '' for ())
|
|
2169 |
# one line with method
|
|
2170 |
# one line with noeol
|
|
2171 |
# one line with next ('' for None)
|
|
2172 |
# one line with byte count of the record bytes
|
|
2173 |
# the record bytes
|
|
2174 |
for key, (record_bytes, (method, noeol), next) in \ |
|
2175 |
self._raw_record_map.iteritems(): |
|
2176 |
key_bytes = '\x00'.join(key) |
|
2177 |
parents = self.global_map.get(key, None) |
|
2178 |
if parents is None: |
|
2179 |
parent_bytes = 'None:' |
|
2180 |
else: |
|
2181 |
parent_bytes = '\t'.join('\x00'.join(key) for key in parents) |
|
2182 |
method_bytes = method |
|
2183 |
if noeol: |
|
2184 |
noeol_bytes = "T" |
|
2185 |
else: |
|
2186 |
noeol_bytes = "F" |
|
2187 |
if next: |
|
2188 |
next_bytes = '\x00'.join(next) |
|
2189 |
else: |
|
2190 |
next_bytes = '' |
|
2191 |
map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % ( |
|
2192 |
key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes, |
|
2193 |
len(record_bytes), record_bytes)) |
|
2194 |
map_bytes = ''.join(map_byte_list) |
|
2195 |
lines.append(map_bytes) |
|
2196 |
bytes = '\n'.join(lines) |
|
2197 |
return bytes |
|
2198 |
||
2199 |
||
2200 |
class _VFContentMapGenerator(_ContentMapGenerator):
    """Content map generator reading from a VersionedFiles object."""

    def __init__(self, versioned_files, keys, nonlocal_keys=None,
                 global_map=None, raw_record_map=None, ordering='unordered'):
        """Create a _ContentMapGenerator.

        :param versioned_files: The versioned files that the texts are being
            extracted from.
        :param keys: The keys to produce content maps for. Any iterable is
            accepted; it is copied into a list.
        :param nonlocal_keys: An iterable of keys(possibly intersecting keys)
            which are known to not be in this knit, but rather in one of the
            fallback knits.
        :param global_map: The result of get_parent_map(keys) (or a supermap).
            This is required if get_record_stream() is to be used.
        :param raw_record_map: A unparsed raw record map to use for answering
            contents. When None, it is read from versioned_files.
        :param ordering: Passed on to _ContentMapGenerator.__init__.
        """
        _ContentMapGenerator.__init__(self, ordering=ordering)
        # The vf to source data from
        self.vf = versioned_files
        # The keys desired
        self.keys = list(keys)
        # Keys known to be in fallback vfs objects
        if nonlocal_keys is None:
            self.nonlocal_keys = set()
        else:
            self.nonlocal_keys = frozenset(nonlocal_keys)
        # Parents data for keys to be returned in get_record_stream
        self.global_map = global_map
        # The chunked lists for self.keys in text form
        self._text_map = {}
        # A cache of KnitContent objects used in extracting texts.
        self._contents_map = {}
        # All the knit records needed to assemble the requested keys as full
        # texts.
        self._record_map = None
        if raw_record_map is None:
            # Use the materialised list rather than the `keys` argument: a
            # one-shot iterable has already been exhausted by list() above.
            self._raw_record_map = self.vf._get_record_map_unparsed(
                self.keys, allow_missing=True)
        else:
            self._raw_record_map = raw_record_map
        # the factory for parsing records
        self._factory = self.vf._factory
|
2244 |
||
2245 |
||
2246 |
class _NetworkContentMapGenerator(_ContentMapGenerator):
    """Content map generator sourced from a network stream."""

    def __init__(self, bytes, line_end):
        """Construct a _NetworkContentMapGenerator from a bytes block.

        The layout parsed mirrors what _ContentMapGenerator._wire_bytes
        writes after its kind marker: a line saying whether the records are
        annotated, a line with the keys to emit, and then one serialised
        raw record after another until the end of bytes.

        :param bytes: The serialised content map.
        :param line_end: The offset in bytes at which the 'annotated' line
            starts (presumably just past the kind marker line - confirm
            against the caller).
        """
        # Initialise the base class as the sibling generator does, so that
        # _ordering exists on every generator.
        _ContentMapGenerator.__init__(self)
        self._bytes = bytes
        self.global_map = {}
        self._raw_record_map = {}
        self._contents_map = {}
        self._record_map = None
        self.nonlocal_keys = []
        # Get access to record parsing facilities
        self.vf = KnitVersionedFiles(None, None)

        def read_line(start):
            """Return (line at start, offset just past its newline)."""
            end_of_line = bytes.find('\n', start)
            return bytes[start:end_of_line], end_of_line + 1

        def split_keys(line):
            """Turn a tab separated line of NUL-joined keys into tuples."""
            return [tuple(segment.split('\x00'))
                    for segment in line.split('\t') if segment]

        # Annotated or not
        line, start = read_line(line_end)
        if line == 'annotated':
            self._factory = KnitAnnotateFactory()
        else:
            self._factory = KnitPlainFactory()
        # list of keys to emit in get_record_stream
        line, start = read_line(start)
        self.keys = split_keys(line)
        # now a loop until the end. XXX: It would be nice if this was just a
        # bunch of the same records as get_record_stream(..., False) gives,
        # but there is a decent sized gap stopping that at the moment.
        end = len(bytes)
        while start < end:
            # 1 line with key
            line, start = read_line(start)
            key = tuple(line.split('\x00'))
            # 1 line with parents (None: for None, '' for ())
            line, start = read_line(start)
            if line == 'None:':
                parents = None
            else:
                parents = tuple(split_keys(line))
            self.global_map[key] = parents
            # one line with method
            method, start = read_line(start)
            # one line with noeol
            line, start = read_line(start)
            noeol = line == "T"
            # one line with next ('' for None)
            line, start = read_line(start)
            if not line:
                basis = None
            else:
                basis = tuple(line.split('\x00'))
            # one line with byte count of the record bytes
            line, start = read_line(start)
            count = int(line)
            # the record bytes
            record_bytes = bytes[start:start + count]
            start = start + count
            # put it in the map
            self._raw_record_map[key] = (record_bytes, (method, noeol), basis)

    def get_record_stream(self):
        """Get a record stream for the keys requested by the bytestream."""
        first = True
        for key in self.keys:
            yield LazyKnitContentFactory(key, self.global_map[key], self, first)
            first = False

    def _wire_bytes(self):
        """Return the bytes this generator was constructed from."""
        return self._bytes
|
2333 |
||
2334 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2335 |
class _KndxIndex(object): |
2336 |
"""Manages knit index files
|
|
2337 |
||
3350.6.10
by Martin Pool
VersionedFiles review cleanups |
2338 |
The index is kept in memory and read on startup, to enable
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2339 |
fast lookups of revision information. The cursor of the index
|
2340 |
file is always pointing to the end, making it easy to append
|
|
2341 |
entries.
|
|
2342 |
||
2343 |
_cache is a cache for fast mapping from version id to a Index
|
|
2344 |
object.
|
|
2345 |
||
2346 |
_history is a cache for fast mapping from indexes to version ids.
|
|
2347 |
||
2348 |
The index data format is dictionary compressed when it comes to
|
|
2349 |
parent references; an index entry may only have parents with a
|
|
2350 |
lower index number. As a result, the index is topologically sorted.
|
|
1563.2.11
by Robert Collins
Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis. |
2351 |
|
2352 |
Duplicate entries may be written to the index for a single version id
|
|
2353 |
if this is done then the latter one completely replaces the former:
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
2354 |
this allows updates to correct version and parent information.
|
1563.2.11
by Robert Collins
Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis. |
2355 |
Note that the two entries may share the delta, and that successive
|
2356 |
annotations and references MUST point to the first entry.
|
|
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
2357 |
|
2358 |
The index file on disc contains a header, followed by one line per knit
|
|
2359 |
record. The same revision can be present in an index file more than once.
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
2360 |
The first occurrence gets assigned a sequence number starting from 0.
|
2361 |
||
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
2362 |
The format of a single line is
|
2363 |
REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
|
|
2364 |
REVISION_ID is a utf8-encoded revision id
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
2365 |
FLAGS is a comma separated list of flags about the record. Values include
|
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
2366 |
no-eol, line-delta, fulltext.
|
2367 |
BYTE_OFFSET is the ascii representation of the byte offset in the data file
|
|
2368 |
that the compressed data starts at.
|
|
2369 |
LENGTH is the ascii representation of the length of the data file.
|
|
2370 |
PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
|
|
2371 |
REVISION_ID.
|
|
2372 |
PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
|
|
2373 |
revision id already in the knit that is a parent of REVISION_ID.
|
|
2374 |
The ' :' marker is the end of record marker.
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
2375 |
|
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
2376 |
partial writes:
|
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
2377 |
when a write is interrupted to the index file, it will result in a line
|
2378 |
that does not end in ' :'. If the ' :' is not present at the end of a line,
|
|
2379 |
or at the end of the file, then the record that is missing it will be
|
|
2380 |
ignored by the parser.
|
|
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
2381 |
|
1759.2.1
by Jelmer Vernooij
Fix some types (found using aspell). |
2382 |
When writing new records to the index file, the data is preceded by '\n'
|
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
2383 |
to ensure that records always start on new lines even if the last write was
|
2384 |
interrupted. As a result its normal for the last line in the index to be
|
|
2385 |
missing a trailing newline. One can be added with no harmful effects.
|
|
3350.6.11
by Martin Pool
Review cleanups and documentation from Robert's mail on 2080618 |
2386 |
|
2387 |
:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,
|
|
2388 |
where prefix is e.g. the (fileid,) for .texts instances or () for
|
|
2389 |
constant-mapped things like .revisions, and the old state is
|
|
2390 |
tuple(cache_dict, history_vector). This is used to prevent having an
|
|
2391 |
ABI change with the C extension that reads .kndx files.
|
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2392 |
"""
|
2393 |
||
1666.1.6
by Robert Collins
Make knit the default format. |
2394 |
HEADER = "# bzr knit index 8\n" |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2395 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2396 |
def __init__(self, transport, mapper, get_scope, allow_writes, is_locked): |
2397 |
"""Create a _KndxIndex on transport using mapper."""
|
|
2398 |
self._transport = transport |
|
2399 |
self._mapper = mapper |
|
2400 |
self._get_scope = get_scope |
|
2401 |
self._allow_writes = allow_writes |
|
2402 |
self._is_locked = is_locked |
|
2403 |
self._reset_cache() |
|
2404 |
self.has_graph = True |
|
2405 |
||
4009.3.12
by Robert Collins
Polish on inserting record streams with missing compression parents. |
2406 |
def add_records(self, records, random_id=False, missing_compression_parents=False):
    """Add multiple records to the index.

    The records are grouped by the .kndx path their key maps to, and the
    paths are processed in sorted order.  For every path the index lines
    are appended to the existing file, or the index is initialised with
    them when nothing was recorded for that prefix yet.  If anything goes
    wrong while handling a path, the cached state of its prefix is put
    back as it was before the error is re-raised.

    :param records: a list of tuples:
        (key, options, access_memo, parents).
    :param random_id: Not consulted by this implementation.
    :param missing_compression_parents: If True the caller is adding
        records whose compression parents may be unavailable, which is
        rejected here: RevisionNotPresent is raised with the sorted keys
        of the records.
    :raises errors.RevisionNotPresent: If missing_compression_parents is
        True.
    :raises AssertionError: If a generated index line is not a plain str.
    """
    if missing_compression_parents:
        # It might be nice to get the edge of the records. But keys isn't
        # _wrong_.
        keys = sorted(record[0] for record in records)
        raise errors.RevisionNotPresent(keys, self)
    # path -> (prefix, [records destined for that path])
    records_by_path = {}
    for record in records:
        key = record[0]
        prefix = key[:-1]
        path = self._mapper.map(key) + '.kndx'
        if path not in records_by_path:
            records_by_path[path] = (prefix, [])
        records_by_path[path][1].append(record)
    for path in sorted(records_by_path):
        prefix, path_records = records_by_path[path]
        self._load_prefixes([prefix])
        # Snapshot the cached state so it can be restored on failure.
        cached_state = self._kndx_cache[prefix]
        orig_history = cached_state[1][:]
        orig_cache = cached_state[0].copy()
        index_lines = []
        try:
            for key, options, access_memo, parents in path_records:
                _unused, pos, size = access_memo
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    self._dictionary_compress(parents))
                if type(line) is not str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                index_lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if not len(orig_history):
                self._init_index(path, index_lines)
            else:
                self._transport.append_bytes(path, ''.join(index_lines))
        except:
            # If any problems happen, restore the original values and
            # re-raise
            self._kndx_cache[prefix] = (orig_cache, orig_history)
            raise
|
|
2458 |
||
4011.5.7
by Andrew Bennetts
Remove leading underscore from _scan_unvalidate_index, explicitly NotImplementedError it for _KndxIndex. |
2459 |
def scan_unvalidated_index(self, graph_index):
    """See _KnitGraphIndex.scan_unvalidated_index.

    :raises NotImplementedError: Always; the argument is not used.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    unsupported = self.scan_unvalidated_index
    raise NotImplementedError(unsupported)
2464 |
||
2465 |
def get_missing_compression_parents(self):
    """See _KnitGraphIndex.get_missing_compression_parents.

    kndx files do not support atomic insertion via separate index files,
    so this operation is not available on this index type.

    :raises NotImplementedError: Always; the argument is this bound method.
    """
    unsupported = self.get_missing_compression_parents
    raise NotImplementedError(unsupported)
|
4032.1.1
by John Arbash Meinel
Merge the removal of all trailing whitespace, and resolve conflicts. |
2470 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2471 |
def _cache_key(self, key, options, pos, size, parent_keys):
    """Store one version record in the per-prefix cache and history list.

    This is inlined into _load_data for performance. KEEP IN SYNC.
    (It saves 60ms, 25% of the __init__ overhead on local 4000 record
    indexes).

    :param key: Key tuple; key[:-1] selects the prefix cache and key[-1]
        is the version id stored in it.
    :param options: Stored unchanged in the cache entry.
    :param pos: Stored unchanged in the cache entry.
    :param size: Stored unchanged in the cache entry.
    :param parent_keys: Keys of the parents; each must share key's prefix.
    :raises ValueError: If any parent key has a different prefix.
    """
    version_id = key[-1]
    prefix = key[:-1]
    # last-element only for compatibility with the C load_data.
    parents = tuple(parent[-1] for parent in parent_keys)
    for parent in parent_keys:
        if parent[:-1] != prefix:
            raise ValueError("mismatched prefixes for %r, %r" % (
                key, parent_keys))
    cache, history = self._kndx_cache[prefix]
    # only want the _history index to reference the 1st index entry
    # for version_id
    if version_id in cache:
        # Already known: keep the history slot recorded the first time.
        index = cache[version_id][5]
    else:
        index = len(history)
        history.append(version_id)
    cache[version_id] = (version_id, options, pos, size, parents, index)
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2500 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2501 |
def check_header(self, fp):
    """Check that the first line read from fp equals self.HEADER.

    :param fp: File-like object; exactly one line is read from it.
    :raises errors.NoSuchFile: If the first line is empty. An empty file
        can be treated as though the file doesn't exist yet.
    :raises KnitHeaderError: If the first line is anything other than
        self.HEADER.
    """
    first_line = fp.readline()
    if first_line == '':
        raise errors.NoSuchFile(self)
    if first_line == self.HEADER:
        return
    raise KnitHeaderError(badline=first_line, filename=self)
|
2509 |
||
2510 |
def _check_read(self):
    """Ensure the index may be read, dropping caches from an old scope.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
    scope_changed = self._get_scope() != self._scope
    if scope_changed:
        # Cached data belongs to a different scope; start afresh.
        self._reset_cache()
|
2515 |
||
3316.2.3
by Robert Collins
Remove manual notification of transaction finishing on versioned files. |
2516 |
def _check_write_ok(self):
    """Raise unless writes are currently permitted.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    :raises errors.ReadOnlyObjectDirtiedError: If, after any cache reset,
        self._mode is not 'w'.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
    scope_changed = self._get_scope() != self._scope
    if scope_changed:
        # Must happen before the mode test below, which reads self._mode.
        self._reset_cache()
    if self._mode != 'w':
        raise errors.ReadOnlyObjectDirtiedError(self)
|
2524 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2525 |
def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys. It may be a one-shot iterator; it is
        materialised once because it is needed both for the parent lookup
        and for the loop below.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record; here a (method, noeol) tuple.
    """
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue  # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # Any non-fulltext record is built on its first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result
2560 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2561 |
def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both appear in
        the key's options.
    :raises errors.KnitIndexUnknownMethod: If neither is in the options.
    """
    options = self.get_options(key)
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self, options)
|
2570 |
||
2571 |
def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    :raises RevisionNotPresent: If looking key up in the loaded index
        cache raises KeyError.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    try:
        entry = self._kndx_cache[prefix][0][suffix]
        return entry[1]
    except KeyError:
        raise RevisionNotPresent(key, self)
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2582 |
|
4593.5.35
by John Arbash Meinel
Start working on a per-vf implementation test of find_ancestry. |
2583 |
def find_ancestry(self, keys):
    """See CombinedGraphIndex.find_ancestry()

    :param keys: An iterable of keys to start from. It may be a one-shot
        iterator; it is materialised once.
    :return: A tuple (parent_map, missing_keys). parent_map maps every
        reachable key found in the index to a tuple of its parent keys;
        missing_keys is the set of visited keys with no index entry.
    """
    # Private copy: used to find the prefixes and then as the work list.
    pending_keys = list(keys)
    self._load_prefixes(set(key[:-1] for key in pending_keys))
    parent_map = {}
    missing_keys = set()
    # This assumes that keys will not reference parents in a different
    # prefix, which is accurate so far.
    while pending_keys:
        key = pending_keys.pop()
        if key in parent_map:
            continue
        prefix = key[:-1]
        try:
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            missing_keys.add(key)
        else:
            parent_keys = tuple([prefix + (suffix,)
                                 for suffix in suffix_parents])
            parent_map[key] = parent_keys
            pending_keys.extend([p for p in parent_keys
                                 if p not in parent_map])
    return parent_map, missing_keys
|
2609 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2610 |
def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for. It may be a one-shot
        iterator; it is materialised once because it is needed both to
        find the prefixes and for the lookup loop.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    prefixes = set(key[:-1] for key in keys)
    self._load_prefixes(prefixes)
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # Not in the index: leave it out of the mapping.
            pass
        else:
            result[key] = tuple(prefix + (suffix,) for
                                suffix in suffix_parents)
    return result
|
2633 |
||
2634 |
def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    record = self._kndx_cache[prefix][0][suffix]
    position = record[2]
    size = record[3]
    return key, position, size
|
2644 |
||
3830.3.12
by Martin Pool
Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks |
2645 |
has_key = _mod_index._has_key_from_parent_map |
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
2646 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2647 |
def _init_index(self, path, extra_lines=()):
    """Initialize an index.

    Writes self.HEADER followed by extra_lines to path via the transport.

    :param path: Transport-relative path of the index file to write.
    :param extra_lines: Iterable of strings appended after the header. The
        default is an immutable empty tuple rather than a shared list.
    """
    sio = StringIO()
    sio.write(self.HEADER)
    sio.writelines(extra_lines)
    sio.seek(0)
    # NOTE: the parent directory is always created; no file or directory
    # mode is passed to the transport here.
    self._transport.put_file_non_atomic(path, sio,
                                        create_parent_dir=True)
|
|
2658 |
||
2659 |
def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of key tuples, one per entry in the history list of
        every prefix found.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) is ConstantMapper:
        prefixes = [()]
    else:
        # Strip the extension so files sharing a stem map to one prefix.
        relpaths = set(
            os.path.splitext(quoted_relpath)[0]
            for quoted_relpath in self._transport.iter_files_recursive())
        prefixes = [self._mapper.unmap(relpath) for relpath in relpaths]
    self._load_prefixes(prefixes)
    all_keys = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        all_keys.update(prefix + (suffix,) for suffix in history)
    return all_keys
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
2680 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2681 |
def _load_prefixes(self, prefixes):
    """Load the indices for prefixes.

    Prefixes already in self._kndx_cache are left alone. For each other
    prefix the matching '.kndx' file is parsed and cached; if that raises
    NoSuchFile an empty entry is cached instead.

    :param prefixes: Iterable of key prefixes to make available.
    """
    self._check_read()
    for prefix in prefixes:
        if prefix in self._kndx_cache:
            continue
        # the load_data interface writes to these variables.
        self._cache = {}
        self._history = []
        self._filename = prefix
        try:
            path = self._mapper.map(prefix) + '.kndx'
            fp = self._transport.get(path)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
            self._kndx_cache[prefix] = (self._cache, self._history)
            del self._cache, self._filename, self._history
        except NoSuchFile:
            self._kndx_cache[prefix] = ({}, [])
            if type(self._mapper) is ConstantMapper:
                # preserve behaviour for revisions.kndx etc.
                self._init_index(path)
            del self._cache, self._filename, self._history
|
2711 |
||
3830.3.12
by Martin Pool
Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks |
2712 |
missing_keys = _mod_index._missing_keys_from_parent_map |
2713 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2714 |
def _partition_keys(self, keys):
    """Turn keys into a dict of prefix:suffix_list.

    :param keys: Iterable of key tuples.
    :return: A dict mapping each key[:-1] to the list of key[-1] values
        seen for it, in iteration order.
    """
    partitions = {}
    for key in keys:
        partitions.setdefault(key[:-1], []).append(key[-1])
    return partitions
|
2721 |
||
2722 |
def _dictionary_compress(self, keys):
    """Dictionary compress keys.

    :param keys: The keys to generate references to; a sequence of key
        tuples that all share one prefix.
    :return: A string representation of keys. keys which are present are
        dictionary compressed, and others are emitted as fulltext with a
        '.' prefix.
    :raises ValueError: If the keys do not all share the first key's
        prefix.
    """
    if not keys:
        return ''
    prefix = keys[0][:-1]
    cache = self._kndx_cache[prefix][0]
    references = []
    for key in keys:
        if key[:-1] != prefix:
            # kndx indices cannot refer across partitioned storage.
            # keys is wrapped in a 1-tuple so a tuple of keys is shown
            # whole instead of being unpacked as format arguments.
            raise ValueError("mismatched prefixes for %r" % (keys,))
        suffix = key[-1]
        if suffix in cache:
            # -- inlined lookup() --
            references.append(str(cache[suffix][5]))
            # -- end lookup () --
        else:
            references.append('.' + suffix)
    return ' '.join(references)
2746 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2747 |
def _reset_cache(self):
    """Drop all parsed kndx data and re-read the scope and write mode.

    Sets self._mode to 'w' when self._allow_writes() is true, else 'r'.
    """
    # Possibly this should be a LRU cache. A dictionary from key_prefix to
    # (cache_dict, history_vector) for parsed kndx files.
    self._kndx_cache = {}
    self._scope = self._get_scope()
    if self._allow_writes():
        mode = 'w'
    else:
        mode = 'r'
    self._mode = mode
2757 |
||
3878.1.2
by John Arbash Meinel
Move the sorting into each index, and customize it for Kndx access. |
2758 |
def _sort_keys_by_io(self, keys, positions):
    """Figure out an optimal order to read the records for the given keys.

    Sort keys, grouped by index and sorted by position.

    :param keys: A list of keys whose records we want to read. This will be
        sorted 'in-place'.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    def io_order(key):
        index_memo = positions[key][1]
        # Group by prefix and position. index_memo[0] is the key, so it is
        # (file_id, revision_id) and we don't want to sort on revision_id,
        # index_memo[1] is the position, and index_memo[2] is the size,
        # which doesn't matter for the sort
        prefix = index_memo[0][:-1]
        offset = index_memo[1]
        return prefix, offset
    keys.sort(key=io_order)
    return None
3878.1.2
by John Arbash Meinel
Move the sorting into each index, and customize it for Kndx access. |
2777 |
|
4039.3.5
by John Arbash Meinel
Add direct tests for _get_total_build_size. |
2778 |
_get_total_build_size = _get_total_build_size |
4039.3.2
by John Arbash Meinel
Batch get_record_stream(fulltexts) into 5MB requests. |
2779 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2780 |
def _split_key(self, key):
    """Split key into a prefix and suffix.

    :return: A tuple (key[:-1], key[-1]).
    """
    prefix = key[:-1]
    suffix = key[-1]
    return prefix, suffix
|
2783 |
||
2784 |
||
4309.1.1
by Andrew Bennetts
Track which keys referenced the missing parents. |
2785 |
class _KeyRefs(object): |
2786 |
||
4634.29.6
by Andrew Bennetts
Put new key tracking in _KeyRefs rather than alongside it. |
2787 |
def __init__(self, track_new_keys=False): |
4309.1.1
by Andrew Bennetts
Track which keys referenced the missing parents. |
2788 |
# dict mapping 'key' to 'set of keys referring to that key'
|
2789 |
self.refs = {} |
|
4634.29.6
by Andrew Bennetts
Put new key tracking in _KeyRefs rather than alongside it. |
2790 |
if track_new_keys: |
4634.29.16
by Andrew Bennetts
Fix buggy TestKeyDependencies test, tweak error string and comment. |
2791 |
# set remembering all new keys
|
4634.29.6
by Andrew Bennetts
Put new key tracking in _KeyRefs rather than alongside it. |
2792 |
self.new_keys = set() |
2793 |
else: |
|
2794 |
self.new_keys = None |
|
2795 |
||
2796 |
def clear(self): |
|
2797 |
if self.refs: |
|
2798 |
self.refs.clear() |
|
2799 |
if self.new_keys: |
|
2800 |
self.new_keys.clear() |
|
4309.1.1
by Andrew Bennetts
Track which keys referenced the missing parents. |
2801 |
|
2802 |
def add_references(self, key, refs): |
|
2803 |
# Record the new references
|
|
2804 |
for referenced in refs: |
|
2805 |
try: |
|
2806 |
needed_by = self.refs[referenced] |
|
2807 |
except KeyError: |
|
2808 |
needed_by = self.refs[referenced] = set() |
|
2809 |
needed_by.add(key) |
|
2810 |
# Discard references satisfied by the new key
|
|
2811 |
self.add_key(key) |
|
2812 |
||
4634.29.6
by Andrew Bennetts
Put new key tracking in _KeyRefs rather than alongside it. |
2813 |
def get_new_keys(self): |
2814 |
return self.new_keys |
|
2815 |
||
4309.1.1
by Andrew Bennetts
Track which keys referenced the missing parents. |
2816 |
def get_unsatisfied_refs(self): |
2817 |
return self.refs.iterkeys() |
|
2818 |
||
4634.29.6
by Andrew Bennetts
Put new key tracking in _KeyRefs rather than alongside it. |
2819 |
def _satisfy_refs_for_key(self, key): |
4309.1.1
by Andrew Bennetts
Track which keys referenced the missing parents. |
2820 |
try: |
2821 |
del self.refs[key] |
|
2822 |
except KeyError: |
|
2823 |
# No keys depended on this key. That's ok.
|
|
2824 |
pass
|
|
2825 |
||
4634.29.6
by Andrew Bennetts
Put new key tracking in _KeyRefs rather than alongside it. |
2826 |
def add_key(self, key): |
2827 |
# satisfy refs for key, and remember that we've seen this key.
|
|
2828 |
self._satisfy_refs_for_key(key) |
|
2829 |
if self.new_keys is not None: |
|
2830 |
self.new_keys.add(key) |
|
2831 |
||
2832 |
def satisfy_refs_for_keys(self, keys): |
|
4309.1.1
by Andrew Bennetts
Track which keys referenced the missing parents. |
2833 |
for key in keys: |
4634.29.6
by Andrew Bennetts
Put new key tracking in _KeyRefs rather than alongside it. |
2834 |
self._satisfy_refs_for_key(key) |
4309.1.1
by Andrew Bennetts
Track which keys referenced the missing parents. |
2835 |
|
4309.1.2
by Andrew Bennetts
Tentative fix for bug 368418: only fail the missing parent inventories check if there are missing texts that appear to be altered by the inventories with missing parents. |
2836 |
def get_referrers(self): |
2837 |
result = set() |
|
2838 |
for referrers in self.refs.itervalues(): |
|
2839 |
result.update(referrers) |
|
2840 |
return result |
|
2841 |
||
4309.1.1
by Andrew Bennetts
Track which keys referenced the missing parents. |
2842 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2843 |
class _KnitGraphIndex(object): |
2844 |
"""A KnitVersionedFiles index layered on GraphIndex."""
|
|
2845 |
||
2846 |
def __init__(self, graph_index, is_locked, deltas=False, parents=True,
             add_callback=None, track_external_parent_refs=False):
    """Construct a KnitGraphIndex on a graph_index.

    :param graph_index: An implementation of bzrlib.index.GraphIndex.
    :param is_locked: A callback to check whether the object should answer
        queries; returns True if the index is locked and thus usable.
    :param deltas: Allow delta-compressed records.
    :param parents: If True, record knits parents, if not do not record
        parents.
    :param add_callback: If not None, allow additions to the index and call
        this callback with a list of added GraphIndex nodes:
        [(node, value, node_refs), ...]
    :param track_external_parent_refs: If True, create a _KeyRefs tracker
        in self._key_dependencies so that external parent references from
        added records can be recorded; otherwise it is None.
    :raises KnitCorrupt: If deltas is true while parents is false.
    """
    # Plain attributes are stored before the check below, so the instance
    # already carries them when it is handed to KnitCorrupt.
    self._add_callback = add_callback
    self._graph_index = graph_index
    self._deltas = deltas
    self._parents = parents
    if deltas and not parents:
        # XXX: TODO: Delta tree and parent graph should be conceptually
        # separate.
        raise KnitCorrupt(self, "Cannot do delta compression without "
                          "parent tracking.")
    self.has_graph = parents
    self._is_locked = is_locked
    self._missing_compression_parents = set()
    self._key_dependencies = None
    if track_external_parent_refs:
        self._key_dependencies = _KeyRefs()
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
2881 |
|
3517.4.13
by Martin Pool
Add repr methods |
2882 |
def __repr__(self):
    """Return 'ClassName(<repr of the wrapped graph index>)'."""
    class_name = self.__class__.__name__
    return "%s(%r)" % (class_name, self._graph_index)
|
2884 |
||
4009.3.12
by Robert Collins
Polish on inserting record streams with missing compression parents. |
2885 |
def add_records(self, records, random_id=False,
                missing_compression_parents=False):
    """Add multiple records to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param records: an iterable of tuples:
        (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :param missing_compression_parents: If True the records being added
        may refer to unavailable texts (or texts inside records) as
        compression parents; every compression parent that is not one of
        the added keys is remembered as a missing compression parent. If
        False the records are only compressed against texts already in
        the index (or inside records) and nothing is recorded as missing.
    :raises errors.ReadOnlyError: if this index has no add callback.
    :raises KnitCorrupt: if a line-delta is added to a non-delta index, a
        line-delta has no parent to be compressed against, a record has
        parents in a parentless index, or (when random_id is False) an
        already present key has inconsistent details.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.

    keys = {}
    compression_parents = set()
    key_dependencies = self._key_dependencies
    for (key, options, access_memo, parents) in records:
        if self._parents:
            parents = tuple(parents)
            if key_dependencies is not None:
                key_dependencies.add_references(key, parents)
        # Only position and size are stored in the index value; the
        # unpacking still checks that the memo is a 3-tuple.
        _index, pos, size = access_memo
        if 'no-eol' in options:
            value = 'N'
        else:
            value = ' '
        value += "%d %d" % (pos, size)
        is_delta = 'line-delta' in options
        if is_delta and not self._deltas:
            raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if self._parents:
            if self._deltas:
                if is_delta:
                    if not parents:
                        # Without this check parents[0] would fail with a
                        # bare IndexError.
                        raise KnitCorrupt(self, "attempt to add line-delta "
                            "without a compression parent.")
                    # The first parent is the compression parent.
                    node_refs = (parents, (parents[0],))
                    if missing_compression_parents:
                        compression_parents.add(parents[0])
                else:
                    node_refs = (parents, ())
            else:
                node_refs = (parents, )
        else:
            if parents:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        keys[key] = (value, node_refs)
    # check for dups
    if not random_id:
        present_nodes = self._get_entries(keys)
        for (_index, key, value, node_refs) in present_nodes:
            # Only the no-eol flag and the parent list have to agree.
            if (value[0] != keys[key][0][0] or
                node_refs[:1] != keys[key][1][:1]):
                raise KnitCorrupt(self, "inconsistent details in add_records"
                    ": %s %s" % ((value, node_refs), keys[key]))
            # Already present and consistent: do not add it again.
            del keys[key]
    # items() rather than iteritems(): works on every Python version.
    if self._parents:
        result = [(key, value, node_refs)
                  for key, (value, node_refs) in keys.items()]
    else:
        result = [(key, value)
                  for key, (value, node_refs) in keys.items()]
    self._add_callback(result)
    if missing_compression_parents:
        # This may appear to be incorrect (it does not check for
        # compression parents that are in the existing graph index),
        # but such records won't have been buffered, so this is
        # actually correct: every entry when
        # missing_compression_parents==True either has a missing parent, or
        # a parent that is one of the keys in records.
        compression_parents.difference_update(keys)
        self._missing_compression_parents.update(compression_parents)
    # Adding records may have satisfied missing compression parents.
    self._missing_compression_parents.difference_update(keys)
4032.1.1
by John Arbash Meinel
Merge the removal of all trailing whitespace, and resolve conflicts. |
2969 |
|
4011.5.7
by Andrew Bennetts
Remove leading underscore from _scan_unvalidate_index, explicitly NotImplementedError it for _KndxIndex. |
2970 |
def scan_unvalidated_index(self, graph_index):
    """Tell this _KnitGraphIndex about an index that is not validated yet.

    The index is inspected so that compression parents it refers to but
    which cannot be found are remembered as missing, and so that the
    parent references of all its entries are recorded.

    :param graph_index: A GraphIndex
    """
    if self._deltas:
        # Compression parents live in reference list 1.
        unresolved = graph_index.external_references(ref_list_num=1)
        already_present = self.get_parent_map(unresolved)
        unresolved.difference_update(already_present)
        self._missing_compression_parents.update(unresolved)
    key_refs = self._key_dependencies
    if key_refs is None:
        return
    # Add parent refs from graph_index (and discard parent refs that
    # the graph_index has).
    for entry in graph_index.iter_all_entries():
        entry_key = entry[1]
        parent_keys = entry[3][0]
        key_refs.add_references(entry_key, parent_keys)
4009.3.7
by Andrew Bennetts
Most tests passing. |
2988 |
|
4011.5.1
by Andrew Bennetts
Start to add _add_unvalidated_index/get_missing_compression_parents methods to _KnitGraphIndex. |
2989 |
def get_missing_compression_parents(self):
    """Return the keys of missing compression parents.

    Compression parents go missing when a record stream lacked some basis
    texts, or when a scanned index lacked some basis texts.

    :return: An immutable snapshot (frozenset) of the currently missing
        compression parent keys.
    """
    snapshot = frozenset(self._missing_compression_parents)
    return snapshot
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
2996 |
|
4257.4.11
by Andrew Bennetts
Polish the patch. |
2997 |
def get_missing_parents(self):
    """Return the keys of missing parents.

    :return: A frozenset of the references that are still unsatisfied
        after discarding those this index can actually supply.
    """
    # If updating this, you should also update
    # groupcompress._GCGraphIndex.get_missing_parents
    key_refs = self._key_dependencies
    # We may have false positives, so filter those out.
    candidates = key_refs.get_unsatisfied_refs()
    actually_present = self.get_parent_map(candidates)
    key_refs.satisfy_refs_for_keys(actually_present)
    return frozenset(key_refs.get_unsatisfied_refs())
|
4257.4.11
by Andrew Bennetts
Polish the patch. |
3005 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3006 |
def _check_read(self): |
3007 |
"""raise if reads are not permitted."""
|
|
3008 |
if not self._is_locked(): |
|
3009 |
raise errors.ObjectNotLocked(self) |
|
3010 |
||
3011 |
def _check_write_ok(self): |
|
3012 |
"""Assert if writes are not permitted."""
|
|
3013 |
if not self._is_locked(): |
|
3014 |
raise errors.ObjectNotLocked(self) |
|
3015 |
||
3016 |
def _compression_parent(self, an_entry): |
|
3017 |
# return the key that an_entry is compressed against, or None
|
|
3018 |
# Grab the second parent list (as deltas implies parents currently)
|
|
3019 |
compression_parents = an_entry[3][1] |
|
3020 |
if not compression_parents: |
|
3021 |
return None |
|
3022 |
if len(compression_parents) != 1: |
|
3023 |
raise AssertionError( |
|
3024 |
"Too many compression parents: %r" % compression_parents) |
|
3025 |
return compression_parents[0] |
|
3026 |
||
3027 |
def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Keys without an entry in the index (ghosts) do not appear in the
    result.

    :param keys: An iterable of keys.
    :return: A dict mapping each found key to a tuple
        (index_memo, compression_parent, parents, record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node (always () when parents are not
            tracked)
        record_details
            (method, noeol): extra information about the content which
            needs to be passed to Factory.parse_record
    """
    self._check_read()
    details = {}
    for node in self._get_entries(keys, False):
        node_key = node[1]
        if self._parents:
            logical_parents = node[3][0]
        else:
            logical_parents = ()
        if self._deltas:
            basis_key = self._compression_parent(node)
        else:
            basis_key = None
        # The first character of the value is the no-eol flag.
        lacks_eol = (node[2][0] == 'N')
        method = 'fulltext'
        if basis_key:
            method = 'line-delta'
        index_memo = self._node_to_position(node)
        details[node_key] = (index_memo, basis_key, logical_parents,
                             (method, lacks_eol))
    return details
|
3068 |
||
3069 |
def _get_entries(self, keys, check_present=False): |
|
3070 |
"""Get the entries for keys.
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
3071 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3072 |
:param keys: An iterable of index key tuples.
|
3073 |
"""
|
|
3074 |
keys = set(keys) |
|
3075 |
found_keys = set() |
|
3076 |
if self._parents: |
|
3077 |
for node in self._graph_index.iter_entries(keys): |
|
3078 |
yield node |
|
3079 |
found_keys.add(node[1]) |
|
3080 |
else: |
|
3081 |
# adapt parentless index to the rest of the code.
|
|
3082 |
for node in self._graph_index.iter_entries(keys): |
|
3083 |
yield node[0], node[1], node[2], () |
|
3084 |
found_keys.add(node[1]) |
|
3085 |
if check_present: |
|
3086 |
missing_keys = keys.difference(found_keys) |
|
3087 |
if missing_keys: |
|
3088 |
raise RevisionNotPresent(missing_keys.pop(), self) |
|
3089 |
||
3090 |
def get_method(self, key):
    """Return compression method of specified key."""
    node = self._get_node(key)
    return self._get_method(node)
|
3093 |
||
3094 |
def _get_method(self, node): |
|
3095 |
if not self._deltas: |
|
3096 |
return 'fulltext' |
|
3097 |
if self._compression_parent(node): |
|
3098 |
return 'line-delta' |
|
3099 |
else: |
|
3100 |
return 'fulltext' |
|
3101 |
||
3102 |
def _get_node(self, key): |
|
3103 |
try: |
|
3104 |
return list(self._get_entries([key]))[0] |
|
3105 |
except IndexError: |
|
3106 |
raise RevisionNotPresent(key, self) |
|
3107 |
||
3108 |
def get_options(self, key):
    """Return a list representing options.

    The list starts with the compression method and is followed by
    'no-eol' when the stored value carries the no-eol flag,
    e.g. ['fulltext', 'no-eol']
    """
    entry = self._get_node(key)
    method = self._get_method(entry)
    if entry[2][0] != 'N':
        return [method]
    return [method, 'no-eol']
|
3118 |
||
4593.5.35
by John Arbash Meinel
Start working on a per-vf implementation test of find_ancestry. |
3119 |
def find_ancestry(self, keys):
    """See CombinedGraphIndex.find_ancestry()

    The request is forwarded to the backing index using reference list 0.
    """
    ref_list_num = 0
    return self._graph_index.find_ancestry(keys, ref_list_num)
|
3122 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3123 |
def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping. When parents are not tracked every found key maps
        to None.
    """
    self._check_read()
    entries = self._get_entries(keys)
    track_parents = self._parents
    parent_map = {}
    for entry in entries:
        if track_parents:
            parent_map[entry[1]] = entry[3][0]
        else:
            parent_map[entry[1]] = None
    return parent_map
|
3140 |
||
3141 |
def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    return self._node_to_position(self._get_node(key))
|
3149 |
||
3830.3.12
by Martin Pool
Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks |
3150 |
has_key = _mod_index._has_key_from_parent_map |
3830.3.9
by Martin Pool
Simplify kvf insert_record_stream; add has_key shorthand methods; update stacking effort tests |
3151 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3152 |
def keys(self):
    """Get all the keys in the collection.

    :return: A list of the key of every entry in the backing index, in
        no particular order.
    """
    self._check_read()
    all_keys = []
    for entry in self._graph_index.iter_all_entries():
        all_keys.append(entry[1])
    return all_keys
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
3159 |
|
3830.3.12
by Martin Pool
Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks |
3160 |
missing_keys = _mod_index._missing_keys_from_parent_map |
3161 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3162 |
def _node_to_position(self, node): |
3163 |
"""Convert an index value to position details."""
|
|
3164 |
bits = node[2][1:].split(' ') |
|
3165 |
return node[0], int(bits[0]), int(bits[1]) |
|
3166 |
||
3878.1.2
by John Arbash Meinel
Move the sorting into each index, and customize it for Kndx access. |
3167 |
def _sort_keys_by_io(self, keys, positions): |
3168 |
"""Figure out an optimal order to read the records for the given keys.
|
|
3169 |
||
3170 |
Sort keys, grouped by index and sorted by position.
|
|
3171 |
||
3172 |
:param keys: A list of keys whose records we want to read. This will be
|
|
3173 |
sorted 'in-place'.
|
|
3174 |
:param positions: A dict, such as the one returned by
|
|
3175 |
_get_components_positions()
|
|
3176 |
:return: None
|
|
3177 |
"""
|
|
3178 |
def get_index_memo(key): |
|
3878.1.3
by John Arbash Meinel
Add a comment about what data we are sorting by. |
3179 |
# index_memo is at offset [1]. It is made up of (GraphIndex,
|
3180 |
# position, size). GI is an object, which will be unique for each
|
|
3181 |
# pack file. This causes us to group by pack file, then sort by
|
|
3182 |
# position. Size doesn't matter, but it isn't worth breaking up the
|
|
3183 |
# tuple.
|
|
3878.1.2
by John Arbash Meinel
Move the sorting into each index, and customize it for Kndx access. |
3184 |
return positions[key][1] |
3185 |
return keys.sort(key=get_index_memo) |
|
3186 |
||
4039.3.5
by John Arbash Meinel
Add direct tests for _get_total_build_size. |
3187 |
_get_total_build_size = _get_total_build_size |
4039.3.2
by John Arbash Meinel
Batch get_record_stream(fulltexts) into 5MB requests. |
3188 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3189 |
|
3190 |
class _KnitKeyAccess(object): |
|
3191 |
"""Access to records in .knit files."""
|
|
3192 |
||
3193 |
def __init__(self, transport, mapper): |
|
3194 |
"""Create a _KnitKeyAccess with transport and mapper.
|
|
3195 |
||
3196 |
:param transport: The transport the access object is rooted at.
|
|
3197 |
:param mapper: The mapper used to map keys to .knit files.
|
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3198 |
"""
|
3199 |
self._transport = transport |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3200 |
self._mapper = mapper |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3201 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3202 |
def add_raw_records(self, key_sizes, raw_data): |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3203 |
"""Add raw knit bytes to a storage area.
|
3204 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3205 |
The data is spooled to the container writer in one bytes-record per
|
3206 |
raw data item.
|
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3207 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3208 |
:param sizes: An iterable of tuples containing the key and size of each
|
3209 |
raw data segment.
|
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3210 |
:param raw_data: A bytestring containing the data.
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3211 |
:return: A list of memos to retrieve the record later. Each memo is an
|
3212 |
opaque index memo. For _KnitKeyAccess the memo is (key, pos,
|
|
3213 |
length), where the key is the record key.
|
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3214 |
"""
|
4398.8.8
by John Arbash Meinel
Respond to Andrew's review comments. |
3215 |
if type(raw_data) is not str: |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3216 |
raise AssertionError( |
3217 |
'data must be plain bytes was %s' % type(raw_data)) |
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3218 |
result = [] |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3219 |
offset = 0 |
3220 |
# TODO: This can be tuned for writing to sftp and other servers where
|
|
3221 |
# append() is relatively expensive by grouping the writes to each key
|
|
3222 |
# prefix.
|
|
3223 |
for key, size in key_sizes: |
|
3224 |
path = self._mapper.map(key) |
|
3225 |
try: |
|
3226 |
base = self._transport.append_bytes(path + '.knit', |
|
3227 |
raw_data[offset:offset+size]) |
|
3228 |
except errors.NoSuchFile: |
|
3229 |
self._transport.mkdir(osutils.dirname(path)) |
|
3230 |
base = self._transport.append_bytes(path + '.knit', |
|
3231 |
raw_data[offset:offset+size]) |
|
3232 |
# if base == 0:
|
|
3233 |
# chmod.
|
|
3234 |
offset += size |
|
3235 |
result.append((key, base, size)) |
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3236 |
return result |
3237 |
||
4187.3.3
by Andrew Bennetts
In KnitVersionedFiles.insert_record_stream, flush the access object before expanding a delta into a fulltext. |
3238 |
def flush(self): |
4187.3.4
by Andrew Bennetts
Better docstrings and comments. |
3239 |
"""Flush pending writes on this access object.
|
3240 |
|
|
3241 |
For .knit files this is a no-op.
|
|
3242 |
"""
|
|
4187.3.3
by Andrew Bennetts
In KnitVersionedFiles.insert_record_stream, flush the access object before expanding a delta into a fulltext. |
3243 |
pass
|
3244 |
||
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3245 |
def get_raw_records(self, memos_for_retrieval): |
3246 |
"""Get the raw bytes for a records.
|
|
3247 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3248 |
:param memos_for_retrieval: An iterable containing the access memo for
|
3249 |
retrieving the bytes.
|
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3250 |
:return: An iterator over the bytes of the records.
|
3251 |
"""
|
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3252 |
# first pass, group into same-index request to minimise readv's issued.
|
3253 |
request_lists = [] |
|
3254 |
current_prefix = None |
|
3255 |
for (key, offset, length) in memos_for_retrieval: |
|
3256 |
if current_prefix == key[:-1]: |
|
3257 |
current_list.append((offset, length)) |
|
3258 |
else: |
|
3259 |
if current_prefix is not None: |
|
3260 |
request_lists.append((current_prefix, current_list)) |
|
3261 |
current_prefix = key[:-1] |
|
3262 |
current_list = [(offset, length)] |
|
3263 |
# handle the last entry
|
|
3264 |
if current_prefix is not None: |
|
3265 |
request_lists.append((current_prefix, current_list)) |
|
3266 |
for prefix, read_vector in request_lists: |
|
3267 |
path = self._mapper.map(prefix) + '.knit' |
|
3268 |
for pos, data in self._transport.readv(path, read_vector): |
|
3269 |
yield data |
|
3270 |
||
3271 |
||
3272 |
class _DirectPackAccess(object): |
|
3273 |
"""Access to data in one or more packs with less translation."""
|
|
3274 |
||
4187.3.3
by Andrew Bennetts
In KnitVersionedFiles.insert_record_stream, flush the access object before expanding a delta into a fulltext. |
3275 |
def __init__(self, index_to_packs, reload_func=None, flush_func=None): |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3276 |
"""Create a _DirectPackAccess object.
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3277 |
|
3278 |
:param index_to_packs: A dict mapping index objects to the transport
|
|
3279 |
and file names for obtaining data.
|
|
3789.2.5
by John Arbash Meinel
Change _DirectPackAccess to only raise Retry when _reload_func is defined. |
3280 |
:param reload_func: A function to call if we determine that the pack
|
3281 |
files have moved and we need to reload our caches. See
|
|
3282 |
bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.
|
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3283 |
"""
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3284 |
self._container_writer = None |
3285 |
self._write_index = None |
|
3286 |
self._indices = index_to_packs |
|
3789.2.5
by John Arbash Meinel
Change _DirectPackAccess to only raise Retry when _reload_func is defined. |
3287 |
self._reload_func = reload_func |
4187.3.3
by Andrew Bennetts
In KnitVersionedFiles.insert_record_stream, flush the access object before expanding a delta into a fulltext. |
3288 |
self._flush_func = flush_func |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3289 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3290 |
def add_raw_records(self, key_sizes, raw_data):
    """Add raw knit bytes to a storage area.

    The data is spooled to the container writer in one bytes-record per
    raw data item.

    :param key_sizes: An iterable of tuples containing the key and size of
        each raw data segment.
    :param raw_data: A bytestring containing the data.
    :return: A list of memos to retrieve the record later. Each memo is an
        opaque index memo. For _DirectPackAccess the memo is (index, pos,
        length), where the index field is the write_index object supplied
        to the PackAccess object.
    :raises AssertionError: if raw_data is not exactly a str.
    """
    if type(raw_data) is not str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(raw_data))
    memos = []
    start = 0
    for key, size in key_sizes:
        end = start + size
        segment = raw_data[start:end]
        # Records are written without names (empty name list).
        p_offset, p_length = self._container_writer.add_bytes_record(
            segment, [])
        memos.append((self._write_index, p_offset, p_length))
        start = end
    return memos
3315 |
||
4187.3.3
by Andrew Bennetts
In KnitVersionedFiles.insert_record_stream, flush the access object before expanding a delta into a fulltext. |
3316 |
def flush(self):
    """Flush pending writes on this access object.

    This will flush any buffered writes to a NewPack by calling the
    flush function, when one has been set.
    """
    flush_func = self._flush_func
    if flush_func is None:
        return
    flush_func()
|
3323 |
||
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3324 |
def get_raw_records(self, memos_for_retrieval):
    """Get the raw bytes for a set of records.

    :param memos_for_retrieval: An iterable of (index, pos, length) memos
        describing the bytes to retrieve. The pack to read each record
        from is looked up by its index in self._indices.
    :return: An iterator over the bytes of the records.
    :raises errors.RetryWithNewPacks: if an index is unknown or a pack file
        is missing while a reload function is configured. When no reload
        function is configured the original KeyError / NoSuchFile
        propagates unchanged.
    """
    # Pass 1: collapse consecutive memos that share an index into a single
    # (index, [(offset, length), ...]) request.
    grouped = []
    active_index = None
    for index, offset, length in memos_for_retrieval:
        span = (offset, length)
        if active_index == index:
            active_spans.append(span)
            continue
        if active_index is not None:
            grouped.append((active_index, active_spans))
        active_index, active_spans = index, [span]
    # The final run has not been recorded yet.
    if active_index is not None:
        grouped.append((active_index, active_spans))

    # Pass 2: issue one readv per request and hand back each record body.
    for index, spans in grouped:
        try:
            transport, path = self._indices[index]
        except KeyError:
            # The index vanished from our map: somebody triggered an index
            # reload, so the caller has to start over.  Without a reload
            # function there is nothing that can be done but fail.
            if self._reload_func is not None:
                raise errors.RetryWithNewPacks(index,
                                               reload_occurred=True,
                                               exc_info=sys.exc_info())
            raise
        try:
            reader = pack.make_readv_reader(transport, path, spans)
            for names, read_func in reader.iter_records():
                yield read_func(None)
        except errors.NoSuchFile:
            # The pack file has gone missing on disk: ask the caller to
            # trigger a reload and start over, if reloading is possible.
            if self._reload_func is not None:
                raise errors.RetryWithNewPacks(transport.abspath(path),
                                               reload_occurred=False,
                                               exc_info=sys.exc_info())
            raise
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
3373 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3374 |
def set_writer(self, writer, index, transport_packname):
    """Set a writer to use for adding data.

    :param writer: The container writer that new records are added to.
    :param index: The index recorded in memos for newly written data. When
        it is not None it is also registered in self._indices.
    :param transport_packname: The value stored in self._indices for
        index.
    """
    if index is not None:
        # Make data written through this writer readable via its index.
        self._indices[index] = transport_packname
    self._container_writer, self._write_index = writer, index
|
1684.3.3
by Robert Collins
Add a special cased weaves to knit converter. |
3380 |
|
3789.2.5
by John Arbash Meinel
Change _DirectPackAccess to only raise Retry when _reload_func is defined. |
3381 |
def reload_or_raise(self, retry_exc): |
3382 |
"""Try calling the reload function, or re-raise the original exception.
|
|
3383 |
||
3384 |
This should be called after _DirectPackAccess raises a
|
|
3385 |
RetryWithNewPacks exception. This function will handle the common logic
|
|
3386 |
of determining when the error is fatal versus being temporary.
|
|
3387 |
It will also make sure that the original exception is raised, rather
|
|
3388 |
than the RetryWithNewPacks exception.
|
|
3389 |
||
3390 |
If this function returns, then the calling function should retry
|
|
3391 |
whatever operation was being performed. Otherwise an exception will
|
|
3392 |
be raised.
|
|
3393 |
||
3394 |
:param retry_exc: A RetryWithNewPacks exception.
|
|
3395 |
"""
|
|
3789.2.6
by John Arbash Meinel
Make _DirectPackAccess.reload_or_raise maintain the logic. |
3396 |
is_error = False |
3397 |
if self._reload_func is None: |
|
3398 |
is_error = True |
|
3399 |
elif not self._reload_func(): |
|
3400 |
# The reload claimed that nothing changed
|
|
3401 |
if not retry_exc.reload_occurred: |
|
3402 |
# If there wasn't an earlier reload, then we really were
|
|
3403 |
# expecting to find changes. We didn't find them, so this is a
|
|
3404 |
# hard error
|
|
3405 |
is_error = True |
|
3406 |
if is_error: |
|
3407 |
exc_class, exc_value, exc_traceback = retry_exc.exc_info |
|
3408 |
raise exc_class, exc_value, exc_traceback |
|
3789.2.5
by John Arbash Meinel
Change _DirectPackAccess to only raise Retry when _reload_func is defined. |
3409 |
|
1684.3.3
by Robert Collins
Add a special cased weaves to knit converter. |
3410 |
|
2781.1.1
by Martin Pool
merge cpatiencediff from Lukas |
3411 |
# Deprecated alias; use patiencediff.PatienceSequenceMatcher instead.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher
|
2484.1.1
by John Arbash Meinel
Add an initial function to read knit indexes in pyrex. |
3413 |
|
3414 |
||
2770.1.2
by Aaron Bentley
Convert to knit-only annotation |
3415 |
def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is meant for knits with no cached annotations. It
    will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit to annotate; it is wrapped in a _KnitAnnotator.
    :param revision_id: Passed to the annotator's annotate_flat().
    :return: An iterator over the result of annotate_flat().
    """
    flat_annotations = _KnitAnnotator(knit).annotate_flat(revision_id)
    return iter(flat_annotations)
3224.1.7
by John Arbash Meinel
_StreamIndex also needs to return the proper values for get_build_details. |
3424 |
|
3425 |
||
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3426 |
class _KnitAnnotator(annotate.Annotator): |
3224.1.5
by John Arbash Meinel
Start using a helper class for doing the knit-pack annotations. |
3427 |
"""Build up the annotations for a text."""
|
3428 |
||
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3429 |
def __init__(self, vf): |
3430 |
annotate.Annotator.__init__(self, vf) |
|
3431 |
||
3432 |
# TODO: handle Nodes which cannot be extracted
|
|
3433 |
# self._ghosts = set()
|
|
3434 |
||
4454.3.38
by John Arbash Meinel
Start using left-matching-blocks during the actual annotation. |
3435 |
# Map from (key, parent_key) => matching_blocks, should be 'use once'
|
3436 |
self._matching_blocks = {} |
|
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3437 |
|
3438 |
# KnitContent objects
|
|
3439 |
self._content_objects = {} |
|
3440 |
# The number of children that depend on this fulltext content object
|
|
3441 |
self._num_compression_children = {} |
|
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3442 |
# Delta records that need their compression parent before they can be
|
3443 |
# expanded
|
|
3444 |
self._pending_deltas = {} |
|
4454.3.30
by John Arbash Meinel
add a bit more work to be able to process 'pending_annotations'. |
3445 |
# Fulltext records that are waiting for their parents fulltexts before
|
3446 |
# they can be yielded for annotation
|
|
3447 |
self._pending_annotation = {} |
|
3224.1.19
by John Arbash Meinel
Work on removing nodes from the working set once they aren't needed. |
3448 |
|
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3449 |
self._all_build_details = {} |
3450 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3451 |
def _get_build_graph(self, key): |
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3452 |
"""Get the graphs for building texts and annotations.
|
3453 |
||
3454 |
The data you need for creating a full text may be different than the
|
|
3455 |
data you need to annotate that text. (At a minimum, you need both
|
|
3456 |
parents to create an annotation, but only need 1 parent to generate the
|
|
3457 |
fulltext.)
|
|
3458 |
||
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3459 |
:return: A list of (key, index_memo) records, suitable for
|
4371.2.1
by Vincent Ladeuil
Start fixing annotate for gc. |
3460 |
passing to read_records_iter to start reading in the raw data from
|
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3461 |
the pack file.
|
3462 |
"""
|
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3463 |
pending = set([key]) |
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3464 |
records = [] |
4454.3.64
by John Arbash Meinel
Ensure that _KnitAnnotator also supports add_special_text. |
3465 |
ann_keys = set() |
4454.3.26
by John Arbash Meinel
The new _KnitAnnotator based on Annotator seems to pass the test suite. |
3466 |
self._num_needed_children[key] = 1 |
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3467 |
while pending: |
3468 |
# get all pending nodes
|
|
3469 |
this_iteration = pending |
|
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3470 |
build_details = self._vf._index.get_build_details(this_iteration) |
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3471 |
self._all_build_details.update(build_details) |
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3472 |
# new_nodes = self._vf._index._get_entries(this_iteration)
|
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3473 |
pending = set() |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3474 |
for key, details in build_details.iteritems(): |
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3475 |
(index_memo, compression_parent, parent_keys, |
3224.1.15
by John Arbash Meinel
Finish removing method and noeol from general knowledge, |
3476 |
record_details) = details |
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3477 |
self._parent_map[key] = parent_keys |
4454.3.41
by John Arbash Meinel
Cache the heads provider as long as we know that the parent_map hasn't changed. |
3478 |
self._heads_provider = None |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
3479 |
records.append((key, index_memo)) |
3224.1.19
by John Arbash Meinel
Work on removing nodes from the working set once they aren't needed. |
3480 |
# Do we actually need to check _annotated_lines?
|
4454.3.64
by John Arbash Meinel
Ensure that _KnitAnnotator also supports add_special_text. |
3481 |
pending.update([p for p in parent_keys |
3482 |
if p not in self._all_build_details]) |
|
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3483 |
if parent_keys: |
3484 |
for parent_key in parent_keys: |
|
3485 |
if parent_key in self._num_needed_children: |
|
3486 |
self._num_needed_children[parent_key] += 1 |
|
3487 |
else: |
|
3488 |
self._num_needed_children[parent_key] = 1 |
|
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3489 |
if compression_parent: |
3490 |
if compression_parent in self._num_compression_children: |
|
3491 |
self._num_compression_children[compression_parent] += 1 |
|
3492 |
else: |
|
3493 |
self._num_compression_children[compression_parent] = 1 |
|
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3494 |
|
3495 |
missing_versions = this_iteration.difference(build_details.keys()) |
|
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3496 |
if missing_versions: |
4454.3.64
by John Arbash Meinel
Ensure that _KnitAnnotator also supports add_special_text. |
3497 |
for key in missing_versions: |
3498 |
if key in self._parent_map and key in self._text_cache: |
|
3499 |
# We already have this text ready, we just need to
|
|
3500 |
# yield it later so we get it annotated
|
|
3501 |
ann_keys.add(key) |
|
3502 |
parent_keys = self._parent_map[key] |
|
3503 |
for parent_key in parent_keys: |
|
3504 |
if parent_key in self._num_needed_children: |
|
3505 |
self._num_needed_children[parent_key] += 1 |
|
3506 |
else: |
|
3507 |
self._num_needed_children[parent_key] = 1 |
|
3508 |
pending.update([p for p in parent_keys |
|
3509 |
if p not in self._all_build_details]) |
|
3510 |
else: |
|
4454.3.65
by John Arbash Meinel
Tests that VF implementations support .get_annotator() |
3511 |
raise errors.RevisionNotPresent(key, self._vf) |
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3512 |
# Generally we will want to read the records in reverse order, because
|
3513 |
# we find the parent nodes after the children
|
|
3514 |
records.reverse() |
|
4454.3.64
by John Arbash Meinel
Ensure that _KnitAnnotator also supports add_special_text. |
3515 |
return records, ann_keys |
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3516 |
|
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3517 |
def _get_needed_texts(self, key, pb=None): |
4454.3.32
by John Arbash Meinel
using this custom extraction code drops us from 30.5s => 17.6s for annotate NEWS. |
3518 |
# if True or len(self._vf._fallback_vfs) > 0:
|
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3519 |
if len(self._vf._fallback_vfs) > 0: |
3520 |
# If we have fallbacks, go to the generic path
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
3521 |
for v in annotate.Annotator._get_needed_texts(self, key, pb=pb): |
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3522 |
yield v |
4454.3.26
by John Arbash Meinel
The new _KnitAnnotator based on Annotator seems to pass the test suite. |
3523 |
return
|
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3524 |
while True: |
3525 |
try: |
|
4454.3.64
by John Arbash Meinel
Ensure that _KnitAnnotator also supports add_special_text. |
3526 |
records, ann_keys = self._get_build_graph(key) |
4454.3.59
by John Arbash Meinel
Track down why the annotate retry code was failing. |
3527 |
for idx, (sub_key, text, num_lines) in enumerate( |
4454.3.42
by John Arbash Meinel
Make use of the passed in progress bar. |
3528 |
self._extract_texts(records)): |
3529 |
if pb is not None: |
|
3530 |
pb.update('annotating', idx, len(records)) |
|
4454.3.59
by John Arbash Meinel
Track down why the annotate retry code was failing. |
3531 |
yield sub_key, text, num_lines |
4454.3.64
by John Arbash Meinel
Ensure that _KnitAnnotator also supports add_special_text. |
3532 |
for sub_key in ann_keys: |
3533 |
text = self._text_cache[sub_key] |
|
3534 |
num_lines = len(text) # bad assumption |
|
3535 |
yield sub_key, text, num_lines |
|
4454.3.26
by John Arbash Meinel
The new _KnitAnnotator based on Annotator seems to pass the test suite. |
3536 |
return
|
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3537 |
except errors.RetryWithNewPacks, e: |
3538 |
self._vf._access.reload_or_raise(e) |
|
3539 |
# The cached build_details are no longer valid
|
|
3540 |
self._all_build_details.clear() |
|
3541 |
||
4454.3.37
by John Arbash Meinel
Add tests tha left-matching-blocks gets populated. |
3542 |
def _cache_delta_blocks(self, key, compression_parent, delta, lines): |
3543 |
parent_lines = self._text_cache[compression_parent] |
|
3544 |
blocks = list(KnitContent.get_line_delta_blocks(delta, parent_lines, lines)) |
|
4454.3.38
by John Arbash Meinel
Start using left-matching-blocks during the actual annotation. |
3545 |
self._matching_blocks[(key, compression_parent)] = blocks |
4454.3.37
by John Arbash Meinel
Add tests tha left-matching-blocks gets populated. |
3546 |
|
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3547 |
def _expand_record(self, key, parent_keys, compression_parent, record, |
3548 |
record_details): |
|
4454.3.37
by John Arbash Meinel
Add tests tha left-matching-blocks gets populated. |
3549 |
delta = None |
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3550 |
if compression_parent: |
3551 |
if compression_parent not in self._content_objects: |
|
3552 |
# Waiting for the parent
|
|
3553 |
self._pending_deltas.setdefault(compression_parent, []).append( |
|
3554 |
(key, parent_keys, record, record_details)) |
|
3555 |
return None |
|
3556 |
# We have the basis parent, so expand the delta
|
|
4454.3.33
by John Arbash Meinel
Change the _expand_record code to pop out old content objects. |
3557 |
num = self._num_compression_children[compression_parent] |
3558 |
num -= 1 |
|
3559 |
if num == 0: |
|
3560 |
base_content = self._content_objects.pop(compression_parent) |
|
3561 |
self._num_compression_children.pop(compression_parent) |
|
3562 |
else: |
|
3563 |
self._num_compression_children[compression_parent] = num |
|
3564 |
base_content = self._content_objects[compression_parent] |
|
4454.3.35
by John Arbash Meinel
Figure out why we don't want to copy_base_content=False. |
3565 |
# It is tempting to want to copy_base_content=False for the last
|
3566 |
# child object. However, whenever noeol=False,
|
|
3567 |
# self._text_cache[parent_key] is content._lines. So mutating it
|
|
3568 |
# gives very bad results.
|
|
4454.3.36
by John Arbash Meinel
Only cache the content objects that we will reuse. |
3569 |
# The alternative is to copy the lines into text cache, but then we
|
3570 |
# are copying anyway, so just do it here.
|
|
4454.3.37
by John Arbash Meinel
Add tests tha left-matching-blocks gets populated. |
3571 |
content, delta = self._vf._factory.parse_record( |
4454.3.30
by John Arbash Meinel
add a bit more work to be able to process 'pending_annotations'. |
3572 |
key, record, record_details, base_content, |
3573 |
copy_base_content=True) |
|
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3574 |
else: |
3575 |
# Fulltext record
|
|
3576 |
content, _ = self._vf._factory.parse_record( |
|
3577 |
key, record, record_details, None) |
|
4454.3.36
by John Arbash Meinel
Only cache the content objects that we will reuse. |
3578 |
if self._num_compression_children.get(key, 0) > 0: |
3579 |
self._content_objects[key] = content |
|
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3580 |
lines = content.text() |
3581 |
self._text_cache[key] = lines |
|
4454.3.37
by John Arbash Meinel
Add tests tha left-matching-blocks gets populated. |
3582 |
if delta is not None: |
3583 |
self._cache_delta_blocks(key, compression_parent, delta, lines) |
|
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3584 |
return lines |
3585 |
||
4454.3.38
by John Arbash Meinel
Start using left-matching-blocks during the actual annotation. |
3586 |
def _get_parent_annotations_and_matches(self, key, text, parent_key): |
3587 |
"""Get the list of annotations for the parent, and the matching lines.
|
|
3588 |
||
3589 |
:param text: The opaque value given by _get_needed_texts
|
|
3590 |
:param parent_key: The key for the parent text
|
|
3591 |
:return: (parent_annotations, matching_blocks)
|
|
3592 |
parent_annotations is a list as long as the number of lines in
|
|
3593 |
parent
|
|
3594 |
matching_blocks is a list of (parent_idx, text_idx, len) tuples
|
|
3595 |
indicating which lines match between the two texts
|
|
3596 |
"""
|
|
3597 |
block_key = (key, parent_key) |
|
3598 |
if block_key in self._matching_blocks: |
|
3599 |
blocks = self._matching_blocks.pop(block_key) |
|
3600 |
parent_annotations = self._annotations_cache[parent_key] |
|
3601 |
return parent_annotations, blocks |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
3602 |
return annotate.Annotator._get_parent_annotations_and_matches(self, |
4454.3.38
by John Arbash Meinel
Start using left-matching-blocks during the actual annotation. |
3603 |
key, text, parent_key) |
3604 |
||
4454.3.30
by John Arbash Meinel
add a bit more work to be able to process 'pending_annotations'. |
3605 |
def _process_pending(self, key): |
3606 |
"""The content for 'key' was just processed.
|
|
3607 |
||
3608 |
Determine if there is any more pending work to be processed.
|
|
3609 |
"""
|
|
3610 |
to_return = [] |
|
4454.3.31
by John Arbash Meinel
Change the processing lines to now handle fallbacks properly. |
3611 |
if key in self._pending_deltas: |
3612 |
compression_parent = key |
|
3613 |
children = self._pending_deltas.pop(key) |
|
3614 |
for child_key, parent_keys, record, record_details in children: |
|
3615 |
lines = self._expand_record(child_key, parent_keys, |
|
3616 |
compression_parent, |
|
3617 |
record, record_details) |
|
3618 |
if self._check_ready_for_annotations(child_key, parent_keys): |
|
3619 |
to_return.append(child_key) |
|
3620 |
# Also check any children that are waiting for this parent to be
|
|
3621 |
# annotation ready
|
|
3622 |
if key in self._pending_annotation: |
|
3623 |
children = self._pending_annotation.pop(key) |
|
3624 |
to_return.extend([c for c, p_keys in children |
|
3625 |
if self._check_ready_for_annotations(c, p_keys)]) |
|
3626 |
return to_return |
|
4454.3.30
by John Arbash Meinel
add a bit more work to be able to process 'pending_annotations'. |
3627 |
|
3628 |
def _check_ready_for_annotations(self, key, parent_keys): |
|
3629 |
"""return true if this text is ready to be yielded.
|
|
3630 |
||
3631 |
Otherwise, this will return False, and queue the text into
|
|
3632 |
self._pending_annotation
|
|
3633 |
"""
|
|
3634 |
for parent_key in parent_keys: |
|
3635 |
if parent_key not in self._annotations_cache: |
|
3636 |
# still waiting on at least one parent text, so queue it up
|
|
3637 |
# Note that if there are multiple parents, we need to wait
|
|
3638 |
# for all of them.
|
|
3639 |
self._pending_annotation.setdefault(parent_key, |
|
3640 |
[]).append((key, parent_keys)) |
|
3641 |
return False |
|
3642 |
return True |
|
3643 |
||
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3644 |
def _extract_texts(self, records): |
3645 |
"""Extract the various texts needed based on records"""
|
|
3224.1.6
by John Arbash Meinel
Refactor the annotation logic into a helper class. |
3646 |
# We iterate in the order read, rather than a strict order requested
|
3224.1.22
by John Arbash Meinel
Cleanup the extra debugging info, and some >80 char lines. |
3647 |
# However, process what we can, and put off to the side things that
|
3648 |
# still need parents, cleaning them up when those parents are
|
|
3649 |
# processed.
|
|
4454.3.29
by John Arbash Meinel
Some code comments about what needs to happen. |
3650 |
# Basic data flow:
|
3651 |
# 1) As 'records' are read, see if we can expand these records into
|
|
3652 |
# Content objects (and thus lines)
|
|
3653 |
# 2) If a given line-delta is waiting on its compression parent, it
|
|
3654 |
# gets queued up into self._pending_deltas, otherwise we expand
|
|
3655 |
# it, and put it into self._text_cache and self._content_objects
|
|
3656 |
# 3) If we expanded the text, we will then check to see if all
|
|
3657 |
# parents have also been processed. If so, this text gets yielded,
|
|
3658 |
# else this record gets set aside into pending_annotation
|
|
3659 |
# 4) Further, if we expanded the text in (2), we will then check to
|
|
3660 |
# see if there are any children in self._pending_deltas waiting to
|
|
3661 |
# also be processed. If so, we go back to (2) for those
|
|
3662 |
# 5) Further again, if we yielded the text, we can then check if that
|
|
3663 |
# 'unlocks' any of the texts in pending_annotations, which should
|
|
3664 |
# then get yielded as well
|
|
3665 |
# Note that both steps 4 and 5 are 'recursive' in that unlocking one
|
|
3666 |
# compression child could unlock yet another, and yielding a fulltext
|
|
3667 |
# will also 'unlock' the children that are waiting on that annotation.
|
|
3668 |
# (Though also, unlocking 1 parent's fulltext, does not unlock a child
|
|
3669 |
# if other parents are also waiting.)
|
|
3670 |
# We want to yield content before expanding child content objects, so
|
|
3671 |
# that we know when we can re-use the content lines, and the annotation
|
|
3672 |
# code can know when it can stop caching fulltexts, as well.
|
|
3673 |
||
4454.3.23
by John Arbash Meinel
Initial attempt at refactoring _KnitAnnotator to derive from Annotator. |
3674 |
# Children that are missing their compression parent
|
3675 |
pending_deltas = {} |
|
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3676 |
for (key, record, digest) in self._vf._read_records_iter(records): |
3677 |
# ghosts?
|
|
4454.3.26
by John Arbash Meinel
The new _KnitAnnotator based on Annotator seems to pass the test suite. |
3678 |
details = self._all_build_details[key] |
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3679 |
(_, compression_parent, parent_keys, record_details) = details |
3680 |
lines = self._expand_record(key, parent_keys, compression_parent, |
|
3681 |
record, record_details) |
|
3682 |
if lines is None: |
|
3683 |
# Pending delta should be queued up
|
|
3684 |
continue
|
|
3685 |
# At this point, we may be able to yield this content, if all
|
|
3686 |
# parents are also finished
|
|
4454.3.30
by John Arbash Meinel
add a bit more work to be able to process 'pending_annotations'. |
3687 |
yield_this_text = self._check_ready_for_annotations(key, |
3688 |
parent_keys) |
|
4454.3.29
by John Arbash Meinel
Some code comments about what needs to happen. |
3689 |
if yield_this_text: |
4454.3.28
by John Arbash Meinel
Continue breaking things to build it up cleanly. |
3690 |
# All parents present
|
3691 |
yield key, lines, len(lines) |
|
4454.3.31
by John Arbash Meinel
Change the processing lines to now handle fallbacks properly. |
3692 |
to_process = self._process_pending(key) |
3693 |
while to_process: |
|
3694 |
this_process = to_process |
|
3695 |
to_process = [] |
|
3696 |
for key in this_process: |
|
3697 |
lines = self._text_cache[key] |
|
3698 |
yield key, lines, len(lines) |
|
3699 |
to_process.extend(self._process_pending(key)) |
|
3224.1.10
by John Arbash Meinel
Introduce the heads_provider for reannotate. |
3700 |
|
2484.1.1
by John Arbash Meinel
Add an initial function to read knit indexes in pyrex. |
3701 |
try: |
4573.1.1
by Andrew Bennetts
Fix imports for _knit_load_data_pyx, which was recently renamed. |
3702 |
from bzrlib._knit_load_data_pyx import _load_data_c as _load_data |
4574.3.6
by Martin Pool
More warnings when failing to load extensions |
3703 |
except ImportError, e: |
4574.3.8
by Martin Pool
Only mutter extension load errors when they occur, and record for later |
3704 |
osutils.failed_to_load_extension(e) |
2484.1.12
by John Arbash Meinel
Switch the layout to use a matching _knit_load_data_py.py and _knit_load_data_c.pyx |
3705 |
from bzrlib._knit_load_data_py import _load_data_py as _load_data |