2484.1.1
by John Arbash Meinel
Add an initial function to read knit indexes in pyrex. |
1 |
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Knit versionedfile implementation.
|
|
18 |
||
19 |
A knit is a versioned file implementation that supports efficient append only
|
|
20 |
updates.
|
|
1563.2.6
by Robert Collins
Start check tests for knits (pending), and remove dead code. |
21 |
|
22 |
Knit file layout:
|
|
23 |
lifeless: the data file is made up of "delta records". each delta record has a delta header
|
|
24 |
that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of
|
|
25 |
the -expanded data- (ie, the delta applied to the parent). the delta also ends with a
|
|
26 |
end-marker; simply "end VERSION"
|
|
27 |
||
28 |
delta can be line or full contents.
|
|
29 |
... the 8's there are the index number of the annotation.
|
|
30 |
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
|
|
31 |
59,59,3
|
|
32 |
8
|
|
33 |
8 if ie.executable:
|
|
34 |
8 e.set('executable', 'yes')
|
|
35 |
130,130,2
|
|
36 |
8 if elt.get('executable') == 'yes':
|
|
37 |
8 ie.executable = True
|
|
38 |
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad
|
|
39 |
||
40 |
||
41 |
what's in an index:
|
|
42 |
09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
|
|
43 |
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
|
|
44 |
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
|
|
45 |
09:33 < lifeless> right
|
|
46 |
09:33 < jrydberg> lifeless: the position and size is the range in the data file
|
|
47 |
||
48 |
||
49 |
so the index sequence is the dictionary compressed sequence number used
|
|
50 |
in the deltas to provide line annotation
|
|
51 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
52 |
"""
|
53 |
||
1563.2.6
by Robert Collins
Start check tests for knits (pending), and remove dead code. |
54 |
# TODOS:
|
55 |
# 10:16 < lifeless> make partial index writes safe
|
|
56 |
# 10:16 < lifeless> implement 'knit.check()' like weave.check()
|
|
57 |
# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave
|
|
58 |
# always' approach.
|
|
1563.2.11
by Robert Collins
Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis. |
59 |
# move sha1 out of the content so that join is faster at verifying parents
|
60 |
# record content length ?
|
|
1563.2.6
by Robert Collins
Start check tests for knits (pending), and remove dead code. |
61 |
|
62 |
||
1594.2.24
by Robert Collins
Make use of the transaction finalisation warning support to implement in-knit caching. |
63 |
from copy import copy |
1563.2.11
by Robert Collins
Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis. |
64 |
from cStringIO import StringIO |
1596.2.28
by Robert Collins
more knit profile based tuning. |
65 |
from itertools import izip, chain |
1756.2.17
by Aaron Bentley
Fixes suggested by John Meinel |
66 |
import operator |
1563.2.6
by Robert Collins
Start check tests for knits (pending), and remove dead code. |
67 |
import os |
1628.1.2
by Robert Collins
More knit micro-optimisations. |
68 |
import sys |
1756.2.29
by Aaron Bentley
Remove basis knit support |
69 |
import warnings |
2762.3.1
by Robert Collins
* The compression used within the bzr repository has changed from zlib |
70 |
from zlib import Z_DEFAULT_COMPRESSION |
1594.2.19
by Robert Collins
More coalescing tweaks, and knit feedback. |
71 |
|
1594.2.17
by Robert Collins
Better readv coalescing, now with test, and progress during knit index reading. |
72 |
import bzrlib |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
73 |
from bzrlib.lazy_import import lazy_import |
74 |
lazy_import(globals(), """ |
|
75 |
from bzrlib import (
|
|
2770.1.1
by Aaron Bentley
Initial implmentation of plain knit annotation |
76 |
annotate,
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
77 |
pack,
|
2745.1.2
by Robert Collins
Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly. |
78 |
trace,
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
79 |
)
|
80 |
""") |
|
1911.2.3
by John Arbash Meinel
Moving everything into a new location so that we can cache more than just revision ids |
81 |
from bzrlib import ( |
82 |
cache_utf8, |
|
2745.1.1
by Robert Collins
Add a number of -Devil checkpoints. |
83 |
debug, |
2520.4.140
by Aaron Bentley
Use matching blocks from mpdiff for knit delta creation |
84 |
diff, |
1911.2.3
by John Arbash Meinel
Moving everything into a new location so that we can cache more than just revision ids |
85 |
errors, |
2249.5.12
by John Arbash Meinel
Change the APIs for VersionedFile, Store, and some of Repository into utf-8 |
86 |
osutils, |
2104.4.2
by John Arbash Meinel
Small cleanup and NEWS entry about fixing bug #65714 |
87 |
patiencediff, |
2039.1.1
by Aaron Bentley
Clean up progress properly when interrupted during fetch (#54000) |
88 |
progress, |
1551.15.46
by Aaron Bentley
Move plan merge to tree |
89 |
merge, |
2196.2.1
by John Arbash Meinel
Merge Dmitry's optimizations and minimize the actual diff. |
90 |
ui, |
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
91 |
)
|
92 |
from bzrlib.errors import ( |
|
93 |
FileExists, |
|
94 |
NoSuchFile, |
|
95 |
KnitError, |
|
96 |
InvalidRevisionId, |
|
97 |
KnitCorrupt, |
|
2535.3.4
by Andrew Bennetts
Simple implementation of Knit.insert_data_stream. |
98 |
KnitDataStreamIncompatible, |
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
99 |
KnitHeaderError, |
100 |
RevisionNotPresent, |
|
101 |
RevisionAlreadyPresent, |
|
102 |
)
|
|
2817.3.1
by Robert Collins
* New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string |
103 |
from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip |
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
104 |
from bzrlib.osutils import ( |
105 |
contains_whitespace, |
|
106 |
contains_linebreaks, |
|
2850.1.1
by Robert Collins
* ``KnitVersionedFile.add*`` will no longer cache added records even when |
107 |
sha_string, |
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
108 |
sha_strings, |
109 |
)
|
|
1756.2.29
by Aaron Bentley
Remove basis knit support |
110 |
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
111 |
from bzrlib.tsort import topo_sort |
2094.3.5
by John Arbash Meinel
Fix imports to ensure modules are loaded before they are used |
112 |
import bzrlib.ui |
1684.3.3
by Robert Collins
Add a special cased weaves to knit converter. |
113 |
import bzrlib.weave |
1911.2.1
by John Arbash Meinel
Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate |
114 |
from bzrlib.versionedfile import VersionedFile, InterVersionedFile |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
115 |
|
116 |
||
117 |
# TODO: Split out code specific to this format into an associated object.
|
|
118 |
||
119 |
# TODO: Can we put in some kind of value to check that the index and data
|
|
120 |
# files belong together?
|
|
121 |
||
1759.2.1
by Jelmer Vernooij
Fix some types (found using aspell). |
122 |
# TODO: accommodate binaries, perhaps by storing a byte count
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
123 |
|
124 |
# TODO: function to check whole file
|
|
125 |
||
126 |
# TODO: atomically append data, then measure backwards from the cursor
|
|
127 |
# position after writing to work out where it was located. we may need to
|
|
128 |
# bypass python file buffering.
|
|
129 |
||
130 |
# File name suffixes for a knit; presumably the data file and the index file
# respectively (their use is outside this part of the file -- TODO confirm).
DATA_SUFFIX = '.knit'
INDEX_SUFFIX = '.kndx'
|
132 |
||
133 |
||
134 |
class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    Subclasses supply annotate_iter(), text() and apply_delta(), and keep
    their lines in ``self._lines``.
    """

    def annotate(self):
        """Return the (origin, text) tuples of annotate_iter() as a list."""
        return list(self.annotate_iter())

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        Must be provided by subclasses.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines.

        :param new_lines: The content object to diff against.
        :return: An iterator of (old_start, old_end, new_length, new_data)
            tuples, one per non-matching region.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, source_texts, target_texts)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag != 'equal':
                # ofrom, oto, length, data
                yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        """Return the delta from this content to new_lines as a list."""
        return list(self.line_delta_iter(new_lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta

        :param knit_delta: Iterable of (s_begin, s_end, t_len, new_text)
            hunks.
        :param source: The lines the delta applies to.
        :param target: The lines produced by applying the delta.
        :return: An iterator of (source_pos, target_pos, length) blocks,
            finishing with a zero-length block.
        """
        def usable_length(src_at, tgt_at, length):
            # knit deltas do not provide reliable info about whether the
            # last line of a run matches, due to eol handling, so the final
            # line of every unchanged run is compared explicitly.
            if length > 0:
                last = length - 1
                if source[src_at + last] != target[tgt_at + last]:
                    return last
            return length

        total = len(target)
        src_at = 0
        tgt_at = 0
        for src_start, src_stop, inserted, new_text in knit_delta:
            gap = src_start - src_at
            matched = usable_length(src_at, tgt_at, gap)
            if matched > 0:
                yield src_at, tgt_at, matched
            # The target advances by the whole unchanged gap, even when its
            # last line was discounted above, plus the inserted lines.
            tgt_at += inserted + gap
            src_at = src_stop
        tail = total - tgt_at
        matched = usable_length(src_at, tgt_at, tail)
        if matched > 0:
            yield src_at, tgt_at, matched
        yield src_at + tail, total, 0
184 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
185 |
|
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
186 |
class AnnotatedKnitContent(KnitContent):
    """Annotated content.

    ``self._lines`` holds one (origin, text) tuple per content line.
    """

    def __init__(self, lines):
        self._lines = lines

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        return iter(self._lines)

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        The line list is edited in place.  new_version_id is not used here.
        """
        content = self._lines
        shift = 0
        for start, end, count, delta_lines in delta:
            content[shift + start:shift + end] = delta_lines
            # Later hunks address the original numbering, so track how far
            # the edits made so far have moved things.
            shift = shift + (start - end) + count

    def strip_last_line_newline(self):
        """Remove trailing newlines from the text of the final line."""
        stripped = self._lines[-1][1].rstrip('\n')
        self._lines[-1] = (self._lines[-1][0], stripped)

    def text(self):
        """Return the text of every line, without the origins."""
        try:
            return [line_text for line_origin, line_text in self._lines]
        except ValueError:
            # most commonly (only?) caused by the internal form of the knit
            # missing annotation information because of a bug - see thread
            # around 20071015
            failure = sys.exc_info()[1]
            raise KnitCorrupt(self,
                "line in annotated knit missing annotation information: %s"
                % (failure,))

    def copy(self):
        """Return a new AnnotatedKnitContent with its own line list."""
        duplicate = self._lines[:]
        return AnnotatedKnitContent(duplicate)
|
221 |
||
222 |
||
223 |
class PlainKnitContent(KnitContent):
    """Unannotated content.

    Lines are stored as plain strings together with a single version id.
    When annotate[_iter] is called on this content, that one version is
    reported for all lines, so annotate[_iter] is generally not useful on
    PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        self._version_id = version_id
        self._lines = lines

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        for text in self._lines:
            yield self._version_id, text

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        content = self._lines
        shift = 0
        for start, end, count, delta_lines in delta:
            content[shift + start:shift + end] = delta_lines
            # Later hunks address the original numbering, so track how far
            # the edits made so far have moved things.
            shift = shift + (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent with its own line list."""
        duplicate = self._lines[:]
        return PlainKnitContent(duplicate, self._version_id)

    def strip_last_line_newline(self):
        """Remove trailing newlines from the final line."""
        last = self._lines[-1]
        self._lines[-1] = last.rstrip('\n')

    def text(self):
        """Return the stored line list itself (not a copy)."""
        return self._lines
|
257 |
||
258 |
||
259 |
class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Build annotated content attributing every line to version_id.

        :param lines: A sequence of plain text lines.
        :param version_id: The origin recorded against each line.
        :return: An AnnotatedKnitContent.
        """
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: An iterable of serialised fulltext lines.
        :param version_id: Unused here; every line names its own origin.
        :return: An AnnotatedKnitContent.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of a parsed line delta.

        :param lines: An iterable of serialised delta lines.
        :param version_id: Passed through to parse_line_delta, which does
            not use it.  Optional so that one-argument calls keep working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: An iterable of serialised delta lines.
        :param version_id: Unused.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        :return: A list of hunks in the form described above.
        """
        result = []
        lines = iter(lines)
        # Bound once so the comprehensions below pull content lines from the
        # same iterator that the for loops take the headers from.
        read_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [read_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(read_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        read_line = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = read_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotation iterator of version_id's content in knit."""
        content = knit._get_content(version_id)
        return content.annotate_iter()
2770.1.1
by Aaron Bentley
Initial implmentation of plain knit annotation |
369 |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
370 |
|
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
371 |
class KnitPlainFactory(object):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines and version_id in a PlainKnitContent."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        The lines need no conversion; they are wrapped, together with
        version_id, by make().
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, content_lines) for each delta hunk.

        :param lines: A sequence (it is measured and sliced) of serialised
            delta lines: a 'start,end,count' header followed by count
            content lines, repeated.
        :param version_id: Unused.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            first = position + 1
            position = first + count
            yield start, end, count, lines[first:position]

    def parse_line_delta(self, lines, version_id):
        """Return the hunks from parse_line_delta_iter as a list."""
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        The hunk headers are consumed but not returned; only the actual
        content lines are yielded.
        """
        stream = iter(lines)
        read_line = stream.next
        for header in stream:
            count = int(header.split(',')[2])
            for unused in xrange(count):
                yield read_line()

    def lower_fulltext(self, content):
        """Return the serialisable form of content: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise a delta: a header line, then the lines, per hunk."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            header = '%d,%d,%d\n' % (start, end, count)
            serialised.append(header)
            serialised.extend(hunk_lines)
        return serialised

    def annotate_iter(self, knit, version_id):
        """Annotate version_id of knit by delegating to annotate_knit."""
        return annotate_knit(knit, version_id)
2770.1.1
by Aaron Bentley
Initial implmentation of plain knit annotation |
430 |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
431 |
|
432 |
def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport the knit is stored on.
    :param relpath: The path of the knit relative to transport.
    :return: The KnitVersionedFile that was constructed.
    """
    # KnitVersionedFile.__init__ takes relpath before transport, followed by
    # file_mode, access_mode and factory, so the remaining arguments are
    # passed by keyword to land in the intended slots.  create=True is
    # needed because the constructor otherwise only opens an existing knit.
    # The factory's methods are instance methods, hence an instance.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
435 |
|
436 |
||
437 |
class KnitVersionedFile(VersionedFile): |
|
438 |
"""Weave-like structure with faster random access.
|
|
439 |
||
440 |
A knit stores a number of texts and a summary of the relationships
|
|
441 |
between them. Texts are identified by a string version-id. Texts
|
|
442 |
are normally stored and retrieved as a series of lines, but can
|
|
443 |
also be passed as single strings.
|
|
444 |
||
445 |
Lines are stored with the trailing newline (if any) included, to
|
|
446 |
avoid special cases for files with no final newline. Lines are
|
|
447 |
composed of 8-bit characters, not unicode. The combination of
|
|
448 |
these approaches should mean any 'binary' file can be safely
|
|
449 |
stored and retrieved.
|
|
450 |
"""
|
|
451 |
||
1946.2.12
by John Arbash Meinel
Add ability to pass a directory mode to non_atomic_put |
452 |
def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, delta=True, create=False, create_parent_dir=False,
             delay_create=False, dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Base name of the knit; INDEX_SUFFIX and DATA_SUFFIX are
        appended to it to name the index and data files created here.
    :param transport: Transport the knit is accessed through.
    :param file_mode: Handed to the index and data-access objects that are
        created here.
    :param access_mode: 'r' or 'w'. None is treated as 'w'.
    :param factory: Content factory. A KnitAnnotateFactory is created when
        no (or a false) value is supplied.
    :param delta: Stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Handed to the index and data-access objects that are
        created here.
    :param index: An index to use for the knit, instead of creating a
        _KnitIndex.
    :param access_method: A data access object to use, instead of creating
        a _KnitAccess.
    """
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    if not factory:
        factory = KnitAnnotateFactory()
    self.factory = factory
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on how many parents _check_should_delta walks back while
    # looking for a fulltext.
    self._max_delta_chain = 200

    if index is not None:
        self._index = index
    else:
        self._index = _KnitIndex(
            transport, relpath + INDEX_SUFFIX, access_mode,
            create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir,
            delay_create=delay_create, dir_mode=dir_mode)

    # len(self) is only consulted from here on, i.e. after self._index has
    # been attached (presumably __len__ reads the index -- verify).
    if access_method is not None:
        data_access = access_method
    else:
        delay_data_create = (create and not len(self)) and delay_create
        data_access = _KnitAccess(
            transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            delay_data_create, create_parent_dir)
    if create and not len(self) and not delay_create:
        data_access.create()
    self._data = _KnitData(data_access)
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
492 |
|
1704.2.10
by Martin Pool
Add KnitVersionedFile.__repr__ method |
493 |
def __repr__(self): |
2592.3.159
by Robert Collins
Provide a transport for KnitVersionedFile's __repr__ in pack repositories. |
494 |
return '%s(%s)' % (self.__class__.__name__, |
1704.2.10
by Martin Pool
Add KnitVersionedFile.__repr__ method |
495 |
self.transport.abspath(self.filename)) |
496 |
||
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
497 |
def _check_should_delta(self, first_parents): |
498 |
"""Iterate back through the parent listing, looking for a fulltext.
|
|
499 |
||
500 |
This is used when we want to decide whether to add a delta or a new
|
|
501 |
fulltext. It searches for _max_delta_chain parents. When it finds a
|
|
502 |
fulltext parent, it sees if the total size of the deltas leading up to
|
|
503 |
it is large enough to indicate that we want a new full text anyway.
|
|
504 |
||
505 |
Return True if we should create a new delta, False if we should use a
|
|
506 |
full text.
|
|
507 |
"""
|
|
508 |
delta_size = 0 |
|
509 |
fulltext_size = None |
|
510 |
delta_parents = first_parents |
|
2147.1.2
by John Arbash Meinel
Simplify the knit max-chain detection code. |
511 |
for count in xrange(self._max_delta_chain): |
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
512 |
parent = delta_parents[0] |
513 |
method = self._index.get_method(parent) |
|
2592.3.71
by Robert Collins
Basic version of knit-based repository operating, many tests failing. |
514 |
index, pos, size = self._index.get_position(parent) |
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
515 |
if method == 'fulltext': |
516 |
fulltext_size = size |
|
517 |
break
|
|
518 |
delta_size += size |
|
519 |
delta_parents = self._index.get_parents(parent) |
|
2147.1.2
by John Arbash Meinel
Simplify the knit max-chain detection code. |
520 |
else: |
521 |
# We couldn't find a fulltext, so we must create a new one
|
|
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
522 |
return False |
2147.1.2
by John Arbash Meinel
Simplify the knit max-chain detection code. |
523 |
|
524 |
return fulltext_size > delta_size |
|
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
525 |
|
1692.2.1
by Robert Collins
Fix knit based push to only perform 2 appends to the target, rather that 2*new-versions. |
526 |
def _add_raw_records(self, records, data): |
527 |
"""Add all the records 'records' with data pre-joined in 'data'.
|
|
528 |
||
529 |
:param records: A list of tuples(version_id, options, parents, size).
|
|
530 |
:param data: The data for the records. When it is written, the records
|
|
531 |
are adjusted to have pos pointing into data by the sum of
|
|
1759.2.1
by Jelmer Vernooij
Fix some types (found using aspell). |
532 |
the preceding records sizes.
|
1692.2.1
by Robert Collins
Fix knit based push to only perform 2 appends to the target, rather that 2*new-versions. |
533 |
"""
|
534 |
# write all the data
|
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
535 |
raw_record_sizes = [record[3] for record in records] |
536 |
positions = self._data.add_raw_records(raw_record_sizes, data) |
|
1863.1.1
by John Arbash Meinel
Allow Versioned files to do caching if explicitly asked, and implement for Knit |
537 |
offset = 0 |
1692.2.1
by Robert Collins
Fix knit based push to only perform 2 appends to the target, rather that 2*new-versions. |
538 |
index_entries = [] |
2592.3.68
by Robert Collins
Make knit add_versions calls take access memo tuples rather than just pos and size. |
539 |
for (version_id, options, parents, size), access_memo in zip( |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
540 |
records, positions): |
2592.3.68
by Robert Collins
Make knit add_versions calls take access memo tuples rather than just pos and size. |
541 |
index_entries.append((version_id, options, access_memo, parents)) |
1863.1.1
by John Arbash Meinel
Allow Versioned files to do caching if explicitly asked, and implement for Knit |
542 |
if self._data._do_cache: |
543 |
self._data._cache[version_id] = data[offset:offset+size] |
|
544 |
offset += size |
|
1692.2.1
by Robert Collins
Fix knit based push to only perform 2 appends to the target, rather that 2*new-versions. |
545 |
self._index.add_versions(index_entries) |
546 |
||
1863.1.1
by John Arbash Meinel
Allow Versioned files to do caching if explicitly asked, and implement for Knit |
547 |
def enable_cache(self): |
548 |
"""Start caching data for this knit"""
|
|
549 |
self._data.enable_cache() |
|
550 |
||
1594.2.24
by Robert Collins
Make use of the transaction finalisation warning support to implement in-knit caching. |
551 |
def clear_cache(self): |
552 |
"""Clear the data cache only."""
|
|
553 |
self._data.clear_cache() |
|
554 |
||
1563.2.15
by Robert Collins
remove the weavestore assumptions about the number and nature of files it manages. |
555 |
def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: Base name of the copy; INDEX_SUFFIX and DATA_SUFFIX are
        appended to it.
    :param transport: Transport to write the copy to.
    """
    temp_index_name = name + INDEX_SUFFIX + '.tmp'
    # Copy the current index to a temp index to avoid racing with local
    # writes. The source file is closed explicitly, just like the data file
    # below, so the handle is not leaked if the copy fails.
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(temp_index_name, index_file)
    finally:
        index_file.close()
    # Copy the data file.
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # Move the copied index into place only once the data is there.
    transport.move(temp_index_name, name + INDEX_SUFFIX)
|
1563.2.15
by Robert Collins
remove the weavestore assumptions about the number and nature of files it manages. |
569 |
|
1563.2.13
by Robert Collins
InterVersionedFile implemented. |
570 |
def create_empty(self, name, transport, mode=None):
    """Create a new, empty knit that uses this knit's factory and delta
    setting.

    :param name: Name of the new knit.
    :param transport: Transport to create the new knit on.
    :param mode: Accepted but not used by this implementation.
        NOTE(review): presumably meant to be forwarded as the file mode of
        the new knit -- confirm against VersionedFile.create_empty.
    :return: The new KnitVersionedFile.
    """
    new_knit = KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)
    return new_knit
|
1563.2.15
by Robert Collins
remove the weavestore assumptions about the number and nature of files it manages. |
573 |
|
2535.3.3
by Andrew Bennetts
Add Knit.get_data_stream. |
574 |
def get_data_stream(self, required_versions): |
575 |
"""Get a data stream for the specified versions.
|
|
576 |
||
577 |
Versions may be returned in any order, not necessarily the order
|
|
578 |
specified.
|
|
579 |
||
2670.3.7
by Andrew Bennetts
Tweak docstring as requested in review. |
580 |
:param required_versions: The exact set of versions to be extracted.
|
581 |
Unlike some other knit methods, this is not used to generate a
|
|
582 |
transitive closure, rather it is used precisely as given.
|
|
2535.3.3
by Andrew Bennetts
Add Knit.get_data_stream. |
583 |
|
584 |
:returns: format_signature, list of (version, options, length, parents),
|
|
585 |
reader_callable.
|
|
586 |
"""
|
|
2858.2.1
by Martin Pool
Remove most calls to safe_file_id and safe_revision_id. |
587 |
if not isinstance(required_versions, set): |
588 |
required_versions = set(required_versions) |
|
2535.3.3
by Andrew Bennetts
Add Knit.get_data_stream. |
589 |
# we don't care about inclusions, the caller cares.
|
590 |
# but we need to setup a list of records to visit.
|
|
591 |
for version_id in required_versions: |
|
592 |
if not self.has_version(version_id): |
|
593 |
raise RevisionNotPresent(version_id, self.filename) |
|
594 |
# Pick the desired versions out of the index in oldest-to-newest order
|
|
595 |
version_list = [] |
|
596 |
for version_id in self.versions(): |
|
597 |
if version_id in required_versions: |
|
598 |
version_list.append(version_id) |
|
599 |
||
600 |
# create the list of version information for the result
|
|
601 |
copy_queue_records = [] |
|
602 |
copy_set = set() |
|
603 |
result_version_list = [] |
|
604 |
for version_id in version_list: |
|
605 |
options = self._index.get_options(version_id) |
|
606 |
parents = self._index.get_parents_with_ghosts(version_id) |
|
2535.3.36
by Andrew Bennetts
Merge bzr.dev |
607 |
index_memo = self._index.get_position(version_id) |
608 |
copy_queue_records.append((version_id, index_memo)) |
|
609 |
none, data_pos, data_size = index_memo |
|
2535.3.3
by Andrew Bennetts
Add Knit.get_data_stream. |
610 |
copy_set.add(version_id) |
611 |
# version, options, length, parents
|
|
612 |
result_version_list.append((version_id, options, data_size, |
|
613 |
parents)) |
|
614 |
||
615 |
# Read the compressed record data.
|
|
616 |
# XXX:
|
|
617 |
# From here down to the return should really be logic in the returned
|
|
2535.3.30
by Andrew Bennetts
Delete obsolete comments and other cosmetic changes. |
618 |
# callable -- in a class that adapts read_records_iter_raw to read
|
2535.3.3
by Andrew Bennetts
Add Knit.get_data_stream. |
619 |
# requests.
|
620 |
raw_datum = [] |
|
621 |
for (version_id, raw_data), \ |
|
622 |
(version_id2, options, _, parents) in \ |
|
623 |
izip(self._data.read_records_iter_raw(copy_queue_records), |
|
624 |
result_version_list): |
|
625 |
assert version_id == version_id2, 'logic error, inconsistent results' |
|
626 |
raw_datum.append(raw_data) |
|
627 |
pseudo_file = StringIO(''.join(raw_datum)) |
|
628 |
def read(length): |
|
629 |
if length is None: |
|
630 |
return pseudo_file.read() |
|
631 |
else: |
|
632 |
return pseudo_file.read(length) |
|
633 |
return (self.get_format_signature(), result_version_list, read) |
|
634 |
||
2520.4.47
by Aaron Bentley
Fix get_line_delta_blocks with eol |
635 |
def _extract_blocks(self, version_id, source, target): |
2520.4.41
by Aaron Bentley
Accelerate mpdiff generation |
636 |
if self._index.get_method(version_id) != 'line-delta': |
637 |
return None |
|
638 |
parent, sha1, noeol, delta = self.get_delta(version_id) |
|
2520.4.47
by Aaron Bentley
Fix get_line_delta_blocks with eol |
639 |
return KnitContent.get_line_delta_blocks(delta, source, target) |
2520.4.41
by Aaron Bentley
Accelerate mpdiff generation |
640 |
|
1596.2.36
by Robert Collins
add a get_delta api to versioned_file. |
641 |
def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :param version_id: The version to produce a delta for.
    :return: (parent, sha1, noeol, delta). parent is the first parent of
        version_id, or None if it has no parents; sha1 is the digest read
        with the record; noeol is True when the index options contain
        'no-eol'. For a version stored as a fulltext the delta is computed
        against the parent's text (or against an empty text).
    """
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)

    if self._index.get_method(version_id) != 'fulltext':
        # Already stored as a line delta: just parse it.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    # Stored as a fulltext: diff it against the parent to make a delta.
    new_content = self.factory.parse_fulltext(data, version_id)
    old_texts = []
    if parent is not None:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                     new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
2535.3.1
by Andrew Bennetts
Add get_format_signature to VersionedFile |
666 |
|
667 |
def get_format_signature(self): |
|
668 |
"""See VersionedFile.get_format_signature()."""
|
|
669 |
if self.factory.annotated: |
|
670 |
annotated_part = "annotated" |
|
671 |
else: |
|
672 |
annotated_part = "plain" |
|
2535.3.17
by Andrew Bennetts
[broken] Closer to a working Repository.fetch_revisions smart request. |
673 |
return "knit-%s" % (annotated_part,) |
1596.2.36
by Robert Collins
add a get_delta api to versioned_file. |
674 |
|
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
675 |
def get_graph_with_ghosts(self): |
676 |
"""See VersionedFile.get_graph_with_ghosts()."""
|
|
677 |
graph_items = self._index.get_graph() |
|
678 |
return dict(graph_items) |
|
679 |
||
1666.1.6
by Robert Collins
Make knit the default format. |
680 |
def get_sha1(self, version_id): |
2520.4.89
by Aaron Bentley
Add get_sha1s to weaves |
681 |
return self.get_sha1s([version_id])[0] |
2520.4.88
by Aaron Bentley
Retrieve all sha1s at once (ftw) |
682 |
|
683 |
def get_sha1s(self, version_ids): |
|
1666.1.6
by Robert Collins
Make knit the default format. |
684 |
"""See VersionedFile.get_sha1()."""
|
2520.4.88
by Aaron Bentley
Retrieve all sha1s at once (ftw) |
685 |
record_map = self._get_record_map(version_ids) |
686 |
# record entry 2 is the 'digest'.
|
|
687 |
return [record_map[v][2] for v in version_ids] |
|
1666.1.6
by Robert Collins
Make knit the default format. |
688 |
|
1563.2.15
by Robert Collins
remove the weavestore assumptions about the number and nature of files it manages. |
689 |
@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A new list holding the data suffix followed by the index
        suffix.
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes
|
1563.2.13
by Robert Collins
InterVersionedFile implemented. |
693 |
|
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
694 |
def has_ghost(self, version_id): |
695 |
"""True if there is a ghost reference in the file to version_id."""
|
|
696 |
# maybe we have it
|
|
697 |
if self.has_version(version_id): |
|
698 |
return False |
|
1759.2.2
by Jelmer Vernooij
Revert some of my spelling fixes and fix some typos after review by Aaron. |
699 |
# optimisable if needed by memoising the _ghosts set.
|
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
700 |
items = self._index.get_graph() |
701 |
for node, parents in items: |
|
702 |
for parent in parents: |
|
703 |
if parent not in self._index._cache: |
|
704 |
if parent == version_id: |
|
705 |
return True |
|
706 |
return False |
|
707 |
||
2535.3.30
by Andrew Bennetts
Delete obsolete comments and other cosmetic changes. |
708 |
def insert_data_stream(self, (format, data_list, reader_callable)): |
2535.3.4
by Andrew Bennetts
Simple implementation of Knit.insert_data_stream. |
709 |
"""Insert knit records from a data stream into this knit.
|
710 |
||
2535.3.5
by Andrew Bennetts
Batch writes as much as possible in insert_data_stream. |
711 |
If a version in the stream is already present in this knit, it will not
|
712 |
be inserted a second time. It will be checked for consistency with the
|
|
713 |
stored version however, and may cause a KnitCorrupt error to be raised
|
|
714 |
if the data in the stream disagrees with the already stored data.
|
|
2535.3.4
by Andrew Bennetts
Simple implementation of Knit.insert_data_stream. |
715 |
|
716 |
:seealso: get_data_stream
|
|
717 |
"""
|
|
718 |
if format != self.get_format_signature(): |
|
2670.3.6
by Andrew Bennetts
Remove redundant import. |
719 |
trace.mutter('incompatible format signature inserting to %r', self) |
2535.3.4
by Andrew Bennetts
Simple implementation of Knit.insert_data_stream. |
720 |
raise KnitDataStreamIncompatible( |
721 |
format, self.get_format_signature()) |
|
2535.3.17
by Andrew Bennetts
[broken] Closer to a working Repository.fetch_revisions smart request. |
722 |
|
723 |
for version_id, options, length, parents in data_list: |
|
724 |
if self.has_version(version_id): |
|
725 |
# First check: the list of parents.
|
|
726 |
my_parents = self.get_parents_with_ghosts(version_id) |
|
727 |
if my_parents != parents: |
|
728 |
# XXX: KnitCorrupt is not quite the right exception here.
|
|
729 |
raise KnitCorrupt( |
|
730 |
self.filename, |
|
731 |
'parents list %r from data stream does not match ' |
|
732 |
'already recorded parents %r for %s' |
|
733 |
% (parents, my_parents, version_id)) |
|
734 |
||
735 |
# Also check the SHA-1 of the fulltext this content will
|
|
736 |
# produce.
|
|
737 |
raw_data = reader_callable(length) |
|
738 |
my_fulltext_sha1 = self.get_sha1(version_id) |
|
739 |
df, rec = self._data._parse_record_header(version_id, raw_data) |
|
740 |
stream_fulltext_sha1 = rec[3] |
|
741 |
if my_fulltext_sha1 != stream_fulltext_sha1: |
|
742 |
# Actually, we don't know if it's this knit that's corrupt,
|
|
743 |
# or the data stream we're trying to insert.
|
|
744 |
raise KnitCorrupt( |
|
745 |
self.filename, 'sha-1 does not match %s' % version_id) |
|
746 |
else: |
|
2535.3.57
by Andrew Bennetts
Perform some sanity checking of data streams rather than blindly inserting them into our repository. |
747 |
if 'line-delta' in options: |
2535.3.61
by Andrew Bennetts
Clarify sanity checking in insert_data_stream. |
748 |
# Make sure that this knit record is actually useful: a
|
749 |
# line-delta is no use unless we have its parent.
|
|
750 |
# Fetching from a broken repository with this problem
|
|
751 |
# shouldn't break the target repository.
|
|
752 |
if not self._index.has_version(parents[0]): |
|
753 |
raise KnitCorrupt( |
|
754 |
self.filename, |
|
755 |
'line-delta from stream references '
|
|
756 |
'missing parent %s' % parents[0]) |
|
2535.3.17
by Andrew Bennetts
[broken] Closer to a working Repository.fetch_revisions smart request. |
757 |
self._add_raw_records( |
758 |
[(version_id, options, parents, length)], |
|
759 |
reader_callable(length)) |
|
760 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
761 |
def versions(self):
    """See VersionedFile.versions.

    :return: The result of the index's get_versions().
    """
    # mutter_callsite is called directly from this frame on purpose: its
    # first argument is a stack level.
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "versions scales with size of history")
    all_versions = self._index.get_versions()
    return all_versions
766 |
||
767 |
def has_version(self, version_id):
    """See VersionedFile.has_version.

    :return: The result of the index's has_version(version_id).
    """
    # mutter_callsite is called directly from this frame on purpose: its
    # first argument is a stack level.
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    present = self._index.has_version(version_id)
    return present
772 |
||
773 |
__contains__ = has_version |
|
774 |
||
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
775 |
def _merge_annotations(self, content, parents, parent_texts={}, |
2520.4.140
by Aaron Bentley
Use matching blocks from mpdiff for knit delta creation |
776 |
delta=None, annotated=None, |
777 |
left_matching_blocks=None): |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
778 |
"""Merge annotations for content. This is done by comparing
|
1596.2.27
by Robert Collins
Note potential improvements in knit adds. |
779 |
the annotations based on changed to the text.
|
780 |
"""
|
|
2520.4.146
by Aaron Bentley
Avoid get_matching_blocks for un-annotated text |
781 |
if left_matching_blocks is not None: |
782 |
delta_seq = diff._PrematchedMatcher(left_matching_blocks) |
|
783 |
else: |
|
784 |
delta_seq = None |
|
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
785 |
if annotated: |
1596.2.36
by Robert Collins
add a get_delta api to versioned_file. |
786 |
for parent_id in parents: |
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
787 |
merge_content = self._get_content(parent_id, parent_texts) |
2520.4.146
by Aaron Bentley
Avoid get_matching_blocks for un-annotated text |
788 |
if (parent_id == parents[0] and delta_seq is not None): |
789 |
seq = delta_seq |
|
2520.4.140
by Aaron Bentley
Use matching blocks from mpdiff for knit delta creation |
790 |
else: |
791 |
seq = patiencediff.PatienceSequenceMatcher( |
|
792 |
None, merge_content.text(), content.text()) |
|
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
793 |
for i, j, n in seq.get_matching_blocks(): |
794 |
if n == 0: |
|
795 |
continue
|
|
2520.4.146
by Aaron Bentley
Avoid get_matching_blocks for un-annotated text |
796 |
# this appears to copy (origin, text) pairs across to the
|
797 |
# new content for any line that matches the last-checked
|
|
798 |
# parent.
|
|
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
799 |
content._lines[j:j+n] = merge_content._lines[i:i+n] |
1596.2.36
by Robert Collins
add a get_delta api to versioned_file. |
800 |
if delta: |
2520.4.146
by Aaron Bentley
Avoid get_matching_blocks for un-annotated text |
801 |
if delta_seq is None: |
1596.2.36
by Robert Collins
add a get_delta api to versioned_file. |
802 |
reference_content = self._get_content(parents[0], parent_texts) |
803 |
new_texts = content.text() |
|
804 |
old_texts = reference_content.text() |
|
2104.4.2
by John Arbash Meinel
Small cleanup and NEWS entry about fixing bug #65714 |
805 |
delta_seq = patiencediff.PatienceSequenceMatcher( |
2100.2.1
by wang
Replace python's difflib by patiencediff because the worst case |
806 |
None, old_texts, new_texts) |
1596.2.36
by Robert Collins
add a get_delta api to versioned_file. |
807 |
return self._make_line_delta(delta_seq, content) |
808 |
||
809 |
def _make_line_delta(self, delta_seq, new_content): |
|
810 |
"""Generate a line delta from delta_seq and new_content."""
|
|
811 |
diff_hunks = [] |
|
812 |
for op in delta_seq.get_opcodes(): |
|
813 |
if op[0] == 'equal': |
|
814 |
continue
|
|
815 |
diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]])) |
|
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
816 |
return diff_hunks |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
817 |
|
1756.3.17
by Aaron Bentley
Combine get_components_positions with get_components_versions |
818 |
def _get_components_positions(self, version_ids): |
1756.3.19
by Aaron Bentley
Documentation and cleanups |
819 |
"""Produce a map of position data for the components of versions.
|
820 |
||
1756.3.22
by Aaron Bentley
Tweaks from review |
821 |
This data is intended to be used for retrieving the knit records.
|
1756.3.19
by Aaron Bentley
Documentation and cleanups |
822 |
|
823 |
A dict of version_id to (method, data_pos, data_size, next) is
|
|
824 |
returned.
|
|
825 |
method is the way referenced data should be applied.
|
|
826 |
data_pos is the position of the data in the knit.
|
|
827 |
data_size is the size of the data in the knit.
|
|
828 |
next is the build-parent of the version, or None for fulltexts.
|
|
829 |
"""
|
|
1756.3.9
by Aaron Bentley
More optimization refactoring |
830 |
component_data = {} |
831 |
for version_id in version_ids: |
|
832 |
cursor = version_id |
|
833 |
||
1756.3.10
by Aaron Bentley
Optimize selection and retrieval of records |
834 |
while cursor is not None and cursor not in component_data: |
1756.2.29
by Aaron Bentley
Remove basis knit support |
835 |
method = self._index.get_method(cursor) |
1756.3.10
by Aaron Bentley
Optimize selection and retrieval of records |
836 |
if method == 'fulltext': |
837 |
next = None |
|
838 |
else: |
|
2592.3.245
by Andrew Bennetts
Commit Martin's faster knit extraction one-liner that was approved by Robert and John. |
839 |
next = self.get_parents_with_ghosts(cursor)[0] |
2592.3.71
by Robert Collins
Basic version of knit-based repository operating, many tests failing. |
840 |
index_memo = self._index.get_position(cursor) |
841 |
component_data[cursor] = (method, index_memo, next) |
|
1756.3.10
by Aaron Bentley
Optimize selection and retrieval of records |
842 |
cursor = next |
843 |
return component_data |
|
1756.3.18
by Aaron Bentley
More cleanup |
844 |
|
1596.2.32
by Robert Collins
Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility. |
845 |
def _get_content(self, version_id, parent_texts={}): |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
846 |
"""Returns a content object that makes up the specified
|
847 |
version."""
|
|
1596.2.32
by Robert Collins
Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility. |
848 |
cached_version = parent_texts.get(version_id, None) |
849 |
if cached_version is not None: |
|
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
850 |
if not self.has_version(version_id): |
851 |
raise RevisionNotPresent(version_id, self.filename) |
|
1596.2.32
by Robert Collins
Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility. |
852 |
return cached_version |
853 |
||
1756.3.22
by Aaron Bentley
Tweaks from review |
854 |
text_map, contents_map = self._get_content_maps([version_id]) |
855 |
return contents_map[version_id] |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
856 |
|
857 |
def _check_versions_present(self, version_ids): |
|
858 |
"""Check that all specified versions are present."""
|
|
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
859 |
self._index.check_versions_present(version_ids) |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
860 |
|
2794.1.1
by Robert Collins
Allow knits to be instructed not to add a text based on a sha, for commit. |
861 |
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts, |
2805.6.7
by Robert Collins
Review feedback. |
862 |
nostore_sha, random_id, check_content): |
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
863 |
"""See VersionedFile.add_lines_with_ghosts()."""
|
2805.6.7
by Robert Collins
Review feedback. |
864 |
self._check_add(version_id, lines, random_id, check_content) |
2805.6.2
by Robert Collins
General cleanup of KnitVersionedFile._add. |
865 |
return self._add(version_id, lines, parents, self.delta, |
2841.2.1
by Robert Collins
* Commit no longer checks for new text keys during insertion when the |
866 |
parent_texts, None, nostore_sha, random_id) |
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
867 |
|
2520.4.140
by Aaron Bentley
Use matching blocks from mpdiff for knit delta creation |
868 |
def _add_lines(self, version_id, parents, lines, parent_texts, |
2805.6.7
by Robert Collins
Review feedback. |
869 |
left_matching_blocks, nostore_sha, random_id, check_content): |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
870 |
"""See VersionedFile.add_lines."""
|
2805.6.7
by Robert Collins
Review feedback. |
871 |
self._check_add(version_id, lines, random_id, check_content) |
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
872 |
self._check_versions_present(parents) |
2520.4.140
by Aaron Bentley
Use matching blocks from mpdiff for knit delta creation |
873 |
return self._add(version_id, lines[:], parents, self.delta, |
2841.2.1
by Robert Collins
* Commit no longer checks for new text keys during insertion when the |
874 |
parent_texts, left_matching_blocks, nostore_sha, random_id) |
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
875 |
|
2805.6.7
by Robert Collins
Review feedback. |
876 |
def _check_add(self, version_id, lines, random_id, check_content): |
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
877 |
"""check that version_id and lines are safe to add."""
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
878 |
if contains_whitespace(version_id): |
1668.5.1
by Olaf Conradi
Fix bug in knits when raising InvalidRevisionId without the required |
879 |
raise InvalidRevisionId(version_id, self.filename) |
2229.2.3
by Aaron Bentley
change reserved_id to is_reserved_id, add check_not_reserved for DRY |
880 |
self.check_not_reserved_id(version_id) |
2805.6.4
by Robert Collins
Don't check for existing versions when adding texts with random revision ids. |
881 |
# Technically this could be avoided if we are happy to allow duplicate
|
882 |
# id insertion when other things than bzr core insert texts, but it
|
|
883 |
# seems useful for folk using the knit api directly to have some safety
|
|
884 |
# blanket that we can disable.
|
|
885 |
if not random_id and self.has_version(version_id): |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
886 |
raise RevisionAlreadyPresent(version_id, self.filename) |
2805.6.7
by Robert Collins
Review feedback. |
887 |
if check_content: |
888 |
self._check_lines_not_unicode(lines) |
|
889 |
self._check_lines_are_lines(lines) |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
890 |
|
2520.4.140
by Aaron Bentley
Use matching blocks from mpdiff for knit delta creation |
891 |
def _add(self, version_id, lines, parents, delta, parent_texts, |
2841.2.1
by Robert Collins
* Commit no longer checks for new text keys during insertion when the |
892 |
left_matching_blocks, nostore_sha, random_id): |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
893 |
"""Add a set of lines on top of version specified by parents.
|
894 |
||
895 |
If delta is true, compress the text as a line-delta against
|
|
896 |
the first parent.
|
|
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
897 |
|
898 |
Any versions not present will be converted into ghosts.
|
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
899 |
"""
|
2850.1.1
by Robert Collins
* ``KnitVersionedFile.add*`` will no longer cache added records even when |
900 |
# first thing, if the content is something we don't need to store, find
|
901 |
# that out.
|
|
902 |
line_bytes = ''.join(lines) |
|
903 |
digest = sha_string(line_bytes) |
|
904 |
if nostore_sha == digest: |
|
905 |
raise errors.ExistingContent |
|
1596.2.28
by Robert Collins
more knit profile based tuning. |
906 |
|
1596.2.10
by Robert Collins
Reviewer feedback on knit branches. |
907 |
present_parents = [] |
1596.2.32
by Robert Collins
Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility. |
908 |
if parent_texts is None: |
909 |
parent_texts = {} |
|
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
910 |
for parent in parents: |
2805.6.2
by Robert Collins
General cleanup of KnitVersionedFile._add. |
911 |
if self.has_version(parent): |
1596.2.10
by Robert Collins
Reviewer feedback on knit branches. |
912 |
present_parents.append(parent) |
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
913 |
|
2805.6.2
by Robert Collins
General cleanup of KnitVersionedFile._add. |
914 |
# can only compress against the left most present parent.
|
915 |
if (delta and |
|
916 |
(len(present_parents) == 0 or |
|
917 |
present_parents[0] != parents[0])): |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
918 |
delta = False |
919 |
||
2850.1.1
by Robert Collins
* ``KnitVersionedFile.add*`` will no longer cache added records even when |
920 |
text_length = len(line_bytes) |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
921 |
options = [] |
922 |
if lines: |
|
923 |
if lines[-1][-1] != '\n': |
|
2805.6.2
by Robert Collins
General cleanup of KnitVersionedFile._add. |
924 |
# copy the contents of lines.
|
925 |
lines = lines[:] |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
926 |
options.append('no-eol') |
927 |
lines[-1] = lines[-1] + '\n' |
|
2888.1.1
by Robert Collins
(robertc) Use prejoined content for knit storage when performing a full-text store of unannotated content. (Robert Collins) |
928 |
line_bytes += '\n' |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
929 |
|
2805.6.2
by Robert Collins
General cleanup of KnitVersionedFile._add. |
930 |
if delta: |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
931 |
# To speed the extract of texts the delta chain is limited
|
932 |
# to a fixed number of deltas. This should minimize both
|
|
933 |
# I/O and the time spent applying deltas.
|
|
2147.1.1
by John Arbash Meinel
Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size |
934 |
delta = self._check_should_delta(present_parents) |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
935 |
|
2249.5.15
by John Arbash Meinel
remove get_cached_utf8 checks which were slowing things down. |
936 |
assert isinstance(version_id, str) |
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
937 |
content = self.factory.make(lines, version_id) |
1596.2.34
by Robert Collins
Optimise knit add to only diff once per parent, not once per parent + once for the delta generation. |
938 |
if delta or (self.factory.annotated and len(present_parents) > 0): |
2805.6.2
by Robert Collins
General cleanup of KnitVersionedFile._add. |
939 |
# Merge annotations from parent texts if needed.
|
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
940 |
delta_hunks = self._merge_annotations(content, present_parents, |
2520.4.140
by Aaron Bentley
Use matching blocks from mpdiff for knit delta creation |
941 |
parent_texts, delta, self.factory.annotated, |
942 |
left_matching_blocks) |
|
1596.2.32
by Robert Collins
Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility. |
943 |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
944 |
if delta: |
945 |
options.append('line-delta') |
|
946 |
store_lines = self.factory.lower_line_delta(delta_hunks) |
|
2850.1.1
by Robert Collins
* ``KnitVersionedFile.add*`` will no longer cache added records even when |
947 |
size, bytes = self._data._record_to_data(version_id, digest, |
948 |
store_lines) |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
949 |
else: |
950 |
options.append('fulltext') |
|
2888.1.3
by Robert Collins
Review feedback. |
951 |
# isinstance is slower and we have no hierarchy.
|
2888.1.1
by Robert Collins
(robertc) Use prejoined content for knit storage when performing a full-text store of unannotated content. (Robert Collins) |
952 |
if self.factory.__class__ == KnitPlainFactory: |
2888.1.3
by Robert Collins
Review feedback. |
953 |
# Use the already joined bytes saving iteration time in
|
954 |
# _record_to_data.
|
|
2888.1.1
by Robert Collins
(robertc) Use prejoined content for knit storage when performing a full-text store of unannotated content. (Robert Collins) |
955 |
size, bytes = self._data._record_to_data(version_id, digest, |
956 |
lines, [line_bytes]) |
|
957 |
else: |
|
958 |
# get mixed annotation + content and feed it into the
|
|
959 |
# serialiser.
|
|
960 |
store_lines = self.factory.lower_fulltext(content) |
|
961 |
size, bytes = self._data._record_to_data(version_id, digest, |
|
962 |
store_lines) |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
963 |
|
2850.1.1
by Robert Collins
* ``KnitVersionedFile.add*`` will no longer cache added records even when |
964 |
access_memo = self._data.add_raw_records([size], bytes)[0] |
2841.2.1
by Robert Collins
* Commit no longer checks for new text keys during insertion when the |
965 |
self._index.add_versions( |
2850.1.1
by Robert Collins
* ``KnitVersionedFile.add*`` will no longer cache added records even when |
966 |
((version_id, options, access_memo, parents),), |
967 |
random_id=random_id) |
|
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
968 |
return digest, text_length, content |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
969 |
|
1563.2.19
by Robert Collins
stub out a check for knits. |
970 |
def check(self, progress_bar=None): |
971 |
"""See VersionedFile.check()."""
|
|
972 |
||
1594.2.24
by Robert Collins
Make use of the transaction finalisation warning support to implement in-knit caching. |
973 |
def _clone_text(self, new_version_id, old_version_id, parents): |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
974 |
"""See VersionedFile.clone_text()."""
|
1756.2.8
by Aaron Bentley
Implement get_line_list, cleanups |
975 |
# FIXME RBC 20060228 make fast by only inserting an index with null
|
976 |
# delta.
|
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
977 |
self.add_lines(new_version_id, parents, self.get_lines(old_version_id)) |
978 |
||
979 |
def get_lines(self, version_id): |
|
980 |
"""See VersionedFile.get_lines()."""
|
|
1756.2.8
by Aaron Bentley
Implement get_line_list, cleanups |
981 |
return self.get_line_list([version_id])[0] |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
982 |
|
1756.3.12
by Aaron Bentley
Stuff all text-building data in record_map |
983 |
def _get_record_map(self, version_ids): |
1756.3.19
by Aaron Bentley
Documentation and cleanups |
984 |
"""Produce a dictionary of knit records.
|
985 |
|
|
986 |
The keys are version_ids, the values are tuples of (method, content,
|
|
987 |
digest, next).
|
|
988 |
method is the way the content should be applied.
|
|
989 |
content is a KnitContent object.
|
|
990 |
digest is the SHA1 digest of this version id after all steps are done
|
|
991 |
next is the build-parent of the version, i.e. the leftmost ancestor.
|
|
992 |
If the method is fulltext, next will be None.
|
|
993 |
"""
|
|
1756.3.12
by Aaron Bentley
Stuff all text-building data in record_map |
994 |
position_map = self._get_components_positions(version_ids) |
2592.3.71
by Robert Collins
Basic version of knit-based repository operating, many tests failing. |
995 |
# c = component_id, m = method, i_m = index_memo, n = next
|
996 |
records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()] |
|
1756.3.12
by Aaron Bentley
Stuff all text-building data in record_map |
997 |
record_map = {} |
1863.1.5
by John Arbash Meinel
Add a read_records_iter_unsorted, which can return records in any order. |
998 |
for component_id, content, digest in \ |
1863.1.9
by John Arbash Meinel
Switching to have 'read_records_iter' return in random order. |
999 |
self._data.read_records_iter(records): |
2592.3.71
by Robert Collins
Basic version of knit-based repository operating, many tests failing. |
1000 |
method, index_memo, next = position_map[component_id] |
1756.3.12
by Aaron Bentley
Stuff all text-building data in record_map |
1001 |
record_map[component_id] = method, content, digest, next |
1002 |
||
1756.3.10
by Aaron Bentley
Optimize selection and retrieval of records |
1003 |
return record_map |
1756.2.5
by Aaron Bentley
Reduced read_records calls to 1 |
1004 |
|
1756.2.7
by Aaron Bentley
Implement get_text in terms of get_texts |
1005 |
def get_text(self, version_id): |
1006 |
"""See VersionedFile.get_text"""
|
|
1007 |
return self.get_texts([version_id])[0] |
|
1008 |
||
1756.2.1
by Aaron Bentley
Implement get_texts |
1009 |
def get_texts(self, version_ids): |
1756.2.8
by Aaron Bentley
Implement get_line_list, cleanups |
1010 |
return [''.join(l) for l in self.get_line_list(version_ids)] |
1011 |
||
1012 |
def get_line_list(self, version_ids): |
|
1756.2.1
by Aaron Bentley
Implement get_texts |
1013 |
"""Return the texts of listed versions as a list of lists of lines."""
|
2229.2.1
by Aaron Bentley
Reject reserved ids in versiondfile, tree, branch and repository |
1014 |
for version_id in version_ids: |
2229.2.3
by Aaron Bentley
change reserved_id to is_reserved_id, add check_not_reserved for DRY |
1015 |
self.check_not_reserved_id(version_id) |
1756.3.13
by Aaron Bentley
Refactor get_line_list into _get_content |
1016 |
text_map, content_map = self._get_content_maps(version_ids) |
1017 |
return [text_map[v] for v in version_ids] |
|
1018 |
||
2520.4.90
by Aaron Bentley
Handle \r terminated lines in Weaves properly |
1019 |
_get_lf_split_line_list = get_line_list |
2520.4.3
by Aaron Bentley
Implement plain strategy for extracting and installing multiparent diffs |
1020 |
|
1756.3.13
by Aaron Bentley
Refactor get_line_list into _get_content |
1021 |
def _get_content_maps(self, version_ids): |
1756.3.19
by Aaron Bentley
Documentation and cleanups |
1022 |
"""Produce maps of text and KnitContents
|
1023 |
|
|
1024 |
:return: (text_map, content_map) where text_map contains the texts for
|
|
1025 |
the requested versions and content_map contains the KnitContents.
|
|
1756.3.22
by Aaron Bentley
Tweaks from review |
1026 |
Both dicts take version_ids as their keys.
|
1756.3.19
by Aaron Bentley
Documentation and cleanups |
1027 |
"""
|
2921.2.1
by Robert Collins
* Knit text reconstruction now avoids making copies of the lines list for |
1028 |
# FUTURE: This function could be improved for the 'extract many' case
|
1029 |
# by tracking each component and only doing the copy when the number of
|
|
1030 |
# children that need to apply deltas to it is > 1 or it is part of the
|
|
1031 |
# final output.
|
|
1032 |
version_ids = list(version_ids) |
|
1033 |
multiple_versions = len(version_ids) != 1 |
|
1756.3.12
by Aaron Bentley
Stuff all text-building data in record_map |
1034 |
record_map = self._get_record_map(version_ids) |
1756.2.5
by Aaron Bentley
Reduced read_records calls to 1 |
1035 |
|
1756.2.8
by Aaron Bentley
Implement get_line_list, cleanups |
1036 |
text_map = {} |
1756.3.7
by Aaron Bentley
Avoid re-parsing texts version components |
1037 |
content_map = {} |
1756.3.14
by Aaron Bentley
Handle the intermediate and final representations of no-final-eol texts |
1038 |
final_content = {} |
1756.3.10
by Aaron Bentley
Optimize selection and retrieval of records |
1039 |
for version_id in version_ids: |
1040 |
components = [] |
|
1041 |
cursor = version_id |
|
1042 |
while cursor is not None: |
|
1756.3.12
by Aaron Bentley
Stuff all text-building data in record_map |
1043 |
method, data, digest, next = record_map[cursor] |
1756.3.10
by Aaron Bentley
Optimize selection and retrieval of records |
1044 |
components.append((cursor, method, data, digest)) |
1045 |
if cursor in content_map: |
|
1046 |
break
|
|
1047 |
cursor = next |
|
1048 |
||
1756.2.1
by Aaron Bentley
Implement get_texts |
1049 |
content = None |
1756.2.7
by Aaron Bentley
Implement get_text in terms of get_texts |
1050 |
for component_id, method, data, digest in reversed(components): |
1756.3.7
by Aaron Bentley
Avoid re-parsing texts version components |
1051 |
if component_id in content_map: |
1052 |
content = content_map[component_id] |
|
1756.3.8
by Aaron Bentley
Avoid unused calls, use generators, sets instead of lists |
1053 |
else: |
1054 |
if method == 'fulltext': |
|
1055 |
assert content is None |
|
2249.5.12
by John Arbash Meinel
Change the APIs for VersionedFile, Store, and some of Repository into utf-8 |
1056 |
content = self.factory.parse_fulltext(data, version_id) |
1756.3.8
by Aaron Bentley
Avoid unused calls, use generators, sets instead of lists |
1057 |
elif method == 'line-delta': |
2249.5.12
by John Arbash Meinel
Change the APIs for VersionedFile, Store, and some of Repository into utf-8 |
1058 |
delta = self.factory.parse_line_delta(data, version_id) |
2921.2.1
by Robert Collins
* Knit text reconstruction now avoids making copies of the lines list for |
1059 |
if multiple_versions: |
1060 |
# only doing this when we want multiple versions
|
|
1061 |
# output avoids list copies - which reference and
|
|
1062 |
# dereference many strings.
|
|
1063 |
content = content.copy() |
|
1064 |
content.apply_delta(delta, version_id) |
|
1065 |
if multiple_versions: |
|
1066 |
content_map[component_id] = content |
|
1756.2.1
by Aaron Bentley
Implement get_texts |
1067 |
|
1068 |
if 'no-eol' in self._index.get_options(version_id): |
|
2921.2.1
by Robert Collins
* Knit text reconstruction now avoids making copies of the lines list for |
1069 |
if multiple_versions: |
1070 |
content = content.copy() |
|
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
1071 |
content.strip_last_line_newline() |
1756.3.14
by Aaron Bentley
Handle the intermediate and final representations of no-final-eol texts |
1072 |
final_content[version_id] = content |
1756.2.1
by Aaron Bentley
Implement get_texts |
1073 |
|
1074 |
# digest here is the digest from the last applied component.
|
|
1756.3.6
by Aaron Bentley
More multi-text extraction |
1075 |
text = content.text() |
2911.1.1
by Martin Pool
Better messages when problems are detected inside a knit |
1076 |
actual_sha = sha_strings(text) |
1077 |
if actual_sha != digest: |
|
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
1078 |
raise KnitCorrupt(self.filename, |
2911.1.1
by Martin Pool
Better messages when problems are detected inside a knit |
1079 |
'\n sha-1 %s' |
1080 |
'\n of reconstructed text does not match' |
|
1081 |
'\n expected %s' |
|
1082 |
'\n for version %s' % |
|
1083 |
(actual_sha, digest, version_id)) |
|
2794.1.2
by Robert Collins
Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts. |
1084 |
text_map[version_id] = text |
1085 |
return text_map, final_content |
|
1756.2.1
by Aaron Bentley
Implement get_texts |
1086 |
|
2039.1.1
by Aaron Bentley
Clean up progress properly when interrupted during fetch (#54000) |
1087 |
def iter_lines_added_or_present_in_versions(self, version_ids=None, |
1088 |
pb=None): |
|
1594.2.6
by Robert Collins
Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved. |
1089 |
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
|
1090 |
if version_ids is None: |
|
1091 |
version_ids = self.versions() |
|
2039.1.1
by Aaron Bentley
Clean up progress properly when interrupted during fetch (#54000) |
1092 |
if pb is None: |
1093 |
pb = progress.DummyProgress() |
|
1759.2.2
by Jelmer Vernooij
Revert some of my spelling fixes and fix some typos after review by Aaron. |
1094 |
# we don't care about inclusions, the caller cares.
|
1594.2.6
by Robert Collins
Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved. |
1095 |
# but we need to set up a list of records to visit.
|
1096 |
# we need version_id, position, length
|
|
1097 |
version_id_records = [] |
|
2163.1.1
by John Arbash Meinel
Use a set to make iter_lines_added_or_present *much* faster |
1098 |
requested_versions = set(version_ids) |
1594.3.1
by Robert Collins
Merge transaction finalisation and ensure iter_lines_added_or_present in knits does a old-to-new read in the knit. |
1099 |
# filter for available versions
|
2698.2.4
by Robert Collins
Remove full history scan during iter_lines_added_or_present in KnitVersionedFile. |
1100 |
for version_id in requested_versions: |
1594.2.6
by Robert Collins
Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved. |
1101 |
if not self.has_version(version_id): |
1102 |
raise RevisionNotPresent(version_id, self.filename) |
|
1594.3.1
by Robert Collins
Merge transaction finalisation and ensure iter_lines_added_or_present in knits does a old-to-new read in the knit. |
1103 |
# get an in-component-order queue:
|
1104 |
for version_id in self.versions(): |
|
1105 |
if version_id in requested_versions: |
|
2592.3.71
by Robert Collins
Basic version of knit-based repository operating, many tests failing. |
1106 |
index_memo = self._index.get_position(version_id) |
1107 |
version_id_records.append((version_id, index_memo)) |
|
1594.3.1
by Robert Collins
Merge transaction finalisation and ensure iter_lines_added_or_present in knits does a old-to-new read in the knit. |
1108 |
|
1594.2.17
by Robert Collins
Better readv coalescing, now with test, and progress during knit index reading. |
1109 |
total = len(version_id_records) |
2147.1.3
by John Arbash Meinel
In knit.py we were re-using a variable in 2 loops, causing bogus progress messages to be generated. |
1110 |
for version_idx, (version_id, data, sha_value) in \ |
1111 |
enumerate(self._data.read_records_iter(version_id_records)): |
|
1112 |
pb.update('Walking content.', version_idx, total) |
|
2039.1.1
by Aaron Bentley
Clean up progress properly when interrupted during fetch (#54000) |
1113 |
method = self._index.get_method(version_id) |
2163.1.7
by John Arbash Meinel
Switch the line iterator as suggested by Aaron Bentley |
1114 |
|
2039.1.1
by Aaron Bentley
Clean up progress properly when interrupted during fetch (#54000) |
1115 |
assert method in ('fulltext', 'line-delta') |
1116 |
if method == 'fulltext': |
|
2163.1.7
by John Arbash Meinel
Switch the line iterator as suggested by Aaron Bentley |
1117 |
line_iterator = self.factory.get_fulltext_content(data) |
2039.1.1
by Aaron Bentley
Clean up progress properly when interrupted during fetch (#54000) |
1118 |
else: |
2163.1.7
by John Arbash Meinel
Switch the line iterator as suggested by Aaron Bentley |
1119 |
line_iterator = self.factory.get_linedelta_content(data) |
1120 |
for line in line_iterator: |
|
1121 |
yield line |
|
1122 |
||
2039.1.1
by Aaron Bentley
Clean up progress properly when interrupted during fetch (#54000) |
1123 |
pb.update('Walking content.', total, total) |
1594.2.6
by Robert Collins
Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved. |
1124 |
|
2592.3.43
by Robert Collins
A knit iter_parents API. |
1125 |
def iter_parents(self, version_ids): |
1126 |
"""Iterate through the parents for many version ids.
|
|
1127 |
||
1128 |
:param version_ids: An iterable yielding version_ids.
|
|
1129 |
:return: An iterator that yields (version_id, parents). Requested
|
|
1130 |
version_ids not present in the versioned file are simply skipped.
|
|
1131 |
The order is undefined, allowing for different optimisations in
|
|
1132 |
the underlying implementation.
|
|
1133 |
"""
|
|
1134 |
return self._index.iter_parents(version_ids) |
|
1135 |
||
1563.2.18
by Robert Collins
get knit repositories really using knits for text storage. |
1136 |
def num_versions(self): |
1137 |
"""See VersionedFile.num_versions()."""
|
|
1138 |
return self._index.num_versions() |
|
1139 |
||
1140 |
__len__ = num_versions |
|
1141 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1142 |
def annotate_iter(self, version_id): |
1143 |
"""See VersionedFile.annotate_iter."""
|
|
2770.1.1
by Aaron Bentley
Initial implmentation of plain knit annotation |
1144 |
return self.factory.annotate_iter(self, version_id) |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1145 |
|
1146 |
def get_parents(self, version_id): |
|
1147 |
"""See VersionedFile.get_parents."""
|
|
1628.1.2
by Robert Collins
More knit micro-optimisations. |
1148 |
# perf notes:
|
1149 |
# optimism counts!
|
|
1150 |
# 52554 calls in 1264 872 internal down from 3674
|
|
1151 |
try: |
|
1152 |
return self._index.get_parents(version_id) |
|
1153 |
except KeyError: |
|
1154 |
raise RevisionNotPresent(version_id, self.filename) |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1155 |
|
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
1156 |
def get_parents_with_ghosts(self, version_id): |
1157 |
"""See VersionedFile.get_parents_with_ghosts."""
|
|
1628.1.2
by Robert Collins
More knit micro-optimisations. |
1158 |
try: |
1159 |
return self._index.get_parents_with_ghosts(version_id) |
|
1160 |
except KeyError: |
|
1161 |
raise RevisionNotPresent(version_id, self.filename) |
|
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
1162 |
|
2530.1.1
by Aaron Bentley
Make topological sorting optional for get_ancestry |
1163 |
def get_ancestry(self, versions, topo_sorted=True): |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1164 |
"""See VersionedFile.get_ancestry."""
|
1165 |
if isinstance(versions, basestring): |
|
1166 |
versions = [versions] |
|
1167 |
if not versions: |
|
1168 |
return [] |
|
2530.1.1
by Aaron Bentley
Make topological sorting optional for get_ancestry |
1169 |
return self._index.get_ancestry(versions, topo_sorted) |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1170 |
|
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
1171 |
def get_ancestry_with_ghosts(self, versions): |
1172 |
"""See VersionedFile.get_ancestry_with_ghosts."""
|
|
1173 |
if isinstance(versions, basestring): |
|
1174 |
versions = [versions] |
|
1175 |
if not versions: |
|
1176 |
return [] |
|
1177 |
return self._index.get_ancestry_with_ghosts(versions) |
|
1178 |
||
1664.2.3
by Aaron Bentley
Add failing test case |
1179 |
def plan_merge(self, ver_a, ver_b): |
1664.2.11
by Aaron Bentley
Clarifications from merge review |
1180 |
"""See VersionedFile.plan_merge."""
|
2490.2.33
by Aaron Bentley
Disable topological sorting of get_ancestry where sensible |
1181 |
ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False)) |
1182 |
ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False)) |
|
1664.2.4
by Aaron Bentley
Identify unchanged lines correctly |
1183 |
annotated_a = self.annotate(ver_a) |
1184 |
annotated_b = self.annotate(ver_b) |
|
1551.15.46
by Aaron Bentley
Move plan merge to tree |
1185 |
return merge._plan_annotate_merge(annotated_a, annotated_b, |
1186 |
ancestors_a, ancestors_b) |
|
1664.2.4
by Aaron Bentley
Identify unchanged lines correctly |
1187 |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1188 |
|
1189 |
class _KnitComponentFile(object): |
|
1190 |
"""One of the files used to implement a knit database"""
|
|
1191 |
||
1946.2.1
by John Arbash Meinel
2 changes to knits. Delay creating the .knit or .kndx file until we have actually tried to write data. Because of this, we must allow the Knit to create the prefix directories |
1192 |
def __init__(self, transport, filename, mode, file_mode=None, |
1946.2.12
by John Arbash Meinel
Add ability to pass a directory mode to non_atomic_put |
1193 |
create_parent_dir=False, dir_mode=None): |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1194 |
self._transport = transport |
1195 |
self._filename = filename |
|
1196 |
self._mode = mode |
|
1946.2.3
by John Arbash Meinel
Pass around the file mode correctly |
1197 |
self._file_mode = file_mode |
1946.2.12
by John Arbash Meinel
Add ability to pass a directory mode to non_atomic_put |
1198 |
self._dir_mode = dir_mode |
1946.2.1
by John Arbash Meinel
2 changes to knits. Delay creating the .knit or .kndx file until we have actually tried to write data. Because of this, we must allow the Knit to create the prefix directories |
1199 |
self._create_parent_dir = create_parent_dir |
1200 |
self._need_to_create = False |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1201 |
|
2196.2.5
by John Arbash Meinel
Add an exception class when the knit index storage method is unknown, and properly test for it |
1202 |
def _full_path(self): |
1203 |
"""Return the full path to this file."""
|
|
1204 |
return self._transport.base + self._filename |
|
1205 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1206 |
def check_header(self, fp): |
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
1207 |
line = fp.readline() |
2171.1.1
by John Arbash Meinel
Knit index files should ignore empty indexes rather than consider them corrupt. |
1208 |
if line == '': |
1209 |
# An empty file can actually be treated as though the file doesn't
|
|
1210 |
# exist yet.
|
|
2196.2.5
by John Arbash Meinel
Add an exception class when the knit index storage method is unknown, and properly test for it |
1211 |
raise errors.NoSuchFile(self._full_path()) |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1212 |
if line != self.HEADER: |
2171.1.1
by John Arbash Meinel
Knit index files should ignore empty indexes rather than consider them corrupt. |
1213 |
raise KnitHeaderError(badline=line, |
1214 |
filename=self._transport.abspath(self._filename)) |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1215 |
|
1216 |
def __repr__(self):
    """Return 'ClassName(filename)' for use in debugging output."""
    class_name = self.__class__.__name__
    return '%s(%s)' % (class_name, self._filename)
|
1218 |
||
1219 |
||
1220 |
class _KnitIndex(_KnitComponentFile): |
|
1221 |
"""Manages knit index file.
|
|
1222 |
||
1223 |
The index is already kept in memory and read on startup, to enable
|
|
1224 |
fast lookups of revision information. The cursor of the index
|
|
1225 |
file is always pointing to the end, making it easy to append
|
|
1226 |
entries.
|
|
1227 |
||
1228 |
_cache is a cache for fast mapping from version id to an Index
|
|
1229 |
object.
|
|
1230 |
||
1231 |
_history is a cache for fast mapping from indexes to version ids.
|
|
1232 |
||
1233 |
The index data format is dictionary compressed when it comes to
|
|
1234 |
parent references; an index entry may only have parents with a
|
|
1235 |
lower index number. As a result, the index is topologically sorted.
|
|
1563.2.11
by Robert Collins
Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis. |
1236 |
|
1237 |
Duplicate entries may be written to the index for a single version id;
|
|
1238 |
if this is done then the latter one completely replaces the former:
|
|
1239 |
this allows updates to correct version and parent information.
|
|
1240 |
Note that the two entries may share the delta, and that successive
|
|
1241 |
annotations and references MUST point to the first entry.
|
|
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
1242 |
|
1243 |
The index file on disc contains a header, followed by one line per knit
|
|
1244 |
record. The same revision can be present in an index file more than once.
|
|
1759.2.1
by Jelmer Vernooij
Fix some types (found using aspell). |
1245 |
The first occurrence gets assigned a sequence number starting from 0.
|
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
1246 |
|
1247 |
The format of a single line is
|
|
1248 |
REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
|
|
1249 |
REVISION_ID is a utf8-encoded revision id
|
|
1250 |
FLAGS is a comma separated list of flags about the record. Values include
|
|
1251 |
no-eol, line-delta, fulltext.
|
|
1252 |
BYTE_OFFSET is the ascii representation of the byte offset in the data file
|
|
1253 |
that the compressed data starts at.
|
|
1254 |
LENGTH is the ascii representation of the length of the record in the data file.
|
|
1255 |
PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
|
|
1256 |
REVISION_ID.
|
|
1257 |
PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
|
|
1258 |
revision id already in the knit that is a parent of REVISION_ID.
|
|
1259 |
The ' :' marker is the end of record marker.
|
|
1260 |
|
|
1261 |
partial writes:
|
|
2158.3.1
by Dmitry Vasiliev
KnitIndex tests/fixes/optimizations |
1262 |
when a write is interrupted to the index file, it will result in a line
|
1263 |
that does not end in ' :'. If the ' :' is not present at the end of a line,
|
|
1264 |
or at the end of the file, then the record that is missing it will be
|
|
1265 |
ignored by the parser.
|
|
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
1266 |
|
1759.2.1
by Jelmer Vernooij
Fix some types (found using aspell). |
1267 |
When writing new records to the index file, the data is preceded by '\n'
|
1641.1.2
by Robert Collins
Change knit index files to be robust in the presence of partial writes. |
1268 |
to ensure that records always start on new lines even if the last write was
|
1269 |
interrupted. As a result it's normal for the last line in the index to be
|
|
1270 |
missing a trailing newline. One can be added with no harmful effects.
|
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1271 |
"""
|
1272 |
||
1666.1.6
by Robert Collins
Make knit the default format. |
1273 |
HEADER = "# bzr knit index 8\n" |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1274 |
|
1596.2.18
by Robert Collins
More microopimisations on index reading, now down to 16000 records/seconds. |
1275 |
# speed of knit parsing went from 280 ms to 280 ms with slots addition.
|
1276 |
# __slots__ = ['_cache', '_history', '_transport', '_filename']
|
|
1277 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1278 |
def _cache_version(self, version_id, options, pos, size, parents):
    """Record one version in the history list and the index cache.

    This is inlined into _load_data for performance. KEEP IN SYNC.
    (It saves 60ms, 25% of the __init__ overhead on local 4000 record
     indexes).

    A version seen for the first time is appended to _history and gets
    the next sequence number; a repeated version keeps the sequence
    number of its first entry while the rest of its record is replaced.
    """
    cache = self._cache
    if version_id in cache:
        # Repeat entry: the 'official' index stays that of the 1st entry.
        sequence = cache[version_id][5]
    else:
        history = self._history
        sequence = len(history)
        history.append(version_id)
    record = (version_id, options, pos, size, parents, sequence)
    cache[version_id] = record
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1298 |
|
1946.2.1
by John Arbash Meinel
2 changes to knits. Delay creating the .knit or .kndx file until we have actually tried to write data. Because of this, we must allow the Knit to create the prefix directories |
1299 |
def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False, dir_mode=None):
    """Load the index from the transport, optionally creating it.

    :param transport: Transport the index file is read from and written to.
    :param filename: Name of the index file on that transport.
    :param mode: Access mode; a missing index is only created when this
        is 'w'.
    :param create: If True (and mode is 'w'), a missing index is created
        rather than propagating NoSuchFile.
    :param file_mode: Forwarded to _KnitComponentFile.__init__.
    :param create_parent_dir: Forwarded to _KnitComponentFile.__init__.
    :param delay_create: If True, a missing index is not written now; the
        instance is only flagged through _need_to_create.
    :param dir_mode: Forwarded to _KnitComponentFile.__init__.
    :raises NoSuchFile: if the index could not be loaded and creation was
        not requested (mode is not 'w', or create is false).
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    # version id -> (version_id, options, pos, size, parents, index)
    self._cache = {}
    # position in _history is the 'official' index for a revision
    # but the values may have come from a newer entry.
    # so - wc -l of a knit index is != the number of unique names
    # in the knit.
    self._history = []
    try:
        fp = self._transport.get(self._filename)
        try:
            # _load_data may raise NoSuchFile if the target knit is
            # completely empty.
            _load_data(self, fp)
        finally:
            # always release the file, whether or not loading succeeded
            fp.close()
    except NoSuchFile:
        if mode != 'w' or not create:
            raise
        elif delay_create:
            # defer writing the header until the first add_versions call
            self._need_to_create = True
        else:
            # create the index right away, containing only the header
            self._transport.put_bytes_non_atomic(
                self._filename, self.HEADER, mode=self._file_mode)
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1327 |
|
1328 |
def get_graph(self):
    """Return a list of (version_id, parents) pairs for every cached version.

    The parents are returned exactly as recorded, and the order of the
    pairs follows the iteration order of the cache.
    """
    pairs = []
    for version_id, entry in self._cache.iteritems():
        pairs.append((version_id, entry[4]))
    return pairs
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1331 |
|
2530.1.1
by Aaron Bentley
Make topological sorting optional for get_ancestry |
1332 |
def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: Version ids whose ancestry is wanted.
    :param topo_sorted: If True the result comes from topo_sort; otherwise
        the keys of the collected graph are returned in no defined order.
    :return: The requested versions plus all of their ancestors that are
        present in this index (ghost parents are dropped).
    :raises RevisionNotPresent: if a requested version is not in the index.
    """
    index_cache = self._cache
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        try:
            recorded_parents = index_cache[current][4]
        except KeyError:
            raise RevisionNotPresent(current, self._filename)
        # trim ghosts: keep only the parents this index knows about
        present_parents = [p for p in recorded_parents if p in index_cache]
        # queue the parents that have not been completed yet
        for parent in present_parents:
            if parent not in ancestry:
                todo.add(parent)
        ancestry[current] = present_parents
    if topo_sorted:
        return topo_sort(ancestry.items())
    return ancestry.keys()
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1351 |
|
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
1352 |
def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    The requested versions must all be present in the index; parents that
    are not in the index (ghosts) are kept in the result as nodes without
    parents.
    """
    self.check_versions_present(versions)
    index_cache = self._cache
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        try:
            recorded_parents = index_cache[current][4]
        except KeyError:
            # ghost, fake it
            ancestry[current] = []
            continue
        # queue the parents that have not been completed yet
        for parent in recorded_parents:
            if parent not in ancestry:
                todo.add(parent)
        ancestry[current] = recorded_parents
    return topo_sort(ancestry.items())
|
1371 |
||
2592.3.43
by Robert Collins
A knit iter_parents API. |
1372 |
def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents), with parents
        as a tuple. Requested version_ids not present in the versioned
        file are simply skipped. The order is undefined, allowing for
        different optimisations in the underlying implementation.
    """
    lookup_parents = self.get_parents
    for version_id in version_ids:
        try:
            yield version_id, tuple(lookup_parents(version_id))
        except KeyError:
            # not in this index: skip it silently
            continue
|
|
1386 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1387 |
def num_versions(self):
    """Return the number of distinct version ids held in the history."""
    history = self._history
    return len(history)

# len(index) is the same as index.num_versions()
__len__ = num_versions
|
1391 |
||
1392 |
def get_versions(self):
    """Get all the versions in the file, in history order.

    The result is not topologically sorted. The internal history list
    itself is returned, not a copy.
    """
    all_versions = self._history
    return all_versions
1395 |
||
1594.2.8
by Robert Collins
add ghost aware apis to knits. |
1396 |
def _version_list_to_index(self, versions):
    """Encode a list of version ids as the parent field of an index line.

    Versions present in the cache are written as their sequence number;
    all others are written as the version id prefixed by '.'. The encoded
    references are joined by single spaces.
    """
    index_cache = self._cache

    def encode(version):
        if version in index_cache:
            # -- inlined lookup() --
            return str(index_cache[version][5])
        return '.' + version

    return ' '.join([encode(version) for version in versions])
1407 |
||
2592.3.71
by Robert Collins
Basic version of knit-based repository operating, many tests failing. |
1408 |
def add_version(self, version_id, options, index_memo, parents):
    """Add a single version record to the index.

    This wraps the arguments into a one-element batch for add_versions.
    """
    record = (version_id, options, index_memo, parents)
    self.add_versions((record,))
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1411 |
|
2841.2.1
by Robert Collins
* Commit no longer checks for new text keys during insertion when the |
1412 |
def add_versions(self, versions, random_id=False):
    """Add multiple versions to the index.

    Each record is cached in memory as its index line is formatted, then
    all new lines are written with a single transport call. If anything
    fails, the in-memory history and cache are restored before the
    exception is re-raised.

    :param versions: a list of tuples:
        (version_id, options, (index, pos, size), parents).
        The ``index`` element of the memo is unpacked but not used here.
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed. (This method does
        not consult the flag.)
    """
    lines = []
    # snapshots used to roll back the in-memory state on failure
    orig_history = self._history[:]
    orig_cache = self._cache.copy()

    try:
        for version_id, options, (index, pos, size), parents in versions:
            # every record starts with '\n' so that it begins on a new
            # line even if the previous write was interrupted
            line = "\n%s %s %s %s %s :" % (version_id,
                                           ','.join(options),
                                           pos,
                                           size,
                                           self._version_list_to_index(parents))
            assert isinstance(line, str), \
                'content must be utf-8 encoded: %r' % (line,)
            lines.append(line)
            self._cache_version(version_id, options, pos, size, parents)
        if not self._need_to_create:
            self._transport.append_bytes(self._filename, ''.join(lines))
        else:
            # creation was delayed: write the header and the new records
            # together as a brand new file
            sio = StringIO()
            sio.write(self.HEADER)
            sio.writelines(lines)
            sio.seek(0)
            self._transport.put_file_non_atomic(self._filename, sio,
                                create_parent_dir=self._create_parent_dir,
                                mode=self._file_mode,
                                dir_mode=self._dir_mode)
            self._need_to_create = False
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = orig_history
        self._cache = orig_cache
        raise
|
|
1452 |
||
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1453 |
def has_version(self, version_id):
    """Report whether version_id has an entry in the index cache."""
    known_versions = self._cache
    return version_id in known_versions
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1456 |
|
1457 |
def get_position(self, version_id):
    """Return details needed to access the version.

    .kndx indices do not support split-out data, so the index field of
    the result is always None.

    :return: a tuple (None, data position, size) to hand to the access
        logic to get the record.
    :raises KeyError: if version_id is not in the cache.
    """
    record = self._cache[version_id]
    position = record[2]
    size = record[3]
    return (None, position, size)
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1468 |
|
1469 |
def get_method(self, version_id):
    """Return the compression method of the specified version.

    :return: 'fulltext' if that flag is among the version's options,
        otherwise 'line-delta' if that flag is present.
    :raises RevisionNotPresent: if version_id is not in the index.
    :raises errors.KnitIndexUnknownMethod: if neither flag is present.
    """
    try:
        flags = self._cache[version_id][1]
    except KeyError:
        raise RevisionNotPresent(version_id, self._filename)
    if 'fulltext' in flags:
        return 'fulltext'
    if 'line-delta' in flags:
        return 'line-delta'
    raise errors.KnitIndexUnknownMethod(self._full_path(), flags)
1481 |
||
1482 |
def get_options(self, version_id):
    """Return the options recorded for version_id.

    The cached value is returned as-is. add_versions treats it as a
    sequence of flag strings (they are only comma-joined, e.g. foo,bar,
    when written to the index file).

    :raises KeyError: if version_id is not in the cache.
    """
    record = self._cache[version_id]
    return record[1]
1488 |
||
1489 |
def get_parents(self, version_id):
    """Return the parents of the specified version, ignoring ghosts.

    Only parents that have an entry in this index are returned, in their
    recorded order.

    :raises KeyError: if version_id is not in the cache.
    """
    present = []
    for parent in self._cache[version_id][4]:
        if parent in self._cache:
            present.append(parent)
    return present
|
1493 |
||
1494 |
def get_parents_with_ghosts(self, version_id):
    """Return every recorded parent of the specified version, ghosts included.

    :raises KeyError: if version_id is not in the cache.
    """
    record = self._cache[version_id]
    return record[4]
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1497 |
|
1498 |
def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: for the first version id that has no entry
        in the index.
    """
    known_versions = self._cache
    for candidate in version_ids:
        if candidate in known_versions:
            continue
        raise RevisionNotPresent(candidate, self._filename)
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
1504 |
|
1505 |
||
2592.3.2
by Robert Collins
Implement a get_graph for a new KnitGraphIndex that will implement a KnitIndex on top of the GraphIndex API. |
1506 |
class KnitGraphIndex(object): |
1507 |
"""A knit index that builds on GraphIndex."""
|
|
1508 |
||
2592.3.34
by Robert Collins
Rough unfactored support for parentless KnitGraphIndexs. |
1509 |
def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
    """Construct a KnitGraphIndex on a graph_index.

    :param graph_index: An implementation of bzrlib.index.GraphIndex.
    :param deltas: Allow delta-compressed records.
    :param parents: If True, record knits parents, if not do not record
        parents.
    :param add_callback: If not None, allow additions to the index and call
        this callback with a list of added GraphIndex nodes:
        [(node, value, node_refs), ...]
    :raises KnitCorrupt: if deltas are requested without parent tracking.
    """
    self._graph_index = graph_index
    self._deltas = deltas
    self._add_callback = add_callback
    self._parents = parents
    deltas_without_parents = deltas and not parents
    if deltas_without_parents:
        raise KnitCorrupt(self, "Cannot do delta compression without "
            "parent tracking.")
|
2592.3.2
by Robert Collins
Implement a get_graph for a new KnitGraphIndex that will implement a KnitIndex on top of the GraphIndex API. |
1527 |
|
2624.2.5
by Robert Collins
Change bzrlib.index.Index keys to be 1-tuples, not strings. |
1528 |
def _get_entries(self, keys, check_present=False):
    """Get the entries for keys.

    :param keys: An iterable of index keys, - 1-tuples.
    :param check_present: If True, raise RevisionNotPresent once all found
        entries have been yielded and some requested key was not found.
    :return: A generator of 4-tuples. With parent tracking the nodes from
        the graph index are yielded unchanged; without it each node is
        padded with an empty tuple as its last element.
    """
    wanted = set(keys)
    seen = set()
    has_parents = self._parents
    for node in self._graph_index.iter_entries(wanted):
        if has_parents:
            yield node
        else:
            # adapt parentless index to the rest of the code.
            yield node[0], node[1], node[2], ()
        seen.add(node[1])
    if not check_present:
        return
    absent = wanted.difference(seen)
    if absent:
        raise RevisionNotPresent(absent.pop(), self)
|
2592.3.17
by Robert Collins
Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile. |
1548 |
|
1549 |
def _present_keys(self, version_ids):
    """Return the set of keys that _get_entries finds for version_ids."""
    found = set()
    for node in self._get_entries(version_ids):
        found.add(node[1])
    return found
2592.3.17
by Robert Collins
Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile. |
1552 |
|
2592.3.34
by Robert Collins
Rough unfactored support for parentless KnitGraphIndexs. |
1553 |
def _parentless_ancestry(self, versions):
    """Honour the get_ancestry API for parentless knit indices.

    Without parent information the ancestry of a set of versions is just
    those versions, so this only verifies that they are all present.

    :raises RevisionNotPresent: if any requested version is missing.
    """
    requested = self._version_ids_to_keys(versions)
    available = self._present_keys(requested)
    absent = set(requested).difference(available)
    if not absent:
        return list(self._keys_to_version_ids(available))
    raise RevisionNotPresent(absent.pop(), self)
2592.3.34
by Robert Collins
Rough unfactored support for parentless KnitGraphIndexs. |
1561 |
|
2592.3.4
by Robert Collins
Implement get_ancestry/get_ancestry_with_ghosts for KnitGraphIndex. |
1562 |
def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: Version ids whose ancestry is wanted.
    :param topo_sorted: If True the result is ordered by topo_sort;
        otherwise it follows the iteration order of the collected graph.
    :return: A list of version ids: the requested versions plus every
        ancestor that was found in the index.
    :raises RevisionNotPresent: if a requested version is not in the index.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    # keys that were asked for but that the index did not return
    ghosts = set()
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        found = set()
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            # don't ask for ghosts - otherwise we can end up looping
            # with pending being entirely ghosted.
            graph[key] = [parent for parent in node_refs[0]
                if parent not in ghosts]
            # queue parents
            for parent in graph[key]:
                # don't examine known nodes again
                if parent in graph:
                    continue
                pending.add(parent)
            found.add(key)
        # whatever was requested this round but not returned is a ghost
        ghosts.update(this_iteration.difference(found))
    if versions.difference(graph):
        raise RevisionNotPresent(versions.difference(graph).pop(), self)
    if topo_sorted:
        result_keys = topo_sort(graph.items())
    else:
        result_keys = graph.iterkeys()
    # keys are 1-tuples; hand back the bare version ids
    return [key[0] for key in result_keys]
|
2592.3.4
by Robert Collins
Implement get_ancestry/get_ancestry_with_ghosts for KnitGraphIndex. |
1600 |
|
1601 |
def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: Version ids whose ancestry is wanted.
    :return: A list of version ids ordered by topo_sort. Ancestors that
        are referenced but not found in the index are included as nodes
        without parents.
    :raises RevisionNotPresent: if a requested version is not in the index.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            graph[key] = node_refs[0]
            # queue parents
            for parent in graph[key]:
                # don't examine known nodes again
                if parent in graph:
                    continue
                pending.add(parent)
        # keys requested this round that the index did not return
        missing_versions = this_iteration.difference(graph)
        # a missing key is only an error if the caller asked for it
        missing_needed = versions.intersection(missing_versions)
        if missing_needed:
            raise RevisionNotPresent(missing_needed.pop(), self)
        for missing_version in missing_versions:
            # add a key, no parents
            graph[missing_version] = []
            pending.discard(missing_version) # don't look for it
    result_keys = topo_sort(graph.items())
    # keys are 1-tuples; hand back the bare version ids
    return [key[0] for key in result_keys]
|
2592.3.4
by Robert Collins
Implement get_ancestry/get_ancestry_with_ghosts for KnitGraphIndex. |
1634 |
|
2592.3.2
by Robert Collins
Implement a get_graph for a new KnitGraphIndex that will implement a KnitIndex on top of the GraphIndex API. |
1635 |
def get_graph(self):
    """Return the graph held by this knit index.

    :return: A list of (version_id, parents) tuples. In a parentless
        index every version is reported with an empty parents tuple.
    """
    if self._parents:
        # Keys and references are 1-tuples; unwrap them to plain ids. The
        # first reference list of each entry is the one read here.
        return [(key[0], tuple([parent[0] for parent in refs[0]]))
                for (index, key, value, refs)
                in self._graph_index.iter_all_entries()]
    return [(version_id, ()) for version_id in self.get_versions()]
|
2592.3.2
by Robert Collins
Implement a get_graph for a new KnitGraphIndex that will implement a KnitIndex on top of the GraphIndex API. |
1643 |
|
2592.3.43
by Robert Collins
A knit iter_parents API. |
1644 |
def iter_parents(self, version_ids):
    """Yield (version_id, parents) for the requested version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator of (version_id, parents) tuples. Requested
        version_ids not present in the versioned file are simply skipped,
        and parents that are not present in the index are left out of the
        parents tuple. The order is undefined, allowing for different
        optimisations in the underlying implementation.
    """
    wanted_keys = self._version_ids_to_keys(version_ids)
    if not self._parents:
        # Parentless index: every entry is reported without parents.
        for entry in self._get_entries(wanted_keys):
            yield entry[1][0], ()
        return
    entries = set(self._get_entries(wanted_keys))
    referenced = set()
    present = set()
    for entry in entries:
        referenced.update(entry[3][0])
        # any node we are querying must be present
        present.add(entry[1])
    # Only parents that were not themselves returned need a presence lookup.
    present.update(self._present_keys(referenced.difference(present)))
    for entry in entries:
        parents = tuple([parent[0] for parent in entry[3][0]
                         if parent in present])
        yield entry[1][0], parents
2592.3.43
by Robert Collins
A knit iter_parents API. |
1672 |
|
2592.3.5
by Robert Collins
Implement KnitGraphIndex.num_versions. |
1673 |
def num_versions(self):
    """Return the number of entries in the underlying graph index."""
    count = 0
    for entry in self._graph_index.iter_all_entries():
        count += 1
    return count

# len(index) reports the same figure as num_versions().
__len__ = num_versions
1677 |
||
1678 |
def get_versions(self):
    """Return every version id in the index, not topologically sorted."""
    versions = []
    for entry in self._graph_index.iter_all_entries():
        # entry[1] is the key tuple; its first element is the version id.
        versions.append(entry[1][0])
    return versions
2592.3.6
by Robert Collins
Implement KnitGraphIndex.get_versions. |
1681 |
|
2592.3.9
by Robert Collins
Implement KnitGraphIndex.has_version. |
1682 |
def has_version(self, version_id):
    """Return True if version_id is in the index."""
    wanted = self._version_ids_to_keys([version_id])
    found = self._present_keys(wanted)
    return len(found) == 1
1685 |
||
1686 |
def _keys_to_version_ids(self, keys):
    """Return a tuple holding the first element of each key in keys."""
    version_ids = []
    for key in keys:
        version_ids.append(key[0])
    return tuple(version_ids)
|
2592.3.6
by Robert Collins
Implement KnitGraphIndex.get_versions. |
1688 |
|
2592.3.10
by Robert Collins
Implement KnitGraphIndex.get_position. |
1689 |
def get_position(self, version_id):
    """Return details needed to access the version.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    entry = self._get_node(version_id)
    # The stored value is a one character flag followed by "<pos> <size>".
    fields = entry[2][1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return entry[0], position, size
|
2592.3.10
by Robert Collins
Implement KnitGraphIndex.get_position. |
1698 |
|
2592.3.11
by Robert Collins
Implement KnitGraphIndex.get_method. |
1699 |
def get_method(self, version_id):
    """Return the compression method of the specified version.

    An index that does not store deltas always answers 'fulltext' without
    looking the version up.
    """
    if self._deltas:
        # The second reference list is what _parent_compression inspects.
        compression_refs = self._get_node(version_id)[3][1]
        return self._parent_compression(compression_refs)
    return 'fulltext'
2592.3.14
by Robert Collins
Implement KnitGraphIndex.get_options. |
1704 |
|
1705 |
def _parent_compression(self, reference_list):
    """Return 'line-delta' for a non-empty reference_list, else 'fulltext'."""
    # use the second reference list to decide if this is delta'd or not.
    if len(reference_list) == 0:
        return 'fulltext'
    return 'line-delta'
2592.3.11
by Robert Collins
Implement KnitGraphIndex.get_method. |
1711 |
|
1712 |
def _get_node(self, version_id):
    """Return the first index entry found for version_id.

    :raises RevisionNotPresent: if no entry is returned for the version.
    """
    try:
        wanted = self._version_ids_to_keys([version_id])
        entries = list(self._get_entries(wanted))
        return entries[0]
    except IndexError:
        raise RevisionNotPresent(version_id, self)
|
2592.3.11
by Robert Collins
Implement KnitGraphIndex.get_method. |
1717 |
|
2592.3.14
by Robert Collins
Implement KnitGraphIndex.get_options. |
1718 |
def get_options(self, version_id):
    """Return the list of options for version_id.

    The first item is the compression method; 'no-eol' is appended when
    the stored value starts with 'N'.

    e.g. ['fulltext', 'no-eol']
    """
    node = self._get_node(version_id)
    if self._deltas:
        method = self._parent_compression(node[3][1])
    else:
        method = 'fulltext'
    options = [method]
    if node[2][0] == 'N':
        options.append('no-eol')
    return options
2592.3.11
by Robert Collins
Implement KnitGraphIndex.get_method. |
1731 |
|
2592.3.15
by Robert Collins
Implement KnitGraphIndex.get_parents/get_parents_with_ghosts. |
1732 |
def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    :raises errors.RevisionNotPresent: if iter_parents yields nothing for
        version_id.
    """
    found = list(self.iter_parents([version_id]))
    if found:
        # Each item is (version_id, parents); only the parents are wanted.
        return found[0][1]
    # missing key
    raise errors.RevisionNotPresent(version_id, self)
|
2592.3.15
by Robert Collins
Implement KnitGraphIndex.get_parents/get_parents_with_ghosts. |
1739 |
|
1740 |
def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts."""
    wanted = self._version_ids_to_keys([version_id])
    # The entries are fetched even for a parentless index; presumably this
    # is so check_present can still report a missing version.
    nodes = list(self._get_entries(wanted, check_present=True))
    if self._parents:
        return self._keys_to_version_ids(nodes[0][3][0])
    return ()
2592.3.15
by Robert Collins
Implement KnitGraphIndex.get_parents/get_parents_with_ghosts. |
1747 |
|
2592.3.16
by Robert Collins
Implement KnitGraphIndex.check_versions_present. |
1748 |
def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: naming one of the missing keys, if any
        requested version is absent.
    """
    wanted = self._version_ids_to_keys(version_ids)
    absent = wanted.difference(self._present_keys(wanted))
    if absent:
        raise RevisionNotPresent(absent.pop(), self)
2592.3.16
by Robert Collins
Implement KnitGraphIndex.check_versions_present. |
1755 |
|
2592.3.71
by Robert Collins
Basic version of knit-based repository operating, many tests failing. |
1756 |
def add_version(self, version_id, options, access_memo, parents):
    """Add a single version record to the index via add_versions."""
    record = (version_id, options, access_memo, parents)
    return self.add_versions((record,))
2592.3.17
by Robert Collins
Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile. |
1759 |
|
2841.2.1
by Robert Collins
* Commit no longer checks for new text keys during insertion when the |
1760 |
def add_versions(self, versions, random_id=False):
    """Add multiple versions to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param versions: a list of tuples:
        (version_id, options, access_memo, parents), where access_memo is
        an (index, pos, size) tuple.
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :raises errors.ReadOnlyError: if no add callback has been set.
    :raises KnitCorrupt: if a line-delta is added to a non-delta index, if
        parents are given to a parentless index, or if an existing entry
        has different details from the one being added.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.

    # Maps each key to the (value, node_refs) pair we intend to insert.
    keys = {}
    for (version_id, options, access_memo, parents) in versions:
        # Only pos and size are recorded in the value; the index element of
        # the memo is unpacked but not used here.
        index, pos, size = access_memo
        key = (version_id, )
        parents = tuple((parent, ) for parent in parents)
        if 'no-eol' in options:
            value = 'N'
        else:
            value = ' '
        value += "%d %d" % (pos, size)
        if not self._deltas:
            if 'line-delta' in options:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if self._parents:
            if self._deltas:
                if 'line-delta' in options:
                    # The first parent is recorded as the compression parent.
                    node_refs = (parents, (parents[0],))
                else:
                    node_refs = (parents, ())
            else:
                node_refs = (parents, )
        else:
            if parents:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        keys[key] = (value, node_refs)
    if not random_id:
        # check for dups: identical existing entries are dropped from the
        # insertion set, differing ones are an error.
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
    if self._parents:
        result = [(key, value, node_refs)
                  for key, (value, node_refs) in keys.items()]
    else:
        # Parentless indices take nodes without a references element.
        result = [(key, value)
                  for key, (value, node_refs) in keys.items()]
    self._add_callback(result)
2592.3.17
by Robert Collins
Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile. |
1821 |
|
2624.2.5
by Robert Collins
Change bzrlib.index.Index keys to be 1-tuples, not strings. |
1822 |
def _version_ids_to_keys(self, version_ids):
    """Return a set holding a 1-tuple key for each version id."""
    keys = set()
    for version_id in version_ids:
        keys.add((version_id, ))
    return keys
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
1824 |
|
1825 |
||
1826 |
class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
        _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        """
        self._transport = transport
        self._filename = filename
        # Creation settings, used by add_raw_records and create().
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is written to the .knit file in one operation: a put when
        the file still needs creating, an append otherwise.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            start = 0
        else:
            # append_bytes' return value is used as the first record's offset.
            start = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for size in sizes:
            memos.append((None, start, size))
            start += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a set of records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        read_vector = []
        for (index, pos, size) in memos_for_retrieval:
            read_vector.append((pos, size))
        for pos, data in self._transport.readv(self._filename, read_vector):
            yield data
1904 |
||
1905 |
||
1906 |
class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            # The memo uses the offset and length reported by the writer,
            # not the position within raw_data.
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a set of records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group consecutive same-index requests together so that
        # each run is served by a single readv reader.
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer that add_raw_records will use.
        :param index: The index to report in memos for data written through
            writer. When it is not None it is also registered in
            self.indices.
        :param transport_packname: A (transport, packname) tuple stored in
            self.indices for index.
        """
        # Unpacked in the body rather than in the signature: tuple
        # parameters cannot be introspected and are not valid in newer
        # Python versions. Callers still pass a single 2-tuple positionally.
        transport, packname = transport_packname
        if index is not None:
            self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index
|
1991 |
||
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
1992 |
|
1993 |
class _KnitData(object): |
|
2670.2.2
by Robert Collins
* In ``bzrlib.knit`` the internal interface has been altered to use |
1994 |
"""Manage extraction of data from a KnitAccess, caching and decompressing.
|
1995 |
|
|
1996 |
The KnitData class provides the logic for parsing and using knit records,
|
|
1997 |
making use of an access method for the low level read and write operations.
|
|
1998 |
"""
|
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
1999 |
|
2000 |
def __init__(self, access):
    """Create a KnitData object.

    :param access: The access method to use. Access methods such as
        _KnitAccess manage the insertion of raw records and the subsequent
        retrieval of the same.
    """
    self._access = access
    self._checked = False
    # Caching starts disabled with an empty cache.
    self._do_cache = False
    # TODO: jam 20060713 conceptually, this could spill to disk
    #       if the cached size gets larger than a certain amount
    #       but it complicates the model a bit, so for now just use
    #       a simple dictionary
    self._cache = {}
|
1594.2.24
by Robert Collins
Make use of the transaction finalisation warning support to implement in-knit caching. |
2015 |
|
1863.1.1
by John Arbash Meinel
Allow Versioned files to do caching if explicitly asked, and implement for Knit |
2016 |
def enable_cache(self):
    """Turn on caching of reads by setting the _do_cache flag."""
    self._do_cache = True
1863.1.1
by John Arbash Meinel
Allow Versioned files to do caching if explicitly asked, and implement for Knit |
2019 |
|
1594.2.24
by Robert Collins
Make use of the transaction finalisation warning support to implement in-knit caching. |
2020 |
def clear_cache(self):
    """Clear the record cache and turn caching off again."""
    self._cache = {}
    self._do_cache = False
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2024 |
|
2025 |
def _open_file(self):
    """Return whatever the access method's open_file() returns."""
    handle = self._access.open_file()
    return handle
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2027 |
|
2888.1.1
by Robert Collins
(robertc) Use prejoined content for knit storage when performing a full-text store of unannotated content. (Robert Collins) |
2028 |
def _record_to_data(self, version_id, digest, lines, dense_lines=None):
    """Convert version_id, digest, lines into a raw data block.

    The record is a "version <id> <line count> <digest>" header line, the
    content, and an "end <id>" trailer line, passed through bytes_to_gzip.

    :param dense_lines: The bytes of lines but in a denser form. For
        instance, if lines is a list of 1000 bytestrings each ending in a
        newline, dense_lines may be a list with one line in it, containing
        all the 1000's lines and their newlines. Using dense_lines if it is
        already known is a win because the string join to create bytes in
        this function spends less time resizing the final string.
    :return: (len, data) where data is the result of bytes_to_gzip and len
        is its length.
    """
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    trailer = "end %s\n" % version_id
    # Note: using a string copy here increases memory pressure with e.g.
    # ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
    # when doing the initial commit of a mozilla tree. RBC 20070921
    record_text = ''.join(chain([header], dense_lines or lines, [trailer]))
    assert record_text.__class__ == str
    compressed_bytes = bytes_to_gzip(record_text)
    return len(compressed_bytes), compressed_bytes
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2051 |
|
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
2052 |
def add_raw_records(self, sizes, raw_data):
    """Append prepared records to the data file via the access method.

    :param sizes: An iterable containing the size of each raw data segment.
    :param raw_data: A bytestring containing the data.
    :return: a list of index data for the way the data was stored.
        See the access method add_raw_records documentation for more
        details.
    """
    memos = self._access.add_raw_records(sizes, raw_data)
    return memos
2329.1.2
by John Arbash Meinel
Remove some spurious whitespace changes. |
2062 |
|
1596.2.8
by Robert Collins
Join knits with the original gzipped data avoiding recompression. |
2063 |
def _parse_record_header(self, version_id, raw_data): |
2064 |
"""Parse a record header for consistency.
|
|
2065 |
||
2066 |
:return: the header and the decompressor stream.
|
|
2067 |
as (stream, header_record)
|
|
2068 |
"""
|
|
2069 |
df = GzipFile(mode='rb', fileobj=StringIO(raw_data)) |
|
2329.1.1
by John Arbash Meinel
Update _KnitData parser to raise more helpful errors when it detects corruption. |
2070 |
try: |
2071 |
rec = self._check_header(version_id, df.readline()) |
|
2358.3.4
by Martin Pool
Fix mangled knit.py changes |
2072 |
except Exception, e: |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
2073 |
raise KnitCorrupt(self._access, |
2329.1.1
by John Arbash Meinel
Update _KnitData parser to raise more helpful errors when it detects corruption. |
2074 |
"While reading {%s} got %s(%s)" |
2075 |
% (version_id, e.__class__.__name__, str(e))) |
|
2358.3.4
by Martin Pool
Fix mangled knit.py changes |
2076 |
return df, rec |
2163.2.4
by John Arbash Meinel
Split _KnitData._parse_header up, so that we have 1 readlines() call, rather than readline+readlines() |
2077 |
|
2358.3.4
by Martin Pool
Fix mangled knit.py changes |
2078 |
def _check_header(self, version_id, line): |
2079 |
rec = line.split() |
|
2080 |
if len(rec) != 4: |
|
2081 |
raise KnitCorrupt(self._access, |
|
2163.2.4
by John Arbash Meinel
Split _KnitData._parse_header up, so that we have 1 readlines() call, rather than readline+readlines() |
2082 |
'unexpected number of elements in record header') |
2249.5.12
by John Arbash Meinel
Change the APIs for VersionedFile, Store, and some of Repository into utf-8 |
2083 |
if rec[1] != version_id: |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
2084 |
raise KnitCorrupt(self._access, |
2163.2.4
by John Arbash Meinel
Split _KnitData._parse_header up, so that we have 1 readlines() call, rather than readline+readlines() |
2085 |
'unexpected version, wanted %r, got %r' |
2086 |
% (version_id, rec[1])) |
|
2087 |
return rec |
|
1596.2.8
by Robert Collins
Join knits with the original gzipped data avoiding recompression. |
2088 |
|
2089 |
def _parse_record(self, version_id, data): |
|
1628.1.2
by Robert Collins
More knit micro-optimisations. |
2090 |
# profiling notes:
|
2091 |
# 4168 calls in 2880 217 internal
|
|
2092 |
# 4168 calls to _parse_record_header in 2121
|
|
2093 |
# 4168 calls to readlines in 330
|
|
2163.2.4
by John Arbash Meinel
Split _KnitData._parse_header up, so that we have 1 readlines() call, rather than readline+readlines() |
2094 |
df = GzipFile(mode='rb', fileobj=StringIO(data)) |
2095 |
||
2329.1.1
by John Arbash Meinel
Update _KnitData parser to raise more helpful errors when it detects corruption. |
2096 |
try: |
2097 |
record_contents = df.readlines() |
|
2358.3.4
by Martin Pool
Fix mangled knit.py changes |
2098 |
except Exception, e: |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
2099 |
raise KnitCorrupt(self._access, |
2329.1.1
by John Arbash Meinel
Update _KnitData parser to raise more helpful errors when it detects corruption. |
2100 |
"While reading {%s} got %s(%s)" |
2101 |
% (version_id, e.__class__.__name__, str(e))) |
|
2163.2.4
by John Arbash Meinel
Split _KnitData._parse_header up, so that we have 1 readlines() call, rather than readline+readlines() |
2102 |
header = record_contents.pop(0) |
2103 |
rec = self._check_header(version_id, header) |
|
2104 |
||
2105 |
last_line = record_contents.pop() |
|
2329.1.1
by John Arbash Meinel
Update _KnitData parser to raise more helpful errors when it detects corruption. |
2106 |
if len(record_contents) != int(rec[2]): |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
2107 |
raise KnitCorrupt(self._access, |
2329.1.1
by John Arbash Meinel
Update _KnitData parser to raise more helpful errors when it detects corruption. |
2108 |
'incorrect number of lines %s != %s' |
2109 |
' for version {%s}' |
|
2110 |
% (len(record_contents), int(rec[2]), |
|
2111 |
version_id)) |
|
2163.2.4
by John Arbash Meinel
Split _KnitData._parse_header up, so that we have 1 readlines() call, rather than readline+readlines() |
2112 |
if last_line != 'end %s\n' % rec[1]: |
2592.3.66
by Robert Collins
Allow adaption of KnitData to pack files. |
2113 |
raise KnitCorrupt(self._access, |
2163.2.4
by John Arbash Meinel
Split _KnitData._parse_header up, so that we have 1 readlines() call, rather than readline+readlines() |
2114 |
'unexpected version end line %r, wanted %r' |
2115 |
% (last_line, version_id)) |
|
1596.2.8
by Robert Collins
Join knits with the original gzipped data avoiding recompression. |
2116 |
df.close() |
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2117 |
return record_contents, rec[3] |
2118 |
||
1596.2.8
by Robert Collins
Join knits with the original gzipped data avoiding recompression. |
2119 |
def read_records_iter_raw(self, records):
    """Read text records from data file and yield raw data.

    This unpacks enough of each freshly read record to validate that its
    id is as expected, but that is all.  Records already in the cache
    when iteration starts are yielded from the cache without being
    validated again.

    :param records: A sequence of (version_id, index_memo) entries.
    :return: Yields (version_id, raw_data) in the order requested.
    """
    if not len(records):
        return

    # Decide once, up front, which records are served from the cache.
    # Re-checking the live cache inside the loop is unsafe: this generator
    # fills the cache as it goes (and callers may change it between
    # yields), so a repeated version id would be taken from the cache on
    # its second occurrence while its raw record is still queued, handing
    # every later record the wrong data.
    cached = {}
    if self._cache:
        # Don't check _cache if it is empty
        for version_id, index_memo in records:
            if version_id in self._cache:
                cached[version_id] = self._cache[version_id]

    # setup an iterator of the external records:
    # uses readv so nice and fast we hope.
    needed_offsets = [index_memo for version_id, index_memo in records
                      if version_id not in cached]
    raw_records = self._access.get_raw_records(needed_offsets)

    for version_id, index_memo in records:
        if version_id in cached:
            # This data has already been validated
            data = cached[version_id]
        else:
            data = raw_records.next()
            # validate the header before caching, so that data which
            # fails validation is never stored as a trusted cache entry.
            df, rec = self._parse_record_header(version_id, data)
            df.close()
            if self._do_cache:
                self._cache[version_id] = data
        yield version_id, data
1596.2.8
by Robert Collins
Join knits with the original gzipped data avoiding recompression. |
2153 |
|
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2154 |
def read_records_iter(self, records):
    """Read text records from data file and yield parsed results.

    Results come back in whatever order is fastest to read, not in the
    order requested: records found in the cache are yielded first, the
    remainder are fetched sorted by their index_memo.  Multiple requests
    for the same record only yield one response.

    :param records: A list of (version_id, index_memo) entries.
    :return: Yields (version_id, contents, digest) in the order read,
        not the order requested.
    """
    if not records:
        return

    if self._cache:
        # Serve cache hits immediately, skipping ids we have already seen.
        seen = set()
        pending = set()
        for record in records:
            version_id = record[0]
            if version_id not in self._cache:
                pending.add(record)
                continue
            if version_id in seen:
                continue
            seen.add(version_id)
            content, digest = self._parse_record(version_id,
                                                 self._cache[version_id])
            yield (version_id, content, digest)
    else:
        pending = set(records)
    to_fetch = sorted(pending, key=operator.itemgetter(1))

    if not to_fetch:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    memos = [index_memo for version_id, index_memo in to_fetch]
    raw_data = self._access.get_raw_records(memos)

    for (version_id, index_memo), data in izip(iter(to_fetch), raw_data):
        content, digest = self._parse_record(version_id, data)
        if self._do_cache:
            self._cache[version_id] = data
        yield version_id, content, digest
1563.2.4
by Robert Collins
First cut at including the knit implementation of versioned_file. |
2199 |
|
2200 |
def read_records(self, records):
    """Read records into a dictionary.

    :param records: The record entries to read; passed unchanged to
        read_records_iter.
    :return: A dict mapping each version id yielded by read_records_iter
        to its (content, digest) pair.
    """
    return dict((record_id, (content, digest))
                for record_id, content, digest
                in self.read_records_iter(records))
|
2207 |
||
1563.2.13
by Robert Collins
InterVersionedFile implemented. |
2208 |
|
2209 |
class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are KnitVersionedFiles."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw (still compressed) records of every needed version
        from source to target in a single _add_raw_records call.  Note
        that the pb argument is replaced by a nested progress bar once
        the copy itself starts.

        :return: The number of versions copied, or whatever the generic
            join returns when converting from plain to annotated.
        """
        source = self.source
        target = self.target
        assert isinstance(source, KnitVersionedFile)
        assert isinstance(target, KnitVersionedFile)

        # If the source and target are mismatched w.r.t. annotations vs
        # plain, the data needs to be converted accordingly
        if source.factory.annotated == target.factory.annotated:
            converter = None
        elif source.factory.annotated:
            converter = self._anno_to_plain_converter
        else:
            # Plain -> annotated means building the annotations from
            # scratch. The generic join code handles this implicitly so
            # we delegate to it.
            return super(InterKnit, self).join(pb, msg, version_ids,
                                               ignore_missing)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            wanted = list(version_ids)
            if None in wanted:
                wanted.remove(None)

            self.source_ancestry = set(source.get_ancestry(wanted))
            present = set(target._index.get_versions())
            # XXX: For efficiency we should not look at the whole index,
            #      we only need to consider the referenced revisions - they
            #      must all be present, or the method must be full-text.
            #      TODO, RBC 20070919
            missing = self.source_ancestry - present

            if not missing:
                return 0
            ordered = topo_sort(source.get_graph())
            to_copy = [candidate for candidate in ordered
                       if (not target.has_version(candidate)
                           and candidate in missing)]

            # plan the join:
            plan = []           # (version_id, options, parents) per copy
            plan_records = []   # (version_id, index_memo) per copy
            scheduled = set()
            source_index = source._index
            for version_id in to_copy:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (target.has_version(parent) or
                            parent in scheduled or
                            not source.has_version(parent))
                index_memo = source_index.get_position(version_id)
                plan_records.append((version_id, index_memo))
                plan.append((version_id, options, parents))
                scheduled.add(version_id)

            # data suck the join:
            total = len(to_copy)
            count = 0
            chunks = []
            raw_records = []
            raw_stream = source._data.read_records_iter_raw(plan_records)
            for (version_id, raw_data), (planned_id, options, parents) in \
                    izip(raw_stream, plan):
                assert version_id == planned_id, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                if converter:
                    size, raw_data = converter(raw_data, version_id, options,
                                               parents)
                else:
                    size = len(raw_data)
                raw_records.append((version_id, options, parents, size))
                chunks.append(raw_data)
            target._add_raw_records(raw_records, ''.join(chunks))
            return count
        finally:
            pb.finished()

    def _anno_to_plain_converter(self, raw_data, version_id, options,
                                 parents):
        """Convert annotated content to plain content.

        The record is parsed with the source's factory and re-serialised
        through the target's factory and _record_to_data.
        """
        data, digest = self.source._data._parse_record(version_id, raw_data)
        source_factory = self.source.factory
        target_factory = self.target.factory
        if 'fulltext' in options:
            content = source_factory.parse_fulltext(data, version_id)
            lines = target_factory.lower_fulltext(content)
        else:
            delta = source_factory.parse_line_delta(data, version_id,
                                                    plain=True)
            lines = target_factory.lower_line_delta(delta)
        return self.target._data._record_to_data(version_id, digest, lines)
|
2324 |
||
1563.2.13
by Robert Collins
InterVersionedFile implemented. |
2325 |
|
2326 |
# Register InterKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)
|
1596.2.24
by Robert Collins
Gzipfile was slightly slower than ideal. |
2327 |
|
2328 |
||
1684.3.3
by Robert Collins
Add a special cased weaves to knit converter. |
2329 |
class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when source is a Weave and target a KnitVersionedFile."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Each needed version is added to the target with add_lines, in the
        order given by topo_sort of the source graph.  Note that the pb
        argument is replaced by a nested progress bar.

        :return: The number of versions added to the target.
        """
        source = self.source
        target = self.target
        assert isinstance(source, bzrlib.weave.Weave)
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            wanted = list(version_ids)

            self.source_ancestry = set(source.get_ancestry(wanted))
            present = set(target._index.get_versions())
            missing = self.source_ancestry - present

            if not missing:
                return 0
            ordered = topo_sort(source.get_graph())
            to_convert = [candidate for candidate in ordered
                          if (not target.has_version(candidate)
                              and candidate in missing)]

            # do the join:
            total = len(to_convert)
            for done, version_id in enumerate(to_convert):
                pb.update("Converting to knit", done, total)
                parents = source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (target.has_version(parent))
                target.add_lines(
                    version_id, parents, source.get_lines(version_id))
            return total
        finally:
            pb.finished()
|
2385 |
||
2386 |
||
2387 |
# Register WeaveToKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)
|
2388 |
||
2389 |
||
2781.1.1
by Martin Pool
merge cpatiencediff from Lukas |
2390 |
# Deprecated alias: use patiencediff.PatienceSequenceMatcher directly
# instead of KnitSequenceMatcher.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher
|
2484.1.1
by John Arbash Meinel
Add an initial function to read knit indexes in pyrex. |
2392 |
|
2393 |
||
2770.1.2
by Aaron Bentley
Convert to knit-only annotation |
2394 |
def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    Every version in the ancestry of revision_id is annotated in the
    order get_ancestry returns it, each from the annotations of its
    parents.  This needs parents to come before their children in that
    order (presumably the ancestry is topologically sorted - verify
    against get_ancestry).

    :param knit: The knit to read texts, parents and deltas from.
    :param revision_id: The version whose annotation is wanted.
    :return: An iterator over the annotation of revision_id, as produced
        by annotate.reannotate.
    """
    ancestry = knit.get_ancestry(revision_id)
    texts = dict(zip(ancestry, knit.get_line_list(ancestry)))
    annotated = {}
    for version in ancestry:
        if version in annotated:
            continue
        parents = knit.get_parents(version)
        # Matching blocks can only be reused when this version is stored
        # as a line-delta against a parent; otherwise pass None.
        blocks = None
        if (len(parents) != 0
                and knit._index.get_method(version) == 'line-delta'):
            _parent, _sha1, _noeol, delta = knit.get_delta(version)
            blocks = KnitContent.get_line_delta_blocks(
                delta, texts[parents[0]], texts[version])
        parent_annotations = [annotated[p] for p in parents]
        annotated[version] = list(annotate.reannotate(
            parent_annotations, texts[version], version, blocks))
    return iter(annotated[revision_id])
2770.1.2
by Aaron Bentley
Convert to knit-only annotation |
2419 |
|
2420 |
||
2484.1.1
by John Arbash Meinel
Add an initial function to read knit indexes in pyrex. |
2421 |
try: |
2484.1.12
by John Arbash Meinel
Switch the layout to use a matching _knit_load_data_py.py and _knit_load_data_c.pyx |
2422 |
from bzrlib._knit_load_data_c import _load_data_c as _load_data |
2484.1.1
by John Arbash Meinel
Add an initial function to read knit indexes in pyrex. |
2423 |
except ImportError: |
2484.1.12
by John Arbash Meinel
Switch the layout to use a matching _knit_load_data_py.py and _knit_load_data_c.pyx |
2424 |
from bzrlib._knit_load_data_py import _load_data_py as _load_data |