5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
1 |
# Copyright (C) 2005-2011 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Repository formats built around versioned files."""
|
|
18 |
||
19 |
||
20 |
from bzrlib.lazy_import import lazy_import |
|
21 |
lazy_import(globals(), """ |
|
5852.1.8
by Jelmer Vernooij
Simplify revision limiting. |
22 |
import itertools
|
23 |
||
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
24 |
from bzrlib import (
|
5850.1.3
by Jelmer Vernooij
Add VersionedFileCheck. |
25 |
check,
|
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
26 |
debug,
|
27 |
fetch as _mod_fetch,
|
|
28 |
fifo_cache,
|
|
29 |
gpg,
|
|
30 |
graph,
|
|
31 |
inventory_delta,
|
|
32 |
lru_cache,
|
|
33 |
osutils,
|
|
34 |
revision as _mod_revision,
|
|
35 |
serializer as _mod_serializer,
|
|
36 |
static_tuple,
|
|
5863.4.1
by Jelmer Vernooij
Move interrepository implementation to vf_repository. |
37 |
symbol_versioning,
|
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
38 |
tsort,
|
39 |
ui,
|
|
40 |
versionedfile,
|
|
41 |
)
|
|
42 |
||
43 |
from bzrlib.recordcounter import RecordCounter
|
|
44 |
from bzrlib.revisiontree import InventoryRevisionTree
|
|
45 |
from bzrlib.testament import Testament
|
|
46 |
""") |
|
47 |
||
48 |
from bzrlib import ( |
|
49 |
errors, |
|
50 |
)
|
|
51 |
from bzrlib.decorators import ( |
|
52 |
needs_read_lock, |
|
53 |
needs_write_lock, |
|
54 |
only_raises, |
|
55 |
)
|
|
56 |
from bzrlib.inventory import ( |
|
57 |
Inventory, |
|
5815.4.2
by Jelmer Vernooij
split out versionedfile-specific stuff from commitbuilder. |
58 |
InventoryDirectory, |
59 |
ROOT_ID, |
|
60 |
entry_factory, |
|
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
61 |
)
|
62 |
||
63 |
from bzrlib.repository import ( |
|
5815.4.2
by Jelmer Vernooij
split out versionedfile-specific stuff from commitbuilder. |
64 |
CommitBuilder, |
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
65 |
InterRepository, |
66 |
MetaDirRepository, |
|
5815.4.5
by Jelmer Vernooij
Use MetaDirVersionedFileRepositoryFormat (a Soyuz worthy name). |
67 |
MetaDirRepositoryFormat, |
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
68 |
Repository, |
5815.4.5
by Jelmer Vernooij
Use MetaDirVersionedFileRepositoryFormat (a Soyuz worthy name). |
69 |
RepositoryFormat, |
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
70 |
)
|
71 |
||
72 |
from bzrlib.trace import ( |
|
73 |
mutter, |
|
74 |
)
|
|
75 |
||
76 |
||
5815.4.5
by Jelmer Vernooij
Use MetaDirVersionedFileRepositoryFormat (a Soyuz worthy name). |
77 |
class VersionedFileRepositoryFormat(RepositoryFormat):
    """Common base for every repository format built on VersionedFiles."""

    supports_full_versioned_files = True
    supports_versioned_directories = True

    # Whether commit adds an inventory delta (True) or a complete inventory
    # (False) to the repository.
    _commit_inv_deltas = True
    # The order in which fetch operations request streams. 'unordered' is
    # the default because it is the cheapest for an origin to provide.
    _fetch_order = 'unordered'
    # Whether this format uses deltas that can be fetched as deltas (e.g.
    # knits, where the knit deltas can be transplanted intact). The default
    # of False ensures that enough data to get a full text out of any fetch
    # stream will be grabbed.
    _fetch_uses_deltas = False
|
94 |
||
95 |
||
5815.4.2
by Jelmer Vernooij
split out versionedfile-specific stuff from commitbuilder. |
96 |
class VersionedFileCommitBuilder(CommitBuilder): |
97 |
"""Commit builder implementation for versioned files based repositories.
|
|
98 |
"""
|
|
99 |
||
100 |
# this commit builder supports the record_entry_contents interface
|
|
101 |
supports_record_entry_contents = True |
|
102 |
||
103 |
# the default CommitBuilder does not manage trees whose root is versioned.
|
|
104 |
_versioned_root = False |
|
105 |
||
106 |
def __init__(self, repository, parents, config, timestamp=None,
             timezone=None, committer=None, revprops=None,
             revision_id=None, lossy=False):
    """Initialise the versioned-file specific state of the builder.

    Every argument is handed unchanged to CommitBuilder.__init__; the
    attributes set afterwards track the inventory and the inventory delta
    that are accumulated while the commit is being built.
    """
    super(VersionedFileCommitBuilder, self).__init__(
        repository, parents, config, timestamp, timezone, committer,
        revprops, revision_id, lossy)
    # The delta is accumulated against the first parent, or against the
    # null revision when there is no parent at all.
    try:
        first_parent = self.parents[0]
    except IndexError:
        first_parent = _mod_revision.NULL_REVISION
    self.basis_delta_revision = first_parent
    self.new_inventory = Inventory(None)
    self._basis_delta = []
    heads_cache = graph.HeadsCache(repository.get_graph())
    self.__heads = heads_cache.heads
    # Memoised answer for the no-op commit check (see any_changes()).
    self._any_changes = False
    # API compatibility: older code that used CommitBuilder did not call
    # .record_delete(), so the delta that is computed would not be valid.
    # Callers that do call record_delete() announce it beforehand through
    # .will_record_deletes(), which flips this flag.
    self._recording_deletes = False
|
127 |
||
128 |
def will_record_deletes(self):
    """Announce that the caller is going to report deletes.

    Calling this switches on the accumulation of an inventory delta. For
    the resulting commit to be valid, every delete against the basis MUST
    then be reported through builder.record_delete().
    """
    self._recording_deletes = True
|
136 |
||
137 |
def any_changes(self):
    """Report whether any entry has been changed so far.

    Merge-only changes count as changes. This is the core of the
    --unchanged detection in commit.

    :return: True if any changes have occurred.
    """
    changed = self._any_changes
    return changed
|
146 |
||
147 |
def _ensure_fallback_inventories(self): |
|
148 |
"""Ensure that appropriate inventories are available.
|
|
149 |
||
150 |
This only applies to repositories that are stacked, and is about
|
|
151 |
enusring the stacking invariants. Namely, that for any revision that is
|
|
152 |
present, we either have all of the file content, or we have the parent
|
|
153 |
inventory and the delta file content.
|
|
154 |
"""
|
|
155 |
if not self.repository._fallback_repositories: |
|
156 |
return
|
|
157 |
if not self.repository._format.supports_chks: |
|
158 |
raise errors.BzrError("Cannot commit directly to a stacked branch" |
|
159 |
" in pre-2a formats. See "
|
|
160 |
"https://bugs.launchpad.net/bzr/+bug/375013 for details.") |
|
161 |
# This is a stacked repo, we need to make sure we have the parent
|
|
162 |
# inventories for the parents.
|
|
163 |
parent_keys = [(p,) for p in self.parents] |
|
164 |
parent_map = self.repository.inventories._index.get_parent_map(parent_keys) |
|
165 |
missing_parent_keys = set([pk for pk in parent_keys |
|
166 |
if pk not in parent_map]) |
|
167 |
fallback_repos = list(reversed(self.repository._fallback_repositories)) |
|
168 |
missing_keys = [('inventories', pk[0]) |
|
169 |
for pk in missing_parent_keys] |
|
170 |
resume_tokens = [] |
|
171 |
while missing_keys and fallback_repos: |
|
172 |
fallback_repo = fallback_repos.pop() |
|
173 |
source = fallback_repo._get_source(self.repository._format) |
|
174 |
sink = self.repository._get_sink() |
|
175 |
stream = source.get_stream_for_missing_keys(missing_keys) |
|
176 |
missing_keys = sink.insert_stream_without_locking(stream, |
|
177 |
self.repository._format) |
|
178 |
if missing_keys: |
|
179 |
raise errors.BzrError('Unable to fill in parent inventories for a' |
|
180 |
' stacked branch') |
|
181 |
||
182 |
def commit(self, message):
    """Record the revision and finish the commit.

    The message is checked with _validate_unicode_text, a Revision object
    is built from the builder's state and added to the repository together
    with the new inventory, fallback inventories are ensured, and finally
    the repository write group is committed.

    :param message: The commit message.
    :return: The revision id of the recorded revision.
    """
    self._validate_unicode_text(message, 'commit message')
    revision_class = _mod_revision.Revision
    rev = revision_class(timestamp=self._timestamp,
                         timezone=self._timezone,
                         committer=self._committer,
                         message=message,
                         inventory_sha1=self.inv_sha1,
                         revision_id=self._new_revision_id,
                         properties=self._revprops)
    rev.parent_ids = self.parents
    self.repository.add_revision(
        self._new_revision_id, rev, self.new_inventory, self._config)
    self._ensure_fallback_inventories()
    self.repository.commit_write_group()
    return self._new_revision_id
|
202 |
||
203 |
def abort(self):
    """Abandon the commit being built by aborting the write group."""
    repo = self.repository
    repo.abort_write_group()
|
207 |
||
208 |
def revision_tree(self):
    """Return the tree that was just committed.

    Call this after commit() to obtain a RevisionTree for the newly
    committed tree. It is preferred over Repository.revision_tree()
    because that may have to deserialise the inventory, whereas the
    builder normally still holds a copy in memory. When it does not
    (new_inventory is None), the inventory is fetched from the repository
    once and remembered on the builder.
    """
    inv = self.new_inventory
    if inv is None:
        inv = self.repository.get_inventory(self._new_revision_id)
        self.new_inventory = inv
    return InventoryRevisionTree(
        self.repository, self.new_inventory, self._new_revision_id)
|
222 |
||
223 |
def finish_inventory(self):
    """Tell the builder that the inventory is finished.

    Depending on whether a full inventory or only a delta was accumulated,
    the inventory is stored with add_inventory or add_inventory_by_delta,
    and the resulting sha1 is remembered in self.inv_sha1.

    :return: The inventory id in the repository, which can be used with
        repository.get_inventory.
    :raises AssertionError: if a full inventory was built but it has no
        root entry.
    """
    if self.new_inventory is None:
        # An inventory delta was accumulated without creating a new
        # inventory.
        basis_id = self.basis_delta_revision
        # We ignore the 'inventory' returned by add_inventory_by_delta
        # because self.new_inventory is used to hint to the rest of the
        # system what code path was taken.
        self.inv_sha1, _ = self.repository.add_inventory_by_delta(
            basis_id, self._basis_delta, self._new_revision_id,
            self.parents)
    else:
        if self.new_inventory.root is None:
            # A statement that added a default root directory used to sit
            # directly after this raise; it could never run and has been
            # removed.
            raise AssertionError('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.')
        self.new_inventory.revision_id = self._new_revision_id
        self.inv_sha1 = self.repository.add_inventory(
            self._new_revision_id,
            self.new_inventory,
            self.parents
            )
    return self._new_revision_id
|
251 |
||
252 |
def _check_root(self, ie, parent_invs, tree): |
|
253 |
"""Helper for record_entry_contents.
|
|
254 |
||
255 |
:param ie: An entry being added.
|
|
256 |
:param parent_invs: The inventories of the parent revisions of the
|
|
257 |
commit.
|
|
258 |
:param tree: The tree that is being committed.
|
|
259 |
"""
|
|
260 |
# In this revision format, root entries have no knit or weave When
|
|
261 |
# serializing out to disk and back in root.revision is always
|
|
262 |
# _new_revision_id
|
|
263 |
ie.revision = self._new_revision_id |
|
264 |
||
265 |
def _require_root_change(self, tree):
    """Enforce an appropriate root object change.

    Called once when record_iter_changes is called, if and only if the
    root was not in the delta calculated by record_iter_changes. A
    directory entry for the tree root, stamped with the new revision id,
    is appended to the basis delta.

    :param tree: The tree which is being committed.
    :raises RootMissing: if the commit has no parents.
    """
    if len(self.parents) == 0:
        raise errors.RootMissing()
    make_directory = entry_factory['directory']
    root_entry = make_directory(tree.path2id(''), '', None)
    root_entry.revision = self._new_revision_id
    root_change = ('', '', root_entry.file_id, root_entry)
    self._basis_delta.append(root_change)
|
279 |
||
280 |
def _get_delta(self, ie, basis_inv, path): |
|
281 |
"""Get a delta against the basis inventory for ie."""
|
|
5967.7.1
by Martin Pool
Deprecate __contains__ on Tree and Inventory |
282 |
if not basis_inv.has_id(ie.file_id): |
5815.4.2
by Jelmer Vernooij
split out versionedfile-specific stuff from commitbuilder. |
283 |
# add
|
284 |
result = (None, path, ie.file_id, ie) |
|
285 |
self._basis_delta.append(result) |
|
286 |
return result |
|
287 |
elif ie != basis_inv[ie.file_id]: |
|
288 |
# common but altered
|
|
289 |
# TODO: avoid tis id2path call.
|
|
290 |
result = (basis_inv.id2path(ie.file_id), path, ie.file_id, ie) |
|
291 |
self._basis_delta.append(result) |
|
292 |
return result |
|
293 |
else: |
|
294 |
# common, unaltered
|
|
295 |
return None |
|
296 |
||
297 |
def _heads(self, file_id, revision_ids): |
|
298 |
"""Calculate the graph heads for revision_ids in the graph of file_id.
|
|
299 |
||
300 |
This can use either a per-file graph or a global revision graph as we
|
|
301 |
have an identity relationship between the two graphs.
|
|
302 |
"""
|
|
303 |
return self.__heads(revision_ids) |
|
304 |
||
305 |
def get_basis_delta(self):
    """Return the complete inventory delta versus the basis inventory.

    The delta has been built up by the calls to record_delete and
    record_entry_contents. The client must already have called
    will_record_deletes() to indicate that it will be generating a
    complete delta.

    :return: An inventory delta, suitable for use with apply_delta, or
        Repository.add_inventory_by_delta, etc.
    :raises AssertionError: if will_record_deletes() was never called.
    """
    if self._recording_deletes:
        return self._basis_delta
    raise AssertionError("recording deletes not activated.")
|
319 |
||
320 |
def record_delete(self, path, file_id):
    """Record that a delete occurred against a basis tree.

    This is an optional API - when used it adds items to the basis_delta
    being accumulated by the commit builder. It cannot be called unless
    will_record_deletes() has been called first to inform the builder
    that a delta is being supplied.

    :param path: The path of the thing deleted.
    :param file_id: The file id that was deleted.
    :return: The (path, None, file_id, None) change that was appended to
        the basis delta.
    :raises AssertionError: if will_record_deletes() was never called.
    """
    if not self._recording_deletes:
        raise AssertionError("recording deletes not activated.")
    removal = (path, None, file_id, None)
    self._basis_delta.append(removal)
    # A delete always counts as a change for the no-op commit check.
    self._any_changes = True
    return removal
|
337 |
||
338 |
def record_entry_contents(self, ie, parent_invs, path, tree,
                          content_summary):
    """Record the content of ie from tree into the commit if needed.

    Side effect: sets ie.revision when unchanged

    :param ie: An inventory entry present in the commit.
    :param parent_invs: The inventories of the parent revisions of the
        commit.
    :param path: The path the entry is at in the tree.
    :param tree: The tree which contains this entry and should be used to
        obtain content.
    :param content_summary: Summary data from the tree about the paths
        content - stat, length, exec, sha/link target. This is only
        accessed when the entry has a revision of None - that is when it is
        a candidate to commit.
    :return: A tuple (change_delta, version_recorded, fs_hash).
        change_delta is an inventory_delta change for this entry against
        the basis tree of the commit, or None if no change occurred against
        the basis tree.
        version_recorded is True if a new version of the entry has been
        recorded. For instance, committing a merge where a file was only
        changed on the other side will return (delta, False, None).
        fs_hash is either None, or the hash details for the path (currently
        a tuple of the contents sha1 and the statvalue returned by
        tree.get_file_with_stat()).
    :raises RootMissing: if no root has been recorded yet and ie is not a
        root entry (its parent_id is not None).
    :raises NotImplementedError: for a tree-reference in a repository
        format without tree reference support, or for an unknown kind.
    :raises ValueError: if a file's content summary has executable None.
    :raises AssertionError: if a carried-over entry claims to have been
        modified in this commit, or a tree-reference has no reference
        revision in its content summary.
    """
    if self.new_inventory.root is None:
        if ie.parent_id is not None:
            raise errors.RootMissing()
        self._check_root(ie, parent_invs, tree)
    if ie.revision is None:
        kind = content_summary[0]
    else:
        # ie is carried over from a prior commit
        kind = ie.kind
    # XXX: repository specific check for nested tree support goes here - if
    # the repo doesn't want nested trees we skip it ?
    if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
        # mismatch between commit builder logic and repository:
        # this needs the entry creation pushed down into the builder.
        raise NotImplementedError('Missing repository subtree support.')
    self.new_inventory.add(ie)

    # TODO: slow, take it out of the inner loop.
    try:
        basis_inv = parent_invs[0]
    except IndexError:
        basis_inv = Inventory(root_id=None)

    # ie.revision is always None if the InventoryEntry is considered
    # for committing. We may record the previous parents revision if the
    # content is actually unchanged against a sole head.
    if ie.revision is not None:
        if not self._versioned_root and path == '':
            # repositories that do not version the root set the root's
            # revision to the new commit even when no change occurs (more
            # specifically, they do not record a revision on the root; and
            # the rev id is assigned to the root during deserialisation -
            # this masks when a change may have occurred against the basis.
            # To match this we always issue a delta, because the revision
            # of the root will always be changing.
            if basis_inv.has_id(ie.file_id):
                delta = (basis_inv.id2path(ie.file_id), path,
                         ie.file_id, ie)
            else:
                # add
                delta = (None, path, ie.file_id, ie)
            self._basis_delta.append(delta)
            return delta, False, None
        else:
            # we don't need to commit this, because the caller already
            # determined that an existing revision of this file is
            # appropriate. If it's not being considered for committing then
            # it and all its parents to the root must be unaltered so
            # no-change against the basis.
            if ie.revision == self._new_revision_id:
                # The arguments must be interpolated with '%'; passing
                # them after a comma would leave the placeholders unfilled.
                raise AssertionError("Impossible situation, a skipped "
                    "inventory entry (%r) claims to be modified in this "
                    "commit (%r)." % (ie, self._new_revision_id))
            return None, False, None
    # XXX: Friction: parent_candidates should return a list not a dict
    # so that we don't have to walk the inventories again.
    parent_candidate_entries = ie.parent_candidates(parent_invs)
    head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
    # Collect the heads in parent order, each at most once.
    heads = []
    for inv in parent_invs:
        if inv.has_id(ie.file_id):
            old_rev = inv[ie.file_id].revision
            if old_rev in head_set:
                heads.append(old_rev)
                head_set.remove(old_rev)

    store = False
    # now we check to see if we need to write a new record to the
    # file-graph.
    # We write a new entry unless there is one head to the ancestors, and
    # the kind-derived content is unchanged.

    # Cheapest check first: no ancestors, or more than one head in the
    # ancestors, we write a new node.
    if len(heads) != 1:
        store = True
    if not store:
        # There is a single head, look it up for comparison
        parent_entry = parent_candidate_entries[heads[0]]
        # if the non-content specific data has changed, we'll be writing a
        # node:
        if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
            store = True
    # now we need to do content specific checks:
    if not store:
        # if the kind changed the content obviously has
        if kind != parent_entry.kind:
            store = True
    # Stat cache fingerprint feedback for the caller - None as we usually
    # don't generate one.
    fingerprint = None
    if kind == 'file':
        if content_summary[2] is None:
            raise ValueError("Files must not have executable = None")
        if not store:
            # We can't trust a check of the file length because of content
            # filtering...
            if (# if the exec bit has changed we have to store:
                    parent_entry.executable != content_summary[2]):
                store = True
            elif parent_entry.text_sha1 == content_summary[3]:
                # all meta and content is unchanged (using a hash cache
                # hit to check the sha)
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False, None
            else:
                # Either there is only a hash change (no hash cache entry,
                # or same size content change), or there is no change on
                # this file at all.
                # Provide the parent's hash to the store layer, so that if
                # the content is unchanged we will not store a new node.
                nostore_sha = parent_entry.text_sha1
        if store:
            # We want to record a new node regardless of the presence or
            # absence of a content change in the file.
            nostore_sha = None
        ie.executable = content_summary[2]
        file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
        try:
            text = file_obj.read()
        finally:
            file_obj.close()
        try:
            ie.text_sha1, ie.text_size = self._add_text_to_weave(
                ie.file_id, text, heads, nostore_sha)
            # Let the caller know we generated a stat fingerprint.
            fingerprint = (ie.text_sha1, stat_value)
        except errors.ExistingContent:
            # Turns out that the file content was unchanged, and we were
            # only going to store a new node if it was changed. Carry over
            # the entry.
            ie.revision = parent_entry.revision
            ie.text_size = parent_entry.text_size
            ie.text_sha1 = parent_entry.text_sha1
            ie.executable = parent_entry.executable
            return self._get_delta(ie, basis_inv, path), False, None
    elif kind == 'directory':
        if not store:
            # all data is meta here, nothing specific to directory, so
            # carry over:
            ie.revision = parent_entry.revision
            return self._get_delta(ie, basis_inv, path), False, None
        self._add_text_to_weave(ie.file_id, '', heads, None)
    elif kind == 'symlink':
        current_link_target = content_summary[3]
        if not store:
            # symlink target is not generic metadata, check if it has
            # changed.
            if current_link_target != parent_entry.symlink_target:
                store = True
        if not store:
            # unchanged, carry over.
            ie.revision = parent_entry.revision
            ie.symlink_target = parent_entry.symlink_target
            return self._get_delta(ie, basis_inv, path), False, None
        ie.symlink_target = current_link_target
        self._add_text_to_weave(ie.file_id, '', heads, None)
    elif kind == 'tree-reference':
        if not store:
            if content_summary[3] != parent_entry.reference_revision:
                store = True
        if not store:
            # unchanged, carry over.
            ie.reference_revision = parent_entry.reference_revision
            ie.revision = parent_entry.revision
            return self._get_delta(ie, basis_inv, path), False, None
        ie.reference_revision = content_summary[3]
        if ie.reference_revision is None:
            raise AssertionError("invalid content_summary for nested tree: %r"
                % (content_summary,))
        self._add_text_to_weave(ie.file_id, '', heads, None)
    else:
        raise NotImplementedError('unknown kind')
    ie.revision = self._new_revision_id
    # The initial commit adds a root directory, but this in itself is not
    # a worthwhile commit.
    if (self.basis_delta_revision != _mod_revision.NULL_REVISION or
            path != ""):
        self._any_changes = True
    return self._get_delta(ie, basis_inv, path), True, fingerprint
550 |
||
551 |
def record_iter_changes(self, tree, basis_revision_id, iter_changes,
    _entry_factory=entry_factory):
    """Record a new tree via iter_changes.

    This is a generator: nothing is recorded until the result is iterated,
    and the final bookkeeping (self.new_inventory, self._any_changes,
    self.basis_delta_revision) only happens once it is exhausted.

    :param tree: The tree to obtain text contents from for changed objects.
    :param basis_revision_id: The revision id of the tree the iter_changes
        has been generated against. Currently assumed to be the same
        as self.parents[0] - if it is not, errors may occur.
    :param iter_changes: An iter_changes iterator with the changes to apply
        to basis_revision_id. The iterator must not include any items with
        a current kind of None - missing items must be either filtered out
        or errored-on before record_iter_changes sees the item.
    :param _entry_factory: Private method to bind entry_factory locally for
        performance.
    :return: A generator of (file_id, relpath, fs_hash) tuples for use with
        tree._observed_sha1. Only 'file' entries whose text was stored
        produce a tuple; fs_hash is (text_sha1, stat_value).
    :raises Exception: if there are parents, basis_revision_id is not
        self.parents[0] and the basis is not a ghost.
    :raises AssertionError: if a change has a target kind other than
        'file', 'symlink', 'directory' or 'tree-reference'.
    """
    # Create an inventory delta based on deltas between all the parents and
    # deltas between all the parent inventories. We use inventory delta's
    # between the inventory objects because iter_changes masks
    # last-changed-field only changes.
    # Working data:
    # file_id -> change map, change is fileid, paths, changed, versioneds,
    # parents, names, kinds, executables
    merged_ids = {}
    # {file_id -> revision_id -> inventory entry, for entries in parent
    # trees that are not parents[0]
    parent_entries = {}
    ghost_basis = False
    try:
        revtrees = list(self.repository.revision_trees(self.parents))
    except errors.NoSuchRevision:
        # one or more ghosts, slow path.
        revtrees = []
        for revision_id in self.parents:
            try:
                revtrees.append(self.repository.revision_tree(revision_id))
            except errors.NoSuchRevision:
                if not revtrees:
                    # The first parent itself is a ghost: treat the commit
                    # as being made against the null revision.
                    basis_revision_id = _mod_revision.NULL_REVISION
                    ghost_basis = True
                # Substitute the null revision's tree for the ghost so that
                # revtrees stays aligned with self.parents.
                revtrees.append(self.repository.revision_tree(
                    _mod_revision.NULL_REVISION))
    # The basis inventory from a repository
    if revtrees:
        basis_inv = revtrees[0].inventory
    else:
        basis_inv = self.repository.revision_tree(
            _mod_revision.NULL_REVISION).inventory
    if len(self.parents) > 0:
        if basis_revision_id != self.parents[0] and not ghost_basis:
            raise Exception(
                "arbitrary basis parents not yet supported with merges")
        # Each inventory delta item is
        # (old_path, new_path, file_id, new_inventory_entry).
        for revtree in revtrees[1:]:
            for change in revtree.inventory._make_delta(basis_inv):
                if change[1] is None:
                    # Not present in this parent.
                    continue
                if change[2] not in merged_ids:
                    if change[0] is not None:
                        basis_entry = basis_inv[change[2]]
                        merged_ids[change[2]] = [
                            # basis revid
                            basis_entry.revision,
                            # new tree revid
                            change[3].revision]
                        parent_entries[change[2]] = {
                            # basis parent
                            basis_entry.revision:basis_entry,
                            # this parent
                            change[3].revision:change[3],
                            }
                    else:
                        # No old path, so there is no basis entry to record
                        # as a candidate.
                        merged_ids[change[2]] = [change[3].revision]
                        parent_entries[change[2]] = {change[3].revision:change[3]}
                else:
                    merged_ids[change[2]].append(change[3].revision)
                    parent_entries[change[2]][change[3].revision] = change[3]
    else:
        merged_ids = {}
    # Setup the changes from the tree:
    # changes maps file_id -> (change, [parent revision_ids])
    changes= {}
    for change in iter_changes:
        # This probably looks up in basis_inv way too much.
        if change[1][0] is not None:
            head_candidate = [basis_inv[change[0]].revision]
        else:
            head_candidate = []
        # Candidates gathered from the merged parents take precedence over
        # the basis-only candidate.
        changes[change[0]] = change, merged_ids.get(change[0],
            head_candidate)
    unchanged_merged = set(merged_ids) - set(changes)
    # Extend the changes dict with synthetic changes to record merges of
    # texts.
    for file_id in unchanged_merged:
        # Record a merged version of these items that did not change vs the
        # basis. This can be either identical parallel changes, or a revert
        # of a specific file after a merge. The recorded content will be
        # that of the current tree (which is the same as the basis), but
        # the per-file graph will reflect a merge.
        # NB:XXX: We are reconstructing path information we had, this
        # should be preserved instead.
        # inv delta change: (file_id, (path_in_source, path_in_target),
        #   changed_content, versioned, parent, name, kind,
        #   executable)
        try:
            basis_entry = basis_inv[file_id]
        except errors.NoSuchId:
            # a change from basis->some_parents but file_id isn't in basis
            # so was new in the merge, which means it must have changed
            # from basis -> current, and as it hasn't the add was reverted
            # by the user. So we discard this change.
            pass
        else:
            change = (file_id,
                (basis_inv.id2path(file_id), tree.id2path(file_id)),
                False, (True, True),
                (basis_entry.parent_id, basis_entry.parent_id),
                (basis_entry.name, basis_entry.name),
                (basis_entry.kind, basis_entry.kind),
                (basis_entry.executable, basis_entry.executable))
            changes[file_id] = (change, merged_ids[file_id])
    # changes contains tuples with the change and a set of inventory
    # candidates for the file.
    # inv delta is:
    # old_path, new_path, file_id, new_inventory_entry
    seen_root = False # Is the root in the basis delta?
    inv_delta = self._basis_delta
    modified_rev = self._new_revision_id
    for change, head_candidates in changes.values():
        if change[3][1]: # versioned in target.
            # Several things may be happening here:
            # We may have a fork in the per-file graph
            #  - record a change with the content from tree
            # We may have a change against < all trees
            #  - carry over the tree that hasn't changed
            # We may have a change against all trees
            #  - record the change with the content from tree
            kind = change[6][1]
            file_id = change[0]
            entry = _entry_factory[kind](file_id, change[5][1],
                change[4][1])
            head_set = self._heads(change[0], set(head_candidates))
            heads = []
            # Preserve ordering.
            # Removing each accepted candidate from head_set also ensures a
            # revision listed twice in head_candidates is only added once.
            for head_candidate in head_candidates:
                if head_candidate in head_set:
                    heads.append(head_candidate)
                    head_set.remove(head_candidate)
            carried_over = False
            if len(heads) == 1:
                # Could be a carry-over situation:
                parent_entry_revs = parent_entries.get(file_id, None)
                if parent_entry_revs:
                    parent_entry = parent_entry_revs.get(heads[0], None)
                else:
                    parent_entry = None
                if parent_entry is None:
                    # The parent iter_changes was called against is the one
                    # that is the per-file head, so any change is relevant
                    # iter_changes is valid.
                    carry_over_possible = False
                else:
                    # could be a carry over situation
                    # A change against the basis may just indicate a merge,
                    # we need to check the content against the source of the
                    # merge to determine if it was changed after the merge
                    # or carried over.
                    if (parent_entry.kind != entry.kind or
                        parent_entry.parent_id != entry.parent_id or
                        parent_entry.name != entry.name):
                        # Metadata common to all entries has changed
                        # against per-file parent
                        carry_over_possible = False
                    else:
                        carry_over_possible = True
                    # per-type checks for changes against the parent_entry
                    # are done below.
            else:
                # Cannot be a carry-over situation
                carry_over_possible = False
            # Populate the entry in the delta
            if kind == 'file':
                # XXX: There is still a small race here: If someone reverts the content of a file
                # after iter_changes examines and decides it has changed,
                # we will unconditionally record a new version even if some
                # other process reverts it while commit is running (with
                # the revert happening after iter_changes did its
                # examination).
                if change[7][1]:
                    entry.executable = True
                else:
                    entry.executable = False
                if (carry_over_possible and
                    parent_entry.executable == entry.executable):
                    # Check the file length, content hash after reading
                    # the file.
                    # Passing the parent's sha1 as nostore_sha is what lets
                    # the ExistingContent handler below detect an unchanged
                    # text.
                    nostore_sha = parent_entry.text_sha1
                else:
                    nostore_sha = None
                file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
                try:
                    text = file_obj.read()
                finally:
                    file_obj.close()
                try:
                    entry.text_sha1, entry.text_size = self._add_text_to_weave(
                        file_id, text, heads, nostore_sha)
                    yield file_id, change[1][1], (entry.text_sha1, stat_value)
                except errors.ExistingContent:
                    # No content change against a carry_over parent
                    # Perhaps this should also yield a fs hash update?
                    carried_over = True
                    entry.text_size = parent_entry.text_size
                    entry.text_sha1 = parent_entry.text_sha1
            elif kind == 'symlink':
                # Wants a path hint?
                entry.symlink_target = tree.get_symlink_target(file_id)
                if (carry_over_possible and
                    parent_entry.symlink_target == entry.symlink_target):
                    carried_over = True
                else:
                    self._add_text_to_weave(change[0], '', heads, None)
            elif kind == 'directory':
                if carry_over_possible:
                    carried_over = True
                else:
                    # Nothing to set on the entry.
                    # XXX: split into the Root and nonRoot versions.
                    # The root directory (path '') only gets a text record
                    # when the repository supports rich roots.
                    if change[1][1] != '' or self.repository.supports_rich_root():
                        self._add_text_to_weave(change[0], '', heads, None)
            elif kind == 'tree-reference':
                if not self.repository._format.supports_tree_reference:
                    # This isn't quite sane as an error, but we shouldn't
                    # ever see this code path in practice: tree's don't
                    # permit references when the repo doesn't support tree
                    # references.
                    raise errors.UnsupportedOperation(tree.add_reference,
                        self.repository)
                reference_revision = tree.get_reference_revision(change[0])
                entry.reference_revision = reference_revision
                if (carry_over_possible and
                    parent_entry.reference_revision == reference_revision):
                    carried_over = True
                else:
                    self._add_text_to_weave(change[0], '', heads, None)
            else:
                raise AssertionError('unknown kind %r' % kind)
            if not carried_over:
                entry.revision = modified_rev
            else:
                entry.revision = parent_entry.revision
        else:
            # Not versioned in the target: a None entry in the inventory
            # delta.
            entry = None
        new_path = change[1][1]
        inv_delta.append((change[1][0], new_path, change[0], entry))
        if new_path == '':
            seen_root = True
    self.new_inventory = None
    # The initial commit adds a root directory, but this in itself is not
    # a worthwhile commit.
    if ((len(inv_delta) > 0 and basis_revision_id != _mod_revision.NULL_REVISION) or
        (len(inv_delta) > 1 and basis_revision_id == _mod_revision.NULL_REVISION)):
        # This should perhaps be guarded by a check that the basis we
        # commit against is the basis for the commit and if not do a delta
        # against the basis.
        self._any_changes = True
    if not seen_root:
        # housekeeping root entry changes do not affect no-change commits.
        self._require_root_change(tree)
    self.basis_delta_revision = basis_revision_id
|
822 |
||
823 |
def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha): |
|
824 |
parent_keys = tuple([(file_id, parent) for parent in parents]) |
|
825 |
return self.repository.texts._add_text( |
|
826 |
(file_id, self._new_revision_id), parent_keys, new_text, |
|
827 |
nostore_sha=nostore_sha, random_id=self.random_revid)[0:2] |
|
828 |
||
829 |
||
830 |
class VersionedFileRootCommitBuilder(VersionedFileCommitBuilder):
    """Commit builder that records the root id as a versioned entry."""

    # The root entry is versioned properly by this builder.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents; does nothing in this builder.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
        pass

    def _require_root_change(self, tree):
        """Enforce an appropriate root object change.

        Called once by record_iter_changes, and only when the root was not
        part of the delta it calculated.

        :param tree: The tree which is being committed.
        """
        # Versioned roots do not change unless the tree found a change, so
        # there is nothing to enforce here.
        pass
|
|
854 |
||
855 |
||
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
856 |
class VersionedFileRepository(Repository): |
857 |
"""Repository holding history for one or more branches.
|
|
858 |
||
859 |
The repository holds and retrieves historical information including
|
|
860 |
revisions and file history. It's normally accessed only by the Branch,
|
|
861 |
which views a particular line of development through that history.
|
|
862 |
||
863 |
The Repository builds on top of some byte storage facilities (the revisions,
|
|
864 |
signatures, inventories, texts and chk_bytes attributes) and a Transport,
|
|
865 |
which respectively provide byte storage and a means to access the (possibly
|
|
866 |
remote) disk.
|
|
867 |
||
868 |
The byte storage facilities are addressed via tuples, which we refer to
|
|
869 |
as 'keys' throughout the code base. Revision_keys, inventory_keys and
|
|
870 |
signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:
|
|
871 |
(file_id, revision_id). chk_bytes uses CHK keys - a 1-tuple with a single
|
|
872 |
byte string made up of a hash identifier and a hash value.
|
|
873 |
We use this interface because it allows low friction with the underlying
|
|
874 |
code that implements disk indices, network encoding and other parts of
|
|
875 |
bzrlib.
|
|
876 |
||
877 |
:ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing
|
|
878 |
the serialised revisions for the repository. This can be used to obtain
|
|
879 |
revision graph information or to access raw serialised revisions.
|
|
880 |
The result of trying to insert data into the repository via this store
|
|
881 |
is undefined: it should be considered read-only except for implementors
|
|
882 |
of repositories.
|
|
883 |
:ivar signatures: A bzrlib.versionedfile.VersionedFiles instance containing
|
|
884 |
the serialised signatures for the repository. This can be used to
|
|
885 |
obtain access to raw serialised signatures. The result of trying to
|
|
886 |
insert data into the repository via this store is undefined: it should
|
|
887 |
be considered read-only except for implementors of repositories.
|
|
888 |
:ivar inventories: A bzrlib.versionedfile.VersionedFiles instance containing
|
|
889 |
the serialised inventories for the repository. This can be used to
|
|
890 |
obtain unserialised inventories. The result of trying to insert data
|
|
891 |
into the repository via this store is undefined: it should be
|
|
892 |
considered read-only except for implementors of repositories.
|
|
893 |
:ivar texts: A bzrlib.versionedfile.VersionedFiles instance containing the
|
|
894 |
texts of files and directories for the repository. This can be used to
|
|
895 |
obtain file texts or file graphs. Note that Repository.iter_file_bytes
|
|
896 |
is usually a better interface for accessing file texts.
|
|
897 |
The result of trying to insert data into the repository via this store
|
|
898 |
is undefined: it should be considered read-only except for implementors
|
|
899 |
of repositories.
|
|
900 |
:ivar chk_bytes: A bzrlib.versionedfile.VersionedFiles instance containing
|
|
901 |
any data the repository chooses to store or have indexed by its hash.
|
|
902 |
The result of trying to insert data into the repository via this store
|
|
903 |
is undefined: it should be considered read-only except for implementors
|
|
904 |
of repositories.
|
|
905 |
:ivar _transport: Transport for file access to repository, typically
|
|
906 |
pointing to .bzr/repository.
|
|
907 |
"""
|
|
908 |
||
5815.4.2
by Jelmer Vernooij
split out versionedfile-specific stuff from commitbuilder. |
909 |
# What class to use for a CommitBuilder. Often it's simpler to change this
|
910 |
# in a Repository class subclass rather than to override
|
|
911 |
# get_commit_builder.
|
|
912 |
_commit_builder_class = VersionedFileCommitBuilder |
|
913 |
||
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
914 |
def add_fallback_repository(self, repository):
    """Register a repository to consult for data not held locally.

    :param repository: A repository.
    :raises errors.UnstackableRepositoryFormat: if this repository's
        format does not support external lookups.
    """
    fmt = self._format
    if not fmt.supports_external_lookups:
        raise errors.UnstackableRepositoryFormat(fmt, self.base)
    # Validation can raise, so it has to happen before the fallback
    # repository is locked.
    self._check_fallback_repository(repository)
    if self.is_locked():
        # This repository will call fallback.unlock() when it transitions
        # to the unlocked state, so bump the fallback's lock count now.
        repository.lock_read()
    self._fallback_repositories.append(repository)
    # Chain every versioned-file store to its counterpart in the fallback.
    for store_name in ('texts', 'inventories', 'revisions', 'signatures'):
        local_store = getattr(self, store_name)
        local_store.add_fallback_versioned_files(
            getattr(repository, store_name))
    chk_store = self.chk_bytes
    if chk_store is not None:
        chk_store.add_fallback_versioned_files(repository.chk_bytes)
|
935 |
||
936 |
@only_raises(errors.LockNotHeld, errors.LockBroken)
def unlock(self):
    """Release one lock level, emptying the entry cache on the last one."""
    super(VersionedFileRepository, self).unlock()
    if self.control_files._lock_count != 0:
        # Still locked by someone else; keep the cached entries.
        return
    self._inventory_entry_cache.clear()
|
941 |
||
942 |
def add_inventory(self, revision_id, inv, parents):
    """Add the inventory inv to the repository as revision_id.

    :param revision_id: The revision id the inventory is stored under.
    :param inv: The inventory to add. Its revision_id must either be None
        or equal to revision_id, and it must have a root.
    :param parents: The revision ids of the parents that revision_id
        is known to have and are in the repository already.

    :returns: The validator(which is a sha1 digest, though what is sha'd is
        repository format specific) of the serialized inventory.
    :raises AssertionError: if the repository is not in a write group, or
        the inventory's revision id disagrees with revision_id.
    :raises errors.RootMissing: if the inventory has no root.
    """
    if not self.is_in_write_group():
        raise AssertionError("%r not in write group" % (self,))
    _mod_revision.check_not_reserved_id(revision_id)
    recorded_revid = inv.revision_id
    revid_consistent = (recorded_revid is None
                        or recorded_revid == revision_id)
    if not revid_consistent:
        raise AssertionError(
            "Mismatch between inventory revision"
            " id and insertion revid (%r, %r)"
            % (recorded_revid, revision_id))
    if inv.root is None:
        raise errors.RootMissing()
    return self._add_inventory_checked(revision_id, inv, parents)
|
962 |
||
963 |
def _add_inventory_checked(self, revision_id, inv, parents): |
|
964 |
"""Add inv to the repository after checking the inputs.
|
|
965 |
||
966 |
This function can be overridden to allow different inventory styles.
|
|
967 |
||
968 |
:seealso: add_inventory, for the contract.
|
|
969 |
"""
|
|
970 |
inv_lines = self._serializer.write_inventory_to_lines(inv) |
|
971 |
return self._inventory_add_lines(revision_id, parents, |
|
972 |
inv_lines, check_content=False) |
|
973 |
||
974 |
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                           parents, basis_inv=None, propagate_caches=False):
    """Add a new inventory expressed as a delta against another revision.

    See the inventory developers documentation for the theory behind
    inventory deltas.

    :param basis_revision_id: The inventory id the delta was created
        against. (This does not have to be a direct parent.)
    :param delta: The inventory delta (see Inventory.apply_delta for
        details).
    :param new_revision_id: The revision id that the inventory is being
        added for.
    :param parents: The revision ids of the parents that revision_id is
        known to have and are in the repository already. These are supplied
        for repositories that depend on the inventory graph for revision
        graph access, as well as for those that pun ancestry with delta
        compression.
    :param basis_inv: The basis inventory if it is already known,
        otherwise None.
    :param propagate_caches: If True, the caches for this inventory are
        copied to and updated for the result if possible. (Not consulted
        by this implementation.)

    :returns: (validator, new_inv)
        The validator(which is a sha1 digest, though what is sha'd is
        repository format specific) of the serialized inventory, and the
        resulting inventory.
    :raises AssertionError: if the repository is not in a write group.
    """
    if not self.is_in_write_group():
        raise AssertionError("%r not in write group" % (self,))
    _mod_revision.check_not_reserved_id(new_revision_id)
    basis_tree = self.revision_tree(basis_revision_id)
    basis_tree.lock_read()
    try:
        # The basis inventory is mutated in place, which not every
        # inventory implementation may support. Returning a fresh inventory
        # would be a better idiom, but with no revision tree cache in the
        # repository this is safe for now - RBC 20081013
        new_inv = basis_inv
        if new_inv is None:
            new_inv = basis_tree.inventory
        new_inv.apply_delta(delta)
        new_inv.revision_id = new_revision_id
        validator = self.add_inventory(new_revision_id, new_inv, parents)
        return (validator, new_inv)
    finally:
        basis_tree.unlock()
|
1020 |
||
1021 |
def _inventory_add_lines(self, revision_id, parents, lines, |
|
1022 |
check_content=True): |
|
1023 |
"""Store lines in inv_vf and return the sha1 of the inventory."""
|
|
1024 |
parents = [(parent,) for parent in parents] |
|
1025 |
result = self.inventories.add_lines((revision_id,), parents, lines, |
|
1026 |
check_content=check_content)[0] |
|
1027 |
self.inventories._access.flush() |
|
1028 |
return result |
|
1029 |
||
1030 |
def add_revision(self, revision_id, rev, inv=None, config=None):
    """Add rev to the revision store as revision_id.

    :param revision_id: the revision id to use.
    :param rev: The revision object. Its inventory_sha1 is set here.
    :param inv: The inventory for the revision. if None, it will be looked
        up in the inventory storer
    :param config: If None no digital signature will be created.
        If supplied its signature_needed method will be used
        to determine if a signature should be made.
    :raises errors.WeaveRevisionNotPresent: if no inventory is stored for
        revision_id and inv is None.
    """
    # TODO: jam 20070210 Shouldn't we check rev.revision_id and
    #       rev.parent_ids?
    _mod_revision.check_not_reserved_id(revision_id)
    wants_signature = config is not None and config.signature_needed()
    if wants_signature:
        if inv is None:
            inv = self.get_inventory(revision_id)
        tree = InventoryRevisionTree(self, inv, revision_id)
        plaintext = Testament(rev, tree).as_short_text()
        self.store_revision_signature(
            gpg.GPGStrategy(config), plaintext, revision_id)
    # Make sure an inventory is stored, adding the supplied one if needed.
    inv_key = (revision_id,)
    if self.inventories.get_parent_map([inv_key]):
        rev.inventory_sha1 = self.inventories.get_sha1s([inv_key])[inv_key]
    elif inv is None:
        raise errors.WeaveRevisionNotPresent(revision_id,
                                             self.inventories)
    else:
        # yes, this is not suitable for adding with ghosts.
        rev.inventory_sha1 = self.add_inventory(revision_id, inv,
                                                rev.parent_ids)
    self._add_revision(rev)
|
1065 |
||
1066 |
def _add_revision(self, revision):
    """Serialise revision and add it to the revisions store.

    :param revision: The revision object; its revision_id forms the key and
        its parent_ids form the parent keys.
    """
    serialised = self._serializer.write_revision_to_string(revision)
    revision_key = (revision.revision_id,)
    parent_keys = tuple([(parent_id,) for parent_id in revision.parent_ids])
    self.revisions.add_lines(
        revision_key, parent_keys, osutils.split_lines(serialised))
|
1071 |
||
1072 |
def _check_inventories(self, checker):
    """Check the inventories found from the revision scan.

    This is responsible for verifying the sha1 of inventories and
    creating a pending_keys set that covers data referenced by inventories.

    :param checker: The checker whose pending keys are processed.
    """
    progress = ui.ui_factory.nested_progress_bar()
    try:
        self._do_check_inventories(checker, progress)
    finally:
        # Always close the progress bar, even if the check fails.
        progress.finished()
|
1083 |
||
1084 |
def _do_check_inventories(self, checker, bar): |
|
1085 |
"""Helper for _check_inventories."""
|
|
1086 |
revno = 0 |
|
1087 |
keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()} |
|
1088 |
kinds = ['chk_bytes', 'texts'] |
|
1089 |
count = len(checker.pending_keys) |
|
1090 |
bar.update("inventories", 0, 2) |
|
1091 |
current_keys = checker.pending_keys |
|
1092 |
checker.pending_keys = {} |
|
1093 |
# Accumulate current checks.
|
|
1094 |
for key in current_keys: |
|
1095 |
if key[0] != 'inventories' and key[0] not in kinds: |
|
1096 |
checker._report_items.append('unknown key type %r' % (key,)) |
|
1097 |
keys[key[0]].add(key[1:]) |
|
1098 |
if keys['inventories']: |
|
1099 |
# NB: output order *should* be roughly sorted - topo or
|
|
1100 |
# inverse topo depending on repository - either way decent
|
|
1101 |
# to just delta against. However, pre-CHK formats didn't
|
|
1102 |
# try to optimise inventory layout on disk. As such the
|
|
1103 |
# pre-CHK code path does not use inventory deltas.
|
|
1104 |
last_object = None |
|
1105 |
for record in self.inventories.check(keys=keys['inventories']): |
|
1106 |
if record.storage_kind == 'absent': |
|
1107 |
checker._report_items.append( |
|
1108 |
'Missing inventory {%s}' % (record.key,)) |
|
1109 |
else: |
|
1110 |
last_object = self._check_record('inventories', record, |
|
1111 |
checker, last_object, |
|
1112 |
current_keys[('inventories',) + record.key]) |
|
1113 |
del keys['inventories'] |
|
1114 |
else: |
|
1115 |
return
|
|
1116 |
bar.update("texts", 1) |
|
1117 |
while (checker.pending_keys or keys['chk_bytes'] |
|
1118 |
or keys['texts']): |
|
1119 |
# Something to check.
|
|
1120 |
current_keys = checker.pending_keys |
|
1121 |
checker.pending_keys = {} |
|
1122 |
# Accumulate current checks.
|
|
1123 |
for key in current_keys: |
|
1124 |
if key[0] not in kinds: |
|
1125 |
checker._report_items.append('unknown key type %r' % (key,)) |
|
1126 |
keys[key[0]].add(key[1:]) |
|
1127 |
# Check the outermost kind only - inventories || chk_bytes || texts
|
|
1128 |
for kind in kinds: |
|
1129 |
if keys[kind]: |
|
1130 |
last_object = None |
|
1131 |
for record in getattr(self, kind).check(keys=keys[kind]): |
|
1132 |
if record.storage_kind == 'absent': |
|
1133 |
checker._report_items.append( |
|
1134 |
'Missing %s {%s}' % (kind, record.key,)) |
|
1135 |
else: |
|
1136 |
last_object = self._check_record(kind, record, |
|
1137 |
checker, last_object, current_keys[(kind,) + record.key]) |
|
1138 |
keys[kind] = set() |
|
1139 |
break
|
|
1140 |
||
1141 |
def _check_record(self, kind, record, checker, last_object, item_data): |
|
1142 |
"""Check a single text from this repository."""
|
|
1143 |
if kind == 'inventories': |
|
1144 |
rev_id = record.key[0] |
|
1145 |
inv = self._deserialise_inventory(rev_id, |
|
1146 |
record.get_bytes_as('fulltext')) |
|
1147 |
if last_object is not None: |
|
1148 |
delta = inv._make_delta(last_object) |
|
1149 |
for old_path, path, file_id, ie in delta: |
|
1150 |
if ie is None: |
|
1151 |
continue
|
|
1152 |
ie.check(checker, rev_id, inv) |
|
1153 |
else: |
|
1154 |
for path, ie in inv.iter_entries(): |
|
1155 |
ie.check(checker, rev_id, inv) |
|
1156 |
if self._format.fast_deltas: |
|
1157 |
return inv |
|
1158 |
elif kind == 'chk_bytes': |
|
1159 |
# No code written to check chk_bytes for this repo format.
|
|
1160 |
checker._report_items.append( |
|
1161 |
'unsupported key type chk_bytes for %s' % (record.key,)) |
|
1162 |
elif kind == 'texts': |
|
1163 |
self._check_text(record, checker, item_data) |
|
1164 |
else: |
|
1165 |
checker._report_items.append( |
|
1166 |
'unknown key type %s for %s' % (kind, record.key)) |
|
1167 |
||
1168 |
def _check_text(self, record, checker, item_data):
    """Check a single text.

    Extracts the text, computes its sha1 and reports a mismatch against
    item_data[1] in checker._report_items.

    :param record: The text record to check.
    :param checker: Checker collecting problems in _report_items.
    :param item_data: Falsy to skip the comparison; otherwise indexable,
        with the expected sha1 at [1] and the referrer at [2].
    """
    # Check it is extractable.
    # TODO: check length.
    storage_kind = record.storage_kind
    if storage_kind == 'chunked':
        chunks = record.get_bytes_as(storage_kind)
        sha1 = osutils.sha_strings(chunks)
        length = sum(map(len, chunks))
    else:
        content = record.get_bytes_as('fulltext')
        sha1 = osutils.sha_string(content)
        length = len(content)
    if not item_data:
        return
    expected_sha1 = item_data[1]
    if sha1 != expected_sha1:
        checker._report_items.append(
            'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
            (record.key, sha1, expected_sha1, item_data[2]))
|
1184 |
||
1185 |
def __init__(self, _format, a_bzrdir, control_files):
    """Instantiate a VersionedFileRepository.

    :param _format: The format of the repository on disk.
    :param a_bzrdir: The BzrDir of the repository.
    :param control_files: Control files to use for locking, etc.
    """
    # In the future we will have a single api for all stores for
    # getting file texts, inventories and revisions, then
    # this construct will accept instances of those things.
    super(VersionedFileRepository, self).__init__(
        _format, a_bzrdir, control_files)
    # Reconcile behaviour flags, for tests.
    self._reconcile_backsup_inventory = True
    self._reconcile_fixes_text_parents = False
    self._reconcile_does_inventory_gc = True
    # An InventoryEntry cache, used during deserialization
    entry_cache_size = 10 * 1024
    self._inventory_entry_cache = fifo_cache.FIFOCache(entry_cache_size)
    # Is it safe to return inventory entries directly from the entry cache,
    # rather than copying them?
    self._safe_to_return_from_cache = False
|
1206 |
||
5815.4.21
by Jelmer Vernooij
Fix lock. |
1207 |
@needs_read_lock
|
1208 |
def gather_stats(self, revid=None, committers=None):
    """See Repository.gather_stats().

    Extends the base class statistics with a 'revisions' count when the
    user transport is listable.
    """
    stats = super(VersionedFileRepository, self).gather_stats(
        revid, committers)
    # XXX: This is available for many repos regardless of listability.
    if not self.user_transport.listable():
        return stats
    # XXX: do we want to define __len__() ?
    # Maybe the versionedfiles object should provide a different
    # method to get the number of keys.
    revision_keys = self.revisions.keys()
    stats['revisions'] = len(revision_keys)
    return stats
|
1220 |
||
5815.4.2
by Jelmer Vernooij
split out versionedfile-specific stuff from commitbuilder. |
1221 |
def get_commit_builder(self, branch, parents, config, timestamp=None,
                       timezone=None, committer=None, revprops=None,
                       revision_id=None, lossy=False):
    """Create a CommitBuilder for this repository and open a write group.

    :param branch: Branch to commit to.
    :param parents: Revision ids of the parents of the new revision.
    :param config: Configuration to use.
    :param timestamp: Optional timestamp recorded for commit.
    :param timezone: Optional timezone for timestamp.
    :param committer: Optional committer to set for commit.
    :param revprops: Optional dictionary of revision properties.
    :param revision_id: Optional revision id.
    :param lossy: Whether to discard data that can not be natively
        represented, when pushing to a foreign VCS
    :raises errors.BzrError: if the repository has fallback repositories
        and its format does not support chks.
    """
    if self._fallback_repositories:
        if not self._format.supports_chks:
            raise errors.BzrError(
                "Cannot commit directly to a stacked branch"
                " in pre-2a formats. See "
                "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
    builder = self._commit_builder_class(
        self, parents, config, timestamp, timezone, committer, revprops,
        revision_id, lossy)
    # The write group is started only after the builder was constructed.
    self.start_write_group()
    return builder
|
1246 |
||
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
1247 |
def get_missing_parent_inventories(self, check_for_missing_texts=True):
    """Return the keys of missing inventory parents for revisions added in
    this write group.

    A revision is not complete if the inventory delta for that revision
    cannot be calculated.  Therefore if the parent inventories of a
    revision are not present, the revision is incomplete, and e.g. cannot
    be streamed by a smart server.  This method finds missing inventory
    parents for revisions added in this write group.

    :param check_for_missing_texts: When false, every missing parent
        inventory is reported without looking at the texts.
    :return: A set of ('inventories', revision_id) keys; empty when
        nothing is missing.
    """
    if not self._format.supports_external_lookups:
        # This is only an issue for stacked repositories
        return set()
    if not self.is_in_write_group():
        raise AssertionError('not in a write group')

    # XXX: We assume that every added revision already has its
    # corresponding inventory, so we only check for parent inventories that
    # might be missing, rather than all inventories.
    revisions_index = self.revisions._index
    missing_parents = set(revisions_index.get_missing_parents())
    missing_parents.discard(_mod_revision.NULL_REVISION)
    inventories_index = self.inventories._index
    present_inventories = inventories_index.get_parent_map(
        key[-1:] for key in missing_parents)
    missing_parents.difference_update(present_inventories)
    if not missing_parents:
        # No missing parent inventories.
        return set()
    if not check_for_missing_texts:
        return set(('inventories', rev_id)
                   for (rev_id,) in missing_parents)
    # Ok, now we have a list of missing inventories.  But these only matter
    # if the inventories that reference them are missing some texts they
    # appear to introduce.
    # XXX: Texts referenced by all added inventories need to be present,
    # but at the moment we're only checking for texts referenced by
    # inventories at the graph's edge.
    key_deps = revisions_index._key_dependencies
    key_deps.satisfy_refs_for_keys(present_inventories)
    referrers = frozenset(ref[0] for ref in key_deps.get_referrers())
    altered = self.fileids_altered_by_revision_ids(referrers)
    text_keys = set()
    for file_id, version_ids in altered.iteritems():
        for version_id in version_ids:
            text_keys.add((file_id, version_id))
    present_texts = self.texts.get_parent_map(text_keys)
    text_keys.difference_update(present_texts)
    if not text_keys:
        # No texts are missing, so all revisions and their deltas are
        # reconstructable.
        return set()
    # Alternatively the text versions could be returned as the missing
    # keys, but this is likely to be less data.
    return set(('inventories', rev_id) for (rev_id,) in missing_parents)
|
1301 |
||
1302 |
@needs_read_lock
|
|
1303 |
def has_revisions(self, revision_ids):
    """Probe to find out the presence of multiple revisions.

    :param revision_ids: An iterable of revision_ids.  It is traversed
        exactly once, so one-shot iterators are handled correctly.
    :return: A set of the revision_ids that were present.  NULL_REVISION
        is reported as present whenever it was asked for.
    """
    # Build the query keys and note NULL_REVISION in the same pass; doing
    # a separate membership test afterwards would find nothing if
    # revision_ids were an iterator that had already been consumed.
    keys = []
    asked_for_null = False
    for rev_id in revision_ids:
        if rev_id == _mod_revision.NULL_REVISION:
            asked_for_null = True
        keys.append((rev_id,))
    parent_map = self.revisions.get_parent_map(keys)
    result = set()
    if asked_for_null:
        result.add(_mod_revision.NULL_REVISION)
    result.update([key[0] for key in parent_map])
    return result
|
1316 |
||
1317 |
@needs_read_lock
|
|
1318 |
def get_revision_reconcile(self, revision_id):
    """'reconcile' helper routine that allows access to a revision always.

    Unlike get_revision, this variant does not cross check the weave
    graph against the revision one.  It should only be used by reconcile,
    or reconcile-alike commands that are correcting or testing the
    revision graph.

    :param revision_id: The id of the revision to fetch.
    :return: The first item returned by _get_revisions for that id.
    """
    revisions = self._get_revisions([revision_id])
    return revisions[0]
|
1327 |
||
1328 |
@needs_read_lock
|
|
1329 |
def get_revisions(self, revision_ids):
    """Fetch several revisions in one call.

    This simply delegates to _get_revisions.  Repositories that need to
    check data on every revision read should subclass this method.

    :param revision_ids: The revision ids to fetch.
    :return: Whatever _get_revisions returns for those ids.
    """
    revisions = self._get_revisions(revision_ids)
    return revisions
|
1336 |
||
1337 |
@needs_read_lock
|
|
1338 |
def _get_revisions(self, revision_ids):
    """Core work logic to get many revisions without sanity checks.

    :param revision_ids: The revision ids to fetch.
    :return: A list of revisions, in the order of revision_ids.
    :raises errors.NoSuchRevision: if _iter_revisions reports a revision
        as absent (None).
    """
    by_id = {}
    for found_id, revision in self._iter_revisions(revision_ids):
        if revision is None:
            raise errors.NoSuchRevision(self, found_id)
        by_id[found_id] = revision
    # Re-establish the caller's ordering.
    return [by_id[wanted_id] for wanted_id in revision_ids]
|
1346 |
||
1347 |
def _iter_revisions(self, revision_ids):
    """Iterate over revision objects.

    :param revision_ids: An iterable of revisions to examine. None may be
        passed to request all revisions known to the repository. Note that
        not all repositories can find unreferenced revisions; for those
        repositories only referenced ones will be returned.
    :return: An iterator of (revid, revision) tuples. Absent revisions (
        those asked for but not available) are returned as (revid, None).
    :raises errors.InvalidRevisionId: if an explicitly supplied id is
        empty or is not a string.
    """
    if revision_ids is None:
        keys = [(rev_id,) for rev_id in self.all_revision_ids()]
    else:
        # Validate and build the keys in one pass, so that a one-shot
        # iterable is not exhausted by validation before it is used.
        keys = []
        for rev_id in revision_ids:
            if not rev_id or not isinstance(rev_id, basestring):
                raise errors.InvalidRevisionId(
                    revision_id=rev_id, branch=self)
            keys.append((rev_id,))
    stream = self.revisions.get_record_stream(keys, 'unordered', True)
    for record in stream:
        revid = record.key[0]
        if record.storage_kind == 'absent':
            yield (revid, None)
        else:
            text = record.get_bytes_as('fulltext')
            rev = self._serializer.read_revision_from_string(text)
            yield (revid, rev)
|
1373 |
||
1374 |
@needs_write_lock
|
|
1375 |
def add_signature_text(self, revision_id, signature):
    """Record the signature text of a revision.

    The signature is split into lines and added to the signatures store
    under the key (revision_id,) with no parents.

    :param revision_id: Revision id of the revision
    :param signature: Signature text.
    """
    signature_key = (revision_id,)
    signature_lines = osutils.split_lines(signature)
    self.signatures.add_lines(signature_key, (), signature_lines)
|
1383 |
||
1384 |
def find_text_key_references(self):
    """Find the text key references within the repository.

    :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
        to whether they were referred to by the inventory of the
        revision_id that they contain. The inventory texts from all present
        revision ids are assessed to generate this report.
    """
    all_revision_keys = self.revisions.keys()
    inventories = self.inventories
    progress = ui.ui_factory.nested_progress_bar()
    try:
        inventory_lines = inventories.iter_lines_added_or_present_in_keys(
            all_revision_keys, pb=progress)
        return self._serializer._find_text_key_references(inventory_lines)
    finally:
        # Always release the progress bar, even if scanning fails.
        progress.finished()
|
1400 |
||
1401 |
def _inventory_xml_lines_for_keys(self, keys):
    """Get a line iterator of the sort needed for finding references.

    Not relevant for non-xml inventory repositories.

    Ghosts in keys are ignored.

    :param keys: The revision keys for the inventories to inspect.
    :return: An iterator over (inventory line, revid) for the fulltexts of
        all of the xml inventories specified by keys.
    """
    records = self.inventories.get_record_stream(keys, 'unordered', True)
    for record in records:
        if record.storage_kind == 'absent':
            # Absent records are skipped silently.
            continue
        chunks = record.get_bytes_as('chunked')
        revid = record.key[-1]
        for line in osutils.chunks_to_lines(chunks):
            yield line, revid
|
1420 |
||
1421 |
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
                                            revision_keys):
    """Helper routine for fileids_altered_by_revision_ids.

    This performs the translation of xml lines to revision ids.  Text keys
    that are also referenced by the inventories of the parents of
    revision_keys are excluded from the result.

    :param line_iterator: An iterator of lines, origin_version_id
    :param revision_keys: The revision ids to filter for. This should be a
        set or other type which supports efficient __contains__ lookups, as
        the revision key from each parsed line will be looked up in the
        revision_keys filter.
    :return: a dictionary mapping altered file-ids to an iterable of
        revision_ids. Each altered file-ids has the exact revision_ids that
        altered it listed explicitly.
    """
    find_references = self._serializer._find_text_key_references
    referenced = set(find_references(line_iterator).iterkeys())
    parent_keys = self._find_parent_keys_of_revisions(revision_keys)
    parent_lines = self._inventory_xml_lines_for_keys(parent_keys)
    referenced_by_parents = set(find_references(parent_lines))
    altered = {}
    for text_key in referenced - referenced_by_parents:
        file_id = text_key[0]
        if file_id not in altered:
            altered[file_id] = set()
        altered[file_id].add(text_key[-1])
    return altered
|
1447 |
||
1448 |
def _find_parent_keys_of_revisions(self, revision_keys):
    """Similar to _find_parent_ids_of_revisions, but used with keys.

    :param revision_keys: An iterable of revision_keys.
    :return: The parents of all revision_keys that are not already in
        revision_keys
    """
    parent_map = self.revisions.get_parent_map(revision_keys)
    parent_keys = set()
    # An explicit loop rather than map(): the call is made only for its
    # side effect, and a loop does not rely on map() being eager.
    for parents in parent_map.itervalues():
        parent_keys.update(parents)
    parent_keys.difference_update(revision_keys)
    parent_keys.discard(_mod_revision.NULL_REVISION)
    return parent_keys
|
1461 |
||
1462 |
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
    """Find the file ids and versions affected by revisions.

    :param revision_ids: an iterable containing revision ids.
    :param _inv_weave: The inventory weave from this repository or None.
        If None (or otherwise false), self.inventories is used.
    :return: a dictionary mapping altered file-ids to an iterable of
        revision_ids. Each altered file-ids has the exact revision_ids that
        altered it listed explicitly.
    """
    selected_keys = set((revid,) for revid in revision_ids)
    if _inv_weave:
        inventories = _inv_weave
    else:
        inventories = self.inventories
    inventory_lines = inventories.iter_lines_added_or_present_in_keys(
        selected_keys, pb=None)
    return self._find_file_ids_from_xml_inventory_lines(
        inventory_lines, selected_keys)
|
1478 |
||
1479 |
def iter_files_bytes(self, desired_files):
    """Iterate through file versions.

    Files will not necessarily be returned in the order they occur in
    desired_files.  No specific order is guaranteed.

    Yields pairs of identifier, bytes_iterator.  identifier is an opaque
    value supplied by the caller as part of desired_files.  It should
    uniquely identify the file version in the caller's context.  (Examples:
    an index number or a TreeTransform trans_id.)

    bytes_iterator is an iterable of bytestrings for the file.  The
    kind of iterable and length of the bytestrings are unspecified, but for
    this implementation, it is the 'chunked' form of a record produced by
    VersionedFile.get_record_stream().

    :param desired_files: a list of (file_id, revision_id, identifier)
        triples
    :raises errors.RevisionNotPresent: if a requested text is absent.
    """
    identifiers = dict(
        ((file_id, revision_id), identifier)
        for file_id, revision_id, identifier in desired_files)
    stream = self.texts.get_record_stream(identifiers, 'unordered', True)
    for record in stream:
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        yield identifiers[record.key], record.get_bytes_as('chunked')
|
1505 |
||
1506 |
def _generate_text_key_index(self, text_key_references=None,
                             ancestors=None):
    """Generate a new text key index for the repository.

    This is an expensive function that will take considerable time to run.

    :param text_key_references: Optional precomputed result of
        find_text_key_references(); computed here when None.
    :param ancestors: Optional parent map of all revision ids; computed
        from the repository graph when None.
    :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
        list of parents, also text keys. When a given key has no parents,
        the parents list will be [NULL_REVISION].
    """
    # All revisions, to find inventory parents.
    if ancestors is None:
        revision_graph = self.get_graph()
        ancestors = revision_graph.get_parent_map(self.all_revision_ids())
    if text_key_references is None:
        text_key_references = self.find_text_key_references()
    progress = ui.ui_factory.nested_progress_bar()
    try:
        text_index = self._do_generate_text_key_index(
            ancestors, text_key_references, progress)
    finally:
        progress.finished()
    return text_index
|
1528 |
||
1529 |
def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
    """Helper for _generate_text_key_index to avoid deep nesting.

    :param ancestors: Mapping of revision id to its parent revision ids.
        It is topologically sorted and also indexed by revision id.
    :param text_key_references: Mapping of text key ((file_id,
        revision_id) tuple) to a flag.  Keys whose flag is false are
        recorded with [NULL_REVISION] as their parents.
    :param pb: Progress bar; only its update() method is used here.
    :return: A dict mapping each text key to a list of parent text keys,
        or to [NULL_REVISION] when no parent was found.
    """
    revision_order = tsort.topo_sort(ancestors)
    invalid_keys = set()
    # revision id -> set of valid text keys introduced in that revision.
    revision_keys = {}
    for revision_id in revision_order:
        revision_keys[revision_id] = set()
    text_count = len(text_key_references)
    # a cache of the text keys to allow reuse; costs a dict of all the
    # keys, but saves a 2-tuple for every child of a given key.
    text_key_cache = {}
    for text_key, valid in text_key_references.iteritems():
        if not valid:
            invalid_keys.add(text_key)
        else:
            revision_keys[text_key[1]].add(text_key)
        text_key_cache[text_key] = text_key
    del text_key_references
    # text_index is the result being built; text_graph is constructed over
    # the same dict, so heads() below consults the entries written so far.
    text_index = {}
    text_graph = graph.Graph(graph.DictParentsProvider(text_index))
    NULL_REVISION = _mod_revision.NULL_REVISION
    # Set a cache with a size of 10 - this suffices for bzr.dev but may be
    # too small for large or very branchy trees. However, for 55K path
    # trees, it would be easy to use too much memory trivially. Ideally we
    # could gauge this by looking at available real memory etc, but this is
    # always a tricky proposition.
    inventory_cache = lru_cache.LRUCache(10)
    batch_size = 10 # should be ~150MB on a 55K path tree
    # NOTE(review): relies on Python 2 integer division of two ints.
    batch_count = len(revision_order) / batch_size + 1
    processed_texts = 0
    pb.update("Calculating text parents", processed_texts, text_count)
    for offset in xrange(batch_count):
        to_query = revision_order[offset * batch_size:(offset + 1) *
            batch_size]
        if not to_query:
            break
        for revision_id in to_query:
            parent_ids = ancestors[revision_id]
            for text_key in revision_keys[revision_id]:
                pb.update("Calculating text parents", processed_texts)
                processed_texts += 1
                candidate_parents = []
                for parent_id in parent_ids:
                    # First guess: the same file id as of the parent
                    # revision itself.
                    parent_text_key = (text_key[0], parent_id)
                    try:
                        check_parent = parent_text_key not in \
                            revision_keys[parent_id]
                    except KeyError:
                        # the parent parent_id is a ghost:
                        check_parent = False
                        # truncate the derived graph against this ghost.
                        parent_text_key = None
                    if check_parent:
                        # look at the parent commit details inventories to
                        # determine possible candidates in the per file graph.
                        # TODO: cache here.
                        try:
                            inv = inventory_cache[parent_id]
                        except KeyError:
                            inv = self.revision_tree(parent_id).inventory
                            inventory_cache[parent_id] = inv
                        try:
                            parent_entry = inv[text_key[0]]
                        except (KeyError, errors.NoSuchId):
                            # The file id is not in the parent inventory.
                            parent_entry = None
                        if parent_entry is not None:
                            parent_text_key = (
                                text_key[0], parent_entry.revision)
                        else:
                            parent_text_key = None
                    if parent_text_key is not None:
                        candidate_parents.append(
                            text_key_cache[parent_text_key])
                parent_heads = text_graph.heads(candidate_parents)
                new_parents = list(parent_heads)
                # Order the heads by their first position in
                # candidate_parents, i.e. by parent order.
                new_parents.sort(key=lambda x:candidate_parents.index(x))
                if new_parents == []:
                    new_parents = [NULL_REVISION]
                text_index[text_key] = new_parents

    for text_key in invalid_keys:
        text_index[text_key] = [NULL_REVISION]
    return text_index
|
1612 |
||
1613 |
def item_keys_introduced_by(self, revision_ids, _files_pb=None):
    """Get an iterable listing the keys of all the data introduced by a set
    of revision IDs.

    The keys will be ordered so that the corresponding items can be safely
    fetched and inserted in that order: the file keys come first, then the
    non-file keys.

    :param revision_ids: The revision ids to examine.
    :param _files_pb: Optional progress bar handed to the file-key pass.
    :returns: An iterable producing tuples of (knit-kind, file-id,
        versions).  knit-kind is one of 'file', 'inventory', 'signatures',
        'revisions'.  file-id is None unless knit-kind is 'file'.
    """
    for file_item in self._find_file_keys_to_fetch(revision_ids, _files_pb):
        yield file_item
    # Drop our reference to the progress bar once the file pass is done.
    del _files_pb
    for other_item in self._find_non_file_keys_to_fetch(revision_ids):
        yield other_item
|
1629 |
||
1630 |
def _find_file_keys_to_fetch(self, revision_ids, pb):
    """Yield ("file", file_id, versions) for files altered by revision_ids.

    :param revision_ids: The revision ids to examine.
    :param pb: A progress bar to update per file id, or None.
    """
    # XXX: it's a bit weird to control the inventory weave caching in this
    # generator.  Ideally the caching would be done in fetch.py I think.  Or
    # maybe this generator should explicitly have the contract that it
    # should not be iterated until the previously yielded item has been
    # processed?
    inventories = self.inventories

    # file ids that changed
    altered = self.fileids_altered_by_revision_ids(revision_ids, inventories)
    total = len(altered)
    for position, (file_id, versions) in enumerate(altered.iteritems()):
        if pb is not None:
            pb.update("Fetch texts", position, total)
        yield ("file", file_id, versions)
|
1647 |
||
1648 |
def _find_non_file_keys_to_fetch(self, revision_ids):
    """Yield the inventory, signatures and revisions items, in that order.

    :param revision_ids: The revision ids to examine.
    """
    # inventory
    yield ("inventory", None, revision_ids)

    # signatures
    # XXX: Note ATM no callers actually pay attention to this return
    #      instead they just use the list of revision ids and ignore
    #      missing sigs. Consider removing this work entirely
    query_keys = [(revid,) for revid in revision_ids]
    signed_keys = set(self.signatures.get_parent_map(query_keys))
    signed_revisions = set([revid for (revid,) in signed_keys])
    signed_revisions.intersection_update(revision_ids)
    yield ("signatures", None, signed_revisions)

    # revisions
    yield ("revisions", None, revision_ids)
|
1665 |
||
1666 |
@needs_read_lock
|
|
1667 |
def get_inventory(self, revision_id):
    """Return the Inventory object for a single revision id.

    :param revision_id: The revision whose inventory is wanted.
    :return: The first inventory produced by iter_inventories.
    """
    inventories = self.iter_inventories([revision_id])
    return inventories.next()
|
1670 |
||
1671 |
def iter_inventories(self, revision_ids, ordering=None):
    """Get many inventories by revision_ids.

    This will buffer some or all of the texts used in constructing the
    inventories in memory, but will only parse a single inventory at a
    time.

    :param revision_ids: The expected revision ids of the inventories.
    :param ordering: optional ordering, e.g. 'topological'.  If not
        specified, the order of revision_ids will be preserved (by
        buffering if necessary).
    :return: An iterator of inventories.
    :raises ValueError: if None or NULL_REVISION is among revision_ids.
    """
    asks_for_null = None in revision_ids
    if not asks_for_null:
        asks_for_null = _mod_revision.NULL_REVISION in revision_ids
    if asks_for_null:
        raise ValueError('cannot get null revision inventory')
    return self._iter_inventories(revision_ids, ordering)
|
1688 |
||
1689 |
def _iter_inventories(self, revision_ids, ordering):
    """single-document based inventory iteration.

    Each serialised inventory produced by _iter_inventory_xmls is
    deserialised and yielded in turn.

    :param revision_ids: The revision ids of the wanted inventories.
    :param ordering: Passed through to _iter_inventory_xmls.
    """
    for xml_text, revision_id in self._iter_inventory_xmls(revision_ids,
                                                           ordering):
        inventory = self._deserialise_inventory(revision_id, xml_text)
        yield inventory
|
1694 |
||
1695 |
def _iter_inventory_xmls(self, revision_ids, ordering):
    """Yield (serialised inventory, revision_id) pairs.

    :param revision_ids: The revision ids of the wanted inventories.
    :param ordering: Ordering handed to get_record_stream.  None means the
        results are yielded in the order of revision_ids; the stream is
        then read 'unordered' and texts are buffered until their turn.
    :raises errors.NoSuchRevision: if the stream reports a record as
        absent.
    """
    preserve_order = ordering is None
    if preserve_order:
        ordering = 'unordered'
    keys = [(revision_id,) for revision_id in revision_ids]
    if not keys:
        return
    if preserve_order:
        pending_keys = iter(keys)
        wanted_key = pending_keys.next()
    stream = self.inventories.get_record_stream(keys, ordering, True)
    buffered = {}
    for record in stream:
        if record.storage_kind == 'absent':
            raise errors.NoSuchRevision(self, record.key)
        chunks = record.get_bytes_as('chunked')
        if not preserve_order:
            yield ''.join(chunks), record.key[-1]
            continue
        buffered[record.key] = chunks
        # Yield as many results as we can while preserving order.
        while wanted_key in buffered:
            chunks = buffered.pop(wanted_key)
            yield ''.join(chunks), wanted_key[-1]
            try:
                wanted_key = pending_keys.next()
            except StopIteration:
                # We still want to fully consume the get_record_stream,
                # just in case it is not actually finished at this point
                wanted_key = None
                break
|
|
1730 |
||
1731 |
def _deserialise_inventory(self, revision_id, xml):
    """Transform the xml into an inventory object.

    :param revision_id: The expected revision id of the inventory.
    :param xml: A serialised inventory.
    :return: The deserialised inventory.
    :raises AssertionError: if the inventory's revision id differs from
        revision_id.
    """
    inventory = self._serializer.read_inventory_from_string(
        xml, revision_id,
        entry_cache=self._inventory_entry_cache,
        return_from_cache=self._safe_to_return_from_cache)
    if inventory.revision_id == revision_id:
        return inventory
    raise AssertionError('revision id mismatch %s != %s' % (
        inventory.revision_id, revision_id))
|
1744 |
||
1745 |
def get_serializer_format(self):
    """Return the format_num attribute of this repository's serializer."""
    serializer = self._serializer
    return serializer.format_num
|
1747 |
||
1748 |
@needs_read_lock
|
|
1749 |
def _get_inventory_xml(self, revision_id):
    """Get serialized inventory as a string.

    :param revision_id: The revision whose inventory text is wanted.
    :raises errors.HistoryMissing: if no inventory text is produced.
    """
    xml_iter = self._iter_inventory_xmls([revision_id], 'unordered')
    try:
        text, revision_id = xml_iter.next()
    except StopIteration:
        raise errors.HistoryMissing(self, 'inventory', revision_id)
    else:
        return text
|
1757 |
||
1758 |
@needs_read_lock
|
|
1759 |
def revision_tree(self, revision_id):
    """Return Tree for a revision on this branch.

    `revision_id` may be NULL_REVISION for the empty tree revision, in
    which case the tree wraps a fresh Inventory with no root.
    """
    revision_id = _mod_revision.ensure_null(revision_id)
    # TODO: refactor this to use an existing revision object
    # so we don't need to read it in twice.
    if revision_id == _mod_revision.NULL_REVISION:
        empty_inventory = Inventory(root_id=None)
        return InventoryRevisionTree(
            self, empty_inventory, _mod_revision.NULL_REVISION)
    inventory = self.get_inventory(revision_id)
    return InventoryRevisionTree(self, inventory, revision_id)
|
1773 |
||
1774 |
def revision_trees(self, revision_ids):
    """Yield a Tree for each of the given revisions of this repository.

    :param revision_ids: a sequence of revision-ids;
        a revision-id may not be None or 'null:'
    """
    for inventory in self.iter_inventories(revision_ids):
        tree_revision_id = inventory.revision_id
        yield InventoryRevisionTree(self, inventory, tree_revision_id)
|
1783 |
||
1784 |
def _filtered_revision_trees(self, revision_ids, file_ids):
    """Yield a Tree per revision, restricted to some files.

    :param revision_ids: a sequence of revision-ids;
        a revision-id may not be None or 'null:'
    :param file_ids: if not None, the result is filtered
        so that only those file-ids, their parents and their
        children are included.
    """
    for inventory in self.iter_inventories(revision_ids):
        # Should we introduce a FilteredRevisionTree class rather
        # than pre-filter the inventory here?
        filtered = inventory.filter(file_ids)
        yield InventoryRevisionTree(self, filtered, filtered.revision_id)
|
1799 |
||
1800 |
def get_parent_map(self, revision_ids):
    """See graph.StackedParentsProvider.get_parent_map

    :param revision_ids: The revision ids to look up.
    :return: A dict mapping each found revision id to a tuple of parent
        revision ids.  NULL_REVISION maps to (); a revision with no
        parents maps to (NULL_REVISION,).
    :raises ValueError: if None is among revision_ids.
    """
    # revisions index works in keys; this just works in revisions
    # therefore wrap and unwrap
    parents_by_revision = {}
    keys_to_query = []
    for revision_id in revision_ids:
        if revision_id == _mod_revision.NULL_REVISION:
            parents_by_revision[revision_id] = ()
        elif revision_id is None:
            raise ValueError('get_parent_map(None) is not valid')
        else:
            keys_to_query.append((revision_id,))
    key_parent_map = self.revisions.get_parent_map(keys_to_query)
    for (revision_id,), parent_keys in key_parent_map.iteritems():
        if not parent_keys:
            parents_by_revision[revision_id] = (
                _mod_revision.NULL_REVISION,)
            continue
        parents_by_revision[revision_id] = tuple(
            [parent_key[0] for parent_key in parent_keys
             if len(parent_key) == 1 or _unpack_error(parent_key)])
    return parents_by_revision


def _unpack_error(parent_key):
    """Raise the same ValueError tuple unpacking of a 1-tuple would."""
    (parent_revid,) = parent_key
    return True
|
1821 |
||
1822 |
@needs_read_lock
|
|
1823 |
def get_known_graph_ancestry(self, revision_ids):
    """Return the known graph for a set of revision ids and their ancestors.

    :param revision_ids: The revision ids to start from.
    :return: The revisions store's known graph for those keys, wrapped in
        graph.GraphThunkIdsToKeys.
    """
    make_key = static_tuple.StaticTuple
    revision_keys = []
    for r_id in revision_ids:
        revision_keys.append(make_key(r_id).intern())
    key_graph = self.revisions.get_known_graph_ancestry(revision_keys)
    return graph.GraphThunkIdsToKeys(key_graph)
|
1830 |
||
5815.5.5
by Jelmer Vernooij
Add more tests. |
1831 |
@needs_read_lock
|
5815.5.8
by Jelmer Vernooij
Use traditional (fileid, revision) entries in file graph. |
1832 |
def get_file_graph(self):
    """Return the graph walker for text revisions.

    The graph is built directly over this repository's texts store.
    """
    texts = self.texts
    return graph.Graph(texts)
|
5815.5.5
by Jelmer Vernooij
Add more tests. |
1835 |
|
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
1836 |
def _get_versioned_file_checker(self, text_key_references=None,
                                ancestors=None):
    """Build an object suitable for checking versioned files.

    :param text_key_references: if non-None, an already built
        dictionary mapping text keys ((fileid, revision_id) tuples)
        to whether they were referred to by the inventory of the
        revision_id that they contain. If None, this will be
        calculated.
    :param ancestors: Optional result from
        self.get_graph().get_parent_map(self.all_revision_ids()) if already
        available.
    """
    checker = _VersionedFileChecker(
        self,
        text_key_references=text_key_references,
        ancestors=ancestors)
    return checker
|
1851 |
||
1852 |
@needs_read_lock
|
|
1853 |
def has_signature_for_revision_id(self, revision_id):
    """Query for a revision signature for revision_id in the repository.

    :param revision_id: The revision to look up.
    :return: True if the signatures store has exactly one entry for the
        revision, False otherwise.
    :raises errors.NoSuchRevision: if the revision itself is not present.
    """
    if not self.has_revision(revision_id):
        raise errors.NoSuchRevision(self, revision_id)
    signature_map = self.signatures.get_parent_map([(revision_id,)])
    return len(signature_map) == 1
|
1860 |
||
1861 |
@needs_read_lock
|
|
1862 |
def get_signature_text(self, revision_id):
    """Return the text for a signature.

    :param revision_id: The revision whose signature is wanted.
    :return: The fulltext of the signature record.
    :raises errors.NoSuchRevision: if the signature record is absent.
    """
    signature_key = (revision_id,)
    stream = self.signatures.get_record_stream(
        [signature_key], 'unordered', True)
    record = stream.next()
    if record.storage_kind != 'absent':
        return record.get_bytes_as('fulltext')
    raise errors.NoSuchRevision(self, revision_id)
|
1870 |
||
5850.1.6
by Jelmer Vernooij
Reintroduce double indirection, raise NotImplementedError from _check rather than |
1871 |
@needs_read_lock
def _check(self, revision_ids, callback_refs, check_repo):
    """Run a VersionedFileCheck over this repository and return it.

    :param revision_ids: Accepted but not used by this implementation.
    :param callback_refs: Passed straight to the checker's check().
    :param check_repo: Passed to the VersionedFileCheck constructor.
    :return: The check.VersionedFileCheck object after check() has run.
    """
    checker = check.VersionedFileCheck(self, check_repo=check_repo)
    checker.check(callback_refs)
    return checker
|
1876 |
||
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
1877 |
def _find_inconsistent_revision_parents(self, revisions_iterator=None): |
1878 |
"""Find revisions with different parent lists in the revision object
|
|
1879 |
and in the index graph.
|
|
1880 |
||
1881 |
:param revisions_iterator: None, or an iterator of (revid,
|
|
1882 |
Revision-or-None). This iterator controls the revisions checked.
|
|
1883 |
:returns: an iterator yielding tuples of (revison-id, parents-in-index,
|
|
1884 |
parents-in-revision).
|
|
1885 |
"""
|
|
1886 |
if not self.is_locked(): |
|
1887 |
raise AssertionError() |
|
1888 |
vf = self.revisions |
|
1889 |
if revisions_iterator is None: |
|
1890 |
revisions_iterator = self._iter_revisions(None) |
|
1891 |
for revid, revision in revisions_iterator: |
|
1892 |
if revision is None: |
|
1893 |
pass
|
|
1894 |
parent_map = vf.get_parent_map([(revid,)]) |
|
1895 |
parents_according_to_index = tuple(parent[-1] for parent in |
|
1896 |
parent_map[(revid,)]) |
|
1897 |
parents_according_to_revision = tuple(revision.parent_ids) |
|
1898 |
if parents_according_to_index != parents_according_to_revision: |
|
1899 |
yield (revid, parents_according_to_index, |
|
1900 |
parents_according_to_revision) |
|
1901 |
||
1902 |
def _check_for_inconsistent_revision_parents(self): |
|
1903 |
inconsistencies = list(self._find_inconsistent_revision_parents()) |
|
1904 |
if inconsistencies: |
|
1905 |
raise errors.BzrCheckError( |
|
1906 |
"Revision knit has inconsistent parents.") |
|
1907 |
||
1908 |
def _get_sink(self):
    """Create the StreamSink that inserts streams into this repository."""
    sink = StreamSink(self)
    return sink
|
1911 |
||
1912 |
def _get_source(self, to_format):
    """Create the StreamSource that streams out of this repository.

    :param to_format: The format the stream is destined for; handed to
        the StreamSource constructor.
    """
    source = StreamSource(self, to_format)
    return source
|
1915 |
||
1916 |
||
1917 |
class MetaDirVersionedFileRepository(MetaDirRepository,
                                     VersionedFileRepository):
    """Repositories in a meta-dir, that work via versioned file objects."""

    def __init__(self, _format, a_bzrdir, control_files):
        """Initialise by delegating all arguments to the base classes."""
        base = super(MetaDirVersionedFileRepository, self)
        base.__init__(_format, a_bzrdir, control_files)
|
1924 |
||
1925 |
||
5815.4.5
by Jelmer Vernooij
Use MetaDirVersionedFileRepositoryFormat (a Soyuz worthy name). |
1926 |
class MetaDirVersionedFileRepositoryFormat(MetaDirRepositoryFormat,
                                           VersionedFileRepositoryFormat):
    """Base class for repository formats using versioned files in metadirs."""
|
|
1929 |
||
1930 |
||
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
1931 |
class StreamSink(object): |
1932 |
"""An object that can insert a stream into a repository.
|
|
1933 |
||
1934 |
This interface handles the complexity of reserialising inventories and
|
|
1935 |
revisions from different formats, and allows unidirectional insertion into
|
|
1936 |
stacked repositories without looking for the missing basis parents
|
|
1937 |
beforehand.
|
|
1938 |
"""
|
|
1939 |
||
1940 |
def __init__(self, target_repo): |
|
1941 |
self.target_repo = target_repo |
|
1942 |
||
1943 |
def insert_stream(self, stream, src_format, resume_tokens):
    """Insert a stream's content into the target repository.

    The target repository is write-locked for the duration of the call. A
    write group is resumed when resume_tokens is non-empty, otherwise a new
    one is started. The write group is suspended if keys are still missing
    after insertion, committed otherwise, and aborted on any exception.

    :param stream: The stream handed to insert_stream_without_locking.
    :param src_format: a bzr repository format.
    :param resume_tokens: Tokens of a previously suspended write group, or
        an empty value to start afresh.
    :return: a list of resume tokens and an iterable of keys additional
        items required before the insertion can be completed.
    """
    repo = self.target_repo
    repo.lock_write()
    try:
        resuming = bool(resume_tokens)
        if resuming:
            repo.resume_write_group(resume_tokens)
        else:
            repo.start_write_group()
        try:
            # The commit or suspend of the write group happens below, not
            # in insert_stream_without_locking.
            still_missing = self.insert_stream_without_locking(
                stream, src_format, resuming)
            if still_missing:
                # Suspend the write group and tell the caller what is
                # missing. We know we can suspend or else we would not
                # have entered this code path. (All repositories that can
                # handle missing keys can handle suspending a write group).
                tokens = repo.suspend_write_group()
                return tokens, still_missing
            pack_hint = repo.commit_write_group()
            target_serializer = repo._format._serializer
            source_serializer = src_format._serializer
            if (target_serializer != source_serializer and
                    repo._format.pack_compresses):
                repo.pack(hint=pack_hint)
            return [], set()
        except:
            # Deliberately catches everything: abort, then re-raise.
            repo.abort_write_group(suppress_errors=True)
            raise
    finally:
        repo.unlock()
|
1982 |
||
1983 |
def insert_stream_without_locking(self, stream, src_format,
                                  is_resume=False):
    """Insert a stream's content into the target repository.

    This assumes that you already have a locked repository and an active
    write group.

    :param stream: An iterable of (substream_type, substream) pairs.
        Recognised types are 'texts', 'inventories', 'inventory-deltas',
        'chk_bytes', 'revisions' and 'signatures'.
    :param src_format: a bzr repository format.
    :param is_resume: Passed down to get_missing_parent_inventories to
        indicate if we should be checking for missing texts at the same
        time.

    :raises errors.ObjectNotLocked: if the target is not write locked.
    :raises errors.BzrError: if the target is not in a write group.
    :raises AssertionError: on an unrecognised substream type.
    :return: A set of keys that are missing.
    """
    if not self.target_repo.is_write_locked():
        raise errors.ObjectNotLocked(self)
    if not self.target_repo.is_in_write_group():
        raise errors.BzrError('you must already be in a write group')
    to_serializer = self.target_repo._format._serializer
    src_serializer = src_format._serializer
    new_pack = None
    if to_serializer == src_serializer:
        # If serializers match and the target is a pack repository, set the
        # write cache size on the new pack.  This avoids poor performance
        # on transports where append is unbuffered (such as
        # RemoteTransport).  This is safe to do because nothing should read
        # back from the target repository while a stream with matching
        # serialization is being inserted.
        # The exception is that a delta record from the source that should
        # be a fulltext may need to be expanded by the target (see
        # test_fetch_revisions_with_deltas_into_pack); but we take care to
        # explicitly flush any buffered writes first in that rare case.
        try:
            new_pack = self.target_repo._pack_collection._new_pack
        except AttributeError:
            # Not a pack repository
            pass
        else:
            new_pack.set_write_cache_size(1024*1024)
    for substream_type, substream in stream:
        if 'stream' in debug.debug_flags:
            mutter('inserting substream: %s', substream_type)
        if substream_type == 'texts':
            self.target_repo.texts.insert_record_stream(substream)
        elif substream_type == 'inventories':
            # Matching serializers allow a direct record copy; otherwise
            # each inventory is deserialised and re-added.
            if src_serializer == to_serializer:
                self.target_repo.inventories.insert_record_stream(
                    substream)
            else:
                self._extract_and_insert_inventories(
                    substream, src_serializer)
        elif substream_type == 'inventory-deltas':
            self._extract_and_insert_inventory_deltas(
                substream, src_serializer)
        elif substream_type == 'chk_bytes':
            # XXX: This doesn't support conversions, as it assumes the
            #      conversion was done in the fetch code.
            self.target_repo.chk_bytes.insert_record_stream(substream)
        elif substream_type == 'revisions':
            # This may fallback to extract-and-insert more often than
            # required if the serializers are different only in terms of
            # the inventory.
            if src_serializer == to_serializer:
                self.target_repo.revisions.insert_record_stream(substream)
            else:
                self._extract_and_insert_revisions(substream,
                    src_serializer)
        elif substream_type == 'signatures':
            self.target_repo.signatures.insert_record_stream(substream)
        else:
            raise AssertionError('kaboom! %s' % (substream_type,))
    # Done inserting data, and the missing_keys calculations will try to
    # read back from the inserted data, so flush the writes to the new pack
    # (if this is pack format).
    if new_pack is not None:
        new_pack._write_data('', flush=True)
    # Find all the new revisions (including ones from resume_tokens)
    missing_keys = self.target_repo.get_missing_parent_inventories(
        check_for_missing_texts=is_resume)
    try:
        for prefix, versioned_file in (
            ('texts', self.target_repo.texts),
            ('inventories', self.target_repo.inventories),
            ('revisions', self.target_repo.revisions),
            ('signatures', self.target_repo.signatures),
            ('chk_bytes', self.target_repo.chk_bytes),
            ):
            if versioned_file is None:
                continue
            # TODO: key is often going to be a StaticTuple object
            #       I don't believe we can define a method by which
            #       (prefix,) + StaticTuple will work, though we could
            #       define a StaticTuple.sq_concat that would allow you to
            #       pass in either a tuple or a StaticTuple as the second
            #       object, so instead we could have:
            #       StaticTuple(prefix) + key here...
            # Each missing key is reported prefixed with the name of the
            # versioned file it belongs to.
            missing_keys.update((prefix,) + key for key in
                versioned_file.get_missing_compression_parent_keys())
    except NotImplementedError:
        # cannot even attempt suspending, and missing would have failed
        # during stream insertion.
        # NOTE: this also discards any missing parent inventories
        # collected above.
        missing_keys = set()
    return missing_keys
|
2086 |
||
2087 |
def _extract_and_insert_inventory_deltas(self, substream, serializer): |
|
2088 |
target_rich_root = self.target_repo._format.rich_root_data |
|
2089 |
target_tree_refs = self.target_repo._format.supports_tree_reference |
|
2090 |
for record in substream: |
|
2091 |
# Insert the delta directly
|
|
2092 |
inventory_delta_bytes = record.get_bytes_as('fulltext') |
|
2093 |
deserialiser = inventory_delta.InventoryDeltaDeserializer() |
|
2094 |
try: |
|
2095 |
parse_result = deserialiser.parse_text_bytes( |
|
2096 |
inventory_delta_bytes) |
|
2097 |
except inventory_delta.IncompatibleInventoryDelta, err: |
|
2098 |
mutter("Incompatible delta: %s", err.msg) |
|
2099 |
raise errors.IncompatibleRevision(self.target_repo._format) |
|
2100 |
basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result |
|
2101 |
revision_id = new_id |
|
2102 |
parents = [key[0] for key in record.parents] |
|
2103 |
self.target_repo.add_inventory_by_delta( |
|
2104 |
basis_id, inv_delta, revision_id, parents) |
|
2105 |
||
2106 |
def _extract_and_insert_inventories(self, substream, serializer, |
|
2107 |
parse_delta=None): |
|
2108 |
"""Generate a new inventory versionedfile in target, converting data.
|
|
2109 |
||
2110 |
The inventory is retrieved from the source, (deserializing it), and
|
|
2111 |
stored in the target (reserializing it in a different format).
|
|
2112 |
"""
|
|
2113 |
target_rich_root = self.target_repo._format.rich_root_data |
|
2114 |
target_tree_refs = self.target_repo._format.supports_tree_reference |
|
2115 |
for record in substream: |
|
2116 |
# It's not a delta, so it must be a fulltext in the source
|
|
2117 |
# serializer's format.
|
|
2118 |
bytes = record.get_bytes_as('fulltext') |
|
2119 |
revision_id = record.key[0] |
|
2120 |
inv = serializer.read_inventory_from_string(bytes, revision_id) |
|
2121 |
parents = [key[0] for key in record.parents] |
|
2122 |
self.target_repo.add_inventory(revision_id, inv, parents) |
|
2123 |
# No need to keep holding this full inv in memory when the rest of
|
|
2124 |
# the substream is likely to be all deltas.
|
|
2125 |
del inv |
|
2126 |
||
2127 |
def _extract_and_insert_revisions(self, substream, serializer): |
|
2128 |
for record in substream: |
|
2129 |
bytes = record.get_bytes_as('fulltext') |
|
2130 |
revision_id = record.key[0] |
|
2131 |
rev = serializer.read_revision_from_string(bytes) |
|
2132 |
if rev.revision_id != revision_id: |
|
2133 |
raise AssertionError('wtf: %s != %s' % (rev, revision_id)) |
|
2134 |
self.target_repo.add_revision(revision_id, rev) |
|
2135 |
||
2136 |
def finished(self):
    """Reconcile the target if its format has _fetch_reconcile set."""
    repo = self.target_repo
    if not repo._format._fetch_reconcile:
        return
    repo.reconcile()
|
2139 |
||
2140 |
||
2141 |
class StreamSource(object): |
|
2142 |
"""A source of a stream for fetching between repositories."""
|
|
2143 |
||
2144 |
def __init__(self, from_repository, to_format):
    """Create a StreamSource streaming from from_repository.

    :param from_repository: The repository the streams are read from.
    :param to_format: The repository format the streams are destined for.
    """
    self._record_counter = RecordCounter()
    self.to_format = to_format
    self.from_repository = from_repository
|
2149 |
||
2150 |
def delta_on_metadata(self):
    """Return True if delta's are permitted on metadata streams.

    That is on revisions and signatures.

    Deltas are permitted only when the target format fetches with deltas
    and both sides use the same serializer.
    """
    source_format = self.from_repository._format
    same_serializer = (
        source_format._serializer == self.to_format._serializer)
    return self.to_format._fetch_uses_deltas and same_serializer
|
2159 |
||
2160 |
def _fetch_revision_texts(self, revs):
    """Build the signature and revision substreams for revs.

    :param revs: Iterable of revision ids.
    :return: A two item list: ('signatures', stream) followed by
        ('revisions', stream).
    """
    # Signatures are fetched first and then the revision texts; this may
    # need to be an InterRevisionStore call.
    signature_store = self.from_repository.signatures
    record_keys = [(rev_id,) for rev_id in revs]
    # A missing signature is just skipped.
    signature_stream = versionedfile.filter_absent(
        signature_store.get_record_stream(
            record_keys,
            self.to_format._fetch_order,
            not self.to_format._fetch_uses_deltas))
    # If a revision has a delta, this is actually expanded inside the
    # insert_record_stream code now, which is an alternate fix for
    # bug #261339
    revision_store = self.from_repository.revisions
    revision_stream = revision_store.get_record_stream(
        record_keys,
        self.to_format._fetch_order,
        not self.delta_on_metadata())
    return [('signatures', signature_stream),
            ('revisions', revision_stream)]
|
2179 |
||
2180 |
def _generate_root_texts(self, revs): |
|
2181 |
"""This will be called by get_stream between fetching weave texts and
|
|
2182 |
fetching the inventory weave.
|
|
2183 |
"""
|
|
2184 |
if self._rich_root_upgrade(): |
|
2185 |
return _mod_fetch.Inter1and2Helper( |
|
2186 |
self.from_repository).generate_root_texts(revs) |
|
2187 |
else: |
|
2188 |
return [] |
|
2189 |
||
2190 |
def get_stream(self, search):
    """Generate the (substream_type, substream) pairs for a fetch.

    The revisions named by search are topologically sorted, then the items
    reported by item_keys_introduced_by() are walked in order. File text
    keys are accumulated until the inventory item is reached, at which
    point the texts, any generated root texts and the inventory streams are
    yielded. The signature and revision streams are yielded on the
    revisions item.

    :param search: An object whose get_keys() gives the revisions wanted.
    :raises AssertionError: on an unrecognised item kind.
    """
    phase = 'file'
    revs = search.get_keys()
    graph = self.from_repository.get_graph()
    revs = tsort.topo_sort(graph.get_parent_map(revs))
    data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
    text_keys = []
    for knit_kind, file_id, revisions in data_to_fetch:
        if knit_kind != phase:
            # Only the current phase is tracked here; no progress bar is
            # created in this method.
            phase = knit_kind
        if knit_kind == "file":
            # Accumulate file texts
            text_keys.extend([(file_id, revision) for revision in
                revisions])
        elif knit_kind == "inventory":
            # Now copy the file texts.
            from_texts = self.from_repository.texts
            yield ('texts', from_texts.get_record_stream(
                text_keys, self.to_format._fetch_order,
                not self.to_format._fetch_uses_deltas))
            # Cause an error if a text occurs after we have done the
            # copy.
            text_keys = None
            # Before we process the inventory we generate the root
            # texts (if necessary) so that the inventories references
            # will be valid.
            for _ in self._generate_root_texts(revs):
                yield _
            # we fetch only the referenced inventories because we do not
            # know for unselected inventories whether all their required
            # texts are present in the other repository - it could be
            # corrupt.
            for info in self._get_inventory_stream(revs):
                yield info
        elif knit_kind == "signatures":
            # Nothing to do here; this will be taken care of when
            # _fetch_revision_texts happens.
            pass
        elif knit_kind == "revisions":
            for record in self._fetch_revision_texts(revs):
                yield record
        else:
            raise AssertionError("Unknown knit kind %r" % knit_kind)
|
2234 |
||
2235 |
def get_stream_for_missing_keys(self, missing_keys):
    """Generate substreams that supply the given missing keys.

    :param missing_keys: Iterable of keys whose first element names the
        versioned file ('texts', 'revisions', 'inventories', 'chk_bytes'
        or 'signatures') and whose remainder is the key within it.
    :raises AssertionError: if any revisions keys are requested, or if
        keys are requested for a versioned file this repository lacks.
    """
    # missing keys can only occur when we are byte copying and not
    # translating (because translation means we don't send
    # unreconstructable deltas ever).
    wanted_by_kind = {}
    for kind in ('texts', 'revisions', 'inventories', 'chk_bytes',
                 'signatures'):
        wanted_by_kind[kind] = set()
    for missing_key in missing_keys:
        wanted_by_kind[missing_key[0]].add(missing_key[1:])
    if len(wanted_by_kind['revisions']):
        # If we allowed copying revisions at this point, we could end up
        # copying a revision without copying its required texts: a
        # violation of the requirements for repository integrity.
        raise AssertionError(
            'cannot copy revisions to fill in missing deltas %s' % (
                wanted_by_kind['revisions'],))
    for substream_kind, wanted in wanted_by_kind.iteritems():
        vf = getattr(self.from_repository, substream_kind)
        if not wanted:
            # No need to stream something we don't have
            continue
        if vf is None:
            raise AssertionError(
                "cannot fill in keys for a versioned file we don't"
                " have: %s needs %s" % (substream_kind, wanted))
        if substream_kind == 'inventories':
            # Some missing keys are genuinely ghosts, filter those out.
            present = self.from_repository.inventories.get_parent_map(
                wanted)
            revs = [present_key[0] for present_key in present]
            # Get the inventory stream more-or-less as we do for the
            # original stream; there's no reason to assume that records
            # direct from the source will be suitable for the sink.  (Think
            # e.g. 2a -> 1.9-rich-root).
            for info in self._get_inventory_stream(revs, missing=True):
                yield info
            continue

        # Ask for full texts always so that we don't need more round trips
        # after this stream.
        # Some of the missing keys are genuinely ghosts, so filter absent
        # records.  The Sink is responsible for doing another check to
        # ensure that ghosts don't introduce missing data for future
        # fetches.
        records = vf.get_record_stream(
            wanted, self.to_format._fetch_order, True)
        yield substream_kind, versionedfile.filter_absent(records)
|
2284 |
||
2285 |
def inventory_fetch_order(self):
    """Return the ordering to request for inventory records.

    'topological' is used for a rich-root upgrade; otherwise the target
    format's own _fetch_order applies.
    """
    if not self._rich_root_upgrade():
        return self.to_format._fetch_order
    return 'topological'
|
2290 |
||
2291 |
def _rich_root_upgrade(self): |
|
2292 |
return (not self.from_repository._format.rich_root_data and |
|
2293 |
self.to_format.rich_root_data) |
|
2294 |
||
2295 |
def _get_inventory_stream(self, revision_ids, missing=False):
    """Choose and return the inventory stream generator for a fetch.

    :param revision_ids: The revisions whose inventories are wanted.
    :param missing: Forwarded as missing= to the simple stream, or as
        delta_versus_null= to the convertable stream.
    :raises AssertionError: if both formats support chks and have the same
        network name; that case belongs to GroupCHKStreamSource.
    """
    source_format = self.from_repository._format
    target_format = self.to_format
    if (source_format.supports_chks and target_format.supports_chks and
            source_format.network_name() == target_format.network_name()):
        raise AssertionError(
            "this case should be handled by GroupCHKStreamSource")
    if 'forceinvdeltas' in debug.debug_flags:
        return self._get_convertable_inventory_stream(
            revision_ids, delta_versus_null=missing)
    # Same format, or essentially the same format (neither side uses chks
    # and the serializers match): records can be streamed as they are.
    use_simple = (
        source_format.network_name() == target_format.network_name()
        or (not source_format.supports_chks
            and not target_format.supports_chks
            and source_format._serializer == target_format._serializer))
    if use_simple:
        return self._get_simple_inventory_stream(
            revision_ids, missing=missing)
    # Any time we switch serializations, we want to use an
    # inventory-delta based approach.
    return self._get_convertable_inventory_stream(
        revision_ids, delta_versus_null=missing)
|
2318 |
||
2319 |
def _get_simple_inventory_stream(self, revision_ids, missing=False): |
|
2320 |
# NB: This currently reopens the inventory weave in source;
|
|
2321 |
# using a single stream interface instead would avoid this.
|
|
2322 |
from_weave = self.from_repository.inventories |
|
2323 |
if missing: |
|
2324 |
delta_closure = True |
|
2325 |
else: |
|
2326 |
delta_closure = not self.delta_on_metadata() |
|
2327 |
yield ('inventories', from_weave.get_record_stream( |
|
2328 |
[(rev_id,) for rev_id in revision_ids], |
|
2329 |
self.inventory_fetch_order(), delta_closure)) |
|
2330 |
||
2331 |
def _get_convertable_inventory_stream(self, revision_ids, |
|
2332 |
delta_versus_null=False): |
|
2333 |
# The two formats are sufficiently different that there is no fast
|
|
2334 |
# path, so we need to send just inventorydeltas, which any
|
|
2335 |
# sufficiently modern client can insert into any repository.
|
|
2336 |
# The StreamSink code expects to be able to
|
|
2337 |
# convert on the target, so we need to put bytes-on-the-wire that can
|
|
2338 |
# be converted. That means inventory deltas (if the remote is <1.19,
|
|
2339 |
# RemoteStreamSink will fallback to VFS to insert the deltas).
|
|
2340 |
yield ('inventory-deltas', |
|
2341 |
self._stream_invs_as_deltas(revision_ids, |
|
2342 |
delta_versus_null=delta_versus_null)) |
|
2343 |
||
2344 |
def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
    """Return a stream of inventory-deltas for the given rev ids.

    Each yielded record is a versionedfile.FulltextContentFactory whose
    content is the serialised inventory delta.

    :param revision_ids: The list of inventories to transmit
    :param delta_versus_null: Don't try to find a minimal delta for this
        entry, instead compute the delta versus the NULL_REVISION. This
        effectively streams a complete inventory. Used for stuff like
        filling in missing parents, etc.
    """
    from_repo = self.from_repository
    revision_keys = [(rev_id,) for rev_id in revision_ids]
    parent_map = from_repo.inventories.get_parent_map(revision_keys)
    # XXX: possibly repos could implement a more efficient iter_inv_deltas
    # method...
    inventories = self.from_repository.iter_inventories(
        revision_ids, 'topological')
    format = from_repo._format
    # Only inventories already yielded by this generator (plus NULL) are
    # usable as a delta basis.
    invs_sent_so_far = set([_mod_revision.NULL_REVISION])
    inventory_cache = lru_cache.LRUCache(50)
    null_inventory = from_repo.revision_tree(
        _mod_revision.NULL_REVISION).inventory
    # XXX: ideally the rich-root/tree-refs flags would be per-revision, not
    # per-repo (e.g.  streaming a non-rich-root revision out of a rich-root
    # repo back into a non-rich-root repo ought to be allowed)
    serializer = inventory_delta.InventoryDeltaSerializer(
        versioned_root=format.rich_root_data,
        tree_references=format.supports_tree_reference)
    for inv in inventories:
        key = (inv.revision_id,)
        parent_keys = parent_map.get(key, ())
        delta = None
        if not delta_versus_null and parent_keys:
            # The caller did not ask for complete inventories and we have
            # some parents that we can delta against.  Make a delta against
            # each parent so that we can find the smallest.
            parent_ids = [parent_key[0] for parent_key in parent_keys]
            for parent_id in parent_ids:
                if parent_id not in invs_sent_so_far:
                    # We don't know that the remote side has this basis, so
                    # we can't use it.
                    continue
                if parent_id == _mod_revision.NULL_REVISION:
                    parent_inv = null_inventory
                else:
                    # Try the cache first and fall back to the repository.
                    parent_inv = inventory_cache.get(parent_id, None)
                    if parent_inv is None:
                        parent_inv = from_repo.get_inventory(parent_id)
                candidate_delta = inv._make_delta(parent_inv)
                # Keep the shortest delta seen so far; ties keep the
                # earlier parent.
                if (delta is None or
                    len(delta) > len(candidate_delta)):
                    delta = candidate_delta
                    basis_id = parent_id
        if delta is None:
            # Either none of the parents ended up being suitable, or we
            # were asked to delta against NULL
            basis_id = _mod_revision.NULL_REVISION
            delta = inv._make_delta(null_inventory)
        invs_sent_so_far.add(inv.revision_id)
        inventory_cache[inv.revision_id] = inv
        delta_serialized = ''.join(
            serializer.delta_to_lines(basis_id, key[-1], delta))
        yield versionedfile.FulltextContentFactory(
            key, parent_keys, None, delta_serialized)
|
2407 |
||
2408 |
||
2409 |
class _VersionedFileChecker(object):
    """Compare stored text parents against those implied by inventories.

    The expected parents come from the repository's text key index, which
    is generated when the checker is constructed.
    """

    def __init__(self, repository, text_key_references=None, ancestors=None):
        """Build the text key index for repository.

        :param repository: The repository to check.
        :param text_key_references: Passed through to
            repository._generate_text_key_index().
        :param ancestors: Passed through to
            repository._generate_text_key_index().
        """
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index(
            text_key_references=text_key_references, ancestors=ancestors)

    def calculate_file_version_parents(self, text_key):
        """Calculate the correct parents for a file version according to
        the inventories.

        :param text_key: A key present in the text index.
        :return: A tuple of parent keys; the empty tuple when the index
            lists only NULL_REVISION as the parent.
        """
        parent_keys = self.text_index[text_key]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        return tuple(parent_keys)

    def check_file_version_parents(self, texts, progress_bar=None):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :param texts: Passed through to the implementation, which reads
            texts from self.repository.texts instead.
        :param progress_bar: Optional progress bar to update. When None, a
            nested progress bar is created and finished here.
        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct.  dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this
            versioned file, but not used by the corresponding inventory.
        """
        local_progress = None
        if progress_bar is None:
            local_progress = ui.ui_factory.nested_progress_bar()
            progress_bar = local_progress
        try:
            return self._check_file_version_parents(texts, progress_bar)
        finally:
            # Only finish a bar created here. Compare against None so the
            # cleanup does not depend on the bar's truthiness.
            if local_progress is not None:
                local_progress.finished()

    def _check_file_version_parents(self, texts, progress_bar):
        """See check_file_version_parents."""
        wrong_parents = {}
        self.file_ids = set([file_id for file_id, _ in self.text_index])
        # text keys is now grouped by file_id
        n_versions = len(self.text_index)
        progress_bar.update('loading text store', 0, n_versions)
        parent_map = self.repository.texts.get_parent_map(self.text_index)
        # On unlistable transports this could well be empty/error...
        text_keys = self.repository.texts.keys()
        unused_keys = frozenset(text_keys) - set(self.text_index)
        for num, key in enumerate(self.text_index):
            progress_bar.update('checking text graph', num, n_versions)
            correct_parents = self.calculate_file_version_parents(key)
            try:
                knit_parents = parent_map[key]
            except (KeyError, errors.RevisionNotPresent):
                # Missing text! A plain parent map signals absence with
                # KeyError, so catch that as well and record the text as
                # having no stored parents.
                knit_parents = None
            if correct_parents != knit_parents:
                wrong_parents[key] = (knit_parents, correct_parents)
        return wrong_parents, unused_keys
|
2471 |
||
2472 |
||
5863.4.1
by Jelmer Vernooij
Move interrepository implementation to vf_repository. |
2473 |
class InterVersionedFileRepository(InterRepository): |
2474 |
||
2475 |
_walk_to_common_revisions_batch_size = 50 |
|
2476 |
||
2477 |
@needs_write_lock
def fetch(self, revision_id=None, find_ghosts=False,
          fetch_spec=None):
    """Fetch the content required to construct revision_id.

    The content is copied from self.source to self.target.

    :param revision_id: if None all content is copied, if NULL_REVISION no
        content is copied.
    :param find_ghosts: Passed through to RepoFetcher.
    :param fetch_spec: Passed through to RepoFetcher.
    :return: None.
    """
    ui.ui_factory.warn_experimental_format_fetch(self)
    from bzrlib.fetch import RepoFetcher
    source_format = self.source._format
    target_format = self.target._format
    # See <https://launchpad.net/bugs/456077> asking for a warning here
    if source_format.network_name() != target_format.network_name():
        ui.ui_factory.show_user_warning(
            'cross_format_fetch',
            from_format=self.source._format,
            to_format=self.target._format)
    # Nothing else is invoked on the fetcher here, so the copy is
    # presumably driven by constructing it.
    RepoFetcher(
        to_repository=self.target,
        from_repository=self.source,
        last_revision=revision_id,
        fetch_spec=fetch_spec,
        find_ghosts=find_ghosts)
|
2500 |
||
2501 |
def _walk_to_common_revisions(self, revision_ids, if_present_ids=None):
    """Walk out from revision_ids in source to revisions target has.

    The source graph is searched breadth first, in batches of at least
    _walk_to_common_revisions_batch_size revisions. After each batch the
    target is asked which of those revisions it has, and the search is
    stopped along those lines of history.

    :param revision_ids: The start point for the search. May be None when
        only if_present_ids are supplied.
    :param if_present_ids: Optional additional start points. Ghosts among
        these do not raise NoSuchRevision.
    :raises errors.NoSuchRevision: if one of revision_ids is a ghost in the
        source and is also absent from the target.
    :return: The searcher's get_result() value (presumably a
        bzrlib.graph.SearchResult, as the caller documents).
    """
    target_graph = self.target.get_graph()
    # Treat None as "no mandatory start points"; frozenset(None) would
    # raise TypeError.
    if revision_ids is None:
        revision_ids = ()
    revision_ids = frozenset(revision_ids)
    if if_present_ids:
        all_wanted_revs = revision_ids.union(if_present_ids)
    else:
        all_wanted_revs = revision_ids
    source_graph = self.source.get_graph()
    # ensure we don't pay silly lookup costs.
    searcher = source_graph._make_breadth_first_searcher(all_wanted_revs)
    null_set = frozenset([_mod_revision.NULL_REVISION])
    searcher_exhausted = False
    while True:
        next_revs = set()
        ghosts = set()
        # Iterate the searcher until we have enough next_revs
        while len(next_revs) < self._walk_to_common_revisions_batch_size:
            try:
                next_revs_part, ghosts_part = searcher.next_with_ghosts()
                next_revs.update(next_revs_part)
                ghosts.update(ghosts_part)
            except StopIteration:
                searcher_exhausted = True
                break
        # If there are ghosts in the source graph, and the caller asked for
        # them, make sure that they are present in the target.
        # We don't care about other ghosts as we can't fetch them and
        # haven't been asked to.
        ghosts_to_check = set(revision_ids.intersection(ghosts))
        revs_to_get = set(next_revs).union(ghosts_to_check)
        if revs_to_get:
            have_revs = set(target_graph.get_parent_map(revs_to_get))
            # we always have NULL_REVISION present.
            have_revs = have_revs.union(null_set)
            # Check if the target is missing any ghosts we need.
            ghosts_to_check.difference_update(have_revs)
            if ghosts_to_check:
                # One of the caller's revision_ids is a ghost in both the
                # source and the target.
                raise errors.NoSuchRevision(
                    self.source, ghosts_to_check.pop())
            # Because we may have walked past the original stop point, make
            # sure everything is stopped
            stop_revs = searcher.find_seen_ancestors(have_revs)
            searcher.stop_searching_any(stop_revs)
        if searcher_exhausted:
            break
    return searcher.get_result()
|
2556 |
||
2557 |
@needs_read_lock
def search_missing_revision_ids(self,
        revision_id=symbol_versioning.DEPRECATED_PARAMETER,
        find_ghosts=True, revision_ids=None, if_present_ids=None,
        limit=None):
    """Work out which revisions source has and target lacks.

    :param revision_id: deprecated single-revision spelling of
        revision_ids. Passing it emits a DeprecationWarning, and it
        cannot be combined with revision_ids.
    :param revision_ids: restrict the answer to revisions included by
        these ids. NoSuchRevision will be raised if any of them is not
        present.
    :param if_present_ids: same as revision_ids, except that absent ids
        do not raise NoSuchRevision; they are just left out of the
        result. Handy for e.g. tags, which may point at absent revisions.
    :param find_ghosts: when False and revision_ids or if_present_ids is
        given, the search walks back from those ids with
        _walk_to_common_revisions. In every other case the source
        revisions are compared against all revision ids of the target.
    :param limit: when not None, keep at most this many revisions, taken
        from the start of a topological ordering of the missing set.
    :return: A bzrlib.graph.SearchResult.
    """
    if symbol_versioning.deprecated_passed(revision_id):
        symbol_versioning.warn(
            'search_missing_revision_ids(revision_id=...) was '
            'deprecated in 2.4. Use revision_ids=[...] instead.',
            DeprecationWarning, stacklevel=2)
        if revision_ids is not None:
            raise AssertionError(
                'revision_ids is mutually exclusive with revision_id')
        if revision_id is not None:
            revision_ids = [revision_id]
    # From here on only revision_ids is meaningful.
    del revision_id
    heads_given = not (revision_ids is None and if_present_ids is None)
    if heads_given and not find_ghosts:
        # Stop searching as soon as revisions the target has are reached.
        search_result = self._walk_to_common_revisions(
            revision_ids, if_present_ids=if_present_ids)
        if limit is None:
            return search_result
        missing = search_result.get_keys()
    else:
        # Generic path: potentially the slowest, as it lists everything
        # the target holds.
        known_in_target = set(self.target.all_revision_ids())
        candidates = self._present_source_revisions_for(
            revision_ids, if_present_ids)
        missing = set(candidates).difference(known_in_target)
    if limit is not None:
        ordered = self.source.get_graph().iter_topo_order(missing)
        missing = set(itertools.islice(ordered, limit))
    return self.source.revision_ids_to_search_result(missing)
2606 |
||
2607 |
def _present_source_revisions_for(self, revision_ids, if_present_ids=None):
    """Collect the ancestry of the given ids that the source repo holds.

    :param revision_ids: ids that must exist in source; NoSuchRevision is
        raised for one that does not. When both this and if_present_ids
        are None, every revision id in source is returned.
    :param if_present_ids: further ids to start from; unlike
        revision_ids, absent ones raise no error.
    :return: a set of revision ids.
    """
    if revision_ids is None and if_present_ids is None:
        return set(self.source.all_revision_ids())
    required = set()
    if revision_ids is not None:
        required = set(revision_ids)
    optional = set()
    if if_present_ids is not None:
        optional = set(if_present_ids)
    wanted = required.union(optional)
    graph = self.source.get_graph()
    present = set(graph.get_parent_map(wanted))
    # Callers rely on NoSuchRevision when a required id is absent.
    absent_required = required.difference(present)
    if absent_required:
        raise errors.NoSuchRevision(self.source, absent_required.pop())
    start_ids = wanted.intersection(present)
    ancestry = set()
    for rev_id, parents in graph.iter_ancestry(start_ids):
        # Entries without parents information (presumably ghosts) and the
        # null revision are not part of the answer.
        if rev_id != _mod_revision.NULL_REVISION and parents is not None:
            ancestry.add(rev_id)
    return ancestry
|
2638 |
||
2639 |
@classmethod
def _get_repo_format_to_test(cls):
    """Return the repository format to test with; always None here."""
    return None
|
2642 |
||
2643 |
@classmethod
def is_compatible(cls, source, target):
    """Tell whether this implementation can handle source and target.

    The answer is truthy only when the formats of both repositories
    report supports_full_versioned_files.
    """
    source_supported = source._format.supports_full_versioned_files
    if not source_supported:
        # Hand back the falsy value itself, exactly like ``a and b`` would.
        return source_supported
    return target._format.supports_full_versioned_files
|
2648 |
||
2649 |
||
2650 |
class InterDifferingSerializer(InterVersionedFileRepository):
    """InterRepository implementation that fetches via inventory deltas.

    For every revision to copy, fetch() works out an inventory delta
    against a basis tree, copies the texts that delta refers to, and then
    inserts the delta, the signature (if any) and the revision into the
    target. Comments and debug flags refer to this class as "IDS".
    """

    @classmethod
    def _get_repo_format_to_test(cls):
        """Return the repository format to test with; always None here."""
        return None

    @staticmethod
    def is_compatible(source, target):
        """Return True if this implementation should be used for the pair.

        :param source: the repository to fetch from.
        :param target: the repository to fetch into.
        :return: True or False.
        """
        if not source._format.supports_full_versioned_files:
            return False
        if not target._format.supports_full_versioned_files:
            return False
        # This is redundant with format.check_conversion_target(), however
        # that raises an exception, and we just want to say "False" as in we
        # won't support converting between these formats.
        if 'IDS_never' in debug.debug_flags:
            return False
        if source.supports_rich_root() and not target.supports_rich_root():
            return False
        if (source._format.supports_tree_reference
            and not target._format.supports_tree_reference):
            return False
        if target._fallback_repositories and target._format.supports_chks:
            # IDS doesn't know how to copy CHKs for the parent inventories it
            # adds to stacked repos.
            return False
        if 'IDS_always' in debug.debug_flags:
            return True
        # Only use this code path for local source and target. IDS does far
        # too much IO (both bandwidth and roundtrips) over a network.
        if not source.bzrdir.transport.base.startswith('file:///'):
            return False
        if not target.bzrdir.transport.base.startswith('file:///'):
            return False
        return True

    def _get_trees(self, revision_ids, cache):
        """Return (revision_id, tree) pairs for the ids that have a tree.

        Trees are taken from cache when possible; otherwise they are read
        from the source and added to cache. Ids for which the source
        raises NoSuchRevision are skipped.

        :param revision_ids: the revision ids to look up.
        :param cache: a mapping of revision_id to RevisionTree; updated in
            place.
        :return: a list of (revision_id, tree) tuples.
        """
        possible_trees = []
        for rev_id in revision_ids:
            if rev_id in cache:
                possible_trees.append((rev_id, cache[rev_id]))
            else:
                # Not cached, but inventory might be present anyway.
                try:
                    tree = self.source.revision_tree(rev_id)
                except errors.NoSuchRevision:
                    # Nope, parent is ghost.
                    pass
                else:
                    cache[rev_id] = tree
                    possible_trees.append((rev_id, tree))
        return possible_trees

    def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
        """Get the best delta and base for this revision.

        A delta is generated against every candidate basis tree and the
        smallest one (fewest entries) wins.

        :param tree: the tree of the revision being converted.
        :param parent_ids: not used by this method.
        :param possible_trees: a non-empty list of (basis_id, basis_tree)
            candidates.
        :return: (basis_id, delta)
        """
        deltas = []
        # Generate deltas against each tree, to find the shortest.
        for basis_id, basis_tree in possible_trees:
            delta = tree.inventory._make_delta(basis_tree.inventory)
            deltas.append((len(delta), basis_id, delta))
        deltas.sort()
        return deltas[0][1:]

    def _fetch_parent_invs_for_stacking(self, parent_map, cache):
        """Find all parent revisions that are absent, but for which the
        inventory is present, and copy those inventories.

        This is necessary to preserve correctness when the source is stacked
        without fallbacks configured. (Note that in cases like upgrade the
        source may not have _fallback_repositories even though it is
        stacked.)

        :param parent_map: mapping of revision_id to parent revision ids
            for the revisions being fetched.
        :param cache: a mapping of revision_id to RevisionTree; the trees
            of the copied inventories are added to it.
        """
        parent_revs = set()
        for parents in parent_map.values():
            parent_revs.update(parents)
        present_parents = self.source.get_parent_map(parent_revs)
        absent_parents = set(parent_revs).difference(present_parents)
        parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
            (rev_id,) for rev_id in absent_parents)
        parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
        for parent_tree in self.source.revision_trees(parent_inv_ids):
            current_revision_id = parent_tree.get_revision_id()
            parents_parents_keys = parent_invs_keys_for_stacking[
                (current_revision_id,)]
            parents_parents = [key[-1] for key in parents_parents_keys]
            # These inventories are always inserted as a delta against the
            # null revision.
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
            delta = parent_tree.inventory._make_delta(basis_tree.inventory)
            self.target.add_inventory_by_delta(
                basis_id, delta, current_revision_id, parents_parents)
            cache[current_revision_id] = parent_tree

    def _fetch_batch(self, revision_ids, basis_id, cache):
        """Fetch across a few revisions.

        :param revision_ids: The revisions to copy
        :param basis_id: The revision_id of a tree that must be in cache, used
            as a basis for delta when no other base is available
        :param cache: A cache of RevisionTrees that we can use.
        :return: The basis id to hand to the next batch. This is the last
            revision converted, unless the target has fallback
            repositories and parent inventories were copied, in which case
            it is the basis last chosen for one of those inventories.
        """
        # Walk though all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        root_keys_to_create = set()
        text_keys = set()
        pending_deltas = []
        pending_revisions = []
        parent_map = self.source.get_parent_map(revision_ids)
        self._fetch_parent_invs_for_stacking(parent_map, cache)
        self.source._safe_to_return_from_cache = True
        try:
            for tree in self.source.revision_trees(revision_ids):
                # Find a inventory delta for this revision.
                # Find text entries that need to be copied, too.
                current_revision_id = tree.get_revision_id()
                parent_ids = parent_map.get(current_revision_id, ())
                parent_trees = self._get_trees(parent_ids, cache)
                possible_trees = list(parent_trees)
                if len(possible_trees) == 0:
                    # There either aren't any parents, or the parents are
                    # ghosts, so just use the last converted tree.
                    possible_trees.append((basis_id, cache[basis_id]))
                basis_id, delta = self._get_delta_for_revision(
                    tree, parent_ids, possible_trees)
                revision = self.source.get_revision(current_revision_id)
                pending_deltas.append((basis_id, delta,
                    current_revision_id, revision.parent_ids))
                if self._converting_to_rich_root:
                    self._revision_id_to_root_id[current_revision_id] = (
                        tree.get_root_id())
                # Determine which texts are in present in this revision but
                # not in any of the available parents.
                texts_possibly_new_in_tree = set()
                for old_path, new_path, file_id, entry in delta:
                    if new_path is None:
                        # This file_id isn't present in the new rev
                        continue
                    if not new_path:
                        # This is the root
                        if not self.target.supports_rich_root():
                            # The target doesn't support rich root, so we
                            # don't copy
                            continue
                        if self._converting_to_rich_root:
                            # This can't be copied normally, we have to
                            # insert it specially
                            root_keys_to_create.add(
                                (file_id, entry.revision))
                            continue
                    texts_possibly_new_in_tree.add((file_id, entry.revision))
                # Drop every text that one of the candidate basis trees
                # already carries at the same revision.
                for _basis_id, basis_tree in possible_trees:
                    basis_inv = basis_tree.inventory
                    for file_key in list(texts_possibly_new_in_tree):
                        file_id, file_revision = file_key
                        try:
                            entry = basis_inv[file_id]
                        except errors.NoSuchId:
                            continue
                        if entry.revision == file_revision:
                            texts_possibly_new_in_tree.remove(file_key)
                text_keys.update(texts_possibly_new_in_tree)
                pending_revisions.append(revision)
                cache[current_revision_id] = tree
                basis_id = current_revision_id
        finally:
            # Always clear the flag again, also when the loop above fails.
            self.source._safe_to_return_from_cache = False
        # Copy file texts
        from_texts = self.source.texts
        to_texts = self.target.texts
        if root_keys_to_create:
            root_stream = _mod_fetch._new_root_data_stream(
                root_keys_to_create, self._revision_id_to_root_id, parent_map,
                self.source)
            to_texts.insert_record_stream(root_stream)
        to_texts.insert_record_stream(from_texts.get_record_stream(
            text_keys, self.target._format._fetch_order,
            not self.target._format._fetch_uses_deltas))
        # insert inventory deltas
        for delta in pending_deltas:
            self.target.add_inventory_by_delta(*delta)
        if self.target._fallback_repositories:
            # Make sure this stacked repository has all the parent inventories
            # for the new revisions that we are about to insert. We do this
            # before adding the revisions so that no revision is added until
            # all the inventories it may depend on are added.
            # Note that this is overzealous, as we may have fetched these in an
            # earlier batch.
            parent_ids = set()
            revision_ids = set()
            for revision in pending_revisions:
                revision_ids.add(revision.revision_id)
                parent_ids.update(revision.parent_ids)
            parent_ids.difference_update(revision_ids)
            parent_ids.discard(_mod_revision.NULL_REVISION)
            parent_map = self.source.get_parent_map(parent_ids)
            # we iterate over parent_map and not parent_ids because we don't
            # want to try copying any revision which is a ghost
            for parent_tree in self.source.revision_trees(parent_map):
                current_revision_id = parent_tree.get_revision_id()
                parents_parents = parent_map[current_revision_id]
                possible_trees = self._get_trees(parents_parents, cache)
                if len(possible_trees) == 0:
                    # There either aren't any parents, or the parents are
                    # ghosts, so just use the last converted tree.
                    possible_trees.append((basis_id, cache[basis_id]))
                # NOTE: basis_id is rebound here, so the value returned
                # below may be a basis picked in this loop.
                basis_id, delta = self._get_delta_for_revision(parent_tree,
                    parents_parents, possible_trees)
                self.target.add_inventory_by_delta(
                    basis_id, delta, current_revision_id, parents_parents)
        # insert signatures and revisions
        for revision in pending_revisions:
            try:
                signature = self.source.get_signature_text(
                    revision.revision_id)
                self.target.add_signature_text(revision.revision_id,
                    signature)
            except errors.NoSuchRevision:
                # No signature to copy for this revision.
                pass
            self.target.add_revision(revision.revision_id, revision)
        return basis_id

    def _fetch_all_revisions(self, revision_ids, pb):
        """Fetch everything for the list of revisions.

        The revisions are copied in batches, each inside its own write
        group on the target; a failing batch aborts its write group and
        re-raises.

        :param revision_ids: The list of revisions to fetch. Must be in
            topological order, and must not be empty.
        :param pb: A ProgressTask
        :return: None
        """
        basis_id, basis_tree = self._get_basis(revision_ids[0])
        batch_size = 100
        cache = lru_cache.LRUCache(100)
        cache[basis_id] = basis_tree
        del basis_tree # We don't want to hang on to it here
        hints = []

        for offset in range(0, len(revision_ids), batch_size):
            self.target.start_write_group()
            try:
                pb.update('Transferring revisions', offset,
                          len(revision_ids))
                batch = revision_ids[offset:offset+batch_size]
                basis_id = self._fetch_batch(batch, basis_id, cache)
            except:
                # Clean up whatever went wrong, then let the error through.
                self.source._safe_to_return_from_cache = False
                self.target.abort_write_group()
                raise
            else:
                hint = self.target.commit_write_group()
                if hint:
                    hints.extend(hint)
        if hints and self.target._format.pack_compresses:
            self.target.pack(hint=hints)
        pb.update('Transferring revisions', len(revision_ids),
                  len(revision_ids))

    @needs_write_lock
    def fetch(self, revision_id=None, find_ghosts=False,
            fetch_spec=None):
        """See InterRepository.fetch().

        :param revision_id: only consulted when fetch_spec is None; if
            true, limits the search for missing revisions to it.
        :param find_ghosts: passed on to search_missing_revision_ids when
            fetch_spec is None.
        :param fetch_spec: if not None, its get_keys() gives the revisions
            to copy.
        :return: (number of revisions copied, 0)
        """
        if fetch_spec is not None:
            revision_ids = fetch_spec.get_keys()
        else:
            revision_ids = None
        ui.ui_factory.warn_experimental_format_fetch(self)
        if (not self.source.supports_rich_root()
            and self.target.supports_rich_root()):
            self._converting_to_rich_root = True
            self._revision_id_to_root_id = {}
        else:
            self._converting_to_rich_root = False
        # See <https://launchpad.net/bugs/456077> asking for a warning here
        if (self.source._format.network_name()
            != self.target._format.network_name()):
            ui.ui_factory.show_user_warning('cross_format_fetch',
                from_format=self.source._format,
                to_format=self.target._format)
        if revision_ids is None:
            if revision_id:
                search_revision_ids = [revision_id]
            else:
                search_revision_ids = None
            revision_ids = self.target.search_missing_revision_ids(self.source,
                revision_ids=search_revision_ids,
                find_ghosts=find_ghosts).get_keys()
        if not revision_ids:
            return 0, 0
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        if not revision_ids:
            return 0, 0
        # Walk though all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            self._fetch_all_revisions(revision_ids, pb)
        finally:
            pb.finished()
        return len(revision_ids), 0

    def _get_basis(self, first_revision_id):
        """Get a revision and tree which exists in the target.

        This assumes that first_revision_id is selected for transmission
        because all other ancestors are already present. If we can't find an
        ancestor we fall back to NULL_REVISION since we know that is safe.

        :param first_revision_id: the first revision that will be copied.
        :return: (basis_id, basis_tree)
        """
        first_rev = self.source.get_revision(first_revision_id)
        try:
            # IndexError here means the revision has no parents at all.
            basis_id = first_rev.parent_ids[0]
            # only valid as a basis if the target has it
            self.target.get_revision(basis_id)
            # Try to get a basis tree - if it's a ghost it will hit the
            # NoSuchRevision case.
            basis_tree = self.source.revision_tree(basis_id)
        except (IndexError, errors.NoSuchRevision):
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
        return basis_id, basis_tree
|
2990 |
||
2991 |
||
5863.4.1
by Jelmer Vernooij
Move interrepository implementation to vf_repository. |
2992 |
class InterSameDataRepository(InterVersionedFileRepository):
    """Conversion code for repository pairs that hold the same data.

    This only works when both the data format and the model match.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Return the repository format used when testing this class.

        InterSameData is able to pull subtree to subtree as well as
        non-subtree to non-subtree, so the richest repository format is
        the one to test with.
        """
        from bzrlib.repofmt import knitrepo
        richest_format = knitrepo.RepositoryFormatKnit3()
        return richest_format

    @staticmethod
    def is_compatible(source, target):
        """Tell whether source and target can be handled by this class.

        Truthy only if both repositories share the same model and both
        formats report supports_full_versioned_files.
        """
        same_model = InterRepository._same_model(source, target)
        if not same_model:
            # Hand back the falsy value itself, as ``a and b`` would.
            return same_model
        source_supported = source._format.supports_full_versioned_files
        if not source_supported:
            return source_supported
        return target._format.supports_full_versioned_files
|
3014 |
||
3015 |
||
5863.4.1
by Jelmer Vernooij
Move interrepository implementation to vf_repository. |
3016 |
# Register the optimisers defined in this module, in this exact order.
for _optimiser in (InterVersionedFileRepository,
                   InterDifferingSerializer,
                   InterSameDataRepository):
    InterRepository.register_optimiser(_optimiser)
# Don't leave the loop variable behind in the module namespace.
del _optimiser
5815.4.1
by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository. |
3019 |
|
3020 |
||
3021 |
def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install the data of several revisions inside one write group.

    :param repository: the repository to install into.
    :param iterable: yields (revision, tree, signature) tuples; the
        signature may be None.
    :param num_revisions: total passed on to pb.update(); may be None.
    :param pb: optional progress bar, updated after every revision.

    If anything fails the write group is aborted and the error re-raised;
    otherwise the write group is committed.
    """
    repository.start_write_group()
    try:
        inv_cache = lru_cache.LRUCache(10)
        installed = 0
        for rev, rev_tree, sig in iterable:
            _install_revision(repository, rev, rev_tree, sig, inv_cache)
            installed += 1
            if pb is None:
                continue
            pb.update('Transferring revisions', installed, num_revisions)
    except:
        repository.abort_write_group()
        raise
    # Only reached when nothing above raised.
    repository.commit_write_group()
|
3040 |
||
3041 |
||
3042 |
def _install_revision(repository, rev, revision_tree, signature,
    inventory_cache):
    """Install all revision data into a repository.

    Texts that the repository does not have yet are added first, then the
    inventory, then the signature (if any) and finally the revision.

    :param repository: the repository to install into; the caller is
        expected to have a write group open (see install_revisions).
    :param rev: the revision object to install.
    :param revision_tree: the tree of rev, supplying inventory and texts.
    :param signature: signature text for rev, or None.
    :param inventory_cache: mapping of revision_id to inventory, used to
        find a basis when the inventory is added as a delta; updated in
        place.
    :raises errors.IncompatibleRevision: if the repository does not
        support rich roots and the root entry was not last changed in rev.
    """
    present_parents = []
    parent_trees = {}
    for p_id in rev.parent_ids:
        if repository.has_revision(p_id):
            present_parents.append(p_id)
            parent_trees[p_id] = repository.revision_tree(p_id)
        else:
            # Parents the repository lacks are stood in for by the tree of
            # the null revision.
            parent_trees[p_id] = repository.revision_tree(
                _mod_revision.NULL_REVISION)

    inv = revision_tree.inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        path, root = entries.next()
        if root.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    text_keys = {}
    for path, ie in entries:
        text_keys[(ie.file_id, ie.revision)] = ie
    text_parent_map = repository.texts.get_parent_map(text_keys)
    missing_texts = set(text_keys) - set(text_parent_map)
    # Add the texts that are not already present
    for text_key in missing_texts:
        ie = text_keys[text_key]
        text_parents = []
        # FIXME: TODO: The following loop overlaps/duplicates that done by
        # commit to determine parents. There is a latent/real bug here where
        # the parents inserted are not those commit would do - in particular
        # they are not filtered by heads(). RBC, AB
        for tree in parent_trees.values():
            if not tree.has_id(ie.file_id):
                continue
            parent_key = (ie.file_id, tree.get_file_revision(ie.file_id))
            # Compare whole keys: text_parents holds (file_id, revision)
            # tuples, so testing the bare revision id would never match
            # and duplicates would slip through.
            if parent_key in text_parents:
                continue
            text_parents.append(parent_key)
        lines = revision_tree.get_file(ie.file_id).readlines()
        repository.texts.add_lines(text_key, text_parents, lines)
    try:
        # install the inventory
        if repository._format._commit_inv_deltas and len(rev.parent_ids):
            # Cache this inventory
            inventory_cache[rev.revision_id] = inv
            try:
                basis_inv = inventory_cache[rev.parent_ids[0]]
            except KeyError:
                # No cached basis to diff against: add the full inventory.
                repository.add_inventory(rev.revision_id, inv, present_parents)
            else:
                delta = inv._make_delta(basis_inv)
                repository.add_inventory_by_delta(rev.parent_ids[0], delta,
                    rev.revision_id, present_parents)
        else:
            repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        # The inventory is already there; nothing more to do for it.
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)
|
3104 |
||
3105 |
||
3106 |
def install_revision(repository, rev, revision_tree):
    """Install the data of a single, unsigned revision into a repository.

    Convenience wrapper that hands install_revisions() a one-element list.
    """
    unsigned_entry = (rev, revision_tree, None)
    install_revisions(repository, [unsigned_entry])