27
27
stored, so that if a revision is present we can totally recreate it.
28
28
However, we can't know what files are included in a revision until we
29
29
read its inventory. So we query the inventory store of the source for
30
the ids we need, and then pull those ids and finally actually join
30
the ids we need, and then pull those ids and then return to the inventories.
35
34
import bzrlib.errors as errors
36
from bzrlib.errors import (InstallFailed,
35
from bzrlib.errors import InstallFailed
38
36
from bzrlib.progress import ProgressPhase
39
from bzrlib.revision import NULL_REVISION
37
from bzrlib.revision import is_null, NULL_REVISION
40
38
from bzrlib.symbol_versioning import (deprecated_function,
44
41
from bzrlib.trace import mutter
44
from bzrlib.lazy_import import lazy_import
48
46
# TODO: Avoid repeatedly opening weaves so many times.
82
71
count_copied -- number of revisions copied
84
This should not be used directory, its essential a object to encapsulate
73
This should not be used directly, it's essential a object to encapsulate
85
74
the logic in InterRepository.fetch().
87
def __init__(self, to_repository, from_repository, last_revision=None, pb=None):
77
def __init__(self, to_repository, from_repository, last_revision=None, pb=None,
79
"""Create a repo fetcher.
81
:param find_ghosts: If True search the entire history for ghosts.
88
83
# result variables.
89
84
self.failed_revisions = []
90
85
self.count_copied = 0
91
if to_repository.control_files._transport.base == from_repository.control_files._transport.base:
92
# check that last_revision is in 'from' and then return a no-operation.
93
if last_revision not in (None, NULL_REVISION):
94
from_repository.get_revision(last_revision)
86
if to_repository.has_same_location(from_repository):
87
# repository.fetch should be taking care of this case.
88
raise errors.BzrError('RepoFetcher run '
89
'between two objects at the same location: '
90
'%r and %r' % (to_repository, from_repository))
96
91
self.to_repository = to_repository
97
92
self.from_repository = from_repository
98
93
# must not mutate self._last_revision as its potentially a shared instance
99
94
self._last_revision = last_revision
95
self.find_ghosts = find_ghosts
101
97
self.pb = bzrlib.ui.ui_factory.nested_progress_bar()
102
98
self.nested_pb = self.pb
122
127
requested revisions, finally clearing the progress bar.
124
129
self.to_weaves = self.to_repository.weave_store
125
self.to_control = self.to_repository.control_weaves
126
130
self.from_weaves = self.from_repository.weave_store
127
self.from_control = self.from_repository.control_weaves
128
131
self.count_total = 0
129
132
self.file_ids_names = {}
130
pp = ProgressPhase('Fetch phase', 4, self.pb)
133
pp = ProgressPhase('Transferring', 4, self.pb)
133
revs = self._revids_to_fetch()
137
self._fetch_weave_texts(revs)
139
self._fetch_inventory_weave(revs)
141
self._fetch_revision_texts(revs)
142
self.count_copied += len(revs)
136
search = self._revids_to_fetch()
139
if getattr(self, '_fetch_everything_for_search', None) is not None:
140
self._fetch_everything_for_search(search, pp)
142
# backward compatibility
143
self._fetch_everything_for_revisions(search.get_keys, pp)
147
def _fetch_everything_for_search(self, search, pp):
148
"""Fetch all data for the given set of revisions."""
149
# The first phase is "file". We pass the progress bar for it directly
150
# into item_keys_introduced_by, which has more information about how
151
# that phase is progressing than we do. Progress updates for the other
152
# phases are taken care of in this function.
153
# XXX: there should be a clear owner of the progress reporting. Perhaps
154
# item_keys_introduced_by should have a richer API than it does at the
155
# moment, so that it can feed the progress information back to this
158
pb = bzrlib.ui.ui_factory.nested_progress_bar()
160
revs = search.get_keys()
161
data_to_fetch = self.from_repository.item_keys_introduced_by(revs, pb)
162
for knit_kind, file_id, revisions in data_to_fetch:
163
if knit_kind != phase:
165
# Make a new progress bar for this phase
168
pb = bzrlib.ui.ui_factory.nested_progress_bar()
169
if knit_kind == "file":
170
self._fetch_weave_text(file_id, revisions)
171
elif knit_kind == "inventory":
172
# Before we process the inventory we generate the root
173
# texts (if necessary) so that the inventories references
175
self._generate_root_texts(revs)
176
# NB: This currently reopens the inventory weave in source;
177
# using a full get_data_stream instead would avoid this.
178
self._fetch_inventory_weave(revs, pb)
179
elif knit_kind == "signatures":
180
# Nothing to do here; this will be taken care of when
181
# _fetch_revision_texts happens.
183
elif knit_kind == "revisions":
184
self._fetch_revision_texts(revs, pb)
186
raise AssertionError("Unknown knit kind %r" % knit_kind)
190
self.count_copied += len(revs)
146
192
def _revids_to_fetch(self):
193
"""Determines the exact revisions needed from self.from_repository to
194
install self._last_revision in self.to_repository.
196
If no revisions need to be fetched, then this just returns None.
147
198
mutter('fetch up to rev {%s}', self._last_revision)
148
199
if self._last_revision is NULL_REVISION:
149
200
# explicit limit of no revisions needed
151
202
if (self._last_revision is not None and
152
203
self.to_repository.has_revision(self._last_revision)):
156
return self.to_repository.missing_revision_ids(self.from_repository,
206
return self.to_repository.search_missing_revision_ids(
207
self.from_repository, self._last_revision,
208
find_ghosts=self.find_ghosts)
158
209
except errors.NoSuchRevision:
159
210
raise InstallFailed([self._last_revision])
161
def _fetch_weave_texts(self, revs):
162
texts_pb = bzrlib.ui.ui_factory.nested_progress_bar()
164
# fileids_altered_by_revision_ids requires reading the inventory
165
# weave, we will need to read the inventory weave again when
166
# all this is done, so enable caching for that specific weave
167
inv_w = self.from_repository.get_inventory_weave()
169
file_ids = self.from_repository.fileids_altered_by_revision_ids(revs)
171
num_file_ids = len(file_ids)
172
for file_id, required_versions in file_ids.items():
173
texts_pb.update("fetch texts", count, num_file_ids)
175
to_weave = self.to_weaves.get_weave_or_empty(file_id,
176
self.to_repository.get_transaction())
177
from_weave = self.from_weaves.get_weave(file_id,
178
self.from_repository.get_transaction())
179
# we fetch all the texts, because texts do
180
# not reference anything, and its cheap enough
181
to_weave.join(from_weave, version_ids=required_versions)
182
# we don't need *all* of this data anymore, but we dont know
183
# what we do. This cache clearing will result in a new read
184
# of the knit data when we do the checkout, but probably we
185
# want to emit the needed data on the fly rather than at the
187
# the from weave should know not to cache data being joined,
188
# but its ok to ask it to clear.
189
from_weave.clear_cache()
190
to_weave.clear_cache()
194
def _fetch_inventory_weave(self, revs):
195
pb = bzrlib.ui.ui_factory.nested_progress_bar()
197
pb.update("fetch inventory", 0, 2)
198
to_weave = self.to_control.get_weave('inventory',
199
self.to_repository.get_transaction())
201
child_pb = bzrlib.ui.ui_factory.nested_progress_bar()
203
# just merge, this is optimisable and its means we don't
204
# copy unreferenced data such as not-needed inventories.
205
pb.update("fetch inventory", 1, 3)
206
from_weave = self.from_repository.get_inventory_weave()
207
pb.update("fetch inventory", 2, 3)
208
# we fetch only the referenced inventories because we do not
209
# know for unselected inventories whether all their required
210
# texts are present in the other repository - it could be
212
to_weave.join(from_weave, pb=child_pb, msg='merge inventory',
214
from_weave.clear_cache()
212
def _fetch_weave_text(self, file_id, required_versions):
213
to_weave = self.to_weaves.get_weave_or_empty(file_id,
214
self.to_repository.get_transaction())
215
from_weave = self.from_weaves.get_weave(file_id,
216
self.from_repository.get_transaction())
217
# we fetch all the texts, because texts do
218
# not reference anything, and its cheap enough
219
to_weave.join(from_weave, version_ids=required_versions)
221
def _fetch_inventory_weave(self, revs, pb):
222
pb.update("fetch inventory", 0, 2)
223
to_weave = self.to_repository.get_inventory_weave()
224
child_pb = bzrlib.ui.ui_factory.nested_progress_bar()
226
# just merge, this is optimisable and its means we don't
227
# copy unreferenced data such as not-needed inventories.
228
pb.update("fetch inventory", 1, 3)
229
from_weave = self.from_repository.get_inventory_weave()
230
pb.update("fetch inventory", 2, 3)
231
# we fetch only the referenced inventories because we do not
232
# know for unselected inventories whether all their required
233
# texts are present in the other repository - it could be
235
to_weave.join(from_weave, pb=child_pb, msg='merge inventory',
240
def _generate_root_texts(self, revs):
241
"""This will be called by __fetch between fetching weave texts and
242
fetching the inventory weave.
244
Subclasses should override this if they need to generate root texts
245
after fetching weave texts.
221
250
class GenericRepoFetcher(RepoFetcher):
225
254
It triggers a reconciliation after fetching to ensure integrity.
228
def _fetch_revision_texts(self, revs):
257
def _fetch_revision_texts(self, revs, pb):
229
258
"""Fetch revision object texts"""
230
rev_pb = bzrlib.ui.ui_factory.nested_progress_bar()
232
to_txn = self.to_transaction = self.to_repository.get_transaction()
235
to_store = self.to_repository._revision_store
237
pb = bzrlib.ui.ui_factory.nested_progress_bar()
239
pb.update('copying revisions', count, total)
241
sig_text = self.from_repository.get_signature_text(rev)
242
to_store.add_revision_signature_text(rev, sig_text, to_txn)
243
except errors.NoSuchRevision:
246
to_store.add_revision(self.from_repository.get_revision(rev),
251
# fixup inventory if needed:
252
# this is expensive because we have no inverse index to current ghosts.
253
# but on local disk its a few seconds and sftp push is already insane.
255
# FIXME: repository should inform if this is needed.
256
self.to_repository.reconcile()
259
to_txn = self.to_transaction = self.to_repository.get_transaction()
262
to_store = self.to_repository._revision_store
264
pb.update('copying revisions', count, total)
266
sig_text = self.from_repository.get_signature_text(rev)
267
to_store.add_revision_signature_text(rev, sig_text, to_txn)
268
except errors.NoSuchRevision:
271
to_store.add_revision(self.from_repository.get_revision(rev),
274
# fixup inventory if needed:
275
# this is expensive because we have no inverse index to current ghosts.
276
# but on local disk its a few seconds and sftp push is already insane.
278
# FIXME: repository should inform if this is needed.
279
self.to_repository.reconcile()
261
282
class KnitRepoFetcher(RepoFetcher):
323
346
parent_texts = {}
324
347
versionedfile = {}
325
348
to_store = self.target.weave_store
349
parent_map = self.source.get_graph().get_parent_map(revs)
326
350
for tree in self.iter_rev_trees(revs):
327
351
revision_id = tree.inventory.root.revision
328
root_id = tree.inventory.root.file_id
329
parents = inventory_weave.get_parents(revision_id)
352
root_id = tree.get_root_id()
353
parents = parent_map[revision_id]
354
if parents[0] == NULL_REVISION:
330
356
if root_id not in versionedfile:
331
versionedfile[root_id] = to_store.get_weave_or_empty(root_id,
357
versionedfile[root_id] = to_store.get_weave_or_empty(root_id,
332
358
self.target.get_transaction())
333
parent_texts[root_id] = versionedfile[root_id].add_lines(
359
_, _, parent_texts[root_id] = versionedfile[root_id].add_lines(
334
360
revision_id, parents, [], parent_texts)
336
362
def regenerate_inventory(self, revs):
350
375
class Model1toKnit2Fetcher(GenericRepoFetcher):
351
376
"""Fetch from a Model1 repository into a Knit2 repository
353
def __init__(self, to_repository, from_repository, last_revision=None,
378
def __init__(self, to_repository, from_repository, last_revision=None,
379
pb=None, find_ghosts=True):
355
380
self.helper = Inter1and2Helper(from_repository, to_repository)
356
381
GenericRepoFetcher.__init__(self, to_repository, from_repository,
382
last_revision, pb, find_ghosts)
359
def _fetch_weave_texts(self, revs):
360
GenericRepoFetcher._fetch_weave_texts(self, revs)
361
# Now generate a weave for the tree root
384
def _generate_root_texts(self, revs):
362
385
self.helper.generate_root_texts(revs)
364
def _fetch_inventory_weave(self, revs):
387
def _fetch_inventory_weave(self, revs, pb):
365
388
self.helper.regenerate_inventory(revs)
369
392
"""Fetch from a Knit1 repository into a Knit2 repository"""
371
394
def __init__(self, to_repository, from_repository, last_revision=None,
395
pb=None, find_ghosts=True):
373
396
self.helper = Inter1and2Helper(from_repository, to_repository)
374
397
KnitRepoFetcher.__init__(self, to_repository, from_repository,
398
last_revision, pb, find_ghosts)
377
def _fetch_weave_texts(self, revs):
378
KnitRepoFetcher._fetch_weave_texts(self, revs)
379
# Now generate a weave for the tree root
400
def _generate_root_texts(self, revs):
380
401
self.helper.generate_root_texts(revs)
382
def _fetch_inventory_weave(self, revs):
403
def _fetch_inventory_weave(self, revs, pb):
383
404
self.helper.regenerate_inventory(revs)
386
class Fetcher(object):
387
"""Backwards compatibility glue for branch.fetch()."""
389
@deprecated_method(zero_eight)
390
def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
391
"""Please see branch.fetch()."""
392
to_branch.fetch(from_branch, last_revision, pb)
407
class RemoteToOtherFetcher(GenericRepoFetcher):
409
def _fetch_everything_for_search(self, search, pp):
410
data_stream = self.from_repository.get_data_stream_for_search(search)
411
self.to_repository.insert_data_stream(data_stream)