73
82
count_copied -- number of revisions copied
75
This should not be used directly, it's essential a object to encapsulate
84
This should not be used directory, its essential a object to encapsulate
76
85
the logic in InterRepository.fetch().
79
def __init__(self, to_repository, from_repository, last_revision=None, pb=None,
81
"""Create a repo fetcher.
83
:param find_ghosts: If True search the entire history for ghosts.
84
:param _write_group_acquired_callable: Don't use; this parameter only
85
exists to facilitate a hack done in InterPackRepo.fetch. We would
86
like to remove this parameter.
87
def __init__(self, to_repository, from_repository, last_revision=None, pb=None):
88
88
# result variables.
89
89
self.failed_revisions = []
90
90
self.count_copied = 0
91
if to_repository.has_same_location(from_repository):
92
# repository.fetch should be taking care of this case.
93
raise errors.BzrError('RepoFetcher run '
94
'between two objects at the same location: '
95
'%r and %r' % (to_repository, from_repository))
91
if to_repository.control_files._transport.base == from_repository.control_files._transport.base:
92
# check that last_revision is in 'from' and then return a no-operation.
93
if last_revision not in (None, NULL_REVISION):
94
from_repository.get_revision(last_revision)
96
96
self.to_repository = to_repository
97
97
self.from_repository = from_repository
98
98
# must not mutate self._last_revision as its potentially a shared instance
99
99
self._last_revision = last_revision
100
self.find_ghosts = find_ghosts
102
101
self.pb = bzrlib.ui.ui_factory.nested_progress_bar()
103
102
self.nested_pb = self.pb
131
121
This initialises all the needed variables, and then fetches the
132
122
requested revisions, finally clearing the progress bar.
124
self.to_weaves = self.to_repository.weave_store
125
self.to_control = self.to_repository.control_weaves
126
self.from_weaves = self.from_repository.weave_store
127
self.from_control = self.from_repository.control_weaves
134
128
self.count_total = 0
135
129
self.file_ids_names = {}
136
pp = ProgressPhase('Transferring', 4, self.pb)
130
pp = ProgressPhase('Fetch phase', 4, self.pb)
139
search = self._revids_to_fetch()
142
if getattr(self, '_fetch_everything_for_search', None) is not None:
143
self._fetch_everything_for_search(search, pp)
145
# backward compatibility
146
self._fetch_everything_for_revisions(search.get_keys, pp)
133
revs = self._revids_to_fetch()
137
self._fetch_weave_texts(revs)
139
self._fetch_inventory_weave(revs)
141
self._fetch_revision_texts(revs)
142
self.count_copied += len(revs)
150
def _fetch_everything_for_search(self, search, pp):
151
"""Fetch all data for the given set of revisions."""
152
# The first phase is "file". We pass the progress bar for it directly
153
# into item_keys_introduced_by, which has more information about how
154
# that phase is progressing than we do. Progress updates for the other
155
# phases are taken care of in this function.
156
# XXX: there should be a clear owner of the progress reporting. Perhaps
157
# item_keys_introduced_by should have a richer API than it does at the
158
# moment, so that it can feed the progress information back to this
161
pb = bzrlib.ui.ui_factory.nested_progress_bar()
163
revs = search.get_keys()
164
graph = self.from_repository.get_graph()
165
revs = list(graph.iter_topo_order(revs))
166
data_to_fetch = self.from_repository.item_keys_introduced_by(revs,
169
for knit_kind, file_id, revisions in data_to_fetch:
170
if knit_kind != phase:
172
# Make a new progress bar for this phase
175
pb = bzrlib.ui.ui_factory.nested_progress_bar()
176
if knit_kind == "file":
177
# Accumulate file texts
178
text_keys.extend([(file_id, revision) for revision in
180
elif knit_kind == "inventory":
181
# Now copy the file texts.
182
to_texts = self.to_repository.texts
183
from_texts = self.from_repository.texts
184
to_texts.insert_record_stream(from_texts.get_record_stream(
185
text_keys, self.to_repository._fetch_order,
186
not self.to_repository._fetch_uses_deltas))
187
# Cause an error if a text occurs after we have done the
190
# Before we process the inventory we generate the root
191
# texts (if necessary) so that the inventories references
193
self._generate_root_texts(revs)
194
# NB: This currently reopens the inventory weave in source;
195
# using a single stream interface instead would avoid this.
196
self._fetch_inventory_weave(revs, pb)
197
elif knit_kind == "signatures":
198
# Nothing to do here; this will be taken care of when
199
# _fetch_revision_texts happens.
201
elif knit_kind == "revisions":
202
self._fetch_revision_texts(revs, pb)
204
raise AssertionError("Unknown knit kind %r" % knit_kind)
205
if self.to_repository._fetch_reconcile:
206
self.to_repository.reconcile()
210
self.count_copied += len(revs)
212
146
def _revids_to_fetch(self):
213
"""Determines the exact revisions needed from self.from_repository to
214
install self._last_revision in self.to_repository.
216
If no revisions need to be fetched, then this just returns None.
218
147
mutter('fetch up to rev {%s}', self._last_revision)
219
148
if self._last_revision is NULL_REVISION:
220
149
# explicit limit of no revisions needed
222
151
if (self._last_revision is not None and
223
152
self.to_repository.has_revision(self._last_revision)):
226
return self.to_repository.search_missing_revision_ids(
227
self.from_repository, self._last_revision,
228
find_ghosts=self.find_ghosts)
229
except errors.NoSuchRevision, e:
156
return self.to_repository.missing_revision_ids(self.from_repository,
158
except errors.NoSuchRevision:
230
159
raise InstallFailed([self._last_revision])
232
def _fetch_inventory_weave(self, revs, pb):
233
pb.update("fetch inventory", 0, 2)
234
to_weave = self.to_repository.inventories
235
# just merge, this is optimisable and its means we don't
236
# copy unreferenced data such as not-needed inventories.
237
pb.update("fetch inventory", 1, 3)
238
from_weave = self.from_repository.inventories
239
pb.update("fetch inventory", 2, 3)
240
# we fetch only the referenced inventories because we do not
241
# know for unselected inventories whether all their required
242
# texts are present in the other repository - it could be
244
to_weave.insert_record_stream(from_weave.get_record_stream(
245
[(rev_id,) for rev_id in revs],
246
self.to_repository._fetch_order,
247
not self.to_repository._fetch_uses_deltas))
249
def _fetch_revision_texts(self, revs, pb):
250
# fetch signatures first and then the revision texts
161
def _fetch_weave_texts(self, revs):
162
texts_pb = bzrlib.ui.ui_factory.nested_progress_bar()
164
# fileids_altered_by_revision_ids requires reading the inventory
165
# weave, we will need to read the inventory weave again when
166
# all this is done, so enable caching for that specific weave
167
inv_w = self.from_repository.get_inventory_weave()
169
file_ids = self.from_repository.fileids_altered_by_revision_ids(revs)
171
num_file_ids = len(file_ids)
172
for file_id, required_versions in file_ids.items():
173
texts_pb.update("fetch texts", count, num_file_ids)
175
to_weave = self.to_weaves.get_weave_or_empty(file_id,
176
self.to_repository.get_transaction())
177
from_weave = self.from_weaves.get_weave(file_id,
178
self.from_repository.get_transaction())
179
# we fetch all the texts, because texts do
180
# not reference anything, and its cheap enough
181
to_weave.join(from_weave, version_ids=required_versions)
182
# we don't need *all* of this data anymore, but we dont know
183
# what we do. This cache clearing will result in a new read
184
# of the knit data when we do the checkout, but probably we
185
# want to emit the needed data on the fly rather than at the
187
# the from weave should know not to cache data being joined,
188
# but its ok to ask it to clear.
189
from_weave.clear_cache()
190
to_weave.clear_cache()
194
def _fetch_inventory_weave(self, revs):
195
pb = bzrlib.ui.ui_factory.nested_progress_bar()
197
pb.update("fetch inventory", 0, 2)
198
to_weave = self.to_control.get_weave('inventory',
199
self.to_repository.get_transaction())
201
child_pb = bzrlib.ui.ui_factory.nested_progress_bar()
203
# just merge, this is optimisable and its means we don't
204
# copy unreferenced data such as not-needed inventories.
205
pb.update("fetch inventory", 1, 3)
206
from_weave = self.from_repository.get_inventory_weave()
207
pb.update("fetch inventory", 2, 3)
208
# we fetch only the referenced inventories because we do not
209
# know for unselected inventories whether all their required
210
# texts are present in the other repository - it could be
212
to_weave.join(from_weave, pb=child_pb, msg='merge inventory',
214
from_weave.clear_cache()
221
class GenericRepoFetcher(RepoFetcher):
222
"""This is a generic repo to repo fetcher.
224
This makes minimal assumptions about repo layout and contents.
225
It triggers a reconciliation after fetching to ensure integrity.
228
def _fetch_revision_texts(self, revs):
229
"""Fetch revision object texts"""
230
rev_pb = bzrlib.ui.ui_factory.nested_progress_bar()
232
to_txn = self.to_transaction = self.to_repository.get_transaction()
235
to_store = self.to_repository._revision_store
237
pb = bzrlib.ui.ui_factory.nested_progress_bar()
239
pb.update('copying revisions', count, total)
241
sig_text = self.from_repository.get_signature_text(rev)
242
to_store.add_revision_signature_text(rev, sig_text, to_txn)
243
except errors.NoSuchRevision:
246
to_store.add_revision(self.from_repository.get_revision(rev),
251
# fixup inventory if needed:
252
# this is expensive because we have no inverse index to current ghosts.
253
# but on local disk its a few seconds and sftp push is already insane.
255
# FIXME: repository should inform if this is needed.
256
self.to_repository.reconcile()
261
class KnitRepoFetcher(RepoFetcher):
262
"""This is a knit format repository specific fetcher.
264
This differs from the GenericRepoFetcher by not doing a
265
reconciliation after copying, and using knit joining to
269
def _fetch_revision_texts(self, revs):
251
270
# may need to be a InterRevisionStore call here.
252
to_sf = self.to_repository.signatures
253
from_sf = self.from_repository.signatures
254
# A missing signature is just skipped.
255
to_sf.insert_record_stream(filter_absent(from_sf.get_record_stream(
256
[(rev_id,) for rev_id in revs],
257
self.to_repository._fetch_order,
258
not self.to_repository._fetch_uses_deltas)))
259
self._fetch_just_revision_texts(revs)
261
def _fetch_just_revision_texts(self, version_ids):
262
to_rf = self.to_repository.revisions
263
from_rf = self.from_repository.revisions
264
# If a revision has a delta, this is actually expanded inside the
265
# insert_record_stream code now, which is an alternate fix for
267
to_rf.insert_record_stream(from_rf.get_record_stream(
268
[(rev_id,) for rev_id in version_ids],
269
self.to_repository._fetch_order,
270
not self.to_repository._fetch_uses_deltas))
272
def _generate_root_texts(self, revs):
273
"""This will be called by __fetch between fetching weave texts and
274
fetching the inventory weave.
276
Subclasses should override this if they need to generate root texts
277
after fetching weave texts.
271
from_transaction = self.from_repository.get_transaction()
272
to_transaction = self.to_repository.get_transaction()
273
to_sf = self.to_repository._revision_store.get_signature_file(
275
from_sf = self.from_repository._revision_store.get_signature_file(
277
to_sf.join(from_sf, version_ids=revs, ignore_missing=True)
278
to_rf = self.to_repository._revision_store.get_revision_file(
280
from_rf = self.from_repository._revision_store.get_revision_file(
282
to_rf.join(from_rf, version_ids=revs)
282
285
class Inter1and2Helper(object):
314
315
revs = revs[100:]
316
def _find_root_ids(self, revs, parent_map, graph):
318
planned_versions = {}
319
for tree in self.iter_rev_trees(revs):
320
revision_id = tree.inventory.root.revision
321
root_id = tree.get_root_id()
322
planned_versions.setdefault(root_id, []).append(revision_id)
323
revision_root[revision_id] = root_id
324
# Find out which parents we don't already know root ids for
326
for revision_parents in parent_map.itervalues():
327
parents.update(revision_parents)
328
parents.difference_update(revision_root.keys() + [NULL_REVISION])
329
# Limit to revisions present in the versionedfile
330
parents = graph.get_parent_map(parents).keys()
331
for tree in self.iter_rev_trees(parents):
332
root_id = tree.get_root_id()
333
revision_root[tree.get_revision_id()] = root_id
334
return revision_root, planned_versions
336
317
def generate_root_texts(self, revs):
337
318
"""Generate VersionedFiles for all root ids.
339
320
:param revs: the revisions to include
341
to_texts = self.target.texts
342
graph = self.source.get_graph()
343
parent_map = graph.get_parent_map(revs)
344
rev_order = topo_sort(parent_map)
345
rev_id_to_root_id, root_id_to_rev_ids = self._find_root_ids(
346
revs, parent_map, graph)
347
root_id_order = [(rev_id_to_root_id[rev_id], rev_id) for rev_id in
349
# Guaranteed stable, this groups all the file id operations together
350
# retaining topological order within the revisions of a file id.
351
# File id splits and joins would invalidate this, but they don't exist
352
# yet, and are unlikely to in non-rich-root environments anyway.
353
root_id_order.sort(key=operator.itemgetter(0))
354
# Create a record stream containing the roots to create.
356
for key in root_id_order:
357
root_id, rev_id = key
358
rev_parents = parent_map[rev_id]
359
# We drop revision parents with different file-ids, because
360
# that represents a rename of the root to a different location
361
# - its not actually a parent for us. (We could look for that
362
# file id in the revision tree at considerably more expense,
363
# but for now this is sufficient (and reconcile will catch and
364
# correct this anyway).
365
# When a parent revision is a ghost, we guess that its root id
366
# was unchanged (rather than trimming it from the parent list).
367
parent_keys = tuple((root_id, parent) for parent in rev_parents
368
if parent != NULL_REVISION and
369
rev_id_to_root_id.get(parent, root_id) == root_id)
370
yield FulltextContentFactory(key, parent_keys, None, '')
371
to_texts.insert_record_stream(yield_roots())
322
inventory_weave = self.source.get_inventory_weave()
325
to_store = self.target.weave_store
326
for tree in self.iter_rev_trees(revs):
327
revision_id = tree.inventory.root.revision
328
root_id = tree.inventory.root.file_id
329
parents = inventory_weave.get_parents(revision_id)
330
if root_id not in versionedfile:
331
versionedfile[root_id] = to_store.get_weave_or_empty(root_id,
332
self.target.get_transaction())
333
parent_texts[root_id] = versionedfile[root_id].add_lines(
334
revision_id, parents, [], parent_texts)
373
336
def regenerate_inventory(self, revs):
374
337
"""Generate a new inventory versionedfile in target, convertin data.
377
340
stored in the target (reserializing it in a different format).
378
341
:param revs: The revisions to include
343
inventory_weave = self.source.get_inventory_weave()
380
344
for tree in self.iter_rev_trees(revs):
381
parents = tree.get_parent_ids()
345
parents = inventory_weave.get_parents(tree.get_revision_id())
382
346
self.target.add_inventory(tree.get_revision_id(), tree.inventory,
385
def fetch_revisions(self, revision_ids):
386
# TODO: should this batch them up rather than requesting 10,000
388
for revision in self.source.get_revisions(revision_ids):
389
self.target.add_revision(revision.revision_id, revision)
392
class Model1toKnit2Fetcher(RepoFetcher):
350
class Model1toKnit2Fetcher(GenericRepoFetcher):
393
351
"""Fetch from a Model1 repository into a Knit2 repository
395
def __init__(self, to_repository, from_repository, last_revision=None,
396
pb=None, find_ghosts=True):
353
def __init__(self, to_repository, from_repository, last_revision=None,
397
355
self.helper = Inter1and2Helper(from_repository, to_repository)
398
RepoFetcher.__init__(self, to_repository, from_repository,
399
last_revision, pb, find_ghosts)
356
GenericRepoFetcher.__init__(self, to_repository, from_repository,
401
def _generate_root_texts(self, revs):
359
def _fetch_weave_texts(self, revs):
360
GenericRepoFetcher._fetch_weave_texts(self, revs)
361
# Now generate a weave for the tree root
402
362
self.helper.generate_root_texts(revs)
404
def _fetch_inventory_weave(self, revs, pb):
364
def _fetch_inventory_weave(self, revs):
405
365
self.helper.regenerate_inventory(revs)
407
def _fetch_revision_texts(self, revs, pb):
408
"""Fetch revision object texts"""
412
pb.update('copying revisions', count, total)
414
sig_text = self.from_repository.get_signature_text(rev)
415
self.to_repository.add_signature_text(rev, sig_text)
416
except errors.NoSuchRevision:
419
self._copy_revision(rev)
422
def _copy_revision(self, rev):
423
self.helper.fetch_revisions([rev])
426
class Knit1to2Fetcher(RepoFetcher):
368
class Knit1to2Fetcher(KnitRepoFetcher):
427
369
"""Fetch from a Knit1 repository into a Knit2 repository"""
429
def __init__(self, to_repository, from_repository, last_revision=None,
430
pb=None, find_ghosts=True):
371
def __init__(self, to_repository, from_repository, last_revision=None,
431
373
self.helper = Inter1and2Helper(from_repository, to_repository)
432
RepoFetcher.__init__(self, to_repository, from_repository,
433
last_revision, pb, find_ghosts)
374
KnitRepoFetcher.__init__(self, to_repository, from_repository,
435
def _generate_root_texts(self, revs):
377
def _fetch_weave_texts(self, revs):
378
KnitRepoFetcher._fetch_weave_texts(self, revs)
379
# Now generate a weave for the tree root
436
380
self.helper.generate_root_texts(revs)
438
def _fetch_inventory_weave(self, revs, pb):
382
def _fetch_inventory_weave(self, revs):
439
383
self.helper.regenerate_inventory(revs)
441
def _fetch_just_revision_texts(self, version_ids):
442
self.helper.fetch_revisions(version_ids)
386
class Fetcher(object):
387
"""Backwards compatibility glue for branch.fetch()."""
389
@deprecated_method(zero_eight)
390
def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
391
"""Please see branch.fetch()."""
392
to_branch.fetch(from_branch, last_revision, pb)