73
82
count_copied -- number of revisions copied
75
This should not be used directly, it's essential a object to encapsulate
84
This should not be used directory, its essential a object to encapsulate
76
85
the logic in InterRepository.fetch().
79
def __init__(self, to_repository, from_repository, last_revision=None, pb=None,
81
"""Create a repo fetcher.
83
:param find_ghosts: If True search the entire history for ghosts.
87
def __init__(self, to_repository, from_repository, last_revision=None, pb=None):
85
88
# result variables.
86
89
self.failed_revisions = []
87
90
self.count_copied = 0
88
if to_repository.has_same_location(from_repository):
89
# repository.fetch should be taking care of this case.
90
raise errors.BzrError('RepoFetcher run '
91
'between two objects at the same location: '
92
'%r and %r' % (to_repository, from_repository))
91
if to_repository.control_files._transport.base == from_repository.control_files._transport.base:
92
# check that last_revision is in 'from' and then return a no-operation.
93
if last_revision not in (None, NULL_REVISION):
94
from_repository.get_revision(last_revision)
93
96
self.to_repository = to_repository
94
97
self.from_repository = from_repository
95
98
# must not mutate self._last_revision as its potentially a shared instance
96
99
self._last_revision = last_revision
97
self.find_ghosts = find_ghosts
99
101
self.pb = bzrlib.ui.ui_factory.nested_progress_bar()
100
102
self.nested_pb = self.pb
128
121
This initialises all the needed variables, and then fetches the
129
122
requested revisions, finally clearing the progress bar.
124
self.to_weaves = self.to_repository.weave_store
125
self.to_control = self.to_repository.control_weaves
126
self.from_weaves = self.from_repository.weave_store
127
self.from_control = self.from_repository.control_weaves
131
128
self.count_total = 0
132
129
self.file_ids_names = {}
133
pp = ProgressPhase('Transferring', 4, self.pb)
130
pp = ProgressPhase('Fetch phase', 4, self.pb)
136
search = self._revids_to_fetch()
139
if getattr(self, '_fetch_everything_for_search', None) is not None:
140
self._fetch_everything_for_search(search, pp)
142
# backward compatibility
143
self._fetch_everything_for_revisions(search.get_keys, pp)
133
revs = self._revids_to_fetch()
137
self._fetch_weave_texts(revs)
139
self._fetch_inventory_weave(revs)
141
self._fetch_revision_texts(revs)
142
self.count_copied += len(revs)
147
def _fetch_everything_for_search(self, search, pp):
148
"""Fetch all data for the given set of revisions."""
149
# The first phase is "file". We pass the progress bar for it directly
150
# into item_keys_introduced_by, which has more information about how
151
# that phase is progressing than we do. Progress updates for the other
152
# phases are taken care of in this function.
153
# XXX: there should be a clear owner of the progress reporting. Perhaps
154
# item_keys_introduced_by should have a richer API than it does at the
155
# moment, so that it can feed the progress information back to this
158
pb = bzrlib.ui.ui_factory.nested_progress_bar()
160
revs = search.get_keys()
161
graph = self.from_repository.get_graph()
162
revs = list(graph.iter_topo_order(revs))
163
data_to_fetch = self.from_repository.item_keys_introduced_by(revs,
166
for knit_kind, file_id, revisions in data_to_fetch:
167
if knit_kind != phase:
169
# Make a new progress bar for this phase
172
pb = bzrlib.ui.ui_factory.nested_progress_bar()
173
if knit_kind == "file":
174
# Accumulate file texts
175
text_keys.extend([(file_id, revision) for revision in
177
elif knit_kind == "inventory":
178
# Now copy the file texts.
179
to_texts = self.to_repository.texts
180
from_texts = self.from_repository.texts
181
to_texts.insert_record_stream(from_texts.get_record_stream(
182
text_keys, self.to_repository._fetch_order,
183
self.to_repository._fetch_uses_deltas))
184
# Cause an error if a text occurs after we have done the
187
# Before we process the inventory we generate the root
188
# texts (if necessary) so that the inventories references
190
self._generate_root_texts(revs)
191
# NB: This currently reopens the inventory weave in source;
192
# using a single stream interface instead would avoid this.
193
self._fetch_inventory_weave(revs, pb)
194
elif knit_kind == "signatures":
195
# Nothing to do here; this will be taken care of when
196
# _fetch_revision_texts happens.
198
elif knit_kind == "revisions":
199
self._fetch_revision_texts(revs, pb)
201
raise AssertionError("Unknown knit kind %r" % knit_kind)
202
if self.to_repository._fetch_reconcile:
203
self.to_repository.reconcile()
207
self.count_copied += len(revs)
209
146
def _revids_to_fetch(self):
210
"""Determines the exact revisions needed from self.from_repository to
211
install self._last_revision in self.to_repository.
213
If no revisions need to be fetched, then this just returns None.
215
147
mutter('fetch up to rev {%s}', self._last_revision)
216
148
if self._last_revision is NULL_REVISION:
217
149
# explicit limit of no revisions needed
219
151
if (self._last_revision is not None and
220
152
self.to_repository.has_revision(self._last_revision)):
223
return self.to_repository.search_missing_revision_ids(
224
self.from_repository, self._last_revision,
225
find_ghosts=self.find_ghosts)
226
except errors.NoSuchRevision, e:
156
return self.to_repository.missing_revision_ids(self.from_repository,
158
except errors.NoSuchRevision:
227
159
raise InstallFailed([self._last_revision])
229
def _fetch_inventory_weave(self, revs, pb):
230
pb.update("fetch inventory", 0, 2)
231
to_weave = self.to_repository.inventories
232
child_pb = bzrlib.ui.ui_factory.nested_progress_bar()
234
# just merge, this is optimisable and its means we don't
235
# copy unreferenced data such as not-needed inventories.
236
pb.update("fetch inventory", 1, 3)
237
from_weave = self.from_repository.inventories
238
pb.update("fetch inventory", 2, 3)
239
# we fetch only the referenced inventories because we do not
240
# know for unselected inventories whether all their required
241
# texts are present in the other repository - it could be
243
to_weave.insert_record_stream(from_weave.get_record_stream(
244
[(rev_id,) for rev_id in revs],
245
self.to_repository._fetch_order,
246
self.to_repository._fetch_uses_deltas))
250
def _fetch_revision_texts(self, revs, pb):
161
def _fetch_weave_texts(self, revs):
162
texts_pb = bzrlib.ui.ui_factory.nested_progress_bar()
164
# fileids_altered_by_revision_ids requires reading the inventory
165
# weave, we will need to read the inventory weave again when
166
# all this is done, so enable caching for that specific weave
167
inv_w = self.from_repository.get_inventory_weave()
169
file_ids = self.from_repository.fileids_altered_by_revision_ids(revs)
171
num_file_ids = len(file_ids)
172
for file_id, required_versions in file_ids.items():
173
texts_pb.update("fetch texts", count, num_file_ids)
175
to_weave = self.to_weaves.get_weave_or_empty(file_id,
176
self.to_repository.get_transaction())
177
from_weave = self.from_weaves.get_weave(file_id,
178
self.from_repository.get_transaction())
179
# we fetch all the texts, because texts do
180
# not reference anything, and its cheap enough
181
to_weave.join(from_weave, version_ids=required_versions)
182
# we don't need *all* of this data anymore, but we dont know
183
# what we do. This cache clearing will result in a new read
184
# of the knit data when we do the checkout, but probably we
185
# want to emit the needed data on the fly rather than at the
187
# the from weave should know not to cache data being joined,
188
# but its ok to ask it to clear.
189
from_weave.clear_cache()
190
to_weave.clear_cache()
194
def _fetch_inventory_weave(self, revs):
195
pb = bzrlib.ui.ui_factory.nested_progress_bar()
197
pb.update("fetch inventory", 0, 2)
198
to_weave = self.to_control.get_weave('inventory',
199
self.to_repository.get_transaction())
201
child_pb = bzrlib.ui.ui_factory.nested_progress_bar()
203
# just merge, this is optimisable and its means we don't
204
# copy unreferenced data such as not-needed inventories.
205
pb.update("fetch inventory", 1, 3)
206
from_weave = self.from_repository.get_inventory_weave()
207
pb.update("fetch inventory", 2, 3)
208
# we fetch only the referenced inventories because we do not
209
# know for unselected inventories whether all their required
210
# texts are present in the other repository - it could be
212
to_weave.join(from_weave, pb=child_pb, msg='merge inventory',
214
from_weave.clear_cache()
221
class GenericRepoFetcher(RepoFetcher):
222
"""This is a generic repo to repo fetcher.
224
This makes minimal assumptions about repo layout and contents.
225
It triggers a reconciliation after fetching to ensure integrity.
228
def _fetch_revision_texts(self, revs):
229
"""Fetch revision object texts"""
230
rev_pb = bzrlib.ui.ui_factory.nested_progress_bar()
232
to_txn = self.to_transaction = self.to_repository.get_transaction()
235
to_store = self.to_repository._revision_store
237
pb = bzrlib.ui.ui_factory.nested_progress_bar()
239
pb.update('copying revisions', count, total)
241
sig_text = self.from_repository.get_signature_text(rev)
242
to_store.add_revision_signature_text(rev, sig_text, to_txn)
243
except errors.NoSuchRevision:
246
to_store.add_revision(self.from_repository.get_revision(rev),
251
# fixup inventory if needed:
252
# this is expensive because we have no inverse index to current ghosts.
253
# but on local disk its a few seconds and sftp push is already insane.
255
# FIXME: repository should inform if this is needed.
256
self.to_repository.reconcile()
261
class KnitRepoFetcher(RepoFetcher):
262
"""This is a knit format repository specific fetcher.
264
This differs from the GenericRepoFetcher by not doing a
265
reconciliation after copying, and using knit joining to
269
def _fetch_revision_texts(self, revs):
251
270
# may need to be a InterRevisionStore call here.
252
to_sf = self.to_repository.signatures
253
from_sf = self.from_repository.signatures
254
# A missing signature is just skipped.
255
to_sf.insert_record_stream(filter_absent(from_sf.get_record_stream(
256
[(rev_id,) for rev_id in revs],
257
self.to_repository._fetch_order,
258
self.to_repository._fetch_uses_deltas)))
259
self._fetch_just_revision_texts(revs)
261
def _fetch_just_revision_texts(self, version_ids):
262
to_rf = self.to_repository.revisions
263
from_rf = self.from_repository.revisions
264
to_rf.insert_record_stream(from_rf.get_record_stream(
265
[(rev_id,) for rev_id in version_ids],
266
self.to_repository._fetch_order,
267
self.to_repository._fetch_uses_deltas))
269
def _generate_root_texts(self, revs):
270
"""This will be called by __fetch between fetching weave texts and
271
fetching the inventory weave.
273
Subclasses should override this if they need to generate root texts
274
after fetching weave texts.
271
from_transaction = self.from_repository.get_transaction()
272
to_transaction = self.to_repository.get_transaction()
273
to_sf = self.to_repository._revision_store.get_signature_file(
275
from_sf = self.from_repository._revision_store.get_signature_file(
277
to_sf.join(from_sf, version_ids=revs, ignore_missing=True)
278
to_rf = self.to_repository._revision_store.get_revision_file(
280
from_rf = self.from_repository._revision_store.get_revision_file(
282
to_rf.join(from_rf, version_ids=revs)
279
285
class Inter1and2Helper(object):
311
315
revs = revs[100:]
313
def _find_root_ids(self, revs, parent_map, graph):
315
planned_versions = {}
316
for tree in self.iter_rev_trees(revs):
317
revision_id = tree.inventory.root.revision
318
root_id = tree.get_root_id()
319
planned_versions.setdefault(root_id, []).append(revision_id)
320
revision_root[revision_id] = root_id
321
# Find out which parents we don't already know root ids for
323
for revision_parents in parent_map.itervalues():
324
parents.update(revision_parents)
325
parents.difference_update(revision_root.keys() + [NULL_REVISION])
326
# Limit to revisions present in the versionedfile
327
parents = graph.get_parent_map(parents).keys()
328
for tree in self.iter_rev_trees(parents):
329
root_id = tree.get_root_id()
330
revision_root[tree.get_revision_id()] = root_id
331
return revision_root, planned_versions
333
317
def generate_root_texts(self, revs):
334
318
"""Generate VersionedFiles for all root ids.
336
320
:param revs: the revisions to include
338
to_texts = self.target.texts
339
graph = self.source.get_graph()
340
parent_map = graph.get_parent_map(revs)
341
rev_order = topo_sort(parent_map)
342
rev_id_to_root_id, root_id_to_rev_ids = self._find_root_ids(
343
revs, parent_map, graph)
344
root_id_order = [(rev_id_to_root_id[rev_id], rev_id) for rev_id in
346
# Guaranteed stable, this groups all the file id operations together
347
# retaining topological order within the revisions of a file id.
348
# File id splits and joins would invalidate this, but they don't exist
349
# yet, and are unlikely to in non-rich-root environments anyway.
350
root_id_order.sort(key=operator.itemgetter(0))
351
# Create a record stream containing the roots to create.
353
for key in root_id_order:
354
root_id, rev_id = key
355
rev_parents = parent_map[rev_id]
356
# We drop revision parents with different file-ids, because
357
# that represents a rename of the root to a different location
358
# - its not actually a parent for us. (We could look for that
359
# file id in the revision tree at considerably more expense,
360
# but for now this is sufficient (and reconcile will catch and
361
# correct this anyway).
362
# When a parent revision is a ghost, we guess that its root id
363
# was unchanged (rather than trimming it from the parent list).
364
parent_keys = tuple((root_id, parent) for parent in rev_parents
365
if parent != NULL_REVISION and
366
rev_id_to_root_id.get(parent, root_id) == root_id)
367
yield FulltextContentFactory(key, parent_keys, None, '')
368
to_texts.insert_record_stream(yield_roots())
322
inventory_weave = self.source.get_inventory_weave()
325
to_store = self.target.weave_store
326
for tree in self.iter_rev_trees(revs):
327
revision_id = tree.inventory.root.revision
328
root_id = tree.inventory.root.file_id
329
parents = inventory_weave.get_parents(revision_id)
330
if root_id not in versionedfile:
331
versionedfile[root_id] = to_store.get_weave_or_empty(root_id,
332
self.target.get_transaction())
333
parent_texts[root_id] = versionedfile[root_id].add_lines(
334
revision_id, parents, [], parent_texts)
370
336
def regenerate_inventory(self, revs):
371
337
"""Generate a new inventory versionedfile in target, convertin data.
374
340
stored in the target (reserializing it in a different format).
375
341
:param revs: The revisions to include
343
inventory_weave = self.source.get_inventory_weave()
377
344
for tree in self.iter_rev_trees(revs):
378
parents = tree.get_parent_ids()
345
parents = inventory_weave.get_parents(tree.get_revision_id())
379
346
self.target.add_inventory(tree.get_revision_id(), tree.inventory,
382
def fetch_revisions(self, revision_ids):
383
for revision in self.source.get_revisions(revision_ids):
384
self.target.add_revision(revision.revision_id, revision)
387
class Model1toKnit2Fetcher(RepoFetcher):
350
class Model1toKnit2Fetcher(GenericRepoFetcher):
388
351
"""Fetch from a Model1 repository into a Knit2 repository
390
def __init__(self, to_repository, from_repository, last_revision=None,
391
pb=None, find_ghosts=True):
353
def __init__(self, to_repository, from_repository, last_revision=None,
392
355
self.helper = Inter1and2Helper(from_repository, to_repository)
393
RepoFetcher.__init__(self, to_repository, from_repository,
394
last_revision, pb, find_ghosts)
356
GenericRepoFetcher.__init__(self, to_repository, from_repository,
396
def _generate_root_texts(self, revs):
359
def _fetch_weave_texts(self, revs):
360
GenericRepoFetcher._fetch_weave_texts(self, revs)
361
# Now generate a weave for the tree root
397
362
self.helper.generate_root_texts(revs)
399
def _fetch_inventory_weave(self, revs, pb):
364
def _fetch_inventory_weave(self, revs):
400
365
self.helper.regenerate_inventory(revs)
402
def _fetch_revision_texts(self, revs, pb):
403
"""Fetch revision object texts"""
407
pb.update('copying revisions', count, total)
409
sig_text = self.from_repository.get_signature_text(rev)
410
self.to_repository.add_signature_text(rev, sig_text)
411
except errors.NoSuchRevision:
414
self._copy_revision(rev)
417
def _copy_revision(self, rev):
418
self.helper.fetch_revisions([rev])
421
class Knit1to2Fetcher(RepoFetcher):
368
class Knit1to2Fetcher(KnitRepoFetcher):
422
369
"""Fetch from a Knit1 repository into a Knit2 repository"""
424
def __init__(self, to_repository, from_repository, last_revision=None,
425
pb=None, find_ghosts=True):
371
def __init__(self, to_repository, from_repository, last_revision=None,
426
373
self.helper = Inter1and2Helper(from_repository, to_repository)
427
RepoFetcher.__init__(self, to_repository, from_repository,
428
last_revision, pb, find_ghosts)
374
KnitRepoFetcher.__init__(self, to_repository, from_repository,
430
def _generate_root_texts(self, revs):
377
def _fetch_weave_texts(self, revs):
378
KnitRepoFetcher._fetch_weave_texts(self, revs)
379
# Now generate a weave for the tree root
431
380
self.helper.generate_root_texts(revs)
433
def _fetch_inventory_weave(self, revs, pb):
382
def _fetch_inventory_weave(self, revs):
434
383
self.helper.regenerate_inventory(revs)
436
def _fetch_just_revision_texts(self, version_ids):
437
self.helper.fetch_revisions(version_ids)
386
class Fetcher(object):
387
"""Backwards compatibility glue for branch.fetch()."""
389
@deprecated_method(zero_eight)
390
def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
391
"""Please see branch.fetch()."""
392
to_branch.fetch(from_branch, last_revision, pb)