21
34
that has merged into it. As the first step of a merge, pull, or
22
35
branch operation we copy history from the source into the destination
38
The copying is done in a slightly complicated order. We don't want to
39
add a revision to the store until everything it refers to is also
40
stored, so that if a revision is present we can totally recreate it.
41
However, we can't know what files are included in a revision until we
42
read its inventory. Therefore, we first pull the XML and hold it in
43
memory until we've updated all of the files referenced.
28
from bzrlib.lazy_import import lazy_import
29
lazy_import(globals(), """
40
from bzrlib.revision import NULL_REVISION
41
from bzrlib.trace import mutter
44
class RepoFetcher(object):
45
"""Pull revisions and texts from one repository to another.
47
This should not be used directly; it's essentially an object to encapsulate
48
the logic in InterRepository.fetch().
46
# TODO: Avoid opening the same weaves over and over.
48
# XXX: This doesn't handle ghost (not present in branch) revisions at
49
# all yet. I'm not sure they really should be supported.
51
# NOTE: This doesn't copy revisions which may be present but not
52
# merged into the last revision. I'm not sure we want to do that.
54
# - get a list of revisions that need to be pulled in
55
# - for each one, pull in that revision file
56
# and get the inventory, and store the inventory with right
58
# - and get the ancestry, and store that with right parents too
59
# - and keep a note of all file ids and version seen
60
# - then go through all files; for each one get the weave,
61
# and add in all file versions
65
def greedy_fetch(to_branch, from_branch, revision=None, pb=None):
    """Run a fetch between two branches and report what was copied.

    A Fetcher is constructed with the given arguments, and its counters
    are read back once construction has finished.

    :param to_branch: the destination branch, handed to Fetcher unchanged.
    :param from_branch: the source branch, handed to Fetcher unchanged.
    :param revision: optional limiting revision, passed as Fetcher's third
        positional argument.
    :param pb: optional progress bar, passed through to Fetcher.
    :return: a (count_copied, failed_revisions) tuple taken from the
        Fetcher instance.
    """
    fetcher = Fetcher(to_branch, from_branch, revision, pb)
    copied = fetcher.count_copied
    failures = fetcher.failed_revisions
    return copied, failures
71
class Fetcher(object):
72
"""Pull revisions and texts from one branch to another.
74
This doesn't update the destination's history; that can be done
75
separately if desired.
78
If set, pull only up to this revision_id.
82
last_revision -- if last_revision
83
is given it will be that, otherwise the last revision of
86
count_copied -- number of revisions copied
88
count_weaves -- number of file weaves copied
51
def __init__(self, to_repository, from_repository, last_revision=None,
52
find_ghosts=True, fetch_spec=None):
53
"""Create a repo fetcher.
55
:param last_revision: If set, try to limit to the data this revision
57
:param find_ghosts: If True search the entire history for ghosts.
59
# repository.fetch has the responsibility for short-circuiting
60
# attempts to copy between a repository and itself.
61
self.to_repository = to_repository
62
self.from_repository = from_repository
63
self.sink = to_repository._get_sink()
64
# must not mutate self._last_revision as it's potentially a shared instance
65
self._last_revision = last_revision
66
self._fetch_spec = fetch_spec
67
self.find_ghosts = find_ghosts
68
self.from_repository.lock_read()
69
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
70
self.from_repository, self.from_repository._format,
71
self.to_repository, self.to_repository._format)
75
self.from_repository.unlock()
78
"""Primary worker function.
80
This initialises all the needed variables, and then fetches the
81
requested revisions, finally clearing the progress bar.
83
# Roughly this is what we're aiming for fetch to become:
85
# missing = self.sink.insert_stream(self.source.get_stream(search))
87
# missing = self.sink.insert_stream(self.source.get_items(missing))
90
def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
91
if to_branch == from_branch:
92
raise Exception("can't fetch from a branch to itself")
93
self.to_branch = to_branch
94
self.to_weaves = to_branch.weave_store
95
self.to_control = to_branch.control_weaves
96
self.from_branch = from_branch
97
self.from_weaves = from_branch.weave_store
98
self.from_control = from_branch.control_weaves
99
self.failed_revisions = []
100
self.count_copied = 0
89
101
self.count_total = 0
102
self.count_weaves = 0
103
self.copied_file_ids = set()
90
104
self.file_ids_names = {}
91
pb = ui.ui_factory.nested_progress_bar()
92
pb.show_pct = pb.show_count = False
94
pb.update("Finding revisions", 0, 2)
95
search = self._revids_to_fetch()
98
pb.update("Fetching revisions", 1, 2)
99
self._fetch_everything_for_search(search)
103
def _fetch_everything_for_search(self, search):
104
"""Fetch all data for the given set of revisions."""
105
# The first phase is "file". We pass the progress bar for it directly
106
# into item_keys_introduced_by, which has more information about how
107
# that phase is progressing than we do. Progress updates for the other
108
# phases are taken care of in this function.
109
# XXX: there should be a clear owner of the progress reporting. Perhaps
110
# item_keys_introduced_by should have a richer API than it does at the
111
# moment, so that it can feed the progress information back to this
113
if (self.from_repository._format.rich_root_data and
114
not self.to_repository._format.rich_root_data):
115
raise errors.IncompatibleRepositories(
116
self.from_repository, self.to_repository,
117
"different rich-root support")
118
pb = ui.ui_factory.nested_progress_bar()
120
pb.update("Get stream source")
121
source = self.from_repository._get_source(
122
self.to_repository._format)
123
stream = source.get_stream(search)
124
from_format = self.from_repository._format
125
pb.update("Inserting stream")
126
resume_tokens, missing_keys = self.sink.insert_stream(
127
stream, from_format, [])
128
if self.to_repository._fallback_repositories:
130
self._parent_inventories(search.get_keys()))
132
pb.update("Missing keys")
133
stream = source.get_stream_for_missing_keys(missing_keys)
134
pb.update("Inserting missing keys")
135
resume_tokens, missing_keys = self.sink.insert_stream(
136
stream, from_format, resume_tokens)
138
raise AssertionError(
139
"second push failed to complete a fetch %r." % (
142
raise AssertionError(
143
"second push failed to commit the fetch %r." % (
145
pb.update("Finishing stream")
150
def _revids_to_fetch(self):
151
"""Determines the exact revisions needed from self.from_repository to
152
install self._last_revision in self.to_repository.
154
If no revisions need to be fetched, then this just returns None.
156
if self._fetch_spec is not None:
157
return self._fetch_spec
158
mutter('fetch up to rev {%s}', self._last_revision)
159
if self._last_revision is NULL_REVISION:
160
# explicit limit of no revisions needed
162
return self.to_repository.search_missing_revision_ids(
163
self.from_repository, self._last_revision,
164
find_ghosts=self.find_ghosts)
166
def _parent_inventories(self, revision_ids):
167
# Find all the parent revisions referenced by the stream, but
168
# not present in the stream, and make sure we send their
170
parent_maps = self.to_repository.get_parent_map(revision_ids)
172
map(parents.update, parent_maps.itervalues())
173
parents.discard(NULL_REVISION)
174
parents.difference_update(revision_ids)
175
missing_keys = set(('inventories', rev_id) for rev_id in parents)
179
class Inter1and2Helper(object):
180
"""Helper for operations that convert data from model 1 and 2
182
This is for use by fetchers and converters.
185
def __init__(self, source):
188
:param source: The repository data comes from
192
def iter_rev_trees(self, revs):
193
"""Iterate through RevisionTrees efficiently.
195
Additionally, the inventory's revision_id is set if unset.
197
Trees are retrieved in batches of 100, and then yielded in the order
200
:param revs: A list of revision ids
202
# In case that revs is not a list.
205
for tree in self.source.revision_trees(revs[:100]):
206
if tree.inventory.revision_id is None:
207
tree.inventory.revision_id = tree.get_revision_id()
211
def _find_root_ids(self, revs, parent_map, graph):
213
for tree in self.iter_rev_trees(revs):
214
revision_id = tree.inventory.root.revision
215
root_id = tree.get_root_id()
216
revision_root[revision_id] = root_id
217
# Find out which parents we don't already know root ids for
219
for revision_parents in parent_map.itervalues():
220
parents.update(revision_parents)
221
parents.difference_update(revision_root.keys() + [NULL_REVISION])
222
# Limit to revisions present in the versionedfile
223
parents = graph.get_parent_map(parents).keys()
224
for tree in self.iter_rev_trees(parents):
225
root_id = tree.get_root_id()
226
revision_root[tree.get_revision_id()] = root_id
229
def generate_root_texts(self, revs):
230
"""Generate VersionedFiles for all root ids.
232
:param revs: the revisions to include
234
graph = self.source.get_graph()
235
parent_map = graph.get_parent_map(revs)
236
rev_order = tsort.topo_sort(parent_map)
237
rev_id_to_root_id = self._find_root_ids(revs, parent_map, graph)
238
root_id_order = [(rev_id_to_root_id[rev_id], rev_id) for rev_id in
240
# Guaranteed stable, this groups all the file id operations together
241
# retaining topological order within the revisions of a file id.
242
# File id splits and joins would invalidate this, but they don't exist
243
# yet, and are unlikely to in non-rich-root environments anyway.
244
root_id_order.sort(key=operator.itemgetter(0))
245
# Create a record stream containing the roots to create.
247
# XXX: not covered by tests, should have a flag to always run
248
# this. -- mbp 20100129
249
graph = self.source_repo.get_known_graph_ancestry(revs)
250
new_roots_stream = _new_root_data_stream(
251
root_id_order, rev_id_to_root_id, parent_map, self.source, graph)
252
return [('texts', new_roots_stream)]
255
def _get_rich_root_heads_graph(source_repo, revision_ids):
256
"""Get a Graph object suitable for asking heads() for new rich roots."""
260
def _new_root_data_stream(
    root_keys_to_create, rev_id_to_root_id_map, parent_map, repo, graph=None):
    """Yield synthesised root-entry records for a 'texts' substream.

    Used in fetches that do rich-root upgrades.

    One FulltextContentFactory is yielded per requested key, carrying an
    empty text and the parent keys computed by
    _parent_keys_for_root_version.

    :param root_keys_to_create: iterable of (root_id, rev_id) pairs
        describing the root entries to create.
    :param rev_id_to_root_id_map: dict of known rev_id -> root_id mappings
        used when calculating the parents. If a parent rev_id is not found
        here then it will be recalculated.
    :param parent_map: a parent map that is looked up by the rev_id of
        each key being created.
    :param repo: repository handed to the parent-key helper.
    :param graph: a graph to use instead of repo.get_graph().
    """
    for key in root_keys_to_create:
        # Each key is a two-element (root_id, rev_id) pair; the key object
        # itself is what gets attached to the emitted record.
        root_id, rev_id = key
        parents_of_key = _parent_keys_for_root_version(
            root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph)
        record = versionedfile.FulltextContentFactory(
            key, parents_of_key, None, '')
        yield record
283
def _parent_keys_for_root_version(
284
root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph=None):
285
"""Get the parent keys for a given root id.
287
A helper function for _new_root_data_stream.
289
# Include direct parents of the revision, but only if they used the same
290
# root_id and are heads.
291
rev_parents = parent_map[rev_id]
293
for parent_id in rev_parents:
294
if parent_id == NULL_REVISION:
296
if parent_id not in rev_id_to_root_id_map:
297
# We probably didn't read this revision, go spend the extra effort
300
tree = repo.revision_tree(parent_id)
301
except errors.NoSuchRevision:
302
# Ghost, fill out rev_id_to_root_id in case we encounter this
304
# But set parent_root_id to None since we don't really know
305
parent_root_id = None
307
parent_root_id = tree.get_root_id()
308
rev_id_to_root_id_map[parent_id] = None
310
# rev_id_to_root_id_map[parent_id] = parent_root_id
311
# memory consumption maybe?
313
parent_root_id = rev_id_to_root_id_map[parent_id]
314
if root_id == parent_root_id:
315
# With stacking we _might_ want to refer to a non-local revision,
316
# but this code path only applies when we have the full content
317
# available, so ghosts really are ghosts, not just the edge of
319
parent_ids.append(parent_id)
321
# root_id may be in the parent anyway.
323
tree = repo.revision_tree(parent_id)
324
except errors.NoSuchRevision:
325
# ghost, can't refer to it.
106
self.pb = bzrlib.ui.ui_factory.progress_bar()
109
self.from_branch.lock_read()
111
revs = self._revids_to_fetch(last_revision )
114
self._fetch_revision_texts( revs )
115
self._fetch_weave_texts( revs )
116
self._fetch_inventory_weave( revs )
117
self.count_copied += len(revs)
119
self.from_branch.unlock()
122
def _revids_to_fetch(self, last_revision):
123
self.last_revision = self._find_last_revision(last_revision)
124
mutter('fetch up to rev {%s}', self.last_revision)
125
if (self.last_revision is not None and
126
self.to_branch.has_revision(self.last_revision)):
129
branch_from_revs = set(self.from_branch.get_ancestry(self.last_revision))
131
raise InstallFailed([self.last_revision])
133
self.dest_last_rev = self.to_branch.last_revision()
134
branch_to_revs = set(self.to_branch.get_ancestry(self.dest_last_rev))
136
return branch_from_revs.difference( branch_to_revs )
138
def _fetch_revision_texts(self, revs):
    """Copy the revision texts for ``revs`` into the destination branch.

    The destination branch's revision store is asked to copy the given
    revisions out of the source branch's revision store.

    :param revs: the revision ids to copy; passed straight to copy_multi.
    """
    # Resolve the destination's bound method first, then the source store,
    # matching the lookup order of a single chained call.
    copy_into_destination = self.to_branch.revision_store.copy_multi
    source_store = self.from_branch.revision_store
    copy_into_destination(source_store, revs)
142
def _fetch_weave_texts( self, revs ):
143
file_ids = self.from_branch.fileid_involved_by_set( revs )
145
num_file_ids = len(file_ids)
146
for file_id in file_ids:
147
self.pb.update( "merge weave merge",count,num_file_ids)
149
to_weave = self.to_weaves.get_weave_or_empty(file_id,
150
self.to_branch.get_transaction())
151
from_weave = self.from_weaves.get_weave(file_id,
152
self.from_branch.get_transaction())
154
if to_weave.numversions() > 0:
155
# destination has contents, must merge
329
parent_ids.append(tree.inventory[root_id].revision)
330
except errors.NoSuchId:
333
# Drop non-head parents
335
graph = repo.get_graph()
336
heads = graph.heads(parent_ids)
338
for parent_id in parent_ids:
339
if parent_id in heads and parent_id not in selected_ids:
340
selected_ids.append(parent_id)
341
parent_keys = [(root_id, parent_id) for parent_id in selected_ids]
157
to_weave.join(from_weave)
158
except errors.WeaveParentMismatch:
159
to_weave.reweave(from_weave)
161
# destination is empty, just replace it
162
to_weave = from_weave.copy( )
164
self.to_weaves.put_weave(file_id, to_weave,
165
self.to_branch.get_transaction())
169
def _fetch_inventory_weave( self, revs ):
170
self.pb.update( "inventory merge",0,1)
172
from_weave = self.from_control.get_weave('inventory',
173
self.from_branch.get_transaction())
174
to_weave = self.to_control.get_weave('inventory',
175
self.to_branch.get_transaction())
177
if to_weave.numversions() > 0:
178
# destination has contents, must merge
180
to_weave.join(from_weave)
181
except errors.WeaveParentMismatch:
182
to_weave.reweave(from_weave)
184
# destination is empty, just replace it
185
to_weave = from_weave.copy( )
187
self.to_control.put_weave('inventory', to_weave,
188
self.to_branch.get_transaction())
192
def _find_last_revision(self, last_revision):
193
"""Find the limiting source revision.
195
Every ancestor of that revision will be merged across.
197
Returns the revision_id, or returns None if there's no history
198
in the source branch."""
201
self.pb.update('get source history')
202
from_history = self.from_branch.revision_history()
203
self.pb.update('get destination history')
205
return from_history[-1]
207
return None # no history in the source branch