31
21
that has merged into it. As the first step of a merge, pull, or
32
22
branch operation we copy history from the source into the destination
35
The copying is done in a slightly complicated order. We don't want to
36
add a revision to the store until everything it refers to is also
37
stored, so that if a revision is present we can totally recreate it.
38
However, we can't know what files are included in a revision until we
39
read its inventory. Therefore, we first pull the XML and hold it in
40
memory until we've updated all of the files referenced.
43
# TODO: Avoid repeatedly opening weaves so many times.
45
# XXX: This doesn't handle ghost (not present in branch) revisions at
46
# all yet. I'm not sure they really should be supported.
48
# NOTE: This doesn't copy revisions which may be present but not
49
# merged into the last revision. I'm not sure we want to do that.
51
# - get a list of revisions that need to be pulled in
52
# - for each one, pull in that revision file
53
# and get the inventory, and store the inventory with right
55
# - and get the ancestry, and store that with right parents too
56
# - and keep a note of all file ids and version seen
57
# - then go through all files; for each one get the weave,
58
# and add in all file versions
62
def greedy_fetch(to_branch, from_branch, revision=None, pb=None):
    """Run a Fetcher from from_branch into to_branch and report its result.

    All four arguments are handed to Fetcher unchanged.

    Returns a tuple (count_copied, failed_revisions) read from the
    Fetcher once it has been constructed.
    """
    fetcher = Fetcher(to_branch, from_branch, revision, pb)
    copied = fetcher.count_copied
    failed = fetcher.failed_revisions
    return copied, failed
68
class Fetcher(object):
69
"""Pull revisions and texts from one branch to another.
71
This doesn't update the destination's history; that can be done
72
separately if desired.
75
If set, pull only up to this revision_id.
79
last_revision -- if last_revision
80
is given it will be that, otherwise the last revision of
83
count_copied -- number of revisions copied
85
count_weaves -- number of file weaves copied
33
from bzrlib.revision import NULL_REVISION
34
from bzrlib.tsort import topo_sort
35
from bzrlib.trace import mutter
37
from bzrlib.versionedfile import FulltextContentFactory
40
class RepoFetcher(object):
41
"""Pull revisions and texts from one repository to another.
43
This should not be used directly; it's essentially an object to encapsulate
44
the logic in InterRepository.fetch().
87
def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
88
if to_branch == from_branch:
89
raise Exception("can't fetch from a branch to itself")
90
self.to_branch = to_branch
91
self.to_weaves = to_branch.weave_store
92
self.to_control = to_branch.control_weaves
93
self.from_branch = from_branch
94
self.from_weaves = from_branch.weave_store
95
self.from_control = from_branch.control_weaves
96
self.failed_revisions = []
47
def __init__(self, to_repository, from_repository, last_revision=None,
48
pb=None, find_ghosts=True, fetch_spec=None):
49
"""Create a repo fetcher.
51
:param last_revision: If set, try to limit to the data this revision
53
:param find_ghosts: If True search the entire history for ghosts.
54
:param pb: ProgressBar object to use; deprecated and ignored.
55
This method will just create one on top of the stack.
58
symbol_versioning.warn(
59
symbol_versioning.deprecated_in((1, 14, 0))
60
% "pb parameter to RepoFetcher.__init__")
61
# and for simplicity it is in fact ignored
62
# repository.fetch has the responsibility for short-circuiting
63
# attempts to copy between a repository and itself.
64
self.to_repository = to_repository
65
self.from_repository = from_repository
66
self.sink = to_repository._get_sink()
67
# must not mutate self._last_revision as it's potentially a shared instance
68
self._last_revision = last_revision
69
self._fetch_spec = fetch_spec
70
self.find_ghosts = find_ghosts
71
self.from_repository.lock_read()
72
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
73
self.from_repository, self.from_repository._format,
74
self.to_repository, self.to_repository._format)
78
self.from_repository.unlock()
81
"""Primary worker function.
83
This initialises all the needed variables, and then fetches the
84
requested revisions, finally clearing the progress bar.
86
# Roughly this is what we're aiming for fetch to become:
88
# missing = self.sink.insert_stream(self.source.get_stream(search))
90
# missing = self.sink.insert_stream(self.source.get_items(missing))
98
92
self.count_total = 0
100
self.copied_file_ids = set()
102
self.pb = bzrlib.ui.ui_factory.progress_bar()
106
self.last_revision = self._find_last_revision(last_revision)
107
except NoSuchRevision, e:
108
mutter('failed getting last revision: %s', e)
109
raise InstallFailed([last_revision])
110
mutter('fetch up to rev {%s}', self.last_revision)
112
revs_to_fetch = self._compare_ancestries()
114
raise InstallFailed([self.last_revision])
115
self._copy_revisions(revs_to_fetch)
116
self.new_ancestry = revs_to_fetch
119
def _find_last_revision(self, last_revision):
120
"""Find the limiting source revision.
122
Every ancestor of that revision will be merged across.
124
Returns the revision_id, or returns None if there's no history
125
in the source branch."""
126
self.pb.update('get source history')
127
from_history = self.from_branch.revision_history()
128
self.pb.update('get destination history')
130
self.from_branch.get_revision(last_revision)
133
return from_history[-1]
135
return None # no history in the source branch
138
def _compare_ancestries(self):
139
"""Get a list of revisions that must be copied.
141
That is, every revision that's in the ancestry of the source
142
branch and not in the destination branch."""
143
self.pb.update('get source ancestry')
144
self.from_ancestry = self.from_branch.get_ancestry(self.last_revision)
146
dest_last_rev = self.to_branch.last_revision()
147
self.pb.update('get destination ancestry')
149
dest_ancestry = self.to_branch.get_ancestry(dest_last_rev)
152
ss = set(dest_ancestry)
154
for rev_id in self.from_ancestry:
156
to_fetch.append(rev_id)
157
mutter('need to get revision {%s}', rev_id)
158
mutter('need to get %d revisions in total', len(to_fetch))
159
self.count_total = len(to_fetch)
162
def _copy_revisions(self, revs_to_fetch):
164
for rev_id in revs_to_fetch:
168
if self.to_branch.has_revision(rev_id):
170
self.pb.update('fetch revision', i, self.count_total)
171
self._copy_one_revision(rev_id)
172
self.count_copied += 1
175
def _copy_one_revision(self, rev_id):
176
"""Copy revision and everything referenced by it."""
177
mutter('copying revision {%s}', rev_id)
178
rev_xml = self.from_branch.get_revision_xml(rev_id)
179
inv_xml = self.from_branch.get_inventory_xml(rev_id)
180
rev = serializer_v5.read_revision_from_string(rev_xml)
181
inv = serializer_v5.read_inventory_from_string(inv_xml)
182
assert rev.revision_id == rev_id
183
assert rev.inventory_sha1 == sha_string(inv_xml)
184
mutter(' commiter %s, %d parents',
187
self._copy_new_texts(rev_id, inv)
188
parents = rev.parent_ids
189
for parent in parents:
190
if not self.to_branch.has_revision(parent):
191
parents.pop(parents.index(parent))
192
self._copy_inventory(rev_id, inv_xml, parents)
193
self._copy_ancestry(rev_id, parents)
194
self.to_branch.revision_store.add(StringIO(rev_xml), rev_id)
195
mutter('copied revision %s', rev_id)
198
def _copy_inventory(self, rev_id, inv_xml, parent_ids):
    """Add the inventory text of rev_id to the destination control weaves.

    inv_xml is broken into lines with split_lines and stored under the
    'inventory' name, recording parent_ids as the parents of rev_id.
    """
    add_text = self.to_control.add_text
    add_text('inventory', rev_id, split_lines(inv_xml), parent_ids)
203
def _copy_ancestry(self, rev_id, parent_ids):
204
ancestry_lines = self.from_control.get_lines('ancestry', rev_id)
205
self.to_control.add_text('ancestry', rev_id, ancestry_lines,
209
def _copy_new_texts(self, rev_id, inv):
210
"""Copy any new texts occurring in this revision."""
211
# TODO: Rather than writing out weaves every time, hold them
212
# in memory until everything's done? But this way is nicer
213
# if it's interrupted.
214
for path, ie in inv.iter_entries():
215
if ie.revision != rev_id:
217
mutter('%s {%s} is changed in this revision',
219
self._copy_one_weave(rev_id, ie.file_id)
222
def _copy_one_weave(self, rev_id, file_id):
223
"""Copy one file weave."""
224
mutter('copy file {%s} modified in {%s}', file_id, rev_id)
225
if file_id in self.copied_file_ids:
226
mutter('file {%s} already copied', file_id)
228
from_weave = self.from_weaves.get_weave(file_id)
229
to_weave = self.to_weaves.get_weave_or_empty(file_id)
230
to_weave.join(from_weave)
231
self.to_weaves.put_weave(file_id, to_weave)
232
self.count_weaves += 1
233
self.copied_file_ids.add(file_id)
234
mutter('copied file {%s}', file_id)
93
self.file_ids_names = {}
94
pb = bzrlib.ui.ui_factory.nested_progress_bar()
95
pb.show_pct = pb.show_count = False
97
pb.update("Finding revisions", 0, 2)
98
search = self._revids_to_fetch()
101
pb.update("Fetching revisions", 1, 2)
102
self._fetch_everything_for_search(search)
106
def _fetch_everything_for_search(self, search):
107
"""Fetch all data for the given set of revisions."""
108
# The first phase is "file". We pass the progress bar for it directly
109
# into item_keys_introduced_by, which has more information about how
110
# that phase is progressing than we do. Progress updates for the other
111
# phases are taken care of in this function.
112
# XXX: there should be a clear owner of the progress reporting. Perhaps
113
# item_keys_introduced_by should have a richer API than it does at the
114
# moment, so that it can feed the progress information back to this
116
if (self.from_repository._format.rich_root_data and
117
not self.to_repository._format.rich_root_data):
118
raise errors.IncompatibleRepositories(
119
self.from_repository, self.to_repository,
120
"different rich-root support")
121
pb = bzrlib.ui.ui_factory.nested_progress_bar()
123
pb.update("Get stream source")
124
source = self.from_repository._get_source(
125
self.to_repository._format)
126
stream = source.get_stream(search)
127
from_format = self.from_repository._format
128
pb.update("Inserting stream")
129
resume_tokens, missing_keys = self.sink.insert_stream(
130
stream, from_format, [])
131
if self.to_repository._fallback_repositories:
133
self._parent_inventories(search.get_keys()))
135
pb.update("Missing keys")
136
stream = source.get_stream_for_missing_keys(missing_keys)
137
pb.update("Inserting missing keys")
138
resume_tokens, missing_keys = self.sink.insert_stream(
139
stream, from_format, resume_tokens)
141
raise AssertionError(
142
"second push failed to complete a fetch %r." % (
145
raise AssertionError(
146
"second push failed to commit the fetch %r." % (
148
pb.update("Finishing stream")
153
def _revids_to_fetch(self):
154
"""Determines the exact revisions needed from self.from_repository to
155
install self._last_revision in self.to_repository.
157
If no revisions need to be fetched, then this just returns None.
159
if self._fetch_spec is not None:
160
return self._fetch_spec
161
mutter('fetch up to rev {%s}', self._last_revision)
162
if self._last_revision is NULL_REVISION:
163
# explicit limit of no revisions needed
165
return self.to_repository.search_missing_revision_ids(
166
self.from_repository, self._last_revision,
167
find_ghosts=self.find_ghosts)
169
def _parent_inventories(self, revision_ids):
170
# Find all the parent revisions referenced by the stream, but
171
# not present in the stream, and make sure we send their
173
parent_maps = self.to_repository.get_parent_map(revision_ids)
175
map(parents.update, parent_maps.itervalues())
176
parents.discard(NULL_REVISION)
177
parents.difference_update(revision_ids)
178
missing_keys = set(('inventories', rev_id) for rev_id in parents)
182
class Inter1and2Helper(object):
183
"""Helper for operations that convert data from model 1 and 2
185
This is for use by fetchers and converters.
188
def __init__(self, source):
191
:param source: The repository data comes from
195
def iter_rev_trees(self, revs):
196
"""Iterate through RevisionTrees efficiently.
198
Additionally, the inventory's revision_id is set if unset.
200
Trees are retrieved in batches of 100, and then yielded in the order
203
:param revs: A list of revision ids
205
# In case that revs is not a list.
208
for tree in self.source.revision_trees(revs[:100]):
209
if tree.inventory.revision_id is None:
210
tree.inventory.revision_id = tree.get_revision_id()
214
def _find_root_ids(self, revs, parent_map, graph):
216
planned_versions = {}
217
for tree in self.iter_rev_trees(revs):
218
revision_id = tree.inventory.root.revision
219
root_id = tree.get_root_id()
220
planned_versions.setdefault(root_id, []).append(revision_id)
221
revision_root[revision_id] = root_id
222
# Find out which parents we don't already know root ids for
224
for revision_parents in parent_map.itervalues():
225
parents.update(revision_parents)
226
parents.difference_update(revision_root.keys() + [NULL_REVISION])
227
# Limit to revisions present in the versionedfile
228
parents = graph.get_parent_map(parents).keys()
229
for tree in self.iter_rev_trees(parents):
230
root_id = tree.get_root_id()
231
revision_root[tree.get_revision_id()] = root_id
232
return revision_root, planned_versions
234
def generate_root_texts(self, revs):
235
"""Generate VersionedFiles for all root ids.
237
:param revs: the revisions to include
239
graph = self.source.get_graph()
240
parent_map = graph.get_parent_map(revs)
241
rev_order = topo_sort(parent_map)
242
rev_id_to_root_id, root_id_to_rev_ids = self._find_root_ids(
243
revs, parent_map, graph)
244
root_id_order = [(rev_id_to_root_id[rev_id], rev_id) for rev_id in
246
# Guaranteed stable, this groups all the file id operations together
247
# retaining topological order within the revisions of a file id.
248
# File id splits and joins would invalidate this, but they don't exist
249
# yet, and are unlikely to in non-rich-root environments anyway.
250
root_id_order.sort(key=operator.itemgetter(0))
251
# Create a record stream containing the roots to create.
253
for key in root_id_order:
254
root_id, rev_id = key
255
rev_parents = parent_map[rev_id]
256
# We drop revision parents with different file-ids, because
257
# that represents a rename of the root to a different location
258
# - it's not actually a parent for us. We could look for that
259
# file id in the revision tree at considerably more expense,
260
# but for now this is sufficient (and reconcile will catch and
261
# correct this anyway).
262
# When a parent revision is a ghost, we guess that its root id
263
# was unchanged (rather than trimming it from the parent list).
264
parent_keys = tuple((root_id, parent) for parent in rev_parents
265
if parent != NULL_REVISION and
266
rev_id_to_root_id.get(parent, root_id) == root_id)
267
yield FulltextContentFactory(key, parent_keys, None, '')
268
return [('texts', yield_roots())]