58
59
# and add in all file versions
62
@deprecated_function(zero_eight)
def greedy_fetch(to_branch, from_branch, revision=None, pb=None):
    """Legacy API, please see branch.fetch(from_branch, last_revision, pb).

    Constructs a Fetcher from the given arguments (passed positionally as
    to_branch, from_branch, revision, pb) and returns the tuple
    (count_copied, failed_revisions) read from that Fetcher instance.
    """
    # NOTE(review): the compatibility Fetcher class visible in this file only
    # calls to_branch.fetch() and does not visibly set count_copied or
    # failed_revisions -- confirm those attributes exist on the instance.
    fetcher = Fetcher(to_branch, from_branch, revision, pb)
    copied, failed = fetcher.count_copied, fetcher.failed_revisions
    return copied, failed
68
class Fetcher(object):
69
"""Pull revisions and texts from one branch to another.
71
This doesn't update the destination's history; that can be done
72
separately if desired.
75
If set, pull only up to this revision_id.
79
last_revision -- if last_revision
80
is given it will be that, otherwise the last revision of
71
class RepoFetcher(object):
72
"""Pull revisions and texts from one repository to another.
75
if set, try to limit to the data this revision references.
83
78
count_copied -- number of revisions copied
85
count_weaves -- number of file weaves copied
80
This should not be used directly; it is essentially an object to encapsulate
81
the logic in InterRepository.fetch().
87
def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
88
if to_branch == from_branch:
89
raise Exception("can't fetch from a branch to itself")
90
self.to_branch = to_branch
91
self.to_weaves = to_branch.weave_store
92
self.to_control = to_branch.control_weaves
93
self.from_branch = from_branch
94
self.from_weaves = from_branch.weave_store
95
self.from_control = from_branch.control_weaves
83
def __init__(self, to_repository, from_repository, last_revision=None, pb=None):
96
85
self.failed_revisions = []
97
86
self.count_copied = 0
100
self.copied_file_ids = set()
87
if to_repository.control_files._transport.base == from_repository.control_files._transport.base:
88
# check that last_revision is in 'from' and then return a no-operation.
89
if last_revision not in (None, NULL_REVISION):
90
from_repository.get_revision(last_revision)
92
self.to_repository = to_repository
93
self.from_repository = from_repository
94
# must not mutate self._last_revision as it's potentially a shared instance
95
self._last_revision = last_revision
102
self.pb = bzrlib.ui.ui_factory.progress_bar()
97
self.pb = bzrlib.ui.ui_factory.nested_progress_bar()
98
self.nested_pb = self.pb
106
self.last_revision = self._find_last_revision(last_revision)
107
except NoSuchRevision, e:
108
mutter('failed getting last revision: %s', e)
109
raise InstallFailed([last_revision])
110
mutter('fetch up to rev {%s}', self.last_revision)
112
revs_to_fetch = self._compare_ancestries()
114
raise InstallFailed([self.last_revision])
115
self._copy_revisions(revs_to_fetch)
116
self.new_ancestry = revs_to_fetch
119
def _find_last_revision(self, last_revision):
120
"""Find the limiting source revision.
122
Every ancestor of that revision will be merged across.
124
Returns the revision_id, or returns None if there's no history
125
in the source branch."""
126
self.pb.update('get source history')
127
from_history = self.from_branch.revision_history()
128
self.pb.update('get destination history')
130
self.from_branch.get_revision(last_revision)
133
return from_history[-1]
135
return None # no history in the source branch
101
self.nested_pb = None
102
self.from_repository.lock_read()
104
self.to_repository.lock_write()
108
if self.nested_pb is not None:
109
self.nested_pb.finished()
110
self.to_repository.unlock()
112
self.from_repository.unlock()
115
"""Primary worker function.
117
This initialises all the needed variables, and then fetches the
118
requested revisions, finally clearing the progress bar.
120
self.to_weaves = self.to_repository.weave_store
121
self.to_control = self.to_repository.control_weaves
122
self.from_weaves = self.from_repository.weave_store
123
self.from_control = self.from_repository.control_weaves
125
self.file_ids_names = {}
128
revs = self._revids_to_fetch()
131
self.pb.update('Fetching text', 1, self.total_steps)
132
self._fetch_weave_texts(revs)
133
self.pb.update('Fetching inventories', 2, self.total_steps)
134
self._fetch_inventory_weave(revs)
135
self.pb.update('Fetching revisions', 3, self.total_steps)
136
self._fetch_revision_texts(revs)
137
self.pb.update('Fetching revisions', 4, self.total_steps)
138
self.count_copied += len(revs)
142
def _revids_to_fetch(self):
143
self.pb.update('Calculating needed data', 0, self.total_steps)
144
mutter('fetch up to rev {%s}', self._last_revision)
145
if self._last_revision is NULL_REVISION:
146
# explicit limit of no revisions needed
148
if (self._last_revision != None and
149
self.to_repository.has_revision(self._last_revision)):
138
def _compare_ancestries(self):
139
"""Get a list of revisions that must be copied.
141
That is, every revision that's in the ancestry of the source
142
branch and not in the destination branch."""
143
self.pb.update('get source ancestry')
144
self.from_ancestry = self.from_branch.get_ancestry(self.last_revision)
146
dest_last_rev = self.to_branch.last_revision()
147
self.pb.update('get destination ancestry')
149
dest_ancestry = self.to_branch.get_ancestry(dest_last_rev)
152
ss = set(dest_ancestry)
154
for rev_id in self.from_ancestry:
156
to_fetch.append(rev_id)
157
mutter('need to get revision {%s}', rev_id)
158
mutter('need to get %d revisions in total', len(to_fetch))
159
self.count_total = len(to_fetch)
162
def _copy_revisions(self, revs_to_fetch):
164
for rev_id in revs_to_fetch:
168
if self.to_branch.has_revision(rev_id):
170
self.pb.update('fetch revision', i, self.count_total)
171
self._copy_one_revision(rev_id)
172
self.count_copied += 1
175
def _copy_one_revision(self, rev_id):
176
"""Copy revision and everything referenced by it."""
177
mutter('copying revision {%s}', rev_id)
178
rev_xml = self.from_branch.get_revision_xml(rev_id)
179
inv_xml = self.from_branch.get_inventory_xml(rev_id)
180
rev = serializer_v5.read_revision_from_string(rev_xml)
181
inv = serializer_v5.read_inventory_from_string(inv_xml)
182
assert rev.revision_id == rev_id
183
assert rev.inventory_sha1 == sha_string(inv_xml)
184
mutter(' commiter %s, %d parents',
187
self._copy_new_texts(rev_id, inv)
188
parents = rev.parent_ids
189
for parent in parents:
190
if not self.to_branch.has_revision(parent):
191
parents.pop(parents.index(parent))
192
self._copy_inventory(rev_id, inv_xml, parents)
193
self._copy_ancestry(rev_id, parents)
194
self.to_branch.revision_store.add(StringIO(rev_xml), rev_id)
195
mutter('copied revision %s', rev_id)
198
def _copy_inventory(self, rev_id, inv_xml, parent_ids):
199
self.to_control.add_text('inventory', rev_id,
200
split_lines(inv_xml), parent_ids)
203
def _copy_ancestry(self, rev_id, parent_ids):
204
ancestry_lines = self.from_control.get_lines('ancestry', rev_id)
205
self.to_control.add_text('ancestry', rev_id, ancestry_lines,
209
def _copy_new_texts(self, rev_id, inv):
210
"""Copy any new texts occurring in this revision."""
211
# TODO: Rather than writing out weaves every time, hold them
212
# in memory until everything's done? But this way is nicer
213
# if it's interrupted.
214
for path, ie in inv.iter_entries():
215
if ie.revision != rev_id:
217
mutter('%s {%s} is changed in this revision',
219
self._copy_one_weave(rev_id, ie.file_id)
222
def _copy_one_weave(self, rev_id, file_id):
223
"""Copy one file weave."""
224
mutter('copy file {%s} modified in {%s}', file_id, rev_id)
225
if file_id in self.copied_file_ids:
226
mutter('file {%s} already copied', file_id)
228
from_weave = self.from_weaves.get_weave(file_id)
229
to_weave = self.to_weaves.get_weave_or_empty(file_id)
230
to_weave.join(from_weave)
231
self.to_weaves.put_weave(file_id, to_weave)
232
self.count_weaves += 1
233
self.copied_file_ids.add(file_id)
234
mutter('copied file {%s}', file_id)
153
return self.to_repository.missing_revision_ids(self.from_repository,
155
except errors.NoSuchRevision:
156
raise InstallFailed([self._last_revision])
158
def _fetch_weave_texts(self, revs):
159
texts_pb = bzrlib.ui.ui_factory.nested_progress_bar()
161
file_ids = self.from_repository.fileid_involved_by_set(revs)
163
num_file_ids = len(file_ids)
164
for file_id in file_ids:
165
texts_pb.update("fetch texts", count, num_file_ids)
167
to_weave = self.to_weaves.get_weave_or_empty(file_id,
168
self.to_repository.get_transaction())
170
if to_weave.num_versions() > 0:
171
# destination has contents, must merge
172
from_weave = self.from_weaves.get_weave(file_id,
173
self.from_repository.get_transaction())
174
# we fetch all the texts, because texts do
175
# not reference anything, and it's cheap enough
176
to_weave.join(from_weave)
178
# destination is empty, just copy it.
179
# this copies all the texts, which is useful and
180
# on per-file basis quite cheap.
181
self.to_weaves.copy_multi(
185
self.from_repository.get_transaction(),
186
self.to_repository.get_transaction())
190
def _fetch_inventory_weave(self, revs):
191
inv_pb = bzrlib.ui.ui_factory.nested_progress_bar()
193
inv_pb.update("fetch inventory", 0, 2)
194
to_weave = self.to_control.get_weave('inventory',
195
self.to_repository.get_transaction())
197
# just merge, this is optimisable and it means we don't
198
# copy unreferenced data such as not-needed inventories.
199
self.pb.update("fetch inventory", 1, 2)
200
from_weave = self.from_repository.get_inventory_weave()
201
self.pb.update("fetch inventory", 2, 2)
202
# we fetch only the referenced inventories because we do not
203
# know for unselected inventories whether all their required
204
# texts are present in the other repository - it could be
206
to_weave.join(from_weave, msg='fetch inventory', version_ids=revs)
211
class GenericRepoFetcher(RepoFetcher):
212
"""This is a generic repo to repo fetcher.
214
This makes minimal assumptions about repo layout and contents.
215
It triggers a reconciliation after fetching to ensure integrity.
218
def _fetch_revision_texts(self, revs):
219
rev_pb = bzrlib.ui.ui_factory.nested_progress_bar()
221
self.to_transaction = self.to_repository.get_transaction()
225
rev_pb.update('fetch revisions', count, total)
227
sig_text = self.from_repository.get_signature_text(rev)
228
self.to_repository._revision_store.add_revision_signature_text(
229
rev, sig_text, self.to_transaction)
230
except errors.NoSuchRevision:
233
self.to_repository._revision_store.add_revision(
234
self.from_repository.get_revision(rev),
237
rev_pb.update('copying revisions', count, total)
238
# fixup inventory if needed:
239
# this is expensive because we have no inverse index to current ghosts.
240
# but on local disk it's a few seconds and sftp push is already insane.
242
# FIXME: repository should inform if this is needed.
243
self.to_repository.reconcile()
248
class KnitRepoFetcher(RepoFetcher):
249
"""This is a knit format repository specific fetcher.
251
This differs from the GenericRepoFetcher by not doing a
252
reconciliation after copying, and using knit joining to
256
def _fetch_revision_texts(self, revs):
257
# may need to be an InterRevisionStore call here.
258
from_transaction = self.from_repository.get_transaction()
259
to_transaction = self.to_repository.get_transaction()
260
to_sf = self.to_repository._revision_store.get_signature_file(
262
from_sf = self.from_repository._revision_store.get_signature_file(
264
to_sf.join(from_sf, version_ids=revs, ignore_missing=True)
265
to_rf = self.to_repository._revision_store.get_revision_file(
267
from_rf = self.from_repository._revision_store.get_revision_file(
269
to_rf.join(from_rf, version_ids=revs)
272
class Fetcher(object):
    """Backwards compatibility glue for branch.fetch().

    Constructing an instance delegates straight to ``to_branch.fetch``;
    the constructor does nothing else.
    """

    @deprecated_method(zero_eight)
    def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
        """Please see branch.fetch().

        The arguments are forwarded positionally, i.e. this performs
        ``to_branch.fetch(from_branch, last_revision, pb)``.
        """
        do_fetch = to_branch.fetch
        do_fetch(from_branch, last_revision, pb)