# and add in all file versions
@deprecated_function(zero_eight)
def greedy_fetch(to_branch, from_branch, revision=None, pb=None):
    """Legacy API, please see branch.fetch(from_branch, last_revision, pb).

    Fetches all revisions (up to ``revision`` if given) from ``from_branch``
    into ``to_branch``.

    :return: (count_copied, failed_revisions) as produced by the Fetcher.
    """
    # Delegate to the Fetcher object; this wrapper exists only for callers
    # written against the pre-0.8 API.
    f = Fetcher(to_branch, from_branch, revision, pb)
    return f.count_copied, f.failed_revisions
class RepoFetcher(object):
    """Pull revisions and texts from one repository to another.

    last_revision
        if set, try to limit to the data this revision references.

    after running:
    count_copied -- number of revisions copied
    count_weaves -- number of file weaves copied

    This should not be used directly; it is essentially an object to
    encapsulate the logic in InterRepository.fetch().
    """
    # NOTE(review): this class was recovered from a corrupted interleave of
    # two file versions; try/finally scaffolding and phase stepping were
    # reconstructed -- verify against upstream bzrlib history.

    def __init__(self, to_repository, from_repository, last_revision=None, pb=None):
        # result variables read by callers after construction.
        self.failed_revisions = []
        self.count_copied = 0
        if to_repository.control_files._transport.base == from_repository.control_files._transport.base:
            # Same repository on disk: check that last_revision is in 'from'
            # and then return as a no-operation.
            if last_revision not in (None, NULL_REVISION):
                from_repository.get_revision(last_revision)
            return
        self.to_repository = to_repository
        self.from_repository = from_repository
        # must not mutate self._last_revision as its potentially a shared instance
        self._last_revision = last_revision
        if pb is None:
            self.pb = bzrlib.ui.ui_factory.nested_progress_bar()
            # remember it so we can finish it ourselves in __init__'s finally.
            self.nested_pb = self.pb
        else:
            self.pb = pb
            self.nested_pb = None
        self.from_repository.lock_read()
        try:
            self.to_repository.lock_write()
            try:
                self.__fetch()
            finally:
                if self.nested_pb is not None:
                    self.nested_pb.finished()
                self.to_repository.unlock()
        finally:
            self.from_repository.unlock()

    def __fetch(self):
        """Primary worker function.

        This initialises all the needed variables, and then fetches the
        requested revisions, finally clearing the progress bar.
        """
        self.to_weaves = self.to_repository.weave_store
        self.to_control = self.to_repository.control_weaves
        self.from_weaves = self.from_repository.weave_store
        self.from_control = self.from_repository.control_weaves
        self.count_total = 0
        self.file_ids_names = {}
        pp = ProgressPhase('Fetch phase', 4, self.pb)
        try:
            revs = self._revids_to_fetch()
            # revs is None/empty when there is nothing to do.
            if revs:
                pp.next_phase()
                self._fetch_weave_texts(revs)
                pp.next_phase()
                self._fetch_inventory_weave(revs)
                pp.next_phase()
                # provided by subclasses (Generic/Knit fetchers).
                self._fetch_revision_texts(revs)
                self.count_copied += len(revs)
        finally:
            self.pb.clear()

    def _revids_to_fetch(self):
        """Return the revision ids that need copying, or None for nothing."""
        mutter('fetch up to rev {%s}', self._last_revision)
        if self._last_revision is NULL_REVISION:
            # explicit limit of no revisions needed
            return None
        if (self._last_revision != None and
            self.to_repository.has_revision(self._last_revision)):
            return None
        try:
            return self.to_repository.missing_revision_ids(self.from_repository,
                                                           self._last_revision)
        except errors.NoSuchRevision:
            raise InstallFailed([self._last_revision])

    def _fetch_weave_texts(self, revs):
        """Copy all file texts referenced by revs into the target weaves."""
        texts_pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            file_ids = self.from_repository.fileids_altered_by_revision_ids(revs)
            count = 0
            num_file_ids = len(file_ids)
            for file_id, required_versions in file_ids.items():
                texts_pb.update("fetch texts", count, num_file_ids)
                count += 1
                to_weave = self.to_weaves.get_weave_or_empty(file_id,
                    self.to_repository.get_transaction())
                from_weave = self.from_weaves.get_weave(file_id,
                    self.from_repository.get_transaction())
                # we fetch all the texts, because texts do
                # not reference anything, and its cheap enough
                to_weave.join(from_weave, version_ids=required_versions)
                # we don't need *all* of this data anymore, but we dont know
                # what we do. This cache clearing will result in a new read
                # of the knit data when we do the checkout, but probably we
                # want to emit the needed data on the fly rather than at the
                # end anyhow.
                # the from weave should know not to cache data being joined,
                # but its ok to ask it to clear.
                from_weave.clear_cache()
                to_weave.clear_cache()
        finally:
            texts_pb.finished()

    def _fetch_inventory_weave(self, revs):
        """Copy the inventory texts for revs into the target control weave."""
        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            pb.update("fetch inventory", 0, 2)
            to_weave = self.to_control.get_weave('inventory',
                    self.to_repository.get_transaction())

            child_pb = bzrlib.ui.ui_factory.nested_progress_bar()
            try:
                # just merge, this is optimisable and its means we don't
                # copy unreferenced data such as not-needed inventories.
                pb.update("fetch inventory", 1, 3)
                from_weave = self.from_repository.get_inventory_weave()
                pb.update("fetch inventory", 2, 3)
                # we fetch only the referenced inventories because we do not
                # know for unselected inventories whether all their required
                # texts are present in the other repository - it could be
                # corrupt.
                to_weave.join(from_weave, pb=child_pb, msg='merge inventory',
                              version_ids=revs)
                from_weave.clear_cache()
            finally:
                child_pb.finished()
        finally:
            pb.finished()
class GenericRepoFetcher(RepoFetcher):
    """This is a generic repo to repo fetcher.

    This makes minimal assumptions about repo layout and contents.
    It triggers a reconciliation after fetching to ensure integrity.
    """

    def _fetch_revision_texts(self, revs):
        """Fetch revision object texts"""
        # NOTE(review): loop/try scaffolding reconstructed from a corrupted
        # interleave -- verify against upstream bzrlib history.
        rev_pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            to_txn = self.to_transaction = self.to_repository.get_transaction()
            count = 0
            total = len(revs)
            to_store = self.to_repository._revision_store
            for rev in revs:
                pb = bzrlib.ui.ui_factory.nested_progress_bar()
                try:
                    pb.update('copying revisions', count, total)
                    try:
                        sig_text = self.from_repository.get_signature_text(rev)
                        to_store.add_revision_signature_text(rev, sig_text, to_txn)
                    except errors.NoSuchRevision:
                        # not every revision carries a signature; skip quietly.
                        pass
                    to_store.add_revision(self.from_repository.get_revision(rev),
                                          to_txn)
                    count += 1
                finally:
                    pb.finished()
            # fixup inventory if needed:
            # this is expensive because we have no inverse index to current ghosts.
            # but on local disk its a few seconds and sftp push is already insane.
            # so we just-do-it.
            # FIXME: repository should inform if this is needed.
            self.to_repository.reconcile()
        finally:
            rev_pb.finished()
class KnitRepoFetcher(RepoFetcher):
    """This is a knit format repository specific fetcher.

    This differs from the GenericRepoFetcher by not doing a
    reconciliation after copying, and using knit joining to
    perform the fetch in a potentially more efficient manner.
    """

    def _fetch_revision_texts(self, revs):
        # may need to be a InterRevisionStore call here.
        from_transaction = self.from_repository.get_transaction()
        to_transaction = self.to_repository.get_transaction()
        # Signatures first; ignore_missing because not every revision
        # in revs necessarily has a signature.
        to_sf = self.to_repository._revision_store.get_signature_file(
            to_transaction)
        from_sf = self.from_repository._revision_store.get_signature_file(
            from_transaction)
        to_sf.join(from_sf, version_ids=revs, ignore_missing=True)
        # Then the revision texts themselves; every requested revision
        # must be present, so no ignore_missing here.
        to_rf = self.to_repository._revision_store.get_revision_file(
            to_transaction)
        from_rf = self.from_repository._revision_store.get_revision_file(
            from_transaction)
        to_rf.join(from_rf, version_ids=revs)
class Fetcher(object):
    """Backwards compatibility glue for branch.fetch()."""

    @deprecated_method(zero_eight)
    def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
        """Please see branch.fetch()."""
        # All the real work now lives in Branch.fetch(); this shim only
        # forwards the arguments for pre-0.8 callers.
        to_branch.fetch(from_branch, last_revision, pb)
def _compare_ancestries(self):
    """Get a list of revisions that must be copied.

    That is, every revision that's in the ancestry of the source
    branch and not in the destination branch."""
    # NOTE(review): list initialisation, membership test and return were
    # reconstructed from a corrupted interleave -- confirm against history.
    self.pb.update('get source ancestry')
    self.from_ancestry = self.from_branch.get_ancestry(self.last_revision)
    dest_last_rev = self.to_branch.last_revision()
    self.pb.update('get destination ancestry')
    dest_ancestry = self.to_branch.get_ancestry(dest_last_rev)
    ss = set(dest_ancestry)
    to_fetch = []
    for rev_id in self.from_ancestry:
        if rev_id not in ss:
            to_fetch.append(rev_id)
            mutter('need to get revision {%s}', rev_id)
    mutter('need to get %d revisions in total', len(to_fetch))
    self.count_total = len(to_fetch)
    return to_fetch
def _copy_revisions(self, revs_to_fetch):
    """Copy each revision in revs_to_fetch that the target lacks.

    None entries (ghost placeholders) and revisions already present in
    the destination branch are skipped.
    """
    i = 0
    for rev_id in revs_to_fetch:
        i += 1
        if rev_id is None:
            # ghost marker in the ancestry list; nothing to copy.
            continue
        if self.to_branch.has_revision(rev_id):
            continue
        self.pb.update('fetch revision', i, self.count_total)
        self._copy_one_revision(rev_id)
        self.count_copied += 1
def _copy_one_revision(self, rev_id):
    """Copy revision and everything referenced by it."""
    mutter('copying revision {%s}', rev_id)
    rev_xml = self.from_branch.get_revision_xml(rev_id)
    inv_xml = self.from_branch.get_inventory_xml(rev_id)
    rev = serializer_v5.read_revision_from_string(rev_xml)
    inv = serializer_v5.read_inventory_from_string(inv_xml)
    # sanity-check the fetched data against its own metadata.
    assert rev.revision_id == rev_id
    assert rev.inventory_sha1 == sha_string(inv_xml)
    mutter(' commiter %s, %d parents',
           rev.committer,
           len(rev.parent_ids))
    self._copy_new_texts(rev_id, inv)
    # Keep only parents the destination actually has; building a new list
    # avoids the original's bug of popping from the list being iterated,
    # which could skip consecutive missing parents.
    parents = [parent for parent in rev.parent_ids
               if self.to_branch.has_revision(parent)]
    self._copy_inventory(rev_id, inv_xml, parents)
    self._copy_ancestry(rev_id, parents)
    self.to_branch.revision_store.add(StringIO(rev_xml), rev_id)
    mutter('copied revision %s', rev_id)
def _copy_inventory(self, rev_id, inv_xml, parent_ids):
    """Store one revision's inventory text in the destination control weave."""
    self.to_control.add_text('inventory', rev_id,
                             split_lines(inv_xml), parent_ids)
def _copy_ancestry(self, rev_id, parent_ids):
    """Copy one revision's ancestry lines from source to destination."""
    ancestry_lines = self.from_control.get_lines('ancestry', rev_id)
    self.to_control.add_text('ancestry', rev_id, ancestry_lines,
                             parent_ids)
def _copy_new_texts(self, rev_id, inv):
    """Copy any new texts occurring in this revision."""
    # TODO: Rather than writing out weaves every time, hold them
    # in memory until everything's done?  But this way is nicer
    # if it's interrupted.
    for path, ie in inv.iter_entries():
        # only entries introduced by this revision need their text copied.
        if ie.revision != rev_id:
            continue
        mutter('%s {%s} is changed in this revision',
               path, ie.file_id)
        self._copy_one_weave(rev_id, ie.file_id)
def _copy_one_weave(self, rev_id, file_id):
    """Copy one file weave, skipping weaves copied earlier in this fetch."""
    mutter('copy file {%s} modified in {%s}', file_id, rev_id)
    if file_id in self.copied_file_ids:
        mutter('file {%s} already copied', file_id)
        return
    from_weave = self.from_weaves.get_weave(file_id)
    to_weave = self.to_weaves.get_weave_or_empty(file_id)
    to_weave.join(from_weave)
    self.to_weaves.put_weave(file_id, to_weave)
    self.count_weaves += 1
    self.copied_file_ids.add(file_id)
    mutter('copied file {%s}', file_id)