# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""bzr upgrade logic."""
from bzrlib.bzrdir import BzrDir, format_registry
# change upgrade from .bzr to create a '.bzr-new', then do a bait and switch.

# To make this properly useful
#
# 1. assign text version ids, and put those text versions into
#    the inventory as they're converted.
#
# 2. keep track of the previous version of each file, rather than
#    just using the last one imported
#
# 3. assign entry versions when files are added, renamed or moved.
#
# 4. when merged-in versions are observed, walk down through them
#    to discover everything, then commit bottom-up
#
# 5. track ancestry as things are merged in, and commit that in each
#    revision.
# Perhaps it's best to first walk the whole graph and make a plan for
# what should be imported in what order? Need a kind of topological
# sort of all revisions. (Or do we, can we just before doing a revision
# see that all its parents have either been converted or abandoned?)
# Cannot import a revision until all its parents have been
# imported. In other words, we can only import revisions whose
# parents have all been imported. The first step must be to
# import a revision with no parents, of which there must be at
# least one. (So perhaps it's useful to store forward pointers
# from a list of parents to their children?)
# Another (equivalent?) approach is to build up the ordered
# ancestry list for the last revision, and walk through that. We
# are going to need that.

# We don't want to have to recurse all the way back down the list.
# Suppose we keep a queue of the revisions able to be processed at
# any point. This starts out with all the revisions having no
# parents; then, each time a revision is processed, any of its children
# whose parents have now all been imported are added to the queue.

# This seems like a generally useful algorithm...
# The current algorithm is dumb (O(n**2)?) but will do the job, and
# takes less than a second on the bzr.dev branch. (An illustrative
# queue-based sketch appears just before the imports below.)
# This currently does a kind of lazy conversion of file texts, where a
# new text is written in every version. That's unnecessary but for
# the moment saves us having to worry about when files need new
# versions.
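
# The helper below is an illustrative sketch of the queue-based ordering
# described above. It is hypothetical and not used by the converter, which
# relies on the simpler O(n**2) scan in Convert._make_order.

def _example_topological_order(parents_of):
    """Return revision ids so every revision follows all its present parents.

    ``parents_of`` maps revision id -> list of parent revision ids; parents
    that are not themselves keys of the mapping are treated as absent
    (ghosts) and ignored.
    """
    # count, for each revision, how many of its parents are still unprocessed
    pending = {}
    children = {}
    for rev_id, parents in parents_of.items():
        present = [p for p in parents if p in parents_of]
        pending[rev_id] = len(present)
        for parent_id in present:
            children.setdefault(parent_id, []).append(rev_id)
    # start with the revisions whose parents are all absent or already done
    queue = [rev_id for rev_id, count in pending.items() if count == 0]
    order = []
    while queue:
        rev_id = queue.pop()
        order.append(rev_id)
        # a child becomes processable once its last pending parent is done
        for child in children.get(rev_id, []):
            pending[child] -= 1
            if pending[child] == 0:
                queue.append(child)
    return order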
import os
from stat import S_ISDIR

from cStringIO import StringIO

import bzrlib
from bzrlib.branch import Branch
import bzrlib.bzrdir as bzrdir
from bzrlib.bzrdir import BzrDirFormat, BzrDirFormat4, BzrDirFormat5, BzrDirFormat6
import bzrlib.errors as errors
from bzrlib.remote import RemoteBzrDir
import bzrlib.ui as ui
from bzrlib.errors import NoSuchFile, UpgradeReadonly
import bzrlib.hashcache as hashcache
from bzrlib.lockable_files import LockableFiles
from bzrlib.osutils import sha_strings, sha_string, pathjoin, abspath
from bzrlib.ui import ui_factory
from bzrlib.store.text import TextStore
from bzrlib.store.weave import WeaveStore
from bzrlib.trace import mutter, note, warning
from bzrlib.transactions import PassThroughTransaction
from bzrlib.transport import get_transport
from bzrlib.transport.local import LocalTransport
from bzrlib.weave import Weave
from bzrlib.weavefile import read_weave, write_weave
from bzrlib.xml4 import serializer_v4
from bzrlib.xml5 import serializer_v5

class Convert(object):

    def __init__(self, url, format=None):
        self.format = format
        self.bzrdir = BzrDir.open_unsupported(url)
        # XXX: Change to cleanup
        warning_id = 'cross_format_fetch'
        saved_warning = warning_id in ui.ui_factory.suppressed_warnings
        if isinstance(self.bzrdir, RemoteBzrDir):
            self.bzrdir._ensure_real()
            self.bzrdir = self.bzrdir._real_bzrdir
        if self.bzrdir.root_transport.is_readonly():
            raise errors.UpgradeReadonly
        self.transport = self.bzrdir.root_transport
        ui.ui_factory.suppressed_warnings.add(warning_id)
        try:
            self.convert()
        finally:
            if not saved_warning:
                ui.ui_factory.suppressed_warnings.remove(warning_id)
    def __init__(self, transport):
        self.base = transport.base
        self.converted_revs = set()
        self.absent_revisions = set()
        self.text_count = 0
        self.revisions = {}
        self.transport = transport
        if self.transport.is_readonly():
            raise UpgradeReadonly
        self.control_files = LockableFiles(transport.clone(bzrlib.BZRDIR), 'branch-lock')
        # Lock the branch (soon to be meta dir) to prevent anyone racing with us
        # This is currently windows incompatible, it will deadlock. When the upgrade
        # logic becomes format specific, then we can have the format know how to pass this
        # on. Also note that we probably have an 'upgrade meta' which upgrades the constituent
        # parts.
        print "FIXME: control files reuse"
        self.control_files.lock_write()
        try:
            self.convert()
        finally:
            self.control_files.unlock()
    def convert(self):
        if not self._open_branch():
            return
        note('starting upgrade of %s', self.base)
        self._backup_control_dir()
        self.pb = ui_factory.progress_bar()
        if isinstance(self.old_format, BzrDirFormat4):
            note('starting upgrade from format 4 to 5')
            self._convert_to_weaves()
        if isinstance(self.old_format, BzrDirFormat5):
            note('starting upgrade from format 5 to 6')
            self._convert_to_prefixed()
        if isinstance(self.transport, LocalTransport):
            cache = hashcache.HashCache(abspath(self.base))
            cache.clear()
            cache.write()
        note("finished")
    def _convert_to_prefixed(self):
        from bzrlib.store import hash_prefix
        bzr_transport = self.transport.clone('.bzr')
        bzr_transport.delete('branch-format')
        for store_name in ["weaves", "revision-store"]:
            note("adding prefixes to %s" % store_name)
            store_transport = bzr_transport.clone(store_name)
            for filename in store_transport.list_dir('.'):
                if (filename.endswith(".weave") or
                    filename.endswith(".gz") or
                    filename.endswith(".sig")):
                    file_id = os.path.splitext(filename)[0]
                else:
                    file_id = filename
                prefix_dir = hash_prefix(file_id)
                # FIXME keep track of the dirs made RBC 20060121
                try:
                    store_transport.move(filename, prefix_dir + '/' + filename)
                except NoSuchFile: # catches missing dirs strangely enough
                    store_transport.mkdir(prefix_dir)
                    store_transport.move(filename, prefix_dir + '/' + filename)
        self.old_format = BzrDirFormat6()
        self._set_new_format(self.old_format.get_format_string())
        self.bzrdir = self.old_format.open(self.transport)
        self.branch = self.bzrdir.open_branch()
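
    # Illustration (hypothetical file id, for orientation only): after the
    # prefixing step above, a weave that format 5 stored as
    #     .bzr/weaves/<file_id>.weave
    # is found under
    #     .bzr/weaves/<prefix>/<file_id>.weave
    # where <prefix> is whatever bzrlib.store.hash_prefix returns for that
    # file id.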
    def _convert_to_weaves(self):
        note('note: upgrade may be faster if all store files are ungzipped first')
        bzr_transport = self.transport.clone('.bzr')
        try:
            stat = bzr_transport.stat('weaves')
            if not S_ISDIR(stat.st_mode):
                bzr_transport.delete('weaves')
                bzr_transport.mkdir('weaves')
        except NoSuchFile:
            bzr_transport.mkdir('weaves')
        self.inv_weave = Weave('inventory')
        # holds in-memory weaves for all files
        self.text_weaves = {}
        bzr_transport.delete('branch-format')
        self._convert_working_inv()
        rev_history = self.branch.revision_history()
        # to_read is a stack holding the revisions we still need to process;
        # appending to it adds new highest-priority revisions
        self.known_revisions = set(rev_history)
        self.to_read = rev_history[-1:]
        while self.to_read:
            rev_id = self.to_read.pop()
            if (rev_id not in self.revisions
                and rev_id not in self.absent_revisions):
                self._load_one_rev(rev_id)
        to_import = self._make_order()
        for i, rev_id in enumerate(to_import):
            self.pb.update('converting revision', i, len(to_import))
            self._convert_one_rev(rev_id)
        self._write_all_weaves()
        self._write_all_revs()
        note('upgraded to weaves:')
        note(' %6d revisions and inventories' % len(self.revisions))
        note(' %6d revisions not present' % len(self.absent_revisions))
        note(' %6d texts' % self.text_count)
        self._cleanup_spare_files_after_format4()
        self.old_format = BzrDirFormat5()
        self._set_new_format(self.old_format.get_format_string())
        self.bzrdir = self.old_format.open(self.transport)
        self.branch = self.bzrdir.open_branch()

    def _open_branch(self):
        self.old_format = BzrDirFormat.find_format(self.transport)
        self.bzrdir = self.old_format.open(self.transport)
        self.branch = self.bzrdir.open_branch()
        if isinstance(self.old_format, BzrDirFormat6):
            note('this branch is in the most current format (%s)', self.old_format)
            return False
        if (not isinstance(self.old_format, BzrDirFormat4) and
            not isinstance(self.old_format, BzrDirFormat5) and
            not isinstance(self.old_format, bzrdir.BzrDirMetaFormat1)):
            raise errors.BzrError("cannot upgrade from branch format %s" %
                                  self.branch._branch_format)
        return True

    def _set_new_format(self, format):
        self.branch.control_files.put_utf8('branch-format', format)

    def _cleanup_spare_files_after_format4(self):
        transport = self.transport.clone('.bzr')
        print "FIXME working tree upgrade foo."
        for n in 'merged-patches', 'pending-merged-patches':
            try:
                ## assert os.path.getsize(p) == 0
                transport.delete(n)
            except NoSuchFile:
                pass
        transport.delete_tree('inventory-store')
        transport.delete_tree('text-store')

    def convert(self):
        try:
            branch = self.bzrdir.open_branch()
            if branch.user_url != self.bzrdir.user_url:
                ui.ui_factory.note("This is a checkout. The branch (%s) needs to be "
                                   "upgraded separately." %
                                   branch.user_url)
        except (errors.NotBranchError, errors.IncompatibleRepositories):
            # might not be a format we can open without upgrading; see e.g.
            # https://bugs.launchpad.net/bzr/+bug/253891
            pass
        if self.format is None:
            try:
                rich_root = self.bzrdir.find_repository()._format.rich_root_data
            except errors.NoRepositoryPresent:
                rich_root = False # assume no rich roots
            if rich_root:
                format_name = "default-rich-root"
            else:
                format_name = "default"
            format = format_registry.make_bzrdir(format_name)
        else:
            format = self.format
        if not self.bzrdir.needs_format_conversion(format):
            raise errors.UpToDateFormat(self.bzrdir._format)
        if not self.bzrdir.can_convert_format():
            raise errors.BzrError("cannot upgrade from bzrdir format %s" %
                                  self.bzrdir._format)
        self.bzrdir.check_conversion_target(format)
        ui.ui_factory.note('starting upgrade of %s' % self.transport.base)
        self.bzrdir.backup_bzrdir()
        while self.bzrdir.needs_format_conversion(format):
            converter = self.bzrdir._format.get_converter(format)
            self.bzrdir = converter.convert(self.bzrdir, None)
        ui.ui_factory.note("finished")

    def upgrade(url, format=None):
        """Upgrade to format, or the default bzrdir format if not supplied."""
        Convert(url, format)
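
    # Illustrative note on the registry-driven path above: the target format
    # comes from bzrlib's format registry, e.g. format_registry.make_bzrdir(
    # 'default') or another registered name, and convert() keeps asking the
    # current format for a converter until no further conversion is needed.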
    def _backup_control_dir(self):
        note('making backup of tree history')
        self.transport.copy_tree('.bzr', '.bzr.backup')
        note('%s.bzr has been backed up to %s.bzr.backup',
             self.base, self.base)
        note('if conversion fails, you can move this directory back to .bzr')
        note('if it succeeds, you can remove this directory if you wish')
    def _convert_working_inv(self):
        branch = self.branch
        inv = serializer_v4.read_inventory(branch.control_files.get('inventory'))
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        print "fixme inventory is a working tree change."
        branch.control_files.put('inventory', new_inv_xml)
    def _write_all_weaves(self):
        bzr_transport = self.transport.clone('.bzr')
        controlweaves = WeaveStore(bzr_transport, prefixed=False)
        weave_transport = bzr_transport.clone('weaves')
        weaves = WeaveStore(weave_transport, prefixed=False)
        transaction = PassThroughTransaction()

        controlweaves.put_weave('inventory', self.inv_weave, transaction)
        for i, (file_id, file_weave) in enumerate(self.text_weaves.items()):
            self.pb.update('writing weave', i, len(self.text_weaves))
            weaves.put_weave(file_id, file_weave, transaction)
    def _write_all_revs(self):
        """Write all revisions out in new form."""
        transport = self.transport.clone('.bzr')
        transport.delete_tree('revision-store')
        transport.mkdir('revision-store')
        revision_transport = transport.clone('revision-store')
        revision_store = TextStore(revision_transport,
                                   prefixed=False,
                                   compressed=True)
        for i, rev_id in enumerate(self.converted_revs):
            self.pb.update('write revision', i, len(self.converted_revs))
            rev_tmp = StringIO()
            serializer_v5.write_revision(self.revisions[rev_id], rev_tmp)
            rev_tmp.seek(0)
            revision_store.add(rev_tmp, rev_id)
    def _load_one_rev(self, rev_id):
        """Load a revision object into memory.

        Any parents not either loaded or abandoned get queued to be
        loaded."""
        self.pb.update('loading revision',
                       len(self.revisions),
                       len(self.known_revisions))
        if not self.branch.repository.revision_store.has_id(rev_id):
            note('revision {%s} not present in branch; '
                 'will be converted as a ghost',
                 rev_id)
            self.absent_revisions.add(rev_id)
        else:
            rev_xml = self.branch.repository.revision_store.get(rev_id).read()
            rev = serializer_v4.read_revision_from_string(rev_xml)
            for parent_id in rev.parent_ids:
                self.known_revisions.add(parent_id)
                self.to_read.append(parent_id)
            self.revisions[rev_id] = rev
    def _load_old_inventory(self, rev_id):
        assert rev_id not in self.converted_revs
        old_inv_xml = self.branch.repository.inventory_store.get(rev_id).read()
        inv = serializer_v4.read_inventory_from_string(old_inv_xml)
        rev = self.revisions[rev_id]
        if rev.inventory_sha1:
            assert rev.inventory_sha1 == sha_string(old_inv_xml), \
                'inventory sha mismatch for {%s}' % rev_id
        return inv
    def _load_updated_inventory(self, rev_id):
        assert rev_id in self.converted_revs
        inv_xml = self.inv_weave.get_text(rev_id)
        inv = serializer_v5.read_inventory_from_string(inv_xml)
        return inv
    def _convert_one_rev(self, rev_id):
        """Convert revision and all referenced objects to new format."""
        rev = self.revisions[rev_id]
        inv = self._load_old_inventory(rev_id)
        present_parents = [p for p in rev.parent_ids
                           if p not in self.absent_revisions]
        self._convert_revision_contents(rev, inv, present_parents)
        self._store_new_weave(rev, inv, present_parents)
        self.converted_revs.add(rev_id)
    def _store_new_weave(self, rev, inv, present_parents):
        # the XML is now updated with text versions
        if __debug__:
            for file_id in inv:
                ie = inv[file_id]
                if ie.kind == 'root_directory':
                    continue
                assert hasattr(ie, 'revision'), \
                    'no revision on {%s} in {%s}' % \
                    (file_id, rev.revision_id)
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        new_inv_sha1 = sha_string(new_inv_xml)
        self.inv_weave.add(rev.revision_id,
                           present_parents,
                           new_inv_xml.splitlines(True),
                           new_inv_sha1)
        rev.inventory_sha1 = new_inv_sha1
    def _convert_revision_contents(self, rev, inv, present_parents):
        """Convert all the files within a revision.

        Also upgrade the inventory to refer to the text revision ids."""
        rev_id = rev.revision_id
        mutter('converting texts of revision {%s}',
               rev_id)
        parent_invs = map(self._load_updated_inventory, present_parents)
        for file_id in inv:
            ie = inv[file_id]
            self._convert_file_version(rev, ie, parent_invs)
    def _convert_file_version(self, rev, ie, parent_invs):
        """Convert one version of one file.

        The file needs to be added into the weave if it is a merge
        of >=2 parents or if it's changed from its parent.
        """
        if ie.kind == 'root_directory':
            return
        file_id = ie.file_id
        rev_id = rev.revision_id
        w = self.text_weaves.get(file_id)
        if w is None:
            w = Weave(file_id)
            self.text_weaves[file_id] = w
        previous_entries = ie.find_previous_heads(parent_invs, w)
        for old_revision in previous_entries:
            # if this fails, it's a ghost ?
            assert old_revision in self.converted_revs
        self.snapshot_ie(previous_entries, ie, w, rev_id)
        assert getattr(ie, 'revision', None) is not None
    def snapshot_ie(self, previous_revisions, ie, w, rev_id):
        # TODO: convert this logic, which is ~= snapshot to
        # a call to:. This needs the path figured out. rather than a work_tree
        # a v4 revision_tree can be given, or something that looks enough like
        # one to give the file content to the entry if it needs it.
        # and we need something that looks like a weave store for snapshot to
        # save against.
        #ie.snapshot(rev, PATH, previous_revisions, REVISION_TREE, InMemoryWeaveStore(self.text_weaves))
        if len(previous_revisions) == 1:
            previous_ie = previous_revisions.values()[0]
            if ie._unchanged(previous_ie):
                ie.revision = previous_ie.revision
                return
        parent_indexes = map(w.lookup, previous_revisions)
        if ie.has_text():
            text = self.branch.repository.text_store.get(ie.text_id)
            file_lines = text.readlines()
            assert sha_strings(file_lines) == ie.text_sha1
            assert sum(map(len, file_lines)) == ie.text_size
            w.add(rev_id, parent_indexes, file_lines, ie.text_sha1)
            self.text_count += 1
            ##mutter('import text {%s} of {%s}',
            ##       ie.text_id, file_id)
        else:
            w.add(rev_id, parent_indexes, [], None)
        ie.revision = rev_id
    def _make_order(self):
        """Return a suitable order for importing revisions.

        The order must be such that a revision is imported after all
        its (present) parents.
        """
        todo = set(self.revisions.keys())
        done = self.absent_revisions.copy()
        order = []
        while todo:
            # scan through looking for a revision whose parents
            # are all done
            for rev_id in sorted(list(todo)):
                rev = self.revisions[rev_id]
                parent_ids = set(rev.parent_ids)
                if parent_ids.issubset(done):
                    # can take this one now
                    order.append(rev_id)
                    todo.remove(rev_id)
                    done.add(rev_id)
        return order

def upgrade(url):
    t = get_transport(url)
    Convert(t)