19
19
# change upgrade from .bzr to create a '.bzr-new', then do a bait and switch.
22
# To make this properly useful
24
# 1. assign text version ids, and put those text versions into
25
# the inventory as they're converted.
27
# 2. keep track of the previous version of each file, rather than
28
# just using the last one imported
30
# 3. assign entry versions when files are added, renamed or moved.
32
# 4. when merged-in versions are observed, walk down through them
33
# to discover everything, then commit bottom-up
35
# 5. track ancestry as things are merged in, and commit that in each revision.
38
# Perhaps it's best to first walk the whole graph and make a plan for
39
# what should be imported in what order? Need a kind of topological
40
# sort of all revisions. (Or do we, can we just before doing a revision
41
# see that all its parents have either been converted or abandoned?)
44
# Cannot import a revision until all its parents have been
45
# imported. in other words, we can only import revisions whose
46
# parents have all been imported. the first step must be to
47
# import a revision with no parents, of which there must be at
48
# least one. (So perhaps it's useful to store forward pointers
49
# from a list of parents to their children?)
51
# Another (equivalent?) approach is to build up the ordered
52
# ancestry list for the last revision, and walk through that. We
53
# are going to need that.
55
# We don't want to have to recurse all the way back down the list.
57
# Suppose we keep a queue of the revisions able to be processed at
58
# any point. This starts out with all the revisions having no parents.
61
# This seems like a generally useful algorithm...
63
# The current algorithm is dumb (O(n**2)?) but will do the job, and
64
# takes less than a second on the bzr.dev branch.
66
# This currently does a kind of lazy conversion of file texts, where a
67
# new text is written in every version. That's unnecessary but for
68
# the moment saves us having to worry about when files need new versions.
71
from cStringIO import StringIO
78
from bzrlib.branch import Branch
79
import bzrlib.bzrdir as bzrdir
80
from bzrlib.bzrdir import BzrDirFormat, BzrDirFormat4, BzrDirFormat5, BzrDirFormat6
22
from bzrlib.bzrdir import ConvertBzrDir4To5, ConvertBzrDir5To6, BzrDir, BzrDirFormat4, BzrDirFormat5
81
23
import bzrlib.errors as errors
82
from bzrlib.errors import NoSuchFile, UpgradeReadonly
83
import bzrlib.hashcache as hashcache
84
from bzrlib.lockable_files import LockableFiles
85
from bzrlib.osutils import sha_strings, sha_string, pathjoin, abspath
86
from bzrlib.ui import ui_factory
87
from bzrlib.store.text import TextStore
88
from bzrlib.store.weave import WeaveStore
89
from bzrlib.trace import mutter, note, warning
90
from bzrlib.transactions import PassThroughTransaction
91
24
from bzrlib.transport import get_transport
92
from bzrlib.transport.local import LocalTransport
93
from bzrlib.weave import Weave
94
from bzrlib.weavefile import read_weave, write_weave
95
from bzrlib.xml4 import serializer_v4
96
from bzrlib.xml5 import serializer_v5
25
import bzrlib.ui as ui
99
28
class Convert(object):
101
def __init__(self, transport):
102
self.base = transport.base
103
self.converted_revs = set()
104
self.absent_revisions = set()
107
self.transport = transport
108
if self.transport.is_readonly():
109
raise UpgradeReadonly
110
self.control_files = LockableFiles(transport.clone(bzrlib.BZRDIR), 'branch-lock')
111
# Lock the branch (soon to be meta dir) to prevent anyone racing with us
112
# This is currently windows incompatible, it will deadlock. When the upgrade
113
# logic becomes format specific, then we can have the format know how to pass this
114
# on. Also note that we probably have an 'upgrade meta' which upgrades the constituent
116
# FIXME: control files reuse
117
self.control_files.lock_write()
121
self.control_files.unlock()
30
def __init__(self, url, format):
32
self.bzrdir = BzrDir.open_unsupported(url)
33
if self.bzrdir.root_transport.is_readonly():
34
raise errors.UpgradeReadonly
35
self.transport = self.bzrdir.root_transport
123
38
def convert(self):
124
if not self._open_branch():
126
note('starting upgrade of %s', self.base)
39
self.pb = ui.ui_factory.progress_bar()
40
branch = self.bzrdir.open_branch()
41
if branch.bzrdir.root_transport.base != self.bzrdir.root_transport.base:
42
self.pb.note("This is a checkout. The branch (%s) needs to be "
43
"upgraded separately.",
44
branch.bzrdir.root_transport.base)
45
if not self.bzrdir.needs_format_conversion(self.format):
46
raise errors.UpToDateFormat(self.bzrdir._format)
47
if not self.bzrdir.can_convert_format():
48
raise errors.BzrError("cannot upgrade from branch format %s" %
50
self.pb.note('starting upgrade of %s', self.transport.base)
127
51
self._backup_control_dir()
128
self.pb = ui_factory.progress_bar()
129
if isinstance(self.old_format, BzrDirFormat4):
130
note('starting upgrade from format 4 to 5')
131
if isinstance(self.transport, LocalTransport):
132
self.bzrdir.get_workingtree_transport(None).delete('stat-cache')
133
self._convert_to_weaves()
134
if isinstance(self.old_format, BzrDirFormat5):
135
note('starting upgrade from format 5 to 6')
136
self._convert_to_prefixed()
139
def _convert_to_prefixed(self):
140
from bzrlib.store import hash_prefix
141
bzr_transport = self.transport.clone('.bzr')
142
bzr_transport.delete('branch-format')
143
for store_name in ["weaves", "revision-store"]:
144
note("adding prefixes to %s" % store_name)
145
store_transport = bzr_transport.clone(store_name)
146
for filename in store_transport.list_dir('.'):
147
if (filename.endswith(".weave") or
148
filename.endswith(".gz") or
149
filename.endswith(".sig")):
150
file_id = os.path.splitext(filename)[0]
153
prefix_dir = hash_prefix(file_id)
154
# FIXME keep track of the dirs made RBC 20060121
156
store_transport.move(filename, prefix_dir + '/' + filename)
157
except NoSuchFile: # catches missing dirs strangely enough
158
store_transport.mkdir(prefix_dir)
159
store_transport.move(filename, prefix_dir + '/' + filename)
160
self.old_format = BzrDirFormat6()
161
self._set_new_format(self.old_format.get_format_string())
162
self.bzrdir = self.old_format.open(self.transport)
163
self.branch = self.bzrdir.open_branch()
165
def _convert_to_weaves(self):
166
note('note: upgrade may be faster if all store files are ungzipped first')
167
bzr_transport = self.transport.clone('.bzr')
170
stat = bzr_transport.stat('weaves')
171
if not S_ISDIR(stat.st_mode):
172
bzr_transport.delete('weaves')
173
bzr_transport.mkdir('weaves')
175
bzr_transport.mkdir('weaves')
176
self.inv_weave = Weave('inventory')
177
# holds in-memory weaves for all files
178
self.text_weaves = {}
179
bzr_transport.delete('branch-format')
180
self._convert_working_inv()
181
rev_history = self.branch.revision_history()
182
# to_read is a stack holding the revisions we still need to process;
183
# appending to it adds new highest-priority revisions
184
self.known_revisions = set(rev_history)
185
self.to_read = rev_history[-1:]
187
rev_id = self.to_read.pop()
188
if (rev_id not in self.revisions
189
and rev_id not in self.absent_revisions):
190
self._load_one_rev(rev_id)
192
to_import = self._make_order()
193
for i, rev_id in enumerate(to_import):
194
self.pb.update('converting revision', i, len(to_import))
195
self._convert_one_rev(rev_id)
197
self._write_all_weaves()
198
self._write_all_revs()
199
note('upgraded to weaves:')
200
note(' %6d revisions and inventories' % len(self.revisions))
201
note(' %6d revisions not present' % len(self.absent_revisions))
202
note(' %6d texts' % self.text_count)
203
self._cleanup_spare_files_after_format4()
204
self.old_format = BzrDirFormat5()
205
self._set_new_format(self.old_format.get_format_string())
206
self.bzrdir = self.old_format.open(self.transport)
207
self.branch = self.bzrdir.open_branch()
209
def _open_branch(self):
210
self.old_format = BzrDirFormat.find_format(self.transport)
211
self.bzrdir = self.old_format.open(self.transport)
212
self.branch = self.bzrdir.open_branch()
213
if isinstance(self.old_format, BzrDirFormat6):
214
note('this branch is in the most current format (%s)', self.old_format)
216
if (not isinstance(self.old_format, BzrDirFormat4) and
217
not isinstance(self.old_format, BzrDirFormat5) and
218
not isinstance(self.old_format, bzrdir.BzrDirMetaFormat1)):
219
raise errors.BzrError("cannot upgrade from branch format %s" %
220
self.branch._branch_format)
223
def _set_new_format(self, format):
    """Record the new format marker for the branch being upgraded.

    :param format: the format string to store. It is handed unchanged to
        ``put_utf8`` on ``self.branch.control_files`` under the name
        'branch-format'.
    """
    self.branch.control_files.put_utf8('branch-format', format)
226
def _cleanup_spare_files_after_format4(self):
227
transport = self.transport.clone('.bzr')
228
# FIXME working tree upgrade foo.
229
for n in 'merged-patches', 'pending-merged-patches':
231
## assert os.path.getsize(p) == 0
235
transport.delete_tree('inventory-store')
236
transport.delete_tree('text-store')
52
while self.bzrdir.needs_format_conversion(self.format):
53
converter = self.bzrdir._format.get_converter(self.format)
54
self.bzrdir = converter.convert(self.bzrdir, self.pb)
55
self.pb.note("finished")
238
57
def _backup_control_dir(self):
239
note('making backup of tree history')
58
self.pb.note('making backup of tree history')
240
59
self.transport.copy_tree('.bzr', '.bzr.backup')
241
note('%s.bzr has been backed up to %s.bzr.backup',
60
self.pb.note('%s.bzr has been backed up to %s.bzr.backup',
242
61
self.transport.base,
243
62
self.transport.base)
244
note('if conversion fails, you can move this directory back to .bzr')
245
note('if it succeeds, you can remove this directory if you wish')
247
def _convert_working_inv(self):
249
inv = serializer_v4.read_inventory(branch.control_files.get('inventory'))
250
new_inv_xml = serializer_v5.write_inventory_to_string(inv)
251
# FIXME inventory is a working tree change.
252
branch.control_files.put('inventory', new_inv_xml)
254
def _write_all_weaves(self):
255
bzr_transport = self.transport.clone('.bzr')
256
controlweaves = WeaveStore(bzr_transport, prefixed=False)
257
weave_transport = bzr_transport.clone('weaves')
258
weaves = WeaveStore(weave_transport, prefixed=False)
259
transaction = PassThroughTransaction()
261
controlweaves.put_weave('inventory', self.inv_weave, transaction)
264
for file_id, file_weave in self.text_weaves.items():
265
self.pb.update('writing weave', i, len(self.text_weaves))
266
weaves.put_weave(file_id, file_weave, transaction)
271
def _write_all_revs(self):
272
"""Write all revisions out in new form."""
273
transport = self.transport.clone('.bzr')
274
transport.delete_tree('revision-store')
275
transport.mkdir('revision-store')
276
revision_transport = transport.clone('revision-store')
278
revision_store = TextStore(revision_transport,
282
for i, rev_id in enumerate(self.converted_revs):
283
self.pb.update('write revision', i, len(self.converted_revs))
285
serializer_v5.write_revision(self.revisions[rev_id], rev_tmp)
287
revision_store.add(rev_tmp, rev_id)
292
def _load_one_rev(self, rev_id):
293
"""Load a revision object into memory.
295
Any parents not either loaded or abandoned get queued to be
297
self.pb.update('loading revision',
299
len(self.known_revisions))
300
if not self.branch.repository.revision_store.has_id(rev_id):
302
note('revision {%s} not present in branch; '
303
'will be converted as a ghost',
305
self.absent_revisions.add(rev_id)
307
rev_xml = self.branch.repository.revision_store.get(rev_id).read()
308
rev = serializer_v4.read_revision_from_string(rev_xml)
309
for parent_id in rev.parent_ids:
310
self.known_revisions.add(parent_id)
311
self.to_read.append(parent_id)
312
self.revisions[rev_id] = rev
315
def _load_old_inventory(self, rev_id):
316
assert rev_id not in self.converted_revs
317
old_inv_xml = self.branch.repository.inventory_store.get(rev_id).read()
318
inv = serializer_v4.read_inventory_from_string(old_inv_xml)
319
rev = self.revisions[rev_id]
320
if rev.inventory_sha1:
321
assert rev.inventory_sha1 == sha_string(old_inv_xml), \
322
'inventory sha mismatch for {%s}' % rev_id
326
def _load_updated_inventory(self, rev_id):
327
assert rev_id in self.converted_revs
328
inv_xml = self.inv_weave.get_text(rev_id)
329
inv = serializer_v5.read_inventory_from_string(inv_xml)
333
def _convert_one_rev(self, rev_id):
    """Convert revision and all referenced objects to new format.

    Looks up the already-loaded revision in ``self.revisions``, loads its
    old-format inventory, converts the revision contents, stores the new
    inventory weave, and finally records ``rev_id`` in
    ``self.converted_revs``.

    :param rev_id: id of a revision present in ``self.revisions``.
    :raises KeyError: if ``rev_id`` has not been loaded into
        ``self.revisions``.
    """
    rev = self.revisions[rev_id]
    inv = self._load_old_inventory(rev_id)
    # Parents recorded in absent_revisions are left out, so the conversion
    # steps below only see parents that are not marked absent.
    present_parents = [p for p in rev.parent_ids
                       if p not in self.absent_revisions]
    self._convert_revision_contents(rev, inv, present_parents)
    self._store_new_weave(rev, inv, present_parents)
    # Mark as converted only after both conversion steps have completed.
    self.converted_revs.add(rev_id)
344
def _store_new_weave(self, rev, inv, present_parents):
345
# the XML is now updated with text versions
349
if ie.kind == 'root_directory':
351
assert hasattr(ie, 'revision'), \
352
'no revision on {%s} in {%s}' % \
353
(file_id, rev.revision_id)
354
new_inv_xml = serializer_v5.write_inventory_to_string(inv)
355
new_inv_sha1 = sha_string(new_inv_xml)
356
self.inv_weave.add(rev.revision_id,
358
new_inv_xml.splitlines(True),
360
rev.inventory_sha1 = new_inv_sha1
362
def _convert_revision_contents(self, rev, inv, present_parents):
363
"""Convert all the files within a revision.
365
Also upgrade the inventory to refer to the text revision ids."""
366
rev_id = rev.revision_id
367
mutter('converting texts of revision {%s}',
369
parent_invs = map(self._load_updated_inventory, present_parents)
372
self._convert_file_version(rev, ie, parent_invs)
374
def _convert_file_version(self, rev, ie, parent_invs):
375
"""Convert one version of one file.
377
The file needs to be added into the weave if it is a merge
378
of >=2 parents or if it's changed from its parent.
380
if ie.kind == 'root_directory':
383
rev_id = rev.revision_id
384
w = self.text_weaves.get(file_id)
387
self.text_weaves[file_id] = w
389
previous_entries = ie.find_previous_heads(parent_invs, w)
390
for old_revision in previous_entries:
391
# if this fails, it's a ghost?
392
assert old_revision in self.converted_revs
393
self.snapshot_ie(previous_entries, ie, w, rev_id)
395
assert getattr(ie, 'revision', None) is not None
397
def snapshot_ie(self, previous_revisions, ie, w, rev_id):
398
# TODO: convert this logic, which is ~= snapshot to
399
# a call to:. This needs the path figured out. rather than a work_tree
400
# a v4 revision_tree can be given, or something that looks enough like
401
# one to give the file content to the entry if it needs it.
402
# and we need something that looks like a weave store for snapshot to
404
#ie.snapshot(rev, PATH, previous_revisions, REVISION_TREE, InMemoryWeaveStore(self.text_weaves))
405
if len(previous_revisions) == 1:
406
previous_ie = previous_revisions.values()[0]
407
if ie._unchanged(previous_ie):
408
ie.revision = previous_ie.revision
410
parent_indexes = map(w.lookup, previous_revisions)
412
text = self.branch.repository.text_store.get(ie.text_id)
413
file_lines = text.readlines()
414
assert sha_strings(file_lines) == ie.text_sha1
415
assert sum(map(len, file_lines)) == ie.text_size
416
w.add(rev_id, parent_indexes, file_lines, ie.text_sha1)
419
w.add(rev_id, parent_indexes, [], None)
421
##mutter('import text {%s} of {%s}',
422
## ie.text_id, file_id)
424
def _make_order(self):
425
"""Return a suitable order for importing revisions.
427
The order must be such that an revision is imported after all
428
its (present) parents.
430
todo = set(self.revisions.keys())
431
done = self.absent_revisions.copy()
434
# scan through looking for a revision whose parents
436
for rev_id in sorted(list(todo)):
437
rev = self.revisions[rev_id]
438
parent_ids = set(rev.parent_ids)
439
if parent_ids.issubset(done):
440
# can take this one now
448
t = get_transport(url)
63
self.pb.note('if conversion fails, you can move this directory back to .bzr')
64
self.pb.note('if it succeeds, you can remove this directory if you wish')
66
def upgrade(url, format=None):
67
"""Upgrade to format, or the default bzrdir format if not supplied."""