# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""bzr upgrade logic."""

# change upgrade from .bzr to create a '.bzr-new', then do a bait and switch.

from bzrlib.bzrdir import ConvertBzrDir4To5, ConvertBzrDir5To6, BzrDir, BzrDirFormat4, BzrDirFormat5
import bzrlib.errors as errors

# Experiment in converting existing bzr branches to weaves.
#
# To make this properly useful
#
# 1. assign text version ids, and put those text versions into
#    the inventory as they're converted.
#
# 2. keep track of the previous version of each file, rather than
#    just using the last one imported
#
# 3. assign entry versions when files are added, renamed or moved.
#
# 4. when merged-in versions are observed, walk down through them
#    to discover everything, then commit bottom-up
#
# 5. track ancestry as things are merged in, and commit that in each
#    revision
#
# Perhaps it's best to first walk the whole graph and make a plan for
# what should be imported in what order? Need a kind of topological
# sort of all revisions. (Or do we, can we just before doing a revision
# see that all its parents have either been converted or abandoned?)
#
# Cannot import a revision until all its parents have been
# imported. In other words, we can only import revisions whose
# parents have all been imported. The first step must be to
# import a revision with no parents, of which there must be at
# least one. (So perhaps it's useful to store forward pointers
# from a list of parents to their children?)
#
# Another (equivalent?) approach is to build up the ordered
# ancestry list for the last revision, and walk through that. We
# are going to need that.
#
# We don't want to have to recurse all the way back down the list.
#
# Suppose we keep a queue of the revisions able to be processed at
# any point. This starts out with all the revisions having no
# parents.
#
# This seems like a generally useful algorithm...
#
# The current algorithm is dumb (O(n**2)?) but will do the job, and
# takes less than a second on the bzr.dev branch.
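
# A rough sketch of the queue-based ordering described above, kept only for
# illustration; it is not used by the converter, which uses the simpler
# repeated scan in Convert._make_order below. It assumes 'parents' maps every
# revision id to the ids of its *present* parents, with ghosts already
# filtered out (the way self.absent_revisions is used later in this file).
def _example_queue_order(parents):
    from collections import deque
    # how many parents each revision is still waiting on
    remaining = dict((rev_id, len(ps)) for rev_id, ps in parents.items())
    # forward pointers from each parent to its children
    children = {}
    for rev_id, ps in parents.items():
        for p in ps:
            children.setdefault(p, []).append(rev_id)
    # start with the revisions that have no parents to wait for
    queue = deque([r for r, n in remaining.items() if n == 0])
    order = []
    while queue:
        rev_id = queue.popleft()
        order.append(rev_id)
        for child in children.get(rev_id, []):
            remaining[child] -= 1
            if remaining[child] == 0:
                queue.append(child)
    return order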

# This currently does a kind of lazy conversion of file texts, where a
# new text is written in every version. That's unnecessary but for
# the moment saves us having to worry about when files need new
# versions.

import os
from cStringIO import StringIO
from stat import S_ISDIR

import bzrlib
from bzrlib.branch import Branch
from bzrlib.branch import BZR_BRANCH_FORMAT_5, BZR_BRANCH_FORMAT_6
from bzrlib.branch import BzrBranchFormat, BzrBranchFormat4, BzrBranchFormat5, BzrBranchFormat6
from bzrlib.errors import BzrError, NoSuchFile, UpgradeReadonly
import bzrlib.hashcache as hashcache
from bzrlib.lockable_files import LockableFiles
from bzrlib.osutils import sha_strings, sha_string, pathjoin, abspath
from bzrlib.ui import ui_factory
from bzrlib.store.text import TextStore
from bzrlib.store.weave import WeaveStore
from bzrlib.trace import mutter, note, warning
from bzrlib.transactions import PassThroughTransaction
from bzrlib.transport import get_transport
import bzrlib.ui as ui
from bzrlib.transport.local import LocalTransport
from bzrlib.weave import Weave
from bzrlib.weavefile import read_weave, write_weave
from bzrlib.xml4 import serializer_v4
from bzrlib.xml5 import serializer_v5


class Convert(object):

    def __init__(self, url, format):
        self.format = format
        self.bzrdir = BzrDir.open_unsupported(url)
        if self.bzrdir.root_transport.is_readonly():
            raise errors.UpgradeReadonly
        self.transport = self.bzrdir.root_transport
        self.pb = ui.ui_factory.nested_progress_bar()

    def __init__(self, transport):
        self.base = transport.base
        self.converted_revs = set()
        self.absent_revisions = set()
        self.text_count = 0
        self.revisions = {}
        self.transport = transport
        if self.transport.is_readonly():
            raise UpgradeReadonly
        self.control_files = LockableFiles(transport.clone(bzrlib.BZRDIR), 'branch-lock')
        # Lock the branch (soon to be meta dir) to prevent anyone racing with us.
        # This is currently windows incompatible: it will deadlock. When the upgrade
        # logic becomes format specific, then we can have the format know how to pass
        # this on. Also note that we probably have an 'upgrade meta' which upgrades
        # the constituent parts.
        print "FIXME: control files reuse"
        self.control_files.lock_write()
        try:
            self.convert()
        finally:
            self.control_files.unlock()

    def convert(self):
        try:
            branch = self.bzrdir.open_branch()
            if branch.bzrdir.root_transport.base != \
                self.bzrdir.root_transport.base:
                self.pb.note("This is a checkout. The branch (%s) needs to be "
                             "upgraded separately.",
                             branch.bzrdir.root_transport.base)
        except errors.NotBranchError:
            pass
        if not self.bzrdir.needs_format_conversion(self.format):
            raise errors.UpToDateFormat(self.bzrdir._format)
        if not self.bzrdir.can_convert_format():
            raise errors.BzrError("cannot upgrade from branch format %s" %
                                  self.bzrdir._format)
        self.pb.note('starting upgrade of %s', self.transport.base)
        self._backup_control_dir()
        while self.bzrdir.needs_format_conversion(self.format):
            converter = self.bzrdir._format.get_converter(self.format)
            self.bzrdir = converter.convert(self.bzrdir, self.pb)
        self.pb.note("finished")

    def convert(self):
        if not self._open_branch():
            return
        note('starting upgrade of %s', self.base)
        self._backup_control_dir()
        self.pb = ui_factory.progress_bar()
        if isinstance(self.old_format, BzrBranchFormat4):
            note('starting upgrade from format 4 to 5')
            self._convert_to_weaves()
        if isinstance(self.old_format, BzrBranchFormat5):
            note('starting upgrade from format 5 to 6')
            self._convert_to_prefixed()
        if isinstance(self.transport, LocalTransport):
            cache = hashcache.HashCache(abspath(self.base))
            cache.clear()
            cache.write()

    def _convert_to_prefixed(self):
        from bzrlib.store import hash_prefix
        bzr_transport = self.transport.clone('.bzr')
        bzr_transport.delete('branch-format')
        for store_name in ["weaves", "revision-store"]:
            note("adding prefixes to %s" % store_name)
            store_transport = bzr_transport.clone(store_name)
            for filename in store_transport.list_dir('.'):
                if filename.endswith(".weave") or filename.endswith(".gz"):
                    file_id = os.path.splitext(filename)[0]
                else:
                    file_id = filename
                prefix_dir = hash_prefix(file_id)
                # FIXME keep track of the dirs made RBC 20060121
                try:
                    store_transport.move(filename, prefix_dir + '/' + filename)
                except NoSuchFile: # catches missing dirs strangely enough
                    store_transport.mkdir(prefix_dir)
                    store_transport.move(filename, prefix_dir + '/' + filename)
        self._set_new_format(BZR_BRANCH_FORMAT_6)
        self.branch = BzrBranchFormat6().open(self.transport)
        self.old_format = self.branch._branch_format

    def _convert_to_weaves(self):
        note('note: upgrade may be faster if all store files are ungzipped first')
        bzr_transport = self.transport.clone('.bzr')
        try:
            stat = bzr_transport.stat('weaves')
            if not S_ISDIR(stat.st_mode):
                bzr_transport.delete('weaves')
                bzr_transport.mkdir('weaves')
        except NoSuchFile:
            bzr_transport.mkdir('weaves')
        self.inv_weave = Weave('inventory')
        # holds in-memory weaves for all files
        self.text_weaves = {}
        bzr_transport.delete('branch-format')
        self._convert_working_inv()
        rev_history = self.branch.revision_history()
        # to_read is a stack holding the revisions we still need to process;
        # appending to it adds new highest-priority revisions
        self.known_revisions = set(rev_history)
        self.to_read = rev_history[-1:]
        while self.to_read:
            rev_id = self.to_read.pop()
            if (rev_id not in self.revisions
                and rev_id not in self.absent_revisions):
                self._load_one_rev(rev_id)
        to_import = self._make_order()
        for i, rev_id in enumerate(to_import):
            self.pb.update('converting revision', i, len(to_import))
            self._convert_one_rev(rev_id)
        self._write_all_weaves()
        self._write_all_revs()
        note('upgraded to weaves:')
        note(' %6d revisions and inventories' % len(self.revisions))
        note(' %6d revisions not present' % len(self.absent_revisions))
        note(' %6d texts' % self.text_count)
        self._cleanup_spare_files_after_format4()
        self._set_new_format(BZR_BRANCH_FORMAT_5)
        self.branch = BzrBranchFormat5().open(self.transport)
        self.old_format = self.branch._branch_format

    def _open_branch(self):
        self.old_format = BzrBranchFormat.find_format(self.transport)
        self.branch = self.old_format.open(self.transport)
        if isinstance(self.old_format, BzrBranchFormat6):
            note('this branch is in the most current format (%s)', self.old_format)
            return False
        if (not isinstance(self.old_format, BzrBranchFormat4) and
            not isinstance(self.old_format, BzrBranchFormat5)):
            raise BzrError("cannot upgrade from branch format %s" %
                           self.branch._branch_format)
        return True

    def _set_new_format(self, format):
        self.branch.control_files.put_utf8('branch-format', format)

    def _cleanup_spare_files_after_format4(self):
        transport = self.transport.clone('.bzr')
        print "FIXME working tree upgrade foo."
        for n in 'merged-patches', 'pending-merged-patches':
            try:
                ## assert os.path.getsize(p) == 0
                transport.delete(n)
            except NoSuchFile:
                pass
        transport.delete_tree('inventory-store')
        transport.delete_tree('text-store')

    def _backup_control_dir(self):
        note('making backup of tree history')
        self.transport.copy_tree('.bzr', '.bzr.backup')
        note('%s.bzr has been backed up to %s.bzr.backup',
             self.transport.base,
             self.transport.base)
        note('if conversion fails, you can move this directory back to .bzr')
        note('if it succeeds, you can remove this directory if you wish')

    def _convert_working_inv(self):
        branch = self.branch
        inv = serializer_v4.read_inventory(branch.control_files.get('inventory'))
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        print "fixme inventory is a working tree change."
        branch.control_files.put('inventory', new_inv_xml)

    def _write_all_weaves(self):
        bzr_transport = self.transport.clone('.bzr')
        controlweaves = WeaveStore(bzr_transport, prefixed=False)
        weave_transport = bzr_transport.clone('weaves')
        weaves = WeaveStore(weave_transport, prefixed=False)
        transaction = PassThroughTransaction()

        controlweaves.put_weave('inventory', self.inv_weave, transaction)
        for i, (file_id, file_weave) in enumerate(self.text_weaves.items()):
            self.pb.update('writing weave', i, len(self.text_weaves))
            weaves.put_weave(file_id, file_weave, transaction)

    def _write_all_revs(self):
        """Write all revisions out in new form."""
        transport = self.transport.clone('.bzr')
        transport.delete_tree('revision-store')
        transport.mkdir('revision-store')
        revision_transport = transport.clone('revision-store')
        revision_store = TextStore(revision_transport,
                                   prefixed=False,
                                   compressed=True)
        for i, rev_id in enumerate(self.converted_revs):
            self.pb.update('write revision', i, len(self.converted_revs))
            rev_tmp = StringIO()
            serializer_v5.write_revision(self.revisions[rev_id], rev_tmp)
            rev_tmp.seek(0)
            revision_store.add(rev_tmp, rev_id)

    def _load_one_rev(self, rev_id):
        """Load a revision object into memory.

        Any parents not either loaded or abandoned get queued to be
        loaded."""
        self.pb.update('loading revision',
                       len(self.revisions),
                       len(self.known_revisions))
        if not self.branch.repository.revision_store.has_id(rev_id):
            note('revision {%s} not present in branch; '
                 'will be converted as a ghost',
                 rev_id)
            self.absent_revisions.add(rev_id)
        else:
            rev_xml = self.branch.repository.revision_store.get(rev_id).read()
            rev = serializer_v4.read_revision_from_string(rev_xml)
            for parent_id in rev.parent_ids:
                self.known_revisions.add(parent_id)
                self.to_read.append(parent_id)
            self.revisions[rev_id] = rev

    def _load_old_inventory(self, rev_id):
        assert rev_id not in self.converted_revs
        old_inv_xml = self.branch.repository.inventory_store.get(rev_id).read()
        inv = serializer_v4.read_inventory_from_string(old_inv_xml)
        rev = self.revisions[rev_id]
        if rev.inventory_sha1:
            assert rev.inventory_sha1 == sha_string(old_inv_xml), \
                'inventory sha mismatch for {%s}' % rev_id
        return inv

    def _load_updated_inventory(self, rev_id):
        assert rev_id in self.converted_revs
        inv_xml = self.inv_weave.get_text(rev_id)
        inv = serializer_v5.read_inventory_from_string(inv_xml)
        return inv

    def _convert_one_rev(self, rev_id):
        """Convert revision and all referenced objects to new format."""
        rev = self.revisions[rev_id]
        inv = self._load_old_inventory(rev_id)
        present_parents = [p for p in rev.parent_ids
                           if p not in self.absent_revisions]
        self._convert_revision_contents(rev, inv, present_parents)
        self._store_new_weave(rev, inv, present_parents)
        self.converted_revs.add(rev_id)

    def _store_new_weave(self, rev, inv, present_parents):
        # the XML is now updated with text versions
        for file_id in inv:
            ie = inv[file_id]
            if ie.kind == 'root_directory':
                continue
            assert hasattr(ie, 'revision'), \
                'no revision on {%s} in {%s}' % \
                (file_id, rev.revision_id)
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        new_inv_sha1 = sha_string(new_inv_xml)
        self.inv_weave.add(rev.revision_id,
                           present_parents,
                           new_inv_xml.splitlines(True),
                           new_inv_sha1)
        rev.inventory_sha1 = new_inv_sha1

    def _convert_revision_contents(self, rev, inv, present_parents):
        """Convert all the files within a revision.

        Also upgrade the inventory to refer to the text revision ids."""
        rev_id = rev.revision_id
        mutter('converting texts of revision {%s}', rev_id)
        parent_invs = map(self._load_updated_inventory, present_parents)
        for file_id in inv:
            ie = inv[file_id]
            self._convert_file_version(rev, ie, parent_invs)

    def _convert_file_version(self, rev, ie, parent_invs):
        """Convert one version of one file.

        The file needs to be added into the weave if it is a merge
        of >=2 parents or if it's changed from its parent.
        """
        if ie.kind == 'root_directory':
            return
        file_id = ie.file_id
        rev_id = rev.revision_id
        w = self.text_weaves.get(file_id)
        if w is None:
            w = Weave(file_id)
            self.text_weaves[file_id] = w
        previous_entries = ie.find_previous_heads(parent_invs, w)
        for old_revision in previous_entries:
            # if this fails, it's a ghost ?
            assert old_revision in self.converted_revs
        self.snapshot_ie(previous_entries, ie, w, rev_id)
        assert getattr(ie, 'revision', None) is not None

    def snapshot_ie(self, previous_revisions, ie, w, rev_id):
        # TODO: convert this logic, which is ~= snapshot, into a call to
        # ie.snapshot(). This needs the path figured out; rather than a
        # work_tree, a v4 revision_tree can be given, or something that looks
        # enough like one to give the file content to the entry if it needs
        # it. And we need something that looks like a weave store for
        # snapshot to save against.
        #ie.snapshot(rev, PATH, previous_revisions, REVISION_TREE, InMemoryWeaveStore(self.text_weaves))
        if len(previous_revisions) == 1:
            previous_ie = previous_revisions.values()[0]
            if ie._unchanged(previous_ie):
                ie.revision = previous_ie.revision
                return
        parent_indexes = map(w.lookup, previous_revisions)
        if ie.has_text():
            text = self.branch.repository.text_store.get(ie.text_id)
            file_lines = text.readlines()
            assert sha_strings(file_lines) == ie.text_sha1
            assert sum(map(len, file_lines)) == ie.text_size
            w.add(rev_id, parent_indexes, file_lines, ie.text_sha1)
            self.text_count += 1
        else:
            w.add(rev_id, parent_indexes, [], None)
        ie.revision = rev_id
        ##mutter('import text {%s} of {%s}',
        ##       ie.text_id, file_id)

    def _make_order(self):
        """Return a suitable order for importing revisions.

        The order must be such that a revision is imported after all
        its (present) parents.
        """
        todo = set(self.revisions.keys())
        done = self.absent_revisions.copy()
        order = []
        while todo:
            # scan through looking for a revision whose parents
            # are all done
            for rev_id in sorted(list(todo)):
                rev = self.revisions[rev_id]
                parent_ids = set(rev.parent_ids)
                if parent_ids.issubset(done):
                    # can take this one now
                    order.append(rev_id)
                    todo.remove(rev_id)
                    done.add(rev_id)
        return order


def upgrade(url, format=None):
    """Upgrade to format, or the default bzrdir format if not supplied."""
    t = get_transport(url)
    Convert(t.base, format)
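

if __name__ == '__main__':
    # Rough manual invocation for experimenting with this module directly,
    # e.g. "python upgrade.py /path/to/branch"; illustrative only, since the
    # supported entry point is the 'bzr upgrade' command.
    import sys
    if len(sys.argv) > 1:
        upgrade(sys.argv[1])
    else:
        upgrade('.')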