14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Experiment in converting existing bzr branches to weaves."""
19
# To make this properly useful
21
# 1. assign text version ids, and put those text versions into
22
# the inventory as they're converted.
24
# 2. keep track of the previous version of each file, rather than
25
# just using the last one imported
27
# 3. assign entry versions when files are added, renamed or moved.
29
# 4. when merged-in versions are observed, walk down through them
30
# to discover everything, then commit bottom-up
32
# 5. track ancestry as things are merged in, and commit that in each
35
# Perhaps it's best to first walk the whole graph and make a plan for
36
# what should be imported in what order? Need a kind of topological
37
# sort of all revisions. (Or do we, can we just before doing a revision
38
# see that all its parents have either been converted or abandoned?)
41
# Cannot import a revision until all its parents have been
42
# imported. In other words, we can only import revisions whose
43
# parents have all been imported. The first step must be to
44
# import a revision with no parents, of which there must be at
45
# least one. (So perhaps it's useful to store forward pointers
46
# from a list of parents to their children?)
48
# Another (equivalent?) approach is to build up the ordered
49
# ancestry list for the last revision, and walk through that. We
50
# are going to need that.
52
# We don't want to have to recurse all the way back down the list.
54
# Suppose we keep a queue of the revisions able to be processed at
55
# any point. This starts out with all the revisions having no
58
# This seems like a generally useful algorithm...
60
# The current algorithm is dumb (O(n**2)?) but will do the job, and
61
# takes less than a second on the bzr.dev branch.
63
# This currently does a kind of lazy conversion of file texts, where a
64
# new text is written in every version. That's unnecessary but for
65
# the moment saves us having to worry about when files need new
68
from cStringIO import StringIO
75
from bzrlib.branch import Branch
76
from bzrlib.branch import BZR_BRANCH_FORMAT_5, BZR_BRANCH_FORMAT_6
77
from bzrlib.branch import BzrBranchFormat, BzrBranchFormat4, BzrBranchFormat5, BzrBranchFormat6
78
from bzrlib.errors import NoSuchFile, UpgradeReadonly
79
import bzrlib.hashcache as hashcache
80
from bzrlib.lockable_files import LockableFiles
81
from bzrlib.osutils import sha_strings, sha_string, pathjoin, abspath
82
from bzrlib.ui import ui_factory
83
from bzrlib.store.text import TextStore
84
from bzrlib.store.weave import WeaveStore
85
from bzrlib.trace import mutter, note, warning
86
from bzrlib.transactions import PassThroughTransaction
17
"""bzr upgrade logic."""
19
# change upgrade from .bzr to create a '.bzr-new', then do a bait and switch.
22
from bzrlib.bzrdir import ConvertBzrDir4To5, ConvertBzrDir5To6, BzrDir, BzrDirFormat4, BzrDirFormat5
23
import bzrlib.errors as errors
87
24
from bzrlib.transport import get_transport
88
from bzrlib.transport.local import LocalTransport
89
from bzrlib.weave import Weave
90
from bzrlib.weavefile import read_weave, write_weave
91
from bzrlib.xml4 import serializer_v4
92
from bzrlib.xml5 import serializer_v5
25
import bzrlib.ui as ui
95
28
class Convert(object):
97
def __init__(self, transport):
# NOTE(review): two __init__ signatures appear in this span (a
# transport-based one and a url/format-based one); this looks like two
# revisions of the file interleaved -- confirm which one is live.
#
# Transport-based variant: records transport.base, initialises the
# converted/absent revision sets, raises UpgradeReadonly when the transport
# is read-only, then takes a write lock on the control files.
98
self.base = transport.base
99
self.converted_revs = set()
100
self.absent_revisions = set()
103
self.transport = transport
104
if self.transport.is_readonly():
105
raise UpgradeReadonly
106
self.control_files = LockableFiles(transport.clone(bzrlib.BZRDIR), 'branch-lock')
107
# Lock the branch (soon to be meta dir) to prevent anyone racing with us
108
# This is currently windows incompatible, it will deadlock. When the upgrade
109
# logic becomes format specific, then we can have the format know how to pass this
110
# on. Also note that we probably have an 'upgrade meta' which upgrades the constituent
112
# FIXME: control files reuse
113
self.control_files.lock_write()
30
def __init__(self, url, format):
# url/format-based variant: opens the control directory with
# BzrDir.open_unsupported(url), raises errors.UpgradeReadonly when its root
# transport is read-only, and creates a nested progress bar.
# NOTE(review): convert() reads self.format, but no assignment of it is
# visible in this view.
32
self.bzrdir = BzrDir.open_unsupported(url)
33
if self.bzrdir.root_transport.is_readonly():
34
raise errors.UpgradeReadonly
35
self.transport = self.bzrdir.root_transport
36
self.pb = ui.ui_factory.nested_progress_bar()
117
# Releases the write lock taken by the transport-based variant.
# NOTE(review): the surrounding control flow is not visible in this view.
self.control_files.unlock()
119
42
def convert(self):
# Drives the upgrade.
# NOTE(review): statements from two revisions of this method appear
# interleaved below, and several lines (e.g. the body of the first `if`
# and the tail of the BzrError call) are not visible in this view.
120
if not self._open_branch():
122
note('starting upgrade of %s', self.base)
43
# bzrdir-based path: report when the branch lives at a different root
# (a checkout), then raise if no conversion is needed or none is possible.
branch = self.bzrdir.open_branch()
44
if branch.bzrdir.root_transport.base != self.bzrdir.root_transport.base:
45
self.pb.note("This is a checkout. The branch (%s) needs to be "
46
"upgraded separately.",
47
branch.bzrdir.root_transport.base)
48
if not self.bzrdir.needs_format_conversion(self.format):
49
raise errors.UpToDateFormat(self.bzrdir._format)
50
if not self.bzrdir.can_convert_format():
51
raise errors.BzrError("cannot upgrade from branch format %s" %
53
self.pb.note('starting upgrade of %s', self.transport.base)
123
54
self._backup_control_dir()
124
self.pb = ui_factory.progress_bar()
125
# Format-specific path: a format 4 branch is converted to weaves and a
# format 5 branch to prefixed stores. _convert_to_weaves() reassigns
# self.old_format, so the second check is made against the updated value.
if isinstance(self.old_format, BzrBranchFormat4):
126
note('starting upgrade from format 4 to 5')
127
self._convert_to_weaves()
128
if isinstance(self.old_format, BzrBranchFormat5):
129
note('starting upgrade from format 5 to 6')
130
self._convert_to_prefixed()
131
# NOTE(review): what is done with the hash cache afterwards is not visible.
if isinstance(self.transport, LocalTransport):
132
cache = hashcache.HashCache(abspath(self.base))
137
def _convert_to_prefixed(self):
# Deletes the 'branch-format' file, then moves every .weave/.gz/.sig file in
# the "weaves" and "revision-store" directories under .bzr into a
# subdirectory named by hash_prefix(file_id). Finally writes
# BZR_BRANCH_FORMAT_6 as the new format and reopens the branch.
# NOTE(review): os.path.splitext is used below, but `import os` is not among
# the imports visible in this view -- confirm.
138
from bzrlib.store import hash_prefix
139
bzr_transport = self.transport.clone('.bzr')
140
bzr_transport.delete('branch-format')
141
for store_name in ["weaves", "revision-store"]:
142
note("adding prefixes to %s" % store_name)
143
store_transport = bzr_transport.clone(store_name)
144
for filename in store_transport.list_dir('.'):
145
if (filename.endswith(".weave") or
146
filename.endswith(".gz") or
147
filename.endswith(".sig")):
148
file_id = os.path.splitext(filename)[0]
151
prefix_dir = hash_prefix(file_id)
152
# FIXME keep track of the dirs made RBC 20060121
154
store_transport.move(filename, prefix_dir + '/' + filename)
155
# NOTE(review): the matching `try:` line is not visible in this view. On
# NoSuchFile the prefix directory is created and the move is retried.
except NoSuchFile: # catches missing dirs strangely enough
156
store_transport.mkdir(prefix_dir)
157
store_transport.move(filename, prefix_dir + '/' + filename)
158
self._set_new_format(BZR_BRANCH_FORMAT_6)
159
self.branch = BzrBranchFormat6().open(self.transport)
160
self.old_format = self.branch._branch_format
162
def _convert_to_weaves(self):
# Format 4 -> 5 conversion. Prepares a 'weaves' directory under .bzr, loads
# revisions starting from the last entry of the revision history (parents
# are queued by _load_one_rev), converts them in the order returned by
# _make_order(), writes out all weaves and revisions, then sets
# BZR_BRANCH_FORMAT_5 and reopens the branch.
163
note('note: upgrade may be faster if all store files are ungzipped first')
164
bzr_transport = self.transport.clone('.bzr')
167
# NOTE(review): the try/except or if/else framing around this stat/mkdir
# sequence is not visible, nor is the import of S_ISDIR.
stat = bzr_transport.stat('weaves')
168
if not S_ISDIR(stat.st_mode):
169
bzr_transport.delete('weaves')
170
bzr_transport.mkdir('weaves')
172
bzr_transport.mkdir('weaves')
173
self.inv_weave = Weave('inventory')
174
# holds in-memory weaves for all files
175
self.text_weaves = {}
176
bzr_transport.delete('branch-format')
177
self._convert_working_inv()
178
rev_history = self.branch.revision_history()
179
# to_read is a stack holding the revisions we still need to process;
180
# appending to it adds new highest-priority revisions
181
self.known_revisions = set(rev_history)
182
self.to_read = rev_history[-1:]
184
# NOTE(review): the loop header that drains self.to_read is not visible.
rev_id = self.to_read.pop()
185
if (rev_id not in self.revisions
186
and rev_id not in self.absent_revisions):
187
self._load_one_rev(rev_id)
189
to_import = self._make_order()
190
for i, rev_id in enumerate(to_import):
191
self.pb.update('converting revision', i, len(to_import))
192
self._convert_one_rev(rev_id)
194
self._write_all_weaves()
195
self._write_all_revs()
196
note('upgraded to weaves:')
197
note(' %6d revisions and inventories' % len(self.revisions))
198
note(' %6d revisions not present' % len(self.absent_revisions))
199
note(' %6d texts' % self.text_count)
200
self._cleanup_spare_files_after_format4()
201
self._set_new_format(BZR_BRANCH_FORMAT_5)
202
self.branch = BzrBranchFormat5().open(self.transport)
203
# Refresh old_format so later format checks see the upgraded branch.
self.old_format = self.branch._branch_format
205
def _open_branch(self):
# Detects the branch format on self.transport and opens the branch with it.
# Emits a note when the format is already BzrBranchFormat6 and raises
# BzrError for any format other than 4 or 5.
# NOTE(review): no return statements are visible here, although convert()
# tests this method's result; BzrError is also not among the visible imports.
206
self.old_format = BzrBranchFormat.find_format(self.transport)
207
self.branch = self.old_format.open(self.transport)
208
if isinstance(self.old_format, BzrBranchFormat6):
209
note('this branch is in the most current format (%s)', self.old_format)
211
if (not isinstance(self.old_format, BzrBranchFormat4) and
212
not isinstance(self.old_format, BzrBranchFormat5)):
213
raise BzrError("cannot upgrade from branch format %s" %
214
self.branch._branch_format)
217
def _set_new_format(self, format):
# Writes `format` to the 'branch-format' control file of self.branch.
218
self.branch.control_files.put_utf8('branch-format', format)
220
def _cleanup_spare_files_after_format4(self):
# Removes format-4 leftovers under .bzr: the 'inventory-store' and
# 'text-store' trees are deleted.
# NOTE(review): the body of the loop over 'merged-patches' and
# 'pending-merged-patches' is not visible in this view.
221
transport = self.transport.clone('.bzr')
222
# FIXME working tree upgrade foo.
223
for n in 'merged-patches', 'pending-merged-patches':
225
## assert os.path.getsize(p) == 0
229
transport.delete_tree('inventory-store')
230
transport.delete_tree('text-store')
55
# NOTE(review): this loop appears to be the tail of the bzrdir-based
# convert(), displaced here by the interleaving of two file revisions.
# While the control directory still needs conversion to self.format, it asks
# the current format for a converter and replaces self.bzrdir with the
# converter's result; "finished" is reported once the loop ends.
while self.bzrdir.needs_format_conversion(self.format):
56
converter = self.bzrdir._format.get_converter(self.format)
57
self.bzrdir = converter.convert(self.bzrdir, self.pb)
58
self.pb.note("finished")
232
60
def _backup_control_dir(self):
# Copies the '.bzr' tree to '.bzr.backup' on self.transport and tells the
# user where the backup is and what to do with it.
# NOTE(review): each message appears twice (via note() and via
# self.pb.note()) because two revisions of the method are interleaved here.
233
note('making backup of tree history')
61
self.pb.note('making backup of tree history')
234
62
self.transport.copy_tree('.bzr', '.bzr.backup')
235
note('%s.bzr has been backed up to %s.bzr.backup',
63
self.pb.note('%s.bzr has been backed up to %s.bzr.backup',
236
64
self.transport.base,
237
65
self.transport.base)
238
note('if conversion fails, you can move this directory back to .bzr')
239
note('if it succeeds, you can remove this directory if you wish')
241
def _convert_working_inv(self):
# Reads the 'inventory' control file with the v4 serializer and writes it
# back serialised with the v5 serializer.
# NOTE(review): the binding of the local name `branch` is not visible.
243
inv = serializer_v4.read_inventory(branch.control_files.get('inventory'))
244
new_inv_xml = serializer_v5.write_inventory_to_string(inv)
245
# FIXME inventory is a working tree change.
246
branch.control_files.put('inventory', new_inv_xml)
248
def _write_all_weaves(self):
# Stores self.inv_weave as 'inventory' directly under .bzr and every entry
# of self.text_weaves under .bzr/weaves, both through unprefixed WeaveStores
# and a PassThroughTransaction.
# NOTE(review): the counter `i` passed to pb.update is neither initialised
# nor incremented in the lines visible here.
249
bzr_transport = self.transport.clone('.bzr')
250
controlweaves = WeaveStore(bzr_transport, prefixed=False)
251
weave_transport = bzr_transport.clone('weaves')
252
weaves = WeaveStore(weave_transport, prefixed=False)
253
transaction = PassThroughTransaction()
255
controlweaves.put_weave('inventory', self.inv_weave, transaction)
258
for file_id, file_weave in self.text_weaves.items():
259
self.pb.update('writing weave', i, len(self.text_weaves))
260
weaves.put_weave(file_id, file_weave, transaction)
265
def _write_all_revs(self):
# Deletes and recreates .bzr/revision-store, then serialises each converted
# revision with serializer_v5 and adds it to a TextStore in that directory.
# NOTE(review): the remaining TextStore arguments and the creation of
# rev_tmp are not visible in this view.
266
"""Write all revisions out in new form."""
267
transport = self.transport.clone('.bzr')
268
transport.delete_tree('revision-store')
269
transport.mkdir('revision-store')
270
revision_transport = transport.clone('revision-store')
272
revision_store = TextStore(revision_transport,
276
for i, rev_id in enumerate(self.converted_revs):
277
self.pb.update('write revision', i, len(self.converted_revs))
279
serializer_v5.write_revision(self.revisions[rev_id], rev_tmp)
281
revision_store.add(rev_tmp, rev_id)
286
def _load_one_rev(self, rev_id):
# When the revision store lacks rev_id, it is recorded in
# self.absent_revisions (to be converted as a ghost). Otherwise the revision
# is parsed with serializer_v4 and stored in self.revisions, and each parent
# id is added to self.known_revisions and pushed onto self.to_read.
# NOTE(review): the end of the docstring and the if/else framing between the
# two cases are not visible in this view.
287
"""Load a revision object into memory.
289
Any parents not either loaded or abandoned get queued to be
291
self.pb.update('loading revision',
293
len(self.known_revisions))
294
if not self.branch.repository.revision_store.has_id(rev_id):
296
note('revision {%s} not present in branch; '
297
'will be converted as a ghost',
299
self.absent_revisions.add(rev_id)
301
rev_xml = self.branch.repository.revision_store.get(rev_id).read()
302
rev = serializer_v4.read_revision_from_string(rev_xml)
303
for parent_id in rev.parent_ids:
304
self.known_revisions.add(parent_id)
305
self.to_read.append(parent_id)
306
self.revisions[rev_id] = rev
309
def _load_old_inventory(self, rev_id):
# Reads the inventory for a not-yet-converted rev_id from the inventory
# store with serializer_v4. When the revision records an inventory_sha1, it
# asserts that the value matches the SHA-1 of the stored XML.
# NOTE(review): no return statement is visible, although _convert_one_rev
# uses this method's result.
310
assert rev_id not in self.converted_revs
311
old_inv_xml = self.branch.repository.inventory_store.get(rev_id).read()
312
inv = serializer_v4.read_inventory_from_string(old_inv_xml)
313
rev = self.revisions[rev_id]
314
if rev.inventory_sha1:
315
assert rev.inventory_sha1 == sha_string(old_inv_xml), \
316
'inventory sha mismatch for {%s}' % rev_id
320
def _load_updated_inventory(self, rev_id):
# Reads the already-converted inventory for rev_id out of self.inv_weave and
# parses it with serializer_v5.
# NOTE(review): no return statement is visible in this view.
321
assert rev_id in self.converted_revs
322
inv_xml = self.inv_weave.get_text(rev_id)
323
inv = serializer_v5.read_inventory_from_string(inv_xml)
327
def _convert_one_rev(self, rev_id):
# Loads the old inventory for rev_id, converts the revision's file contents
# and stores the new inventory weave, then marks rev_id as converted.
# Parents listed in self.absent_revisions are excluded from both steps.
328
"""Convert revision and all referenced objects to new format."""
329
rev = self.revisions[rev_id]
330
inv = self._load_old_inventory(rev_id)
331
present_parents = [p for p in rev.parent_ids
332
if p not in self.absent_revisions]
333
self._convert_revision_contents(rev, inv, present_parents)
334
self._store_new_weave(rev, inv, present_parents)
335
self.converted_revs.add(rev_id)
338
def _store_new_weave(self, rev, inv, present_parents):
# Serialises inv with serializer_v5, adds it to self.inv_weave under
# rev.revision_id and records the new SHA-1 on rev.inventory_sha1.
# NOTE(review): the loop that binds `ie` and `file_id`, and some arguments
# of the inv_weave.add call, are not visible in this view.
339
# the XML is now updated with text versions
343
if ie.kind == 'root_directory':
345
assert hasattr(ie, 'revision'), \
346
'no revision on {%s} in {%s}' % \
347
(file_id, rev.revision_id)
348
new_inv_xml = serializer_v5.write_inventory_to_string(inv)
349
new_inv_sha1 = sha_string(new_inv_xml)
350
self.inv_weave.add(rev.revision_id,
352
new_inv_xml.splitlines(True),
354
rev.inventory_sha1 = new_inv_sha1
356
def _convert_revision_contents(self, rev, inv, present_parents):
# Loads the updated inventory of every present parent and calls
# _convert_file_version for inventory entries of this revision.
# NOTE(review): the loop that binds `ie` and the remaining mutter()
# arguments are not visible in this view.
357
"""Convert all the files within a revision.
359
Also upgrade the inventory to refer to the text revision ids."""
360
rev_id = rev.revision_id
361
mutter('converting texts of revision {%s}',
363
parent_invs = map(self._load_updated_inventory, present_parents)
366
self._convert_file_version(rev, ie, parent_invs)
368
def _convert_file_version(self, rev, ie, parent_invs):
# Fetches the file's weave from self.text_weaves, computes the entry's
# previous heads across parent_invs, asserts that each of them has already
# been converted, and delegates to snapshot_ie. On exit the entry must
# carry a revision.
# NOTE(review): the end of the docstring, the handling of 'root_directory',
# the binding of `file_id` and the creation of a missing weave are not
# visible in this view.
369
"""Convert one version of one file.
371
The file needs to be added into the weave if it is a merge
372
of >=2 parents or if it's changed from its parent.
374
if ie.kind == 'root_directory':
377
rev_id = rev.revision_id
378
w = self.text_weaves.get(file_id)
381
self.text_weaves[file_id] = w
383
previous_entries = ie.find_previous_heads(parent_invs, w)
384
for old_revision in previous_entries:
385
# if this fails, its a ghost ?
386
assert old_revision in self.converted_revs
387
self.snapshot_ie(previous_entries, ie, w, rev_id)
389
assert getattr(ie, 'revision', None) is not None
391
def snapshot_ie(self, previous_revisions, ie, w, rev_id):
# With exactly one previous revision whose entry is unchanged, the entry
# reuses that previous revision id. The visible lines also add a new version
# to weave `w`: either the stored text (checked against ie.text_sha1 and
# ie.text_size) or an empty version.
# NOTE(review): the branching that selects between these outcomes (and any
# early return) is not visible in this view.
392
# TODO: convert this logic, which is ~= snapshot to
393
# a call to:. This needs the path figured out. rather than a work_tree
394
# a v4 revision_tree can be given, or something that looks enough like
395
# one to give the file content to the entry if it needs it.
396
# and we need something that looks like a weave store for snapshot to
398
#ie.snapshot(rev, PATH, previous_revisions, REVISION_TREE, InMemoryWeaveStore(self.text_weaves))
399
if len(previous_revisions) == 1:
400
# NOTE(review): values()[0] needs an indexable result; assuming
# previous_revisions is a dict, this relies on Python 2 semantics.
previous_ie = previous_revisions.values()[0]
401
if ie._unchanged(previous_ie):
402
ie.revision = previous_ie.revision
404
parent_indexes = map(w.lookup, previous_revisions)
406
text = self.branch.repository.text_store.get(ie.text_id)
407
file_lines = text.readlines()
408
assert sha_strings(file_lines) == ie.text_sha1
409
assert sum(map(len, file_lines)) == ie.text_size
410
w.add(rev_id, parent_indexes, file_lines, ie.text_sha1)
413
w.add(rev_id, parent_indexes, [], None)
415
##mutter('import text {%s} of {%s}',
416
## ie.text_id, file_id)
418
def _make_order(self):
# Starts with todo = every loaded revision id and done = a copy of
# self.absent_revisions. Candidates are scanned in sorted order, and a
# revision is eligible once all of its parent ids are in `done`.
# NOTE(review): the end of the docstring, the outer loop, the bookkeeping
# after the eligibility test and the return are not visible in this view.
419
"""Return a suitable order for importing revisions.
421
The order must be such that an revision is imported after all
422
its (present) parents.
424
todo = set(self.revisions.keys())
425
done = self.absent_revisions.copy()
428
# scan through looking for a revision whose parents
430
for rev_id in sorted(list(todo)):
431
rev = self.revisions[rev_id]
432
parent_ids = set(rev.parent_ids)
433
if parent_ids.issubset(done):
434
# can take this one now
442
t = get_transport(url)
66
# NOTE(review): these two progress-bar notes appear to be the tail of the
# pb-based _backup_control_dir(), displaced here by the interleaving of two
# file revisions; they tell the user what to do with the backup directory.
self.pb.note('if conversion fails, you can move this directory back to .bzr')
67
self.pb.note('if it succeeds, you can remove this directory if you wish')
69
def upgrade(url, format=None):
70
"""Upgrade to format, or the default bzrdir format if not supplied."""