# sort of all revisions. (Or do we, can we just before doing a revision
# see that all its parents have either been converted or abandoned?)

# Cannot import a revision until all its parents have been
# imported. In other words, we can only import revisions whose
# parents have all been imported. The first step must be to
# import a revision with no parents, of which there must be at
# least one. (So perhaps it's useful to store forward pointers
# from a list of parents to their children?)
#
# Another (equivalent?) approach is to build up the ordered
# ancestry list for the last revision, and walk through that. We
# are going to need that.
#
# We don't want to have to recurse all the way back down the list.
#
# Suppose we keep a queue of the revisions able to be processed at
# any point. This starts out with all the revisions having no
# parents. (A sketch of this queue-based ordering appears just after
# the imports below.)
#
# This seems like a generally useful algorithm...
#
# The current algorithm is dumb (O(n**2)?) but will do the job, and
# takes less than a second on the bzr.dev branch.

# This currently does a kind of lazy conversion of file texts, where a
# new text is written in every version. That's unnecessary but for
# the moment saves us having to worry about when files need new
# versions.

from cStringIO import StringIO
import os
from stat import S_ISDIR

import bzrlib
from bzrlib.branch import Branch
import bzrlib.bzrdir as bzrdir
from bzrlib.bzrdir import BzrDirFormat, BzrDirFormat4, BzrDirFormat5, BzrDirFormat6
import bzrlib.errors as errors
from bzrlib.errors import NoSuchFile, UpgradeReadonly
import bzrlib.hashcache as hashcache
from bzrlib.lockable_files import LockableFiles
from bzrlib.osutils import sha_strings, sha_string, pathjoin, abspath
from bzrlib.ui import ui_factory
from bzrlib.store.text import TextStore
from bzrlib.store.weave import WeaveStore
from bzrlib.trace import mutter, note, warning
from bzrlib.transactions import PassThroughTransaction
from bzrlib.transport import get_transport
from bzrlib.transport.local import LocalTransport
from bzrlib.weave import Weave
from bzrlib.weavefile import read_weave, write_weave
from bzrlib.xml4 import serializer_v4
from bzrlib.xml5 import serializer_v5
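

# The helper below is only an illustration of the queue-based ordering
# described in the comments at the top of this file: start from the
# revisions with no parents and emit a revision once all of its parents
# have been emitted.  It is not used by the converter, which currently
# relies on the simpler O(n**2) scan in Convert._make_order.  The 'graph'
# argument (a dict of revision id -> parent revision ids, assumed to
# contain every revision to be imported) is a hypothetical input.
def _topo_order_sketch(graph):
    """Yield revision ids so that every revision follows all its parents."""
    children = {}       # parent id -> ids of revisions that list it as a parent
    remaining = {}      # revision id -> number of parents not yet emitted
    queue = []          # revisions whose parents have all been emitted
    for rev_id, parents in graph.items():
        remaining[rev_id] = len(parents)
        if not parents:
            queue.append(rev_id)
        for parent_id in parents:
            children.setdefault(parent_id, []).append(rev_id)
    while queue:
        rev_id = queue.pop()
        yield rev_id
        for child_id in children.get(rev_id, []):
            remaining[child_id] -= 1
            if remaining[child_id] == 0:
                queue.append(child_id)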


class Convert(object):

    def __init__(self, transport):
        self.base = transport.base
        self.converted_revs = set()
        self.absent_revisions = set()
        self.text_count = 0
        self.revisions = {}
        self.transport = transport
        if self.transport.is_readonly():
            raise UpgradeReadonly
        self.control_files = LockableFiles(transport.clone(bzrlib.BZRDIR), 'branch-lock')
        # Lock the branch (soon to be meta dir) to prevent anyone racing with us
        # This is currently windows incompatible, it will deadlock. When the upgrade
        # logic becomes format specific, then we can have the format know how to pass this
        # on. Also note that we probably have an 'upgrade meta' which upgrades the constituent
        # parts.
        print "FIXME: control files reuse"
        self.control_files.lock_write()
        try:
            self.convert()
        finally:
            self.control_files.unlock()

    def convert(self):
        if not self._open_branch():
            return
        note('starting upgrade of %s', self.base)
        self._backup_control_dir()
        self.pb = ui_factory.progress_bar()
        if isinstance(self.old_format, BzrDirFormat4):
            note('starting upgrade from format 4 to 5')
            if isinstance(self.transport, LocalTransport):
                self.bzrdir.get_workingtree_transport(None).delete('stat-cache')
            self._convert_to_weaves()
        if isinstance(self.old_format, BzrDirFormat5):
            note('starting upgrade from format 5 to 6')
            self._convert_to_prefixed()

    def _convert_to_prefixed(self):
        from bzrlib.store import hash_prefix
        bzr_transport = self.transport.clone('.bzr')
        bzr_transport.delete('branch-format')
        for store_name in ["weaves", "revision-store"]:
            note("adding prefixes to %s" % store_name)
            store_transport = bzr_transport.clone(store_name)
            for filename in store_transport.list_dir('.'):
                if (filename.endswith(".weave") or
                    filename.endswith(".gz") or
                    filename.endswith(".sig")):
                    file_id = os.path.splitext(filename)[0]
                else:
                    file_id = filename
                prefix_dir = hash_prefix(file_id)
                # FIXME keep track of the dirs made RBC 20060121
                try:
                    store_transport.move(filename, prefix_dir + '/' + filename)
                except NoSuchFile: # catches missing dirs strangely enough
                    store_transport.mkdir(prefix_dir)
                    store_transport.move(filename, prefix_dir + '/' + filename)
        self.old_format = BzrDirFormat6()
        self._set_new_format(self.old_format.get_format_string())
        self.bzrdir = self.old_format.open(self.transport)
        self.branch = self.bzrdir.open_branch()

    def _convert_to_weaves(self):
        note('note: upgrade may be faster if all store files are ungzipped first')
        bzr_transport = self.transport.clone('.bzr')
        try:
            stat = bzr_transport.stat('weaves')
            if not S_ISDIR(stat.st_mode):
                bzr_transport.delete('weaves')
                bzr_transport.mkdir('weaves')
        except NoSuchFile:
            bzr_transport.mkdir('weaves')
        self.inv_weave = Weave('inventory')
        # holds in-memory weaves for all files
        self.text_weaves = {}
        bzr_transport.delete('branch-format')
        self._convert_working_inv()
        rev_history = self.branch.revision_history()
        # to_read is a stack holding the revisions we still need to process;
        # appending to it adds new highest-priority revisions
        self.known_revisions = set(rev_history)
        self.to_read = rev_history[-1:]
        while self.to_read:
            rev_id = self.to_read.pop()
            if (rev_id not in self.revisions
                and rev_id not in self.absent_revisions):
                self._load_one_rev(rev_id)
        to_import = self._make_order()
        for i, rev_id in enumerate(to_import):
            self.pb.update('converting revision', i, len(to_import))
            self._convert_one_rev(rev_id)
        self._write_all_weaves()
        self._write_all_revs()
        note('upgraded to weaves:')
        note(' %6d revisions and inventories' % len(self.revisions))
        note(' %6d revisions not present' % len(self.absent_revisions))
        note(' %6d texts' % self.text_count)
        self._cleanup_spare_files_after_format4()
        self.old_format = BzrDirFormat5()
        self._set_new_format(self.old_format.get_format_string())
        self.bzrdir = self.old_format.open(self.transport)
        self.branch = self.bzrdir.open_branch()

    def _open_branch(self):
        self.old_format = BzrDirFormat.find_format(self.transport)
        self.bzrdir = self.old_format.open(self.transport)
        self.branch = self.bzrdir.open_branch()
        if isinstance(self.old_format, BzrDirFormat6):
            note('this branch is in the most current format (%s)', self.old_format)
            return False
        if (not isinstance(self.old_format, BzrDirFormat4) and
            not isinstance(self.old_format, BzrDirFormat5) and
            not isinstance(self.old_format, bzrdir.BzrDirMetaFormat1)):
            raise errors.BzrError("cannot upgrade from branch format %s" %
                                  self.branch._branch_format)
        return True

    def _set_new_format(self, format):
        self.branch.control_files.put_utf8('branch-format', format)

    def _cleanup_spare_files_after_format4(self):
        transport = self.transport.clone('.bzr')
        print "FIXME working tree upgrade foo."
        for n in 'merged-patches', 'pending-merged-patches':
            try:
                ## assert os.path.getsize(p) == 0
                transport.delete(n)
            except NoSuchFile:
                pass
        transport.delete_tree('inventory-store')
        transport.delete_tree('text-store')

    def _backup_control_dir(self):
        note('making backup of tree history')
        self.transport.copy_tree('.bzr', '.bzr.backup')
        note('%s.bzr has been backed up to %s.bzr.backup',
             self.transport.base,
             self.transport.base)
        note('if conversion fails, you can move this directory back to .bzr')
        note('if it succeeds, you can remove this directory if you wish')

    def _convert_working_inv(self):
        branch = self.branch
        inv = serializer_v4.read_inventory(branch.control_files.get('inventory'))
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        print "fixme inventory is a working tree change."
        branch.control_files.put('inventory', new_inv_xml)

    def _write_all_weaves(self):
        bzr_transport = self.transport.clone('.bzr')
        controlweaves = WeaveStore(bzr_transport, prefixed=False)
        weave_transport = bzr_transport.clone('weaves')
        weaves = WeaveStore(weave_transport, prefixed=False)
        transaction = PassThroughTransaction()

        controlweaves.put_weave('inventory', self.inv_weave, transaction)
        for i, (file_id, file_weave) in enumerate(self.text_weaves.items()):
            self.pb.update('writing weave', i, len(self.text_weaves))
            weaves.put_weave(file_id, file_weave, transaction)

    def _write_all_revs(self):
        """Write all revisions out in new form."""
        transport = self.transport.clone('.bzr')
        transport.delete_tree('revision-store')
        transport.mkdir('revision-store')
        revision_transport = transport.clone('revision-store')
        # written unprefixed and compressed; the format 5->6 step later adds
        # the hash prefixes and expects the gzipped files
        revision_store = TextStore(revision_transport,
                                   prefixed=False,
                                   compressed=True)
        for i, rev_id in enumerate(self.converted_revs):
            self.pb.update('write revision', i, len(self.converted_revs))
            rev_tmp = StringIO()
            serializer_v5.write_revision(self.revisions[rev_id], rev_tmp)
            rev_tmp.seek(0)
            revision_store.add(rev_tmp, rev_id)

    def _load_one_rev(self, rev_id):
        """Load a revision object into memory.

        Any parents not either loaded or abandoned get queued to be
        loaded."""
        self.pb.update('loading revision',
                       len(self.revisions),
                       len(self.known_revisions))
        if not self.branch.repository.revision_store.has_id(rev_id):
            note('revision {%s} not present in branch; '
                 'will be converted as a ghost',
                 rev_id)
            self.absent_revisions.add(rev_id)
        else:
            rev_xml = self.branch.repository.revision_store.get(rev_id).read()
            rev = serializer_v4.read_revision_from_string(rev_xml)
            for parent_id in rev.parent_ids:
                self.known_revisions.add(parent_id)
                self.to_read.append(parent_id)
            self.revisions[rev_id] = rev

    def _load_old_inventory(self, rev_id):
        assert rev_id not in self.converted_revs
        old_inv_xml = self.branch.repository.inventory_store.get(rev_id).read()
        inv = serializer_v4.read_inventory_from_string(old_inv_xml)
        rev = self.revisions[rev_id]
        if rev.inventory_sha1:
            assert rev.inventory_sha1 == sha_string(old_inv_xml), \
                'inventory sha mismatch for {%s}' % rev_id
        return inv

    def _load_updated_inventory(self, rev_id):
        assert rev_id in self.converted_revs
        inv_xml = self.inv_weave.get_text(rev_id)
        inv = serializer_v5.read_inventory_from_string(inv_xml)
        return inv

    def _convert_one_rev(self, rev_id):
        """Convert revision and all referenced objects to new format."""
        rev = self.revisions[rev_id]
        inv = self._load_old_inventory(rev_id)
        present_parents = [p for p in rev.parent_ids
                           if p not in self.absent_revisions]
        self._convert_revision_contents(rev, inv, present_parents)
        self._store_new_weave(rev, inv, present_parents)
        self.converted_revs.add(rev_id)

    def _store_new_weave(self, rev, inv, present_parents):
        # the XML is now updated with text versions
        for file_id in inv:
            ie = inv[file_id]
            if ie.kind == 'root_directory':
                continue
            assert hasattr(ie, 'revision'), \
                'no revision on {%s} in {%s}' % \
                (file_id, rev.revision_id)
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        new_inv_sha1 = sha_string(new_inv_xml)
        self.inv_weave.add(rev.revision_id,
                           present_parents,
                           new_inv_xml.splitlines(True),
                           new_inv_sha1)
        rev.inventory_sha1 = new_inv_sha1

    def _convert_revision_contents(self, rev, inv, present_parents):
        """Convert all the files within a revision.

        Also upgrade the inventory to refer to the text revision ids."""
        rev_id = rev.revision_id
        mutter('converting texts of revision {%s}',
               rev_id)
        parent_invs = map(self._load_updated_inventory, present_parents)
        for file_id in inv:
            ie = inv[file_id]
            self._convert_file_version(rev, ie, parent_invs)

    def _convert_file_version(self, rev, ie, parent_invs):
        """Convert one version of one file.

        The file needs to be added into the weave if it is a merge
        of >=2 parents or if it's changed from its parent.
        """
        if ie.kind == 'root_directory':
            return
        file_id = ie.file_id
        rev_id = rev.revision_id
        w = self.text_weaves.get(file_id)
        if w is None:
            w = Weave(file_id)
            self.text_weaves[file_id] = w
        previous_entries = ie.find_previous_heads(parent_invs, w)
        for old_revision in previous_entries:
            # if this fails, it's a ghost?
            assert old_revision in self.converted_revs
        self.snapshot_ie(previous_entries, ie, w, rev_id)
        assert getattr(ie, 'revision', None) is not None

    def snapshot_ie(self, previous_revisions, ie, w, rev_id):
        # TODO: convert this logic, which is ~= snapshot, to
        # a call to:. This needs the path figured out. rather than a work_tree
        # a v4 revision_tree can be given, or something that looks enough like
        # one to give the file content to the entry if it needs it.
        # and we need something that looks like a weave store for snapshot to
        # save against.
        #ie.snapshot(rev, PATH, previous_revisions, REVISION_TREE, InMemoryWeaveStore(self.text_weaves))
        if len(previous_revisions) == 1:
            previous_ie = previous_revisions.values()[0]
            if ie._unchanged(previous_ie):
                ie.revision = previous_ie.revision
                return
        parent_indexes = map(w.lookup, previous_revisions)
        if ie.has_text():
            text = self.branch.repository.text_store.get(ie.text_id)
            file_lines = text.readlines()
            assert sha_strings(file_lines) == ie.text_sha1
            assert sum(map(len, file_lines)) == ie.text_size
            w.add(rev_id, parent_indexes, file_lines, ie.text_sha1)
            self.text_count += 1
        else:
            w.add(rev_id, parent_indexes, [], None)
        ie.revision = rev_id
        ##mutter('import text {%s} of {%s}',
        ##       ie.text_id, file_id)

    def _make_order(self):
        """Return a suitable order for importing revisions.

        The order must be such that a revision is imported after all
        its (present) parents.
        """
        todo = set(self.revisions.keys())
        done = self.absent_revisions.copy()
        order = []
        while todo:
            # scan through looking for a revision whose parents
            # are all done
            for rev_id in sorted(list(todo)):
                rev = self.revisions[rev_id]
                parent_ids = set(rev.parent_ids)
                if parent_ids.issubset(done):
                    # can take this one now
                    order.append(rev_id)
                    todo.remove(rev_id)
                    done.add(rev_id)
        return order


def upgrade(url):
    t = get_transport(url)
    Convert(t)
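
# Example of driving the conversion by hand (a sketch; in bzr this is
# normally reached through the 'bzr upgrade' command, and the path below
# is purely illustrative):
#
#   upgrade('/path/to/branch')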