3
# Copyright (C) 2005 Canonical Ltd
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
"""Experiment in converting existing bzr branches to weaves."""
21
# To make this properly useful
23
# 1. assign text version ids, and put those text versions into
24
# the inventory as they're converted.
26
# 2. keep track of the previous version of each file, rather than
27
# just using the last one imported
29
# 3. assign entry versions when files are added, renamed or moved.
31
# 4. when merged-in versions are observed, walk down through them
32
# to discover everything, then commit bottom-up
34
# 5. track ancestry as things are merged in, and commit that in each
37
# Perhaps it's best to first walk the whole graph and make a plan for
38
# what should be imported in what order? Need a kind of topological
39
# sort of all revisions. (Or do we, can we just before doing a revision
40
# see that all its parents have either been converted or abandoned?)
43
# Cannot import a revision until all its parents have been
44
# imported. in other words, we can only import revisions whose
45
# parents have all been imported. the first step must be to
46
# import a revision with no parents, of which there must be at
47
# least one. (So perhaps it's useful to store forward pointers
48
# from a list of parents to their children?)
50
# Another (equivalent?) approach is to build up the ordered
51
# ancestry list for the last revision, and walk through that. We
52
# are going to need that.
54
# We don't want to have to recurse all the way back down the list.
56
# Suppose we keep a queue of the revisions able to be processed at
57
# any point. This starts out with all the revisions having no
60
# This seems like a generally useful algorithm...
62
# The current algorithm is dumb (O(n**2)?) but will do the job, and
63
# takes less than a second on the bzr.dev branch.
65
# This currently does a kind of lazy conversion of file texts, where a
66
# new text is written in every version. That's unnecessary but for
67
# the moment saves us having to worry about when files need new
70
# TODO: Check that the working directory is clean before converting
83
import hotshot, hotshot.stats
88
from bzrlib.branch import Branch, find_branch, BZR_BRANCH_FORMAT_5
89
from bzrlib.revfile import Revfile
90
from bzrlib.weave import Weave
91
from bzrlib.weavefile import read_weave, write_weave
92
from bzrlib.progress import ProgressBar
93
from bzrlib.atomicfile import AtomicFile
94
from bzrlib.xml4 import serializer_v4
95
from bzrlib.xml5 import serializer_v5
96
from bzrlib.trace import mutter, note, warning, enable_default_logging
97
from bzrlib.osutils import sha_strings, sha_string
98
from bzrlib.commit import merge_ancestry_lines
101
class Convert(object):
103
self.converted_revs = set()
104
self.absent_revisions = set()
107
self.inventories = {}
113
if not os.path.exists('.bzr/allow-upgrade'):
114
raise Exception, "please create .bzr/allow-upgrade to indicate consent"
115
self._backup_control_dir()
116
note('starting upgrade')
117
note('note: upgrade will be faster if all store files are ungzipped first')
118
self.pb = ProgressBar()
119
if not os.path.isdir('.bzr/weaves'):
120
os.mkdir('.bzr/weaves')
121
self.inv_weave = Weave('__inventory')
122
self.anc_weave = Weave('__ancestry')
124
# holds in-memory weaves for all files
125
self.text_weaves = {}
126
self.branch = Branch('.', relax_version_check=True)
127
os.remove(self.branch.controlfilename('branch-format'))
128
self._convert_working_inv()
129
rev_history = self.branch.revision_history()
130
# to_read is a stack holding the revisions we still need to process;
131
# appending to it adds new highest-priority revisions
132
self.known_revisions = set(rev_history)
133
self.to_read = [rev_history[-1]]
135
rev_id = self.to_read.pop()
136
if (rev_id not in self.revisions
137
and rev_id not in self.absent_revisions):
138
self._load_one_rev(rev_id)
140
to_import = self._make_order()
141
for i, rev_id in enumerate(to_import):
142
self.pb.update('converting revision', i, len(to_import))
143
self._convert_one_rev(rev_id)
145
note('upgraded to weaves:')
146
note(' %6d revisions and inventories' % len(self.revisions))
147
note(' %6d absent revisions removed' % len(self.absent_revisions))
148
note(' %6d texts' % self.text_count)
149
self._write_all_weaves()
150
self._write_all_revs()
151
self._set_new_format()
152
self._cleanup_spare_files()
155
def _set_new_format(self):
156
f = self.branch.controlfile('branch-format', 'wb')
158
f.write(BZR_BRANCH_FORMAT_5)
164
def _cleanup_spare_files(self):
165
for n in 'merged-patches', 'pending-merged-patches':
166
p = self.branch.controlfilename(n)
167
if not os.path.exists(p):
169
assert os.path.getsize(p) == 0
171
os.remove('.bzr/allow-upgrade')
172
shutil.rmtree('.bzr/inventory-store')
173
shutil.rmtree('.bzr/text-store')
176
def _backup_control_dir(self):
177
shutil.copytree('.bzr', '.bzr.backup')
178
note('.bzr has been backed up to .bzr.backup')
179
note('if conversion fails, you can move this directory back to .bzr')
180
note('if it succeeds, you can remove this directory if you wish')
183
def _convert_working_inv(self):
185
inv = serializer_v4.read_inventory(branch.controlfile('inventory', 'rb'))
186
serializer_v5.write_inventory(inv, branch.controlfile('inventory', 'wb'))
190
def _write_all_weaves(self):
191
write_a_weave(self.inv_weave, '.bzr/inventory.weave')
192
write_a_weave(self.anc_weave, '.bzr/ancestry.weave')
195
for file_id, file_weave in self.text_weaves.items():
196
self.pb.update('writing weave', i, len(self.text_weaves))
197
write_a_weave(file_weave, '.bzr/weaves/%s.weave' % file_id)
203
def _write_all_revs(self):
204
"""Write all revisions out in new form."""
205
shutil.rmtree('.bzr/revision-store')
206
os.mkdir('.bzr/revision-store')
208
for i, rev_id in enumerate(self.converted_revs):
209
self.pb.update('write revision', i, len(self.converted_revs))
210
f = file('.bzr/revision-store/%s' % rev_id, 'wb')
212
serializer_v5.write_revision(self.revisions[rev_id], f)
219
def _load_one_rev(self, rev_id):
220
"""Load a revision object into memory.
222
Any parents not either loaded or abandoned get queued to be
224
self.pb.update('loading revision',
226
len(self.known_revisions))
227
if rev_id not in self.branch.revision_store:
229
note('revision {%s} not present in branch; '
230
'will not be converted',
232
self.absent_revisions.add(rev_id)
234
rev_xml = self.branch.revision_store[rev_id].read()
235
rev = serializer_v4.read_revision_from_string(rev_xml)
236
for parent_id in rev.parent_ids:
237
self.known_revisions.add(parent_id)
238
self.to_read.append(parent_id)
239
self.revisions[rev_id] = rev
240
old_inv_xml = self.branch.inventory_store[rev_id].read()
241
inv = serializer_v4.read_inventory_from_string(old_inv_xml)
242
assert rev.inventory_sha1 == sha_string(old_inv_xml)
243
self.inventories[rev_id] = inv
246
def _convert_one_rev(self, rev_id):
247
"""Convert revision and all referenced objects to new format."""
248
rev = self.revisions[rev_id]
249
inv = self.inventories[rev_id]
250
for parent_id in rev.parent_ids[:]:
251
if parent_id in self.absent_revisions:
252
rev.parent_ids.remove(parent_id)
254
note('remove {%s} as parent of {%s}', parent_id, rev_id)
255
self._convert_revision_contents(rev, inv)
256
# the XML is now updated with text versions
257
new_inv_xml = serializer_v5.write_inventory_to_string(inv)
258
new_inv_sha1 = sha_string(new_inv_xml)
259
self.inv_weave.add(rev_id, rev.parent_ids,
260
new_inv_xml.splitlines(True),
262
# TODO: Upgrade revision XML and write that out
263
rev.inventory_sha1 = new_inv_sha1
264
self._make_rev_ancestry(rev)
265
self.converted_revs.add(rev_id)
268
def _make_rev_ancestry(self, rev):
269
rev_id = rev.revision_id
270
for parent_id in rev.parent_ids:
271
assert parent_id in self.converted_revs
273
lines = list(self.anc_weave.mash_iter(rev.parent_ids))
276
lines.append(rev_id + '\n')
278
parent_ancestries = [self.ancestries[p] for p in rev.parent_ids]
279
new_lines = merge_ancestry_lines(rev_id, parent_ancestries)
280
assert set(lines) == set(new_lines)
281
self.ancestries[rev_id] = new_lines
282
self.anc_weave.add(rev_id, rev.parent_ids, lines)
285
def _convert_revision_contents(self, rev, inv):
286
"""Convert all the files within a revision.
288
Also upgrade the inventory to refer to the text revision ids."""
289
rev_id = rev.revision_id
290
mutter('converting texts of revision {%s}',
294
self._set_name_version(rev, ie)
295
if ie.kind != 'file':
297
self._convert_file_version(rev, ie)
300
def _set_name_version(self, rev, ie):
301
"""Set name version for a file.
303
Done in a slightly lazy way: if the file is renamed or in a merge revision
304
it gets a new version, otherwise the same as before.
307
if len(rev.parent_ids) != 1:
308
ie.name_version = rev.revision_id
310
old_inv = self.inventories[rev.parent_ids[0]]
311
if not old_inv.has_id(file_id):
312
ie.name_version = rev.revision_id
314
old_ie = old_inv[file_id]
315
if (old_ie.parent_id != ie.parent_id
316
or old_ie.name != ie.name):
317
ie.name_version = rev.revision_id
319
ie.name_version = old_ie.name_version
323
def _convert_file_version(self, rev, ie):
324
"""Convert one version of one file.
326
The file needs to be added into the weave if it is a merge
327
of >=2 parents or if it's changed from its parent.
330
rev_id = rev.revision_id
331
w = self.text_weaves.get(file_id)
334
self.text_weaves[file_id] = w
335
file_lines = self.branch.text_store[ie.text_id].readlines()
336
assert sha_strings(file_lines) == ie.text_sha1
337
assert sum(map(len, file_lines)) == ie.text_size
340
for parent_id in rev.parent_ids:
341
##if parent_id in self.absent_revisions:
343
assert parent_id in self.converted_revs, \
344
'parent {%s} not converted' % parent_id
345
parent_inv = self.inventories[parent_id]
346
if parent_inv.has_id(file_id):
347
parent_ie = parent_inv[file_id]
348
old_text_version = parent_ie.text_version
349
assert old_text_version in self.converted_revs
350
if old_text_version not in file_parents:
351
file_parents.append(old_text_version)
352
if parent_ie.text_sha1 != ie.text_sha1:
354
if len(file_parents) != 1 or text_changed:
355
w.add(rev_id, file_parents, file_lines, ie.text_sha1)
356
ie.text_version = rev_id
358
##mutter('import text {%s} of {%s}',
359
## ie.text_id, file_id)
361
##mutter('text of {%s} unchanged from parent', file_id)
362
ie.text_version = file_parents[0]
367
def _make_order(self):
368
"""Return a suitable order for importing revisions.
370
The order must be such that an revision is imported after all
371
its (present) parents.
373
todo = set(self.revisions.keys())
374
done = self.absent_revisions.copy()
377
# scan through looking for a revision whose parents
379
for rev_id in sorted(list(todo)):
380
rev = self.revisions[rev_id]
381
parent_ids = set(rev.parent_ids)
382
if parent_ids.issubset(done):
383
# can take this one now
390
def write_a_weave(weave, filename):
391
inv_wf = file(filename, 'wb')
393
write_weave(weave, inv_wf)
400
def profile_convert():
401
prof_f = tempfile.NamedTemporaryFile()
403
prof = hotshot.Profile(prof_f.name)
405
prof.runcall(Convert)
408
stats = hotshot.stats.load(prof_f.name)
410
stats.sort_stats('time')
411
# XXX: Might like to write to stderr or the trace file instead but
412
# print_stats seems hardcoded to stdout
413
stats.print_stats(100)
416
if __name__ == '__main__':
417
enable_default_logging()
419
if '-p' in sys.argv[1:]: