~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to tools/history2weaves.py

Committer: Martin Pool
Date: 2005-09-22 06:28:55 UTC
Revision ID: mbp@sourcefrog.net-20050922062855-a29aa53982b752d6

- try to avoid checking texts repeatedly

files added:
HACKING

Makefile

NEWS.developers

bzr-man.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/selftest

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/weavestore.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testsweet.py

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/history2weaves.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
bzrlib/statcache.py

bzrlib/tests.py

files renamed:
elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

contrib/bash/bzr => contrib/bash/bzr.simple

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

NEWS

README

TODO

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/osutils.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/add-bzr-to-baz

doc/formats.txt

doc/index.txt

doc/tagging.txt

doc/todo-from-arch.txt

setup.py

testbzr

Show diffs side-by-side

added added

removed removed

tools/history2weaves.py

#! /usr/bin/python

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Experiment in converting existing bzr branches to weaves."""

# To make this properly useful

# 1. assign text version ids, and put those text versions into

# the inventory as they're converted.

# 2. keep track of the previous version of each file, rather than

# just using the last one imported

# 3. assign entry versions when files are added, renamed or moved.

# 4. when merged-in versions are observed, walk down through them

# to discover everything, then commit bottom-up

# 5. track ancestry as things are merged in, and commit that in each

# revision

# Perhaps it's best to first walk the whole graph and make a plan for

# what should be imported in what order? Need a kind of topological

# sort of all revisions. (Or do we? Can we just check, before doing a revision,

# that all its parents have either been converted or abandoned?)

# Cannot import a revision until all its parents have been

# imported. In other words, we can only import revisions whose

# parents have all been imported. the first step must be to

# import a revision with no parents, of which there must be at

# least one. (So perhaps it's useful to store forward pointers

# from a list of parents to their children?)

# Another (equivalent?) approach is to build up the ordered

# ancestry list for the last revision, and walk through that. We

# are going to need that.

# We don't want to have to recurse all the way back down the list.

# Suppose we keep a queue of the revisions able to be processed at

# any point. This starts out with all the revisions having no

# parents.

# This seems like a generally useful algorithm...

# The current algorithm is dumb (O(n**2)?) but will do the job, and

# takes less than a second on the bzr.dev branch.

# This currently does a kind of lazy conversion of file texts, where a

# new text is written in every version. That's unnecessary but for

# the moment saves us having to worry about when files need new

# versions.

# TODO: Check that the working directory is clean before converting

# Optional psyco acceleration.  The constant-false guard keeps this
# switched off; change the condition to experiment with it.
if False:
    try:
        import psyco
        psyco.full()
    except ImportError:
        # psyco is not installed; carry on without it
        pass

import os

import tempfile

import hotshot, hotshot.stats

import sys

import logging

import shutil

from bzrlib.branch import Branch, find_branch, BZR_BRANCH_FORMAT_5

from bzrlib.revfile import Revfile

from bzrlib.weave import Weave

from bzrlib.weavefile import read_weave, write_weave

from bzrlib.progress import ProgressBar

from bzrlib.atomicfile import AtomicFile

from bzrlib.xml4 import serializer_v4

from bzrlib.xml5 import serializer_v5

from bzrlib.trace import mutter, note, warning, enable_default_logging

from bzrlib.osutils import sha_strings, sha_string

from bzrlib.commit import merge_ancestry_lines

100

101

class Convert(object):
    """One-shot converter of the branch in the current directory to weaves.

    Constructing an instance performs the whole upgrade: ``__init__`` sets
    up the bookkeeping containers and then immediately calls ``convert()``.
    """

    def __init__(self):
        # ids of revisions that have been converted so far
        self.converted_revs = set()
        # ids that were asked for but are not in the revision store
        self.absent_revisions = set()
        # number of texts added to the per-file weaves
        self.text_count = 0
        # revision id -> revision object, filled in by _load_one_rev
        self.revisions = {}
        # revision id -> inventory object, filled in by _load_one_rev
        self.inventories = {}
        self.convert()

    def convert(self):
        """Upgrade the branch in the current directory in place.

        Backs up ``.bzr``, loads every revision reachable from the last
        entry of the revision history, converts them parents-first, then
        writes the weaves, the revisions and the new format marker and
        removes the old stores.

        Raises Exception if ``.bzr/allow-upgrade`` does not exist.
        """
        if not os.path.exists('.bzr/allow-upgrade'):
            # call form of raise: valid on every Python version, unlike
            # the old "raise Exception, msg" statement
            raise Exception("please create .bzr/allow-upgrade to indicate consent")
        self._backup_control_dir()
        note('starting upgrade')
        note('note: upgrade will be faster if all store files are ungzipped first')
        self.pb = ProgressBar()
        if not os.path.isdir('.bzr/weaves'):
            os.mkdir('.bzr/weaves')
        self.inv_weave = Weave('__inventory')
        self.anc_weave = Weave('__ancestry')
        self.ancestries = {}
        # holds in-memory weaves for all files
        self.text_weaves = {}
        self.branch = Branch('.', relax_version_check=True)
        os.remove(self.branch.controlfilename('branch-format'))
        self._convert_working_inv()
        rev_history = self.branch.revision_history()
        # to_read is a stack holding the revisions we still need to process;
        # appending to it adds new highest-priority revisions
        self.known_revisions = set(rev_history)
        # Slice instead of indexing [-1] so that a branch without any
        # commits gives an empty stack rather than an IndexError.
        self.to_read = list(rev_history[-1:])
        while self.to_read:
            rev_id = self.to_read.pop()
            if (rev_id not in self.revisions
                    and rev_id not in self.absent_revisions):
                self._load_one_rev(rev_id)
        self.pb.clear()
        to_import = self._make_order()
        for i, rev_id in enumerate(to_import):
            self.pb.update('converting revision', i, len(to_import))
            self._convert_one_rev(rev_id)
        self.pb.clear()
        note('upgraded to weaves:')
        note(' %6d revisions and inventories' % len(self.revisions))
        note(' %6d absent revisions removed' % len(self.absent_revisions))
        note(' %6d texts' % self.text_count)
        self._write_all_weaves()
        self._write_all_revs()
        self._set_new_format()
        self._cleanup_spare_files()

    def _set_new_format(self):
        """Write the format-5 marker into the branch-format control file."""
        f = self.branch.controlfile('branch-format', 'wb')
        try:
            f.write(BZR_BRANCH_FORMAT_5)
        finally:
            f.close()

    def _cleanup_spare_files(self):
        """Delete control files and stores that the new format does not use.

        The two merge bookkeeping files are only removed when present, and
        are asserted to be empty first.
        """
        for n in 'merged-patches', 'pending-merged-patches':
            p = self.branch.controlfilename(n)
            if not os.path.exists(p):
                continue
            assert os.path.getsize(p) == 0
            os.remove(p)
        os.remove('.bzr/allow-upgrade')
        shutil.rmtree('.bzr/inventory-store')
        shutil.rmtree('.bzr/text-store')

    def _backup_control_dir(self):
        """Copy ``.bzr`` to ``.bzr.backup`` and tell the user about it."""
        shutil.copytree('.bzr', '.bzr.backup')
        note('.bzr has been backed up to .bzr.backup')
        note('if conversion fails, you can move this directory back to .bzr')
        note('if it succeeds, you can remove this directory if you wish')

    def _convert_working_inv(self):
        """Rewrite the working inventory control file with the v5 serializer."""
        branch = self.branch
        # Close both handles explicitly (as _set_new_format does) so the
        # read handle is released before the file is reopened for writing
        # and the written data does not depend on garbage collection.
        inv_in = branch.controlfile('inventory', 'rb')
        try:
            inv = serializer_v4.read_inventory(inv_in)
        finally:
            inv_in.close()
        inv_out = branch.controlfile('inventory', 'wb')
        try:
            serializer_v5.write_inventory(inv, inv_out)
        finally:
            inv_out.close()

    def _write_all_weaves(self):
        """Write the inventory, ancestry and per-file weaves under ``.bzr``."""
        write_a_weave(self.inv_weave, '.bzr/inventory.weave')
        write_a_weave(self.anc_weave, '.bzr/ancestry.weave')
        try:
            total = len(self.text_weaves)
            for i, (file_id, file_weave) in enumerate(self.text_weaves.items()):
                self.pb.update('writing weave', i, total)
                write_a_weave(file_weave, '.bzr/weaves/%s.weave' % file_id)
        finally:
            self.pb.clear()

    def _write_all_revs(self):
        """Write all revisions out in new form."""
        # the old store is thrown away and recreated empty
        shutil.rmtree('.bzr/revision-store')
        os.mkdir('.bzr/revision-store')
        try:
            for i, rev_id in enumerate(self.converted_revs):
                self.pb.update('write revision', i, len(self.converted_revs))
                # open() rather than the file() constructor
                f = open('.bzr/revision-store/%s' % rev_id, 'wb')
                try:
                    serializer_v5.write_revision(self.revisions[rev_id], f)
                finally:
                    f.close()
        finally:
            self.pb.clear()

    def _load_one_rev(self, rev_id):
        """Load a revision object into memory.

        Any parents not either loaded or abandoned get queued to be
        loaded.  A revision that is not in the branch's revision store is
        recorded in ``absent_revisions`` instead.
        """
        self.pb.update('loading revision',
                       len(self.revisions),
                       len(self.known_revisions))
        if rev_id not in self.branch.revision_store:
            self.pb.clear()
            note('revision {%s} not present in branch; '
                 'will not be converted',
                 rev_id)
            self.absent_revisions.add(rev_id)
        else:
            rev_xml = self.branch.revision_store[rev_id].read()
            rev = serializer_v4.read_revision_from_string(rev_xml)
            for parent_id in rev.parent_ids:
                self.known_revisions.add(parent_id)
                self.to_read.append(parent_id)
            self.revisions[rev_id] = rev
            old_inv_xml = self.branch.inventory_store[rev_id].read()
            inv = serializer_v4.read_inventory_from_string(old_inv_xml)
            # the stored inventory must match the hash recorded in the revision
            assert rev.inventory_sha1 == sha_string(old_inv_xml)
            self.inventories[rev_id] = inv

    def _convert_one_rev(self, rev_id):
        """Convert revision and all referenced objects to new format."""
        rev = self.revisions[rev_id]
        inv = self.inventories[rev_id]
        # iterate over a copy because absent parents are removed in place
        for parent_id in rev.parent_ids[:]:
            if parent_id in self.absent_revisions:
                rev.parent_ids.remove(parent_id)
                self.pb.clear()
                note('remove {%s} as parent of {%s}', parent_id, rev_id)
        self._convert_revision_contents(rev, inv)
        # the XML is now updated with text versions
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        new_inv_sha1 = sha_string(new_inv_xml)
        self.inv_weave.add(rev_id, rev.parent_ids,
                           new_inv_xml.splitlines(True),
                           new_inv_sha1)
        # TODO: Upgrade revision XML and write that out
        rev.inventory_sha1 = new_inv_sha1
        self._make_rev_ancestry(rev)
        self.converted_revs.add(rev_id)

    def _make_rev_ancestry(self, rev):
        """Add the ancestry of `rev` to the ancestry weave.

        The lines are the mash of the parents' entries in the ancestry
        weave followed by the revision's own id.  When assertions are
        enabled the result is cross-checked against merge_ancestry_lines().
        """
        rev_id = rev.revision_id
        for parent_id in rev.parent_ids:
            assert parent_id in self.converted_revs
        if rev.parent_ids:
            lines = list(self.anc_weave.mash_iter(rev.parent_ids))
        else:
            lines = []
        lines.append(rev_id + '\n')
        if __debug__:
            parent_ancestries = [self.ancestries[p] for p in rev.parent_ids]
            new_lines = merge_ancestry_lines(rev_id, parent_ancestries)
            assert set(lines) == set(new_lines)
            self.ancestries[rev_id] = new_lines
        self.anc_weave.add(rev_id, rev.parent_ids, lines)

    def _convert_revision_contents(self, rev, inv):
        """Convert all the files within a revision.

        Also upgrade the inventory to refer to the text revision ids.
        """
        rev_id = rev.revision_id
        mutter('converting texts of revision {%s}',
               rev_id)
        for file_id in inv:
            ie = inv[file_id]
            self._set_name_version(rev, ie)
            # only entries of kind 'file' have a text to convert
            if ie.kind != 'file':
                continue
            self._convert_file_version(rev, ie)

    def _set_name_version(self, rev, ie):
        """Set name version for a file.

        Done in a slightly lazy way: if the file is renamed or in a merge
        revision it gets a new version, otherwise the same as before.
        """
        file_id = ie.file_id
        if len(rev.parent_ids) != 1:
            # no parent, or a merge: always a new name version
            ie.name_version = rev.revision_id
            return
        old_inv = self.inventories[rev.parent_ids[0]]
        if not old_inv.has_id(file_id):
            # not present in the parent inventory
            ie.name_version = rev.revision_id
            return
        old_ie = old_inv[file_id]
        if old_ie.parent_id != ie.parent_id or old_ie.name != ie.name:
            # moved or renamed
            ie.name_version = rev.revision_id
        else:
            ie.name_version = old_ie.name_version

    def _convert_file_version(self, rev, ie):
        """Convert one version of one file.

        The file needs to be added into the weave if it is a merge
        of >=2 parents or if it's changed from its parent.
        """
        file_id = ie.file_id
        rev_id = rev.revision_id
        w = self.text_weaves.get(file_id)
        if w is None:
            w = Weave(file_id)
            self.text_weaves[file_id] = w
        file_lines = self.branch.text_store[ie.text_id].readlines()
        # the stored text must match what the inventory entry records
        assert sha_strings(file_lines) == ie.text_sha1
        assert sum(map(len, file_lines)) == ie.text_size
        file_parents = []
        text_changed = False
        for parent_id in rev.parent_ids:
            assert parent_id in self.converted_revs, \
                'parent {%s} not converted' % parent_id
            parent_inv = self.inventories[parent_id]
            if parent_inv.has_id(file_id):
                parent_ie = parent_inv[file_id]
                old_text_version = parent_ie.text_version
                assert old_text_version in self.converted_revs
                if old_text_version not in file_parents:
                    file_parents.append(old_text_version)
                if parent_ie.text_sha1 != ie.text_sha1:
                    text_changed = True
        if len(file_parents) != 1 or text_changed:
            w.add(rev_id, file_parents, file_lines, ie.text_sha1)
            ie.text_version = rev_id
            self.text_count += 1
        else:
            # exactly one distinct parent text and it is unchanged: reuse it
            ie.text_version = file_parents[0]
        del ie.text_id

    def _make_order(self):
        """Return a suitable order for importing revisions.

        The order must be such that a revision is imported after all
        its (present) parents.  Absent revisions count as already done.

        Raises Exception if some revisions can never be scheduled (a
        parent is neither loaded nor absent, or the parents form a cycle)
        instead of looping forever.
        """
        todo = set(self.revisions.keys())
        done = self.absent_revisions.copy()
        order = []
        while todo:
            progressed = False
            # scan through looking for revisions whose parents are all
            # done; sorted() returns a new list, so removing from todo
            # inside the loop is safe
            for rev_id in sorted(todo):
                rev = self.revisions[rev_id]
                if set(rev.parent_ids).issubset(done):
                    # can take this one now
                    order.append(rev_id)
                    todo.remove(rev_id)
                    done.add(rev_id)
                    progressed = True
            if not progressed:
                raise Exception('cannot order revisions; unresolved parents for %r'
                                % (sorted(todo),))
        return order

388

389

390

def write_a_weave(weave, filename):
    """Write `weave` to the file called `filename`.

    The file is opened in binary write mode, passed to write_weave(), and
    always closed again, even if writing raises.
    """
    # open() is the documented way to obtain a file object; the file()
    # constructor only exists on Python 2.
    weave_file = open(filename, 'wb')
    try:
        write_weave(weave, weave_file)
    finally:
        weave_file.close()

396

397

398

399

400

def profile_convert():
    """Run Convert under the hotshot profiler and print the top 100 entries.

    The profile data is collected in a named temporary file and the
    statistics, sorted by 'time', are printed.
    """
    profile_data = tempfile.NamedTemporaryFile()
    data_path = profile_data.name

    profiler = hotshot.Profile(data_path)
    profiler.runcall(Convert)
    profiler.close()

    report = hotshot.stats.load(data_path)
    report.sort_stats('time')
    # XXX: Might like to write to stderr or the trace file instead but
    # print_stats seems hardcoded to stdout
    report.print_stats(100)

414

415

416

if __name__ == '__main__':
    # Script entry point: set up logging, then run the conversion,
    # under the profiler when -p is given on the command line.
    enable_default_logging()

    want_profile = '-p' in sys.argv[1:]
    if not want_profile:
        Convert()
    else:
        profile_convert()

Older »