~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/upgrade.py

Committer: Martin Pool
Date: 2005-09-30 05:15:03 UTC
mto: (1185.14.2)
mto: This revision was merged to the branch mainline in revision 1396.
Revision ID: mbp@sourcefrog.net-20050930051503-9c049325215ddd1c

- fix up Branch.open_downlevel for Transport

files added:
HACKING

Makefile

NEWS.developers

bzr-man.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/clone.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revisionspec.py

bzrlib/selftest

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/testtransport.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store

bzrlib/store/compressed_text.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/add-bzr-to-baz

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/revfile-annotation.txt

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/symlink-support.patch

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
bzrlib/remotebranch.py

bzrlib/tests.py

doc/faq.txt

doc/quickref.txt

test.sh

files renamed:
bzrlib/store.py => bzrlib/store/__init__.py

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

contrib/bash/bzr => contrib/bash/bzr.simple

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/mdiff.py

bzrlib/osutils.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/xml.py

contrib/zsh/_bzr

doc/formats.txt

doc/index.txt

doc/revfile.txt

doc/tagging.txt

doc/todo-from-arch.txt

setup.py

testbzr

Show diffs side-by-side

added added

removed removed

bzrlib/upgrade.py

#! /usr/bin/python

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Experiment in converting existing bzr branches to weaves."""

# To make this properly useful

# 1. assign text version ids, and put those text versions into

# the inventory as they're converted.

# 2. keep track of the previous version of each file, rather than

# just using the last one imported

# 3. assign entry versions when files are added, renamed or moved.

# 4. when merged-in versions are observed, walk down through them

# to discover everything, then commit bottom-up

# 5. track ancestry as things are merged in, and commit that in each

# revision

# Perhaps it's best to first walk the whole graph and make a plan for

# what should be imported in what order? Need a kind of topological

# sort of all revisions. (Or do we? Can we just check, before doing a

# revision, that all its parents have either been converted or abandoned?)

# Cannot import a revision until all its parents have been

# imported. In other words, we can only import revisions whose

# parents have all been imported. the first step must be to

# import a revision with no parents, of which there must be at

# least one. (So perhaps it's useful to store forward pointers

# from a list of parents to their children?)

# Another (equivalent?) approach is to build up the ordered

# ancestry list for the last revision, and walk through that. We

# are going to need that.

# We don't want to have to recurse all the way back down the list.

# Suppose we keep a queue of the revisions able to be processed at

# any point. This starts out with all the revisions having no

# parents.

# This seems like a generally useful algorithm...

# The current algorithm is dumb (O(n**2)?) but will do the job, and

# takes less than a second on the bzr.dev branch.

# This currently does a kind of lazy conversion of file texts, where a

# new text is written in every version. That's unnecessary but for

# the moment saves us having to worry about when files need new

# versions.

# Optional psyco speed-up.  The constant-false guard keeps it switched off;
# if the guard is ever enabled, a missing psyco module is silently ignored.
if False:
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

import logging
import os
import shutil
import sys
import tempfile

from bzrlib.atomicfile import AtomicFile
from bzrlib.branch import Branch, find_branch, BZR_BRANCH_FORMAT_5
from bzrlib.commit import merge_ancestry_lines
from bzrlib.errors import BzrError
from bzrlib.osutils import sha_strings, sha_string
from bzrlib.progress import ProgressBar
from bzrlib.revfile import Revfile
from bzrlib.trace import mutter, note, warning, enable_default_logging
from bzrlib.weave import Weave
from bzrlib.weavefile import read_weave, write_weave
from bzrlib.xml4 import serializer_v4
from bzrlib.xml5 import serializer_v5

class Convert(object):
    """Convert a format-4 branch, in place, to the weave-based format 5.

    Constructing an instance runs the whole conversion immediately (see
    convert()); the instance keeps the in-memory state that was built up
    along the way.
    """

    def __init__(self, base_dir):
        """Record the branch location and convert it straight away.

        :param base_dir: directory that contains the ``.bzr`` control
            directory of the branch to upgrade.
        """
        self.base = base_dir
        # ids of revisions whose inventory and texts have been converted
        self.converted_revs = set()
        # ids that are referenced but missing from the revision store
        self.absent_revisions = set()
        # number of file texts added to the per-file weaves
        self.text_count = 0
        # revision_id -> revision object for every revision loaded so far
        self.revisions = {}
        self.convert()

    def convert(self):
        """Run the upgrade from start to finish.

        Does nothing if _open_branch() reports that no conversion is
        needed.  Otherwise the control directory is backed up, every
        reachable revision is loaded and converted in parent-first order,
        the weaves and revisions are written out, the new format marker
        is stored and the obsolete stores are removed.
        """
        if not self._open_branch():
            return
        note('starting upgrade of %s', os.path.abspath(self.base))
        self._backup_control_dir()
        note('starting upgrade')
        note('note: upgrade may be faster if all store files are ungzipped first')
        self.pb = ProgressBar()
        if not os.path.isdir(self.base + '/.bzr/weaves'):
            os.mkdir(self.base + '/.bzr/weaves')
        self.inv_weave = Weave('inventory')
        self.anc_weave = Weave('ancestry')
        # revision_id -> ancestry lines; only filled in when __debug__ is
        # true, for the cross-check in _make_rev_ancestry()
        self.ancestries = {}
        # holds in-memory weaves for all files
        self.text_weaves = {}
        # the format marker is removed here and written again by
        # _set_new_format() once everything else has succeeded
        os.remove(self.branch.controlfilename('branch-format'))
        self._convert_working_inv()
        rev_history = self.branch.revision_history()
        # to_read is a stack holding the revisions we still need to process;
        # appending to it adds new highest-priority revisions
        self.known_revisions = set(rev_history)
        # slicing (rather than indexing) copes with a branch that has no
        # revisions yet: the stack simply starts out empty
        self.to_read = list(rev_history[-1:])
        while self.to_read:
            rev_id = self.to_read.pop()
            if (rev_id not in self.revisions
                    and rev_id not in self.absent_revisions):
                self._load_one_rev(rev_id)
        self.pb.clear()
        to_import = self._make_order()
        for i, rev_id in enumerate(to_import):
            self.pb.update('converting revision', i, len(to_import))
            self._convert_one_rev(rev_id)
        self.pb.clear()
        note('upgraded to weaves:')
        note(' %6d revisions and inventories' % len(self.revisions))
        note(' %6d absent revisions removed' % len(self.absent_revisions))
        note(' %6d texts' % self.text_count)
        self._write_all_weaves()
        self._write_all_revs()
        self._set_new_format()
        self._cleanup_spare_files()

    def _open_branch(self):
        """Open the branch and decide whether it needs converting.

        :return: True if the branch is in format 4 and must be converted,
            False if it is already in format 5.
        :raises BzrError: if the branch is in any other format.
        """
        self.branch = Branch.open_downlevel(self.base)
        branch_format = self.branch._branch_format
        if branch_format == 5:
            note('this branch is already in the most current format')
            return False
        if branch_format != 4:
            raise BzrError("cannot upgrade from branch format %r" %
                           branch_format)
        return True

    def _set_new_format(self):
        """Write the format-5 marker into the ``branch-format`` file."""
        f = self.branch.controlfile('branch-format', 'wb')
        try:
            f.write(BZR_BRANCH_FORMAT_5)
        finally:
            f.close()

    def _cleanup_spare_files(self):
        """Delete control files and stores the new format no longer uses."""
        for n in 'merged-patches', 'pending-merged-patches':
            p = self.branch.controlfilename(n)
            if not os.path.exists(p):
                continue
            ## assert os.path.getsize(p) == 0
            os.remove(p)
        shutil.rmtree(self.base + '/.bzr/inventory-store')
        shutil.rmtree(self.base + '/.bzr/text-store')

    def _backup_control_dir(self):
        """Copy ``.bzr`` to ``.bzr.backup`` and tell the user about it."""
        orig = self.base + '/.bzr'
        backup = orig + '.backup'
        note('making backup of tree history')
        shutil.copytree(orig, backup)
        note('%s has been backed up to %s', orig, backup)
        note('if conversion fails, you can move this directory back to .bzr')
        note('if it succeeds, you can remove this directory if you wish')

    def _convert_working_inv(self):
        """Rewrite the working inventory from the v4 to the v5 serialization.

        The inventory is read completely and its file closed before the
        same control file is reopened for writing.
        """
        branch = self.branch
        inv_file = branch.controlfile('inventory', 'rb')
        try:
            inv = serializer_v4.read_inventory(inv_file)
        finally:
            inv_file.close()
        inv_file = branch.controlfile('inventory', 'wb')
        try:
            serializer_v5.write_inventory(inv, inv_file)
        finally:
            inv_file.close()

    def _write_all_weaves(self):
        """Write the inventory, ancestry and per-file weaves to disk."""
        write_a_weave(self.inv_weave, self.base + '/.bzr/inventory.weave')
        write_a_weave(self.anc_weave, self.base + '/.bzr/ancestry.weave')
        try:
            for i, (file_id, file_weave) in enumerate(self.text_weaves.items()):
                self.pb.update('writing weave', i, len(self.text_weaves))
                write_a_weave(file_weave,
                              self.base + '/.bzr/weaves/%s.weave' % file_id)
        finally:
            self.pb.clear()

    def _write_all_revs(self):
        """Write all revisions out in new form.

        The old revision store directory is removed and recreated, then
        one file per converted revision is written with the v5 serializer.
        """
        shutil.rmtree(self.base + '/.bzr/revision-store')
        os.mkdir(self.base + '/.bzr/revision-store')
        try:
            for i, rev_id in enumerate(self.converted_revs):
                self.pb.update('write revision', i, len(self.converted_revs))
                f = open(self.base + '/.bzr/revision-store/%s' % rev_id, 'wb')
                try:
                    serializer_v5.write_revision(self.revisions[rev_id], f)
                finally:
                    f.close()
        finally:
            self.pb.clear()

    def _load_one_rev(self, rev_id):
        """Load a revision object into memory.

        Any parents not either loaded or abandoned get queued to be
        loaded.  A revision that is missing from the revision store is
        recorded in ``absent_revisions`` instead.
        """
        self.pb.update('loading revision',
                       len(self.revisions),
                       len(self.known_revisions))
        if rev_id not in self.branch.revision_store:
            self.pb.clear()
            note('revision {%s} not present in branch; '
                 'will not be converted',
                 rev_id)
            self.absent_revisions.add(rev_id)
        else:
            rev_xml = self.branch.revision_store[rev_id].read()
            rev = serializer_v4.read_revision_from_string(rev_xml)
            for parent_id in rev.parent_ids:
                self.known_revisions.add(parent_id)
                self.to_read.append(parent_id)
            self.revisions[rev_id] = rev

    def _load_old_inventory(self, rev_id):
        """Return the v4 inventory of a not-yet-converted revision.

        When the revision records an inventory sha1, it is checked
        against the stored inventory text.
        """
        assert rev_id not in self.converted_revs
        old_inv_xml = self.branch.inventory_store[rev_id].read()
        inv = serializer_v4.read_inventory_from_string(old_inv_xml)
        rev = self.revisions[rev_id]
        if rev.inventory_sha1:
            assert rev.inventory_sha1 == sha_string(old_inv_xml), \
                'inventory sha mismatch for {%s}' % rev_id
        return inv

    def _load_updated_inventory(self, rev_id):
        """Return the v5 inventory of an already converted revision.

        The text comes from the in-memory inventory weave.
        """
        assert rev_id in self.converted_revs
        inv_xml = self.inv_weave.get_text(rev_id)
        inv = serializer_v5.read_inventory_from_string(inv_xml)
        return inv

    def _convert_one_rev(self, rev_id):
        """Convert revision and all referenced objects to new format."""
        rev = self.revisions[rev_id]
        inv = self._load_old_inventory(rev_id)
        # iterate over a copy: absent parents are removed from the real list
        for parent_id in rev.parent_ids[:]:
            if parent_id in self.absent_revisions:
                rev.parent_ids.remove(parent_id)
                self.pb.clear()
                note('remove {%s} as parent of {%s}', parent_id, rev_id)
        self._convert_revision_contents(rev, inv)
        self._store_new_weave(rev, inv)
        self._make_rev_ancestry(rev)
        self.converted_revs.add(rev_id)

    def _store_new_weave(self, rev, inv):
        """Add the converted inventory to the inventory weave.

        Also stores the sha1 of the new inventory text on the revision.
        """
        # the XML is now updated with text versions
        if __debug__:
            for file_id in inv:
                ie = inv[file_id]
                if ie.kind == 'root_directory':
                    continue
                assert hasattr(ie, 'name_version'), \
                    'no name_version on {%s} in {%s}' % \
                    (file_id, rev.revision_id)
                if ie.kind == 'file':
                    assert hasattr(ie, 'text_version')

        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        new_inv_sha1 = sha_string(new_inv_xml)
        self.inv_weave.add(rev.revision_id, rev.parent_ids,
                           new_inv_xml.splitlines(True),
                           new_inv_sha1)
        rev.inventory_sha1 = new_inv_sha1

    def _make_rev_ancestry(self, rev):
        """Add the ancestry of ``rev`` to the ancestry weave.

        The ancestry is the mash of the parents' ancestries followed by
        the revision's own id.  When __debug__ is true the result is
        cross-checked against merge_ancestry_lines().
        """
        rev_id = rev.revision_id
        for parent_id in rev.parent_ids:
            assert parent_id in self.converted_revs
        if rev.parent_ids:
            lines = list(self.anc_weave.mash_iter(rev.parent_ids))
        else:
            lines = []
        lines.append(rev_id + '\n')
        if __debug__:
            parent_ancestries = [self.ancestries[p] for p in rev.parent_ids]
            new_lines = merge_ancestry_lines(rev_id, parent_ancestries)
            assert set(lines) == set(new_lines)
            self.ancestries[rev_id] = new_lines
        self.anc_weave.add(rev_id, rev.parent_ids, lines)

    def _convert_revision_contents(self, rev, inv):
        """Convert all the files within a revision.

        Also upgrade the inventory to refer to the text revision ids.
        """
        rev_id = rev.revision_id
        mutter('converting texts of revision {%s}',
               rev_id)
        # a real list is needed: it is measured, indexed and iterated
        # repeatedly by the helpers below
        parent_invs = [self._load_updated_inventory(parent_id)
                       for parent_id in rev.parent_ids]
        for file_id in inv:
            ie = inv[file_id]
            self._set_name_version(rev, ie, parent_invs)
            if ie.kind != 'file':
                continue
            self._convert_file_version(rev, ie, parent_invs)

    def _set_name_version(self, rev, ie, parent_invs):
        """Set name version for a file.

        Done in a slightly lazy way: if the file is renamed or in a merge
        revision it gets a new version, otherwise the same as before.
        Root directory entries are left untouched.
        """
        file_id = ie.file_id
        if ie.kind == 'root_directory':
            return
        if len(parent_invs) != 1:
            ie.name_version = rev.revision_id
        else:
            old_inv = parent_invs[0]
            if not old_inv.has_id(file_id):
                ie.name_version = rev.revision_id
            else:
                old_ie = old_inv[file_id]
                if (old_ie.parent_id != ie.parent_id
                        or old_ie.name != ie.name):
                    ie.name_version = rev.revision_id
                else:
                    ie.name_version = old_ie.name_version

    def _convert_file_version(self, rev, ie, parent_invs):
        """Convert one version of one file.

        The file needs to be added into the weave if it is a merge
        of >=2 parents or if it's changed from its parent.  Otherwise the
        entry reuses the parent's text version.  In both cases the old
        ``text_id`` attribute is removed from the entry.
        """
        file_id = ie.file_id
        rev_id = rev.revision_id
        w = self.text_weaves.get(file_id)
        if w is None:
            w = Weave(file_id)
            self.text_weaves[file_id] = w
        file_parents = []
        text_changed = False
        for parent_inv in parent_invs:
            if parent_inv.has_id(file_id):
                parent_ie = parent_inv[file_id]
                old_text_version = parent_ie.text_version
                assert old_text_version in self.converted_revs
                if old_text_version not in file_parents:
                    file_parents.append(old_text_version)
                if parent_ie.text_sha1 != ie.text_sha1:
                    text_changed = True
        if len(file_parents) != 1 or text_changed:
            file_lines = self.branch.text_store[ie.text_id].readlines()
            assert sha_strings(file_lines) == ie.text_sha1
            assert sum(map(len, file_lines)) == ie.text_size
            w.add(rev_id, file_parents, file_lines, ie.text_sha1)
            ie.text_version = rev_id
            self.text_count += 1
            ##mutter('import text {%s} of {%s}',
            ##       ie.text_id, file_id)
        else:
            ##mutter('text of {%s} unchanged from parent', file_id)
            ie.text_version = file_parents[0]
        del ie.text_id

    def _make_order(self):
        """Return a suitable order for importing revisions.

        The order must be such that a revision is imported after all
        its (present) parents.  Absent revisions count as already done.

        :return: list of revision ids.
        :raises BzrError: if a full pass over the remaining revisions
            cannot place any of them, which would otherwise loop forever.
        """
        todo = set(self.revisions.keys())
        done = self.absent_revisions.copy()
        order = []
        while todo:
            progressed = False
            # scan through looking for revisions whose parents are all
            # done; sorted() takes a copy, so todo may shrink meanwhile
            for rev_id in sorted(todo):
                parent_ids = set(self.revisions[rev_id].parent_ids)
                if parent_ids.issubset(done):
                    # can take this one now
                    order.append(rev_id)
                    todo.remove(rev_id)
                    done.add(rev_id)
                    progressed = True
            if not progressed:
                raise BzrError("cannot find an import order for revisions %r"
                               % sorted(todo))
        return order

422

423

424

def write_a_weave(weave, filename):
    """Serialize ``weave`` into the file called ``filename``.

    The file is opened in binary mode for writing and is closed again
    even if write_weave() raises.
    """
    weave_file = open(filename, 'wb')
    try:
        write_weave(weave, weave_file)
    finally:
        weave_file.close()

430

431

432

def upgrade(base_dir):
    """Upgrade the branch located at ``base_dir``.

    All the work happens as a side effect of constructing a Convert
    object; nothing is returned.
    """
    Convert(base_dir)

Older »