~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/upgrade.py

Committer: Robert Collins
Date: 2006-01-05 00:16:01 UTC
mfrom: (1185.69.2 bzr-storage)
mto: (1185.65.25 storage) (1534.1.15 integration)
mto: This revision was merged to the branch mainline in revision 1550.
Revision ID: robertc@robertcollins.net-20060105001601-41706f37af6ae182

Merge from jam-storage.

files added:
BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS.developers

bzr_man.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/delta.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_parent.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_remove.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/transactions.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/revfile-annotation.txt

doc/revfile.txt

doc/split-join-files.txt

doc/switch-in-branch.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches

tools

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
bzrlib/mdiff.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/tests.py

doc/faq.txt

doc/quickref.txt

test.sh

testbzr

files renamed:
bzrlib/store.py => bzrlib/store/__init__.py

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/xml.py

doc/Makefile

doc/formats.txt

doc/index.txt

doc/merge.txt

doc/random.txt

doc/tagging.txt

doc/todo-from-arch.txt

setup.py *

Show diffs side-by-side

added added

removed removed

bzrlib/upgrade.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Experiment in converting existing bzr branches to weaves."""

# To make this properly useful

# 1. assign text version ids, and put those text versions into

# the inventory as they're converted.

# 2. keep track of the previous version of each file, rather than

# just using the last one imported

# 3. assign entry versions when files are added, renamed or moved.

# 4. when merged-in versions are observed, walk down through them

# to discover everything, then commit bottom-up

# 5. track ancestry as things are merged in, and commit that in each

# revision

# Perhaps it's best to first walk the whole graph and make a plan for

# what should be imported in what order? Need a kind of topological

# sort of all revisions. (Or do we, can we just before doing a revision

# see that all its parents have either been converted or abandoned?)

# Cannot import a revision until all its parents have been

# imported. In other words, we can only import revisions whose

# parents have all been imported. The first step must be to

# import a revision with no parents, of which there must be at

# least one. (So perhaps it's useful to store forward pointers

# from a list of parents to their children?)

# Another (equivalent?) approach is to build up the ordered

# ancestry list for the last revision, and walk through that. We

# are going to need that.

# We don't want to have to recurse all the way back down the list.

# Suppose we keep a queue of the revisions able to be processed at

# any point. This starts out with all the revisions having no

# parents.

# This seems like a generally useful algorithm...

# The current algorithm is dumb (O(n**2)?) but will do the job, and

# takes less than a second on the bzr.dev branch.

# This currently does a kind of lazy conversion of file texts, where a

# new text is written in every version. That's unnecessary but for

# the moment saves us having to worry about when files need new

# versions.

import os

import tempfile

import sys

import shutil

from bzrlib.branch import Branch, find_branch

from bzrlib.branch import BZR_BRANCH_FORMAT_5, BZR_BRANCH_FORMAT_6

import bzrlib.hashcache as hashcache

from bzrlib.weave import Weave

from bzrlib.weavefile import read_weave, write_weave

from bzrlib.ui import ui_factory

from bzrlib.atomicfile import AtomicFile

from bzrlib.xml4 import serializer_v4

from bzrlib.xml5 import serializer_v5

from bzrlib.trace import mutter, note, warning

from bzrlib.osutils import sha_strings, sha_string, pathjoin, abspath

class Convert(object):
    """Upgrade the branch found at ``base_dir`` to the newest format.

    Constructing an instance runs the whole upgrade: ``__init__`` ends by
    calling ``convert()``.

    State kept while converting:

    * ``revisions`` -- revision objects loaded so far, keyed by revision id
    * ``absent_revisions`` -- ids referenced as parents but not present in
      the revision store (converted as ghosts)
    * ``converted_revs`` -- ids whose inventory and texts are already in
      the in-memory weaves
    * ``text_count`` -- number of file texts added to weaves
    """

    def __init__(self, base_dir):
        self.base = base_dir
        self.converted_revs = set()
        self.absent_revisions = set()
        self.text_count = 0
        self.revisions = {}
        self.convert()

    def convert(self):
        """Run every upgrade step that applies to the branch's format.

        A format 4 branch is first converted to format 5 (weaves) and then,
        because the branch is reopened after each step, on to format 6
        (prefixed stores).  The control directory is backed up before
        anything is changed and the hash cache is cleared at the end.
        """
        if not self._open_branch():
            return
        note('starting upgrade of %s', os.path.abspath(self.base))
        self._backup_control_dir()
        self.pb = ui_factory.progress_bar()
        if self.old_format == 4:
            note('starting upgrade from format 4 to 5')
            self._run_locked(self._convert_to_weaves)
        if self.old_format == 5:
            note('starting upgrade from format 5 to 6')
            self._run_locked(self._convert_to_prefixed)
        cache = hashcache.HashCache(abspath(self.base))
        cache.clear()
        cache.write()
        note("finished")

    def _run_locked(self, step):
        """Call ``step()`` under the branch write lock, then reopen the branch.

        Reopening refreshes ``self.branch`` and ``self.old_format`` so the
        caller sees the format the step has just written.
        """
        self.branch.lock_write()
        try:
            step()
        finally:
            self.branch.unlock()
        self._open_branch()

    def _convert_to_prefixed(self):
        """Move weave and revision store files into hash-prefix directories."""
        from bzrlib.store import hash_prefix
        for store_name in ["weaves", "revision-store"]:
            note("adding prefixes to %s" % store_name)
            store_dir = pathjoin(self.base, ".bzr", store_name)
            for filename in os.listdir(store_dir):
                # the prefix is computed from the id, i.e. the name without
                # its storage suffix
                if filename.endswith(".weave") or filename.endswith(".gz"):
                    file_id = os.path.splitext(filename)[0]
                else:
                    file_id = filename
                prefix_dir = pathjoin(store_dir, hash_prefix(file_id))
                if not os.path.isdir(prefix_dir):
                    os.mkdir(prefix_dir)
                os.rename(pathjoin(store_dir, filename),
                          pathjoin(prefix_dir, filename))
        self._set_new_format(BZR_BRANCH_FORMAT_6)

    def _convert_to_weaves(self):
        """Convert a format 4 branch to format 5.

        Loads every revision reachable from the last revision in the
        branch history, converts them parents-first into in-memory weaves,
        then writes the weaves and revisions out and removes the old stores.
        """
        note('note: upgrade may be faster if all store files are ungzipped first')
        weaves_dir = self.base + '/.bzr/weaves'
        if not os.path.isdir(weaves_dir):
            os.mkdir(weaves_dir)
        self.inv_weave = Weave('inventory')
        # holds in-memory weaves for all files
        self.text_weaves = {}
        os.remove(self.branch.control_files.controlfilename('branch-format'))
        self._convert_working_inv()
        rev_history = self.branch.revision_history()
        # to_read is a stack holding the revisions we still need to process;
        # appending to it adds new highest-priority revisions
        self.known_revisions = set(rev_history)
        self.to_read = rev_history[-1:]
        while self.to_read:
            rev_id = self.to_read.pop()
            if (rev_id not in self.revisions
                and rev_id not in self.absent_revisions):
                self._load_one_rev(rev_id)
        self.pb.clear()
        to_import = self._make_order()
        total = len(to_import)
        for i, rev_id in enumerate(to_import):
            self.pb.update('converting revision', i, total)
            self._convert_one_rev(rev_id)
        self.pb.clear()
        note('upgraded to weaves:')
        note(' %6d revisions and inventories' % len(self.revisions))
        note(' %6d revisions not present' % len(self.absent_revisions))
        note(' %6d texts' % self.text_count)
        self._write_all_weaves()
        self._write_all_revs()
        self._cleanup_spare_files()
        self._set_new_format(BZR_BRANCH_FORMAT_5)

    def _open_branch(self):
        """(Re)open the branch and record its format in ``self.old_format``.

        Returns False when the branch is already format 6 (nothing to do),
        True when it is format 4 or 5.  Raises BzrError for any other format.
        """
        self.branch = Branch.open_downlevel(self.base)
        self.old_format = self.branch._branch_format
        if self.old_format == 6:
            note('this branch is in the most current format')
            return False
        if self.old_format not in (4, 5):
            # BzrError is not imported at module level, so bring it into
            # scope here; otherwise this branch fails with a NameError.
            from bzrlib.errors import BzrError
            raise BzrError("cannot upgrade from branch format %r" %
                           self.branch._branch_format)
        return True

    def _set_new_format(self, format):
        """Write ``format`` as the branch's 'branch-format' control file."""
        self.branch.control_files.put_utf8('branch-format', format)

    def _cleanup_spare_files(self):
        """Delete control files and stores that format 5 no longer uses."""
        for name in 'merged-patches', 'pending-merged-patches':
            path = self.branch.control_files.controlfilename(name)
            # these files are removed without checking that they are empty
            if os.path.exists(path):
                os.remove(path)
        shutil.rmtree(self.base + '/.bzr/inventory-store')
        shutil.rmtree(self.base + '/.bzr/text-store')

    def _backup_control_dir(self):
        """Copy the .bzr directory to .bzr.backup before changing anything."""
        orig = self.base + '/.bzr'
        backup = orig + '.backup'
        note('making backup of tree history')
        shutil.copytree(orig, backup)
        note('%s has been backed up to %s', orig, backup)
        note('if conversion fails, you can move this directory back to .bzr')
        note('if it succeeds, you can remove this directory if you wish')

    def _convert_working_inv(self):
        """Rewrite the working inventory from v4 XML to v5 XML."""
        control_files = self.branch.control_files
        inv = serializer_v4.read_inventory(
            control_files.controlfile('inventory', 'rb'))
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        control_files.put_utf8('inventory', new_inv_xml)

    def _write_all_weaves(self):
        """Write the inventory weave and every text weave to disk."""
        write_a_weave(self.inv_weave, self.base + '/.bzr/inventory.weave')
        total = len(self.text_weaves)
        try:
            for i, (file_id, file_weave) in enumerate(self.text_weaves.items()):
                self.pb.update('writing weave', i, total)
                write_a_weave(file_weave,
                              self.base + '/.bzr/weaves/%s.weave' % file_id)
        finally:
            self.pb.clear()

    def _write_all_revs(self):
        """Write all revisions out in new form.

        The old revision store directory is removed and recreated, then
        each converted revision is serialised into it in v5 form.
        """
        store_dir = self.base + '/.bzr/revision-store'
        shutil.rmtree(store_dir)
        os.mkdir(store_dir)
        total = len(self.converted_revs)
        try:
            for i, rev_id in enumerate(self.converted_revs):
                self.pb.update('write revision', i, total)
                f = open(self.base + '/.bzr/revision-store/%s' % rev_id, 'wb')
                try:
                    serializer_v5.write_revision(self.revisions[rev_id], f)
                finally:
                    f.close()
        finally:
            self.pb.clear()

    def _load_one_rev(self, rev_id):
        """Load a revision object into memory.

        Any parents not either loaded or abandoned get queued to be
        loaded.  A revision missing from the revision store is recorded
        in ``absent_revisions`` instead.
        """
        self.pb.update('loading revision',
                       len(self.revisions),
                       len(self.known_revisions))
        revision_store = self.branch.repository.revision_store
        if not revision_store.has_id(rev_id):
            self.pb.clear()
            note('revision {%s} not present in branch; '
                 'will be converted as a ghost',
                 rev_id)
            self.absent_revisions.add(rev_id)
            return
        rev_xml = revision_store.get(rev_id).read()
        rev = serializer_v4.read_revision_from_string(rev_xml)
        for parent_id in rev.parent_ids:
            self.known_revisions.add(parent_id)
            self.to_read.append(parent_id)
        self.revisions[rev_id] = rev

    def _load_old_inventory(self, rev_id):
        """Return the v4 inventory of a not-yet-converted revision.

        When the revision records an inventory sha1, it is checked against
        the stored inventory text.
        """
        assert rev_id not in self.converted_revs
        old_inv_xml = self.branch.repository.inventory_store.get(rev_id).read()
        inv = serializer_v4.read_inventory_from_string(old_inv_xml)
        rev = self.revisions[rev_id]
        if rev.inventory_sha1:
            assert rev.inventory_sha1 == sha_string(old_inv_xml), \
                'inventory sha mismatch for {%s}' % rev_id
        return inv

    def _load_updated_inventory(self, rev_id):
        """Return the converted (v5) inventory of ``rev_id`` from the weave."""
        assert rev_id in self.converted_revs
        inv_xml = self.inv_weave.get_text(rev_id)
        return serializer_v5.read_inventory_from_string(inv_xml)

    def _convert_one_rev(self, rev_id):
        """Convert revision and all referenced objects to new format."""
        rev = self.revisions[rev_id]
        inv = self._load_old_inventory(rev_id)
        # ghosts have nothing converted, so they cannot act as parents
        present_parents = [p for p in rev.parent_ids
                           if p not in self.absent_revisions]
        self._convert_revision_contents(rev, inv, present_parents)
        self._store_new_weave(rev, inv, present_parents)
        self.converted_revs.add(rev_id)

    def _store_new_weave(self, rev, inv, present_parents):
        """Add the converted inventory to the inventory weave.

        Also updates ``rev.inventory_sha1`` to the sha1 of the new XML.
        """
        # the XML is now updated with text versions
        if __debug__:
            for file_id in inv:
                ie = inv[file_id]
                if ie.kind == 'root_directory':
                    continue
                assert hasattr(ie, 'revision'), \
                    'no revision on {%s} in {%s}' % \
                    (file_id, rev.revision_id)
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        new_inv_sha1 = sha_string(new_inv_xml)
        self.inv_weave.add(rev.revision_id,
                           present_parents,
                           new_inv_xml.splitlines(True),
                           new_inv_sha1)
        rev.inventory_sha1 = new_inv_sha1

    def _convert_revision_contents(self, rev, inv, present_parents):
        """Convert all the files within a revision.

        Also upgrade the inventory to refer to the text revision ids.
        """
        mutter('converting texts of revision {%s}',
               rev.revision_id)
        # Built as a real list: it is handed to every file entry in turn,
        # so it must survive being iterated more than once.
        parent_invs = [self._load_updated_inventory(parent_id)
                       for parent_id in present_parents]
        for file_id in inv:
            self._convert_file_version(rev, inv[file_id], parent_invs)

    def _convert_file_version(self, rev, ie, parent_invs):
        """Convert one version of one file.

        The file needs to be added into the weave if it is a merge
        of >=2 parents or if it's changed from its parent.
        """
        if ie.kind == 'root_directory':
            return
        file_id = ie.file_id
        w = self.text_weaves.get(file_id)
        if w is None:
            w = Weave(file_id)
            self.text_weaves[file_id] = w
        previous_entries = ie.find_previous_heads(parent_invs, w)
        for old_revision in previous_entries:
            # if this fails, it's a ghost?
            assert old_revision in self.converted_revs
        self.snapshot_ie(previous_entries, ie, w, rev.revision_id)
        del ie.text_id
        assert getattr(ie, 'revision', None) is not None

    def snapshot_ie(self, previous_revisions, ie, w, rev_id):
        """Record entry ``ie`` in weave ``w`` as of ``rev_id`` if it changed.

        With exactly one previous head that ``ie`` is unchanged from, the
        entry just inherits that head's revision and nothing is added.
        Otherwise a new version is added to ``w`` (with the stored text
        when the entry has one, else with no lines) and ``ie.revision`` is
        set to ``rev_id``.
        """
        # TODO: convert this logic, which is ~= snapshot to
        # a call to:. This needs the path figured out. rather than a work_tree
        # a v4 revision_tree can be given, or something that looks enough like
        # one to give the file content to the entry if it needs it.
        # and we need something that looks like a weave store for snapshot to
        # save against.
        #ie.snapshot(rev, PATH, previous_revisions, REVISION_TREE, InMemoryWeaveStore(self.text_weaves))
        if len(previous_revisions) == 1:
            previous_ie = list(previous_revisions.values())[0]
            if ie._unchanged(previous_ie):
                ie.revision = previous_ie.revision
                return
        parent_indexes = [w.lookup(revision) for revision in previous_revisions]
        if ie.has_text():
            text = self.branch.repository.text_store.get(ie.text_id)
            file_lines = text.readlines()
            assert sha_strings(file_lines) == ie.text_sha1
            assert sum([len(line) for line in file_lines]) == ie.text_size
            w.add(rev_id, parent_indexes, file_lines, ie.text_sha1)
            self.text_count += 1
        else:
            w.add(rev_id, parent_indexes, [], None)
        ie.revision = rev_id

    def _make_order(self):
        """Return a suitable order for importing revisions.

        The order must be such that a revision is imported after all
        its (present) parents.  Absent revisions count as already done.

        Raises BzrError if the remaining revisions can never be ordered
        (for instance because their parents form a cycle) rather than
        looping forever.
        """
        todo = set(self.revisions.keys())
        done = self.absent_revisions.copy()
        order = []
        while todo:
            progressed = False
            # scan through looking for revisions whose parents are all
            # done; iterate a sorted snapshot since todo shrinks as we go
            for rev_id in sorted(todo):
                parent_ids = set(self.revisions[rev_id].parent_ids)
                if parent_ids.issubset(done):
                    # can take this one now
                    order.append(rev_id)
                    todo.remove(rev_id)
                    done.add(rev_id)
                    progressed = True
            if not progressed:
                # a whole pass found nothing importable, so another pass
                # would not either
                from bzrlib.errors import BzrError
                raise BzrError("cannot order revisions for import; "
                               "unresolvable parents among %r"
                               % sorted(todo))
        return order

398

399

400

def write_a_weave(weave, filename):
    """Serialise ``weave`` to the file at ``filename``.

    The file is opened in binary write mode and is closed even if
    ``write_weave`` raises.
    """
    # open() rather than the file() constructor: same object in Python 2,
    # and the documented way to open a file
    out = open(filename, 'wb')
    try:
        write_weave(weave, out)
    finally:
        out.close()

406

407

408

def upgrade(base_dir):
    """Upgrade the branch located at ``base_dir``.

    All the work happens in the ``Convert`` constructor, which runs the
    conversion; the instance itself is not kept.
    """
    Convert(base_dir)

Older »