~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to tools/history2weaves.py

Committer: Martin Pool
Date: 2005-09-20 08:06:24 UTC
Revision ID: mbp@sourcefrog.net-20050920080624-25bfe88297f9eaa2

- doc

files added:
HACKING

Makefile

NEWS.developers

bzrlib/builtins.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/plugins/__init__.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/teststore.py

bzrlib/shellcomplete.py

bzrlib/ui.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/weavestore.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/emacs

contrib/emacs/bzr-mode.el

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches/cache_weave_inclusions.diff

patches/pending-merge.patch

tools/history2revfiles.py

tools/history2weaves.py

tools/http_client.py

tutorial.txt

files removed:
plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

files renamed:
plugins/ => bzrlib/plugins/

tools/testweave.py => bzrlib/selftest/test_weave.py

effbot/ => bzrlib/util/effbot/

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/status.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

doc/index.txt

doc/todo-from-arch.txt

setup.py

testsweet.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

tools/history2weaves.py

#! /usr/bin/python

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Experiment in converting existing bzr branches to weaves."""

# To make this properly useful

# 1. assign text version ids, and put those text versions into

# the inventory as they're converted.

# 2. keep track of the previous version of each file, rather than

# just using the last one imported

# 3. assign entry versions when files are added, renamed or moved.

# 4. when merged-in versions are observed, walk down through them

# to discover everything, then commit bottom-up

# 5. track ancestry as things are merged in, and commit that in each

# revision

# Perhaps it's best to first walk the whole graph and make a plan for

# what should be imported in what order? Need a kind of topological

# sort of all revisions. (Or do we? Can we, just before doing a revision,

# see that all its parents have either been converted or abandoned?)

# Cannot import a revision until all its parents have been

# imported. In other words, we can only import revisions whose

# parents have all been imported. The first step must be to

# import a revision with no parents, of which there must be at

# least one. (So perhaps it's useful to store forward pointers

# from a list of parents to their children?)

# Another (equivalent?) approach is to build up the ordered

# ancestry list for the last revision, and walk through that. We

# are going to need that.

# We don't want to have to recurse all the way back down the list.

# Suppose we keep a queue of the revisions able to be processed at

# any point. This starts out with all the revisions having no

# parents.

# This seems like a generally useful algorithm...

# The current algorithm is dumb (O(n**2)?) but will do the job, and

# takes less than a second on the bzr.dev branch.

# This currently does a kind of lazy conversion of file texts, where a

# new text is written in every version. That's unnecessary but for

# the moment saves us having to worry about when files need new

# versions.

# psyco is an optional accelerator: switch it on when it can be imported
# and silently carry on without it otherwise.  Change the condition to
# False to skip the attempt altogether.
if True:
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

import tempfile

import hotshot, hotshot.stats

import sys

import logging

import time

from bzrlib.branch import Branch, find_branch

from bzrlib.revfile import Revfile

from bzrlib.weave import Weave

from bzrlib.weavefile import read_weave, write_weave

from bzrlib.progress import ProgressBar

from bzrlib.atomicfile import AtomicFile

from bzrlib.xml4 import serializer_v4

from bzrlib.xml5 import serializer_v5

from bzrlib.trace import mutter, note, warning, enable_default_logging

from bzrlib.osutils import sha_strings, sha_string

from bzrlib.commit import merge_ancestry_lines

class Convert(object):
    """Convert the branch in the current directory to weave storage.

    Constructing an instance performs the whole conversion: every
    reachable revision is loaded, a parents-first import order is
    computed, each revision's inventory and file texts are added to
    in-memory weaves, and finally the weaves and the upgraded revisions
    are written out.
    """

    def __init__(self):
        # ids of revisions that have been fully converted
        self.converted_revs = set()
        # ids referenced as parents but missing from the revision store
        self.absent_revisions = set()
        # number of file texts added to the text weaves
        self.text_count = 0
        # revision id -> revision object read from the old store
        self.revisions = {}
        # revision id -> inventory object read from the old store
        self.inventories = {}
        self.convert()

    def convert(self):
        """Run the conversion: load, order, convert, report and write."""
        enable_default_logging()
        self.pb = ProgressBar()
        self.inv_weave = Weave('__inventory')
        self.anc_weave = Weave('__ancestry')
        self.ancestries = {}
        # holds in-memory weaves for all files
        self.text_weaves = {}

        self.branch = Branch('.', relax_version_check=True)

        rev_history = self.branch.revision_history()

        # to_read is a stack holding the revisions we still need to process;
        # appending to it adds new highest-priority revisions
        self.known_revisions = set(rev_history)
        # Slicing (rather than indexing [-1]) leaves the stack empty for a
        # branch without history instead of raising IndexError.
        self.to_read = rev_history[-1:]
        while self.to_read:
            rev_id = self.to_read.pop()
            if (rev_id not in self.revisions
                    and rev_id not in self.absent_revisions):
                self._load_one_rev(rev_id)
        self.pb.clear()

        to_import = self._make_order()
        for i, rev_id in enumerate(to_import):
            self.pb.update('converting revision', i, len(to_import))
            self._convert_one_rev(rev_id)
        self.pb.clear()

        # Single-argument parenthesised prints produce the same output
        # under the Python 2 print statement.
        print('upgraded to weaves:')
        print(' %6d revisions and inventories' % len(self.revisions))
        print(' %6d absent revisions removed' % len(self.absent_revisions))
        print(' %6d texts' % self.text_count)
        self._write_all_weaves()
        self._write_all_revs()

    def _write_all_weaves(self):
        """Write the inventory, ancestry and per-file weaves under weaves/."""
        write_a_weave(self.inv_weave, 'weaves/inventory.weave')
        write_a_weave(self.anc_weave, 'weaves/ancestry.weave')
        total = len(self.text_weaves)
        try:
            for i, (file_id, file_weave) in enumerate(self.text_weaves.items()):
                self.pb.update('writing weave', i, total)
                write_a_weave(file_weave, 'weaves/%s.weave' % file_id)
        finally:
            self.pb.clear()

    def _write_all_revs(self):
        """Write all revisions out in new form."""
        total = len(self.converted_revs)
        try:
            for i, rev_id in enumerate(self.converted_revs):
                self.pb.update('write revision', i, total)
                f = open('new-revisions/%s' % rev_id, 'wb')
                try:
                    serializer_v5.write_revision(self.revisions[rev_id], f)
                finally:
                    f.close()
        finally:
            self.pb.clear()

    def _load_one_rev(self, rev_id):
        """Load a revision object into memory.

        Any parents not either loaded or abandoned get queued to be
        loaded.  A revision missing from the branch's revision store is
        recorded in absent_revisions instead.
        """
        self.pb.update('loading revision',
                       len(self.revisions),
                       len(self.known_revisions))
        if rev_id not in self.branch.revision_store:
            self.pb.clear()
            note('revision {%s} not present in branch; '
                 'will not be converted',
                 rev_id)
            self.absent_revisions.add(rev_id)
        else:
            rev_xml = self.branch.revision_store[rev_id].read()
            rev = serializer_v4.read_revision_from_string(rev_xml)
            # Parents are queued unconditionally; convert() skips any that
            # are already loaded or known to be absent when it pops them.
            for parent_id in rev.parent_ids:
                self.known_revisions.add(parent_id)
                self.to_read.append(parent_id)
            self.revisions[rev_id] = rev
            old_inv_xml = self.branch.inventory_store[rev_id].read()
            inv = serializer_v4.read_inventory_from_string(old_inv_xml)
            assert rev.inventory_sha1 == sha_string(old_inv_xml)
            self.inventories[rev_id] = inv

    def _convert_one_rev(self, rev_id):
        """Convert revision and all referenced objects to new format."""
        rev = self.revisions[rev_id]
        inv = self.inventories[rev_id]
        # Iterate over a copy because absent parents are removed from the
        # list as we go.
        for parent_id in rev.parent_ids[:]:
            if parent_id in self.absent_revisions:
                rev.parent_ids.remove(parent_id)
                self.pb.clear()
                note('remove {%s} as parent of {%s}', parent_id, rev_id)
        self._convert_revision_contents(rev, inv)
        # the XML is now updated with text versions
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        new_inv_sha1 = sha_string(new_inv_xml)
        self.inv_weave.add(rev_id, rev.parent_ids,
                           new_inv_xml.splitlines(True),
                           new_inv_sha1)
        # TODO: Upgrade revision XML and write that out
        rev.inventory_sha1 = new_inv_sha1
        self._make_rev_ancestry(rev)
        self.converted_revs.add(rev_id)

    def _make_rev_ancestry(self, rev):
        """Compute the ancestry of rev and add it to the ancestry weave.

        All parents of rev must already have been converted.
        """
        rev_id = rev.revision_id
        for parent_id in rev.parent_ids:
            assert parent_id in self.converted_revs
        parent_ancestries = [self.ancestries[p] for p in rev.parent_ids]
        new_lines = merge_ancestry_lines(rev_id, parent_ancestries)
        self.ancestries[rev_id] = new_lines
        self.anc_weave.add(rev_id, rev.parent_ids, new_lines)

    def _convert_revision_contents(self, rev, inv):
        """Convert all the files within a revision.

        Also upgrade the inventory to refer to the text revision ids.
        """
        rev_id = rev.revision_id
        mutter('converting texts of revision {%s}',
               rev_id)
        for file_id in inv:
            ie = inv[file_id]
            if ie.kind != 'file':
                continue
            self._convert_file_version(rev, ie)
            # TODO: Check and convert name versions

    def _convert_file_version(self, rev, ie):
        """Convert one version of one file.

        The file needs to be added into the weave if it is a merge
        of >=2 parents or if it's changed from its parent.  Otherwise the
        entry simply points at the single parent's text version.
        """
        file_id = ie.file_id
        rev_id = rev.revision_id
        w = self.text_weaves.get(file_id)
        if w is None:
            w = Weave(file_id)
            self.text_weaves[file_id] = w
        file_lines = self.branch.text_store[ie.text_id].readlines()
        assert sha_strings(file_lines) == ie.text_sha1
        assert sum(map(len, file_lines)) == ie.text_size
        file_parents = []
        text_changed = False
        # Absent parents were already stripped from rev.parent_ids by
        # _convert_one_rev, so every remaining parent must be converted.
        for parent_id in rev.parent_ids:
            assert parent_id in self.converted_revs, \
                'parent {%s} not converted' % parent_id
            parent_inv = self.inventories[parent_id]
            if parent_inv.has_id(file_id):
                parent_ie = parent_inv[file_id]
                old_text_version = parent_ie.text_version
                assert old_text_version in self.converted_revs
                if old_text_version not in file_parents:
                    file_parents.append(old_text_version)
                if parent_ie.text_sha1 != ie.text_sha1:
                    text_changed = True
        if len(file_parents) != 1 or text_changed:
            w.add(rev_id, file_parents, file_lines, ie.text_sha1)
            ie.name_version = ie.text_version = rev_id
            self.text_count += 1
        else:
            # exactly one parent version with an identical text: reuse it
            ie.text_version = file_parents[0]
            ie.name_version = file_parents[0]
        # the entry is now identified by its text version instead
        del ie.text_id

    def _make_order(self):
        """Return a suitable order for importing revisions.

        The order must be such that a revision is imported after all
        its (present) parents.  Absent revisions count as already done.

        Raises ValueError if a full pass over the remaining revisions
        cannot place any of them (for example because of a parent cycle
        or a parent that was neither loaded nor marked absent); without
        this check the loop would never terminate.
        """
        pending = set(self.revisions)
        done = self.absent_revisions.copy()
        order = []
        while pending:
            progressed = False
            # scan through looking for revisions whose parents are all
            # done; sorted() makes a copy, so removing from pending while
            # iterating is safe, and revisions finished earlier in a pass
            # can unblock later ones in the same pass
            for rev_id in sorted(pending):
                parent_ids = set(self.revisions[rev_id].parent_ids)
                if parent_ids.issubset(done):
                    # can take this one now
                    order.append(rev_id)
                    pending.remove(rev_id)
                    done.add(rev_id)
                    progressed = True
            if not progressed:
                raise ValueError(
                    'cannot order revisions; unresolved parents for: %s'
                    % ', '.join(sorted(pending)))
        return order

317

318

319

def write_a_weave(weave, filename):
    """Write weave to the file called filename, opened in binary mode.

    The file is closed even if writing the weave fails.
    """
    out = open(filename, 'wb')
    try:
        write_weave(weave, out)
    finally:
        out.close()

325

326

327

328

329

def profile_convert():
    """Run Convert under the hotshot profiler and print the statistics.

    Profile data is collected in a named temporary file; the statistics
    are sorted by 'time' and limited to 100 entries.
    """
    # Keep a reference to the temporary file for the whole function so
    # its name stays usable by the profiler and the stats loader.
    prof_f = tempfile.NamedTemporaryFile()

    profiler = hotshot.Profile(prof_f.name)
    profiler.runcall(Convert)
    profiler.close()

    report = hotshot.stats.load(prof_f.name)
    report.sort_stats('time')
    # XXX: Might like to write to stderr or the trace file instead but
    # print_stats seems hardcoded to stdout
    report.print_stats(100)

343

344

345

enable_default_logging()

# A '-p' argument anywhere on the command line selects the profiled run;
# otherwise the conversion runs directly.
if '-p' not in sys.argv[1:]:
    Convert()
else:
    profile_convert()

351

Older »