~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to tools/history2weaves.py

Committer: Martin Pool
Date: 2005-07-01 02:36:27 UTC
mto: This revision was merged to the branch mainline in revision 852.
Revision ID: mbp@sourcefrog.net-20050701023627-d8422b67a4c1d6d1

Show profile when converting inventory too.

files added:
.bzrignore

testsweet.py

files removed:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

build-api

bzr-man.py

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/weavestore.py

bzrlib/workingtree.py

bzrlib/xml.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

setup.py

testbzr

testsweet.py

tools

tools/history2revfiles.py

tools/history2weaves.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
tools/convertfile.py => convertfile.py

tools/convertinv.py => convertinv.py

bzrlib/selftest/test_weave.py => testweave.py

bzrlib/weave.py => weave.py

bzrlib/weavefile.py => weavefile.py

Show diffs side-by-side

added added

removed removed

tools/history2weaves.py

#! /usr/bin/python

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Experiment in converting existing bzr branches to weaves."""

# To make this properly useful

# 1. assign text version ids, and put those text versions into

# the inventory as they're converted.

# 2. keep track of the previous version of each file, rather than

# just using the last one imported

# 3. assign entry versions when files are added, renamed or moved.

# 4. when merged-in versions are observed, walk down through them

# to discover everything, then commit bottom-up

# 5. track ancestry as things are merged in, and commit that in each

# revision

# Perhaps it's best to first walk the whole graph and make a plan for
# what should be imported in what order?  That would need a kind of
# topological sort of all revisions.  (Or would it?  Could we instead
# just check, before doing a revision, that all its parents have either
# been converted or abandoned?)

# Optional psyco acceleration.  The constant-false guard means this
# stanza never runs as written; change the condition to enable it.
if False:
    try:
        import psyco
        psyco.full()
    except ImportError:
        # psyco is optional: carry on without it if it is not installed
        pass

import tempfile

import hotshot, hotshot.stats

import sys

import logging

import time

from bzrlib.branch import Branch, find_branch

from bzrlib.revfile import Revfile

from bzrlib.weave import Weave

from bzrlib.weavefile import read_weave, write_weave

from bzrlib.progress import ProgressBar

from bzrlib.atomicfile import AtomicFile

from bzrlib.xml4 import serializer_v4

from bzrlib.xml5 import serializer_v5

import bzrlib.trace

class Convert(object):
    """Experimental converter from an existing bzr branch to weaves.

    Instantiating the class runs the whole conversion: ``__init__`` calls
    ``convert()``, so ``Convert()`` and ``prof.runcall(Convert)`` both do
    all of the work.

    State shared between the methods lives on the instance:

    * ``total_revs`` / ``converted_revs`` / ``text_count`` -- counters used
      for the progress bar and the final summary.
    * ``inv_weave`` / ``anc_weave`` -- weaves for inventories and ancestry.
    * ``text_weaves`` -- in-memory weaves for all files, keyed by file id.
    * ``last_text_sha`` -- last text sha1 seen for each file id.
    * ``inv_parents`` -- parent list for the next inventory weave entry.

    NOTE: ``_convert_one_rev`` and ``_write_all_weaves`` still contain the
    temporary early ``return`` markers from the original code, so at
    present each revision and inventory is only read and parsed; nothing
    is added to the weaves or written to disk.
    """

    def __init__(self):
        self.total_revs = 0
        self.converted_revs = 0
        self.text_count = 0
        self.convert()

    def convert(self):
        """Walk the branch's revision history, converting each revision."""
        bzrlib.trace.enable_default_logging()
        self.pb = ProgressBar()

        self.inv_weave = Weave('__inventory')
        self.anc_weave = Weave('__ancestry')

        # Kept on the instance (not as locals) because the per-revision
        # and write-out methods need to see and update them.
        self.last_text_sha = {}

        # holds in-memory weaves for all files
        self.text_weaves = {}

        b = self.branch = Branch('.', relax_version_check=True)

        self.revno = 1
        rev_history = b.revision_history()
        self.inv_parents = []

        # todo is a stack holding the revisions we still need to process;
        # appending to it adds new highest-priority revisions
        todo = rev_history[:]
        todo.reverse()
        self.total_revs = len(todo)

        while todo:
            self._convert_one_rev(todo.pop())

        self.pb.clear()
        # Single-argument parenthesised print: same output as the plain
        # print statement.
        print('upgraded to weaves:')
        print(' %6d revisions and inventories' % self.converted_revs)
        print(' %6d texts' % self.text_count)

        self._write_all_weaves()

    def _write_all_weaves(self):
        """Write the inventory, ancestry and per-file weaves to disk.

        Currently disabled by the early return below.
        """
        return ############################################
        # TODO: commit them all atomically at the end, not one by one
        write_atomic_weave(self.inv_weave, 'weaves/inventory.weave')
        write_atomic_weave(self.anc_weave, 'weaves/ancestry.weave')
        total = len(self.text_weaves)
        for i, (file_id, file_weave) in enumerate(self.text_weaves.items()):
            self.pb.update('writing weave', i, total)
            write_atomic_weave(file_weave, 'weaves/%s.weave' % file_id)

        self.pb.clear()

    def _convert_one_rev(self, rev_id):
        """Convert the single revision ``rev_id``.

        Reads and parses the stored revision and inventory XML.  The code
        after the early return (adding the inventory and file texts to
        the weaves) is currently unreachable.
        """
        self._bump_progress()
        b = self.branch
        rev_xml = b.revision_store[rev_id].read()
        inv_xml = b.inventory_store[rev_id].read()

        rev = serializer_v4.read_revision_from_string(rev_xml)
        inv = serializer_v4.read_inventory_from_string(inv_xml)

        return ##########################################

        new_idx = self.inv_weave.add(rev_id, self.inv_parents, inv_xml)
        self.inv_parents = [new_idx]

        tree = b.revision_tree(rev_id)
        inv = tree.inventory

        # for each file in the inventory, put it into its own revfile
        for file_id in inv:
            ie = inv[file_id]
            if ie.kind != 'file':
                continue
            if self.last_text_sha.get(file_id) == ie.text_sha1:
                # same as last time
                continue
            self.last_text_sha[file_id] = ie.text_sha1

            # new text (though possibly already stored); need to store it
            text_lines = tree.get_file(file_id).readlines()

            # if the file's created for the first time in this
            # revision then make a new weave; else find the old one
            if file_id not in self.text_weaves:
                self.text_weaves[file_id] = Weave()

            w = self.text_weaves[file_id]

            # base the new text version off whatever was last
            # (actually it'd be better to track this, to allow for
            # files that are deleted and then reappear)
            last = len(w)
            if last == 0:
                parents = []
            else:
                parents = [last-1]

            w.add(rev_id, parents, text_lines)
            self.text_count += 1

        self.revno += 1

    def _bump_progress(self):
        """Count one more revision and refresh the progress bar."""
        self.converted_revs += 1
        self.pb.update('converting revisions',
                       self.converted_revs,
                       self.total_revs)

182

183

184

def write_atomic_weave(weave, filename):
    """Write ``weave`` to ``filename`` through an AtomicFile.

    The file is committed only after ``write_weave`` has returned; the
    AtomicFile is closed whether or not writing succeeded.
    """
    out_file = AtomicFile(filename)
    try:
        write_weave(weave, out_file)
        out_file.commit()
    finally:
        out_file.close()

191

192

193

194

195

def profile_convert():
    """Run the conversion under the hotshot profiler and print a summary.

    Profile data is collected in a named temporary file, then loaded,
    sorted by 'time', and the first 20 entries are printed.  The profiler
    and the temporary file are closed even if the conversion raises; in
    that case the exception propagates and no statistics are printed.
    """
    prof_f = tempfile.NamedTemporaryFile()
    try:
        prof = hotshot.Profile(prof_f.name)
        try:
            prof.runcall(Convert)
        finally:
            # close the profiler even when Convert fails
            prof.close()

        stats = hotshot.stats.load(prof_f.name)
        ##stats.strip_dirs()
        stats.sort_stats('time')
        # XXX: Might like to write to stderr or the trace file instead but
        # print_stats seems hardcoded to stdout
        stats.print_stats(20)
    finally:
        # explicitly release the temporary file once the stats are loaded
        prof_f.close()

209

210

211

# Script entry point: a "-p" argument anywhere on the command line selects
# the profiled run; otherwise the conversion runs directly.
if '-p' not in sys.argv[1:]:
    Convert()
else:
    profile_convert()

215

Older »