~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/fetch.py

Committer: Robert Collins
Date: 2005-10-03 01:42:16 UTC
Revision ID: robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

integrate in Gustavos x-bit patch

files added:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

NEWS.developers

TODO

build-api

bzr-man.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/clone.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revfile.py

bzrlib/revisionspec.py

bzrlib/selftest

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/testtransport.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store

bzrlib/store/compressed_text.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/textinv.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

setup.py

testbzr

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
bzrlib/tests.py

files renamed:
bzr.py => bzrlib/commands.py

bzrlib/store.py => bzrlib/store/__init__.py

files modified:
README

bzrlib/__init__.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/inventory.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/xml.py

Show diffs side-by-side

added added

removed removed

bzrlib/fetch.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import os

from cStringIO import StringIO

import bzrlib.errors

from bzrlib.trace import mutter, note, warning

from bzrlib.branch import Branch

from bzrlib.progress import ProgressBar

from bzrlib.xml5 import serializer_v5

from bzrlib.osutils import sha_string, split_lines

from bzrlib.errors import InstallFailed, NoSuchRevision, WeaveError

"""Copying of history from one branch to another.

The basic plan is that every branch knows the history of everything

that has merged into it. As the first step of a merge, pull, or

branch operation we copy history from the source into the destination

branch.

The copying is done in a slightly complicated order. We don't want to

add a revision to the store until everything it refers to is also

stored, so that if a revision is present we can totally recreate it.

However, we can't know what files are included in a revision until we

read its inventory. Therefore, we first pull the XML and hold it in

memory until we've updated all of the files referenced.

"""

# TODO: Avoid repeatedly opening weaves so many times.

# XXX: This doesn't handle ghost (not present in branch) revisions at

# all yet. I'm not sure they really should be supported.

# NOTE: This doesn't copy revisions which may be present but not

# merged into the last revision. I'm not sure we want to do that.

# - get a list of revisions that need to be pulled in

# - for each one, pull in that revision file

# and get the inventory, and store the inventory with right

# parents.

# - and get the ancestry, and store that with right parents too

# - and keep a note of all file ids and versions seen

# - then go through all files; for each one get the weave,

# and add in all file versions

def greedy_fetch(to_branch, from_branch, revision=None, pb=None):
    """Run a Fetcher from from_branch into to_branch and report the outcome.

    All arguments are handed straight to Fetcher; `revision` is passed as
    its `last_revision` argument and `pb` as its progress bar.

    Returns a tuple (count_copied, failed_revisions) read from the
    Fetcher once it has been constructed.
    """
    fetcher = Fetcher(to_branch, from_branch, last_revision=revision, pb=pb)
    copied = fetcher.count_copied
    failures = fetcher.failed_revisions
    return copied, failures

class Fetcher(object):

"""Pull revisions and texts from one branch to another.

This doesn't update the destination's history; that can be done

separately if desired.

revision_limit

If set, pull only up to this revision_id.

After running:

last_revision -- if last_revision

is given it will be that, otherwise the last revision of

from_branch

count_copied -- number of revisions copied

count_texts -- number of file texts copied

"""

def __init__(self, to_branch, from_branch, last_revision=None, pb=None):

if to_branch == from_branch:

raise Exception("can't fetch from a branch to itself")

self.to_branch = to_branch

self.to_weaves = to_branch.weave_store

self.to_control = to_branch.control_weaves

self.from_branch = from_branch

self.from_weaves = from_branch.weave_store

self.from_control = from_branch.control_weaves

self.failed_revisions = []

self.count_copied = 0

self.count_total = 0

self.count_texts = 0

100

if pb is None:

101

self.pb = bzrlib.ui.ui_factory.progress_bar()

102

else:

103

self.pb = pb

104

try:

105

self.last_revision = self._find_last_revision(last_revision)

106

except NoSuchRevision, e:

107

mutter('failed getting last revision: %s', e)

108

raise InstallFailed([last_revision])

109

mutter('fetch up to rev {%s}', self.last_revision)

110

try:

111

revs_to_fetch = self._compare_ancestries()

112

except WeaveError:

113

raise InstallFailed([self.last_revision])

114

self._copy_revisions(revs_to_fetch)

115

self.new_ancestry = revs_to_fetch

116

117

118

def _find_last_revision(self, last_revision):

119

"""Find the limiting source revision.

120

121

Every ancestor of that revision will be merged across.

122

123

Returns the revision_id, or returns None if there's no history

124

in the source branch."""

125

self.pb.update('get source history')

126

from_history = self.from_branch.revision_history()

127

self.pb.update('get destination history')

128

if last_revision:

129

self.from_branch.get_revision(last_revision)

130

return last_revision

131

elif from_history:

132

return from_history[-1]

133

else:

134

return None # no history in the source branch

135

136

137

def _compare_ancestries(self):

138

"""Get a list of revisions that must be copied.

139

140

That is, every revision that's in the ancestry of the source

141

branch and not in the destination branch."""

142

self.pb.update('get source ancestry')

143

self.from_ancestry = self.from_branch.get_ancestry(self.last_revision)

144

145

dest_last_rev = self.to_branch.last_revision()

146

self.pb.update('get destination ancestry')

147

if dest_last_rev:

148

dest_ancestry = self.to_branch.get_ancestry(dest_last_rev)

149

else:

150

dest_ancestry = []

151

ss = set(dest_ancestry)

152

to_fetch = []

153

for rev_id in self.from_ancestry:

154

if rev_id not in ss:

155

to_fetch.append(rev_id)

156

mutter('need to get revision {%s}', rev_id)

157

mutter('need to get %d revisions in total', len(to_fetch))

158

self.count_total = len(to_fetch)

159

return to_fetch

160

161

def _copy_revisions(self, revs_to_fetch):

162

i = 0

163

for rev_id in revs_to_fetch:

164

i += 1

165

if rev_id is None:

166

continue

167

if self.to_branch.has_revision(rev_id):

168

continue

169

self.pb.update('fetch revision', i, self.count_total)

170

self._copy_one_revision(rev_id)

171

self.count_copied += 1

172

173

174

def _copy_one_revision(self, rev_id):

175

"""Copy revision and everything referenced by it."""

176

mutter('copying revision {%s}', rev_id)

177

rev_xml = self.from_branch.get_revision_xml(rev_id)

178

inv_xml = self.from_branch.get_inventory_xml(rev_id)

179

rev = serializer_v5.read_revision_from_string(rev_xml)

180

inv = serializer_v5.read_inventory_from_string(inv_xml)

181

assert rev.revision_id == rev_id

182

assert rev.inventory_sha1 == sha_string(inv_xml)

183

mutter(' commiter %s, %d parents',

184

rev.committer,

185

len(rev.parent_ids))

186

self._copy_new_texts(rev_id, inv)

187

parents = rev.parent_ids

188

for parent in parents:

189

if not self.to_branch.has_revision(parent):

190

parents.pop(parents.index(parent))

191

self._copy_inventory(rev_id, inv_xml, parents)

192

self._copy_ancestry(rev_id, parents)

193

self.to_branch.revision_store.add(StringIO(rev_xml), rev_id)

194

195

196

def _copy_inventory(self, rev_id, inv_xml, parent_ids):

197

self.to_control.add_text('inventory', rev_id,

198

split_lines(inv_xml), parent_ids)

199

200

201

def _copy_ancestry(self, rev_id, parent_ids):

202

ancestry_lines = self.from_control.get_lines('ancestry', rev_id)

203

self.to_control.add_text('ancestry', rev_id, ancestry_lines,

204

parent_ids)

205

206

207

def _copy_new_texts(self, rev_id, inv):

208

"""Copy any new texts occuring in this revision."""

209

# TODO: Rather than writing out weaves every time, hold them

210

# in memory until everything's done? But this way is nicer

211

# if it's interrupted.

212

for path, ie in inv.iter_entries():

213

if ie.kind != 'file':

214

continue

215

if ie.revision != rev_id:

216

continue

217

mutter('%s {%s} is changed in this revision',

218

path, ie.file_id)

219

self._copy_one_text(rev_id, ie.file_id)

220

221

222

def _copy_one_text(self, rev_id, file_id):

223

"""Copy one file text."""

224

mutter('copy text version {%s} of file {%s}',

225

rev_id, file_id)

226

from_weave = self.from_weaves.get_weave(file_id)

227

from_idx = from_weave.lookup(rev_id)

228

from_parents = map(from_weave.idx_to_name, from_weave.parents(from_idx))

229

text_lines = from_weave.get(from_idx)

230

to_weave = self.to_weaves.get_weave_or_empty(file_id)

231

to_parents = map(to_weave.lookup, from_parents)

232

# it's ok to add even if the text is already there

233

to_weave.add(rev_id, to_parents, text_lines)

234

self.to_weaves.put_weave(file_id, to_weave)

235

self.count_texts += 1

236

237

238

# Module-level alias: `fetch` is another name for the Fetcher class.
fetch = Fetcher

Older »