~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/fetch.py

Committer: Martin Pool
Date: 2005-05-10 06:00:59 UTC
Revision ID: mbp@sourcefrog.net-20050510060059-bae67a465325f650

- Use AtomicFile to update statcache.
- New closed property on AtomicFile

files added:
bzrlib/cache.py

bzrlib/remotebranch.py

bzrlib/tests.py

files removed:
HACKING

Makefile

NEWS.developers

bzr-man.py

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/clone.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revisionspec.py

bzrlib/selftest

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_upgrade.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/testtransport.py

bzrlib/selftest/testworkingtree.py

bzrlib/selftest/treeshape.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/store

bzrlib/store/compressed_text.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

tools

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/store/__init__.py => bzrlib/store.py

contrib/bash/bzr.simple => contrib/bash/bzr

bzrlib/util/elementtree/ => elementtree/

bzrlib/util/urlgrabber/ => urlgrabber/

files modified:
.bzrignore

NEWS

README

TODO

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/osutils.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/xml.py

contrib/add-bzr-to-baz

doc/formats.txt

doc/index.txt

doc/tagging.txt

doc/todo-from-arch.txt

setup.py

testbzr

urlgrabber/keepalive.py

Show diffs side-by-side

added added

removed removed

bzrlib/fetch.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import os

from cStringIO import StringIO

import bzrlib.errors

from bzrlib.trace import mutter, note, warning

from bzrlib.branch import Branch

from bzrlib.progress import ProgressBar

from bzrlib.xml5 import serializer_v5

from bzrlib.osutils import sha_string, split_lines

from bzrlib.errors import InstallFailed, NoSuchRevision, WeaveError

"""Copying of history from one branch to another.

The basic plan is that every branch knows the history of everything

that has merged into it. As the first step of a merge, pull, or

branch operation we copy history from the source into the destination

branch.

The copying is done in a slightly complicated order. We don't want to

add a revision to the store until everything it refers to is also

stored, so that if a revision is present we can totally recreate it.

However, we can't know what files are included in a revision until we

read its inventory. Therefore, we first pull the XML and hold it in

memory until we've updated all of the files referenced.

"""

# TODO: Avoid repeatedly opening weaves so many times.

# XXX: This doesn't handle ghost (not present in branch) revisions at

# all yet. I'm not sure they really should be supported.

# NOTE: This doesn't copy revisions which may be present but not

# merged into the last revision. I'm not sure we want to do that.

# - get a list of revisions that need to be pulled in

# - for each one, pull in that revision file

# and get the inventory, and store the inventory with right

# parents.

# - and get the ancestry, and store that with right parents too

# - and keep a note of all file ids and versions seen

# - then go through all files; for each one get the weave,

# and add in all file versions

def greedy_fetch(to_branch, from_branch, revision=None, pb=None):
    """Fetch history from from_branch into to_branch.

    Builds a Fetcher (constructing it performs the fetch) with
    ``revision`` passed as its ``last_revision`` limit and ``pb`` as its
    progress bar.

    Returns a ``(count_copied, failed_revisions)`` tuple taken from the
    finished Fetcher.
    """
    fetcher = Fetcher(to_branch, from_branch, revision, pb)
    copied = fetcher.count_copied
    failures = fetcher.failed_revisions
    return copied, failures

class Fetcher(object):
    """Pull revisions and texts from one branch to another.

    This doesn't update the destination's history; that can be done
    separately if desired.

    Constructing a Fetcher performs the whole fetch; afterwards the
    object only carries the results.

    last_revision
        If set, pull only up to this revision_id.

    After running:

    last_revision -- if last_revision is given it will be that,
        otherwise the last revision of from_branch (None if from_branch
        has no history)
    count_total -- number of revisions found to be missing from to_branch
    count_copied -- number of revisions copied
    count_weaves -- number of file weaves copied
    new_ancestry -- the list of revision ids that had to be fetched
    failed_revisions -- list initialised empty; nothing in this class
        appends to it (failures are raised as InstallFailed instead)
    """

    def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
        """Copy everything up to last_revision from from_branch to to_branch.

        pb is the progress bar to report to; when None one is obtained
        from bzrlib.ui.ui_factory.

        Raises Exception if the two branches compare equal, and
        InstallFailed if the limiting revision cannot be found in the
        source or the ancestry comparison raises WeaveError.
        """
        if to_branch == from_branch:
            raise Exception("can't fetch from a branch to itself")
        self.to_branch = to_branch
        self.to_weaves = to_branch.weave_store
        self.to_control = to_branch.control_weaves
        self.from_branch = from_branch
        self.from_weaves = from_branch.weave_store
        self.from_control = from_branch.control_weaves
        self.failed_revisions = []
        self.count_copied = 0
        self.count_total = 0
        self.count_weaves = 0
        # File ids whose weave has already been joined across during
        # this fetch; see _copy_one_weave.
        self.copied_file_ids = set()
        if pb is None:
            # The module only imports bzrlib.errors at the top, so make
            # sure the ui submodule is really loaded before touching it.
            import bzrlib.ui
            self.pb = bzrlib.ui.ui_factory.progress_bar()
        else:
            self.pb = pb
        try:
            self.last_revision = self._find_last_revision(last_revision)
        except NoSuchRevision:
            # sys.exc_info() instead of "except ..., e" / "except ... as e"
            # so the syntax is accepted by every Python version.
            import sys
            e = sys.exc_info()[1]
            mutter('failed getting last revision: %s', e)
            raise InstallFailed([last_revision])
        mutter('fetch up to rev {%s}', self.last_revision)
        try:
            revs_to_fetch = self._compare_ancestries()
        except WeaveError:
            raise InstallFailed([self.last_revision])
        self._copy_revisions(revs_to_fetch)
        self.new_ancestry = revs_to_fetch

    def _find_last_revision(self, last_revision):
        """Find the limiting source revision.

        Every ancestor of that revision will be merged across.

        If last_revision is given, it is looked up in the source branch
        (so a missing revision raises whatever get_revision raises; the
        constructor expects NoSuchRevision) and returned unchanged.

        Returns the revision_id, or returns None if there's no history
        in the source branch."""
        self.pb.update('get source history')
        from_history = self.from_branch.revision_history()
        self.pb.update('get destination history')
        if last_revision:
            # Called only for its side effect of raising when the
            # revision is absent; the result is not needed.
            self.from_branch.get_revision(last_revision)
            return last_revision
        elif from_history:
            return from_history[-1]
        else:
            return None  # no history in the source branch

    def _compare_ancestries(self):
        """Get a list of revisions that must be copied.

        That is, every revision that's in the ancestry of the source
        branch and not in the destination branch.  The result keeps the
        order of the source ancestry.  Also records the source ancestry
        in self.from_ancestry and the result length in self.count_total.
        """
        self.pb.update('get source ancestry')
        self.from_ancestry = self.from_branch.get_ancestry(self.last_revision)

        dest_last_rev = self.to_branch.last_revision()
        self.pb.update('get destination ancestry')
        if dest_last_rev:
            dest_ancestry = self.to_branch.get_ancestry(dest_last_rev)
        else:
            dest_ancestry = []
        # A set makes the membership test below O(1) per revision.
        already_present = set(dest_ancestry)
        to_fetch = []
        for rev_id in self.from_ancestry:
            if rev_id not in already_present:
                to_fetch.append(rev_id)
                mutter('need to get revision {%s}', rev_id)
        mutter('need to get %d revisions in total', len(to_fetch))
        self.count_total = len(to_fetch)
        return to_fetch

    def _copy_revisions(self, revs_to_fetch):
        """Copy each listed revision that the destination lacks.

        None entries and revisions the destination already has are
        skipped, but still advance the position shown on the progress
        bar.  self.count_copied is incremented per revision copied.
        """
        position = 0
        for rev_id in revs_to_fetch:
            position += 1
            if rev_id is None:
                continue
            if self.to_branch.has_revision(rev_id):
                continue
            self.pb.update('fetch revision', position, self.count_total)
            self._copy_one_revision(rev_id)
            self.count_copied += 1

    def _present_parents(self, parent_ids):
        """Return the parent ids that the destination branch already has.

        Builds a new list, in the original order, and leaves parent_ids
        untouched.  (Removing items from the list while looping over it,
        as this code used to, skips the element following each removal
        and so could leave a missing parent in the result.)
        """
        return [parent for parent in parent_ids
                if self.to_branch.has_revision(parent)]

    def _copy_one_revision(self, rev_id):
        """Copy revision and everything referenced by it.

        The order matters: the file texts go in first, then the
        inventory, and the revision itself last, so that a revision
        present in the destination store always has its contents
        available.
        """
        mutter('copying revision {%s}', rev_id)
        rev_xml = self.from_branch.get_revision_xml(rev_id)
        inv_xml = self.from_branch.get_inventory_xml(rev_id)
        rev = serializer_v5.read_revision_from_string(rev_xml)
        inv = serializer_v5.read_inventory_from_string(inv_xml)
        # Sanity checks on what the source branch handed back.
        assert rev.revision_id == rev_id
        assert rev.inventory_sha1 == sha_string(inv_xml)
        mutter(' commiter %s, %d parents',
               rev.committer,
               len(rev.parent_ids))
        self._copy_new_texts(rev_id, inv)
        # Only parents the destination actually holds can be named as
        # parents of the new inventory version.
        parents = self._present_parents(rev.parent_ids)
        self._copy_inventory(rev_id, inv_xml, parents)
        self.to_branch.revision_store.add(StringIO(rev_xml), rev_id)
        mutter('copied revision %s', rev_id)

    def _copy_inventory(self, rev_id, inv_xml, parent_ids):
        """Add inv_xml to the destination's 'inventory' control weave.

        The text is stored as version rev_id with the given parent_ids.
        """
        self.to_control.add_text('inventory', rev_id,
                                 split_lines(inv_xml), parent_ids)

    def _copy_new_texts(self, rev_id, inv):
        """Copy any new texts occurring in this revision.

        An inventory entry counts as new when its ``revision`` equals
        rev_id; the weave of each such file is copied.
        """
        # TODO: Rather than writing out weaves every time, hold them
        # in memory until everything's done?  But this way is nicer
        # if it's interrupted.
        for path, ie in inv.iter_entries():
            if ie.revision != rev_id:
                continue
            mutter('%s {%s} is changed in this revision',
                   path, ie.file_id)
            self._copy_one_weave(rev_id, ie.file_id)

    def _copy_one_weave(self, rev_id, file_id):
        """Copy one file weave.

        The whole source weave for file_id is joined into the
        destination weave (created empty if absent) and written back.
        This is done at most once per file id for the lifetime of this
        Fetcher; rev_id is used only for logging.
        """
        mutter('copy file {%s} modified in {%s}', file_id, rev_id)
        if file_id in self.copied_file_ids:
            mutter('file {%s} already copied', file_id)
            return
        from_weave = self.from_weaves.get_weave(file_id)
        to_weave = self.to_weaves.get_weave_or_empty(file_id)
        to_weave.join(from_weave)
        self.to_weaves.put_weave(file_id, to_weave)
        self.count_weaves += 1
        self.copied_file_ids.add(file_id)
        mutter('copied file {%s}', file_id)

227

228

229

# Module-level alias for the class: calling fetch(...) constructs a
# Fetcher, and constructing a Fetcher performs the fetch.
fetch = Fetcher

Older »