~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/fetch.py

Committer: Robert Collins
Date: 2006-01-05 00:16:01 UTC
mfrom: (1185.69.2 bzr-storage)
mto: (1185.65.25 storage) (1534.1.15 integration)
mto: This revision was merged to the branch mainline in revision 1550.
Revision ID: robertc@robertcollins.net-20060105001601-41706f37af6ae182

Merge from jam-storage.

files added:
BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS.developers

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/iterablefile.py

bzrlib/lockable_files.py

bzrlib/lsprof.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/plugins/__init__.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/store

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_parent.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_remove.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/transactions.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

tools/biobench.py

tools/capture_tree.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

files removed:
bzrlib/mdiff.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testbzr

testsweet.py

files renamed:
bzr-man.py => bzr_man.py

plugins/ => bzrlib/plugins/

bzrlib/store.py => bzrlib/store/__init__.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testbranch.py => bzrlib/tests/test_branch.py

bzrlib/selftest/testdiff.py => bzrlib/tests/test_diff.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testlog.py => bzrlib/tests/test_log.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/plugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevision.py => bzrlib/tests/test_revision.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/selftest/teststatus.py => bzrlib/tests/test_status.py

tools/testweave.py => bzrlib/tests/test_weave.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

effbot/ => bzrlib/util/effbot/

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py *

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/tests/__init__.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/zsh/_bzr

doc/index.txt

doc/random.txt

doc/todo-from-arch.txt

setup.py *

tools/weavebench.py

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/fetch.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from copy import copy

import os

from cStringIO import StringIO

import bzrlib

import bzrlib.errors as errors

from bzrlib.errors import (InstallFailed, NoSuchRevision, WeaveError,

MissingText)

from bzrlib.trace import mutter, note, warning

from bzrlib.branch import Branch

from bzrlib.progress import ProgressBar

from bzrlib.xml5 import serializer_v5

from bzrlib.osutils import sha_string, split_lines

"""Copying of history from one branch to another.

The basic plan is that every branch knows the history of everything

that has merged into it. As the first step of a merge, pull, or

branch operation we copy history from the source into the destination

branch.

The copying is done in a slightly complicated order. We don't want to

add a revision to the store until everything it refers to is also

stored, so that if a revision is present we can totally recreate it.

However, we can't know what files are included in a revision until we

read its inventory. Therefore, we first pull the XML and hold it in

memory until we've updated all of the files referenced.

"""

# TODO: Avoid repeatedly opening weaves so many times.

# XXX: This doesn't handle ghost (not present in branch) revisions at

# all yet. I'm not sure they really should be supported.

# NOTE: This doesn't copy revisions which may be present but not

# merged into the last revision. I'm not sure we want to do that.

# - get a list of revisions that need to be pulled in

# - for each one, pull in that revision file

# and get the inventory, and store the inventory with right

# parents.

# - and get the ancestry, and store that with right parents too

# - and keep a note of all file ids and version seen

# - then go through all files; for each one get the weave,

# and add in all file versions

def greedy_fetch(to_branch, from_branch, revision=None, pb=None):
    """Fetch history from from_branch into to_branch.

    Constructs a Fetcher (which performs the copy as part of its
    construction) and reports its counters.

    :param to_branch: branch to copy into.
    :param from_branch: branch to copy from.
    :param revision: passed to Fetcher as its last_revision limit.
    :param pb: optional progress bar, passed through to Fetcher.
    :return: a tuple (count_copied, failed_revisions) taken from the
        Fetcher after it has run.
    """
    fetcher = Fetcher(to_branch, from_branch, last_revision=revision, pb=pb)
    return (fetcher.count_copied, fetcher.failed_revisions)

class Fetcher(object):
    """Pull revisions and texts from one branch to another.

    This doesn't update the destination's history; that can be done
    separately if desired.

    All of the work is performed by the constructor: creating a Fetcher
    read-locks from_branch, copies whatever is missing, and unlocks again.

    last_revision
        If set, pull only up to this revision_id.

    After running:
    last_revision -- if last_revision is given it will be that,
        otherwise the last revision of from_branch (None if from_branch
        has no history)
    count_copied -- number of revisions copied
    count_weaves -- number of file weaves copied
    count_total -- number of revisions that were found to be missing
    copied_file_ids -- set of file ids whose weaves were written
    failed_revisions -- initialised to an empty list; nothing in this
        class appends to it
    from_ancestry, new_ancestry -- only set when the fetch did not
        return early because last_revision was already present in the
        destination
    """

    def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
        """Fetch from from_branch into to_branch.

        :param to_branch: destination branch.
        :param from_branch: source branch; read-locked while fetching.
        :param last_revision: revision id to fetch up to, or None to use
            the last revision of from_branch.
        :param pb: progress bar to report to; if None one is obtained
            from bzrlib.ui.ui_factory.
        :raises Exception: if to_branch == from_branch.
        :raises InstallFailed: if a WeaveError occurs while working out
            which revisions are missing.
        """
        if to_branch == from_branch:
            raise Exception("can't fetch from a branch to itself")
        self.to_branch = to_branch
        self.to_repository = to_branch.repository
        self.to_weaves = self.to_repository.weave_store
        self.to_control = self.to_repository.control_weaves
        self.from_branch = from_branch
        self.from_repository = from_branch.repository
        self.from_weaves = self.from_repository.weave_store
        self.from_control = self.from_repository.control_weaves
        self.failed_revisions = []
        self.count_copied = 0
        self.count_total = 0
        self.count_weaves = 0
        self.copied_file_ids = set()
        # Maps file_id -> version names known to be in the destination
        # weave, so that the weave need not be reopened for every revision.
        self.file_ids_names = {}
        if pb is None:
            # ``import bzrlib`` alone does not guarantee that the ``ui``
            # submodule has been loaded, so import it explicitly here.
            import bzrlib.ui
            self.pb = bzrlib.ui.ui_factory.progress_bar()
        else:
            self.pb = pb
        self.from_branch.lock_read()
        try:
            self._fetch_revisions(last_revision)
        finally:
            self.from_branch.unlock()
            self.pb.clear()

    def _fetch_revisions(self, last_revision):
        """Work out what is missing from the destination and copy it.

        Returns without copying anything when the limiting revision is
        already present in the destination repository.

        :raises InstallFailed: if comparing ancestries raises WeaveError.
        """
        self.last_revision = self._find_last_revision(last_revision)
        mutter('fetch up to rev {%s}', self.last_revision)
        if (self.last_revision is not None and
            self.to_repository.has_revision(self.last_revision)):
            return
        try:
            revs_to_fetch = self._compare_ancestries()
        except WeaveError:
            raise InstallFailed([self.last_revision])
        self._copy_revisions(revs_to_fetch)
        self.new_ancestry = revs_to_fetch

    def _find_last_revision(self, last_revision):
        """Find the limiting source revision.

        Every ancestor of that revision will be merged across.

        Returns the revision_id, or returns None if there's no history
        in the source branch.
        """
        if last_revision:
            return last_revision
        self.pb.update('get source history')
        from_history = self.from_branch.revision_history()
        # NOTE(review): no destination history is read here; this progress
        # message looks like a leftover but is kept so that the progress
        # output is unchanged.
        self.pb.update('get destination history')
        if from_history:
            return from_history[-1]
        # no history in the source branch
        return None

    def _compare_ancestries(self):
        """Get a list of revisions that must be copied.

        That is, every revision that's in the ancestry of the source
        branch and not in the destination branch.  The order of the
        source ancestry is preserved.  Also records the source ancestry
        in self.from_ancestry and the number of missing revisions in
        self.count_total.
        """
        self.pb.update('get source ancestry')
        self.from_ancestry = self.from_repository.get_ancestry(
            self.last_revision)

        dest_last_rev = self.to_branch.last_revision()
        self.pb.update('get destination ancestry')
        if dest_last_rev:
            dest_ancestry = self.to_repository.get_ancestry(dest_last_rev)
        else:
            dest_ancestry = []
        # A set gives constant-time membership tests in the loop below.
        already_present = set(dest_ancestry)
        to_fetch = []
        for rev_id in self.from_ancestry:
            if rev_id not in already_present:
                to_fetch.append(rev_id)
                mutter('need to get revision {%s}', rev_id)
        mutter('need to get %d revisions in total', len(to_fetch))
        self.count_total = len(to_fetch)
        return to_fetch

    def _copy_revisions(self, revs_to_fetch):
        """Copy each listed revision that the destination does not have.

        None entries and revisions already present in the destination
        are skipped.  The progress position is the 1-based index into
        revs_to_fetch, counting skipped entries too.
        """
        for index, rev_id in enumerate(revs_to_fetch):
            if rev_id is None:
                continue
            if self.to_repository.has_revision(rev_id):
                continue
            self.pb.update('copy revision', index + 1, self.count_total)
            self._copy_one_revision(rev_id)
            self.count_copied += 1

    def _copy_one_revision(self, rev_id):
        """Copy revision and everything referenced by it.

        The file texts are copied first, then the inventory, and the
        revision itself last, so that the revision is only added to the
        destination after the things it refers to.

        :raises AssertionError: if the revision read from the source does
            not carry the expected id, or its recorded inventory sha1
            does not match the inventory text that was read.
        """
        mutter('copying revision {%s}', rev_id)
        rev_xml = self.from_repository.get_revision_xml(rev_id)
        inv_xml = self.from_repository.get_inventory_xml(rev_id)
        rev = serializer_v5.read_revision_from_string(rev_xml)
        inv = serializer_v5.read_inventory_from_string(inv_xml)
        # Explicit raises rather than ``assert`` so that these integrity
        # checks still run when Python is started with -O.
        if rev.revision_id != rev_id:
            raise AssertionError(
                'revision id mismatch: expected {%s}, got {%s}'
                % (rev_id, rev.revision_id))
        if rev.inventory_sha1 != sha_string(inv_xml):
            raise AssertionError(
                'inventory sha1 mismatch for revision {%s}' % (rev_id,))
        mutter(' commiter %s, %d parents',
               rev.committer,
               len(rev.parent_ids))
        self._copy_new_texts(rev_id, inv)
        # Only parents that exist in the destination can be recorded as
        # parents of the inventory text there.
        present_parents = [parent for parent in rev.parent_ids
                           if self.to_repository.has_revision(parent)]
        self._copy_inventory(rev_id, inv_xml, present_parents)
        self.to_repository.revision_store.add(StringIO(rev_xml), rev_id)
        mutter('copied revision %s', rev_id)

    def _copy_inventory(self, rev_id, inv_xml, parent_ids):
        """Add the inventory text for rev_id to the destination.

        The text is added to the destination control weaves under the
        name 'inventory', with parent_ids as its parents.
        """
        self.to_control.add_text('inventory', rev_id,
                                 split_lines(inv_xml), parent_ids,
                                 self.to_repository.get_transaction())

    def _copy_new_texts(self, rev_id, inv):
        """Copy any new texts occurring in this revision."""
        # TODO: Rather than writing out weaves every time, hold them
        # in memory until everything's done?  But this way is nicer
        # if it's interrupted.
        for path, ie in inv.iter_entries():
            self._copy_one_weave(rev_id, ie.file_id, ie.revision)

    def _copy_one_weave(self, rev_id, file_id, text_revision):
        """Copy one file weave, ensuring the result contains text_revision.

        :raises MissingText: if the source weave for file_id does not
            contain text_revision.
        """
        # check if the revision is already there
        if (file_id in self.file_ids_names and
            text_revision in self.file_ids_names[file_id]):
            return
        to_weave = self.to_weaves.get_weave_or_empty(
            file_id, self.to_repository.get_transaction())
        if file_id not in self.file_ids_names:
            self.file_ids_names[file_id] = to_weave.names()
        if text_revision in to_weave:
            return
        from_weave = self.from_weaves.get_weave(
            file_id, self.from_repository.get_transaction())
        if text_revision not in from_weave:
            raise MissingText(self.from_branch, text_revision, file_id)
        mutter('copy file {%s} modified in {%s}', file_id, rev_id)

        if to_weave.numversions() > 0:
            # destination has contents, must merge
            try:
                to_weave.join(from_weave)
            except errors.WeaveParentMismatch:
                to_weave.reweave(from_weave)
        else:
            # destination is empty, just replace it
            to_weave = from_weave.copy()
        self.to_weaves.put_weave(file_id, to_weave,
                                 self.to_repository.get_transaction())
        self.count_weaves += 1
        self.copied_file_ids.add(file_id)
        # Refresh the cache so later revisions see the versions just added.
        self.file_ids_names[file_id] = to_weave.names()
        mutter('copied file {%s}', file_id)

256

257

258

# Module-level alias: ``fetch`` is another name for the Fetcher class.
fetch = Fetcher

Older »