~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/fetch.py

Committer: John Arbash Meinel
Date: 2006-01-02 21:20:36 UTC
mfrom: (1185.61.5 bzr.dev)
mto: (1534.1.1 integration) (1505.1.30 bzr-bound-branch) (1185.69.1 bzr-storage) (1553.5.1 bzr.dev (Main development branch)) (1608.2.1 bzr.mbp.escape-stores)
mto: This revision was merged to the branch mainline in revision 1536.
Revision ID: john@arbash-meinel.com-20060102212036-5013c8712ee1e1ba

[merge] Jamie Wilkinson, don't export .bzrignore

files added:
INSTALL

NEWS.developers

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/clone.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/lsprof.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/store

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_parent.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_remove.py

bzrlib/tests/test_revision_info.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/transactions.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

tools/biobench.py

tools/capture_tree.py

tools/riodemo.py

tools/trace-revisions

files removed:
bzrlib/mdiff.py

bzrlib/meta_store.py

bzrlib/plugins/checkperms

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

testsweet.py

files renamed:
bzr-man.py => bzr_man.py

bzrlib/store.py => bzrlib/store/__init__.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testbranch.py => bzrlib/tests/test_branch.py

bzrlib/selftest/testdiff.py => bzrlib/tests/test_diff.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testlog.py => bzrlib/tests/test_log.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/plugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevision.py => bzrlib/tests/test_revision.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/selftest/teststatus.py => bzrlib/tests/test_status.py

tools/testweave.py => bzrlib/tests/test_weave.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

bzrlib/ui.py => bzrlib/ui/__init__.py

tools/history2weaves.py => bzrlib/upgrade.py

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py *

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/tests/__init__.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_smart_add.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/zsh/_bzr

doc/index.txt

doc/random.txt

doc/todo-from-arch.txt

setup.py *

tools/weavebench.py

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/fetch.py

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import bzrlib.errors

from bzrlib.selftest.testrevision import make_branches

from bzrlib.trace import mutter

from copy import copy

import os

from cStringIO import StringIO

import bzrlib

import bzrlib.errors as errors

from bzrlib.errors import (InstallFailed, NoSuchRevision, WeaveError,

MissingText)

from bzrlib.trace import mutter, note, warning

from bzrlib.branch import Branch

import sys

import os

def greedy_fetch(from_branch, to_branch, last_revision=None):

from_history = from_branch.revision_history()

if last_revision is not None:

from_history = from_history[:from_history.index(last_revision)+1]

to_history = to_branch.revision_history()

missing = []

for rev_id in from_history:

if not has_revision(to_branch, rev_id):

missing.append(rev_id)

while len(missing) > 0:

to_branch.install_revisions(from_branch, revision_ids=missing)

new_missing = []

for rev_id in missing:

from bzrlib.progress import ProgressBar

from bzrlib.xml5 import serializer_v5

from bzrlib.osutils import sha_string, split_lines

"""Copying of history from one branch to another.

The basic plan is that every branch knows the history of everything

that has merged into it. As the first step of a merge, pull, or

branch operation we copy history from the source into the destination

branch.

The copying is done in a slightly complicated order. We don't want to

add a revision to the store until everything it refers to is also

stored, so that if a revision is present we can totally recreate it.

However, we can't know what files are included in a revision until we

read its inventory. Therefore, we first pull the XML and hold it in

memory until we've updated all of the files referenced.

"""

# TODO: Avoid repeatedly opening weaves so many times.

# XXX: This doesn't handle ghost (not present in branch) revisions at

# all yet. I'm not sure they really should be supported.

# NOTE: This doesn't copy revisions which may be present but not

# merged into the last revision. I'm not sure we want to do that.

# - get a list of revisions that need to be pulled in

# - for each one, pull in that revision file

# and get the inventory, and store the inventory with right

# parents.

# - and get the ancestry, and store that with right parents too

# - and keep a note of all file ids and version seen

# - then go through all files; for each one get the weave,

# and add in all file versions

def greedy_fetch(to_branch, from_branch, revision=None, pb=None):
    """Copy history from from_branch into to_branch via a Fetcher.

    The Fetcher performs the whole fetch while it is being constructed, so
    this wrapper only has to build one and report its counters.

    :param to_branch: branch that receives the revisions.
    :param from_branch: branch the revisions are read from.
    :param revision: handed to Fetcher as its last_revision limit.
    :param pb: optional progress bar handed to the Fetcher.
    :return: tuple (count_copied, failed_revisions) taken from the Fetcher.
    """
    fetcher = Fetcher(to_branch, from_branch, last_revision=revision, pb=pb)
    copied = fetcher.count_copied
    failed = fetcher.failed_revisions
    return copied, failed

class Fetcher(object):

"""Pull revisions and texts from one branch to another.

This doesn't update the destination's history; that can be done

separately if desired.

revision_limit

If set, pull only up to this revision_id.

After running:

last_revision -- if last_revision

is given it will be that, otherwise the last revision of

from_branch

count_copied -- number of revisions copied

count_weaves -- number of file weaves copied

"""

def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
    """Record both branches, reset the counters and run the fetch.

    :param to_branch: destination branch; its weave_store and
        control_weaves are kept on the instance.
    :param from_branch: source branch; read-locked while fetching.
    :param last_revision: optional limit passed to _fetch_revisions.
    :param pb: progress bar to report to; when None one is requested from
        bzrlib.ui.ui_factory.
    :raises Exception: if to_branch compares equal to from_branch.
    """
    if to_branch == from_branch:
        raise Exception("can't fetch from a branch to itself")

    # Destination side.
    self.to_branch = to_branch
    self.to_weaves = to_branch.weave_store
    self.to_control = to_branch.control_weaves

    # Source side.
    self.from_branch = from_branch
    self.from_weaves = from_branch.weave_store
    self.from_control = from_branch.control_weaves

    # Bookkeeping that the copy methods update as they go.
    self.failed_revisions = []
    self.count_copied = self.count_total = self.count_weaves = 0
    self.copied_file_ids = set()
    self.file_ids_names = {}

    if pb is not None:
        self.pb = pb
    else:
        self.pb = bzrlib.ui.ui_factory.progress_bar()

    # The fetch itself happens here; the source lock is released and the
    # progress bar cleared whether or not it succeeds.
    self.from_branch.lock_read()
    try:
        self._fetch_revisions(last_revision)
    finally:
        self.from_branch.unlock()
        self.pb.clear()

115

116

def _fetch_revisions(self, last_revision):
    """Work out what is missing in the destination and copy it.

    Sets self.last_revision, and, unless the destination already holds
    that revision, copies the missing revisions and stores the list of
    them in self.new_ancestry.

    :param last_revision: requested limit, resolved through
        _find_last_revision.
    :raises InstallFailed: if comparing ancestries raises WeaveError.
    """
    limit = self._find_last_revision(last_revision)
    self.last_revision = limit
    mutter('fetch up to rev {%s}', limit)

    already_present = (limit is not None and
                       self.to_branch.has_revision(limit))
    if already_present:
        # Nothing to do: the limiting revision is already in the target.
        return

    try:
        pending = self._compare_ancestries()
    except WeaveError:
        raise InstallFailed([limit])

    self._copy_revisions(pending)
    self.new_ancestry = pending

128

129

def _find_last_revision(self, last_revision):

130

"""Find the limiting source revision.

131

132

Every ancestor of that revision will be merged across.

133

134

Returns the revision_id, or returns None if there's no history

135

in the source branch."""

136

if last_revision:

137

return last_revision

138

self.pb.update('get source history')

139

from_history = self.from_branch.revision_history()

140

self.pb.update('get destination history')

141

if from_history:

142

return from_history[-1]

143

else:

144

return None # no history in the source branch

145

146

147

def _compare_ancestries(self):
    """Return the revisions that have to be copied, in source order.

    These are the entries of the source ancestry (up to
    self.last_revision) that do not appear in the ancestry of the
    destination's last revision.  Also stores the source ancestry in
    self.from_ancestry and the number of missing revisions in
    self.count_total.
    """
    self.pb.update('get source ancestry')
    source_ancestry = self.from_branch.get_ancestry(self.last_revision)
    self.from_ancestry = source_ancestry

    dest_tip = self.to_branch.last_revision()
    self.pb.update('get destination ancestry')
    already_have = set()
    if dest_tip:
        already_have.update(self.to_branch.get_ancestry(dest_tip))

    missing = [rev_id for rev_id in source_ancestry
               if rev_id not in already_have]
    for rev_id in missing:
        mutter('need to get revision {%s}', rev_id)

    total = len(missing)
    mutter('need to get %d revisions in total', total)
    self.count_total = total
    return missing

170

171

def _copy_revisions(self, revs_to_fetch):

172

i = 0

173

for rev_id in revs_to_fetch:

174

i += 1

175

if rev_id is None:

176

continue

177

if self.to_branch.has_revision(rev_id):

178

continue

179

self.pb.update('copy revision', i, self.count_total)

180

self._copy_one_revision(rev_id)

181

self.count_copied += 1

182

183

184

def _copy_one_revision(self, rev_id):
    """Copy one revision together with its inventory and file texts.

    The texts named by the inventory are copied first, then the inventory
    (recorded only against parents the destination already has), and the
    revision XML is added to the destination's revision store last.

    :param rev_id: id of the revision to read from the source branch.
    """
    mutter('copying revision {%s}', rev_id)
    source = self.from_branch
    rev_xml = source.get_revision_xml(rev_id)
    inv_xml = source.get_inventory_xml(rev_id)
    revision = serializer_v5.read_revision_from_string(rev_xml)
    inventory = serializer_v5.read_inventory_from_string(inv_xml)

    # The parsed revision must be the one asked for, and must match the
    # inventory text that was fetched alongside it.
    assert revision.revision_id == rev_id
    assert revision.inventory_sha1 == sha_string(inv_xml)
    mutter(' commiter %s, %d parents',
           revision.committer,
           len(revision.parent_ids))

    self._copy_new_texts(rev_id, inventory)

    # Keep only the parents that exist in the destination branch.
    present_parents = [parent for parent in revision.parent_ids
                       if self.to_branch.has_revision(parent)]
    self._copy_inventory(rev_id, inv_xml, present_parents)
    self.to_branch.revision_store.add(StringIO(rev_xml), rev_id)
    mutter('copied revision %s', rev_id)

205

206

def _copy_inventory(self, rev_id, inv_xml, parent_ids):
    """Add the inventory text for rev_id to the destination control weaves.

    :param rev_id: revision id the inventory is stored under.
    :param inv_xml: inventory XML text; split into lines before storing.
    :param parent_ids: parent revision ids to record for this version.
    """
    inventory_lines = split_lines(inv_xml)
    transaction = self.to_branch.get_transaction()
    self.to_control.add_text('inventory', rev_id, inventory_lines,
                             parent_ids, transaction)

210

211

def _copy_new_texts(self, rev_id, inv):

212

"""Copy any new texts occuring in this revision."""

213

# TODO: Rather than writing out weaves every time, hold them

214

# in memory until everything's done? But this way is nicer

215

# if it's interrupted.

216

for path, ie in inv.iter_entries():

217

self._copy_one_weave(rev_id, ie.file_id, ie.revision)

218

219

def _copy_one_weave(self, rev_id, file_id, text_revision):

220

"""Copy one file weave, ensuring the result contains text_revision."""

221

# check if the revision is already there

222

if file_id in self.file_ids_names.keys( ) and \

223

text_revision in self.file_ids_names[file_id]:

224

return

225

to_weave = self.to_weaves.get_weave_or_empty(file_id,

226

self.to_branch.get_transaction())

227

if not file_id in self.file_ids_names.keys( ):

228

self.file_ids_names[file_id] = to_weave.names( )

229

if text_revision in to_weave:

230

return

231

from_weave = self.from_weaves.get_weave(file_id,

232

self.from_branch.get_transaction())

233

if text_revision not in from_weave:

234

raise MissingText(self.from_branch, text_revision, file_id)

235

mutter('copy file {%s} modified in {%s}', file_id, rev_id)

236

237

if to_weave.numversions() > 0:

238

# destination has contents, must merge

239

try:

revision = from_branch.get_revision(rev_id)

except bzrlib.errors.NoSuchRevision:

if revision in from_history:

raise

else:

continue

for parent in [p.revision_id for p in revision.parents]:

if not has_revision(to_branch, parent):

new_missing.append(parent)

missing = new_missing

from testsweet import InTempDir

def has_revision(branch, revision_id):
    """Report whether branch can produce the XML for revision_id.

    :return: False when branch.get_revision_xml raises NoSuchRevision,
        True when it returns normally.
    """
    try:
        branch.get_revision_xml(revision_id)
    except bzrlib.errors.NoSuchRevision:
        return False
    return True

# Exercises greedy_fetch between the two branches built by make_branches()
# and two freshly initialised branches.
# NOTE(review): the calls below read as greedy_fetch(from_branch, to_branch,
# last_revision) -- confirm which greedy_fetch signature this test targets.
class TestFetch(InTempDir):

def runTest(self):

# Helper: create directory `name` and initialise a new Branch in it.
def new_branch(name):

os.mkdir(name)

return Branch(name, init=True)

#highest indices a: 3, b: 4

br_a, br_b = make_branches()

# Starting state: br_b has br_a's revision at index 2 but not index 3.
assert not has_revision(br_b, br_a.revision_history()[3])

assert has_revision(br_b, br_a.revision_history()[2])

assert len(br_b.revision_history()) == 5

# Fetch limited to br_a's index-2 revision, which br_b already has.
greedy_fetch(br_a, br_b, br_a.revision_history()[2])

# greedy_fetch is not supposed to alter the revision history

assert len(br_b.revision_history()) == 5

assert not has_revision(br_b, br_a.revision_history()[3])

assert len(br_b.revision_history()) == 5

# Fetch limited to br_a's index-3 revision; br_b must now contain it.
greedy_fetch(br_a, br_b, br_a.revision_history()[3])

assert has_revision(br_b, br_a.revision_history()[3])

# Opposite direction: br_a lacks br_b's revisions 3 and 4 until fetched.
assert not has_revision(br_a, br_b.revision_history()[3])

assert not has_revision(br_a, br_b.revision_history()[4])

greedy_fetch(br_b, br_a)

assert has_revision(br_a, br_b.revision_history()[3])

assert has_revision(br_a, br_b.revision_history()[4])

# Fetching br_b into a new empty branch.
br_b2 = new_branch('br_b2')

greedy_fetch(br_b, br_b2)

assert has_revision(br_b2, br_b.revision_history()[4])

assert has_revision(br_b2, br_a.revision_history()[2])

assert not has_revision(br_b2, br_a.revision_history()[3])

# Fetching br_a into a new empty branch.
br_a2 = new_branch('br_a2')

greedy_fetch(br_a, br_a2)

assert has_revision(br_a2, br_b.revision_history()[4])

assert has_revision(br_a2, br_a.revision_history()[3])

# Script entry point: run the test suite and exit with its result.
# NOTE(review): neither `run_suite` nor `unittest` is imported in the visible
# text, and makeSuite() is called without a test case class -- confirm
# against the complete original file.
if __name__ == '__main__':

import sys

sys.exit(run_suite(unittest.makeSuite()))

240

to_weave.join(from_weave)

241

except errors.WeaveParentMismatch:

242

to_weave.reweave(from_weave)

243

else:

244

# destination is empty, just replace it

245

to_weave = from_weave.copy( )

246

self.to_weaves.put_weave(file_id, to_weave,

247

self.to_branch.get_transaction())

248

self.count_weaves += 1

249

self.copied_file_ids.add(file_id)

250

self.file_ids_names[file_id] = to_weave.names()

251

mutter('copied file {%s}', file_id)

252

253

254

fetch = Fetcher

Older »