~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/fetch.py

Committer: Martin Pool
Date: 2005-09-16 07:38:10 UTC
Revision ID: mbp@sourcefrog.net-20050916073810-1f358be198c9ed91

- fix bug in committing files that are renamed but not modified

- add test for this

files added:
bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_xml.py

bzrlib/weavestore.py

bzrlib/xml5.py

files renamed:
tools/testweave.py => bzrlib/selftest/test_weave.py

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

NEWS

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/inventory.py

bzrlib/merge.py

bzrlib/osutils.py

bzrlib/remotebranch.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

tools/history2weaves.py

Show diffs side-by-side

added added

removed removed

bzrlib/fetch.py

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import os

from cStringIO import StringIO

import bzrlib.errors

from bzrlib.trace import mutter, note

from bzrlib.branch import Branch

from bzrlib.trace import mutter, note, warning

from bzrlib.branch import Branch, INVENTORY_FILEID, ANCESTRY_FILEID

from bzrlib.progress import ProgressBar

import sys

import os

def has_revision(branch, revision_id):
    """Tell whether `branch` can produce the XML for `revision_id`.

    Returns True when branch.get_revision_xml() succeeds and False when it
    raises NoSuchRevision.  Any other exception is left to propagate.
    """
    try:
        # Only the success or failure of the lookup matters; the XML itself
        # is thrown away.
        branch.get_revision_xml(revision_id)
    except bzrlib.errors.NoSuchRevision:
        return False
    else:
        return True

from bzrlib.xml5 import serializer_v5

from bzrlib.osutils import sha_string, split_lines

from bzrlib.errors import NoSuchRevision

"""Copying of history from one branch to another.

The basic plan is that every branch knows the history of everything

that has merged into it. As the first step of a merge, pull, or

branch operation we copy history from the source into the destination

branch.

The copying is done in a slightly complicated order. We don't want to

add a revision to the store until everything it refers to is also

stored, so that if a revision is present we can totally recreate it.

However, we can't know what files are included in a revision until we

read its inventory. Therefore, we first pull the XML and hold it in

memory until we've updated all of the files referenced.

"""

# TODO: Avoid opening the same weaves over and over again.

# XXX: This doesn't handle ghost (not present in branch) revisions at

# all yet. I'm not sure they really should be supported.

# NOTE: This doesn't copy revisions which may be present but not

# merged into the last revision. I'm not sure we want to do that.

# - get a list of revisions that need to be pulled in

# - for each one, pull in that revision file

# and get the inventory, and store the inventory with right

# parents.

# - and get the ancestry, and store that with right parents too

# - and keep a note of all file ids and versions seen

# - then go through all files; for each one get the weave,

# and add in all file versions

def greedy_fetch(to_branch, from_branch, revision=None, pb=None):
    """Copy a revision and all available ancestors from one branch to another.

    If no revision is specified, uses the last revision in the source branch's
    revision history.

    The copying itself is carried out by constructing a Fetcher with the
    same arguments; this function only reports the Fetcher's results.

    to_branch -- branch that receives the revisions
    from_branch -- branch the revisions are read from
    revision -- revision id to fetch up to, or None
    pb -- progress bar passed on to the Fetcher, or None

    Returns a tuple (count_copied, failed_revisions) taken from the Fetcher.
    """
    f = Fetcher(to_branch, from_branch, revision, pb)
    return f.count_copied, f.failed_revisions

class Fetcher(object):
    """Pull revisions and texts from one branch to another.

    Constructing a Fetcher runs the whole fetch: the constructor finds the
    limiting revision, works out which revisions are missing from the
    destination and copies them.

    This doesn't update the destination's history; that can be done
    separately if desired.

    last_revision (constructor argument)
        If set, pull only up to this revision_id.  It must be in the
        source branch's revision history.

    After running:

    last_revision -- if last_revision
        is given it will be that, otherwise the last revision of
        from_branch (None if from_branch has no history)

    count_total -- number of revisions found to be missing from the
        destination's ancestry

    count_copied -- number of revisions copied

    count_texts -- number of file texts copied

    failed_revisions -- starts as an empty list; none of the methods in
        this class add to it

    new_ancestry -- the list of revision ids that were selected for
        fetching
    """

from_history = from_branch.revision_history()

required_revisions = set(from_history)

all_failed = set()

if revision is not None:

required_revisions.add(revision)

try:

rev_index = from_history.index(revision)

except ValueError:

rev_index = None

if rev_index is not None:

from_history = from_history[:rev_index + 1]

else:

from_history = [revision]

to_history = to_branch.revision_history()

missing = []

for rev_id in from_history:

if not has_revision(to_branch, rev_id):

missing.append(rev_id)

count = 0

while len(missing) > 0:

installed, failed = to_branch.install_revisions(from_branch,

revision_ids=missing,

pb=pb)

count += installed

required_failed = failed.intersection(required_revisions)

if len(required_failed) > 0:

raise bzrlib.errors.InstallFailed(required_failed)

for rev_id in failed:

note("Failed to install %s" % rev_id)

all_failed.update(failed)

new_missing = set()

for rev_id in missing:

try:

revision = from_branch.get_revision(rev_id)

except bzrlib.errors.NoSuchRevision:

if revision in from_history:

raise

else:

continue

for parent in [p.revision_id for p in revision.parents]:

if not has_revision(to_branch, parent):

new_missing.add(parent)

missing = new_missing

return count, all_failed

def __init__(self, to_branch, from_branch, last_revision=None, pb=None):
    """Record both branches and immediately perform the fetch.

    to_branch -- destination branch; its weave_store receives the texts
    from_branch -- source branch
    last_revision -- revision id to fetch up to, or None for the last
        revision of from_branch
    pb -- progress bar to report to; when None one is obtained from
        bzrlib.ui.ui_factory
    """
    self.to_branch = to_branch
    self.to_weaves = to_branch.weave_store
    self.from_branch = from_branch
    self.from_weaves = from_branch.weave_store

    # Result counters, all starting from zero.
    self.count_texts = 0
    self.count_total = 0
    self.count_copied = 0
    self.failed_revisions = []

    if pb is not None:
        self.pb = pb
    else:
        self.pb = bzrlib.ui.ui_factory.progress_bar()

    # The fetch itself: pick the limit, work out what is missing, copy it.
    self.last_revision = self._find_last_revision(last_revision)
    mutter('fetch up to rev {%s}', self.last_revision)
    wanted = self._compare_ancestries()
    self._copy_revisions(wanted)
    self.new_ancestry = wanted

105

106

107

108

def _find_last_revision(self, last_revision):
    """Find the limiting source revision.

    Every ancestor of that revision will be merged across.

    A false last_revision (None or empty) means "use the last entry of
    the source branch's revision history".

    Returns the revision_id, or returns None if there's no history
    in the source branch.  Raises NoSuchRevision if last_revision is
    given but is not in the source branch's revision history.
    """
    self.pb.update('get source history')
    source_history = self.from_branch.revision_history()
    # NOTE(review): this message is emitted although no destination
    # history is read in this method.
    self.pb.update('get destination history')

    if not last_revision:
        if source_history:
            return source_history[-1]
        return None     # no history in the source branch

    if last_revision in source_history:
        return last_revision
    raise NoSuchRevision(self.from_branch, last_revision)

127

128

129

def _compare_ancestries(self):
    """Get a list of revisions that must be copied.

    That is, every revision that's in the ancestry of the source
    branch (up to self.last_revision) and not in the ancestry of the
    destination branch's last revision.

    Stores the source ancestry in self.from_ancestry and the number of
    revisions to copy in self.count_total.  The returned list keeps the
    order of the source ancestry.
    """
    self.pb.update('get source ancestry')
    self.from_ancestry = self.from_branch.get_ancestry(self.last_revision)

    dest_tip = self.to_branch.last_revision()
    self.pb.update('get destination ancestry')
    already_present = set()
    if dest_tip:
        already_present.update(self.to_branch.get_ancestry(dest_tip))

    wanted = [rev_id for rev_id in self.from_ancestry
              if rev_id not in already_present]
    for rev_id in wanted:
        mutter('need to get revision {%s}', rev_id)

    total = len(wanted)
    mutter('need to get %d revisions in total', total)
    self.count_total = total
    return wanted

152

153

154

155

def _copy_revisions(self, revs_to_fetch):
    """Copy each listed revision that the destination does not have yet.

    Revisions already present in self.to_branch are skipped without a
    progress update.  self.count_copied is incremented once per revision
    actually copied.
    """
    for position, rev_id in enumerate(revs_to_fetch):
        if self.to_branch.has_revision(rev_id):
            continue
        # Progress is 1-based and counts skipped revisions too.
        self.pb.update('fetch revision', position + 1, self.count_total)
        self._copy_one_revision(rev_id)
        self.count_copied += 1

164

165

166

def _copy_one_revision(self, rev_id):
    """Copy revision and everything referenced by it.

    Reads the revision and inventory XML from the source branch, checks
    that they belong together, then copies the new file texts, the
    inventory and the ancestry.  The revision XML is added to the
    destination's revision store last, after everything it refers to.

    Raises AssertionError if the XML read back does not carry rev_id, or
    if the inventory does not match the sha1 recorded in the revision.
    """
    mutter('copying revision {%s}', rev_id)
    rev_xml = self.from_branch.get_revision_xml(rev_id)
    inv_xml = self.from_branch.get_inventory_xml(rev_id)
    rev = serializer_v5.read_revision_from_string(rev_xml)
    inv = serializer_v5.read_inventory_from_string(inv_xml)
    # These are integrity checks on data read from another branch, so they
    # are raised explicitly rather than written as assert statements, which
    # are compiled out under "python -O".  The exception type is unchanged.
    if rev.revision_id != rev_id:
        raise AssertionError(
            'revision {%s} was read when {%s} was requested'
            % (rev.revision_id, rev_id))
    if rev.inventory_sha1 != sha_string(inv_xml):
        raise AssertionError(
            'inventory sha1 mismatch for revision {%s}' % rev_id)
    mutter(' commiter %s, %d parents',
           rev.committer,
           len(rev.parents))
    self._copy_new_texts(rev_id, inv)
    parent_ids = [x.revision_id for x in rev.parents]
    self._copy_inventory(rev_id, inv_xml, parent_ids)
    self._copy_ancestry(rev_id, parent_ids)
    self.to_branch.revision_store.add(StringIO(rev_xml), rev_id)

183

184

185

def _copy_inventory(self, rev_id, inv_xml, parent_ids):
    """Add the inventory text of rev_id to the destination weave store.

    The XML is split into lines and stored under INVENTORY_FILEID with
    parent_ids as its parents.
    """
    inventory_lines = split_lines(inv_xml)
    self.to_weaves.add_text(INVENTORY_FILEID, rev_id,
                            inventory_lines, parent_ids)

188

189

190

def _copy_ancestry(self, rev_id, parent_ids):
    """Copy the ancestry text of rev_id from the source weave store.

    The lines stored under ANCESTRY_FILEID for rev_id are read from the
    source and added to the destination with parent_ids as parents.
    """
    lines = self.from_weaves.get_lines(ANCESTRY_FILEID, rev_id)
    self.to_weaves.add_text(ANCESTRY_FILEID, rev_id, lines, parent_ids)

194

195

196

def _copy_new_texts(self, rev_id, inv):
    """Copy any new texts occurring in this revision.

    Only inventory entries of kind 'file' whose text_version equals
    rev_id are copied; everything else is skipped.
    """
    # TODO: Rather than writing out weaves every time, hold them
    # in memory until everything's done?  But this way is nicer
    # if it's interrupted.
    for path, ie in inv.iter_entries():
        if ie.kind == 'file' and ie.text_version == rev_id:
            mutter('%s {%s} is changed in this revision',
                   path, ie.file_id)
            self._copy_one_text(rev_id, ie.file_id)

209

210

211

def _copy_one_text(self, rev_id, file_id):
    """Copy one file text.

    Looks up version rev_id in the source weave of file_id, translates
    its parents into the destination weave and adds the text there.
    The destination weave is written back and self.count_texts is
    incremented.
    """
    mutter('copy text version {%s} of file {%s}',
           rev_id, file_id)
    source = self.from_weaves.get_weave(file_id)
    source_idx = source.lookup(rev_id)
    # Parent indexes only make sense inside one weave, so go via names.
    parent_names = [source.idx_to_name(idx)
                    for idx in source.parents(source_idx)]
    lines = source.get(source_idx)
    target = self.to_weaves.get_weave_or_empty(file_id)
    target_parents = [target.lookup(name) for name in parent_names]
    # it's ok to add even if the text is already there
    target.add(rev_id, target_parents, lines)
    self.to_weaves.put_weave(file_id, target)
    self.count_texts += 1

225

226

227

# Module-level alias: `fetch` is another name for the Fetcher class, so
# calling fetch(...) constructs a Fetcher.
fetch = Fetcher

Older »