~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/graph.py

Committer: Robert Collins
Date: 2007-06-26 08:52:20 UTC
mto: This revision was merged to the branch mainline in revision 2554.
Revision ID: robertc@robertcollins.net-20070626085220-iovhwfjflk8vffbh

Add require_api API.

files added:
bzrlib/api.py

bzrlib/branchbuilder.py

bzrlib/graph.py

bzrlib/smtp_connection.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_info.py

bzrlib/tests/test_smtp_connection.py

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundles.txt

doc/developers/container-format.txt

doc/developers/gc.txt

doc/developers/initial-push-pull.txt

doc/developers/merge-scaling.txt

doc/developers/performance-commit.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/revert.txt

files renamed:
bzrlib/graph.py => bzrlib/deprecated_graph.py

bzrlib/tests/test_graph.py => bzrlib/tests/test_deprecated_graph.py

files modified:
.bzrignore

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/help_topics.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/osutils.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/status.py

bzrlib/symbol_versioning.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_http.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transform.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/tsort.py

bzrlib/urlutils.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

contrib/bash/bzr.simple

doc/configuration.txt

doc/developers/HACKING

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/profiling.txt

doc/tutorial.txt

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/graph.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bzrlib import (

errors,

tsort,

)

from bzrlib.deprecated_graph import (node_distances, select_farthest)

from bzrlib.revision import NULL_REVISION

# DIAGRAM of terminology

#       A
#       /\
#      B  C
#      |  |\
#      D  E F
#      |\/| |
#      |/\|/
#      G  H

# In this diagram, relative to G and H:

# A, B, C, D, E are common ancestors.

# C, D and E are border ancestors, because each has a non-common descendant.

# D and E are least common ancestors because none of their descendants are

# common ancestors.

# C is not a least common ancestor because its descendant, E, is a common

# ancestor.

# The find_unique_lca algorithm will pick A in two steps:

# 1. find_lca('G', 'H') => ['D', 'E']

# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']

class _StackedParentsProvider(object):

def __init__(self, parent_providers):

self._parent_providers = parent_providers

def __repr__(self):

return "_StackedParentsProvider(%r)" % self._parent_providers

def get_parents(self, revision_ids):

"""Find revision ids of the parents of a list of revisions

A list is returned of the same length as the input. Each entry

is a list of parent ids for the corresponding input revision.

[NULL_REVISION] is used as the parent of the first user-committed

revision. Its parent list is empty.

If the revision is not present (i.e. a ghost), None is used in place

of the list of parents.

"""

found = {}

for parents_provider in self._parent_providers:

pending_revisions = [r for r in revision_ids if r not in found]

parent_list = parents_provider.get_parents(pending_revisions)

new_found = dict((k, v) for k, v in zip(pending_revisions,

parent_list) if v is not None)

found.update(new_found)

if len(found) == len(revision_ids):

break

return [found.get(r, None) for r in revision_ids]

class Graph(object):
    """Provide incremental access to revision graphs.

    This is the generic implementation; it is intended to be subclassed to
    specialize it for other repository types.
    """

    def __init__(self, parents_provider):
        """Construct a Graph that uses several graphs as its input

        This should not normally be invoked directly, because there may be
        specialized implementations for particular repository types.  See
        Repository.get_graph()

        :param parents_provider: an object providing a get_parents call
            conforming to the behavior of StackedParentsProvider.get_parents
        """
        self.get_parents = parents_provider.get_parents
        self._parents_provider = parents_provider

    def __repr__(self):
        # 1-tuple keeps '%' from unpacking a provider that is itself a tuple.
        return 'Graph(%r)' % (self._parents_provider,)

    def find_lca(self, *revisions):
        """Determine the lowest common ancestors of the provided revisions

        A lowest common ancestor is a common ancestor none of whose
        descendants are common ancestors.  In graphs, unlike trees, there may
        be multiple lowest common ancestors.

        This algorithm has two phases.  Phase 1 identifies border ancestors,
        and phase 2 filters border ancestors to determine lowest common
        ancestors.

        In phase 1, border ancestors are identified, using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        In phase 2, the border ancestors are filtered to find the least
        common ancestors.  This is done by searching the ancestries of each
        border ancestor.

        Phase 2 is performed on the principle that a border ancestor that is
        not an ancestor of any other border ancestor is a least common
        ancestor.

        Searches are stopped when they find a node that is determined to be a
        common ancestor of all border ancestors, because this shows that it
        cannot be a descendant of any border ancestor.

        The scaling of this operation should be proportional to
        1. The number of uncommon ancestors
        2. The number of border ancestors
        3. The length of the shortest path between a border ancestor and an
           ancestor of all border ancestors.

        :return: the set of lowest common ancestors.
        """
        border_common, common, sides = self._find_border_ancestors(revisions)
        return self._filter_candidate_lca(border_common)

    def find_difference(self, left_revision, right_revision):
        """Determine the graph difference between two revisions

        :return: a pair (left_only, right_only) of sets: the revisions seen
            while searching from one side that were neither seen from the
            other side nor identified as common ancestors.
        """
        border, common, (left, right) = self._find_border_ancestors(
            [left_revision, right_revision])
        return (left.difference(right).difference(common),
                right.difference(left).difference(common))

    def _make_breadth_first_searcher(self, revisions):
        """Return a searcher over this graph starting at revisions."""
        return _BreadthFirstSearcher(revisions, self)

    def _find_border_ancestors(self, revisions):
        """Find common ancestors with at least one uncommon descendant.

        Border ancestors are identified using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        This will scale with the number of uncommon ancestors.

        As well as the border ancestors, a set of seen common ancestors and a
        list of sets of seen ancestors for each input revision is returned.
        This allows calculation of graph difference from the results of this
        operation.

        :raises errors.InvalidRevisionId: if None is among the revisions.
        """
        if None in revisions:
            raise errors.InvalidRevisionId(None, self)
        # Walks the ancestry that is already known to be common, so that the
        # per-revision searchers can be stopped when they run into it.
        common_searcher = self._make_breadth_first_searcher([])
        common_ancestors = set()
        searchers = [self._make_breadth_first_searcher([r])
                     for r in revisions]
        active_searchers = searchers[:]
        border_ancestors = set()

        def update_common(searcher, revision):
            # 'revision' is now known to be common: stop this searcher from
            # walking it (and whatever it has already seen above it) and
            # hand the stopped revisions over to the common searcher.
            w_seen_ancestors = searcher.find_seen_ancestors(revision)
            stopped = searcher.stop_searching_any(w_seen_ancestors)
            common_ancestors.update(w_seen_ancestors)
            common_searcher.start_searching(stopped)

        while True:
            if not active_searchers:
                return (border_ancestors, common_ancestors,
                        [s.seen for s in searchers])
            try:
                new_common = common_searcher.next()
                common_ancestors.update(new_common)
            except StopIteration:
                pass
            else:
                for searcher in active_searchers:
                    for revision in new_common.intersection(searcher.seen):
                        update_common(searcher, revision)

            # Advance every live searcher by one generation.
            newly_seen = set()
            new_active_searchers = []
            for searcher in active_searchers:
                try:
                    newly_seen.update(searcher.next())
                except StopIteration:
                    pass
                else:
                    new_active_searchers.append(searcher)
            active_searchers = new_active_searchers
            for revision in newly_seen:
                if revision in common_ancestors:
                    for searcher in searchers:
                        update_common(searcher, revision)
                    continue
                for searcher in searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # Seen from every input revision but not previously
                    # known to be common: this is a border ancestor.
                    border_ancestors.add(revision)
                    for searcher in searchers:
                        update_common(searcher, revision)

    def _filter_candidate_lca(self, candidate_lca):
        """Remove candidates which are ancestors of other candidates.

        This is done by searching the ancestries of each border ancestor.  It
        is performed on the principle that a border ancestor that is not an
        ancestor of any other border ancestor is a lowest common ancestor.

        Searches are stopped when they find a node that is determined to be a
        common ancestor of all border ancestors, because this shows that it
        cannot be a descendant of any border ancestor.

        This will scale with the number of candidate ancestors and the length
        of the shortest path from a candidate to an ancestor common to all
        candidates.

        :param candidate_lca: a set of candidates; it is modified in place
            and also returned.
        """
        searchers = dict((c, self._make_breadth_first_searcher([c]))
                         for c in candidate_lca)
        active_searchers = dict(searchers)
        # skip over the actual candidate for each searcher
        for searcher in active_searchers.values():
            searcher.next()
        while len(active_searchers) > 0:
            # Iterate over a snapshot: entries are deleted inside the loop.
            for candidate, searcher in list(active_searchers.items()):
                if candidate not in active_searchers:
                    # Removed earlier in this same pass because it turned
                    # out to be an ancestor of another candidate.
                    continue
                try:
                    ancestors = searcher.next()
                except StopIteration:
                    del active_searchers[candidate]
                    continue
                for ancestor in ancestors:
                    if ancestor in candidate_lca:
                        candidate_lca.remove(ancestor)
                        del searchers[ancestor]
                        if ancestor in active_searchers:
                            del active_searchers[ancestor]
                    for other in searchers.values():
                        if ancestor not in other.seen:
                            break
                    else:
                        # if this revision was seen by all searchers, then it
                        # is an ancestor of all remaining candidates, so we
                        # can stop searching it, and any seen ancestors
                        for other in searchers.values():
                            seen_ancestors = other.find_seen_ancestors(
                                ancestor)
                            other.stop_searching_any(seen_ancestors)
        return candidate_lca

    def find_unique_lca(self, left_revision, right_revision):
        """Find a unique LCA.

        Find lowest common ancestors.  If there is no unique common
        ancestor, find the lowest common ancestors of those ancestors.

        Iteration stops when a unique lowest common ancestor is found.
        The graph origin is necessarily a unique lowest common ancestor.

        Note that None is not an acceptable substitute for NULL_REVISION
        in the input for this method.

        :raises errors.NoCommonAncestor: if the revisions turn out to share
            no ancestor at all.
        """
        revisions = [left_revision, right_revision]
        while True:
            lca = self.find_lca(*revisions)
            if len(lca) == 1:
                return lca.pop()
            if len(lca) == 0:
                # Without this guard the loop would spin forever asking for
                # the LCA of an empty set of revisions.
                raise errors.NoCommonAncestor(left_revision, right_revision)
            revisions = lca

    def iter_topo_order(self, revisions):
        """Iterate through the input revisions in topological order.

        This sorting only ensures that parents come before their children.
        An ancestor may sort after a descendant if the relationship is not
        visible in the supplied list of revisions.
        """
        sorter = tsort.TopoSorter(zip(revisions, self.get_parents(revisions)))
        return sorter.iter_topo_order()

290

291

292

class _BreadthFirstSearcher(object):

293

"""Parallel search the breadth-first the ancestry of revisions.

294

295

This class implements the iterator protocol, but additionally

296

1. provides a set of seen ancestors, and

297

2. allows some ancestries to be unsearched, via stop_searching_any

298

"""

299

300

def __init__(self, revisions, parents_provider):

301

self._start = set(revisions)

302

self._search_revisions = None

303

self.seen = set(revisions)

304

self._parents_provider = parents_provider

305

306

def __repr__(self):

307

return ('_BreadthFirstSearcher(self._search_revisions=%r,'

308

' self.seen=%r)' % (self._search_revisions, self.seen))

309

310

def next(self):

311

"""Return the next ancestors of this revision.

312

313

Ancestors are returned in the order they are seen in a breadth-first

314

traversal. No ancestor will be returned more than once.

315

"""

316

if self._search_revisions is None:

317

self._search_revisions = self._start

318

else:

319

new_search_revisions = set()

320

for parents in self._parents_provider.get_parents(

321

self._search_revisions):

322

if parents is None:

323

continue

324

new_search_revisions.update(p for p in parents if

325

p not in self.seen)

326

self._search_revisions = new_search_revisions

327

if len(self._search_revisions) == 0:

328

raise StopIteration()

329

self.seen.update(self._search_revisions)

330

return self._search_revisions

331

332

def __iter__(self):

333

return self

334

335

def find_seen_ancestors(self, revision):

336

"""Find ancestors of this revision that have already been seen."""

337

searcher = _BreadthFirstSearcher([revision], self._parents_provider)

338

seen_ancestors = set()

339

for ancestors in searcher:

340

for ancestor in ancestors:

341

if ancestor not in self.seen:

342

searcher.stop_searching_any([ancestor])

343

else:

344

seen_ancestors.add(ancestor)

345

return seen_ancestors

346

347

def stop_searching_any(self, revisions):

348

"""

349

Remove any of the specified revisions from the search list.

350

351

None of the specified revisions are required to be present in the

352

search list. In this case, the call is a no-op.

353

"""

354

stopped = self._search_revisions.intersection(revisions)

355

self._search_revisions = self._search_revisions.difference(revisions)

356

return stopped

357

358

def start_searching(self, revisions):

359

if self._search_revisions is None:

360

self._start = set(revisions)

361

else:

362

self._search_revisions.update(revisions.difference(self.seen))

363

self.seen.update(revisions)

Older »