~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/graph.py

Committer: Vincent Ladeuil
Date: 2007-06-20 14:25:06 UTC
mfrom: (2540 +trunk)
mto: This revision was merged to the branch mainline in revision 2646.
Revision ID: v.ladeuil+lp@free.fr-20070620142506-txsb1v8538kpsafw

merge bzr.dev @ 2540

files added:
bzrlib/graph.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_info.py

bzrlib/tests/test_lsprof.py

doc/developers/planned-change-integration.txt

doc/developers/profiling.txt

files renamed:
bzrlib/graph.py => bzrlib/deprecated_graph.py

bzrlib/tests/test_graph.py => bzrlib/tests/test_deprecated_graph.py

files modified:
.bzrignore

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/commands.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/errors.py

bzrlib/help_topics.py

bzrlib/info.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/osutils.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/symbol_versioning.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/weave_commands.py

bzrlib/workingtree.py

doc/developers/index.txt

doc/developers/performance-roadmap.txt

doc/developers/performance.dot

doc/developers/planned-performance-changes.txt

Show diffs side-by-side

added added

removed removed

bzrlib/graph.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bzrlib import errors

from bzrlib.deprecated_graph import (node_distances, select_farthest)

from bzrlib.revision import NULL_REVISION

# DIAGRAM of terminology

# A

# /\

# B C

# | |\

# D E F

# |\/| |

# |/\|/

# G H

# In this diagram, relative to G and H:

# A, B, C, D, E are common ancestors.

# C, D and E are border ancestors, because each has a non-common descendant.

# D and E are least common ancestors because none of their descendants are

# common ancestors.

# C is not a least common ancestor because its descendant, E, is a common

# ancestor.

# The find_unique_lca algorithm will pick A in two steps:

# 1. find_lca('G', 'H') => ['D', 'E']

# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']

class _StackedParentsProvider(object):

def __init__(self, parent_providers):

self._parent_providers = parent_providers

def __repr__(self):

return "_StackedParentsProvider(%r)" % self._parent_providers

def get_parents(self, revision_ids):

"""Find revision ids of the parents of a list of revisions

A list is returned of the same length as the input. Each entry

is a list of parent ids for the corresponding input revision.

[NULL_REVISION] is used as the parent of the first user-committed

revision. Its parent list is empty.

If the revision is not present (i.e. a ghost), None is used in place

of the list of parents.

"""

found = {}

for parents_provider in self._parent_providers:

pending_revisions = [r for r in revision_ids if r not in found]

parent_list = parents_provider.get_parents(pending_revisions)

new_found = dict((k, v) for k, v in zip(pending_revisions,

parent_list) if v is not None)

found.update(new_found)

if len(found) == len(revision_ids):

break

return [found.get(r, None) for r in revision_ids]

class Graph(object):

"""Provide incremental access to revision graphs.

This is the generic implementation; it is intended to be subclassed to

specialize it for other repository types.

"""

def __init__(self, parents_provider):

"""Construct a Graph that uses several graphs as its input

This should not normally be invoked directly, because there may be

specialized implementations for particular repository types. See

Repository.get_graph()

:param parents_func: an object providing a get_parents call

conforming to the behavior of StackedParentsProvider.get_parents

"""

self.get_parents = parents_provider.get_parents

self._parents_provider = parents_provider

def __repr__(self):

return 'Graph(%r)' % self._parents_provider

100

def find_lca(self, *revisions):

101

"""Determine the lowest common ancestors of the provided revisions

102

103

A lowest common ancestor is a common ancestor none of whose

104

descendants are common ancestors. In graphs, unlike trees, there may

105

be multiple lowest common ancestors.

106

107

This algorithm has two phases. Phase 1 identifies border ancestors,

108

and phase 2 filters border ancestors to determine lowest common

109

ancestors.

110

111

In phase 1, border ancestors are identified, using a breadth-first

112

search starting at the bottom of the graph. Searches are stopped

113

whenever a node or one of its descendants is determined to be common

114

115

In phase 2, the border ancestors are filtered to find the least

116

common ancestors. This is done by searching the ancestries of each

117

border ancestor.

118

119

Phase 2 is perfomed on the principle that a border ancestor that is

120

not an ancestor of any other border ancestor is a least common

121

ancestor.

122

123

Searches are stopped when they find a node that is determined to be a

124

common ancestor of all border ancestors, because this shows that it

125

cannot be a descendant of any border ancestor.

126

127

The scaling of this operation should be proportional to

128

1. The number of uncommon ancestors

129

2. The number of border ancestors

130

3. The length of the shortest path between a border ancestor and an

131

ancestor of all border ancestors.

132

"""

133

border_common, common, sides = self._find_border_ancestors(revisions)

134

return self._filter_candidate_lca(border_common)

135

136

def find_difference(self, left_revision, right_revision):

137

"""Determine the graph difference between two revisions"""

138

border, common, (left, right) = self._find_border_ancestors(

139

[left_revision, right_revision])

140

return (left.difference(right).difference(common),

141

right.difference(left).difference(common))

142

143

def _make_breadth_first_searcher(self, revisions):

144

return _BreadthFirstSearcher(revisions, self)

145

146

def _find_border_ancestors(self, revisions):

147

"""Find common ancestors with at least one uncommon descendant.

148

149

Border ancestors are identified using a breadth-first

150

search starting at the bottom of the graph. Searches are stopped

151

whenever a node or one of its descendants is determined to be common.

152

153

This will scale with the number of uncommon ancestors.

154

155

As well as the border ancestors, a set of seen common ancestors and a

156

list of sets of seen ancestors for each input revision is returned.

157

This allows calculation of graph difference from the results of this

158

operation.

159

"""

160

if None in revisions:

161

raise errors.InvalidRevisionId(None, self)

162

common_searcher = self._make_breadth_first_searcher([])

163

common_ancestors = set()

164

searchers = [self._make_breadth_first_searcher([r])

165

for r in revisions]

166

active_searchers = searchers[:]

167

border_ancestors = set()

168

def update_common(searcher, revisions):

169

w_seen_ancestors = searcher.find_seen_ancestors(

170

revision)

171

stopped = searcher.stop_searching_any(w_seen_ancestors)

172

common_ancestors.update(w_seen_ancestors)

173

common_searcher.start_searching(stopped)

174

175

while True:

176

if len(active_searchers) == 0:

177

return border_ancestors, common_ancestors, [s.seen for s in

178

searchers]

179

try:

180

new_common = common_searcher.next()

181

common_ancestors.update(new_common)

182

except StopIteration:

183

pass

184

else:

185

for searcher in active_searchers:

186

for revision in new_common.intersection(searcher.seen):

187

update_common(searcher, revision)

188

189

newly_seen = set()

190

new_active_searchers = []

191

for searcher in active_searchers:

192

try:

193

newly_seen.update(searcher.next())

194

except StopIteration:

195

pass

196

else:

197

new_active_searchers.append(searcher)

198

active_searchers = new_active_searchers

199

for revision in newly_seen:

200

if revision in common_ancestors:

201

for searcher in searchers:

202

update_common(searcher, revision)

203

continue

204

for searcher in searchers:

205

if revision not in searcher.seen:

206

break

207

else:

208

border_ancestors.add(revision)

209

for searcher in searchers:

210

update_common(searcher, revision)

211

212

def _filter_candidate_lca(self, candidate_lca):

213

"""Remove candidates which are ancestors of other candidates.

214

215

This is done by searching the ancestries of each border ancestor. It

216

is perfomed on the principle that a border ancestor that is not an

217

ancestor of any other border ancestor is a lowest common ancestor.

218

219

Searches are stopped when they find a node that is determined to be a

220

common ancestor of all border ancestors, because this shows that it

221

cannot be a descendant of any border ancestor.

222

223

This will scale with the number of candidate ancestors and the length

224

of the shortest path from a candidate to an ancestor common to all

225

candidates.

226

"""

227

searchers = dict((c, self._make_breadth_first_searcher([c]))

228

for c in candidate_lca)

229

active_searchers = dict(searchers)

230

# skip over the actual candidate for each searcher

231

for searcher in active_searchers.itervalues():

232

searcher.next()

233

while len(active_searchers) > 0:

234

for candidate, searcher in list(active_searchers.iteritems()):

235

try:

236

ancestors = searcher.next()

237

except StopIteration:

238

del active_searchers[candidate]

239

continue

240

for ancestor in ancestors:

241

if ancestor in candidate_lca:

242

candidate_lca.remove(ancestor)

243

del searchers[ancestor]

244

if ancestor in active_searchers:

245

del active_searchers[ancestor]

246

for searcher in searchers.itervalues():

247

if ancestor not in searcher.seen:

248

break

249

else:

250

# if this revision was seen by all searchers, then it

251

# is a descendant of all candidates, so we can stop

252

# searching it, and any seen ancestors

253

for searcher in searchers.itervalues():

254

seen_ancestors =\

255

searcher.find_seen_ancestors(ancestor)

256

searcher.stop_searching_any(seen_ancestors)

257

return candidate_lca

258

259

def find_unique_lca(self, left_revision, right_revision):

260

"""Find a unique LCA.

261

262

Find lowest common ancestors. If there is no unique common

263

ancestor, find the lowest common ancestors of those ancestors.

264

265

Iteration stops when a unique lowest common ancestor is found.

266

The graph origin is necessarily a unique lowest common ancestor.

267

268

Note that None is not an acceptable substitute for NULL_REVISION.

269

in the input for this method.

270

"""

271

revisions = [left_revision, right_revision]

272

while True:

273

lca = self.find_lca(*revisions)

274

if len(lca) == 1:

275

return lca.pop()

276

revisions = lca

277

278

279

class _BreadthFirstSearcher(object):

280

"""Parallel search the breadth-first the ancestry of revisions.

281

282

This class implements the iterator protocol, but additionally

283

1. provides a set of seen ancestors, and

284

2. allows some ancestries to be unsearched, via stop_searching_any

285

"""

286

287

def __init__(self, revisions, parents_provider):

288

self._start = set(revisions)

289

self._search_revisions = None

290

self.seen = set(revisions)

291

self._parents_provider = parents_provider

292

293

def __repr__(self):

294

return ('_BreadthFirstSearcher(self._search_revisions=%r,'

295

' self.seen=%r)' % (self._search_revisions, self.seen))

296

297

def next(self):

298

"""Return the next ancestors of this revision.

299

300

Ancestors are returned in the order they are seen in a breadth-first

301

traversal. No ancestor will be returned more than once.

302

"""

303

if self._search_revisions is None:

304

self._search_revisions = self._start

305

else:

306

new_search_revisions = set()

307

for parents in self._parents_provider.get_parents(

308

self._search_revisions):

309

if parents is None:

310

continue

311

new_search_revisions.update(p for p in parents if

312

p not in self.seen)

313

self._search_revisions = new_search_revisions

314

if len(self._search_revisions) == 0:

315

raise StopIteration()

316

self.seen.update(self._search_revisions)

317

return self._search_revisions

318

319

def __iter__(self):

320

return self

321

322

def find_seen_ancestors(self, revision):

323

"""Find ancestors of this revision that have already been seen."""

324

searcher = _BreadthFirstSearcher([revision], self._parents_provider)

325

seen_ancestors = set()

326

for ancestors in searcher:

327

for ancestor in ancestors:

328

if ancestor not in self.seen:

329

searcher.stop_searching_any([ancestor])

330

else:

331

seen_ancestors.add(ancestor)

332

return seen_ancestors

333

334

def stop_searching_any(self, revisions):

335

"""

336

Remove any of the specified revisions from the search list.

337

338

None of the specified revisions are required to be present in the

339

search list. In this case, the call is a no-op.

340

"""

341

stopped = self._search_revisions.intersection(revisions)

342

self._search_revisions = self._search_revisions.difference(revisions)

343

return stopped

344

345

def start_searching(self, revisions):

346

if self._search_revisions is None:

347

self._start = set(revisions)

348

else:

349

self._search_revisions.update(revisions.difference(self.seen))

350

self.seen.update(revisions)

Older »