~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/graph.py

Committer: Canonical.com Patch Queue Manager
Date: 2007-06-18 21:07:10 UTC
mfrom: (2490.2.27 graphwalker)
Revision ID: pqm@pqm.ubuntu.com-20070618210710-6y8wzcqiw2kvxdiy

Better merge base selection and graph API

files added:
bzrlib/graph.py

bzrlib/tests/test_graph.py

files renamed:
bzrlib/graph.py => bzrlib/deprecated_graph.py

bzrlib/tests/test_graph.py => bzrlib/tests/test_deprecated_graph.py

files modified:
NEWS

bzrlib/builtins.py

bzrlib/bundle/commands.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/tests/__init__.py

bzrlib/tests/test_revision.py

bzrlib/workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/graph.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bzrlib.deprecated_graph import (node_distances, select_farthest)

from bzrlib.revision import NULL_REVISION

# DIAGRAM of terminology

# A

# /\

# B C

# | |\

# D E F

# |\/| |

# |/\|/

# G H

# In this diagram, relative to G and H:

# A, B, C, D, E are common ancestors.

# C, D and E are border ancestors, because each has a non-common descendant.

# D and E are least common ancestors because none of their descendants are

# common ancestors.

# C is not a least common ancestor because its descendant, E, is a common

# ancestor.

# The find_unique_lca algorithm will pick A in two steps:

# 1. find_lca('G', 'H') => ['D', 'E']

# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']

class _StackedParentsProvider(object):

def __init__(self, parent_providers):

self._parent_providers = parent_providers

def get_parents(self, revision_ids):

"""Find revision ids of the parents of a list of revisions

A list is returned of the same length as the input. Each entry

is a list of parent ids for the corresponding input revision.

[NULL_REVISION] is used as the parent of the first user-committed

revision. Its parent list is empty.

If the revision is not present (i.e. a ghost), None is used in place

of the list of parents.

"""

found = {}

for parents_provider in self._parent_providers:

pending_revisions = [r for r in revision_ids if r not in found]

parent_list = parents_provider.get_parents(pending_revisions)

new_found = dict((k, v) for k, v in zip(pending_revisions,

parent_list) if v is not None)

found.update(new_found)

if len(found) == len(revision_ids):

break

return [found.get(r, None) for r in revision_ids]

class Graph(object):

"""Provide incremental access to revision graphs.

This is the generic implementation; it is intended to be subclassed to

specialize it for other repository types.

"""

def __init__(self, parents_provider):

"""Construct a Graph that uses several graphs as its input

This should not normally be invoked directly, because there may be

specialized implementations for particular repository types. See

Repository.get_graph()

:param parents_func: an object providing a get_parents call

conforming to the behavior of StackedParentsProvider.get_parents

"""

self.get_parents = parents_provider.get_parents

def find_lca(self, *revisions):

"""Determine the lowest common ancestors of the provided revisions

A lowest common ancestor is a common ancestor none of whose

descendants are common ancestors. In graphs, unlike trees, there may

be multiple lowest common ancestors.

This algorithm has two phases. Phase 1 identifies border ancestors,

100

and phase 2 filters border ancestors to determine lowest common

101

ancestors.

102

103

In phase 1, border ancestors are identified, using a breadth-first

104

search starting at the bottom of the graph. Searches are stopped

105

whenever a node or one of its descendants is determined to be common

106

107

In phase 2, the border ancestors are filtered to find the least

108

common ancestors. This is done by searching the ancestries of each

109

border ancestor.

110

111

Phase 2 is perfomed on the principle that a border ancestor that is

112

not an ancestor of any other border ancestor is a least common

113

ancestor.

114

115

Searches are stopped when they find a node that is determined to be a

116

common ancestor of all border ancestors, because this shows that it

117

cannot be a descendant of any border ancestor.

118

119

The scaling of this operation should be proportional to

120

1. The number of uncommon ancestors

121

2. The number of border ancestors

122

3. The length of the shortest path between a border ancestor and an

123

ancestor of all border ancestors.

124

"""

125

border_common, common, sides = self._find_border_ancestors(revisions)

126

return self._filter_candidate_lca(border_common)

127

128

def find_difference(self, left_revision, right_revision):

129

"""Determine the graph difference between two revisions"""

130

border, common, (left, right) = self._find_border_ancestors(

131

[left_revision, right_revision])

132

return (left.difference(right).difference(common),

133

right.difference(left).difference(common))

134

135

def _make_breadth_first_searcher(self, revisions):

136

return _BreadthFirstSearcher(revisions, self)

137

138

def _find_border_ancestors(self, revisions):

139

"""Find common ancestors with at least one uncommon descendant.

140

141

Border ancestors are identified using a breadth-first

142

search starting at the bottom of the graph. Searches are stopped

143

whenever a node or one of its descendants is determined to be common.

144

145

This will scale with the number of uncommon ancestors.

146

147

As well as the border ancestors, a set of seen common ancestors and a

148

list of sets of seen ancestors for each input revision is returned.

149

This allows calculation of graph difference from the results of this

150

operation.

151

"""

152

common_searcher = self._make_breadth_first_searcher([])

153

common_ancestors = set()

154

searchers = [self._make_breadth_first_searcher([r])

155

for r in revisions]

156

active_searchers = searchers[:]

157

border_ancestors = set()

158

def update_common(searcher, revisions):

159

w_seen_ancestors = searcher.find_seen_ancestors(

160

revision)

161

stopped = searcher.stop_searching_any(w_seen_ancestors)

162

common_ancestors.update(w_seen_ancestors)

163

common_searcher.start_searching(stopped)

164

165

while True:

166

if len(active_searchers) == 0:

167

return border_ancestors, common_ancestors, [s.seen for s in

168

searchers]

169

try:

170

new_common = common_searcher.next()

171

common_ancestors.update(new_common)

172

except StopIteration:

173

pass

174

else:

175

for searcher in active_searchers:

176

for revision in new_common.intersection(searcher.seen):

177

update_common(searcher, revision)

178

179

newly_seen = set()

180

new_active_searchers = []

181

for searcher in active_searchers:

182

try:

183

newly_seen.update(searcher.next())

184

except StopIteration:

185

pass

186

else:

187

new_active_searchers.append(searcher)

188

active_searchers = new_active_searchers

189

for revision in newly_seen:

190

if revision in common_ancestors:

191

for searcher in searchers:

192

update_common(searcher, revision)

193

continue

194

for searcher in searchers:

195

if revision not in searcher.seen:

196

break

197

else:

198

border_ancestors.add(revision)

199

for searcher in searchers:

200

update_common(searcher, revision)

201

202

def _filter_candidate_lca(self, candidate_lca):

203

"""Remove candidates which are ancestors of other candidates.

204

205

This is done by searching the ancestries of each border ancestor. It

206

is perfomed on the principle that a border ancestor that is not an

207

ancestor of any other border ancestor is a lowest common ancestor.

208

209

Searches are stopped when they find a node that is determined to be a

210

common ancestor of all border ancestors, because this shows that it

211

cannot be a descendant of any border ancestor.

212

213

This will scale with the number of candidate ancestors and the length

214

of the shortest path from a candidate to an ancestor common to all

215

candidates.

216

"""

217

searchers = dict((c, self._make_breadth_first_searcher([c]))

218

for c in candidate_lca)

219

active_searchers = dict(searchers)

220

# skip over the actual candidate for each searcher

221

for searcher in active_searchers.itervalues():

222

searcher.next()

223

while len(active_searchers) > 0:

224

for candidate, searcher in list(active_searchers.iteritems()):

225

try:

226

ancestors = searcher.next()

227

except StopIteration:

228

del active_searchers[candidate]

229

continue

230

for ancestor in ancestors:

231

if ancestor in candidate_lca:

232

candidate_lca.remove(ancestor)

233

del searchers[ancestor]

234

if ancestor in active_searchers:

235

del active_searchers[ancestor]

236

for searcher in searchers.itervalues():

237

if ancestor not in searcher.seen:

238

break

239

else:

240

# if this revision was seen by all searchers, then it

241

# is a descendant of all candidates, so we can stop

242

# searching it, and any seen ancestors

243

for searcher in searchers.itervalues():

244

seen_ancestors =\

245

searcher.find_seen_ancestors(ancestor)

246

searcher.stop_searching_any(seen_ancestors)

247

return candidate_lca

248

249

def find_unique_lca(self, left_revision, right_revision):

250

"""Find a unique LCA.

251

252

Find lowest common ancestors. If there is no unique common

253

ancestor, find the lowest common ancestors of those ancestors.

254

255

Iteration stops when a unique lowest common ancestor is found.

256

The graph origin is necessarily a unique lowest common ancestor.

257

258

Note that None is not an acceptable substitute for NULL_REVISION.

259

in the input for this method.

260

"""

261

revisions = [left_revision, right_revision]

262

while True:

263

lca = self.find_lca(*revisions)

264

if len(lca) == 1:

265

return lca.pop()

266

revisions = lca

267

268

269

class _BreadthFirstSearcher(object):

270

"""Parallel search the breadth-first the ancestry of revisions.

271

272

This class implements the iterator protocol, but additionally

273

1. provides a set of seen ancestors, and

274

2. allows some ancestries to be unsearched, via stop_searching_any

275

"""

276

277

def __init__(self, revisions, parents_provider):

278

self._start = set(revisions)

279

self._search_revisions = None

280

self.seen = set(revisions)

281

self._parents_provider = parents_provider

282

283

def __repr__(self):

284

return ('_BreadthFirstSearcher(self._search_revisions=%r,'

285

' self.seen=%r)' % (self._search_revisions, self.seen))

286

287

def next(self):

288

"""Return the next ancestors of this revision.

289

290

Ancestors are returned in the order they are seen in a breadth-first

291

traversal. No ancestor will be returned more than once.

292

"""

293

if self._search_revisions is None:

294

self._search_revisions = self._start

295

else:

296

new_search_revisions = set()

297

for parents in self._parents_provider.get_parents(

298

self._search_revisions):

299

if parents is None:

300

continue

301

new_search_revisions.update(p for p in parents if

302

p not in self.seen)

303

self._search_revisions = new_search_revisions

304

if len(self._search_revisions) == 0:

305

raise StopIteration()

306

self.seen.update(self._search_revisions)

307

return self._search_revisions

308

309

def __iter__(self):

310

return self

311

312

def find_seen_ancestors(self, revision):

313

"""Find ancestors of this revision that have already been seen."""

314

searcher = _BreadthFirstSearcher([revision], self._parents_provider)

315

seen_ancestors = set()

316

for ancestors in searcher:

317

for ancestor in ancestors:

318

if ancestor not in self.seen:

319

searcher.stop_searching_any([ancestor])

320

else:

321

seen_ancestors.add(ancestor)

322

return seen_ancestors

323

324

def stop_searching_any(self, revisions):

325

"""

326

Remove any of the specified revisions from the search list.

327

328

None of the specified revisions are required to be present in the

329

search list. In this case, the call is a no-op.

330

"""

331

stopped = self._search_revisions.intersection(revisions)

332

self._search_revisions = self._search_revisions.difference(revisions)

333

return stopped

334

335

def start_searching(self, revisions):

336

if self._search_revisions is None:

337

self._start = set(revisions)

338

else:

339

self._search_revisions.update(revisions.difference(self.seen))

340

self.seen.update(revisions)

Older »