~bzr-pqm/bzr/bzr.dev : revision 4449

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

16

17

"""Implementation of Graph algorithms when we have already loaded everything.

18

"""

19

20

import heapq

21

22

from bzrlib import (

23

revision,

24

)

25

26

27

class _KnownGraphNode(object):

28

"""Represents a single object in the known graph."""

29

30

__slots__ = ('key', 'parent_keys', 'child_keys', 'linear_dominator',

31

'gdfo', 'ancestor_of')

32

33

def __init__(self, key, parent_keys):

34

self.key = key

35

self.parent_keys = parent_keys

36

self.child_keys = []

37

# oldest ancestor, such that no parents between here and there have >1

38

# child or >1 parent.

39

self.linear_dominator = None

40

# Greatest distance from origin

41

self.gdfo = None

42

# This will become a tuple of known heads that have this node as an

43

# ancestor

44

self.ancestor_of = None

45

46

def __repr__(self):

47

return '%s(%s gdfo:%s par:%s child:%s %s)' % (

48

self.__class__.__name__, self.key, self.gdfo,

49

self.parent_keys, self.child_keys,

50

self.linear_dominator)

51

52

53

class KnownGraph(object):

54

"""This is a class which assumes we already know the full graph."""

55

56

def __init__(self, parent_map, do_cache=True):

57

"""Create a new KnownGraph instance.

58

59

:param parent_map: A dictionary mapping key => parent_keys

60

"""

61

self._nodes = {}

62

# Maps {sorted(revision_id, revision_id): heads}

63

self._known_heads = {}

64

self.do_cache = do_cache

65

self._initialize_nodes(parent_map)

66

self._find_linear_dominators()

67

self._find_gdfo()

68

69

def _initialize_nodes(self, parent_map):

70

"""Populate self._nodes.

71

72

After this has finished, self._nodes will have an entry for every entry

73

in parent_map. Ghosts will have a parent_keys = None, all nodes found

74

will also have .child_keys populated with all known child_keys.

75

"""

76

nodes = self._nodes

77

for key, parent_keys in parent_map.iteritems():

78

if key in nodes:

79

node = nodes[key]

80

node.parent_keys = parent_keys

81

else:

82

node = _KnownGraphNode(key, parent_keys)

83

nodes[key] = node

84

for parent_key in parent_keys:

85

try:

86

parent_node = nodes[parent_key]

87

except KeyError:

88

parent_node = _KnownGraphNode(parent_key, None)

89

nodes[parent_key] = parent_node

90

parent_node.child_keys.append(key)

91

92

def _find_linear_dominators(self):

93

"""For each node in the set, find any linear dominators.

94

95

For any given node, the 'linear dominator' is an ancestor, such that

96

all parents between this node and that one have a single parent, and a

97

single child. So if A->B->C->D then B,C,D all have a linear dominator

98

of A.

99

100

There are two main benefits:

101

1) When walking the graph, we can jump to the nearest linear dominator,

102

rather than walking all of the nodes inbetween.

103

2) When caching heads() results, dominators give the "same" results as

104

their children. (If the dominator is a head, then the descendant is

105

a head, if the dominator is not a head, then the child isn't

106

either.)

107

"""

108

def check_node(node):

109

if node.parent_keys is None or len(node.parent_keys) != 1:

110

# This node is either a ghost, a tail, or has multiple parents

111

# It its own dominator

112

node.linear_dominator = node.key

113

return None

114

parent_node = self._nodes[node.parent_keys[0]]

115

if len(parent_node.child_keys) > 1:

116

# The parent has multiple children, so *this* node is the

117

# dominator

118

node.linear_dominator = node.key

119

return None

120

# The parent is already filled in, so add and continue

121

if parent_node.linear_dominator is not None:

122

node.linear_dominator = parent_node.linear_dominator

123

return None

124

# We don't know this node, or its parent node, so start walking to

125

# next

126

return parent_node

127

128

for node in self._nodes.itervalues():

129

# The parent is not filled in, so walk until we get somewhere

130

if node.linear_dominator is not None: #already done

131

continue

132

next_node = check_node(node)

133

if next_node is None:

134

# Nothing more needs to be done

135

continue

136

stack = []

137

while next_node is not None:

138

stack.append(node)

139

node = next_node

140

next_node = check_node(node)

141

# The stack now contains the linear chain, and 'node' should have

142

# been labeled

143

dominator = node.linear_dominator

144

while stack:

145

next_node = stack.pop()

146

next_node.linear_dominator = dominator

147

node = next_node

148

149

def _find_gdfo(self):

150

def find_tails():

151

return [node for node in self._nodes.itervalues()

152

if not node.parent_keys]

153

tails = find_tails()

154

todo = []

155

heappush = heapq.heappush

156

heappop = heapq.heappop

157

nodes = self._nodes

158

for node in tails:

159

node.gdfo = 1

160

heappush(todo, (1, node))

161

processed = 0

162

while todo:

163

gdfo, next = heappop(todo)

164

processed += 1

165

if next.gdfo is not None and gdfo < next.gdfo:

166

# This node was reached from a longer path, we assume it was

167

# enqued correctly with the longer gdfo, so don't continue

168

# processing now

169

continue

170

next_gdfo = gdfo + 1

171

for child_key in next.child_keys:

172

child_node = nodes[child_key]

173

if child_node.gdfo is None or child_node.gdfo < next_gdfo:

174

# Only enque children when all of their parents have been

175

# resolved

176

for parent_key in child_node.parent_keys:

177

# We know that 'this' parent is counted

178

if parent_key != next.key:

179

parent_node = nodes[parent_key]

180

if parent_node.gdfo is None:

181

break

182

else:

183

child_node.gdfo = next_gdfo

184

heappush(todo, (next_gdfo, child_node))

185

186

def _get_dominators_to_nodes(self, candidate_nodes):

187

"""Get the reverse mapping from dominator_key => candidate_nodes.

188

189

As a side effect, this can also remove potential candidate nodes if we

190

determine that they share a dominator.

191

"""

192

dom_to_node = {}

193

keys_to_remove = []

194

for node in candidate_nodes.values():

195

if node.linear_dominator in dom_to_node:

196

# This node already exists, resolve which node supersedes the

197

# other

198

other_node = dom_to_node[node.linear_dominator]

199

# There should be no way that nodes sharing a dominator could

200

# 'tie' for gdfo

201

if other_node.gdfo > node.gdfo:

202

# The other node has this node as an ancestor

203

keys_to_remove.append(node.key)

204

else:

205

# Replace the other node, and set this as the new key

206

keys_to_remove.append(other_node.key)

207

dom_to_node[node.linear_dominator] = node

208

else:

209

dom_to_node[node.linear_dominator] = node

210

for key in keys_to_remove:

211

candidate_nodes.pop(key)

212

return dom_to_node

213

214

def heads(self, keys):

215

"""Return the heads from amongst keys.

216

217

This is done by searching the ancestries of each key. Any key that is

218

reachable from another key is not returned; all the others are.

219

220

This operation scales with the relative depth between any two keys. If

221

any two keys are completely disconnected all ancestry of both sides

222

will be retrieved.

223

224

:param keys: An iterable of keys.

225

:return: A set of the heads. Note that as a set there is no ordering

226

information. Callers will need to filter their input to create

227

order if they need it.

228

"""

229

candidate_nodes = dict((key, self._nodes[key]) for key in keys)

230

if revision.NULL_REVISION in candidate_nodes:

231

# NULL_REVISION is only a head if it is the only entry

232

candidate_nodes.pop(revision.NULL_REVISION)

233

if not candidate_nodes:

234

return set([revision.NULL_REVISION])

235

if len(candidate_nodes) < 2:

236

return frozenset(candidate_nodes)

237

heads_key = frozenset(candidate_nodes)

238

if heads_key != frozenset(keys):

239

note('%s != %s', heads_key, frozenset(keys))

240

try:

241

heads = self._known_heads[heads_key]

242

return heads

243

except KeyError:

244

pass # compute it ourselves

245

dom_to_node = self._get_dominators_to_nodes(candidate_nodes)

246

if len(candidate_nodes) < 2:

247

# We shrunk candidate_nodes and determined a new head

248

return frozenset(candidate_nodes)

249

dom_heads_key = None

250

# Check the linear dominators of these keys, to see if we already

251

# know the heads answer

252

dom_heads_key = frozenset([node.linear_dominator

253

for node in candidate_nodes.itervalues()])

254

if dom_heads_key in self._known_heads:

255

# map back into the original keys

256

heads = self._known_heads[dom_heads_key]

257

heads = frozenset([dom_to_node[key].key for key in heads])

258

return heads

259

heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)

260

if self.do_cache:

261

self._known_heads[heads_key] = heads

262

# Cache the dominator heads

263

if dom_heads_key is not None:

264

dom_heads = frozenset([candidate_nodes[key].linear_dominator

265

for key in heads])

266

self._known_heads[dom_heads_key] = dom_heads

267

return heads

268

269

def _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):

270

queue = []

271

to_cleanup = []

272

to_cleanup_append = to_cleanup.append

273

for node in candidate_nodes.itervalues():

274

node.ancestor_of = (node.key,)

275

queue.append((-node.gdfo, node))

276

to_cleanup_append(node)

277

heapq.heapify(queue)

278

# These are nodes that we determined are 'common' that we are no longer

279

# walking

280

# Now we walk nodes until all nodes that are being walked are 'common'

281

num_candidates = len(candidate_nodes)

282

nodes = self._nodes

283

heappop = heapq.heappop

284

heappush = heapq.heappush

285

while queue and len(candidate_nodes) > 1:

286

_, node = heappop(queue)

287

next_ancestor_of = node.ancestor_of

288

if len(next_ancestor_of) == num_candidates:

289

# This node is now considered 'common'

290

# Make sure all parent nodes are marked as such

291

for parent_key in node.parent_keys:

292

parent_node = nodes[parent_key]

293

if parent_node.ancestor_of is not None:

294

parent_node.ancestor_of = next_ancestor_of

295

if node.linear_dominator != node.key:

296

parent_node = nodes[node.linear_dominator]

297

if parent_node.ancestor_of is not None:

298

parent_node.ancestor_of = next_ancestor_of

299

continue

300

if node.parent_keys is None:

301

# This is a ghost

302

continue

303

# Now project the current nodes ancestor list to the parent nodes,

304

# and queue them up to be walked

305

# Note: using linear_dominator speeds things up quite a bit

306

# enough that we actually start to be slightly faster

307

# than the default heads() implementation

308

if node.linear_dominator != node.key:

309

# We are at the tip of a long linear region

310

# We know that there is nothing between here and the tail

311

# that is interesting, so skip to the end

312

parent_keys = [node.linear_dominator]

313

else:

314

parent_keys = node.parent_keys

315

for parent_key in parent_keys:

316

if parent_key in candidate_nodes:

317

candidate_nodes.pop(parent_key)

318

if len(candidate_nodes) <= 1:

319

break

320

elif parent_key in dom_to_node:

321

orig_node = dom_to_node[parent_key]

322

if orig_node is not node:

323

if orig_node.key in candidate_nodes:

324

candidate_nodes.pop(orig_node.key)

325

if len(candidate_nodes) <= 1:

326

break

327

parent_node = nodes[parent_key]

328

ancestor_of = parent_node.ancestor_of

329

if ancestor_of is None:

330

# This node hasn't been walked yet

331

parent_node.ancestor_of = next_ancestor_of

332

# Enqueue this node

333

heappush(queue, (-parent_node.gdfo, parent_node))

334

to_cleanup_append(parent_node)

335

elif ancestor_of != next_ancestor_of:

336

# Combine to get the full set of parents

337

all_ancestors = set(ancestor_of)

338

all_ancestors.update(next_ancestor_of)

339

parent_node.ancestor_of = tuple(sorted(all_ancestors))

340

def cleanup():

341

for node in to_cleanup:

342

node.ancestor_of = None

343

cleanup()

344

return frozenset(candidate_nodes)