~bzr-pqm/bzr/bzr.dev : revision 4371.3.19

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

16

17

"""Implementation of Graph algorithms when we have already loaded everything.

18

"""

19

20

cdef extern from "python-compat.h":

21

pass

22

23

cdef extern from "Python.h":

24

ctypedef int Py_ssize_t

25

ctypedef struct PyObject:

26

pass

27

28

import heapq

29

30

from bzrlib import revision

31

32

33

cdef class _KnownGraphNode:

34

"""Represents a single object in the known graph."""

35

36

cdef object key

37

# Ideally, we could change this into fixed arrays, rather than get the

38

# extra allocations. Consider that 99% of all revisions will have <= 2

39

# parents, we may want to put in the effort

40

cdef list parents

41

cdef list children

42

cdef _KnownGraphNode linear_dominator_node

43

cdef public long dominator_distance

44

cdef public long gdfo

45

# This could also be simplified

46

cdef object ancestor_of

47

48

def __init__(self, key, parents):

49

self.key = key

50

if parents is None:

51

self.parents = parents

52

else:

53

if not isinstance(parents, list):

54

raise TypeError('parents must be a list')

55

for parent in parents:

56

if not isinstance(parent, _KnownGraphNode):

57

raise TypeError('parents must be a list of _KnownGraphNode')

58

self.parents = parents

59

self.children = []

60

# oldest ancestor, such that no parents between here and there have >1

61

# child or >1 parent.

62

self.linear_dominator_node = None

63

self.dominator_distance = 0

64

# Greatest distance from origin

65

self.gdfo = -1

66

# This will become a tuple of known heads that have this node as an

67

# ancestor

68

self.ancestor_of = None

69

70

property child_keys:

71

def __get__(self):

72

cdef list keys

73

cdef _KnownGraphNode child

74

75

keys = []

76

for child in self.children:

77

keys.append(child.key)

78

return keys

79

80

property linear_dominator:

81

def __get__(self):

82

if self.linear_dominator_node is None:

83

return None

84

else:

85

return self.linear_dominator_node.key

86

87

cdef clear_references(self):

88

self.parents = None

89

self.children = None

90

self.linear_dominator_node = None

91

92

def __repr__(self):

93

parent_keys = []

94

for parent in self.parents:

95

parent_keys.append(parent.key)

96

child_keys = []

97

for child in self.children:

98

child_keys.append(child.key)

99

return '%s(%s gdfo:%s par:%s child:%s dom:%s %s)' % (

100

self.__class__.__name__, self.key, self.gdfo,

101

parent_keys, child_keys,

102

self.linear_dominator, self.dominator_distance)

103

104

105

# TODO: slab allocate all _KnownGraphNode objects.

106

# We already know how many we are going to need...

107

108

cdef class KnownGraph:

109

"""This is a class which assumes we already know the full graph."""

110

111

cdef public object _nodes

112

cdef dict _known_heads

113

cdef public int do_cache

114

115

def __init__(self, parent_map, do_cache=True):

116

"""Create a new KnownGraph instance.

117

118

:param parent_map: A dictionary mapping key => parent_keys

119

"""

120

self._nodes = {}

121

# Maps {sorted(revision_id, revision_id): heads}

122

self._known_heads = {}

123

self.do_cache = int(do_cache)

124

self._initialize_nodes(parent_map)

125

self._find_linear_dominators()

126

self._find_gdfo()

127

128

def __dealloc__(self):

129

cdef _KnownGraphNode child

130

131

for child in self._nodes.itervalues():

132

child.clear_references()

133

134

def _initialize_nodes(self, parent_map):

135

"""Populate self._nodes.

136

137

After this has finished, self._nodes will have an entry for every entry

138

in parent_map. Ghosts will have a parent_keys = None, all nodes found

139

will also have .child_keys populated with all known child_keys.

140

"""

141

cdef _KnownGraphNode node

142

cdef _KnownGraphNode parent_node

143

cdef list parent_nodes

144

145

for key, parent_keys in parent_map.iteritems():

146

parent_nodes = []

147

for parent_key in parent_keys:

148

try:

149

parent_node = self._nodes[parent_key]

150

except KeyError:

151

parent_node = _KnownGraphNode(parent_key, None)

152

self._nodes[parent_key] = parent_node

153

parent_nodes.append(parent_node)

154

if key in self._nodes:

155

node = self._nodes[key]

156

assert node.parents is None

157

node.parents = parent_nodes

158

else:

159

node = _KnownGraphNode(key, parent_nodes)

160

self._nodes[key] = node

161

for parent_node in parent_nodes:

162

parent_node.children.append(node)

163

164

cdef object _check_is_linear(self, _KnownGraphNode node):

165

"""Check to see if a given node is part of a linear chain."""

166

cdef _KnownGraphNode parent_node

167

if node.parents is None or len(node.parents) != 1:

168

# This node is either a ghost, a tail, or has multiple parents

169

# It its own dominator

170

node.linear_dominator_node = node

171

node.dominator_distance = 0

172

return None

173

parent_node = node.parents[0]

174

if len(parent_node.children) > 1:

175

# The parent has multiple children, so *this* node is the

176

# dominator

177

node.linear_dominator_node = node

178

node.dominator_distance = 0

179

return None

180

# The parent is already filled in, so add and continue

181

if parent_node.linear_dominator_node is not None:

182

node.linear_dominator_node = parent_node.linear_dominator_node

183

node.dominator_distance = parent_node.dominator_distance + 1

184

return None

185

# We don't know this node, or its parent node, so start walking to

186

# next

187

return parent_node

188

189

def _find_linear_dominators(self):

190

"""For each node in the set, find any linear dominators.

191

192

For any given node, the 'linear dominator' is an ancestor, such that

193

all parents between this node and that one have a single parent, and a

194

single child. So if A->B->C->D then B,C,D all have a linear dominator

195

of A. Because there are no interesting siblings, we can quickly skip to

196

the nearest dominator when doing comparisons.

197

"""

198

cdef _KnownGraphNode node

199

cdef _KnownGraphNode next_node

200

cdef list stack

201

202

for node in self._nodes.itervalues():

203

# The parent is not filled in, so walk until we get somewhere

204

if node.linear_dominator_node is not None: #already done

205

continue

206

next_node = self._check_is_linear(node)

207

if next_node is None:

208

# Nothing more needs to be done

209

continue

210

stack = []

211

while next_node is not None:

212

stack.append(node)

213

node = next_node

214

next_node = self._check_is_linear(node)

215

# The stack now contains the linear chain, and 'node' should have

216

# been labeled

217

assert node.linear_dominator_node is not None

218

dominator = node.linear_dominator_node

219

while stack:

220

next_node = stack.pop()

221

next_node.linear_dominator_node = dominator

222

next_node.dominator_distance = node.dominator_distance + 1

223

node = next_node

224

225

cdef list _find_tails(self):

226

cdef list tails

227

cdef _KnownGraphNode node

228

tails = []

229

230

for node in self._nodes.itervalues():

231

if not node.parents:

232

tails.append(node)

233

return tails

234

235

def _find_gdfo(self):

236

# TODO: Consider moving the tails search into the first-pass over the

237

# data, inside _find_linear_dominators

238

cdef _KnownGraphNode node

239

cdef _KnownGraphNode child_node

240

cdef _KnownGraphNode parent_node

241

242

tails = self._find_tails()

243

todo = []

244

heappush = heapq.heappush

245

heappop = heapq.heappop

246

for node in tails:

247

node.gdfo = 1

248

heappush(todo, (1, node))

249

processed = 0

250

max_gdfo = len(self._nodes) + 1

251

while todo:

252

gdfo, node = heappop(todo)

253

processed += 1

254

if node.gdfo != -1 and gdfo < node.gdfo:

255

# This node was reached from a longer path, we assume it was

256

# enqued correctly with the longer gdfo, so don't continue

257

# processing now

258

continue

259

next_gdfo = gdfo + 1

260

assert next_gdfo <= max_gdfo

261

for child_node in node.children:

262

if child_node.gdfo < next_gdfo:

263

# Only enque children when all of their parents have been

264

# resolved

265

for parent_node in child_node.parents:

266

# We know that 'this' parent is counted

267

if parent_node is not node:

268

if parent_node.gdfo == -1:

269

break

270

else:

271

child_node.gdfo = next_gdfo

272

heappush(todo, (next_gdfo, child_node))

273

274

def heads(self, keys):

275

"""Return the heads from amongst keys.

276

277

This is done by searching the ancestries of each key. Any key that is

278

reachable from another key is not returned; all the others are.

279

280

This operation scales with the relative depth between any two keys. If

281

any two keys are completely disconnected all ancestry of both sides

282

will be retrieved.

283

284

:param keys: An iterable of keys.

285

:return: A set of the heads. Note that as a set there is no ordering

286

information. Callers will need to filter their input to create

287

order if they need it.

288

"""

289

cdef dict candidate_nodes

290

cdef dict dom_to_node

291

292

candidate_nodes = {}

293

for key in keys:

294

candidate_nodes[key] = self._nodes[key]

295

if revision.NULL_REVISION in candidate_nodes:

296

# NULL_REVISION is only a head if it is the only entry

297

candidate_nodes.pop(revision.NULL_REVISION)

298

if not candidate_nodes:

299

return set([revision.NULL_REVISION])

300

if len(candidate_nodes) < 2:

301

return frozenset(candidate_nodes)

302

heads_key = frozenset(candidate_nodes)

303

try:

304

heads = self._known_heads[heads_key]

305

return heads

306

except KeyError:

307

pass # compute it ourselves

308

## dominator = None

309

## # TODO: We could optimize for the len(candidate_nodes) > 2 by checking

310

## # for *any* pair-wise matching, and then eliminating one of the

311

## # nodes trivially. However, the fairly common case is just 2

312

## # keys, so we'll focus on that, first

313

## for node in candidate_nodes.itervalues():

314

## if dominator is None:

315

## dominator = node.linear_dominator

316

## elif dominator != node.linear_dominator:

317

## break

318

## else:

319

## # In 'time bzr annotate NEWS' this only catches *one* item, so it

320

## # probably isn't worth the optimization

321

## # All of these nodes have the same linear_dominator, which means

322

## # they are in a line, the head is just the one with the highest

323

## # distance

324

## def get_distance(key):

325

## return self._nodes[key].dominator_distance

326

## def get_linear_head():

327

## return max(candidate_nodes, key=get_distance)

328

## return set([get_linear_head()])

329

# Check the linear dominators of these keys, to see if we already

330

# know the heads answer

331

# dom_key = []

332

# for node in candidate_nodes.itervalues():

333

# dom_key.append(node.linear_dominator.key)

334

# dom_key = frozenset(dom_key)

335

# if dom_key in self._known_heads:

336

# dom_to_node = dict([(node.linear_dominator, key)

337

# for key, node in candidate_nodes.iteritems()])

338

# # map back into the original keys

339

# heads = self._known_heads[dom_key]

340

# heads = frozenset([dom_to_node[key] for key in heads])

341

# return heads

342

heads = self._heads_from_candidate_nodes(candidate_nodes)

343

# if self.do_cache:

344

# self._known_heads[heads_key] = heads

345

# # Cache the dominator heads

346

# if dom_key is not None:

347

# dom_heads = frozenset([candidate_nodes[key].linear_dominator

348

# for key in heads])

349

# self._known_heads[dom_key] = dom_heads

350

return heads

351

352

def _heads_from_candidate_nodes(self, dict candidate_nodes):

353

cdef list to_cleanup

354

cdef _KnownGraphNode node

355

cdef _KnownGraphNode parent_node

356

cdef int num_candidates

357

358

queue = []

359

to_cleanup = []

360

for node in candidate_nodes.itervalues():

361

assert node.ancestor_of is None

362

node.ancestor_of = (node.key,)

363

queue.append((-node.gdfo, node))

364

to_cleanup.append(node)

365

heapq.heapify(queue)

366

# These are nodes that we determined are 'common' that we are no longer

367

# walking

368

# Now we walk nodes until all nodes that are being walked are 'common'

369

num_candidates = len(candidate_nodes)

370

nodes = self._nodes

371

heappop = heapq.heappop

372

heappush = heapq.heappush

373

while queue and len(candidate_nodes) > 1:

374

_, node = heappop(queue)

375

if len(node.ancestor_of) == num_candidates:

376

# This node is now considered 'common'

377

# Make sure all parent nodes are marked as such

378

for parent_node in node.parents:

379

if parent_node.ancestor_of is not None:

380

parent_node.ancestor_of = node.ancestor_of

381

if node.linear_dominator_node is not node:

382

parent_node = node.linear_dominator_node

383

if parent_node.ancestor_of is not None:

384

parent_node.ancestor_of = node.ancestor_of

385

continue

386

if node.parents is None:

387

# This is a ghost

388

continue

389

# Now project the current nodes ancestor list to the parent nodes,

390

# and queue them up to be walked

391

# Note: using linear_dominator speeds things up quite a bit

392

# enough that we actually start to be slightly faster

393

# than the default heads() implementation

394

if node.linear_dominator_node is not node:

395

# We are at the tip of a long linear region

396

# We know that there is nothing between here and the tail

397

# that is interesting, so skip to the end

398

parents = [node.linear_dominator_node]

399

else:

400

parents = node.parents

401

for parent_node in node.parents:

402

if parent_node.key in candidate_nodes:

403

candidate_nodes.pop(parent_node.key)

404

if len(candidate_nodes) <= 1:

405

break

406

if parent_node.ancestor_of is None:

407

# This node hasn't been walked yet

408

parent_node.ancestor_of = node.ancestor_of

409

# Enqueue this node

410

heappush(queue, (-parent_node.gdfo, parent_node))

411

to_cleanup.append(parent_node)

412

elif parent_node.ancestor_of != node.ancestor_of:

413

# Combine to get the full set of parents

414

all_ancestors = set(parent_node.ancestor_of)

415

all_ancestors.update(node.ancestor_of)

416

parent_node.ancestor_of = tuple(sorted(all_ancestors))

417

for node in to_cleanup:

418

node.ancestor_of = None

419

return frozenset(candidate_nodes)