232
233
node = <_KnownGraphNode>temp_node
236
cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
237
cdef Py_ssize_t num_parent_keys, pos
238
cdef _KnownGraphNode parent_node
240
num_parent_keys = len(parent_keys)
241
# We know how many parents, so we pre allocate the tuple
242
parent_nodes = PyTuple_New(num_parent_keys)
243
for pos from 0 <= pos < num_parent_keys:
244
# Note: it costs us 10ms out of 40ms to lookup all of these
245
# parents, it doesn't seem to be an allocation overhead,
246
# but rather a lookup overhead. There doesn't seem to be
247
# a way around it, and that is one reason why
248
# KnownGraphNode maintains a direct pointer to the parent
250
# We use [] because parent_keys may be a tuple or list
251
parent_node = self._get_or_create_node(parent_keys[pos])
252
# PyTuple_SET_ITEM will steal a reference, so INCREF first
253
Py_INCREF(parent_node)
254
PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
255
PyList_Append(parent_node.children, node)
256
node.parents = parent_nodes
235
258
def _initialize_nodes(self, parent_map):
236
259
"""Populate self._nodes.
253
276
while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
254
277
key = <object>temp_key
255
278
parent_keys = <object>temp_parent_keys
256
num_parent_keys = len(parent_keys)
257
279
node = self._get_or_create_node(key)
258
# We know how many parents, so we pre allocate the tuple
259
parent_nodes = PyTuple_New(num_parent_keys)
260
for pos2 from 0 <= pos2 < num_parent_keys:
261
# Note: it costs us 10ms out of 40ms to lookup all of these
262
# parents, it doesn't seem to be an allocation overhead,
263
# but rather a lookup overhead. There doesn't seem to be
264
# a way around it, and that is one reason why
265
# KnownGraphNode maintains a direct pointer to the parent
267
# We use [] because parent_keys may be a tuple or list
268
parent_node = self._get_or_create_node(parent_keys[pos2])
269
# PyTuple_SET_ITEM will steal a reference, so INCREF first
270
Py_INCREF(parent_node)
271
PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
272
PyList_Append(parent_node.children, node)
273
node.parents = parent_nodes
280
self._populate_parents(node, parent_keys)
275
282
def _find_tails(self):
276
283
cdef PyObject *temp_node
344
def add_node(self, key, parent_keys):
345
"""Add a new node to the graph.
347
If this fills in a ghost, then the gdfos of all children will be
350
:param key: The node being added. If this is a duplicate, this is a
352
:param parent_keys: The parents of the given node.
353
:return: None (should we return if this was a ghost, etc?)
355
cdef PyObject *maybe_node
356
cdef _KnownGraphNode node, parent_node, child_node
357
cdef long parent_gdfo, next_gdfo
359
maybe_node = PyDict_GetItem(self._nodes, key)
360
if maybe_node != NULL:
361
node = <_KnownGraphNode>maybe_node
362
if node.parents is None:
363
# We are filling in a ghost
364
self._populate_parents(node, parent_keys)
365
# We can't trust cached heads anymore
366
self._known_heads.clear()
367
else: # Ensure that the parent_key list matches
368
existing_parent_keys = []
369
for parent_node in node.parents:
370
existing_parent_keys.append(parent_node.key)
371
# Make sure we use a list for the comparison, in case it was a
373
parent_keys = list(parent_keys)
374
if existing_parent_keys == parent_keys:
375
# Exact match, nothing more to do
378
raise ValueError('Parent key mismatch, existing node %s'
379
' has parents of %s not %s'
380
% (key, existing_parent_keys, parent_keys))
382
node = _KnownGraphNode(key)
383
PyDict_SetItem(self._nodes, key, node)
384
self._populate_parents(node, parent_keys)
386
for parent_node in node.parents:
387
if parent_node.gdfo == -1:
388
# This is a newly introduced ghost, so it gets gdfo of 1
390
if parent_gdfo < parent_node.gdfo:
391
parent_gdfo = parent_node.gdfo
392
node.gdfo = parent_gdfo + 1
393
# Now fill the gdfo to all children
394
# Note that this loop is slightly inefficient, in that we may visit the
395
# same child (and its decendents) more than once, however, it is
396
# 'efficient' in that we only walk to nodes that would be updated,
397
# rather than all nodes
398
# We use a deque rather than a simple list stack, to go for BFD rather
399
# than DFD. So that if a longer path is possible, we walk it before we
400
# get to the final child
401
pending = deque([node])
402
pending_popleft = pending.popleft
403
pending_append = pending.append
405
node = pending_popleft()
406
next_gdfo = node.gdfo + 1
407
for child_node in node.children:
408
if child_node.gdfo < next_gdfo:
409
# This child is being updated, we need to check its
411
child_node.gdfo = next_gdfo
412
pending_append(child_node)
337
414
def heads(self, keys):
338
415
"""Return the heads from amongst keys.