33
36
traverseproc tp_traverse
35
38
PyTypeObject *Py_TYPE(PyObject *)
39
int PyObject_IsTrue(PyObject *)
37
41
void *PyMem_Malloc(size_t nbytes)
38
42
void PyMem_Free(void *)
39
43
void memset(void *, int, size_t)
46
# Dummy is an object used to mark nodes that have been deleted. Since
47
# collisions require us to move a node to an alternative location, if we just
48
# set an entry to NULL on delete, we won't find any relocated nodes.
49
# We have to use _dummy_obj because we need to keep a refcount to it, but we
50
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
42
52
cdef object _dummy_obj
43
53
cdef PyObject *_dummy
44
54
_dummy_obj = object()
45
55
_dummy = <PyObject *>_dummy_obj
48
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other):
58
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:
49
59
cdef long other_hash
54
64
other_hash = Py_TYPE(other).tp_hash(other)
66
# Even though other successfully hashed in the past, it seems to have
67
# changed its mind, and failed this time, so propogate the failure.
55
69
if other_hash != this_hash:
72
# This implements a subset of the PyObject_RichCompareBool functionality.
74
# 1) Doesn't try to do anything with old-style classes
75
# 2) Assumes that both objects have a tp_richcompare implementation, and
76
# that if that is not enough to compare equal, then they are not
77
# equal. (It doesn't try to cast them both to some intermediate form
78
# that would compare equal.)
57
79
res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
80
if res == NULL: # Exception
82
if PyObject_IsTrue(res):
61
85
if res == Py_NotImplemented:
63
87
res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
90
if PyObject_IsTrue(res):
170
195
as it makes a lot of assuptions about keys not already being present,
171
196
and there being no dummy entries.
173
cdef size_t i, perturb, mask
198
cdef size_t i, n_lookup
174
199
cdef long the_hash
175
cdef PyObject **table, **entry
200
cdef PyObject **table, **slot
177
203
mask = self._mask
178
204
table = self._table
180
206
the_hash = Py_TYPE(key).tp_hash(key)
184
# Because we know that we made all items unique before, we can just
185
# iterate as long as the target location is not empty, we don't have to
186
# do any comparison, etc.
187
while entry[0] != NULL:
188
i = (i << 2) + i + perturb + 1
189
entry = &table[i & mask]
190
perturb >>= PERTURB_SHIFT
210
for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
211
slot = &table[i & mask]
214
self._fill = self._fill + 1
215
self._used = self._used + 1
218
raise RuntimeError('ran out of slots.')
195
220
def _py_resize(self, min_used):
196
221
"""Do not use this directly, it is only exposed for testing."""
262
287
cdef PyObject **slot, *py_key
290
py_key = <PyObject *>key
291
if (Py_TYPE(py_key).tp_richcompare == NULL
292
or Py_TYPE(py_key).tp_hash == NULL):
293
raise TypeError('Types added to SimpleSet must implement'
294
' both tp_richcompare and tp_hash')
266
296
# We need at least one empty slot
267
297
assert self._used < self._mask
268
298
slot = _lookup(self, key)
269
py_key = <PyObject *>key
270
299
if (slot[0] == NULL):
271
300
Py_INCREF(py_key)
301
self._fill = self._fill + 1
302
self._used = self._used + 1
276
305
elif (slot[0] == _dummy):
277
306
Py_INCREF(py_key)
307
self._used = self._used + 1
281
310
# No else: clause. If _lookup returns a pointer to
412
425
:param key: An object we are looking up
413
426
:param hash: The hash for key
414
:return: The location in self.table where key should be put
415
should never be NULL, but may reference a NULL (PyObject*)
427
:return: The location in self.table where key should be put.
428
location == NULL is an exception, but (*location) == NULL just
429
indicates the slot is empty and can be used.
417
# This is the heart of most functions, which is why it is pulled out as an
418
# cdef inline function.
419
cdef size_t i, perturb
431
# This uses Quadratic Probing:
432
# http://en.wikipedia.org/wiki/Quadratic_probing
434
# This leads to probe locations at:
437
# h2 = h0 + 3 = h1 + 1 + 1
438
# h3 = h0 + 6 = h2 + 1 + 2
439
# h4 = h0 + 10 = h2 + 1 + 3
440
# Note that all of these are '& mask', but that is computed *after* the
442
# This differs from the algorithm used by Set and Dict. Which, effectively,
443
# use double-hashing, and a step size that starts large, but dwindles to
444
# stepping one-by-one.
445
# This gives more 'locality' in that if you have a collision at offset X,
446
# the first fallback is X+1, which is fast to check. However, that means
447
# that an object w/ hash X+1 will also check there, and then X+2 next.
448
# However, for objects with differing hashes, their chains are different.
449
# The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
450
# So different hashes diverge quickly.
451
# A bigger problem is that we *only* ever use the lowest bits of the hash
452
# So all integers (x + SIZE*N) will resolve into the same bucket, and all
453
# use the same collision resolution. We may want to try to find a way to
454
# incorporate the upper bits of the hash with quadratic probing. (For
455
# example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
456
cdef size_t i, n_lookup
420
457
cdef Py_ssize_t mask
421
458
cdef long key_hash
423
cdef PyObject **table, **cur, **free_slot, *py_key
459
cdef PyObject **table, **slot, *cur, **free_slot, *py_key
461
# hash is a signed long(), we are using an offset at unsigned size_t
425
462
key_hash = hash(key)
426
464
mask = self._mask
427
465
table = self._table
430
467
py_key = <PyObject *>key
432
# Found a blank spot, or found the exact key
439
if _is_equal(py_key, key_hash, cur[0]):
440
# Both py_key and cur[0] belong in this slot, return it
443
# size_t is unsigned, hash is signed...
446
i = (i << 2) + i + perturb + 1
447
cur = &table[i & mask]
448
if cur[0] == NULL: # Found an empty spot
449
if free_slot: # Did we find a _dummy earlier?
468
for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
469
slot = &table[i & mask]
473
if free_slot != NULL:
474
# Did we find an earlier _dummy entry?
453
if (cur[0] == py_key # exact match
454
or _is_equal(py_key, key_hash, cur[0])): # Equivalent match
456
if (cur[0] == _dummy and free_slot == NULL):
458
perturb >>= PERTURB_SHIFT
479
# Found an exact pointer to the key
482
if free_slot == NULL:
484
elif _is_equal(py_key, key_hash, cur):
485
# Both py_key and cur belong in this slot, return it
459
488
raise AssertionError('should never get here')