1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
|
# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Definition of a class that is used to intern StaticTuple objects."""
cdef extern from "Python.h":
ctypedef unsigned long size_t
ctypedef long (*hashfunc)(PyObject*)
ctypedef PyObject *(*richcmpfunc)(PyObject *, PyObject *, int)
int Py_EQ
PyObject *Py_True
PyObject *Py_NotImplemented
void Py_INCREF(PyObject *)
void Py_DECREF(PyObject *)
ctypedef struct PyTypeObject:
hashfunc tp_hash
richcmpfunc tp_richcompare
PyTypeObject *Py_TYPE(PyObject *)
void *PyMem_Malloc(size_t nbytes)
void PyMem_Free(void *)
void memset(void *, int, size_t)
cdef object _dummy_obj
cdef PyObject *_dummy
_dummy_obj = object()
_dummy = <PyObject *>_dummy_obj
cdef inline int _is_equal(PyObject *this, long this_hash, PyObject *other):
cdef long other_hash
cdef PyObject *res
if this == other:
return 1
other_hash = Py_TYPE(other).tp_hash(other)
if other_hash != this_hash:
return 0
res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
if res == Py_True:
Py_DECREF(res)
return 1
# Only handled for now because we are testing with stuff like tuples versus
# StaticTuple objects. If we decide to limit StaticTupleInterner to
# strictly only allowing StaticTuple objects, then this is no longer
# required, and Py_NotImplemented => not equal
if res == Py_NotImplemented:
Py_DECREF(res)
res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
if res == Py_True:
Py_DECREF(res)
return 1
Py_DECREF(res)
return 0
cdef public api class StaticTupleInterner [object StaticTupleInternerObject,
type StaticTupleInterner_type]:
"""This class tracks the canonical forms for StaticTuples.
It is similar in function to the interned dictionary that is used by
strings. However:
1) It assumes that hash(obj) is cheap, so does not need to inline a copy
of it
2) It only stores one reference to the object, rather than 2 (key vs
key:value)
As such, it uses 1/3rd the amount of memory to store a pointer to the
interned object.
"""
# Attributes are defined in the .pxd file
DEF DEFAULT_SIZE=1024
DEF PERTURB_SHIFT=5
# Note that most of the members on this class are just thunks over to the C
# api. However, this provides a nice Python/Pyrex api, as well as making it
# easy to test the C api from pure python.
def __init__(self):
cdef Py_ssize_t size, n_bytes
size = DEFAULT_SIZE
self.mask = size - 1
self.used = 0
self.fill = 0
n_bytes = sizeof(PyObject*) * size;
self.table = <PyObject **>PyMem_Malloc(n_bytes)
if self.table == NULL:
raise MemoryError()
memset(self.table, 0, n_bytes)
def __dealloc__(self):
if self.table != NULL:
PyMem_Free(self.table)
self.table = NULL
def __len__(self):
return self.used
def _test_lookup(self, key):
cdef PyObject **slot
slot = _lookup(self, key)
if slot[0] == NULL:
res = '<null>'
elif slot[0] == _dummy:
res = '<dummy>'
else:
res = <object>slot[0]
return <int>(slot - self.table), res
def __contains__(self, key):
"""Is key present in this StaticTupleInterner."""
cdef PyObject **slot
slot = _lookup(self, key)
if slot[0] == NULL or slot[0] == _dummy:
return False
return True
cdef PyObject *_get(self, object key) except? NULL:
"""Return the object (or nothing) define at the given location."""
cdef PyObject **slot
slot = _lookup(self, key)
if slot[0] == NULL or slot[0] == _dummy:
return NULL
return slot[0]
def __getitem__(self, key):
"""Return a stored item that is equivalent to key."""
cdef PyObject *py_val
py_val = self._get(key)
if py_val == NULL:
raise KeyError("Key %s is not present" % key)
val = <object>(py_val)
return val
# def __setitem__(self, key, value):
# assert key == value
# self._add(key)
cdef int _insert_clean(self, PyObject *key) except -1:
"""Insert a key into self.table.
This is only meant to be used during times like '_resize',
as it makes a lot of assuptions about keys not already being present,
and there being no dummy entries.
"""
cdef size_t i, perturb, mask
cdef long the_hash
cdef PyObject **table, **entry
mask = self.mask
table = self.table
the_hash = Py_TYPE(key).tp_hash(key)
i = the_hash & mask
entry = &table[i]
perturb = the_hash
# Because we know that we made all items unique before, we can just
# iterate as long as the target location is not empty, we don't have to
# do any comparison, etc.
while entry[0] != NULL:
i = (i << 2) + i + perturb + 1
entry = &table[i & mask]
perturb >>= PERTURB_SHIFT
entry[0] = key
self.fill += 1
self.used += 1
cpdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
"""Resize the internal table.
The final table will be big enough to hold at least min_used entries.
We will copy the data from the existing table over, leaving out dummy
entries.
:return: The new size of the internal table
"""
cdef Py_ssize_t new_size, n_bytes, remaining
cdef PyObject **new_table, **old_table, **entry
new_size = DEFAULT_SIZE
while new_size <= min_used and new_size > 0:
new_size = new_size << 1
# We rolled over our signed size field
if new_size <= 0:
raise MemoryError()
if new_size == (self.mask + 1):
# Nothing to do
return new_size
# TODO: Test this
# if new_size < self.used:
# raise RuntimeError('cannot shrink StaticTupleInterner to something'
# ' smaller than the number of used slots.')
n_bytes = sizeof(PyObject*) * new_size;
new_table = <PyObject **>PyMem_Malloc(n_bytes)
if new_table == NULL:
raise MemoryError()
old_table = self.table
self.table = new_table
memset(self.table, 0, n_bytes)
self.mask = new_size - 1
self.used = 0
remaining = self.fill
self.fill = 0
# Moving everything to the other table is refcount neutral, so we don't
# worry about it.
entry = old_table
while remaining > 0:
if entry[0] == NULL: # unused slot
pass
elif entry[0] == _dummy: # dummy slot
remaining -= 1
else: # active slot
remaining -= 1
self._insert_clean(entry[0])
entry += 1
PyMem_Free(old_table)
return new_size
cpdef object add(self, key):
"""Similar to set.add(), start tracking this key.
There is one small difference, which is that we return the object that
is stored at the given location. (which is closer to the
dict.setdefault() functionality.)
"""
cdef PyObject **slot, *py_key
cdef int added = 0
# We need at least one empty slot
assert self.used < self.mask
slot = _lookup(self, key)
py_key = <PyObject *>key
if (slot[0] == NULL):
Py_INCREF(py_key)
self.fill += 1
self.used += 1
slot[0] = py_key
added = 1
elif (slot[0] == _dummy):
Py_INCREF(py_key)
self.used += 1
slot[0] = py_key
added = 1
# No else: clause. If _lookup returns a pointer to
# a live object, then we already have a value at this location.
retval = <object>(slot[0])
# PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
if added and (self.fill * 3) >= ((self.mask + 1) * 2):
# However, we always work for a load factor of 2:1
self._resize(self.used * 2)
# Even if we resized and ended up moving retval into a different slot,
# it is still the value that is held at the slot equivalent to 'key',
# so we can still return it
return retval
cpdef int discard(self, key) except -1:
"""Remove key from the dict, whether it exists or not.
:return: 0 if the item did not exist, 1 if it did
"""
cdef PyObject **slot, *py_key
slot = _lookup(self, key)
if slot[0] == NULL or slot[0] == _dummy:
return 0
self.used -= 1
Py_DECREF(slot[0])
slot[0] = _dummy
return 1
def __delitem__(self, key):
"""Remove the given item from the dict.
Raise a KeyError if the key was not present.
"""
cdef int exists
exists = self.discard(key)
if not exists:
raise KeyError('Key %s not present' % (key,))
cdef api StaticTupleInterner StaticTupleInterner_New():
"""Create a new StaticTupleInterner object."""
return StaticTupleInterner()
cdef inline int _check_self_not_none(object self) except -1:
"""Check that the parameter is not None.
Pyrex/Cython will do type checking, but only to ensure that an object is
either the right type or None. You can say "object foo not None" for pure
python functions, but not for C functions.
So this is just a helper for all the apis that need to do the check.
"""
if self is None:
raise TypeError('self must not be None')
cdef inline PyObject **_lookup(StaticTupleInterner self,
object key) except NULL:
"""Find the slot where 'key' would fit.
This is the same as a dicts 'lookup' function.
:param key: An object we are looking up
:param hash: The hash for key
:return: The location in self.table where key should be put
should never be NULL, but may reference a NULL (PyObject*)
"""
# This is the heart of most functions, which is why it is pulled out as an
# cdef inline function.
cdef size_t i, perturb
cdef Py_ssize_t mask
cdef long key_hash
cdef long this_hash
cdef PyObject **table, **cur, **free_slot, *py_key
key_hash = hash(key)
mask = self.mask
table = self.table
i = key_hash & mask
cur = &table[i]
py_key = <PyObject *>key
if cur[0] == NULL:
# Found a blank spot, or found the exact key
return cur
if cur[0] == py_key:
return cur
if cur[0] == _dummy:
free_slot = cur
else:
if _is_equal(py_key, key_hash, cur[0]):
# Both py_key and cur[0] belong in this slot, return it
return cur
free_slot = NULL
# size_t is unsigned, hash is signed...
perturb = key_hash
while True:
i = (i << 2) + i + perturb + 1
cur = &table[i & mask]
if cur[0] == NULL: # Found an empty spot
if free_slot: # Did we find a _dummy earlier?
return free_slot
else:
return cur
if (cur[0] == py_key # exact match
or _is_equal(py_key, key_hash, cur[0])): # Equivalent match
return cur
if (cur[0] == _dummy and free_slot == NULL):
free_slot = cur
perturb >>= PERTURB_SHIFT
raise AssertionError('should never get here')
cdef api PyObject **_StaticTupleInterner_Lookup(object self,
object key) except NULL:
"""Find the slot where 'key' would fit.
This is the same as a dicts 'lookup' function. This is a private
api because mutating what you get without maintaing the other invariants
is a 'bad thing'.
:param key: An object we are looking up
:param hash: The hash for key
:return: The location in self.table where key should be put
should never be NULL, but may reference a NULL (PyObject*)
"""
_check_self_not_none(self)
return _lookup(self, key)
cdef api object StaticTupleInterner_Add(object self, object key):
"""Add a key to the StaticTupleInterner (set).
:param self: The StaticTupleInterner to add the key to.
:param key: The key to be added. If the key is already present,
self will not be modified
:return: The current key stored at the location defined by 'key'.
This may be the same object, or it may be an equivalent object.
(consider dict.setdefault(key, key))
"""
cdef StaticTupleInterner true_self
_check_self_not_none(self)
true_self = self
return true_self.add(key)
cdef api bint StaticTupleInterner_Contains(object self, object key) except -1:
"""Is key present in self?"""
cdef StaticTupleInterner true_self
_check_self_not_none(self)
true_self = self
return key in true_self
cdef api int StaticTupleInterner_Discard(StaticTupleInterner self,
object key) except -1:
"""Remove the object referenced at location 'key'.
:param self: The StaticTupleInterner being modified
:param key: The key we are checking on
:return: 1 if there was an object present, 0 if there was not, and -1 on
error.
"""
cdef StaticTupleInterner true_self
_check_self_not_none(self)
true_self = self
return true_self.discard(key)
cdef api PyObject *StaticTupleInterner_Get(StaticTupleInterner self,
object key) except? NULL:
"""Get a pointer to the object present at location 'key'.
This returns an object which is equal to key which was previously added to
self. This returns a borrowed reference, as it may also return NULL if no
value is present at that location.
:param key: The value we are looking for
:return: The object present at that location
"""
cdef StaticTupleInterner true_self
_check_self_not_none(self)
true_self = self
return true_self._get(key)
cdef api Py_ssize_t StaticTupleInterner_Size(object self) except -1:
"""Get the number of active entries in 'self'"""
cdef StaticTupleInterner true_self = self
_check_self_not_none(self)
return true_self.used
|