4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
1 |
# Copyright (C) 2009 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
17 |
"""Definition of a class that is similar to Set with some small changes."""
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
18 |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
19 |
cdef extern from "python-compat.h": |
20 |
pass
|
|
21 |
||
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
22 |
cdef extern from "Python.h": |
23 |
ctypedef unsigned long size_t |
|
4744.1.1
by John Arbash Meinel
Add a test case for the bug w/ NotImplemented. |
24 |
ctypedef long (*hashfunc)(PyObject*) except -1 |
25 |
ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int) |
|
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
26 |
ctypedef int (*visitproc)(PyObject *, void *) |
27 |
ctypedef int (*traverseproc)(PyObject *, visitproc, void *) |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
28 |
int Py_EQ |
29 |
void Py_INCREF(PyObject *) |
|
30 |
void Py_DECREF(PyObject *) |
|
31 |
ctypedef struct PyTypeObject: |
|
32 |
hashfunc tp_hash |
|
33 |
richcmpfunc tp_richcompare |
|
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
34 |
traverseproc tp_traverse |
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
35 |
|
36 |
PyTypeObject *Py_TYPE(PyObject *) |
|
4739.2.1
by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5 |
37 |
# Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and
|
38 |
# thus silently truncates to 32-bits on 64-bit machines.
|
|
39 |
long PyObject_Hash(PyObject *) except -1 |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
40 |
|
41 |
void *PyMem_Malloc(size_t nbytes) |
|
42 |
void PyMem_Free(void *) |
|
43 |
void memset(void *, int, size_t) |
|
44 |
||
4679.3.86
by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file. |
45 |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
46 |
# Dummy is an object used to mark nodes that have been deleted. Since
|
47 |
# collisions require us to move a node to an alternative location, if we just
|
|
48 |
# set an entry to NULL on delete, we won't find any relocated nodes.
|
|
49 |
# We have to use _dummy_obj because we need to keep a refcount to it, but we
|
|
50 |
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
|
|
51 |
# over the code base.
|
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
52 |
cdef object _dummy_obj |
53 |
cdef PyObject *_dummy |
|
54 |
_dummy_obj = object() |
|
55 |
_dummy = <PyObject *>_dummy_obj |
|
56 |
||
4679.3.74
by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code. |
57 |
|
4744.1.1
by John Arbash Meinel
Add a test case for the bug w/ NotImplemented. |
58 |
cdef object _NotImplemented |
59 |
_NotImplemented = NotImplemented |
|
60 |
||
61 |
||
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
62 |
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1: |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
63 |
cdef long other_hash |
64 |
||
65 |
if this == other: |
|
66 |
return 1 |
|
4739.2.1
by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5 |
67 |
other_hash = PyObject_Hash(other) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
68 |
if other_hash != this_hash: |
69 |
return 0 |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
70 |
|
71 |
# This implements a subset of the PyObject_RichCompareBool functionality.
|
|
72 |
# Namely it:
|
|
73 |
# 1) Doesn't try to do anything with old-style classes
|
|
74 |
# 2) Assumes that both objects have a tp_richcompare implementation, and
|
|
75 |
# that if that is not enough to compare equal, then they are not
|
|
76 |
# equal. (It doesn't try to cast them both to some intermediate form
|
|
77 |
# that would compare equal.)
|
|
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
78 |
res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ) |
4744.1.1
by John Arbash Meinel
Add a test case for the bug w/ NotImplemented. |
79 |
if res is _NotImplemented: |
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
80 |
res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ) |
4744.1.1
by John Arbash Meinel
Add a test case for the bug w/ NotImplemented. |
81 |
if res is _NotImplemented: |
82 |
return 0 |
|
83 |
if res: |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
84 |
return 1 |
85 |
return 0 |
|
86 |
||
87 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
88 |
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]: |
89 |
"""This class can be used to track canonical forms for objects.
|
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
90 |
|
91 |
It is similar in function to the interned dictionary that is used by
|
|
92 |
strings. However:
|
|
93 |
||
94 |
1) It assumes that hash(obj) is cheap, so does not need to inline a copy
|
|
95 |
of it
|
|
96 |
2) It only stores one reference to the object, rather than 2 (key vs
|
|
97 |
key:value)
|
|
98 |
||
99 |
As such, it uses 1/3rd the amount of memory to store a pointer to the
|
|
100 |
interned object.
|
|
101 |
"""
|
|
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
102 |
# Attributes are defined in the .pxd file
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
103 |
DEF DEFAULT_SIZE=1024 |
104 |
||
105 |
def __init__(self): |
|
106 |
cdef Py_ssize_t size, n_bytes |
|
107 |
||
108 |
size = DEFAULT_SIZE |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
109 |
self._mask = size - 1 |
110 |
self._used = 0 |
|
111 |
self._fill = 0 |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
112 |
n_bytes = sizeof(PyObject*) * size; |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
113 |
self._table = <PyObject **>PyMem_Malloc(n_bytes) |
114 |
if self._table == NULL: |
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
115 |
raise MemoryError() |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
116 |
memset(self._table, 0, n_bytes) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
117 |
|
118 |
def __dealloc__(self): |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
119 |
if self._table != NULL: |
120 |
PyMem_Free(self._table) |
|
121 |
self._table = NULL |
|
122 |
||
123 |
property used: |
|
124 |
def __get__(self): |
|
125 |
return self._used |
|
126 |
||
127 |
property fill: |
|
128 |
def __get__(self): |
|
129 |
return self._fill |
|
130 |
||
131 |
property mask: |
|
132 |
def __get__(self): |
|
133 |
return self._mask |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
134 |
|
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
135 |
def _memory_size(self): |
136 |
"""Return the number of bytes of memory consumed by this class."""
|
|
137 |
return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1)) |
|
138 |
||
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
139 |
def __len__(self): |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
140 |
return self._used |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
141 |
|
142 |
def _test_lookup(self, key): |
|
143 |
cdef PyObject **slot |
|
144 |
||
4679.3.61
by John Arbash Meinel
Invert the logic. |
145 |
slot = _lookup(self, key) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
146 |
if slot[0] == NULL: |
147 |
res = '<null>' |
|
148 |
elif slot[0] == _dummy: |
|
149 |
res = '<dummy>' |
|
150 |
else: |
|
151 |
res = <object>slot[0] |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
152 |
return <int>(slot - self._table), res |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
153 |
|
154 |
def __contains__(self, key): |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
155 |
"""Is key present in this SimpleSet."""
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
156 |
cdef PyObject **slot |
157 |
||
4679.3.61
by John Arbash Meinel
Invert the logic. |
158 |
slot = _lookup(self, key) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
159 |
if slot[0] == NULL or slot[0] == _dummy: |
160 |
return False |
|
161 |
return True |
|
162 |
||
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
163 |
cdef PyObject *_get(self, object key) except? NULL: |
4679.3.61
by John Arbash Meinel
Invert the logic. |
164 |
"""Return the object (or nothing) define at the given location."""
|
165 |
cdef PyObject **slot |
|
166 |
||
167 |
slot = _lookup(self, key) |
|
168 |
if slot[0] == NULL or slot[0] == _dummy: |
|
169 |
return NULL |
|
170 |
return slot[0] |
|
171 |
||
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
172 |
def __getitem__(self, key): |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
173 |
"""Return a stored item that is equivalent to key."""
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
174 |
cdef PyObject *py_val |
175 |
||
176 |
py_val = self._get(key) |
|
177 |
if py_val == NULL: |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
178 |
raise KeyError("Key %s is not present" % key) |
4679.3.61
by John Arbash Meinel
Invert the logic. |
179 |
val = <object>(py_val) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
180 |
return val |
181 |
||
4679.3.63
by John Arbash Meinel
Implement resizing. |
182 |
cdef int _insert_clean(self, PyObject *key) except -1: |
183 |
"""Insert a key into self.table.
|
|
184 |
||
185 |
This is only meant to be used during times like '_resize',
|
|
186 |
as it makes a lot of assuptions about keys not already being present,
|
|
187 |
and there being no dummy entries.
|
|
188 |
"""
|
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
189 |
cdef size_t i, n_lookup |
4679.3.63
by John Arbash Meinel
Implement resizing. |
190 |
cdef long the_hash |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
191 |
cdef PyObject **table, **slot |
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
192 |
cdef Py_ssize_t mask |
4679.3.63
by John Arbash Meinel
Implement resizing. |
193 |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
194 |
mask = self._mask |
195 |
table = self._table |
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
196 |
|
4739.2.1
by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5 |
197 |
the_hash = PyObject_Hash(key) |
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
198 |
i = the_hash |
199 |
for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
200 |
slot = &table[i & mask] |
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
201 |
if slot[0] == NULL: |
202 |
slot[0] = key |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
203 |
self._fill = self._fill + 1 |
204 |
self._used = self._used + 1 |
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
205 |
return 1 |
206 |
i = i + 1 + n_lookup |
|
207 |
raise RuntimeError('ran out of slots.') |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
208 |
|
209 |
def _py_resize(self, min_used): |
|
210 |
"""Do not use this directly, it is only exposed for testing."""
|
|
211 |
return self._resize(min_used) |
|
212 |
||
213 |
cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1: |
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
214 |
"""Resize the internal table.
|
215 |
||
216 |
The final table will be big enough to hold at least min_used entries.
|
|
217 |
We will copy the data from the existing table over, leaving out dummy
|
|
218 |
entries.
|
|
219 |
||
220 |
:return: The new size of the internal table
|
|
221 |
"""
|
|
222 |
cdef Py_ssize_t new_size, n_bytes, remaining |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
223 |
cdef PyObject **new_table, **old_table, **slot |
4679.3.63
by John Arbash Meinel
Implement resizing. |
224 |
|
225 |
new_size = DEFAULT_SIZE |
|
226 |
while new_size <= min_used and new_size > 0: |
|
227 |
new_size = new_size << 1 |
|
228 |
# We rolled over our signed size field
|
|
229 |
if new_size <= 0: |
|
230 |
raise MemoryError() |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
231 |
# Even if min_used == self._mask + 1, and we aren't changing the actual
|
4679.3.64
by John Arbash Meinel
Add functionality for shrinking the table. |
232 |
# size, we will still run the algorithm so that dummy entries are
|
233 |
# removed
|
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
234 |
# TODO: Test this
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
235 |
# if new_size < self._used:
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
236 |
# raise RuntimeError('cannot shrink SimpleSet to something'
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
237 |
# ' smaller than the number of used slots.')
|
238 |
n_bytes = sizeof(PyObject*) * new_size; |
|
239 |
new_table = <PyObject **>PyMem_Malloc(n_bytes) |
|
240 |
if new_table == NULL: |
|
241 |
raise MemoryError() |
|
242 |
||
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
243 |
old_table = self._table |
244 |
self._table = new_table |
|
245 |
memset(self._table, 0, n_bytes) |
|
246 |
self._mask = new_size - 1 |
|
247 |
self._used = 0 |
|
248 |
remaining = self._fill |
|
249 |
self._fill = 0 |
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
250 |
|
251 |
# Moving everything to the other table is refcount neutral, so we don't
|
|
252 |
# worry about it.
|
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
253 |
slot = old_table |
4679.3.63
by John Arbash Meinel
Implement resizing. |
254 |
while remaining > 0: |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
255 |
if slot[0] == NULL: # unused slot |
4679.3.63
by John Arbash Meinel
Implement resizing. |
256 |
pass
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
257 |
elif slot[0] == _dummy: # dummy slot |
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
258 |
remaining = remaining - 1 |
4679.3.63
by John Arbash Meinel
Implement resizing. |
259 |
else: # active slot |
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
260 |
remaining = remaining - 1 |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
261 |
self._insert_clean(slot[0]) |
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
262 |
slot = slot + 1 |
4679.3.63
by John Arbash Meinel
Implement resizing. |
263 |
PyMem_Free(old_table) |
264 |
return new_size |
|
265 |
||
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
266 |
def add(self, key): |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
267 |
"""Similar to set.add(), start tracking this key.
|
268 |
|
|
269 |
There is one small difference, which is that we return the object that
|
|
270 |
is stored at the given location. (which is closer to the
|
|
271 |
dict.setdefault() functionality.)
|
|
272 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
273 |
return self._add(key) |
274 |
||
275 |
cdef object _add(self, key): |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
276 |
cdef PyObject **slot, *py_key |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
277 |
cdef int added |
4679.3.61
by John Arbash Meinel
Invert the logic. |
278 |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
279 |
py_key = <PyObject *>key |
280 |
if (Py_TYPE(py_key).tp_richcompare == NULL |
|
281 |
or Py_TYPE(py_key).tp_hash == NULL): |
|
282 |
raise TypeError('Types added to SimpleSet must implement' |
|
283 |
' both tp_richcompare and tp_hash') |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
284 |
added = 0 |
4679.3.63
by John Arbash Meinel
Implement resizing. |
285 |
# We need at least one empty slot
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
286 |
assert self._used < self._mask |
4679.3.61
by John Arbash Meinel
Invert the logic. |
287 |
slot = _lookup(self, key) |
288 |
if (slot[0] == NULL): |
|
289 |
Py_INCREF(py_key) |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
290 |
self._fill = self._fill + 1 |
291 |
self._used = self._used + 1 |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
292 |
slot[0] = py_key |
4679.3.63
by John Arbash Meinel
Implement resizing. |
293 |
added = 1 |
4679.3.61
by John Arbash Meinel
Invert the logic. |
294 |
elif (slot[0] == _dummy): |
295 |
Py_INCREF(py_key) |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
296 |
self._used = self._used + 1 |
4679.3.61
by John Arbash Meinel
Invert the logic. |
297 |
slot[0] = py_key |
4679.3.63
by John Arbash Meinel
Implement resizing. |
298 |
added = 1 |
4679.3.61
by John Arbash Meinel
Invert the logic. |
299 |
# No else: clause. If _lookup returns a pointer to
|
300 |
# a live object, then we already have a value at this location.
|
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
301 |
retval = <object>(slot[0]) |
302 |
# PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
303 |
if added and (self._fill * 3) >= ((self._mask + 1) * 2): |
4679.3.63
by John Arbash Meinel
Implement resizing. |
304 |
# However, we always work for a load factor of 2:1
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
305 |
self._resize(self._used * 2) |
4679.3.63
by John Arbash Meinel
Implement resizing. |
306 |
# Even if we resized and ended up moving retval into a different slot,
|
307 |
# it is still the value that is held at the slot equivalent to 'key',
|
|
308 |
# so we can still return it
|
|
309 |
return retval |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
310 |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
311 |
def discard(self, key): |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
312 |
"""Remove key from the set, whether it exists or not.
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
313 |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
314 |
:return: False if the item did not exist, True if it did
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
315 |
"""
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
316 |
if self._discard(key): |
317 |
return True |
|
318 |
return False |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
319 |
|
320 |
cdef int _discard(self, key) except -1: |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
321 |
cdef PyObject **slot, *py_key |
322 |
||
323 |
slot = _lookup(self, key) |
|
324 |
if slot[0] == NULL or slot[0] == _dummy: |
|
325 |
return 0 |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
326 |
self._used = self._used - 1 |
4679.3.61
by John Arbash Meinel
Invert the logic. |
327 |
Py_DECREF(slot[0]) |
328 |
slot[0] = _dummy |
|
4679.3.64
by John Arbash Meinel
Add functionality for shrinking the table. |
329 |
# PySet uses the heuristic: If more than 1/5 are dummies, then resize
|
330 |
# them away
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
331 |
# if ((so->_fill - so->_used) * 5 < so->mask)
|
4679.3.64
by John Arbash Meinel
Add functionality for shrinking the table. |
332 |
# However, we are planning on using this as an interning structure, in
|
333 |
# which we will be putting a lot of objects. And we expect that large
|
|
334 |
# groups of them are going to have the same lifetime.
|
|
335 |
# Dummy entries hurt a little bit because they cause the lookup to keep
|
|
336 |
# searching, but resizing is also rather expensive
|
|
337 |
# For now, we'll just use their algorithm, but we may want to revisit
|
|
338 |
# it
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
339 |
if ((self._fill - self._used) * 5 > self._mask): |
340 |
self._resize(self._used * 2) |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
341 |
return 1 |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
342 |
|
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
343 |
def __iter__(self): |
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
344 |
return _SimpleSet_iterator(self) |
345 |
||
346 |
||
347 |
cdef class _SimpleSet_iterator: |
|
348 |
"""Iterator over the SimpleSet structure."""
|
|
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
349 |
|
350 |
cdef Py_ssize_t pos |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
351 |
cdef SimpleSet set |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
352 |
cdef Py_ssize_t _used # track if things have been mutated while iterating |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
353 |
cdef Py_ssize_t len # number of entries left |
354 |
||
355 |
def __init__(self, obj): |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
356 |
self.set = obj |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
357 |
self.pos = 0 |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
358 |
self._used = self.set._used |
359 |
self.len = self.set._used |
|
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
360 |
|
361 |
def __iter__(self): |
|
362 |
return self |
|
363 |
||
364 |
def __next__(self): |
|
365 |
cdef Py_ssize_t mask, i |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
366 |
cdef PyObject *key |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
367 |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
368 |
if self.set is None: |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
369 |
raise StopIteration |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
370 |
if self.set._used != self._used: |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
371 |
# Force this exception to continue to be raised
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
372 |
self._used = -1 |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
373 |
raise RuntimeError("Set size changed during iteration") |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
374 |
if not SimpleSet_Next(self.set, &self.pos, &key): |
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
375 |
self.set = None |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
376 |
raise StopIteration |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
377 |
# we found something
|
378 |
the_key = <object>key # INCREF |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
379 |
self.len = self.len - 1 |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
380 |
return the_key |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
381 |
|
382 |
def __length_hint__(self): |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
383 |
if self.set is not None and self._used == self.set._used: |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
384 |
return self.len |
385 |
return 0 |
|
386 |
||
387 |
||
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
388 |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
389 |
cdef api SimpleSet SimpleSet_New(): |
390 |
"""Create a new SimpleSet object."""
|
|
391 |
return SimpleSet() |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
392 |
|
393 |
||
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
394 |
cdef SimpleSet _check_self(object self): |
4679.3.61
by John Arbash Meinel
Invert the logic. |
395 |
"""Check that the parameter is not None.
|
396 |
||
397 |
Pyrex/Cython will do type checking, but only to ensure that an object is
|
|
398 |
either the right type or None. You can say "object foo not None" for pure
|
|
399 |
python functions, but not for C functions.
|
|
400 |
So this is just a helper for all the apis that need to do the check.
|
|
401 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
402 |
cdef SimpleSet true_self |
4679.3.61
by John Arbash Meinel
Invert the logic. |
403 |
if self is None: |
404 |
raise TypeError('self must not be None') |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
405 |
true_self = self |
406 |
return true_self |
|
407 |
||
408 |
||
409 |
cdef PyObject **_lookup(SimpleSet self, object key) except NULL: |
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
410 |
"""Find the slot where 'key' would fit.
|
411 |
||
412 |
This is the same as a dicts 'lookup' function.
|
|
413 |
||
414 |
:param key: An object we are looking up
|
|
415 |
:param hash: The hash for key
|
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
416 |
:return: The location in self.table where key should be put.
|
417 |
location == NULL is an exception, but (*location) == NULL just
|
|
418 |
indicates the slot is empty and can be used.
|
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
419 |
"""
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
420 |
# This uses Quadratic Probing:
|
421 |
# http://en.wikipedia.org/wiki/Quadratic_probing
|
|
422 |
# with c1 = c2 = 1/2
|
|
423 |
# This leads to probe locations at:
|
|
424 |
# h0 = hash(k1)
|
|
425 |
# h1 = h0 + 1
|
|
426 |
# h2 = h0 + 3 = h1 + 1 + 1
|
|
427 |
# h3 = h0 + 6 = h2 + 1 + 2
|
|
428 |
# h4 = h0 + 10 = h2 + 1 + 3
|
|
429 |
# Note that all of these are '& mask', but that is computed *after* the
|
|
430 |
# offset.
|
|
431 |
# This differs from the algorithm used by Set and Dict. Which, effectively,
|
|
432 |
# use double-hashing, and a step size that starts large, but dwindles to
|
|
433 |
# stepping one-by-one.
|
|
434 |
# This gives more 'locality' in that if you have a collision at offset X,
|
|
435 |
# the first fallback is X+1, which is fast to check. However, that means
|
|
436 |
# that an object w/ hash X+1 will also check there, and then X+2 next.
|
|
437 |
# However, for objects with differing hashes, their chains are different.
|
|
438 |
# The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
|
|
439 |
# So different hashes diverge quickly.
|
|
440 |
# A bigger problem is that we *only* ever use the lowest bits of the hash
|
|
441 |
# So all integers (x + SIZE*N) will resolve into the same bucket, and all
|
|
442 |
# use the same collision resolution. We may want to try to find a way to
|
|
443 |
# incorporate the upper bits of the hash with quadratic probing. (For
|
|
444 |
# example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
|
|
445 |
cdef size_t i, n_lookup |
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
446 |
cdef Py_ssize_t mask |
4679.3.61
by John Arbash Meinel
Invert the logic. |
447 |
cdef long key_hash |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
448 |
cdef PyObject **table, **slot, *cur, **free_slot, *py_key |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
449 |
|
4739.2.1
by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5 |
450 |
py_key = <PyObject *>key |
451 |
# Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
|
|
452 |
# (it treats hash() as returning an 'int' rather than a 'long')
|
|
453 |
key_hash = PyObject_Hash(py_key) |
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
454 |
i = <size_t>key_hash |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
455 |
mask = self._mask |
456 |
table = self._table |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
457 |
free_slot = NULL |
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
458 |
for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever |
459 |
slot = &table[i & mask] |
|
460 |
cur = slot[0] |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
461 |
if cur == NULL: |
462 |
# Found a blank spot
|
|
463 |
if free_slot != NULL: |
|
464 |
# Did we find an earlier _dummy entry?
|
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
465 |
return free_slot |
466 |
else: |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
467 |
return slot |
468 |
if cur == py_key: |
|
469 |
# Found an exact pointer to the key
|
|
470 |
return slot |
|
471 |
if cur == _dummy: |
|
472 |
if free_slot == NULL: |
|
473 |
free_slot = slot |
|
474 |
elif _is_equal(py_key, key_hash, cur): |
|
475 |
# Both py_key and cur belong in this slot, return it
|
|
476 |
return slot |
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
477 |
i = i + 1 + n_lookup |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
478 |
raise AssertionError('should never get here') |
479 |
||
480 |
||
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
481 |
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL: |
4679.3.61
by John Arbash Meinel
Invert the logic. |
482 |
"""Find the slot where 'key' would fit.
|
483 |
||
484 |
This is the same as a dicts 'lookup' function. This is a private
|
|
485 |
api because mutating what you get without maintaing the other invariants
|
|
486 |
is a 'bad thing'.
|
|
487 |
||
488 |
:param key: An object we are looking up
|
|
489 |
:param hash: The hash for key
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
490 |
:return: The location in self._table where key should be put
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
491 |
should never be NULL, but may reference a NULL (PyObject*)
|
492 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
493 |
return _lookup(_check_self(self), key) |
4679.3.61
by John Arbash Meinel
Invert the logic. |
494 |
|
495 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
496 |
cdef api object SimpleSet_Add(object self, object key): |
497 |
"""Add a key to the SimpleSet (set).
|
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
498 |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
499 |
:param self: The SimpleSet to add the key to.
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
500 |
:param key: The key to be added. If the key is already present,
|
501 |
self will not be modified
|
|
502 |
:return: The current key stored at the location defined by 'key'.
|
|
503 |
This may be the same object, or it may be an equivalent object.
|
|
504 |
(consider dict.setdefault(key, key))
|
|
505 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
506 |
return _check_self(self)._add(key) |
4679.3.61
by John Arbash Meinel
Invert the logic. |
507 |
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
508 |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
509 |
cdef api int SimpleSet_Contains(object self, object key) except -1: |
4679.3.61
by John Arbash Meinel
Invert the logic. |
510 |
"""Is key present in self?"""
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
511 |
return (key in _check_self(self)) |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
512 |
|
513 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
514 |
cdef api int SimpleSet_Discard(object self, object key) except -1: |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
515 |
"""Remove the object referenced at location 'key'.
|
516 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
517 |
:param self: The SimpleSet being modified
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
518 |
:param key: The key we are checking on
|
519 |
:return: 1 if there was an object present, 0 if there was not, and -1 on
|
|
520 |
error.
|
|
521 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
522 |
return _check_self(self)._discard(key) |
523 |
||
524 |
||
525 |
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL: |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
526 |
"""Get a pointer to the object present at location 'key'.
|
527 |
||
528 |
This returns an object which is equal to key which was previously added to
|
|
529 |
self. This returns a borrowed reference, as it may also return NULL if no
|
|
530 |
value is present at that location.
|
|
531 |
||
532 |
:param key: The value we are looking for
|
|
533 |
:return: The object present at that location
|
|
534 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
535 |
return _check_self(self)._get(key) |
4679.3.61
by John Arbash Meinel
Invert the logic. |
536 |
|
537 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
538 |
cdef api Py_ssize_t SimpleSet_Size(object self) except -1: |
4679.3.61
by John Arbash Meinel
Invert the logic. |
539 |
"""Get the number of active entries in 'self'"""
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
540 |
return _check_self(self)._used |
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
541 |
|
542 |
||
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
543 |
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key): |
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
544 |
"""Walk over items in a SimpleSet.
|
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
545 |
|
546 |
:param pos: should be initialized to 0 by the caller, and will be updated
|
|
547 |
by this function
|
|
548 |
:param key: Will return a borrowed reference to key
|
|
549 |
:return: 0 if nothing left, 1 if we are returning a new value
|
|
550 |
"""
|
|
551 |
cdef Py_ssize_t i, mask |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
552 |
cdef SimpleSet true_self |
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
553 |
cdef PyObject **table |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
554 |
true_self = _check_self(self) |
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
555 |
i = pos[0] |
556 |
if (i < 0): |
|
557 |
return 0 |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
558 |
mask = true_self._mask |
559 |
table= true_self._table |
|
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
560 |
while (i <= mask and (table[i] == NULL or table[i] == _dummy)): |
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
561 |
i = i + 1 |
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
562 |
pos[0] = i + 1 |
563 |
if (i > mask): |
|
564 |
return 0 # All done |
|
565 |
if (key != NULL): |
|
566 |
key[0] = table[i] |
|
567 |
return 1 |
|
568 |
||
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
569 |
|
570 |
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit, void *arg): |
|
571 |
"""This is an implementation of 'tp_traverse' that hits the whole table.
|
|
572 |
||
573 |
Cython/Pyrex don't seem to let you define a tp_traverse, and they only
|
|
574 |
define one for you if you have an 'object' attribute. Since they don't
|
|
575 |
support C arrays of objects, we access the PyObject * directly.
|
|
576 |
"""
|
|
577 |
cdef Py_ssize_t pos |
|
578 |
cdef PyObject *next_key |
|
579 |
cdef int ret |
|
580 |
||
581 |
pos = 0 |
|
582 |
while SimpleSet_Next(self, &pos, &next_key): |
|
583 |
ret = visit(next_key, arg) |
|
584 |
if ret: |
|
585 |
return ret |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
586 |
return 0 |
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
587 |
|
588 |
# It is a little bit ugly to do this, but it works, and means that Meliae can
|
|
589 |
# dump the total memory consumed by all child objects.
|
|
590 |
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse |