4763.2.4
by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry. |
1 |
# Copyright (C) 2009, 2010 Canonical Ltd
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
17 |
"""Definition of a class that is similar to Set with some small changes."""
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
18 |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
19 |
cdef extern from "python-compat.h": |
20 |
pass
|
|
21 |
||
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
22 |
cdef extern from "Python.h": |
23 |
ctypedef unsigned long size_t |
|
4744.1.1
by John Arbash Meinel
Add a test case for the bug w/ NotImplemented. |
24 |
ctypedef long (*hashfunc)(PyObject*) except -1 |
25 |
ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int) |
|
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
26 |
ctypedef int (*visitproc)(PyObject *, void *) |
27 |
ctypedef int (*traverseproc)(PyObject *, visitproc, void *) |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
28 |
int Py_EQ |
29 |
void Py_INCREF(PyObject *) |
|
30 |
void Py_DECREF(PyObject *) |
|
31 |
ctypedef struct PyTypeObject: |
|
32 |
hashfunc tp_hash |
|
33 |
richcmpfunc tp_richcompare |
|
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
34 |
traverseproc tp_traverse |
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
35 |
|
36 |
PyTypeObject *Py_TYPE(PyObject *) |
|
4739.2.1
by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5 |
37 |
# Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and
|
38 |
# thus silently truncates to 32-bits on 64-bit machines.
|
|
39 |
long PyObject_Hash(PyObject *) except -1 |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
40 |
|
41 |
void *PyMem_Malloc(size_t nbytes) |
|
42 |
void PyMem_Free(void *) |
|
43 |
void memset(void *, int, size_t) |
|
44 |
||
4679.3.86
by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file. |
45 |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
46 |
# Dummy is an object used to mark nodes that have been deleted. Since
|
47 |
# collisions require us to move a node to an alternative location, if we just
|
|
48 |
# set an entry to NULL on delete, we won't find any relocated nodes.
|
|
49 |
# We have to use _dummy_obj because we need to keep a refcount to it, but we
|
|
50 |
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
|
|
51 |
# over the code base.
|
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
52 |
cdef object _dummy_obj |
53 |
cdef PyObject *_dummy |
|
54 |
_dummy_obj = object() |
|
55 |
_dummy = <PyObject *>_dummy_obj |
|
56 |
||
4679.3.74
by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code. |
57 |
|
4744.1.1
by John Arbash Meinel
Add a test case for the bug w/ NotImplemented. |
58 |
cdef object _NotImplemented |
59 |
_NotImplemented = NotImplemented |
|
60 |
||
61 |
||
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
62 |
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1: |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
63 |
cdef long other_hash |
64 |
||
65 |
if this == other: |
|
66 |
return 1 |
|
4739.2.1
by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5 |
67 |
other_hash = PyObject_Hash(other) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
68 |
if other_hash != this_hash: |
69 |
return 0 |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
70 |
|
71 |
# This implements a subset of the PyObject_RichCompareBool functionality.
|
|
72 |
# Namely it:
|
|
73 |
# 1) Doesn't try to do anything with old-style classes
|
|
74 |
# 2) Assumes that both objects have a tp_richcompare implementation, and
|
|
75 |
# that if that is not enough to compare equal, then they are not
|
|
76 |
# equal. (It doesn't try to cast them both to some intermediate form
|
|
77 |
# that would compare equal.)
|
|
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
78 |
res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ) |
4744.1.1
by John Arbash Meinel
Add a test case for the bug w/ NotImplemented. |
79 |
if res is _NotImplemented: |
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
80 |
res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ) |
4744.1.1
by John Arbash Meinel
Add a test case for the bug w/ NotImplemented. |
81 |
if res is _NotImplemented: |
82 |
return 0 |
|
83 |
if res: |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
84 |
return 1 |
85 |
return 0 |
|
86 |
||
87 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
88 |
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]: |
89 |
"""This class can be used to track canonical forms for objects.
|
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
90 |
|
91 |
It is similar in function to the interned dictionary that is used by
|
|
92 |
strings. However:
|
|
93 |
||
94 |
1) It assumes that hash(obj) is cheap, so does not need to inline a copy
|
|
95 |
of it
|
|
96 |
2) It only stores one reference to the object, rather than 2 (key vs
|
|
97 |
key:value)
|
|
98 |
||
99 |
As such, it uses 1/3rd the amount of memory to store a pointer to the
|
|
100 |
interned object.
|
|
101 |
"""
|
|
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
102 |
# Attributes are defined in the .pxd file
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
103 |
DEF DEFAULT_SIZE=1024 |
104 |
||
105 |
def __init__(self): |
|
106 |
cdef Py_ssize_t size, n_bytes |
|
107 |
||
108 |
size = DEFAULT_SIZE |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
109 |
self._mask = size - 1 |
110 |
self._used = 0 |
|
111 |
self._fill = 0 |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
112 |
n_bytes = sizeof(PyObject*) * size; |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
113 |
self._table = <PyObject **>PyMem_Malloc(n_bytes) |
114 |
if self._table == NULL: |
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
115 |
raise MemoryError() |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
116 |
memset(self._table, 0, n_bytes) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
117 |
|
5361.2.1
by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table. |
118 |
def __sizeof__(self): |
5361.2.6
by John Arbash Meinel
We also have to re-implement it for _simple_set_pyx.pyx |
119 |
# Note: Pyrex doesn't allow sizeof(class) so we re-implement it here.
|
120 |
# Bits are:
|
|
121 |
# 1: PyObject
|
|
122 |
# 2: vtable *
|
|
123 |
# 3: 3 Py_ssize_t
|
|
124 |
# 4: PyObject**
|
|
125 |
# Note that we might get alignment, etc, wrong, but at least this is
|
|
126 |
# better than no estimate at all
|
|
127 |
# return sizeof(SimpleSet) + (self._mask + 1) * (sizeof(PyObject*))
|
|
128 |
return (sizeof(PyObject) + sizeof(void*) |
|
129 |
+ 3*sizeof(Py_ssize_t) + sizeof(PyObject**) |
|
130 |
+ (self._mask + 1) * sizeof(PyObject*)) |
|
5361.2.1
by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table. |
131 |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
132 |
def __dealloc__(self): |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
133 |
if self._table != NULL: |
134 |
PyMem_Free(self._table) |
|
135 |
self._table = NULL |
|
136 |
||
137 |
property used: |
|
138 |
def __get__(self): |
|
139 |
return self._used |
|
140 |
||
141 |
property fill: |
|
142 |
def __get__(self): |
|
143 |
return self._fill |
|
144 |
||
145 |
property mask: |
|
146 |
def __get__(self): |
|
147 |
return self._mask |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
148 |
|
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
149 |
def _memory_size(self): |
150 |
"""Return the number of bytes of memory consumed by this class."""
|
|
151 |
return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1)) |
|
152 |
||
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
153 |
def __len__(self): |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
154 |
return self._used |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
155 |
|
156 |
def _test_lookup(self, key): |
|
157 |
cdef PyObject **slot |
|
158 |
||
4679.3.61
by John Arbash Meinel
Invert the logic. |
159 |
slot = _lookup(self, key) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
160 |
if slot[0] == NULL: |
161 |
res = '<null>' |
|
162 |
elif slot[0] == _dummy: |
|
163 |
res = '<dummy>' |
|
164 |
else: |
|
165 |
res = <object>slot[0] |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
166 |
return <int>(slot - self._table), res |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
167 |
|
168 |
def __contains__(self, key): |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
169 |
"""Is key present in this SimpleSet."""
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
170 |
cdef PyObject **slot |
171 |
||
4679.3.61
by John Arbash Meinel
Invert the logic. |
172 |
slot = _lookup(self, key) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
173 |
if slot[0] == NULL or slot[0] == _dummy: |
174 |
return False |
|
175 |
return True |
|
176 |
||
4679.3.62
by John Arbash Meinel
Move some of the information into the pxd header file. |
177 |
cdef PyObject *_get(self, object key) except? NULL: |
4679.3.61
by John Arbash Meinel
Invert the logic. |
178 |
"""Return the object (or nothing) define at the given location."""
|
179 |
cdef PyObject **slot |
|
180 |
||
181 |
slot = _lookup(self, key) |
|
182 |
if slot[0] == NULL or slot[0] == _dummy: |
|
183 |
return NULL |
|
184 |
return slot[0] |
|
185 |
||
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
186 |
def __getitem__(self, key): |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
187 |
"""Return a stored item that is equivalent to key."""
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
188 |
cdef PyObject *py_val |
189 |
||
190 |
py_val = self._get(key) |
|
191 |
if py_val == NULL: |
|
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
192 |
raise KeyError("Key %s is not present" % key) |
4679.3.61
by John Arbash Meinel
Invert the logic. |
193 |
val = <object>(py_val) |
4679.3.58
by John Arbash Meinel
Adding a StaticTupleInterner class. |
194 |
return val |
195 |
||
4679.3.63
by John Arbash Meinel
Implement resizing. |
196 |
cdef int _insert_clean(self, PyObject *key) except -1: |
197 |
"""Insert a key into self.table.
|
|
198 |
||
199 |
This is only meant to be used during times like '_resize',
|
|
200 |
as it makes a lot of assuptions about keys not already being present,
|
|
201 |
and there being no dummy entries.
|
|
202 |
"""
|
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
203 |
cdef size_t i, n_lookup |
4679.3.63
by John Arbash Meinel
Implement resizing. |
204 |
cdef long the_hash |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
205 |
cdef PyObject **table, **slot |
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
206 |
cdef Py_ssize_t mask |
4679.3.63
by John Arbash Meinel
Implement resizing. |
207 |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
208 |
mask = self._mask |
209 |
table = self._table |
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
210 |
|
4739.2.1
by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5 |
211 |
the_hash = PyObject_Hash(key) |
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
212 |
i = the_hash |
213 |
for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
214 |
slot = &table[i & mask] |
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
215 |
if slot[0] == NULL: |
216 |
slot[0] = key |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
217 |
self._fill = self._fill + 1 |
218 |
self._used = self._used + 1 |
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
219 |
return 1 |
220 |
i = i + 1 + n_lookup |
|
221 |
raise RuntimeError('ran out of slots.') |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
222 |
|
223 |
def _py_resize(self, min_used): |
|
224 |
"""Do not use this directly, it is only exposed for testing."""
|
|
225 |
return self._resize(min_used) |
|
226 |
||
227 |
cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1: |
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
228 |
"""Resize the internal table.
|
229 |
||
230 |
The final table will be big enough to hold at least min_used entries.
|
|
231 |
We will copy the data from the existing table over, leaving out dummy
|
|
232 |
entries.
|
|
233 |
||
234 |
:return: The new size of the internal table
|
|
235 |
"""
|
|
236 |
cdef Py_ssize_t new_size, n_bytes, remaining |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
237 |
cdef PyObject **new_table, **old_table, **slot |
4679.3.63
by John Arbash Meinel
Implement resizing. |
238 |
|
239 |
new_size = DEFAULT_SIZE |
|
240 |
while new_size <= min_used and new_size > 0: |
|
241 |
new_size = new_size << 1 |
|
242 |
# We rolled over our signed size field
|
|
243 |
if new_size <= 0: |
|
244 |
raise MemoryError() |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
245 |
# Even if min_used == self._mask + 1, and we aren't changing the actual
|
4679.3.64
by John Arbash Meinel
Add functionality for shrinking the table. |
246 |
# size, we will still run the algorithm so that dummy entries are
|
247 |
# removed
|
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
248 |
# TODO: Test this
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
249 |
# if new_size < self._used:
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
250 |
# raise RuntimeError('cannot shrink SimpleSet to something'
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
251 |
# ' smaller than the number of used slots.')
|
252 |
n_bytes = sizeof(PyObject*) * new_size; |
|
253 |
new_table = <PyObject **>PyMem_Malloc(n_bytes) |
|
254 |
if new_table == NULL: |
|
255 |
raise MemoryError() |
|
256 |
||
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
257 |
old_table = self._table |
258 |
self._table = new_table |
|
259 |
memset(self._table, 0, n_bytes) |
|
260 |
self._mask = new_size - 1 |
|
261 |
self._used = 0 |
|
262 |
remaining = self._fill |
|
263 |
self._fill = 0 |
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
264 |
|
265 |
# Moving everything to the other table is refcount neutral, so we don't
|
|
266 |
# worry about it.
|
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
267 |
slot = old_table |
4679.3.63
by John Arbash Meinel
Implement resizing. |
268 |
while remaining > 0: |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
269 |
if slot[0] == NULL: # unused slot |
4679.3.63
by John Arbash Meinel
Implement resizing. |
270 |
pass
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
271 |
elif slot[0] == _dummy: # dummy slot |
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
272 |
remaining = remaining - 1 |
4679.3.63
by John Arbash Meinel
Implement resizing. |
273 |
else: # active slot |
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
274 |
remaining = remaining - 1 |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
275 |
self._insert_clean(slot[0]) |
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
276 |
slot = slot + 1 |
4679.3.63
by John Arbash Meinel
Implement resizing. |
277 |
PyMem_Free(old_table) |
278 |
return new_size |
|
279 |
||
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
280 |
def add(self, key): |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
281 |
"""Similar to set.add(), start tracking this key.
|
282 |
|
|
283 |
There is one small difference, which is that we return the object that
|
|
284 |
is stored at the given location. (which is closer to the
|
|
285 |
dict.setdefault() functionality.)
|
|
286 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
287 |
return self._add(key) |
288 |
||
289 |
cdef object _add(self, key): |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
290 |
cdef PyObject **slot, *py_key |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
291 |
cdef int added |
4679.3.61
by John Arbash Meinel
Invert the logic. |
292 |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
293 |
py_key = <PyObject *>key |
294 |
if (Py_TYPE(py_key).tp_richcompare == NULL |
|
295 |
or Py_TYPE(py_key).tp_hash == NULL): |
|
296 |
raise TypeError('Types added to SimpleSet must implement' |
|
297 |
' both tp_richcompare and tp_hash') |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
298 |
added = 0 |
4679.3.63
by John Arbash Meinel
Implement resizing. |
299 |
# We need at least one empty slot
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
300 |
assert self._used < self._mask |
4679.3.61
by John Arbash Meinel
Invert the logic. |
301 |
slot = _lookup(self, key) |
302 |
if (slot[0] == NULL): |
|
303 |
Py_INCREF(py_key) |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
304 |
self._fill = self._fill + 1 |
305 |
self._used = self._used + 1 |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
306 |
slot[0] = py_key |
4679.3.63
by John Arbash Meinel
Implement resizing. |
307 |
added = 1 |
4679.3.61
by John Arbash Meinel
Invert the logic. |
308 |
elif (slot[0] == _dummy): |
309 |
Py_INCREF(py_key) |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
310 |
self._used = self._used + 1 |
4679.3.61
by John Arbash Meinel
Invert the logic. |
311 |
slot[0] = py_key |
4679.3.63
by John Arbash Meinel
Implement resizing. |
312 |
added = 1 |
4679.3.61
by John Arbash Meinel
Invert the logic. |
313 |
# No else: clause. If _lookup returns a pointer to
|
314 |
# a live object, then we already have a value at this location.
|
|
4679.3.63
by John Arbash Meinel
Implement resizing. |
315 |
retval = <object>(slot[0]) |
316 |
# PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
317 |
if added and (self._fill * 3) >= ((self._mask + 1) * 2): |
4679.3.63
by John Arbash Meinel
Implement resizing. |
318 |
# However, we always work for a load factor of 2:1
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
319 |
self._resize(self._used * 2) |
4679.3.63
by John Arbash Meinel
Implement resizing. |
320 |
# Even if we resized and ended up moving retval into a different slot,
|
321 |
# it is still the value that is held at the slot equivalent to 'key',
|
|
322 |
# so we can still return it
|
|
323 |
return retval |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
324 |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
325 |
def discard(self, key): |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
326 |
"""Remove key from the set, whether it exists or not.
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
327 |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
328 |
:return: False if the item did not exist, True if it did
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
329 |
"""
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
330 |
if self._discard(key): |
331 |
return True |
|
332 |
return False |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
333 |
|
334 |
cdef int _discard(self, key) except -1: |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
335 |
cdef PyObject **slot, *py_key |
336 |
||
337 |
slot = _lookup(self, key) |
|
338 |
if slot[0] == NULL or slot[0] == _dummy: |
|
339 |
return 0 |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
340 |
self._used = self._used - 1 |
4679.3.61
by John Arbash Meinel
Invert the logic. |
341 |
Py_DECREF(slot[0]) |
342 |
slot[0] = _dummy |
|
4679.3.64
by John Arbash Meinel
Add functionality for shrinking the table. |
343 |
# PySet uses the heuristic: If more than 1/5 are dummies, then resize
|
344 |
# them away
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
345 |
# if ((so->_fill - so->_used) * 5 < so->mask)
|
4679.3.64
by John Arbash Meinel
Add functionality for shrinking the table. |
346 |
# However, we are planning on using this as an interning structure, in
|
347 |
# which we will be putting a lot of objects. And we expect that large
|
|
348 |
# groups of them are going to have the same lifetime.
|
|
349 |
# Dummy entries hurt a little bit because they cause the lookup to keep
|
|
350 |
# searching, but resizing is also rather expensive
|
|
351 |
# For now, we'll just use their algorithm, but we may want to revisit
|
|
352 |
# it
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
353 |
if ((self._fill - self._used) * 5 > self._mask): |
354 |
self._resize(self._used * 2) |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
355 |
return 1 |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
356 |
|
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
357 |
def __iter__(self): |
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
358 |
return _SimpleSet_iterator(self) |
359 |
||
360 |
||
361 |
cdef class _SimpleSet_iterator: |
|
362 |
"""Iterator over the SimpleSet structure."""
|
|
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
363 |
|
364 |
cdef Py_ssize_t pos |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
365 |
cdef SimpleSet set |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
366 |
cdef Py_ssize_t _used # track if things have been mutated while iterating |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
367 |
cdef Py_ssize_t len # number of entries left |
368 |
||
369 |
def __init__(self, obj): |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
370 |
self.set = obj |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
371 |
self.pos = 0 |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
372 |
self._used = self.set._used |
373 |
self.len = self.set._used |
|
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
374 |
|
375 |
def __iter__(self): |
|
376 |
return self |
|
377 |
||
378 |
def __next__(self): |
|
379 |
cdef Py_ssize_t mask, i |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
380 |
cdef PyObject *key |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
381 |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
382 |
if self.set is None: |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
383 |
raise StopIteration |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
384 |
if self.set._used != self._used: |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
385 |
# Force this exception to continue to be raised
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
386 |
self._used = -1 |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
387 |
raise RuntimeError("Set size changed during iteration") |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
388 |
if not SimpleSet_Next(self.set, &self.pos, &key): |
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
389 |
self.set = None |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
390 |
raise StopIteration |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
391 |
# we found something
|
392 |
the_key = <object>key # INCREF |
|
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
393 |
self.len = self.len - 1 |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
394 |
return the_key |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
395 |
|
396 |
def __length_hint__(self): |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
397 |
if self.set is not None and self._used == self.set._used: |
4679.3.65
by John Arbash Meinel
Add __iter__ support. |
398 |
return self.len |
399 |
return 0 |
|
400 |
||
401 |
||
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
402 |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
403 |
cdef api SimpleSet SimpleSet_New(): |
404 |
"""Create a new SimpleSet object."""
|
|
405 |
return SimpleSet() |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
406 |
|
407 |
||
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
408 |
cdef SimpleSet _check_self(object self): |
4679.3.61
by John Arbash Meinel
Invert the logic. |
409 |
"""Check that the parameter is not None.
|
410 |
||
411 |
Pyrex/Cython will do type checking, but only to ensure that an object is
|
|
412 |
either the right type or None. You can say "object foo not None" for pure
|
|
413 |
python functions, but not for C functions.
|
|
414 |
So this is just a helper for all the apis that need to do the check.
|
|
415 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
416 |
cdef SimpleSet true_self |
4679.3.61
by John Arbash Meinel
Invert the logic. |
417 |
if self is None: |
418 |
raise TypeError('self must not be None') |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
419 |
true_self = self |
420 |
return true_self |
|
421 |
||
422 |
||
423 |
cdef PyObject **_lookup(SimpleSet self, object key) except NULL: |
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
424 |
"""Find the slot where 'key' would fit.
|
425 |
||
426 |
This is the same as a dicts 'lookup' function.
|
|
427 |
||
428 |
:param key: An object we are looking up
|
|
429 |
:param hash: The hash for key
|
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
430 |
:return: The location in self.table where key should be put.
|
431 |
location == NULL is an exception, but (*location) == NULL just
|
|
432 |
indicates the slot is empty and can be used.
|
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
433 |
"""
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
434 |
# This uses Quadratic Probing:
|
435 |
# http://en.wikipedia.org/wiki/Quadratic_probing
|
|
436 |
# with c1 = c2 = 1/2
|
|
437 |
# This leads to probe locations at:
|
|
438 |
# h0 = hash(k1)
|
|
439 |
# h1 = h0 + 1
|
|
440 |
# h2 = h0 + 3 = h1 + 1 + 1
|
|
441 |
# h3 = h0 + 6 = h2 + 1 + 2
|
|
442 |
# h4 = h0 + 10 = h2 + 1 + 3
|
|
443 |
# Note that all of these are '& mask', but that is computed *after* the
|
|
444 |
# offset.
|
|
445 |
# This differs from the algorithm used by Set and Dict. Which, effectively,
|
|
446 |
# use double-hashing, and a step size that starts large, but dwindles to
|
|
447 |
# stepping one-by-one.
|
|
448 |
# This gives more 'locality' in that if you have a collision at offset X,
|
|
449 |
# the first fallback is X+1, which is fast to check. However, that means
|
|
450 |
# that an object w/ hash X+1 will also check there, and then X+2 next.
|
|
451 |
# However, for objects with differing hashes, their chains are different.
|
|
452 |
# The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
|
|
453 |
# So different hashes diverge quickly.
|
|
454 |
# A bigger problem is that we *only* ever use the lowest bits of the hash
|
|
455 |
# So all integers (x + SIZE*N) will resolve into the same bucket, and all
|
|
456 |
# use the same collision resolution. We may want to try to find a way to
|
|
457 |
# incorporate the upper bits of the hash with quadratic probing. (For
|
|
458 |
# example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
|
|
459 |
cdef size_t i, n_lookup |
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
460 |
cdef Py_ssize_t mask |
4679.3.61
by John Arbash Meinel
Invert the logic. |
461 |
cdef long key_hash |
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
462 |
cdef PyObject **table, **slot, *cur, **free_slot, *py_key |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
463 |
|
4739.2.1
by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5 |
464 |
py_key = <PyObject *>key |
465 |
# Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
|
|
466 |
# (it treats hash() as returning an 'int' rather than a 'long')
|
|
467 |
key_hash = PyObject_Hash(py_key) |
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
468 |
i = <size_t>key_hash |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
469 |
mask = self._mask |
470 |
table = self._table |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
471 |
free_slot = NULL |
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
472 |
for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever |
473 |
slot = &table[i & mask] |
|
474 |
cur = slot[0] |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
475 |
if cur == NULL: |
476 |
# Found a blank spot
|
|
477 |
if free_slot != NULL: |
|
478 |
# Did we find an earlier _dummy entry?
|
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
479 |
return free_slot |
480 |
else: |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
481 |
return slot |
482 |
if cur == py_key: |
|
483 |
# Found an exact pointer to the key
|
|
484 |
return slot |
|
485 |
if cur == _dummy: |
|
486 |
if free_slot == NULL: |
|
487 |
free_slot = slot |
|
488 |
elif _is_equal(py_key, key_hash, cur): |
|
489 |
# Both py_key and cur belong in this slot, return it
|
|
490 |
return slot |
|
4679.3.91
by John Arbash Meinel
Change the _lookup function to use Quadratic Probing. |
491 |
i = i + 1 + n_lookup |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
492 |
raise AssertionError('should never get here') |
493 |
||
494 |
||
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
495 |
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL: |
4679.3.61
by John Arbash Meinel
Invert the logic. |
496 |
"""Find the slot where 'key' would fit.
|
497 |
||
498 |
This is the same as a dicts 'lookup' function. This is a private
|
|
499 |
api because mutating what you get without maintaing the other invariants
|
|
500 |
is a 'bad thing'.
|
|
501 |
||
502 |
:param key: An object we are looking up
|
|
503 |
:param hash: The hash for key
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
504 |
:return: The location in self._table where key should be put
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
505 |
should never be NULL, but may reference a NULL (PyObject*)
|
506 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
507 |
return _lookup(_check_self(self), key) |
4679.3.61
by John Arbash Meinel
Invert the logic. |
508 |
|
509 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
510 |
cdef api object SimpleSet_Add(object self, object key): |
511 |
"""Add a key to the SimpleSet (set).
|
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
512 |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
513 |
:param self: The SimpleSet to add the key to.
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
514 |
:param key: The key to be added. If the key is already present,
|
515 |
self will not be modified
|
|
516 |
:return: The current key stored at the location defined by 'key'.
|
|
517 |
This may be the same object, or it may be an equivalent object.
|
|
518 |
(consider dict.setdefault(key, key))
|
|
519 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
520 |
return _check_self(self)._add(key) |
4679.3.61
by John Arbash Meinel
Invert the logic. |
521 |
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
522 |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
523 |
cdef api int SimpleSet_Contains(object self, object key) except -1: |
4679.3.61
by John Arbash Meinel
Invert the logic. |
524 |
"""Is key present in self?"""
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
525 |
return (key in _check_self(self)) |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
526 |
|
527 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
528 |
cdef api int SimpleSet_Discard(object self, object key) except -1: |
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
529 |
"""Remove the object referenced at location 'key'.
|
530 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
531 |
:param self: The SimpleSet being modified
|
4679.3.60
by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner. |
532 |
:param key: The key we are checking on
|
533 |
:return: 1 if there was an object present, 0 if there was not, and -1 on
|
|
534 |
error.
|
|
535 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
536 |
return _check_self(self)._discard(key) |
537 |
||
538 |
||
539 |
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL: |
|
4679.3.61
by John Arbash Meinel
Invert the logic. |
540 |
"""Get a pointer to the object present at location 'key'.
|
541 |
||
542 |
This returns an object which is equal to key which was previously added to
|
|
543 |
self. This returns a borrowed reference, as it may also return NULL if no
|
|
544 |
value is present at that location.
|
|
545 |
||
546 |
:param key: The value we are looking for
|
|
547 |
:return: The object present at that location
|
|
548 |
"""
|
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
549 |
return _check_self(self)._get(key) |
4679.3.61
by John Arbash Meinel
Invert the logic. |
550 |
|
551 |
||
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
552 |
cdef api Py_ssize_t SimpleSet_Size(object self) except -1: |
4679.3.61
by John Arbash Meinel
Invert the logic. |
553 |
"""Get the number of active entries in 'self'"""
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
554 |
return _check_self(self)._used |
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
555 |
|
556 |
||
4932.1.2
by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'. |
557 |
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, |
558 |
PyObject **key) except -1: |
|
4679.3.76
by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet. |
559 |
"""Walk over items in a SimpleSet.
|
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
560 |
|
561 |
:param pos: should be initialized to 0 by the caller, and will be updated
|
|
562 |
by this function
|
|
563 |
:param key: Will return a borrowed reference to key
|
|
564 |
:return: 0 if nothing left, 1 if we are returning a new value
|
|
565 |
"""
|
|
566 |
cdef Py_ssize_t i, mask |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
567 |
cdef SimpleSet true_self |
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
568 |
cdef PyObject **table |
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
569 |
true_self = _check_self(self) |
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
570 |
i = pos[0] |
571 |
if (i < 0): |
|
572 |
return 0 |
|
4679.3.81
by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again. |
573 |
mask = true_self._mask |
574 |
table= true_self._table |
|
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
575 |
while (i <= mask and (table[i] == NULL or table[i] == _dummy)): |
4679.3.94
by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility. |
576 |
i = i + 1 |
4679.3.66
by John Arbash Meinel
Implement the C variant of __iter__ |
577 |
pos[0] = i + 1 |
578 |
if (i > mask): |
|
579 |
return 0 # All done |
|
580 |
if (key != NULL): |
|
581 |
key[0] = table[i] |
|
582 |
return 1 |
|
583 |
||
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
584 |
|
4932.1.2
by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'. |
585 |
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit, |
586 |
void *arg) except -1: |
|
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
587 |
"""This is an implementation of 'tp_traverse' that hits the whole table.
|
588 |
||
589 |
Cython/Pyrex don't seem to let you define a tp_traverse, and they only
|
|
590 |
define one for you if you have an 'object' attribute. Since they don't
|
|
591 |
support C arrays of objects, we access the PyObject * directly.
|
|
592 |
"""
|
|
593 |
cdef Py_ssize_t pos |
|
594 |
cdef PyObject *next_key |
|
595 |
cdef int ret |
|
596 |
||
597 |
pos = 0 |
|
598 |
while SimpleSet_Next(self, &pos, &next_key): |
|
599 |
ret = visit(next_key, arg) |
|
600 |
if ret: |
|
601 |
return ret |
|
4679.3.88
by John Arbash Meinel
Some review comments from Andrew. |
602 |
return 0 |
4679.3.84
by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists. |
603 |
|
604 |
# It is a little bit ugly to do this, but it works, and means that Meliae can
|
|
605 |
# dump the total memory consumed by all child objects.
|
|
606 |
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse |