~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
17
"""Definition of a class that is similar to Set with some small changes."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
18
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
19
cdef extern from "python-compat.h":
20
    pass
21
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
22
cdef extern from "Python.h":
23
    ctypedef unsigned long size_t
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
24
    ctypedef long (*hashfunc)(PyObject*) except -1
25
    ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int)
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
26
    ctypedef int (*visitproc)(PyObject *, void *)
27
    ctypedef int (*traverseproc)(PyObject *, visitproc, void *)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
28
    int Py_EQ
29
    void Py_INCREF(PyObject *)
30
    void Py_DECREF(PyObject *)
31
    ctypedef struct PyTypeObject:
32
        hashfunc tp_hash
33
        richcmpfunc tp_richcompare
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
34
        traverseproc tp_traverse
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
35
36
    PyTypeObject *Py_TYPE(PyObject *)
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
37
    # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and
38
    #       thus silently truncates to 32-bits on 64-bit machines.
39
    long PyObject_Hash(PyObject *) except -1
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
40
        
41
    void *PyMem_Malloc(size_t nbytes)
42
    void PyMem_Free(void *)
43
    void memset(void *, int, size_t)
44
4679.3.86 by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file.
45
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
46
# Dummy is an object used to mark nodes that have been deleted. Since
47
# collisions require us to move a node to an alternative location, if we just
48
# set an entry to NULL on delete, we won't find any relocated nodes.
49
# We have to use _dummy_obj because we need to keep a refcount to it, but we
50
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
51
# over the code base.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
52
cdef object _dummy_obj
53
cdef PyObject *_dummy
54
_dummy_obj = object()
55
_dummy = <PyObject *>_dummy_obj
56
4679.3.74 by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code.
57
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
58
cdef object _NotImplemented
59
_NotImplemented = NotImplemented
60
61
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
62
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
63
    cdef long other_hash
64
65
    if this == other:
66
        return 1
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
67
    other_hash = PyObject_Hash(other)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
68
    if other_hash != this_hash:
69
        return 0
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
70
71
    # This implements a subset of the PyObject_RichCompareBool functionality.
72
    # Namely it:
73
    #   1) Doesn't try to do anything with old-style classes
74
    #   2) Assumes that both objects have a tp_richcompare implementation, and
75
    #      that if that is not enough to compare equal, then they are not
76
    #      equal. (It doesn't try to cast them both to some intermediate form
77
    #      that would compare equal.)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
78
    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
79
    if res is _NotImplemented:
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
80
        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
81
        if res is _NotImplemented:
82
            return 0
83
    if res:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
84
        return 1
85
    return 0
86
87
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
88
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
89
    """This class can be used to track canonical forms for objects.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
90
91
    It is similar in function to the interned dictionary that is used by
92
    strings. However:
93
94
      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
95
         of it
96
      2) It only stores one reference to the object, rather than 2 (key vs
97
         key:value)
98
99
    As such, it uses 1/3rd the amount of memory to store a pointer to the
100
    interned object.
101
    """
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
102
    # Attributes are defined in the .pxd file
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
103
    DEF DEFAULT_SIZE=1024
104
105
    def __init__(self):
106
        cdef Py_ssize_t size, n_bytes
107
108
        size = DEFAULT_SIZE
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
109
        self._mask = size - 1
110
        self._used = 0
111
        self._fill = 0
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
112
        n_bytes = sizeof(PyObject*) * size;
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
113
        self._table = <PyObject **>PyMem_Malloc(n_bytes)
114
        if self._table == NULL:
4679.3.63 by John Arbash Meinel
Implement resizing.
115
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
116
        memset(self._table, 0, n_bytes)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
117
5361.2.1 by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table.
118
    def __sizeof__(self):
5361.2.6 by John Arbash Meinel
We also have to re-implement it for _simple_set_pyx.pyx
119
        # Note: Pyrex doesn't allow sizeof(class) so we re-implement it here.
120
        # Bits are:
121
        #   1: PyObject
122
        #   2: vtable *
123
        #   3: 3 Py_ssize_t
124
        #   4: PyObject**
125
        # Note that we might get alignment, etc, wrong, but at least this is
126
        # better than no estimate at all
127
        # return sizeof(SimpleSet) + (self._mask + 1) * (sizeof(PyObject*))
128
        return (sizeof(PyObject) + sizeof(void*)
129
                + 3*sizeof(Py_ssize_t) + sizeof(PyObject**)
130
                + (self._mask + 1) * sizeof(PyObject*))
5361.2.1 by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table.
131
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
132
    def __dealloc__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
133
        if self._table != NULL:
134
            PyMem_Free(self._table)
135
            self._table = NULL
136
137
    property used:
138
        def __get__(self):
139
            return self._used
140
141
    property fill:
142
        def __get__(self):
143
            return self._fill
144
145
    property mask:
146
        def __get__(self):
147
            return self._mask
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
148
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
149
    def _memory_size(self):
150
        """Return the number of bytes of memory consumed by this class."""
151
        return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1))
152
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
153
    def __len__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
154
        return self._used
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
155
156
    def _test_lookup(self, key):
157
        cdef PyObject **slot
158
4679.3.61 by John Arbash Meinel
Invert the logic.
159
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
160
        if slot[0] == NULL:
161
            res = '<null>'
162
        elif slot[0] == _dummy:
163
            res = '<dummy>'
164
        else:
165
            res = <object>slot[0]
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
166
        return <int>(slot - self._table), res
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
167
168
    def __contains__(self, key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
169
        """Is key present in this SimpleSet."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
170
        cdef PyObject **slot
171
4679.3.61 by John Arbash Meinel
Invert the logic.
172
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
173
        if slot[0] == NULL or slot[0] == _dummy:
174
            return False
175
        return True
176
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
177
    cdef PyObject *_get(self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
178
        """Return the object (or nothing) define at the given location."""
179
        cdef PyObject **slot
180
181
        slot = _lookup(self, key)
182
        if slot[0] == NULL or slot[0] == _dummy:
183
            return NULL
184
        return slot[0]
185
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
186
    def __getitem__(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
187
        """Return a stored item that is equivalent to key."""
4679.3.61 by John Arbash Meinel
Invert the logic.
188
        cdef PyObject *py_val
189
190
        py_val = self._get(key)
191
        if py_val == NULL:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
192
            raise KeyError("Key %s is not present" % key)
4679.3.61 by John Arbash Meinel
Invert the logic.
193
        val = <object>(py_val)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
194
        return val
195
4679.3.63 by John Arbash Meinel
Implement resizing.
196
    cdef int _insert_clean(self, PyObject *key) except -1:
197
        """Insert a key into self.table.
198
199
        This is only meant to be used during times like '_resize',
200
        as it makes a lot of assuptions about keys not already being present,
201
        and there being no dummy entries.
202
        """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
203
        cdef size_t i, n_lookup
4679.3.63 by John Arbash Meinel
Implement resizing.
204
        cdef long the_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
205
        cdef PyObject **table, **slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
206
        cdef Py_ssize_t mask
4679.3.63 by John Arbash Meinel
Implement resizing.
207
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
208
        mask = self._mask
209
        table = self._table
4679.3.63 by John Arbash Meinel
Implement resizing.
210
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
211
        the_hash = PyObject_Hash(key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
212
        i = the_hash
213
        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
214
            slot = &table[i & mask]
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
215
            if slot[0] == NULL:
216
                slot[0] = key
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
217
                self._fill = self._fill + 1
218
                self._used = self._used + 1
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
219
                return 1
220
            i = i + 1 + n_lookup
221
        raise RuntimeError('ran out of slots.')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
222
223
    def _py_resize(self, min_used):
224
        """Do not use this directly, it is only exposed for testing."""
225
        return self._resize(min_used)
226
227
    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
4679.3.63 by John Arbash Meinel
Implement resizing.
228
        """Resize the internal table.
229
230
        The final table will be big enough to hold at least min_used entries.
231
        We will copy the data from the existing table over, leaving out dummy
232
        entries.
233
234
        :return: The new size of the internal table
235
        """
236
        cdef Py_ssize_t new_size, n_bytes, remaining
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
237
        cdef PyObject **new_table, **old_table, **slot
4679.3.63 by John Arbash Meinel
Implement resizing.
238
239
        new_size = DEFAULT_SIZE
240
        while new_size <= min_used and new_size > 0:
241
            new_size = new_size << 1
242
        # We rolled over our signed size field
243
        if new_size <= 0:
244
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
245
        # Even if min_used == self._mask + 1, and we aren't changing the actual
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
246
        # size, we will still run the algorithm so that dummy entries are
247
        # removed
4679.3.63 by John Arbash Meinel
Implement resizing.
248
        # TODO: Test this
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
249
        # if new_size < self._used:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
250
        #     raise RuntimeError('cannot shrink SimpleSet to something'
4679.3.63 by John Arbash Meinel
Implement resizing.
251
        #                        ' smaller than the number of used slots.')
252
        n_bytes = sizeof(PyObject*) * new_size;
253
        new_table = <PyObject **>PyMem_Malloc(n_bytes)
254
        if new_table == NULL:
255
            raise MemoryError()
256
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
257
        old_table = self._table
258
        self._table = new_table
259
        memset(self._table, 0, n_bytes)
260
        self._mask = new_size - 1
261
        self._used = 0
262
        remaining = self._fill
263
        self._fill = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
264
265
        # Moving everything to the other table is refcount neutral, so we don't
266
        # worry about it.
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
267
        slot = old_table
4679.3.63 by John Arbash Meinel
Implement resizing.
268
        while remaining > 0:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
269
            if slot[0] == NULL: # unused slot
4679.3.63 by John Arbash Meinel
Implement resizing.
270
                pass 
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
271
            elif slot[0] == _dummy: # dummy slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
272
                remaining = remaining - 1
4679.3.63 by John Arbash Meinel
Implement resizing.
273
            else: # active slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
274
                remaining = remaining - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
275
                self._insert_clean(slot[0])
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
276
            slot = slot + 1
4679.3.63 by John Arbash Meinel
Implement resizing.
277
        PyMem_Free(old_table)
278
        return new_size
279
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
280
    def add(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
281
        """Similar to set.add(), start tracking this key.
282
        
283
        There is one small difference, which is that we return the object that
284
        is stored at the given location. (which is closer to the
285
        dict.setdefault() functionality.)
286
        """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
287
        return self._add(key)
288
289
    cdef object _add(self, key):
4679.3.61 by John Arbash Meinel
Invert the logic.
290
        cdef PyObject **slot, *py_key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
291
        cdef int added
4679.3.61 by John Arbash Meinel
Invert the logic.
292
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
293
        py_key = <PyObject *>key
294
        if (Py_TYPE(py_key).tp_richcompare == NULL
295
            or Py_TYPE(py_key).tp_hash == NULL):
296
            raise TypeError('Types added to SimpleSet must implement'
297
                            ' both tp_richcompare and tp_hash')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
298
        added = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
299
        # We need at least one empty slot
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
300
        assert self._used < self._mask
4679.3.61 by John Arbash Meinel
Invert the logic.
301
        slot = _lookup(self, key)
302
        if (slot[0] == NULL):
303
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
304
            self._fill = self._fill + 1
305
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
306
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
307
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
308
        elif (slot[0] == _dummy):
309
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
310
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
311
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
312
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
313
        # No else: clause. If _lookup returns a pointer to
314
        # a live object, then we already have a value at this location.
4679.3.63 by John Arbash Meinel
Implement resizing.
315
        retval = <object>(slot[0])
316
        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
317
        if added and (self._fill * 3) >= ((self._mask + 1) * 2):
4679.3.63 by John Arbash Meinel
Implement resizing.
318
            # However, we always work for a load factor of 2:1
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
319
            self._resize(self._used * 2)
4679.3.63 by John Arbash Meinel
Implement resizing.
320
        # Even if we resized and ended up moving retval into a different slot,
321
        # it is still the value that is held at the slot equivalent to 'key',
322
        # so we can still return it
323
        return retval
4679.3.61 by John Arbash Meinel
Invert the logic.
324
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
325
    def discard(self, key):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
326
        """Remove key from the set, whether it exists or not.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
327
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
328
        :return: False if the item did not exist, True if it did
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
329
        """
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
330
        if self._discard(key):
331
            return True
332
        return False
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
333
334
    cdef int _discard(self, key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
335
        cdef PyObject **slot, *py_key
336
337
        slot = _lookup(self, key)
338
        if slot[0] == NULL or slot[0] == _dummy:
339
            return 0
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
340
        self._used = self._used - 1
4679.3.61 by John Arbash Meinel
Invert the logic.
341
        Py_DECREF(slot[0])
342
        slot[0] = _dummy
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
343
        # PySet uses the heuristic: If more than 1/5 are dummies, then resize
344
        #                           them away
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
345
        #   if ((so->_fill - so->_used) * 5 < so->mask)
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
346
        # However, we are planning on using this as an interning structure, in
347
        # which we will be putting a lot of objects. And we expect that large
348
        # groups of them are going to have the same lifetime.
349
        # Dummy entries hurt a little bit because they cause the lookup to keep
350
        # searching, but resizing is also rather expensive
351
        # For now, we'll just use their algorithm, but we may want to revisit
352
        # it
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
353
        if ((self._fill - self._used) * 5 > self._mask):
354
            self._resize(self._used * 2)
4679.3.61 by John Arbash Meinel
Invert the logic.
355
        return 1
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
356
4679.3.65 by John Arbash Meinel
Add __iter__ support.
357
    def __iter__(self):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
358
        return _SimpleSet_iterator(self)
359
360
361
cdef class _SimpleSet_iterator:
362
    """Iterator over the SimpleSet structure."""
4679.3.65 by John Arbash Meinel
Add __iter__ support.
363
364
    cdef Py_ssize_t pos
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
365
    cdef SimpleSet set
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
366
    cdef Py_ssize_t _used # track if things have been mutated while iterating
4679.3.65 by John Arbash Meinel
Add __iter__ support.
367
    cdef Py_ssize_t len # number of entries left
368
369
    def __init__(self, obj):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
370
        self.set = obj
4679.3.65 by John Arbash Meinel
Add __iter__ support.
371
        self.pos = 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
372
        self._used = self.set._used
373
        self.len = self.set._used
4679.3.65 by John Arbash Meinel
Add __iter__ support.
374
375
    def __iter__(self):
376
        return self
377
378
    def __next__(self):
379
        cdef Py_ssize_t mask, i
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
380
        cdef PyObject *key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
381
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
382
        if self.set is None:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
383
            raise StopIteration
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
384
        if self.set._used != self._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
385
            # Force this exception to continue to be raised
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
386
            self._used = -1
4679.3.65 by John Arbash Meinel
Add __iter__ support.
387
            raise RuntimeError("Set size changed during iteration")
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
388
        if not SimpleSet_Next(self.set, &self.pos, &key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
389
            self.set = None
4679.3.65 by John Arbash Meinel
Add __iter__ support.
390
            raise StopIteration
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
391
        # we found something
392
        the_key = <object>key # INCREF
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
393
        self.len = self.len - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
394
        return the_key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
395
396
    def __length_hint__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
397
        if self.set is not None and self._used == self.set._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
398
            return self.len
399
        return 0
400
    
401
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
402
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
403
cdef api SimpleSet SimpleSet_New():
404
    """Create a new SimpleSet object."""
405
    return SimpleSet()
4679.3.61 by John Arbash Meinel
Invert the logic.
406
407
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
408
cdef SimpleSet _check_self(object self):
4679.3.61 by John Arbash Meinel
Invert the logic.
409
    """Check that the parameter is not None.
410
411
    Pyrex/Cython will do type checking, but only to ensure that an object is
412
    either the right type or None. You can say "object foo not None" for pure
413
    python functions, but not for C functions.
414
    So this is just a helper for all the apis that need to do the check.
415
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
416
    cdef SimpleSet true_self
4679.3.61 by John Arbash Meinel
Invert the logic.
417
    if self is None:
418
        raise TypeError('self must not be None')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
419
    true_self = self
420
    return true_self
421
422
423
cdef PyObject **_lookup(SimpleSet self, object key) except NULL:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
424
    """Find the slot where 'key' would fit.
425
426
    This is the same as a dicts 'lookup' function.
427
428
    :param key: An object we are looking up
429
    :param hash: The hash for key
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
430
    :return: The location in self.table where key should be put.
431
        location == NULL is an exception, but (*location) == NULL just
432
        indicates the slot is empty and can be used.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
433
    """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
434
    # This uses Quadratic Probing:
435
    #  http://en.wikipedia.org/wiki/Quadratic_probing
436
    # with c1 = c2 = 1/2
437
    # This leads to probe locations at:
438
    #  h0 = hash(k1)
439
    #  h1 = h0 + 1
440
    #  h2 = h0 + 3 = h1 + 1 + 1
441
    #  h3 = h0 + 6 = h2 + 1 + 2
442
    #  h4 = h0 + 10 = h2 + 1 + 3
443
    # Note that all of these are '& mask', but that is computed *after* the
444
    # offset.
445
    # This differs from the algorithm used by Set and Dict. Which, effectively,
446
    # use double-hashing, and a step size that starts large, but dwindles to
447
    # stepping one-by-one.
448
    # This gives more 'locality' in that if you have a collision at offset X,
449
    # the first fallback is X+1, which is fast to check. However, that means
450
    # that an object w/ hash X+1 will also check there, and then X+2 next.
451
    # However, for objects with differing hashes, their chains are different.
452
    # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
453
    # So different hashes diverge quickly.
454
    # A bigger problem is that we *only* ever use the lowest bits of the hash
455
    # So all integers (x + SIZE*N) will resolve into the same bucket, and all
456
    # use the same collision resolution. We may want to try to find a way to
457
    # incorporate the upper bits of the hash with quadratic probing. (For
458
    # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
459
    cdef size_t i, n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
460
    cdef Py_ssize_t mask
4679.3.61 by John Arbash Meinel
Invert the logic.
461
    cdef long key_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
462
    cdef PyObject **table, **slot, *cur, **free_slot, *py_key
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
463
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
464
    py_key = <PyObject *>key
465
    # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
466
    #       (it treats hash() as returning an 'int' rather than a 'long')
467
    key_hash = PyObject_Hash(py_key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
468
    i = <size_t>key_hash
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
469
    mask = self._mask
470
    table = self._table
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
471
    free_slot = NULL
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
472
    for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
473
        slot = &table[i & mask]
474
        cur = slot[0]
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
475
        if cur == NULL:
476
            # Found a blank spot
477
            if free_slot != NULL:
478
                # Did we find an earlier _dummy entry?
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
479
                return free_slot
480
            else:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
481
                return slot
482
        if cur == py_key:
483
            # Found an exact pointer to the key
484
            return slot
485
        if cur == _dummy:
486
            if free_slot == NULL:
487
                free_slot = slot
488
        elif _is_equal(py_key, key_hash, cur):
489
            # Both py_key and cur belong in this slot, return it
490
            return slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
491
        i = i + 1 + n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
492
    raise AssertionError('should never get here')
493
494
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
495
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
496
    """Find the slot where 'key' would fit.
497
498
    This is the same as a dicts 'lookup' function. This is a private
499
    api because mutating what you get without maintaing the other invariants
500
    is a 'bad thing'.
501
502
    :param key: An object we are looking up
503
    :param hash: The hash for key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
504
    :return: The location in self._table where key should be put
4679.3.61 by John Arbash Meinel
Invert the logic.
505
        should never be NULL, but may reference a NULL (PyObject*)
506
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
507
    return _lookup(_check_self(self), key)
4679.3.61 by John Arbash Meinel
Invert the logic.
508
509
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
510
cdef api object SimpleSet_Add(object self, object key):
511
    """Add a key to the SimpleSet (set).
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
512
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
513
    :param self: The SimpleSet to add the key to.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
514
    :param key: The key to be added. If the key is already present,
515
        self will not be modified
516
    :return: The current key stored at the location defined by 'key'.
517
        This may be the same object, or it may be an equivalent object.
518
        (consider dict.setdefault(key, key))
519
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
520
    return _check_self(self)._add(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
521
    
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
522
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
523
cdef api int SimpleSet_Contains(object self, object key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
524
    """Is key present in self?"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
525
    return (key in _check_self(self))
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
526
527
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
528
cdef api int SimpleSet_Discard(object self, object key) except -1:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
529
    """Remove the object referenced at location 'key'.
530
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
531
    :param self: The SimpleSet being modified
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
532
    :param key: The key we are checking on
533
    :return: 1 if there was an object present, 0 if there was not, and -1 on
534
        error.
535
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
536
    return _check_self(self)._discard(key)
537
538
539
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
540
    """Get a pointer to the object present at location 'key'.
541
542
    This returns an object which is equal to key which was previously added to
543
    self. This returns a borrowed reference, as it may also return NULL if no
544
    value is present at that location.
545
546
    :param key: The value we are looking for
547
    :return: The object present at that location
548
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
549
    return _check_self(self)._get(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
550
551
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
552
cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
553
    """Get the number of active entries in 'self'"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
554
    return _check_self(self)._used
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
555
556
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
557
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos,
558
                            PyObject **key) except -1:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
559
    """Walk over items in a SimpleSet.
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
560
561
    :param pos: should be initialized to 0 by the caller, and will be updated
562
        by this function
563
    :param key: Will return a borrowed reference to key
564
    :return: 0 if nothing left, 1 if we are returning a new value
565
    """
566
    cdef Py_ssize_t i, mask
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
567
    cdef SimpleSet true_self
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
568
    cdef PyObject **table
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
569
    true_self = _check_self(self)
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
570
    i = pos[0]
571
    if (i < 0):
572
        return 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
573
    mask = true_self._mask
574
    table= true_self._table
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
575
    while (i <= mask and (table[i] == NULL or table[i] == _dummy)):
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
576
        i = i + 1
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
577
    pos[0] = i + 1
578
    if (i > mask):
579
        return 0 # All done
580
    if (key != NULL):
581
        key[0] = table[i]
582
    return 1
583
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
584
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
585
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit,
586
                            void *arg) except -1:
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
587
    """This is an implementation of 'tp_traverse' that hits the whole table.
588
589
    Cython/Pyrex don't seem to let you define a tp_traverse, and they only
590
    define one for you if you have an 'object' attribute. Since they don't
591
    support C arrays of objects, we access the PyObject * directly.
592
    """
593
    cdef Py_ssize_t pos
594
    cdef PyObject *next_key
595
    cdef int ret
596
597
    pos = 0
598
    while SimpleSet_Next(self, &pos, &next_key):
599
        ret = visit(next_key, arg)
600
        if ret:
601
            return ret
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
602
    return 0
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
603
604
# It is a little bit ugly to do this, but it works, and means that Meliae can
605
# dump the total memory consumed by all child objects.
606
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse