~bzr-pqm/bzr/bzr.dev

4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
17
"""Definition of a class that is similar to Set with some small changes."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
18
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
19
cdef extern from "python-compat.h":
20
    pass
21
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
22
cdef extern from "Python.h":
23
    ctypedef unsigned long size_t
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
24
    ctypedef long (*hashfunc)(PyObject*) except -1
25
    ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int)
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
26
    ctypedef int (*visitproc)(PyObject *, void *)
27
    ctypedef int (*traverseproc)(PyObject *, visitproc, void *)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
28
    int Py_EQ
29
    void Py_INCREF(PyObject *)
30
    void Py_DECREF(PyObject *)
31
    ctypedef struct PyTypeObject:
32
        hashfunc tp_hash
33
        richcmpfunc tp_richcompare
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
34
        traverseproc tp_traverse
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
35
36
    PyTypeObject *Py_TYPE(PyObject *)
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
37
    # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and
38
    #       thus silently truncates to 32-bits on 64-bit machines.
39
    long PyObject_Hash(PyObject *) except -1
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
40
        
41
    void *PyMem_Malloc(size_t nbytes)
42
    void PyMem_Free(void *)
43
    void memset(void *, int, size_t)
44
4679.3.86 by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file.
45
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
46
# Dummy is an object used to mark nodes that have been deleted. Since
47
# collisions require us to move a node to an alternative location, if we just
48
# set an entry to NULL on delete, we won't find any relocated nodes.
49
# We have to use _dummy_obj because we need to keep a refcount to it, but we
50
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
51
# over the code base.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
52
cdef object _dummy_obj
53
cdef PyObject *_dummy
54
_dummy_obj = object()
55
_dummy = <PyObject *>_dummy_obj
56
4679.3.74 by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code.
57
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
58
cdef object _NotImplemented
59
_NotImplemented = NotImplemented
60
61
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
62
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
63
    cdef long other_hash
64
65
    if this == other:
66
        return 1
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
67
    other_hash = PyObject_Hash(other)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
68
    if other_hash != this_hash:
69
        return 0
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
70
71
    # This implements a subset of the PyObject_RichCompareBool functionality.
72
    # Namely it:
73
    #   1) Doesn't try to do anything with old-style classes
74
    #   2) Assumes that both objects have a tp_richcompare implementation, and
75
    #      that if that is not enough to compare equal, then they are not
76
    #      equal. (It doesn't try to cast them both to some intermediate form
77
    #      that would compare equal.)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
78
    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
79
    if res is _NotImplemented:
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
80
        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
81
        if res is _NotImplemented:
82
            return 0
83
    if res:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
84
        return 1
85
    return 0
86
87
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
88
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
89
    """This class can be used to track canonical forms for objects.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
90
91
    It is similar in function to the interned dictionary that is used by
92
    strings. However:
93
94
      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
95
         of it
96
      2) It only stores one reference to the object, rather than 2 (key vs
97
         key:value)
98
99
    As such, it uses 1/3rd the amount of memory to store a pointer to the
100
    interned object.
101
    """
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
102
    # Attributes are defined in the .pxd file
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
103
    DEF DEFAULT_SIZE=1024
104
105
    def __init__(self):
106
        cdef Py_ssize_t size, n_bytes
107
108
        size = DEFAULT_SIZE
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
109
        self._mask = size - 1
110
        self._used = 0
111
        self._fill = 0
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
112
        n_bytes = sizeof(PyObject*) * size;
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
113
        self._table = <PyObject **>PyMem_Malloc(n_bytes)
114
        if self._table == NULL:
4679.3.63 by John Arbash Meinel
Implement resizing.
115
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
116
        memset(self._table, 0, n_bytes)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
117
118
    def __dealloc__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
119
        if self._table != NULL:
120
            PyMem_Free(self._table)
121
            self._table = NULL
122
123
    property used:
124
        def __get__(self):
125
            return self._used
126
127
    property fill:
128
        def __get__(self):
129
            return self._fill
130
131
    property mask:
132
        def __get__(self):
133
            return self._mask
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
134
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
135
    def _memory_size(self):
136
        """Return the number of bytes of memory consumed by this class."""
137
        return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1))
138
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
139
    def __len__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
140
        return self._used
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
141
142
    def _test_lookup(self, key):
143
        cdef PyObject **slot
144
4679.3.61 by John Arbash Meinel
Invert the logic.
145
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
146
        if slot[0] == NULL:
147
            res = '<null>'
148
        elif slot[0] == _dummy:
149
            res = '<dummy>'
150
        else:
151
            res = <object>slot[0]
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
152
        return <int>(slot - self._table), res
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
153
154
    def __contains__(self, key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
155
        """Is key present in this SimpleSet."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
156
        cdef PyObject **slot
157
4679.3.61 by John Arbash Meinel
Invert the logic.
158
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
159
        if slot[0] == NULL or slot[0] == _dummy:
160
            return False
161
        return True
162
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
163
    cdef PyObject *_get(self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
164
        """Return the object (or nothing) define at the given location."""
165
        cdef PyObject **slot
166
167
        slot = _lookup(self, key)
168
        if slot[0] == NULL or slot[0] == _dummy:
169
            return NULL
170
        return slot[0]
171
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
172
    def __getitem__(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
173
        """Return a stored item that is equivalent to key."""
4679.3.61 by John Arbash Meinel
Invert the logic.
174
        cdef PyObject *py_val
175
176
        py_val = self._get(key)
177
        if py_val == NULL:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
178
            raise KeyError("Key %s is not present" % key)
4679.3.61 by John Arbash Meinel
Invert the logic.
179
        val = <object>(py_val)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
180
        return val
181
4679.3.63 by John Arbash Meinel
Implement resizing.
182
    cdef int _insert_clean(self, PyObject *key) except -1:
183
        """Insert a key into self.table.
184
185
        This is only meant to be used during times like '_resize',
186
        as it makes a lot of assuptions about keys not already being present,
187
        and there being no dummy entries.
188
        """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
189
        cdef size_t i, n_lookup
4679.3.63 by John Arbash Meinel
Implement resizing.
190
        cdef long the_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
191
        cdef PyObject **table, **slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
192
        cdef Py_ssize_t mask
4679.3.63 by John Arbash Meinel
Implement resizing.
193
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
194
        mask = self._mask
195
        table = self._table
4679.3.63 by John Arbash Meinel
Implement resizing.
196
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
197
        the_hash = PyObject_Hash(key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
198
        i = the_hash
199
        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
200
            slot = &table[i & mask]
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
201
            if slot[0] == NULL:
202
                slot[0] = key
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
203
                self._fill = self._fill + 1
204
                self._used = self._used + 1
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
205
                return 1
206
            i = i + 1 + n_lookup
207
        raise RuntimeError('ran out of slots.')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
208
209
    def _py_resize(self, min_used):
210
        """Do not use this directly, it is only exposed for testing."""
211
        return self._resize(min_used)
212
213
    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
4679.3.63 by John Arbash Meinel
Implement resizing.
214
        """Resize the internal table.
215
216
        The final table will be big enough to hold at least min_used entries.
217
        We will copy the data from the existing table over, leaving out dummy
218
        entries.
219
220
        :return: The new size of the internal table
221
        """
222
        cdef Py_ssize_t new_size, n_bytes, remaining
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
223
        cdef PyObject **new_table, **old_table, **slot
4679.3.63 by John Arbash Meinel
Implement resizing.
224
225
        new_size = DEFAULT_SIZE
226
        while new_size <= min_used and new_size > 0:
227
            new_size = new_size << 1
228
        # We rolled over our signed size field
229
        if new_size <= 0:
230
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
231
        # Even if min_used == self._mask + 1, and we aren't changing the actual
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
232
        # size, we will still run the algorithm so that dummy entries are
233
        # removed
4679.3.63 by John Arbash Meinel
Implement resizing.
234
        # TODO: Test this
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
235
        # if new_size < self._used:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
236
        #     raise RuntimeError('cannot shrink SimpleSet to something'
4679.3.63 by John Arbash Meinel
Implement resizing.
237
        #                        ' smaller than the number of used slots.')
238
        n_bytes = sizeof(PyObject*) * new_size;
239
        new_table = <PyObject **>PyMem_Malloc(n_bytes)
240
        if new_table == NULL:
241
            raise MemoryError()
242
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
243
        old_table = self._table
244
        self._table = new_table
245
        memset(self._table, 0, n_bytes)
246
        self._mask = new_size - 1
247
        self._used = 0
248
        remaining = self._fill
249
        self._fill = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
250
251
        # Moving everything to the other table is refcount neutral, so we don't
252
        # worry about it.
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
253
        slot = old_table
4679.3.63 by John Arbash Meinel
Implement resizing.
254
        while remaining > 0:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
255
            if slot[0] == NULL: # unused slot
4679.3.63 by John Arbash Meinel
Implement resizing.
256
                pass 
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
257
            elif slot[0] == _dummy: # dummy slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
258
                remaining = remaining - 1
4679.3.63 by John Arbash Meinel
Implement resizing.
259
            else: # active slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
260
                remaining = remaining - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
261
                self._insert_clean(slot[0])
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
262
            slot = slot + 1
4679.3.63 by John Arbash Meinel
Implement resizing.
263
        PyMem_Free(old_table)
264
        return new_size
265
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
266
    def add(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
267
        """Similar to set.add(), start tracking this key.
268
        
269
        There is one small difference, which is that we return the object that
270
        is stored at the given location. (which is closer to the
271
        dict.setdefault() functionality.)
272
        """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
273
        return self._add(key)
274
275
    cdef object _add(self, key):
4679.3.61 by John Arbash Meinel
Invert the logic.
276
        cdef PyObject **slot, *py_key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
277
        cdef int added
4679.3.61 by John Arbash Meinel
Invert the logic.
278
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
279
        py_key = <PyObject *>key
280
        if (Py_TYPE(py_key).tp_richcompare == NULL
281
            or Py_TYPE(py_key).tp_hash == NULL):
282
            raise TypeError('Types added to SimpleSet must implement'
283
                            ' both tp_richcompare and tp_hash')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
284
        added = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
285
        # We need at least one empty slot
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
286
        assert self._used < self._mask
4679.3.61 by John Arbash Meinel
Invert the logic.
287
        slot = _lookup(self, key)
288
        if (slot[0] == NULL):
289
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
290
            self._fill = self._fill + 1
291
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
292
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
293
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
294
        elif (slot[0] == _dummy):
295
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
296
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
297
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
298
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
299
        # No else: clause. If _lookup returns a pointer to
300
        # a live object, then we already have a value at this location.
4679.3.63 by John Arbash Meinel
Implement resizing.
301
        retval = <object>(slot[0])
302
        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
303
        if added and (self._fill * 3) >= ((self._mask + 1) * 2):
4679.3.63 by John Arbash Meinel
Implement resizing.
304
            # However, we always work for a load factor of 2:1
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
305
            self._resize(self._used * 2)
4679.3.63 by John Arbash Meinel
Implement resizing.
306
        # Even if we resized and ended up moving retval into a different slot,
307
        # it is still the value that is held at the slot equivalent to 'key',
308
        # so we can still return it
309
        return retval
4679.3.61 by John Arbash Meinel
Invert the logic.
310
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
311
    def discard(self, key):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
312
        """Remove key from the set, whether it exists or not.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
313
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
314
        :return: False if the item did not exist, True if it did
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
315
        """
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
316
        if self._discard(key):
317
            return True
318
        return False
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
319
320
    cdef int _discard(self, key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
321
        cdef PyObject **slot, *py_key
322
323
        slot = _lookup(self, key)
324
        if slot[0] == NULL or slot[0] == _dummy:
325
            return 0
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
326
        self._used = self._used - 1
4679.3.61 by John Arbash Meinel
Invert the logic.
327
        Py_DECREF(slot[0])
328
        slot[0] = _dummy
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
329
        # PySet uses the heuristic: If more than 1/5 are dummies, then resize
330
        #                           them away
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
331
        #   if ((so->_fill - so->_used) * 5 < so->mask)
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
332
        # However, we are planning on using this as an interning structure, in
333
        # which we will be putting a lot of objects. And we expect that large
334
        # groups of them are going to have the same lifetime.
335
        # Dummy entries hurt a little bit because they cause the lookup to keep
336
        # searching, but resizing is also rather expensive
337
        # For now, we'll just use their algorithm, but we may want to revisit
338
        # it
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
339
        if ((self._fill - self._used) * 5 > self._mask):
340
            self._resize(self._used * 2)
4679.3.61 by John Arbash Meinel
Invert the logic.
341
        return 1
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
342
4679.3.65 by John Arbash Meinel
Add __iter__ support.
343
    def __iter__(self):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
344
        return _SimpleSet_iterator(self)
345
346
347
cdef class _SimpleSet_iterator:
348
    """Iterator over the SimpleSet structure."""
4679.3.65 by John Arbash Meinel
Add __iter__ support.
349
350
    cdef Py_ssize_t pos
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
351
    cdef SimpleSet set
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
352
    cdef Py_ssize_t _used # track if things have been mutated while iterating
4679.3.65 by John Arbash Meinel
Add __iter__ support.
353
    cdef Py_ssize_t len # number of entries left
354
355
    def __init__(self, obj):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
356
        self.set = obj
4679.3.65 by John Arbash Meinel
Add __iter__ support.
357
        self.pos = 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
358
        self._used = self.set._used
359
        self.len = self.set._used
4679.3.65 by John Arbash Meinel
Add __iter__ support.
360
361
    def __iter__(self):
362
        return self
363
364
    def __next__(self):
365
        cdef Py_ssize_t mask, i
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
366
        cdef PyObject *key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
367
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
368
        if self.set is None:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
369
            raise StopIteration
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
370
        if self.set._used != self._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
371
            # Force this exception to continue to be raised
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
372
            self._used = -1
4679.3.65 by John Arbash Meinel
Add __iter__ support.
373
            raise RuntimeError("Set size changed during iteration")
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
374
        if not SimpleSet_Next(self.set, &self.pos, &key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
375
            self.set = None
4679.3.65 by John Arbash Meinel
Add __iter__ support.
376
            raise StopIteration
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
377
        # we found something
378
        the_key = <object>key # INCREF
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
379
        self.len = self.len - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
380
        return the_key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
381
382
    def __length_hint__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
383
        if self.set is not None and self._used == self.set._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
384
            return self.len
385
        return 0
386
    
387
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
388
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
389
cdef api SimpleSet SimpleSet_New():
390
    """Create a new SimpleSet object."""
391
    return SimpleSet()
4679.3.61 by John Arbash Meinel
Invert the logic.
392
393
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
394
cdef SimpleSet _check_self(object self):
4679.3.61 by John Arbash Meinel
Invert the logic.
395
    """Check that the parameter is not None.
396
397
    Pyrex/Cython will do type checking, but only to ensure that an object is
398
    either the right type or None. You can say "object foo not None" for pure
399
    python functions, but not for C functions.
400
    So this is just a helper for all the apis that need to do the check.
401
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
402
    cdef SimpleSet true_self
4679.3.61 by John Arbash Meinel
Invert the logic.
403
    if self is None:
404
        raise TypeError('self must not be None')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
405
    true_self = self
406
    return true_self
407
408
409
cdef PyObject **_lookup(SimpleSet self, object key) except NULL:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
410
    """Find the slot where 'key' would fit.
411
412
    This is the same as a dicts 'lookup' function.
413
414
    :param key: An object we are looking up
415
    :param hash: The hash for key
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
416
    :return: The location in self.table where key should be put.
417
        location == NULL is an exception, but (*location) == NULL just
418
        indicates the slot is empty and can be used.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
419
    """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
420
    # This uses Quadratic Probing:
421
    #  http://en.wikipedia.org/wiki/Quadratic_probing
422
    # with c1 = c2 = 1/2
423
    # This leads to probe locations at:
424
    #  h0 = hash(k1)
425
    #  h1 = h0 + 1
426
    #  h2 = h0 + 3 = h1 + 1 + 1
427
    #  h3 = h0 + 6 = h2 + 1 + 2
428
    #  h4 = h0 + 10 = h2 + 1 + 3
429
    # Note that all of these are '& mask', but that is computed *after* the
430
    # offset.
431
    # This differs from the algorithm used by Set and Dict. Which, effectively,
432
    # use double-hashing, and a step size that starts large, but dwindles to
433
    # stepping one-by-one.
434
    # This gives more 'locality' in that if you have a collision at offset X,
435
    # the first fallback is X+1, which is fast to check. However, that means
436
    # that an object w/ hash X+1 will also check there, and then X+2 next.
437
    # However, for objects with differing hashes, their chains are different.
438
    # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
439
    # So different hashes diverge quickly.
440
    # A bigger problem is that we *only* ever use the lowest bits of the hash
441
    # So all integers (x + SIZE*N) will resolve into the same bucket, and all
442
    # use the same collision resolution. We may want to try to find a way to
443
    # incorporate the upper bits of the hash with quadratic probing. (For
444
    # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
445
    cdef size_t i, n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
446
    cdef Py_ssize_t mask
4679.3.61 by John Arbash Meinel
Invert the logic.
447
    cdef long key_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
448
    cdef PyObject **table, **slot, *cur, **free_slot, *py_key
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
449
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
450
    py_key = <PyObject *>key
451
    # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
452
    #       (it treats hash() as returning an 'int' rather than a 'long')
453
    key_hash = PyObject_Hash(py_key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
454
    i = <size_t>key_hash
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
455
    mask = self._mask
456
    table = self._table
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
457
    free_slot = NULL
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
458
    for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
459
        slot = &table[i & mask]
460
        cur = slot[0]
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
461
        if cur == NULL:
462
            # Found a blank spot
463
            if free_slot != NULL:
464
                # Did we find an earlier _dummy entry?
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
465
                return free_slot
466
            else:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
467
                return slot
468
        if cur == py_key:
469
            # Found an exact pointer to the key
470
            return slot
471
        if cur == _dummy:
472
            if free_slot == NULL:
473
                free_slot = slot
474
        elif _is_equal(py_key, key_hash, cur):
475
            # Both py_key and cur belong in this slot, return it
476
            return slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
477
        i = i + 1 + n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
478
    raise AssertionError('should never get here')
479
480
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
481
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
482
    """Find the slot where 'key' would fit.
483
484
    This is the same as a dicts 'lookup' function. This is a private
485
    api because mutating what you get without maintaing the other invariants
486
    is a 'bad thing'.
487
488
    :param key: An object we are looking up
489
    :param hash: The hash for key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
490
    :return: The location in self._table where key should be put
4679.3.61 by John Arbash Meinel
Invert the logic.
491
        should never be NULL, but may reference a NULL (PyObject*)
492
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
493
    return _lookup(_check_self(self), key)
4679.3.61 by John Arbash Meinel
Invert the logic.
494
495
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
496
cdef api object SimpleSet_Add(object self, object key):
497
    """Add a key to the SimpleSet (set).
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
498
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
499
    :param self: The SimpleSet to add the key to.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
500
    :param key: The key to be added. If the key is already present,
501
        self will not be modified
502
    :return: The current key stored at the location defined by 'key'.
503
        This may be the same object, or it may be an equivalent object.
504
        (consider dict.setdefault(key, key))
505
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
506
    return _check_self(self)._add(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
507
    
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
508
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
509
cdef api int SimpleSet_Contains(object self, object key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
510
    """Is key present in self?"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
511
    return (key in _check_self(self))
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
512
513
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
514
cdef api int SimpleSet_Discard(object self, object key) except -1:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
515
    """Remove the object referenced at location 'key'.
516
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
517
    :param self: The SimpleSet being modified
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
518
    :param key: The key we are checking on
519
    :return: 1 if there was an object present, 0 if there was not, and -1 on
520
        error.
521
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
522
    return _check_self(self)._discard(key)
523
524
525
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
526
    """Get a pointer to the object present at location 'key'.
527
528
    This returns an object which is equal to key which was previously added to
529
    self. This returns a borrowed reference, as it may also return NULL if no
530
    value is present at that location.
531
532
    :param key: The value we are looking for
533
    :return: The object present at that location
534
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
535
    return _check_self(self)._get(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
536
537
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
538
cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
539
    """Get the number of active entries in 'self'"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
540
    return _check_self(self)._used
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
541
542
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
543
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
544
    """Walk over items in a SimpleSet.
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
545
546
    :param pos: should be initialized to 0 by the caller, and will be updated
547
        by this function
548
    :param key: Will return a borrowed reference to key
549
    :return: 0 if nothing left, 1 if we are returning a new value
550
    """
551
    cdef Py_ssize_t i, mask
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
552
    cdef SimpleSet true_self
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
553
    cdef PyObject **table
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
554
    true_self = _check_self(self)
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
555
    i = pos[0]
556
    if (i < 0):
557
        return 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
558
    mask = true_self._mask
559
    table= true_self._table
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
560
    while (i <= mask and (table[i] == NULL or table[i] == _dummy)):
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
561
        i = i + 1
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
562
    pos[0] = i + 1
563
    if (i > mask):
564
        return 0 # All done
565
    if (key != NULL):
566
        key[0] = table[i]
567
    return 1
568
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
569
570
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit, void *arg):
571
    """This is an implementation of 'tp_traverse' that hits the whole table.
572
573
    Cython/Pyrex don't seem to let you define a tp_traverse, and they only
574
    define one for you if you have an 'object' attribute. Since they don't
575
    support C arrays of objects, we access the PyObject * directly.
576
    """
577
    cdef Py_ssize_t pos
578
    cdef PyObject *next_key
579
    cdef int ret
580
581
    pos = 0
582
    while SimpleSet_Next(self, &pos, &next_key):
583
        ret = visit(next_key, arg)
584
        if ret:
585
            return ret
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
586
    return 0
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
587
588
# It is a little bit ugly to do this, but it works, and means that Meliae can
589
# dump the total memory consumed by all child objects.
590
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse