~bzr-pqm/bzr/bzr.dev

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Definition of a class that is used to intern StaticTuple objects."""

cdef extern from "Python.h":
    ctypedef unsigned long size_t
    ctypedef long (*hashfunc)(PyObject*)
    ctypedef PyObject *(*richcmpfunc)(PyObject *, PyObject *, int)
    int Py_EQ
    PyObject *Py_True
    PyObject *Py_NotImplemented
    void Py_INCREF(PyObject *)
    void Py_DECREF(PyObject *)
    ctypedef struct PyTypeObject:
        hashfunc tp_hash
        richcmpfunc tp_richcompare

    PyTypeObject *Py_TYPE(PyObject *)
        
    void *PyMem_Malloc(size_t nbytes)
    void PyMem_Free(void *)
    void memset(void *, int, size_t)

cdef object _dummy_obj
cdef PyObject *_dummy
_dummy_obj = object()
_dummy = <PyObject *>_dummy_obj


cdef inline int _is_equal(PyObject *this, long this_hash, PyObject *other):
    cdef long other_hash
    cdef PyObject *res

    if this == other:
        return 1
    other_hash = Py_TYPE(other).tp_hash(other)
    if other_hash != this_hash:
        return 0
    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
    if res == Py_True:
        Py_DECREF(res)
        return 1
    # Only handled for now because we are testing with stuff like tuples versus
    # StaticTuple objects. If we decide to limit StaticTupleInterner to
    # strictly only allowing StaticTuple objects, then this is no longer
    # required, and Py_NotImplemented => not equal
    if res == Py_NotImplemented:
        Py_DECREF(res)
        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
    if res == Py_True:
        Py_DECREF(res)
        return 1
    Py_DECREF(res)
    return 0


cdef public api class StaticTupleInterner [object StaticTupleInternerObject,
                                           type StaticTupleInterner_type]:
    """This class tracks the canonical forms for StaticTuples.

    It is similar in function to the interned dictionary that is used by
    strings. However:

      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
         of it
      2) It only stores one reference to the object, rather than 2 (key vs
         key:value)

    As such, it uses 1/3rd the amount of memory to store a pointer to the
    interned object.
    """
    # Attributes are defined in the .pxd file
    DEF DEFAULT_SIZE=1024
    DEF PERTURB_SHIFT=5

    # Note that most of the members on this class are just thunks over to the C
    # api. However, this provides a nice Python/Pyrex api, as well as making it
    # easy to test the C api from pure python.

    def __init__(self):
        cdef Py_ssize_t size, n_bytes

        size = DEFAULT_SIZE
        self.mask = size - 1
        self.used = 0
        self.fill = 0
        n_bytes = sizeof(PyObject*) * size;
        self.table = <PyObject **>PyMem_Malloc(n_bytes)
        if self.table == NULL:
            raise MemoryError()
        memset(self.table, 0, n_bytes)

    def __dealloc__(self):
        if self.table != NULL:
            PyMem_Free(self.table)
            self.table = NULL

    def __len__(self):
        return self.used

    def _test_lookup(self, key):
        cdef PyObject **slot

        slot = _lookup(self, key)
        if slot[0] == NULL:
            res = '<null>'
        elif slot[0] == _dummy:
            res = '<dummy>'
        else:
            res = <object>slot[0]
        return <int>(slot - self.table), res

    def __contains__(self, key):
        """Is key present in this StaticTupleInterner."""
        cdef PyObject **slot

        slot = _lookup(self, key)
        if slot[0] == NULL or slot[0] == _dummy:
            return False
        return True

    cdef PyObject *_get(self, object key) except? NULL:
        """Return the object (or nothing) define at the given location."""
        cdef PyObject **slot

        slot = _lookup(self, key)
        if slot[0] == NULL or slot[0] == _dummy:
            return NULL
        return slot[0]

    def __getitem__(self, key):
        """Return a stored item that is equivalent to key."""
        cdef PyObject *py_val

        py_val = self._get(key)
        if py_val == NULL:
            raise KeyError("Key %s is not present" % key)
        val = <object>(py_val)
        return val

    # def __setitem__(self, key, value):
    #     assert key == value
    #     self._add(key)

    cdef int _insert_clean(self, PyObject *key) except -1:
        """Insert a key into self.table.

        This is only meant to be used during times like '_resize',
        as it makes a lot of assuptions about keys not already being present,
        and there being no dummy entries.
        """
        cdef size_t i, perturb, mask
        cdef long the_hash
        cdef PyObject **table, **entry

        mask = self.mask
        table = self.table

        the_hash = Py_TYPE(key).tp_hash(key)
        i = the_hash & mask
        entry = &table[i]
        perturb = the_hash
        # Because we know that we made all items unique before, we can just
        # iterate as long as the target location is not empty, we don't have to
        # do any comparison, etc.
        while entry[0] != NULL:
            i = (i << 2) + i + perturb + 1
            entry = &table[i & mask]
            perturb >>= PERTURB_SHIFT
        entry[0] = key
        self.fill += 1
        self.used += 1

    cpdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
        """Resize the internal table.

        The final table will be big enough to hold at least min_used entries.
        We will copy the data from the existing table over, leaving out dummy
        entries.

        :return: The new size of the internal table
        """
        cdef Py_ssize_t new_size, n_bytes, remaining
        cdef PyObject **new_table, **old_table, **entry

        new_size = DEFAULT_SIZE
        while new_size <= min_used and new_size > 0:
            new_size = new_size << 1
        # We rolled over our signed size field
        if new_size <= 0:
            raise MemoryError()
        if new_size == (self.mask + 1):
            # Nothing to do
            return new_size
        # TODO: Test this
        # if new_size < self.used:
        #     raise RuntimeError('cannot shrink StaticTupleInterner to something'
        #                        ' smaller than the number of used slots.')
        n_bytes = sizeof(PyObject*) * new_size;
        new_table = <PyObject **>PyMem_Malloc(n_bytes)
        if new_table == NULL:
            raise MemoryError()

        old_table = self.table
        self.table = new_table
        memset(self.table, 0, n_bytes)
        self.mask = new_size - 1
        self.used = 0
        remaining = self.fill
        self.fill = 0

        # Moving everything to the other table is refcount neutral, so we don't
        # worry about it.
        entry = old_table
        while remaining > 0:
            if entry[0] == NULL: # unused slot
                pass 
            elif entry[0] == _dummy: # dummy slot
                remaining -= 1
            else: # active slot
                remaining -= 1
                self._insert_clean(entry[0])
            entry += 1
        PyMem_Free(old_table)
        return new_size

    cpdef object add(self, key):
        """Similar to set.add(), start tracking this key.
        
        There is one small difference, which is that we return the object that
        is stored at the given location. (which is closer to the
        dict.setdefault() functionality.)
        """
        cdef PyObject **slot, *py_key
        cdef int added = 0

        # We need at least one empty slot
        assert self.used < self.mask
        slot = _lookup(self, key)
        py_key = <PyObject *>key
        if (slot[0] == NULL):
            Py_INCREF(py_key)
            self.fill += 1
            self.used += 1
            slot[0] = py_key
            added = 1
        elif (slot[0] == _dummy):
            Py_INCREF(py_key)
            self.used += 1
            slot[0] = py_key
            added = 1
        # No else: clause. If _lookup returns a pointer to
        # a live object, then we already have a value at this location.
        retval = <object>(slot[0])
        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
        if added and (self.fill * 3) >= ((self.mask + 1) * 2):
            # However, we always work for a load factor of 2:1
            self._resize(self.used * 2)
        # Even if we resized and ended up moving retval into a different slot,
        # it is still the value that is held at the slot equivalent to 'key',
        # so we can still return it
        return retval

    cpdef int discard(self, key) except -1:
        """Remove key from the dict, whether it exists or not.

        :return: 0 if the item did not exist, 1 if it did
        """
        cdef PyObject **slot, *py_key

        slot = _lookup(self, key)
        if slot[0] == NULL or slot[0] == _dummy:
            return 0
        self.used -= 1
        Py_DECREF(slot[0])
        slot[0] = _dummy
        return 1


    def __delitem__(self, key):
        """Remove the given item from the dict.

        Raise a KeyError if the key was not present.
        """
        cdef int exists
        exists = self.discard(key)
        if not exists:
            raise KeyError('Key %s not present' % (key,))


cdef api StaticTupleInterner StaticTupleInterner_New():
    """Create a new StaticTupleInterner object."""
    return StaticTupleInterner()


cdef inline int _check_self_not_none(object self) except -1:
    """Check that the parameter is not None.

    Pyrex/Cython will do type checking, but only to ensure that an object is
    either the right type or None. You can say "object foo not None" for pure
    python functions, but not for C functions.
    So this is just a helper for all the apis that need to do the check.
    """
    if self is None:
        raise TypeError('self must not be None')


cdef inline PyObject **_lookup(StaticTupleInterner self,
                               object key) except NULL:
    """Find the slot where 'key' would fit.

    This is the same as a dicts 'lookup' function.

    :param key: An object we are looking up
    :param hash: The hash for key
    :return: The location in self.table where key should be put
        should never be NULL, but may reference a NULL (PyObject*)
    """
    # This is the heart of most functions, which is why it is pulled out as an
    # cdef inline function.
    cdef size_t i, perturb
    cdef Py_ssize_t mask
    cdef long key_hash
    cdef long this_hash
    cdef PyObject **table, **cur, **free_slot, *py_key

    key_hash = hash(key)
    mask = self.mask
    table = self.table
    i = key_hash & mask
    cur = &table[i]
    py_key = <PyObject *>key
    if cur[0] == NULL:
        # Found a blank spot, or found the exact key
        return cur
    if cur[0] == py_key:
        return cur
    if cur[0] == _dummy:
        free_slot = cur
    else:
        if _is_equal(py_key, key_hash, cur[0]):
            # Both py_key and cur[0] belong in this slot, return it
            return cur
        free_slot = NULL
    # size_t is unsigned, hash is signed...
    perturb = key_hash
    while True:
        i = (i << 2) + i + perturb + 1
        cur = &table[i & mask]
        if cur[0] == NULL: # Found an empty spot
            if free_slot: # Did we find a _dummy earlier?
                return free_slot
            else:
                return cur
        if (cur[0] == py_key # exact match
            or _is_equal(py_key, key_hash, cur[0])): # Equivalent match
            return cur
        if (cur[0] == _dummy and free_slot == NULL):
            free_slot = cur
        perturb >>= PERTURB_SHIFT
    raise AssertionError('should never get here')


cdef api PyObject **_StaticTupleInterner_Lookup(object self,
                                                object key) except NULL:
    """Find the slot where 'key' would fit.

    This is the same as a dicts 'lookup' function. This is a private
    api because mutating what you get without maintaing the other invariants
    is a 'bad thing'.

    :param key: An object we are looking up
    :param hash: The hash for key
    :return: The location in self.table where key should be put
        should never be NULL, but may reference a NULL (PyObject*)
    """
    _check_self_not_none(self)
    return _lookup(self, key)


cdef api object StaticTupleInterner_Add(object self, object key):
    """Add a key to the StaticTupleInterner (set).

    :param self: The StaticTupleInterner to add the key to.
    :param key: The key to be added. If the key is already present,
        self will not be modified
    :return: The current key stored at the location defined by 'key'.
        This may be the same object, or it may be an equivalent object.
        (consider dict.setdefault(key, key))
    """
    cdef StaticTupleInterner true_self
    _check_self_not_none(self)
    true_self = self
    return true_self.add(key)
    

cdef api bint StaticTupleInterner_Contains(object self, object key) except -1:
    """Is key present in self?"""
    cdef StaticTupleInterner true_self
    _check_self_not_none(self)
    true_self = self
    return key in true_self


cdef api int StaticTupleInterner_Discard(StaticTupleInterner self,
                                         object key) except -1:
    """Remove the object referenced at location 'key'.

    :param self: The StaticTupleInterner being modified
    :param key: The key we are checking on
    :return: 1 if there was an object present, 0 if there was not, and -1 on
        error.
    """
    cdef StaticTupleInterner true_self
    _check_self_not_none(self)
    true_self = self
    return true_self.discard(key)


cdef api PyObject *StaticTupleInterner_Get(StaticTupleInterner self,
                                           object key) except? NULL:
    """Get a pointer to the object present at location 'key'.

    This returns an object which is equal to key which was previously added to
    self. This returns a borrowed reference, as it may also return NULL if no
    value is present at that location.

    :param key: The value we are looking for
    :return: The object present at that location
    """
    cdef StaticTupleInterner true_self
    _check_self_not_none(self)
    true_self = self
    return true_self._get(key)


cdef api Py_ssize_t StaticTupleInterner_Size(object self) except -1:
    """Get the number of active entries in 'self'"""
    cdef StaticTupleInterner true_self = self
    _check_self_not_none(self)
    return true_self.used